I am making a plot showing two sets of regression coefficients and standard errors and the graph is as follow:
What I want to do further is to add extra variables without any data on the y-axis. For instance, put a label FeatGender on top of the label FeatGenderMale, or for another example, put a label FeatEU in between the label of FeatPartyIDLiberal Democrats and the label of FeatEUIntegrationSupportEUIntegration. Below is the reduced version of data:
coef se low high sex
1 -0.038848364 0.02104994 -0.080106243 0.002409514 Female
2 0.095831201 0.02793333 0.041081877 0.150580526 Female
3 0.050972670 0.02828353 -0.004463052 0.106408391 Female
4 -0.183558492 0.02454943 -0.231675377 -0.135441606 Female
5 0.044879447 0.02712518 -0.008285914 0.098044808 Female
6 -0.003858672 0.03005477 -0.062766024 0.055048681 Male
7 0.003048763 0.04687573 -0.088827676 0.094925203 Male
8 0.015343897 0.03948959 -0.062055700 0.092743494 Male
9 -0.132600259 0.04146323 -0.213868197 -0.051332322 Male
10 -0.029764559 0.04600719 -0.119938650 0.060409533 Male
Here are my codes:
v_name <- c("FeatGenderMale", "FeatPartyIDLabourParty", "FeatPartyIDLiberalDemocrats",
"FeatEUIntegrationOpposeEUIntegration", "FeatEUIntegrationSupportEUIntegration")
t <- ggplot(temp, aes(x=c(v_name,v_name), y=coef, group=sex, colour=sex))
t +
geom_point(position = position_dodge(width = 0.3)) +
geom_errorbar(aes(ymin = low, ymax = high, width = 0), position = position_dodge(0.3)) +
coord_flip() +
scale_x_discrete(limits = rev(v_name)) +
geom_hline(yintercept = 0.0, linetype = "dotted") +
theme(legend.position = "bottom")
Thanks for the help!
Here's an approach that first applies the v_name into the source data frame, but then uses a longer appended version of the v_name vector for the axis.
library(ggplot2); library(dplyr)
# Add the v_name into the table
temp2 <- temp %>% group_by(sex) %>% mutate(v_name = v_name) %>% ungroup()
# Make the dummy label for axis with add'l entries
v_name2 <- append(v_name, "FeatGender", after = 0)
v_name2 <- append(v_name2, "FeatEU", after = 4)
# Plot using the new table
t <- ggplot(temp2, aes(x=v_name, y=coef, group=sex, colour=sex))
t +
geom_point(position = position_dodge(width = 0.3)) +
geom_errorbar(aes(ymin = low, ymax = high, width = 0), position = position_dodge(0.3)) +
coord_flip() +
# ... but use the larger list of axis names
scale_x_discrete(limits = rev(v_name2)) +
geom_hline(yintercept = 0.0, linetype = "dotted") +
theme(legend.position = "bottom")
Related
This is not my data (for confidentiality reasons), but I have tried to create a reproducible example using a dataset included in the ggplot2 library. I have an histogram summarizing the value of some variable by group (factor of 2 levels). First, I did not want the counts but proportions of the total, so I used that code:
library(ggplot2)
library(dplyr)
df_example <- diamonds %>% as.data.frame() %>% filter(cut=="Premium" | cut=="Ideal")
ggplot(df_example,aes(x=z,fill=cut)) +
geom_histogram(aes(y=after_stat(width*density)),binwidth=1,center=0.5,col="black") +
facet_wrap(~cut) +
scale_x_continuous(breaks=seq(0,9,by=1)) +
scale_y_continuous(labels=scales::percent_format(accuracy=2,suffix="")) +
scale_fill_manual(values=c("#CC79A7","#009E73")) +
labs(x="Depth (mm)",y="Count") +
theme_bw() + theme(legend.position="none")
It gave me this as a result.
enter image description here
The issue is that I would like to print the numeric percentages on top of the bins and haven't find a way to do so.
As I saw it done for printing counts elsewhere, I attempted to print them using stat_bin(), including the same y and label values as the y in geom_histogram, thinking it would print the right numbers:
ggplot(df_example,aes(x=z,fill=cut)) +
geom_histogram(aes(y=after_stat(width*density)),binwidth=1,center=0.5,col="black") +
stat_bin(aes(y=after_stat(width*density),label=after_stat(width*density*100)),geom="text",vjust=-.5) +
facet_wrap(~cut) +
scale_x_continuous(breaks=seq(0,9,by=1)) +
scale_y_continuous(labels=scales::percent_format(accuracy=2,suffix="")) +
scale_fill_manual(values=c("#CC79A7","#009E73")) +
labs(x="Depth (mm)",y="%") +
theme_bw() + theme(legend.position="none")
However, it does print way more values than there are bins, these values do not appear consistent with what is portrayed by the bar heights and they do not print in respect to vjust=-.5 which would make them appear slightly above the bars.
enter image description here
What am I missing here? I know that if there was no grouping variable/facet_wrap, I could use after_stat(count/sum(count)) instead of after_stat(width*density) and it seems that it would have fixed my issue. But I need the histograms for both groups to appear next to each other. Thanks in advance!
You have to use the same arguments in stat_bin as for the histogram when adding your labels to get same binning for both layers and to align the labels with the bars:
library(ggplot2)
library(dplyr)
df_example <- diamonds %>%
as.data.frame() %>%
filter(cut == "Premium" | cut == "Ideal")
ggplot(df_example, aes(x = z, fill = cut)) +
geom_histogram(aes(y = after_stat(width * density)),
binwidth = 1, center = 0.5, col = "black"
) +
stat_bin(
aes(
y = after_stat(width * density),
label = scales::number(after_stat(width * density), scale = 100, accuracy = 1)
),
geom = "text", binwidth = 1, center = 0.5, vjust = -.25
) +
facet_wrap(~cut) +
scale_x_continuous(breaks = seq(0, 9, by = 1)) +
scale_y_continuous(labels = scales::number_format(scale = 100)) +
scale_fill_manual(values = c("#CC79A7", "#009E73")) +
labs(x = "Depth (mm)", y = "%") +
theme_bw() +
theme(legend.position = "none")
I am trying to plot a bar graph using ggplot. The graph is displaying as I would like but I can't figure out how to add an Asterix "*" above some of the bars to show significance. Whenever I try it wither adds them to all of the bars or completely seems to skew the graph.
I need to have an Asterix only above
Group A: Treatment A and Treatment B;
Group B: Treatment A
Thankyou!!
Treatment <- rep(c("Treatment A","Treatment A","Treatment B","Treatment B"), 3)
Group <- c(rep(c("A. Paired cohort"), 4),rep(c("B. Low cohort"), 4),rep(c("C. Normal cohort"), 4))
Outcome <- rep(c("Outcome P","Outcome D"),6)
Percent <- c(6.7,3.3,22.6,16.1,4.9,2.4,25,15,8.2,4.1,20.8,17)
df <- data.frame(Treatment,Group,Outcome,Percent)
#keep original order, not alphabetised
df$Outcome <- factor(df$Outcome, levels = unique(df$Outcome)
#plot graph
ggplot(df,
aes(x=Outcome, y=Percent)) +
geom_bar(aes(fill=Treatment),stat="identity", position="dodge")+
theme_classic() +
scale_fill_grey() +
xlab("") + ylab("%") +
facet_wrap(~Group) +
ylim(0,40)
One option would be to
Add an indicator variable to your data to indicate signifcance using e.g. dplyr::case_when.
This indicator could then be used in geom_text to conditionally add an asterisk as a label on top of the desired bars. To align the * with bars we have to map Treatment on the group aes and make use of position_dodge(width = .9), where .9 is the default width of a geom_bar/col. Additionally I set vjust=-.1 to put the labels slightly above the bars.
library(ggplot2)
library(dplyr)
df$significant <- dplyr::case_when(
grepl("^A", df$Group) & grepl("(A|B)$", df$Treatment) ~ TRUE,
grepl("^B", df$Group) & grepl("A$", df$Treatment) ~ TRUE,
TRUE ~ FALSE
)
# plot graph
ggplot(df, aes(x = Outcome, y = Percent)) +
geom_col(aes(fill = Treatment), position = "dodge") +
geom_text(aes(label = ifelse(significant, "*", ""), group = Treatment),
position = position_dodge(width = .9), vjust = -.1, size = 20 / .pt) +
theme_classic() +
scale_fill_grey() +
labs(x = "", y = "%") +
facet_wrap(~Group) +
ylim(0, 40)
I need plot two grouped barcodes with two dataframes that has distinct number of rows: 6, 5.
I tried many codes in R but I don't know how to fix it
Here are my data frames: The Freq colum must be in Y axis and the inter and intra columns must be the x axis.
> freqinter
inter Freq
1 0.293040975264367 17
2 0.296736775990729 2
3 0.297619926364764 4
4 0.587377012109561 1
5 0.595245125315916 4
6 0.597022018595893 2
> freqintra
intra Freq
1 0 3
2 0.293040975264367 15
3 0.597022018595893 4
4 0.598809552335782 2
5 0.898227748764939 6
I expect to plot the barplots in the same plot and could differ inter e intra values by colour
I want a picture like this one:
You probably want a histogram. Use the raw data if possible. For example:
library(tidyverse)
freqinter <- data.frame(x = c(
0.293040975264367,
0.296736775990729,
0.297619926364764,
0.587377012109561,
0.595245125315916,
0.597022018595893), Freq = c(17,2,4,1,4,2))
freqintra <- data.frame(x = c(
0 ,
0.293040975264367,
0.597022018595893,
0.598809552335782,
0.898227748764939), Freq = c(3,15,4,2,6))
df <- bind_rows(freqinter, freqintra, .id = "id") %>%
uncount(Freq)
ggplot(df, aes(x, fill = id)) +
geom_histogram(binwidth = 0.1, position = 'dodge', col = 1) +
scale_fill_grey() +
theme_minimal()
With the data you posted I don't think you can have this graph to look good. You can't have bars thin enough to differentiate 0.293 and 0.296 when your data ranges from 0 to 0.9.
Maybe you could try to treat it as a factor just to illustrate what you want to do:
freqinter <- data.frame(x = c(
0.293040975264367,
0.296736775990729,
0.297619926364764,
0.587377012109561,
0.595245125315916,
0.597022018595893), Freq = c(17,2,4,1,4,2))
freqintra <- data.frame(x = c(
0 ,
0.293040975264367,
0.597022018595893,
0.598809552335782,
0.898227748764939), Freq = c(3,15,4,2,6))
df <- bind_rows(freqinter, freqintra, .id = "id")
ggplot(df, aes(x = as.factor(x), y = Freq, fill = id)) +
geom_bar(stat = "identity", position = position_dodge2(preserve = "single")) +
theme(axis.text.x = element_text(angle = 90)) +
scale_fill_discrete(labels = c("inter", "intra"))
You can also check the problem by not treating your x variable as a factor:
ggplot(df, aes(x = x, y = Freq, fill = id)) +
geom_bar(stat = "identity", width = 0.05, position = "dodge") +
theme(axis.text.x = element_text(angle = 90)) +
scale_fill_discrete(labels = c("inter", "intra"))
Either the bars must be very thin (small width), or you'll get overlapping x intervals breaking the plot.
I have a time series data for different group components. Each group ID with its various time stamps (given as Date) has an hypo and hyper response data. I would like to plot the time series for each of this group by facet (ggplot) for both (1) Group ID and also by response i.e. (2) Hyper and Hypo response so that the picture by response is one top of another. Any help is appreciated.
A demo data set and what I have done so far is given below.
set.seed(1)
tdat <- data.frame(Group = rep(paste0("GroupID-", c("A","B")),
each = 100),
Date = rep(seq(Sys.Date(), by = "1 day", length = 100), 2),
Fitted = c(cumsum(rnorm(100)), cumsum(rnorm(100))),
Signif = rep(NA, 200))
tdat <- transform(tdat, Hyper = Fitted + 1.5, Hypo = Fitted - 1.5)
## select 1 region per Site as signif
take <- sample(10:70, 2)
take[2] <- take[2] + 100
tdat$Signif[take[1]:(take[1]+25)] <- tdat$Fitted[take[1]:(take[1]+25)]
tdat$Signif[take[2]:(take[2]+25)] <- tdat$Fitted[take[2]:(take[2]+25)]
And the data frame looks like this -
> head(tdat)
Group Date Fitted Signif Hyper Hypo
1 GroupID-A 2017-04-18 -0.6264538 NA 0.8735462 -2.1264538
2 GroupID-A 2017-04-19 -0.4428105 NA 1.0571895 -1.9428105
3 GroupID-A 2017-04-20 -1.2784391 NA 0.2215609 -2.7784391
4 GroupID-A 2017-04-21 0.3168417 NA 1.8168417 -1.1831583
5 GroupID-A 2017-04-22 0.6463495 NA 2.1463495 -0.8536505
6 GroupID-A 2017-04-23 -0.1741189 NA 1.3258811 -1.6741189
The time series is given by Date.
The data I have plotted is given below. However my real data has more group ID's and I really want one picture for each group ID with splitting the image for Hyper and Hypo response.
library(ggplot2)
ggplot(tdat, aes(x = Date, y = Fitted, group = Group)) +
geom_line() +
geom_line(mapping = aes(y = Hyper), lty = "dashed") +
geom_line(mapping = aes(y = Hypo), lty = "dashed") +
geom_line(mapping = aes(y = Signif), lwd = 1.3, colour = "red") +
facet_wrap( ~ Group)
Again any help is appreciated.
Thanks
If you will reshape your data with reshape2 or tidyr or data.table and convert wide to long:
library(reshape2)
tdat2<-melt(tdat,id.vars = c("Group","Date","Signif","Fitted"))
ggplot(tdat2, aes(x = Date, y = value, group = Group)) +
geom_line() +
geom_line(mapping = aes(y = Signif), lwd = 1.3, colour = "red") +
facet_wrap( variable~ Group)
How about something like this, using geom_ribbon to show the Hyper and Hypo values:
tdat %>%
ggplot(aes(Date, Fitted)) +
geom_line(lty = "dashed") +
geom_line(aes(y = Signif), lwd = 1.3, color = "red") +
geom_ribbon(aes(ymin = Hypo, ymax = Hyper, group = Group), alpha = 0.2) +
facet_grid(Group ~ .) +
theme_light()
Result:
this must be a FAQ, but I can't find an exactly similar example in the other answers (feel free to close this if you can point a similar Q&A). I'm still a newbie with ggplot2 and can't seem to wrap my head around it quite so easily.
I have 2 data.frames (that come from separate mixed models) and I'm trying to plot them both into the same graph. The data.frames are:
newdat
id Type pred SE
1 1 15.11285 0.6966029
2 1 13.68750 0.9756909
3 1 13.87565 0.6140860
4 1 14.61304 0.6187750
5 1 16.33315 0.6140860
6 1 16.19740 0.6140860
1 2 14.88805 0.6966029
2 2 13.46270 0.9756909
3 2 13.65085 0.6140860
4 2 14.38824 0.6187750
5 2 16.10835 0.6140860
6 2 15.97260 0.6140860
and
newdat2
id pred SE
1 14.98300 0.6960460
2 13.25893 0.9872502
3 13.67650 0.6150701
4 14.39590 0.6178266
5 16.37662 0.6171588
6 16.08426 0.6152017
As you can see, the second data.frame doesn't have Type, whereas the first does, and therefore has 2 values for each id.
What I can do with ggplot, is plot either one, like this:
fig1
fig2
As you can see, in fig 1 ids are stacked by Type on the x-axis to form two groups of 6 ids. However, in fig 2 there is no Type, but instead just the 6 ids.
What I would like to accomplish is to plot fig2 to the left/right of fig1 with similar grouping. So the resulting plot would look like fig 1 but with 3 groups of 6 ids.
The problem is also, that I need to label and organize the resulting figure so that for newdat the x-axis would include a label for "model1" and for newdat2 a label for "model2", or some similar indicator that they are from different models. And to make things even worse, I need some labels for Type in newdat.
My (hopefully) reproducible (but obviously very bad) code for fig 1:
library(ggplot2)
pd <- position_dodge(width=0.6)
ggplot(newdat,aes(x=Type,y=newdat$pred,colour=id))+
geom_point(position=pd, size=5)
geom_linerange(aes(ymin=newdat$pred-1.96*SE,ymax=newdat$pred+1.96*SE), position=pd, size=1.5, linetype=1) +
theme_bw() +
scale_colour_grey(start = 0, end = .8, name="id") +
coord_cartesian(ylim=c(11, 18)) +
scale_y_continuous(breaks=seq(10, 20, 1)) +
scale_x_discrete(name="Type", limits=c("1","2"))
Code for fig 2 is identical, but without the limits in the last line and with id defined for x-axis in ggplot(aes())
As I understand it, defining stuff at ggplot() makes that stuff "standard" along the whole graph, and I've tried to remove the common stuff and separately define geom_point and geom_linerange for both newdat and newdat2, but no luck so far... Any help is much appreciated, as I'm completely stuck.
How about adding first adding some new variables to each dataset and then combining them:
newdat$model <- "model1"
newdat2$model <- "model2"
newdat2$Type <- 3
df <- rbind(newdat, newdat2)
# head(df)
Then we can plot with:
library(ggplot2)
ggplot(df, aes(x = interaction(model, factor(Type)), y = pred, color = factor(id))) +
geom_point(position = position_dodge(width = 0.6), size = 5) +
geom_linerange(aes(ymin = pred - 1.96 * SE, ymax = pred + 1.96 * SE),
position = position_dodge(width = 0.6),
size = 1.5, linetype = 1)
Alternatively, you pass an additional aesthetic to geom_linerange to further delineate the model type:
ggplot(df, aes(x = interaction(model, factor(Type)), y = pred, color = factor(id))) +
geom_point(position = position_dodge(width = 0.6), size = 5) +
geom_linerange(aes(ymin = pred - 1.96 * SE, ymax = pred + 1.96 * SE, linetype = model),
position = position_dodge(width = 0.6),
size = 1.5)
Finally, you may want to considered facets:
ggplot(df, aes(x = interaction(model, factor(Type)), y = pred, color = factor(id))) +
geom_point(position = position_dodge(width = 0.6), size = 5) +
geom_linerange(aes(ymin = pred - 1.96 * SE, ymax = pred + 1.96 * SE),
position = position_dodge(width = 0.6),
size = 1.5) +
facet_wrap(~ id)