I have a bar graph that looks like this:
and I am trying to add standard error bars to it - two error bars for each category (one for the positive Y bar, and one for the negative N bar). I am aware of geom_errorbarh, but I cannot get it to work for this type of bar graph.
Here is a reproducible example with the code that I used to get a bar chart like the one above:
Dataframe
# Example data: one mean (and SD) per pen characteristic and presence level.
# "Y" rows hold the positive means, "N" rows the negative ones.
Behavior <- rep(c("Hammock", "Climbing Trees", "Structures", "Grade"), each = 2)
Presence <- rep(c("Y", "N"), times = 4)
Mean <- c(18.5, -6.4, 3.5, -6.8, 13.2, -10.1, 4.7, -2.3)
SD <- c(17.6, -11.9, 1.2, -4.4, 3.6, -6.25, 1.23, -0.4)
DF <- data.frame(Behavior, Presence, Mean, SD)
Coord Flip Geom Bar
# Diverging horizontal bar chart: the "N" means are negative and extend to one
# side of zero, the "Y" means are positive and extend to the other.
brks <- seq(-20, 20, 2)
# Tick labels are simply the break values rendered as text.
lbls <- as.character(brks)

ggplot(DF, aes(x = Behavior, y = Mean, fill = Presence)) +
  # One layer per Presence level, so each side of zero is drawn separately.
  geom_col(data = subset(DF, Presence == "N")) +
  geom_col(data = subset(DF, Presence == "Y")) +
  scale_y_continuous(breaks = brks, labels = lbls) +
  scale_fill_manual(
    values = c("#0b6bb6", "#6eaf46"),
    name = "",
    breaks = c("N", "Y"),
    labels = c("N", "Y")
  ) +
  coord_flip() +
  theme_bw() +
  xlab("Pen Characteristic - Behavior") +
  # Final component: no trailing `+`, which would leave the expression
  # unfinished and make R try to add whatever follows to the plot.
  ylab("Average Behavior per Session")
Is it possible to get the SE bars on this type of graph?
Thanks!
As @Jakub pointed out in his comment, SD values must be positive.
What you normally do is something like this:
library(ggplot2)

# Fixed seed so the randomly drawn error-bar half-widths are reproducible.
set.seed(1)

Behavior <- rep(c("Hammock", "Climbing Trees", "Structures", "Grade"), each = 2)
Presence <- rep(c("Y", "N"), times = 4)
Mean <- c(18.5, -6.4, 3.5, -6.8, 13.2, -10.1, 4.7, -2.3)
SD <- c(17.6, -11.9, 1.2, -4.4, 3.6, -6.25, 1.23, -0.4)

# Stand-in spread used for the error bars: one uniform(0, 1) draw per row.
my_sd <- runif(length(Behavior))

DF <- data.frame(Behavior, Presence, Mean, SD, my_sd)

brks <- seq(-20, 20, 2)

ggplot(DF, aes(x = Behavior, y = Mean, fill = Presence)) +
  geom_col() +
  # Bars run from Mean - my_sd to Mean + my_sd around each column end.
  geom_errorbar(aes(ymin = Mean - my_sd, ymax = Mean + my_sd)) +
  scale_y_continuous(breaks = brks) +
  scale_fill_manual(
    values = c("#0b6bb6", "#6eaf46"),
    name = "",
    breaks = c("N", "Y"),
    labels = c("N", "Y")
  ) +
  coord_flip() +
  theme_bw() +
  labs(
    x = "Pen Characteristic - Behavior",
    y = "Average Behavior per Session"
  )
Related
This is not my data (for confidentiality reasons), but I have tried to create a reproducible example using a dataset included in the ggplot2 library. I have a histogram summarizing the value of some variable by group (a factor with 2 levels). First, I did not want the counts but proportions of the total, so I used this code:
library(ggplot2)
library(dplyr)

# Keep only the two cuts being compared.
df_example <- diamonds %>%
  as.data.frame() %>%
  filter(cut %in% c("Premium", "Ideal"))

ggplot(df_example, aes(x = z, fill = cut)) +
  # width * density turns the per-group density into a per-bin proportion.
  geom_histogram(
    aes(y = after_stat(width * density)),
    binwidth = 1,
    center = 0.5,
    col = "black"
  ) +
  facet_wrap(~cut) +
  scale_x_continuous(breaks = 0:9) +
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 2, suffix = "")
  ) +
  scale_fill_manual(values = c("#CC79A7", "#009E73")) +
  labs(x = "Depth (mm)", y = "Count") +
  theme_bw() +
  theme(legend.position = "none")
It gave me this as a result.
enter image description here
The issue is that I would like to print the numeric percentages on top of the bins and haven't found a way to do so.
As I saw it done for printing counts elsewhere, I attempted to print them using stat_bin(), including the same y and label values as the y in geom_histogram, thinking it would print the right numbers:
# Attempt at labelling each bin with its percentage.
ggplot(df_example,aes(x=z,fill=cut)) +
# Bars: per-bin proportion, binned with binwidth = 1 and center = 0.5.
geom_histogram(aes(y=after_stat(width*density)),binwidth=1,center=0.5,col="black") +
# Text layer: same y expression, label is that value times 100 (unformatted).
# Note that this call is given no binwidth/center, unlike geom_histogram() above.
stat_bin(aes(y=after_stat(width*density),label=after_stat(width*density*100)),geom="text",vjust=-.5) +
facet_wrap(~cut) +
scale_x_continuous(breaks=seq(0,9,by=1)) +
scale_y_continuous(labels=scales::percent_format(accuracy=2,suffix="")) +
scale_fill_manual(values=c("#CC79A7","#009E73")) +
labs(x="Depth (mm)",y="%") +
theme_bw() + theme(legend.position="none")
However, it does print way more values than there are bins, these values do not appear consistent with what is portrayed by the bar heights and they do not print in respect to vjust=-.5 which would make them appear slightly above the bars.
enter image description here
What am I missing here? I know that if there was no grouping variable/facet_wrap, I could use after_stat(count/sum(count)) instead of after_stat(width*density) and it seems that it would have fixed my issue. But I need the histograms for both groups to appear next to each other. Thanks in advance!
You have to use the same arguments in stat_bin as for the histogram when adding your labels, to get the same binning for both layers and to align the labels with the bars:
library(ggplot2)
library(dplyr)

# Restrict the diamonds data to the two cuts shown in the facets.
df_example <- diamonds %>%
  as.data.frame() %>%
  filter(cut %in% c("Premium", "Ideal"))

ggplot(df_example, aes(x = z, fill = cut)) +
  geom_histogram(
    aes(y = after_stat(width * density)),
    binwidth = 1,
    center = 0.5,
    col = "black"
  ) +
  # The text layer repeats binwidth/center so its bins coincide with the bars;
  # the label is the bin proportion scaled to a whole-number percentage.
  stat_bin(
    aes(
      y = after_stat(width * density),
      label = after_stat(
        scales::number(width * density, scale = 100, accuracy = 1)
      )
    ),
    geom = "text",
    binwidth = 1,
    center = 0.5,
    vjust = -.25
  ) +
  facet_wrap(~cut) +
  scale_x_continuous(breaks = seq(0, 9, by = 1)) +
  scale_y_continuous(labels = scales::number_format(scale = 100)) +
  scale_fill_manual(values = c("#CC79A7", "#009E73")) +
  labs(x = "Depth (mm)", y = "%") +
  theme_bw() +
  theme(legend.position = "none")
I am trying to add captions as it appears in this post.
For that reason, I need the real scale of the plot (x and y axis) when I am using facet_grid. I know that I can use layer_data, since it saves everything from the plot... However, it is not really accurate, because when I try to establish the limits using min and max from that output, the plot changes.
Here you have an example:
library(ggplot2)
library(dplyr)
# Raw sample values for the first group (60 observations).
val1 <- c(2.1490626,2.2035281,1.5927854,3.1399245,2.3967338,3.7915825,4.6691277,3.0727319,2.9230937,2.6239759,3.7664386,4.0160378,1.2500835,4.7648343,0.0000000,5.6740227,2.7510256,3.0709322,2.7998003,4.0809085,2.5178086,5.9713330,2.7779843,3.6724801,4.2648527,3.6841084,2.5597235,3.8477471,2.6587736,2.2742209,4.5862788,6.1989269,4.1167091,3.1769325,4.2404515,5.3627032,4.1576810,4.3387921,1.4024381,0.0000000,4.3999099,3.4381837,4.8269218,2.6308474,5.3481382,4.9549753,4.5389650,1.3002293,2.8648220,2.4015338,2.0962332,2.6774765,3.0581759,2.5786137,5.0539080,3.8545796,4.3429043,4.2233248,2.0434363,4.5980727)
# Raw sample values for the second group (24 observations).
val2 <- c(3.7691229,3.6478055,0.5435826,1.9665861,3.0802654,1.2248374,1.7311236,2.2492826,2.2365337,1.5726119,2.0147144,2.3550348,1.9527204,3.3689502,1.7847986,3.5901329,1.6833872,3.4240479,1.8372175,0.0000000,2.5701453,3.6551315,4.0327091,3.8781182)
# Stack both samples into one long data frame; `id` records the source
# ("df1" or "df2", the names lst() assigns automatically).
df1 <- data.frame(value = val1)
df2 <- data.frame(value = val2)
data <- bind_rows(lst(df1, df2), .id = "id")

# Alternate Male/Female down the rows. The length is taken from the data
# instead of a hard-coded row count, so it stays valid if the inputs change.
data$Sex <- rep_len(c("Male", "Female"), nrow(data))

# Density curve plus density-scaled histogram, one panel per id x Sex.
p <- data %>%
  ggplot(aes(value)) +
  geom_density(lwd = 1.2, colour = "red", show.legend = FALSE) +
  # after_stat(density) replaces the deprecated `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density), fill = id),
    bins = 10, col = "black", alpha = 0.2
  ) +
  facet_grid(id ~ Sex) +
  xlab("type_data") +
  ylab("Density") +
  ggtitle("title") +
  guides(fill = guide_legend(title = "legend_title")) +
  theme(strip.text.y = element_blank())
p

# layer_data() returns the first layer by default, i.e. the density curve
# here, not the histogram (that would be i = 2).
plot_info <- layer_data(p)
> min(plot_info$density)
[1] 7.166349e-09
> max(plot_info$density)
[1] 0.5738021
As you can see in the plot, the y-axis starts at 0 and it finishes at around 0.7, more or less. However, the maximum density is 0.57.
If I try to use the info from layer_data:
# Restrict both axes to the ranges found in the extracted layer data.
y_range <- range(plot_info$density)
x_range <- range(plot_info$x)
p + coord_cartesian(clip = "off", ylim = y_range, xlim = x_range)
The plot changes completely.
Does anyone know how can I get the scales that ggplot2 and facet_grid are using? I need the information of the density (y_axis) and the info from the x_axis.
Yes, to get the scales directly, use layer_scales(p), which gives you the range of the axes rather than just the range of the data, which is what you get from layer_data(p)
# Look the trained scales up once and reuse their ranges as the axis limits.
axis_scales <- layer_scales(p)
p + coord_cartesian(
  clip = "off",
  ylim = axis_scales$y$range$range,
  xlim = axis_scales$x$range$range
)
Or, to combine this question with your last, where you add the text labels outside of the plotting panels, your result might be something like:
# One label row per Sex panel, both attached to the "df2" facet row.
sex_labels <- data.frame(
  value = c(0, 6),
  id = c("df2", "df2"),
  Sex = c("Female", "Male")
)

axis_scales <- layer_scales(p)

p +
  coord_cartesian(
    clip = "off",
    ylim = axis_scales$y$range$range,
    xlim = axis_scales$x$range$range
  ) +
  # y = -0.15 is a negative position; clip = "off" above is what allows
  # drawing outside the panel area.
  geom_text(data = sex_labels, aes(y = -0.15, label = Sex))
Does this help?
# Open the help page, then summarise the data computed for the second layer.
help("layer_data")
second_layer <- layer_data(p, i = 2)
summary(second_layer)
i is the index of the layer you want to return.
You can then take the min of xmin, the max of xmax, etc.
I am trying to plot a bar graph using ggplot. The graph is displaying as I would like, but I can't figure out how to add an asterisk "*" above some of the bars to show significance. Whenever I try, it either adds them to all of the bars or seems to skew the graph completely.
I need to have an asterisk only above
Group A: Treatment A and Treatment B;
Group B: Treatment A
Thankyou!!
# Example data: percentage per treatment, cohort and outcome (12 rows).
Treatment <- rep(c("Treatment A", "Treatment A", "Treatment B", "Treatment B"), 3)
Group <- rep(c("A. Paired cohort", "B. Low cohort", "C. Normal cohort"), each = 4)
Outcome <- rep(c("Outcome P", "Outcome D"), 6)
Percent <- c(6.7, 3.3, 22.6, 16.1, 4.9, 2.4, 25, 15, 8.2, 4.1, 20.8, 17)
df <- data.frame(Treatment, Group, Outcome, Percent)
# Keep original order, not alphabetised: levels follow first appearance.
# (The closing parenthesis of factor() was missing, which is a parse error.)
df$Outcome <- factor(df$Outcome, levels = unique(df$Outcome))
# Plot graph: dodged bars per treatment, one panel per cohort.
ggplot(df, aes(x = Outcome, y = Percent, fill = Treatment)) +
  geom_col(position = "dodge") +
  facet_wrap(~Group) +
  scale_fill_grey() +
  scale_y_continuous(limits = c(0, 40)) +
  labs(x = "", y = "%") +
  theme_classic()
One option would be to
Add an indicator variable to your data to indicate significance using e.g. dplyr::case_when.
This indicator could then be used in geom_text to conditionally add an asterisk as a label on top of the desired bars. To align the * with bars we have to map Treatment on the group aes and make use of position_dodge(width = .9), where .9 is the default width of a geom_bar/col. Additionally I set vjust=-.1 to put the labels slightly above the bars.
library(ggplot2)
library(dplyr)

# Flag the bars that get an asterisk:
#   groups starting with "A": treatments ending in A or B
#   groups starting with "B": treatments ending in A
df <- df %>%
  mutate(
    significant = case_when(
      grepl("^A", Group) & grepl("(A|B)$", Treatment) ~ TRUE,
      grepl("^B", Group) & grepl("A$", Treatment) ~ TRUE,
      TRUE ~ FALSE
    )
  )

# Same dodge width as the bars, so each label sits over its own bar.
label_dodge <- position_dodge(width = .9)

# Plot graph
ggplot(df, aes(x = Outcome, y = Percent)) +
  geom_col(aes(fill = Treatment), position = "dodge") +
  geom_text(
    aes(label = ifelse(significant, "*", ""), group = Treatment),
    position = label_dodge,
    vjust = -.1,
    size = 20 / .pt
  ) +
  facet_wrap(~Group) +
  scale_fill_grey() +
  scale_y_continuous(limits = c(0, 40)) +
  labs(x = "", y = "%") +
  theme_classic()
I need to replicate a certain format of a histogram/bar chart. I have already made some good modifications with ggplot in order to group the categorical x-variable and specify the colors with HEX.
Here is what I try to plot/replicate:
Here is a MWE for my data structure:
# Toy data: 100 random draws of sex (0/1) and score group (2 to 5).
sex <- sample(0:1, 100, replace = TRUE)
group <- sample(2:5, 100, replace = TRUE)
data <- data.frame(sex, group)

library(ggplot2)

sex_palette <- c("#b6181f", "#f6b8bb")

# Dodged histogram of group, split and coloured by sex.
ggplot(data, aes(x = group, group = sex, fill = factor(sex))) +
  geom_histogram(binwidth = 0.45, position = "dodge") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = sex_palette) +
  guides(fill = guide_legend(title = "sex")) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
I get:
Small things I can't handle are:
replace the factor labels on the x-axis, there might be a problem with my histogram-approach, but I also found no practical way with a bar-chart
round the percentage-digits, no decimals for percentages
But most important is, that I don't know how to add a single percentage-value for one group, one sex to the top of each bar..
I am looking forward for some advice :)
First of all, I would treat your x-axis data as factors and plot it as bars. To put the percentage value as text on top of each bar, see this question: Show % instead of counts in charts of categorical variables.
Furthermore, the y-axis percent values aren't a question of rounding; they actually are not percentage values at all. y = ..prop.. (written after_stat(prop) in current ggplot2) solves that.
Are you looking for that (I summed everything up)?
# Toy data: 100 random draws of sex (0/1) and score group (2 to 5).
sex <- sample(0:1, 100, replace = TRUE)
group <- sample(2:5, 100, replace = TRUE)
data <- data.frame(sex, group)

# Axis tick labels. This character vector shares its name with
# ggplot2::labs(), but the labs(...) call below still finds the function.
labs <- c("Score < 7", "Score\n7 bis < 12", "Score\n12 bis < 15",
          "Score\n15 bis < 20", "Score >= 20")

# after_stat(prop) replaces the deprecated `..prop..` notation; with
# group = sex the proportion is computed within each sex.
ggplot(
  data,
  aes(x = as.factor(group), y = after_stat(prop), group = sex, fill = factor(sex))
) +
  geom_bar(position = "dodge") +
  # Same statistic as the bars, formatted as a percentage and dodged with the
  # default bar width so each label lines up with its bar.
  geom_text(
    aes(label = after_stat(scales::percent(prop))),
    position = position_dodge(width = 0.9),
    stat = "count",
    vjust = 2
  ) +
  labs(x = NULL, y = NULL) +
  guides(fill = guide_legend(title = "sex")) +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = c("#b6181f", "#f6b8bb")) +
  scale_x_discrete(labels = labs)
I am building charts that have two lines in the axis text. The first line contains the group name, the second line contains that group population. I build my axis labels as a single character string with the format "LINE1 \n LINE2". Is it possible to assign different font faces and sizes to LINE1 and LINE2, even though they are contained within a single character string? I would like LINE1 to be large and bolded, and LINE2 to be small and unbolded.
Here's some sample code:
# Sample data: one random response per treatment x gender combination.
Treatment <- rep(c("T", "C"), each = 2)
Gender <- rep(c("Male", "Female"), 2)
Response <- sample(1:100, 4)
test_df <- data.frame(Treatment, Gender, Response)

# factor() makes this work whether or not data.frame() converted the strings
# to factors; levels() on a plain character column would return NULL.
xbreaks <- levels(factor(test_df$Gender))
# Two-line tick labels: group name on the first line, population on the second.
xlabels <- paste(xbreaks, "\n", c("POP1", "POP2"))

# `stat` is not an aesthetic, so it is no longer passed to aes(); geom_col()
# plots the Response values as given.
hist <- ggplot(test_df, aes(x = Gender, y = Response, fill = Treatment))
hist +
  geom_col(position = "dodge") +
  scale_y_continuous(limits = c(0, 100), name = "") +
  scale_x_discrete(labels = xlabels, breaks = xbreaks) +
  # theme()/element_text() replace the removed opts()/theme_text().
  theme(axis.text.x = element_text(face = "bold", size = 12))
I tried this, but the result was one large, bolded entry, and one small, unbolded entry:
# Attempt with vectorised face/size: the values are applied per axis label
# (first label bold 15, second label plain 10), not per line within a label.
# theme()/element_text() replace the removed opts()/theme_text(), and the
# sizes are numeric rather than strings.
hist +
  geom_col(position = "dodge") +
  scale_y_continuous(limits = c(0, 100), name = "") +
  scale_x_discrete(labels = xlabels, breaks = xbreaks) +
  theme(
    axis.text.x = element_text(face = c("bold", "plain"), size = c(15, 10))
  )
Another possible solution is to create separate chart elements, but I don't think that ggplot2 has a 'sub-axis label' element available...
Any help would be very much appreciated.
Cheers,
Aaron
I also think that the graph cannot be made using ggplot2 features alone.
I would use grid.text and grid.gedit.
library(ggplot2)
library(grid) # grid.text() and gpar() come from grid

# Sample data: one random response per treatment x gender combination.
Treatment <- rep(c("T", "C"), each = 2)
Gender <- rep(c("Male", "Female"), 2)
Response <- sample(1:100, 4)
test_df <- data.frame(Treatment, Gender, Response)

# factor() keeps this working when Gender is a plain character column.
xbreaks <- levels(factor(test_df$Gender))
# The empty second line reserves vertical room beneath each tick label.
xlabels <- paste(xbreaks, "\n", c("", ""))

hist <- ggplot(test_df, aes(x = Gender, y = Response, fill = Treatment))
hist +
  geom_col(position = "dodge") +
  scale_y_continuous(limits = c(0, 100), name = "") +
  scale_x_discrete(labels = xlabels, breaks = xbreaks) +
  # theme()/element_text() replace the removed opts()/theme_text().
  theme(axis.text.x = element_text(face = "bold", size = 12))

# Draw the population sub-labels on top of the printed plot. The x/y values
# are hand-tuned page positions and need adjusting for other device sizes.
# The font size is set on these two grobs directly instead of editing every
# grob whose name matches "GRID.text" afterwards.
sub_label_gp <- gpar(fontsize = 8)
grid.text(label = "POP1", x = 0.29, y = 0.06, gp = sub_label_gp)
grid.text(label = "POP2", x = 0.645, y = 0.06, gp = sub_label_gp)
Please tune the code above to suit your environment (e.g. the position of the sub-axis labels and the font size).
I found another simple solution below:
library(ggplot2)

# Sample data: one random response per treatment x gender combination.
Treatment <- rep(c("T", "C"), each = 2)
Gender <- rep(c("Male", "Female"), 2)
Response <- sample(1:100, 4)
test_df <- data.frame(Treatment, Gender, Response)

# factor() keeps this working when Gender is a plain character column;
# the levels come out alphabetically: "Female", "Male".
xbreaks <- levels(factor(test_df$Gender))

# Build the plotmath labels as a fresh expression vector (previously the code
# assigned into elements of an `xlabels` object this snippet never created).
# atop() stacks the bold group name over the smaller population line.
xlabels <- expression(
  atop(bold(Female), scriptstyle("POP1")),
  atop(bold(Male), scriptstyle("POP2"))
)

hist <- ggplot(test_df, aes(x = Gender, y = Response, fill = Treatment))
hist +
  geom_col(position = "dodge") +
  scale_y_continuous(limits = c(0, 100), name = "") +
  # Full argument name `labels` rather than relying on partial matching.
  scale_x_discrete(labels = xlabels, breaks = xbreaks) +
  # theme()/element_text() replace the removed opts()/theme_text().
  theme(axis.text.x = element_text(size = 12))
All,
Using Triad's cheat, this is the closest I was able to get to a solution on this one. Let me know if you have any questions:
library(ggplot2)
library(grid) # unit(), grid.text() and gpar() come from grid

spacing <- 0 # We can adjust how much blank space we have beneath the chart here
labels1 <- paste("Group", c("A", "B", "C", "D"))
# One run of `spacing` newlines per label, used as padding under the name.
labels2 <- rep(paste(rep("\n", spacing), collapse = ""), length(labels1))
labels <- paste(labels1, labels2)

# Blank canvas whose x axis carries the group labels. The y title reproduces
# the one qplot(1:4, 1:4) derived from its arguments.
ggplot(data.frame(x = 1:4, y = 1:4), aes(x = x, y = y)) +
  geom_blank() +
  scale_x_continuous(breaks = seq_along(labels), labels = labels) +
  labs(x = "", y = "1:4") +
  # theme()/element_text() replace the removed opts()/theme_text(); the
  # enlarged bottom margin leaves room for the sub-labels drawn below.
  theme(
    plot.margin = unit(c(1, 1, 3, 0.5), "lines"),
    axis.text.x = element_text(face = "bold", size = 14)
  )

# Assume for now that 0.15 and 0.9 are constant plot boundaries
xseq <- seq(0.15, 0.9, length.out = length(labels))

sample_df <- data.frame(
  group = rep(labels1, each = 2),
  subgroup = rep(c("a", "b"), 4),
  pop = sample(1:10, 8)
)

# One multi-line "subgroup [n = pop]" string per group.
popLabs <- vapply(
  split(sample_df, sample_df$group),
  function(subData) {
    paste(paste0(subData$subgroup, " [n = ", subData$pop, "]"), collapse = "\n")
  },
  character(1)
)

# Draw each group's sub-label beneath its tick by calling grid.text()
# directly, instead of building code strings and running them with
# eval(parse()). The leading newline and font size match the original.
sub_label_gp <- gpar(fontsize = 12)
for (i in seq_along(popLabs)) {
  grid.text(
    label = paste0("\n", popLabs[[i]]),
    x = xseq[i],
    y = 0.1,
    gp = sub_label_gp
  )
}
Cheers,
Aaron