Ggplot - How to add 2 different error bars? - r

I am trying to make a bar plot with 2 different confidence intervals - one for the proportion of females in a sample and the other for the proportion of males. Each category of course has its own confidence interval, so how can I draw this graph with only 2 error bars (one per bar) rather than 4?
# Bar chart of the share of rows in each GENDER category, with percent labels
# and two error-bar layers. `data`, ymin1/ymax1 and ymin2/ymax2 are defined
# outside this snippet.
ggplot(data, aes(x= GENDER)) +
# Bar height = this category's count divided by the total count.
geom_bar(aes(y = (..count..)/sum(..count..)), stat="count", fill=c("deeppink","deepskyblue"), alpha=0.7) +
scale_y_continuous("Percent",labels = scales::percent)+
# Percent label built from the same count statistic as the bar heights.
geom_text(aes(label = scales::percent((..count..)/sum(..count..)),
y= ((..count..)/sum(..count..))), stat="count",vjust = 5) +
# Both error-bar layers inherit x = GENDER, so each one is drawn at every bar
# position: two layers x two bars = four error bars.
geom_errorbar (aes(ymin = ymin1, ymax =ymax1), width=0.4, colour = "red", alpha =0.9, size= 1.3)+
geom_errorbar (aes(ymin = ymin2, ymax =ymax2), width=0.4, colour = "red", alpha =0.9, size= 1.3)
Thanks ahead!

You only need to call geom_errorbar once, where ymin and ymax are vectors. The way you've coded it, ggplot is plotting both error bars in both positions because it is expecting a vector of positions for ymin and ymax equal to the number of bars in your plot. E.g.
# Draw the gender-share bar chart with exactly one error bar per bar.
#
# Inputs defined outside this snippet: `data` (with a GENDER column) and the
# scalar interval limits ymin1/ymax1 and ymin2/ymax2.
ymin <- c(ymin1, ymin2)
ymax <- c(ymax1, ymax2)

# ggplot2 only accepts aesthetics of length 1 or nrow(layer data), so the
# length-2 limit vectors cannot be mapped onto the per-observation `data`.
# Give the error-bar layer its own table with one row per bar instead.
# NOTE(review): assumes ymin1/ymax1 belong to the first GENDER category in
# sorted (axis) order and ymin2/ymax2 to the second - confirm.
ci <- data.frame(
  GENDER = sort(unique(data$GENDER)),
  ymin = ymin,
  ymax = ymax
)

ggplot(data, aes(x = GENDER)) +
  # Bar height = this category's count divided by the total count.
  geom_bar(
    aes(y = (..count..) / sum(..count..)),
    stat = "count",
    fill = c("deeppink", "deepskyblue"),
    alpha = 0.7
  ) +
  scale_y_continuous("Percent", labels = scales::percent) +
  geom_text(
    aes(
      label = scales::percent((..count..) / sum(..count..)),
      y = ((..count..) / sum(..count..))
    ),
    stat = "count",
    vjust = 5
  ) +
  # Single error-bar layer; x = GENDER is inherited and matched against `ci`.
  geom_errorbar(
    data = ci,
    aes(ymin = ymin, ymax = ymax),
    width = 0.4,
    colour = "red",
    alpha = 0.9,
    size = 1.3
  )

You could also manage the data ahead of time with dplyr::summarise() to make a dataset that only has one observation per group that has whatever summary values you want. This makes the plotting code a bit more streamlined. I did this on the Chile data from the carData package
data("Chile", package="carData")
library(dplyr)
library(ggplot2)
# Collapse Chile to one row per sex (group count, overall count of recorded
# sex values, share, and binom.test() confidence limits), then plot the
# shares as bars with one error bar and one percent label per bar.
Chile %>%
  group_by(sex) %>%
  dplyr::summarise(
    n_g = n(),                      # rows in this sex group
    n = sum(!is.na(Chile$sex)),     # rows with a recorded sex, all groups
    prop = n_g / n,
    ymin = binom.test(n_g, n)$conf.int[1],
    ymax = binom.test(n_g, n)$conf.int[2]
  ) %>%
  ggplot(aes(x = sex, y = prop, ymin = ymin, ymax = ymax)) +
  # geom_col() uses the y values as bar heights (same as stat = "identity").
  geom_col(fill = c("deeppink", "deepskyblue"), alpha = 0.7) +
  geom_errorbar(colour = "red", width = 0.4, size = 1.3, alpha = 0.9) +
  geom_text(aes(label = scales::percent(prop)), vjust = 5)
EDIT
changed to use prop.test() as in the OP's comment.
data("Chile", package="carData")
library(dplyr)
library(ggplot2)
# Work on a copy of Chile whose `sex` column is called GENDER, matching the
# column name used in the question.
data <- Chile
data <- rename(data, GENDER = sex)

# One row per GENDER with its share and prop.test() interval limits, piped
# straight into the bar chart.
# NOTE(review): with alternative = "greater" prop.test() reports a one-sided
# interval, so ymax is expected to be 1 - confirm this is what is wanted.
data %>%
  group_by(GENDER) %>%
  dplyr::summarise(
    n_g = n(),                        # rows in this GENDER group
    n = sum(!is.na(data$GENDER)),     # rows with a recorded GENDER overall
    prop = n_g / n,
    ymin = prop.test(n_g, n, p = 0.7, alternative = "greater")$conf.int[1],
    ymax = prop.test(n_g, n, p = 0.7, alternative = "greater")$conf.int[2]
  ) %>%
  ggplot(aes(x = GENDER, y = prop, ymin = ymin, ymax = ymax)) +
  # geom_col() uses the y values as bar heights (same as stat = "identity").
  geom_col(fill = c("deeppink", "deepskyblue"), alpha = 0.7) +
  geom_errorbar(colour = "red", width = 0.4, size = 1.3, alpha = 0.9) +
  geom_text(aes(label = scales::percent(prop)), vjust = 5)

Related

How to control ordering of stacked bar chart based on stack segment height?

I am creating a taxonomic bar chart using ggplot, but I need to change the layout of the stacked segments. This is my code to create the plot:
# Stacked bar chart of `value` per Var2, filled by Var1 and rescaled so each
# bar sums to 100%, with one facet column per PEDIS level.
# `full_genus_new` and the palette `Cb64k` are defined outside this snippet.
ggplot(full_genus_new, aes(x = as.factor(Var2), y = value, fill = Var1)) +
  geom_col(aes(order = -value), position = "fill") +
  facet_grid(cols = vars(PEDIS), scales = "free_x", space = "free_x") +
  # Scales
  scale_x_discrete() +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_manual(values = Cb64k) +
  # Axis and legend titles
  labs(x = "Patient", y = "Relative Microbial Activity") +
  guides(fill = guide_legend(title = "Taxa")) +
  # Theme: complete theme first, then the legend tweaks on top of it
  theme_classic() +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 8),
    legend.key.size = unit(0.5, "cm")
  )
Which produces the following:
How can I reorder the stacked segments based on size so that the larger segments start at the bottom and decrease in size until the top of the y axis is reached?
We can manually stack them with rectangles ordered by size:
library(ggplot2)
library(dplyr)
# Example data: total mpg per (am, cyl), expressed as a percentage of the
# am total, then turned into rectangle corners so that, within each am, the
# largest segment sits at the bottom.
d <- mtcars %>%
  group_by(am, cyl) %>%
  summarise(s = sum(mpg)) %>%
  mutate(s_pc = 100 * s / sum(s)) %>%
  arrange(am, desc(s_pc)) %>%
  group_by(am) %>%
  mutate(
    # each segment starts where the running total of the previous ones ended
    ymin = lag(cumsum(s_pc), default = 0),
    ymax = cumsum(s_pc),
    xmin = am - 0.4,
    xmax = am + 0.4,
    fill = factor(cyl)
  )

# Draw each segment as an explicit rectangle instead of a stacked bar.
ggplot(d) +
  geom_rect(
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = fill)
  ) +
  theme_minimal()

why is the geom_errorbar only giving me little lines in the middle of my plot

I have an issue when using geom_errorbar. All I get is little lines in the middle, rather than the whole range.
# Per (ISItime, FIRST_cat) mean and standard deviation of ISIscore.
# `data_long` is defined outside this snippet.
gd <- data_long %>%
group_by(ISItime, FIRST_cat) %>%
summarise(mean_ISI= mean(ISIscore, na.rm = TRUE),
sd_ISI= sd(ISIscore, na.rm = TRUE))
# Pre-compute mean +/- one SD. Note the plot below recomputes these limits
# inline rather than using the ymax/ymin columns.
gd<- gd %>%
mutate(ymax = mean_ISI + sd_ISI, ymin = mean_ISI - sd_ISI)
# One line per FIRST_cat of mean ISI across time points, with +/- 1 SD bars.
ggplot(gd, aes(x = ISItime, y = mean_ISI, group= FIRST_cat, color= FIRST_cat)) +
geom_line (alpha= 0.9, size = 2) + theme_bw() +
# Second line layer with default settings, drawn over the first one.
geom_line()+
# NOTE(review): width = 0.6 and position_dodge(.3) are in x-axis units; how
# wide the caps look depends on whether ISItime is discrete or continuous -
# confirm.
geom_errorbar( aes(x=ISItime, ymin=mean_ISI-sd_ISI, ymax=mean_ISI+sd_ISI), width=0.6,
colour="orange", alpha=0.9, size=.3, position=position_dodge(.3))+
# NOTE(review): ylim() sets hard scale limits; presumably bars whose ends
# fall outside 2..12 are affected - verify against the data range.
ylim(c(2,12))+ labs(x="Month", y="Mean ISI Score") +
scale_x_discrete(labels=c("Pre-Study ISI","T0","T1","T2","T3",
"T4","T5","T6","T7","T8","T9",
"T10","T11","T12")) +
# Sets the x title a second time; this later value replaces "Month".
labs(x = "Timepoint")
If anyone can help, I would be so grateful. I am confused about what I have done wrong and why I am not getting proper error bars.
Thanks.

adding summary statistics to two factor boxplot

I would like to add summary statistics (e.g. the mean) to a boxplot that has two factors. I have tried this:
library(ggplot2)
# Notched boxplots of len per dose, dodged and coloured by supp, with
# whisker caps from stat_boxplot() and a violet point layer for the mean.
ggplot(ToothGrowth, aes(x = factor(dose), y = len)) +
  stat_boxplot(
    mapping = aes(colour = supp, fill = supp),
    geom = "errorbar",
    position = position_dodge(width = 0.85)
  ) +
  geom_boxplot(
    mapping = aes(colour = supp, fill = supp),
    position = position_dodge(width = 0.85),
    notch = TRUE,
    notchwidth = 0.5,
    outlier.size = 2
  ) +
  # This layer maps x = supp and y = dose, unlike the layers above.
  stat_summary(
    mapping = aes(x = supp, y = dose),
    fun.y = mean,
    geom = "point",
    shape = 20,
    size = 7,
    color = "violet",
    fill = "violet"
  ) +
  scale_color_manual(name = "SUPP", values = c("blue", "darkgreen")) +
  scale_fill_manual(name = "SUPP", values = c("lightblue", "green"))
I got this picture:
Is it possible to somehow put the sample size of each box on the plot (e.g. at the top of the whiskers)? I have tried this:
# Same boxplot as the first attempt, plus an attempted geom_text() layer that
# should print the sample size of each box.
ggplot(ToothGrowth, aes(x = factor(dose), y = len)) +
stat_boxplot(geom = "errorbar", aes(col = supp, fill=supp), position = position_dodge(width = 0.85)) +
geom_boxplot(aes(col = supp, fill=supp), notch=T, notchwidth = 0.5, outlier.size=2, position = position_dodge(width = 0.85)) +
# This layer maps x = supp and y = dose, unlike the layers above.
stat_summary(fun.y=mean,aes(supp,dose),geom="point", shape=20, size=7, color="violet", fill="violet") +
scale_color_manual(name = "SUPP", values = c("blue", "darkgreen")) +
scale_fill_manual(name = "SUPP", values = c("lightblue", "green")) +
# The summary step below is not a pipeline: group_by() and summarize() are
# passed as separate arguments to geom_text(), and aes() / position sit inside
# the summarize() call rather than being arguments of geom_text().
geom_text(data = ToothGrowth,
group_by(dose, supp),
summarize(Count = n(),
# quantile() and IQR() are given the whole ToothGrowth data frame here.
q3 = quantile(ToothGrowth, 0.75),
iqr = IQR(ToothGrowth),
aes(x= dose, y = len,label = paste0("n = ",Count, "\n")), position = position_dodge(width = 0.75)))
You can state the aesthetics just once by putting them in the main ggplot call and then they will apply to all of the geom layers: ggplot(ToothGrowth, aes(x = factor(dose), y = len, color=supp, fill=supp))
For the count of observations: The data summary step in geom_text isn't coded properly. Also, to set len (the y-value) for the text placement, the summarize function needs to output values for len.
To add the mean values in the correct locations on the x-axis, use stat_summary with the exact same aesthetics as the other geoms and stats. I've overridden the color aesthetic by setting the color to yellow so that the point markers will be visible on top of the box plot fill colors.
The code to implement the plot is below:
library(tidyverse)
# One shared dodge so whisker caps, boxes, mean markers and count labels all
# line up within each dose.
pd <- position_dodge(width = 0.85)

# Notched boxplots of len per dose, split by supp, with a yellow "+" at each
# group mean and an "n = ..." label just above each group's maximum.
ggplot(
  ToothGrowth,
  aes(x = factor(dose), y = len, color = supp, fill = supp)
) +
  stat_boxplot(geom = "errorbar", position = pd) +
  geom_boxplot(
    position = pd,
    notch = TRUE,
    notchwidth = 0.5,
    outlier.size = 2
  ) +
  # Fixed colour overrides the supp mapping so the marker shows on the fills.
  stat_summary(
    fun.y = mean,
    geom = "point",
    position = pd,
    shape = 3,
    size = 2,
    stroke = 1.5,
    colour = "yellow",
    show.legend = FALSE
  ) +
  scale_color_manual(name = "SUPP", values = c("blue", "darkgreen")) +
  scale_fill_manual(name = "SUPP", values = c("lightblue", "green")) +
  geom_text(
    mapping = aes(label = paste0("n = ", Count)),
    # One row per (dose, supp): the group size, and a y position 5% of the
    # overall len range above that group's maximum.
    data = ToothGrowth %>%
      group_by(dose, supp) %>%
      summarize(
        Count = n(),
        len = max(len) + 0.05 * diff(range(ToothGrowth$len))
      ),
    position = pd,
    size = 3,
    show.legend = FALSE
  ) +
  theme_bw()
Note that the notch goes outside the hinges for all of the box plots. Also, having the sample size just above the maximum of each boxplot seems distracting and unnecessary to me. You could place all of the text annotations at the bottom of the plot like this:
# Alternative label layer (a fragment of a longer `+` chain; `pd` is the
# shared dodge defined with the main plot): every "n = ..." label sits at one
# common height, 5% of the overall len range below the smallest len.
geom_text(
  mapping = aes(label = paste0("n = ", Count)),
  data = ToothGrowth %>%
    group_by(dose, supp) %>%
    summarize(Count = n()) %>%
    ungroup() %>%
    mutate(
      len = min(ToothGrowth$len) - 0.05 * diff(range(ToothGrowth$len))
    ),
  position = pd,
  size = 3,
  show.legend = FALSE
) +

Aligning subsetted data points with ggplot2

I'm trying to build a complex figure that overlays individual data points on a boxplot to display both summary statistics as well as dispersion of the raw data. I have 2 questions in rank order of importance:
How do I center the jittered points around the middle of their respective box plot?
How can I remove the dark dots from the "drv" legend?
Code:
library(ggplot2)
library(dplyr)
# Treat cylinder count as a category so it can be used as a discrete x axis.
mpg$cyl <- as.factor(mpg$cyl)

# Boxplots of hwy per cyl, dodged by drv, with the raw observations overlaid
# as jittered points whose shape encodes fuel type.
mpg %>%
  # `&` is evaluated before `|`; the parentheses only make that visible.
  filter(fl == "p" | (fl == "r" & cyl != "5")) %>%
  sample_n(100) %>%
  ggplot(aes(x = cyl, y = hwy, fill = drv)) +
  stat_boxplot(
    geom = "errorbar",
    position = position_dodge(1),
    width = 0.5
  ) +
  geom_boxplot(outlier.shape = NA, position = position_dodge(1)) +
  geom_point(
    mapping = aes(fill = drv, shape = fl),
    position = position_jitterdodge(dodge.width = 1),
    color = "black",
    size = 3,
    alpha = 0.5,
    show.legend = TRUE
  ) +
  scale_shape_manual(values = c(21, 23))
It looks like the current dodging for geom_point is based on both fill and shape. Use group to indicate you only want to dodge on drv.
You can use override.aes in guide_legend to remove the points from the fill legend.
# Boxplots of hwy per cyl, dodged by drv, with the raw observations overlaid
# as jittered points centred on their own box.
mpg %>%
  # Keep premium/regular fuel and drop 5-cylinder cars. The two conditions
  # are separate arguments (combined with AND): in the original
  # `fl=="p" | fl=="r" & cyl!="5"`, `&` binds tighter than `|`, so the cyl
  # exclusion applied only to the fl == "r" rows.
  filter(fl %in% c("p", "r"), cyl != "5") %>%
  sample_n(100) %>%
  ggplot(aes(cyl, hwy, fill = drv)) +
  stat_boxplot(geom = "errorbar", width = 0.5, position = position_dodge(1)) +
  geom_boxplot(position = position_dodge(1), outlier.shape = NA) +
  # group = drv makes the points dodge on drv only (not on drv x fl), so they
  # line up with the boxes; fill is inherited from the main aes().
  geom_point(
    aes(shape = fl, group = drv),
    color = "black",
    alpha = 0.5,
    size = 3,
    position = position_jitterdodge(jitter.width = 0.1, dodge.width = 1)
  ) +
  scale_shape_manual(values = c(21, 23)) +
  # Hide the point glyph in the fill (drv) legend keys.
  guides(fill = guide_legend(override.aes = list(shape = NA)))

ggplot2: doughnuts, how to conditional color fill with if_else

Following guides like ggplot Donut chart I am trying to draw small gauges, doughnuts with a label in the middle, with the intention to put them later on on a map.
If the value reaches a certain threshold I would like the fill of the doughnut to change to red. Is it possible to achieve with if_else (it would be most natural but it does not work).
library(tidyverse)
# Two-row example reshaped to long form: columns ID, cat (the former column
# name) and val (its value).
df <- tibble(ID=c("A","B"),value=c(0.7,0.5)) %>% gather(key = cat,value = val,-ID)
# First attempt: filled bar chart in polar coordinates.
# NOTE(review): the bare `aes` passed as the first argument of
# scale_fill_manual() is the function itself, not a mapping - confirm intent.
ggplot(df, aes(x = val, fill = cat)) + scale_fill_manual(aes,values = c("red", "yellow"))+
geom_bar(position="fill") + coord_polar(start = 0, theta="y")
# Not referenced again in this snippet (the aes() below uses the literal 1
# and the `val` column for ymax/ymin).
ymax <- max(df$val)
ymin <- min(df$val)
# Second attempt: one ring per ID drawn as a rectangle in polar coordinates,
# with the percentage printed in the middle.
p2 = ggplot(df, aes(fill=cat, y=0, ymax=1, ymin=val, xmax=4, xmin=3)) +
geom_rect(colour="black",stat = "identity") +
# This is the failing line: `val` is looked up as an ordinary variable here,
# not as a column of df, hence "object 'val' not found".
scale_fill_manual(values = if_else (val > 0.5, "red", "black")) +
geom_text( aes(x=0, y=0, label= scales::percent (1-val)), position = position_dodge(0.9))+
coord_polar(theta="y") +
xlim(c(0, 4)) +
theme_void() +
theme(legend.position="none") +
scale_y_reverse() + facet_wrap(facets = "ID")
The scale_fill_manual(values = if_else(...)) part does not work; the error says: Error in if_else(val > 0.5, "red", "black") : object 'val' not found. Is it my error, or does some other solution exist?
I also realize my code is not optimal, initially gather waited for more variables to be included in the plot, but I failed to stack one variable on top of the other. Now one variable should be enough to indicate the percentage of completion. I realise my code is redundant for the purpose. Can you help me out?
A solution for the color problem is to first create a variable in the data and then use that to map the color in the plot:
# Long-form example data plus a `color` column holding the fill to use:
# "red" when val is above 0.5, otherwise "black".
df <- tibble(ID = c("A", "B"), value = c(0.7, 0.5)) %>%
  gather(key = cat, value = val, -ID) %>%
  mutate(color = if_else(val > 0.5, "red", "black"))

# One ring per ID: a rectangle spanning val..1 on y and 3..4 on x, bent into
# a doughnut by the polar coordinates, with the percentage in the middle.
p2 <- ggplot(
  df,
  aes(fill = color, y = 0, ymax = 1, ymin = val, xmax = 4, xmin = 3)
) +
  geom_rect(stat = "identity", colour = "black") +
  # Map each value of the `color` column to the colour of the same name.
  scale_fill_manual(values = c(red = "red", black = "black")) +
  geom_text(
    aes(x = 0, y = 0, label = scales::percent(1 - val)),
    position = position_dodge(0.9)
  ) +
  coord_polar(theta = "y") +
  xlim(c(0, 4)) +
  theme_void() +
  theme(legend.position = "none") +
  scale_y_reverse() +
  facet_wrap(facets = "ID")
The result would be:

Resources