Custom legend for geom_boxplot with connective mean line - r

I have combined ggplot boxplots with a line that connects the mean of each box.
How can I add a legend so that people know the blue circles represent the mean of each boxplot?
library(ggplot2)
library(scales)

# Penalise scientific notation so large values print in fixed notation.
options(scipen = 11)

# Box plot of Realvalue for each development stage, with the per-stage mean
# overlaid as a dotted line and circular markers.
# Columns are named bare inside aes() so they are looked up in `Price`;
# writing `Price$stage_code` bypasses the plot data and is discouraged.
ggplot(Price, aes(x = stage_code, y = Realvalue)) +
  scale_y_continuous(labels = comma) +
  # outlier.shape = NA hides the outlier points.
  geom_boxplot(notch = FALSE, outlier.shape = NA, fill = "red", alpha = 0.2) +
  # Zoom the y axis; coord_cartesian() does not drop data from the summaries.
  coord_cartesian(ylim = c(0, 1000000000)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("Average True Value of Listed Mining Companies\nThroughout Mine Development Stages") +
  xlab("Project Development Stages") +
  ylab("Number of Diluted Stocks x Closing Stock Price") +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  # group = 1 puts every stage in one group so the means join into one line.
  stat_summary(
    fun = mean, geom = "line", linetype = "dotted",
    size = 1.4, color = "Blue", alpha = 0.6, aes(group = 1)
  ) +
  stat_summary(
    fun = mean, geom = "point", alpha = 1, color = "darkblue",
    size = 3.2, shape = 21, fill = "lightblue", stroke = 2
  )

ggplot2 only adds a legend for aesthetics (such as colour) that are mapped inside aes(); colours set as fixed values outside aes() do not get a legend.
edit: I realized from this answer that the legend can be added manually. This is a much better approach.
Just map the color within aes, and use scale_color_manual to add a title and specify the colors:
# Replacement for the point layer: mapping a constant label to colour inside
# aes() creates a colour scale, and with it a legend entry for the mean.
# `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
stat_summary(
  aes(color = "Legend"),
  fun = mean, geom = "point", alpha = 1,
  size = 3.2, shape = 21, fill = "lightblue", stroke = 2
) +
  # The manual scale supplies the legend title and the outline colour.
  scale_colour_manual("Legend title", values = "darkblue")

Related

Color histogram bars using stat_bin ggplot

I would like to change the fill and color of my histogram bars. However, the current approach does not work.
I am using the following fill and color:
# Attempt from the question: the only layer is a stat_bin() drawn with the
# text geom, so the bin counts appear as labels and no bar layer is added.
ggplot(dfAllCounts, aes(x = months)) +
  stat_bin(
    aes(label = after_stat(count)),
    geom = "text",
    binwidth = 6,
    vjust = -1,
    fill = "#d2aa47",
    color = "#163B8B"
  )
However, the actual plot is not displayed properly:
Done!
Fixed with this:
# x-axis breaks every 10 units, from 0 to 170.
br <- seq(0, 178, 10)

ggplot(dfAllCounts, aes(x = months)) +
  # Bars: fill and outline colour go on the default (bar) geom.
  stat_bin(binwidth = 6, fill = "#d2aa47", color = "#163B8B", size = .8, alpha = 0.3) +
  # Count labels: a second stat_bin() with the same binwidth, drawn as text.
  # after_stat(count) replaces the deprecated ..count.. notation.
  stat_bin(binwidth = 6, geom = "text", aes(label = after_stat(count)), vjust = -1) +
  ylim(c(0, 175)) +
  scale_x_continuous(breaks = br)

Add individual observations on ggplot (geom_line)

I have the following data frame and I am plotting the average (Accuracy) per level. But I also want to show the individual data points with shapes (e.g. Accuracy1, Accuracy2, Accuracy3, etc.) on the line. Could anyone help me? Thanks
# Mean Accuracy per Effort_Level drawn as a single line (group = 1), with
# error bars spanning Accuracy - se to Accuracy + se and dashed reference
# lines at -0.5 and 0.5.
ggplot(
  data = Accuracy_means,
  aes(x = Effort_Level, y = Accuracy, group = 1)
) +
  geom_errorbar(
    aes(ymin = Accuracy - se, ymax = Accuracy + se),
    size = 1, width = .05
  ) +
  geom_line(size = 1) +
  geom_hline(
    yintercept = c(-0.5, 0.5),
    colour = "black", linetype = "dashed", size = 0.5
  ) +
  ylim(0, 1) +
  coord_fixed(ratio = 2.5) +
  theme_classic()
It's not clear whether you also want a different line type for each series. If so, here is an approach that uses tidyr to reshape the data to long format (gather, or its successor pivot_longer).
library(tidyverse)

# Reshape every column other than Effort_Level, Accuracy and se into
# name/value pairs (accuracy_vars / values), then draw one line per original
# column plus the mean line and its error bars in red.
# pivot_longer() is the maintained successor of the superseded gather().
Accuracy_means %>%
  pivot_longer(
    cols = -c(Effort_Level, Accuracy, se),
    names_to = "accuracy_vars",
    values_to = "values"
  ) %>%
  ggplot(aes(x = Effort_Level, y = values)) +
  geom_errorbar(
    aes(ymin = Accuracy - se, ymax = Accuracy + se),
    colour = "red", width = 0.05, size = 0.5
  ) +
  # Mapping linetype to the former column name gives one line per column.
  geom_line(aes(linetype = accuracy_vars), size = 1) +
  geom_line(aes(y = Accuracy), colour = "red") +
  coord_fixed(ratio = 2.5) +
  theme_classic()

ggplot2 geom_jitterdodge points and with overlayed dodged boxplots: I want to preserve color in points but force boxplots to be black

Using ggplot2, I want to geom_jitterdodge a swarm of points with overlayed dodged boxplots. The trick is that I want the boxplots to be black, not colored like the points. The point plot looks like this:
It's easy enough to get boxplots in place:
The code for that looks like this:
# Jittered, dodged points with dodged boxplots on top. Both layers inherit
# color = pssa_ela_code, so the boxplot outlines are coloured like the points.
D_cohort1 %>%
  filter(!is.na(pssa_ela_code)) %>%
  ggplot(aes(x = timepoint, y = dibels_lnf, color = pssa_ela_code)) +
  # NOTE(review): width/height are not geom_point() parameters and appear to
  # be ignored here; jitter is configured on position_jitterdodge() - confirm.
  geom_point(
    position = position_jitterdodge(),
    alpha = 1 / 6, size = 2, width = 1 / 3, height = 0
  ) +
  geom_boxplot(
    position = position_dodge2(padding = .3),
    fill = NA, outlier.shape = NA
  ) +
  facet_grid(rows = vars(school_type)) +
  # Draw the legend keys fully opaque although the points are translucent.
  guides(colour = guide_legend(override.aes = list(alpha = 1))) +
  labs(title = "Figure A.1: DIBELS LNF Scores at each Timepoint") +
  theme_cowplot() +
  theme(
    plot.background = element_rect(fill = "aliceblue"),
    panel.border = element_rect(color = "black", fill = NA),
    legend.position = c(.85, .87),
    legend.text = element_text(size = rel(.7))
  )
For visibility's sake, I want the boxplot lines to be black, but I can't quite figure out how to get there. The closest I've come is this (same as before but for the call to geom_boxplot()):
# Second attempt from the question: the boxplot layer drops the inherited
# colour mapping (color = NULL) and groups explicitly by the
# timepoint x pssa_ela_code combination instead.
D_cohort1 %>%
  filter(!is.na(pssa_ela_code)) %>%
  ggplot(aes(x = timepoint, y = dibels_lnf, color = pssa_ela_code)) +
  # NOTE(review): width/height are not geom_point() parameters and appear to
  # be ignored here; jitter is configured on position_jitterdodge() - confirm.
  geom_point(
    position = position_jitterdodge(),
    alpha = 1 / 6, size = 2, width = 1 / 3, height = 0
  ) +
  geom_boxplot(
    aes(color = NULL, group = fct_cross(timepoint, pssa_ela_code)),
    position = position_dodge2(padding = .3),
    fill = NA, outlier.shape = NA
  ) +
  facet_grid(rows = vars(school_type)) +
  # Draw the legend keys fully opaque although the points are translucent.
  guides(colour = guide_legend(override.aes = list(alpha = 1))) +
  labs(title = "Figure A.1: DIBELS LNF Scores at each Timepoint") +
  theme_cowplot() +
  theme(
    plot.background = element_rect(fill = "aliceblue"),
    panel.border = element_rect(color = "black", fill = NA),
    legend.position = c(.85, .87),
    legend.text = element_text(size = rel(.7))
  )
That gets the color effect I want, but positions the boxplots incorrectly. Shown here:
How can I achieve the effect I want: correctly positioned, black boxplots over colored points?
Ok. I slept on it and was able to come up with a solution this morning. The effect I want is shown below. The code used to get there is this:
# Working version: the boxplot layer drops the colour mapping (so outlines use
# the default colour) and maps pssa_ela_code to fill instead; alpha = 0 then
# makes that fill fully transparent.
D_cohort1 %>%
  filter(!is.na(pssa_ela_code)) %>%
  ggplot(aes(x = timepoint, y = dibels_lnf, color = pssa_ela_code)) +
  # NOTE(review): width/height are not geom_point() parameters and appear to
  # be ignored here; jitter is configured on position_jitterdodge() - confirm.
  geom_point(
    position = position_jitterdodge(),
    alpha = 1 / 6, size = 2, width = 1 / 3, height = 0
  ) +
  geom_boxplot(
    aes(color = NULL, fill = pssa_ela_code),
    position = position_dodge2(padding = .3),
    outlier.shape = NA, alpha = 0
  ) +
  facet_grid(rows = vars(school_type)) +
  # Draw the legend keys fully opaque although the points are translucent.
  guides(colour = guide_legend(override.aes = list(alpha = 1))) +
  labs(title = "Figure A.1: DIBELS LNF Scores at each Timepoint") +
  theme_cowplot() +
  theme(
    plot.background = element_rect(fill = "aliceblue"),
    panel.border = element_rect(color = "black", fill = NA),
    legend.position = c(.85, .87),
    legend.text = element_text(size = rel(.7))
  )
It's the same as before but for the call to geom_boxplot(). It took over-riding the color aesthetic and setting fill. Then, alpha=0 makes the fill fully transparent, which is what I want.

geom_path is not drawing a line

I'm making depth profiles with ggplot. Some of the lines are drawn between the variable points using geom_path but some are not, even when I try adding "group=1" (which was the only solution I've found for this problem). I'm doing multiple plots for different lakes and for each lake there is one or multiple variables not getting a line by using geom_path. For the code below only the Chl.a variable is not drawing a line, all the others do. What could this depend on?
I also tried geom_line instead, but this only worked for some variables, since it draws the line following the x-axis, whereas I want the line to go vertically, following the y-axis. Can I achieve this using geom_line, since geom_path doesn't seem to work for all variables?
# Depth profile for Good Spirit Lake: depth on a reversed y axis, one point
# layer and one line layer per parameter, each labelled through a constant
# colour mapping so that it gets its own legend entry.
# Depth.m is named bare inside aes() so it is looked up in `goodspirit`;
# `goodspirit$Depth.m` bypasses the plot data and is discouraged.
gs <- ggplot(goodspirit, aes(y = Depth.m)) +
  geom_point(aes(x = Temp, colour = "Temp")) +
  geom_path(aes(x = Temp, color = "Temp"), size = 1.5) +
  geom_point(aes(x = zDOmg, color = "z(DO mg/L)")) +
  geom_path(aes(x = zDOmg, color = "z(DO mg/L)"), size = 1.5) +
  geom_point(aes(x = Chl.a, color = "Chl.a"), na.rm = TRUE) +
  # NOTE(review): geom_path() breaks the line at missing values, so NA gaps in
  # Chl.a between measured depths would leave isolated points and no visible
  # line - confirm against the data.
  geom_path(aes(x = Chl.a, color = "Chl.a"), na.rm = TRUE, size = 1.5) +
  geom_point(aes(x = zN2O, color = "z(N2O.nM)"), na.rm = TRUE) +
  geom_line(aes(x = zN2O, color = "z(N2O.nM)"), na.rm = TRUE, size = 1.5) +
  geom_point(aes(x = Sal.ppt, color = "Salinity.ppt"), na.rm = TRUE) +
  geom_line(aes(x = Sal.ppt, color = "Salinity.ppt"), na.rm = TRUE, size = 1.5) +
  geom_point(aes(x = zph, color = "z(pH)")) +
  geom_path(aes(x = zph, color = "z(pH)"), size = 1.5) +
  scale_x_continuous(position = "top", limits = c(-3, 5), expand = c(0, 0)) +
  scale_y_reverse(expand = c(0.05, 0)) +
  ylab("Depth (m)") +
  xlab("x") +
  ggtitle("Good spirit lake") +
  labs(colour = "Parameters") +
  # theme_light() is a complete theme: added after theme() it would reset the
  # centred title, so it is applied first and the tweak second.
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))
gs
enter image description here

R - setting manual color based on conditional value

I'm trying to assign a color to a ggplot bar graph based on whether the value is above or below 0.5.
Here is reproducible code below and graph without the color assigned.
# Fix the RNG seed so runif() draws the same sample on every run; without it
# the example is not reproducible.
set.seed(1)

# Four land categories with 25 uniform random agreement scores each.
dnow <- data.frame(
  x = rep(
    c("protected areas", "wildnerness areas", "private lands", "multi-use lands"),
    each = 25
  ),
  y = runif(100)
)

# Horizontal bars of the per-category mean with mean_se error bars and a
# reference line at 0.5.
ggplot(dnow, aes(x = x, y = y)) +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  stat_summary(fun = mean, geom = "bar", position = position_dodge(1)) +
  stat_summary(
    fun.data = mean_se, geom = "errorbar", color = "grey40",
    position = position_dodge(1), width = .2
  ) +
  geom_hline(yintercept = 0.5) +
  labs(y = "Mean Agreement") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.y = element_blank()
  ) +
  coord_flip()
Based on the following stackoverflow question (Setting a conditional color by stat_summary in ggplot) I tried to manually assign colors based on the threshold value of 0.5 using aes in the stat_summary so that bars with values over 0.5 are green and bars with values under 0.5 are red.
The code and output are below. The graph however does not look correct. It created two bars with a "true" or "false" instead of coloring the single bar based on the threshold value. Not sure how to resolve this.
# Attempt from the question, with the fill mapping kept exactly as asked:
# `y > 0.5` is evaluated on the raw rows, so every category is split into a
# TRUE and a FALSE group and gets two bars instead of one.
ggplot(dnow, aes(x = x, y = y)) +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  stat_summary(
    fun = mean, geom = "bar", aes(fill = y > 0.5),
    position = position_dodge(1)
  ) +
  stat_summary(
    fun.data = mean_se, geom = "errorbar", color = "grey40",
    position = position_dodge(1), width = .2
  ) +
  geom_hline(yintercept = 0.5) +
  labs(y = "Mean Agreement") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.y = element_blank()
  ) +
  scale_fill_manual(values = c('red', 'green')) +
  coord_flip()
You could simply change `y` to `..y..` in the fill mapping inside aes() (spelled `after_stat(y)` in ggplot2 3.3.0 and later), although it is probably best to aggregate your data beforehand and use geom_bar, as in the post you linked. This should work:
# One bar per category, filled by whether the *computed* mean exceeds 0.5.
ggplot(dnow, aes(x = x, y = y)) +
  # after_stat(y) is the current spelling of the deprecated ..y.. notation and
  # refers to the value computed by `fun`; `fun` itself replaces the
  # deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  stat_summary(
    fun = mean, geom = "bar", aes(fill = after_stat(y) > 0.5),
    position = position_dodge(1)
  ) +
  stat_summary(
    fun.data = mean_se, geom = "errorbar", color = "grey40",
    position = position_dodge(1), width = .2
  ) +
  geom_hline(yintercept = 0.5) +
  labs(y = "Mean Agreement") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.y = element_blank()
  ) +
  # Name the colours by level: with unnamed values, a plot in which every mean
  # is above 0.5 has only the TRUE level and would paint it red.
  scale_fill_manual(values = c("FALSE" = "red", "TRUE" = "green")) +
  coord_flip()
`..y..` (equivalently `after_stat(y)`) refers to the mean computed by the summary function passed as `fun.y` (named `fun` since ggplot2 3.3.0).
The reason is that you use the original values in your data frame for coloring, not the computed summary statistics. By default grouping will happen on every discrete variable: in your case the x and the value > 0.5. So the stat summary function will compute means for 8 groups, and color the bars based on this. I do not know of any way to color directly based on the computed means. However you could precompute the means in each group, and color based on this pre-computed mean: thus you would have one mean for every x.

Resources