ggplot2 add legend for each geom_point manually - r

I created a plot using 2 separate data sets so that I could create different errorbars. The first data set has error bars that go down only whereas the second data set has error bars that go up only. This prevents unnecessary overlap in the plot. I also used a compound shape for one of the groups.
I want to create a legend based on these shapes (not a colour), but I can't seem to figure it out. Here is the plot code.
# Start from an empty plot; every layer below supplies its own data and aesthetics.
p <-ggplot()
# df.figure.1a: two point layers drawn at the same positions (shapes 5 and 18, the
# "compound shape") with error bars that extend downwards only (Mean - SD to Mean).
p + geom_point(data=df.figure.1a, aes(x=Hour, y=Mean), shape=5, size=4) +
geom_point(data=df.figure.1a, aes(x=Hour, y=Mean), shape=18, size=3) +
geom_errorbar(data=df.figure.1a, aes(x=Hour, y=Mean, ymin = Mean - SD, ymax = Mean), size=0.7, width = 0.4) +
# df.figure.1b: a single point layer (shape 17) with error bars that extend
# upwards only (Mean to Mean + SD).
geom_point(data=df.figure.1b, aes(x=Hour, y=Mean), shape=17, size=4) +
geom_errorbar(data=df.figure.1b, aes(x=Hour, y=Mean, ymin = Mean, ymax = Mean + SD), size=0.7, width = 0.4)

Related

How to flip a geom_area to be under the line when using scale_y_reverse()

I had to flip the axis of my line, but still need the geom_area to be under the curve. However I cannot figure out how to do so.
This is the line of code I tried
ggplot(PalmBeachWell, aes(x=Date, y=Depth.to.Water.Below.Land.Surface.in.ft.)) +
geom_area(position= "identity", fill='lightblue') +
theme_classic() +
geom_line(color="blue") +
scale_y_reverse()
and here is what i got
One option would be to use a geom_ribbon to fill the area above the curve which after applying scale_y_reverse will result in a fill under the curve.
Using some fake example data based on the ggplot2::economics dataset:
library(ggplot2)
# Stand-in data: two columns of ggplot2::economics renamed to the question's column names.
PalmBeachWell <- economics[c("date", "psavert")]
names(PalmBeachWell) <- c("Date", "Depth.to.Water.Below.Land.Surface.in.ft.")
ggplot(PalmBeachWell, aes(x = Date, y = Depth.to.Water.Below.Land.Surface.in.ft.)) +
# Fill from the curve to ymax = Inf, i.e. the area above the curve; once
# scale_y_reverse() is applied that fill ends up under the line.
geom_ribbon(aes(ymin = Depth.to.Water.Below.Land.Surface.in.ft., ymax = Inf),
fill = "lightblue"
) +
geom_line(color = "blue") +
scale_y_reverse() +
theme_classic()

Add bar to bottom of point plot

I am creating a point plot and I want to add a bar to the bottom of it. I can't seem to find out how to do this in the ggplot documentation. I was hoping to add a bar that spans the entire x-axis with a set y-axis value. Here is an example of the data I am working with:
d=data.frame(drink=c("coffee","tea","water"), mean=c(5,6,9), lower=c(4.5,5.6,8.7), upper=c(5.5,6.3,9.5))
and here is the code I am using
ggplot() +
geom_errorbar(data=d, mapping=aes(x=drink, ymin=upper, ymax=lower), width=0.2, size=1, color="blue") +
geom_point(data=d, mapping=aes(x=drink, y=mean), size=4, shape=21, fill="white") +
scale_y_continuous(n.breaks = 10) + ylim(0, 12)
Here is what the plot currently looks like
and this is what I want to add
The annotate() function allows you to directly specify a layer without intermediate data.frame. In ggplot2, the -Inf/Inf values for continuous variables indicate to place something at the extremes.
library(ggplot2)

# Example data: one mean per drink with lower/upper bounds for the error bars.
d <- data.frame(
  drink = c("coffee", "tea", "water"),
  mean = c(5, 6, 9),
  lower = c(4.5, 5.6, 8.7),
  upper = c(5.5, 6.3, 9.5)
)

ggplot(d) +
  # ymin/ymax map to lower/upper respectively (they were swapped before;
  # the drawn bar spans the same interval either way).
  geom_errorbar(
    mapping = aes(x = drink, ymin = lower, ymax = upper),
    width = 0.2, size = 1, color = "blue"
  ) +
  geom_point(
    mapping = aes(x = drink, y = mean),
    size = 4, shape = 21, fill = "white"
  ) +
  # Limits are set on the same scale call as n.breaks, so a single y scale
  # carries both settings.
  scale_y_continuous(n.breaks = 10, limits = c(0, 12)) +
  # -Inf/Inf place the rectangle at the extremes: it spans the full x range
  # and runs from the bottom of the panel up to y = 1.
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 1, fill = "black")
Created on 2021-09-13 by the reprex package (v2.0.1)

Add mean to grouped box plot in R with ggplot2

I created a grouped box plot in R with ggplot2. However, when I want to add the mean, the dots appear between the two boxes in a group. How can I change it such that the dots are within each box?
Here my code:
# Grouped box plot (boxes filled by sex within each treatment) with the mean
# added as a red point via stat_summary(). `fun` is used instead of the
# deprecated `fun.y`; the stray trailing backtick has been removed.
ggplot(results, aes(x=treatment, y=effect, fill=sex)) +
  geom_boxplot() +
  stat_summary(fun=mean, geom="point", shape=20, size=3, color="red")
You can use position_dodge2. Because points and boxplots have differing widths, you will need to trial and error with the width argument to centralise the dots.
# Overlay the group means on dodged boxplots. The points get their own
# position_dodge2(); its width may need tuning by trial and error so the points
# sit in the centre of each box. `fun` replaces the deprecated `fun.y` argument.
ggplot(mtcars, aes(x=factor(gear), y=hp, fill=factor(vs))) +
  geom_boxplot() +
  stat_summary(fun=mean, geom="point", shape=20, size=3, color="red",
               position = position_dodge2(width = 0.75,
                                          preserve = "single"))
In most cases, you will not be able to place the points inside each grouped box as they overlap with each other through the axes. One alternative is to use facet_wrap.
Here is one example with iris data:
ggplot(iris, aes(x=Sepal.Length, y=Sepal.Width, fill=Species)) +
geom_point(aes(color = Species), shape = 20, size = 3) +
geom_boxplot(alpha = 0.8) +
facet_wrap(~Species)
If you don't want the color of the points to be the same as the color of the boxplots, you have to remove the grouping variable from the aes inside the geom_point. Again, with the iris example,
ggplot(iris, aes(x=Sepal.Length, y=Sepal.Width, fill=Species)) +
geom_boxplot(alpha = 0.8) +
geom_point(shape = 20, size = 3, color = 'red') +
facet_wrap(~Species)
Note that the ggplot2 package works in layers. Therefore, if you add the geom_point layer after the geom_boxplot layer, the points will be on the top of the boxplot. If you add the geom_point layer before the geom_boxplot layer, the points will be in the background.
Edit:
If what you want is to add a single point in your boxplot to indicate the mean, you can do something like:
library(dplyr) ## provides %>%, group_by() and mutate() used below

# Compute one mean x/y pair per Species, then draw it as a red point on top of
# the boxplots.
iris %>%
  group_by(Species) %>%
  mutate(mean.y = mean(Sepal.Width),
         mean.x = mean(Sepal.Length)) %>%
  ggplot(aes(x=Sepal.Length, y=Sepal.Width, fill=Species)) +
  geom_boxplot(alpha = 0.8) +
  geom_point(aes(y = mean.y, x = mean.x), shape = 20, size = 3, color = 'red')
But be aware that it would probably require some calibration on the x axis to make it exactly in the middle of each box.

Use position_jitterdodge to plot points, and add highlighted points that are also dodged

I have some data where x is categorical, y is numeric, and color.var is another categorical variable that I would like to color by. My goal is to plot all of the points using position_jitterdodge(), and then highlight a couple of the points, draw a line between them, and add labels, while making sure these highlighted points line up with the corresponding strips of points that were plotted using position_jitterdodge(). The highlighted points are aligned properly when all factors are present in the variable used to dodge, but it does not work well when some factors are missing.
Minimal (non-)working example
library(ggplot2)
Generate some data
d = data.frame(x = c(rep('x1', 1000), rep('x2', 1000)),
y = runif(n=2000, min=0, max=1),
color.var= rep(c('color1', 'color2'), 1000),
facet.var = rep(c('facet1', 'facet1', 'facet2', 'facet2'), 500))
head(d)
dd = d[c(1,2,3,4,1997,1998, 1999,2000),]
dd
df1 = dd[dd$color.var=='color1',] ## data for first set of points, labels, and the line connecting them
df2 = dd[dd$color.var=='color2',] ## data for second set of points, labels, and the line connecting them
df1
dw = .75 ## Define the dodge.width
Plot all points
Here are all of the points, separated using position_jitterdodge() and the aesthetic fill.
ggplot() +
geom_point(data=d, aes(x=x, y=y, fill=color.var), position=position_jitterdodge(dodge.width=dw), size=3, alpha=1, shape=21, color='darkgray') +
facet_wrap(~facet.var) +
scale_fill_manual(values=c( 'lightblue','gray'))+
theme(axis.title = element_blank()) +
theme(legend.position="top")
That works well.
Additional highlighted points.
Here is the same plot, with additional points in dd added.
ggplot() +
geom_point(data=d, aes(x=x, y=y, fill =color.var), position=position_jitterdodge(dodge.width=dw), size=3, alpha=1, shape=21, color='darkgray') +
geom_point(data=dd, aes(x=x, y=y, color=color.var ), position=position_dodge(width=.75), size=4 ) +
geom_line(data=dd, aes(x=x, y=y, color=color.var, group=color.var ), position=position_dodge(width=.75), size=1 ) +
geom_label(data=dd, aes(x=x, y=y, color=color.var, group=color.var, label=round(y,1)), position=position_dodge(width=.75), vjust=-.5) +
facet_wrap(~facet.var) +
scale_fill_manual(values=c( 'lightblue','gray'))+
scale_color_manual(values=c( 'blue', 'gray40')) +
theme(axis.title = element_blank())+
theme(legend.position="top")
This is what I want it to look like. However, this only works properly if both factors of the color.var variable are in the set of points to highlight.
If both factors aren't present in the new data, the horizontal alignment fails.
Highlight points, only one factor present
Here is an example where only the 'color1' factor (blue) is present. Note that data=dd was replaced with data=df1 (data that only contains blue highlighted dots) in this code.
ggplot() +
geom_point(data=d, aes(x=x, y=y, fill =color.var), position=position_jitterdodge(dodge.width=dw), size=3, alpha=1, shape=21, color='darkgray') +
geom_point(data=df1, aes(x=x, y=y, color=color.var ), position=position_dodge(width=.75), size=4 ) +
geom_line(data=df1, aes(x=x, y=y, color=color.var, group=color.var ), position=position_dodge(width=.75), size=1 ) +
geom_label(data=df1, aes(x=x, y=y, color=color.var, group=color.var, label=round(y,1)), position=position_dodge(width=.75), vjust=-.5) +
facet_wrap(~facet.var) +
scale_fill_manual(values=c( 'lightblue','gray'))+
scale_color_manual(values=c( 'blue', 'gray40')) +
theme(axis.title = element_blank())+
theme(legend.position="top") +
scale_x_discrete(drop=F)
The highlighted blue dots appear between the blue and gray dots, instead of being aligned with the blue dots. Note that the additional code scale_x_discrete(drop=F) had no apparent effect on the alignment.
A manual solution
One possible fix is to edit the x coordinate manually, like this
# Manual alignment: shift the highlighted layers left by a quarter of the dodge
# width so they line up with the 'color1' strip of jittered points.
# factor() is applied before as.numeric() so the conversion also works when x is
# a character column (as.numeric() on the strings 'x1'/'x2' would give NA).
# NOTE(review): assumes df1 contains every level of x - confirm for other subsets.
ggplot(data=d, aes(x=x, y=y)) +
  geom_point(aes(fill=color.var), position=position_jitterdodge(dodge.width=dw), size=3, alpha=1, shape=21, color='darkgray') +
  geom_point(data=df1, aes(x=as.numeric(factor(x))-dw/4, y=y), alpha=.9, size=4 , color='blue') + ## first set of points
  geom_line( data=df1, aes(x=as.numeric(factor(x))-dw/4, y=y , group=color.var ), color='blue', size=1) + ## first line
  geom_label(data=df1, aes(x=as.numeric(factor(x))-dw/4, y=y , label=round(y,1)), color='blue', vjust=-.25)+ ## first set of labels
  facet_wrap(~facet.var) +
  scale_fill_manual(values=c( 'lightblue','gray'))+
  theme(axis.title = element_blank()) + ## closing parenthesis was missing here
  theme(legend.position="top")
An adjustment of 1/4 of the dodge.width seems to work. This works fine, but it seems like there should be a better way, especially since I will eventually want to do this with 4-5 sets of highlighted points/lines, which may all be the same color.var, like the blue 'color1' factor above. Repeating this 4-5 times would be cumbersome. I will also eventually want to do this with 5-10 different figures. I suppose dodge.width*1/4 will always work, and copying and pasting might do the trick, but I would like to know if there is a better way.
Here is a solution based on @aosmith's comment. Basically, you just need to add this code before calling ggplot:
library(dplyr) ## needed for group_by()
library(tidyr) ## needed for complete()
df1 = df1 %>% group_by(facet.var, x) %>% complete(color.var)
That adds extra rows to the data so that all the levels of color.var are present. Then the code given in the question, along with a couple of small edits that fix the legend, can be used:
# Background points are jitter-dodged by fill; the highlighted points, line and
# labels are dodged by colour. All layers share the dodge width `dw` so the
# highlighted layers stay aligned with the background strips.
# show.legend uses TRUE/FALSE rather than the reassignable T/F shortcuts.
ggplot() +
  geom_point(data=d  , aes(x=x, y=y, fill =color.var), position=position_jitterdodge(dodge.width=dw), size=3, alpha=1, shape=21, color='darkgray', show.legend=TRUE) +
  geom_point(data=df1, aes(x=x, y=y, color=color.var ), position=position_dodge(width=dw), size=4, show.legend=TRUE ) +
  geom_line( data=df1, aes(x=x, y=y, color=color.var, group=color.var ), position=position_dodge(width=dw), size=1, show.legend=FALSE ) +
  geom_label(data=df1, aes(x=x, y=y, color=color.var, group=color.var, label=round(y,1)), position=position_dodge(width=dw), vjust=-.5, show.legend=FALSE) +
  facet_wrap(~facet.var) +
  # override.aes recolours the outline of the legend keys for the fill legend.
  scale_fill_manual( values=c( 'lightblue','gray'), name='Background dots', guide=guide_legend(override.aes = list(color=c('lightblue', 'gray')))) +
  scale_color_manual(values=c( 'blue', 'gray40') , name='Highlighted dots') +
  theme(axis.title = element_blank())+
  theme(legend.position="top")+
  scale_x_discrete(drop=FALSE)

Center error bars (geom_errorbar) horizontally on bars (geom_bar)

How would I position the error bars in the centre of the appropriately coloured bars?
df1 <- data.frame(
supp = c("OJ","OJ","OJ","VC","VC","VC"),
dose = c(0.5,1,2,0.5,1,2),
len = c(13.23,22.7,26.06,7.98,16.77,26.14),
se = c(1.41,1.27,0.84,0.87,0.8,1.52)
)
df1$dose <- factor(df1$dose)
ggplot(df1, aes(x=dose, y=len, fill=supp)) +
geom_bar(stat="identity", position=position_dodge()) +
scale_fill_manual(name = "", values = c("deepskyblue1", "green")) +
geom_errorbar(data = df1[1:3,], aes(ymin=len-se, ymax=len+se), width=.4, colour=c("deepskyblue1"), position=position_dodge(.9)) +
geom_errorbar(data = df1[4:6,], aes(ymin=len-se, ymax=len+se), width=.4, colour=c("green"), position=position_dodge(.9))
Because the data for the error bars are in the same data frame as the data for the bars, you don't need to provide the data= argument in geom_errorbar(), and there is also no need to call geom_errorbar() twice.
You should provide in geom_errorbar() ymin and ymax values in aes(), also color=supp in aes() will ensure that error bars are colored according to supp values. To get the same colors as for bars, add scale_color_manual() with the same color names. With argument position=position_dodge(0.9) you can get errorbars in center of bars.
# Dodged bars with one error-bar layer that reuses the plot's data.
ggplot(df1, aes(x=dose, y=len, fill=supp)) +
geom_bar(stat="identity", position=position_dodge()) +
scale_fill_manual(name = "", values = c("deepskyblue1", "green")) +
# color=supp colours the error bars by supp; position_dodge(.9) puts them in
# the centre of the bars.
geom_errorbar(aes(ymin=len-se, ymax=len+se,color=supp), width=.4,
position=position_dodge(.9))+
# Same colour names as the fill scale so the error bars match their bars.
scale_color_manual(name = "", values = c("deepskyblue1", "green"))

Resources