Defining order of appearence in geom_boxplot - r

I have this data.frame:
df = data.frame(x = c(1,1,2,2,3,3), factor = c("crb","crb","ctx","ctx","bsl","bsl"), factor.level = c("pat","mat","pat","mat","pat","mat"), factor.level.color = c("blue","red","blue","red","blue","red"), ymin = c(0.031,0.152,0.071,0.026,0.051,0.032), lower = c(0.119,0.522,0.415,0.185,0.287,0.285), middle = c(0.298,0.701,0.615,0.384,0.500,0.499), upper = c(0.477,0.880,0.814,0.584,0.714,0.714), ymax = c(0.848,0.870,0.874,0.929,0.967,0.950),stringsAsFactors=F)
and I'd like to plot it in a geom_boxplot so that df$x defines the x-axis location, df$ymin, df$lower, df$middle, df$upper, df$ymax define the boxes, df$factor defines the x-axis ticks labels, df$factor.level.color defines the color of the boxes, and df$factor.level should appear in the legend (unique values). A pair of boxes should be plotted on the same x-axis location. For this reason a transparency factor needs to be defined
This is an improvised version of how I want it to look like:
(obviously the x and y axes tick values are missing)
I thought this code is that I need:
pl = ggplot(data = df, aes(x = x)) + geom_boxplot(aes(lower = lower,upper = upper, middle = middle, ymin = ymin, ymax = ymax,color = factor.level.color, fill = factor.level.color),
position = position_dodge(width = 0), width = 0.5, alpha = 0.5, stat = "identity") + scale_fill_identity("factor level", guide="legend", labels = df$factor.level) +
scale_color_identity("factor level", guide = "legend", labels = df$factor.level) +
scale_x_discrete(breaks = seq(1,length(unique(df$factor)),1), labels = unique(df$factor), limits = seq(1,length(unique(df$factor)),1)) +
labs(x = "factor",y = "fraction") + theme(axis.title.x = element_text(size = rel(0.8)), axis.title.y = element_text(size = rel(0.8)))
But the outcome is a bit messed up:
I guess levels need to be somehow defined but I don't know how to do that.

Try aes(x = factor(x)) in the call to ggplot(...)
pl = ggplot(data = df, aes(x = factor(x))) +
geom_boxplot(aes(lower = lower,upper = upper, middle = middle,
ymin = ymin, ymax = ymax,
color = factor.level.color, fill = factor.level.color),
position = position_dodge(width = 0),
width = 0.5, alpha = 0.5, stat = "identity") +
scale_fill_identity("factor level", guide="legend", labels = df$factor.level) +
scale_color_identity("factor level", guide = "legend", labels = df$factor.level) +
scale_x_discrete(breaks = seq(1,length(unique(df$factor)),1),
labels = unique(df$factor),
limits = seq(1,length(unique(df$factor)),1)) +
labs(x = "factor",y = "fraction") +
theme(axis.title.x = element_text(size = rel(0.8)), axis.title.y = element_text(size = rel(0.8)))

Related

Creating a second legend with ggplot?

I created the following plot using ggplot:
y1 <- runif(20,-2,7)
y2 <- c(-0.30306664,0.14744265 , 0.43857131 ,-0.04536794 ,-1.41432016,0.51887010 , 6.34925495 , 2.82511601 , 2.84251791, 4.05300569,-2.34208042, -0.29278747 , 0.49661933 , 0.75099908 ,1.12097713,2.72244949 , 2.23933230 , 1.86667714 , 2.17540024 , 7.56568823)
x <- 2001:2020
ggplot() +
geom_rect(aes(xmin=2006.90, xmax=2009.15,ymin=-Inf,ymax=10, fill='blue'), alpha= 0.4)+geom_rect(aes(xmin=2019.80, xmax=Inf,ymin=-Inf,ymax=10, fill='orange'), alpha= 0.3)+geom_rect(aes(xmin=2009.90, xmax=2013.15,ymin=-Inf,ymax=10, fill="lightgreen"), alpha= 0.4)+
geom_line(aes(x=x,y = y1),colour="black")+geom_line(aes(x=x,y = y2),colour="red")+
geom_point(aes(x=x,y = y1),col="black")+
geom_point(aes(x=x,y = y2),col="red")+
theme_classic()+
scale_fill_manual(name="",values = c("lightblue","lightgreen","orange"),labels=c(" R","k","C"))+theme(legend.position = "bottom")+ theme(axis.text.x = element_text(angle = 90))+geom_hline(yintercept = 0, color="black", size=1)
I have one legend to explain the content of the rectangles of the graph, but I need to add another legend to explain the two lines which are black and red. I wondered how to add another legend with a different position than the one that already exists to explain the names of the lines?
Can anyone help?
Move color inside aes, add scale_color_identity to get the right colors and to set the labels for the legend:
library(ggplot2)
ggplot() +
geom_rect(aes(xmin = 2006.90, xmax = 2009.15, ymin = -Inf, ymax = 10, fill = "blue"), alpha = 0.4) +
geom_rect(aes(xmin = 2019.80, xmax = Inf, ymin = -Inf, ymax = 10, fill = "orange"), alpha = 0.3) +
geom_rect(aes(xmin = 2009.90, xmax = 2013.15, ymin = -Inf, ymax = 10, fill = "lightgreen"), alpha = 0.4) +
geom_line(aes(x = x, y = y1, colour = "black")) +
geom_line(aes(x = x, y = y2, colour = "red")) +
geom_point(aes(x = x, y = y1, col = "black")) +
geom_point(aes(x = x, y = y2, col = "red")) +
scale_color_identity(name = NULL, labels = c(black = "Label 1", red = "Label 2"), guide = "legend") +
theme_classic() +
scale_fill_manual(name = "", values = c("lightblue", "lightgreen", "orange"), labels = c(" Rezession", "krise", "Corona 2020-")) +
theme(legend.position = "bottom") +
theme(axis.text.x = element_text(angle = 90)) +
geom_hline(yintercept = 0, color = "black", size = 1)

Combine legend for fill and colour ggplot to give only single legend

I am plotting a smooth to my data using geom_smooth and using geom_ribbon to plot shaded confidence intervals for this smooth. No matter what I try I cannot get a single legend that represents both the smooth and the ribbon correctly, i.e I am wanting a single legend that has the correct colours and labels for both the smooth and the ribbon. I have tried using + guides(fill = FALSE), guides(colour = FALSE), I also read that giving both colour and fill the same label inside labs() should produce a single unified legend.
Any help would be much appreciated.
Note that I have also tried to reset the legend labels and colours using scale_colour_manual()
The below code produces the below figure. Note that there are two curves here that are essentially overlapping. The relabelling and setting couours has worked for the geom_smooth legend but not the geom_ribbon legend and I still have two legends showing which is not what I want.
ggplot(pred.dat, aes(x = age.x, y = fit, colour = tagged)) +
geom_smooth(size = 1.2) +
geom_ribbon(aes(ymin = lci, ymax = uci, fill = tagged), alpha = 0.2, colour = NA) +
theme_classic() +
labs(x = "Age (days since hatch)", y = "Body mass (g)", colour = "", fill = "") +
scale_colour_manual(labels = c("Untagged", "Tagged"), values = c("#3399FF", "#FF0033")) +
theme(axis.title.x = element_text(face = "bold", size = 14),
axis.title.y = element_text(face = "bold", size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12))
The problem is that you provide new labels for the color-aesthetic but not for the fill-aesthetic. Consequently ggplot shows two legends because the labels are different.
You can either also provide the same labels for the fill-aesthetic (code option #1 below) or you can set the labels for the levels of your grouping variable ("tagged") before calling ggplot (code option #2).
library(ggplot2)
#make some data
x = seq(0,2*pi, by = 0.01)
pred.dat <- data.frame(x = c(x,x),
y = c(sin(x), cos(x)) + rnorm(length(x) * 2, 0, 1),
tag = rep(0:1, each = length(x)))
pred.dat$lci <- c(sin(x), cos(x)) - 0.4
pred.dat$uci <- c(sin(x), cos(x)) + 0.4
#option 1: set labels within ggplot call
pred.dat$tagged <- as.factor(pred.dat$tag)
ggplot(pred.dat, aes(x = x, y = y, color = tagged, fill = tagged)) +
geom_smooth(size = 1.2) +
geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, color = NA) +
scale_color_manual(labels = c("untagged", "tagged"), values = c("#F8766D", "#00BFC4")) +
scale_fill_manual(labels = c("untagged", "tagged"), values = c("#F8766D", "#00BFC4")) +
theme_classic() + theme(legend.title = element_blank())
#option 2: set labels before ggplot call
pred.dat$tagged <- factor(pred.dat$tag, levels = 0:1, labels = c("untagged", "tagged"))
ggplot(pred.dat, aes(x = x, y = y, color = tagged, fill = tagged)) +
geom_smooth(size = 1.2) +
geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, color = NA) +
theme_classic() + theme(legend.title = element_blank())

Increase the margin of every second x-axis tick ggplot2

I'm looking for a way to move every second x-axis tick downwards and have the tick line go down with it.
I can change the general margin and tick length for all ticks with:
#MWE
library(ggplot2)
ggplot(cars, aes(dist, speed))+
geom_point()+
theme(
axis.ticks.length.x = unit(15, "pt")
)
But, I would like the x-axis ticks 0, 50, and 100 (i.e., every second tick) to be without the added top margin.
A generalized answer is preferred as my x-axis is categorical and not numerical (and contains 430 ticks, so nothing I can set by hand).
Any ideas?
Edit:
Output should be:
Edit2:
A more intricate example would be:
#MWE
ggplot(diamonds, aes(cut, price, fill = clarity, group = clarity))+
geom_col(position = 'dodge')+
theme(
axis.ticks.length.x = unit(15, "pt")
)
Edit -- added categorical approach at bottom.
Here's a hack. Hope there's a better way!
ticks <- data.frame(
x = 25*0:5,
y = rep(c(-0.2, -2), 3)
)
ggplot(cars, aes(dist, speed))+
geom_point()+
geom_rect(fill = "white", xmin = -Inf, xmax = Inf,
ymin = 0, ymax = -5) +
geom_segment(data = ticks,
aes(x = x, xend = x,
y = 0, yend = y)) +
geom_text(data = ticks,
aes(x = x, y = y, label = x), vjust = 1.5) +
theme(axis.ticks.x = element_blank()) +
scale_x_continuous(breaks = 25*0:5, labels = NULL, name = "") +
coord_cartesian(clip = "off")
Here's a similar approach used with a categorical x.
cats <- sort(as.character(unique(diamonds$cut)))
ticks <- data.frame(x = cats)
ticks$y = ifelse(seq_along(cats) %% 2, -500, -2000)
ggplot(diamonds, aes(cut, price, fill = clarity, group = clarity))+
geom_col(position = 'dodge') +
annotate("rect", fill = "white",
xmin = 0.4, xmax = length(cats) + 0.6,
ymin = 0, ymax = -3000) +
geom_segment(data = ticks, inherit.aes = F,
aes(x = x, xend = x,
y = 0, yend = y)) +
geom_text(data = ticks, inherit.aes = F,
aes(x = x, y = y, label = x), vjust = 1.5) +
scale_x_discrete(labels = NULL, name = "cut") +
scale_y_continuous(expand = expand_scale(mult = c(0, 0.05))) +
theme(axis.ticks.x = element_blank()) +
coord_cartesian(clip = "off")

customize two legends inside one graph in ggplot2

I wanted to comment on the following doubt.
Using this code:
Plot<-data.frame(Age=c(0,0,0,0,0),Density=c(0,0,0,0,0),Sensitivity=c(0,0,0,0,0),inf=c(0,0,0,0,0),sup=c(0,0,0,0,0),tde=c(0,0,0,0,0))
Plot[1,]<-c(1,1,0.857,0.793,0.904,0.00209834)
Plot[2,]<-c(1,2,0.771 ,0.74,0.799,0.00348286)
Plot[3,]<-c(1,3,0.763 ,0.717,0.804,0.00577784)
Plot[4,]<-c(1,4,0.724 ,0.653,0.785,0.00504161)
Plot[5,]<-c(2,1,0.906,0.866,0.934,0.00365742)
Plot[6,]<-c(2,2,0.785 ,0.754,0.813,0.00440399)
Plot[7,]<-c(2,3,0.660,0.593,0.722,0.00542849)
Plot[8,]<-c(2,4,0.544,0.425,0.658,0.00433052)
names(Plot)<-c("Age","Mammographyc density","Sensitivity","inf","sup","tde")
Plot$Age<-c("50-59","50-59","50-59","50-59","60-69","60-69","60-69","60-69")
Plot$Density<-c("Almost entirely fat","Scattered fibroglandular density","Heterogeneously dense","Extremely dense","Almost entirely fat","Scattered fibroglandular density","Heterogeneously dense","Extremely dense")
levels(Plot$Age)<-c("50-59","60-69")
levels(Plot$Density)<-c("Almost entirely fat","Scattered fibroglandular density","Heterogeneously dense","Extremely dense")
pd <- position_dodge(0.2) #
Plot$Density <- reorder(Plot$Density, 1-Plot$Sensitivity)
ggplot(Plot, aes(x = Density, y = 100*Sensitivity, colour=Age)) +
geom_errorbar(aes(ymin = 100*inf, ymax = 100*sup), width = .1, position = pd) +
geom_line(position = pd, aes(group = Age), linetype = c("dashed")) +
geom_point(position = pd, size = 4)+
scale_y_continuous(expand = c(0, 0),name = 'Sensitivity (%)',sec.axis = sec_axis(~./5, name = 'Breast cancer detection rate (per 1000 mammograms)', breaks = c(0,5,10,15,20),
labels = c('0‰',"5‰", '10‰', '15‰', '20‰')), limits = c(0,100)) +
geom_line(position = pd, aes(x = Density, y = tde * 5000, colour = Age, group = Age), linetype = c("dashed"), data = Plot) +
geom_point(shape=18,aes(x = Density, y = tde * 5000, colour = Age, group = Age), position = pd, size = 4) +
theme_light() +
scale_color_manual(name="Age (years)",values = c("50-59"= "grey55", "60-69" = "grey15")) +
theme(legend.position="bottom") + guides(colour = guide_legend(), size = guide_legend(),
shape = guide_legend())
I have made the following graph,
in which the axis on the left is the scale of the circles and the axis on the right is the scale of the diamonds. The fact is that I would like to have a legend approximately like this:
But it is impossible for me, I have tried suggestions of other threads like scale_shape and different commands in guides but I have not got success. I just want to make clear the difference in what shape and color represent.
Would someone know how to help me?
Best regards,
What you should do is a panel plot to avoid the confusion of double axes:
library(dplyr)
library(tidyr)
Plot %>%
gather(measure, Result, Sensitivity, tde) %>%
ggplot(aes(x = Density, y = Result, colour=Age)) +
geom_errorbar(aes(ymin = inf, ymax = sup), width = .1, position = pd,
data = . %>% filter(measure == "Sensitivity")) +
geom_line(aes(group = Age), position = pd, linetype = "dashed") +
geom_point(position = pd, size = 4)+
# scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
scale_y_continuous(labels = scales::percent) +
facet_wrap(~measure, ncol = 1, scales = "free_y") +
theme_light() +
scale_color_manual(name="Age (years)",values = c("50-59"= "grey55", "60-69" = "grey15")) +
theme(legend.position="bottom")
But to do what you asked, you problem is that you have only 1 non-positional aesthetic mapped so you cannot get more than one legend. To force a second legend, you need to add a second mapping. It can be a dummy mapping that has no effect, as below we map alpha but then manually scale both levels to 100%. This solution is not advisable because, as you have done in your example of a desired legend, it is easy to mix up the mappings and have your viz tell a lie by mislabeling which points are sensitivity and which are detection rate.
ggplot(Plot, aes(x = Density, y = 100*Sensitivity, colour=Age, alpha = Age)) +
geom_errorbar(aes(ymin = 100*inf, ymax = 100*sup), width = .1, position = pd) +
geom_line(position = pd, aes(group = Age), linetype = c("dashed")) +
geom_point(position = pd, size = 4)+
scale_y_continuous(expand = c(0, 0),name = 'Sensitivity (%)',sec.axis = sec_axis(~./5, name = 'Breast cancer detection rate (per 1000 mammograms)', breaks = c(0,5,10,15,20),
labels = c('0‰',"5‰", '10‰', '15‰', '20‰')), limits = c(0,100)) +
geom_line(position = pd, aes(x = Density, y = tde * 5000, colour = Age, group = Age), linetype = c("dashed"), data = Plot) +
geom_point(shape=18,aes(x = Density, y = tde * 5000, colour = Age, group = Age), position = pd, size = 4) +
theme_light() +
scale_color_manual(name="Age (years)",values = c("50-59"= "grey55", "60-69" = "grey15")) +
scale_alpha_manual(values = c(1, 1)) +
guides(alpha = guide_legend("Sensitivity"),
color = guide_legend("Detection Rate", override.aes = list(shape = 18))) +
theme(legend.position="bottom")

Customize the background color of facets in ggplot2

Let's say I want to plot my data:
my.df = data.frame(mean = c(0.045729661,0.030416531,0.043202944,0.025600973,0.040526913,0.046167044,0.029352414,0.021477789,0.027580529,0.017614864,0.020324659,0.027547972,0.0268722,0.030804717,0.021502093,0.008342398,0.02295506,0.022386184,0.030849534,0.017291356,0.030957321,0.01871551,0.016945678,0.014143042,0.026686185,0.020877973,0.028612298,0.013227244,0.010710895,0.024460647,0.03704981,0.019832982,0.031858501,0.022194059,0.030575241,0.024632496,0.040815748,0.025595652,0.023839083,0.026474704,0.033000706,0.044125751,0.02714219,0.025724641,0.020767752,0.026480009,0.016794441,0.00709195), std.dev = c(0.007455271,0.006120299,0.008243454,0.005552582,0.006871527,0.008920899,0.007137174,0.00582671,0.007439398,0.005265133,0.006180637,0.008312494,0.006628951,0.005956211,0.008532386,0.00613411,0.005741645,0.005876588,0.006640122,0.005339993,0.008842722,0.006246828,0.005532832,0.005594483,0.007268493,0.006634795,0.008287031,0.00588119,0.004479003,0.006333063,0.00803285,0.006226441,0.009681048,0.006457784,0.006045368,0.006293256,0.008062195,0.00857954,0.008160441,0.006830088,0.008095485,0.006665062,0.007437581,0.008599525,0.008242957,0.006379928,0.007168385,0.004643819), parent.origin = c("paternal","paternal","paternal","paternal","paternal","paternal","maternal","maternal","maternal","maternal","maternal","maternal","paternal","paternal","paternal","paternal","paternal","paternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","paternal","paternal","paternal","paternal","paternal","paternal","maternal","maternal","maternal","maternal","maternal","maternal","paternal","paternal","paternal","paternal","paternal","paternal"), group = c("F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F"), replicate = c(1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6))
As follows:
library(ggplot2)
p1 = ggplot(data = my.df, aes(factor(replicate), color = factor(parent.origin)))
p1 = p1 + geom_boxplot(aes(fill = factor(parent.origin),lower = mean - std.dev, upper = mean + std.dev, middle = mean, ymin = mean - 3*std.dev, ymax = mean + 3*std.dev), position = position_dodge(width = 0), width = 0.5, alpha = 0.5, stat="identity") + facet_wrap(~group, ncol = 4)+scale_fill_manual(values = c("red","blue"),labels = c("maternal","paternal"),name = "parental allele")+scale_colour_manual(values = c("red","blue"),labels = c("maternal","paternal"),name = "parental allele")
p1 = p1 + theme(panel.background = element_rect(fill = 'white', colour = 'black'), legend.position = "none")
This gives:
My question is how do I control the background color in the facet labels? I mean, I want to replace the gray background of the "F1i:F","F1i:M","F1r:F",F1r:M" labels with a white background.
Facet labels background is controled with argument strip.background= of theme().
+ theme(strip.background=element_rect(fill="white"))

Resources