how to add manually a legend to ggplot [duplicate] - r

This question already has an answer here:
ggplot2 add manual legend for two data series
(1 answer)
Closed 2 years ago.
I want to manually add a legend to a ggplot in R. The problem with my code is that the legend does not show the right symbols (blue point, blue dashed line and red solid line). Here are the code and the plot.
# Predict mRNA from the model object `lm_mRNATime` (defined elsewhere) for
# every row, and store the predictions as a new column for plotting.
predict_ID1.4.5.6.7 <- predict(lm_mRNATime, ID1.4.5.6.7)
ID1.4.5.6.7$predicted_mRNA <- predict_ID1.4.5.6.7
# Named vector: legend key -> colour. The names match the constant strings
# given to `color` inside the aes() calls below.
colors <- c("data" = "Blue", "predicted_mRNA" = "red","fit"="Blue")
# The constant color = "data" in the top-level aes() is inherited by every
# layer that does not supply its own `color` mapping.
ggplot( data = ID1.4.5.6.7, aes(x=Time,y=mRNA,color="data")) +
geom_point()+
scale_x_discrete(limits=c('0','20','40','60','120'))+
labs(title="ID-1,ID-4,ID-5,ID-6,ID-7",y="mRNA", x="Time [min]", color = "Legend") +
scale_color_manual(values = colors)+
# Prediction line, drawn under its own legend key.
geom_line(aes(x=Time,y=predicted_mRNA,color="predicted_mRNA"),lwd=1.3)+
# Linear smoother with its confidence band (se=TRUE). Note that lty=2 is
# given both inside aes() and as a fixed argument.
geom_smooth(method = "lm",aes(color="fit",lty=2),se=TRUE,lty=2)+
# Second scale_color_manual() call, with the same values as the one above.
scale_color_manual(values = colors)+
# Centre the title and subtitle horizontally.
theme(plot.title = element_text(hjust = 0.5),plot.subtitle = element_text(hjust = 0.5))
How can I modify the code in order to get the symbols associated to the plot in the legend ?

The hardest part here was recreating your data set for demonstration purposes. It's always better to add a reproducible example. Anyway, the following should be close:
library(ggplot2)
# Simulated stand-in for the asker's data: 3 observations at the first time
# point and 17 at each of the remaining four, with a slowly decreasing mean
# and a common standard deviation of 0.25.
set.seed(123)
ID1.4.5.6.7 <- data.frame(
  Time = rep(c(1, 2, 3, 4, 5), times = c(3, 17, 17, 17, 17)),
  mRNA = rnorm(
    n = 71,
    mean = rep(c(0.1, 0, -0.04, -0.08, -0.12), times = c(3, 17, 17, 17, 17)),
    sd = 0.25
  )
)

# Straight-line fit of mRNA against Time; its predictions are plotted later.
lm_mRNATime <- lm(mRNA ~ Time, data = ID1.4.5.6.7)
Now we run your code with the addition of a custom colour guide:
# Attach the model's predictions so they can be drawn as their own line.
predict_ID1.4.5.6.7 <- predict(lm_mRNATime, ID1.4.5.6.7)
ID1.4.5.6.7$predicted_mRNA <- predict_ID1.4.5.6.7

# One colour per legend key; the names must match the constant strings
# mapped to `color` inside the aes() calls below.
colors <- c("data" = "Blue", "predicted_mRNA" = "red", "fit" = "Blue")

p <- ggplot(data = ID1.4.5.6.7, aes(x = Time, y = mRNA, color = "data")) +
  geom_point() +
  geom_line(aes(x = Time, y = predicted_mRNA, color = "predicted_mRNA"),
            lwd = 1.3) +
  # The dashed line type is a fixed setting, so it is passed once, outside
  # aes(); mapping the constant 2 inside aes() as well was redundant.
  geom_smooth(method = "lm", aes(color = "fit"), se = TRUE, lty = 2) +
  scale_x_discrete(limits = c('0', '20', '40', '60', '120')) +
  scale_color_manual(values = colors) +
  labs(title = "ID-1, ID-4, ID-5, ID-6, ID-7",
       y = "mRNA", x = "Time [min]", color = "Legend") +
  # Legend keys are ordered alphabetically (data, fit, predicted_mRNA), so
  # the override vectors follow that order: a point for "data", a dashed
  # line for "fit" and a solid line for "predicted_mRNA".
  guides(color = guide_legend(
    override.aes = list(shape = c(16, NA, NA),
                        linetype = c(NA, 2, 1)))) +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        # Wider keys so the dashed pattern is visible in the legend.
        legend.key.width = unit(30, "points"))

# Display the finished plot; assigning it to `p` alone draws nothing.
p

Related

add text to plot or legend for second axis only R

I very nearly have the plot that I want, so I think this will be an easy fix for someone more R-savvy than myself.
# Per-sample contamination metrics. Two of the column names contain spaces
# and parentheses, so they are written with backticks.
Sample_ID <- c(
  "P1014B", "P1014F", "P1036A", "P1036B",
  "P1036C", "P1036D", "P1036E", "P1036F"
)
`CONTAMINATION_SCORE (NA)` <- c(
  2677, 1021, 870, 6831,
  1324, 4175, 1370, 875
)
`CONTAMINATION_P_VALUE (NA)` <- c(
  0.101, 1.000, 1.000, 0.000,
  1.000, 0.036, 1.000, 1.000
)

# check.names = FALSE keeps the non-syntactic column names unchanged.
contam_reads <- data.frame(
  Sample_ID = Sample_ID,
  `CONTAMINATION_SCORE (NA)` = `CONTAMINATION_SCORE (NA)`,
  `CONTAMINATION_P_VALUE (NA)` = `CONTAMINATION_P_VALUE (NA)`,
  check.names = FALSE
)
I have a plot that is very near to what I want:
# Named colours intended for the two reference lines in the legend.
cols = c("P-value upper Limit" = "black","Contam. score upper limit" = "red")
# Bars use every sample except the first two rows of contam_reads.
ggplot(contam_reads[-c(1,2),], aes(x=Sample_ID, y=`CONTAMINATION_SCORE (NA)`)) +
geom_bar(stat="identity", fill="cyan4") +
# Horizontal reference lines taken from the first row of contam_reads.
# Their colour is set as a fixed argument (col = cols[...]) rather than
# mapped through aes(color = ...).
geom_hline(aes(yintercept=contam_reads$`CONTAMINATION_SCORE (NA)`[1]), col = cols[2], size = 1.5, linetype=2) +
geom_hline(aes(yintercept=contam_reads$`CONTAMINATION_P_VALUE (NA)`[1]*10000), col = cols[1], size = 1, linetype=2) +
# P-values are multiplied by 10000 to share the primary y axis; the points
# have a fixed red colour and show.legend = T.
geom_point(aes(x=Sample_ID, y=`CONTAMINATION_P_VALUE (NA)`* 10000),stat="identity",color="red", size = 1.5, show.legend = T) +
labs(title= "DNA Library QC Metrics for Contamination",
x="Sample ID",y="Contamination Score") +
# The secondary axis divides by 10000 again so it is labelled in p-value units.
scale_y_continuous(sec.axis=sec_axis(~./10000,name="P-value", breaks = seq(0,1,0.1))) +
# Colour each y axis to match the series it belongs to.
theme(
axis.title.y = element_text(color = "cyan4",size=15),
axis.text.y = element_text(color = "cyan4"),
axis.title.y.right = element_text(color = "red"),
axis.text.y.right = element_text(color = "red")
) +
# Supplies `cols` as the values of the colour scale.
scale_colour_manual(values=cols)
I can't get the legend to be a line and not dots, and I can't get the first dot in the legend to turn black.
You could fix your legend by mapping on aesthetics, i.e. instead of setting the colors of the hlines as fixed arguments, map them on the color aesthetic, and set show.legend=FALSE for geom_point so that the legend shows just the lines. To this end I also use the data argument to pass just the first row of the dataset to the hlines.
Note 1: I added a named vector with labels for your scores and values. Also as names I use abbreviations for both the labels and color vector which makes it much easier to refer to a specific value in the ggplot code and makes the code easier to read.
Note 2: In my plot the colors for the lines are swapped which is right if p-values should be "red" and scores "black". Perhaps you confused that which could easily happen if one uses e.g. cols[1] to refer to colors.
library(ggplot2)

# Short keys ("pval", "score") tie each colour to its legend label; the same
# keys are the constants mapped to `color` inside aes() below.
cols <- c("pval" = "red", "score" = "black")
labels <- c("pval" = "P-value upper Limit", "score" = "Contam. score upper limit")

# Bars: every sample except the first two rows.
ggplot(
  contam_reads[-c(1, 2), ],
  aes(x = Sample_ID, y = `CONTAMINATION_SCORE (NA)`)
) +
  geom_bar(stat = "identity", fill = "cyan4") +
  # Both limit lines read their value from the first row only. Mapping
  # `color` inside aes() is what gives them legend entries.
  geom_hline(
    data = contam_reads[1, ],
    aes(yintercept = `CONTAMINATION_SCORE (NA)`, color = "score"),
    size = 1.5,
    linetype = 2
  ) +
  geom_hline(
    data = contam_reads[1, ],
    aes(yintercept = `CONTAMINATION_P_VALUE (NA)` * 10000, color = "pval"),
    size = 1,
    linetype = 2
  ) +
  # Points share the "pval" colour but are kept out of the legend so that
  # the keys show lines only.
  geom_point(
    aes(
      x = Sample_ID,
      y = `CONTAMINATION_P_VALUE (NA)` * 10000,
      color = "pval"
    ),
    stat = "identity",
    size = 1.5,
    show.legend = FALSE
  ) +
  labs(
    title = "DNA Library QC Metrics for Contamination",
    x = "Sample ID",
    y = "Contamination Score",
    color = NULL
  ) +
  # P-values were scaled by 10000 above; the secondary axis undoes that.
  scale_y_continuous(
    sec.axis = sec_axis(~ . / 10000, name = "P-value", breaks = seq(0, 1, 0.1))
  ) +
  theme(
    axis.title.y = element_text(color = "cyan4", size = 15),
    axis.text.y = element_text(color = "cyan4"),
    axis.title.y.right = element_text(color = "red"),
    axis.text.y.right = element_text(color = "red")
  ) +
  scale_colour_manual(values = cols, labels = labels)

How to fill stat_ellipse with patterns with transparent backgrounds in R?

I am using stat_ellipse in R to generate ellipse area polygons from the data. However, they overlap significantly and turning the alpha level to transparent "kind of" works. I wanted to see if there was a way to fill specific ellipses with a pattern that has a transparent background since they overlap so much. Maybe I could have some solid colors and others patterns?
This is my working plot code now:
# Isotope bi-plot: one filled ellipse per group x community combination,
# with the raw observations drawn on top.
# `claw3` (data) and `p.ell` (ellipse level) are defined elsewhere.
ggplot(data = claw3,
       aes(x = iso1,
           y = iso2,
           fill = group,
           lty = community,
           shape = community)) +
  stat_ellipse(aes(group = interaction(group, community),
                   lty = community),
               alpha = 0.85, # transparency level, trying to make 2012 West more visible
               color = "black",
               level = p.ell,
               type = "norm",
               geom = "polygon",
               size = 1.1) +
  geom_point(aes(fill = group), size = 2, alpha = 1, color = "black") +
  # Hex colours need the leading "#": a bare "00FFFFFF" is not a valid R
  # colour specification and fails as soon as the value is used.
  scale_fill_manual(values = c("blue", "grey30", "#00FFFFFF"),
                    labels = c("2012", "2014", "2016")) +
  scale_color_manual(values = c("blue", "grey30", "#00FFFFFF")) +
  scale_linetype_manual(values = c("dotted", "solid")) +
  scale_shape_manual(values = c(21, 24)) +
  guides(
    # Community keys: draw the point shapes filled white.
    shape = guide_legend(override.aes = list(fill = "white")),
    # Group (year) keys: show the fill only, without a point shape.
    fill = guide_legend(override.aes = list(shape = NA, size = 1))
  ) +
  ylab(expression(paste(delta^{15}, "N (\u2030)"))) +
  xlab(expression(paste(delta^{13}, "C (\u2030)"))) +
  scale_x_continuous(breaks = seq(-26.5, -19.5, by = 1),
                     #labels = c( -24, rep("", 2), -23, rep("", 2), -21),
                     limits = c(-26.5, -19.5),
                     expand = c(0, 0)) +
  # Only every second break on the y axis gets a label.
  scale_y_continuous(breaks = seq(4, 11),
                     labels = c(4, "", 6, "", 8, "", 10, ""),
                     limits = c(3.5, 11),
                     expand = c(0, 0)) +
  # A complete theme replaces any theme() settings added before it, so it is
  # applied first and the individual tweaks follow.
  theme_classic(base_size = 14) +
  theme(text = element_text(size = 14),
        legend.title = element_blank())
I have recently found ggpattern, but it does not look like it's friendly with stat_ellipse, or I really just don't understand where to put it. I believe I'd have to remove the scale_fill_manual and scale_color_manual commands, but that's about it.

How can I add hatches, stripes or another pattern or texture to a barplot in ggplot?

Suppose I have data with both an ordinal variable and a categorical variable:
# Simulated data with 160 rows: an alternating Class, an ordinal StudyTime
# and a binary Nerd flag.
set.seed(35)
df <- data.frame(
  Class = factor(rep(c(1, 2), times = 80), labels = c("Math", "Science")),
  # Only 16 sorted draws are made here; data.frame() recycles them to
  # match the 160 rows of the other columns.
  StudyTime = factor(
    sort(sample(1:4, 16, prob = c(0.25, 0.3, 0.3, 0.15), replace = TRUE)),
    labels = c("<5", "5-10", "10-20", ">20")
  ),
  # One draw per row; the probability of "Nerd" rises in four steps.
  Nerd = factor(
    vapply(
      rep(c(0.1, 0.3, 0.5, 0.8), c(30, 50, 50, 30)),
      function(p_nerd) {
        sample(c("Nerd", "NotNerd"), size = 1, prob = c(p_nerd, 1 - p_nerd))
      },
      character(1)
    ),
    levels = c("NotNerd", "Nerd")
  )
)
One could use ggplot and geom_bar with x, fill and alpha (or color) aesthetic mappings to visualize the relationship between these variables.
# Dodged bar chart: fill encodes StudyTime, transparency encodes Nerd.
ggplot(
  data = df,
  aes(x = Class, fill = StudyTime, alpha = Nerd)
) +
  geom_bar(position = "dodge", color = "black") +
  scale_alpha_manual(values = c(Nerd = 0.5, NotNerd = 1)) +
  # Four colours interpolated along a blue -> white -> orange ramp.
  scale_fill_manual(
    values = colorRampPalette(c("#0066CC", "#FFFFFF", "#FF8C00"))(4)
  ) +
  labs(
    x = "Class",
    y = "Number of Students",
    alpha = "Nerd?"
  ) +
  theme(legend.key.height = unit(1, "cm"))
However, alpha and color are not ideal. A better alternative might be to apply a pattern such as stripes or a crosshatch.
The accepted answer to this question from over 10 years ago says to use colors, and the most upvoted answer (while clever) uses over 100 lines of code.
This question received some upvotes but no new answers.
Is there any better alternative to adding a pattern such as can be seen here?
One approach is to use the ggpattern package written by Mike FC (no affiliation):
library(ggplot2)
#remotes::install_github("coolbutuseless/ggpattern")
library(ggpattern)
# Same bar chart as before, with Nerd encoded as a stripe pattern instead
# of transparency.
ggplot(
  data = df,
  aes(x = Class, fill = StudyTime, pattern = Nerd)
) +
  geom_bar_pattern(
    position = position_dodge(preserve = "single"),
    color = "black",
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.1,
    pattern_spacing = 0.025,
    pattern_key_scale_factor = 0.6
  ) +
  scale_fill_manual(
    values = colorRampPalette(c("#0066CC", "#FFFFFF", "#FF8C00"))(4)
  ) +
  scale_pattern_manual(values = c(Nerd = "stripe", NotNerd = "none")) +
  labs(
    x = "Class",
    y = "Number of Students",
    pattern = "Nerd?"
  ) +
  # Keep the two legends separate: pattern keys on a white fill, fill keys
  # without a pattern.
  guides(
    pattern = guide_legend(override.aes = list(fill = "white")),
    fill = guide_legend(override.aes = list(pattern = "none"))
  )
The package appears to support a number of common geometries. Here is an example of using geom_tile to combine a continuous variable with a categorical variable:
# 9 x 9 grid with a continuous value (Evaporation) and a yes/no flag
# (TreeCover) per cell.
set.seed(40)
df2 <- data.frame(
  Row = rep(1:9, times = 9),
  Column = rep(1:9, each = 9),
  Evaporation = runif(81, min = 50, max = 100),
  TreeCover = sample(
    c("Yes", "No"),
    size = 81,
    replace = TRUE,
    prob = c(0.3, 0.7)
  )
)
# Tile map: fill shows Evaporation, a circle pattern marks cells with
# TreeCover == "Yes".
ggplot(
  data = df2,
  aes(
    x = as.factor(Row),
    y = as.factor(Column),
    pattern = TreeCover,
    fill = Evaporation
  )
) +
  geom_tile_pattern(
    pattern_color = NA,
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.5,
    pattern_spacing = 0.025,
    pattern_key_scale_factor = 1
  ) +
  scale_pattern_manual(values = c(Yes = "circle", No = "none")) +
  scale_fill_gradient(low = "#0066CC", high = "#FF8C00") +
  coord_equal() +
  labs(x = "Row", y = "Column") +
  # Draw the pattern legend keys on a white fill.
  guides(pattern = guide_legend(override.aes = list(fill = "white")))

Removing the border of legend symbol

I was trying to plot some predicted vs. actual data, something that resembles the following:
# Some random data: ten x positions, uniform predictions in [-10, 10] and
# observations equal to the prediction plus standard-normal noise.
x <- seq(1: 10)
y_pred <- runif(10, min = -10, max = 10)
y_obs <- y_pred + rnorm(10)
# Faking a CI: a fixed band of +/- 1.96 around each prediction.
Lo.95 <- y_pred - 1.96
Hi.95 <- y_pred + 1.96
my_df <- data.frame(x, y_pred, y_obs, Lo.95, Hi.95)
# All three layers map a constant string to `colour`, so they share a
# single colour legend with three keys.
ggplot(my_df, aes(x = x, y = y_pred)) +
geom_line(aes(colour = "Forecasted Data"), size = 1.2) +
geom_point(aes(x = x, y = y_obs, colour = "Actual Data"), size = 3) +
# The ribbon maps linetype = NA and a colour key inside aes().
geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95, x=x, linetype = NA, colour = "Confidence Interval"), alpha=0.2) +
theme_grey() +
# Manual colours plus per-key overrides for how each legend key is drawn.
scale_colour_manual(
values = c("gray30", "blue", "red"),
guide = guide_legend(override.aes = list(
# NOTE(review): `border` is not used as an aesthetic anywhere else in this
# snippet -- confirm whether it has any effect on the legend keys.
border=c(NA, NA, NA),
fill=c("gray30", "white", "white"),
linetype = c("blank", "blank", "solid"),
shape = c(NA, 19, NA))))
The plot looks like this:
The only issue I have with this plot is the red border surrounding the legend item symbol for the line (i.e. the forecasted data). Is there any way I can remove it without breaking the rest of my plot?
I think geom_ribbon was the problem. If we take its color & fill out of aes, everything looks fine
library(ggplot2)
# Simulated forecast: ten x positions, predictions drawn uniformly from
# [-10, 10], and observations equal to the prediction plus N(0, 1) noise.
x <- seq_len(10)
y_pred <- runif(n = 10, min = -10, max = 10)
y_obs <- y_pred + rnorm(n = 10)

# Stand-in for a 95% interval: a fixed +/- 1.96 band around the prediction.
Lo.95 <- y_pred - 1.96
Hi.95 <- y_pred + 1.96

my_df <- data.frame(
  x = x,
  y_pred = y_pred,
  y_obs = y_obs,
  Lo.95 = Lo.95,
  Hi.95 = Hi.95
)
# Points and line each map a constant to `colour` and therefore appear in
# the legend; the ribbon uses fixed settings only, so it has no legend key.
m1 <- ggplot(my_df, aes(x = x, y = y_pred)) +
  geom_point(aes(x = x, y = y_obs, colour = "Actual"), size = 3) +
  geom_line(aes(colour = "Forecasted"), size = 1.2) +
  geom_ribbon(
    aes(x = x, ymin = Lo.95, ymax = Hi.95),
    fill = "grey30",
    alpha = 0.2
  ) +
  scale_color_manual(
    "Legend",
    values = c("blue", "red"),
    labels = c("Actual", "Forecasted")
  ) +
  # Per-key overrides: a point without a line for "Actual", a line without
  # a point for "Forecasted".
  guides(
    color = guide_legend(
      order = 1,
      override.aes = list(
        color = c("blue", "red"),
        fill = c("white", "white"),
        linetype = c("blank", "solid"),
        shape = c(19, NA)
      )
    )
  ) +
  theme_bw() +
  # Drop the border colour and background of the legend keys and box.
  theme(
    legend.key = element_rect(colour = NA, fill = NA),
    legend.box.background = element_blank()
  )

m1
As we leave Confidence Interval out of aes, we no longer have its legend. One workaround is to create an invisible point and take one unused geom to manually create a legend key. Here we can use size/shape (credit to this answer)
# Add a point layer whose shape is mapped to NA, used only to obtain a
# `size` legend that is restyled as a grey square for the interval.
m2 <- m1 +
  geom_point(
    aes(x = x, y = y_obs, size = "Confidence Interval", shape = NA)
  ) +
  guides(
    size = guide_legend(
      title = NULL,
      order = 2,
      override.aes = list(
        shape = 15,
        color = "lightgrey",
        size = 6
      )
    )
  ) +
  # Move the legends closer to each other.
  theme(
    legend.title = element_blank(),
    legend.justification = "center",
    legend.spacing.y = unit(0.05, "cm"),
    legend.margin = margin(0, 0, 0, 0),
    legend.box.margin = margin(0, 0, 0, 0)
  )

m2
Created on 2018-03-19 by the reprex package (v0.2.0).
A better way to address this question would be to set the show.legend = F option in geom_ribbon(). This eliminates the need for the second step of adding and merging a legend key for the confidence interval. Here is the code with slight modifications.
# Variant of the question's plot in which the ribbon layer is excluded from
# the legend glyphs via show.legend = FALSE.
# Uses `my_df`, the data frame built in the snippets above (the original
# `my_dff` is not defined anywhere).
ggplot(my_df, aes(x = x, y = y_pred)) +
  geom_line(aes(colour = "Forecasted Data"), size = 1) +
  geom_point(aes(x = x, y = y_obs, colour = "Actual Data"), size = 1) +
  geom_ribbon(
    aes(ymin = Lo.95, ymax = Hi.95, x = x, linetype = NA,
        colour = "Confidence Interval"),
    alpha = 0.2,
    show.legend = FALSE
  ) +
  theme_grey() +
  scale_colour_manual(values = c("blue", "gray30", "red")) +
  guides(color = guide_legend(
    override.aes = list(linetype = c(1, 1, 0)),
    # NOTE(review): `shape` is passed to guide_legend() itself rather than
    # inside override.aes; it is left as written -- confirm whether it was
    # meant to be part of the override list.
    shape = c(1, NA, NA),
    reverse = TRUE
  ))
My plot
Credit to https://stackoverflow.com/users/4282026/marblo
for their answer to similar question.

annotate ggplot with discrete axis (w/ reproducible example)

I'm struggling to find a straightforward solution to fix my plot. The problem boils down to the discrete nature of the x-axis. I want to annotate the plot with text and segments in order to show statistical results.
1) I want to print the p-value between "Baby" and "Queen" as well as between "Queen" and "Worker", but ggplot only allows to annotate above each label, not between them.
2) Similarly, I want the first two geom_segments to be separated, but ggplot won't let me end the first one at something like "Queen"-0.1 and start the second one at "Queen"+0.1 as it is mixing factors and numbers.
Fully reproducible example below, with issues on lines 12, 13 and 18:
data <- data.frame(Group.1 = rep(c("A","B"),3),Group.2 = c("Baby","Baby","Worker","Worker","Queen","Queen"), # toy data: two values per Group.2 level
value = c(0.18,0.30,0.09,0.25,-0.26,-0.55))
boxplot_candidates <- ggplot(aes(y=value,x=Group.2,fill=Group.2),data= data) + theme_bw() + # Group.2 is the discrete x axis and the fill
scale_fill_manual(values=c("lightgreen","darkgreen","goldenrod1"),name="") + # one fill colour per Group.2 level
theme(plot.title = element_text(face="bold", size=18, hjust=0)) +
labs(x="",y="Transcript expression\n(log2-centered TMM-nornalised TPMs)") +
theme(plot.title=element_text(size=18, vjust=2),legend.position="", legend.text=element_text(size=14), # text sizes and colours
axis.text.x = element_text(size = 14, colour = "black"),
axis.text.y = element_text(size = 14, colour = "black"),
axis.title.y=element_text(size = 14, colour = "black",vjust=1),
axis.title.x=element_text(size = 14, colour = "black")) +
geom_segment(aes(x="Baby",xend="Queen",y=0.7,yend=0.7)) + ##### MAKE XEND SMALLER
geom_segment(aes(x="Queen",xend="Worker",y=0.7,yend=0.7)) + ##### MAKE XEND LARGER
geom_segment(aes(x="Baby",xend="Worker",y=1.2,yend=1.2)) + # segment spanning all three categories
ylim(-1.5,1.5) + stat_boxplot(geom ='errorbar') + # fixed y range; error-bar geom from the boxplot statistics
geom_boxplot(notch=F,outlier.shape=NA) + # boxes drawn with outlier.shape = NA
geom_point(size=2,position = position_jitter(width = 0.2)) + stat_summary(fun.y=mean, colour = "white",geom="point", size=4) + # jittered raw points; white point at each group mean
annotate("text", x = as.factor(unique(data$Group.2)),y=c(0.8,0.8,1.3), # one label positioned at each x-axis category
label = c("p < 0.001","p < 0.001","p = 0.89"),family="",fontface = 3,size=4) ##### PRINT "p < 0.001" BETWEEN LABELS
print(boxplot_candidates) # render the plot
Categorical variables are simply placed at locations 1, 2, 3, etc. If you want to reach locations between two categorical variables, you can use coordinates such as 1.2 or 1.5 etc.
Here is a reproducible example with all the irrelevant theme code stripped out:
# Toy data: two values (Group.1 A and B) for each Group.2 level.
data <- data.frame(
  Group.1 = rep(c("A", "B"), 3),
  Group.2 = c("Baby", "Baby", "Worker", "Worker", "Queen", "Queen"),
  value = c(0.18, 0.30, 0.09, 0.25, -0.26, -0.55)
)

# Categories on a discrete axis sit at x = 1, 2, 3, ..., so numeric
# positions such as 1.1 or 2.5 place segments and labels between them.
ggplot(data, aes(y = value, x = Group.2, fill = Group.2)) +
  stat_boxplot(geom = "errorbar") +
  geom_boxplot(notch = FALSE, outlier.shape = NA) +
  # Two short brackets between neighbouring categories, one long bracket
  # across all three.
  geom_segment(aes(x = 1.1, xend = 1.9, y = 0.7, yend = 0.7)) +
  geom_segment(aes(x = 2.1, xend = 2.9, y = 0.7, yend = 0.7)) +
  geom_segment(aes(x = 1.1, xend = 2.9, y = 1.2, yend = 1.2)) +
  geom_point(size = 2, position = position_jitter(width = 0.2)) +
  stat_summary(fun.y = mean, colour = "white", geom = "point", size = 4) +
  # Labels centred above each bracket.
  annotate(
    "text",
    x = c(1.5, 2.5, 2),
    y = c(0.8, 0.8, 1.3),
    label = c("p < 0.001", "p < 0.001", "p = 0.89"),
    family = "",
    fontface = 3,
    size = 4
  ) +
  scale_fill_manual(
    values = c("lightgreen", "darkgreen", "goldenrod1"),
    guide = "none"
  ) +
  ylim(-1.5, 1.5) +
  labs(
    x = "",
    y = "Transcript expression\n(log2-centered TMM-nornalised TPMs)"
  ) +
  theme_bw()

Resources