How to add a legend manually for line chart - r

i need the plan legend
How to add a legend manually for geom_line
# Columns of actual beneficiaries filled by gender, overlaid with the plan
# values (from a separate data frame) as a green line with point markers.
ggplot(data = impact_end_Current_yr_m_actual,
       aes(x = month, y = gender_value)) +
  geom_col(aes(fill = gender)) +
  theme_classic() +
  # group = 1 puts every month in one group so a single line is drawn.
  geom_line(data = impact_end_Current_yr_m_plan,
            aes(x = month, y = gender_value, group = 1),
            color = "#288D55", size = 1.2) +
  geom_point(data = impact_end_Current_yr_m_plan,
             aes(x = month, y = gender_value)) +
  theme(axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "bottom",
        axis.text.x = element_text(face = "bold", color = "black", size = 10,
                                   angle = 0, hjust = 1)) +
  labs(x = "", y = "End Beneficiaries (in Num)", fill = "") +
  scale_fill_manual(values = c("#284a8d", "#00B5CE", "#0590eb", "#2746c2")) +
  # Print y-axis labels in plain (non-scientific) notation.
  scale_y_continuous(labels = function(x) format(x, scientific = FALSE))

I think the neatest way to do it is to add colour = "[label]" inside the aes() of geom_line(), and then assign the actual colour manually in scale_colour_manual(). Here's an example using mtcars (apologies that it uses stat_summary instead of geom_line, but it does the same trick):
library(tidyverse)
# Dodged bars show the mean mpg per gear and cylinder count; the line shows the
# mean per gear over all cylinders. Mapping colour to a constant string inside
# aes() is what gives the line its own legend entry.
ggplot(data = mtcars, mapping = aes(gear, mpg, fill = factor(cyl))) +
  stat_summary(fun = mean, geom = "bar", position = "dodge") +
  stat_summary(
    mapping = aes(group = 1, colour = "Overall mean"),
    fun = mean,
    geom = "line",
    size = 3
  ) +
  # Blank titles for both legends; the line colour is assigned manually.
  scale_colour_manual("", values = "black") +
  scale_fill_discrete("")
Created on 2020-12-08 by the reprex package (v0.3.0)
The limitation here is that the colour and fill legends are necessarily separate. Removing the labels (blank titles in both scale_ calls) at least means they aren't visibly split up by legend titles.
In your code you would probably want then:
...
# Actual values: columns filled by gender.
ggplot(data = impact_end_Current_yr_m_actual, aes(x = month, y = gender_value)) +
geom_col(aes(fill = gender))+
# Plan values: color is mapped to the constant "Plan" inside aes(), so the
# line is handled by a colour scale instead of a fixed colour.
geom_line(data = impact_end_Current_yr_m_plan,
aes(x=month, y= gender_value, group=1, color="Plan"),
size=1.2)+
# Assign the actual line colour used for the "Plan" entry.
scale_color_manual(values = "#288D55") +
...
(but I can't test on your data, so I'm not sure whether it works)

Related

Include 2nd variable labels on an existing Variable vs sample plot geom_jitter

I have a geom_jitter plot showing Variables between 2 samples, I would like to include the Group-variable parameters on the left of the plot, setting a separation by lines like in the figure below. Thus, Variables are organised by Group.
Here is a reproducible example:
# Example data: one row per Variable, tagged with a Group, with one value
# column per sample.
data <- tibble::tibble(
  Variable = c("A", "B", "C", "D", "E", "F"),
  Group = c("Asia", "Asia", "Europe", "Europe", "Africa", "America"),
  sample1 = c(0.38, 0.22, 0.18, 0.12, 0.1, 0),
  sample2 = c(0.23, 0.2, 0, 0.12, 0.11, 0.15)
)
library(reshape2)
# Reshape to long format: the sample columns are stacked into a "Sample"
# column with their numbers in "value".
data2 <- melt(data,
              id.vars = c("Variable", "Group"),
              measure.vars = c("sample1", "sample2"),
              variable.name = "Sample",
              value.name = "value")
# Replace missing entries with 0. This must target data2, the object created
# above; "data22" is never defined.
data2[is.na(data2)] <- 0
library(ggplot2)
# Bubble plot: one point per Variable/Sample pair, sized and coloured by value.
ggplot(data = data2, mapping = aes(x = Sample, y = Variable, label = NA)) +
  geom_point(mapping = aes(size = value, colour = value)) +
  geom_text(size = 2, hjust = 1) +
  # scale_size(range = c(1,3)) +
  scale_color_gradient(low = "lightblue", high = "darkblue") +
  theme_bw()
Here is the current output I have:
And this is the format I would like:
To get a polished version of the plot most similar to your ideal plot, you can use facet_grid() plus some theme() customization.
# Same bubble plot, split into one facet row per Group with the strip labels
# moved to the left, outside the axis.
ggplot(data = data2, mapping = aes(x = Sample, y = Variable, label = NA)) +
  geom_point(mapping = aes(size = value, colour = value)) +
  geom_text(size = 2, hjust = 1) +
  # scale_size(range = c(1,3)) +
  scale_color_gradient(low = "lightblue", high = "darkblue") +
  # scales = "free" lets each row keep only its own Variables.
  facet_grid(Group ~ ., scales = "free", switch = "y") +
  theme_bw() +
  theme(
    panel.spacing = unit(0, "cm"),
    strip.text.y = element_text(angle = 180),
    strip.placement = "outside"
  )

Combine/Overlay boxplot with histogram in R

I need to combine the boxplot with the histogram using ggplot2. So far I have this code.
library(dplyr)
library(ggplot2)
data(mtcars)
# Per-carb summary columns: mean weight and number of cars.
dat <- mtcars %>%
  dplyr::select(carb, wt) %>%
  dplyr::group_by(carb) %>%
  dplyr::mutate(mean_wt = mean(wt), carb_count = n())

# Histogram of carb counts with the rounded mean weight printed above each bar.
plot <- ggplot(data = mtcars, mapping = aes(x = carb, y = ..count..)) +
  geom_histogram(alpha = 0.3, position = "identity", lwd = 0.2, binwidth = 1) +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 0.7)) +
  geom_text(
    data = aggregate(mean_wt ~ carb + carb_count, data = dat, FUN = mean),
    mapping = aes(carb, carb_count + 0.5, label = round(mean_wt, 1)),
    color = "black"
  )

# Overlay weight boxplots multiplied by 6 onto the count axis; the secondary
# axis divides by 6 so its labels read in weight units.
plot +
  geom_boxplot(
    data = mtcars,
    mapping = aes(x = carb, y = 6 * wt, group = carb),
    color = "black", fill = "red", alpha = 0.2,
    width = 0.1, outlier.shape = NA
  ) +
  scale_y_continuous(
    name = "Count",
    sec.axis = sec_axis(~ . / 6, name = "Weight")
  )
This results in
However, I don't want the secondary y axis to be the same length as the primary y axis. I want the secondary y axis to be smaller and in the top right corner only. Let's say the secondary y axis should span 20-30 on the primary y axis, and the box plot should scale with that axis.
Can anyone help me with this?
Here's one approach, where I adjusted the secondary axis formula and tweaked the way it's labeled. (EDIT: adjusted to make boxplots bigger, per OP comment.)
# Draw the weight boxplots in the upper part of the count axis: a weight w is
# plotted at 2.5 * w + 10, and the secondary axis applies the inverse
# transform so it reads in weight units.
plot +
  geom_boxplot(
    data = mtcars,
    mapping = aes(x = carb, y = (wt * 2.5) + 10, group = carb),
    color = "black", fill = "red", alpha = 0.2,
    width = 0.5, # wider boxes
    outlier.shape = NA
  ) +
  scale_y_continuous(
    name = "Count",
    # Breaks every 5 up to 25, but only 0, 5 and 10 are labelled on the left.
    breaks = seq(0, 25, by = 5),
    labels = c("0", "5", "10", "", "", ""),
    sec.axis = sec_axis(~ (. - 10) / 2.5, name = "Weight", breaks = 0:5)
  ) +
  theme(
    axis.title.y.left = element_text(hjust = 0.15, vjust = 1),
    axis.title.y.right = element_text(hjust = 0.15, vjust = 1)
  )
You might also consider an alternative using the patchwork package, coincidentally written by the same developer who implemented secondary scales in ggplot2...
# Alternative solution using patchwork
library(patchwork)
# Stand-alone boxplot panel of weight per carb, with the x-axis labels and
# title removed so it can sit directly above the histogram.
plot2 <- ggplot(data = mtcars, mapping = aes(x = carb, y = ..count..)) +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 0.7)) +
  geom_boxplot(
    data = mtcars,
    mapping = aes(x = carb, y = wt, group = carb),
    color = "black", fill = "red", alpha = 0.2,
    width = 0.1, outlier.shape = NA
  ) +
  scale_x_continuous(
    name = NULL, labels = NULL,
    breaks = c(2, 4, 6, 8),
    expand = c(0, 0.85)
  ) +
  scale_y_continuous(name = "Weight")

# Stack the two panels: boxplots on top (1 part), histogram below (3 parts).
plot2 + plot +
  plot_layout(nrow = 2, heights = c(1, 3)) +
  labs(x = NULL)

Combining legends for two different aesthetics fails

library(ggplot2)
# Four specimens, two per Type; split() + list2env() creates one data frame
# per Type (c1, c2) in the global environment.
x <- data.frame(
  Specimen = c("A", "B", "C", "D"),
  Value = rep(0.5, 4),
  Type = c("c1", "c1", "c2", "c2"),
  Treatment = factor(rep("A", 4)),
  bar = c("hot", "cold", "cold", "cold")
)
list2env(split(x, x$Type), envir = .GlobalEnv)

# Left panel: fill mapped to Specimen, outline colour mapped to bar.
p1 <- ggplot() +
  geom_bar(
    data = c1,
    mapping = aes(x = Treatment, y = Value, fill = Specimen, colour = bar),
    stat = "identity", position = "fill", width = 0.5
  ) +
  scale_fill_manual("", values = c("gold", "green")) +
  scale_color_manual("", values = c("gray40", "black")) +
  scale_y_continuous(expand = c(0, 0), labels = scales::percent) +
  theme(legend.position = "bottom") +
  coord_flip()

# Right panel: fill mapped to Specimen, fixed gray outline, no axis text or
# title on the Treatment axis.
p2 <- ggplot() +
  geom_bar(
    data = c2,
    mapping = aes(x = Treatment, y = Value, fill = Specimen),
    stat = "identity", position = "fill", col = "gray40", width = 0.5
  ) +
  scale_fill_manual("", values = c("red", "blue")) +
  scale_y_continuous(expand = c(0, 0), labels = scales::percent) +
  theme(
    legend.position = "bottom",
    axis.text.y = element_blank()
  ) +
  xlab("") +
  coord_flip()

library(cowplot)
# Place both panels side by side.
plot_grid(p1, p2, nrow = 1, align = "v")
In this example, I had to turn off the guide for color, as I couldn't combine it with the guide for fill, despite following the guidelines proposed in this question.
After turning off the guide for col in p1 (guide=F), the legends now appear to be differently drawn (one with col="gray40", the other without any border, as the col-guide is set to false):
]1
How to combine the two legends in p1?
fill and color are mapped to two different variables; it's only by chance that in this (trivial) case "A" is always "hot" and "B" is always "cold".
You can map both fill and color to Specimen or bar, but different variables will always result in different legends.
An alternative may be to create an interaction between the two variables:
library(ggplot2)
# Map fill and colour to the SAME derived variable (Specimen-bar), so both
# aesthetics share one set of legend keys.
ggplot() +
  geom_col(
    data = c1,
    mapping = aes(
      x = Treatment,
      y = Value,
      fill = interaction(Specimen, bar, sep = "-"),
      color = interaction(Specimen, bar, sep = "-")
    ),
    position = "fill",
    width = 0.5
  ) +
  scale_fill_manual("", values = c("gold", "green")) +
  scale_color_manual("", values = c("gray40", "black")) +
  scale_y_continuous(expand = c(0, 0), labels = scales::percent) +
  theme(legend.position = "bottom") +
  coord_flip()
Created on 2018-05-08 by the reprex package (v0.2.0).

Duplicate items in ggplot2 legend

I have produced a plot with stacked columns and two lines in ggplot2. However, the legend items of the lines also show in the legend of the columns. Does anyone know how to remove them from the column legend?
Code below:
## Clear the workspace
rm(list = ls(all.names = TRUE))
## Packages
library(ggplot2)
library(dplyr)
library(reshape)
## Simulated data
set.seed(12345)
# Column data: seven series (a.1 .. a.7) of squared, scaled normal draws, one
# value per month. The draws are made in the same order as the columns.
d.fig6.1 <- data.frame(mm = month.abb, a.1 = (rnorm(12) * 5)^2)
d.fig6.1$a.2 <- (rnorm(12) * 5)^2
d.fig6.1$a.3 <- (rnorm(12) * 5)^2
d.fig6.1$a.4 <- (rnorm(12) * 5)^2
d.fig6.1$a.5 <- (rnorm(12) * 5)^2
d.fig6.1$a.6 <- (rnorm(12) * 5)^2
d.fig6.1$a.7 <- (rnorm(12) * 5)^2
# Line data: two further series (a.8, a.9).
d.fig6.2 <- data.frame(mm = month.abb, a.8 = (rnorm(12) * 5)^2)
d.fig6.2$a.9 <- (rnorm(12) * 5)^2
# Long format: one row per month/series pair.
d.fig6.1 <- melt(d.fig6.1, id.vars = "mm")
d.fig6.2 <- melt(d.fig6.2, id.vars = "mm")
# Print both data sets for inspection.
d.fig6.1
d.fig6.2
##Plot
theme_set(theme_bw(7)) #25
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2",
               "#D55E00", "#CC79A7", "red")
# Stacked columns from d.fig6.1 with value labels, plus lines and labels from
# d.fig6.2. Every continuation line must END with `+`: a line that begins
# with `+` is parsed as a separate statement and is never added to sp.6.
sp.6 <- ggplot(d.fig6.1, aes(x = mm, y = value, fill = variable)) +
  geom_col() +
  labs(x = "") +
  labs(y = "[Units]") +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  scale_fill_manual(values = cbPalette, name = "") +
  geom_text(data = d.fig6.1,
            aes(label = round(value, digits = 2)),
            position = position_stack(vjust = 0.5), size = 2) +
  theme(legend.title = element_blank()) +
  geom_line(data = d.fig6.2,
            aes(x = as.numeric(mm), y = value, color = variable),
            size = 1, inherit.aes = FALSE) +
  geom_text(data = d.fig6.2,
            aes(label = round(value, digits = 2)),
            hjust = 0, vjust = 0, size = 2.5)
sp.6
Move fill = variable out of the top level ggplot(aes(...)) mapping. Keep only the aesthetic mappings common to all geoms there. This way you don't really need inherit.aes = FALSE, either:
# Only the mappings shared by every layer (x, y, label) are set in ggplot().
ggplot(d.fig6.1,
aes(x = mm, y = value, label = round(value, digits = 2))) +
# fill is mapped in this layer only.
geom_col(aes(fill = variable)) +
# Value labels positioned at the middle of each stacked segment.
geom_text(position = position_stack(vjust = 0.5), size = 2) +
# Lines come from the second data set, one line per variable.
geom_line(data = d.fig6.2,
aes(color = variable, group = variable),
size = 1) +
# Value labels for the second data set.
geom_text(data = d.fig6.2,
hjust = 0, vjust = 0, size = 2.5) +
labs(x = "", y = "[Units]") +
scale_fill_manual(values = cbPalette, name="") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title = element_blank())
Explanation: By including fill = variable in the top level aesthetic mapping, every geom that doesn't have inherit.aes = FALSE set explicitly will inherit this as part of the aesthetic mapping for its level.
In this case, geom_line & the second geom_text both use a different data source (d.fig6.2 instead of d.fig6.1), but geom_text still inherits fill = variable from the top level mapping. So the variable values from d.fig6.2 ("a.8" & "a.9") are added to the fill palette, even though they aren't used anywhere.
I stripped down your code for a more minimal example that reproduces the problem below:
# Three minimal variants that differ only in where aes(fill = variable) is
# declared and whether geom_text() inherits the plot-level mapping.

# this will result in a.8 & a.9 in the column legend
# because geom_text inherits aes(fill = variable) from ggplot()
ggplot(d.fig6.1,
aes(x = mm, y = value, fill = variable,
label = round(value, digits = 2))) +
geom_col() +
geom_text(data = d.fig6.2)
# this will not
# because aes(fill = variable) is moved from ggplot() to geom_col()
ggplot(d.fig6.1,
aes(x = mm, y = value,
label = round(value, digits = 2))) +
geom_col(aes(fill = variable)) +
geom_text(data = d.fig6.2)
# this will not, either
# because geom_text() includes all the aes mappings it requires, & inherit.aes = FALSE
ggplot(d.fig6.1,
aes(x = mm, y = value, fill = variable,
label = round(value, digits = 2))) +
geom_col() +
geom_text(data = d.fig6.2,
aes(x = mm, y = value,
label = round(value, digits = 2)),
inherit.aes = FALSE)
Try adding + guides(color = FALSE) at the end.

adding summary statistics to two factor boxplot

I would like to add summary statistics (e.g. mean) to the boxplot which have two factors. I have tried this:
library(ggplot2)
# Notched boxplots of len by dose, with colour and fill mapped to supp and the
# boxes dodged side by side.
ggplot(ToothGrowth, aes(x = factor(dose), y = len)) +
# Error-bar geometry computed by the boxplot stat.
stat_boxplot(geom = "errorbar", aes(col = supp, fill=supp), position = position_dodge(width = 0.85)) +
geom_boxplot(aes(col = supp, fill=supp), notch=T, notchwidth = 0.5, outlier.size=2, position = position_dodge(width = 0.85)) +
# Mean-marker layer; note it maps its own x/y (supp, dose) instead of the
# plot-level factor(dose)/len.
stat_summary(fun.y=mean, aes(supp,dose), geom="point", shape=20, size=7, color="violet", fill="violet") +
scale_color_manual(name = "SUPP", values = c("blue", "darkgreen")) +
scale_fill_manual(name = "SUPP", values = c("lightblue", "green"))
I got this picture:
Is it possible to somehow show the sample size of each box (e.g. at the top of the whiskers)? I have tried this:
# Asker's attempt at adding per-box sample-size labels: the same plot as
# before, followed by a geom_text() call whose data argument is meant to be a
# grouped summary of ToothGrowth.
ggplot(ToothGrowth, aes(x = factor(dose), y = len)) +
stat_boxplot(geom = "errorbar", aes(col = supp, fill=supp), position = position_dodge(width = 0.85)) +
geom_boxplot(aes(col = supp, fill=supp), notch=T, notchwidth = 0.5, outlier.size=2, position = position_dodge(width = 0.85)) +
stat_summary(fun.y=mean,aes(supp,dose),geom="point", shape=20, size=7, color="violet", fill="violet") +
scale_color_manual(name = "SUPP", values = c("blue", "darkgreen")) +
scale_fill_manual(name = "SUPP", values = c("lightblue", "green")) +
# NOTE(review): group_by() and summarize() are passed as separate arguments to
# geom_text() rather than chained onto ToothGrowth, and aes() sits inside
# summarize().
geom_text(data = ToothGrowth,
group_by(dose, supp),
summarize(Count = n(),
q3 = quantile(ToothGrowth, 0.75),
iqr = IQR(ToothGrowth),
aes(x= dose, y = len,label = paste0("n = ",Count, "\n")), position = position_dodge(width = 0.75)))
You can state the aesthetics just once by putting them in the main ggplot call and then they will apply to all of the geom layers: ggplot(ToothGrowth, aes(x = factor(dose), y = len, color=supp, fill=supp))
For the count of observations: The data summary step in geom_text isn't coded properly. Also, to set len (the y-value) for the text placement, the summarize function needs to output values for len.
To add the mean values in the correct locations on the x-axis, use stat_summary with the exact same aesthetics as the other geoms and stats. I've overridden the color aesthetic by setting the color to yellow so that the point markers will be visible on top of the box plot fill colors.
The code to implement the plot is below:
library(tidyverse)
# One dodge specification shared by every layer so whiskers, boxes, mean
# markers and count labels are offset identically.
pd <- position_dodge(0.85)
# color and fill are mapped once at the plot level and inherited by all layers.
ggplot(ToothGrowth, aes(x = factor(dose), y = len, color = supp, fill = supp)) +
  stat_boxplot(geom = "errorbar", position = pd) +
  geom_boxplot(notch = TRUE, notchwidth = 0.5, outlier.size = 2, position = pd) +
  # Mean of len per box. `fun` is the current argument name; `fun.y` is
  # deprecated in stat_summary().
  stat_summary(fun = mean, geom = "point", shape = 3, size = 2,
               colour = "yellow", stroke = 1.5,
               position = pd, show.legend = FALSE) +
  scale_color_manual(name = "SUPP", values = c("blue", "darkgreen")) +
  scale_fill_manual(name = "SUPP", values = c("lightblue", "green")) +
  # Count labels: the summary supplies `len` (the y position) as the group
  # maximum plus 5% of the overall len range.
  geom_text(data = ToothGrowth %>%
              group_by(dose, supp) %>%
              summarize(Count = n(),
                        len = max(len) + 0.05 * diff(range(ToothGrowth$len))),
            aes(label = paste0("n = ", Count)),
            position = pd, size = 3, show.legend = FALSE) +
  theme_bw()
Note that the notch goes outside the hinges for all of the box plots. Also, having the sample size just above the maximum of each boxplot seems distracting and unnecessary to me. You could place all of the text annotations at the bottom of the plot like this:
# Replacement for the count-label layer: len (the y position) is computed from
# the whole ToothGrowth$len column, so every "n = ..." label gets the same
# value, 5% of the len range below the overall minimum.
geom_text(data = ToothGrowth %>% group_by(dose, supp) %>%
summarize(Count = n()) %>%
ungroup %>%
mutate(len=min(ToothGrowth$len) - 0.05 * diff(range(ToothGrowth$len))),
aes(label = paste0("n = ", Count)),
position = pd, size=3, show.legend = FALSE) +

Resources