I have data similar to the one I've created below:
set.seed(42)
dates <- seq.Date(as.Date("2012-08-01"), as.Date("2014-08-30"), "day")
n <- length(dates)
dat <- data.frame(date = dates,
category = rep(LETTERS[1:4], n/2),
daily_count = sample(18:100, n, replace=TRUE))
#following to be used for creating dotted lines; highlighting a certain point for each category
point_dates <- sample(seq.Date(as.Date("2012-08-01"), as.Date("2014-08-30"), "month"),4)
category_name <- list("A", "B", "C", "D")
I am creating a boxplot for each category using facet_wrap, and point_dates are important for me as they shows the point of interest in each boxplot. This is how I am creating the plot:
ggplot(dat) +
geom_boxplot(aes(y = daily_count,
x = yearmonth(date),
group = paste(yearmonth(date), category),
fill = category)) +
labs(x = 'Month & Year',
y = 'Count',
fill = "Category") +
theme_bw() +
theme(axis.text=element_text(size=10),
axis.title=element_text(size=10),
legend.position="none") +
geom_vline(xintercept = lubridate::ymd("2013-08-23"), linetype=1, colour="red", size = 0.5)+
sapply(point_dates[[1]], function(xint) geom_vline(data=filter(dat,
category==category_name[[1]]),aes(xintercept = xint),
linetype=3, colour="black", size = 1))+
sapply(point_dates[[2]], function(xint) geom_vline(data=filter(dat,
category==category_name[[2]]),aes(xintercept = xint),
linetype=3, colour="black", size = 1))+
sapply(point_dates[[3]], function(xint) geom_vline(data=filter(dat,
category==category_name[[3]]),aes(xintercept = xint),
linetype=3, colour="black", size = 1))+
sapply(point_dates[[4]], function(xint) geom_vline(data=filter(dat,
category==category_name[[4]]),aes(xintercept = xint),
linetype=3, colour="black", size = 1))+
facet_wrap(~category, nrow = 2)
And this is the output of the code:
The plot is being created just fine. My question is, is there any better way (loop may be?) that would help me get rid of writing sapply multiple times. Because the number of categories may change (increase/decrease), that would be to change the code everytime.
Any guidance please?
I'm not sure that this is the best way, but you could do all of them in one go using map2 from tidyr. This would save you time from having to write out individual sapply.
library(tidyverse)
ggplot(dat) +
geom_boxplot(aes(y = daily_count,
x = yearmonth(date),
group = paste(yearmonth(date), category),
fill = category)) +
labs(x = 'Month & Year',
y = 'Count',
fill = "Category") +
theme_bw() +
theme(axis.text=element_text(size=10),
axis.title=element_text(size=10),
legend.position="none") +
geom_vline(xintercept = lubridate::ymd("2013-08-23"),
linetype=1, colour="red", size = 0.5)+
map2(point_dates, category_name,
~geom_vline(data=filter(dat, category==.y),
aes(xintercept = .x),
linetype=3, colour="black", size = 1))+
facet_wrap(~category, nrow = 2)
You can use map() to iterate the calls to sapply():
ggplot(dat) +
geom_boxplot(aes(y = daily_count,
x = yearmonth(date),
group = paste(yearmonth(date), category),
fill = category)) +
labs(x = 'Month & Year',
y = 'Count',
fill = "Category") +
theme_bw() +
theme(axis.text=element_text(size=10),
axis.title=element_text(size=10),
legend.position="none") +
geom_vline(xintercept = lubridate::ymd("2013-08-23"), linetype=1, colour="red", size = 0.5)+
map(seq_along(unique(dat$category)), ~sapply(point_dates[[.]], function(xint) geom_vline(data=filter(dat,
category==category_name[[.]]),aes(xintercept = xint),
linetype=3, colour="black", size = 1))) +
facet_wrap(~category, nrow = 2)
If i got it correct, you have already defined the dates for each group. So make the first plot:
library(ggplot2)
library(tsibble)
g = ggplot(dat) +
geom_boxplot(aes(y = daily_count,
x = yearmonth(date),
group = paste(yearmonth(date), category),
fill = category)) +
labs(x = 'Month & Year',
y = 'Count',
fill = "Category") +
theme_bw() +
theme(axis.text=element_text(size=10),
axis.title=element_text(size=10),
legend.position="none") +
geom_vline(xintercept = lubridate::ymd("2013-08-23"), linetype=1, colour="red", size = 0.5)+
facet_wrap(~category, nrow = 2)
You just need to provide a new data frame and call geom_vline:
tmp = data.frame(category=unlist(category_name),date=point_dates)
g + geom_vline(data=tmp,aes(xintercept = date),
linetype=3, colour="black", size = 1)
Related
I'm trying to create a plot like this image below where the individual data lines are in between the box plots. Image to create in R ggplot2
The closest I am getting is something like this:
Image using ggplot2 but it looks a bit cluttered with the lines/points behind.
data1 %>%
ggplot(aes(Time,Trait)) +
geom_line(aes(group=ID), position = "identity")+
geom_point(aes(group=ID), shape=21, colour="black", size=2, position = "identity")+
geom_boxplot(width=.5,position = position_dodge(width=0.9), fill="white") +
stat_summary(fun.data= mean_cl_boot, geom = "errorbar", width = 0.1, position = position_dodge(width = .9)) +
stat_summary(fun = mean, geom = "point", shape = 18, size=3, position = "identity")+
facet_wrap(~Cond) +
theme_classic()
Any tips would be greatly appreciated!
One option to achieve your desired result would be to make use of continuous x scale. Doing so makes it possible to shift the box plots to the left or to right and vice versa for the points and lines:
Making use of some random data to mimic your real data set.
data1$Time1 <- as.numeric(factor(data1$Time, levels = c("Pre", "Post")))
data1$Time_box <- data1$Time1 + .1 * ifelse(data1$Time == "Pre", -1, 1)
data1$Time_lp <- data1$Time1 + .1 * ifelse(data1$Time == "Pre", 1, -1)
library(ggplot2)
ggplot(data1, aes(x = Time_box, y = Trait)) +
geom_line(aes(x = Time_lp, group=ID), position = "identity")+
geom_point(aes(x = Time_lp, group=ID), shape=21, colour="black", size=2, position = "identity")+
geom_boxplot(aes(x = Time_box, group=Time1), width=.25, fill="white") +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.1) +
stat_summary(fun = mean, geom = "point", shape = 18, size=3, position = "identity") +
scale_x_continuous(breaks = c(1, 2), labels = c("Pre", "Post")) +
facet_wrap(~Cond) +
theme_classic()
DATA
set.seed(42)
data1 <- data.frame(
ID = rep(1:10, 4),
Time = rep(c("Pre", "Post"), each = 10),
Trait = runif(40),
Cond = rep(c("MBSR", "SME"), each = 20)
)
EDIT If you want to two boxplots side by side it's basically the same. However in that case you have to map the interaction of Time1 and the variable mapped on fill on the group aesthetic in geom_boxplot (and probably the error bars as well):
library(ggplot2)
set.seed(42)
data1 <- data.frame(
ID = rep(1:10, 4),
Time = rep(c("Pre", "Post"), each = 10),
Fill = rep(c("Fill1", "Fill2"), each = 5),
Trait = runif(40),
Cond = rep(c("MBSR", "SME"), each = 20)
)
ggplot(data1, aes(x = Time_box, y = Trait)) +
geom_line(aes(x = Time_lp, group=ID, color = Fill), position = "identity")+
geom_point(aes(x = Time_lp, group=ID, fill = Fill), shape=21, colour="black", size=2, position = "identity")+
geom_boxplot(aes(x = Time_box, group=interaction(Time1, Fill) , fill = Fill), width=.25) +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.1) +
stat_summary(fun = mean, geom = "point", shape = 18, size=3, position = "identity") +
scale_x_continuous(breaks = c(1, 2), labels = c("Pre", "Post")) +
facet_wrap(~Cond) +
theme_classic()
I am trying to add legend on ggplot2.
When I use the following codes, I get the following plot.
ggplot() +
geom_bar(data = profitCountries, aes(y = (revenue), x = residence), stat="identity",
fill="darkgreen", color = "black") +
geom_bar(data = profitCountries, aes(y = -(total_spend), x = residence), stat="identity",
fill="red", color = "black") +
geom_line(data = profitCountries, aes(y = total_profit, x = residence, group = 1), size = 1.5,
color = "blue" ) +
scale_y_continuous(breaks = seq(-500000,500000,100000), limits=c(-500000, 500000) ) +
xlab('Countries') + ggtitle('Campaign spending and revenue by countries') +
ylab('Campaign spending Revenue') + theme_grey()
As suggested in other posts, I added color inside aes(). When I try to do that using the following code, I get the following plot.
ggplot() +
geom_bar(data = profitCountries, aes(y = (revenue), x = residence, fill="darkgreen"), stat="identity",
color = "black") +
geom_bar(data = profitCountries, aes(y = -(total_spend), x = residence, fill="red"), stat="identity",
color = "black") +
geom_line(data = profitCountries, aes(y = total_profit, x = residence, group = 1, color = "blue"),
size = 2 ) +
scale_y_continuous(breaks = seq(-500000,500000,100000), limits=c(-500000, 500000) ) +
xlab('Countries') + ggtitle('Campaign spending and revenue by countries') +
ylab('Campaign spending Revenue') + theme_grey()
In the second plot, the colors change and two legends are created. Is there anyway to solve this?
You are currently mapping the character vector green to a manual scale whose colors are automatically determined.
You probably want
ggplot(profitCountries, aes(residence)) +
geom_bar(aes(y = (revenue), fill="Revenue"), stat="identity",
color = "black") +
geom_bar(aes(y = -(total_spend), fill="Campaign Spending"), stat="identity",
color = "black") +
geom_line(aes(y = total_profit, group = 1, color = "Net"), size = 2) +
scale_y_continuous(breaks = seq(-500000,500000,100000), limits=c(-500000, 500000) ) +
xlab('Countries') +
ggtitle('Campaign spending and revenue by countries') +
ylab('Campaign spending Revenue') +
theme_grey() +
scale_fill_manual(values = c("Revenue" = "darkgreen", "Campaign Spending" = "red")) +
scale_color_manual(values = c("Net" = "blue"))
Still getting to grips with ggplot. My question: How do I manually change the line size? I've tried with scale_size_manual but it didn't seem to work.
setup:
test.mat <- data.frame(matrix(nrow=32, ncol =3))
test.mat[,1] = rep(1:16,2)
test.mat[1:16,2] = as.character(rep("Cohort Alpha"),16)
test.mat[17:32,2] = as.character(rep("Factor Alpha"), 16)
test.mat[,3] = rnorm(32,0,1)
colnames(test.mat) = c("Window", "type", "value")
ggplot(test.mat, aes(x=Window, y=value)) +
geom_line(aes(colour = type, linetype = type)) +
theme_classic() +
scale_colour_manual("type", values = c("black", "steelblue")) +
scale_linetype_manual("type", values = c("solid", "solid")) +
scale_size_manual("type", values = c(5, 1.4), guide = "none")
specify size inside aes() function as follows:
ggplot(test.mat, aes(x=Window, y=value)) +
geom_line(aes(colour = type, linetype = type, size = type)) +
theme_classic() +
scale_colour_manual("type", values = c("black", "steelblue")) +
scale_linetype_manual("type", values = c("solid", "solid")) +
scale_size_manual("type", values = c(5, 1.4), guide = "none")
Just turning #NelsonGon comment into an answer.
Is this what you want?
test.mat <- data.frame(matrix(nrow=32, ncol =3))
test.mat[,1] = rep(1:16,2)
test.mat[1:16,2] = as.character(rep("Cohort Alpha"),16)
test.mat[17:32,2] = as.character(rep("Factor Alpha"), 16)
test.mat[,3] = rnorm(32,0,1)
colnames(test.mat) = c("Window", "type", "value")
# -------------------------------------------------------------------------
base <- ggplot(test.mat, aes(x=Window, y=value))
#Here is where you need to add size
line_size <- base + geom_line(aes(colour = type, linetype = type), size=3)
line_size + theme_classic() +
scale_colour_manual("type", values = c("black", "steelblue")) +
scale_linetype_manual("type", values = c("solid", "solid")) +
scale_size_manual("type", values = c(5, 1.4), guide = "none")
output
Update
If you want variable thickness for the individual lines, you can do as follows.
base <- ggplot(test.mat, aes(x=Window, y=value))
#Use an ifelse to add variable thickness
line_size <- base + geom_line(aes(colour = type, size=ifelse(type=="Cohort Alpha",1,2)))
line_size + guides(size = FALSE)
To follow up on my comment on deepseefan's answer
base +
geom_line(aes(colour = type,
size=factor(ifelse(type=="Cohort Alpha", "thick", "thin"),
levels=c("thick","thin")))) +
scale_colour_manual(values = c("black", "steelblue")) +
scale_size_manual(values = c(5, 1.4), guide = FALSE)
I have created a plot with the following dataset:
Locus;Island;AR;Type;Shapetype
MS1;ST;4,6315;MS;NA
MS1;FG;3,9689;MS;NA
MS1;SN;3;MS;NA
MS2;ST;2;MS;NA
MS2;FG;2;MS;NA
MS2;SN;2;MS;NA
MS3;ST;7,5199;MS;NA
MS3;FG;5,5868;MS;NA
MS3;SN;3;MS;NA
MS4;ST;2,9947;MS;NA
MS4;FG;3;MS;NA
MS4;SN;2;MS;NA
MS5;ST;9,0726;MS;NA
MS5;FG;5,6759;MS;NA
MS5;SN;2,963;MS;NA
MS6;ST;6,5779;MS;NA
MS6;FG;5,6842;MS;NA
MS6;SN;2;MS;NA
MS7;ST;2;MS;NA
MS7;FG;1;MS;NA
MS7;SN;1;MS;NA
MS8;ST;3,97;MS;NA
MS8;FG;2,9032;MS;NA
MS8;SN;1;MS;NA
MS9;ST;2;MS;NA
MS9;FG;1,9977;MS;NA
MS9;SN;2;MS;NA
MS10;ST;3,9733;MS;NA
MS10;FG;3,9971;MS;NA
MS10;SN;2;MS;NA
MS11;ST;7,4172;MS;NA
MS11;FG;5,6471;MS;NA
MS11;SN;3;MS;NA
MS12;ST;2;MS;NA
MS12;FG;2;MS;NA
MS12;SN;2;MS;NA
MS13;ST;5,6135;MS;NA
MS13;FG;3;MS;NA
MS13;SN;2;MS;NA
MT;ST;12;MT;NA
MT;FG;3;MT;NA
MT;SN;2;MT;NA
TLR1LA;ST;3,68;TLR;TLR1LA
TLR1LA;FG;4,4;TLR;TLR1LA
TLR1LA;SN;1;TLR;TLR1LA
TLR1LB;ST;3,99;TLR;TLR1LB
TLR1LB;FG;5;TLR;TLR1LB
TLR1LB;SN;1;TLR;TLR1LB
TLR2A;ST;4,9;TLR;TLR2A
TLR2A;FG;5;TLR;TLR2A
TLR2A;SN;2;TLR;TLR2A
TLR2B;ST;5,64;TLR;TLR2B
TLR2B;FG;4;TLR;TLR2B
TLR2B;SN;3;TLR;TLR2B
TLR3;ST;1;TLR;TLR3
TLR3;FG;3;TLR;TLR3
TLR3;SN;3;TLR;TLR3
TLR4;ST;1;TLR;TLR4
TLR4;FG;2,89;TLR;TLR4
TLR4;SN;2;TLR;TLR4
TLR5;ST;2,9;TLR;TLR5
TLR5;FG;2;TLR;TLR5
TLR5;SN;2;TLR;TLR5
TLR21;ST;2,91;TLR;TLR21
TLR21;FG;1;TLR;TLR21
TLR21;SN;1;TLR;TLR21
Here's the code for the plot:
ggplot(comb, aes(Island, AR, group = Locus, colour = (factor(Type)))) +
geom_line(aes(colour = factor(Type), alpha = factor(Type), size = factor(Type))) +
scale_alpha_manual(values = c("MS"=0.2, "MT"=0.2, "TLR" = 1)) +
scale_size_manual(values = c("MS"=0.5, "MT"=0.5, "TLR" = 0.3)) +
xlab("Island") +
ylab("Allelic Richness") +
scale_x_discrete(labels = c("Santiago", "Fogo", "Sao Nicolau"),
limits = c("ST", "FG", "SN")) +
geom_point(aes(shape = (factor(Shapetype)))) +
scale_shape_manual(values = c(1,2,3,4,5,6,7,8,9,10),
breaks=c("TLR1LA","TLR1LB","TLR2A","TLR2B","TLR3",
"TLR4", "TLR5","TLR21", "MS", "MT")) +
scale_colour_manual(values = c("Red","Blue","Black"),
breaks=c("TLR1LA","TLR1LB","TLR2A","TLR2B","TLR3",
"TLR4","TLR5","TLR21", "MS", "MT")) +
theme_bw() +
labs(shape="Functional", colour="Neutral")
The plot is okay, however, I need to remove the legend that is created for the alpha values. I have tried to use both + scale_alpha(guide = 'none')and guide = 'none', but none of them seem to work (I may be placing them in the wrong places, though). I suspect that they do not work, because of the manual adjustment of the alpha values.
Please be aware that this is not a minimal example.
Please note that your alpha legend is also your size legend, but this is very hard to see since your sizes are very similar. Set guide = 'none' in both scale_alpha_manual and scale_size_manual to remove that portion of the legend.
If you only do it in scale_alpha_manual you can actually see that the alpha becomes 1 for those lines, so it works as intended. So #Thierry's answer is correct.
Full code
ggplot(comb, aes(Island, AR, group = Locus, colour = (factor(Type)))) +
geom_line(aes(colour = factor(Type), alpha = factor(Type), size = factor(Type))) +
scale_alpha_manual(values = c("MS"=0.2, "MT"=0.2, "TLR" = 1), guide = 'none') +
scale_size_manual(values = c("MS"=0.5, "MT"=0.5, "TLR" = 0.3), guide = 'none') +
xlab("Island") +
ylab("Allelic Richness") +
scale_x_discrete(labels = c("Santiago", "Fogo", "Sao Nicolau"),
limits = c("ST", "FG", "SN")) +
geom_point(aes(shape = (factor(Shapetype)))) +
scale_shape_manual(values = c(1,2,3,4,5,6,7,8,9,10),
breaks=c("TLR1LA","TLR1LB","TLR2A","TLR2B","TLR3",
"TLR4", "TLR5","TLR21", "MS", "MT")) +
scale_colour_manual(values = c("Red","Blue","Black"),
breaks=c("TLR1LA","TLR1LB","TLR2A","TLR2B","TLR3",
"TLR4","TLR5","TLR21", "MS", "MT")) +
theme_bw() +
labs(shape="Functional", colour="Neutral")
Result
(Note that my y-axis is wrong because your data includes comma's and I was lazy.)
guide = "none" should do the trick
ggplot(
comb,
aes(Island, AR, group = Locus, colour = (factor(Type)))
) +
geom_line(aes(alpha = factor(Type), size = factor(Type))) +
geom_point(aes(shape = factor(Shapetype))) +
scale_x_discrete(
"Island",
labels = c("Santiago", "Fogo", "Sao Nicolau"),
limits = c("ST", "FG", "SN")
) +
ylab("Allelic Richness") +
scale_alpha_manual(values = c("MS"=0.2, "MT"=0.2, "TLR" = 1), guide = "none") +
scale_size_manual(values = c("MS"=0.5, "MT"=0.5, "TLR" = 0.3)) +
scale_shape_manual(
"Functional",
values = c(1,2,3,4,5,6,7,8,9,10),
breaks = c("TLR1LA","TLR1LB","TLR2A","TLR2B","TLR3","TLR4","TLR5","TLR21", "MS", "MT")
) +
scale_colour_manual(
"Neutral",
values = c("Red","Blue","Black"),
breaks = c("TLR1LA","TLR1LB","TLR2A","TLR2B","TLR3","TLR4","TLR5","TLR21", "MS", "MT")
) +
theme_bw()
Starting with this basic plot
bp <- df %>%
ggplot(aes(column_of_interest, alpha = 0.25)) + geom_density()
from r cookbook, where bp is your ggplot -
Remove legend for a particular aesthetic (alpha):
bp + guides(alpha="none")
I would like to have the name of each bar under each bar (in my case the names are "Round" and they happen to be 1, 2, ... 12)
Here is my current code:
ggplot(data=draft1, aes(x = Round, y = mean.age)) +
geom_bar(stat = "identity", fill = "steelblue", color = "black", width = 0.7) +
ylab("Average age of retirement") + ylim(c(0,40)) +
ggtitle("Average age of retirement by rounds of all players") +
geom_text(aes(label = mean.age), position=position_dodge(width=0.9), vjust = -0.5)
Here is the current output:
set your Round to be a factor
ggplot(data=draft1, aes(x = factor(Round), y = mean.age)) +
Or use scale_x_continuous()
ggplot(data=draft1, aes(x = Round, y = mean.age)) +
... +
scale_x_continuous(breaks=(seq(1:12)))
Just add a discrete scale to x:
library(ggplot2)
draft1 = data.frame(Round = seq(1, 12), mean.age = sample(29:32))
ggplot(data=draft1, aes(x = Round, y = mean.age)) +
geom_bar(stat = "identity", fill = "steelblue", color = "black", width = 0.7) +
ylab("Average age of retirement") + ylim(c(0,40)) +
ggtitle("Average age of retirement by rounds of all players") +
geom_text(aes(label = mean.age), position=position_dodge(width=0.9), vjust = -0.5) +
scale_x_discrete()