Interaction Plot in ggplot2 - r

I'm trying to make interaction plot with ggplot2. My code is below:
library(ggplot2)
p <- qplot(as.factor(dose), len, data=ToothGrowth, geom = "boxplot", color = supp) + theme_bw()
p <- p + labs(x="Dose", y="Response")
p <- p + stat_summary(fun.y = mean, geom = "point", color = "blue")
p <- p + stat_summary(fun.y = mean, geom = "line", aes(group = 1))
p <- p + opts(axis.title.x = theme_text(size = 12, hjust = 0.54, vjust = 0))
p <- p + opts(axis.title.y = theme_text(size = 12, angle = 90, vjust = 0.25))
print(p)
How can I plot dose-supp level combination means rather than only dose level means which I'm getting here? Thanks in advance for your help.

You can precalculate the values in their own data frame:
toothInt <- ddply(ToothGrowth,.(dose,supp),summarise, val = mean(len))
ggplot(ToothGrowth, aes(x = factor(dose), y = len, colour = supp)) +
geom_boxplot() +
geom_point(data = toothInt, aes(y = val)) +
geom_line(data = toothInt, aes(y = val, group = supp)) +
theme_bw()
Note that using ggplot rather than qplot makes the graph construction a lot clearer for more complex plots like these (IMHO).

You can compute your summaries by the appropriate groups (supp):
p <- qplot(as.factor(dose), len, data=ToothGrowth, geom = "boxplot", color = supp) + theme_bw()
p <- p + labs(x="Dose", y="Response")
p <- p + stat_summary(fun.y = mean, geom = "point", color = "blue", aes(group=supp))
p <- p + stat_summary(fun.y = mean, geom = "line", aes(group = supp))
p <- p + opts(axis.title.x = theme_text(size = 12, hjust = 0.54, vjust = 0))
p <- p + opts(axis.title.y = theme_text(size = 12, angle = 90, vjust = 0.25))
print(p)
Or converting to ggplot syntax (and combining into one expression)
ggplot(ToothGrowth, aes(as.factor(dose), len, colour=supp)) +
geom_boxplot() +
stat_summary(aes(group=supp), fun.y = mean, geom="point", colour="blue") +
stat_summary(aes(group=supp), fun.y = mean, geom="line") +
scale_x_discrete("Dose") +
scale_y_continuous("Response") +
theme_bw() +
opts(axis.title.x = theme_text(size = 12, hjust = 0.54, vjust = 0),
axis.title.y = theme_text(size = 12, angle = 90, vjust = 0.25))
EDIT:
To make this work with 0.9.3, it effectively becomes Joran's answer.
library("plyr")
summ <- ddply(ToothGrowth, .(supp, dose), summarise, len = mean(len))
ggplot(ToothGrowth, aes(as.factor(dose), len, colour=supp)) +
geom_boxplot() +
geom_point(data = summ, aes(group=supp), colour="blue",
position = position_dodge(width=0.75)) +
geom_line(data = summ, aes(group=supp),
position = position_dodge(width=0.75)) +
scale_x_discrete("Dose") +
scale_y_continuous("Response") +
theme_bw() +
theme(axis.title.x = element_text(size = 12, hjust = 0.54, vjust = 0),
axis.title.y = element_text(size = 12, angle = 90, vjust = 0.25))

If you think you might need a more general approach, you could try function rxnNorm in package HandyStuff (github.com/bryanhanson/HandyStuff). Disclaimer: I'm the author. Disclaimer #2: the box plot option doesn't quite work right, but all the other options are fine.
Here's an example using the ToothGrowth data:
p <- rxnNorm(data = ToothGrowth, res = "len", fac1 = "dose", fac2 = "supp", freckles = TRUE, method = "iqr", fac2cols = c("red", "green"))
print(p)

a much easier way. without ddply. directly with ggplot2.
ggplot(ToothGrowth, aes(x = factor(dose) , y=len , group = supp, color = supp)) +
geom_boxplot() +
geom_smooth(method = lm, se=F) +
xlab("dose") +
ylab("len")

Related

Creating ggplot geom_point() with position dodge 's-shape'

I am trying to create a plot like the one below. I'd like the order the points in each category in such a way that they form an s-shape. Is it possible to do this in ggplot?
Similar data available here
What I have so far:
somatic.variants <- read.delim("data/Lawrence.S2.txt", stringsAsFactors=T)
cancer_rates <- tapply(somatic.variants$logn_coding_mutations, somatic.variants$tumor_type, median)
cancer_rates <- cancer_rates[order(cancer_rates, decreasing=F)]
somatic.variants$tumor_type <- factor(somatic.variants$tumor_type, levels = names(cancer_rates))
library(ggplot2)
library(GGally)
ggplot(data = somatic.variants,
mapping = aes(x = tumor_type,
y = log10(n_coding_mutations))) +
geom_point(position = position_dodge2()) +
scale_x_discrete(position = "top") +
scale_y_continuous(labels = c(0,10,100,1000,10000), expand = c(0,0)) +
geom_stripped_cols() +
theme_bw() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 315, hjust = 1, size = 12),
panel.grid = element_blank()) +
labs(y = "Coding mutations count") +
stat_summary(fun = median,
geom="crossbar",
size = 0.25,
width = 0.9,
group = 1,
show.legend = FALSE,
color = "#FF0000")
This could be achieved by
grouping the data by x-axis categories
arranging by the y-axis value
which ensures that the points are plotted in ascending order of the values for each category.
somatic.variants <- read.delim("https://gist.githubusercontent.com/wudustan/57deecdaefa035c1ecabf930afde295a/raw/1594d51a1e3b52f674ff746caace3231fd31910a/Lawrence.S2.txt", stringsAsFactors=T)
cancer_rates <- tapply(somatic.variants$logn_coding_mutations, somatic.variants$tumor_type, median)
cancer_rates <- cancer_rates[order(cancer_rates, decreasing=F)]
somatic.variants$tumor_type <- factor(somatic.variants$tumor_type, levels = names(cancer_rates))
library(ggplot2)
library(GGally)
library(dplyr)
somatic.variants <- somatic.variants %>%
group_by(tumor_type) %>%
arrange(n_coding_mutations)
ggplot(data = somatic.variants,
mapping = aes(x = tumor_type,
y = log10(n_coding_mutations))) +
geom_point(position = position_dodge2(.9), size = .25) +
scale_x_discrete(position = "top") +
scale_y_continuous(labels = c(0,10,100,1000,10000), expand = c(0,0)) +
geom_stripped_cols() +
theme_bw() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 315, hjust = 1, size = 12),
panel.grid = element_blank()) +
labs(y = "Coding mutations count") +
stat_summary(fun = median,
geom="crossbar",
size = 0.25,
width = 0.9,
group = 1,
show.legend = FALSE,
color = "#FF0000")
#> Warning: Removed 29 rows containing non-finite values (stat_summary).

How to put the y axis label over 2 lines ggplot

I am trying to make the y axis of the second plot over 2 lines. Using '\n' for the first plot worked fine but using it on the second makes the text in odd places (maybe because of the italics).
p1 <- ggplot(data = new_data) +
geom_line(mapping = aes(x = Date,
y = Proportion,
group = Species,
colour = Species)) +
scale_colour_manual(values=c(Golden_Trevally="goldenrod2",
Red_Snapper="firebrick2",
Sharksucker_Remora="darkolivegreen3",
Juvenile_Remora="aquamarine2")) +
xlab("Date (2014-2018)") +
ylab("Total Presence \n Per Month ") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.position="top") +
labs(colour = "Hitchhiker Species")
new_data_counts <- new_data %>% select(Date, Count)
new_data_counts <- new_data_counts[!duplicated(new_data_counts),]
p2 <- ggplot(data = new_data_counts) +
geom_bar(mapping = aes(x = Date, y = Count), stat = 'identity') +
xlab("Date (2014-2018)") +
ylab("Total Number of "~italic(\nM.alfredi)~" Encounters") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
grid.arrange(p1,p2)
You can try this:
geom_line(mapping = aes(x = Date,
y = Proportion,
group = Species,
colour = Species)) +
scale_colour_manual(values=c(Golden_Trevally="goldenrod2",
Red_Snapper="firebrick2",
Sharksucker_Remora="darkolivegreen3",
Juvenile_Remora="aquamarine2")) +
xlab("Date (2014-2018)") +
ylab("Total Presence \n Per Month ") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.position="top") +
labs(colour = "Hitchhiker Species")
new_data_counts <- new_data %>% select(Date, Count)
new_data_counts <- new_data_counts[!duplicated(new_data_counts),]
p2 <- ggplot(data = new_data_counts) +
geom_bar(mapping = aes(x = Date, y = Count), stat = 'identity') +
labs(x="Date (2014-2018)",
y=expression(atop(paste("Total Number of"), paste(italic("M.alfredi"), " Encounters")))) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
grid.arrange(p1,p2)
You need space between \n and M.alfredi in p2. Since there is no reproducible example, here is my suggestion for the second plot,
p2 <- ggplot(data = new_data_counts) +
geom_bar(mapping = aes(x = Date, y = Count), stat = 'identity') +
xlab("Date (2014-2018)") +
ylab("Total Number of "~italic(\n M.alfredi)~" Encounters") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
It should solve your problem.

Log scale on y axis but data have negative values

I am trying to create a boxplot with a log y axis as I have some very small values and then some much higher values which do not work well in a boxplot with a continuous y axis. However, I have negative values which obviously do not work with a log scale. I was wondering if there was a way around this so that I can display my data on a boxplot which is still easy to interpret but has a more appropriate scale on the y axis.
p <- ggplot(data = Elstow.monthly.fluxes, aes(x = Month1, y = CH4.Flux)) + stat_boxplot(geom = "errorbar", linetype = 1, width = 0.5) + geom_boxplot() +
xlab(expression("Month")) + ylab(expression(~CH[4]~Flux~(µg~CH[4]~m^{-2}~d^{-1}))) +
scale_y_continuous(breaks = seq(-5000,40000,5000), limits = c(-5000,40000))+
theme(axis.text.x = element_text(colour = "black")) + theme(axis.text.y = element_text(colour =
"black")) +
theme(panel.background = element_rect("white", "black")) +
theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
theme(axis.text = element_text(size = 12))+ theme(axis.title = element_text(size = 14))+
theme(axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0))) +
theme(axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0))) +
geom_hline(yintercept = 0, linetype ="dashed", colour = "black")
While you could indeed use the secondary axis to get the labels you want as Zhiqiang suggests, you could also use a transformation that fits your needs.
Consider the following skewed boxplots:
df <- data.frame(
x = rep(letters[1:2], each = 500),
y = rlnorm(1000) - 2
)
ggplot(df, aes(x, y)) +
geom_boxplot()
Instead, you could use the pseudo-log transformation to visualise your data:
ggplot(df, aes(x, y)) +
geom_boxplot() +
scale_y_continuous(trans = scales::pseudo_log_trans())
Alternatively, you could make any transformation you want. I personally like the inverse hyperbolic sine transformation, which is very much like the pseudo-log:
asinh_trans <- scales::trans_new(
"inverse_hyperbolic_sine",
transform = function(x) {asinh(x)},
inverse = function(x) {sinh(x)}
)
ggplot(df, aes(x, y)) +
geom_boxplot() +
scale_y_continuous(trans = asinh_trans)
I have a silly solution: trick the secondary axis to re-scale y axis. I do not have your data, just made up some numbers for the purpose of demonstration.
First convert y values as logy = log(y + 5000). When generating the graph, transform the values back to the original scale. I borrow the second axis to display the values. I am pretty sure others may have more elegant ways to do this.
I was lazy for not trying to find the right way to remove the primary y axis tick labels, just used breaks = c(0).
df<-data.frame(y = runif(33, min=-5000, max=40000),
x = rep(c("Aug", "Sep", "Oct"),33))
library(tidyverse)
df$logy = log(df$y+5000)
p <- ggplot(data = df, aes(x = x, y = logy)) +
stat_boxplot(geom = "errorbar", linetype = 1, width = 0.5) +
geom_boxplot() +
xlab(expression("Month")) +
ylab(expression(~CH[4]~Flux~(µg~CH[4]~m^{-2}~d^{-1}))) +
scale_y_continuous(sec.axis = sec_axis(~(exp(.) -5000),
breaks = c(-4000, 0, 5000, 10000, 20000, 40000)),
breaks = c(0))+
theme(axis.text.x = element_text(colour = "black")) +
theme(axis.text.y = element_text(colour = "black")) +
theme(panel.background = element_rect("white", "black")) +
theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) +
theme(axis.text = element_text(size = 12))+
theme(axis.title = element_text(size = 14))+
theme(axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0))) +
theme(axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0))) +
geom_hline(yintercept = log(5000), linetype ="dashed", colour = "black")
p
coord_trans() is applied after the statistics are calculated (unlike scale). This can be combined with the pseudo_log_trans to cope with negatives.
library(plotly)
set.seed(1234)
dat <- data.frame(cond = factor(rep(c("A","B"), each=200)), rating = c(rnorm(200),rnorm(200, mean=500)))
pseudoLog <- scales::pseudo_log_trans(base = 10)
p <- ggplot(dat, aes(x=cond, y=rating)) + geom_boxplot() + coord_trans(y=pseudoLog)

Adding significance levels in faceted plots with two results in the same x axis

I know that there are many questions already open to add significance level in ggplot2 in faceted plots. However, as a R beginner, I did not found a solution for my plot.
My data (.txt file) is available in this link:
1drv.ms/t/s!AsLAxDXdkA9Mg8oXdJ-qxD5AeB4KAw
There are four columns: three factor levels (temperature, parasitoid species and behavior) and a numeric level (number of parasitism and host-killing).
I run the plot with the code:
ggplot(mydata, aes(x = temperature, y = value, fill = species)) +
facet_grid(. ~ behavior) +
stat_summary(fun.y = mean, geom = "bar", position = "dodge", stat="identity") +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1) +
labs(x = "Temperature", y = "Nº of parasitized or host-killed larvae") +
theme(legend.position = "bottom", legend.title = element_blank(), legend.text = element_text(size = 11, face = "italic"))
Now I want to add the significance level in each plot, comparing separately the results in Parasitism and Host-Killing in both temperatures. So I have 6 contrasts in each plot. I tested the option using the function stat_compare_means. However, as advised in this Rblogger tutorial (https://www.r-bloggers.com/add-p-values-and-significance-levels-to-ggplots/) I should make a list telling what I want to compare. But, in this case, I have two results for 25°C and two for 30°C. How I could create this list mentioning all the contrasts?
If anybody could clarify for me how I can solve this, it would help me a lot. Thanks.
stat_compare_means worked "out of the box" (which makes sense, since it's using the grouping defined in the ggplot function.
ggplot(mydata, aes(x = temp, y = value, fill = factor(species))) +
facet_grid(. ~ behavior) +
stat_summary(fun.y = mean, geom = "bar", position = position_dodge(width = 1), stat="identity") +
stat_summary(fun.data = mean_sdl, geom="errorbar", position = position_dodge(width = 1), width=0.25, stat="identity", fun.args = list(mult = 1)) +
labs(x = "Temperature", y = "Nº of parasitized or host-killed larvae") +
theme(legend.position = "bottom", legend.title = element_blank(), legend.text = element_text(size = 11, face = "italic")) +
stat_compare_means(method = "t.test")
Alternatively, switch the x and fill variables:
ggplot(mydata, aes(x = factor(species), y = value, fill = factor(temp))) +
facet_grid(. ~ behavior) +
stat_summary(fun.y = mean, geom = "bar", position = position_dodge(width = 1), stat="identity") +
stat_summary(fun.data = mean_sdl, geom="errorbar", position = position_dodge(width = 1), width=0.25, stat="identity", fun.args = list(mult = 1)) +
labs(x = "Temperature", y = "Nº of parasitized or host-killed larvae") +
theme(legend.position = "bottom", legend.text = element_text(size = 11, face = "italic")) +
stat_compare_means(method = "t.test")
Thanks #thc for your help and patience. As I decided to use letters over the error bars, I had to create a new data.frame for each letter and them add the geom_text. This got it right with the below code, after testing many x and y values to insert the letters in the right point. But certainly it has an easier way to do it that I don't know it.
sigvals1 <- data.frame(x=0.75,y=222.4, text=c("a"), behavior=c("Parasitism"))
sigvals2 <- data.frame(x=1.207,y=190.55, text=c("a"), behavior=c("Parasitism"))
sigvals3 <- data.frame(x=1.75,y=117.7, text=c("b"), behavior=c("Parasitism"))
sigvals4 <- data.frame(x=2.209,y=103, text=c("b"), behavior=c("Parasitism"))
sigvals5 <-data.frame(x = 0.75, y = 74.8, text=c("A"), behavior = c("Host-Killing"))
sigvals6 <-data.frame(x = 1.20, y = 92.97, text=c("B"), behavior = c("Host-Killing"))
sigvals7 <-data.frame(x = 1.74, y = 49.8, text=c("C"), behavior = c("Host-Killing"))
sigvals8 <-data.frame(x = 2.196, y = 43, text=c("C"), behavior = c("Host-Killing"))
plot +
geom_text(data=sigvals1, aes(x=x,y=y, label=text, fill=NA), hjust=0) +
geom_text(data=sigvals2, aes(x=x,y=y, label=text, fill=NA), hjust=0) +
geom_text(data=sigvals3, aes(x=x,y=y, label=text, fill=NA), hjust=0)+
geom_text(data=sigvals4, aes(x=x,y=y, label=text, fill=NA), hjust=0) +
geom_text(data=sigvals5, aes(x=x,y=y, label=text, fill=NA), hjust=0) +
geom_text(data=sigvals6, aes(x=x,y=y, label=text, fill=NA), hjust=0) +
geom_text(data=sigvals7, aes(x=x,y=y, label=text, fill=NA), hjust=0) +
geom_text(data=sigvals8, aes(x=x,y=y, label=text, fill=NA), hjust=0)

remove axis.ticks from some panels in facet_wrap

I am using ggplot to plot y versus x for two factors f1 and f2 using the facet_wrap. I want to keep the ticks for the y axis only for the first column (representing given value of factor f2) and remove the others. Is there away to do this? I tried many ways (including scale = free_y) but no success. Below is a simple code:
y = rnorm(100)
x = rnorm(100)
type = rep(1:5,20)
f1 = sample(LETTERS[1:3], 100, replace=TRUE, prob=c(0.3, 0.3, 0.4) )
f2 = sample(LETTERS[4:6], 100, replace=TRUE, prob=c(0.3, 0.3, 0.4) )
df = data.frame(cbind(x, y,f1,f2, type))
df$x = as.numeric(as.character(df$x)); df$y = as.numeric(as.character(df$y))
p1 = ggplot(data = df, aes(x, y, linetype = type)) +
geom_line(aes(linetype = type))+ scale_linetype_manual(values=c("solid", "F1", "dotted", "twodash","dashed")) +
scale_size_manual(values=c(0.5, 0.5, 0.5,0.5,0.5)) +
geom_point(size=0.5, shape=21, fill="black") +
labs(y="y") +
facet_wrap( ~ f1 + f2 , ncol=3, scales = "free_y") +
theme_bw() +
theme(panel.margin = unit(0.8, "lines")) +
theme(plot.title = element_text(size = rel(1.2))) +
theme(axis.ticks.length=unit(0.2,"cm")) +
theme(strip.text.x = element_text(size=11)) +
theme(strip.background = element_rect(colour="white", fill="gray"))
p1
Questions:
How to keep the ticks for the y axis only for the first column in the left (i.e factor f2 = "D"). I know the y axis have different levels but this is not an issue for me.
many thanks
Abderrahim
I think you are actually after facet_grid as opposed to facet_wrap.
See the below:
p1 <- ggplot(data = df, aes(x, y, linetype = type)) +
geom_line(aes(linetype = type))+ scale_linetype_manual(values=c("solid", "F1", "dotted", "twodash","dashed")) +
scale_size_manual(values=c(0.5, 0.5, 0.5,0.5,0.5)) +
geom_point(size=0.5, shape=21, fill="black") +
labs(y="y") +
facet_grid( f1~f2 ) +
theme_bw() +
theme(panel.margin = unit(0.8, "lines")) +
theme(plot.title = element_text(size = rel(1.2))) +
theme(axis.ticks.length=unit(0.2,"cm")) +
theme(strip.text.x = element_text(size=11)) +
theme(strip.background = element_rect(colour="white", fill="gray"))
p1

Resources