Overlay different datasets in same facetted plot in ggplot2 - r

I need to gather two facet columns into one column with ggplot2.
In the following example, I need to overlay the content of the two columns DEG and RAN into one, while giving different colours to DEG and RAN data (small points and smooth line) and provide the corresponding legend (so I can distinguish them as they are overlayed).
I feel my code is not too, too far from what I need, but the relative complexity of the dataset blocks me. How to go about achieving this in ggplot2?
Here's my code so far:
require(reshape2)
library(ggplot2)
library(RColorBrewer)
fileName = paste("./4.csv", sep = "") # csv file available here: https://www.dropbox.com/s/bm9hd0t5ak74k89/4.csv?dl=0
mydata = read.csv(fileName,sep=",", header=TRUE)
dataM = melt(mydata,c("id"))
dataM = cbind(dataM,colsplit(dataM$variable,pattern = "_",names = c("NM", "ORD", "CAT")))
dataM$variable <- NULL
dataM <- dcast(dataM, ... ~ CAT, value.var = "value")
my_palette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
ggplot(dataM, aes(x=NR ,y= ASPL)) +
geom_point(size = .4,alpha = .5) +
stat_smooth(se = FALSE, size = .5) +
theme_bw() +
theme(plot.background = element_blank(),
axis.line = element_blank(),
legend.key = element_blank(),
legend.title = element_blank()) +
scale_y_continuous("ASPL", expand=c(0,0), limits = c(1, 7)) +
scale_x_continuous("NR", expand=c(0,0), limits = c(0, 100)) +
theme(legend.position="bottom") +
theme(axis.title.x = element_text(vjust=-0.3, face="bold", size=12)) +
theme(axis.title.y = element_text(vjust=1.5, face="bold", size=12)) +
ggtitle("Title") + theme(plot.title = element_text(lineheight=.8, face="bold")) +
theme(title = element_text(vjust=2)) +
facet_grid(NM ~ ORD)
Here's what it gives me right now:
Extra question: how come DEG/SF doesn't show a smooth line?

You can use the group aesthetic to define that data points with the same value of ORD belong together. You can also map aesthetics shape and color to this variable. You can also use . to specify that the facets are not split along a specific dimension.
I have made the changes to your code below after transforming NR and ASPL to numeric variables:
dataM$NR <- as.integer(dataM$NR)
dataM$ASPL <- as.numeric(dataM$ASPL)
ggplot(dataM, aes(x=NR ,y= ASPL, group=ORD, color=ORD)) +
geom_point(size = .7,alpha = .5, aes(shape=ORD)) + ## increased size
stat_smooth(se = FALSE, size = .5) +
theme_bw() +
theme(plot.background = element_blank(),
axis.line = element_blank(),
legend.key = element_blank(),
legend.title = element_blank()) +
scale_y_continuous("ASPL", expand=c(0,0), limits = c(1, 7)) +
scale_x_continuous("NR", expand=c(0,0), limits = c(0, 100)) +
theme(legend.position="bottom") +
theme(axis.title.x = element_text(vjust=-0.3, face="bold", size=12)) +
theme(axis.title.y = element_text(vjust=1.5, face="bold", size=12)) +
ggtitle("Title") + theme(plot.title = element_text(lineheight=.8, face="bold")) +
theme(title = element_text(vjust=2)) +
facet_grid(NM ~.)

Related

How to create a plot by two subgroups using interaction() in ggplot (combination of several plots)

I am trying to create plots by two subgroups, just like in the picture below (ggplot2 multiple sub groups of a bar chart). However, I would like to do that for a combination of plots.
When I tried to do that, instead of having the categories clearly separated (like in the example with Irrigated/Dry and Variety1/Variety 2), my variables "growth" and "year" are being collapsed together.
I am having trouble incorporating into my code this tiny modification. I would like for my variable year to be just like "Irrigated/Dry", and the variable growth as "Variety1/Variety2" Right now, this is how the plot looks like:
Here is my code:
library(ggplot2)
#Creates the data
d <- expand.grid(c(.3, .8), c(.3, 0.8), c(0, 0.5), c(1, 2), c("Oregon"," California"), c("2010","2011"))
colnames(d) <- c("gamma1", "gamma2", "growth", "store", "state", "year")
d$sells <- rnorm(64)
d$gamma_plot <- as.factor(paste(d$gamma1, d$gamma2, sep = "_"))
d$store <- as.factor(d$store)
d$growth <- factor(d$growth)
d$gamma_plot = factor(d$gamma_plot,
labels=c(expression(paste(gamma[1],"=", gamma[2]," = 0.3")),
expression(paste(gamma[1], " = 0.3 ", gamma[2], " = 0.8")),
expression(paste(gamma[1], " = 0.8 ", gamma[2], " = 0.3")),
expression(paste(gamma[1],"=", gamma[2]," = 0.8"))
)
)
d$store = factor(d$store,
labels = c(expression(paste(store[1], " = 1")),
expression(paste(store[2], " = 2"))
)
)
#Creates the plot
p = ggplot(data=d, aes(x=interaction(year, growth), y=sells, fill=state)) +
geom_col(position="dodge") +
theme_bw() +
facet_grid(store ~ gamma_plot, labeller = label_parsed) +
theme(legend.title = element_blank(), legend.position="bottom",
panel.grid.major = element_blank(),
legend.key.size = unit(0.10, "cm"),
legend.key.width = unit(0.15,"cm")) +
guides(fill = guide_legend(nrow = 1)) +
labs(x=expression(growth), y = "Sells")
EDITED:
The two solutions given to my question were great and I really appreciate it.
I have decided to alter the plot a little and have an interaction between gamma_plot and growth instead. I could not make R understand that gamma_plot was an expression. Any ideas?
#Creates the plot using teunbrand's code :)
ggplot(data=d, aes(x=interaction(growth, gamma_plot, sep = "&"), y=sells, fill=year)) +
geom_col(position="dodge") +
theme_bw() +
facet_grid(store ~ state, labeller = label_parsed) +
theme(legend.title = element_blank(), legend.position="bottom",
panel.grid.major = element_blank(),
legend.key.size = unit(0.10, "cm"),
legend.key.width = unit(0.15,"cm"),
axis.text.x = element_text(margin = margin(2,2,2,2))) +
scale_x_discrete(guide = guide_axis_nested(delim = "&")) +
guides(fill = guide_legend(nrow = 1)) +
labs(x=expression(growth), y = "Sells")
How about this option:
library(ggplot2)
ggplot(data=d, aes(x = interaction(year, growth), y=sells, fill = state)) +
geom_col(position="dodge") +
scale_x_discrete(labels = unique(interaction(d$year, factor(d$growth), sep = "\n")))+
theme_bw() +
facet_grid(store ~ gamma_plot, labeller = label_parsed) +
theme(legend.title = element_blank(), legend.position="bottom",
panel.grid.major = element_blank(),
legend.key.size = unit(0.10, "cm"),
legend.key.width = unit(0.15,"cm")) +
guides(fill = guide_legend(nrow = 1)) +
labs(x = expression(Year~growth), y = "Sells")
Created on 2020-07-10 by the reprex package (v0.3.0)
As far as I'm aware there is no axis hierarchy in ggplot2. Normally, one would use facets to seperate the year from growth, but it seems like you're already using the facets to seperate out something else.
Example how one would use facets in this case:
ggplot(data=d, aes(x=interaction(growth), y=sells, fill=state)) +
geom_col(position="dodge") +
theme_bw() +
facet_grid(store ~ gamma_plot + year, labeller = label_parsed, switch = "x") +
theme(legend.title = element_blank(), legend.position="bottom",
panel.grid.major = element_blank(),
strip.placement = "outside",
legend.key.size = unit(0.10, "cm"),
legend.key.width = unit(0.15,"cm")) +
guides(fill = guide_legend(nrow = 1)) +
labs(x=expression(growth), y = "Sells")
Seeing as the above is not really a good option, I recommend looking for extention packages that offer what you seek. If you'll allow me to be so bold, there is a function in a github package I wrote that formats axes in a nested fashion. Example below:
library(ggh4x)
ggplot(data=d, aes(x=interaction(growth, year, sep = "&"), y=sells, fill=state)) +
geom_col(position="dodge") +
theme_bw() +
facet_grid(store ~ gamma_plot, labeller = label_parsed) +
theme(legend.title = element_blank(), legend.position="bottom",
panel.grid.major = element_blank(),
legend.key.size = unit(0.10, "cm"),
legend.key.width = unit(0.15,"cm"),
axis.text.x = element_text(margin = margin(2,2,2,2))) +
scale_x_discrete(guide = guide_axis_nested(delim = "&")) +
guides(fill = guide_legend(nrow = 1)) +
labs(x=expression(growth), y = "Sells")
Edit:
With regards to the follow up question about the spacing between years; I can't think of an elegant solution but the following would get the job done. It converts the discrete axis to a continuous one.
# Precalculate interaction
d$interaction <- interaction(d$growth, d$year, sep = "&")
nudge <- 1 # How much you want to nudge
# Use ifelse to nudge position and use factor as integer
ggplot(data=d, aes(x=ifelse(as.numeric(interaction) > 2,
as.numeric(interaction) + nudge,
as.numeric(interaction)),
y=sells, fill=state)) +
geom_col(position="dodge") +
theme_bw() +
facet_grid(store ~ gamma_plot, labeller = label_parsed) +
theme(legend.title = element_blank(), legend.position="bottom",
panel.grid.major = element_blank(),
legend.key.size = unit(0.10, "cm"),
legend.key.width = unit(0.15,"cm"),
axis.text.x = element_text(margin = margin(2,2,2,2))) +
# Using a continuous axis here
scale_x_continuous(breaks = c(1,2,3 + nudge, 4 + nudge),
labels = levels(d$interaction),
guide = guide_axis_nested(delim = "&")) +
guides(fill = guide_legend(nrow = 1)) +
labs(x=expression(growth), y = "Sells")

Ho to add the count values calculated in the geom_histogram on top of the bars in R

I'd like to add the count values calculated in the geom_histogram function on ggplot2. I've put the ggplot2 into a loop so I can produce multiple plots, in my case 30 but for ease, here is a dummy set for only four plots. Facet wrap didn't work as the geom density was pooling the data across all factors before calculating proportions, rather than within a factor/variable. To produce this plot, I've essentially mixed a whole bunch of code from various sources so credit to them.
library(dplyr)
library(ggplot2)
library(ggridges)
library(reshape2)
library(gridExtra)
#Make the data#
df.fact <- data.frame("A"=rnorm(400, mean = 350, sd=160),"B"=rnorm(400, mean = 300, sd=100), "C"=rnorm(400, mean = 200, sd=80), names=rep(factor(LETTERS[23:26]), 100))
df.test<-melt(df.fact, id.vars = "names", value.name = "Length2")
names(df.test)[names(df.test) =="variable"] <- "TSM.FACT"
#Create the plotlist##
myplots <- list()
#Loop for plots##
for(i in 1:(length(unique(df.test$names)))){
p1 <- eval(substitute(
ggplot(data=df.test[df.test$names == levels(df.test$names)[i],], aes(x=Length2, group=TSM.FACT, colour = TSM.FACT, fill=TSM.FACT)) +
geom_histogram(aes( y = stat(width*density)), position = "dodge", binwidth = 50, alpha =0.4, show.legend=T)+
ggtitle(paste0(levels(df.test$names)[i]))+
geom_density_line(stat="density", aes(y=(..count..)/sum(..count..)*50), alpha=0.3, size=0.5, show.legend=F) +
geom_vline(data=ddply(df.test[df.test$names == levels(df.test$names)[i],], ~ TSM.FACT, numcolwise(mean)), mapping=aes(xintercept = Length2, group=TSM.FACT, colour=TSM.FACT), linetype=2, size=1, show.legend=F) +
scale_y_continuous(labels = percent_format()) +
ylab("relative frequency") +
scale_color_manual(values= c("#00B2EE", "#1E90FF", "#104E8B")) +
scale_fill_manual(values= c("#00B2EE", "#1E90FF", "#104E8B")) +
theme_bw() + theme(
plot.title = element_text(lineheight=0.5, hjust= 0.5, size=10),
strip.text.y = element_text(hjust = 1, angle = 0),
strip.text.x = element_text(size=10, vjust = 0.9),
strip.text=element_text(margin = margin(t=0.3,r=1,b=0.3,l=1), size=8, debug = F, vjust=0.2),
strip.background = element_blank(),
axis.text.x = element_text(size=8, angle=0, vjust=0.2, margin = margin(t=0.3,r=0.1,b=0.3,l=0.1)),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.line.x=element_line(colour="black"),
axis.line.y=element_line(colour="black"),
panel.grid.minor = element_blank(),
panel.border=element_blank(),
panel.background=element_blank(),
legend.position=(c(0.9,0.9)),
legend.title = element_blank(),
legend.key = element_blank()),
list(i = i)))
print(i)
print(p1)
myplots[[i]] <- p1
plot(p1)
}
#Join the plots
panelplot=grid.arrange(plotlist = myplots, grobs = myplots, shared.legend=T)
Unfortunately I am unable to reproduce your example. I can recommend adding a column that includes the sum of each bar (let's name it "Bar")
The required addition to the ggplot code then involves:
geom_text(aes(label = Bar), position = position_stack(vjust = 1)) +
The text height above the bar can be adjusted with vjust

Combine two plots in ggplot?

I need to create a ggplot that is a column plot overlayed with a line plot. The line plot shows mean values, while the column plot shows how the mean values relate to benchmark values. I've managed to create two separate plots in ggplot, but I'm having trouble combining them.
My line plot looks like this:
And is created using this code:
benchMarkLine <- ggplot(UEQScores, aes(x=Scale, y=Score, group=1)) +
geom_line(size = 1.4, colour = "black") +
geom_point(size = 2.4, colour = "black") +
scale_y_continuous(name = "Score", breaks = seq(0, 2.5, 0.25), limits = c(0, 2.5)) +
scale_x_discrete(name = "Scale") +
ggtitle("Mean Scores") +
theme_bw() + # Set black and white theme +
theme(plot.title = element_text(hjust = 0.5, size=10), # Centre plot title
panel.grid.major = element_blank(), # Turn off major gridlines
panel.grid.minor = element_blank(), # Turn off minor gridlines
axis.title.x = element_text(size=10),
axis.text.x = element_text(angle=30, vjust=0.6),
axis.title.y = element_text(size=10))
benchMarkLine
My Column plot looks like this:
And was created with the following code:
benchmarkColPlot <- ggplot(benchmark_long, aes(x=factor(Scale, scaleLevels), y=value, fill=factor(cat, bmLevels))) +
geom_col(position="fill") +
scale_fill_manual(values = bmColours) +
scale_y_continuous(name = "Score", breaks = seq(-1.0, 1.0, 0.25), limits = c(-1, 1)) +
scale_x_discrete(name = "Scale") +
ggtitle("Benchmark Scores") +
theme_bw() + # Set black and white theme +
theme(plot.title = element_text(hjust = 0.5, size=10), # Centre plot title
panel.grid.major = element_blank(), # Turn off major gridlines
panel.grid.minor = element_blank(), # Turn off minor gridlines
axis.title.x = element_text(size=10),
axis.text.x = element_text(angle=30, vjust=0.6),
axis.title.y = element_text(size=10),
legend.title = element_blank())
benchmarkColPlot
How can I combine these two? I tried inserting geom_line(UEQScores, aes(x=Scale, y=Score, group=1)) + above geom_col(position="fill") + in the column plot code, but I just get the following error:
Error: `mapping` must be created by `aes()`
How do I combine these two plots?
OK, I've given up on this - I just created the chart in Excel as it seems to be a bit easier for what I'm doing here.

putting two different legends in two columns in ggplot2

I have the following reproducible code which gets me the plot listed below:
require(ggplot2)
set.seed(123)
ChickWt <- data.frame(ChickWeight, AR = sample(c("p=0", "p=1", "hat(p)"), size = 578, replace = T))
exprvec <- expression( p==hat(p), p==0, p==1)
p1 <- ggplot(ChickWt, aes(x=Time, y=weight, colour=Diet, Group = Chick, linetype = AR)) + geom_line()
p1 <- p1 + scale_linetype_manual(values=c(2,4,1), labels = exprvec,name="AR order") + theme_bw() + theme(legend.justification=c(1,-0.2), legend.position=c(0.3,0.2), legend.text=element_text(size=10), legend.title=element_text(size=10), axis.title.x=element_text(size=10), axis.title.y=element_text(size = 10), legend.key = element_blank(), legend.background = element_rect(color="black",size = 0.1)) + ylim(c(0,400)) + guides(fill=guide_legend(ncol=2))
but I would like the legend on Diet and AR order in two separate columns. How do I get this to work? Clearly, the guides(fill=guide_legend(ncol=2)) has no effect, perhaps because these are two separate legends.
Thanks for suggestions!
The reason that guides(fill=guide_legend(ncol=2)) does not work is because it only refers to the fill-legend and not to the linetype-legend. You can position the legends next to each other by using legend.box = "horizontal":
ggplot(ChickWt, aes(x=Time, y=weight, colour=Diet, Group = Chick, linetype = AR)) +
geom_line() +
scale_linetype_manual(values=c(2,4,1), labels = exprvec,name="AR order") +
theme_bw() +
theme(legend.justification=c(1,-0.2),
legend.position=c(0.3,0.2),
legend.text=element_text(size=10),
legend.title=element_text(size=10),
axis.title.x=element_text(size=10),
axis.title.y=element_text(size = 10),
legend.key = element_blank(),
legend.background = element_rect(color="black",size = 0.1),
legend.box = "horizontal") +
ylim(c(0,400))
which gives:

Width of error bars in line plot using ggplot2

I have some data with standard errors associated and would like to display these with error bars. That's what I have:
# generate some data
hod <- data.frame(h = c(1:24,1:24,1:24), mean = 1:(24*3) + runif(24*3, 0, 5),ci = runif(24*3, 0, 2), t = c(rep("a",24),rep("b",24),rep("c",24)))
pd <- position_dodge(0.3)
dayplot <- ggplot(hod, aes(x=h, y=mean, colour=as.factor(t),group=as.factor(t))) +
geom_line(position=pd, size=1) +
geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci),
width=1,
size=0.5,
position=pd) +
geom_point(position=pd, shape=21, size=1, fill="white") +
scale_x_continuous(limits=c(-0.5,23.5),
breaks=c(0:8*3),
labels=ifelse(
c(0:8*3) < 10,
paste('0',c(0:8*3),':00',sep=''),
paste(c(0:8*3),':00',sep='')
)
) +
xlab("Hour of day") + ylab(ylabel) + labs(title = varlabels[var]) +
theme_minimal() +
theme(plot.margin = unit(c(1,0,1,1), "cm"),
axis.title.x = element_text(vjust=-1),
axis.title.y = element_text(angle=90, vjust=0),
legend.margin = unit(c(0), "cm"),
legend.key.height = unit(c(0.9), "cm"),
panel.grid.major = element_line(colour=rgb(0.87,0.87,0.87)),
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = rgb(0.97,0.97,0.97), linetype=0)
)
The only thing of interest is probably:
geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci),
width=1,
size=0.5,
position=pd)
It gives:
Now when I group the data by a factor variable (as.factor(t)), I get several lines instead of one, which is what I want, BUT, as you can see, the horizontal lines at the error bars are more narrow, and I can't figure out why. I tried changing and even taking away the width and size attribute of geom_errorbar, but nothing happens. Is there a way to have the same width of the horizontal lines for every chart, no matter the data? I mean, why should it vary? Or does that width convey some information?
Below is a reproducible example using random data. The fix to the problem is to multiply the width by the number of classes/factors that you have. In the plot below, since I used three factors, using a width of 3 fixes the problem. ggplot2 seems to calculate the relative width by the number of data points in your dataset, rather than the numeric values on the x-axis. This is (IMO) a bug.
library(ggplot2)
library(grid)
#plot with factors
hod <- data.frame(h = c(1:24,1:24,1:24), mean = 1:(24*3) + runif(24*3, 0, 5),ci = runif(24*3, 0, 2), t = c(rep("a",24),rep("b",24),rep("c",24)))
pd <- position_dodge(0.3)
dayplot <- ggplot(hod, aes(x=h, y=mean, colour=as.factor(t),group=as.factor(t))) +
geom_line(position=pd, size=1) +
geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci),
width=1,
size=0.5,
position=pd) +
geom_point(position=pd, shape=21, size=1, fill="white") +
scale_x_continuous(limits=c(-0.5,23.5),
breaks=c(0:8*3),
labels=ifelse(
c(0:8*3) < 10,
paste('0',c(0:8*3),':00',sep=''),
paste(c(0:8*3),':00',sep='')
)
) +
xlab("Hour of day") +
theme_minimal() +
theme(plot.margin = unit(c(1,0,1,1), "cm"),
axis.title.x = element_text(vjust=-1),
axis.title.y = element_text(angle=90, vjust=0),
legend.margin = unit(c(0), "cm"),
legend.key.height = unit(c(0.9), "cm"),
panel.grid.major = element_line(colour=rgb(0.87,0.87,0.87)),
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = rgb(0.97,0.97,0.97), linetype=0)
)
print(dayplot)
#plot without factors
hod <- data.frame(h = c(1:24,1:24,1:24), mean = 1:(24) + runif(24, 0, 5),ci = runif(24, 0, 2))
pd <- position_dodge(0.3)
dayplot <- ggplot(hod, aes(x=h, y=mean)) +
geom_line(position=pd, size=1) +
geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci),
width=1,
size=0.5,
position=pd) +
geom_point(position=pd, shape=21, size=1, fill="white") +
scale_x_continuous(limits=c(-0.5,23.5),
breaks=c(0:8*3),
labels=ifelse(
c(0:8*3) < 10,
paste('0',c(0:8*3),':00',sep=''),
paste(c(0:8*3),':00',sep='')
)
) +
xlab("Hour of day") +
theme_minimal() +
theme(plot.margin = unit(c(1,0,1,1), "cm"),
axis.title.x = element_text(vjust=-1),
axis.title.y = element_text(angle=90, vjust=0),
legend.margin = unit(c(0), "cm"),
legend.key.height = unit(c(0.9), "cm"),
panel.grid.major = element_line(colour=rgb(0.87,0.87,0.87)),
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = rgb(0.97,0.97,0.97), linetype=0)
)
print(dayplot)
I have managed to solve a similar issue. In my case I wanted to set both horizontal and vertical errorbar heads to the same size - regardless of the aspect ratio of the plot.
Based on the original posted code:
f <- ggplot_build(dayplot)
f$plot$layers[[5]]$geom_params$width <- 0.02 * diff(f$layout$panel_params[[1]]$x.range)
f$plot$layers[[6]]$geom_params$height <- 0.02 * diff(f$layout$panel_params[[1]]$y.range)
dayplot <- f$plot
This will set the errorbar head to 2% of the axis range. Maybe could solve your issue.

Resources