Adding a single legend for two horizontal lines in ggplot - r

I little experience with ggplot2. I am trying to plot coverage probability and cohort size using the code below:
library("reshape2")
library("ggplot2")
library(latex2exp)
CP1 <-c(0.953,0.942,0.947,0.958)
CP2 <- c(0.937,0.952,0.955,0.957)
cohort <- c(500,1000,5000,10000)
mdata <- data.frame(rate1=CP1,rate2=CP2,cohort.size=cohort)
mydata <- melt(mdata,id='cohort.size',value.name="CP")
ggplot(mydata , aes(x=cohort.size, y=CP)) +
geom_line(size=1,aes(colour=variable)) +
geom_point( size=4, shape=0)+ coord_cartesian(ylim = c(0,1)) +
scale_x_continuous(breaks=c(500,1000,5000,10000))+
scale_color_discrete(labels = unname(TeX(c(" $\\r_1$", "$\\r_2$")))) +
geom_hline(yintercept =c(0.936,0.964) ,linetype="dashed") +
theme(legend.title = element_blank(), axis.title.x = element_text(color="#993333", size=14, face="bold"),
axis.title.y = element_text(color="#993333", size=14, face="bold"),
plot.title = element_text(color="#993333", size=14, face="bold"),
legend.position = c(.85, .85),
legend.justification = c("right", "top"),
legend.box.just = "right",
legend.margin = margin(6, 6, 6, 6),legend.text=element_text(size=20)) + xlab("Cohort Size") + ylab("Coverage Proability")+
annotate("text",
x = 8700,
y = 0.68,
label =expression(bold(paste("MN=57% \n AB=38% \n XYZ=5%" ))),parse = TRUE,size=5)
I have three questions:
1. When I run the code, I get a warning; how can I fix it.
2. There are two horizontal black dashed lines and I want to have just one legend for both to represent '95% CL'.
3. I feel the code is too much, is there a much simpler way of writing it using ggplot2 only.
Thanks!!

I can't install latex2exp. Without this package, you simply can try this and in my opinion all three questions are solved:
ggplot(mydata , aes(x=cohort.size, y=CP)) +
geom_line(size=1,aes(colour=variable)) +
geom_point( size=4, shape=0)+
geom_hline(data = data.frame(yintercept =c(0.936,0.964)),
aes(yintercept =yintercept, linetype ='95% CL')) +
scale_linetype_manual("", values = 2) +
ylim(0,1) +
annotate("text",
x = 8700,
y = 0.68,
label = paste("MN=57%\n AB=38%\n XYZ=5%" ),
size=5, fontface =2)

Related

ggplot2 density plot legend shows wrong group names and wrong colors

The following code plots two partly overlapping density distributions from two independent dataframes with different lenghts.
library(ggplot2)
#Define colors to be used in plot for each group
mycolRO <- rgb(0.8, 0.2, 0, max = 1, alpha = 0.5) #Color for Group "Road"
mycolRA <- rgb(0.2, 0.6, 0.4, max = 1, alpha = 0.5) #Color for Group "Rail"
#Create some data
dfRoad <- data.frame(DiffRO=2+rnorm(300))
dfRail <- data.frame(DiffRA=rnorm(500))
#Plot density distributions
ggplot() +
geom_density(aes(x=DiffRO, fill = mycolRO, alpha=0.5), data=dfRoad) +
geom_density(aes(x=DiffRA, fill = mycolRA, alpha=0.5), data=dfRail) +
xlim(-6, 6) +
theme_classic() +
ggtitle("") +
xlab("Value") +
ylab("Density") +
theme(plot.title = element_text(color="black", size=17, face="bold"),
axis.title.x = element_text(color="black", size=17, face="bold"),
axis.title.y = element_text(color="black", size=17, face="bold"),
axis.text=element_text(size=15))+
labs(fill = "Group")+
theme(legend.title = element_text(color = "black", size = 15), legend.text = element_text(color = "black", size=12))+
theme(legend.position = c(0.2,0.8), legend.direction = "vertical")+
guides(alpha=FALSE)
The legend does show the correct base color, but not with the transparency (alpha) value defined above, which should be alpha=0.5.
Furthermore I would like to see the correct variable names ("DiffRO" and "DiffRA") as legend entries instead of the color codes.
Thanks for any help.
Here are two ways of doing what you want.
Common points to both are:
The colors are set manually with scale_fill_manual.
theme calls are simplified, there is no need to call theme repeatedly.
First, I will recreate the data, this time setting the RNG seed before calling rnorm.
set.seed(1234)
dfRoad <- data.frame(DiffRO = 2 + rnorm(300))
dfRail <- data.frame(DiffRA = rnorm(500))
Your way, corrected.
The legend labels must also be set manually in scale_fill_manual.
#Plot density distributions
ggplot() +
geom_density(aes(x=DiffRO, fill = mycolRO, alpha=0.5), data=dfRoad) +
geom_density(aes(x=DiffRA, fill = mycolRA, alpha=0.5), data=dfRail) +
xlim(-6, 6) +
ggtitle("") +
xlab("Value") +
ylab("Density") +
scale_fill_manual(labels = c("Road", "Rail"),
values = c(mycolRO, mycolRA)) +
theme_classic() +
theme(plot.title = element_text(color="black", size=17, face="bold"),
axis.title.x = element_text(color="black", size=17, face="bold"),
axis.title.y = element_text(color="black", size=17, face="bold"),
axis.text=element_text(size=15),
legend.title = element_text(color = "black", size = 15),
legend.text = element_text(color = "black", size=12),
legend.position = c(0.2,0.8), legend.direction = "vertical")+
labs(fill = "Group") +
guides(alpha = FALSE)
Another way, simpler.
The data is combined and reformated from two different data sets in one data set only. To do this I use package reshape2.
dflong <- reshape2::melt(dfRoad)
dflong <- rbind(dflong, reshape2::melt(dfRail))
Note that now only one call to geom_density is needed and that the legend labels are automatic.
ggplot(dflong, aes(x = value, group = variable, fill = variable, alpha = 0.5)) +
geom_density() +
xlim(-6, 6) +
ggtitle("") +
xlab("Value") +
ylab("Density") +
scale_fill_manual(values = c(mycolRA, mycolRO)) +
theme_classic() +
theme(plot.title = element_text(color="black", size=17, face="bold"),
axis.title.x = element_text(color="black", size=17, face="bold"),
axis.title.y = element_text(color="black", size=17, face="bold"),
axis.text = element_text(size=15),
legend.title = element_text(color = "black", size = 15),
legend.text = element_text(color = "black", size=12),
legend.position = c(0.2,0.8), legend.direction = "vertical") +
labs(fill = "Group") +
guides(alpha = FALSE)

Ho to add the count values calculated in the geom_histogram on top of the bars in R

I'd like to add the count values calculated in the geom_histogram function on ggplot2. I've put the ggplot2 into a loop so I can produce multiple plots, in my case 30 but for ease, here is a dummy set for only four plots. Facet wrap didn't work as the geom density was pooling the data across all factors before calculating proportions, rather than within a factor/variable. To produce this plot, I've essentially mixed a whole bunch of code from various sources so credit to them.
library(dplyr)
library(ggplot2)
library(ggridges)
library(reshape2)
library(gridExtra)
#Make the data#
df.fact <- data.frame("A"=rnorm(400, mean = 350, sd=160),"B"=rnorm(400, mean = 300, sd=100), "C"=rnorm(400, mean = 200, sd=80), names=rep(factor(LETTERS[23:26]), 100))
df.test<-melt(df.fact, id.vars = "names", value.name = "Length2")
names(df.test)[names(df.test) =="variable"] <- "TSM.FACT"
#Create the plotlist##
myplots <- list()
#Loop for plots##
for(i in 1:(length(unique(df.test$names)))){
p1 <- eval(substitute(
ggplot(data=df.test[df.test$names == levels(df.test$names)[i],], aes(x=Length2, group=TSM.FACT, colour = TSM.FACT, fill=TSM.FACT)) +
geom_histogram(aes( y = stat(width*density)), position = "dodge", binwidth = 50, alpha =0.4, show.legend=T)+
ggtitle(paste0(levels(df.test$names)[i]))+
geom_density_line(stat="density", aes(y=(..count..)/sum(..count..)*50), alpha=0.3, size=0.5, show.legend=F) +
geom_vline(data=ddply(df.test[df.test$names == levels(df.test$names)[i],], ~ TSM.FACT, numcolwise(mean)), mapping=aes(xintercept = Length2, group=TSM.FACT, colour=TSM.FACT), linetype=2, size=1, show.legend=F) +
scale_y_continuous(labels = percent_format()) +
ylab("relative frequency") +
scale_color_manual(values= c("#00B2EE", "#1E90FF", "#104E8B")) +
scale_fill_manual(values= c("#00B2EE", "#1E90FF", "#104E8B")) +
theme_bw() + theme(
plot.title = element_text(lineheight=0.5, hjust= 0.5, size=10),
strip.text.y = element_text(hjust = 1, angle = 0),
strip.text.x = element_text(size=10, vjust = 0.9),
strip.text=element_text(margin = margin(t=0.3,r=1,b=0.3,l=1), size=8, debug = F, vjust=0.2),
strip.background = element_blank(),
axis.text.x = element_text(size=8, angle=0, vjust=0.2, margin = margin(t=0.3,r=0.1,b=0.3,l=0.1)),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.line.x=element_line(colour="black"),
axis.line.y=element_line(colour="black"),
panel.grid.minor = element_blank(),
panel.border=element_blank(),
panel.background=element_blank(),
legend.position=(c(0.9,0.9)),
legend.title = element_blank(),
legend.key = element_blank()),
list(i = i)))
print(i)
print(p1)
myplots[[i]] <- p1
plot(p1)
}
#Join the plots
panelplot=grid.arrange(plotlist = myplots, grobs = myplots, shared.legend=T)
Unfortunately I am unable to reproduce your example. I can recommend adding a column that includes the sum of each bar (let's name it "Bar")
The required addition to the ggplot code then involves:
geom_text(aes(label = Bar), position = position_stack(vjust = 1)) +
The text height above the bar can be adjusted with vjust

Combine two plots in ggplot?

I need to create a ggplot that is a column plot overlayed with a line plot. The line plot shows mean values, while the column plot shows how the mean values relate to benchmark values. I've managed to create two separate plots in ggplot, but I'm having trouble combining them.
My line plot looks like this:
And is created using this code:
benchMarkLine <- ggplot(UEQScores, aes(x=Scale, y=Score, group=1)) +
geom_line(size = 1.4, colour = "black") +
geom_point(size = 2.4, colour = "black") +
scale_y_continuous(name = "Score", breaks = seq(0, 2.5, 0.25), limits = c(0, 2.5)) +
scale_x_discrete(name = "Scale") +
ggtitle("Mean Scores") +
theme_bw() + # Set black and white theme +
theme(plot.title = element_text(hjust = 0.5, size=10), # Centre plot title
panel.grid.major = element_blank(), # Turn off major gridlines
panel.grid.minor = element_blank(), # Turn off minor gridlines
axis.title.x = element_text(size=10),
axis.text.x = element_text(angle=30, vjust=0.6),
axis.title.y = element_text(size=10))
benchMarkLine
My Column plot looks like this:
And was created with the following code:
benchmarkColPlot <- ggplot(benchmark_long, aes(x=factor(Scale, scaleLevels), y=value, fill=factor(cat, bmLevels))) +
geom_col(position="fill") +
scale_fill_manual(values = bmColours) +
scale_y_continuous(name = "Score", breaks = seq(-1.0, 1.0, 0.25), limits = c(-1, 1)) +
scale_x_discrete(name = "Scale") +
ggtitle("Benchmark Scores") +
theme_bw() + # Set black and white theme +
theme(plot.title = element_text(hjust = 0.5, size=10), # Centre plot title
panel.grid.major = element_blank(), # Turn off major gridlines
panel.grid.minor = element_blank(), # Turn off minor gridlines
axis.title.x = element_text(size=10),
axis.text.x = element_text(angle=30, vjust=0.6),
axis.title.y = element_text(size=10),
legend.title = element_blank())
benchmarkColPlot
How can I combine these two? I tried inserting geom_line(UEQScores, aes(x=Scale, y=Score, group=1)) + above geom_col(position="fill") + in the column plot code, but I just get the following error:
Error: `mapping` must be created by `aes()`
How do I combine these two plots?
OK, I've given up on this - I just created the chart in Excel as it seems to be a bit easier for what I'm doing here.

Overlay different datasets in same facetted plot in ggplot2

I need to gather two facet columns into one column with ggplot2.
In the following example, I need to overlay the content of the two columns DEG and RAN into one, while giving different colours to DEG and RAN data (small points and smooth line) and provide the corresponding legend (so I can distinguish them as they are overlayed).
I feel my code is not too, too far from what I need, but the relative complexity of the dataset blocks me. How to go about achieving this in ggplot2?
Here's my code so far:
require(reshape2)
library(ggplot2)
library(RColorBrewer)
fileName = paste("./4.csv", sep = "") # csv file available here: https://www.dropbox.com/s/bm9hd0t5ak74k89/4.csv?dl=0
mydata = read.csv(fileName,sep=",", header=TRUE)
dataM = melt(mydata,c("id"))
dataM = cbind(dataM,colsplit(dataM$variable,pattern = "_",names = c("NM", "ORD", "CAT")))
dataM$variable <- NULL
dataM <- dcast(dataM, ... ~ CAT, value.var = "value")
my_palette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
ggplot(dataM, aes(x=NR ,y= ASPL)) +
geom_point(size = .4,alpha = .5) +
stat_smooth(se = FALSE, size = .5) +
theme_bw() +
theme(plot.background = element_blank(),
axis.line = element_blank(),
legend.key = element_blank(),
legend.title = element_blank()) +
scale_y_continuous("ASPL", expand=c(0,0), limits = c(1, 7)) +
scale_x_continuous("NR", expand=c(0,0), limits = c(0, 100)) +
theme(legend.position="bottom") +
theme(axis.title.x = element_text(vjust=-0.3, face="bold", size=12)) +
theme(axis.title.y = element_text(vjust=1.5, face="bold", size=12)) +
ggtitle("Title") + theme(plot.title = element_text(lineheight=.8, face="bold")) +
theme(title = element_text(vjust=2)) +
facet_grid(NM ~ ORD)
Here's what it gives me right now:
Extra question: how come DEG/SF doesn't show a smooth line?
You can use the group aesthetic to define that data points with the same value of ORD belong together. You can also map aesthetics shape and color to this variable. You can also use . to specify that the facets are not split along a specific dimension.
I have made the changes to your code below after transforming NR and ASPL to numeric variables:
dataM$NR <- as.integer(dataM$NR)
dataM$ASPL <- as.numeric(dataM$ASPL)
ggplot(dataM, aes(x=NR ,y= ASPL, group=ORD, color=ORD)) +
geom_point(size = .7,alpha = .5, aes(shape=ORD)) + ## increased size
stat_smooth(se = FALSE, size = .5) +
theme_bw() +
theme(plot.background = element_blank(),
axis.line = element_blank(),
legend.key = element_blank(),
legend.title = element_blank()) +
scale_y_continuous("ASPL", expand=c(0,0), limits = c(1, 7)) +
scale_x_continuous("NR", expand=c(0,0), limits = c(0, 100)) +
theme(legend.position="bottom") +
theme(axis.title.x = element_text(vjust=-0.3, face="bold", size=12)) +
theme(axis.title.y = element_text(vjust=1.5, face="bold", size=12)) +
ggtitle("Title") + theme(plot.title = element_text(lineheight=.8, face="bold")) +
theme(title = element_text(vjust=2)) +
facet_grid(NM ~.)

Width of error bars in line plot using ggplot2

I have some data with standard errors associated and would like to display these with error bars. That's what I have:
# generate some data
hod <- data.frame(h = c(1:24,1:24,1:24), mean = 1:(24*3) + runif(24*3, 0, 5),ci = runif(24*3, 0, 2), t = c(rep("a",24),rep("b",24),rep("c",24)))
pd <- position_dodge(0.3)
dayplot <- ggplot(hod, aes(x=h, y=mean, colour=as.factor(t),group=as.factor(t))) +
geom_line(position=pd, size=1) +
geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci),
width=1,
size=0.5,
position=pd) +
geom_point(position=pd, shape=21, size=1, fill="white") +
scale_x_continuous(limits=c(-0.5,23.5),
breaks=c(0:8*3),
labels=ifelse(
c(0:8*3) < 10,
paste('0',c(0:8*3),':00',sep=''),
paste(c(0:8*3),':00',sep='')
)
) +
xlab("Hour of day") + ylab(ylabel) + labs(title = varlabels[var]) +
theme_minimal() +
theme(plot.margin = unit(c(1,0,1,1), "cm"),
axis.title.x = element_text(vjust=-1),
axis.title.y = element_text(angle=90, vjust=0),
legend.margin = unit(c(0), "cm"),
legend.key.height = unit(c(0.9), "cm"),
panel.grid.major = element_line(colour=rgb(0.87,0.87,0.87)),
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = rgb(0.97,0.97,0.97), linetype=0)
)
The only thing of interest is probably:
geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci),
width=1,
size=0.5,
position=pd)
It gives:
Now when I group the data by a factor variable (as.factor(t)), I get several lines instead of one, which is what I want, BUT, as you can see, the horizontal lines at the error bars are more narrow, and I can't figure out why. I tried changing and even taking away the width and size attribute of geom_errorbar, but nothing happens. Is there a way to have the same width of the horizontal lines for every chart, no matter the data? I mean, why should it vary? Or does that width convey some information?
Below is a reproducible example using random data. The fix to the problem is to multiply the width by the number of classes/factors that you have. In the plot below, since I used three factors, using a width of 3 fixes the problem. ggplot2 seems to calculate the relative width by the number of data points in your dataset, rather than the numeric values on the x-axis. This is (IMO) a bug.
library(ggplot2)
library(grid)
#plot with factors
hod <- data.frame(h = c(1:24,1:24,1:24), mean = 1:(24*3) + runif(24*3, 0, 5),ci = runif(24*3, 0, 2), t = c(rep("a",24),rep("b",24),rep("c",24)))
pd <- position_dodge(0.3)
dayplot <- ggplot(hod, aes(x=h, y=mean, colour=as.factor(t),group=as.factor(t))) +
geom_line(position=pd, size=1) +
geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci),
width=1,
size=0.5,
position=pd) +
geom_point(position=pd, shape=21, size=1, fill="white") +
scale_x_continuous(limits=c(-0.5,23.5),
breaks=c(0:8*3),
labels=ifelse(
c(0:8*3) < 10,
paste('0',c(0:8*3),':00',sep=''),
paste(c(0:8*3),':00',sep='')
)
) +
xlab("Hour of day") +
theme_minimal() +
theme(plot.margin = unit(c(1,0,1,1), "cm"),
axis.title.x = element_text(vjust=-1),
axis.title.y = element_text(angle=90, vjust=0),
legend.margin = unit(c(0), "cm"),
legend.key.height = unit(c(0.9), "cm"),
panel.grid.major = element_line(colour=rgb(0.87,0.87,0.87)),
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = rgb(0.97,0.97,0.97), linetype=0)
)
print(dayplot)
#plot without factors
hod <- data.frame(h = c(1:24,1:24,1:24), mean = 1:(24) + runif(24, 0, 5),ci = runif(24, 0, 2))
pd <- position_dodge(0.3)
dayplot <- ggplot(hod, aes(x=h, y=mean)) +
geom_line(position=pd, size=1) +
geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci),
width=1,
size=0.5,
position=pd) +
geom_point(position=pd, shape=21, size=1, fill="white") +
scale_x_continuous(limits=c(-0.5,23.5),
breaks=c(0:8*3),
labels=ifelse(
c(0:8*3) < 10,
paste('0',c(0:8*3),':00',sep=''),
paste(c(0:8*3),':00',sep='')
)
) +
xlab("Hour of day") +
theme_minimal() +
theme(plot.margin = unit(c(1,0,1,1), "cm"),
axis.title.x = element_text(vjust=-1),
axis.title.y = element_text(angle=90, vjust=0),
legend.margin = unit(c(0), "cm"),
legend.key.height = unit(c(0.9), "cm"),
panel.grid.major = element_line(colour=rgb(0.87,0.87,0.87)),
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = rgb(0.97,0.97,0.97), linetype=0)
)
print(dayplot)
I have managed to solve a similar issue. In my case I wanted to set both horizontal and vertical errorbar heads to the same size - regardless of the aspect ratio of the plot.
Based on the original posted code:
f <- ggplot_build(dayplot)
f$plot$layers[[5]]$geom_params$width <- 0.02 * diff(f$layout$panel_params[[1]]$x.range)
f$plot$layers[[6]]$geom_params$height <- 0.02 * diff(f$layout$panel_params[[1]]$y.range)
dayplot <- f$plot
This will set the errorbar head to 2% of the axis range. Maybe could solve your issue.

Resources