Putting horizontal lines on grouped boxplots - r

I am trying to make a boxplot with this basic code:
design=c("Red","Green","Blue")
actions=c("1","2","3","4","5","6","7","8")
proportion=(seq(1:240)+sample(1:500, 240, replace=T))/2000
df=data.frame(design, actions , proportion)
ggplot(df, aes(x=actions, y=proportion, fill=design)) +
geom_boxplot()+
xlab(TeX("group"))+
ylab("Y value")+
ggtitle("Y values for each group stratified by color")
Producing something like this:
I want to add horizontal lines for "true" Y values that are different for each group.
Does anyone have any tips for doing this? I don't know how to extract the width of each group of boxes, otherwise I could use geom_segment.
Here is a MWE with a non-grouped boxplot:
dBox <- data.frame(y = rnorm(10),group="1")
dBox=rbind(dBox,data.frame(y=rnorm(10),group="2"))
dLines <- data.frame(X =c(-0.36, 0.015),
Y = c(0.4, -0.2),
Xend = c(0.-0.015, 0.36),
Yend=c(0.4, -0.2),
group = c("True", "True"),
color = c("black", "red"))
ggplot(dBox, aes(x=0, y=y,fill=group)) +
geom_boxplot(outlier.shape = 1)+
geom_segment(data = dLines, aes(x = X, xend = Xend, y = Y, yend = Yend),color="red",size=1.5,linetype=1) +
theme(legend.background = element_rect(fill = "white", size = 0.1, linetype = "solid", colour = "black"))
This produces something like this:
However, it's difficult to make the geom_segments line up with the boxes exactly, and to then extend this to the grouped boxplot setting.
Thanks!

This can be done using a workaround with facets:
lines = data.frame(actions = 1:8, proportion=abs(rnorm(8)))
design=c("Red","Green","Blue")
actions=c("1","2","3","4","5","6","7","8")
proportion=(seq(1:240)+sample(1:500, 240, replace=T))/2000
df=data.frame(design, actions , proportion)
lines = data.frame(actions = 1:8, proportion=abs(rnorm(8)))
p = ggplot(df, aes(x=actions, y=proportion, fill=design)) +
geom_boxplot()+
xlab("group")+
ylab("Y value")+
ggtitle("Y values for each group stratified by color") +
facet_grid(~actions, scale='free_x') +
theme(
panel.spacing.x = unit(0, "lines"),
strip.background = element_blank(),
strip.text.x = element_blank())
p + geom_hline(aes(yintercept = proportion), lines)
You could probably fiddle around with removing the spaces between the facets to make it look more like what you intended.
Thanks to #eugene100hickey for pointing out how to remove spacing between facets.

theme(panel.spacing.x) can remove those pesky lines:
p + geom_hline(aes(yintercept = proportion), lines) +
theme(panel.spacing.x = unit(0, "lines"))

Related

ggplot: Add annotations using separate data above faceted chart

I'm trying to add set of markers with text above the top of a faceted chart to indicate certain points of interest in the value of x. Its important that they appear in the right position left to right (as per the main scale), including when the overall ggplot changes size.
Something like this...
However, I'm struggling to:
place it in the right vertical position (above the facets). In my
reprex below (a simplified version of the original), I tried using a
value of the factor (Merc450 SLC), but this causes issues such as adding that to
every facet including when it is not part of that facet and doesn't
actually go high enough. I also tried converting the factor to a number using as.integer, but this causes every facet to include all factor values, when they obviously shouldn't
apply to the chart as a whole, not each
facet
Note that in the full solution, the marker x values are independent of the main data.
I have tried using cowplot to draw it separately and overlay it, but that seems to:
affect the overall scale of the main plot, with the facet titles on the right being cropped
is not reliable in placing the markers at the exact location along the x scale
Any pointers welcome.
library(tidyverse)
mtcars2 <- rownames_to_column(mtcars, var = "car") %>%
mutate(make = stringr::word(car, 1)) %>%
filter(make >= "m" & make < "n")
markers <- data.frame(x = c(max(mtcars2$mpg), rep(runif(nrow(mtcars2), 1, max(mtcars2$mpg))), max(mtcars2$mpg))) %>%
mutate(name = paste0("marker # ", round(x)))
ggplot(mtcars2, aes()) +
# Main Plot
geom_tile(aes(x = mpg, y = car, fill = cyl), color = "white") +
# Add Markers
geom_point(data = markers, aes(x = x, y = "Merc450 SLC"), color = "red") +
# Marker Labels
geom_text(data = markers, aes(x = x, "Merc450 SLC",label = name), angle = 45, size = 2.5, hjust=0, nudge_x = -0.02, nudge_y = 0.15) +
facet_grid(make ~ ., scales = "free", space = "free") +
theme_minimal() +
theme(
# Facets
strip.background = element_rect(fill="Gray90", color = "white"),
panel.background = element_rect(fill="Gray95", color = "white"),
panel.spacing.y = unit(.7, "lines"),
plot.margin = margin(50, 20, 20, 20)
)
Perhaps draw two separate plots and assemble them together with patchwork:
library(patchwork)
p1 <- ggplot(markers, aes(x = x, y = 0)) +
geom_point(color = 'red') +
geom_text(aes(label = name),
angle = 45, size = 2.5, hjust=0, nudge_x = -0.02, nudge_y = 0.02) +
scale_y_continuous(limits = c(-0.01, 0.15), expand = c(0, 0)) +
theme_minimal() +
theme(axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank())
p2 <- ggplot(mtcars2, aes(x = mpg, y = car, fill = cyl)) +
geom_tile(color = "white") +
facet_grid(make ~ ., scales = "free", space = "free") +
theme_minimal() +
theme(
strip.background = element_rect(fill="Gray90", color = "white"),
panel.background = element_rect(fill="Gray95", color = "white"),
panel.spacing.y = unit(.7, "lines")
)
p1/p2 + plot_layout(heights = c(1, 9))
It required some workaround with plot on different plot and using cowplot alignment function to align them on the same axis. Here is a solution
library(tidyverse)
library(cowplot)
# define a common x_axis to ensure that the plot are on same scales
# This may not needed as cowplot algin_plots also adjust the scale however
# I tended to do this extra step to ensure.
x_axis_common <- c(min(mtcars2$mpg, markers$x) * .8,
max(mtcars2$mpg, markers$x) * 1.1)
# Plot contain only marker
plot_marker <- ggplot() +
geom_point(data = markers, aes(x = x, y = 0), color = "red") +
# Marker Labels
geom_text(data = markers, aes(x = x, y = 0,label = name),
angle = 45, size = 2.5, hjust=0, nudge_x = 0, nudge_y = 0.001) +
# using coord_cartesian to set the zone of plot for some scales
coord_cartesian(xlim = x_axis_common,
ylim = c(-0.005, 0.03), expand = FALSE) +
# using theme_nothing from cow_plot which remove all element
# except the drawing
theme_nothing()
# main plot with facet
main_plot <- ggplot(mtcars2, aes()) +
# Main Plot
geom_tile(aes(x = mpg, y = car, fill = cyl), color = "white") +
coord_cartesian(xlim = x_axis_common, expand = FALSE) +
# Add Markers
facet_grid(make ~ ., scales = "free_y", space = "free") +
theme_minimal() +
theme(
# Facets
strip.background = element_rect(fill="Gray90", color = "white"),
panel.background = element_rect(fill="Gray95", color = "white"),
panel.spacing.y = unit(.7, "lines"),
plot.margin = margin(0, 20, 20, 20)
)
Then align the plot and plot them using cow_plot
# align the plots together
temp <- align_plots(plot_marker, main_plot, axis = "rl",
align = "hv")
# plot them with plot_grid also from cowplot - using rel_heights for some
# adjustment
plot_grid(temp[[1]], temp[[2]], ncol = 1, rel_heights = c(1, 8))
Created on 2021-05-03 by the reprex package (v2.0.0)

Line plot legend does not appear

I am trying to plot a line plot in r by using ggplot. Unfortunately, the legend does not show up. Can anyone help me?
My code looks like the following:
dfdatavgsM=data.frame(datum, avgsätzegespMT, avgsätzegespML)
ggplot(data = dfdatavgsM, aes(x=datum, color=Wettbewerbsart))
+ geom_line(data=dfdatavgsM, aes(y = avgsätzegespML),color="red")
+ geom_line(data=dfdatavgsM, aes(y = avgsätzegespMT), color="blue")
+ geom_vline(xintercept=2011, size = 0.6)
+ scale_y_continuous(name="Anzahl an Sätzen")
+ scale_x_datetime(name = "Saison" ,date_breaks = ("2 year"),date_labels = "%Y")
+ ggtitle("Wettbewerbsintensität in Spielen mit |∆TTR| ≤ 118") + theme(panel.background = element_rect(fill = "white", colour = "black"))
+ theme(panel.grid.major = element_line(size = 0.25, linetype = 'solid', colour = "light grey")) + theme(axis.ticks = element_line(size = 1))
The legend will only appear if you use color inside an aes statement. You will need to reshape your data to 'long' format (e.g. with tidyr::gather), and have a single geom_line term and an aes including color=type. Something like this (not tested as I don't have your data)...
library(tidyverse)
dfdatavgsM=data.frame(datum, avgsätzegespMT, avgsätzegespML)
dfdatavgsMlong <- dfdatavgsM %>% gather(key = type, value = value, -datum)
ggplot(data = dfdatavgsMlong, aes(x=datum, y=value, color=type)) +
geom_line()

Manually change order of y axis items on complicated stacked bar chart in ggplot2

I've been stuck on an issue and can't find a solution. I've tried many suggestions on Stack Overflow and elsewhere about manually ordering a stacked bar chart, since that should be a pretty simple fix, but those suggestions don't work with the huge complicated mess of code I plucked from many places. My only issue is y-axis item ordering.
I'm making a series of stacked bar charts, and ggplot2 changes the ordering of the items on the y-axis depending on which dataframe I am trying to plot. I'm trying to make 39 of these plots and want them to all have the same ordering. I think ggplot2 only wants to plot them in ascending order of their numeric mean or something, but I'd like all of the bar charts to first display the group "Bird Advocates" and then "Cat Advocates." (This is also the order they appear in my data frame, but that ordering is lost at the coord_flip() point in plotting.)
I think that taking the data frame through so many changes is why I can't just add something simple at the end or use the reorder() function. Adding things into aes() also doesn't work, since the stacked bar chart I'm creating seems to depend on those items being exactly a certain way.
Here's one of my data frames where ggplot2 is ordering my y-axis items incorrectly, plotting "Cat Advocates" before "Bird Advocates":
Group,Strongly Opposed,Opposed,Slightly Opposed,Neutral,Slightly Support,Support,Strongly Support
Bird Advocates,0.005473026,0.010946052,0.012509773,0.058639562,0.071149335,0.31118061,0.530101642
Cat Advocates,0.04491726,0.07013396,0.03624901,0.23719464,0.09141056,0.23404255,0.28605201
And here's all the code that takes that and turns it into a plot:
library(ggplot2)
library(reshape2)
library(plotly)
#Importing data from a .csv file
data <- read.csv("data.csv", header=TRUE)
data$s.Strongly.Opposed <- 0-data$Strongly.Opposed-data$Opposed-data$Slightly.Opposed-.5*data$Neutral
data$s.Opposed <- 0-data$Opposed-data$Slightly.Opposed-.5*data$Neutral
data$s.Slightly.Opposed <- 0-data$Slightly.Opposed-.5*data$Neutral
data$s.Neutral <- 0-.5*data$Neutral
data$s.Slightly.Support <- 0+.5*data$Neutral
data$s.Support <- 0+data$Slightly.Support+.5*data$Neutral
data$s.Strongly.Support <- 0+data$Support+data$Slightly.Support+.5*data$Neutral
#to percents
data[,2:15]<-data[,2:15]*100
#melting
mdfr <- melt(data, id=c("Group"))
mdfr<-cbind(mdfr[1:14,],mdfr[15:28,3])
colnames(mdfr)<-c("Group","variable","value","start")
#remove dot in level names
mylevels<-c("Strongly Opposed","Opposed","Slightly Opposed","Neutral","Slightly Support","Support","Strongly Support")
mdfr$variable<-droplevels(mdfr$variable)
levels(mdfr$variable)<-mylevels
pal<-c("#bd7523", "#e9aa61", "#f6d1a7", "#999999", "#c8cbc0", "#65806d", "#334e3b")
ggplot(data=mdfr) +
geom_segment(aes(x = Group, y = start, xend = Group, yend = start+value, colour = variable,
text=paste("Group: ",Group,"<br>Percent: ",value,"%")), size = 5) +
geom_hline(yintercept = 0, color =c("#646464")) +
coord_flip() +
theme(legend.position="top") +
theme(legend.key.width=unit(0.5,"cm")) +
guides(col = guide_legend(ncol = 12)) + #has 7 real columns, using to adjust legend position
scale_color_manual("Response", labels = mylevels, values = pal, guide="legend") +
theme(legend.title = element_blank()) +
theme(axis.title.x = element_blank()) +
theme(axis.title.y = element_blank()) +
theme(axis.ticks = element_blank()) +
theme(axis.text.x = element_blank()) +
theme(legend.key = element_rect(fill = "white")) +
scale_y_continuous(breaks=seq(-100,100,100), limits=c(-100,100)) +
theme(panel.background = element_rect(fill = "#ffffff"),
panel.grid.major = element_line(colour = "#CBCBCB"))
The plot:
I think this works, you may need to play around with the axis limits/breaks:
library(dplyr)
mdfr <- mdfr %>%
mutate(group_n = as.integer(case_when(Group == "Bird Advocates" ~ 2,
Group == "Cat Advocates" ~ 1)))
ggplot(data=mdfr) +
geom_segment(aes(x = group_n, y = start, xend = group_n, yend = start + value, colour = variable,
text=paste("Group: ",Group,"<br>Percent: ",value,"%")), size = 5) +
scale_x_continuous(limits = c(0,3), breaks = c(1, 2), labels = c("Cat", "Bird")) +
geom_hline(yintercept = 0, color =c("#646464")) +
theme(legend.position="top") +
theme(legend.key.width=unit(0.5,"cm")) +
coord_flip() +
guides(col = guide_legend(ncol = 12)) + #has 7 real columns, using to adjust legend position
scale_color_manual("Response", labels = mylevels, values = pal, guide="legend") +
theme(legend.title = element_blank()) +
theme(axis.title.x = element_blank()) +
theme(axis.title.y = element_blank()) +
theme(axis.ticks = element_blank()) +
theme(axis.text.x = element_blank()) +
theme(legend.key = element_rect(fill = "white"))+
scale_y_continuous(breaks=seq(-100,100,100), limits=c(-100,100)) +
theme(panel.background = element_rect(fill = "#ffffff"),
panel.grid.major = element_line(colour = "#CBCBCB"))
produces this plot:
You want to factor the 'Group' variable in the order by which you want the bars to appear.
mdfr$Group <- factor(mdfr$Group, levels = c("Bird Advocates", "Cat Advocates")

Create legend with manual shapes and colours

I use bars and line to create my plot. The demo code is:
timestamp <- seq(as.Date('2010-01-01'),as.Date('2011-12-01'),by="1 mon")
data1 <- rnorm(length(timestamp), 3000, 30)
data2 <- rnorm(length(timestamp), 30, 3)
df <- data.frame(timestamp, data1, data2)
p <- ggplot()
p <- p + geom_histogram(data=df,aes(timestamp,data1),colour="black",stat="Identity",bindwidth=10)
p <- p + geom_line(data=df,aes(timestamp,y=data2*150),colour="red")
p <- p + scale_y_continuous(sec.axis = sec_axis(~./150, name = "data2"))
p <- p + scale_colour_manual(name="Parameter", labels=c("data1", "data2"), values = c('black', 'red'))
p <- p+ scale_shape_manual(name="Parameter", labels=c("data1", "data2"), values = c(15,95))
p
This results in a plot like this:
This figure does not have a legend. I followed this answer to create a customized legend but it is not working in my case. I want a square and line shape in my legend corresponding to bars and line. How can we get it?
I want legend as shown in below image:
For the type of data you want to display, geom_bar is a better fit then geom_histogram. When you to manipulate the appaerance of the legend(s), you need to place the colour = ... parts inside the aes. To get the desired result it probably best to use different types of legend for the line and the bars. In that way you are better able to change the appearance of the legends with guide_legend and override.aes.
A proposal for your problem:
ggplot(data = df) +
geom_bar(aes(x = timestamp, y = data1, colour = "black"),
stat = "Identity", fill = NA) +
geom_line(aes(x = timestamp, y = data2*150, linetype = "red"), colour = "red", size = 1) +
scale_y_continuous(sec.axis = sec_axis(~./150, name = "data2")) +
scale_linetype_manual(labels = "data2", values = "solid") +
scale_colour_manual(name = "Parameter\n", labels = "data1", values = "black") +
guides(colour = guide_legend(override.aes = list(colour = "black", size = 1),
order = 1),
linetype = guide_legend(title = NULL,
override.aes = list(linetype = "solid",
colour = "red",
size = 1),
order = 2)) +
theme_minimal() +
theme(legend.key = element_rect(fill = "white", colour = NA),
legend.spacing = unit(0, "lines"))
which gives:

Changing line color when I have a geom_errorbar with ggplot

I have the following code:
library(ggplot2)
library(gridExtra)
data = data.frame(fit = c(9.8,15.4,17.6,21.6,10.8), lower = c(7.15,12.75,14.95,18.95,8.15), upper = c(12.44,18.04,20.24,24.24,13.44), factors = c(15,20,25,30,35), var = rep("Fator", 5))
gp <- ggplot(data, aes(x=factors, y=fit, ymax=upper, ymin=lower))
gp <- gp + geom_line(aes(group=var),size=1.2) +
geom_errorbar(width=.8, size=1, aes(colour='red')) +
geom_point(size=4, shape=21, fill="grey") +
labs(x = paste("\n",data$var[1],sep=""), y =paste("Values","\n",sep="")) +
theme(legend.position = 'none', axis.text = element_text(size = 11), plot.margin=unit(c(0.4,0.4,0.4,0.4), "cm"), axis.text.x = element_text(angle=45, hjust = 1, vjust = 1)) +
ylim((min(data$lower)), (max(data$upper)))
I want to change the line color after I have the ggplot object. I'm trying:
gp + scale_color_manual(values = "green")
but it change the error bar color and not the line color.
1)What should I do to change the line color?
2)How can I change the points color?
Thanks!
Try this:
gp$layers[[1]] <- NULL
gp + geom_line(aes(group = var),color = "green",size = 1.2)
A similar technique should work for the points layer. Technique was dredged up from my memories of a similar question.
I just looked at the contents of gp$layers manually to see which was which. I presume that the order will be the order in which they appear in your code, but I wouldn't necessarily rely on that.

Resources