How to customize a boxplot legend indicating mean, outliers, median, etc? - r

I have a boxplot and, on my supervisor's advice, I have to indicate the mean, outliers and median in the legend, like this image:
How can I do this using ggplot2?
library(ggplot2)
A <- 1:20
DF <- data.frame(A)
ggplot(data = DF) +
geom_boxplot(aes(x = "", y = A))

There is no straightforward way. But you could make a custom legend using another plot:
# Main plot that will receive the hand-made legend.
p <- ggplot(data = mtcars) +
  geom_boxplot(mapping = aes(x = factor(cyl), y = mpg))

# Synthetic sample: 1..1000 plus a single far-away value (1502) that is
# reused below as the "outlier" label position.
d1 <- data.frame(x = 1, y = c(1:1000, 1502))

# One row per annotation: the five values returned by boxplot.stats()
# followed by the extra value, each paired with its caption.
d2 <- data.frame(
  y = c(with(d1, boxplot.stats(y))$stats, 1502),
  x = 1,
  label = c("min", "1st quartile", "median", "3rd quartile", "max", "outlier")
)

# The legend is itself a small plot: a narrow boxplot with the captions
# placed to its right on a white, bordered panel.
leg <- ggplot(data = d1, mapping = aes(x, y)) +
  geom_boxplot(width = 0.2) +
  geom_text(mapping = aes(x = 1.15, label = label), data = d2, hjust = 0) +
  xlim(0.9, 1.5) +
  theme_void() +
  theme(panel.background = element_rect(fill = "white", color = 1))

# Embed the legend plot as a grob inside the main plot's data coordinates.
p + annotation_custom(
  ggplotGrob(leg),
  xmin = 3, xmax = 3.5,
  ymin = 25, ymax = 35
)

Related

ggplot geom_rect color gradient (without reference to data)?

I was wondering if it is possible to have a geom_rect with a color gradient without a data reference, i.e. outside of aes().
I would like the two rectangles in the bottom of the following plot to show a color gradient from red to white (left to right) and the top one to show a color gradient from yellow to white.
Is this possible in a simple way or do I have to create data to refer to?
ggplot() +
geom_rect(aes(xmin = c(1, 3), xmax = c(2.5, 4), ymin = c(1, 1), ymax = c(2, 2)), color = "black", fill = "red") +
geom_rect(aes(xmin = 1, xmax = 3.5, ymin = 3, ymax = 4), color = "black", fill = "yellow") +
theme_bw() +
theme(panel.grid = element_blank())
I tried to use scale_fill_gradient with geom_tile but this doesn't really do what I want: 1. the two supposed-to-be-red rectangles share a gradient and don't start with pure red each and 2. I can't manage to use two different scale_fill_gradient's in one plot.
foo <- tibble(x = seq(from = 1, to = 2.5, by = 0.001),
y = rep(1, 1501))
bar <- tibble(x = seq(from = 3, to = 4, by = 0.001),
y = rep(1, 1001))
foobar <- tibble(x = seq(from = 1, to = 3.5, by = 0.001),
y = rep(3, 2501))
ggplot() +
geom_tile(data = foo, aes(x = x, y = y, fill = x)) +
geom_tile(data = bar, aes(x = x, y = y, fill = x)) +
scale_fill_gradient(low = 'red', high = 'white') +
geom_tile(data = foobar, aes(x = x, y = y, fill = x)) +
scale_fill_gradient(low = 'yellow', high = 'white') +
theme_bw() +
theme(panel.grid = element_blank())
You could use the function new_scale_fill from ggnewscale between your two different scale_fill_gradient functions in your plot process. This will reset your aesthetics to make it possible to use another gradient like this:
library(tibble)
foo <- tibble(x = seq(from = 1, to = 2.5, by = 0.001),
y = rep(1, 1501))
bar <- tibble(x = seq(from = 3, to = 4, by = 0.001),
y = rep(1, 1001))
foobar <- tibble(x = seq(from = 1, to = 3.5, by = 0.001),
y = rep(3, 2501))
library(ggplot2)
library(ggnewscale)
ggplot() +
geom_tile(data = foo, aes(x = x, y = y, fill = x)) +
geom_tile(data = bar, aes(x = x, y = y, fill = x)) +
scale_fill_gradient(low = 'red', high = 'white') +
new_scale_fill() +
geom_tile(data = foobar, aes(x = x, y = y, fill = x)) +
scale_fill_gradient(low = 'yellow', high = 'white') +
theme_bw() +
theme(panel.grid = element_blank())
Created on 2022-09-23 with reprex v2.0.2
To add for each geom_tile a gradient color, you could use for each tile new_scale_fill like this:
library(ggplot2)
library(ggnewscale)
ggplot() +
geom_tile(data = foo, aes(x = x, y = y, fill = x)) +
scale_fill_gradient(low = 'red', high = 'white', guide = 'none') +
new_scale_fill() +
geom_tile(data = bar, aes(x = x, y = y, fill = x)) +
scale_fill_gradient(low = 'red', high = 'white') +
new_scale_fill() +
geom_tile(data = foobar, aes(x = x, y = y, fill = x)) +
scale_fill_gradient(low = 'yellow', high = 'white') +
theme_bw() +
theme(panel.grid = element_blank())
Created on 2022-09-23 with reprex v2.0.2

Creating a facet theme/design plot in ggplot2 without using facet_

Is there any possibility to create a facet_wrap looking plot in ggplot2 without using facet_wrap()? The reason I would like to achieve this is to match some other design. In the plot without_facet below, can I somehow add "Setosa" at the top, so it looks like the with_facet plot, without using facet_wrap?
library(ggplot2)
df <- iris[iris$Species == 'setosa', ]
with_facet <- ggplot(df, aes(x = Sepal.Length, y = Sepal.Width)) + geom_point() +facet_wrap(~Species)
with_facet
without_facet <- ggplot(df, aes(x = Sepal.Length, y = Sepal.Width)) + geom_point()
You can try
ggplot(df, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point() +
ggtitle("setosa") +
theme(plot.title = element_text(hjust = 0.5))
A more "hackish"-one could be this hardcoded approach:
# Hard-coded imitation of a facet strip: a grey rectangle is drawn at fixed
# data coordinates and the centred plot title is used as the strip text.
ggplot(df, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  ggtitle("setosa") +
  # Grey band at y = 4.5-4.6; presumably meant to sit just above the panel,
  # which is why clipping is switched off below.
  geom_rect(
    xmin = 4.225, xmax = 5.875, ymin = 4.5, ymax = 4.6,
    fill = "lightgrey"
  ) +
  # `expand` is a logical switch, not an expansion amount. The original
  # `expand = 0.05` was only ever interpreted as "truthy", so state TRUE.
  coord_cartesian(clip = "off", expand = TRUE) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 12),
    # Extra top margin leaves room for the band drawn outside the panel.
    plot.margin = margin(t = 30, r = 20, b = 20, l = 20, unit = "pt")
  )
From paleolimbot's answer on GitHub (https://github.com/tidyverse/ggplot2/issues/2344):
# Constructor for a text theme element tagged with the extra S3 class
# "element_textbox", so that a dedicated element_grob() method can be
# dispatched for it. All arguments are forwarded to element_text().
element_textbox <- function(...) {
  base_element <- element_text(...)
  structure(
    base_element,
    class = c("element_textbox", class(base_element))
  )
}
# element_grob() method for "element_textbox" elements: renders the text via
# the next method in the class chain and places it on top of the
# strip-background rectangle taken from theme_bw().
#
# element: the theme element being rendered.
# ...:     passed on implicitly to the next method by NextMethod().
# Returns a grob as tall as the text and spanning the full available width.
element_grob.element_textbox <- function(element, ...) {
  text_grob <- NextMethod()
  # Build the background once and reuse it (the original computed this grob
  # twice and left the first copy unused).
  rect_grob <- element_grob(calc_element("strip.background", theme_bw()))
  ggplot2:::absoluteGrob(
    # Order matters: rectangle first so the text is drawn over it.
    grid::gList(rect_grob, text_grob),
    height = grid::grobHeight(text_grob),
    width = grid::unit(1, "npc")
  )
}
From my original question, I added theme_bw()
# Side-by-side comparison of a real facet strip and the title-based imitation.
library(ggplot2)
library(gridExtra)
# Keep only the setosa rows so that both plots show a single panel.
df <- iris[iris$Species == 'setosa', ]
# Reference plot: genuine facet strip produced by facet_wrap().
# NOTE(review): theme_bw() is added after theme(); a complete theme added
# later presumably replaces the plot.background setting, so the black border
# is likely not drawn - confirm, and swap the order if the border is wanted.
with_facet <- ggplot(df, aes(x = Sepal.Length, y = Sepal.Width)) + geom_point() +
facet_wrap(~Species) +
theme(plot.background = element_rect(color = 'black')) + theme_bw()
# Imitation: the plot title is rendered with the custom element_textbox()
# element (defined earlier in this answer) to mimic the strip.
without_facet <- ggplot(df, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point() +
ggtitle("setosa") +
theme_bw() +
theme(
plot.title = element_textbox(
hjust = 0.5, margin = margin(t = 5, b = 5), size = 10
),
)
# Show both plots in one figure for visual comparison.
grid.arrange(with_facet, without_facet)
Not identical, but works for my purpose.
This might be one option:
library(ggplot2)
df <- iris[iris$Species == 'setosa', ]
# with annotate:
with_annotate <-
ggplot(df, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point() +
annotate('text', x = 5, y = 4.7, label = "setosa", size = 12)
with_annotate
#or if you do not want the heading to print over the plot area
with_coord_cart <-
ggplot(df, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point() +
annotate('text', x = 5, y = 4.7, label = "setosa", size = 8)+
coord_cartesian(ylim = c(2, 4.5), clip = 'off') +
theme(plot.margin = margin(2, 1, 1, 1, "lines"))
with_coord_cart
Which gives you:
Note: I had deleted it because it seemed no longer relevant after the original question was updated.
I am not sure I understand correctly. In case you want to arrange different plots together:
library(gridExtra)
grid.arrange(without_facet,
without_facet,
without_facet,
without_facet, nrow = 2)

Plot coloured boxes around axis label

Consider this simple example
library(tidyverse)
tibble(x = as.factor(c('good', 'neutral', 'bad')),
y = as.factor(c('bad', 'neutral', 'bad'))) %>%
ggplot(aes(x = x, y = y)) + geom_point()
I would like to put the x labels (good, neutral, bad) in different colored boxes. For instance, good (on both the x and y axis) would be surrounded by a small green box, and so on.
Can I do that in ggplot2?
Like this?
tibble(x = as.factor(c('good', 'neutral', 'bad')),
y = as.factor(c('bad', 'neutral', 'bad'))) %>%
ggplot(aes(x = x, y = y)) +
geom_point() +
theme(axis.text.x = element_text(color = c('red', 'blue', 'green')))
Your Plot:
EDIT
An alternate pretty Ghetto solution using grid
# Draw the scatter plot first; the polygons below are painted on top of the
# already-rendered graphics device.
tibble(x = as.factor(c('good', 'neutral', 'bad')),
       y = as.factor(c('bad', 'neutral', 'bad'))) %>%
  ggplot(aes(x = x, y = y)) +
  geom_point()

# Semi-transparent boxes over the x-axis labels. Coordinates are fractions of
# the whole device (grid's default "npc" units) and were tuned by eye, so they
# have to be re-adjusted whenever the plot size or the labels change.
# gpar() is namespaced like grid.polygon(): grid is never attached here, so
# the original bare gpar() call fails with "could not find function".
grid::grid.polygon(
  x = c(.3, .3, .25, .25), y = c(.07, .04, .04, .07),
  gp = grid::gpar(col = 'green', fill = 'green', alpha = .5)
)
grid::grid.polygon(
  x = c(.525, .525, .575, .575), y = c(.07, .04, .04, .07),
  gp = grid::gpar(col = 'red', fill = 'red', alpha = .5)
)
grid::grid.polygon(
  x = c(.79, .79, .86, .86), y = c(.07, .04, .04, .07),
  gp = grid::gpar(col = 'blue', fill = 'blue', alpha = .5)
)
Solution using geom_label outside the plot area:
ggplot(data, aes(x, y)) +
geom_point() +
geom_label(aes(0.3, y, label = y, fill = y), hjust = 0) +
geom_label(aes(x, 0.45, label = x, fill = x)) +
theme_minimal() +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
legend.position = "none"
) +
coord_cartesian(xlim = c(1, 3), ylim = c(1, 2), clip = "off")
Another solution
You should create geom_rect with borders, but without fill and plot them outside the plot area (using coord_cartesian):
library(tidyverse)
data <- tibble(
x = as.factor(c('good', 'neutral', 'bad')),
y = as.factor(c('bad', 'neutral', 'bad'))
)
ggplot(data, aes(x, y)) +
geom_point() +
# put rects on y-axis
geom_rect(aes(xmin = 0.1, xmax = 0.45, color = y,
ymin = as.numeric(y) - 0.1, ymax = as.numeric(y) + 0.1),
fill = NA, size = 3) +
# put rects on x-axis
geom_rect(aes(ymin = 0.3, ymax = 0.4, color = x,
xmin = as.numeric(x) - 0.15, xmax = as.numeric(x) + 0.15),
fill = NA, size = 3) +
# Here it's important to specify that your axis goes from 1 to max number of levels
coord_cartesian(xlim = c(1, 3), ylim = c(1, 2), clip = "off")
Another approach
Create a vector of colors and pass them into axis.text.x() option of theme().
# data
x = as.factor(c('good', 'neutral', 'bad'))
y = as.factor(c('bad', 'neutral', 'bad'))
df<- data.frame(x,y)
# create a vector of colors
mycolors<- c("red","blue","green")
library(ggplot2)
ggplot(df, aes(x = x, y=y))+
geom_point()+
theme(axis.text.x = element_text(colour = mycolors))
One approach could be this:
tibble(x = as.factor(c('good', 'neutral', 'bad')),
y = as.factor(c('bad', 'neutral', 'bad'))) %>%
ggplot(aes(x = x, y = y)) + geom_point()+
geom_rect(aes(xmin=0.5, xmax=1.5, ymin=-Inf, ymax=Inf), fill="red", alpha=0.1)+
geom_rect(aes(xmin=1.5, xmax=2.5, ymin=-Inf, ymax=Inf), fill="yellow", alpha=0.1)+
geom_rect(aes(xmin=2.5, xmax=3.5, ymin=-Inf, ymax=Inf), fill="green", alpha=0.1)
With geom_rect() you can add colored backgrounds:

Create a RDA biplot using extracted RDA results and merge to graphs

I am currently trying to use extracted data to create two separate RDA biplots, using the following code:
p <- ggplot()
p + geom_vline(x=0,colour="grey50") +
geom_hline(y=0,colour="grey50") +
geom_text(data = PHYTOPLANKTON_coordinates_scaling_2, aes(x = RDA1, y = RDA2,
label=rownames(PHYTOPLANKTON_coordinates_scaling_2)), angle=45, size=3,
colour = 'blue') +
geom_segment(data = WQ_coordinates_scaling_2, aes(x = 0, y = 0,
xend = RDA1, yend = RDA2), size = 0.5, colour = 'red') +
geom_text(WQ_coordinates_scaling_2, aes(x = RDA1, y = RDA2,
label = rownames(WQ_coordinates_scaling_2)), size = 5, angle = 45,
vjust = 1, colour = 'violet') +
theme_bw()
This results in:
Error: unexpected '=' in: " + geom_text(data =
PHYTOPLANKTON_coordinates_scaling_2, aes(x = RDA1, y = RDA2,
+ label="
+ colour = 'blue') Error: unexpected ')' in " + colour =
'blue')"
set.seed(123)
# Generate toy data
n <- 20
PHYTOPLANKTON_coordinates_scaling_2 <-
data.frame(RDA1 = rnorm(n), RDA2 = rnorm(n))
rownames(PHYTOPLANKTON_coordinates_scaling_2) <- LETTERS[1:n]
k <- 4
WQ_coordinates_scaling_2 <-
data.frame(RDA1 = rnorm(k), RDA2 = rnorm(k))
rownames(WQ_coordinates_scaling_2) <- paste0("V",1:k)
# Plot data
library(ggplot2)
p <- ggplot()
p + geom_vline(xintercept=0,colour="grey50") +
geom_hline(yintercept=0,colour="grey50") +
geom_text(data=PHYTOPLANKTON_coordinates_scaling_2, aes(x=RDA1, y=RDA2,
label=rownames(PHYTOPLANKTON_coordinates_scaling_2)), angle=45, size=3,
colour = 'blue') +
geom_segment(data=WQ_coordinates_scaling_2, aes(x = 0, y = 0,
xend = RDA1, yend = RDA2), size = 0.5, colour = 'red') +
geom_text(data=WQ_coordinates_scaling_2, aes(x=RDA1, y=RDA2,
label = rownames(WQ_coordinates_scaling_2)), size = 5, angle = 45,
vjust = 1, colour = 'violet') +
theme_bw()

Showing median value in grouped boxplot in R

I have created boxplots using ggplot2 with this code.
plotgraph <- function(x, y, colour, min, max)
{
plot1 <- ggplot(dims, aes(x = x, y = y, fill = Region)) +
geom_boxplot()
#plot1 <- plot1 + scale_x_discrete(name = "Blog Type")
plot1 <- plot1 + labs(color='Region') + geom_hline(yintercept = 0, alpha = 0.4)
plot1 <- plot1 + scale_y_continuous(breaks=c(seq(min,max,5)), limits = c(min, max))
plot1 <- plot1 + labs(x="Blog Type", y="Dimension Score") + scale_fill_grey(start = 0.3, end = 0.7) + theme_grey()
plot1 <- plot1 + theme(legend.justification = c(1, 1), legend.position = c(1, 1))
return(plot1)
}
plot1 <- plotgraph (Blog, Dim1, Region, -30, 25)
A part of data I use is reproduced here.
Blog,Region,Dim1,Dim2,Dim3,Dim4
BlogsInd.,PK,-4.75,13.47,8.47,-1.29
BlogsInd.,PK,-5.69,6.08,1.51,-1.65
BlogsInd.,PK,-0.27,6.09,0.03,1.65
BlogsInd.,PK,-2.76,7.35,5.62,3.13
BlogsInd.,PK,-8.24,12.75,3.71,3.78
BlogsInd.,PK,-12.51,9.95,2.01,0.21
BlogsInd.,PK,-1.28,7.46,7.56,2.16
BlogsInd.,PK,0.95,13.63,3.01,3.35
BlogsNews,PK,-5.96,12.3,6.5,1.49
BlogsNews,PK,-8.81,7.47,4.76,1.98
BlogsNews,PK,-8.46,8.24,-1.07,5.09
BlogsNews,PK,-6.15,0.9,-3.09,4.94
BlogsNews,PK,-13.98,10.6,4.75,1.26
BlogsNews,PK,-16.43,14.49,4.08,9.91
BlogsNews,PK,-4.09,9.88,-2.79,5.58
BlogsNews,PK,-11.06,16.21,4.27,8.66
BlogsNews,PK,-9.04,6.63,-0.18,5.95
BlogsNews,PK,-8.56,7.7,0.71,4.69
BlogsNews,PK,-8.13,7.26,-1.13,0.26
BlogsNews,PK,-14.46,-1.34,-1.17,14.57
BlogsNews,PK,-4.21,2.18,3.79,1.26
BlogsNews,PK,-4.96,-2.99,3.39,2.47
BlogsNews,PK,-5.48,0.65,5.31,6.08
BlogsNews,PK,-4.53,-2.95,-7.79,-0.81
BlogsNews,PK,6.31,-9.89,-5.78,-5.13
BlogsTech,PK,-11.16,8.72,-5.53,8.86
BlogsTech,PK,-1.27,5.56,-3.92,-2.72
BlogsTech,PK,-11.49,0.26,-1.48,7.09
BlogsTech,PK,-0.9,-1.2,-2.03,-7.02
BlogsTech,PK,-12.27,-0.07,5.04,8.8
BlogsTech,PK,6.85,1.27,-11.95,-10.79
BlogsTech,PK,-5.21,-0.89,-6,-2.4
BlogsTech,PK,-1.06,-4.8,-8.62,-2.42
BlogsTech,PK,-2.6,-4.58,-2.07,-3.25
BlogsTech,PK,-0.95,2,-2.2,-3.46
BlogsTech,PK,-0.82,7.94,-4.95,-5.63
BlogsTech,PK,-7.65,-5.59,-3.28,-0.54
BlogsTech,PK,0.64,-1.65,-2.36,-2.68
BlogsTech,PK,-2.25,-3,-3.92,-4.87
BlogsTech,PK,-1.58,-1.42,-0.38,-5.15
Columns,PK,-5.73,3.26,0.81,-0.55
Columns,PK,0.37,-0.37,-0.28,-1.56
Columns,PK,-5.46,-4.28,2.61,1.29
Columns,PK,-3.48,2.38,12.87,3.73
Columns,PK,0.88,-2.24,-1.74,3.65
Columns,PK,-2.11,4.51,8.95,2.47
Columns,PK,-10.13,10.73,9.47,-0.47
Columns,PK,-2.08,1.04,0.11,0.6
Columns,PK,-4.33,5.65,2,-0.77
Columns,PK,1.09,-0.24,-0.92,-0.17
Columns,PK,-4.23,-4.01,-2.32,6.26
Columns,PK,-1.46,-1.53,9.83,5.73
Columns,PK,9.37,-1.32,1.27,-4.12
Columns,PK,5.84,-2.42,-5.21,1.07
Columns,PK,8.21,-9.36,-5.87,-3.21
Columns,PK,7.34,-7.3,-2.94,-5.86
Columns,PK,1.83,-2.77,1.47,-4.02
BlogsInd.,PK,14.39,-0.55,-5.42,-4.7
BlogsInd.,US,22.02,-1.39,2.5,-3.12
BlogsInd.,US,4.83,-3.58,5.34,9.22
BlogsInd.,US,-3.24,2.83,-5.3,-2.07
BlogsInd.,US,-5.69,15.17,-14.27,-1.62
BlogsInd.,US,-22.92,4.1,5.79,-3.88
BlogsNews,US,0.41,-2.03,-6.5,2.81
BlogsNews,US,-4.42,8.49,-8.04,2.04
BlogsNews,US,-10.72,-4.3,3.75,11.74
BlogsNews,US,-11.29,2.01,0.67,8.9
BlogsNews,US,-2.89,0.08,-1.59,7.06
BlogsNews,US,-7.59,8.51,3.02,12.33
BlogsNews,US,-7.45,23.51,2.79,0.48
BlogsNews,US,-12.49,15.79,-9.86,18.29
BlogsTech,US,-11.59,6.38,11.79,-7.28
BlogsTech,US,-4.6,4.12,7.46,3.36
BlogsTech,US,-22.83,2.54,10.7,5.09
BlogsTech,US,-4.83,3.37,-8.12,-0.9
BlogsTech,US,-14.76,29.21,6.23,9.33
Columns,US,-15.93,12.85,19.47,-0.88
Columns,US,-2.78,-1.52,8.16,0.24
Columns,US,-16.39,13.08,11.07,7.56
Even though I have tried to add detailed scale on y-axis, it is hard for me to pinpoint exact median score for each boxplot. So I need to print median value within each boxplot. There was another answer available (for faceted boxplot) which does not work for me as the printed values are not within the boxes but jammed together in the middle. It will be great to be able to print them within (middle and above the median line of) boxplots.
Thanks for your help.
Edit: I make a grouped graph as below.
Add
library(dplyr)
dims=dims%>%
group_by(Blog,Region)%>%
mutate(med=median(Dim1))
# Grouped boxplot of `y` by `x`, filled by `colour`, with each group's median
# printed just above its median line.
#
# x, y, colour: bare (unquoted) column names of the global data frame `dims`,
#               e.g. plotgraph(Blog, Dim1, Region, -30, 25).
# min, max:     limits of the y axis; breaks are placed every 5 units.
# Returns the ggplot object.
#
# Columns are captured with {{ }} so they are looked up in `dims` itself and
# the function no longer depends on attach(dims). The fill now follows the
# `colour` argument, which was accepted but ignored before.
plotgraph <- function(x, y, colour, min, max) {
  # One row per x/colour combination, so exactly one label is drawn per box
  # (labelling from `dims` directly overplots one label per observation).
  meds <- dims %>%
    group_by({{ x }}, {{ colour }}) %>%
    summarise(med = median({{ y }}), .groups = "drop")

  ggplot(dims, aes(x = {{ x }}, y = {{ y }}, fill = {{ colour }})) +
    geom_boxplot() +
    labs(color = 'Region') +
    geom_hline(yintercept = 0, alpha = 0.4) +
    scale_y_continuous(breaks = seq(min, max, 5), limits = c(min, max)) +
    labs(x = "Blog Type", y = "Dimension Score") +
    scale_fill_grey(start = 0.3, end = 0.7) +
    theme_grey() +
    theme(legend.justification = c(1, 1), legend.position = c(1, 1)) +
    # x and fill are inherited from the plot mapping; fill is what lets
    # position_dodge() line the labels up with the dodged boxes.
    geom_text(
      data = meds,
      aes(y = med, label = round(med, 2)),
      position = position_dodge(width = 0.8),
      size = 3, vjust = -0.5, colour = "blue"
    )
}
plot1 <- plotgraph (Blog, Dim1, Region, -30, 25)
Which gives (the text colour can be tweaked to something less tacky):
Note: You should consider using non-standard evaluation in your function rather than having it require the use of attach()
Edit:
One-liner, not as clean as I wanted it to be, since I ran into problems with dplyr not properly aggregating the data even though it says the grouping was performed.
This function assumes the dataframe is always called dims.
library(ggplot2)
library(reshape2)
# Grouped boxplot with the median of every box printed above its median line.
#
# x, y, colour: column names of the global data frame `dims`, given as
#               strings, e.g. plotgraph("Blog", "Dim1", "Region", -30, 25).
# min, max:     limits of the y axis; breaks are placed every 5 units.
# Returns the ggplot object.
plotgraph <- function(x, y, colour, min, max) {
  # Median of `y` for each x/colour combination, reshaped to long format.
  # Columns are fetched with [[ ]] instead of eval(parse(text = ...)), and
  # the long table's id columns are named after `x` and `colour` so that the
  # label layer finds them for any column choice (the original hard-coded
  # "Blog" and "Region").
  meds <- melt(
    tapply(dims[[y]], list(dims[[x]], dims[[colour]]), median),
    varnames = c(x, colour),
    value.name = "med"
  )

  plot1 <- ggplot(dims, aes_string(x = x, y = y, fill = colour)) +
    geom_boxplot() +
    labs(color = colour) +
    geom_hline(yintercept = 0, alpha = 0.4) +
    scale_y_continuous(breaks = seq(min, max, 5), limits = c(min, max)) +
    labs(x = "Blog Type", y = "Dimension Score") +
    scale_fill_grey(start = 0.3, end = 0.7) +
    theme_grey() +
    theme(legend.justification = c(1, 1), legend.position = c(1, 1)) +
    # fill is inherited from the plot mapping; it provides the grouping that
    # position_dodge() needs to align the labels with the boxes.
    geom_text(
      data = meds,
      aes_string(y = "med", x = x, label = "med"),
      position = position_dodge(width = 0.8),
      size = 3, vjust = -0.5, colour = "blue"
    )
  return(plot1)
}
plot1 <- plotgraph ("Blog", "Dim1", "Region", -30, 25)
Assuming that Blog is your dataframe, the following should work:
# y-axis limits shared by the breaks and the scale limits.
min <- -30
max <- 25

# Median of Dim1 per Region; used as both position and text of the labels.
meds <- aggregate(Dim1 ~ Region, data = Blog, FUN = median)

# Base boxplot: one box per Region, grey fill scale, reference line at 0.
plot1 <- ggplot(Blog, aes(x = Region, y = Dim1, fill = Region)) +
  geom_boxplot() +
  labs(color = "Region") +
  geom_hline(yintercept = 0, alpha = 0.4) +
  scale_y_continuous(breaks = seq(min, max, 5), limits = c(min, max)) +
  labs(x = "Blog Type", y = "Dimension Score") +
  scale_fill_grey(start = 0.3, end = 0.7) +
  theme_grey()

# Print the plot with the legend in the top-right corner and the median
# labels added; plot1 itself is left without these two layers.
plot1 +
  theme(legend.justification = c(1, 1), legend.position = c(1, 1)) +
  geom_text(
    data = meds,
    mapping = aes(y = Dim1, label = round(Dim1, 2)),
    size = 5, vjust = -0.5, color = "white"
  )

Resources