Order by y-Value in Facet Plot - r

Consider the following data.frame
RANK_GROUP <- as.factor(c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1))
CHANNEL_CATEGORY <- as.factor(c(1, 2, 10, 15, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 43, 44, 1, 2, 10, 15, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 43))
CATEGORY_COUNT <- c(105, 23, 417, 10, 58, 6, 535, 211, 215, 465, 28, 273, 70, 47, 7,1,21,3,69, 14, 493, 3, 44, 3, 516, 162, 253, 516, 24, 228, 64, 59, 2, 45)
data <- data.frame(RANK_GROUP, CHANNEL_CATEGORY,CATEGORY_COUNT)
I want to make a Facet-Plot with a barplot for each distribution:
# One bar per channel category, one facet column per rank group.
# Bars follow the factor-level order of CHANNEL_CATEGORY, not their height.
ggplot(data, aes(x = CHANNEL_CATEGORY, y = CATEGORY_COUNT)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  facet_grid(cols = vars(RANK_GROUP))
How can I order the bars according to their y-value within each facet?

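Adapting an approach from a cookbook: sort the rows within each group, give every row its own position on the x-axis, and relabel the axis with the category names.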
library(dplyr)
library(ggplot2)

# Sort the rows by count within each rank group and give every row its own
# x position. The category itself cannot be used as x, because the same
# category needs a different position in each facet.
pd <- data %>%
  arrange(RANK_GROUP, CATEGORY_COUNT) %>%
  mutate(order = as.factor(row_number()))

# Lookup from x position to the channel category shown on the axis. Taken
# from `pd` (the sorted data) so labels and bars stay in step.
channel_labels <- setNames(
  as.character(pd$CHANNEL_CATEGORY),
  as.character(pd$order)
)

ggplot(pd, aes(x = order, y = CATEGORY_COUNT)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  # scales = "free_x" keeps only the positions that belong to each facet.
  facet_grid(. ~ RANK_GROUP, scales = "free_x") +
  scale_x_discrete(labels = channel_labels) +
  theme(axis.text.x = element_text(angle = 60, hjust = .5, size = 8)) +
  labs(x = "Channel")

Related

From Boxplot to Barplot in ggplot possible?

I have to make a ggplot bar plot with error bars and Tukey significance letters for plants grown with different fertilizer concentrations.
The data should be grouped by the different concentrations, and the significance letters should be added automatically.
I already have code for the same problem with a boxplot, which works nicely. I tried several tutorials for bar plots, but I always get the error: stat_count() can only have an x or y aesthetic.
So I wondered: is it possible to convert my boxplot code into bar plot code? I tried but couldn't do it :) And if not, how do I automatically add the TukeyHSD test's significance letters to a ggplot bar plot?
This is my Code for the boxplot with the tukey letters:
    value_max = Dünger, group_by(Duenger.g), summarize(max_value = max(Höhe.cm))
hsd=HSD.test(aov(Höhe.cm~Duenger.g, data=Dünger),
trt = "Duenger.g", group = T) sig.letters <- hsd$groups[order(row.names(hsd$groups)), ]
J <- ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm))+ geom_boxplot(aes(fill= Duenger.g))+ scale_fill_discrete(labels=c("0.5g", '1g', "2g", "3g", "4g"))+ geom_text(data = value_max, aes(x=Duenger.g, y = 0.1 + max_value, label = sig.letters$groups), vjust=0)+ stat_boxplot(geom = 'errorbar', width = 0.1)+ ggtitle("Auswirkung von Dünger auf die Höhe von Pflanzen") + xlab("Dünger in g") + ylab("Höhe in cm"); J
This is how it looks:
boxplot with tukey
Data from dput:
structure(list(Duenger.g = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4), plant = c(1, 2, 3, 4, 5, 7, 10, 11, 12, 13, 14, 18, 19,
21, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40,
41, 42, 43, 44, 48, 49, 50, 53, 54, 55, 56, 57, 58, 61, 62, 64,
65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 79, 80, 81, 83, 85, 86,
88, 89, 91, 93, 99, 100, 102, 103, 104, 105, 106, 107, 108, 110,
111, 112, 113, 114, 115, 116, 117, 118, 120, 122, 123, 125, 126,
127, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 143, 144,
145, 146, 147, 149), height.cm = c(5.7, 2.8, 5.5, 8, 3.5, 2.5,
4, 6, 10, 4.5, 7, 8.3, 11, 7, 8, 2.5, 7.4, 3, 14.5, 7, 12, 7.5,
30.5, 27, 6.5, 19, 10.4, 12.7, 27.3, 11, 11, 10.5, 10.5, 13,
53, 12.5, 12, 6, 12, 35, 8, 16, 56, 63, 69, 62, 98, 65, 77, 32,
85, 75, 33.7, 75, 55, 38.8, 39, 46, 35, 59, 44, 31.5, 49, 34,
52, 37, 43, 38, 28, 14, 28, 19, 20, 23, 17.5, 32, 16, 17, 24.7,
34, 50, 12, 14, 21, 33, 39.3, 41, 29, 35, 48, 40, 65, 35, 10,
26, 34, 41, 32, 38, 23.5, 22.2, 20.5, 29, 34, 45)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -105L))
Thank you
mirai
A bar chart and a boxplot are two different things. By default, geom_boxplot computes the boxplot statistics (stat="boxplot"). In contrast, geom_bar by default counts the number of observations (stat="count"), which are then mapped to y. That's the reason why you get an error. Hence, simply replacing geom_boxplot with geom_bar will not give you your desired result. Instead you could use e.g. stat_summary to create your bar chart with error bars. Additionally, I created a summary dataset to add the labels on top of the error bars.
library(ggplot2)
library(dplyr)
library(agricolae)
# Use the column name from the question and treat the dose as a category.
Dünger <- Dünger |>
  rename("Höhe.cm" = height.cm) |>
  mutate(Duenger.g = factor(Duenger.g))

# Tukey HSD letters. The dose is only available as row names of hsd$groups,
# so copy it into a column that can serve as the join key.
hsd <- HSD.test(aov(Höhe.cm ~ Duenger.g, data = Dünger),
                trt = "Duenger.g", group = TRUE)
sig.letters <- hsd$groups %>% mutate(Duenger.g = row.names(.))

# Per-dose summary from mean_se() (columns y, ymin, ymax) plus the letters.
duenger_sum <- Dünger |>
  group_by(Duenger.g) |>
  summarize(mean_se(Höhe.cm)) |>
  left_join(sig.letters, by = "Duenger.g")

ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm, fill = Duenger.g)) +
  stat_summary(geom = "bar", fun = "mean") +
  # No summary function given: ggplot2 falls back to mean_se() and says so.
  stat_summary(geom = "errorbar", width = .1) +
  # Derive the legend labels from the doses present in the data; a fixed
  # label vector is applied by position and mislabels the groups.
  scale_fill_discrete(labels = function(dose) paste0(dose, "g")) +
  # Letters sit just above the upper end of each error bar.
  geom_text(data = duenger_sum, aes(y = ymax, label = groups), vjust = 0, nudge_y = 1) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )
#> No summary function supplied, defaulting to `mean_se()`
But as the summary dataset now already contains the mean and the values for the error bars a second option would be to do:
# Plot straight from the summary table: y is the mean, ymin/ymax the ends of
# the error bar, groups the Tukey letter.
ggplot(duenger_sum, aes(x = Duenger.g, y = y, fill = Duenger.g)) +
  geom_col() +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = .1) +
  # Derive the legend labels from the doses present in the data; a fixed
  # label vector is applied by position and mislabels the groups.
  scale_fill_discrete(labels = function(dose) paste0(dose, "g")) +
  geom_text(aes(y = ymax, label = groups), vjust = 0, nudge_y = 1) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )

How can I edit the common legend title name using ggplot2 and ggpubr?

I am using ggpubr to combine multiple graphs in a single plot, but cannot seem to correctly generate one graph with the title that I would like. I would like the title to say "Customized legend," given that it is a common legend for both graphs. Does anybody know how I can do this?
Here is my data:
data1 = data.frame(var1 = c(1,
1,
1,
1,
2,
2,
2,
2,
3,
3,
3,
3,
4,
4,
4,
4,
5,
5,
5,
5,
6,
6,
6,
6,
7,
7,
7,
7,
8,
8,
8,
8,
9,
9,
9,
9,
10,
10,
10,
10,
11,
11,
11,
11,
12,
12,
12,
12,
13,
13,
13,
13,
14,
14,
14,
14,
15,
15,
15,
15,
16,
16,
16,
16,
17,
17,
17,
17,
18,
18,
18,
18,
19,
19,
19,
19,
20,
20,
20,
20,
21,
21,
21,
21,
22,
22,
22,
22,
23,
23,
23,
23,
24,
24,
24,
24,
25,
25,
25,
25,
26,
26,
26,
26,
27,
27,
27,
27,
28,
28,
28,
28,
29,
29,
29,
29,
30,
30,
30,
30,
31,
31,
31,
31,
32,
32,
32,
32,
33,
33,
33,
33),
var2 = c(1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4),
var3 = c(113,
89,
99,
41,
72,
64,
39,
139,
135,
17,
3,
135,
63,
126,
34,
87,
84,
125,
123,
18,
115,
11,
68,
85,
48,
95,
56,
129,
41,
78,
82,
122,
124,
4,
60,
132,
67,
128,
46,
79,
110,
88,
19,
88,
88,
126,
30,
11,
52,
66,
15,
52,
6,
74,
14,
101,
88,
70,
58,
20,
104,
76,
134,
23,
40,
1,
47,
25,
49,
110,
96,
100,
106,
26,
93,
19,
87,
41,
13,
40,
63,
87,
137,
105,
89,
95,
24,
49,
112,
92,
45,
105,
112,
105,
114,
129,
84,
33,
95,
95,
15,
90,
1,
62,
20,
7,
18,
96,
4,
71,
42,
94,
45,
102,
55,
98,
124,
80,
76,
97,
41,
31,
25,
21,
135,
138,
121,
93,
17,
13,
49,
26))
data2 <- data.frame(var1a = c(1,
1,
1,
1,
2,
2,
2,
2,
3,
3,
3,
3,
4,
4,
4,
4,
5,
5,
5,
5,
6,
6,
6,
6,
7,
7,
7,
7,
8,
8,
8,
8,
9,
9,
9,
9,
10,
10,
10,
10,
11,
11,
11,
11,
12,
12,
12,
12,
13,
13,
13,
13,
14,
14,
14,
14,
15,
15,
15,
15,
16,
16,
16,
16,
17,
17,
17,
17,
18,
18,
18,
18,
19,
19,
19,
19,
20,
20,
20,
20,
21,
21,
21,
21,
22,
22,
22,
22,
23,
23,
23,
23,
24,
24,
24,
24,
25,
25,
25,
25,
26,
26,
26,
26,
27,
27,
27,
27,
28,
28,
28,
28,
29,
29,
29,
29,
30,
30,
30,
30,
31,
31,
31,
31,
32,
32,
32,
32,
33,
33,
33,
33),
var2a = c(1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4),
var3a = c(113,
89,
99,
41,
72,
64,
39,
139,
135,
17,
3,
135,
63,
126,
34,
87,
84,
125,
123,
18,
115,
11,
68,
85,
48,
95,
56,
129,
41,
78,
82,
122,
124,
4,
60,
132,
67,
128,
46,
79,
110,
88,
19,
88,
88,
126,
30,
11,
52,
66,
15,
52,
6,
74,
14,
101,
88,
70,
58,
20,
104,
76,
134,
23,
40,
1,
47,
25,
49,
110,
96,
100,
106,
26,
93,
19,
87,
41,
13,
40,
63,
87,
137,
105,
89,
95,
24,
49,
112,
92,
45,
105,
112,
105,
114,
129,
84,
33,
95,
95,
15,
90,
1,
62,
20,
7,
18,
96,
4,
71,
42,
94,
45,
102,
55,
98,
124,
80,
76,
97,
41,
31,
25,
21,
135,
138,
121,
93,
17,
13,
49,
26))
Here is the code that I am using:
#Open packages
library(ggplot2)
library(ggpubr)
# Use the ggpubr publication theme for every plot created below.
theme_set(theme_pubr())
# Convert the grouping columns to factors with an explicit level order.
data1$var2 <- factor(data1$var2, levels = c(1,2,3,4))
data2$var2a <- factor(data2$var2a, levels = c(1,2,3,4))
# Build the two line plots; the grouping column is mapped to both linetype
# and colour.
plot1 <- ggplot(data1, aes(x = var1, y = var3, group = var2)) +
geom_line(size = 1.5, aes(linetype = var2, color = var2)) +
xlab('x_label') +
ylab('y_label')+
# NOTE: no fill aesthetic is mapped in this plot, so this scale does not
# rename the linetype/colour legend -- this is the problem being asked about.
scale_fill_discrete(name = 'customized legend')
plot2 <- ggplot(data2, aes(x = var1a, y = var3a, group = var2a)) +
geom_line(size = 1.5, aes(linetype = var2a, color = var2a)) +
xlab('x_label') +
ylab('y_label')+
# NOTE: same as above -- fill is not mapped, so the legend title is unchanged.
scale_fill_discrete(name = 'customized legend')
# Place both plots side by side with one shared legend underneath.
fig <- ggarrange(plot1, plot2,
ncol = 2,
nrow = 1,
common.legend = TRUE,
legend = "bottom")
# Print the combined figure.
fig
Since you didn't use the fill aesthetic in your ggplot, you should not use scale_fill_discrete. What you need is to set the legend title of linetype and color to "customized legend", since those are the aesthetics that you used.
library(ggplot2)
library(ggpubr)
# The grouping column is mapped to linetype and colour, so the legend title
# has to be set on exactly those two aesthetics (same title merges them).
plot1 <- ggplot(data1, aes(x = var1, y = var3, group = var2)) +
  geom_line(aes(linetype = var2, color = var2), size = 1.5) +
  labs(
    x = "x_label",
    y = "y_label",
    linetype = "customized legend",
    color = "customized legend"
  )

plot2 <- ggplot(data2, aes(x = var1a, y = var3a, group = var2a)) +
  geom_line(aes(linetype = var2a, color = var2a), size = 1.5) +
  labs(
    x = "x_label",
    y = "y_label",
    linetype = "customized legend",
    color = "customized legend"
  )

# Side by side, one shared legend below both panels.
ggarrange(
  plot1, plot2,
  nrow = 1, ncol = 2,
  common.legend = TRUE,
  legend = "bottom"
)

Why are my error bars on my graph out of place?

I have a graph that I'm trying to make with ggplot and gridExtra, but my error bars are out of place. I want the error bars to be at the top of each bar, not where they are now. What can I do to correct them?
Also, what ggsave parameters will generate a graph with the same pixel parameters that I am using with the r png base function? ggsave seems to work more consistently than this function, so I need to use it.
Data:
###Open packages###
library(readxl)
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(gridExtra)
#Dataframes
set1 <- data.frame(type = c(1,
1,
1,
1,
1,
1,
1,
1,
1,
2,
2,
2,
2,
2,
2,
2,
2,
2,
3,
3,
3,
3,
3,
3,
3,
3,
3),
flowRate = c(24,
24,
24,
45,
45,
45,
58,
58,
58,
24,
24,
24,
45,
45,
45,
58,
58,
58,
24,
24,
24,
45,
45,
45,
58,
58,
58),
speed = c(0.563120137230256,
0.301721535875508,
0.170683367727845,
0.698874950490133,
0.158488731250147,
0.162788814307903,
0.105943103772245,
0.682354871986346,
0.17945825301837,
0.806637519498752,
0.599304186634932,
0.268788206619179,
0.518615600601962,
0.907628477211427,
0.144209408332705,
0.161586044320138,
0.946354993801663,
0.488881557759483,
0.497120443885793,
0.666120238846602,
0.264813203831783,
0.717007333314455,
0.95119232422312,
0.833669574933742,
0.450082932184122,
0.309570971522678,
0.732874401666482))
set2 <- data.frame(type = c(1,
1,
1,
1,
1,
1,
1,
1,
1,
2,
2,
2,
2,
2,
2,
2,
2,
2,
3,
3,
3,
3,
3,
3,
3,
3,
3),
flowRate = c(24,
24,
24,
45,
45,
45,
58,
58,
58,
24,
24,
24,
45,
45,
45,
58,
58,
58,
24,
24,
24,
45,
45,
45,
58,
58,
58),
speed = c(0.489966876244169,
0.535542121502899,
0.265940150225231,
0.399521957817437,
0.0831661276630631,
0.302201301891001,
0.78194419406759,
0.202331797255324,
0.192182716686147,
0.163038660094618,
0.658020173938572,
0.735633308902771,
0.480982144690572,
0.749452781972296,
0.491759702396918,
0.459610541236644,
0.397660083986082,
0.939983924945833,
0.128956722185581,
0.998492083119223,
0.440514184126494,
0.242917958355044,
0.350643319960552,
0.02613674288471,
0.71625407018877,
0.589325978787179,
0.649116781211748))
Code:
# Standard error of the mean.
sem <- function(x) sd(x) / sqrt(length(x))

# Mean and SEM of speed for every type x flowRate combination.
#
# The grouping must use the design factors (type, flowRate). Grouping by
# speed -- the measured value -- puts every observation into its own group:
# the "mean" is then just the raw value and the SEM is NA (sd of one number).
# Mean and SEM are kept in ONE data frame so their rows cannot get out of
# step, which is what misplaced the error bars before.
summarise_speed <- function(df) {
  means <- aggregate(speed ~ type + flowRate, data = df, FUN = mean)
  sems <- aggregate(speed ~ type + flowRate, data = df, FUN = sem)
  names(sems)[names(sems) == "speed"] <- "sem"
  out <- merge(means, sems, by = c("type", "flowRate"))
  out[order(out$type, out$flowRate), ]
}

mean_set1 <- summarise_speed(set1)
mean_set2 <- summarise_speed(set2)

# Dodged bar chart of mean speed with an upward error bar of one SEM.
plot_speed <- function(summary_df) {
  # Bars and error bars share one dodge so each bar gets its own error bar.
  dodge <- position_dodge(width = 0.6)
  ggplot(summary_df, aes(x = type, y = speed, fill = factor(flowRate))) +
    geom_bar(stat = "identity", width = 0.6, position = dodge, col = "black") +
    scale_fill_discrete(name = "Flow Rate") +
    xlab("type") + ylab("Speed") +
    # Bare column names (not df$col) let ggplot pair each bar with the SEM of
    # its own row.
    geom_errorbar(aes(ymin = speed, ymax = speed + sem), width = 0.2, position = dodge)
}

# Graph sets. Both panels show speed on y and flow rate as fill, so they use
# the same axis and legend titles.
set1_graph <- plot_speed(mean_set1)
set2_graph <- plot_speed(mean_set2)

# Grid.arrange and save image
png("image.png", width = 1000, height = 700)
grid.arrange(set1_graph, set2_graph, nrow = 1, ncol = 2)
dev.off()
# NOTE(review): a ggsave() call that should give the same pixel size
# (png() defaults to 72 ppi; units = "px" needs a recent ggplot2 -- verify):
# ggsave("image.png", arrangeGrob(set1_graph, set2_graph, nrow = 1, ncol = 2),
#        width = 1000, height = 700, units = "px", dpi = 72)

Specialised Boxplot: Plotting Lines to the Error Bars to Highlight the Data Range in R

Overview
I have a data frame called ANOVA.Dataframe.1 (see below) containing the dependent variable called 'Canopy_Index' and the independent variable called 'Urbanisation_index'.
My aim is to produce a boxplot (exactly the same as the desired result below) for Canopy Cover (%) for each category of the Urbanisation Index with plotted lines pointing towards both the bottom and top of the error bars to highlight the data range.
I have searched intensively for code to produce this kind of boxplot (please see the desired result), but I was unsuccessful, and I'm also unsure whether these boxplots have a specialised name.
Perhaps this can be achieved in either ggplot or Base R
If anyone can help, I would be deeply appreciative.
Desired Result ( Reference)
I can produce an ordinary boxplot with the R-code below, but I cannot figure out how to implement the lines pointing towards the ends of the error bars.
R-code
# Base-R boxplot of canopy index per urbanisation category. The return value
# of boxplot() is kept in Boxplot.obs1.Canopy.Urban.
Boxplot.obs1.Canopy.Urban <- boxplot(
  Canopy_Index ~ Urbanisation_index,
  data = ANOVA.Dataframe.1,
  main = "Mean Canopy Index (%) for Categories of the Urbanisation Index",
  xlab = "Urbanisation Index",
  ylab = "Canopy Index (%)"
)
Boxplot produced from R-code
Data frame 1
structure(list(Urbanisation_index = c(2, 2, 4, 4, 3, 3, 4, 4,
4, 2, 4, 3, 4, 4, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2,
2, 2, 2, 4, 4, 3, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 4, 4, 4,
4, 4, 4, 4), Canopy_Index = c(65, 75, 55, 85, 85, 85, 95, 85,
85, 45, 65, 75, 75, 65, 35, 75, 65, 85, 65, 95, 75, 75, 75, 65,
75, 65, 75, 95, 95, 85, 85, 85, 75, 75, 65, 85, 75, 65, 55, 95,
95, 95, 95, 45, 55, 35, 55, 65, 95, 95, 45, 65, 45, 55)), row.names = c(NA,
-54L), class = "data.frame")
Dataframe 2
structure(list(Urbanisation_index = c(2, 2, 4, 4, 3, 3, 4, 4,
4, 3, 4, 4, 4, 4, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2,
2, 2, 2, 4, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 4, 4, 4, 4, 4, 4, 4
), Canopy_Index = c(5, 45, 5, 5, 5, 5, 45, 45, 55, 15, 35, 45,
5, 5, 5, 5, 5, 5, 35, 15, 15, 25, 25, 5, 5, 5, 5, 5, 5, 15, 25,
15, 35, 25, 45, 5, 25, 5, 5, 5, 5, 55, 55, 15, 5, 25, 15, 15,
15, 15)), row.names = c(NA, -50L), class = "data.frame")
Alice, is this what you are looking for?
You can do everything with ggplot2, but for non standard things you have to play with it for a while. My code:
library(tidyverse)
library(wrapr)
# NOTE(review): `df` is presumably the question's ANOVA.Dataframe.1 -- confirm.

# End points of the data range (min and max) for every urbanisation category:
# one row per end point, each with its own line id `gr`.
range_ends <- df %>%
  group_by(Urbanisation_index) %>%
  summarise(
    bot = min(Canopy_Index),
    top = max(Canopy_Index)
  ) %>%
  pivot_longer(bot:top, names_to = "pos", values_to = "val") %>%
  transmute(x = Urbanisation_index, y = val, gr = row_number())

# Every line starts at one common point at x = 0, 15 % above the largest
# value, and runs to its end point. One start row per line id, so this works
# for any number of categories.
range_lines <- bind_rows(
  range_ends,
  tibble(x = 0, y = max(range_ends$y) * 1.15, gr = range_ends$gr)
)

ggplot(df, aes(
  x = Urbanisation_index,
  y = Canopy_Index,
  group = Urbanisation_index
)) +
  # Whisker caps first, then the boxes on top of them.
  stat_boxplot(geom = "errorbar", width = .25) +
  geom_boxplot() +
  geom_line(data = range_lines, aes(x = x, y = y, group = gr)) +
  theme_light() +
  theme(panel.grid = element_blank()) +
  # Zoom without dropping data: the visible window is set just around the
  # boxes, while the lines are still drawn towards their start point.
  coord_cartesian(
    xlim = c(min(df$Urbanisation_index) - .5, max(df$Urbanisation_index) + .5),
    ylim = c(min(df$Canopy_Index) * .95, max(df$Canopy_Index) * 1.05)
  ) +
  ylab("Canopy Index (%)") +
  xlab("Urbanisation Index")

Merge and Perfectly Align Histogram and Boxplot using ggplot2

Since yesterday I have been reading answers and websites in order to combine and align, in one plot, a histogram and a boxplot generated with the ggplot2 package.
This question differs from others because the boxplot chart needs to be reduced in height and aligned to the left outer margin of the histogram.
Considering the following dataset:
my_df <- structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100), value= c(18, 9, 3,
4, 3, 13, 12, 5, 8, 37, 64, 107, 11, 11, 8, 18, 5, 13, 13, 14,
11, 11, 9, 14, 11, 14, 12, 10, 11, 10, 5, 3, 8, 11, 12, 11, 7,
6, 6, 4, 11, 8, 14, 13, 14, 15, 10, 2, 4, 4, 8, 15, 21, 9, 5,
7, 11, 6, 11, 2, 6, 16, 5, 11, 21, 33, 12, 10, 13, 33, 35, 7,
7, 9, 2, 21, 32, 19, 9, 8, 3, 26, 37, 5, 6, 10, 18, 5, 70, 48,
30, 10, 15, 18, 7, 4, 19, 10, 4, 32)), row.names = c(NA, 100L
), class = "data.frame", .Names = c("id", "value"))
I generated the boxplot:
require(dplyr)
require(ggplot2)
# Horizontal boxplot of `value`: drawn against a dummy x category, then
# flipped; the (now vertical) dummy axis is stripped of text and ticks.
ggplot(select(my_df, value), aes(x = "", y = value)) +
  geom_boxplot(fill = "lightblue", color = "black") +
  coord_flip() +
  theme_classic() +
  labs(x = "") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
and I generated the histogram
# Histogram of `value` with unit-wide bins; bar height is the bin count
# divided by the total count, i.e. the relative frequency.
ggplot(select(my_df, id, value)) +
  geom_histogram(
    aes(x = value, y = (..count..) / sum(..count..)),
    position = "identity",
    binwidth = 1,
    fill = "lightblue",
    color = "black"
  ) +
  labs(y = "Relative Frequency") +
  theme_classic()
The result I am looking to obtain is a single plot like:
Note that the boxplot must be reduced in height and the ticks must be exactly aligned in order to give a different perspective of the same visual.
You can use either egg, cowplot or patchwork packages to combine those two plots. See also this answer for more complex examples.
library(dplyr)
library(ggplot2)
# plt1: horizontal boxplot of `value` (dummy x category, then flipped); the
# dummy axis carries no text or ticks.
plt1 <- ggplot(my_df, aes(x = "", y = value)) +
  geom_boxplot(fill = "lightblue", color = "black") +
  coord_flip() +
  theme_classic() +
  xlab("") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# plt2: histogram of `value` with unit-wide bins, scaled to relative
# frequency. after_stat() replaces the deprecated ..count.. notation.
plt2 <- ggplot(my_df) +
  geom_histogram(
    aes(x = value, y = after_stat(count / sum(count))),
    position = "identity", binwidth = 1,
    fill = "lightblue", color = "black"
  ) +
  ylab("Relative Frequency") +
  theme_classic()
egg
# install.packages("egg", dependencies = TRUE)
# Histogram on top, boxplot below, at a 2:1 height ratio.
egg::ggarrange(plt2, plt1, ncol = 1, heights = c(2, 1))
cowplot
# install.packages("cowplot", dependencies = TRUE)
# Single column, 2:1 heights, panels aligned vertically on their left and
# right axes.
cowplot::plot_grid(
  plotlist = list(plt2, plt1),
  ncol = 1,
  rel_heights = c(2, 1),
  align = "v",
  axis = "lr"
)
patchwork
# install.packages("devtools", dependencies = TRUE)
# devtools::install_github("thomasp85/patchwork")
library(patchwork)
# `/` stacks the histogram over the boxplot; heights are in a 2:1 ratio.
plt2 / plt1 + plot_layout(heights = c(2, 1))

Resources