Related
I need to make a ggplot bar plot with error bars and Tukey significance letters for plants grown with different fertilizer concentrations.
The data should be grouped by concentration, and the significance letters should be added automatically.
I already have code for the same problem with a boxplot, which works nicely. I tried several bar plot tutorials, but I always get the error: stat_count() can only have an x or y aesthetic.
So I wondered: is it possible to convert my boxplot code into bar plot code? I tried but couldn't do it :) And if not, how do I automatically add TukeyHSD test significance letters to a ggplot bar plot?
This is my Code for the boxplot with the tukey letters:
# Highest observed plant height per fertilizer dose; used as the vertical
# anchor for the significance letters drawn above each box.
value_max <- Dünger %>%
  group_by(Duenger.g) %>%
  summarize(max_value = max(Höhe.cm))

# Tukey HSD test on height ~ dose. The grouping letters are re-sorted by
# treatment name, presumably so that they line up with the rows of value_max.
hsd <- HSD.test(
  aov(Höhe.cm ~ Duenger.g, data = Dünger),
  trt = "Duenger.g", group = TRUE
)
sig.letters <- hsd$groups[order(row.names(hsd$groups)), ]

# Boxplot per dose with whisker caps and the Tukey letters just above the
# tallest observation of each group.
J <- ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm)) +
  geom_boxplot(aes(fill = Duenger.g)) +
  # Build the legend labels from the actual levels instead of a hard-coded
  # vector, so a label can never be attached to the wrong dose.
  scale_fill_discrete(labels = function(x) paste0(x, "g")) +
  geom_text(
    data = value_max,
    aes(x = Duenger.g, y = 0.1 + max_value, label = sig.letters$groups),
    vjust = 0
  ) +
  stat_boxplot(geom = "errorbar", width = 0.1) +
  ggtitle("Auswirkung von Dünger auf die Höhe von Pflanzen") +
  xlab("Dünger in g") +
  ylab("Höhe in cm")
J
This is how it looks:
boxplot with tukey
Data from dput:
structure(list(Duenger.g = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4), plant = c(1, 2, 3, 4, 5, 7, 10, 11, 12, 13, 14, 18, 19,
21, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40,
41, 42, 43, 44, 48, 49, 50, 53, 54, 55, 56, 57, 58, 61, 62, 64,
65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 79, 80, 81, 83, 85, 86,
88, 89, 91, 93, 99, 100, 102, 103, 104, 105, 106, 107, 108, 110,
111, 112, 113, 114, 115, 116, 117, 118, 120, 122, 123, 125, 126,
127, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 143, 144,
145, 146, 147, 149), height.cm = c(5.7, 2.8, 5.5, 8, 3.5, 2.5,
4, 6, 10, 4.5, 7, 8.3, 11, 7, 8, 2.5, 7.4, 3, 14.5, 7, 12, 7.5,
30.5, 27, 6.5, 19, 10.4, 12.7, 27.3, 11, 11, 10.5, 10.5, 13,
53, 12.5, 12, 6, 12, 35, 8, 16, 56, 63, 69, 62, 98, 65, 77, 32,
85, 75, 33.7, 75, 55, 38.8, 39, 46, 35, 59, 44, 31.5, 49, 34,
52, 37, 43, 38, 28, 14, 28, 19, 20, 23, 17.5, 32, 16, 17, 24.7,
34, 50, 12, 14, 21, 33, 39.3, 41, 29, 35, 48, 40, 65, 35, 10,
26, 34, 41, 32, 38, 23.5, 22.2, 20.5, 29, 34, 45)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -105L))
Thank you
mirai
A bar chart and a boxplot are two different things. By default, geom_boxplot computes the boxplot statistics (stat="boxplot"). In contrast, geom_bar by default counts the number of observations (stat="count"), and that count is then mapped to y. That's the reason why you get an error. Hence, simply replacing geom_boxplot with geom_bar will not give you the desired result. Instead, you could use e.g. stat_summary to create your bar chart with error bars. Additionally, I created a summary dataset to add the labels on top of the error bars.
library(ggplot2)
library(dplyr)
library(agricolae)
# Rename the height column to the name used in the plots and treat the
# fertilizer dose as a categorical variable.
Dünger <- Dünger |>
  rename("Höhe.cm" = height.cm) |>
  mutate(Duenger.g = factor(Duenger.g))

# Tukey HSD grouping letters. HSD.test() keeps the treatment levels as row
# names, so they are copied into a regular column to join on.
hsd <- HSD.test(
  aov(Höhe.cm ~ Duenger.g, data = Dünger),
  trt = "Duenger.g", group = TRUE
)
sig.letters <- hsd$groups %>% mutate(Duenger.g = row.names(.))

# One row per dose: mean (y), mean -/+ standard error (ymin, ymax) and the
# significance letter.
duenger_sum <- Dünger |>
  group_by(Duenger.g) |>
  summarize(mean_se(Höhe.cm)) |>
  left_join(sig.letters, by = "Duenger.g")

# Bars show the group means, error bars the mean_se() default, and the letters
# sit slightly above the upper end of each error bar.
ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm, fill = Duenger.g)) +
  stat_summary(geom = "bar", fun = "mean") +
  stat_summary(geom = "errorbar", width = .1) +
  # Derive the legend labels from the factor levels (0, 0.5, 1, 2, 4) rather
  # than a hard-coded vector that does not match them.
  scale_fill_discrete(labels = function(x) paste0(x, "g")) +
  geom_text(
    data = duenger_sum,
    aes(y = ymax, label = groups),
    vjust = 0, nudge_y = 1
  ) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )
#> No summary function supplied, defaulting to `mean_se()`
But as the summary dataset now already contains the mean and the values for the error bars a second option would be to do:
# Same chart drawn directly from the summary table: `y` is the group mean and
# `ymin` / `ymax` are the error-bar limits computed by mean_se().
ggplot(duenger_sum, aes(x = Duenger.g, y = y, fill = Duenger.g)) +
  geom_col() +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = .1) +
  # Derive the legend labels from the dose values themselves; the previous
  # hard-coded vector ("0.5g", "1g", "2g", "3g", "4g") did not match the
  # doses in the data (0, 0.5, 1, 2, 4).
  scale_fill_discrete(labels = function(x) paste0(x, "g")) +
  geom_text(aes(y = ymax, label = groups), vjust = 0, nudge_y = 1) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )
I am using ggpubr to combine multiple graphs in a single plot, but cannot seem to correctly generate one graph with the title that I would like. I would like the title to say "Customized legend," given that it is a common legend for both graphs. Does anybody know how I can do this?
Here is my data:
# Example data: 33 values of var1, each crossed with var2 = 1..4 (132 rows),
# plus one measurement (var3) per row. All three columns are doubles.
data1 <- data.frame(
  var1 = rep(as.numeric(1:33), each = 4),
  var2 = rep(c(1, 2, 3, 4), times = 33),
  var3 = c(
    113, 89, 99, 41, 72, 64, 39, 139, 135, 17, 3, 135,
    63, 126, 34, 87, 84, 125, 123, 18, 115, 11, 68, 85,
    48, 95, 56, 129, 41, 78, 82, 122, 124, 4, 60, 132,
    67, 128, 46, 79, 110, 88, 19, 88, 88, 126, 30, 11,
    52, 66, 15, 52, 6, 74, 14, 101, 88, 70, 58, 20,
    104, 76, 134, 23, 40, 1, 47, 25, 49, 110, 96, 100,
    106, 26, 93, 19, 87, 41, 13, 40, 63, 87, 137, 105,
    89, 95, 24, 49, 112, 92, 45, 105, 112, 105, 114, 129,
    84, 33, 95, 95, 15, 90, 1, 62, 20, 7, 18, 96,
    4, 71, 42, 94, 45, 102, 55, 98, 124, 80, 76, 97,
    41, 31, 25, 21, 135, 138, 121, 93, 17, 13, 49, 26
  )
)
# Second example data set: the same 33 x 4 layout and measurements as the
# first one, stored under the column names var1a / var2a / var3a.
data2 <- data.frame(
  var1a = rep(as.numeric(1:33), each = 4),
  var2a = rep(c(1, 2, 3, 4), times = 33),
  var3a = c(
    113, 89, 99, 41, 72, 64, 39, 139, 135, 17, 3, 135,
    63, 126, 34, 87, 84, 125, 123, 18, 115, 11, 68, 85,
    48, 95, 56, 129, 41, 78, 82, 122, 124, 4, 60, 132,
    67, 128, 46, 79, 110, 88, 19, 88, 88, 126, 30, 11,
    52, 66, 15, 52, 6, 74, 14, 101, 88, 70, 58, 20,
    104, 76, 134, 23, 40, 1, 47, 25, 49, 110, 96, 100,
    106, 26, 93, 19, 87, 41, 13, 40, 63, 87, 137, 105,
    89, 95, 24, 49, 112, 92, 45, 105, 112, 105, 114, 129,
    84, 33, 95, 95, 15, 90, 1, 62, 20, 7, 18, 96,
    4, 71, 42, 94, 45, 102, 55, 98, 124, 80, 76, 97,
    41, 31, 25, 21, 135, 138, 121, 93, 17, 13, 49, 26
  )
)
Here is the code that I am using:
# Load packages
library(ggplot2)
library(ggpubr)

# Use the ggpubr theme for every plot created below
theme_set(theme_pubr())

# The grouping columns become factors with levels 1..4
data1$var2 <- factor(data1$var2, levels = 1:4)
data2$var2a <- factor(data2$var2a, levels = 1:4)

# One line per var2 / var2a level, distinguished by line type and colour.
# NOTE(review): neither plot maps the fill aesthetic, so the fill scale added
# at the end of each plot has nothing to name.
plot1 <- ggplot(
  data1,
  aes(x = var1, y = var3, group = var2, linetype = var2, color = var2)
) +
  geom_line(size = 1.5) +
  labs(x = 'x_label', y = 'y_label') +
  scale_fill_discrete(name = 'customized legend')

plot2 <- ggplot(
  data2,
  aes(x = var1a, y = var3a, group = var2a, linetype = var2a, color = var2a)
) +
  geom_line(size = 1.5) +
  labs(x = 'x_label', y = 'y_label') +
  scale_fill_discrete(name = 'customized legend')

# Place both plots side by side with a single legend underneath
fig <- ggarrange(
  plot1, plot2,
  ncol = 2, nrow = 1,
  common.legend = TRUE, legend = "bottom"
)
fig
Since you didn't use the fill aesthetic in your ggplot, you should not use scale_fill_discrete. What you need is to set the legend title of linetype and color to "customized legend", since those are the aesthetics that you used.
library(ggplot2)
library(ggpubr)
# The legend title is set on the aesthetics that are actually mapped
# (linetype and colour), using the same text for both.
plot1 <- ggplot(
  data1,
  aes(x = var1, y = var3, group = var2, linetype = var2, color = var2)
) +
  geom_line(size = 1.5) +
  labs(
    x = 'x_label', y = 'y_label',
    linetype = "customized legend", color = "customized legend"
  )

plot2 <- ggplot(
  data2,
  aes(x = var1a, y = var3a, group = var2a, linetype = var2a, color = var2a)
) +
  geom_line(size = 1.5) +
  labs(
    x = 'x_label', y = 'y_label',
    linetype = "customized legend", color = "customized legend"
  )

# Side-by-side layout with one shared legend at the bottom
ggarrange(
  plot1, plot2,
  ncol = 2, nrow = 1,
  common.legend = TRUE, legend = "bottom"
)
I'm making bar charts that show the difference between groups over separate periods, however I also want to shade out certain bars that don't meet certain criteria (i.e CV > 20 or Sample_size < 30).
Can someone please help me to do this? I've attached a picture as an example of what I'm looking for my output to look like (mocked up in excel), and some example data below.
Thanks a million!!
Example data:
structure(list(Group = c("A", "B", "C", "D", "E", "F", "G", "H",
"I", "J", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J"),
Perc = c(38, 38, 46, 27, 18, 39, 43, 27, 38, 40, 39, 39,
50, 32, 33, 41, 52, 32, 43, 43), Lower_CI = c(36, 36, 36,
15, 8, 31, 30, 14, 18, 23, 38, 37, 40, 21, 15, 31, 40, 17,
24, 27), Upper_CI = c(39, 40, 57, 39, 27, 47, 55, 40, 58,
56, 41, 41, 60, 44, 51, 51, 64, 47, 63, 60), CV = c(2, 2,
11, 12, 28, 11, 15, 6, 18, 13, 2, 2, 10, 19, 5, 13, 12, 23,
14, 20), Sample_size = c(30, 15, 140, 40, 69, 75, 56, 256,
45, 536, 4852, 785, 36, 48, 315, 120, 83, 15, 123, 45), Period = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
You can try a dplyr and ggplot2 pipeline. You can integrate a new value for Period using mutate(). After that, you can also keep the same order for Group by formatting it as a factor. The plot you want is a dodged bar plot, which you can build with ggplot2 functions. Finally, flip the axes using coord_flip(). The additional level that was created can be dropped from the legend using breaks inside scale_fill_manual(). Here is the code:
library(ggplot2)
library(dplyr)
#Code
# Dodged bar chart of Perc by Group and Period in which bars failing the
# quality criteria (CV > 20 or Sample_size < 30) are shaded gray.
# The flag is stored in a separate fill variable instead of overwriting
# Period, and every layer is grouped by the original Period; otherwise a
# shaded period-1 bar would be dodged after its period-2 sibling.
df %>%
  mutate(
    Shade = factor(ifelse(CV > 20 | Sample_size < 30, 3, Period)),
    # Reversed level order, presumably so that the first group ends up at the
    # top once the axes are flipped.
    Group = factor(Group, levels = rev(unique(Group)), ordered = TRUE)
  ) %>%
  ggplot(aes(x = Group, y = Perc, fill = Shade, group = Period)) +
  geom_col(position = position_dodge(0.9)) +
  geom_errorbar(
    aes(ymin = Lower_CI, ymax = Upper_CI),
    position = position_dodge(0.9)
  ) +
  # Named colours tie each colour to its level explicitly; `breaks` keeps the
  # gray "3" level out of the legend.
  scale_fill_manual(
    values = c("1" = "cornflowerblue", "2" = "orange", "3" = "gray"),
    breaks = c("1", "2")
  ) +
  labs(fill = "Period") +
  theme_bw() +
  coord_flip()
Output:
Some data used:
# Example data as a tibble-classed data frame with 20 rows: ten groups (A-J)
# for each of two periods. Columns: Group, Perc, Lower_CI, Upper_CI, `(-)`,
# `(+)`, CV, Sample_size and Period.
df <- structure(list(Group = c("A", "B", "C", "D", "E", "F", "G", "H",
"I", "J", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J"),
Perc = c(38, 38, 46, 27, 18, 39, 43, 27, 38, 40, 39, 39,
50, 32, 33, 41, 52, 32, 43, 43), Lower_CI = c(36, 36, 36,
15, 8, 31, 30, 14, 18, 23, 38, 37, 40, 21, 15, 31, 40, 17,
24, 27), Upper_CI = c(39, 40, 57, 39, 27, 47, 55, 40, 58,
56, 41, 41, 60, 44, 51, 51, 64, 47, 63, 60), `(-)` = c(2,
2, 10, 12, 10, 8, 13, 13, 20, 17, 1, 2, 10, 11, 18, 10, 12,
15, 19, 16), `(+)` = c(1, 2, 11, 12, 9, 8, 12, 13, 20, 16,
2, 2, 10, 12, 18, 10, 12, 15, 20, 17), CV = c(2, 2, 11, 12,
28, 11, 15, 6, 18, 13, 2, 2, 10, 19, 5, 13, 12, 23, 14, 20
), Sample_size = c(30, 15, 140, 40, 69, 75, 56, 256, 45,
536, 4852, 785, 36, 48, 315, 120, 83, 15, 123, 45), Period = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
You could do something like this:
library(ggplot2)
# Colour/fill variable: the period (1 or 2), replaced by 3 for rows that fail
# the quality criteria (Sample_size < 30 or CV > 20).
df$col <- df$Period
df$col[df$Sample_size < 30 | df$CV > 20] <- 3
df$col <- as.factor(df$col)

# Dodged bars grouped by the original Period, so shaded bars keep their place;
# outline and fill both follow `col`.
flag_colours <- c("1" = "blue", "2" = "orange", "3" = "grey")
ggplot(
  data = df,
  aes(x = Group, y = Perc, group = Period, color = col, fill = col)
) +
  geom_col(position = position_dodge()) +
  geom_errorbar(
    aes(ymin = Lower_CI, ymax = Upper_CI),
    position = position_dodge()
  ) +
  # Named colours bind each colour to its level; `breaks` keeps the grey "3"
  # level out of the legend.
  scale_color_manual(values = flag_colours, breaks = c("1", "2")) +
  scale_fill_manual(values = flag_colours, breaks = c("1", "2")) +
  coord_flip() +
  theme_bw() +
  theme(legend.title = element_blank())
Consider the following data.frame
# Rank group of every row: 18 rows in group 2 followed by 16 rows in group 1.
RANK_GROUP <- factor(rep(c(2, 1), times = c(18, 16)))
# Channel category ids, listed once for each rank group.
CHANNEL_CATEGORY <- factor(c(
  1, 2, 10, 15, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 43, 44,
  1, 2, 10, 15, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 43
))
# Number of observations per (rank group, channel category) pair.
CATEGORY_COUNT <- c(
  105, 23, 417, 10, 58, 6, 535, 211, 215, 465, 28, 273, 70, 47, 7, 1, 21, 3,
  69, 14, 493, 3, 44, 3, 516, 162, 253, 516, 24, 228, 64, 59, 2, 45
)
data <- data.frame(RANK_GROUP, CHANNEL_CATEGORY, CATEGORY_COUNT)
I want to make a Facet-Plot with a barplot for each distribution:
# One bar per channel category, drawn in a separate panel for each rank group.
ggplot(data, aes(x = CHANNEL_CATEGORY, y = CATEGORY_COUNT)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  facet_grid(cols = vars(RANK_GROUP))
How can I order the bars according to their y-value within each facet?
I adapted the following from the cookbook:
library(dplyr)
# Sort the rows by count inside each rank group and give every row its own
# position on the x axis. `order` is a factor whose levels follow that sorted
# row order.
pd <- data %>%
  arrange(RANK_GROUP, CATEGORY_COUNT) %>%
  mutate(order = factor(row_number()))

ggplot(pd, aes(x = order, y = CATEGORY_COUNT)) +
  geom_col(colour = "black") +
  # Free x scales: each panel shows only its own bars, without empty slots
  # for the positions that belong to the other rank group.
  facet_grid(. ~ RANK_GROUP, scales = "free_x") +
  # Label every position with the channel category of the same row of `pd`.
  # Taking breaks and labels from `pd` keeps them in sync with the sorted
  # bars; the unsorted global vector would not.
  scale_x_discrete(
    breaks = pd$order,
    labels = as.character(pd$CHANNEL_CATEGORY)
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = .5, size = 8)) +
  labs(x = "Channel")
I have plotted a scatter plot with the point size scaled by frequency:
# Scatter plot of seam cell number per treatment, coloured by strain, with the
# point size scaled by Frequency; the size legend is hidden.
# The size breaks run from 0 to 50 and, as in the asker's list, leave out 33.
g <- ggplot(d, aes(x = Treatment, y = Seam.Cell.Number, size = Frequency)) +
  geom_point(aes(colour = Strain)) +
  scale_size_continuous(range = c(3, 10), breaks = c(0:32, 34:50)) +
  guides(size = FALSE)
Now I am trying to plot means with standard error bars on top. I have the mean and standard error already calculated in columns in my csv file. So so far I have attempted:
# First attempt: add the pre-computed means and mean +/- standard error bars
# on top of g (the error-bar width is given inside aes()).
g +
  geom_point(aes(x = Treatment, y = Mean)) +
  geom_errorbar(
    aes(
      ymin = Mean - Standard.Error,
      ymax = Mean + Standard.Error,
      width = .4
    )
  ) +
  theme(
    axis.text.x = element_blank(),
    legend.key = element_rect(colour = "black")
  )
And:
# Second attempt: the means are added through an explicit layer() call and the
# error-bar width is passed as a fixed parameter.
g +
  layer(data = d, mapping = aes(x = Treatment, y = Mean), geom = "point") +
  geom_errorbar(
    aes(ymin = Mean - Standard.Error, ymax = Mean + Standard.Error),
    width = .4
  ) +
  ylab("Seam Cell Number")
But they both give me very fat error bars/data points. It seems they are being affected by my size scaling in object g. I have tried to modify the size and width of the error bars, and I have tried to modify the size of the data points, both in these last bits of code, but to no avail. Is there a way to 'cancel' the size command for this layer?
If you reverse the order of your ggplot, you may be able to avoid the size distortion on the error bars.
Not having reproducible data, I made some up.
# Made-up example data (the question did not include any).
df <- data.frame(
  Treatment = 1:100, Seam.Cell.Number = 3:102, Frequency = 5:104,
  Strain = rep(c("A", "B", "C", "D"), 25)
)

# Standard error of the mean of x.
std <- function(x) sd(x) / sqrt(length(x))

# Overall mean and standard error, also stored as columns so the plot layers
# can refer to them by name instead of reaching outside the data.
Mean <- mean(df$Treatment)
df$Mean <- Mean
df$Standard.Error <- std(df$Treatment)

# Base plot with the mean points and error bars. No size aesthetic is mapped
# here, so these layers keep their default size; the error-bar width is a
# fixed parameter, not an aesthetic.
g <- ggplot(df, aes(x = Treatment, y = Seam.Cell.Number)) +
  geom_point(aes(y = Mean)) +
  geom_errorbar(
    aes(ymin = Mean - Standard.Error, ymax = Mean + Standard.Error),
    width = .4
  ) +
  theme(
    axis.text.x = element_blank(),
    legend.key = element_rect(colour = "black")
  )

# Map size = Frequency only in the scatter layer, so the size scale affects
# those points and nothing else. The breaks are the original list (0 to 50
# without 33); the size legend is hidden.
g + geom_point(aes(colour = Strain, size = Frequency)) +
  scale_size_continuous(range = c(3, 10), breaks = c(0:32, 34:50)) +
  guides(size = "none")