Change ticks position inwards in ggplot - r

I want to change the position of the ticks in a ggplot plot so that they point inwards. The axis.ticks.margin setting is deprecated, so I have tried to play around with axis.text but could not manage to do it. Can anyone help me out?
Here is an example dataset df
df<- structure(list(X1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41), X2 = c(0,
10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140,
150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270,
280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380, 390, 400
)), .Names = c("X1", "X2"), row.names = c(NA, -41L), class = "data.frame")
Below is what I have tried so far:
library(ggplot2)
# First attempt: scatter plot with the grid removed and custom margins on the y-axis labels.
# NOTE(review): the example `df` above only has columns X1 and X2, so the Age/GPP
# mappings presumably refer to the asker's real data -- confirm.
ggplot(df, aes(x=Age, y=GPP)) +
geom_point()+
theme(panel.grid = element_blank(),
# NOTE(review): this element_text() is passed without a theme element name
# (e.g. axis.text.x = ...), so it is unclear which element it was meant to style.
element_text(hjust=seq(from=0,to=1,length.out=6)),
# margin() arguments are top, right, bottom, left, followed by the unit ("pt").
axis.text.y = element_text(margin=margin(5,5,10,5,"pt")))

# Inward-pointing ticks: a negative tick length draws the ticks into the panel.
# The text margins (larger than the tick length) keep the axis labels clear of
# the ticks. X1 and X2 are the columns of the example `df`.
ggplot(df, aes(x = X1, y = X2)) +
  geom_point() +
  theme(
    panel.grid = element_blank(),
    axis.ticks.length = unit(-0.25, "cm"),
    # margin() is the documented constructor for text margins
    # (top, right, bottom, left).
    axis.text.x = element_text(margin = margin(0.5, 0.5, 0.5, 0.5, unit = "cm")),
    axis.text.y = element_text(margin = margin(0.5, 0.5, 0.5, 0.5, unit = "cm"))
  )

Related

How to adjust labels' font size to size of the bars in ggplot2?

Good morning,
I have a ggplot2 bar graph inside a loop, in which the size of bars changes in every run of the loop:
# Bar chart of column `i` of `draft` against its second column, with each
# bar's value printed in white, bold text just below the top of the bar.
ggplot(draft, aes(x = draft[, 2], y = draft[, i])) +
  geom_col(fill = "navyblue") +
  geom_text(
    label = draft[, i],
    size = 4, # fixed label size: the value the question wants to adapt per plot
    vjust = 1.2,
    colour = "white",
    fontface = "bold"
  ) +
  ggtitle(paste("Session trends for", colnames(draft)[i], "-", player)) +
  theme(
    # Text elements
    plot.title = element_text(colour = "black", size = 10, face = "bold", hjust = 0.5),
    axis.text.x = element_text(angle = 0, colour = "black", size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    # Strip the panel decoration and keep only black axis lines
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.position = "none"
  )
In the geom_text part (labels of the bars), as you can see, size=4. This is fine for some graphs, but it is too big/small for others. My question is:
Is there any way to adjust the font size automatically to the size of the bar?
Thank you very much in advance.
Reproducible example.
This is the dataframe from where I represent the data:
> dput(draft)
structure(list(Player = c("Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos"), Date = structure(c(19371, 19370, 19369, 19368,
19367, 19364, 19363, 19362, 19361, 19360, 19359, 19356, 19355,
19354, 19353, 19349), class = "Date"), week = c(29, 29, 29, 29,
29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26), TD = c(6638,
2660, 4761, 4956, 3984, 4001, 3688, 4476, 4616, 4666, 4120, 3782,
3701, 4398, 4275, 3222), Time = c(96, 67, 70, 75, 38, 33, 55,
68, 59, 57, 56, 35, 45, 56, 46, 21), Vmax = c(9.4, 4.7, 8.3,
8.8, 6.9, 9.5, 6.5, 6, 9.2, 7.1, 7.3, 9, 6.5, 6.8, 6.2, 4.6),
Amax = c(4.5, 3.1, 4.1, 4.9, 3.8, 3.8, 4.4, 3.9, 4.5, 4.4,
4, 3.4, 4.3, 3.5, 3.2, 1.5), Dmax = c(-5.9, -2.8, -4.2, -6.2,
-3.9, -2.7, -4.9, -3.5, -2.7, -4.6, -3.5, -2.7, -4.5, -3.7,
-3.8, -1.4), Aerobic = c(462, 44, 589, 280, 175, 546, 333,
831, 303, 959, 225, 125, 194, 188, 534, 665), HSD = c(177,
0, 475, 86, 59, 463, 56, 14, 384, 92, 119, 393, 54, 262,
24, 0), SD = c(100, 0, 78, 39, 0, 149, 0, 0, 125, 3, 6, 141,
0, 0, 0, 0), Nsprints = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), DEC3 = c(7, 0, 2, 5, 0,
0, 8, 0, 0, 3, 0, 0, 3, 0, 0, 0), ACC3 = c(4, 0, 1, 4, 0,
0, 9, 0, 3, 4, 0, 0, 9, 0, 0, 0), RHIE = c(15, 3, 19, 10,
5, 5, 17, 6, 6, 10, 10, 3, 10, 5, 5, 2), MIP1 = c(161, 112,
169, 147, 184, 285, 162, 266, 170, 248, 197, 222, 167, 177,
232, 254), MIP3 = c(359, 309, 375, 345, 535, 738, 431, 688,
479, 709, 531, 549, 476, 461, 662, 724), MIP5 = c(565, 473,
560, 537, 868, 1049, 589, 851, 673, 1152, 845, 875, 682,
619, 983, 1166), ACC = c(78, 14, 64, 46, 27, 20, 51, 32,
29, 56, 40, 12, 45, 26, 21, 0), ACC2 = c(18, 1, 14, 15, 3,
9, 24, 9, 16, 12, 9, 6, 25, 4, 3, 0), DEC = c(67, 11, 42,
48, 23, 14, 36, 22, 14, 49, 32, 7, 30, 20, 15, 0), DEC2 = c(21,
0, 12, 14, 5, 0, 20, 4, 0, 11, 4, 0, 16, 1, 2, 0), Explosive = c(12,
9, 15, 13, 5, 1, 45, 11, 3, 8, 16, 0, 14, 11, 6, 0), TRIMP = c(107,
0, 78, 51, 105, 60, 85, 93, 55, 93, 102, 44, 89, 110, 109,
47), TRIMP4 = c(0, 0, 3, 12, 20, 8, 0, 0, 0, 13, 1, 2, 2,
15, 17, 15), TRIMP5 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), TD.min = c(69.3, 39.9, 68.1, 66, 103.6, 121.4,
67.2, 65.7, 78.7, 82.1, 77.5, 109.3, 82.5, 78.8, 92.8, 151
), HRavg = c(66, 0, 65, 65, 79, 69, 67, 65, 60, 67, 74, 70,
71, 71, 75, 73), Vmax.perc = c(100, 50, 88, 93, 73, 101,
69, 64, 98, 75, 78, 95, 70, 72, 66, 49), Amax.perc = c(88,
62, 80, 96, 74, 75, 86, 77, 89, 87, 78, 67, 85, 68, 63, 30
), Dmax.perc = c(104, 50, 74, 109, 69, 48, 85, 61, 47, 81,
61, 47, 79, 65, 66, 25)), row.names = c("21", "211", "22",
"19", "191", "2", "20", "212", "201", "213", "221", "1", "11",
"202", "203", "18"), class = "data.frame")
And this is the bars graph I am building:
# Stand-alone version of the bar chart from the loop, written for a single column.
# NOTE(review): y maps draft[,1], which is the Player column in the dput above, while
# the labels still use draft[,i]; `i` and `player` are not defined in this snippet --
# confirm which metric column was intended.
ggplot(draft, aes(x=draft[,2], y=draft[,1])) +
geom_bar(stat="identity",fill="navyblue") +
# size=4 is the fixed label size the question asks to adapt to the bars.
geom_text(label=draft[,i],size=4, vjust=1.2, colour = "white",fontface=2) +
labs(title = paste("Session trends for",colnames(draft)[1],"-",player))+
theme(axis.text.x = element_text(angle = 0,color="black",size=8),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
plot.title = element_text(color="black", size=10, face="bold",hjust = 0.5),
# Hide the legend, grid and panel background; keep black axis lines.
legend.position = "none",panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))
I may have misunderstood, but perhaps this type of approach would work for your use-case?
library(tidyverse)
draft <- structure(list(Player = c("Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos"), Date = structure(c(19371, 19370, 19369, 19368,
19367, 19364, 19363, 19362, 19361, 19360, 19359, 19356, 19355,
19354, 19353, 19349), class = "Date"), week = c(29, 29, 29, 29,
29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26), TD = c(6638,
2660, 4761, 4956, 3984, 4001, 3688, 4476, 4616, 4666, 4120, 3782,
3701, 4398, 4275, 3222), Time = c(96, 67, 70, 75, 38, 33, 55,
68, 59, 57, 56, 35, 45, 56, 46, 21), Vmax = c(9.4, 4.7, 8.3,
8.8, 6.9, 9.5, 6.5, 6, 9.2, 7.1, 7.3, 9, 6.5, 6.8, 6.2, 4.6),
Amax = c(4.5, 3.1, 4.1, 4.9, 3.8, 3.8, 4.4, 3.9, 4.5, 4.4,
4, 3.4, 4.3, 3.5, 3.2, 1.5), Dmax = c(-5.9, -2.8, -4.2, -6.2,
-3.9, -2.7, -4.9, -3.5, -2.7, -4.6, -3.5, -2.7, -4.5, -3.7,
-3.8, -1.4), Aerobic = c(462, 44, 589, 280, 175, 546, 333,
831, 303, 959, 225, 125, 194, 188, 534, 665), HSD = c(177,
0, 475, 86, 59, 463, 56, 14, 384, 92, 119, 393, 54, 262,
24, 0), SD = c(100, 0, 78, 39, 0, 149, 0, 0, 125, 3, 6, 141,
0, 0, 0, 0), Nsprints = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), DEC3 = c(7, 0, 2, 5, 0,
0, 8, 0, 0, 3, 0, 0, 3, 0, 0, 0), ACC3 = c(4, 0, 1, 4, 0,
0, 9, 0, 3, 4, 0, 0, 9, 0, 0, 0), RHIE = c(15, 3, 19, 10,
5, 5, 17, 6, 6, 10, 10, 3, 10, 5, 5, 2), MIP1 = c(161, 112,
169, 147, 184, 285, 162, 266, 170, 248, 197, 222, 167, 177,
232, 254), MIP3 = c(359, 309, 375, 345, 535, 738, 431, 688,
479, 709, 531, 549, 476, 461, 662, 724), MIP5 = c(565, 473,
560, 537, 868, 1049, 589, 851, 673, 1152, 845, 875, 682,
619, 983, 1166), ACC = c(78, 14, 64, 46, 27, 20, 51, 32,
29, 56, 40, 12, 45, 26, 21, 0), ACC2 = c(18, 1, 14, 15, 3,
9, 24, 9, 16, 12, 9, 6, 25, 4, 3, 0), DEC = c(67, 11, 42,
48, 23, 14, 36, 22, 14, 49, 32, 7, 30, 20, 15, 0), DEC2 = c(21,
0, 12, 14, 5, 0, 20, 4, 0, 11, 4, 0, 16, 1, 2, 0), Explosive = c(12,
9, 15, 13, 5, 1, 45, 11, 3, 8, 16, 0, 14, 11, 6, 0), TRIMP = c(107,
0, 78, 51, 105, 60, 85, 93, 55, 93, 102, 44, 89, 110, 109,
47), TRIMP4 = c(0, 0, 3, 12, 20, 8, 0, 0, 0, 13, 1, 2, 2,
15, 17, 15), TRIMP5 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), TD.min = c(69.3, 39.9, 68.1, 66, 103.6, 121.4,
67.2, 65.7, 78.7, 82.1, 77.5, 109.3, 82.5, 78.8, 92.8, 151
), HRavg = c(66, 0, 65, 65, 79, 69, 67, 65, 60, 67, 74, 70,
71, 71, 75, 73), Vmax.perc = c(100, 50, 88, 93, 73, 101,
69, 64, 98, 75, 78, 95, 70, 72, 66, 49), Amax.perc = c(88,
62, 80, 96, 74, 75, 86, 77, 89, 87, 78, 67, 85, 68, 63, 30
), Dmax.perc = c(104, 50, 74, 109, 69, 48, 85, 61, 47, 81,
61, 47, 79, 65, 66, 25)), row.names = c("21", "211", "22",
"19", "191", "2", "20", "212", "201", "213", "221", "1", "11",
"202", "203", "18"), class = "data.frame")
# Draw one bar chart per metric column (every column after the first three),
# shrinking the bar labels as the numbers get longer so they fit the bars.
for (i in seq_len(ncol(draft))[-(1:3)]) {
  # Labels are the metric values truncated to whole numbers.
  bar_labels <- as.integer(draft[, i])
  # Size the text from the widest label in this column rather than from one
  # arbitrary row, and never go below 1 so long numbers remain drawable.
  label_size <- max(5 - max(nchar(bar_labels)), 1)

  print(
    ggplot(draft, aes(x = draft[, 2], y = draft[, i])) +
      geom_bar(stat = "identity", fill = "navyblue") +
      geom_text(
        label = bar_labels, size = label_size,
        vjust = 1.2, colour = "white", fontface = 2
      ) +
      labs(title = paste("Session trends for", colnames(draft)[i], "-", "player")) +
      theme(
        axis.text.x = element_text(angle = 0, color = "black", size = 8),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        plot.title = element_text(color = "black", size = 10, face = "bold", hjust = 0.5),
        legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")
      )
  )
}
Created on 2023-01-16 with reprex v2.0.2

From Boxplot to Barplot in ggplot possible?

I have to make a ggplot bar plot with error bars and Tukey significance letters for plants grown with different fertilizer concentrations.
The data should be grouped by the different concentrations, and the significance letters should be added automatically.
I already have code for the same problem for a boxplot, which is working nicely. I tried several tutorials for bar plots, but I always get the error: stat_count() can only have an x or y aesthetic.
So I thought: is it possible to convert my boxplot code into bar plot code? I tried, but I couldn't do it :) And if not, how do I automatically add the TukeyHSD test significance letters to a ggplot bar plot?
This is my Code for the boxplot with the tukey letters:
    value_max = Dünger, group_by(Duenger.g), summarize(max_value = max(Höhe.cm))
hsd=HSD.test(aov(Höhe.cm~Duenger.g, data=Dünger),
trt = "Duenger.g", group = T) sig.letters <- hsd$groups[order(row.names(hsd$groups)), ]
J <- ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm))+ geom_boxplot(aes(fill= Duenger.g))+ scale_fill_discrete(labels=c("0.5g", '1g', "2g", "3g", "4g"))+ geom_text(data = value_max, aes(x=Duenger.g, y = 0.1 + max_value, label = sig.letters$groups), vjust=0)+ stat_boxplot(geom = 'errorbar', width = 0.1)+ ggtitle("Auswirkung von Dünger auf die Höhe von Pflanzen") + xlab("Dünger in g") + ylab("Höhe in cm"); J
This is how it looks:
boxplot with tukey
Data from dput:
structure(list(Duenger.g = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4), plant = c(1, 2, 3, 4, 5, 7, 10, 11, 12, 13, 14, 18, 19,
21, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40,
41, 42, 43, 44, 48, 49, 50, 53, 54, 55, 56, 57, 58, 61, 62, 64,
65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 79, 80, 81, 83, 85, 86,
88, 89, 91, 93, 99, 100, 102, 103, 104, 105, 106, 107, 108, 110,
111, 112, 113, 114, 115, 116, 117, 118, 120, 122, 123, 125, 126,
127, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 143, 144,
145, 146, 147, 149), height.cm = c(5.7, 2.8, 5.5, 8, 3.5, 2.5,
4, 6, 10, 4.5, 7, 8.3, 11, 7, 8, 2.5, 7.4, 3, 14.5, 7, 12, 7.5,
30.5, 27, 6.5, 19, 10.4, 12.7, 27.3, 11, 11, 10.5, 10.5, 13,
53, 12.5, 12, 6, 12, 35, 8, 16, 56, 63, 69, 62, 98, 65, 77, 32,
85, 75, 33.7, 75, 55, 38.8, 39, 46, 35, 59, 44, 31.5, 49, 34,
52, 37, 43, 38, 28, 14, 28, 19, 20, 23, 17.5, 32, 16, 17, 24.7,
34, 50, 12, 14, 21, 33, 39.3, 41, 29, 35, 48, 40, 65, 35, 10,
26, 34, 41, 32, 38, 23.5, 22.2, 20.5, 29, 34, 45)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -105L))
Thank you
mirai
A bar chart and a boxplot are two different things. By default, geom_boxplot computes the boxplot statistics (stat="boxplot"). In contrast, geom_bar by default counts the number of observations (stat="count"), which are then mapped to y. That is why you get an error. Hence, simply replacing geom_boxplot with geom_bar will not give you your desired result. Instead, you could use e.g. stat_summary to create your bar chart with error bars. Additionally, I created a summary dataset to add the labels on top of the error bars.
library(ggplot2)
library(dplyr)
library(agricolae)
# Rename the height column to the name used in the question's code and turn
# the fertilizer dose into a factor so it is treated as discrete groups.
Dünger <- Dünger |>
  rename("Höhe.cm" = height.cm) |>
  mutate(Duenger.g = factor(Duenger.g))

# Tukey HSD letters. The treatment levels are the row names of hsd$groups,
# so they are copied into a Duenger.g column to join on.
hsd <- HSD.test(aov(Höhe.cm ~ Duenger.g, data = Dünger), trt = "Duenger.g", group = TRUE)
sig.letters <- hsd$groups %>% mutate(Duenger.g = row.names(.))

# Per-dose summary from mean_se() (columns y, ymin, ymax), combined with the
# significance letters; used to position the labels above the error bars.
duenger_sum <- Dünger |>
  group_by(Duenger.g) |>
  summarize(mean_se(Höhe.cm)) |>
  left_join(sig.letters, by = "Duenger.g")
# Bar chart of the mean height per dose, computed on the fly from the raw data.
ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm, fill = Duenger.g)) +
  stat_summary(geom = "bar", fun = "mean") +
  # No summary function is given here, so stat_summary falls back to mean_se().
  stat_summary(geom = "errorbar", width = .1) +
  # Derive the legend labels from the factor levels; the data has doses
  # 0, 0.5, 1, 2 and 4, which a hard-coded "0.5g" ... "4g" vector mislabels.
  scale_fill_discrete(labels = function(x) paste0(x, "g")) +
  # Significance letters sit just above the upper end of each error bar.
  geom_text(data = duenger_sum, aes(y = ymax, label = groups), vjust = 0, nudge_y = 1) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )
#> No summary function supplied, defaulting to `mean_se()`
But as the summary dataset now already contains the mean and the values for the error bars a second option would be to do:
# Same chart drawn directly from the pre-computed summary (y = mean,
# ymin/ymax = error bar limits, groups = Tukey letters).
ggplot(duenger_sum, aes(x = Duenger.g, y = y, fill = Duenger.g)) +
  geom_col() +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = .1) +
  # Derive the legend labels from the dose values; the data has doses
  # 0, 0.5, 1, 2 and 4, which a hard-coded "0.5g" ... "4g" vector mislabels.
  scale_fill_discrete(labels = function(x) paste0(x, "g")) +
  # Significance letters sit just above the upper end of each error bar.
  geom_text(aes(y = ymax, label = groups), vjust = 0, nudge_y = 1) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )

plotting a graph with multiple bars in R

I am struggling to plot the following data and think it is because of the format of the data.
structure(list(HE_Provider = c("Coventry University", "The University of Leicester",
"Total"), Bath_and_North_East_Somerset = c(15, 20, 205), Bedford = c(85,
90, 1040), Blackburn_with_Darwen = c(10, 20, 95), Blackpool = c(10,
5, 60), `Bournemouth,_Poole_and_Christchurch` = c(35, 15, 285
), Bracknell_Forest = c(15, 10, 210), Buckinghamshire = c(195,
145, 1835), Cambridgeshire = c(130, 160, 2500), Central_Bedfordshire = c(115,
70, 1120), Cheshire_East = c(45, 55, 935), Cheshire_West_and_Chester = c(25,
40, 535), City_of_Bristol = c(40, 35, 390), City_of_Derby = c(65,
135, 4115), City_of_Kingston_upon_Hull = c(25, 20, 265), City_of_Leicester = c(315,
1275, 6860), City_of_Nottingham = c(65, 145, 5405), City_of_Plymouth = c(15,
10, 135), City_of_Portsmouth = c(15, 15, 130), City_of_Southampton = c(15,
20, 140), `City_of_Stoke-on-Trent` = c(50, 15, 475), City_of_York = c(35,
20, 350), Cornwall = c(25, 25, 300), County_Durham = c(20, 40,
330), Cumbria = c(30, 20, 305), Darlington = c(0, 15, 110), Derbyshire = c(100,
145, 6925), Devon = c(50, 50, 630), Dorset = c(30, 20, 285),
East_Riding_of_Yorkshire = c(75, 45, 760), East_Sussex = c(55,
50, 650), Essex = c(365, 180, 3320), Gloucestershire = c(150,
85, 905), Greater_London = c(5550, 1930, 18285), Greater_Manchester = c(245,
280, 2820), Halton = c(5, 10, 80), Hampshire = c(180, 120,
1485), Hartlepool = c(5, 10, 55), Herefordshire = c(50, 15,
235), Hertfordshire = c(385, 270, 4815), Isle_of_Wight = c(10,
5, 90), Isles_of_Scilly = c(0, 0, 0), Kent = c(365, 195,
2590), Lancashire = c(75, 125, 985), Leicestershire = c(540,
980, 8010), Lincolnshire = c(145, 190, 7710), Luton = c(105,
75, 685), Medway = c(95, 35, 425), Merseyside = c(75, 120,
975), Middlesbrough = c(10, 5, 65), Milton_Keynes = c(265,
170, 2205), Norfolk = c(120, 115, 2410), North_East_Lincolnshire = c(20,
10, 810), North_Lincolnshire = c(20, 20, 810), North_Somerset = c(25,
15, 205), North_Yorkshire = c(500, 80, 1160), Northamptonshire = c(680,
510, 7505), Northumberland = c(10, 25, 235), Nottinghamshire = c(140,
185, 9410), Oxfordshire = c(280, 135, 1785), Peterborough = c(85,
135, 1560), Reading = c(75, 25, 260), Redcar_and_Cleveland = c(5,
5, 90), Rutland = c(5, 35, 345), Shropshire = c(60, 30, 500
), Slough = c(95, 40, 270), Somerset = c(40, 40, 490), South_Gloucestershire = c(40,
25, 310), South_Yorkshire = c(105, 180, 3220), `Southend-on-Sea` = c(35,
25, 345), Staffordshire = c(370, 150, 3825), `Stockton-on-Tees` = c(20,
15, 145), Suffolk = c(115, 115, 1935), Surrey = c(195, 155,
2900), Swindon = c(50, 25, 225), Telford_and_Wrekin = c(60,
20, 360), Thurrock = c(140, 40, 370), Torbay = c(5, 5, 65
), Tyne_and_Wear = c(45, 60, 680), Warrington = c(20, 20,
290), Warwickshire = c(2080, 210, 2825), West_Berkshire = c(35,
25, 300), West_Midlands = c(8315, 915, 8220), West_Sussex = c(105,
95, 1115), West_Yorkshire = c(200, 245, 3005), Wiltshire = c(90,
55, 630), Windsor_and_Maidenhead = c(40, 25, 405), Wokingham = c(70,
35, 395), Worcestershire = c(350, 110, 1350), `England_(county_unitary_authority_unknown)` = c(0,
10, 770), Total_England = c(24990, 11530, 154930), Total = c(25380,
11845, 158480)), row.names = c(NA, -3L), class = "data.frame")
I would like to plot the Region on the bottom but don't have a title for these regions, with the numbers up the y axis and the fill being the university.
This type of problem generally has to do with reshaping the data. The data should be in long format, but it is in wide format. See this post on how to reshape the data from wide to long format.
Reshape the data and plot with geom_col.
suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(ggplot2)
})
# Drop the aggregate columns (any name containing "England" or "Total"),
# reshape to one row per provider and region, and draw stacked bars.
df1 %>%
  select(-matches("England|Total")) %>%
  pivot_longer(cols = -HE_Provider, names_to = "Region", values_to = "value") %>%
  ggplot(mapping = aes(x = Region, y = value, fill = HE_Provider)) +
  geom_col() +
  theme_bw(base_size = 10) +
  theme(
    legend.position = "bottom",
    # Small, rotated region names so the many x-axis labels stay readable.
    axis.text.x = element_text(angle = 75, hjust = 1, vjust = 1, size = 7)
  )
Created on 2022-12-06 with reprex v2.0.2
We could bring the data in long format. For y we used log scale:
library(tidyverse)
# Reshape to long format, total the values per provider and region, and draw
# side-by-side bars of the log totals.
df %>%
  pivot_longer(cols = -HE_Provider, names_to = "name", values_to = "value") %>%
  group_by(HE_Provider, name) %>%
  summarise(sum_value = sum(value)) %>%
  # Note: regions with a total of 0 give log(0) = -Inf.
  ggplot(mapping = aes(x = name, y = log(sum_value), fill = HE_Provider)) +
  geom_col(position = "dodge") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 0.5)
  )

How to use columns as x-axis in RStudio

Here is my data:
How do I make it so that the column names appear on the x axis? I will probably use the facet function so that the number values aren't next to the duration values, so one graph will have these on the x axis: "Number Looks", "Number Gesture", "Number Reach", "Number Other" for group A, and another graph will have these on the x axis: "Duration Looks", "Duration Gesture", "Duration Reach", "Duration Other" for group A, with the data below the column titles as the y-axis values. I will also have to generate the data for group B in the same way
Here is how we could achieve your task:
Bring your data in the correct format with pivot_longer
Use filter for each number and Duration
Now you have two separate data frames
plot them individually with ggplot2 using facet_wrap for group A and B
The output arranged with plot_grid from cowplot package!
library(cowplot)
library(tidyverse)
# Reshape once to long format (one row per participant and measure). The
# measure columns are selected by name rather than by position, so the code
# keeps working if columns are added or reordered.
df_long <- df %>%
  pivot_longer(
    cols = matches("^(Number|Duration)_"),
    names_to = "names",
    values_to = "values"
  )

# Split the long data into the count measures and the duration measures.
df_number <- df_long %>%
  filter(grepl("Number", names))
df_Duration <- df_long %>%
  filter(grepl("Duration", names))
# One bar chart per measure type, with a facet for each group; the bars stack
# the values of all participants for a given measure.
plot_number <- ggplot(data = df_number, mapping = aes(x = factor(names), y = values)) +
  geom_col() +
  labs(x = "Number", y = "Value") +
  facet_wrap(~Group) +
  theme_bw()

plot_Duration <- ggplot(data = df_Duration, mapping = aes(x = factor(names), y = values)) +
  geom_col() +
  labs(x = "Duration", y = "Value") +
  facet_wrap(~Group) +
  theme_bw()

# Put both charts in one figure; labels = "AUTO" tags them A, B, ...
plot_grid(plot_number, plot_Duration, labels = "AUTO")
data:
df <- structure(list(Participant = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17), Group = c("A", "A", "A", "A", "A",
"A", "A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B"),
Number_Looks = c(47, 94, 23, 64, 99, 38, 85, 38, 20, 10,
34, 54, 87, 78, 45, 63, 32), Duration_Look = c(247, 294,
223, 264, 299, 238, 285, 238, 220, 210, 234, 254, 287, 278,
245, 263, 232), Number_Gesture = c(39, 86, 15, 56, 91, 30,
77, 30, 12, 20, 26, 46, 79, 70, 37, 55, 24), Duration_Gesture = c(29,
76, 5, 46, 81, 20, 67, 20, 20, 10, 16, 36, 69, 60, 27, 45,
14), Number_Reach = c(40, 87, 16, 57, 92, 31, 78, 31, 13,
21, 27, 47, 80, 71, 38, 56, 25), Duration_Reach = c(89, 136,
65, 106, 141, 80, 127, 80, 80, 70, 76, 96, 129, 120, 87,
105, 74), Number_Other = c(52, 99, 28, 69, 104, 43, 90, 43,
25, 33, 39, 59, 92, 83, 50, 68, 37), Duration_Other = c(339,
386, 315, 356, 391, 330, 377, 330, 330, 320, 326, 346, 379,
370, 337, 355, 324), Number_Sound = c(152, 199, 128, 169,
204, 143, 190, 143, 125, 133, 139, 159, 192, 183, 150, 168,
137), Duration_Sound = c(319, 366, 295, 336, 371, 310, 357,
310, 310, 300, 306, 326, 359, 350, 317, 335, 304)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -17L))

Merge and Perfectly Align Histogram and Boxplot using ggplot2

Since yesterday I have been reading answers and websites in order to combine and align, in one plot, a histogram and a boxplot generated using the ggplot2 package.
This question differs from others because the boxplot chart needs to be reduced in height and aligned to the left outer margin of the histogram.
Considering the following dataset:
my_df <- structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100), value= c(18, 9, 3,
4, 3, 13, 12, 5, 8, 37, 64, 107, 11, 11, 8, 18, 5, 13, 13, 14,
11, 11, 9, 14, 11, 14, 12, 10, 11, 10, 5, 3, 8, 11, 12, 11, 7,
6, 6, 4, 11, 8, 14, 13, 14, 15, 10, 2, 4, 4, 8, 15, 21, 9, 5,
7, 11, 6, 11, 2, 6, 16, 5, 11, 21, 33, 12, 10, 13, 33, 35, 7,
7, 9, 2, 21, 32, 19, 9, 8, 3, 26, 37, 5, 6, 10, 18, 5, 70, 48,
30, 10, 15, 18, 7, 4, 19, 10, 4, 32)), row.names = c(NA, 100L
), class = "data.frame", .Names = c("id", "value"))
I generated the boxplot:
require(dplyr)
require(ggplot2)
# Horizontal boxplot of `value`. The single dummy x category carries no
# information, so its axis title, labels and ticks are hidden.
my_df %>%
  select(value) %>%
  ggplot(mapping = aes(x = "", y = value)) +
  geom_boxplot(colour = "black", fill = "lightblue") +
  coord_flip() +
  theme_classic() +
  labs(x = "") +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )
and I generated the histogram
# Histogram of `value` with unit-wide bins, scaled to relative frequencies.
my_df %>%
  select(id, value) %>%
  ggplot() +
  geom_histogram(
    # after_stat() replaces the deprecated ..count.. notation; each bin count
    # is divided by the total so the bar heights sum to 1.
    aes(x = value, y = after_stat(count / sum(count))),
    position = "identity", binwidth = 1,
    fill = "lightblue", color = "black"
  ) +
  ylab("Relative Frequency") +
  theme_classic()
The result I am looking to obtain is a single plot like:
Note that the boxplot must be reduced in height and the ticks must be exactly aligned in order to give a different perspective of the same visual.
You can use either egg, cowplot or patchwork packages to combine those two plots. See also this answer for more complex examples.
library(dplyr)
library(ggplot2)
# plt1: horizontal boxplot of `value`. The single dummy x category carries no
# information, so its axis title, labels and ticks are hidden.
plt1 <- my_df %>%
  select(value) %>%
  ggplot(mapping = aes(x = "", y = value)) +
  geom_boxplot(colour = "black", fill = "lightblue") +
  coord_flip() +
  theme_classic() +
  labs(x = "") +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )
# plt2: histogram of `value` with unit-wide bins, scaled to relative frequencies.
plt2 <- my_df %>%
  select(id, value) %>%
  ggplot() +
  geom_histogram(
    # after_stat() replaces the deprecated ..count.. notation; each bin count
    # is divided by the total so the bar heights sum to 1.
    aes(x = value, y = after_stat(count / sum(count))),
    position = "identity", binwidth = 1,
    fill = "lightblue", color = "black"
  ) +
  ylab("Relative Frequency") +
  theme_classic()
egg
# install.packages("egg", dependencies = TRUE)
# Combine the histogram (plt2) and the boxplot (plt1); heights = 2:1 are the
# relative heights given to the two plots.
egg::ggarrange(plt2, plt1, heights = 2:1)
cowplot
# install.packages("cowplot", dependencies = TRUE)
# Single column with the histogram (plt2) above the boxplot (plt1) at relative
# heights 2:1; align = 'v' with axis = 'lr' asks cowplot to align the plots
# vertically along their left and right axes.
cowplot::plot_grid(plt2, plt1,
ncol = 1, rel_heights = c(2, 1),
align = 'v', axis = 'lr')
patchwork
# install.packages("devtools", dependencies = TRUE)
# devtools::install_github("thomasp85/patchwork")
# NOTE(review): patchwork is also distributed on CRAN, so
# install.packages("patchwork") should work as well -- confirm for your setup.
library(patchwork)
# Two rows: histogram (plt2) first, boxplot (plt1) second, relative heights 2:1.
plt2 + plt1 + plot_layout(nrow = 2, heights = c(2, 1))

Resources