(EDIT: The issues raised in this post are solved below)
I am trying to get my bar plot to look a certain way. I hope you can follow me without really explaining what the purpose of this plot is or the context of the data, so I'll dive right in.
I'm using this code
ggplot(ds, aes(x=fraction, y=AUC)) +
geom_bar(aes(fill=factor(demographics, c("adjusted", "not adjusted"))), position=position_dodge(width=0.9), stat="identity") +
facet_grid(~FN, switch="x") +
geom_text(aes(label=round(AUC, 2), fontface="bold", vjust=-0.4), position=position_dodge(width=0.9), size=2.75) +
theme(legend.title=element_blank(), legend.position="bottom",
axis.text.y=element_blank(),
axis.title.x=element_blank(), axis.ticks.x=element_blank(), axis.ticks.y=element_blank(),
panel.background=element_blank())
to produce the following plot
What I want to see, however, is
that the AUC-values on top of the bars are correctly aligned with every single bar (grouped by "demographics" [adjusted/not adjusted]) and not with the middle of the group "fraction" (serum/plasma) as it is right now
that the two bars in the last facet "FN=5" each have only half of their current width (to match the width of the other bars in facets "FN=1" to "FN=4"
that there is another label printed vertically within every bar. This label is stored in the character variable "features" contained in the data set "ds" used to print the plot
ds <- structure(list(ds = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L), .Label = c("1",
"2", "3", "4"), class = "factor"), FN = structure(c(1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
5L), .Label = c("FN=1", "FN=2", "FN=3", "FN=4", "FN=5"), class = "factor"),
fraction = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("serum",
"plasma"), class = "factor"), demographics = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L), .Label = c("not adjusted", "adjusted (age, sex, education, ethnicity, ApoE4)"
), class = "factor"), AUC = c(0.741264171455195, 0.749961174095356,
0.797639384997671, 0.763006678055599, 0.894704146606616,
0.9038670601025, 0.90169280944246, 0.912564062742662, 0.912564062742662,
0.672077922077922, 0.715367965367965, 0.746753246753247,
0.791125541125541, 0.804112554112554, 0.827922077922078,
0.829004329004329, 0.83982683982684, 0.83982683982684, 0.741264171455195,
0.749961174095356, 0.797639384997671, 0.763006678055599,
0.894704146606616, 0.9038670601025, 0.90169280944246, 0.912564062742662,
0.912564062742662, 0.672077922077922, 0.715367965367965,
0.746753246753247, 0.791125541125541, 0.804112554112554,
0.827922077922078, 0.829004329004329, 0.83982683982684, 0.83982683982684
), features = c("p21", "p3 + p21", "p3 + p7 + p19", "p1 + p8 + p21 + p23",
"p8", "p11 + p24", "p14 + p17 + p24", "p11 + p13 + p14 + p17",
"p11 + p13 + p14 + p17", "p13", "p9 + p17", "p9 + p14 + p17",
"p7 + p9 + p17 + p19", "p20", "p9 + p19", "p9 + p17 + p19",
"p4 + p8 + p9 + p17", "p4 + p8 + p9 + p17", "p21", "p3 + p21",
"p3 + p7 + p19", "p1 + p8 + p21 + p23", "p8", "p11 + p24",
"p14 + p17 + p24", "p11 + p13 + p14 + p17", "p11 + p13 + p14 + p17",
"p13", "p9 + p17", "p9 + p14 + p17", "p7 + p9 + p17 + p19",
"p20", "p9 + p19", "p9 + p17 + p19", "p4 + p8 + p9 + p17",
"p4 + p8 + p9 + p17")), .Names = c("ds", "FN", "fraction",
"demographics", "AUC", "features"), row.names = c(NA, -36L), class = "data.frame")
EDIT: Eventually, using the help in the comments section, I used this code
ggplot(ds, aes(x=fraction, y=AUC, fill=factor(demographics, c("adjusted (age, sex, education, ethnicity, ApoE4)", "not adjusted")))) +
geom_bar(position=position_dodge(width=0.9), stat="identity") +
facet_grid(~FN) +
geom_text(aes(label=round(AUC, 2), fontface="bold", vjust=1), position=position_dodge(width=0.9), size=2.75) +
geom_text(aes(y=0.3, label=features, fontface="bold"), color="white", position=position_dodge(width=0.9), angle=90, size=3) +
theme_bw() +
theme(legend.title=element_blank(), legend.position="bottom",
axis.text.y=element_blank(),
axis.title.x=element_blank(), axis.title.y=element_text(face="bold"), axis.ticks.x=element_blank(), axis.ticks.y=element_blank(),
panel.background=element_blank(), panel.grid.minor=element_blank(), panel.grid.major=element_blank())
to produce the results I initially wanted (I changed my mind on halving the width of the columns in facet "FN=5" though), looking like this
To increase readability, I decided to flip the plot using this code
ggplot(ds, aes(x=fraction, y=AUC, fill=factor(demographics, c("not adjusted", "adjusted (age, sex, education, ethnicity, ApoE4)")))) +
geom_bar(position=position_dodge(width=0.9), stat="identity") +
facet_grid(FN~.) +
geom_text(aes(y=AUC, label=round(AUC, 2), fontface="bold"), position=position_dodge(width=0.9), hjust=1.15, size=3.25) +
geom_text(aes(y=0.4, label=features, fontface="bold"), position=position_dodge(width=0.9), color="white", size=3) +
theme_bw() +
theme(legend.title=element_blank(), legend.position="bottom",
axis.title.x=element_text(face="bold"), axis.title.y=element_blank(),
axis.ticks.x=element_blank(), axis.ticks.y=element_blank(),
panel.background=element_blank(), panel.grid.minor=element_blank(), panel.grid.major=element_blank(),
strip.text.y = element_text(angle=0)) + coord_flip()
which results in a horizontal bar plot now looking like this (colors and order of legend keys are not quite right yet)
Related
I am trying to make scales for the y-axis free. But it looks like it is not working. I want adaptation to have its own scale and post adaptation also having its own scales. I am trying to make scales for the y-axis free. But it looks like it is not working. I want adaptation to have its own scale and post adaptation also having its own scales.
tgc <- structure(list(Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Visible", "Remembered"), class = "factor"),
Condition = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L
), .Label = c("CEN", "IPS", "CTL"), class = "factor"), test = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Pre-test", "Post-test"
), class = "factor"), Session = structure(c(1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L), .Label = c("Adaptation", "Post-adaptation"
), class = "factor"), N = c(12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12), EE = c(2.134379625, 0.333942625, 1.742841125, 0.317361916666667,
2.84197270833333, 0.307057416666667, 2.403112375, 0.281202,
3.49590529166667, 0.305657666666667, 2.85211466666667, 0.3131155,
1.44857545833333, 0.269328166666667, 1.740270875, 0.243361833333333,
2.10702266666667, 0.286209125, 2.145855125, 0.305474083333333,
1.60016616666667, 0.281528625, 1.94182179166667, 0.294655916666667
), sd = c(0.727246182828044, 0.0816168443914292, 0.549168068103643,
0.0894916121701392, 1.14554677132408, 0.0958562360654162,
1.06827971273128, 0.0953131237162305, 1.18204258551111, 0.0896670491921828,
1.32864473484909, 0.109865886496798, 0.605344957514288, 0.0815454655757737,
0.833908172662699, 0.0798994165789182, 1.11582277105041,
0.0976064300150272, 0.667812406644538, 0.142929179817685,
0.686043669971901, 0.109794818975944, 1.39509308576833, 0.161854932615856
), se = c(0.209937889711449, 0.0235607535398997, 0.158531165974993,
0.0258340031883217, 0.330690868396632, 0.0276713118479362,
0.308385789857611, 0.0275145288174349, 0.341226302469221,
0.0258846474942731, 0.383546697661249, 0.0317155495718416,
0.174748037086728, 0.0235401482506832, 0.240728553983119,
0.0230649748349663, 0.322110288616933, 0.0281765493219072,
0.192780836372198, 0.0412601002213964, 0.198043748767058,
0.0316950341456936, 0.402728684306467, 0.0467234944577166
), ci = c(0.462070179795855, 0.0518568689018959, 0.348924743722983,
0.0568602576432562, 0.727845693918804, 0.0609041467375754,
0.678752547059741, 0.0605590696140879, 0.751034027967696,
0.0569717250090983, 0.844180589754564, 0.069805453951774,
0.384617836383033, 0.0518115169661108, 0.529839974927164,
0.0507656673296478, 0.708959965158704, 0.0620161669201078,
0.424307760005262, 0.0908128682911871, 0.435891352085212,
0.0697602998032695, 0.886399857701764, 0.102837717929058)), row.names = c(NA,
-24L), class = "data.frame")
library(ggh4x)
p <- ggplot(tgc, aes(x = Condition, y = EE), fill = test) +
geom_errorbar(aes(ymin=EE-se, ymax=EE+se, group = test), position = position_dodge(0.5), width=.1) +
geom_bar(aes(fill = test), stat = "identity", width = 0.5, color = "black", position='dodge') + ylim(0,4) + theme_bw() + theme(
axis.text.x = element_text(size = 12,face="bold"),#, angle = 10, hjust = .5, vjust = .5),
axis.text.y = element_text(size = 12, face = "bold"),
axis.title.y = element_text(vjust= 1.8, size = 16),
axis.title.x = element_text(vjust= -0.5, size = 16),
axis.title = element_text(face = "bold")) + xlab("Workspace") + ylab("EE (cm)") + theme(legend.position="top") +
scale_fill_manual(values = c("grey80", "grey20")) + facet_nested(. ~ Session + Group, scales = "free_y") + theme(aspect.ratio = 6/4)
p + guides(fill=guide_legend(title="Test:")) + theme(legend.text=element_text(size=14),legend.title=element_text(size=14) ) +
theme(strip.text = element_text(face="bold", size=12))
The facet_nested() function is based on facet_grid(), in which you can have y scales that vary between rows in the grid, but not within a row.
Let's make the base of the plot.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.1.1
library(ggh4x)
tgc <- structure(list(Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Visible", "Remembered"), class = "factor"),
Condition = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L
), .Label = c("CEN", "IPS", "CTL"), class = "factor"), test = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Pre-test", "Post-test"
), class = "factor"), Session = structure(c(1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L), .Label = c("Adaptation", "Post-adaptation"
), class = "factor"), N = c(12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12), EE = c(2.134379625, 0.333942625, 1.742841125, 0.317361916666667,
2.84197270833333, 0.307057416666667, 2.403112375, 0.281202,
3.49590529166667, 0.305657666666667, 2.85211466666667, 0.3131155,
1.44857545833333, 0.269328166666667, 1.740270875, 0.243361833333333,
2.10702266666667, 0.286209125, 2.145855125, 0.305474083333333,
1.60016616666667, 0.281528625, 1.94182179166667, 0.294655916666667
), sd = c(0.727246182828044, 0.0816168443914292, 0.549168068103643,
0.0894916121701392, 1.14554677132408, 0.0958562360654162,
1.06827971273128, 0.0953131237162305, 1.18204258551111, 0.0896670491921828,
1.32864473484909, 0.109865886496798, 0.605344957514288, 0.0815454655757737,
0.833908172662699, 0.0798994165789182, 1.11582277105041,
0.0976064300150272, 0.667812406644538, 0.142929179817685,
0.686043669971901, 0.109794818975944, 1.39509308576833, 0.161854932615856
), se = c(0.209937889711449, 0.0235607535398997, 0.158531165974993,
0.0258340031883217, 0.330690868396632, 0.0276713118479362,
0.308385789857611, 0.0275145288174349, 0.341226302469221,
0.0258846474942731, 0.383546697661249, 0.0317155495718416,
0.174748037086728, 0.0235401482506832, 0.240728553983119,
0.0230649748349663, 0.322110288616933, 0.0281765493219072,
0.192780836372198, 0.0412601002213964, 0.198043748767058,
0.0316950341456936, 0.402728684306467, 0.0467234944577166
), ci = c(0.462070179795855, 0.0518568689018959, 0.348924743722983,
0.0568602576432562, 0.727845693918804, 0.0609041467375754,
0.678752547059741, 0.0605590696140879, 0.751034027967696,
0.0569717250090983, 0.844180589754564, 0.069805453951774,
0.384617836383033, 0.0518115169661108, 0.529839974927164,
0.0507656673296478, 0.708959965158704, 0.0620161669201078,
0.424307760005262, 0.0908128682911871, 0.435891352085212,
0.0697602998032695, 0.886399857701764, 0.102837717929058)), row.names = c(NA,
-24L), class = "data.frame")
p <- ggplot(tgc, aes(x = Condition, y = EE), fill = test) +
geom_errorbar(aes(ymin=EE-se, ymax=EE+se, group = test),
position = position_dodge(0.5), width=.1) +
geom_bar(aes(fill = test),
stat = "identity", width = 0.5, color = "black", position='dodge') +
theme_bw() + theme(
axis.text.x = element_text(size = 12,face="bold"),
axis.text.y = element_text(size = 12, face = "bold"),
axis.title.y = element_text(vjust= 1.8, size = 16),
axis.title.x = element_text(vjust= -0.5, size = 16),
axis.title = element_text(face = "bold")) +
xlab("Workspace") + ylab("EE (cm)") +
theme(legend.position="top") +
scale_fill_manual(values = c("grey80", "grey20")) +
theme(aspect.ratio = 6/4) +
guides(fill=guide_legend(title="Test:")) +
theme(legend.text=element_text(size=14),legend.title=element_text(size=14) ) +
theme(strip.text = element_text(face="bold", size=12))
If you wanted to have a non-grid layout, it is probably easier to use facet_nested_wrap() instead.
p + facet_nested_wrap(~Session + Group, scales = "free_y", nrow = 1)
Alternatively, if you must retain the grid layout and want the y-axes to be independent within a row, you can use independent = "y".
p + facet_nested(~Session + Group, scales = "free_y", independent = "y")
Created on 2021-11-09 by the reprex package (v2.0.1)
It is not directly an answer to that question, but an alternative would be to create the panels separately. You can do this semi-programmatically by creating a list first. The biggest disadvantage that I can see in this way is that patchwork still cannot merge / combine x-/y- axis titles like it does with legends, so you would need to create each plot separately which would in turn defeat the point of this approach...
library(tidyverse)
library(ggh4x)
library(patchwork)
ls_p <-
tgc %>%
split(., .$Session) %>%
map(function(x){
## I've tried to de-clutter your code.
ggplot(x, aes(x = Condition, y = EE, fill = test)) +
geom_errorbar(aes(ymin = EE - se, ymax = EE + se, group = test), position = position_dodge(0.5), width = .1) +
geom_col(width = 0.5, color = "black", position = "dodge") +
labs(x= "Workspace", y = "EE (cm)") +
scale_fill_manual("Test:", values = c("grey80", "grey20")) +
facet_nested(. ~ Session + Group, scales = "free_y") +
## I am using cowplot::theme_minimal because I think it's a well designed theme
cowplot::theme_minimal_hgrid()
})
wrap_plots(ls_p) + plot_layout(guides = "collect") &
theme(legend.position = "top")
In my study its important to show how each individual adapted to to the training, and not just the group mean and median change.
As a beginner in R, im happy that ive got as far as my current boxplot with 3 groups, where I have via geom_point added individual dots, but I cant seem to get geom_line to connect lines between dots within each group.
All help highly appreciated.
Ive tried to follow a similar posts advise but it did not respond to my data, Connect ggplot boxplots using lines and multiple factor
I dont know if i should be pasting my data.frame into here
Basically column 1 is which "Group" (Heavy, Optimal, Control), column 2 "Time_point" is whether its pre or post measurements (F0_pre, F0_post) and column 3 "F0" are the values
ggplot(Studydata, aes(Group,F0,fill = Time_point)) +
geom_boxplot() +
stat_summary(fun.y = mean, geom = "point", size=3, shape=23,
position = position_dodge(width = .75)) +
geom_point(position=position_dodge(width=0.75),aes(group=Time_point)) +
scale_y_continuous("F0 (N/kg)",limits=c(5,10),breaks=c(5,6,7,8,9,10),
expand = c(0,0)) +
theme(axis.line = element_line(color = "black",size = 1, linetype = "solid"))+
theme_classic() +
scale_fill_manual(values=c("#999999", "#FFFFFF"), name = "Time point", labels = c("Pre", "Post"))
structure(list(Group = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Control", "Heavy", "Optimal"), class = "factor"),
Time_point = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("F0_pre", "F0_post"), class = "factor"),
F0 = c(7.30353192, 7.16108594, 7.662873671, 7.319494415,
7.690339929, 6.640005807, 6.848095385, 6.1605622, 8.300462597,
6.906034443, 7.644367174, 7.021959506, 7.042100127, 7.375865657,
8.506645287, 6.373721759, 7.507468154, 7.057438325, 7.147624225,
7.958957761, 7.439431197, 7.974165294, 8.125949745, 6.532471264,
7.481686188, 7.542614257, 7.247552687, 6.91, 7.609185039,
7.809989766, 8.151059576, 7.847938658, 7.999819081, 7.935556724,
7.679970645, 6.761378005, 8.157705923, 7.545437794, 9.395395275,
7.455579962, 7.917317173, 7.465252201, 8.567501942, 7.786701877,
7.4971379, 7.649121924, 6.942119866, 7.466501673, 7.653161086,
8.220328678, 8.173918564, 7.431310356, 7.98999627, 7.529664586,
7.518519833, 6.905140493)), row.names = c(NA, -56L), class = "data.frame")
You need a variable in your data frame indicating what observation represents each individual (so you can relate F0_pre and F0_post for each individual). I'm assuming they're in the same order in both time points so we add the column:
Studydata$id <- rep(1:28, 2)
Next: Since your x-axis is the group, each of the boxplots for each group is in the exact same place (you seem them side-by-side because it uses position("dodge") internally). Since we want to connect lines using this variable, let's use it as the x-axis, and also convert it to numerical, using geom_line() with factor variables is a pain:
Studydata$Time_point <- as.numeric(as.factor(Studydata$Time_point)) - 1
Now your column has 0 instead of "F0_pre" and 1 instead of "F0_pre". Construct the plot with:
ggplot(Studydata, aes(x = Time_point, y = F0)) +
geom_boxplot(aes(fill = factor(Time_point))) +
facet_grid(~Group) +
stat_summary(aes(group = 1), fun.y = mean, geom = "point", size=3, shape=23,
position = position_dodge(width = .75)) +
geom_point(alpha = 0.5) +
scale_y_continuous("F0 (N/kg)",limits=c(5,10),breaks=c(5,6,7,8,9,10),
expand = c(0,0)) +
scale_x_continuous("F0 (N/kg)",limits=c(-0.5,1.5),breaks=c(0,1)) +
theme(axis.line = element_line(color = "black",size = 1, linetype = "solid"))+
theme_classic() +
scale_fill_manual(values=c("#999999", "#FFFFFF"), name = "Time point", labels = c("Pre", "Post")) +
geom_line(aes(group = factor(id)), color = "green")
Result:
Some notes:
Do you really need to add the points if you have the lines? Points clutter the graphic and also make it hard to distinguish what were the points considered outliers in the boxplot (I tried to fix this by using small alpha = 0.5, which makes non-outlier points more transparent), while the lines can show the same information.
I used green lines, again, to distinguish between these lines and lines generated by boxplot. I highly recommend them to have different colors/types.
I have the following graph:
And would like to make what I thought would be a very simple change: I would like to remove the top, right and bottom sides of the left facet label border lines.
How do I do I remove those lines, or draw the equivalent of the right hand lines? I would rather not muck about with grobs, if possible, but won't say no to any solution that works.
Graph code:
library(ggplot2)
library(dplyr)
library(forcats)
posthoc1 %>%
mutate(ordering = -as.numeric(Dataset) + Test.stat,
Species2 = fct_reorder(Species2, ordering, .desc = F)) %>%
ggplot(aes(x=Coef, y=Species2, reorder(Coef, Taxa), group=Species2, colour=Taxa)) +
geom_point(size=posthoc1$Test.stat*.25, show.legend = FALSE) +
ylab("") +
theme_classic(base_size = 20) +
facet_grid(Taxa~Dataset, scales = "free_y", space = "free_y", switch = "y") +
geom_vline(xintercept = 0) +
theme(axis.text.x=element_text(colour = "black"),
strip.placement = "outside",
strip.background.x=element_rect(color = NA, fill=NA),
strip.background.y=element_rect(color = "black", fill=NA)) +
coord_cartesian(clip = "off") +
scale_x_continuous(limits=NULL)
Data:
structure(list(Dataset = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 5L, 5L, 5L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), .Label = c("All.habitat", "Aut.habitat", "Habitat.season",
"Lit.season", "Spr.habitat"), class = "factor"), Species = structure(c(1L,
2L, 3L, 5L, 6L, 10L, 11L, 12L, 13L, 1L, 3L, 5L, 6L, 13L, 1L,
2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L), .Label = c("Ar.sp1",
"Ar.sp2", "Arc.sp1", "B.pus", "Dal.sp1.bumps", "Dip.unID", "I.palladium",
"Pale", "Ph.sp3", "Port", "Somethus", "sty", "Sty.sp1"), class = "factor"),
Species2 = structure(c(2L, 9L, 1L, 4L, 5L, 7L, 11L, 12L,
13L, 2L, 1L, 4L, 5L, 13L, 2L, 9L, 4L, 5L, 6L, 10L, 8L, 7L,
11L, 13L), .Label = c("Arcitalitrus sp1", "Armadillidae sp1 ",
"Brachyiulus pusillus ", "Dalodesmidae sp1", "Diplopoda",
"Isocladosoma pallidulum ", "Ommatoiulus moreleti ", "Philosciidae sp2",
"Porcellionidae sp1", "Siphonotidae sp2", "Somethus sp1",
"Styloniscidae ", "Styloniscidae sp1"), class = "factor"),
Taxa = structure(c(3L, 3L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
1L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L), .Label = c("Amphipoda",
"Diplopoda", "Isopoda"), class = "factor"), Variable = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Autumn", "Litter",
"Spring", "Summer"), class = "factor"), Coef = c(1.911502938,
2.086917154, 1.571872993, 12.61184801, 15.6161116, -1.430032837,
-12.51944478, 12.33934516, -8.040249562, 8.08258816, 1.780142396,
12.88982576, 16.78107544, -13.22641153, 1.68810887, 2.093965381,
12.27209197, 15.08328526, -6.334640911, -11.29985948, -11.62658947,
-1.676293808, -6.246555908, -3.470297147), SE = c(0.403497472,
2.21607562, 0.348600794, 2.423896379, 0.509468128, 3.423013791,
2.382857733, 1.775086895, 2.087788334, 2.23631504, 0.33402261,
2.518562443, 0.459720131, 1.950974996, 0.2476205, 0.235648095,
1.815155489, 0.325804415, 2.564680067, 2.437104984, 2.212583358,
2.677618401, 2.324019051, 0.420436743), Test.stat = c(18.36532749,
13.27324683, 13.29039037, 20.50277493, 44.06097153, 10.55234932,
14.64951518, 13.22575401, 20.16415411, 16.55627107, 11.81407568,
15.15213717, 40.67205188, 12.62233207, 37.60085488, 16.90879258,
20.20215107, 80.30520371, 13.35250626, 13.01692428, 17.52987519,
20.03658771, 12.02467914, 53.5052683)), row.names = 10:33, class = "data.frame")
This solution is based on grobs: find positions of "strip-l" (left strips) and then substitute the rect grobs with line grobs.
p <- posthoc1 %>%
mutate(ordering = -as.numeric(Dataset) + Test.stat,
Species2 = fct_reorder(Species2, ordering, .desc = F)) %>%
ggplot(aes(x=Coef, y=Species2, reorder(Coef, Taxa), group=Species2, colour=Taxa)) +
geom_point(size=posthoc1$Test.stat*.25, show.legend = FALSE) +
ylab("") +
theme_classic(base_size = 20) +
facet_grid(Taxa~Dataset, scales = "free_y", space = "free_y", switch = "y") +
geom_vline(xintercept = 0) +
theme(axis.text.x=element_text(colour = "black"),
strip.placement = "outside",
#strip.background.x=element_rect(color = "white", fill=NULL),
strip.background.y=element_rect(color = NA)
) +
coord_cartesian(clip = "off") +
scale_x_continuous(limits=NULL)
library(grid)
q <- ggplotGrob(p)
lg <- linesGrob(x=unit(c(0,0),"npc"), y=unit(c(0,1),"npc"),
gp=gpar(col="red", lwd=4))
for (k in grep("strip-l",q$layout$name)) {
q$grobs[[k]]$grobs[[1]]$children[[1]] <- lg
}
grid.draw(q)
Data
> dput(my.precious)
structure(list(Vehicle.ID2 = c("2351.2360", "503.496", "2508.2498",
"2256.2243", "952.946", "2327.2315", "683.682", "880.866", "347.342",
"115.116", "2239.2229", "1680.1675", "1044.1029", "323.321",
"2354.2337", "1628.1621", "1603.1598", "417.404", "1291.1285",
"84.78", "2861.2855", "2804.2802", "1084.1080", "1885.1876",
"1778.1775", "1509.1505", "379.372", "2620.2616", "1146.1133",
"2476.2472", "750.737", "2119.2112", "411.397", "1515.1512",
"2204.2194", "879.872", "986.981", "1129.1124", "2954.2948",
"2928.2924", "462.438", "2629.2620", "2962.2950", "615.610",
"1405.1400", "806.800", "1767.1765", "199.192", "1888.1878",
"2525.2517", "142.141", "687.682", "1446.1445", "39.27", "2556.2550",
"292.281", "2034.2017", "2464.2447", "2046.2037", "2567.2552",
"705.697", "180.175", "1701.1699", "2086.2071", "2427.2402",
"965.961", "1561.1558", "2185.2180", "2148.2138", "2589.2582",
"1770.1761", "1027.1032", "2995.2982", "973.967", "405.399",
"2115.2106", "2754.2742", "2586.2576", "1733.1729", "943.928",
"1245.1239", "31.18", "146.141", "1865.1861", "588.579", "2216.2212",
"513.501", "1470.1467", "518.515", "2348.2339", "2212.2208",
"1504.1489", "2814.2812", "2618.2615", "2597.2593", "3018.3009",
"1641.1638", "929.917", "2052.2045", "1702.1694"), Vehicle.class = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Car following", "Heavy-vehicle following"
), class = "factor"), PrecVehClass = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Motorcycle", "Car", "Heavy-vehicle"), class = "factor"),
CC2 = c(32.5766501673563, 33.1462524122711, 114.985655309494,
0, 19.6198370044607, 6.33947396494466, 4.41629586850399,
45.7201738350116, 77.2852308366414, 23.4653247796564, 113.858471174095,
18.2949618097755, 15.1430447619764, 18.7949281381009, 56.150849563362,
0.871136231063019, 10.1789190682619, 21.8538402563161, 24.4424229038064,
21.8644774356173, 78.8898916107299, 59.0436899337149, 34.952193382661,
30.0676154315454, 12.1631954913147, 22.0999532188296, 34.4320551117948,
51.6072494224724, 49.8285734316947, 83.7391153614881, 68.7393621760813,
23.3109392847383, 0, 63.8918058981795, 0.117898698373665,
35.9301550863017, 41.408066837246, 67.9609018034737, 77.6228604725088,
50.3819848446467, 158.427611013205, 61.7191536455709, 63.4184192224484,
52.3067956266756, 56.239305476488, 23.4972280626377, 0, 5.44649970936757,
45.325372359443, 44.140432941474, 26.4621220704583, 21.9722600148252,
0, 47.5859211404629, 65.4619356384739, 50.3173084316458,
7.14323295461026, 49.9184456786638, 57.632603327405, 70.4138804098259,
27.3086664432516, 39.2627818278854, 13.8954239118315, 16.5224386897373,
0.336396348580877, 34.6684621497679, 0.80866365546683, 63.8680515267192,
14.7996906960015, 61.5616857306764, 65.3043233970858, 21.5517378489972,
26.6451085013455, 16.4717475328769, 34.5554653009784, 36.647363180998,
86.7844694571702, 157.154018248369, 47.5411300112071, 2.64972923204488,
15.45052725276, 10.0503437206614, 0, 7.95701592069599, 65.2275028899913,
16.6622992517697, 0.084677923994235, 23.5450734083073, 20.7709172539573,
29.1191855784058, 82.1117069705742, 53.0859602212412, 37.6419285717603,
82.0220785025156, 42.6655290135778, 68.302184817338, 62.2055693283554,
22.0752327366978, 16.2898985629383, 48.0306011348524)), .Names = c("Vehicle.ID2",
"Vehicle.class", "PrecVehClass", "CC2"), class = c("tbl_df",
"data.frame"), row.names = c(NA, -100L))
What I want to do and the relevant code
I want to plot the distribution of the variable 'CC2' in facet_wraps of 'Vehicle.class' and 'PrecVehClass'. Also, I want to display the mean value, standard deviation and number of pairs on the plots. I used following code:
my.theme<-function(base_size = 12, base_family = "Trebuchet MS")
{theme(plot.title = element_text(size = rel(1)), panel.grid.major=element_line(color='grey'), panel.grid.minor=element_line(color='grey', linetype='dashed'), legend.position='right', legend.title=element_blank(),legend.background = element_blank(), strip.text = element_text(size=13, face="bold",lineheight=4), strip.background = element_rect(colour="black", fill="white"),legend.title = element_text(colour="black", size=16, face="bold"), legend.text = element_text(colour="black", size = 16), axis.title.x = element_text(face="bold", size=14), axis.title.y = element_text(face="bold", size=14))
}
pairs.CC2 <- ddply(my.precious, .(Vehicle.class, PrecVehClass), function(x) length(unique(x$Vehicle.ID2)))
means.CC2 <- ddply(my.precious, .(Vehicle.class, PrecVehClass), function(x) mean(x$CC2, na.rm=T))
sd.CC2 <- ddply(my.precious, .(Vehicle.class, PrecVehClass), function(x) sd(x$CC2, na.rm=T))
ggplot() +
geom_histogram(data=my.precious, aes(x=CC2, y=..count../sum(..count..)*100),color="black", fill="grey", alpha=0.5) +
facet_wrap(Vehicle.class~PrecVehClass, scale="free_y") +
labs(x = "Distance in addition to safety distance (ft)", y="percentage") +
theme_bw() + my.theme() +
geom_text(data=pairs.CC2, aes(x=200, y=0.4, label=paste(V1, "pairs", sep=" ")), size=5, face="italic") +
geom_vline(data=means.CC2, aes(xintercept=V1), color="blue", linetype = "longdash", size=1) + geom_text(data=means.CC2, aes(x=mean(V1, na.rm=T),y=0.4, label=paste("Mean=", round(V1,1), "ft",sep=" ")), size=5) + geom_text(data=sd.CC2, aes(x=mean(V1, na.rm=T),y=0.35, label=paste("SD=", round(V1,1), sep=" ")), size=5)
This plots following:
Problem and question
You can see the 'mean', 'SD' and 'pairs' texts are not at visually pleasing locations. For this sample data I can relatively easily adjust the positions by controlling x and y arguments in geom_text but in the original data there are atleast 2 more facets for this data frame. And there are lots of other data frames having same kind of distributions which I want to plot. How can I ensure that these text annotations are placed on same locations e.g. top right or top left in every facet so that there is uniformity and plots look publication quality?
You can gain more control over label placement by creating a data frame with the summary information that includes y-position values. The summary data frame just has to include the facetting variables so that geom_text can automatically place labels at different y-positions for different facets. For example:
library(ggplot2)
library(dplyr)
# Pre-summarize the data into histogram bins. We need this to calculate appropriate
# values for the y-position of the labels
hist.bins = my.precious %>%
group_by(Vehicle.class, PrecVehClass,
breaks=cut(CC2, seq(0,max(CC2)+5,5),
seq(5,max(CC2)+5,5), include.lowest=TRUE)) %>%
summarise(count=n()) %>%
ungroup() %>%
mutate(percent=count/sum(count)*100)
# Data frame with y-position of labels. I've set the value to 90% of the maximum
# value of percent, but you can set it to whatever you like, or vary it by group.
pos = hist.bins %>% group_by(Vehicle.class, PrecVehClass) %>%
summarise(y.pos = 0.9 * max(percent))
# Data frame with summary stats
CC2stats = my.precious %>% group_by(Vehicle.class, PrecVehClass) %>%
summarise(mean=mean(CC2, na.rm=T),
sd = sd(CC2, na.rm=T),
pairs=length(unique(Vehicle.ID2)))
# Merge y-positions into CC2stats
CC2stats = merge(CC2stats, pos, by=c("Vehicle.class", "PrecVehClass"))
# Plot histogram
ggplot() +
geom_histogram(data=my.precious, aes(x=CC2, y=..count../sum(..count..)*100),
color="black", fill="grey", alpha=0.5,
breaks=seq(0,max(my.precious$CC2)+5,5)) +
facet_wrap(Vehicle.class~PrecVehClass, scale="free_y") +
labs(x = "Distance in addition to safety distance (ft)", y="percentage") +
theme_bw() + my.theme() +
# Add text labels using CC2stats data frame
geom_text(data=CC2stats, aes(x=140, y=y.pos,
label=paste(pairs, " pairs", sep=" ")),
size=5, face="italic") +
geom_vline(data=CC2stats, aes(xintercept=mean),
color="blue", linetype = "longdash", size=1) +
geom_text(data=CC2stats,
aes(x=140,y=0.95*y.pos, label=paste0("Mean = ", round(mean,1),
" ft",sep=" ")), size=5) +
geom_text(data=CC2stats,
aes(x=140,y=0.90*y.pos, label=paste0("SD = ", round(sd,1), sep=" ")),
size=5)
Note that I've included a breaks argument in geom_histogram. This is so that the breaks in the graph will correspond to the breaks in hist.bins, which ensures that the maximum value of hist.bins$percent will correspond to the y-range in the graph.
And here's the result:
It turns out that ggplot stores the axis limits in a "ggplot object" produced when the plot is rendered. You can create but not render with ggplot_build(...) and then access these (albeit in a roundabout way). Calling you original data, df, and using your pairs.CC2, mean.CC2, and sd.CC2,
# build the plot absent the mean, sd, and pairs annotations
ggp <-ggplot() +
geom_histogram(data=df, aes(x=CC2, y=..count../sum(..count..)*100),color="black", fill="grey", alpha=0.5) +
facet_wrap(Vehicle.class~PrecVehClass, scale="free_y") +
labs(x = "Distance in addition to safety distance (ft)", y="percentage") +
theme_bw() + my.theme() +
geom_vline(data=means.CC2, aes(xintercept=V1), color="blue", linetype = "longdash", size=1)
# extract x- and y-range information for each panel (facet)
panels <- ggplot_build(ggp)[["panel"]]
limits <- do.call(rbind,lapply(panels$ranges,
function(range)c(range$x.range,range$y.range)))
colnames(limits) <- c("x.lo","x.hi","y.lo","y.hi")
# combine this with your mean, sd, and pairs data
labs <- cbind(means.CC2,sd=sd.CC2$V1,pairs=pairs.CC2$V1,limits)
# use labs to drive the placement of the annotations
ggp +
geom_text(data=labs, aes(x=x.hi,y=y.hi-0.0*(y.hi-y.lo),label=paste(pairs,"pairs",sep=" ")), size=5,hjust=1)+
geom_text(data=labs, aes(x=x.hi,y=y.hi-0.1*(y.hi-y.lo),label=paste("Mean=", round(V1,1), "ft",sep=" ")), size=5,hjust=1) +
geom_text(data=labs, aes(x=x.hi,y=y.hi-0.2*(y.hi-y.lo),label=paste("SD=", round(sd,1),sep=" ")), size=5,hjust=1)
Produces this:
I have this data:
datat <- structure(list(Carga = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L), .Label = c("Outra", "88"), class = "factor"),
Categoria = structure(c(1L, 1L, 3L, 3L, 2L, 2L, 1L, 1L, 3L,
3L, 2L, 2L), .Label = c("A", "G", "B"), class = "factor"),
Vagas = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Ocupadas", "Autorizadas"), class = "factor"),
Cat.A.88 = c(26, 1, 30, 1, 18, 0, 57, 0, 39, 0, 0, 0)), .Names = c("Carga",
"Categoria", "Vagas", "Cat.A.88"), class = "data.frame", row.names = c(NA,
-12L))
and this plot:
ggplot(datat, aes(x=Carga, y=Cat.A.88, fill=Vagas)) + geom_bar(stat='identity', position='dodge') + ylab('Vagas') + xlab('Carga horĂ¡ria') + facet_grid(. ~ Categoria) + coord_flip()
The legend colours are in inverse order if compared with plot colours (plot have green before red, and legend have red before green). I want they appers in the same order. I tried add the parameter order=-as.numeric(Vagas) in aes(), but didn't changed anything.
This should help:
ggplot(datat, aes(x=Carga, y=Cat.A.88, fill=Vagas)) +
geom_bar(stat='identity', position='dodge') + ylab('Vagas') +
xlab('Carga horĂ¡ria') + facet_grid(. ~ Categoria) + coord_flip() +
guides(fill = guide_legend(reverse=T))