Two legends for polar ggplot (with one customized) - r

Here is my data:
data <- structure(list(Indicator = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L, 1L, 2L,
2L, 2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L,
1L, 2L, 2L, 2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L), .Label = c("Causality",
"Climatechangeriskperceptions", "Currentadaptationoptions", "Fishingasalivelihoodactivity",
"Governance", "Roleofshadowstateactors"), class = "factor"),
Village = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L), .Label = c("Andra", "lahapau", "Pelipowai", "Ponam",
"Tulu"), class = "factor"), Variables = structure(c(13L,
3L, 10L, 11L, 12L, 16L, 5L, 8L, 1L, 2L, 15L, 17L, 6L, 14L,
9L, 4L, 7L, 13L, 3L, 10L, 11L, 12L, 16L, 5L, 8L, 1L, 2L,
15L, 17L, 6L, 14L, 9L, 4L, 7L, 13L, 3L, 10L, 11L, 12L, 16L,
5L, 8L, 1L, 2L, 15L, 17L, 6L, 14L, 9L, 4L, 7L, 13L, 3L, 10L,
11L, 12L, 16L, 5L, 8L, 1L, 2L, 15L, 17L, 6L, 14L, 9L, 4L,
7L, 13L, 3L, 10L, 11L, 12L, 16L, 5L, 8L, 1L, 2L, 15L, 17L,
6L, 14L, 9L, 4L, 7L), .Label = c("alternativelivelihood",
"anyactorsinvolvedinsustainability", "Attributionfactors",
"discusswithelectedleaders", "Effortsdirectedtoreducerisks",
"fishercommunityinfluence", "Infrastructureeffectiveness",
"multiplicityofactors", "Occupationforchildren", "Reversibility",
"Riskasamajorconsideration", "Riskbeingaddressed", "Statusoffisheries",
"Timefishing", "Whatwasdone", "Whoisatrisk", "whowasinvolved?"
), class = "factor"), legend.var = structure(c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L), .Label = c("a", "b", "c", "d", "e", "f", "g",
"h", "i", "j", "k", "l", "m", "n", "o", "p", "q"), class = "factor"),
score = c(1, 0.97, 1, 0.76, 0.794, 1, 0.71, 0.9, 0.5, 1,
1, 1, 1, 0.49, 0.72, 1, 0.7, 1, 1, 0, 0.67, 0.5, 1, 0.2,
1, 1, 0.7, 0.4, 0.5, 0.3, 0.67, 0.5, 0.7, 0.8, 1, 0, 0.46,
0.56, 0.375, 1, 0.13, 0.3, 0.5, 0.3, 0.3, 0.4, 0.6, 1, 1,
0.7, 0.8, 1, 0.86, 0.69, 0.51, 0.429, 1, 0.44, 0.3, 0.5,
0.6, 0.6, 0.7, 0.8, 0.4, 0.79, 0.8, 1, 1, 0.82, 0.85, 0.25,
0.226, 1, 0.18, 0.1, 1, 0.7, 0.3, 0.6, 0.3, 0.48, 0.16, 0.4,
0.8)), .Names = c("Indicator", "Village", "Variables", "legend.var",
"score"), class = "data.frame", row.names = c(NA, -85L))
I have made polar plots as follows:
library(ggplot2)

# Polar bar chart: one wedge per legend.var, wedge height given by score,
# wedges filled by Indicator, one panel per Village.
ggplot(
  data,
  aes(x = legend.var, y = score, fill = Indicator),
  color = 'black'
) +
  geom_bar(stat = "identity", width = 1, alpha = 0.5) +
  scale_y_continuous() +
  coord_polar() +
  theme(axis.ticks = element_blank()) +
  facet_wrap(~Village, nrow = 2, ncol = 3) +
  guides(colour = guide_legend(title.hjust = 0.5))
As you can see in the data, the column legend.var is grouped by the column Indicator. I would like to insert a table or a second legend that associates each Indicator with the legend.var and Variables columns. Ideally, if this is inserted as a second legend, the unique items in the legend.var column would have the same fill color as the corresponding Indicator. The fill legend is based on the column Indicator. The inserted table/extra legend would comprise the column legend.var, holding a unique letter, and the column Variables, holding the meaning of that letter. These can then have the same fill color as the corresponding Indicator. I hope this is clear.

Here is a solution using a gtable:
library(ggplot2)
library(grid)      # provides unit() and grid.draw() used below
library(gridExtra)
library(gtable)

# Polar bar chart per village. The fill legend is placed inside the plotting
# area at relative position (0.85, 0.25).
p <- ggplot(data, aes(x = legend.var, y = score, fill = Indicator), color='black') +
  geom_bar(width = 1, alpha = 0.5, stat = "identity") +
  scale_y_continuous() +
  coord_polar() +
  theme(axis.ticks = element_blank()) +
  facet_wrap(~Village, nrow = 2, ncol = 3) +
  guides(colour = guide_legend(title.hjust = 0.5)) +
  theme(legend.position = c(0.85, 0.25))

#create table
# Lookup table translating each legend.var letter to its Variables name.
# Styling is supplied through a theme object: the show.rownames / gpar.*
# arguments of early gridExtra releases are not recognised by
# gridExtra >= 2.0.
tab_theme <- ttheme_default(
  core = list(
    fg_params = list(fontsize = 10),
    bg_params = list(fill = "grey90", col = "white")
  ),
  colhead = list(
    fg_params = list(fontsize = 10, fontface = "bold"),
    bg_params = list(fill = "grey80", col = "white")
  )
)
tab <- tableGrob(
  unique(data[, c("legend.var", "Variables")]),
  rows = NULL,          # no row-name column
  theme = tab_theme
)

#arrange grobs
# One-row layout: plot in the left 70 % of the page, table in the right 30 %.
a <- gtable(widths = unit(c(0.7, 0.3), "npc"), heights = unit(1, "npc"))
a <- gtable_add_grob(a, ggplotGrob(p), t = 1, l = 1)
a <- gtable_add_grob(a, tab, t = 1, l = 2)

#plot
grid.draw(a)

For a start, you may try something like this. You need to adjust arrangement and layout according to your own preferences.
library(ggplot2)
library(gridExtra)

# Polar bar chart of score per legend.var, filled by Indicator, one panel
# per Village.
gg <- ggplot(data, aes(x = legend.var, y = score, fill = Indicator), color='black') +
  geom_bar(width = 1, alpha = 0.5, stat = "identity") +
  coord_polar() +
  theme(axis.ticks = element_blank()) +
  facet_wrap(~Village, nrow = 2, ncol = 3)

# create a table that translates legend.var to Variables
tbl <- unique(data[, c("legend.var", "Variables")])

# create a table grob
# Text styling goes through a theme object: the col.just / gpar.* arguments
# of early gridExtra releases are not recognised by gridExtra >= 2.0.
# hjust = 0 together with a small x offset left-aligns the cell text.
tt_theme <- ttheme_default(
  core = list(
    fg_params = list(col = "black", cex = 0.5, hjust = 0, x = 0.05)
  ),
  colhead = list(
    fg_params = list(col = "black", cex = 0.5, fontface = "bold")
  ),
  rowhead = list(
    fg_params = list(col = "black", cex = 0.5, fontface = "italic")
  )
)
tt <- tableGrob(d = tbl, theme = tt_theme)

# arrange plot and table grob
grid.arrange(gg, tt, ncol = 2)
Update: a quick-and-dirty ggplot-only alternative:
# create labels
# One entry per row of `tbl`, of the form "<legend.var> <Variables>".
key_labels <- paste(tbl$legend.var, tbl$Variables)

# Same polar chart as before, with the outline colour additionally mapped to
# Variables so that a second (grey-scale) legend carrying the labels appears.
gg <- ggplot(
  data,
  aes(x = legend.var, y = score, fill = Indicator, col = Variables)
) +
  geom_bar(stat = "identity", width = 1, alpha = 0.5) +
  coord_polar() +
  theme(axis.ticks = element_blank()) +
  facet_wrap(~Village, nrow = 2, ncol = 3) +
  scale_color_grey(labels = key_labels, start = 0.8, end = 0.9)
gg

Related

Plot Y values against the time grouped by an ID

I want to make a time series plot grouped by ID. My dataset has 42 different IDs with 7 different timeframes. The timeframe varies per ID and ranges from 9/2016 to 8/2018. I.e., ID1 can start 10/2016 and end 7/2017 (with 7 rows, each containing a different date) and ID40 can start 11/2016 and end 6/2018 (also with 7 rows, each containing a different date). I tried to plot this with the following code:
# Base plot: AI against START, coloured and grouped by ID.
p <- ggplot(data = df6, aes(x = START, y = AI, col = ID, group = ID))

# Per-observation points, plus a smoother and mean markers that both use
# group = 1 instead of the per-ID grouping; x-axis labels are rotated.
p +
  geom_point(size = 1.2, alpha = .8) +
  stat_smooth(aes(group = 1)) +
  stat_summary(
    aes(group = 1),
    geom = "point",
    fun.y = mean,
    shape = 17,
    size = 3
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
This gives me the following graph:
As one can see, the X-axis is not chronological. It should start at 09/2016 and end at 08/2018 and then correspond with the Y value based on the ID. I have the following dataset:
structure(list(ID = c("ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID10", "ID10", "ID10", "ID10", "ID10", "ID10", "ID10",
"ID11", "ID11", "ID11", "ID11", "ID11", "ID12"), Time = c("1",
"2", "3", "4", "5", "6", "7", "1", "2", "3", "4", "5", "6", "7",
"1", "2", "3", "4", "5", "1"), AI = c(0.393672183448241, 0.4876954603533,
0.411717908455957, 0.309769862660288, 0.149826889496538, 0.2448558592586,
0.123606753324621, 0.296109333767922, 0.309960002123076, 0.445886231347992,
0.370013553008003, 0.393414429902431, 0.318940511323733, 0.131112361225666,
0.31961673567578, 0.227268892979164, 0.433471105477564, 0.207184572401005,
0.144257239122978, 0.520204263001733), AI_VAR = c(0.154977788020905,
0.237846862049217, 0.169511636143347, 0.0959573678125739, 0.0224480968162077,
0.0599543918132674, 0.0152786294674538, 0.0876807375444826, 0.0960752029161373,
0.198814531305715, 0.136910029409606, 0.154774913655455, 0.101723049763444,
0.0171904512661696, 0.102154857724042, 0.0516511497159746, 0.187897199283942,
0.0429254470409874, 0.020810151039384, 0.270612475245176), activity = c(0,
0.303472222222222, 0.232638888888889, 0.228472222222222, 0.348611111111111,
0.215972222222222, 0.123611111111111, 0.357638888888889, 0.235416666666667,
0.233333333333333, 0.2875, 0.353472222222222, 0.356944444444444,
0.149305555555556, 0.448611111111111, 0.213888888888889, 0.248611111111111,
0.288888888888889, 0.25625, 0.238888888888889), ZIM_SD = c(0,
0.148002025121106, 0.095781596758851, 0.0707738088994687, 0.0522313184217097,
0.0528820640482116, 0.0152791681192935, 0.105900213118389, 0.0729697504998075,
0.104040120647865, 0.106378896489801, 0.139061072791901, 0.113844043625277,
0.0195758039329988, 0.143383618921218, 0.0486102909983211, 0.107765733167339,
0.059853320915846, 0.036965917525263, 0.124271018383747), ZIM_VAR = c(0,
0.0721799157746582, 0.039434998686126, 0.0219235930627339, 0.00782565597342798,
0.0129484832318932, 0.00188860836472692, 0.0313580415523671,
0.0226177040198407, 0.0463900573046668, 0.0393616334552618, 0.0547086326740462,
0.0363094774850072, 0.00256662987654616, 0.0458278042289798,
0.0110476070225835, 0.0467133314886466, 0.0124006847007297, 0.00533260120384214,
0.0646463135307921), CHECK = c(10L, 13L, 11L, 7L, 7L, 5L, 4L,
36L, 36L, 34L, 34L, 32L, 29L, 21L, 28L, 27L, 26L, 25L, 21L, 36L
), BULBAR = c(2L, 4L, 4L, 4L, 4L, 2L, 2L, 9L, 9L, 9L, 9L, 9L,
7L, 6L, 12L, 12L, 11L, 11L, 11L, 11L), FINE = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 9L, 9L, 8L, 8L, 7L, 6L, 4L, 2L, 1L, 1L, 1L, 0L, 7L
), GROSS = c(2L, 2L, 2L, 2L, 2L, 2L, 1L, 9L, 9L, 9L, 9L, 8L,
8L, 6L, 3L, 3L, 3L, 3L, 2L, 6L), RESPI = c(6L, 7L, 5L, 1L, 1L,
1L, 1L, 9L, 9L, 8L, 8L, 8L, 8L, 5L, 11L, 11L, 11L, 10L, 8L, 12L
), GROSS_RENEWD = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 6L, 6L, 6L,
5L, 5L, 4L, 3L, 3L, 3L, 3L, 2L, 3L), ACTIVE = c(2L, 2L, 2L, 2L,
2L, 2L, 1L, 18L, 18L, 17L, 17L, 15L, 14L, 10L, 5L, 4L, 4L, 4L,
2L, 13L), NON.ACTIVE = c(8L, 11L, 9L, 5L, 5L, 3L, 3L, 18L, 18L,
17L, 17L, 17L, 15L, 11L, 23L, 23L, 22L, 21L, 19L, 23L), START = c("09/2016",
"11/2016", "01/2017", "04/2017", "06/2017", "10/2017", "02/2018",
"10/2016", "12/2016", "02/2017", "04/2017", "07/2017", "11/2017",
"04/2018", "10/2016", "12/2016", "02/2017", "04/2017", "07/2017",
"10/2016"), STOP = c("10/2016", "11/2016", "01/2017", "04/2017",
"06/2017", "10/2017", "03/2018", "10/2016", "12/2016", "02/2017",
"04/2017", "07/2017", "11/2017", "04/2018", "10/2016", "12/2016",
"02/2017", "04/2017", "07/2017", "10/2016")), row.names = c(NA,
20L), class = "data.frame")
In general, I want the START axis to begin with the earliest date and end with the latest date when it is plotted.
You should convert your "START" column to a date format. You could use the package zoo with the function as.yearmon for that. To start the axis with your start date and end it with the end date, you could create a vector of date breaks using the min (start) date and max (end) date. Here is a reproducible example:
library(ggplot2)
library(zoo)
library(dplyr)

# Parse the "mm/YYYY" strings into Date values so the x-axis is ordered
# chronologically instead of as text.
df6 <- df6 %>%
  mutate(START = as.Date(as.yearmon(START, format = "%m/%Y")))

# One axis break per month, from the earliest to the latest START date.
# seq() already begins at min(df6$START), so it is not prepended again.
breaks.vec <- seq(from = min(df6$START), to = max(df6$START), by = "month")

ggplot(data = df6, aes(x = START, y = AI, col = ID, group = ID)) +
  geom_point(size = 1.2, alpha = .8) +
  stat_smooth(aes(group = 1)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(
    aes(group = 1),
    geom = "point",
    fun = mean,
    shape = 17,
    size = 3
  ) +
  scale_x_date(breaks = breaks.vec, date_labels = "%m/%Y") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Created on 2022-10-17 with reprex v2.0.2

How to change range of heatmap using gheatmap in R

I'm trying to add a heatmap to my phylogenetic tree. The range of the heatmap should be from 0 to 100; instead, it only covers the min and max of the values. Can I reset the range of the heatmap?
thanks.
library(ggtree)
library(ggplot2)
library(ggstance)
df1 <- structure(
list(id = structure(
c(5L, 15L, 29L, 18L, 24L, 21L,
13L, 11L, 8L, 25L, 23L, 9L, 16L, 3L, 6L, 2L, 20L, 27L, 30L, 17L,
14L, 4L, 1L, 7L, 22L, 28L, 10L, 12L, 26L, 19L),
.Label = c("t1",
"t10", "t11", "t12", "t13", "t14", "t15", "t16", "t17", "t18",
"t19", "t2", "t20", "t21", "t22", "t23", "t24", "t25", "t26",
"t27", "t28", "t29", "t3", "t30", "t4", "t5", "t6", "t7", "t8",
"t9"), class = "factor"),
location = structure(c(1L, 3L, 2L,
1L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 1L, 1L, 3L, 2L, 1L,
1L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L),
.Label = c("CZ", "GZ", "HK"), class = "factor"),
Value = c(22L, 10L, 33L, 12L, NA,
NA, NA, NA, NA, NA, NA, NA, 45L, 89L, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 80L, NA, NA, NA, NA)),
class = "data.frame", row.names = c(NA,
-30L))
# Random tree with 30 tips and its basic ggtree plot.
tr <- rtree(30)
p <- ggtree(tr)

#df1<- your_example_data
# Attach df1 to the tree, colour the tip points by location and hide the
# colour legend.
p1 <- p %<+% df1 +
  geom_tippoint(aes(color = location)) +
  guides(color = "none")

# Random heat-map values, one row per tip label.
d2 <- data.frame(val = rnorm(30, mean = 50, sd = 20))
rownames(d2) <- tr$tip.label

# Start a fresh fill scale before the heat-map layer is added.
library(ggnewscale)
p1 <- p1 + new_scale_fill()

p2 <- gheatmap(
  p1, d2,
  offset = 0.015,
  width = 0.05,
  colnames = TRUE,
  colnames_position = 'top',
  colnames_angle = 45,
  colnames_offset_x = 0.001,
  colnames_offset_y = 0.25,
  font.size = 3
) +
  scale_fill_viridis_c(option = "A", name = "query\ncoverage\npercentage")
p2
Try using scale_fill_gradientn. I don't have ggtree in my library collection, but it should work with it too. data$Z are the values used in the legend.
min(data$Z)
[1] 10.43507
# using geom_tile instead
# `limits` pins the colour scale to start at 0 rather than at min(data$Z);
# breaks/labels mark both ends of that range. To force a fixed upper end
# (e.g. 100), replace max(data$Z) with that value in all three arguments.
ggplot(data, aes(X, Y, fill = Z)) +
  geom_tile() +
  scale_fill_gradientn(
    limits = c(0, max(data$Z)),
    # Argument spelled out in full (no partial matching) and the palette
    # function namespace-qualified, since no package providing viridis()
    # is attached in this snippet.
    colours = viridisLite::viridis(10, option = "A"),
    breaks = c(0, max(data$Z)),
    labels = c(0, max(data$Z))
  )
Data
data <- structure(list(X = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L), .Label = c("A", "B", "C", "D", "E", "F", "G", "H",
"I", "J"), class = "factor"), Y = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L), .Label = c("var1", "var2", "var3", "var4", "var5",
"var6", "var7", "var8", "var9", "var10"), class = "factor"),
Z = c(33.991562910378, 35.5263787321746, 24.5632742531598,
18.0254957079887, 45.778294513002, 38.0070783570409, 38.8778781332076,
13.9182714093477, 13.2864724285901, 12.3245238792151, 45.4634746629745,
43.5207717958838, 14.6174691990018, 14.6395265311003, 16.3748204801232,
37.5898649636656, 46.154183940962, 21.7670671269298, 45.9928634669632,
15.2300526481122, 42.3459290526807, 36.1509132292122, 13.004608694464,
17.2632187511772, 24.1008642502129, 21.0504860430956, 47.8412099648267,
22.8905160259455, 26.2689692527056, 42.2642367053777, 49.7228981740773,
18.5286565497518, 19.9640860501677, 19.8192273359746, 46.2587429210544,
45.3112288471311, 14.0251182205975, 46.5721819829196, 19.2603973485529,
11.8241156637669, 43.5814412590116, 12.3338401783258, 34.6708638872951,
16.535308547318, 12.5870429351926, 17.7716215513647, 38.3571200724691,
40.5572446156293, 38.3018106594682, 36.1261784471571, 23.6329158209264,
38.2715854980052, 31.8956978339702, 19.8036628682166, 41.236245688051,
42.5284101255238, 47.3572976142168, 10.9305525757372, 41.5727174282074,
39.237065333873, 41.6476187948138, 43.6902561411262, 39.2061061505228,
18.3187866955996, 42.8791201952845, 33.8544269837439, 17.3525733780116,
14.5423825085163, 46.209614733234, 24.5643785689026, 35.3784507885575,
44.3101883865893, 45.7905176281929, 36.0531417001039, 44.190902383998,
32.4274326208979, 33.8546730671078, 43.7150628026575, 44.4308217708021,
27.6862936094403, 39.8551124054939, 10.4350713547319, 35.6894047465175,
28.6168400477618, 18.5768875014037, 17.1367645263672, 30.369380293414,
17.7864238992333, 36.1986118741333, 43.2466325163841, 49.581032032147,
49.736803509295, 40.3205085452646, 27.0655540842563, 42.9749015253037,
30.9310132544488, 23.7332978192717, 35.1737863756716, 40.4224442131817,
15.6103290617466)), out.attrs = list(dim = c(X = 10L, Y = 10L
), dimnames = list(X = c("X=A", "X=B", "X=C", "X=D", "X=E", "X=F",
"X=G", "X=H", "X=I", "X=J"), Y = c("Y=var1", "Y=var2", "Y=var3",
"Y=var4", "Y=var5", "Y=var6", "Y=var7", "Y=var8", "Y=var9", "Y=var10"
))), row.names = c(NA, -100L), class = "data.frame")

Cannot plot the correct x-axis in ggplot2

I am plotting the following data using ggplot2 in R.
dat<-structure(list(Month = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L,
8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L), grp1 = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L), .Label = c("(-Inf,2]", "(2,7]", "(7,14]",
"(14, Inf]"), class = "factor"), n = c(71L, 59L, 36L, 10L, 55L,
73L, 18L, 10L, 97L, 82L, 22L, 5L, 120L, 79L, 15L, 2L, 140L, 62L,
15L, 174L, 60L, 11L, 188L, 71L, 2L, 183L, 53L, 2L, 211L, 50L,
2L, 171L, 69L, 7L, 1L, 98L, 85L, 13L, 6L, 72L, 62L, 24L, 9L)), class
= "data.frame", row.names = c(NA,-43L))
Here's my script:
library(ggplot2)

# Stacked columns of n per Month, filled by grp1, with a white boxed panel,
# large axis text, a fixed 0-300 y-axis and month abbreviations requested on
# a discrete x scale.
p <- ggplot(data = test, aes(Month, n, fill = grp1)) +
  geom_col() +
  theme(
    panel.background = element_rect(fill = "white"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    axis.line.x = element_line(colour = "black"),
    axis.line.y = element_line(colour = "black"),
    axis.text = element_text(size = 20, colour = "black", family = "sans"),
    axis.title = element_text(size = 20, colour = "black", family = "sans"),
    legend.position = "right",
    legend.key = element_rect(fill = 'white')
  ) +
  scale_y_continuous(
    limits = c(0, 300),
    breaks = seq(0, 300, 50),
    expand = c(0, 0)
  ) +
  scale_x_discrete(
    breaks = seq(1, 12, 1),
    labels = c("JAN", "FEB", "MAR", "APR", "MAY", "JUN",
               "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"),
    expand = c(0, 0)
  ) +
  labs(x = "Month", y = "Number of Days")
Here's the output:
Why is it that I cannot plot the x-axis values?
If I don't set the scale_x_discrete, the plot will look like this:
Any ideas on how to solve this?
I'll appreciate any help.
If you want the month names along the x-axis, you can wrap Month in as.factor() in your ggplot call. Here's an example:
# Month is converted to a factor inside aes() so that the discrete x scale
# below applies to it.
month_labels <- c("JAN", "FEB", "MAR", "APR", "MAY", "JUN",
                  "JUL", "AUG", "SEP", "OCT", "NOV", "DEC")

p <- ggplot(data = dat, aes(as.factor(Month), n, fill = grp1)) +
  geom_col() +
  theme(
    panel.background = element_rect(fill = "white"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    axis.line.x = element_line(colour = "black"),
    axis.line.y = element_line(colour = "black"),
    axis.text = element_text(size = 20, colour = "black", family = "sans"),
    axis.title = element_text(size = 20, colour = "black", family = "sans"),
    legend.position = "right",
    legend.key = element_rect(fill = 'white')
  ) +
  scale_y_continuous(
    limits = c(0, 300),
    breaks = seq(0, 300, 50),
    expand = c(0, 0)
  ) +
  scale_x_discrete(
    breaks = seq(1, 12, 1),
    labels = month_labels,
    expand = c(0, 0)
  ) +
  labs(x = "Month", y = "Number of Days")
p
Which gives you this:-

How to apply p-value for each group of dataframe in R using facet_wrap in ggpubr

I have a data that looks like this:
melted.df <- structure(list(Time = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("24",
"36", "48", "72"), class = "factor"), id = c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L), Samples = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L), .Label = c("WT_Ago2_800", "WT_Ago2_400", "WT_Ago2_200",
"WT_Ago4_800"), class = "factor"), Size = c(0, 0, 0, 0, 0, 0,
0.3, 0, 0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.8, 0.5, 0, 0,
0, 0, 0, 0, 0.1, 0.65, 0.2, 0.85, 0.725, 0.575, 0.1, 1.1, 0.9,
1.325, 1, 0.8, 0.5, 2.2, 1.65, 0, 0, 0, 0, 0, 0, 0.825, 1.175,
0.1, 0.55, 0.85, 0.85, 1.1, 1.4, 0.6, 0.95, 1.15, 0.975, 2.35,
1.15, 2.1, 0, 0, 0, 0, 0, 0, 0.65, 1.4, 0.55, 0.1, 0.7, 1.1,
0.95, 1.85, 0.85, 0.1, 1.5, 1.25, 1.8, 1.75, 2.15)), row.names = c(NA,
-84L), class = "data.frame")
This data consists of 4 time frames (24, 36, 48 and 72 hours). I want to use the code below to paste the p values calculated as stat.test for each time.levels and apply that to each facet_wrap. If you check for i=1, there is no p-value so it's nothing you would want to apply to the figure, and if you do i=2, you would get p-values applied to the figure. The problem is that I couldn't get the p-value applied to its respective facets. It just applies same p-value in all facets. How can I get this resolved?
code:
library(devtools)
# install_github("https://github.com/kassambara/rstatix")
library(rstatix) # https://github.com/kassambara/rstatix
library(stringi)
library(ggpubr)
time.levels <- levels(melted.df$Time)
stat.test <- NULL
for (i in 1:length(time.levels)) {
  # Tukey HSD computed on the rows of the i-th time level only; the result
  # overwrites stat.test on every iteration.
  stat.test <- aov(
    Size ~ Samples,
    data = melted.df[melted.df$Time == time.levels[i],]
  ) %>%
    tukey_hsd()
  # stat.test <- rbind(stat.test, tmp.stat)

  # The box plot itself is built from the full data set, faceted by Time,
  # and annotated with the current stat.test.
  bp <- ggboxplot(melted.df, x = "Samples", y = "Size") +
    facet_wrap(vars(Time)) +
    stat_pvalue_manual(
      stat.test,
      label = "p.adj",
      y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
    )
  bp
}
Note. All your values in Size for Time == 24L are zero:
> filter(melted.df, Time == 24L) %>% select(Size) %>% summary
Size
Min. :0
1st Qu.:0
Median :0
Mean :0
3rd Qu.:0
Max. :0
If you wish to proceed anyway, you should make the plots individually and then use gridExtra::grid.arrange:
library(gridExtra)

# One plot per time level, collected in a pre-sized list.
bp <- vector("list", length(time.levels))
for (i in seq_along(time.levels)) {
  # Both the test and the plot use only the rows of the current time level.
  sdf <- melted.df[melted.df$Time == time.levels[i], ]
  stat.test <- tukey_hsd(aov(Size ~ Samples, data = sdf))
  bp[[i]] <- ggboxplot(sdf, x = "Samples", y = "Size") +
    facet_wrap(vars(Time)) +
    stat_pvalue_manual(
      stat.test,
      label = "p.adj",
      y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
    )
}

# Draw all per-time plots together on one page.
do.call(grid.arrange, bp)
Note that you have to use the subset data.frame sdf as the input for ggboxplot.
You don't need to use gridExtra::grid.arrange.
Here is a clean solution.
library(rstatix) # latest version
library(ggpubr) # latest version

# Tukey HSD of Size by Samples, run separately within each Time level.
stat.test <- tukey_hsd(group_by(melted.df, Time), Size ~ Samples)

# Box plots faceted by Time, annotated with the adjusted p-values.
ggboxplot(melted.df, x = "Samples", y = "Size", facet.by = "Time") +
  stat_pvalue_manual(
    stat.test,
    label = "p.adj",
    y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
  )

How do I melt/gather multiple variables (error bars) into one for mapping to geom_bar?

I'll start with my goal, which is to generate graphs for each of my variables (magnitude [mag], duration [dura] and distance [dist]), but with distinct error bars for train and test:
Almost finished graph
I have a data-frame that looks like this: (screenshot + dput below). It shows the responses (magnitude, distance, duration) during train and test of various biological strains along with their standard error (SEM). For example, the duration response at train is in column "train_avg_dura" and at test is "test_avg_dura". The standard error for each of these is in the columns train_duraSEM and test_duraSEM
df_group_sum.wide (data-frame)
dput data:
df_group_sum.wide <-
structure(list(strain = structure(1:8, .Label = c("N2", "acy-1(LOF)",
"acy-1(GOF)", "pde-4", "unc-43", "crh-1", "glr-1", "avr-14"), class = "factor"),
test_avg_dist = c(0.23102447163515, 0.198503787878788, 0.23892936802974,
0.247270588235294, 0.148316666666667, 0.195762711864407,
0.204740740740741, 0.238755154639175), test_avg_dura = c(1.04759733036707,
1.15537878787879, 0.914684014869888, 1.12286274509804, 0.828916666666667,
0.785491525423729, 0.788407407407407, 1.02309278350515),
test_avg_mag = c(0.112163461525871, 0.113447031611172, 0.15930172539742,
0.105397926645665, 0.0370000063024116, 0.0823626968797451,
0.0441620688813484, 0.135786546158742), test_distSEM = c(0.00460504533342531,
0.0050568065734325, 0.00945562739572128, 0.00524044558789062,
0.00882224860763199, 0.00983820301449839, 0.0162322856355826,
0.00738407922404085), test_duraSEM = c(0.0187491841242793,
0.0287113186085301, 0.0283764910080623, 0.0215386973519077,
0.0471018319675206, 0.0341593217329755, 0.0564553992545153,
0.0271939362203803), test_magSEM = c(0.00335619679815181,
0.00443251320170775, 0.00919066553588191, 0.00432150262248429,
0.00400887448034098, 0.00664866437888279, 0.00575860867691942,
0.00524462205156711), train_avg_dist = c(0.337652222222222,
0.294218518518519, 0.338651851851852, 0.311313725490196,
0.254675, 0.2737, 0.390688888888889, 0.314817948717949),
train_avg_dura = c(1.3543, 1.429, 1.19151851851852, 1.37256862745098,
1.236, 1.06376666666667, 1.41396296296296, 1.31512820512821
), train_avg_mag = c(0.1930557426236, 0.19297076970836, 0.212916856705011,
0.127417008935649, 0.0841239843171108, 0.117210954090848,
0.115413610503398, 0.179227387006556)), class = "data.frame", .Names = c("strain",
"test_avg_dist", "test_avg_dura", "test_avg_mag", "test_distSEM",
"test_duraSEM", "test_magSEM", "train_avg_dist", "train_avg_dura",
"train_avg_mag"), row.names = c(NA, -8L))
The problem I am having is how to add error bars using SEM since I need them to be merged into one variable as opposed to two when I map the variable to geom_bar. I think this is a melt issue but I can't figure it out.
Update:
The melted data-frame I used to plot the graph is as follows:
structure(list(strain = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("N2", "acy-1(LOF)",
"acy-1(GOF)", "pde-4", "unc-43", "crh-1", "glr-1", "avr-14"), class = "factor"),
variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L), .Label = c("test_avg_dist",
"test_avg_dura", "test_avg_mag", "test_avg_prob", "test_avg_spd",
"test_distSEM", "test_duraSEM", "test_magSEM", "test_probSEM",
"test_spdSEM", "train_avg_dist", "train_avg_dura", "train_avg_mag",
"train_avg_prob", "train_avg_spd", "train_distSEM", "train_duraSEM",
"train_magSEM", "train_probSEM", "train_spdSEM"), class = "factor"),
value = c(0.23102447163515, 0.198503787878788, 0.23892936802974,
0.247270588235294, 0.148316666666667, 0.195762711864407,
0.204740740740741, 0.238755154639175, 1.04759733036707, 1.15537878787879,
0.914684014869888, 1.12286274509804, 0.828916666666667, 0.785491525423729,
0.788407407407407, 1.02309278350515, 0.112163461525871, 0.113447031611172,
0.15930172539742, 0.105397926645665, 0.0370000063024116,
0.0823626968797451, 0.0441620688813484, 0.135786546158742,
0.457040018571118, 0.563727434411572, 0.624264612406578,
0.392625726149316, 0.219488346025285, 0.355836464305103,
0.158243463050796, 0.549997886634136, 0.218104671667048,
0.175578055416405, 0.256197987699313, 0.218534931269605,
0.181253278716812, 0.235434749265196, 0.236043513165036,
0.229165553562148, 0.00460504533342531, 0.0050568065734325,
0.00945562739572128, 0.00524044558789062, 0.00882224860763199,
0.00983820301449839, 0.0162322856355826, 0.00738407922404085,
0.0187491841242793, 0.0287113186085301, 0.0283764910080623,
0.0215386973519077, 0.0471018319675206, 0.0341593217329755,
0.0564553992545153, 0.0271939362203803, 0.00335619679815181,
0.00443251320170775, 0.00919066553588191, 0.00432150262248429,
0.00400887448034098, 0.00664866437888279, 0.00575860867691942,
0.00524462205156711, 0.00460504533342531, 0.0050568065734325,
0.00945562739572128, 0.00524044558789062, 0.00882224860763199,
0.00983820301449839, 0.0162322856355826, 0.00738407922404085,
0.00148090077905166, 0.00224725406956702, 0.00293788372166611,
0.00142518092482957, 0.00475313026432338, 0.00259537819051875,
0.00439432015310276, 0.00179190641262238, 0.337652222222222,
0.294218518518519, 0.338651851851852, 0.311313725490196,
0.254675, 0.2737, 0.390688888888889, 0.314817948717949, 1.3543,
1.429, 1.19151851851852, 1.37256862745098, 1.236, 1.06376666666667,
1.41396296296296, 1.31512820512821, 0.1930557426236, 0.19297076970836,
0.212916856705011, 0.127417008935649, 0.0841239843171108,
0.117210954090848, 0.115413610503398, 0.179227387006556,
0.525206741295172, 0.606796097537911, 0.592920766963248,
0.383218177729097, 0.294853306191478, 0.37983654970313, 0.244065736387288,
0.529995494304863, 0.245519078777542, 0.204069564920836,
0.279438682643543, 0.223741850875084, 0.203505986396722,
0.244494243449087, 0.263225928969608, 0.235094347033923,
0.00509151719343593, 0.00741331297357774, 0.0110354960774679,
0.0058641318136066, 0.0114389388703232, 0.0108143010933781,
0.0182904578688527, 0.00913426247712326, 0.0167858570502119,
0.0279705569908445, 0.030133138276768, 0.0219057666071679,
0.0479637760140276, 0.0332974908188985, 0.0605392786801207,
0.0323033076008837, 0.00498395111761598, 0.0081988397756359,
0.0107052683837969, 0.00442352355941589, 0.00723029142814287,
0.00764631328347674, 0.00980735575566329, 0.00789476278044047,
0.00509151719343593, 0.00741331297357774, 0.0110354960774679,
0.0058641318136066, 0.0114389388703232, 0.0108143010933781,
0.0182904578688527, 0.00913426247712326, 0.00139403793044242,
0.00220415921330836, 0.00299625483623813, 0.00144528089431754,
0.00441088530148196, 0.00248394605240026, 0.00319027562414684,
0.00174638373495128)), row.names = c(NA, -160L), .Names = c("strain",
"variable", "value"), class = "data.frame")
The code I used to plot this (after removing SEM rows) is as follows:
# Dodged bar chart of the "mag" rows of the long data frame, one bar per
# strain and variable. The outer parentheses both assign and print the plot.
(abs_bar_mag <-
  df_group_sum.long %>%
  filter(grepl("mag", variable)) %>%
  ggplot(aes(x = strain, y = value, fill = variable)) +
  scale_fill_manual(values = c("lightseagreen", "indianred1")) +
  geom_bar(stat = "identity", position = "dodge") +
  #geom_errorbar(aes(ymin=value-1, ymax=value+1), width=.1, position = position_dodge(width=0.9)) +
  theme(panel.background = element_blank()) +
  theme(
    text = element_text(size = 20),
    axis.line = element_line(colour = "black")
  ) +
  ggtitle("") +
  theme(
    plot.title = element_text(size = 30, hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 70),
    strip.text = element_text(size = 40),
    axis.text.x = element_text(angle = 65, hjust = 1, size = 40),
    axis.title.y = element_text(size = 65)
  ) +
  # The former trailing scale_colour_manual(values = rev()) was removed:
  # rev() was called without an argument and so cannot be evaluated, and no
  # layer maps the colour aesthetic, so the scale had nothing to act on.
  labs(colour = "", y = "Magnitude", x = "")
)
I appreciate any pointers or solutions you may have!
Thanks,
Aram
The issue here is that the avg columns and the SEM (standard error) columns need to stay together. This requires reshaping two value columns simultaneously. See section 3.a of Efficient reshaping using data.tables for more details.
Therefore, we start with the data in wide format (df_group_sum.wide). To be in line with the code provided by the OP, only magnitudes are plotted.
library(data.table)
library(ggplot2)
# Reshape the wide table so that every row carries both the average
# magnitude ("avg") and its standard error ("SEM") for one strain.
molten <- melt(
  data.table(df_group_sum.wide),
  id.vars = "strain",
  measure.vars = patterns("avg_mag$", "magSEM$"),
  value.name = c("avg", "SEM")
)
# Give the levels of the generated `variable` factor readable names
# (updated by reference).
molten[, variable := forcats::lvls_revalue(variable, c("test_mag", "train_mag"))]
molten
strain variable avg SEM
1: N2 test_mag 0.11216346 0.003356197
2: acy-1(LOF) test_mag 0.11344703 0.004432513
3: acy-1(GOF) test_mag 0.15930173 0.009190666
4: pde-4 test_mag 0.10539793 0.004321503
5: unc-43 test_mag 0.03700001 0.004008874
6: crh-1 test_mag 0.08236270 0.006648664
7: glr-1 test_mag 0.04416207 0.005758609
8: avr-14 test_mag 0.13578655 0.005244622
9: N2 train_mag 0.19305574 NA
10: acy-1(LOF) train_mag 0.19297077 NA
11: acy-1(GOF) train_mag 0.21291686 NA
12: pde-4 train_mag 0.12741701 NA
13: unc-43 train_mag 0.08412398 NA
14: crh-1 train_mag 0.11721095 NA
15: glr-1 train_mag 0.11541361 NA
16: avr-14 train_mag 0.17922739 NA
# Dodged bars of the average magnitude per strain, with error bars spanning
# avg - SEM to avg + SEM.
ggplot(
  molten,
  aes(x = strain, y = avg, fill = variable, ymin = avg - SEM, ymax = avg + SEM)
) +
  geom_col(position = "dodge") +
  geom_errorbar(position = position_dodge(width = 0.9), width = 0.1) +
  theme_bw() +
  scale_fill_manual(values = c("lightseagreen", "indianred1")) +
  labs(x = "", y = "Magnitude", fill = "")
The OP has also provided a data.frame in long format, df_group_sum.long, which contains more data than df_group_sum.wide. These additional data should now be plotted as well.
By looking at the variable names
unique(df_group_sum.long$variable)
[1] test_avg_dist test_avg_dura test_avg_mag test_avg_prob test_avg_spd
[6] test_distSEM test_duraSEM test_magSEM test_probSEM test_spdSEM
[11] train_avg_dist train_avg_dura train_avg_mag train_avg_prob train_avg_spd
[16] train_distSEM train_duraSEM train_magSEM train_probSEM train_spdSEM
20 Levels: test_avg_dist test_avg_dura test_avg_mag test_avg_prob ... train_spdSEM
the data.frame seems to contain aggregated data (avg and SEM) of five different variables (dist, dura, mag, prob, spd) of two data sets (train and test). Again, avg and SEM need to stay together on one row for plotting bar charts with error bars.
Unfortunately, the naming scheme is inconsistent. It would have been better if the variables containing standard errors had been named similarly to train_avg_mag, e.g., train_SEM_mag instead of train_magSEM.
So, the first step is to split up the variable names to get the different groups separately:
library(data.table)
# Split names such as "test_avg_dist" and "test_distSEM" into their parts.
DT <- data.table(df_group_sum.long)
name_parts <- DT[, tstrsplit(variable, "_|SEM$")]
# SEM names yield only two pieces (V3 is NA): move the variable abbreviation
# from V2 to V3 and label the measure as "SEM".
name_parts[is.na(V3), c("V3", "V2") := list(V2, "SEM")]
# Store the three parts in DT; `variable` is overwritten with the abbreviation.
DT[, c("dataset", "measure", "variable") := name_parts]
DT
strain variable value dataset measure
1: N2 dist 0.231024472 test avg
2: acy-1(LOF) dist 0.198503788 test avg
3: acy-1(GOF) dist 0.238929368 test avg
4: pde-4 dist 0.247270588 test avg
5: unc-43 dist 0.148316667 test avg
---
156: pde-4 spd 0.001445281 train SEM
157: unc-43 spd 0.004410885 train SEM
158: crh-1 spd 0.002483946 train SEM
159: glr-1 spd 0.003190276 train SEM
160: avr-14 spd 0.001746384 train SEM
unique(DT[, variable])
"dist" "dura" "mag" "prob" "spd"
unique(DT[, dataset])
"test" "train"
unique(DT[, measure])
"avg" "SEM"
Now, the abbreviated variable names are replaced by their full names using an update on join:
# Lookup table from abbreviated to full variable names. All five
# abbreviations present in DT are listed so that every facet of the plot gets
# a full name. NOTE(review): "Probability" and "Speed" are the presumed
# meanings of "prob" and "spd" -- confirm against the data description.
abbr2full <- data.table(
  variable = c("dist", "dura", "mag", "prob", "spd"),
  full = c("Distance", "Duration", "Magnitude", "Probability", "Speed")
)
# Update on join: overwrite `variable` by reference with the matching full
# name; the trailing [] prints the updated table.
DT[abbr2full, on = "variable", variable := full][]
Finally, a facetted plot of all five variables is created. dcast() is used to reshape the data from long to a wide format where each row has two measures avg and SEM.
library(ggplot2)
# Reshape to one row per strain/variable/dataset with the measures avg and
# SEM as separate columns, then draw one facet per variable.
DT_wide <- dcast(DT, ... ~ measure)
ggplot(
  DT_wide,
  aes(x = strain, y = avg, fill = dataset, ymin = avg - SEM, ymax = avg + SEM)
) +
  geom_col(position = "dodge") +
  geom_errorbar(position = position_dodge(width = 0.9), width = 0.1) +
  # Each variable gets its own y scale.
  facet_wrap(~ variable, scales = "free_y") +
  scale_fill_manual(values = c("lightseagreen", "indianred1")) +
  labs(x = "", y = "Average", fill = "") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 65, hjust = 1))

Resources