Plot Y values against the time grouped by an ID - r

I want to make a time series plot grouped by ID. My dataset has 42 different IDs, each with 7 different timeframes. The timeframe varies per ID and ranges from 9/2016 to 8/2018. I.e., ID1 can start 10/2016 and end 7/2017 (with 7 rows, each containing a different date) and ID40 can start 11/2016 and end 6/2018 (also with 7 rows, each containing a different date). I tried to plot this with the following code:
p <- ggplot(data = df6, aes(x = START, y = AI, col = ID, group = ID))
p + geom_point(size = 1.2,
alpha = .8) + stat_smooth(aes(group = 1)) + stat_summary(aes(group = 1), geom =
"point", fun.y = mean,
shape = 17, size = 3) + theme_minimal() + theme(axis.text.x = element_text(angle =
90, vjust = 0.5, hjust=1))
This gives me the following graph:
As one can see, the X-axis is not chronological. It should start at 09/2016 and end at 08/2018, with each Y value plotted at the date that belongs to its ID. I have the following dataset:
structure(list(ID = c("ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID10", "ID10", "ID10", "ID10", "ID10", "ID10", "ID10",
"ID11", "ID11", "ID11", "ID11", "ID11", "ID12"), Time = c("1",
"2", "3", "4", "5", "6", "7", "1", "2", "3", "4", "5", "6", "7",
"1", "2", "3", "4", "5", "1"), AI = c(0.393672183448241, 0.4876954603533,
0.411717908455957, 0.309769862660288, 0.149826889496538, 0.2448558592586,
0.123606753324621, 0.296109333767922, 0.309960002123076, 0.445886231347992,
0.370013553008003, 0.393414429902431, 0.318940511323733, 0.131112361225666,
0.31961673567578, 0.227268892979164, 0.433471105477564, 0.207184572401005,
0.144257239122978, 0.520204263001733), AI_VAR = c(0.154977788020905,
0.237846862049217, 0.169511636143347, 0.0959573678125739, 0.0224480968162077,
0.0599543918132674, 0.0152786294674538, 0.0876807375444826, 0.0960752029161373,
0.198814531305715, 0.136910029409606, 0.154774913655455, 0.101723049763444,
0.0171904512661696, 0.102154857724042, 0.0516511497159746, 0.187897199283942,
0.0429254470409874, 0.020810151039384, 0.270612475245176), activity = c(0,
0.303472222222222, 0.232638888888889, 0.228472222222222, 0.348611111111111,
0.215972222222222, 0.123611111111111, 0.357638888888889, 0.235416666666667,
0.233333333333333, 0.2875, 0.353472222222222, 0.356944444444444,
0.149305555555556, 0.448611111111111, 0.213888888888889, 0.248611111111111,
0.288888888888889, 0.25625, 0.238888888888889), ZIM_SD = c(0,
0.148002025121106, 0.095781596758851, 0.0707738088994687, 0.0522313184217097,
0.0528820640482116, 0.0152791681192935, 0.105900213118389, 0.0729697504998075,
0.104040120647865, 0.106378896489801, 0.139061072791901, 0.113844043625277,
0.0195758039329988, 0.143383618921218, 0.0486102909983211, 0.107765733167339,
0.059853320915846, 0.036965917525263, 0.124271018383747), ZIM_VAR = c(0,
0.0721799157746582, 0.039434998686126, 0.0219235930627339, 0.00782565597342798,
0.0129484832318932, 0.00188860836472692, 0.0313580415523671,
0.0226177040198407, 0.0463900573046668, 0.0393616334552618, 0.0547086326740462,
0.0363094774850072, 0.00256662987654616, 0.0458278042289798,
0.0110476070225835, 0.0467133314886466, 0.0124006847007297, 0.00533260120384214,
0.0646463135307921), CHECK = c(10L, 13L, 11L, 7L, 7L, 5L, 4L,
36L, 36L, 34L, 34L, 32L, 29L, 21L, 28L, 27L, 26L, 25L, 21L, 36L
), BULBAR = c(2L, 4L, 4L, 4L, 4L, 2L, 2L, 9L, 9L, 9L, 9L, 9L,
7L, 6L, 12L, 12L, 11L, 11L, 11L, 11L), FINE = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 9L, 9L, 8L, 8L, 7L, 6L, 4L, 2L, 1L, 1L, 1L, 0L, 7L
), GROSS = c(2L, 2L, 2L, 2L, 2L, 2L, 1L, 9L, 9L, 9L, 9L, 8L,
8L, 6L, 3L, 3L, 3L, 3L, 2L, 6L), RESPI = c(6L, 7L, 5L, 1L, 1L,
1L, 1L, 9L, 9L, 8L, 8L, 8L, 8L, 5L, 11L, 11L, 11L, 10L, 8L, 12L
), GROSS_RENEWD = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 6L, 6L, 6L,
5L, 5L, 4L, 3L, 3L, 3L, 3L, 2L, 3L), ACTIVE = c(2L, 2L, 2L, 2L,
2L, 2L, 1L, 18L, 18L, 17L, 17L, 15L, 14L, 10L, 5L, 4L, 4L, 4L,
2L, 13L), NON.ACTIVE = c(8L, 11L, 9L, 5L, 5L, 3L, 3L, 18L, 18L,
17L, 17L, 17L, 15L, 11L, 23L, 23L, 22L, 21L, 19L, 23L), START = c("09/2016",
"11/2016", "01/2017", "04/2017", "06/2017", "10/2017", "02/2018",
"10/2016", "12/2016", "02/2017", "04/2017", "07/2017", "11/2017",
"04/2018", "10/2016", "12/2016", "02/2017", "04/2017", "07/2017",
"10/2016"), STOP = c("10/2016", "11/2016", "01/2017", "04/2017",
"06/2017", "10/2017", "03/2018", "10/2016", "12/2016", "02/2017",
"04/2017", "07/2017", "11/2017", "04/2018", "10/2016", "12/2016",
"02/2017", "04/2017", "07/2017", "10/2016")), row.names = c(NA,
20L), class = "data.frame")
In general, I want the START column to be plotted chronologically, so that the x-axis begins with the earliest date and ends with the latest date.

You should convert your "START" column to a date format. You could use the package zoo with the function as.yearmon for that. To start the axis with your start date and end it with the end date, you could create a vector of date breaks using the min (start) date and max (end) date. Here is a reproducible example:
library(ggplot2)
library(zoo)
library(dplyr)

# Parse the "mm/YYYY" strings into Date values so ggplot2 places them on a
# continuous, chronological date axis instead of sorting them as text.
df6 <- df6 %>%
  mutate(START = as.Date(as.yearmon(START, format = "%m/%Y")))

# One axis break per month, from the earliest to the latest START date.
# seq() already begins at min(START), so no extra first element is needed.
breaks.vec <- seq(from = min(df6$START), to = max(df6$START), by = "month")

ggplot(data = df6, aes(x = START, y = AI, col = ID, group = ID)) +
  geom_point(size = 1.2, alpha = .8) +
  # group = 1 overrides the per-ID grouping so one smoother spans all IDs.
  stat_smooth(aes(group = 1)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(aes(group = 1), geom = "point", fun = mean, shape = 17, size = 3) +
  scale_x_date(breaks = breaks.vec, date_labels = "%m/%Y") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Created on 2022-10-17 with reprex v2.0.2

Related

Cannot plot the correct x-axis in ggplot2

I am plotting the following data using ggplot2 in R.
dat<-structure(list(Month = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L,
8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L), grp1 = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L), .Label = c("(-Inf,2]", "(2,7]", "(7,14]",
"(14, Inf]"), class = "factor"), n = c(71L, 59L, 36L, 10L, 55L,
73L, 18L, 10L, 97L, 82L, 22L, 5L, 120L, 79L, 15L, 2L, 140L, 62L,
15L, 174L, 60L, 11L, 188L, 71L, 2L, 183L, 53L, 2L, 211L, 50L,
2L, 171L, 69L, 7L, 1L, 98L, 85L, 13L, 6L, 72L, 62L, 24L, 9L)), class
= "data.frame", row.names = c(NA,-43L))
Here's my script:
library(ggplot2)
p<-ggplot(data=test,aes(Month, n, fill = grp1))
p<- p + geom_col()
p <- p + theme(panel.background=element_rect(fill="white"),
plot.margin = margin(0.5,0.5,0.5,0.5, "cm"),
panel.border=element_rect(colour="black",fill=NA,size=1),
axis.line.x=element_line(colour="black"),
axis.line.y=element_line(colour="black"),
axis.text=element_text(size=20,colour="black",family="sans"),
axis.title=element_text(size=20,colour="black",family="sans"),
legend.position = "right", legend.key = element_rect(fill = 'white'))
p <- p + scale_y_continuous(limits = c(0,300),breaks=c(seq(0,300,50)), expand=c(0,0))
p <- p + scale_x_discrete(breaks=c(seq(1,12,1)),labels=c("JAN","FEB","MAR","APR","MAY","JUN","JUL","AUG","SEP","OCT","NOV","DEC"),expand=c(0,0))
p <- p + labs(x = "Month", y = "Number of Days")
Here's the output:
Why is it that I cannot plot the x-axis values?
If I don't set the scale_x_discrete, the plot will look like this:
Any ideas on how to solve this?
I'll appreciate any help.
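If you want the month names along the x-axis, wrap Month in as.factor() in your ggplot call. Month is stored as an integer, so ggplot2 treats it as a continuous variable and scale_x_discrete() cannot attach its breaks and labels to it; converting it to a factor makes the axis discrete. Here's an example: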
# Stacked bar chart of day counts per month. Month is converted to a factor
# so that the discrete x scale can map the levels 1..12 to month names.
p <- ggplot(data = dat, aes(as.factor(Month), n, fill = grp1)) +
  geom_col() +
  theme(
    panel.background = element_rect(fill = "white"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    axis.line.x = element_line(colour = "black"),
    axis.line.y = element_line(colour = "black"),
    axis.text = element_text(size = 20, colour = "black", family = "sans"),
    axis.title = element_text(size = 20, colour = "black", family = "sans"),
    legend.position = "right",
    legend.key = element_rect(fill = 'white')
  ) +
  scale_y_continuous(
    limits = c(0, 300),
    breaks = seq(0, 300, 50),
    expand = c(0, 0)
  ) +
  scale_x_discrete(
    breaks = seq(1, 12, 1),
    labels = c("JAN", "FEB", "MAR", "APR", "MAY", "JUN",
               "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"),
    expand = c(0, 0)
  ) +
  labs(x = "Month", y = "Number of Days")

# Print the finished plot.
p
Which gives you this:-

Change x axis labels to hours (time) on geom_tile()

Here is a geom_tile displaying hours and days of the week. How can it be made to display each hour (i.e. 00:00 through to 23:00) on the x axis?
library(tidyverse)
df %>%
ggplot(aes(hour, day, fill = value)) +
geom_tile(colour = "ivory")
Currently it displays every fifth hour:
I have tried a bunch of different things, and would prefer a 'best practice' way (i.e. without manually generating labels), but in case labels are needed, here's one way to produce them hour_labs <- 0:23 %>% { ifelse(nchar(.) == 1, paste0("0", .), .) } %>% paste0(., ":00")
Data for reproducible example
df <- structure(list(day = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Sunday",
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
), class = c("ordered", "factor")), hour = c(0L, 2L, 3L, 5L,
6L, 7L, 8L, 10L, 11L, 12L, 13L, 18L, 21L, 22L, 23L, 0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 20L, 21L, 22L,
23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 13L, 14L, 20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L, 20L, 21L, 22L, 23L, 0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 13L, 14L, 15L, 16L,
19L, 21L, 0L, 1L, 2L, 3L, 7L, 8L, 10L, 13L, 14L, 22L, 23L), value = c(1L,
1L, 1L, 2L, 1L, 3L, 1L, 1L, 2L, 1L, 3L, 1L, 2L, 13L, 13L, 24L,
39L, 21L, 17L, 25L, 22L, 27L, 28L, 19L, 6L, 2L, 2L, 1L, 2L, 2L,
7L, 23L, 38L, 18L, 26L, 21L, 20L, 31L, 40L, 35L, 22L, 5L, 3L,
2L, 7L, 4L, 3L, 3L, 3L, 17L, 13L, 23L, 24L, 19L, 31L, 13L, 35L,
50L, 22L, 13L, 7L, 2L, 1L, 1L, 1L, 1L, 3L, 14L, 17L, 33L, 32L,
32L, 25L, 29L, 27L, 38L, 26L, 11L, 8L, 4L, 5L, 5L, 3L, 1L, 1L,
3L, 14L, 21L, 24L, 22L, 25L, 26L, 23L, 58L, 36L, 26L, 6L, 3L,
1L, 5L, 3L, 1L, 1L, 3L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 1L)), row.names = c(NA, -116L), groups = structure(list(day = structure(1:7, .Label = c("Sunday",
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
), class = c("ordered", "factor")), .rows = structure(list(1:15,
16:33, 34:51, 52:69, 70:88, 89:105, 106:116), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr"))), row.names = c(NA, 7L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
Here's one way using sprintf to construct labels.
library(dplyr)
library(ggplot2)
# Build a zero-padded "HH:00" label for every hour and use it as a discrete
# x variable, so each hour present in the data gets its own tick.
df %>%
  mutate(lab = sprintf('%02d:00', hour)) %>%
  ggplot(aes(lab, day, fill = value)) +
  geom_tile(colour = "ivory") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
To fill in the missing times, apart from @Eric Watt's suggestion, we can also use tidyr::complete().
# Same plot as before, but complete() first adds a row for every "HH:00"
# label from 00:00 to 23:00 so hours absent from the data still appear on
# the x-axis.
df %>%
  mutate(lab = sprintf('%02d:00', hour)) %>%
  tidyr::complete(lab = sprintf('%02d:00', 0:23)) %>%
  ggplot(aes(lab, day, fill = value)) +
  geom_tile(colour = "ivory") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
I would suggest making sure your data type is correctly representing your data. If your hour column is representing time in hours, then it should be a time based structure. For example:
df$hour <- as.POSIXct(as.character(df$hour), format = "%H", tz = "UTC")
Then you can tell ggplot that the x axis is a datetime variable using scale_x_datetime.
# Heat map with `hour` on a datetime x-axis (assumes `hour` has already been
# converted to POSIXct as shown above).
# `date_labels` formats the ticks as HH:MM using ggplot2 alone, so the plot
# does not rely on scales::date_format() having been attached beforehand.
ggplot(df, aes(hour, day, fill = value)) +
  geom_tile(colour = "ivory") +
  scale_x_datetime(date_labels = "%H:%M") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
If you want a break for every hour, you can input that as breaks:
# Same heat map with an explicit break for each of the 24 hours.
# The breaks are built exactly like the `hour` column (format "%H", UTC) so
# they line up with the data; `date_labels` formats them as HH:MM without
# needing the scales package to be attached.
ggplot(df, aes(hour, day, fill = value)) +
  geom_tile(colour = "ivory") +
  scale_x_datetime(
    breaks = as.POSIXct(as.character(0:23), format = "%H", tz = "UTC"),
    date_labels = "%H:%M"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
You can also use the scales package which has handy formatting options such as date_breaks:
library(scales)
# Variant using the scales helpers attached by library(scales):
# date_breaks("1 hour") requests one break per hour and date_format("%H:%M")
# formats each break as HH:MM.
ggplot(df, aes(hour, day, fill = value)) +
geom_tile(colour = "ivory") +
scale_x_datetime(breaks = date_breaks("1 hour"),
labels = date_format("%H:%M")) +
# Rotate the hour labels so all 24 fit along the axis.
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Plotting labels of Points outside of Polygon with ggplot2/ggrepel

I am trying to make a map of my study areas using ggplot2, as I have seen some good outputs made with it. Below is what I got from the following code.
The shapefile can be downloaded with this link.
library(ggplot2)
library(ggrepel)
points <- structure(list(city = structure(
c(
9L, 1L, 1L, 1L, 7L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 1L, 8L, 1L, 1L, 5L, 1L, 4L, 1L, 1L, 6L),
.Label = c("", "Adelaide", "Brisbane", "Canberra", "Darwin", "Hobart", "Melbourne", "Perth", "Sydney"),
class = "factor"),
site = structure(c(19L, 20L, 21L, 22L, 14L, 15L, 5L, 6L, 7L, 8L, 2L, 3L, 4L, 16L, 17L, 18L, 12L, 13L, 9L, 10L, 11L, 1L),
.Label = c("", "ADL1", "ADL2", "ADL3", "BNE1", "BNE2", "BNE3", "BNE4", "CBR1", "CBR2", "CBR3", "DRW1", "DRW2", "MEL1", "MEL2", "PER1", "PER2", "PER3", "SYD1",
"SYD2", "SYD3", "SYD4"), class = "factor"),
station = structure(c(8L, 5L, 12L, 16L, 10L, 3L, 22L, 18L, 17L, 20L, 2L, 14L, 11L, 4L, 7L, 19L, 15L, 21L, 6L, 9L, 13L, 1L),
.Label = c("", "Adelaide CBD", "Alphington", "Caversham", "Chullora", "Civic", "Duncraig", "Earlwood", "Florey", "Footscray", "Le Fevre 2", "Liverpool", "Monash", "Netley", "Palmerston", "Richmond", "Rocklea", "South Brisbane", "South Lake", "Springwood", "Winnellie", "Woolloongabba"), class = "factor"),
latitude = c(-33.9178, -33.8939, -33.9328, -33.6183, -37.8048, -37.7783, -27.4975, -27.4848, -27.5358, -27.6125, -34.9289, -34.9438, -34.7913, -31.9505, NA, NA, -12.50779, -12.4243233, -35.285307, -35.220606, -35.418302, -42.8821),
longitude = c(151.1347, 151.045, 150.9058, 150.7458, 144.8727, 145.0306, 153.035, 153.0321, 152.9934, 153.1356, 138.6011, 138.5492, 138.498, 115.8605, NA, NA, 130.94853, 130.8933502, 149.131579, 149.043539, 149.094018, 147.3272)),
.Names = c("city", "site", "station",
"latitude", "longitude"), class = "data.frame", row.names = c(NA, -22L))
AUS<-readRDS("gadm36_AUS_1_sp.rds")
ggplot() + geom_polygon(data = AUS, aes(x=long, y = lat, group = group, size=0.01),
fill = NA, color = "black") +
geom_point(data=points, aes(x=longitude, y=latitude), color ="blue", size=0.5) +
coord_fixed(1) +
geom_label_repel(data = points, aes(x=longitude, y=latitude, label=city),
box.padding = 1.2, point.padding = 1) +
theme_classic() + ylim(-60,0)+ xlim(100,180) + scale_size_identity()
Australia Map and my study areas:
Is there a way I can enlarge/expand the polygon so my points would be separated? Also can I direct all labels outside the polygon?

How do I melt/gather multiple variables (error bars) into one for mapping to geom_bar?

I'll start with my goal, which is to generate graphs for each of my variables (magnitude [mag], duration [dura] and distance [dist]), but with distinct error bars for train and test:
Almost finished graph
I have a data-frame that looks like this: (screenshot + dput below). It shows the responses (magnitude, distance, duration) during train and test of various biological strains along with their standard error (SEM). For example, the duration response at train is in column "train_avg_dura" and at test is "test_avg_dura". The standard error for each of these is in the columns train_duraSEM and test_duraSEM
df_group_sum.wide (data-frame)
dput data:
df_group_sum.wide <-
structure(list(strain = structure(1:8, .Label = c("N2", "acy-1(LOF)",
"acy-1(GOF)", "pde-4", "unc-43", "crh-1", "glr-1", "avr-14"), class = "factor"),
test_avg_dist = c(0.23102447163515, 0.198503787878788, 0.23892936802974,
0.247270588235294, 0.148316666666667, 0.195762711864407,
0.204740740740741, 0.238755154639175), test_avg_dura = c(1.04759733036707,
1.15537878787879, 0.914684014869888, 1.12286274509804, 0.828916666666667,
0.785491525423729, 0.788407407407407, 1.02309278350515),
test_avg_mag = c(0.112163461525871, 0.113447031611172, 0.15930172539742,
0.105397926645665, 0.0370000063024116, 0.0823626968797451,
0.0441620688813484, 0.135786546158742), test_distSEM = c(0.00460504533342531,
0.0050568065734325, 0.00945562739572128, 0.00524044558789062,
0.00882224860763199, 0.00983820301449839, 0.0162322856355826,
0.00738407922404085), test_duraSEM = c(0.0187491841242793,
0.0287113186085301, 0.0283764910080623, 0.0215386973519077,
0.0471018319675206, 0.0341593217329755, 0.0564553992545153,
0.0271939362203803), test_magSEM = c(0.00335619679815181,
0.00443251320170775, 0.00919066553588191, 0.00432150262248429,
0.00400887448034098, 0.00664866437888279, 0.00575860867691942,
0.00524462205156711), train_avg_dist = c(0.337652222222222,
0.294218518518519, 0.338651851851852, 0.311313725490196,
0.254675, 0.2737, 0.390688888888889, 0.314817948717949),
train_avg_dura = c(1.3543, 1.429, 1.19151851851852, 1.37256862745098,
1.236, 1.06376666666667, 1.41396296296296, 1.31512820512821
), train_avg_mag = c(0.1930557426236, 0.19297076970836, 0.212916856705011,
0.127417008935649, 0.0841239843171108, 0.117210954090848,
0.115413610503398, 0.179227387006556)), class = "data.frame", .Names = c("strain",
"test_avg_dist", "test_avg_dura", "test_avg_mag", "test_distSEM",
"test_duraSEM", "test_magSEM", "train_avg_dist", "train_avg_dura",
"train_avg_mag"), row.names = c(NA, -8L))
The problem I am having is how to add error bars using SEM since I need them to be merged into one variable as opposed to two when I map the variable to geom_bar. I think this is a melt issue but I can't figure it out.
Update:
The melted data-frame I used to plot the graph is as follows:
structure(list(strain = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("N2", "acy-1(LOF)",
"acy-1(GOF)", "pde-4", "unc-43", "crh-1", "glr-1", "avr-14"), class = "factor"),
variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L), .Label = c("test_avg_dist",
"test_avg_dura", "test_avg_mag", "test_avg_prob", "test_avg_spd",
"test_distSEM", "test_duraSEM", "test_magSEM", "test_probSEM",
"test_spdSEM", "train_avg_dist", "train_avg_dura", "train_avg_mag",
"train_avg_prob", "train_avg_spd", "train_distSEM", "train_duraSEM",
"train_magSEM", "train_probSEM", "train_spdSEM"), class = "factor"),
value = c(0.23102447163515, 0.198503787878788, 0.23892936802974,
0.247270588235294, 0.148316666666667, 0.195762711864407,
0.204740740740741, 0.238755154639175, 1.04759733036707, 1.15537878787879,
0.914684014869888, 1.12286274509804, 0.828916666666667, 0.785491525423729,
0.788407407407407, 1.02309278350515, 0.112163461525871, 0.113447031611172,
0.15930172539742, 0.105397926645665, 0.0370000063024116,
0.0823626968797451, 0.0441620688813484, 0.135786546158742,
0.457040018571118, 0.563727434411572, 0.624264612406578,
0.392625726149316, 0.219488346025285, 0.355836464305103,
0.158243463050796, 0.549997886634136, 0.218104671667048,
0.175578055416405, 0.256197987699313, 0.218534931269605,
0.181253278716812, 0.235434749265196, 0.236043513165036,
0.229165553562148, 0.00460504533342531, 0.0050568065734325,
0.00945562739572128, 0.00524044558789062, 0.00882224860763199,
0.00983820301449839, 0.0162322856355826, 0.00738407922404085,
0.0187491841242793, 0.0287113186085301, 0.0283764910080623,
0.0215386973519077, 0.0471018319675206, 0.0341593217329755,
0.0564553992545153, 0.0271939362203803, 0.00335619679815181,
0.00443251320170775, 0.00919066553588191, 0.00432150262248429,
0.00400887448034098, 0.00664866437888279, 0.00575860867691942,
0.00524462205156711, 0.00460504533342531, 0.0050568065734325,
0.00945562739572128, 0.00524044558789062, 0.00882224860763199,
0.00983820301449839, 0.0162322856355826, 0.00738407922404085,
0.00148090077905166, 0.00224725406956702, 0.00293788372166611,
0.00142518092482957, 0.00475313026432338, 0.00259537819051875,
0.00439432015310276, 0.00179190641262238, 0.337652222222222,
0.294218518518519, 0.338651851851852, 0.311313725490196,
0.254675, 0.2737, 0.390688888888889, 0.314817948717949, 1.3543,
1.429, 1.19151851851852, 1.37256862745098, 1.236, 1.06376666666667,
1.41396296296296, 1.31512820512821, 0.1930557426236, 0.19297076970836,
0.212916856705011, 0.127417008935649, 0.0841239843171108,
0.117210954090848, 0.115413610503398, 0.179227387006556,
0.525206741295172, 0.606796097537911, 0.592920766963248,
0.383218177729097, 0.294853306191478, 0.37983654970313, 0.244065736387288,
0.529995494304863, 0.245519078777542, 0.204069564920836,
0.279438682643543, 0.223741850875084, 0.203505986396722,
0.244494243449087, 0.263225928969608, 0.235094347033923,
0.00509151719343593, 0.00741331297357774, 0.0110354960774679,
0.0058641318136066, 0.0114389388703232, 0.0108143010933781,
0.0182904578688527, 0.00913426247712326, 0.0167858570502119,
0.0279705569908445, 0.030133138276768, 0.0219057666071679,
0.0479637760140276, 0.0332974908188985, 0.0605392786801207,
0.0323033076008837, 0.00498395111761598, 0.0081988397756359,
0.0107052683837969, 0.00442352355941589, 0.00723029142814287,
0.00764631328347674, 0.00980735575566329, 0.00789476278044047,
0.00509151719343593, 0.00741331297357774, 0.0110354960774679,
0.0058641318136066, 0.0114389388703232, 0.0108143010933781,
0.0182904578688527, 0.00913426247712326, 0.00139403793044242,
0.00220415921330836, 0.00299625483623813, 0.00144528089431754,
0.00441088530148196, 0.00248394605240026, 0.00319027562414684,
0.00174638373495128)), row.names = c(NA, -160L), .Names = c("strain",
"variable", "value"), class = "data.frame")
The code I used to plot this (after removing SEM rows) is as follows:
# Dodged bar chart of the magnitude values per strain, one bar per
# train/test variable. The surrounding parentheses print the plot while
# assigning it to abs_bar_mag.
# NOTE: assumes the SEM rows were already removed from df_group_sum.long (as
# stated in the post); otherwise grepl("mag", ...) also keeps the *_magSEM
# levels and the two-colour manual fill scale has too few values.
(abs_bar_mag <-
  df_group_sum.long %>%
  filter(grepl("mag", variable)) %>%
  ggplot(aes(x = strain,
             y = value,
             fill = variable)) +
  scale_fill_manual(values = c("lightseagreen", "indianred1")) +
  geom_bar(stat = "identity", position = "dodge") +
  #geom_errorbar(aes(ymin=value-1, ymax=value+1), width=.1, position = position_dodge(width=0.9)) +
  theme(panel.background = element_blank()) +
  theme(text = element_text(size = 20),
        axis.line = element_line(colour = "black")) +
  ggtitle("") +
  theme(plot.title = element_text(size = 30, hjust = 0.5, face = "bold"),
        axis.text = element_text(size = 70),
        strip.text = element_text(size = 40),
        axis.text.x = element_text(angle = 65, hjust = 1, size = 40),
        axis.title.y = (element_text(size = 65))) +
  # The former `scale_colour_manual(values = rev())` was dropped: rev() with
  # no argument raises an error, and no colour aesthetic is mapped here.
  labs(colour = "",
       y = "Magnitude",
       x = "")
)
I appreciate any pointers or solutions you may have!
Thanks,
Aram
The issue here is that the avg columns and the SEM (standard error) columns need to stay together. This requires to reshape two value columns simultaneously. See section 3.a of Efficient reshaping using data.tables for more details.
Therefore, we start with the data in wide format (df_group_sum.wide). To be in line with the code provided by the OP, only magnitudes are plotted.
library(data.table)
library(ggplot2)
# Reshape the wide table so every row holds one strain/dataset pair with its
# mean ("avg") and standard error ("SEM") side by side.
# patterns() selects two groups of measure columns: those ending in
# "avg_mag" and those ending in "magSEM"; value.name names the two resulting
# value columns.
# The wide data has no train_magSEM column, so SEM is NA for the train rows
# (see the printed result below).
# lvls_revalue() then renames the levels of `variable` to "test_mag" and
# "train_mag"; the trailing [] prints the result of the := update.
molten <- melt(
data.table(df_group_sum.wide), id.vars = "strain",
measure.vars = patterns("avg_mag$", "magSEM$"),
value.name = c("avg", "SEM"))[
, variable := forcats::lvls_revalue(variable, c("test_mag", "train_mag"))][]
molten
strain variable avg SEM
1: N2 test_mag 0.11216346 0.003356197
2: acy-1(LOF) test_mag 0.11344703 0.004432513
3: acy-1(GOF) test_mag 0.15930173 0.009190666
4: pde-4 test_mag 0.10539793 0.004321503
5: unc-43 test_mag 0.03700001 0.004008874
6: crh-1 test_mag 0.08236270 0.006648664
7: glr-1 test_mag 0.04416207 0.005758609
8: avr-14 test_mag 0.13578655 0.005244622
9: N2 train_mag 0.19305574 NA
10: acy-1(LOF) train_mag 0.19297077 NA
11: acy-1(GOF) train_mag 0.21291686 NA
12: pde-4 train_mag 0.12741701 NA
13: unc-43 train_mag 0.08412398 NA
14: crh-1 train_mag 0.11721095 NA
15: glr-1 train_mag 0.11541361 NA
16: avr-14 train_mag 0.17922739 NA
ggplot(molten,
aes(strain, avg, ymin = avg - SEM, ymax = avg + SEM, fill = variable)) +
geom_col(position = "dodge") +
geom_errorbar(width=.1, position = position_dodge(width=0.9)) +
scale_fill_manual(values=c("lightseagreen", "indianred1")) +
theme_bw() +
labs(fill = "", y = "Magnitude", x = "")
The OP has also provided a data.frame in long format, df_group_sum.long, which contains more data than df_group_sum.wide. These data should be plotted as well.
By looking at the variable names
unique(df_group_sum.long$variable)
[1] test_avg_dist test_avg_dura test_avg_mag test_avg_prob test_avg_spd
[6] test_distSEM test_duraSEM test_magSEM test_probSEM test_spdSEM
[11] train_avg_dist train_avg_dura train_avg_mag train_avg_prob train_avg_spd
[16] train_distSEM train_duraSEM train_magSEM train_probSEM train_spdSEM
20 Levels: test_avg_dist test_avg_dura test_avg_mag test_avg_prob ... train_spdSEM
the data.frame seems to contain aggregated data (avg and SEM) of five different variables (dist, dura, mag, prob, spd) of two data sets (train and test). Again, avg and SEM need to stay together on one row for plotting bar charts with error bars.
Unfortunately, the naming scheme is inconsistent. It would have been better if the variables containing standard errors had been named analogously to train_avg_mag, e.g., train_SEM_mag instead of train_magSEM.
So, the first step is to split up the variable names to get the different groups separately:
library(data.table)
DT <- data.table(df_group_sum.long)
DT[, c("dataset", "measure", "variable") :=
DT[, tstrsplit(variable, "_|SEM$")][is.na(V3), `:=`(V3 = V2, V2 = "SEM")]]
DT
strain variable value dataset measure
1: N2 dist 0.231024472 test avg
2: acy-1(LOF) dist 0.198503788 test avg
3: acy-1(GOF) dist 0.238929368 test avg
4: pde-4 dist 0.247270588 test avg
5: unc-43 dist 0.148316667 test avg
---
156: pde-4 spd 0.001445281 train SEM
157: unc-43 spd 0.004410885 train SEM
158: crh-1 spd 0.002483946 train SEM
159: glr-1 spd 0.003190276 train SEM
160: avr-14 spd 0.001746384 train SEM
unique(DT[, variable])
"dist" "dura" "mag" "prob" "spd"
unique(DT[, dataset])
"test" "train"
unique(DT[, measure])
"avg" "SEM"
Now, the abbreviated variable names are replaced by their full names using an update on join:
abbr2full <- data.table(
variable = c("dist", "dura", "mag"),
full = c("Distance", "Duration", "Magnitude")
)
DT[abbr2full, on = "variable", variable := full][]
Finally, a facetted plot of all five variables is created. dcast() is used to reshape the data from long to a wide format where each row has two measures avg and SEM.
library(ggplot2)
ggplot(dcast(DT, ... ~ measure),
aes(strain, avg, ymin = avg - SEM, ymax = avg + SEM, fill = dataset)) +
geom_col(position = "dodge") +
geom_errorbar(width=.1, position = position_dodge(width=0.9)) +
scale_fill_manual(values=c("lightseagreen", "indianred1")) +
theme_bw() +
labs(fill = "", y = "Average", x = "") +
facet_wrap(~ variable, scales = "free_y") +
theme(axis.text.x = element_text(angle = 65, hjust = 1))

Two legends for polar ggplot (with one customized)

Here is my data:
data <- structure(list(Indicator = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L, 1L, 2L,
2L, 2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L,
1L, 2L, 2L, 2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L), .Label = c("Causality",
"Climatechangeriskperceptions", "Currentadaptationoptions", "Fishingasalivelihoodactivity",
"Governance", "Roleofshadowstateactors"), class = "factor"),
Village = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L), .Label = c("Andra", "lahapau", "Pelipowai", "Ponam",
"Tulu"), class = "factor"), Variables = structure(c(13L,
3L, 10L, 11L, 12L, 16L, 5L, 8L, 1L, 2L, 15L, 17L, 6L, 14L,
9L, 4L, 7L, 13L, 3L, 10L, 11L, 12L, 16L, 5L, 8L, 1L, 2L,
15L, 17L, 6L, 14L, 9L, 4L, 7L, 13L, 3L, 10L, 11L, 12L, 16L,
5L, 8L, 1L, 2L, 15L, 17L, 6L, 14L, 9L, 4L, 7L, 13L, 3L, 10L,
11L, 12L, 16L, 5L, 8L, 1L, 2L, 15L, 17L, 6L, 14L, 9L, 4L,
7L, 13L, 3L, 10L, 11L, 12L, 16L, 5L, 8L, 1L, 2L, 15L, 17L,
6L, 14L, 9L, 4L, 7L), .Label = c("alternativelivelihood",
"anyactorsinvolvedinsustainability", "Attributionfactors",
"discusswithelectedleaders", "Effortsdirectedtoreducerisks",
"fishercommunityinfluence", "Infrastructureeffectiveness",
"multiplicityofactors", "Occupationforchildren", "Reversibility",
"Riskasamajorconsideration", "Riskbeingaddressed", "Statusoffisheries",
"Timefishing", "Whatwasdone", "Whoisatrisk", "whowasinvolved?"
), class = "factor"), legend.var = structure(c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L), .Label = c("a", "b", "c", "d", "e", "f", "g",
"h", "i", "j", "k", "l", "m", "n", "o", "p", "q"), class = "factor"),
score = c(1, 0.97, 1, 0.76, 0.794, 1, 0.71, 0.9, 0.5, 1,
1, 1, 1, 0.49, 0.72, 1, 0.7, 1, 1, 0, 0.67, 0.5, 1, 0.2,
1, 1, 0.7, 0.4, 0.5, 0.3, 0.67, 0.5, 0.7, 0.8, 1, 0, 0.46,
0.56, 0.375, 1, 0.13, 0.3, 0.5, 0.3, 0.3, 0.4, 0.6, 1, 1,
0.7, 0.8, 1, 0.86, 0.69, 0.51, 0.429, 1, 0.44, 0.3, 0.5,
0.6, 0.6, 0.7, 0.8, 0.4, 0.79, 0.8, 1, 1, 0.82, 0.85, 0.25,
0.226, 1, 0.18, 0.1, 1, 0.7, 0.3, 0.6, 0.3, 0.48, 0.16, 0.4,
0.8)), .Names = c("Indicator", "Village", "Variables", "legend.var",
"score"), class = "data.frame", row.names = c(NA, -85L))
I have made polar plots as follows:
library(ggplot2)
ggplot(data, aes(x = legend.var, y = score, fill = Indicator), color='black') +
geom_bar(width = 1, alpha=0.5, stat="identity") +
scale_y_continuous() +
coord_polar() +
theme( axis.ticks = element_blank()) +
facet_wrap(~Village, nrow=2, ncol=3) +
guides(colour = guide_legend(title.hjust = 0.5))
As you can see in the data, column legend.var is grouped by the column indicator. I would like to insert a table or a second legend which associates each of the indicator with the legend.var and variables column. Ideally if this is inserted as a second legend, the unique items comprised in legend.var column would have the same fill color as the corresponding indicator. The fill legend is based on the column indicator. The inserted table/extra legend would comprise columns legend.var with a unique alphabet and column variables with the meaning of the respective alphabet. These can then have the same fill color as the corresponding indicator. I hope this is clear.
Here is a solution using a gtable:
library(ggplot2)
# Faceted polar bar chart, one panel per village; the fill legend is moved
# inside the plotting area so the lookup table can sit to the right.
p <- ggplot(data, aes(x = legend.var, y = score, fill = Indicator), color='black') +
  geom_bar(width = 1, alpha=0.5, stat="identity") +
  scale_y_continuous() +
  coord_polar() +
  theme( axis.ticks = element_blank()) +
  facet_wrap(~Village, nrow=2, ncol=3) +
  guides(colour = guide_legend(title.hjust = 0.5)) +
  theme(legend.position=c(0.85,0.25))
#create table
# grid supplies grid.draw(); gridExtra (>= 2.0) imports grid but does not
# attach it, so it is loaded explicitly.
library(grid)
library(gridExtra)
# gridExtra >= 2.0 styles tables through a theme object instead of the old
# show.rownames / gpar.* arguments: rows = NULL hides the row names, and
# fg_params / bg_params carry the text and background settings.
tab <- tableGrob(
  unique(data[, c("legend.var", "Variables")]),
  rows = NULL,
  theme = ttheme_default(
    core = list(
      fg_params = list(fontsize = 10),
      bg_params = list(fill = "grey90", col = "white")
    ),
    colhead = list(
      fg_params = list(fontsize = 10, fontface = "bold"),
      bg_params = list(fill = "grey80", col = "white")
    )
  )
)
#arrange grobs
library(gtable)
# One row, two columns: 70% of the width for the plot, 30% for the table.
a <- gtable(unit(c(0.7, 0.3), "npc"), unit(1, "npc"))
a <- gtable_add_grob(a, ggplotGrob(p), 1, 1)
a <- gtable_add_grob(a, tab, 1, 2)
#plot
grid.draw(a)
For a start, you may try something like this. You need to adjust arrangement and layout according to your own preferences.
library(ggplot2)
library(gridExtra)
# Faceted polar bar chart, one panel per village.
gg <- ggplot(data, aes(x = legend.var, y = score, fill = Indicator), color='black') +
  geom_bar(width = 1, alpha=0.5, stat="identity") +
  coord_polar() +
  theme(axis.ticks = element_blank()) +
  facet_wrap(~Village, nrow=2, ncol = 3)
# create a table that translates legend.var to Variables
tbl <- unique(data[ , c("legend.var", "Variables")])
# create a table grob
# gridExtra >= 2.0 takes text styling through a theme object rather than the
# old col.just / gpar.* arguments. The left-justified header (hjust = 0,
# x = 0.1) stands in for the former col.just = "left".
# NOTE(review): confirm that col.just applied to the column headers.
tt <- tableGrob(
  d = tbl,
  theme = ttheme_default(
    core = list(fg_params = list(col = "black", cex = 0.5)),
    colhead = list(fg_params = list(col = "black", cex = 0.5,
                                    fontface = "bold", hjust = 0, x = 0.1)),
    rowhead = list(fg_params = list(col = "black", cex = 0.5,
                                    fontface = "italic"))
  )
)
# arrange plot and table grob
grid.arrange(gg, tt, ncol = 2)
update with a quick and dirty ggplot-only alternative
# create labels
labs <- with(tbl, paste(legend.var, Variables))
gg <- ggplot(data, aes(x = legend.var, y = score, fill = Indicator, col = Variables)) +
geom_bar(width = 1, alpha = 0.5, stat = "identity") +
coord_polar() +
theme(axis.ticks = element_blank()) +
facet_wrap(~ Village, nrow = 2, ncol = 3) +
scale_color_grey(labels = labs, start = 0.8, end = 0.9)
gg

Resources