plot group and category means with group_by - r

I am new to R and trying to figure out a way to plot means for individual samples as well as group means with ggplot.
I am following this articles on R-bloggers (last paragraph):
https://www.r-bloggers.com/plotting-individual-observations-and-group-means-with-ggplot2/
This is my code:
gd <- meanplot1 %>%
group_by(treatment, value) %>%
summarise(measurement = mean(measurement))
ggplot(meanplot1, aes(x=value, y=measurement, color=treatment)) +
geom_line(aes(group=sample), alpha=0.3) +
geom_line(data=gd, size=3, alpha=0.9) +
theme_bw()
Whilst the sample means are being shown, the group means arenĀ“t. I get the error
geom_path: Each group consists of only one observation. Do you need
to adjust the group aesthetic?
Upon adding group=1, I get a weirdly mixed category mean, but not what I am looking for..
I scrolled through a lot of articles already, but couldnt find an answer - I would be so happy if somebody could help me out here!! :)
My data (meanplot1) is formatted like this:
treatment sample value measurement
1 control, control 1, initial, 20,
2 control, control 1, 26, NA,
3 control, control 1, 26', 28,
12 control, control 2, initial, 22,
13 control control 2, 26, NA,
14 control control 2, 26', 36,
15 control control 2, 28, 45,
67 stressed, stress 1, initial, 37,
68 stressed, stress 1, 26, NA,
69 stressed, stress 1, 26', 17,
78 stressed, stress 2, initial, 36,
79 stressed, stress 2, 26, NA,
80 stressed, stress 2, 26', 25,
I am hoping to see 6 lines, one mean for stress 1, stress 2, control 1 and control 2, and one mean for all treatment=control, and one for all treatment=stressed
output dput(gd):
structure(list(treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("control", "stressed"), class = "factor"), value = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L), .Label = c("26", "26'", "28", "28'",
"30", "30'", "32", "32'", "34", "34'", "initial"), class = "factor"),
measurement = c(NA, 32.3333333333333, 39.5, 30.3333333333333,
31.8333333333333, 31.8333333333333, NA, 36, 34.6666666666667,
36, 24.6666666666667, NA, 25.3333333333333, 33.3333333333333,
32, 50.1666666666667, 39.1666666666667, NA, 33.5, 24.3333333333333,
27.3333333333333, 36)), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -22L), vars = list(treatment), drop = TRUE, .Names = c("treatment",
"value", "measurement"))
output dput(meanplot1):
structure(list(treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("control",
"stressed"), class = "factor"), sample = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("control 1",
"control 2", "control 3", "control 4", "control 5", "control 6",
"stress 1", "stress 2", "stress 3", "stress 4", "stress 5", "stress 6"
), class = "factor"), value = structure(c(11L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), .Label = c("26", "26'",
"28", "28'", "30", "30'", "32", "32'", "34", "34'", "initial"
), class = "factor"), measurement = c(20L, NA, 28L, 18L, 17L,
19L, 34L, NA, 23L, 29L, 27L, 22L, NA, 36L, 45L, 31L, 40L, 44L,
NA, 49L, 40L, 39L, 32L, NA, 35L, 57L, 30L, 37L, 29L, NA, 44L,
37L, 46L, 20L, NA, 39L, 27L, 30L, 40L, 25L, NA, 29L, 50L, 30L,
26L, NA, 28L, 45L, 47L, 27L, 35L, NA, 24L, 22L, 35L, 28L, NA,
28L, 45L, 27L, 28L, 24L, NA, 47L, 30L, 39L, 37L, NA, 17L, 29L,
29L, 31L, 29L, NA, 37L, 21L, 27L, 36L, NA, 25L, 41L, 51L, 66L,
50L, NA, 33L, 25L, 22L, 36L, NA, 33L, 45L, 26L, 72L, 59L, NA,
33L, 26L, 25L, 33L, NA, 21L, 33L, 25L, 29L, 21L, NA, 26L, 20L,
16L, 22L, NA, 30L, 27L, 28L, 57L, 41L, NA, 28L, 23L, 17L, 52L,
NA, 26L, 25L, 33L, 46L, 35L, NA, 44L, 31L, 57L)), .Names = c("treatment",
"sample", "value", "measurement"), class = "data.frame", row.names = c(NA,
-132L))

I suppose you are aiming to plot the treatment means.
By default, since you are using a categorical x-axis, the grouping is set to the interaction between x and color. You only want to group by treatment, however. So we'll add the correct grouping to the call.
ggplot(meanplot1, aes(x = value, y = measurement, color=treatment)) +
geom_line(aes(group=sample), alpha=0.3) +
geom_line(aes(group = treatment), gd, size=3, alpha=0.9) +
theme_bw()
Also note that
ggplot(meanplot1, aes(x=value, y=measurement, color=treatment)) +
geom_line(aes(group=sample), alpha=0.3) +
stat_summary(aes(group = treatment), fun.y = mean, geom = 'line', size=3, alpha=0.9) +
theme_bw()
Gives the same plot, without the interruption.

Related

Change x axis labels to hours (time) on geom_tile()

Here is a geom_tile displaying hours and days of the week, how can it made to display each hour (i.e. 00:00 through to 23:00 on the x axis)?
library(tidyverse)
df %>%
ggplot(aes(hour, day, fill = value)) +
geom_tile(colour = "ivory")
Currently it displays every fifth hour:
I have tried a bunch of different things, and would prefer a 'best practice' way (i.e. without manually generating labels), but in case labels are needed, here's one way to produce them hour_labs <- 0:23 %>% { ifelse(nchar(.) == 1, paste0("0", .), .) } %>% paste0(., ":00")
Data for reproducible example
df <- structure(list(day = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Sunday",
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
), class = c("ordered", "factor")), hour = c(0L, 2L, 3L, 5L,
6L, 7L, 8L, 10L, 11L, 12L, 13L, 18L, 21L, 22L, 23L, 0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 20L, 21L, 22L,
23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 13L, 14L, 20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L, 20L, 21L, 22L, 23L, 0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 13L, 14L, 15L, 16L,
19L, 21L, 0L, 1L, 2L, 3L, 7L, 8L, 10L, 13L, 14L, 22L, 23L), value = c(1L,
1L, 1L, 2L, 1L, 3L, 1L, 1L, 2L, 1L, 3L, 1L, 2L, 13L, 13L, 24L,
39L, 21L, 17L, 25L, 22L, 27L, 28L, 19L, 6L, 2L, 2L, 1L, 2L, 2L,
7L, 23L, 38L, 18L, 26L, 21L, 20L, 31L, 40L, 35L, 22L, 5L, 3L,
2L, 7L, 4L, 3L, 3L, 3L, 17L, 13L, 23L, 24L, 19L, 31L, 13L, 35L,
50L, 22L, 13L, 7L, 2L, 1L, 1L, 1L, 1L, 3L, 14L, 17L, 33L, 32L,
32L, 25L, 29L, 27L, 38L, 26L, 11L, 8L, 4L, 5L, 5L, 3L, 1L, 1L,
3L, 14L, 21L, 24L, 22L, 25L, 26L, 23L, 58L, 36L, 26L, 6L, 3L,
1L, 5L, 3L, 1L, 1L, 3L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 1L)), row.names = c(NA, -116L), groups = structure(list(day = structure(1:7, .Label = c("Sunday",
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
), class = c("ordered", "factor")), .rows = structure(list(1:15,
16:33, 34:51, 52:69, 70:88, 89:105, 106:116), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr"))), row.names = c(NA, 7L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
Here's one way using sprintf to construct labels.
library(dplyr)
library(ggplot2)
df %>%
mutate(lab = sprintf('%02d:00', hour)) %>%
ggplot() + aes(lab, day, fill = value) +
geom_tile(colour = "ivory") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
To complete the missing times apart from #Eric Watt's suggestion we can also use complete.
df %>%
mutate(lab = sprintf('%02d:00', hour)) %>%
tidyr::complete(lab = sprintf('%02d:00', 0:23)) %>%
ggplot() + aes(lab, day, fill = value) +
geom_tile(colour = "ivory") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
I would suggest making sure your data type is correctly representing your data. If your hour column is representing time in hours, then it should be a time based structure. For example:
df$hour <- as.POSIXct(as.character(df$hour), format = "%H", tz = "UTC")
Then you can tell ggplot that the x axis is a datetime variable using scale_x_datetime.
ggplot(df, aes(hour, day, fill = value)) +
geom_tile(colour = "ivory") +
scale_x_datetime(labels = date_format("%H:%M")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
If you want a break for every hour, you can input that as breaks:
ggplot(df, aes(hour, day, fill = value)) +
geom_tile(colour = "ivory") +
scale_x_datetime(breaks = as.POSIXct(as.character(0:23), format = "%H", tz = "UTC"),
labels = date_format("%H:%M")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
You can also use the scales package which has handy formatting options such as date_breaks:
library(scales)
ggplot(df, aes(hour, day, fill = value)) +
geom_tile(colour = "ivory") +
scale_x_datetime(breaks = date_breaks("1 hour"),
labels = date_format("%H:%M")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

How to create histograms for each unique combination of levels from two factors?

I cannot figure out how to use a loop to plot one histogram for each unique combination of levels from TWO factors.
Here is my data: https://www.dropbox.com/sh/exsjhu23fnpwf4r/AABvitLBN1nRMpXcyYMVIOIDa?dl=0
# perhaps need to have factors
df$freq <- as.factor(df$freq)
df$time <- as.factor(df$time)
I learned how to use a loop to plot histograms for ONE factor levels:
# space for plots
windows(width=19, height=10)
par(las=1, cex.lab=0.75, cex.axis=0.6, bty="n", mgp=c(1, 0.6, 0),
oma=c(2, 4, 2, 0) + 0.1, mar=c(4, 0, 3, 3) + 0.1)
a <- layout(matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21), nrow=3, ncol=7, byrow=T))
layout.show(a)
# loop
for (i in 1:length(unique(df$freq))) {
value <- subset(df, freq == unique (df$freq)[i])
hist(value$thr, main=paste0("freq: ", unique(df$freq)[i]))
}
I tried variations of this loop for TWO factors but that unfortunately does not work:
for (i in 1:length(unique(df[c("freq", "time")]))) {
value <- subset(df, freq == unique (df$freq)[i] & time == unique(df$time)[i])
hist(value$thr, main=paste0("freq: ", unique(df$freq)[i]))
}
I would also like to learn how to label each histogram based on the levels of TWO factors (not just one)...
It's more convenient to use by here.
For the titles we prefer characters to factors.
df1[c("freq", "time")] <- lapply(df1[c("freq", "time")], as.character)
Then open windows,
windows(width=19, height=10)
par(las=1, cex.lab=0.75, cex.axis=0.6, bty="n", mgp=c(1, 0.6, 0),
oma=c(2, 4, 2, 0) + 0.1, mar=c(4, 0, 3, 3) + 0.1)
a <- layout(matrix(1:21, 3, 7))
layout.show(a)
and plot.
by(df1, df1[c("freq", "time")], function(x)
hist(x$thr, main=paste("freq:", paste(x[1, c(1, 3)], collapse=","))))
Result
Edit
To get the specific order we probably have to do some more stuff.
df1[c("freq", "time")] <- lapply(df1[c("freq", "time")], as.character)
windows(width=19, height=10)
par(las=1, cex.lab=0.75, cex.axis=0.6, bty="n", mgp=c(1, 0.6, 0),
oma=c(2, 4, 2, 0) + 0.1, mar=c(4, 0, 3, 3) + 0.1)
a <- layout(matrix(1:21, 3, 7, byrow=TRUE)) # with byrow
layout.show(a)
l <- split(df1, df1[c("freq", "time")])
m <- t(sapply(l, function(x) x[1, c(1, 3)])) # matrix of first rows of each subset
m[, 2] <- sub("m", "", m[, 2]) # use the values...
m <- apply(m, 1:2, as.numeric) # ... make numeric
Now we obtain the histograms within a lapply over the list ordered by m.
lapply(l[order(m[, 2], m[, 1])], function(x)
hist(x$thr, main=paste("freq:", paste(x[1, c(1, 3)], collapse=","))))
New Result
Data
df1 <- structure(list(freq = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("4",
"8", "12.5", "16", "20", "25", "31.5"), class = "factor"), thr = c(60L,
25L, 20L, 15L, 15L, 30L, 35L, 60L, 25L, 10L, 15L, 15L, 30L, 35L,
55L, 30L, 15L, 15L, 10L, 25L, 40L, 50L, 25L, 15L, 10L, 15L, 20L,
40L, 50L, 30L, 10L, 15L, 15L, 20L, 25L, 50L, 25L, 10L, 10L, 10L,
20L, 25L, 45L, 20L, 10L, 10L, 10L, 20L, 25L, 45L, 15L, 10L, 10L,
10L, 20L, 30L, 60L, 30L, 10L, 10L, 10L, 15L, 30L, 50L, 25L, 10L,
10L, 10L, 20L, 30L, 45L, 25L, 15L, 10L, 15L, 30L, 35L, 50L, 25L,
15L, 10L, 15L, 25L, 35L, 60L, 25L, 10L, 10L, 15L, 20L, 30L, 60L,
25L, 5L, 5L, 10L, 20L, 30L, 45L, 20L, 5L, 10L, 10L, 20L, 30L,
45L, 20L, 10L, 10L, 10L, 20L, 30L, 60L, 30L, 15L, 10L, 15L, 25L,
30L, 55L, 25L, 10L, 10L, 10L, 20L, 30L, 55L, 35L, 10L, 10L, 10L,
20L, 30L, 60L, 35L, 15L, 10L, 10L, 15L, 25L, 50L, 30L, 10L, 10L,
10L, 20L, 25L, 55L, 25L, 10L, 10L, 15L, 25L, 25L, 65L, 30L, 10L,
10L, 15L, 20L, 30L, 60L, 30L, 15L, 15L, 15L, 15L, 30L, 55L, 35L,
15L, 15L, 15L, 25L, 35L, 55L, 35L, 15L, 15L, 15L, 25L, 35L, 60L,
35L, 15L, 15L, 15L, 25L, 35L, 60L, 30L, 10L, 10L, 15L, 25L, 35L,
55L, 30L, 15L, 10L, 10L, 25L, 30L, 50L, 25L, 10L, 10L, 10L, 20L,
30L, 55L, 30L, 10L, 10L, 15L, 20L, 30L, 55L, 30L, 10L, 15L, 20L,
25L, 35L, 55L, 25L, 15L, 15L, 15L, 25L, 40L, 50L, 20L, 10L, 10L,
20L, 30L, 40L, 45L, 25L, 10L, 10L, 10L, 20L, 30L, 50L, 25L, 10L,
10L, 10L, 20L, 25L, 55L, 20L, 10L, 10L, 15L, 25L, 35L, 50L, 20L,
10L, 10L, 15L, 25L, 30L, 45L, 20L, 15L, 10L, 10L, 20L, 30L, 50L,
20L, 15L, 15L, 15L, 20L, 30L, 60L, 35L, 15L, 10L, 15L, 25L, 30L,
60L, 35L, 15L, 15L, 15L, 30L, 35L, 55L, 25L, 10L, 15L, 15L, 25L,
35L, 50L, 30L, 10L, 15L, 15L, 25L, 35L, 55L, 25L, 20L, 15L, 15L,
25L, 30L, 55L, 25L, 15L, 15L, 15L, 30L, 35L), time = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L), .Label = c("3m", "6m", "9m"), class = "factor")), row.names = c(NA,
-322L), class = "data.frame")

R diff lead difference between two columns?

Here is my dataframe:
structure(list(replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), start_time = c(164429106370979,
164429411618825, 164429837271940, 164430399454285, 164429106370980,
164429411618826, 164429837271941, 164430399454286, 164429106370981,
164429411618827, 164429837271942, 164430399454287, 164429106370982,
164429411618828, 164429837271943, 164430399454288, 164429106370983,
164429411618829, 164429837271944, 164430399454289, 164429106370984,
164429411618830, 164429837271945, 164430399454290, 164429106370985,
164429411618831, 164429837271946, 164430399454291, 164429106370986,
164429411618832, 164429837271947, 164430399454292, 164429106370987,
164429411618833, 164429837271948, 164430399454293, 164429106370988,
164429411618834, 164429837271949, 164430399454294, 164429106370989,
164429411618835, 164429837271950, 164430399454295, 164429106370990,
164429411618836, 164429837271951, 164430399454296, 164429106370991,
164429411618837, 164429837271952, 164430399454297, 164429106370992,
164429411618838, 164429837271953, 164430399454298, 164429106370993,
164429411618839, 164429837271954, 164430399454299), end_time = c(164429182443825,
164429512525748, 164429903243170, 164430465927555, 164429182443826,
164429512525749, 164429903243171, 164430465927556, 164429182443827,
164429512525750, 164429903243172, 164430465927557, 164429182443828,
164429512525751, 164429903243173, 164430465927558, 164429182443829,
164429512525752, 164429903243174, 164430465927559, 164429182443830,
164429512525753, 164429903243175, 164430465927560, 164429182443831,
164429512525754, 164429903243176, 164430465927561, 164429182443832,
164429512525755, 164429903243177, 164430465927562, 164429182443833,
164429512525756, 164429903243178, 164430465927563, 164429182443834,
164429512525757, 164429903243179, 164430465927564, 164429182443835,
164429512525758, 164429903243180, 164430465927565, 164429182443836,
164429512525759, 164429903243181, 164430465927566, 164429182443837,
164429512525760, 164429903243182, 164430465927567, 164429182443838,
164429512525761, 164429903243183, 164430465927568, 164429182443839,
164429512525762, 164429903243184, 164430465927569)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -60L), vars = c("replicate",
"press_id"), drop = TRUE, indices = list(0L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L), group_sizes = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L)), class = "data.frame", row.names = c(NA,
-60L), vars = c("replicate", "press_id"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L)), class = "data.frame", row.names = c(NA,
-60L), vars = c("replicate", "press_id"), drop = TRUE, .Names = c("replicate",
"press_id")), .Names = c("replicate", "press_id")), .Names = c("replicate",
"press_id", "start_time", "end_time"))
I want to get the inter press_id time diff for example:
replicate press_id start_time end_time time_diff
1 1 1.644291e+14 1.644292e+14 0 (it's a first row)
1 2 1.644294e+14 1.644295e+14 1.644294e+14 - 1.644292e+14
1 3 1.644298e+14 1.644299e+14 1.644298e+14 - 1.644295e+14
1 4 1.644304e+14 1.644305e+14 .....
2 1 1.644291e+14 1.644292e+14
2 2 1.644294e+14 1.644295e+14
2 3 1.644298e+14 1.644299e+14
2 4 1.644304e+14 1.644305e+14
I am trying to do this using mutate, lag, lead and diff but without any luck. I have grouped, and ungrouped the dataset, nothing helped me.
df %>%
group_by(replicate) %>%
mutate(d = ifelse(row_number() == 1, 0, lead(start_time) - end_time))
df %>%
group_by(replicate) %>%
mutate(d = start_time - lag(end_time))
And if you want zeroes except NAs for the first row of each unique value in the replicate column, you could do:
df %>%
group_by(replicate) %>%
mutate(d = start_time - lag(end_time),
d = ifelse(is.na(d), 0, d))
Or just:
df %>%
group_by(replicate) %>%
mutate(d = ifelse(row_number() == 1, 0, start_time - lag(end_time)))

add secondary grouping label to x axis in ggplot2

I would like to create a stacked bar graph that contains two levels of x-axis labels. For each stacked bar there is the primary label (dat$HUC_12_NAM), then I would like to group these stacked bars by dat$HUC_10_NAM and label this group as well. I could likely use annotate to manually define and place the labels, but that would be very time consuming, clunky, and could easily result in mis-labeling.
Here is the data....
dat <- structure(list(HUC_12_NAM = structure(c(3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Apostle Islands",
"Raspberry River-Frontal Lake Superior", "Sand River", "Saxine Creek-Frontal Lake Superior"
), class = "factor"), HUC_10_NAM = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Chequamegon Bay-Frontal Lake Superior",
"Sand River-Frontal Lake Superior"), class = "factor"), variable = structure(c(9L,
8L, 4L, 1L, 6L, 11L, 14L, 13L, 10L, 7L, NA, 5L, 15L, 3L, 2L,
12L, 8L, 6L, 3L, 2L, 4L, 1L, 15L, 5L, 11L, 14L, 10L, 9L, 13L,
7L, 12L, NA, 12L, 4L, 10L, 8L, 3L, NA, 2L, 6L, 1L, 13L, 7L, 11L,
9L, 14L, 5L, 15L, 9L, 1L, 8L, 12L, 10L, 4L, 3L, 11L, NA, 7L,
15L, 13L, 14L, 6L, 5L, 2L), .Label = c("Agriculture", "Barren land",
"Developed - High intensity", "Developed - Medium intensity",
"Developed - Low intensity", "Developed - Open space", "Evergreen forest",
"Deciduous forest", "Mixed forest", "Herbaceous", "Pasture",
"Shrub", "Woody wetland", "Herbaceous wetland", "Water"), class = "factor"),
perc_veg = c(11.8839579283911, 57.2626205743974, 0.00544969027593598,
0.514995731075951, 2.59586913477084, 2.53864738687351, 0.108085523806064,
5.3007320750604, 0.731166778688078, 6.04007338916238, 0,
0.0953695798288797, 0.11807662264528, 0, 0.00363312685062399,
12.8013224581736, 58.9563880536275, 4.47423752571726, 0.0158260043860641,
0.101738599624698, 0.0633040175442563, 0.180868621555018,
1.07390744048292, 0.300694083335217, 2.65876873685876, 0.00226085776943772,
0.065564875313694, 15.484614862879, 2.68363817232258, 7.99665393050123,
5.94153421808234, 0, 2.79708137828397, 0.0260443580892536,
0.0078546476777114, 30.3801236073503, 0.028524773145373,
0, 0.470038653134625, 1.99838773021352, 0.0355526158043779,
4.43084809524794, 23.6515843651171, 0.169081626325472, 32.6501167862089,
0.595713015978007, 0.174455858947064, 2.5845924884764, 23.2366527830367,
0.25141991669822, 52.6482393032942, 3.73494888299886, 0.136312003029156,
0.00605831124574025, 0, 1.85535781900795, 0, 11.0851950018932,
0.427110942824688, 2.85800833017796, 0, 3.54714123438092,
0.146914047709201, 0.0666414237031428)), .Names = c("HUC_12_NAM",
"HUC_10_NAM", "variable", "perc_veg"), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L,
91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L,
103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L), class = "data.frame")
And here is the current stacked bar plot...
library(ggplot2)
p <- ggplot () + geom_bar(data=dat,aes(x=HUC_12_NAM,y=perc_veg,fill=variable),stat='identity')
p <- p + coord_flip() #this helps fit the xlabel
p
And the resulting plot...
The next label, or grouping, would be from dat$HUC_10_NAM and in this example would add two additional labels, 'Sand River-Frontal Lake Superior' and 'Chequamegon Bay-Frontal Lake Superior'.
Maybe this would just be too cluttered...especially with the long names. But, I would like to see if there is a way to add these second level labels quickly and easily.
Thanks
-cherrytree
If you're willing to facet instead of adding a second row of labels, then you can do this:
ggplot(data=dat, aes(x=HUC_12_NAM, y=perc_veg, fill=variable)) +
geom_bar(stat='identity') +
facet_grid(. ~ HUC_10_NAM, scales="free")
Incidentally, you can reformat the longer labels with a line-break, for example:
dat[,1:2] = lapply(1:2, function(x) gsub("-","\n", dat[,x]))

Showing data.frame as table or matrix in R

I want to show the following data.frame
df <- structure(list(Variety = structure(c(2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L), .Label = c("F2022",
"F9917", "Hegari", "JS2002", "JS263", "PC1", "Sadabahar"), class = "factor"),
Priming = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 1L), .Label = c("CaCl2",
"Dry", "Hydro", "KCL", "KNO3", "NaCl", "Onfarm"), class = "factor"),
Letters = structure(c(1L, 3L, 10L, 11L, 10L, 19L, 27L, 5L,
28L, 11L, 18L, 20L, 9L, 1L, 22L, 14L, 30L, 26L, 24L, 3L,
22L, 9L, 16L, 10L, 15L, 25L, 6L, 7L, 17L, 30L, 18L, 13L,
20L, 27L, 19L, 29L, 23L, 2L, 8L, 12L, 6L, 31L, 8L, 22L, 4L,
32L, 21L, 33L, 2L), .Label = c("a", "at", "bcd", "bclq",
"bcq", "bd", "bds", "chlq", "ds", "e", "efg", "efgmnor",
"efgnor", "efgnr", "efgr", "eg", "fgmnor", "fmnor", "hijkl",
"hijkp", "hikl", "hklq", "ijkmp", "ijmop", "jmop", "mno",
"mnop", "mnor", "su", "t", "uv", "v", "w"), class = "factor")), .Names = c("Variety",
"Priming", "Letters"), row.names = c(NA, -49L), class = "data.frame")
as Table or matrix with Ordered Variety names along rows and Ordered Priming names along columns and showing Letter column in the main body of the table in R.
I could not figure out how to do this. Any help will be highly appreciated. Thanks in advance.
This should do it.
d <- d[order(d$Variety,d$Priming),]
dw <- reshape(data = d, idvar = 'Variety', timevar = 'Priming', direction = 'wide')
dw
You might want to edit the column names.
names(dw) <- gsub('Letters.', '', names(dw), fixed = TRUE)
Simple one
library(reshape2)
acast(data=df, formula=Variety~Priming)
CaCl2 Dry Hydro KCL KNO3 NaCl Onfarm
F2022 at mnop a hklq bds hijkl uv
F9917 a bcq hklq ds fgmnor su chlq
Hegari bcd mnor efgnr eg t ijkmp hklq
JS2002 e efg t e fmnor at bclq
JS263 efg fmnor mno efgr efgnor chlq v
PC1 e hijkp ijmop jmop hijkp efgmnor hikl
Sadabahar hijkl ds bcd bd mnop bd w

Resources