Related
I'm trying to write a function to pass quoted items for constructing multiple ggplots. The following code works great and does what I want.
# Grouped bar chart of mean BALF protein per surgery group, filled by exposure
# and split into one facet column per exposure duration.
ggplot(
  data = fig2.data,
  mapping = aes(x = Surgery, y = BALF_Protein, fill = Exposure)
) +
  # Error bars (mean_se) are added first so the bars are drawn on top of them.
  stat_summary(fun.data = mean_se, geom = "errorbar", position = "dodge") +
  stat_summary(fun = mean, geom = "bar", position = "dodge") +
  scale_fill_manual(values = c("lightgrey", "darkgrey")) +
  facet_grid(cols = vars(Duration)) +
  theme_classic()
Using this guide I constructed the following function and called the function.
#' Grouped bar chart of group means with standard-error bars.
#'
#' Every column argument may be supplied either as a bare column name
#' (`plotf(x = Surgery, ...)`) or as a string -- a literal such as `"Surgery"`
#' or a variable holding one -- so the function can also be called from a loop
#' over a character vector of column names.
#'
#' @param x,y Columns mapped to the x and y axes; `y` is summarised by its mean.
#' @param fill Column mapped to the bar fill colour.
#' @param facet Column used to split the plot into facet columns.
#' @param data Data frame to plot. Defaults to the global `fig2.data`.
#' @return A ggplot object.
plotf <- function(x, y, fill, facet, data = fig2.data) {
  # Convert a captured argument into something aes()/vars() can map to a
  # column. A string would otherwise be mapped as a constant value, which is
  # why calling the function with quoted names used to draw the wrong plot.
  as_column <- function(quo) {
    if (rlang::quo_is_missing(quo)) {
      return(quo)
    }
    expr <- rlang::quo_get_expr(quo)
    if (is.character(expr)) {
      # String literal, e.g. x = "Surgery".
      value <- expr
    } else if (is.symbol(expr) && !(as.character(expr) %in% names(data))) {
      # A symbol that is not a column: it may be a variable holding the name.
      value <- tryCatch(rlang::eval_tidy(quo), error = function(e) NULL)
    } else {
      # Bare column name or a more complex expression: leave untouched.
      return(quo)
    }
    if (is.character(value) && length(value) == 1L) {
      rlang::sym(value)
    } else {
      quo
    }
  }

  x_quo <- rlang::enquo(x)
  y_quo <- rlang::enquo(y)
  fill_quo <- rlang::enquo(fill)
  facet_quo <- rlang::enquo(facet)

  x_var <- as_column(x_quo)
  y_var <- as_column(y_quo)
  fill_var <- as_column(fill_quo)
  facet_var <- as_column(facet_quo)

  ggplot(data, aes(x = !!x_var, y = !!y_var, fill = !!fill_var)) +
    stat_summary(geom = "errorbar", fun.data = mean_se, position = "dodge") +
    stat_summary(geom = "bar", fun = mean, position = "dodge") +
    theme_classic() +
    scale_fill_manual(values = c("lightgrey", "darkgrey")) +
    facet_grid(cols = vars(!!facet_var))
}

# Quoted column names are resolved to columns of the data, so this call draws
# the same plot as plotf(x = Surgery, y = BALF_Protein, ...).
plotf(x = "Surgery", y = "BALF_Protein", fill = "Exposure", facet = "Duration")
My graph rendered without errors, but it is not rendered the same way.
What am I doing wrong?
Thank you #Stefan
I don't understand why, but calling it as you suggested worked. How is that going to work when I want to loop over a vector of variable names to call the function and those are going to be passed as quoted. Use syms() ?
plotf(x = Surgery, y = BALF_Protein, fill = Exposure, facet = Duration)
ReproData here with some rnorm() so your plot might be slightly different heights.
fig2.data <- structure(list(Surgery = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("SHAM", "HEP VAG"
), class = "factor"), Exposure = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Air",
"Ozone"), class = "factor"), Duration = structure(c(2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1d",
"2d"), class = "factor"), BALF_Protein = c(64.2302655135303,
75.8662498743628, 66.944160651771, 64.3494818599307, 93.5733806883362,
93.9843061725941, 94.9296956493259, 85.5985055395191, 80.4974511604734,
70.6316004306272, 85.3439438112908, 79.4666853120619, 84.7319693413318,
224.606438793638, 78.4487502522719, 78.2128699744882, 92.0151032176434,
79.2127901600167, 83.0909690767245, 92.0325415462662, 60.6200784843927,
97.7183404856683, 68.7510921525122, 41.9625493809036, 311.769822036931,
450.597937801349, 283.639976251784, 190.840750069959, 187.810222461528,
203.735530975931, 547.003463243173, 517.871472878502, 164.167773487012,
202.777306107217, 666.896662547508, 361.46103562071, 270.119121964956,
234.635143377769, 94.4541075117046, 91.1060986818939, 142.774777316869,
300.021992736686, 279.775933301683, 246.554185364089, 298.964364163939,
193.737945537319, 232.918974192744, 150.384203703162)), row.names = c(NA,
-48L), class = "data.frame")
I am very new to R (a few months experience from online learning and reading) and have no coding experience before this.
I have been using a data set obtained from work (healthcare) for some practice. I wanted to demonstrate certain patient outcomes over time (by month) in this data set.
I've separated the data by month into a separate data frames that I have stored in a list. I then narrowed down each data frame within the list to the 3 post-operative outcomes that I want to look at. All three outcomes are binary (Y or N).
I would like to know if there is any way I can work out the percentages of "Y" for each of these outcomes by month, and then store this in an object that I can then plot to show the trend over time (by month).
Have I approached this problem completely wrongly? Should I not have used a list at all?
I managed to get to a point where I have a list of tables of Y's and N's but am now completely clueless as to what to do from there.
list(structure(list(Mobilised_D1 = structure(c(2L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L), .Label = c("N", "Y"), class =
"factor"),
Catheter_rm_D1 = structure(c(2L, 1L, 1L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 2L), .Label = c("N", "Y"), class = "factor"),
Diet_D1 = structure(c(2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("N", "Y"), class = "factor")), class =
"data.frame", row.names = 2:15),
structure(list(Mobilised_D1 = structure(c(1L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("N",
"Y"), class = "factor"), Catheter_rm_D1 = structure(c(1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L
), .Label = c("N", "Y"), class = "factor"), Diet_D1 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("N", "Y"), class = "factor")), class = "data.frame",
row.names = 16:31),
structure(list(Mobilised_D1 = structure(c(2L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("N", "Y"), class = "factor"),
Catheter_rm_D1 = structure(c(1L, 1L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 2L), .Label = c("N", "Y"), class = "factor"),
Diet_D1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("N", "Y"), class = "factor")), class =
"data.frame", row.names = 32:42),
structure(list(Mobilised_D1 = structure(c(2L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("N",
"Y"), class = "factor"), Catheter_rm_D1 = structure(c(2L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("N", "Y"), class = "factor"), Diet_D1 =
structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("N", "Y"), class = "factor")), class = "data.frame",
row.names = 43:60),
structure(list(Mobilised_D1 = structure(c(1L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, NA, 2L, 1L, 1L, 2L, NA), .Label = c("N",
"Y"), class = "factor"), Catheter_rm_D1 = structure(c(1L,
2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("N",
"Y"), class = "factor"), Diet_D1 = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("N",
"Y"), class = "factor")), class = "data.frame", row.names = 61:74),
structure(list(Mobilised_D1 = structure(c(1L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L), .Label = c("N",
"Y"), class = "factor"), Catheter_rm_D1 = structure(c(1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L
), .Label = c("N", "Y"), class = "factor"), Diet_D1 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("N", "Y"), class = "factor")), class = "data.frame",
row.names = 75:90))
For each component of the input list, L, take the proportion of "Y" values in each column, arranging the results into a multivariate time series with one row per month. Then plot it on a single panel. Remove facet = NULL if you want each series in a separate panel.
library(zoo)
library(ggplot2)
# `L` is the list of monthly data frames whose columns are "Y"/"N" outcomes.
# For each month, compute the proportion of "Y" in every column and stack the
# results into a matrix with one row per month and one column per outcome.
# na.rm = TRUE: an outcome that is missing for some patients would otherwise
# make the whole month NA and leave a gap in the plotted series.
series <- zoo(
  do.call(
    rbind,
    lapply(L, function(month_df) colMeans(month_df == "Y", na.rm = TRUE))
  )
)
autoplot(series, facet = NULL) + geom_point()
(continued after graph)
Alternative
An alternative is to create a data frame DF from L along with a month vector, and then aggregate by month as shown. This makes use of the fact that DF will have row names consisting of the month followed by a decimal point and the row number, in the original component, of the row that each row of DF was constructed from.
# Stack the monthly data frames; naming the list elements 1, 2, ... makes the
# row names of DF look like "<month>.<original row name>".
DF <- do.call("rbind", setNames(L, seq_along(L)))
# Keep only the part before the first dot, i.e. the month index.
month <- as.integer(sub("\\..*$", "", rownames(DF)))
# Proportion of "Y" per outcome and month. na.rm = TRUE is forwarded to mean()
# so that missing outcomes do not turn a whole month into NA.
series <- aggregate(zoo(DF == "Y"), month, mean, na.rm = TRUE)
autoplot(series, facet = NULL) + geom_point()
UPDATED:
Data has now been updated to full chemistry values as opposed to mean values.
I am attempting to create a box and whisker plot in r, on a very small dataset. My data is not behaving itself or I am missing some glaringly obvious error.
This is the code i have for making said plot
library(ggplot2)
# Read the incubation rates (columns used below: Patch, Temperature, CH4rate).
Methanogenesis_Data <- read.csv("CO2-CH4 Rates.csv")
# attach() was dropped: boxplot() already receives the data frame through
# `data =`, and attaching it only risks masking other objects.
summary(Methanogenesis_Data)
str(Methanogenesis_Data)
# One box per Patch x Temperature combination.
boxplot(CH4rate ~ Patch + Temperature,
        data = Methanogenesis_Data,
        xlab = "Patch", ylab = "CH4 Production")
cols <- c("red", "blue")
From this small dataset.
structure(list(Patch = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Gravel", "Macrophytes",
"Marginal"), class = "factor"), Temperature = structure(c(2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Cold",
"Warm"), class = "factor"), CH4rate = c(0.001262595, 0.00138508,
0.001675944, 0.001592354, 0.002169233, 0.001772964, 0.002156633,
0.002864403, 0.002301383, 0.002561042, 0.005189598, 0.004557227,
0.008484851, 0.006867866, 0.007438633, 0.005405327, 0.006381582,
0.008860084, 0.007615417, 0.007705906, 0.009198508, 0.00705233,
0.007943024, 0.008319768, 0.010362114, 0.007822153, 0.010339339,
0.009252302, 0.008249555, 0.008197657), CO2rate = c(0.002274825,
0.002484866, 0.003020209, 0.00289133, 0.003927232, 0.003219346,
0.003922613, 0.005217026, 0.00418674, 0.00466427, 0.009427322,
0.008236453, 0.015339532, 0.012494729, 0.013531303, 0.009839847,
0.011624428, 0.016136746, 0.0138831, 0.014051034, 0.016753211,
0.012780956, 0.01445912, 0.01515584, 0.01883252, 0.014249452,
0.018849478, 0.016863299, 0.015045964, 0.014941168)), .Names = c("Patch",
"Temperature", "CH4rate", "CO2rate"), class = "data.frame", row.names =
c(NA,
-30L))
The plot I get as output is good, however I would like the Variables on the X axis to simply display "Gravel" "Macrophytes" "Marginal" as opposed to each of those variables with Warm and Cold. Thanks for any assistance
THIS IS WHAT I AM TRYING TO ACHIEVE -----> Exact Boxplot I want to create
Following your update with an example graph :
I have also included the formatting for the legend position. If you want to edit the y axis label to include a subscript I would suggest you read over this. I have included a blank title for relabelling.
test <- structure(list(Patch = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Gravel", "Macrophytes",
"Marginal"), class = "factor"), Temperature = structure(c(2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Cold",
"Warm"), class = "factor"), CH4rate = c(0.001262595, 0.00138508,
0.001675944, 0.001592354, 0.002169233, 0.001772964, 0.002156633,
0.002864403, 0.002301383, 0.002561042, 0.005189598, 0.004557227,
0.008484851, 0.006867866, 0.007438633, 0.005405327, 0.006381582,
0.008860084, 0.007615417, 0.007705906, 0.009198508, 0.00705233,
0.007943024, 0.008319768, 0.010362114, 0.007822153, 0.010339339,
0.009252302, 0.008249555, 0.008197657), CO2rate = c(0.002274825,
0.002484866, 0.003020209, 0.00289133, 0.003927232, 0.003219346,
0.003922613, 0.005217026, 0.00418674, 0.00466427, 0.009427322,
0.008236453, 0.015339532, 0.012494729, 0.013531303, 0.009839847,
0.011624428, 0.016136746, 0.0138831, 0.014051034, 0.016753211,
0.012780956, 0.01445912, 0.01515584, 0.01883252, 0.014249452,
0.018849478, 0.016863299, 0.015045964, 0.014941168)), .Names = c("Patch",
"Temperature", "CH4rate", "CO2rate"), class = "data.frame", row.names =
c(NA,
-30L))
Now I will create two data sets, one for each graph, just for simplicity. You could leave them combined and facet, but for formatting purposes this might be easier.
library(dplyr)
library(tidyr)

# Reshape once to long format: one row per observation and rate type, with the
# rate name in `id` and its value in `value`. pivot_longer() replaces the
# superseded gather(); the result is a tibble with the same four columns.
rates_long <- test %>%
  pivot_longer(
    cols = c(CH4rate, CO2rate),
    names_to = "id",
    values_to = "value"
  )

# One data set per graph.
CH4rate <- rates_long %>%
  filter(id == "CH4rate")
CO2rate <- rates_long %>%
  filter(id == "CO2rate")
First plot:
# Box plot of the CH4 rate per patch type, with one box per incubation
# temperature inside each patch.
ggplot(CH4rate) +
  geom_boxplot(
    mapping = aes(
      x = Patch,
      y = value,
      # "Warm" is listed first so it comes before "Cold" in plot and legend.
      fill = factor(Temperature, levels = c("Warm", "Cold"))
    )
  ) +
  # NOTE(review): a numeric legend.position is deprecated from ggplot2 3.5.0
  # (use legend.position.inside there); kept for older versions.
  theme(
    legend.position = c(0.15, 0.9),
    panel.background = element_rect(fill = "white", colour = "grey50")
  ) +
  labs(title = "Title of graph", x = "Patch Type", y = "CH4rate") +
  # Colours and labels are both matched to the factor levels by name; the
  # labels now use a real degree sign instead of the "dot above" character.
  scale_fill_manual(
    name = "",
    values = c("Warm" = "orange", "Cold" = "light blue"),
    labels = c("Cold" = "Incubated at 10°C", "Warm" = "Incubated at 26°C")
  )
Second plot:
# Box plot of the CO2 rate per patch type, with one box per incubation
# temperature inside each patch.
ggplot(CO2rate) +
  geom_boxplot(
    mapping = aes(
      x = Patch,
      y = value,
      # "Warm" is listed first so it comes before "Cold" in plot and legend.
      fill = factor(Temperature, levels = c("Warm", "Cold"))
    )
  ) +
  # NOTE(review): a numeric legend.position is deprecated from ggplot2 3.5.0
  # (use legend.position.inside there); kept for older versions.
  theme(
    legend.position = c(0.15, 0.9),
    panel.background = element_rect(fill = "white", colour = "grey50")
  ) +
  labs(title = "Title of graph", x = "Patch Type", y = "CO2rate") +
  # Colours and labels are both matched to the factor levels by name; the
  # labels now use a real degree sign instead of the "dot above" character.
  scale_fill_manual(
    name = "",
    values = c("Warm" = "orange", "Cold" = "light blue"),
    labels = c("Cold" = "Incubated at 10°C", "Warm" = "Incubated at 26°C")
  )
I'd like to produce a step plot using ggplot2. I have datetime along the x-axis, and four discrete non-numeric factor levels on the y-axis. I'd like a line with steps between the levels where appropriate, but right now the code i've produced in ggplot2 isn't working. Any advice greatly appreciated!
So far I have the following code:
# With a discrete y variable ggplot2 groups the rows by factor level, so each
# activity would get its own disconnected line. group = 1 puts every
# observation in one group, giving a single step line that moves between
# activity levels over time.
ggplot(s2, aes(x = datetime, y = activity, group = 1)) +
  geom_step(colour = "blue")
which produces:
However, what I'd like is something that looks like this (I switched to geom_point, which works fine, and added the step lines by hand):
Here's a subset of my data, as a data.frame with datetime (POSIXct) and activity (factor):
s2 <- structure(list(`datetime` = structure(c(1496102400L,
1496109600L, 1496116800L, 1496124000L, 1496131200L, 1496138400L,
1496145600L, 1496152800L, 1496160000L, 1496167200L, 1496174400L,
1496181600L, 1496188800L, 1496196000L, 1496203200L, 1496210400L,
1496217600L, 1496224800L, 1496232000L, 1496239200L, 1496246400L,
1496253600L, 1496260800L, 1496268000L, 1496275200L, 1496282400L,
1496289600L, 1496296800L, 1496304000L, 1496311200L, 1496318400L,
1496325600L, 1496332800L, 1496340000L, 1496347200L, 1496354400L,
1496361600L, 1496368800L, 1496376000L, 1496383200L, 1496390400L,
1496397600L, 1496404800L, 1496412000L, 1496419200L, 1496426400L,
1496433600L, 1496440800L, 1496448000L, 1496455200L), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), activity = structure(c(1L, 2L, 3L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("HO",
"TR", "FO", "SE"), class = "factor")), .Names = c("datetime",
"activity"), row.names = c(NA, -50L), class = "data.frame")
I am trying to create a plot in ggplot showing the mean home range size of an animal according to different sexes, treatments, time periods and seasons. I get an error in R saying
Error: Aesthetics must be either length 1 or the same as the data (24): x, y, colour, shape
I have read similar posts about this error but I haven't been able to figure it out yet. There are no NA's in these columns and my numerical variables are being treated as such. Not sure if the error has to do with a need to sub set the data but I don't understand how I should do that. My code runs fine up until the ggplot part and it is the following:
library("ggplot2")
library("dplyr")
lion_HR_size <- read.csv(
  file = "https://dl.dropboxusercontent.com/u/23723553/lion_sample_data.csv",
  header = TRUE, row.names = 1
)

# Mean of home range size by season, treatment, sex and time
Mean_HR <- lion_HR_size %>%
  group_by(season, treatment, sex, time) %>%
  summarize(
    mean_HR = mean(Area_HR_km),
    se_HR = sd(Area_HR_km) / sqrt(n()),
    # Error-bar limits: mean minus / plus one standard error.
    lwrHR = mean_HR - se_HR,
    uprHR = mean_HR + se_HR
  )

# aes() only records the expressions; lwrHR and uprHR are looked up in the
# plot data when the layer is drawn, so they need not exist in the workspace.
limitsHR <- aes(ymin = lwrHR, ymax = uprHR)

ggplot(
  Mean_HR,
  aes(
    x = season,
    # Must be the column `mean_HR` (lower-case m). `Mean_HR` is the data frame
    # itself, and mapping it to y raises "Aesthetics must be either length 1
    # or the same as the data".
    y = mean_HR,
    colour = season,
    shape = season
  )
) +
  geom_point(size = 6, alpha = 0.5) +
  facet_grid(sex ~ treatment + time) +
  geom_errorbar(limitsHR, width = 0.1, col = "red", alpha = 0.8) +
  theme_bw()
As requested, the dput(Mean_HR) output is the following:
dput(Mean_HR)
structure(list(season = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L), .Label = c("Early_dry", "Late_dry", "Wet"), class = "factor"),
treatment = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L
), .Label = c("C", "E"), class = "factor"), sex = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("F", "M"), class = "factor"),
time = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("A",
"B"), class = "factor"), mean_HR = c(141.594090181, 138.327188493,
509.287443507692, 345.296845642381, 157.634028930833, 184.202160663125,
252.464096340667, 255.078012825, 59.8485325981818, 143.158189516522,
439.990400912593, 175.410885601333, 221.338774452381, 100.942251723636,
127.961533612727, 167.199563142143, 120.60363022375, 142.351764574211,
249.03854219, 330.018734301176, 123.992902995714, 219.886321226667,
307.869373359167, 296.019550844286), se_HR = c(18.6245437612391,
29.2548378154774, 127.987824704623, 78.9236194797204, 20.8897993194466,
43.1314245224751, 57.6327505533691, 32.1129054260719, 9.383853530199,
38.7678333459788, 130.348285186224, 31.707304307485, 29.1561478797825,
15.4038723326613, 18.1932127432015, 37.791782522185, 32.7089231722616,
33.2629181623941, 46.1500408067739, 88.8736578370159, 15.8046627788777,
36.9665360444972, 70.1560303348504, 87.1340476758794), lwrHR = c(122.969546419761,
109.072350677523, 381.29961880307, 266.373226162661, 136.744229611387,
141.07073614065, 194.831345787298, 222.965107398928, 50.4646790679828,
104.390356170543, 309.642115726369, 143.703581293848, 192.182626572598,
85.5383793909751, 109.768320869526, 129.407780619958, 87.8947070514884,
109.088846411816, 202.888501383226, 241.145076464161, 108.188240216837,
182.91978518217, 237.713343024316, 208.885503168406), uprHR = c(160.218633942239,
167.582026308477, 637.275268212315, 424.220465122101, 178.52382825028,
227.3335851856, 310.096846894036, 287.190918251072, 69.2323861283808,
181.9260228625, 570.338686098816, 207.118189908818, 250.494922332163,
116.346124056298, 146.154746355929, 204.991345664328, 153.312553396012,
175.614682736605, 295.188582996774, 418.892392138192, 139.797565774592,
256.852857271164, 378.025403694017, 383.153598520165)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -24L), vars = list(
season, treatment, sex), drop = TRUE, .Names = c("season",
"treatment", "sex", "time", "mean_HR", "se_HR", "lwrHR", "uprHR"
))
Could someone help me understand this error and how to fix it in my code? Many thanks!
Not entirely sure myself why/how the limitsHR <- ... statement works. I would have expected it to stop on not being able to find the lwrHR and uprHR objects in the workspace.
Anyhow, ggplot has a nice function mean_se() that will help you tremendously.
# Plot straight from the raw observations: stat_summary() applies mean_se to
# Area_HR_km for each season within every sex x (treatment, time) panel.
ggplot(
  lion_HR_size,
  aes(season, Area_HR_km, colour = season, shape = season)
) +
  stat_summary(fun.data = mean_se) +
  theme_bw() +
  facet_grid(sex ~ treatment + time)