tidy eval ggplot2 NSE not rendering correctly - r

I'm trying to write a function to pass quoted items for constructing multiple ggplots. The following code works great and does what I want.
fig2.data %>%
ggplot(aes(x = Surgery, y = BALF_Protein, fill = Exposure)) +
stat_summary(geom = "errorbar", fun.data = mean_se, position = "dodge") +
stat_summary(geom = "bar", fun = mean, position = "dodge") +
theme_classic() +
scale_fill_manual(values=c("lightgrey","darkgrey")) +
facet_grid(cols = vars(Duration))
Using this guide I constructed the following function and called the function.
# Dodged bar chart of fig2.data: bars show the mean of `y`, error bars the
# mean +/- standard error, with one facet column per level of `facet`.
#
# x, y, fill and facet are captured unevaluated and injected with {{ }}, so
# they are expected to be bare column names (e.g. x = Surgery). A quoted
# string such as "Surgery" is injected as a constant, not looked up as a column.
plotf <- function(x, y, fill, facet) {
  bar_mapping <- aes(x = {{ x }}, y = {{ y }}, fill = {{ fill }})
  grey_fill <- scale_fill_manual(values = c("lightgrey", "darkgrey"))
  facet_cols <- facet_grid(cols = vars({{ facet }}))

  ggplot(fig2.data, bar_mapping) +
    stat_summary(geom = "errorbar", fun.data = mean_se, position = "dodge") +
    stat_summary(geom = "bar", fun = mean, position = "dodge") +
    theme_classic() +
    grey_fill +
    facet_cols
}
plotf(x = "Surgery", y = "BALF_Protein", fill = "Exposure", facet = "Duration")
My graph rendered without errors, but it is not rendered the same way.
What am I doing wrong?

Thank you @Stefan
I don't understand why, but calling it as you suggested worked. How is that going to work when I want to loop over a vector of variable names to call the function and those are going to be passed as quoted. Use syms() ?
plotf(x = Surgery, y = BALF_Protein, fill = Exposure, facet = Duration)
ReproData here with some rnorm() so your plot might be slightly different heights.
fig2.data <- structure(list(Surgery = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("SHAM", "HEP VAG"
), class = "factor"), Exposure = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Air",
"Ozone"), class = "factor"), Duration = structure(c(2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1d",
"2d"), class = "factor"), BALF_Protein = c(64.2302655135303,
75.8662498743628, 66.944160651771, 64.3494818599307, 93.5733806883362,
93.9843061725941, 94.9296956493259, 85.5985055395191, 80.4974511604734,
70.6316004306272, 85.3439438112908, 79.4666853120619, 84.7319693413318,
224.606438793638, 78.4487502522719, 78.2128699744882, 92.0151032176434,
79.2127901600167, 83.0909690767245, 92.0325415462662, 60.6200784843927,
97.7183404856683, 68.7510921525122, 41.9625493809036, 311.769822036931,
450.597937801349, 283.639976251784, 190.840750069959, 187.810222461528,
203.735530975931, 547.003463243173, 517.871472878502, 164.167773487012,
202.777306107217, 666.896662547508, 361.46103562071, 270.119121964956,
234.635143377769, 94.4541075117046, 91.1060986818939, 142.774777316869,
300.021992736686, 279.775933301683, 246.554185364089, 298.964364163939,
193.737945537319, 232.918974192744, 150.384203703162)), row.names = c(NA,
-48L), class = "data.frame")

Related

Error: Argument 20 matches multiple formal arguments in venn.diagram function

I'm very new to the venn.diagram() function, and am trying to create a simple venn diagram. Here is the data I am using:
structure(list(Transmitter = c("1657", "1657", "1658", "1659",
"1659", "1660", "1660", "1661", "1662", "1663", "1663", "1664",
"1664", "1666", "1667", "1667", "1668", "1668", "1669", "1670",
"1671", "1671", "1672", "1672", "1673", "1673", "1674", "1674",
"1675", "1675", "1676", "1676", "1678", "1679", "1679", "1680",
"1681", "1681", "1682", "1682", "1683", "1684", "1685", "1686",
"1686", "9782", "9782", "24166", "24166", "24167", "24168", "24169",
"24170", "24171", "24172", "24173", "24174", "24175", "24175",
"24176", "24177", "24178", "24179", "24179", "24180", "24181",
"24182", "24183", "24184", "24184", "24185", "24186", "24187",
"24188", "24189", "24190", "24191", "24192", "24193", "24194",
"24194", "24195", "24195", "24196", "24197", "24198", "24198",
"24199", "24199", "24200", "24201", "24203", "24204", "24204",
"24206", "24207", "24209", "24210", "24211", "24212", "24212",
"24213", "24214", "24215", "24216", "24216", "24217", "24218",
"24219", "30759", "30760", "30761", "30761", "30761", "30762",
"30763", "30764", "30765", "30765", "30765", "30766", "30766",
"30766", "30767", "30767", "30768", "30768", "30768", "30769",
"30769", "30769", "30770", "30771", "30772", "30772", "30772",
"30773", "30773", "30773", "30774", "30774", "30775", "30775",
"30776", "30776", "30777", "30777", "30777", "30778", "30778",
"30779", "30780", "30780", "30780", "30781", "30782", "30782",
"30783", "30784", "30785", "30786", "30787", "30788", "30788"
), Direction = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L), .Label = c("Marine",
"River"), class = "factor")), row.names = c(NA, -164L), class = "data.frame")
I want to create a venn diagram with a circle for each direction. Inside each circle is a number indicating the number of transmitters that are considered 'river', 'marine' or both.
This is some code I modified from a website:
install.packages('VennDiagram')
library(VennDiagram)
venn.diagram(
x = list(
lasts2WOFD %>% filter(Direction == 'Marine') %>% select(Transmitter) %>% unlist() ,
lasts2WOFD %>% filter(Direction == 'River') %>% select(Transmitter) %>% unlist()
),
category.names = c("Marine" , "Fresh"),
filename = 'VennDiagram',
output = TRUE ,
imagetype="png" ,
height = 480 ,
width = 480 ,
resolution = 300,
compression = "lzw",
lwd = 1,
col=c("#440154ff", '#21908dff'),
fill = c(alpha("#440154ff",0.3), alpha('#21908dff',0.3)),
cex = 0.5,
fontfamily = "sans",
cat.cex = 0.3,
cat.default.pos = "outer",
cat.pos = c(-27, 27),
cat.dist = c(0.055, 0.055),
cat.fontfamily = "sans",
cat.col = c("#440154ff", '#21908dff'),
rotation = 1
)
When run, I get this error:
Error in VennDiagram::draw.pairwise.venn(area1 = length(x[[1]]), area2 = length(x[[2]]), :
argument 20 matches multiple formal arguments
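Regarding your question, I had a look at the source code of VennDiagram and I saw that rotation is part of venn.diagram, but not of draw.pairwise.venn. The parameter gets passed on to draw.pairwise.venn but cannot be used there; the "matches multiple formal arguments" error most likely arises because rotation partially matches two of its arguments, rotation.degree and rotation.centre. Simply remove rotation = 1 and it should work.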
I understand this does not answer your question, but I just wanted to let you know that you can get the diagram with other packages. My nVennR package can do that in a couple of steps. If your object is called lasts2WOFD,
>library(nVennR)
>myV <- plotVenn(list(River=subset(lasts2WOFD, Direction == "River")$Transmitter, Marine=subset(lasts2WOFD, Direction == "Marine")$Transmitter))
The result would be:
You can control the output as explained in the vignette. You can also export a vectorial svg file that you can edit afterwards.

Stacked barplot using ggplot2 - data visualisation

I have very little experience with R and am trying to make a stacked barplot using ggplot2.
I have 2 groups - control and experimental, and 2 choices - red and green. I'm not sure how to organise my data.
There were 80 animals in my trial (control n=40, experimental n=40) and they were given the choice of red and green substrate, I noted which substrate they chose, and that's the data I'm trying to plot.
I would essentially want 'Experimental' and 'Control' on the x-axis, and the number of choices on the y-axis (e.g. Control, Red n=20; Control, Green n=12, etc.).
Any help would be appreciated!
Edited to add:
This is the graph it's outputting
This is the code I'm using (including suggested adjustments):
df <- data.frame(group = rep(c("control", "experimental"), each = 40),
substrate = sample (c("red","green"), 80, TRUE))
ggplot(df, aes(x = group, y = substrate, fill = substrate)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = c("red", "green"))
This is the output:
structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("control", "experimental"
), class = "factor"), substrate = structure(c(1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L), .Label = c("green",
"red"), class = "factor")), class = "data.frame", row.names = c(NA,
-80L))
output from df(behaviour) - original dataframe
structure(list(group = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Control", "Experimental"
), class = "factor"), substrate = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Green",
"Red"), class = "factor")), class = "data.frame", row.names = c(NA,
-80L))
Your data:
behaviour=structure(list(group = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Control", "Experimental"
), class = "factor"), substrate = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Green",
"Red"), class = "factor")), class = "data.frame", row.names = c(NA,
-80L))
We can tabulate your data:
table(behaviour$group,behaviour$substrate)
Green Red
Control 10 30
Experimental 27 13
So with geom_bar() you should specify either fill or y here, not both. In your case, you specify the fill, and the geom_bar() function will do the counting for you:
ggplot(behaviour,aes(x=group,fill=substrate))+
geom_bar() + scale_fill_manual(values=c("#29c7ac","#c02739"))
You could have your data like this, with one row for each observation (i.e. each animal), with the group and the substrate recorded for each:
df <- data.frame(group = rep(c("control", "experimental"), each = 40),
substrate = rep(c("green", "red", "green", "red"), c(10, 30, 27, 13)))
Now define your plot using ggplot, specifying group as your x axis, and ..count.. as your y axis. Use geom_bar to get the stacked bars you are looking for, and finally use scale_fill_manual to set the colours:
library(ggplot2)
# Stacked bars of choice counts per group. after_stat(count) is the current
# spelling of the older ..count.. notation: the number of rows that geom_bar()
# counts for each group/substrate combination.
ggplot(df, aes(x = group, y = after_stat(count), fill = substrate)) +
  geom_bar(colour = "black") +
  scale_fill_manual(values = c("green", "red"))

Error when running poisson regression with a binary outcome

I am trying to run a poisson regression to predict a common binary outcome.
This is my first attempt at using dput - if I have used it inappropriately, please let me know so I can correct it.
Example data:
df <- structure(list(id = 1:30, sex = structure(c(1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L), .Label = c("Female", "Male"
), class = "factor"), migStat = structure(c(1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L), .Label = c("Australian-born",
"Migrant"), class = "factor"), mhAreaBi = structure(c(1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L), .Label = c("Metropolitan",
"Regional"), class = "factor"), empStatBi = structure(c(2L, 2L,
1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L,
2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Student / employed",
"Unemployed"), class = "factor"), pensBenBi = structure(c(1L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L), .Label = c("No benefit",
"In receipt of pension benefit"), class = "factor"), maritStatBi = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L), .Label = c("Married (including de facto)",
"Not married"), class = "factor"), cto = structure(c(1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L), .Label = c("No",
"Yes"), class = "factor")), .Names = c("id", "sex", "migStat",
"mhAreaBi", "empStatBi", "pensBenBi", "maritStatBi", "cto"), row.names = c(NA,
-30L), class = "data.frame")
When running the regression using glm in R, I receive an error:
fit <- glm(cto ~ sex + migStat + mhAreaBi + empStatBi + pensBenBi + maritStatBi, df, family = poisson)
Error in if (any(y < 0)) stop("negative values not allowed for the 'Poisson' family") :
missing value where TRUE/FALSE needed
In addition: Warning message:
In Ops.factor(y, 0) : ‘<’ not meaningful for factors
The same error has been explained briefly in this thread:
Because the "<" operator is not defined for factors the result that is
passed to if is of length 0. Setting the factor variable on the RHS
and using the integer values on the LHS succeeds.
The error does not appear when I convert the outcome to an integer; however, this:
seems to defeat the purpose of predicting a binary outcome (unless a numeric variable with range 0-1 is treated the same as a factor variable with two levels); and
does not seem necessary (at least according to this post, which uses geeglm from geepack to predict a binary outcome [unfortunately, I receive the same error when I adapt the code to my own dataset])
Questions:
Could I receive further explanation of the error?
If I convert my outcome to an integer with range 0-1, will glm treat it the same as a binary variable? If not, is there an approach better suited to running a regression for a common binary outcome?
I think the best option here is:
df$cto_binary <- as.numeric(df$cto == "Yes")
fit <- glm(cto_binary ~ sex + migStat + mhAreaBi + empStatBi + pensBenBi + maritStatBi,
df, family = poisson)
As this way you explicitly show in your code what will be a 1/success in your binary outcome and don't get tripped up by things like the ordering of factor levels. Note that in R as.numeric(c(FALSE, TRUE)) gives c(0, 1), so you always know what you're going to get from a logical comparison.

ggplot2 error: Aesthetics must be either length 1 or the same as the data (24)

I am trying to create a plot in ggplot showing the mean home range size of an animal according to different sexes, treatments, time periods and seasons. I get an error in R saying
Error: Aesthetics must be either length 1 or the same as the data (24): x, y, colour, shape"
I have read similar posts about this error but I haven't been able to figure it out yet. There are no NA's in these columns and my numerical variables are being treated as such. Not sure if the error has to do with a need to sub set the data but I don't understand how I should do that. My code runs fine up until the ggplot part and it is the following:
library("ggplot2")
library("dplyr")
lion_HR_size <- read.csv(file = "https://dl.dropboxusercontent.com/u/23723553/lion_sample_data.csv",
header= TRUE, row.names=1)
# Mean of home range size by season, treatment, sex and time
Mean_HR <- lion_HR_size %>%
group_by(season, treatment, sex, time) %>%
summarize(
mean_HR = mean(Area_HR_km),
se_HR = sd(Area_HR_km)/sqrt(n()),
lwrHR = mean_HR - se_HR,
uprHR = mean_HR + se_HR)
limitsHR <- aes(ymin = lwrHR, ymax= uprHR)
ggplot(Mean_HR,
aes(x=season,
y= Mean_HR,
colour=season,
shape= season)) +
geom_point( size = 6, alpha = 0.5)+
facet_grid(sex ~ treatment+time)+
geom_errorbar(limitsHR, width = 0.1, col = 'red', alpha = 0.8)+
theme_bw()
As requested, the dput(Mean_HR) output is the following:
dput(Mean_HR)
structure(list(season = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L), .Label = c("Early_dry", "Late_dry", "Wet"), class = "factor"),
treatment = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L
), .Label = c("C", "E"), class = "factor"), sex = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("F", "M"), class = "factor"),
time = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("A",
"B"), class = "factor"), mean_HR = c(141.594090181, 138.327188493,
509.287443507692, 345.296845642381, 157.634028930833, 184.202160663125,
252.464096340667, 255.078012825, 59.8485325981818, 143.158189516522,
439.990400912593, 175.410885601333, 221.338774452381, 100.942251723636,
127.961533612727, 167.199563142143, 120.60363022375, 142.351764574211,
249.03854219, 330.018734301176, 123.992902995714, 219.886321226667,
307.869373359167, 296.019550844286), se_HR = c(18.6245437612391,
29.2548378154774, 127.987824704623, 78.9236194797204, 20.8897993194466,
43.1314245224751, 57.6327505533691, 32.1129054260719, 9.383853530199,
38.7678333459788, 130.348285186224, 31.707304307485, 29.1561478797825,
15.4038723326613, 18.1932127432015, 37.791782522185, 32.7089231722616,
33.2629181623941, 46.1500408067739, 88.8736578370159, 15.8046627788777,
36.9665360444972, 70.1560303348504, 87.1340476758794), lwrHR = c(122.969546419761,
109.072350677523, 381.29961880307, 266.373226162661, 136.744229611387,
141.07073614065, 194.831345787298, 222.965107398928, 50.4646790679828,
104.390356170543, 309.642115726369, 143.703581293848, 192.182626572598,
85.5383793909751, 109.768320869526, 129.407780619958, 87.8947070514884,
109.088846411816, 202.888501383226, 241.145076464161, 108.188240216837,
182.91978518217, 237.713343024316, 208.885503168406), uprHR = c(160.218633942239,
167.582026308477, 637.275268212315, 424.220465122101, 178.52382825028,
227.3335851856, 310.096846894036, 287.190918251072, 69.2323861283808,
181.9260228625, 570.338686098816, 207.118189908818, 250.494922332163,
116.346124056298, 146.154746355929, 204.991345664328, 153.312553396012,
175.614682736605, 295.188582996774, 418.892392138192, 139.797565774592,
256.852857271164, 378.025403694017, 383.153598520165)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -24L), vars = list(
season, treatment, sex), drop = TRUE, .Names = c("season",
"treatment", "sex", "time", "mean_HR", "se_HR", "lwrHR", "uprHR"
))
Could someone help me understand this error and how to fix it in my code? Many thanks!
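Not entirely sure myself why/how the limitsHR <- ... statement works. I would have expected it to stop on not being able to find the lwrHR and uprHR objects in the workspace. (It works because aes() only captures the expressions; lwrHR and uprHR are looked up in the data later, when the layer is built.) As for the error itself, note that the plot maps y = Mean_HR, which is the name of the data frame, whereas the column is called mean_HR.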
Anyhow, ggplot has a nice function mean_se() that will help you tremendously.
ggplot(data = lion_HR_size, mapping = aes(x = season, y = Area_HR_km,
colour=season, shape= season)) +
stat_summary(fun.data = mean_se) +
facet_grid(sex ~ treatment+time)+
theme_bw()

Boxplot with two levels and multiple data.frames

I have 4 data.frames with two factor levels in each data.frame. df1 is reproduced below. Please duplicate df1 to produce df2...df4.
How can I produce boxplots with ggplot2 such that my final figure looks very similar to the figure below? The seasons in the figure represent the dataframe names, while present and future represent level names, and the legend represents heavy, heavier, heaviest in the data reproduced here.
Ignore the dotted horizontal red line.
df1= structure(list(id = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NN", "SS"), class = "factor"),
heavy = c(0.136230125, 0.136281211, 0.136038018, 0.135392862,
0.137088902, 0.136028293, 0.13640057, 0.135317058, 0.13688615,
0.136448994, 0.137089424, 0.136810847, 0.135865471, 0.136130096,
0.136361327, 0.137796714, 0.136052839, 0.135892646, 0.13544437,
0.136452363, 0.135367421, 0.135617509, 0.138202559, 0.135396942,
0.135930092, 0.135661805, 0.135666, 0.135860128, 0.137648687,
0.136057353, 0.136057731, 0.135162399, 0.136080113, 0.135285036,
0.136204839, 0.138058091, 0.137215664, 0.135696637, 0.135863902,
0.135733243, 0.138274445, 0.136632122, 0.137787919, 0.135033093,
0.136926798, 0.136766413, 0.13690947, 0.135203152, 0.138370968,
0.136862356, 0.136083112, 0.138212845, 0.135964773, 0.13583601,
0.134923731, 0.135828965, 0.136272539, 0.138127602, 0.137028323,
0.136526836, 0.136407397, 0.137025373, 0.138358757, 0.137858521,
0.135464076, 0.136302506, 0.135528362, 0.137540677, 0.136455865,
0.138470144, 0.137227895, 0.136296955, 0.136792631, 0.135875782,
0.13815733, 0.136383864, 0.136696618, 0.13857652, 0.136700903,
0.136743873, 0.136033619, 0.135970522, 0.135816385, 0.136003984,
0.136583925, 0.136768202, 0.136292002, 0.136316737, 0.136540075,
0.136051218, 0.135924119, 0.136736303, 0.136946894, 0.136266073,
0.136263692, 0.136399301, 0.13611577, 0.135857095, 0.136769488,
0.136072466, 0.135564224, 0.136496131, 0.137659507, 0.136704681,
0.136542173, 0.136777403, 0.135771538, 0.13665463, 0.136984748,
0.137717859, 0.138195237, 0.136232227, 0.135956814), heavier = c(0.227332679,
0.227200132, 0.227299118, 0.227289816, 0.22724478, 0.227082442,
0.227861315, 0.227055561, 0.227112284, 0.228651438, 0.228158412,
0.228789678, 0.227188949, 0.228850198, 0.227246991, 0.227359368,
0.227359531, 0.227310607, 0.229490445, 0.227295226, 0.227958185,
0.228104958, 0.227254823, 0.22715392, 0.228062515, 0.227509559,
0.227143662, 0.230048719, 0.227860836, 0.228467792, 0.227263728,
0.227222794, 0.227165592, 0.227140611, 0.228424335, 0.227356425,
0.227243374, 0.228936267, 0.227320467, 0.22738371, 0.227694891,
0.227270428, 0.227751798, 0.228803279, 0.227330453, 0.229679261,
0.228999206, 0.227227604, 0.227247085, 0.227198567, 0.229234921,
0.227211613, 0.23007234, 0.226793036, 0.226474338, 0.226654333,
0.229964991, 0.22880328, 0.22700099, 0.226640822, 0.227522393,
0.227463578, 0.227832692, 0.227293936, 0.230154101, 0.229813709,
0.22761097, 0.227445308, 0.228669159, 0.22660539, 0.229017398,
0.230421347, 0.227041103, 0.227583471, 0.229547568, 0.22676335,
0.226737661, 0.229922588, 0.226907188, 0.227102239, 0.226469073,
0.230680908, 0.227763879, 0.226882448, 0.226741993, 0.226693024,
0.22671415, 0.226773662, 0.227795194, 0.226983096, 0.226647946,
0.226799552, 0.226759218, 0.22692942, 0.226601519, 0.227098192,
0.226886889, 0.226959012, 0.226552119, 0.226809761, 0.226786285,
0.226709252, 0.226834015, 0.228033943, 0.226693494, 0.22748613,
0.227608804, 0.22685023, 0.226586619, 0.227718907, 0.228890098,
0.226701909, 0.230919944), heaviest = c(0.316870607, 0.316772978,
0.316851707, 0.317017543, 0.316673994, 0.317224709, 0.319234458,
0.31861305, 0.319804304, 0.318605816, 0.316930034, 0.31688398,
0.316789552, 0.320783976, 0.317094325, 0.31809319, 0.317134565,
0.318173976, 0.317213167, 0.317084404, 0.321712205, 0.317128056,
0.316866913, 0.3170489, 0.31712423, 0.31684494, 0.319497635,
0.316932301, 0.316864646, 0.317279005, 0.316887692, 0.317134437,
0.316792589, 0.320894499, 0.319883014, 0.316924639, 0.316575642,
0.31686389, 0.316985994, 0.321566256, 0.316683995, 0.320299883,
0.317308965, 0.318151948, 0.316479828, 0.319857732, 0.317171909,
0.322137849, 0.316526917, 0.316870364, 0.322205784, 0.317055758,
0.320329144, 0.318015397, 0.318719989, 0.317910658, 0.317292016,
0.321348723, 0.319915048, 0.317160762, 0.318773245, 0.319627925,
0.31869767, 0.322422407, 0.32082693, 0.318034899, 0.318760783,
0.318325502, 0.320739086, 0.317216142, 0.32284544, 0.319466593,
0.318740499, 0.317489944, 0.319064923, 0.322014928, 0.317353897,
0.318904583, 0.317931141, 0.323295254, 0.318924712, 0.318965677,
0.317700019, 0.31793468, 0.317699508, 0.317168657, 0.318903983,
0.317493401, 0.317511406, 0.317483897, 0.31748495, 0.317776804,
0.318893431, 0.317663608, 0.316978585, 0.317473467, 0.317500429,
0.317144259, 0.317330826, 0.317610353, 0.317881476, 0.31707787,
0.317728374, 0.317452137, 0.31938939, 0.317199373, 0.31898747,
0.318878952, 0.317987024, 0.318951952, 0.318419561, 0.319568088,
0.321165413)), .Names = c("id", "heavy", "heavier", "heaviest"
), class = "data.frame", row.names = c(NA, -113L))
## create some data.frames: this results in a list of four dfs
## createDF is an unevaluated call; every eval() draws a fresh random sample,
## so the four data.frames differ from one another.
createDF <- quote(data.frame(
  id = sample(c("NN", "SS"), 100, replace = TRUE),
  heavy = runif(100),
  heavier = runif(100),
  heaviest = runif(100)
))
dfs <- lapply(1:4, function(i) eval(createDF))
## join and shape them
library(reshape2)
library(ggplot2)
dat <- do.call(rbind, dfs)
## label every row with the data.frame it came from ("df 1" ... "df 4")
dat$dfid <- paste("df", rep(seq_along(dfs), times = vapply(dfs, nrow, integer(1))))
## long format: one row per (id, dfid, variable) with the measurement in `value`
dat <- melt(dat, id.vars = c("id", "dfid"))
ggplot(dat, aes(id, value, group = interaction(variable, id), fill = variable)) +
  geom_boxplot() +
  facet_grid(~dfid)
Something like this?
# Tag each data.frame with the season it represents.
df1$season <- "winter"
df2$season <- "spring"
df3$season <- "summer"
df4$season <- "fall"
# Reshape to long format (melt() comes from reshape2): the former column names
# (heavy/heavier/heaviest) go into `weightCat`, their values into `weight`.
# The two names must differ, and `weightCat` is what the plot maps to fill.
df1.m <- melt(df1, id.vars = c("id", "season"), variable.name = "weightCat", value.name = "weight")
df2.m <- melt(df2, id.vars = c("id", "season"), variable.name = "weightCat", value.name = "weight")
df3.m <- melt(df3, id.vars = c("id", "season"), variable.name = "weightCat", value.name = "weight")
df4.m <- melt(df4, id.vars = c("id", "season"), variable.name = "weightCat", value.name = "weight")
df.all <- rbind(df1.m, df2.m, df3.m, df4.m)
# One facet per season, one box per id level and weight category.
ggplot(df.all, aes(x = id, y = weight, fill = weightCat)) +
  geom_boxplot() +
  facet_grid(. ~ season)

Resources