Related
I am trying to build a stacked barplot that looks like so...
My dataframe looks like this:
df = structure(list(Year_Publication = c(1989L, 1994L, 2001L, 2002L,
2002L, 2004L, 2006L, 2007L, 2009L, 2011L, 2012L, 2013L, 2014L,
2015L, 2016L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L, 2019L,
2019L, 2019L, 2020L, 2020L, 2020L, 2020L, 2021L, 2021L, 2022L,
2022L, 2022L, 2022L), Taxa = c("Cervidae", "Teleostei", "Chondrichtyes",
"Chondrichtyes", "Gastropoda", "Teleostei", "Malacostraca", "Teleostei",
"Teleostei", "Teleostei", "Teleostei", "Teleostei", "Teleostei",
"Teleostei", "Teleostei", "Chondrichtyes", "Teleostei", "Chondrostei",
"Teleostei", "Chondrichtyes", "Decapoda", "Teleostei", "Gastropoda",
"Chondrichtyes", "Chondrostei", "Teleostei", "Bivalvia", "Tetrapoda",
"Teleostei", "Orthoptera", "Chondrichtyes", "Teleostei", "Reptilia",
"Bovidae"), Total_Species_Per_Taxa = c(1L, 2L, 1L, 1L, 3L, 1L,
1L, 1L, 2L, 4L, 2L, 1L, 1L, 2L, 4L, 2L, 4L, 1L, 7L, 3L, 1L, 6L,
1L, 3L, 1L, 7L, 2L, 1L, 1L, 1L, 4L, 4L, 1L, 1L), Total_Species_Per_Pub = c(1L,
2L, 1L, 4L, 4L, 1L, 1L, 1L, 2L, 4L, 2L, 1L, 1L, 2L, 4L, 7L, 7L,
7L, 11L, 11L, 11L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 2L, 2L,
10L, 10L, 10L, 10L)), class = "data.frame", row.names = c(NA,
-34L))
and I have tried this code:
ggplot(data = taxa_diversity, aes(x = `Year_Publication`, fill = Taxa)) +
geom_bar(aes(y = `Total_Species_Per_Pub`), stat = "count")
AND
ggplot(data = taxa_diversity, aes(x = `Year_Publication`, y = `Total_Species_Per_Pub`, fill = Taxa)) +
geom_bar(stat = "count")
But I keep getting this error
Error in `geom_bar()`:
! Problem while computing stat.
ℹ Error occurred in the 1st layer.
Caused by error in `setup_params()`:
! `stat_count()` must only have an x or y aesthetic.
Run `rlang::last_error()` to see where the error occurred.
I know it's an error with the y-axis, because I can get it to work when I don't include a command for the y-axis... but then it gives me a graph with the wrong values on the y-axis.
Any idea how to fix this?
Update (removed basic answer):
Using geom_col() we could set y in aes:
# Fill palette for the taxa (12 colours for the 12 distinct values of `Taxa`).
# NOTE: the last three colours repeat the first three, so some taxa share a
# colour; swap them for distinct values if every taxon must be distinguishable.
my_color <- c("#41859f", "#404040", "#ea9f91", "#bfd2d9",
              "#981f26", "#575a7b", "#ce2a0a", "#eddca3",
              "#2c6049", "#41859f", "#404040", "#ea9f91")
library(ggplot2)

# One axis tick per publication year. `df$Year_Publication` repeats a year once
# per taxon, so de-duplicate before using it as breaks/labels.
publication_years <- sort(unique(df$Year_Publication))

# Stacked bars: each segment is the number of species of one taxon in that
# year. The segments of a year add up to Total_Species_Per_Pub, so the bar
# height is the publication total. (Stacking Total_Species_Per_Pub itself would
# count that total once per taxon row and inflate the bars.)
ggplot(data = df,
       aes(x = Year_Publication, y = Total_Species_Per_Taxa, fill = Taxa)) +
  geom_col() +
  scale_x_continuous("Year_Publication",
                     breaks = publication_years,
                     labels = as.character(publication_years)) +
  scale_fill_manual(values = my_color) +
  theme_classic()
I need to perform an analysis with glmer on many different subgroups of a large dataset and only extract the estimate and z-value of each model. This works perfectly fine if I only use a small subset of my data (or some dummy data, as attached below), but when I try to include the whole data set, it takes forever. Currently I am using this bit of code:
slope_range <- df %>%
group_by(region, year, species) %>%
summarise(slope = coef(summary(glmer(presence ~ transect + (1 | road), family = "binomial")))[2],
p_val = coef(summary(glmer(presence ~ transect + (1 | road), family = "binomial")))[6])
As I said, this works fine, but it is very slow on a large data set. I'm aware that I could also just write multiple loops, but I assume this would take even longer. Does anyone have a better idea of what could be done to make it faster? Thanks!
Dummy data:
> dput(df)
structure(list(region = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("ARG", "CHE"), class = "factor"),
transect = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), presence = c(1L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L), year = c(2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L), species = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("a", "b"), class = "factor"),
road = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("FG", "MK", "PL", "XY"), class = "factor")), class = "data.frame", row.names = c(NA,
-160L))
You are calling coef(summary(glmer(...))) twice for each group, so you can cut the execution time roughly in half by fitting the model and extracting the coefficients once for each group. The following code will extract all the coefficients and their Z and p-values, not just the two values you specified, which I think is preferable if you might end up needing them later. Of course it can be easily modified to discard the other coefficients and keep only the two you specified.
code
library(tidyverse)
library(lme4)
# Fit one binomial mixed model per region/year/species group and return the
# whole fixed-effects coefficient table of each fit (one row per coefficient),
# so the model is fitted only once per group.
df %>%
  group_by(region, year, species) %>%
  group_modify(function(group_data, group_key) {
    # Coefficient matrix of the fit on this group's rows only
    coef_table <- coef(summary(
      glmer(presence ~ transect + (1 | road), family = "binomial",
            data = group_data)
    ))
    # Prepend a readable coefficient label; data.frame() turns the matrix
    # column names into syntactic ones (e.g. "Std..Error")
    data.frame(variable = c("Intercept", "transect"), coef_table)
  })
output
# A tibble: 16 x 8
# Groups: region, year, species [8]
region year species variable Estimate Std..Error z.value Pr...z..
<fct> <int> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
1 ARG 2007 a Intercept 6.11 2.81 2.17 0.0300
2 ARG 2007 a transect -0.743 0.361 -2.06 0.0398
3 ARG 2007 b Intercept 1.91 1.22 1.57 0.116
4 ARG 2007 b transect -0.396 0.208 -1.90 0.0570
5 ARG 2017 a Intercept 3.95 1.73 2.28 0.0223
6 ARG 2017 a transect -0.654 0.275 -2.38 0.0174
7 ARG 2017 b Intercept 2.44 1.33 1.83 0.0668
8 ARG 2017 b transect -0.396 0.208 -1.90 0.0570
9 CHE 2007 a Intercept 3.95 1.73 2.28 0.0223
10 CHE 2007 a transect -0.654 0.275 -2.38 0.0174
11 CHE 2007 b Intercept 2.44 1.33 1.83 0.0668
12 CHE 2007 b transect -0.396 0.208 -1.90 0.0570
13 CHE 2017 a Intercept 6.11 2.81 2.17 0.0300
14 CHE 2017 a transect -0.743 0.361 -2.06 0.0398
15 CHE 2017 b Intercept 1.91 1.22 1.57 0.116
16 CHE 2017 b transect -0.396 0.208 -1.90 0.0570
You could use a parallel approach as suggested earlier, e.g. with parallel::mclapply (on my 6-core machine using more than 4 cores gave only marginal improvements, though).
You could speed up glmer using nAGQ=0, at the cost of precision (see https://stats.stackexchange.com/questions/132841/default-lme4-optimizer-requires-lots-of-iterations-for-high-dimensional-data).
Example code with benchmarks:
invisible(lapply(c("lme4", "data.table", "tidyverse", "parallel", "microbenchmark"),
require, character.only = TRUE))
#> Loading required package: lme4
#> Loading required package: Matrix
#> Loading required package: data.table
#> Loading required package: tidyverse
#> Loading required package: parallel
#> Loading required package: microbenchmark
df <- structure(list(region = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("ARG", "CHE"), class = "factor"),
transect = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), presence = c(1L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L), year = c(2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L), species = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("a", "b"), class = "factor"),
road = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("FG", "MK", "PL", "XY"), class = "factor")), class = "data.frame", row.names = c(NA,
-160L))
## Your function for comparison
# Benchmark baseline: the approach from the question, which fits the same model
# twice per region/year/species group (once for each extracted value).
# Reads the global `df`. The formula is written inline on purpose: its
# variables are looked up among the columns of the current group.
# NOTE: element [6] of the 2 x 4 coefficient matrix is the z value of
# `transect`, even though the resulting column is called `p_val`.
tidy_fun <- function() {
  by_group <- group_by(df, region, year, species)
  summarise(
    by_group,
    slope = coef(summary(glmer(presence ~ transect + (1 | road), family = "binomial")))[2],
    p_val = coef(summary(glmer(presence ~ transect + (1 | road), family = "binomial")))[6]
  )
}
# Fit the binomial mixed model presence ~ transect + (1 | road) once and pull
# two values out of its fixed-effects coefficient matrix.
#
# presence, transect, road: equally long vectors holding one group's data.
# nAGQ: forwarded to glmer(); the benchmark also runs it with 0L.
#
# Returns a one-row data.table with
#   slope - element [2] of the matrix: the estimate for `transect`
#   p_val - element [6] of the matrix: the z value for `transect`
#           (the name is historical; it is not the p-value)
gf2 <- function(presence, transect, road, nAGQ = 1L) {
  fit <- glmer(presence ~ transect + (1 | road), family = "binomial",
               nAGQ = nAGQ)
  fixed_effects <- coef(summary(fit))
  data.table(slope = fixed_effects[2], p_val = fixed_effects[6])
}
# Fit the per-group model for every region/year/species combination of the
# global `df`, distributing the groups over `mc.cores` workers via mclapply().
#
# mc.cores: number of cores passed to mclapply().
# nAGQ:     forwarded to gf2() (and from there to glmer()).
#
# Returns the group keys bound column-wise to the stacked gf2() results.
parLM <- function(mc.cores=4L, nAGQ=1L){
# Keyed copy of the data; the row ranges below rely on each group's rows being
# contiguous (presumably guaranteed by the key sorting the table - confirm).
DT <- data.table(df, key = c("region","year","species"))
# One row per group, with a list column `irange` holding the first and last
# row index (.I) of that group in DT.
iDT <- DT[,by=.(region, year, species),.(irange=.(range(.I)))]
# For each group: expand its index range into a row sequence, subset DT to
# those rows and run gf2() on the three model columns.
result <- mclapply(seq(nrow(iDT)),
function(x) DT[do.call(seq, as.list(iDT[x, irange][[1]])),
.(gf2(presence, transect, road, nAGQ=nAGQ))], mc.cores=mc.cores)
# Column 4 of the combined table is `irange` (it follows the three key
# columns); drop it so only keys and model results remain.
return(cbind(iDT, rbindlist(result))[,-4])
}
# Time four variants, 10 repetitions each: the question's double-fit baseline,
# the data.table version on 4 cores, and the nAGQ = 0 variant on 1 and 4 cores.
# suppressMessages() wraps only the baseline, presumably to hide the messages
# summarise() prints about grouping.
microbenchmark(
original = suppressMessages(tidy_fun()),
multicore = parLM(mc.cores = 4L, nAGQ = 1L),
singlecore.nAGQ0 = parLM(mc.cores = 1L, nAGQ = 0L),
multicore.nAGQ0 = parLM(mc.cores = 4L, nAGQ = 0L),
times=10L)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> original 898.2732 925.0621 963.7452 940.9577 973.0648 1157.0030 10
#> multicore 319.1234 334.4151 347.8024 344.1370 362.6539 373.8189 10
#> singlecore.nAGQ0 237.4782 245.4084 262.6290 268.1308 274.8516 280.7944 10
#> multicore.nAGQ0 132.3356 132.9963 137.2777 135.8659 141.5145 144.2564 10
#> cld
#> d
#> c
#> b
#> a
When I produce a frequency plot:
Data <- structure(list(Venue = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Conference", "Journal"), class = "factor"), Year = c(2008L,
2009L, 2010L, 2011L, 2012L, 2013L, 2014L, 2015L, 2016L, 2017L,
2018L, 2019L, 2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L,
2015L, 2016L, 2017L, 2018L), Frequency = c(0L, 0L, 0L, 0L, 1L,
1L, 2L, 1L, 4L, 4L, 11L, 3L, 2L, 1L, 0L, 0L, 3L, 5L, 3L, 7L,
8L, 19L, 10L)), class = "data.frame", row.names = c(NA, -23L))
library(ggplot2)
ggplot(Data, aes(x = Year, y = Frequency, fill = Venue, label = Frequency)) +
geom_bar(stat = "identity") +
geom_text(size = 3, position = position_stack(vjust = 0.5))
The plot shows labels for the zero values, and the years on the x-axis do not match the years in the data frame.
How can I remove the zero-frequency labels from the plot (while still keeping the other record of the same year, e.g. 2012, in the plot) and show every year on the x-axis, one per bar?
Is this what you want?
The code to get it is:
# Stacked bars of Frequency per Year and Venue, each segment labelled with its
# value. Year is mapped as a discrete variable so every year gets its own tick.
# Zero frequencies get an explicit NA label: geom_text() then draws nothing for
# them, and na.rm = TRUE silences the "removed rows" warning. The bars
# themselves are not affected, because only the label is NA.
ggplot(Data, aes(x = as.character(Year), y = Frequency, fill = Venue,
                 label = ifelse(Frequency > 0, Frequency, NA))) +
  geom_col() +
  geom_text(size = 3, position = position_stack(vjust = 0.5), na.rm = TRUE) +
  scale_x_discrete(name = "Year")
First of all, I would like to apologise if I do not use the correct jargon.
I have the dataset as below which contains a wide range of categories
Here some excerpt from dput (using droplevels)
structure(list(
x = c(2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L), *[ME: there are more years than 2010...]*
y = c(7.85986, 185.81068, 107.24097, 7094.74649,
1.4982, 185.77319, 5090.79354, 167.58584, 4189.64609, 157.08277,
3927.06932, 2.86732, 71.683, 4.70123, 117.53085, 2.93452, 73.36292,
1.4982, 18.18734, 901.14744, 0.90268, 13.77532, 613.38298, 0.01845,
0.0681, 7.19925, 3.75315, 0.14333, 136.54008, 0.04766, 0.59077,
28.97255, 0.38608, 115.05258, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
x1 = structure(c(4L, 2L, 3L, 1L, 4L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 2L, 1L, 4L, 2L, 1L, 4L, 2L,
1L, 2L, 4L, 1L, 4L, 2L, 1L, 4L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L), .Label = c("All greenhouse gases - (CO2 equivalent)",
"CH4", "CO2", "N2O"), class = "factor"),
x2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Austria",
class = "factor"),
x4 = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 4L,
4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 10L,
10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L, 13L, 14L, 14L,
15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L), .Label = c("3",
"3.1", "3.A", "3.A.1", "3.A.2", "3.A.3", "3.A.4", "3.B",
"3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.C", "3.C.1",
"3.C.2", "3.C.3", "3.C.4"), class = "factor")), class = "data.frame",
row.names = c(NA,
-44L))
I want to know whether the sum of the subcategories in x4 (e.g. 3.B.1 + 3.B.2 + ... + 3.B.n) equals the figure stated for the parent category (e.g. 3.B), i.e. the sum given in the CSV, for a given year and country. In other words, I want to verify the sums.
To get the sum of the subcategories I have this
sum(df$y[df$x4 %in% c("3.A.1", "3.A.2", "3.A.3", "3.A.4") & x ==
"2010" & x2 == "Austria"])
To receive the sum of the parent category I have this
sum(df$y[df$x4 %in% c("3.A") & x == "2010" & x2 == "Austria"])
Next I would need an operation which checks whether the results of both pieces of code are equal (TRUE/FALSE). However, I have more than 20 countries, 20 years and dozens of categories to check. With my newbie approach I would be writing code for ages...
Is there any way to automate this? Basically, I am looking for code that is able to do the following:
1) Run for one category, go to next one
2) once done with categories change year and start again with categories
3) ... same for countries....
Any sort of help would be appreciated, as would suggestions on the right jargon to use in the title. Thanks in any case.
Here's a potential solution using dplyr (might require some tweaking based on the full dataset):
require(dplyr)
# Create two columns - one that shows only the parent category code, and one
# that tells you whether the row is a parent or a child. The regex keeps the
# first two dot-separated components (e.g. "3.B.1" -> "3.B") and makes some
# assumptions about the format of your data (notably that the second component
# is a single character).
df %>%
  mutate(parent = gsub("(.?\\..?)\\..*", "\\1", as.character(x4)),
         type = ifelse(parent == as.character(x4), "Parent", "Child")) %>%
  # Sum the y's by parent category, row type, year and country
  group_by(parent, type, x, x2) %>%
  summarise(total = sum(y)) %>%
  # Drop the grouping so the comparison below is evaluated row by row
  ungroup() %>%
  # One row per parent/year/country with the two sums side by side;
  # names_sort = TRUE keeps the column order Child, Parent
  tidyr::pivot_wider(names_from = type, values_from = total,
                     names_sort = TRUE) %>%
  # See if the sum of the children equals the parent's y, within a small
  # relative tolerance (the sums are floating point). A scalar test such as
  # isTRUE(all.equal(Child, Parent)) would give one verdict for a whole group
  # of rows, which is wrong once there are several countries or years.
  # Rows without children (Child is NA) are reported as FALSE.
  mutate(equals = dplyr::coalesce(
    abs(Child - Parent) <= 1.5e-8 * pmax(abs(Parent), 1),
    FALSE
  ))
Result using your (new) data:
parent x x2 Child Parent equals
<chr> <int> <fct> <dbl> <dbl> <lgl>
1 3 2010 Austria NA 7396. FALSE
2 3.1 2010 Austria NA 5278. FALSE
3 3.A 2010 Austria 4357. 4357. TRUE
4 3.B 2010 Austria 921. 921. TRUE
5 3.C 2010 Austria 0 0 TRUE
I can see from your new data that you have two levels of parents. My solution will only work for the second level (e.g. 3.1 and its children), but can be easily tweaked to also work for the top level.
I need an xy plot which plots means and error bars for x and y with three factors. The three factors are Year (2004-2012), Species (FW, HB), and Region (Kodiak, Shumagin Islands); xmean = mean d13C and ymean = mean d15N.
I can get reasonably close using the following code, but am missing one factor and it's not very aesthetically pleasing. I also get a warning message for exceeding the shape palette.
library(ggplot2)
library(plyr)
GAP_Whales<-structure(list(Species = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 2L), .Label = c("FW", "HB"), class = "factor"), Year = c(2007L,
2007L, 2007L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L,
2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L,
2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L,
2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L,
2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L,
2004L, 2004L, 2004L, 2004L, 2005L, 2005L, 2005L, 2005L, 2005L,
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L,
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L,
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L,
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L,
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L,
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L,
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L,
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2007L, 2005L, 2005L, 2005L, 2005L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2008L, 2008L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L,
2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L,
2008L, 2008L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,
2009L, 2009L, 2005L, 2005L, 2007L, 2007L, 2007L, 2008L, 2008L,
2008L, 2008L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2007L
), Region = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Kodiak", "Shumagin Is."), class = "factor"), d13C = c(-17.741,
-17.841, -17.382, -16.955, -17.504, -17.42814286, -15.89, -16.421,
-17.21328571, -17.90142857, -17.654, -19.225, -19.03361128, -18.29057143,
-17.28, -16.897, -18.18585714, -18, -17.619, -17.47014286, -18.382,
-16.807, -18.55242857, -18.527, -17.72557143, -17.06471429, -18.02757143,
-17.599, -17.57614286, -17.36385714, -17.19728571, -18.09871429,
-17.3, -15.928, -17.26071429, -17.85271429, -17.39342857, -16.98,
-16.847, -17.84728571, -16.673, -17.174, -16.277, -17.965, -17.60985714,
-16.6, -17.47885714, -17.46, -17.73342857, -17.028, -18.332,
-18.548, -18.22, -18.035, -17.138, -17.387, -18.314, -18.27,
-17.904, -18.497, -18.264, -18.593, -18.264, -18.008, -17.687,
-18.227, -17.849, -17.713, -18.017, -18.723, -18.793, -17.914,
-18.31, -18.116, -18.65, -17.587, -17.985, -18.793, -17.833,
-17.613, -17.942, -17.86, -17.749, -17.222, -17.286, -17.842,
-18.042, -17.912, -17.858, -18.916, -18.443, -18.638, -18.085,
-17.974, -17.997, -18.387, -18.129, -17.85, -18.699, -18.754,
-18.022, -18.636, -18.197, -18.645, -18.149, -18.157, -18.695,
-18.413, -17.978, -18.447, -17.854, -19.433, -18.251, -17.527,
-17.732, -18.42, -18.089, -17.498, -18.805, -17.677, -17.721,
-18.194, -18.063, -16.987, -18.34342857, -18.46185714, -17.56328571,
-17.84671429, -17.93814286, -18.10157143, -17.786, -17.78442857,
-17.38885714, -16.61228571, -15.97971429, -18.73614286, -18.26371429,
-18.98442857, -17.47014286, -18.12185714, -17.89457143, -18.17728571,
-18.234, -18.83871429, -18.82342857, -18.34314286, -18.43685714,
-18.66757143, -18.6295, -17.553, -17.72555609, -17.42890918,
-18.0937904, -17.3725821, -21.242, -20.107, -19.11, -17.771,
-18.125, -18.577, -17.781, -19.292, -16.776, -20.212, -20.539,
-17.972, -17.986, -18.634, -17.352, -17.409, -17.93, -17.458,
-17.53, -17.321, -17.11, -17.929, -17.244, -17.858, -17.251,
-18.06, -18.22, -18.142, -19.314, -18.412, -17.941, -17.909,
-18.114, -18.783, -18.181, -17.754, -18.484, -17.463, -18.379,
-18.19, -18.227, -17.414, -17.824, -17.436, -17.432, -17.171,
-17.483, -17.64, -17.639, -18.572, -18.545, -18.007, -18.033,
-18.102, -18.301, -17.731, -17.565, -17.68, -17.751, -18.134,
-18.409, -18.336, -18.888, -18.477, -18.25, -18.121, -18.082,
-17.914, -19.337, -19.228, -18.455, -18.657, -18.081, -18.23,
-18.777, -18.935, -18.823, -19.49, -18.383, -18.73, -18.152,
-18.582, -18.653, -18.407, -18.024, -18.994, -17.831, -17.947,
-17.57, -18.142, -17.691, -17.869, -18.513, -18.522, -17.923,
-18.353, -18.278, -17.664, -17.995, -17.786, -18.161, -18.119,
-18.125, -17.098, -17.576, -18.099, -18.713, -17.4, -17.622,
-17.532, -18.007, -18.146, -16.692, -18.678, -19.18, -18.522,
-18.572, -18.476, -19.144, -17.709, -17.742, -18.606, -18.267,
-18.543, -18.301, -19.117, -18.75, -19.394, -19.219, -18.179,
-18.681, -18.835, -18.456, -18.323, -18.148, -18.263, -17.965,
-19.337, -18.301, -19.046, -18.768, -18.017, -17.928, -17.314
), d15N = c(14.166, 14.279, 14.092, 13.464, 13.4, 13.179, 12.895,
13.537, 13.857, 13.775, 14.147, 12.017, 12.531, 12.329, 13.414,
13.777, 12.639, 13.135, 13.833, 13.68, 12.317, 12.237, 11.707,
12.318, 13.574, 14.77, 12.722, 13.772, 13.658, 13.804, 14.07,
15.182, 14.143, 13.54, 12.932, 13.77, 14.332, 12.642, 13.166,
12.412, 12.452, 14.09971429, 13.14, 13.643, 13.393, 13.759, 13.791,
13.244, 12.997, 13.86, 15.53828571, 14.42107143, 14.88228571,
13.32828571, 14.17421429, 12.94985714, 13.21614286, 11.18814286,
12.53371429, 12.67442857, 13.50585714, 12.64092857, 12.83257143,
12.03907143, 12.54642857, 13.70371429, 13.18142857, 14.76085714,
12.74385714, 13.7225, 11.76364286, 13.66457143, 12.65378571,
12.50114286, 14.27671429, 14.10342857, 14.3445, 11.72657143,
12.90221429, 14.71314286, 14.71907143, 14.04371429, 13.75092857,
13.74578571, 14.94164286, 13.07035714, 13.07685714, 12.8775,
13.86664286, 12.87185714, 13.75214286, 13.20285714, 12.46021429,
13.13914286, 13.82028571, 12.52585714, 13.4975, 12.88071429,
12.48042857, 14.29857143, 13.56214286, 13.41, 13.52985714, 13.55592857,
12.80007143, 12.91257143, 13.37457143, 13.60371429, 13.88671429,
13.44635714, 14.18214286, 10.09042857, 12.11571429, 13.00771429,
15.45157143, 13.33135714, 14.58378571, 11.78642857, 12.47628571,
14.46642857, 12.37064286, 13.44335714, 12.39628571, 14.08, 14.0505,
14.34, 14.0145, 13.926, 13.2355, 13.111, 12.3725, 13.888, 13.1075,
14.015, 14.9595, 12.857, 13.277, 12.457, 12.137, 13.124, 13.299,
12.811, 12.231, 11.829, 12.263, 13.036, 13.331, 12.76, 12.262,
14.026, 13.452, 13.769, 13.221, 13.059, 12.754, 12.637, 13.025,
15.123, 14.006, 12.605, 12.636, 14.229, 15.527, 11.583, 13.004,
12.851, 12.921, 12.273, 13.922, 13.429, 12.494, 13.803, 13.55,
13.387, 14.887, 14.248, 14.673, 14.603, 12.879, 12.4, 13.676,
13.648, 13.067, 13.353, 11.703, 14.118, 12.78, 12.293, 12.68,
13.494, 13.309, 13.838, 12.688, 14.418, 14.357, 14.587, 14.714,
14.435, 13.418, 13.013, 12.631, 12.704, 13.091, 12.953, 12.751,
12.409, 12.921, 12.216, 12.594, 12.698, 14.891, 14.692, 13.187,
13.451, 13.023, 11.957, 12.401, 12.527, 13.47, 11.771, 11.848,
12.399, 12.502, 12.678, 12.768, 12.716, 12.671, 12.61, 13.132,
12.999, 13.251, 11.048, 14.384, 12.688, 13.196, 12.875, 13.495,
12.895, 12.992, 12.888, 13.044, 14.195, 13.643, 13.042, 13.15,
13.437, 13.835, 14.884, 13.136, 14.384, 13.927, 14.914, 12.978,
12.841, 13.793, 14.312, 14.219, 14.36, 13.529, 11.837, 13.166,
13.103, 12.798, 13.529, 12.813, 9.574, 13.859, 12.548, 13.405,
12.6, 12.373, 12.964, 12.896, 13.067, 13.896, 14.533, 14.024,
13.042, 13.213, 13.857, 12.857, 12.393, 11.841, 13.702, 13.634,
14.391, 13.719, 13.181, 13.566, 13.314, 13.457, 12.871, 12.383,
13.62, 13.753, 13.388, 12.856, 14.408)), .Names = c("Species",
"Year", "Region", "d13C", "d15N"), class = "data.frame", row.names = c(NA,
-298L))
means <- ddply(GAP_Whales, .(Species, Year, Region), function(x) c(xmean=mean(x$d13C), xsd=sd(x$d13C), ymean=mean(x$d15N), ysd=sd(x$d15N)))
Species<-as.factor(means$Species)
Region<-as.factor(means$Region)
Year<-as.factor(means$Year)
p<-ggplot(means, aes(x=means$xmean, y=means$ymean))
p<-p+geom_point(aes(shape=factor(Year), color=factor(Region)))
p<-p + geom_errorbar(aes(ymin=ymean-ysd, ymax=ymean+ysd), width=.1)+
geom_errorbarh(aes(xmin=xmean-xsd, xmax=xmean+xsd), width=.1)
p