Calculating Intake energy with loops - r

I am trying to run through an old coworkers script and I am hoping someone can help inform me of what exactly he did during this code segment. Earlier in the script we calculated the intake rate for several prey species and now it appears that we are grouping them based on unique locations. The section of code after this requires that there be 41 rows ( 1 row for each unique location in the complete dataset). I believe that the code subsets the data based on latitude and then adds an 'alpha' column. The main issue that I am having is what is this line calculating: x= x + d$Intakerate_kjday[j]*d$alpha[j]. For locations that had several prey items (profit.fall.38.4.959) is this code summing up "intakerate_kjday" and "alpha" and then multiplying them together? When the code is performed I receive the error Error in
`[<-.data.frame`(`*tmp*`, k, , value = c("2", "Bishop's Head", : replacement has 6 items, need 7
I would really appreciate any insight into what he was trying to calculate and a potential work around. Thank you.
dput(profit)
structure(list(Sample.ID = structure(c(5L, 19L, 27L, 28L, 30L,
38L, 12L, 62L, 49L, 29L, 25L, 17L, 61L, 67L, 27L, 26L, 32L, 9L,
47L, 45L, 5L, 26L, 27L, 44L, 45L, 4L, 1L, 43L, 19L, 35L), .Label = c("Barren Island Mud 1",
"BH High 1", "BH High 2", "BH Low 1", "BH Low 2", "BH Low 3",
"BHH 1 C", "BHH 2 E", "BHL 1 E", "BHL 2", "BHL 3 (B)", "BHM 1 C",
"BI High 1", "BI Low 1", "BI Low 2C", "BI Low 3", "BI Mud", "BIHI High B",
"BIL1 (low) E", "BIL1E", "BIL2 E", "BIL2E", "BW Fresh 1", "BW Fresh 2",
"BW High 1", "BW High 2", "BW High 5", "BW Low 3", "BW Money Stump",
"BW Mud 1", "BW SAV 1", "BW SAV 2", "BWH 1 D", "BWH 2", "BWH 3",
"BWH 5", "BWL 1", "BWL 2", "BWL 3", "BWM 1", "BWMS D", "EN High 2",
"EN High 4", "EN High 5", "EN Low 1", "EN Low 2", "EN Mud 2",
"ENH3 A High", "ENH4 A High", "ENH5 A High", "ENL1 Low E", "ENM1 A Mud",
"ENS1 SAV", "ENS2 SAV 2C", "ENS3 SAV 3E", "High 3C", "MWP 29 Low 1",
"MWP 30 Mud 1", "MWP 31 Low 2", "MWP 32 Mud 2", "MWP 33 Low 3",
"MWP 34 Low 4", "PWRC Fresh", "WP 27 HM-MARC", "WP 28 HM-MARC",
"WP 30 IT MARE", "WP29 LM-MARC"), class = "factor"), Season = structure(c(2L,
3L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 3L, 3L), .Label = c("",
"Fall", "Spring", "Spring?"), class = "factor"), Refuge = structure(c(3L,
2L, 5L, 5L, 5L, 5L, 4L, 7L, 6L, 5L, 5L, 2L, 7L, 7L, 5L, 5L, 5L,
4L, 6L, 6L, 3L, 5L, 5L, 6L, 6L, 3L, 2L, 6L, 2L, 5L), .Label = c("",
"Barren Island", "Bishop's Head", "Bishops Head", "Blackwater",
"Eastern Neck", "Martin", "PWRC"), class = "factor"), Habitat.Type = structure(c(3L,
3L, 2L, 3L, 4L, 3L, 4L, 3L, 2L, 3L, 2L, 4L, 3L, 3L, 2L, 2L, 5L,
3L, 4L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 4L, 2L, 3L, 2L), .Label = c("Fresh",
"High", "Low", "Mud", "SAV"), class = "factor"), Longitude = c(-76.03896,
-76.26205, -76.05714, -76.06332, -76.14641, -76.23522, -76.03869,
-75.99733, -76.21661, -76.23491, -76.22003, -76.26163, -75.99354,
-76.01407, -76.05714, -76.01762, -76.10363, -76.04883, -76.21547,
-76.23986, -76.03896, -76.01762, -76.05714, -76.2181, -76.23986,
-76.04883, -76.26163, -76.21661, -76.26205, -76.0235), Latitude = c(38.22447,
38.33905, 38.40959, 38.39708, 38.41795, 38.43055, 38.23255, 37.99369,
39.03264, 38.43141, 38.41026, 38.33606, 37.98833, 38.01108, 38.40959,
38.41913, 38.40351, 38.22694, 39.04036, 39.02677, 38.22447, 38.41913,
38.40959, 39.03887, 39.02677, 38.22694, 38.33606, 39.03264, 38.33905,
38.39138), Prey = structure(c(11L, 41L, 35L, 30L, 41L, 41L, 41L,
3L, 18L, 31L, 40L, 9L, 41L, 38L, 30L, 13L, 35L, 41L, 20L, 27L,
4L, 40L, 13L, 35L, 41L, 5L, 5L, 15L, 22L, 20L), .Label = c("Hydrobia",
"Hydrobia genus", "Hydrobia sp.", "Hydrobia spp", "Melampus bidentatus",
"Ruppia (maritima or rostellata)", "Ruppia genus", "Ruppia maritima",
"Schoenoplectus pungens", "Schoenoplectus robustus", "Schoenoplectus spp",
"Schoenoplectus spp.", "Scirpus acutus", "Scirpus acutus?", "Scirpus americanus",
"Scirpus fluviatilis", "Scirpus genus", "Scirpus genus 1", "Scirpus genus 1?",
"Scirpus genus 2", "Scirpus genus 3", "Scirpus genus?", "Scirpus heterochaetus",
"Scirpus meterochaetus", "Scirpus mevadensis", "Scirpus olney?",
"Scirpus olneyi", "Scirpus paludosis", "Scirpus paludosus", "Scirpus robustus",
"Scirpus robustus?", "Scirpus species", "Scirpus subterminalis",
"Scirpus subtermiralis", "Scirpus validus", "Spartina alterniflora",
"Spartina genus", "Spartina genus?", "Spartina patens", "Spartina pectinata",
"Zannichallia palustris"), class = "factor"), Density = c(2.36e-05,
0.000101477, 0.000335244, 1.17e-05, 1.91e-06, 2.8e-06, 1.72e-05,
1.34e-05, 2.71e-05, 0.000107843, 2.16e-06, 4.46e-06, 1.22e-05,
6.61e-05, 0.000263052, 3.91e-05, 0.00034925, 3.69e-06, 8.02e-06,
2.04e-05, 2.9e-05, 2.05e-05, 0.000564046, 0.001912535, 2.04e-05,
0.001117905, 0.00255132, 9.03e-05, 4.23e-05, 0.000248282), Intakerate_kcals = c(-3.5399430250046e-07,
7.6382794280604e-14, -5.02872205332896e-06, -1.7549698484651e-07,
2.70599529637464e-17, 5.81535679492809e-17, 2.19440708445348e-15,
4.34155540862746e-08, -4.06493587341127e-07, -1.61763139817e-06,
-3.23994151550826e-08, -6.68988064422799e-08, 1.10402768540446e-15,
-9.91487886840506e-07, -3.94580269988612e-06, -5.8649138992111e-07,
-5.23882134070119e-06, 1.00998060784975e-16, -1.2029789281118e-07,
-3.05994985702607e-07, 9.3958523768985e-08, -3.07494963928282e-07,
-8.46097103856411e-06, -2.86925082960488e-05, 3.08688633134856e-15,
3.62058033172122e-06, 8.25888178764606e-06, -1.35448644277712e-06,
-6.34490870510011e-07, -3.72424640639279e-06), Intakerate_kjs = c(-1.48111216166192e-06,
3.19585611270047e-13, -2.10401730711284e-05, -7.34279384597799e-07,
1.13218843200315e-16, 2.43314528299791e-16, 9.18139924135334e-15,
1.81650678296973e-07, -1.70076916943527e-06, -6.76816976994329e-06,
-1.35559153008866e-07, -2.79904606154499e-07, 4.61925183573226e-15,
-4.14838531854068e-06, -1.65092384963235e-05, -2.45387997542992e-06,
-2.19192284894938e-05, 4.22575886324335e-16, -5.03326383521979e-07,
-1.28028302017971e-06, 3.93122463449433e-07, -1.28655892907593e-06,
-3.54007028253523e-05, -0.000120049454710668, 1.29155324103624e-14,
1.51485081079216e-05, 3.45551613995111e-05, -5.66717127657947e-06,
-2.65470980221389e-06, -1.55822469643474e-05), Intakerate_kjday = c(-0.12796809076759,
2.76121968137321e-08, -1.81787095334549, -0.0634417388292498,
9.78210805250721e-12, 2.1022375245102e-11, 7.93272894452929e-10,
0.0156946186048585, -0.146946456239208, -0.584769868123101, -0.011712310819966,
-0.0241837579717487, 3.99103358607267e-10, -0.358420491521915,
-1.42639820608235, -0.212015229877145, -1.89382134149226, 3.65105565784226e-11,
-0.043487399536299, -0.110616452943527, 0.033965780842031, -0.111158691472161,
-3.05862072411043, -10.3722728870017, 1.11590200025531e-09, 1.30883110052443,
2.98556594491776, -0.489643598296466, -0.22936692691128, -1.34630613771962
)), row.names = c(NA, -30L), class = "data.frame")
lat=unique(profit$Latitude)
## for each location I am calculating the weight for Fall only
nfall=0
latfall<-c(double())
for(i in lat){
name = paste0("profit.fall.",round(i,5))
x = subset(profit,Latitude==i & Season=="Fall")
if(nrow(x)>=1){
for(j in 1:nrow(x)){
x$alpha[j]<- 1 # used to be this x$Density[j]/sum(x$Density)
}
nfall= nfall+1
assign(name, data.frame(x))
latfall<-c(latfall,round(i,5))
print(name)
}
}
View(profit.fall.38.4.959)
profit.fall.all <- data.frame(matrix(ncol=7,nrow=nfall))
names(profit.fall.all)[1]<-'Id'
names(profit.fall.all)[2]<-'Refuge'
names(profit.fall.all)[3]<-'Season'
names(profit.fall.all)[4]<-'HType'
names(profit.fall.all)[5]<-'Lat'
names(profit.fall.all)[6]<-'Long'
names(profit.fall.all)[7]<-'IntakeEnergy'
View(profit.fall.all)
k=0
lat=latfall
for(i in lat){
df=as.name(paste0('profit.fall.',i))
d=get(as.character(df))
x=0
for(j in 1:nrow(d)){
x= x + d$Intakerate_kjday[j]*d$alpha[j]
}
k=k+1
new_row <- c(k,as.character(d$Refuge[1]),as.character(d$Season[1]),as.character(d$Habitat.Type[1]),as.numeric(d$Latitude[1]),as.numeric(d$Longtitude[1]),as.numeric(x))
#names(new_row)<-c("id","Refuge","Season","HType","Lat","Long","Intakerate_kjday")
#profit.spring.all <- rbind(profit.spring.all, new_row)
profit.fall.all[k,] <- new_row
}
View(profit.fall.all)

The code in question apparently computes (very inefficiently and inaccurately)
sum(d$Intakerate_kjday * d$alpha)
Your error however suggests, that a column is missing in one of the data frames.
Take a look at new_row here:
for(i in lat){
df=as.name(paste0('profit.fall.',i))
d=get(as.character(df))
x=0
for(j in 1:nrow(d)){
x= x + d$Intakerate_kjday[j]*d$alpha[j]
}
k=k+1
new_row <- c(k,as.character(d$Refuge[1]),as.character(d$Season[1]),as.character(d$Habitat.Type[1]),as.numeric(d$Latitude[1]),as.numeric(d$Longtitude[1]),as.numeric(x))
#names(new_row)<-c("id","Refuge","Season","HType","Lat","Long","Intakerate_kjday")
#profit.spring.all <- rbind(profit.spring.all, new_row)
if (length(new_row) != ncol(profit.fall.all)) {
# Catch the bad df
browser()
}
profit.fall.all[k,] <- new_row
}

Related

xAxis order of R highcharter column plot

With the following data frame:
dta <- structure(list(sociodemographic_var = structure(c(3L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 26L, 18L, 20L, 21L, 26L, 13L, 16L, 21L, 22L, 26L, 26L,
9L, 13L, 17L, 18L, 20L, 21L, 23L, 26L, 20L, 26L), levels = c("1st grade",
"2nd grade", "3rd grade", "4th grade", "5th grade", "6th grade",
"7th grade", "8th grade", "9th grade", "10th grade", "11th grade",
"12th grade, no diploma", "High school graduate", "GED or equivalent",
"Some college, no degree", "Less than 1 year of college credit/post-secondary education (or less than 10 classes)",
"One year or more of college credit, no degree", "Associate degree: Occupational, Technical, or Vocational",
"Associate degree: Academic Program", "Bachelor's degree (ex. BA, AB, BS, BBS)",
"Master's degree (ex. MA, MS, MEng, MEd, MBA)", "Professional School degree (ex. MD, DDS, DVN, JD)",
"Doctoral degree (ex. PhD, EdD)", "Refused to answer", "Don't Know",
"unknown"), class = "factor"), event = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 7L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 11L, 11L), levels = c("Baseline", "0.5 Year", "1 Year",
"1.5 Year", "2 Year", "2.5 Year", "3 Year", "3.5 Year", "4 Year",
"4.5 Year", "5 Year", "5.5 Year", "6 Year", "Screener"), class = "factor"),
visit_type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), levels = c("on-site", "hybrid", "remote", "unknown"), class = "factor"),
n = c(2L, 13L, 5L, 9L, 15L, 18L, 26L, 25L, 192L, 27L, 485L,
224L, 183L, 1011L, 666L, 55L, 78L, 3L, 9L, 1L, 1L, 2L, 208L,
1L, 1L, 1L, 1L, 126L, 28L, 1L, 1L, 2L, 2L, 3L, 4L, 1L, 543L,
1L, 300L)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-39L))
I would assume that, generating a highcharter bar plot with:
library(highcharter) # v0.9.4
dta |>
hchart(type = "column", hcaes(x = "event", y = "n", group = "sociodemographic_var")) |>
hc_yAxis(title = list(text = "%"), max = 115, endOnTick = FALSE, stackLabels = list(enabled = TRUE)) |>
hc_xAxis(title = "") |>
hc_plotOptions(series = list(stacking = "percent"))
the xAxis would be ordered by levels(dta$event):
levels(dta$event)
[1] "Baseline" "0.5 Year" "1 Year" "1.5 Year" "2 Year" "2.5 Year" "3 Year" "3.5 Year" "4 Year" "4.5 Year" "5 Year" "5.5 Year"
[13] "6 Year" "Screener"
But the ordering is different and neither alphabetical nor based on the total number of values:
I am interested to understand why it's the case and how to set the order right.
You can add categories to your hc_xAxis to make an order like this:
library(highcharter)
dta |>
hchart(type = "column", hcaes(x = "event", y = "n", group = "sociodemographic_var")) |>
hc_yAxis(title = list(text = "%"), max = 115, endOnTick = FALSE, stackLabels = list(enabled = TRUE)) |>
hc_xAxis(title = "", categories = levels(dta$event)) |>
hc_plotOptions(series = list(stacking = "percent"))
Output:

R novice and trying to improve the appearance of grouped box plots

I found some online datasets and managed to make some complex box plots that had most of the features I was looking for. I'd appreciate the community's help in making these plots look better, such as:
removing axes lines,
adding tick marks and making them point inwards,
changing the background color or font of facet_wrap,
and removing "Label" in my attached plots.
The program Veusz allows you to change whisker mode to (e.g. I.5 IQR, 9/91 percentile, 1 stddev) and it would be nice to have that option as well. I also don't understand why the data points in my first box plot (linked below) are off center.
Linked below are screen shots of some grouped box plots that I made from my own data. I learn best by breaking and fixing things, and if someone has the time to write out the code for a box plot with lots of features, I will deconstruct it to see what each part does and search for the code online to get a better understanding of how it works.
Box plot of my data 1
Box plot of my data 2
Box plot of my data 3
structure(list(X. = structure(c(1L, 12L, 23L, 34L, 45L, 56L,
67L, 71L, 72L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 31L, 32L, 33L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L,
57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L, 69L, 70L
), .Label = c("# 1", "# 10", "# 11", "# 12", "# 13", "# 14",
"# 15", "# 16", "# 17", "# 18", "# 19", "# 2", "# 20", "# 21",
"# 22", "# 23", "# 24", "# 25", "# 26", "# 27", "# 28", "# 29",
"# 3", "# 30", "# 31", "# 32", "# 33", "# 34", "# 35", "# 36",
"# 37", "# 38", "# 39", "# 4", "# 40", "# 41", "# 42", "# 43",
"# 44", "# 45", "# 46", "# 47", "# 48", "# 49", "# 5", "# 50",
"# 51", "# 52", "# 53", "# 54", "# 55", "# 56", "# 57", "# 58",
"# 59", "# 6", "# 60", "# 61", "# 62", "# 63", "# 64", "# 65",
"# 66", "# 67", "# 68", "# 69", "# 7", "# 70", "# 71", "# 72",
"# 8", "# 9"), class = "factor"), Label = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Sample 1", "Sample 2", "Sample 3"
), class = "factor"), Rescan = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L), .Label = c("Rescan 0", "Rescan 1", "Rescan 2", "Rescan 3"
), class = "factor"), Porosity = c(15.19, 15.72, 15.83, 15.57,
15.11, 14.15, 17.24, 17.53, 18.21, 18.8, 18.32, 19.59, 17.4,
17.98, 19.33, 18.94, 18.32, 18.17, 19.67, 20.55, 19.04, 18.18,
19.59, 18.19, 18.97, 18.64, 18.83, 17.24, 18.09, 17.74, 22.28,
22.29, 21.35, 21.96, 23.12, 22.9, 22.9, 21.06, 23.34, 22.82,
21.42, 20.48, 21.22, 22.75, 21.62, 22.24, 24.28, 20.48, 14.79,
13.69, 13.4, 14.46, 14.13, 13.55, 20.67, 19.81, 21.2, 20.77,
22.29, 21.94, 19.49, 19.29, 19.43, 20.31, 21.77, 19.39, 22.37,
21.46, 21.86, 21.58, 21.82, 23.02)), class = "data.frame", row.names = c(NA,
-72L))
Here's an example of your plot with the things you wanted to do. I suppose you can adjust the code to your needs from here:
ggplot(mydata, aes(Label,Porosity,fill=Label))+
geom_boxplot()+
# shift strips down
facet_wrap(~Rescan, strip.position = "bottom")+
# add exact points
geom_point(alpha=0.2)+
# add your preferred colors here (hexcode also works fine)
scale_fill_manual(values=c("red","blue","green"))+
# appearance
theme_classic()+
# legend options
theme(legend.title = element_blank(),
legend.text = element_text(color ="black", size = 8),
legend.position = "top", # "bottom" or "right"
legend.key.size = unit(1, "cm"),
legend.spacing.x = unit(5, "mm"),
legend.direction = "horizontal", # or "vertical"
legend.background = element_blank())+
# reverse legend keys
guides(fill = guide_legend(reverse = F))+ # set T to take action
# scaling y-axis
scale_y_continuous(expand = c(0, 0), limits = c(0,max(mydata$Porosity)),breaks = seq(0,max(mydata$Porosity,10)))+
# paramaters of the axes
theme(
axis.text = element_text(color = "black", angle = 0, hjust = 0.5, vjust = 0.5, size = 8),
# axis.title = element_blank(), # activate for no axis titles
axis.line = element_line(color = "black", size = 0.5), # use element_blank() for no lines
axis.ticks.length=unit(-0.1, "cm"), # negative values turn them inside
plot.background = element_blank(),
text = element_text(family = "Arial"),
strip.background = element_blank(),
strip.placement = "outside")+ # or "inside"
# name your axes
ylab("your y lab")+
xlab("your x lab")+
# add x-axis in each facet
annotate("segment", y=0,yend=0,x=0,xend=Inf)
Hopefulyy this answers some of the questions, but to be honest it's hard to pull out the individual questions you are asking. Perhaps put them in dot points?
overall look:
There are some pre-made themes you can try, I like:
theme_void() - removes most things
theme_classic() - makes the background nicer to look at
theme_minimal() - removes the outer border and makes the background prettier
You can add them as a layer with + theme_void() on the end of the plotting code.
For all other specific customisations look at ?theme() as there are a whole bunch of things you can do.
labels:
To remove the title 'label' add labs(legend = '') as a layer. You can also use this to modify the x, y, caption and title text.
If you want to remove the legend entirely, you can add show.legend = F inside your geom_jitter() layer. (e.g. geom_jitter(show.legend = F)) This will mean it shows on the graph, but nothing appears in the legend.
facets:
To change the colour of the background in facte_wrap use theme(strip.background = element_rect(color = 'desired_colour'))
To change the colour of the text in facte_wrap use theme(strip.text = element_text(color = 'desired_colour'))
axis lines:
add theme(axis.line = element_blank())
points:
Your points are off center due to the geom_jitter(). Try geom_point() instead.

Repeated measures ANOVA in R (split-plot design)

I am trying to fit a repeated measures ANOVA from an experiment with a split-split-plot design and several measures over time.
The experimental design is as follows:
I have 9 blocks in the field. Inside each block I have two subplots representing a split factor (named trt, "with" or "without" a specific treatment). Inside each subplot (with or without) I have another split factor with two quadrats (named sp, "species 1", "Species 2"). In each quadrat I have two seedlings of each species in the study (each seedling identified by a unique id). Finally I have monitored a given response variable for each seedling in the experiment along 4 repeated measures in time (weeks).
Therefore, I have 9 blocks, 2 treatments inside each block, 2 species inside each treatment and 2 seedlings from each species. This was monitored for 4 weeks.
I want to understand if time * trt * sp affect my response variable.
Considering my experimental design, is the following code a correct specification of the Error term for fitting an aov repeated measures split-split-plot model?
fit <- aov(response ~ time * sp * trt + Error(block/trt/sp/id), data = d3)
summary(fit)
Error: block
Df Sum Sq Mean Sq F value Pr(>F)
Residuals 1 11.29 11.29
Error: block:trt
Df Sum Sq Mean Sq
trt 1 0.114 0.114
Error: block:trt:sp
Df Sum Sq Mean Sq
sp 1 61.14 61.14
sp:trt 1 10.27 10.27
Error: block:trt:sp:id
Df Sum Sq Mean Sq F value Pr(>F)
sp 1 7.16 7.159 2.299 0.141
trt 1 1.07 1.072 0.344 0.562
sp:trt 1 2.18 2.181 0.701 0.410
Residuals 28 87.17 3.113
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
time 1 0.38 0.3781 0.237 0.627
time:sp 1 0.93 0.9317 0.585 0.446
time:trt 1 1.91 1.9117 1.201 0.276
time:sp:trt 1 2.73 2.7257 1.712 0.194
Residuals 104 165.59 1.5922
This code results in the following Warning message:
Warning message:
In aov(response ~ time * sp * trt + Error(block/trt/sp/id), data = d3) :
Error() model is singular
I really appreciate any help on this issue.
Thank you very much! I am happy to provide any further detail, if need it.
Data and plots
To illustrate the results
Data for reproducibility is presented below (dput print):
Edit 1 (updated datset):
structure(list(block = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L), trt = c("without", "without", "with",
"with", "with", "with", "without", "without", "with", "with",
"without", "without", "with", "with", "without", "without", "with",
"with", "without", "without", "with", "with", "without", "without",
"without", "without", "with", "with", "with", "with", "without",
"without", "without", "without", "with", "with", "without", "without",
"with", "with", "with", "with", "without", "without", "with",
"with", "without", "without", "with", "with", "without", "without",
"with", "with", "without", "without", "with", "with", "without",
"without", "without", "without", "with", "with", "with", "with",
"without", "without", "without", "without", "with", "with", "without",
"without", "with", "with", "with", "with", "without", "without",
"with", "with", "without", "without", "with", "with", "without",
"without", "with", "with", "without", "without", "with", "with",
"without", "without", "without", "without", "with", "with", "with",
"with", "without", "without", "without", "without", "with", "with",
"without", "without", "with", "with", "with", "with", "without",
"without", "with", "with", "without", "without", "with", "with",
"without", "without", "with", "with", "without", "without", "with",
"with", "without", "without", "without", "without", "with", "with",
"with", "with", "without", "without", "without", "without", "with",
"with"), sp = c("species 1", "species 2", "species 2", "species 1",
"species 2", "species 1", "species 2", "species 1", "species 1",
"species 2", "species 2", "species 1", "species 2", "species 1",
"species 1", "species 2", "species 1", "species 2", "species 2",
"species 1", "species 2", "species 1", "species 1", "species 2",
"species 1", "species 2", "species 1", "species 2", "species 1",
"species 2", "species 2", "species 1", "species 2", "species 1",
"species 2", "species 1", "species 1", "species 2", "species 2",
"species 1", "species 2", "species 1", "species 2", "species 1",
"species 1", "species 2", "species 2", "species 1", "species 2",
"species 1", "species 1", "species 2", "species 1", "species 2",
"species 2", "species 1", "species 2", "species 1", "species 1",
"species 2", "species 1", "species 2", "species 1", "species 2",
"species 1", "species 2", "species 2", "species 1", "species 2",
"species 1", "species 2", "species 1", "species 1", "species 2",
"species 2", "species 1", "species 2", "species 1", "species 2",
"species 1", "species 1", "species 2", "species 2", "species 1",
"species 2", "species 1", "species 1", "species 2", "species 1",
"species 2", "species 2", "species 1", "species 2", "species 1",
"species 1", "species 2", "species 1", "species 2", "species 1",
"species 2", "species 1", "species 2", "species 2", "species 1",
"species 2", "species 1", "species 2", "species 1", "species 1",
"species 2", "species 2", "species 1", "species 2", "species 1",
"species 2", "species 1", "species 1", "species 2", "species 2",
"species 1", "species 2", "species 1", "species 1", "species 2",
"species 1", "species 2", "species 2", "species 1", "species 2",
"species 1", "species 1", "species 2", "species 1", "species 2",
"species 1", "species 2", "species 1", "species 2", "species 2",
"species 1", "species 2", "species 1", "species 2", "species 1"
), id = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
36L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
"21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31",
"32", "33", "34", "35", "36"), class = "factor"), time = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L),
response = c(3.1, 5.7, 4.8, 2.9, 5, 3.9, 5.7, 4.2, 3.6, 4.4,
3.9, 2.9, 3.2, 7.5, 4.3, 4.8, 3, 4.9, 5.6, 3.9, 4.1, 4.2,
2.8, 3.9, 3.9, 7.5, 4, 4.3, 3.1, 7.1, 5.8, 2.5, 6.4, 4.5,
5, 3.6, 3.1, 5.2, 3.6, 2.9, 5.2, 4.6, 4.7, 4.3, 3.9, 4.4,
4.2, 3.6, 3.2, 2.7, 3.4, 5.6, 2.8, 6, 5.1, 3.7, 4.1, 3.4,
3, 4.1, 3.2, 6.7, 3.1, 3.8, 2.9, 6.9, 5.6, 2.1, 5.6, 4.8,
4.8, 2.7, 3, 5.5, 3.4, 3.1, 5.1, 5, 5, 4.8, 4, 4, 4, 2.6,
3, 3, 3.9, 6, 3, 7, 5, 3.5, 4, 4, 3, 4, 3, 6.5, 4, 5, 4,
8, 6, 2.2, 5.9, 4, 6, 3, 3, 5, 3.5, 3, 5, 4, 4, 2, 6.5, 4,
5, 2, 3, 3, 3, 5.5, 2, 5, 6, 2.5, 5, 2.5, 3, 5, 3, 5.5, 3,
2, 3, 6, 5, 5, 5, 3, 4, 15)), row.names = c(NA, -144L), class = "data.frame")

GLMER warning: variance-covariance matrix [...] is not positive definite or contains NA values

I sometimes find that my GLMMs from glmer, package lme4, show the following warning messages, when their summary is called:
Warning messages:
1: In vcov.merMod(object, use.hessian = use.hessian) :
variance-covariance matrix computed from finite-difference Hessian is
not positive definite or contains NA values: falling back to var-cov estimated from RX
2: In vcov.merMod(object, correlation = correlation, sigm = sig) :
variance-covariance matrix computed from finite-difference Hessian is
not positive definite or contains NA values: falling back to var-cov estimated from RX
Similar questions I found here on Stackoverflow refer to other functions, not glmer, and the LME4 Wiki does not elaborate on that either. In this question, the problem was solved before that kind of error messages were tackled, and here the discussion focuses on a particular model rather than on the meaning of the warning message.
So the question is: should I worry about that message, or is it OK because it is simply a warning and not an error, and as it says, it is "falling back to var-cov estimated from RX" (whatever RX is) anyway.
Interestingly, although the summary states that the model failed to converge, I do not get the usual convergence warnings in red.
Here comes a (minimal) dataset:
testdata=structure(list(Site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("EO1", "EO2",
"EO3", "EO4", "EO5", "EO6"), class = "factor"), Treatment = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("control",
"no ants", "no birds", "no birds no ants"), class = "factor"),
Tree = structure(c(2L, 3L, 4L, 16L, 12L, 13L, 14L, 15L, 5L,
6L, 7L, 8L, 1L, 9L, 10L, 11L, 28L, 29L, 30L, 31L, 17L, 25L,
26L, 27L, 18L, 19L, 20L, 32L, 21L, 22L, 23L, 24L, 33L, 41L,
42L, 43L, 37L, 38L, 39L, 40L, 44L, 45L, 46L, 47L, 34L, 35L,
36L, 48L, 49L, 57L, 58L, 59L, 50L, 51L, 52L, 64L, 53L, 54L,
55L, 56L, 60L, 61L, 62L, 63L, 66L, 67L, 68L, 80L, 69L, 70L,
71L, 72L, 76L, 77L, 78L, 79L, 65L, 73L, 74L, 75L, 82L, 83L,
84L, 96L, 92L, 93L, 94L, 95L, 85L, 86L, 87L, 88L, 81L, 89L,
90L, 91L), .Label = c("EO1 1", "EO1 10", "EO1 11", "EO1 12",
"EO1 13", "EO1 14", "EO1 15", "EO1 16", "EO1 2", "EO1 3",
"EO1 4", "EO1 5", "EO1 6", "EO1 7", "EO1 8", "EO1 9", "EO2 1",
"EO2 10", "EO2 11", "EO2 12", "EO2 13", "EO2 14", "EO2 15",
"EO2 16", "EO2 2", "EO2 3", "EO2 4", "EO2 5", "EO2 6", "EO2 7",
"EO2 8", "EO2 9", "EO3 1", "EO3 10", "EO3 11", "EO3 12",
"EO3 13", "EO3 14", "EO3 15", "EO3 16", "EO3 2", "EO3 3",
"EO3 4", "EO3 5", "EO3 6", "EO3 7", "EO3 8", "EO3 9", "EO4 1",
"EO4 10", "EO4 11", "EO4 12", "EO4 13", "EO4 14", "EO4 15",
"EO4 16", "EO4 2", "EO4 3", "EO4 4", "EO4 5", "EO4 6", "EO4 7",
"EO4 8", "EO4 9", "EO5 1", "EO5 10", "EO5 11", "EO5 12",
"EO5 13", "EO5 14", "EO5 15", "EO5 16", "EO5 2", "EO5 3",
"EO5 4", "EO5 5", "EO5 6", "EO5 7", "EO5 8", "EO5 9", "EO6 1",
"EO6 10", "EO6 11", "EO6 12", "EO6 13", "EO6 14", "EO6 15",
"EO6 16", "EO6 2", "EO6 3", "EO6 4", "EO6 5", "EO6 6", "EO6 7",
"EO6 8", "EO6 9"), class = "factor"), predators_trunk = c(7L,
10L, 9L, 15L, 18L, 11L, 5L, 7L, 15L, 12L, 6L, 12L, 7L, 13L,
24L, 17L, 3L, 0L, 0L, 2L, 4L, 3L, 0L, 6L, 2L, 3L, 5L, 1L,
5L, 12L, 18L, 15L, 7L, 0L, 5L, 1L, 17L, 7L, 13L, 19L, 7L,
3L, 5L, 10L, 11L, 7L, 13L, 7L, 7L, 0L, 4L, 2L, 5L, 7L, 4L,
7L, 8L, 7L, 9L, 20L, 13L, 2L, 12L, 7L, 0L, 7L, 2L, 2L, 2L,
4L, 17L, 2L, 3L, 1L, 1L, 1L, 11L, 1L, 1L, 8L, 8L, 18L, 5L,
6L, 6L, 5L, 6L, 5L, 9L, 2L, 8L, 13L, 13L, 5L, 3L, 5L), pH_H2O = c(4.145,
4.145, 4.145, 4.145, 4.1825, 4.1825, 4.1825, 4.1825, 4.1325,
4.1325, 4.1325, 4.1325, 4.14125, 4.14125, 4.14125, 4.14125,
4.265, 4.265, 4.265, 4.265, 4.21, 4.21, 4.21, 4.21, 4.18375,
4.18375, 4.18375, 4.18375, 4.09625, 4.09625, 4.09625, 4.09625,
4.1575, 4.1575, 4.1575, 4.1575, 4.1125, 4.1125, 4.1125, 4.1125,
4.20875, 4.20875, 4.20875, 4.20875, 3.97125, 3.97125, 3.97125,
3.97125, 4.025, 4.025, 4.025, 4.025, 4.005, 4.005, 4.005,
4.005, 4.04, 4.04, 4.04, 4.04, 4.03125, 4.03125, 4.03125,
4.03125, 4.4575, 4.4575, 4.4575, 4.4575, 4.52, 4.52, 4.52,
4.52, 4.505, 4.505, 4.505, 4.505, 4.34875, 4.34875, 4.34875,
4.34875, 4.305, 4.305, 4.305, 4.305, 4.32, 4.32, 4.32, 4.32,
4.35, 4.35, 4.35, 4.35, 4.445, 4.445, 4.445, 4.445), ant_mean_abundance = c(53.85714,
53.85714, 53.85714, 53.85714, 24.28571, 24.28571, 24.28571,
24.28571, 45.5, 45.5, 45.5, 45.5, 51.14286, 51.14286, 51.14286,
51.14286, 66.28571, 66.28571, 66.28571, 66.28571, 76.5, 76.5,
76.5, 76.5, 65.71429, 65.71429, 65.71429, 65.71429, 8.642857,
8.642857, 8.642857, 8.642857, 109.3571, 109.3571, 109.3571,
109.3571, 25.14286, 25.14286, 25.14286, 25.14286, 101.3571,
101.3571, 101.3571, 101.3571, 31.78571, 31.78571, 31.78571,
31.78571, 78.64286, 78.64286, 78.64286, 78.64286, 93.28571,
93.28571, 93.28571, 93.28571, 63.14286, 63.14286, 63.14286,
63.14286, 67.14286, 67.14286, 67.14286, 67.14286, 44.0625,
44.0625, 44.0625, 44.0625, 23.875, 23.875, 23.875, 23.875,
95.8125, 95.8125, 95.8125, 95.8125, 49.125, 49.125, 49.125,
49.125, 57, 57, 57, 57, 38.125, 38.125, 38.125, 38.125, 40.6875,
40.6875, 40.6875, 40.6875, 22, 22, 22, 22), bird_activity = c(153.24,
153.24, 153.24, 153.24, 153.24, 153.24, 153.24, 153.24, 0,
0, 0, 0, 0, 0, 0, 0, 240.96, 240.96, 240.96, 240.96, 240.96,
240.96, 240.96, 240.96, 0, 0, 0, 0, 0, 0, 0, 0, 154.54, 154.54,
154.54, 154.54, 154.54, 154.54, 154.54, 154.54, 0, 0, 0,
0, 0, 0, 0, 0, 107.68, 107.68, 107.68, 107.68, 107.68, 107.68,
107.68, 107.68, 0, 0, 0, 0, 0, 0, 0, 0, 172.42, 172.42, 172.42,
172.42, 172.42, 172.42, 172.42, 172.42, 0, 0, 0, 0, 0, 0,
0, 0, 113.8, 113.8, 113.8, 113.8, 113.8, 113.8, 113.8, 113.8,
0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("Site", "Treatment",
"Tree", "predators_trunk", "pH_H2O", "ant_mean_abundance", "bird_activity"
), class = "data.frame", row.names = c(NA, -96L))
And here is the code leading to the warnings:
library(lme4)
summary(glmer.nb(predators_trunk ~ scale(ant_mean_abundance) + scale(bird_activity) + scale(pH_H2O) + (1 | Site/Treatment), testdata, na.action = na.fail))
summary(glmer(predators_trunk ~ scale(ant_mean_abundance) + scale(bird_activity) + scale(pH_H2O) + (1 | Site/Treatment), testdata, family = negative.binomial(theta = 4.06643400243645), na.action = na.fail))
Interestingly to me, the summary of the glmer.nb does not yield any warnings, but the call to glmer, using the theta that was estimated by glmer.nb, does give me the warnings. The latter is the model call that is generated by using dredge (MuMIn) on the corresponding glmer.nb full model.
This warning suggests that your standard error estimates might be less accurate. But as with all warnings, it's hard to know for sure and the best thing is to try to cross-check if you can.
In this case I saved your two fits, from glmer.nb and glmer, as g1 and g2. You can see that the estimates (point estimates, SEs, Z values ...) have changed a little bit, but not very much, so at the very least that should reassure you.
printCoefmat(coef(summary(g1)),digits=2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.844 0.111 16.7 <2e-16 ***
scale(ant_mean_abundance) -0.347 0.077 -4.5 7e-06 ***
scale(bird_activity) -0.122 0.076 -1.6 0.107
scale(pH_H2O) -0.275 0.104 -2.6 0.008 **
> printCoefmat(coef(summary(g2)),digits=2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.846 0.108 17.1 <2e-16 ***
scale(ant_mean_abundance) -0.347 0.077 -4.5 6e-06 ***
scale(bird_activity) -0.122 0.075 -1.6 0.102
scale(pH_H2O) -0.275 0.102 -2.7 0.007 **
I have a development version of lme4 on Github (the test_mods branch, hopefully integrated into the master branch soon: if you want to install it, you can use devtools::install_github("lme4/lme4",ref="test_mods")) which allows you to pick a more accurate (but slower) calculation for the standard errors: this gets us back to (nearly) the same standard errors as glmer.nb.
g3 <- update(g2, control=glmerControl(deriv.method="Richardson"))
printCoefmat(coef(summary(g3)),digits=2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.846 0.111 16.7 <2e-16 ***
scale(ant_mean_abundance) -0.347 0.077 -4.5 6e-06 ***
scale(bird_activity) -0.122 0.076 -1.6 0.106
scale(pH_H2O) -0.275 0.104 -2.6 0.008 **
all.equal(coef(summary(g1))[,"Std. Error"],
coef(summary(g3))[,"Std. Error"])
[1] "Mean relative difference: 0.001597978"
The glmmTMB package (on Github) also gives almost the same results:
library(glmmTMB)
g5 <- glmmTMB(predators_trunk ~ scale(ant_mean_abundance) +
scale(bird_activity) + scale(pH_H2O) +
(1 | Site/Treatment), testdata,
family=nbinom2)
printCoefmat(coef(summary(g5))[["cond"]],digits=2)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.852 0.110 16.8 <2e-16 ***
scale(ant_mean_abundance) -0.348 0.077 -4.5 7e-06 ***
scale(bird_activity) -0.123 0.076 -1.6 0.106
scale(pH_H2O) -0.276 0.105 -2.6 0.008 **

Reverse fill order of stacked bars with faceting

I can't figure out how to get the fill order to reverse. Basically, I'm trying to get the guide and the fill to match an intrinsic order of the words from positive to negative:
The guide, and the fill order, from top to bottom should be:
"Far better than I expected", (Filled at very top, at top of legend)
"A little better than I expected",
"About what I expected",
"A little worse than I expected",
"Far worse than I expected" (Filled at very bottom, at bottom of legend)
You'll need sample data:
dat <- structure(list(Banner = structure(c(2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Other", "Some Company"
), class = "factor"), Response = structure(c(1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L),
.Label = c(
"Far better than I expected",
"A little better than I expected",
"About what I expected",
"A little worse than I expected",
"Far worse than I expected"), class = "factor"), Frequency = c(1L,
6L, 9L, 0L, 0L, 29L, 71L, 149L, 32L, 6L, 1L, 7L, 16L, 1L, 0L,
38L, 90L, 211L, 24L, 6L, 0L, 0L, 8L, 1L, 1L, 6L, 13L, 109L, 35L,
9L), Proportion = c(6, 38, 56, 0, 0, 10, 25, 52, 11, 2, 4, 28,
64, 4, 0, 10, 24, 57, 7, 2, 0, 0, 80, 10, 10, 3, 8, 63, 20, 5
), Phase = c("Phase 1", "Phase 1", "Phase 1", "Phase 1", "Phase 1",
"Phase 1", "Phase 1", "Phase 1", "Phase 1", "Phase 1", "Phase 2",
"Phase 2", "Phase 2", "Phase 2", "Phase 2", "Phase 2", "Phase 2",
"Phase 2", "Phase 2", "Phase 2", "Phase 3", "Phase 3", "Phase 3",
"Phase 3", "Phase 3", "Phase 3", "Phase 3", "Phase 3", "Phase 3",
"Phase 3")), .Names = c("Banner", "Response", "Frequency", "Proportion",
"Phase"),
row.names = c(NA, 30L),
sig = character(0),
comment = "Overall, my experience was... by Company", q1 = "", q2 = "",
class = c("survcsub", "data.frame"))
Position labels
dat <- ddply(dat, .(Banner, Phase), function(x) {
x$Pos <- (cumsum(x$Proportion) - 0.5*x$Proportion)
x
})
Plot
ggplot(dat, aes(Banner, Proportion/100, fill=Response,
label=ifelse(Proportion > 5, percent(Proportion/100), ""))) +
geom_bar(position="fill", stat="identity") +
geom_text(aes(Banner, Pos/100)) +
facet_grid(~Phase) +
scale_y_continuous(labels=percent) +
labs(x="\nCompany", y="\nProportion")
What I've tried:
dat$Response <- factor(dat$Response, levels=rev(dat$Response))
# No dice, reverses the colour of the scale but not the position of the fill
To change the order of values in stacked barplot you should use argument order= in aes() of geom_bar() and set name of column necessary for ordering (in this case Response). With function desc() you can set reverse order of bars.
Using your original data frame (without last line of factor()).
ggplot(dat, aes(Banner, Proportion/100, fill=Response,
label=ifelse(Proportion > 5, percent(Proportion/100), ""))) +
geom_bar(position="fill", stat="identity",aes(order=desc(Response))) +
geom_text(aes(Banner, Pos/100)) +
facet_grid(~Phase) +
scale_y_continuous(labels=percent) +
labs(x="\nCompany", y="\nProportion")
To get correct placement of labels, changed calculation of positions:
dat <- ddply(dat, .(Banner, Phase), function(x) {
x$Pos <- (100-cumsum(x$Proportion) + 0.5*x$Proportion)
x
})

Resources