LPsolve Hybrid constraints - r

I use the following R code to optimize Soccer lineups for my fantasy sports league. It has been working great up until now, but a new wrinkle has been added into the list of constraints that I would like to resolve.
A lineup consists of 8 players. 1GK, 2D, 2M, 2F, & 1 Util.
When creating the model Matrix, I now have to account for hybrid player positions such as M/F or D/M
In R what is the correct way to add a 1 in the column for M and a 1 in the column for F if a players position is M/F? Is this the correct approach to resolve this or should I be looking at other ideas.
Working Solver code with GK D M F positions accounted for but not D/M or M/F
df <- read.csv("players.csv",encoding = "UTF-8")
mm <- cbind(model.matrix(as.formula("FP~Pos+0"), df))
mm <- cbind(mm, mm, 1, df$Salary, df$Salary, df$FP)
colnames(mm) <- c("D", "F", "GK", "M", "D", "F", "GK", "M", "tot", "salary", "minSal", "FP")
mm <- t(mm)
obj <- df$FP
dir <- c("<=", "<=", "<=", "<=", ">=", ">=", ">=", ">=", "==", "<=", ">=", "<=")
x <- 20000
vals <- c()
ptm <- proc.time()
for(i in 1:5){
rhs <- c(3, 3, 1, 3, 2, 2, 1, 2, 8, 50000, 49500, x)
lp <- lp(direction = 'max',
objective.in = obj,
all.bin = T,
const.rhs = rhs,
const.dir = dir,
const.mat = mm)
vals <- c(vals, lp$objval)
x <- lp$objval - 0.00001
df$selected <- lp$solution
lineup <- df[df$selected == 1, ]
lineup = subset(lineup, select = -c(selected))
lineup <- lineup %>%
arrange(Pos)
print("---- Start ----")
print(i)
print(lineup)
print(sum(lineup$FP))
print(mean(lineup$own, na.rm = TRUE))
print(sum(lineup$Salary))
print(sum(lineup$S))
print("---- END ----")
}
proc.time() - ptm
Here is a sample pool of approx 100 players with a few hybrid players included.
structure(list(Name = structure(c(104L, 105L, 92L, 16L, 84L,
53L, 85L, 37L, 21L, 34L, 100L, 101L, 83L, 31L, 14L, 35L, 98L,
59L, 60L, 5L, 6L, 78L, 57L, 89L, 26L, 17L, 74L, 63L, 33L, 71L,
75L, 41L, 9L, 39L, 12L, 1L, 29L, 7L, 2L, 68L, 73L, 90L, 46L,
72L, 79L, 50L, 88L, 20L, 97L, 64L, 67L, 3L, 94L, 4L, 22L, 103L,
52L, 47L, 30L, 58L, 10L, 44L, 28L, 38L, 23L, 15L, 49L, 69L, 81L,
43L, 99L, 93L, 32L, 56L, 82L, 91L, 62L, 36L, 70L, 48L, 11L, 77L,
27L, 51L, 25L, 24L, 65L, 96L, 42L, 18L, 102L, 86L, 76L, 87L,
45L, 61L, 40L, 95L, 8L, 55L, 13L, 66L, 80L, 19L, 54L), .Label = c(" Bojan",
" Oscar", " Willian", "Aaron Ramsey", "Abel Hernandez", "Adam Smith",
"Adama Diomande", "Adlene Guedioura", "Adnan Januzaj", "Ahmed Elmohamady",
"Alex Iwobi", "Alex Oxlade-Chamberlain", "Alexis Sanchez", "Andre Gray",
"Andrew Robertson", "Andros Townsend", "Anthony Martial", "Antonio Valencia",
"Ben Mee", "Branislav Ivanovic", "Calum Chambers", "Cedric Soares",
"Cesc Fabregas", "Charlie Daniels", "Christian Fuchs", "Curtis Davies",
"Daley Blind", "Daniel Drinkwater", "David de Gea", "Demarai Gray",
"Diego Costa", "Donald Love", "Dusan Tadic", "Eden Hazard", "Eldin Jakupovic",
"Erik Pieters", "Etienne Capoue", "Fernando Llorente", "Gareth Barry",
"Glenn Whelan", "Gylfi Sigurdsson", "Hector Bellerin", "Idrissa Gueye",
"Jack Cork", "Jack Rodwell", "Jason Puncheon", "Jefferson Montero",
"Jeremain Lens", "Jeremy Pied", "Jermain Defoe", "Joe Allen",
"Joel Ward", "John Obi Mikel", "Jordi Amat", "Jordon Ibe", "Joshua King",
"Juan Mata", "Kasper Schmeichel", "Kevin Mirallas", "Kyle Naughton",
"Laurent Koscielny", "Leighton Baines", "Leroy Fer", "Lukasz Fabianski",
"Maarten Stekelenburg", "Marc Albrighton", "Mason Holgate", "Matt Targett",
"Matthew Lowton", "Max Gradel", "Michy Batshuayi", "Modou Barrow",
"Nacho Monreal", "Nathan Redmond", "Nordin Amrabat", "Pape Souare",
"Papy Djilobodji", "Patrick van Aanholt", "Paul Pogba", "Phil Bardsley",
"Pierre-Emile HĂžjbjerg", "Ramiro Funes Mori", "Riyad Mahrez",
"Robert Snodgrass", "Ross Barkley", "Ryan Fraser", "Sam Clucas",
"Sam Vokes", "Santiago Cazorla", "Serge Gnabry", "Shane Long",
"Shaun Maloney", "Simon Francis", "Stephen Kingsley", "Stephen Ward",
"Steven Davis", "Steven Defour", "Theo Walcott", "Thibaut Courtois",
"Tom Heaton", "Wayne Rooney", "Wayne Routledge", "Wilfried Zaha",
"Xherdan Shaqiri", "Zlatan Ibrahimovic"), class = "factor"),
Salary = c(7000L, 9600L, 5700L, 7100L, 6500L, 3200L, 7800L,
4200L, 3300L, 8600L, 4200L, 7900L, 9900L, 8700L, 7700L, 4300L,
6700L, 5600L, 3700L, 6600L, 4700L, 5700L, 6600L, 7200L, 3500L,
7300L, 5900L, 4300L, 7700L, 7100L, 4000L, 9100L, 7400L, 4000L,
5800L, 5700L, 5600L, 6300L, 6800L, 4500L, 5100L, 3400L, 5700L,
5100L, 8000L, 7800L, 7000L, 5100L, 4900L, 4500L, 3300L, 8300L,
3200L, 6600L, 4900L, 6300L, 4400L, 4200L, 4800L, 5200L, 5200L,
4500L, 4300L, 7100L, 6500L, 4100L, 3000L, 3800L, 4700L, 4600L,
5800L, 4600L, 4200L, 6100L, 3500L, 6800L, 5800L, 4800L, 7300L,
5000L, 5000L, 3300L, 4200L, 3900L, 6100L, 5500L, 5400L, 4700L,
4700L, 4600L, 4400L, 3400L, 4300L, 4900L, 4600L, 4000L, 3500L,
3600L, 3300L, 4800L, 9300L, 7900L, 3700L, 3400L, 2800L),
Position = structure(c(5L, 3L, 2L, 5L, 5L, 5L, 5L, 5L, 1L,
6L, 4L, 3L, 6L, 3L, 3L, 4L, 6L, 6L, 1L, 3L, 1L, 1L, 5L, 5L,
1L, 6L, 6L, 5L, 5L, 3L, 6L, 5L, 5L, 5L, 6L, 6L, 4L, 3L, 5L,
1L, 2L, 5L, 5L, 6L, 5L, 3L, 3L, 2L, 5L, 4L, 1L, 5L, 1L, 5L,
1L, 6L, 1L, 6L, 6L, 4L, 1L, 5L, 5L, 3L, 5L, 1L, 1L, 1L, 5L,
5L, 4L, 1L, 1L, 3L, 1L, 3L, 2L, 1L, 6L, 3L, 6L, 1L, 1L, 5L,
1L, 2L, 4L, 5L, 1L, 1L, 5L, 5L, 1L, 5L, 5L, 1L, 5L, 1L, 5L,
6L, 6L, 5L, 1L, 1L, 1L), .Label = c("D", "D/M", "F", "GK",
"M", "M/F"), class = "factor"), FP = c(23.5, 21.75, 21, 19.75,
17.5, 17.333, 16.625, 16.5, 16.5, 16.25, 16, 15.25, 14.875,
14.25, 13.75, 13.5, 13.375, 13.25, 12.875, 12.75, 12.75,
12.5, 12.375, 12, 11.75, 11.625, 11.375, 11, 10.875, 10.625,
10.5, 10.375, 10.125, 10, 9.625, 9.625, 9.5, 9.25, 9.125,
9.125, 9, 9, 8.875, 8.875, 8.75, 8.75, 8.5, 8.5, 8.5, 8.5,
8.5, 8.25, 8.25, 8, 8, 7.875, 7.875, 7.875, 7.75, 7.5, 7.5,
7.5, 7.5, 7.25, 7.25, 7.125, 7, 6.875, 6.625, 6.625, 6.5,
6.5, 6.5, 6.25, 6.25, 6.125, 6.125, 6.125, 6, 6, 6, 6, 5.875,
5.875, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.625,
5.5, 5.5, 5.5, 5.5, 5.375, 5.375, 5.25, 5.125, 5, 5, 5, 5
), teamAbbrev = structure(c(11L, 9L, 7L, 5L, 7L, 4L, 6L,
14L, 1L, 4L, 3L, 9L, 8L, 4L, 3L, 7L, 1L, 6L, 13L, 7L, 2L,
12L, 9L, 1L, 7L, 9L, 10L, 13L, 10L, 4L, 14L, 13L, 12L, 6L,
1L, 11L, 9L, 7L, 4L, 10L, 1L, 1L, 5L, 13L, 9L, 12L, 3L, 4L,
3L, 13L, 6L, 4L, 13L, 1L, 10L, 5L, 5L, 13L, 8L, 8L, 7L, 13L,
8L, 13L, 4L, 7L, 10L, 3L, 10L, 6L, 4L, 2L, 12L, 2L, 6L, 10L,
6L, 11L, 2L, 12L, 1L, 12L, 9L, 11L, 8L, 2L, 6L, 10L, 1L,
9L, 13L, 2L, 5L, 7L, 12L, 1L, 11L, 3L, 14L, 2L, 1L, 8L, 11L,
3L, 13L), .Label = c("ARS", "BOU", "BUR", "CHE", "CRY", "EVE",
"HUL", "LEI", "MU", "SOU", "STK", "SUN", "SWA", "WAT"), class = "factor")), .Names = c("Name",
"Salary", "Position", "FP", "teamAbbrev"), class = "data.frame", row.names = c(NA,
-105L))

By using an empty matrix and filling the rows with the correct values for each position I was able to get this to work.
#### SOLVER ##### ----
mm <- matrix(0, nrow = 8, ncol = nrow(df))
# Goal Keeper
j<-1
i<-1
for (i in 1:nrow(df)){
if (df$Pos[i]=="GK")
mm[j,i]<-1
}
# Defender
j<-2
i<-1
for (i in 1:nrow(df)){
if (df$Pos[i]=="D")
mm[j,i]<-1
}
# Midfielder
j<-3
i<-1
for (i in 1:nrow(df)){
if (df$Pos[i]=="M" ||
df$Pos[i]=="M/F")
mm[j,i]<-1
}
# Forward
j<-4
i<-1
for (i in 1:nrow(df)){
if (df$Pos[i]=="F" ||
df$Pos[i]=="M/F")
mm[j,i]<-1
}
# Utility
j<-5
i<-1
for (i in 1:nrow(df)){
if (!df$Pos[i]=="GK")
mm[j,i]<-1
}
# Salary
mm[6, ] <- df$Salary
mm[7, ] <- df$FP
mm[8, ] <- 1
# rbind existing matrix to itself to set minimum constraints
mm <- rbind(mm, mm[1:5,])
i<-1
objective.in <- df$FP
const.mat <- mm
const.dir <- c("<=", "<=", "<=", "<=", "<=", "<=", "<=", "==",
">=", ">=", ">=", ">=", ">=")
x <- 20000
vals <- c()
for(i in 1:5){
const.rhs <- c(1, 4, 4, 4, 7, 50000, x, 8, # max for each contraint
1, 2, 2, 2, 7) # min for each constraint
sol <- lp(direction = "max", objective.in, # maximize objective function
const.mat, const.dir, const.rhs, # constraints
all.bin = TRUE)
vals <- c(vals, sol$objval)
x <- sol$objval - 0.00001
inds <- which(sol$solution == 1)
sum(df$salary[inds])
solution<-df[inds, ]
solution <- solution[,-c(8)]
solution <- solution %>%
arrange(Pos)
print("---- Start ----")
print(i)
print(solution)
print(sum(solution$FP))
print(sum(solution$Salary))
print(sum(solution$S))
print("---- END ----")
}

Related

Why is my dplyr code to create multiple variables using mutate and zoo incredibly slow?

I am using dplyr to create multiple variables in my data frame using mutate. At the same time, I am using zoo to calculate a rolling average. As an example, I have my variables set up like so:
vars <- "total_apples", "total_oranges", "total_bananas"
My data has over 100 variables and approx. 40,000 lines, but the above is just an example.
I am using this code below:
library(dplyr)
library(zoo)
data <- data %>%
group_by(fruit) %>%
mutate(across(c(all_of(vars)), list(avge_last2 = ~ zoo::rollapplyr(., 2, FUN = mean, partial = TRUE))))
Just for the above to calculate the average over the last 2 records, it takes over 5 mins:
> end.time <- Sys.time()
> time.taken <- end.time - start.time
> time.taken
Time difference of 5.925337 mins
It takes even longer if I want to average over more records, say n= 10 like so:
library(dplyr)
library(zoo)
data <- data %>%
group_by(fruit) %>%
mutate(across(c(all_of(vars)), list(avge_last2 = ~ zoo::rollapplyr(., 10, FUN = mean, partial = TRUE))))
Is there an issue with my code or is it something else?
dput(head(data,20)) provides the following:
structure(list(match_id = c(14581L, 14581L, 14581L, 14581L, 14581L,
14581L, 14581L, 14581L, 14581L, 14581L, 14581L, 14581L, 14581L,
14581L, 14581L, 14581L, 14581L, 14581L, 14581L, 14581L), match_date = structure(c(16527,
16527, 16527, 16527, 16527, 16527, 16527, 16527, 16527, 16527,
16527, 16527, 16527, 16527, 16527, 16527, 16527, 16527, 16527,
16527), class = "Date"), season = c(2015, 2015, 2015, 2015, 2015,
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015,
2015, 2015, 2015, 2015), match_round = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), home_team = c(3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), away_team = c(14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14), venue = c(11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11), venue_name = c("MCG",
"MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG",
"MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG",
"MCG"), opponent = c(14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14), player_id = c(11186L,
11215L, 11285L, 11330L, 11380L, 11388L, 11407L, 11472L, 11473L,
11490L, 11553L, 11561L, 11573L, 11582L, 11598L, 11601L, 11616L,
11643L, 11671L, 11737L), player_first_name = c("Chris", "Chris",
"Kade", "Troy", "Andrew", "Brett", "Cameron", "Marc", "Dale",
"Ivan", "Bryce", "Shane", "Bachar", "Jack", "Andrejs", "Shaun",
"Michael", "Lachie", "Trent", "Alex"), player_last_name = c("Judd",
"Newman", "Simpson", "Chaplin", "Carrazzo", "Deledio", "Wood",
"Murphy", "Thomas", "Maric", "Gibbs", "Edwards", "Houli", "Riewoldt",
"Everitt", "Grigg", "Jamison", "Henderson", "Cotchin", "Rance"
), player_team = c("Carlton", "Richmond", "Carlton", "Richmond",
"Carlton", "Richmond", "Carlton", "Carlton", "Carlton", "Richmond",
"Carlton", "Richmond", "Richmond", "Richmond", "Carlton", "Richmond",
"Carlton", "Carlton", "Richmond", "Richmond"), player_team_numeric = c(3,
14, 3, 14, 3, 14, 3, 3, 3, 14, 3, 14, 14, 14, 3, 14, 3, 3, 14,
14), guernsey_number = c(5L, 1L, 6L, 25L, 44L, 3L, 36L, 3L, 39L,
20L, 4L, 10L, 14L, 8L, 33L, 6L, 40L, 23L, 9L, 18L), player_position = c(3,
14, 14, 1, 17, 13, 16, 12, 20, 16, 14, 5, 10, 8, 13, 14, 6, 7,
3, 2), disposals = c(21L, 7L, 21L, 13L, 18L, 18L, 11L, 21L, 1L,
13L, 26L, 21L, 21L, 17L, 18L, 17L, 8L, 10L, 17L, 18L), kicks = c(16L,
6L, 13L, 9L, 9L, 9L, 8L, 9L, 1L, 8L, 15L, 9L, 15L, 13L, 14L,
9L, 4L, 9L, 6L, 9L), marks = c(5L, 1L, 8L, 1L, 2L, 3L, 2L, 2L,
0L, 4L, 4L, 1L, 5L, 8L, 8L, 4L, 2L, 6L, 3L, 4L), handballs = c(5L,
1L, 8L, 4L, 9L, 9L, 3L, 12L, 0L, 5L, 11L, 12L, 6L, 4L, 4L, 8L,
4L, 1L, 11L, 9L), tackles = c(6L, 1L, 2L, 2L, 2L, 0L, 1L, 2L,
0L, 4L, 4L, 3L, 1L, 0L, 2L, 2L, 1L, 2L, 1L, 0L), clearances = c(6L,
0L, 0L, 0L, 6L, 1L, 6L, 4L, 0L, 4L, 4L, 7L, 0L, 0L, 1L, 3L, 0L,
0L, 1L, 1L), brownlow_votes = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), effective_disposals = c(15L,
6L, 16L, 11L, 16L, 13L, 6L, 14L, 1L, 11L, 13L, 16L, 16L, 10L,
14L, 12L, 5L, 6L, 9L, 17L), disposal_efficiency_percentage = c(71L,
86L, 76L, 85L, 89L, 72L, 55L, 67L, 100L, 85L, 50L, 76L, 76L,
59L, 78L, 71L, 63L, 60L, 53L, 94L), contested_possessions = c(11L,
3L, 5L, 7L, 9L, 6L, 7L, 9L, 1L, 9L, 9L, 15L, 1L, 7L, 3L, 4L,
3L, 4L, 5L, 5L), uncontested_possessions = c(10L, 4L, 17L, 6L,
10L, 12L, 4L, 12L, 0L, 4L, 17L, 7L, 18L, 9L, 14L, 11L, 5L, 7L,
12L, 14L), time_on_ground_percentage = c(79L, 65L, 73L, 100L,
76L, 69L, 89L, 81L, 1L, 88L, 73L, 83L, 85L, 98L, 95L, 81L, 96L,
91L, 86L, 96L), afl_fantasy_score = c(93L, 26L, 97L, 42L, 54L,
53L, 61L, 67L, 4L, 91L, 96L, 67L, 78L, 89L, 80L, 80L, 30L, 54L,
54L, 58L), contested_marks = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 2L, 1L, 0L, 1L, 3L, 0L, 0L, 0L, 1L, 0L, 0L), metres_gained = c(474L,
231L, 269L, 165L, 128L, 181L, 151L, 227L, -7L, 160L, 466L, 332L,
709L, 268L, 464L, 283L, 99L, 257L, 203L, 288L), turnovers = c(5L,
3L, 4L, 2L, 3L, 2L, 2L, 4L, 0L, 1L, 6L, 2L, 5L, 8L, 5L, 2L, 2L,
3L, 3L, 1L), effective_kicks = c(11L, 5L, 9L, 7L, 7L, 4L, 3L,
5L, 1L, 6L, 5L, 4L, 11L, 7L, 12L, 5L, 2L, 6L, 1L, 9L), ground_ball_gets = c(8L,
2L, 4L, 5L, 7L, 4L, 4L, 8L, 0L, 3L, 6L, 9L, 0L, 4L, 3L, 2L, 2L,
2L, 5L, 3L), cum_rec = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20), rank_match_kicks = c(2, 34,
10.5, 20.5, 20.5, 20.5, 28, 20.5, 43, 28, 4.5, 20.5, 4.5, 10.5,
8, 20.5, 39.5, 20.5, 34, 20.5), rank_match_marks = c(14, 39,
5, 39, 33, 27.5, 33, 33, 43.5, 20.5, 20.5, 39, 14, 5, 5, 20.5,
33, 10, 27.5, 20.5)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
Update:
Consider the example below using the functions suggested in the answer below:
match_id <- c("match_1", "match_1","match_1","match_2","match_2","match_2","match_3","match_3","match_3")
player_id <- c("player_1", "player_2", "player_3", "player_1", "player_2", "player_3", "player_1", "player_2", "player_3")
turnovers <- c(5,10,15,1,2,3,5,7,9)
data <- data.frame(match_id, player_id, turnovers)
f <- function(dt, window, vars, byvars, partial=F) {
res = dt[, lapply(.SD, frollmean, n=window), by=byvars, .SDcols=vars]
if(partial) {
res = rbind(
partials(dt,window-1,vars, byvars),
res[window:.N, .SD, by=byvars]
)
}
return(res)
}
partials <- function(dt,w,vars,byvars) {
rbindlist(lapply(1:w, function(p) {
dt[1:p, lapply(.SD, function(v) Reduce(`+`, shift(v,0:(p-1)))/p),
.SDcols = vars, by=byvars][p:.N, .SD, by=byvars]
}))
}
# set the data as data.table
setDT(data)
# define vars of interest
vars = c("turnovers")
# ensure the order is correct for rolling mean
setorder(data, player_id, match_id )
# set the window size
n=3
# get the rolling mean, by grouping variable, for each var in `vars`, and add the partials
f(data, window=n, vars=vars, byvars="player_id", partial=T)
This returns the following:
player_id turnovers
1: player_1 5.000000
2: player_1 3.000000
3: player_1 3.666667
4: player_2 NA
5: player_2 NA
6: player_2 6.333333
7: player_3 NA
8: player_3 NA
9: player_3 9.000000
What am I doing wrong?
You could try this:
library(data.table)
setDT(data)
data[,paste0(vars, "_avge_last2_"):= lapply(.SD, frollmean, n=2),
.SDcols=vars,
by=.(fruit)
]
Update
Here is a more generalized solution for handling the NA(s) at the top of each window (i.e. the partial windows)
First, start with a function that can take a data table (dt), a window size (window), a set of variables (vars), and a set of grouping variables (byvars), and an optional logical indicator partial
f <- function(dt, window, vars, byvars, partial=F) {
res = dt[, lapply(.SD, frollmean, n=window), by=byvars, .SDcols=vars]
if(partial) {
res = rbind(
partials(dt,window-1,vars, byvars),
res[,.SD[window:.N], by=byvars]
)
}
return(res)
}
Add, the optional function partials()
partials <- function(dt,w,vars,byvars) {
rbindlist(lapply(1:w, function(p) {
dt[, lapply(.SD[1:p], function(v) Reduce(`+`, shift(v,0:(p-1)))/p),
.SDcols = vars, by=byvars][, .SD[p:.N], by=byvars]
}))
}
Apply the function
# set the data as data.table
setDT(data)
# define vars of interest
vars = c("turnovers", "effective_kicks")
# ensure the order is correct for rolling mean
setorder(data, match_id, player_id)
# set the window size
n=3
# get the rolling mean, by grouping variable, for each var in `vars`, and add the partials
f(data, window=n, vars=vars, byvars="player_id", partial=T)
There are several problems:
the code in the question does not work with the data provided but rather it gives errors. There is no fruit column in the data and the vars columns don't exist either. To make it run we group by match_id and define vars to include some existing columns.
it is better not to overwrite data but rather use a different name for the output to make debugging easier.
using across causes rollapplyr to be applied separately for each column which is inefficient given that rollapply can process multiple columns at once.
Using columns that actually exist in the data provided and assuming we want to use rollapplyr on the columns named in vars try this which only runs rollapplyr once per group and seems slightly faster.
Also fill=NA is used in place of partial=TRUE it will use a somewhat faster algorithm; however, in that case the first row in each group will have NA's as that is what fill=NA means and also that algorithm won't be used if there are already NA's in the columns to be averaged.
library(dplyr)
library(zoo)
vars <- c("home_team", "away_team")
data_out <- data %>%
group_by(match_id) %>%
data.frame(avg = rollapplyr(.[vars], 2, mean, partial = TRUE)) %>%
ungroup
I find that processing grouped dataframes in dplyer can really slows things down, I'm not sure if it's the best workaround but when I finish grouping I pipe in
%>% as.data.frame()
to get rid of the grouping information, and then do my calculations afterward. It can save a lot of time. If you've previously grouped a large dataset give that a try.

Cowplot Package: Aligning text labels in the top lefthand corner of plot space using plot_grid() in R

Overview:
I have produced a series of maps (see below) using the R-code below, and I used plot_grid() in the Cowplot package to arrange the plots using the two data frames below called "QuercusRobur 1" and "QuercusRobur2".
Problem:
The plots look really good; however, the text labels are not well aligned. A few labels overlay the top of y-axis called Latitude, and two of the text labels called "A: Urbanisation Index" and "B: Urbanisation Index" are not positioned above their plots, and they also overlay the main titles called "Observation Period 1" and "Observation Period 2".
Does anyone know how to neatly align the plot labels so they are positioned in the top lefthand corner of each plot but also prevent them from overlaying the y-axis or parts of the map (see desired output below)?
If anyone can help, I would be deeply appreciative
R-Code
##Import Packages
library(ggplot2)
library(maps)
library(mapdata)
library(tidyverse)
##Get a map of the UK from maps:
UK <- map_data(map = "world", region = "UK")
head(UK)
dim(UK)
##Produce point data
MapUK<-ggplot(data = UK, aes(x = long, y = lat, group = group)) +
geom_polygon() +
coord_map()
##head
head(QuercusRobur1)
head(QuercusRobur2)
##Remove weird data points
QuercusRobur1<-QuercusRobur1%>%filter(Longitude<=3)
##Observation 1
p1 <- ggplot(QuercusRobur1,
aes(x = Longitude, y = Latitude)) +
geom_polygon(data = UK,
aes(x = long, y = lat, group = group),
inherit.aes = FALSE) +
geom_point() +
coord_map(xlim = c(-10, 5)) + #limits added as there are some points really far away
theme_classic()
Urban1<-p1 +
aes(color = Urbanisation_index) +
scale_color_discrete(name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural"))
Stand1<-p1 +
aes(color = Stand_density_index) +
scale_color_discrete(name = "Stand Density Index",
labels = c("Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"))
Phenology1<-p1 +
aes(color = factor(Phenological_Index)) +
scale_color_discrete(name = "Phenological Index",
labels = c("No indication of autumn timing",
"First autumn tinting",
"Partial autumn tinting (>25% of leaves)",
"Advanced autumn tinting (>75% of leaves)"))
##Observation 2
p2 <- ggplot(QuercusRobur2,
aes(x = Longitude, y = Latitude)) +
geom_polygon(data = UK,
aes(x = long, y = lat, group = group),
inherit.aes = FALSE) +
geom_point() +
coord_map(xlim = c(-10, 5)) + #limits added as there are some points really far away
theme_classic()
Urban2<-p2 +
aes(color = Urbanisation_index) +
scale_color_discrete(name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural"))
Stand2<-p2 +
aes(color = Stand_density_.index) +
scale_color_discrete(name = "Stand Density Index",
labels = c("Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"))
Phenology2<-p2 +
aes(color = factor(Phenological_Index)) +
scale_color_discrete(name = "Phenological Index",
labels = c("No indication of autumn timing",
"First autumn tinting",
"Partial autumn tinting (>25% of leaves)",
"Advanced autumn tinting (>75% of leaves)"))
##Arrange the individual plots into one main plot
plot_grid(Urban1 + ggtitle("Observational Period 1"),
Urban2 + ggtitle("Observational Period 2"),
Stand1,
Stand2,
Phenology1,
Phenology2,
labels=c("A: Urbanisation Index", "B: Urbanisation Index",
"C: Stand Density Index","D: Stand Density Index",
"E: Phenological Index","F: Phenological Index"),
align = "v",
label_fontface="bold",
label_fontfamily="Times New Roman",
label_size = 8,
rel_widths = c(1, 1.3),
ncol = 2,
nrow = 3,
hjust = 0,
label_x = 0.01)
Plot produced from R-code
Desired Output
Data frame - QuercusRobur1
structure(list(Obs_.no = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 28L, 29L, 30L, 31L, 32L, 33L, 34L,
35L, 36L, 37L, 38L, 39L, 44L, 45L, 46L, 47L, 57L, 58L, 59L, 60L,
61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 74L,
75L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 93L,
102L, 103L, 104L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L,
120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L,
131L, 135L, 136L, 137L, 138L, 143L, 144L, 145L, 146L, 147L, 148L,
149L, 150L, 151L, 152L, 153L, 154L, 155L, 158L, 159L, 160L, 161L,
162L, 163L, 164L, 165L, 169L, 170L, 171L, 172L, 177L, 178L, 179L,
180L, 181L, 182L, 183L, 184L, 185L, 186L, 187L, 188L, 189L, 190L,
191L, 192L, 193L, 194L, 195L, 196L, 200L), Date_observed = structure(c(4L,
15L, 6L, 6L, 6L, 6L, 2L, 2L, 8L, 8L, 8L, 8L, 8L, 8L, 6L, 6L,
6L, 6L, 6L, 6L, 11L, 11L, 11L, 11L, 12L, 7L, 7L, 9L, 9L, 9L,
9L, 5L, 5L, 5L, 5L, 14L, 14L, 14L, 14L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 6L, 6L, 5L, 5L, 9L, 9L, 9L, 9L, 3L, 3L, 3L, 3L, 4L, 4L,
1L, 1L, 11L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 3L, 3L, 3L, 3L, 11L,
11L, 11L, 4L, 4L, 4L, 4L, 8L, 8L, 10L, 10L, 10L, 10L, 9L, 9L,
9L, 9L, 3L, 3L, 3L, 3L, 9L, 9L, 9L, 9L, 2L, 2L, 2L, 2L, 13L,
13L, 13L, 13L, 8L, 8L, 8L, 8L, 10L, 10L, 10L, 10L, 3L, 3L, 3L,
3L, 13L), .Label = c("10/1/18", "10/19/18", "10/20/18", "10/21/18",
"10/22/18", "10/23/18", "10/24/18", "10/25/18", "10/26/18", "10/27/18",
"10/28/18", "10/28/19", "10/29/18", "12/9/18", "8/20/18"), class = "factor"),
Latitude = c(51.4175, 52.12087, 52.0269, 52.0269, 52.0269,
52.0269, 52.947709, 52.947709, 51.491811, 51.491811, 52.59925,
52.59925, 52.59925, 52.59925, 51.60157, 51.60157, 52.6888,
52.6888, 52.6888, 52.6888, 50.697802, 50.697802, 50.697802,
50.697802, 53.62417, 50.446841, 50.446841, 53.959679, 53.959679,
53.959679, 53.959679, 51.78375, 51.78375, 51.78375, 51.78375,
51.456965, 51.456965, 51.456965, 51.456965, 51.3651, 51.3651,
51.3651, 51.3651, 52.01182, 52.01182, 52.01182, 52.01182,
50.114277, 50.114277, 51.43474, 51.43474, 51.10676, 51.10676,
51.10676, 51.10676, 50.435984, 50.435984, 50.435984, 50.435984,
51.78666, 51.78666, 52.441088, 52.441088, 52.552344, 49.259471,
49.259471, 49.259471, 49.259471, 50.461625, 50.461625, 50.461625,
50.461625, 51.746642, 51.746642, 51.746642, 51.746642, 52.2501,
52.2501, 52.2501, 52.2501, 52.423336, 52.423336, 52.423336,
52.423336, 53.615575, 53.615575, 53.615575, 53.615575, 51.08474,
51.08474, 51.08474, 53.19329, 53.19329, 53.19329, 53.19329,
55.96785, 55.96785, 56.52664, 56.52664, 56.52664, 56.52664,
51.8113, 51.8113, 51.8113, 51.8113, 52.580157, 52.580157,
52.580157, 52.580157, 50.52008, 50.52008, 50.52008, 50.52008,
51.48417, 51.48417, 51.48417, 51.48417, 54.58243, 54.58243,
54.58243, 54.58243, 52.58839, 52.58839, 52.58839, 52.58839,
52.717283, 52.717283, 52.717283, 52.717283, 50.740764, 50.740764,
50.740764, 50.740764, 52.57937), Longitude = c(-0.32118,
-0.29293, -0.7078, -0.7078, -0.7078, -0.7078, -1.435407,
-1.435407, -3.210324, -3.210324, 1.33011, 1.33011, 1.33011,
1.33011, -3.67111, -3.67111, -3.30909, -3.30909, -3.30909,
-3.30909, -2.11692, -2.11692, -2.11692, -2.11692, -2.43155,
-3.706923, -3.706923, -1.061008, -1.061008, -1.061008, -1.061008,
-0.65046, -0.65046, -0.65046, -0.65046, -2.624917, -2.624917,
-2.624917, -2.624917, 0.70706, 0.70706, 0.70706, 0.70706,
-0.70082, -0.70082, -0.70082, -0.70082, -5.541128, -5.541128,
0.45981, 0.45981, -2.32071, -2.32071, -2.32071, -2.32071,
-4.105617, -4.105617, -4.105617, -4.105617, -0.71433, -0.71433,
-0.176158, -0.176158, -1.337177, -123.107788, -123.107788,
-123.107788, -123.107788, 3.560973, 3.560973, 3.560973, 3.560973,
0.486416, 0.486416, 0.486416, 0.486416, -0.8825, -0.8825,
-0.8825, -0.8825, -1.787563, -1.787563, -1.787563, -1.787563,
-2.432959, -2.432959, -2.432959, -2.432959, -0.73645, -0.73645,
-0.73645, -0.63793, -0.63793, -0.63793, -0.63793, -3.18084,
-3.18084, -3.40313, -3.40313, -3.40313, -3.40313, -0.22894,
-0.22894, -0.22894, -0.22894, -1.948571, -1.948571, -1.948571,
-1.948571, -4.20756, -4.20756, -4.20756, -4.20756, -0.34854,
-0.34854, -0.34854, -0.34854, -5.93229, -5.93229, -5.93229,
-5.93229, -1.96843, -1.96843, -1.96843, -1.96843, -2.410575,
-2.410575, -2.410575, -2.410575, -2.361234, -2.361234, -2.361234,
-2.361234, -1.89325), Altitude = c(5L, 0L, 68L, 68L, 68L,
68L, 104L, 104L, 15L, 15L, 23L, 23L, 23L, 23L, 184L, 184L,
176L, 176L, 176L, 176L, 12L, 12L, 12L, 12L, 178L, 36L, 36L,
11L, 11L, 11L, 11L, 210L, 210L, 210L, 210L, 97L, 97L, 97L,
97L, 23L, 23L, 23L, 23L, 0L, 0L, 0L, 0L, 9L, 9L, 4L, 4L,
200L, 200L, 200L, 200L, 160L, 160L, 160L, 160L, 166L, 166L,
0L, 0L, 0L, 47L, 47L, 47L, 47L, 58L, 58L, 58L, 58L, 43L,
43L, 43L, 43L, 97L, 97L, 97L, 97L, 133L, 133L, 133L, 133L,
123L, 123L, 123L, 123L, 128L, 128L, 128L, 15L, 15L, 15L,
15L, 14L, 14L, 65L, 65L, 65L, 65L, 129L, 129L, 129L, 129L,
140L, 140L, 140L, 140L, 18L, 18L, 18L, 18L, 30L, 30L, 30L,
30L, 19L, 19L, 19L, 19L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
96L, 96L, 96L, 96L, 169L), Species = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Quercus robur", class = "factor"),
Tree_diameter = c(68.8, 10, 98.5, 97, 32.5, 45.1, 847, 817,
62, 71, 140, 111.4, 114.6, 167.1, 29, 40.1, 68, 45, 60, 54,
104, 122, 85, 71, 81, 39.8, 43.6, 20.1, 17.8, 15.6, 12.1,
81.8, 102.5, 75.5, 57.3, 0.3, 0.2, 0.3, 0.3, 70, 36, 53,
44, 31.5, 27.1, 23.3, 22, 69.4, 37.3, 19.9, 14.6, 196, 122,
118, 180, 58.6, 54.1, 58, 61.5, 58.4, 61, 134, 64, 52.2,
170, 114, 127, 158, 147.4, 135.3, 122.9, 104.1, 263, 237,
322, 302, 175, 182, 141, 155, 89, 41, 70, 83, 141, 86.5,
82, 114.5, 129, 127, 143, 125, 92, 68, 90, 24.5, 20.1, 63.7,
39.8, 66.2, 112.4, 124.5, 94.1, 68.6, 74.4, 23.6, 27.7, 22.9,
25.2, 24.2, 54.7, 43, 33.1, 306, 274, 56, 60, 72.5, 128.5,
22, 16, 143, 103, 53, 130, 48.4, 69.8, 6.4, 18.6, 129.2,
41.7, 57.6, 14, 41.7), Urbanisation_index = c(2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 4L, 4L,
4L, 4L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 1L, 1L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 2L), Stand_density_index = c(3L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 4L, 1L, 1L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 3L,
4L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 2L, 3L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 4L, 4L, 4L, 4L, 4L), Canopy_Index = c(85L,
85L, 85L, 75L, 45L, 25L, 75L, 65L, 75L, 75L, 95L, 95L, 95L,
95L, 95L, 65L, 85L, 65L, 95L, 85L, 85L, 85L, 75L, 75L, 65L,
85L, 85L, 75L, 75L, 85L, 65L, 95L, 85L, 95L, 95L, 75L, 75L,
85L, 85L, 85L, 85L, 85L, 75L, 85L, 85L, 85L, 85L, 75L, 75L,
85L, 85L, 65L, 75L, 85L, 75L, 95L, 95L, 95L, 95L, 75L, 65L,
95L, 95L, 55L, 75L, 65L, 75L, 65L, 85L, 95L, 95L, 75L, 95L,
75L, 95L, 65L, 75L, 75L, 85L, 85L, 65L, 95L, 65L, 65L, 65L,
65L, 65L, 65L, 85L, 85L, 75L, 95L, 85L, 85L, 75L, 45L, 55L,
35L, 35L, 25L, 25L, 95L, 85L, 75L, 85L, 85L, 75L, 75L, 65L,
75L, 85L, 65L, 45L, 95L, 95L, 95L, 95L, 65L, 75L, 45L, 35L,
75L, 95L, 95L, 85L, 75L, 65L, 85L, 95L, 75L, 85L, 85L, 95L,
65L), Phenological_Index = c(2L, 4L, 2L, 2L, 4L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 1L, 1L, 1L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 4L, 3L, 2L, 3L,
2L, 2L, 2L, 1L, 3L, 1L, 4L)), class = "data.frame", row.names = c(NA,
-134L))
Data frame - QuercusRobur2
structure(list(X = c(1L, 2L, 3L, 4L, 13L, 14L, 15L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 35L, 36L,
37L, 38L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L,
59L, 63L, 64L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L,
78L, 80L, 89L, 90L, 91L, 95L, 96L, 97L, 98L, 99L, 100L, 101L,
102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L,
113L, 114L, 118L, 119L, 120L, 121L, 126L, 127L, 128L, 129L, 130L,
131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 141L, 142L, 143L,
144L, 148L, 149L, 150L, 151L, 156L, 157L, 158L, 159L, 160L, 161L,
162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L,
173L, 174L, 175L, 179L, 180L, 181L, 182L, 183L, 185L, 187L, 189L,
190L, 191L, 192L, 193L, 194L, 195L, 196L, 208L, 209L, 210L, 212L,
214L, 225L, 226L, 227L, 228L, 229L, 230L, 231L, 242L, 243L, 244L,
245L, 246L, 247L, 248L, 249L, 250L, 251L, 252L, 253L, 254L, 255L,
256L, 257L, 258L, 259L, 260L, 261L), Obs_no = c(1L, 2L, 3L, 4L,
13L, 14L, 15L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 35L, 36L, 37L, 38L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L, 63L, 64L, 68L, 69L, 70L, 71L, 72L,
73L, 74L, 75L, 76L, 77L, 78L, 80L, 89L, 90L, 91L, 95L, 96L, 97L,
98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L,
109L, 110L, 111L, 112L, 113L, 114L, 118L, 119L, 120L, 121L, 126L,
127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L,
138L, 141L, 142L, 143L, 144L, 148L, 149L, 150L, 151L, 156L, 157L,
158L, 159L, 160L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L,
169L, 170L, 171L, 172L, 173L, 174L, 175L, 179L, 180L, 181L, 182L,
183L, 185L, 187L, 189L, 190L, 191L, 192L, 193L, 194L, 195L, 196L,
208L, 209L, 210L, 212L, 214L, 225L, 226L, 227L, 228L, 229L, 230L,
231L, 242L, 243L, 244L, 245L, 246L, 247L, 248L, 249L, 250L, 251L,
252L, 253L, 254L, 255L, 256L, 257L, 258L, 259L, 260L, 261L),
Date_observed = structure(c(9L, 14L, 3L, 3L, 12L, 12L, 10L,
10L, 8L, 8L, 8L, 8L, 11L, 11L, 11L, 11L, 5L, 5L, 9L, 9L,
13L, 13L, 13L, 13L, 8L, 8L, 8L, 8L, 13L, 13L, 13L, 13L, 7L,
7L, 7L, 7L, 6L, 6L, 11L, 11L, 11L, 11L, 11L, 11L, 4L, 4L,
4L, 4L, 12L, 12L, 12L, 12L, 5L, 1L, 1L, 1L, 1L, 5L, 5L, 5L,
5L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 13L, 13L, 13L, 8L, 8L, 8L, 8L, 13L, 13L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
3L, 3L, 3L, 3L, 13L, 13L, 13L, 13L, 10L, 10L, 10L, 10L, 12L,
12L, 12L, 12L, 3L, 3L, 3L, 3L, 13L, 13L, 5L, 5L, 5L, 11L,
11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
9L, 9L, 12L, 12L, 12L, 12L, 8L, 8L, 8L, 5L, 5L, 5L, 5L, 12L,
12L, 12L, 12L, 11L, 11L, 11L, 11L, 13L, 13L, 13L, 13L, 5L,
5L, 5L, 5L), .Label = c("10/23/18", "11/18/18", "11/30/18",
"12/1/18", "12/10/18", "12/12/18", "12/2/18", "12/3/18",
"12/4/18", "12/6/18", "12/7/18", "12/8/18", "12/9/18", "9/10/18"
), class = "factor"), Latitude = c(51.41752, 52.243806, 52.947709,
52.947709, 51.491811, 51.491811, 51.60157, 51.60157, 52.68959,
52.68959, 52.68959, 52.68959, 50.697802, 50.697802, 50.697802,
50.697802, 53.62417, 53.62417, 50.446841, 50.446841, 53.959679,
53.959679, 53.959679, 53.959679, 51.78375, 51.78375, 51.78375,
51.78375, 51.456965, 51.456965, 51.456965, 51.456965, 52.011812,
52.011812, 52.011812, 52.011812, 50.121978, 50.121978, 51.43474,
51.43474, 51.10708, 51.10708, 51.10708, 51.10708, 50.435984,
50.435984, 50.435984, 50.435984, 51.78666, 51.78666, 52.441088,
52.441088, 52.552344, 49.259471, 49.259471, 49.259471, 49.259471,
50.462, 50.462, 50.462, 50.462, 51.746642, 51.746642, 51.746642,
51.746642, 52.2501, 52.2501, 52.2501, 52.2501, 52.42646,
52.42646, 52.42646, 52.42646, 53.615575, 53.615575, 53.615575,
53.615575, 51.08478, 51.08478, 51.08478, 53.19329, 53.19329,
53.19329, 53.19329, 55.968437, 55.968437, 56.52664, 56.52664,
56.52664, 56.52664, 51.8113, 51.8113, 51.8113, 51.8113, 50.52008,
50.52008, 50.52008, 50.52008, 51.48417, 51.48417, 51.48417,
51.48417, 54.58243, 54.58243, 54.58243, 54.58243, 52.58839,
52.58839, 52.58839, 52.58839, 52.717283, 52.717283, 52.717283,
52.717283, 50.740764, 50.740764, 50.740764, 50.740764, 50.733412,
50.733412, 50.79926, 50.79926, 50.79926, 53.675788, 53.675788,
48.35079, 48.35079, 48.35079, 48.35079, 51.36445, 51.36445,
51.36445, 51.36445, 52.122402, 52.122402, 52.122402, 52.16104,
52.16104, 51.88468, 51.88468, 51.88468, 51.88468, 52.34015,
52.34015, 52.34015, 52.026042, 52.026042, 52.026042, 52.026042,
51.319032, 51.319032, 51.319032, 51.319032, 51.51365, 51.51365,
51.51365, 51.51365, 53.43202, 53.43202, 53.43202, 53.43202,
51.50797, 51.50797, 51.50797, 51.50797), Longitude = c(-0.32116,
1.30786, -1.435407, -1.435407, -3.210324, -3.210324, -3.67111,
-3.67111, -3.3081, -3.3081, -3.3081, -3.3081, -2.11692, -2.11692,
-2.11692, -2.11692, -2.43155, -2.43155, -3.706923, -3.706923,
-1.061008, -1.061008, -1.061008, -1.061008, -0.65046, -0.65046,
-0.65046, -0.65046, -2.624917, -2.624917, -2.624917, -2.624917,
-0.70082, -0.70082, -0.70082, -0.70082, -5.555169, -5.555169,
0.45981, 0.45981, -2.32027, -2.32027, -2.32027, -2.32027,
-4.105617, -4.105617, -4.105617, -4.105617, -0.71433, -0.71433,
-0.176158, -0.176158, -1.337177, -123.107788, -123.107788,
-123.107788, -123.107788, -3.5607, -3.5607, -3.5607, -3.5607,
0.486416, 0.486416, 0.486416, 0.486416, -0.8825, -0.8825,
-0.8825, -0.8825, -1.78771, -1.78771, -1.78771, -1.78771,
-2.432959, -2.432959, -2.432959, -2.432959, -0.73626, -0.73626,
-0.73626, -0.63793, -0.63793, -0.63793, -0.63793, -3.179732,
-3.179732, -3.40313, -3.40313, -3.40313, -3.40313, -0.22894,
-0.22894, -0.22894, -0.22894, -4.20756, -4.20756, -4.20756,
-4.20756, -0.34854, -0.34854, -0.34854, -0.34854, -5.93229,
-5.93229, -5.93229, -5.93229, -1.96843, -1.96843, -1.96843,
-1.96843, -2.410575, -2.410575, -2.410575, -2.410575, -2.361234,
-2.361234, -2.361234, -2.361234, -2.014029, -2.014029, -3.19446,
-3.19446, -3.19446, -1.272404, -1.272404, 10.91812, 10.91812,
10.91812, 10.91812, -0.23106, -0.23106, -0.23106, -0.23106,
-0.487443, -0.487443, -0.487443, 0.18702, 0.18702, -0.17853,
-0.17853, -0.17853, -0.17853, -1.27795, -1.27795, -1.27795,
-0.503113, -0.503113, -0.503113, -0.503113, -0.472994, -0.472994,
-0.472994, -0.472994, -3.18722, -3.18722, -3.18722, -3.18722,
-2.27968, -2.27968, -2.27968, -2.27968, -0.25931, -0.25931,
-0.25931, -0.25931), Altitude = c(0, 0, 103.9, 103.9, 15,
15, 184, 184, 176, 176, 176, 176, 12, 12, 12, 12, 178, 178,
36, 36, 11, 11, 11, 11, 210, 210, 210, 210, 97, 97, 97, 97,
0, 0, 0, 0, 68, 68, 4, 4, 200, 200, 200, 200, 160, 160, 160,
160, 165.8, 165.8, 0, 0, 0, 47, 47, 47, 47, 0, 0, 0, 0, 43,
43, 43, 43, 97, 97, 97, 97, 133, 133, 133, 133, 123, 123,
123, 123, 127, 127, 127, 15, 15, 15, 15, 14, 14, 65, 65,
65, 65, 129, 129, 129, 129, 18, 18, 18, 18, 30, 30, 30, 30,
19, 19, 19, 19, 0, 0, 0, 0, 0, 0, 0, 0, 96, 96, 96, 96, 0,
0, 0, 0, 0, 49, 49, 0, 0, 0, 0, 48, 48, 48, 48, 43, 43, 43,
75, 75, 94, 94, 94, 94, 112, 112, 112, 103, 103, 103, 103,
0, 0, 0, 0, 37.5, 37.5, 37.5, 37.5, 29, 29, 29, 29, 63, 63,
63, 63), Species = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Quercus robur", class = "factor"),
Tree_diameter = c(68.8, 300, 847, 817, 62, 71, 29, 40.1,
68, 45, 60, 54, 104, 122, 85, 71, 81, 118, 39.8, 43.6, 19.8,
16.6, 15.1, 11.9, 81.8, 102.5, 75.5, 57.3, 0.3, 0.2, 0.3,
0.3, 99, 85, 74, 68, 82, 51.8, 19.9, 14.6, 196, 122, 118,
180, 58.6, 54.1, 58, 61.5, 58.4, 61, 134, 64, 52.2, 170,
114, 127, 158, 147.4, 135.3, 122.9, 104.1, 263, 237, 322,
302, 173, 186, 144, 155, 89, 41, 68, 83, 141.6, 85.5, 82.8,
114.1, 129, 127, 143, 125, 92, 68, 90, 25, 20, 63.7, 39.8,
66.2, 112.4, 124.5, 94.1, 68.6, 74.4, 24.2, 54.7, 43, 33.1,
306, 274, 56, 60, 72.5, 128.5, 22, 16, 143, 103, 53, 130,
48.4, 69.8, 6.4, 18.6, 129.2, 41.7, 57.6, 14, 320, 352, 120.9,
108.3, 53.2, 274, 85, 52, 43, 38, 37, 219, 215, 216, 175,
85.9, 49.7, 97.1, 40.8, 62.4, 181.5, 149.7, 122, 143.6, 148,
145, 99, 27.5, 32, 54, 54.1, 169, 152, 160, 138, 90.8, 87.9,
77.4, 81.2, 91.7, 62.7, 50, 72.9, 24.8, 61, 88.6, 80.1),
Urbanisation_index = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 1L, 1L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 3L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 1L, 1L, 1L,
1L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("1", "2",
"3", "4"), class = "factor"), Stand_density_.index = structure(c(3L,
4L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L,
4L, 4L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 4L, 4L, 3L, 3L,
3L, 3L, 4L, 3L, 4L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 4L, 4L, 4L, 4L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 2L,
2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L), .Label = c("1",
"2", "3", "4"), class = "factor"), Canopy_Index = c(15L,
95L, 45L, 5L, 5L, 5L, 25L, 15L, 25L, 25L, 35L, 35L, 25L,
35L, 15L, 15L, 15L, 15L, 5L, 5L, 5L, 5L, 5L, 5L, 35L, 35L,
55L, 35L, 5L, 5L, 5L, 5L, 95L, 95L, 95L, 95L, 25L, 25L, 15L,
5L, 25L, 25L, 25L, 25L, 5L, 5L, 5L, 5L, 5L, 5L, 35L, 25L,
5L, 35L, 35L, 25L, 25L, 5L, 5L, 5L, 5L, 35L, 25L, 25L, 25L,
5L, 5L, 15L, 15L, 35L, 65L, 35L, 35L, 25L, 25L, 25L, 25L,
15L, 15L, 5L, 35L, 35L, 45L, 35L, 5L, 15L, 15L, 25L, 5L,
15L, 15L, 5L, 5L, 15L, 5L, 5L, 5L, 5L, 5L, 85L, 5L, 35L,
15L, 5L, 5L, 5L, 25L, 25L, 15L, 35L, 95L, 95L, 95L, 95L,
15L, 15L, 5L, 25L, 25L, 5L, 15L, 15L, 5L, 15L, 5L, 25L, 25L,
25L, 25L, 5L, 5L, 5L, 5L, 25L, 25L, 55L, 35L, 25L, 15L, 15L,
25L, 15L, 45L, 35L, 35L, 15L, 35L, 15L, 15L, 35L, 15L, 25L,
25L, 15L, 15L, 15L, 15L, 5L, 5L, 5L, 5L, 5L, 5L, 15L, 15L
), Phenological_Index = c(4L, 4L, 3L, 4L, 2L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 3L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L)), row.names = c(NA, -165L), class = "data.frame")
How about using subtitle for labeling each individual plot?
## Observation 1
p1 <- ggplot(
QuercusRobur1,
aes(x = Longitude, y = Latitude)
) +
geom_polygon(
data = UK,
aes(x = long, y = lat, group = group),
inherit.aes = FALSE
) +
coord_map(xlim = c(-10, 5)) + # limits added as there are some points really far away
theme_classic()
Urban1 <- p1 +
geom_point(aes(color = factor(Urbanisation_index))) +
scale_color_discrete(
name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural")
) +
labs(subtitle = "A: Urbanisation Index") +
theme(legend.justification = "left")
Stand1 <- p1 +
geom_point(aes(color = factor(Stand_density_index))) +
scale_color_discrete(
name = "Stand Density Index",
labels = c(
"Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"
)
) +
labs(subtitle = "C: Stand Density Index") +
theme(legend.justification = "left")
## Observation 2
p2 <- ggplot(
QuercusRobur2,
aes(x = Longitude, y = Latitude)
) +
geom_polygon(
data = UK,
aes(x = long, y = lat, group = group),
inherit.aes = FALSE
) +
coord_map(xlim = c(-10, 5)) +
theme_classic()
Urban2 <- p2 +
geom_point(aes(color = factor(Urbanisation_index))) +
scale_color_discrete(
name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural")
) +
labs(subtitle = "B: Urbanisation Index") +
theme(legend.justification = "left")
Stand2 <- p2 +
geom_point(aes(color = factor(Stand_density_.index))) +
scale_color_discrete(
name = "Stand Density Index",
labels = c(
"Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"
)
) +
labs(subtitle = "D: Stand Density Index") +
theme(legend.justification = "left")
## Arrange the individual plots into one main plot
plot_grid(
Urban1 + ggtitle("Observational Period 1\n") + theme(plot.title = element_text(hjust = 1.0)),
Urban2 + ggtitle("Observational Period 2\n") + theme(plot.title = element_text(hjust = 1.0)),
Stand1,
Stand2,
align = "hv",
axis = 'tblr',
label_fontface = "bold",
label_fontfamily = "Times New Roman",
label_size = 8,
rel_widths = c(1, 1.3),
ncol = 2,
nrow = 2,
hjust = 0,
label_x = 0.01
)
Edit: remove duplicate axis labels and legends then use egg::ggarrange to combine subplots.
## Observation 1
Urban1 <- p1 +
geom_point(aes(color = factor(Urbanisation_index))) +
scale_color_discrete(
name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural")
) +
labs(subtitle = "A: Urbanisation Index") +
theme(legend.position = "none")
Stand1 <- p1 +
geom_point(aes(color = factor(Stand_density_index))) +
scale_color_discrete(
name = "Stand Density Index",
labels = c(
"Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"
)
) +
labs(subtitle = "C: Stand Density Index") +
theme(legend.position = "none")
## Observation 2
p2 <- ggplot(
QuercusRobur2,
aes(x = Longitude, y = Latitude)
) +
geom_polygon(
data = UK,
aes(x = long, y = lat, group = group),
inherit.aes = FALSE
) +
coord_map(xlim = c(-10, 5)) +
theme_classic() +
ylab("")
Urban2 <- p2 +
geom_point(aes(color = factor(Urbanisation_index))) +
scale_color_discrete(
name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural")
) +
labs(subtitle = "B: Urbanisation Index") +
theme(legend.justification = "left")
Stand2 <- p2 +
geom_point(aes(color = factor(Stand_density_.index))) +
scale_color_discrete(
name = "Stand Density Index",
labels = c(
"Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"
)
) +
labs(subtitle = "D: Stand Density Index") +
theme(legend.justification = "left")
## Use the `egg` package
library(egg)
ggarrange(
Urban1 + ggtitle("Observational Period 1\n") + theme(plot.title = element_text(hjust = 0.5)),
Urban2 + ggtitle("Observational Period 2\n") + theme(plot.title = element_text(hjust = 0.5)),
Stand1,
Stand2,
nrow = 2,
ncol = 2
)

Trouble with GLMM with glmer in R: Error in pwrssUpdate...halvings failed to reduce deviance in pwrssUpdate

Here's a snipped of randomly selected data from my full dataframe:
canopy<-structure(list(Stage = structure(c(6L, 5L, 3L, 6L, 7L, 5L, 4L,
7L, 2L, 7L, 5L, 1L, 1L, 4L, 3L, 6L, 5L, 7L, 4L, 4L), .Label = c("milpa",
"robir", "jurup che", "pak che kor", "mehen che", "nu kux che",
"tam che"), class = c("ordered", "factor")), ID = c(44L, 34L,
18L, 64L, 54L, 59L, 28L, 51L, 11L, 56L, 33L, 1L, 7L, 25L, 58L,
48L, 36L, 51L, 27L, 66L), Sample = c(4L, 2L, 2L, 10L, 6L, 9L,
4L, 3L, 3L, 8L, 1L, 1L, 7L, 1L, 10L, 8L, 4L, 3L, 3L, 10L), Subsample = c(2L,
3L, 4L, 3L, 2L, 1L, 3L, 2L, 4L, 3L, 1L, 3L, 2L, 4L, 1L, 1L, 3L,
1L, 1L, 4L), Size..ha. = c(0.5, 0.5, 0.5, 0.5, 6, 0.5, 0.5, 0.25,
0.5, 6, 1, 1, 0.5, 2, 1, 0.5, 1, 0.25, 0.5, 2), Avg.Subsample.Canopy = c(94.8,
94.8, 97.92, 96.88, 97.14, 92.46, 93.24, 97.4, 25.64, 97.4, 94.8,
33.7, 13.42, 98.18, 85.44, 96.36, 97.4, 95.58, 85.7, 92.2), dec = c(0.948,
0.948, 0.9792, 0.9688, 0.9714, 0.9246, 0.9324, 0.974, 0.2564,
0.974, 0.948, 0.337, 0.1342, 0.9818, 0.8544, 0.9636, 0.974, 0.9558,
0.857, 0.922)), .Names = c("Stage", "ID", "Sample", "Subsample",
"Size..ha.", "Avg.Subsample.Canopy", "dec"), row.names = c(693L,
537L, 285L, 1017L, 853L, 929L, 441L, 805L, 173L, 889L, 513L,
9L, 101L, 397L, 913L, 753L, 569L, 801L, 417L, 1053L), class = "data.frame")
I am trying to code a GLMM of dec as a function of Stage and Size..ha.
The GLMM is necessary because each row represents a point Subsample measured within a larger Sample area. I am also using a binomial distribution given dec are proportional data.
I tried the model:
canopy.binomial.mod<-glmer(dec~Stage*Size..ha.+(1|Sample),family="binomial",data=canopy)
summary(canopy.binomial.mod)
but get the error:
Error in pwrssUpdate(pp, resp, tol = tolPwrss, GQmat = GQmat, compDev
= compDev, : (maxstephalfit) PIRLS step-halvings failed to reduce deviance in pwrssUpdate
I've seen online that this can be a result of needing to scale a predictor variable, so I tried:
cs. <- function(x) scale(x,scale=TRUE,center=TRUE)
canopy.binomial.mod<-glmer(dec~Stage*cs.(Size..ha.)+(1|Sample),family="binomial",data=canopy.rmna)
summary(canopy.binomial.mod)
Which doesn't seem to help. I also thought that maybe I'm asking too much of the model and it's not converging due to too many predictor variables, so let's remove the Size variable, which is of less interest to me.
canopy.binomial.mod<-glmer(dec~Stage+(1|Sample),family="binomial",data=canopy.rmna)
summary(canopy.binomial.mod)
Still no luck. Any ideas how to address this?

predicting regression fit into multiple other dataframes

Using R I've fit an ordered logistic regression to some data. I need to use the fitted values to predict future values. I have a dataset which has 10 different combinations of parameters I want to predict the probabilities for the 'Result' dependant variable (which the model is fit to). I've used 'split' to split into 10 separate dataframes and I want to try to run the predict statement against each of the outputted dataframes. Can someone show me how to:
a. Create a function to run the predict statement, and
b. Loop the function through the 10 dataframes?
c. Output the prediction probabilities for the three result conditions, and bind them to the relevant input dataframe
Here's what I've tried:
# Fit the model
library(MASS)
fit <- polr(Result ~ State + score1+ score2 + history + pastdefault, data = dat, Hess=TRUE)
## Model Summary
summary(fit)
#Take the new data which doesn't have predictions
t13<- dat[ which(dat$Tranche==13), ]
out <- split( t13 , f = r13$Gamen )
#Function
pred <- function(Result, State, score1, score2, history, pastdefault,...){
cbind(out, predict(fit, out, type = "probs"))}
#Use sapply to run prediction
pred.out <- sapply(out, pred)
Further to a suggestion I've tried running the predict statement against the result of splitting the data into 'out' as per code below:
#Predict against 'out' dataframes
newdat1 <- cbind(out, predict(fit, newdata=out, type = "probs"))
However I get the same error message:
in eval(expr, envir, enclos) : object 'State' not found
Here's what the data looks like using head(data):
State score1 score2 Salaries history exp moody Result pastdefault Tranche
1 Minnesota 6.000 17.5 35.948 9 579 1085 moreinfo yes 1
2 New Mexico 4.586 17.2 28.493 11 530 1015 fail No 1
3 Washington 5.906 20.2 36.151 48 494 937 fail No 1
4 West Virginia 6.107 14.8 31.944 17 484 932 moreinfo No 1
5 Louisiana 4.761 16.8 26.461 9 535 1021 pass yes 2
6 Oklahoma 4.845 15.5 28.172 9 536 1027 pass yes 2
Gamen
1 1
2 2
3 3
4 4
5 1
6 2
Here is the result of dput(data):
ture(list(State = structure(c(23L, 31L, 47L, 48L, 18L, 36L,
39L, 42L, 13L, 14L, 16L, 35L, 50L, 1L, 20L, 27L, 30L, 7L, 9L,
34L, 43L, 3L, 4L, 19L, 25L, 26L, 37L, 8L, 29L, 38L, 2L, 21L,
32L, 6L, 10L, 11L, 15L, 24L, 5L, 17L, 33L, 40L, 41L, 46L, 49L,
28L, 12L, 22L, 45L, 32L, 9L, 5L, 44L, 13L, 38L, 19L, 25L, 26L,
37L, 8L), .Label = c("Alabama", "Alaska", "Arizona", "Arkansas",
"California", "Colorado", "Connecticut", "Delaware", "Florida",
"Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa",
"Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts",
"Michigan", "Minnesota", "Mississippi", "Missouri", "Montana",
"Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
"New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma",
"Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
"Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington",
"West Virginia", "Wisconsin", "Wyoming"), class = "factor"),
score1 = c(6, 4.586, 5.906, 6.107, 4.761, 4.845, 7.469, 4.388,
6.136, 5.826, 5.817, 6.162, 6.16, 4.405, 7.245, 5.935, 9.774,
8.817, 5.718, 4.775, 5.222, 4.778, 4.459, 6.428, 5.383, 5.692,
6.436, 7.03, 5.859, 7.109, 8.963, 7.287, 9.623, 5.443, 5.193,
6.078, 5.483, 4.08, 4.992, 5.217, 5.077, 4.797, 4.775, 5.327,
6.93, 5.16, 4.21, 6.994, 6.75, 5.859, 7.109, 8.963, 3.656,
7.287, 9.623, 5.443, 6.078, 5.483, 4.08, 4.992), score2 = c(17.5,
17.2, 20.2, 14.8, 16.8, 15.5, 14.7, 18.6, 17.3, 17.5, 15.1,
16.6, 14.9, 17.2, 17, 14.5, 13.8, 14.4, 19.1, 15.3, 15.7,
19.3, 17.1, 13.8, 15.5, 16.3, 19.9, 16.6, 15.6, 17.1, 17.6,
14.8, 15.2, 18.4, 16.3, 17.9, 15.8, 17.5, 24, 17, 16.2, 16.4,
14.4, 14.6, 15.9, 18.7, 19.1, 20.1, 13.8, 16.6, 15.6, 17.1,
24.3, 17.6, 14.8, 15.2, 24, 17, 16.2, 14.4), Salaries = c(35.948,
28.493, 36.151, 31.944, 26.461, 28.172, 40.729, 32.477, 39.431,
36.785, 34.652, 36.802, 31.285, 31.144, 40.661, 30.922, 46.087,
50.045, 32.588, 26.327, 31.223, 32.175, 28.934, 31.972, 31.189,
28.785, 38.555, 39.076, 34.72, 44.51, 47.951, 40.795, 47.612,
34.571, 32.291, 38.518, 31.511, 26.818, 41.078, 32.257, 30.793,
30.279, 25.994, 33.987, 37.746, 34.836, 29.783, 41.895, 35.406,
28.785, 38.555, 39.076, 29.082, 31.511, 26.818, 41.078, 32.257,
25.994, 33.987, 37.746), history = c(9L, 11L, 48L, 17L, 9L,
9L, 70L, 12L, 13L, 58L, 9L, 23L, 10L, 8L, 64L, 9L, 70L, 81L,
48L, 5L, 47L, 27L, 6L, 68L, 9L, 21L, 51L, 68L, 70L, 70L,
47L, 80L, 74L, 29L, 65L, 57L, 5L, 4L, 45L, 11L, 60L, 58L,
5L, 65L, 9L, 30L, 15L, 11L, 68L, 45L, 11L, 60L, 4L, 58L,
5L, 65L, 9L, 5L, 65L, 80L), exp = c(579L, 530L, 494L, 484L,
535L, 536L, 463L, 543L, 560L, 467L, 557L, 515L, 525L, 538L,
479L, 556L, 478L, 477L, 469L, 592L, 474L, 496L, 523L, 469L,
550L, 536L, 499L, 468L, 491L, 461L, 489L, 477L, 473L, 518L,
448L, 482L, 583L, 540L, 485L, 522L, 454L, 443L, 563L, 468L,
572L, 483L, 511L, 549L, 472L, 526L, 528L, 530L, 563L, 532L,
534L, 536L, 538L, 540L, 542L, 544L), moody = c(1085L, 1015L,
937L, 932L, 1021L, 1027L, 888L, 1040L, 1048L, 882L, 1060L,
975L, 1001L, 1029L, 909L, 1050L, 898L, 908L, 889L, 1107L,
893L, 944L, 1005L, 896L, 1045L, 1009L, 947L, 897L, 935L,
880L, 934L, 907L, 892L, 980L, 854L, 889L, 1099L, 1036L, 902L,
999L, 865L, 844L, 1068L, 896L, 1073L, 917L, 979L, 1033L,
901L, 1002L, 1007L, 1011L, 1076L, 1016L, 900L, 760L, 1029L,
850L, 1038L, 990L), Result = structure(c(3L, 2L, 2L, 3L,
4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 2L, 4L, 3L, 3L, 3L, 3L,
4L, 4L, 2L, 3L, 3L, 3L, 4L, 2L, 4L, 2L, 4L, 2L, 3L, 2L, 4L,
3L, 4L, 4L, 2L, 2L, 3L, 4L, 2L, 4L, 3L, 3L, 4L, 2L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"fail", "moreinfo", "pass"), class = "factor"), pastdefault = structure(c(2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L), .Label = c("No",
"yes"), class = "factor"), Tranche = c(1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 12L,
12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L), Gamen = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L)), .Names = c("State", "score1", "score2",
"Salaries", "history", "exp", "moody", "Result", "pastdefault",
"Tranche", "Gamen"), class = "data.frame", row.names = c(NA,
-60L))

R - Lattice xyplot - How do you add error bars to groups and summary lines?

I'm posting this question because the very similar question here has not been answered until now.
I have been asked to plot the mean +/- SEM of my whole cohort of patients over the xyplot() that depicts the values of all patients. The data used represents intraoperative cardiovascular findings from patients undergoing surgery.
This is my data.frame called df
dput(df)
structure(list(Name = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("DE", "JS", "KG", "MK", "TG", "WT"), class = "factor"),
Time = structure(c(1L, 2L, 3L, 4L, 5L, 7L, 8L, 1L, 2L, 3L,
4L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 7L, 8L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 2L, 3L, 4L, 5L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L), .Label = c("T1", "T2", "T3", "T4", "T5", "T6", "T7",
"T8"), class = "factor"), Dobut = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L), .Label = c("No", "Yes"
), class = "factor"), DobutDose = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
4L, 6L, 8L, 8L, 8L, 8L, 8L, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 5L, 5L, NA), CI = c(1.4, 2.3, 1.3, 1.8, 2.1,
2, 2.1, 2.1, 2.3, 1.9, 1.6, 2, 2.4, 2.7, 2.6, 2.7, 2.6, 2.3,
2.4, 2.6, 0.9, 2.5, 2.1, 1.6, 1.5, 1.8, 2, 2, 1.9, 2.1, 2.3,
2, 2.4, 2.3, 2.6, 2.4, 2, 2.2, 1.6, 2.1, 2.5, 2.8), SvO2 = c(57L,
65L, 47L, 45L, 51L, 60L, 56L, 70L, 85L, 75L, 79L, 82L, 73L,
77L, 78L, 73L, 71L, 73L, 80L, 74L, 41L, 66L, 51L, 51L, 49L,
54L, 68L, 48L, 80L, 70L, 71L, 69L, 74L, 79L, 77L, 77L, 75L,
74L, 70L, 79L, 80L, 79L), SVRI = c(4000L, 1983L, 4000L, 2444L,
1981L, 2120L, 2514L, 2971L, 2157L, 3747L, 4300L, 3200L, 2867L,
1778L, 1169L, 1215L, 1262L, 1461L, 1600L, 1692L, 4978L, 1760L,
2019L, 2650L, 2827L, 2356L, 1800L, 2840L, 2063L, 2248L, 1948L,
2160L, 1733L, 2296L, 2677L, 2100L, 2640L, 2655L, 3950L, 2210L,
2848L, 2543L), MAP = c(80L, 65L, 86L, 74L, 67L, 65L, 74L,
90L, 70L, 90L, 96L, 94L, 100L, 82L, 60L, 61L, 62L, 62L, 69L,
71L, 70L, 71L, 77L, 73L, 75L, 77L, 61L, 85L, 65L, 74L, 70L,
67L, 69L, 74L, 92L, 71L, 88L, 93L, 89L, 79L, 97L, 97L), CVP = c(10L,
8L, 21L, 19L, 15L, 12L, 8L, 12L, 8L, 11L, 10L, 14L, 14L,
22L, 22L, 20L, 21L, 20L, 21L, 16L, 14L, 16L, 24L, 20L, 22L,
24L, 16L, 14L, 16L, 15L, 14L, 13L, 17L, 8L, 5L, 8L, 22L,
20L, 20L, 21L, 8L, 8L), PAP = c(23L, 22L, 36L, 36L, 34L,
32L, 22L, 33L, 28L, 36L, 36L, 40L, 37L, 37L, 40L, 35L, 35L,
34L, 38L, 36L, 45L, 43L, 55L, 49L, 52L, 54L, 43L, 47L, 27L,
25L, 23L, 22L, 28L, 21L, 20L, 25L, 33L, 33L, 38L, 35L, 33L,
29L), PCWP = c(15L, 11L, 28L, 26L, 23L, 21L, 11L, 26L, NA,
NA, 25L, 25L, NA, 27L, NA, NA, NA, NA, NA, NA, 30L, NA, NA,
NA, NA, NA, NA, NA, 19L, NA, NA, NA, NA, NA, 16L, NA, NA,
NA, NA, NA, NA, NA)), .Names = c("Name", "Time", "Dobut",
"DobutDose", "CI", "SvO2", "SVRI", "MAP", "CVP", "PAP", "PCWP"
), class = "data.frame", row.names = c(NA, -42L))
Now the first xyplot I made for the variable CI looks like this
require(lattice)
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
+ ,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
Now I was able to add the mean (black line) of the whole cohort, by doing the following
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE,..., col="black", lty=1, lwd=4)
}
,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
Now I'd like to add +/- SE to the mean as a line above/below the mean, but nowhere can I find how to do this.
What I can do is using the latticeExtra package is add the loess line +/- SE, as below, but that's not the correct mathematical function I'm looking for. I've left the mean line in there to illustrate the difference between the two.
require(latticeExtra)
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
+ panel = function(x, y, ...) {
+ panel.xyplot(x, y, ...)
+ panel.linejoin(x, y, horizontal = FALSE,..., col="black", lty=1, lwd=4)
+ panel.smoother(x,y,se=TRUE, col.se="grey")
+ }
+ ,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
I have performed an extensive search through SO and the internet, but I haven't been able to find the right function to do this.
Help is very much appreciated! Thanks.
You could create your own panel function to plot a +/- SD region. For example
#new panel function
panel.se <- function(x, y, col.se=plot.line$col, alpha.se=.25, ...) {
plot.line <- trellis.par.get("plot.line")
xs <- if(is.factor(x)) {
factor(c(levels(x) , rev(levels(x))), levels=levels(x))
} else {
xx <- sort(unique(x))
c(xx, rev(xx))
}
means <- tapply(y,x, mean, na.rm=T)
vars <- tapply(y,x, var, na.rm=T)
Ns <- tapply(!is.na(y),x, sum)
ses <- sqrt(vars/Ns)
panel.polygon(xs, c(means+ses, rev(means-ses)), col=col.se, alpha=alpha.se)
}
and then you can use it like
#include new panel function
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.se(x,y, col.se="grey")
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE,..., col="black", lty=1, lwd=4)
}
,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
which results in

Resources