I am using dplyr to create multiple variables in my data frame using mutate. At the same time, I am using zoo to calculate a rolling average. As an example, I have my variables set up like so:
vars <- "total_apples", "total_oranges", "total_bananas"
My data has over 100 variables and approx. 40,000 lines, but the above is just an example.
I am using this code below:
library(dplyr)
library(zoo)
data <- data %>%
group_by(fruit) %>%
mutate(across(c(all_of(vars)), list(avge_last2 = ~ zoo::rollapplyr(., 2, FUN = mean, partial = TRUE))))
Just for the above to calculate the average over the last 2 records, it takes over 5 mins:
> end.time <- Sys.time()
> time.taken <- end.time - start.time
> time.taken
Time difference of 5.925337 mins
It takes even longer if I want to average over more records, say n= 10 like so:
library(dplyr)
library(zoo)
data <- data %>%
group_by(fruit) %>%
mutate(across(c(all_of(vars)), list(avge_last2 = ~ zoo::rollapplyr(., 10, FUN = mean, partial = TRUE))))
Is there an issue with my code or is it something else?
dput(head(data,20)) provides the following:
structure(list(match_id = c(14581L, 14581L, 14581L, 14581L, 14581L,
14581L, 14581L, 14581L, 14581L, 14581L, 14581L, 14581L, 14581L,
14581L, 14581L, 14581L, 14581L, 14581L, 14581L, 14581L), match_date = structure(c(16527,
16527, 16527, 16527, 16527, 16527, 16527, 16527, 16527, 16527,
16527, 16527, 16527, 16527, 16527, 16527, 16527, 16527, 16527,
16527), class = "Date"), season = c(2015, 2015, 2015, 2015, 2015,
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015,
2015, 2015, 2015, 2015), match_round = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), home_team = c(3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), away_team = c(14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14), venue = c(11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11), venue_name = c("MCG",
"MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG",
"MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MCG",
"MCG"), opponent = c(14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14), player_id = c(11186L,
11215L, 11285L, 11330L, 11380L, 11388L, 11407L, 11472L, 11473L,
11490L, 11553L, 11561L, 11573L, 11582L, 11598L, 11601L, 11616L,
11643L, 11671L, 11737L), player_first_name = c("Chris", "Chris",
"Kade", "Troy", "Andrew", "Brett", "Cameron", "Marc", "Dale",
"Ivan", "Bryce", "Shane", "Bachar", "Jack", "Andrejs", "Shaun",
"Michael", "Lachie", "Trent", "Alex"), player_last_name = c("Judd",
"Newman", "Simpson", "Chaplin", "Carrazzo", "Deledio", "Wood",
"Murphy", "Thomas", "Maric", "Gibbs", "Edwards", "Houli", "Riewoldt",
"Everitt", "Grigg", "Jamison", "Henderson", "Cotchin", "Rance"
), player_team = c("Carlton", "Richmond", "Carlton", "Richmond",
"Carlton", "Richmond", "Carlton", "Carlton", "Carlton", "Richmond",
"Carlton", "Richmond", "Richmond", "Richmond", "Carlton", "Richmond",
"Carlton", "Carlton", "Richmond", "Richmond"), player_team_numeric = c(3,
14, 3, 14, 3, 14, 3, 3, 3, 14, 3, 14, 14, 14, 3, 14, 3, 3, 14,
14), guernsey_number = c(5L, 1L, 6L, 25L, 44L, 3L, 36L, 3L, 39L,
20L, 4L, 10L, 14L, 8L, 33L, 6L, 40L, 23L, 9L, 18L), player_position = c(3,
14, 14, 1, 17, 13, 16, 12, 20, 16, 14, 5, 10, 8, 13, 14, 6, 7,
3, 2), disposals = c(21L, 7L, 21L, 13L, 18L, 18L, 11L, 21L, 1L,
13L, 26L, 21L, 21L, 17L, 18L, 17L, 8L, 10L, 17L, 18L), kicks = c(16L,
6L, 13L, 9L, 9L, 9L, 8L, 9L, 1L, 8L, 15L, 9L, 15L, 13L, 14L,
9L, 4L, 9L, 6L, 9L), marks = c(5L, 1L, 8L, 1L, 2L, 3L, 2L, 2L,
0L, 4L, 4L, 1L, 5L, 8L, 8L, 4L, 2L, 6L, 3L, 4L), handballs = c(5L,
1L, 8L, 4L, 9L, 9L, 3L, 12L, 0L, 5L, 11L, 12L, 6L, 4L, 4L, 8L,
4L, 1L, 11L, 9L), tackles = c(6L, 1L, 2L, 2L, 2L, 0L, 1L, 2L,
0L, 4L, 4L, 3L, 1L, 0L, 2L, 2L, 1L, 2L, 1L, 0L), clearances = c(6L,
0L, 0L, 0L, 6L, 1L, 6L, 4L, 0L, 4L, 4L, 7L, 0L, 0L, 1L, 3L, 0L,
0L, 1L, 1L), brownlow_votes = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), effective_disposals = c(15L,
6L, 16L, 11L, 16L, 13L, 6L, 14L, 1L, 11L, 13L, 16L, 16L, 10L,
14L, 12L, 5L, 6L, 9L, 17L), disposal_efficiency_percentage = c(71L,
86L, 76L, 85L, 89L, 72L, 55L, 67L, 100L, 85L, 50L, 76L, 76L,
59L, 78L, 71L, 63L, 60L, 53L, 94L), contested_possessions = c(11L,
3L, 5L, 7L, 9L, 6L, 7L, 9L, 1L, 9L, 9L, 15L, 1L, 7L, 3L, 4L,
3L, 4L, 5L, 5L), uncontested_possessions = c(10L, 4L, 17L, 6L,
10L, 12L, 4L, 12L, 0L, 4L, 17L, 7L, 18L, 9L, 14L, 11L, 5L, 7L,
12L, 14L), time_on_ground_percentage = c(79L, 65L, 73L, 100L,
76L, 69L, 89L, 81L, 1L, 88L, 73L, 83L, 85L, 98L, 95L, 81L, 96L,
91L, 86L, 96L), afl_fantasy_score = c(93L, 26L, 97L, 42L, 54L,
53L, 61L, 67L, 4L, 91L, 96L, 67L, 78L, 89L, 80L, 80L, 30L, 54L,
54L, 58L), contested_marks = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 2L, 1L, 0L, 1L, 3L, 0L, 0L, 0L, 1L, 0L, 0L), metres_gained = c(474L,
231L, 269L, 165L, 128L, 181L, 151L, 227L, -7L, 160L, 466L, 332L,
709L, 268L, 464L, 283L, 99L, 257L, 203L, 288L), turnovers = c(5L,
3L, 4L, 2L, 3L, 2L, 2L, 4L, 0L, 1L, 6L, 2L, 5L, 8L, 5L, 2L, 2L,
3L, 3L, 1L), effective_kicks = c(11L, 5L, 9L, 7L, 7L, 4L, 3L,
5L, 1L, 6L, 5L, 4L, 11L, 7L, 12L, 5L, 2L, 6L, 1L, 9L), ground_ball_gets = c(8L,
2L, 4L, 5L, 7L, 4L, 4L, 8L, 0L, 3L, 6L, 9L, 0L, 4L, 3L, 2L, 2L,
2L, 5L, 3L), cum_rec = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20), rank_match_kicks = c(2, 34,
10.5, 20.5, 20.5, 20.5, 28, 20.5, 43, 28, 4.5, 20.5, 4.5, 10.5,
8, 20.5, 39.5, 20.5, 34, 20.5), rank_match_marks = c(14, 39,
5, 39, 33, 27.5, 33, 33, 43.5, 20.5, 20.5, 39, 14, 5, 5, 20.5,
33, 10, 27.5, 20.5)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
Update:
Consider the example below using the functions suggested in the answer below:
match_id <- c("match_1", "match_1","match_1","match_2","match_2","match_2","match_3","match_3","match_3")
player_id <- c("player_1", "player_2", "player_3", "player_1", "player_2", "player_3", "player_1", "player_2", "player_3")
turnovers <- c(5,10,15,1,2,3,5,7,9)
data <- data.frame(match_id, player_id, turnovers)
f <- function(dt, window, vars, byvars, partial=F) {
res = dt[, lapply(.SD, frollmean, n=window), by=byvars, .SDcols=vars]
if(partial) {
res = rbind(
partials(dt,window-1,vars, byvars),
res[window:.N, .SD, by=byvars]
)
}
return(res)
}
partials <- function(dt,w,vars,byvars) {
rbindlist(lapply(1:w, function(p) {
dt[1:p, lapply(.SD, function(v) Reduce(`+`, shift(v,0:(p-1)))/p),
.SDcols = vars, by=byvars][p:.N, .SD, by=byvars]
}))
}
# set the data as data.table
setDT(data)
# define vars of interest
vars = c("turnovers")
# ensure the order is correct for rolling mean
setorder(data, player_id, match_id )
# set the window size
n=3
# get the rolling mean, by grouping variable, for each var in `vars`, and add the partials
f(data, window=n, vars=vars, byvars="player_id", partial=T)
This returns the following:
player_id turnovers
1: player_1 5.000000
2: player_1 3.000000
3: player_1 3.666667
4: player_2 NA
5: player_2 NA
6: player_2 6.333333
7: player_3 NA
8: player_3 NA
9: player_3 9.000000
What am I doing wrong?
You could try this:
library(data.table)
setDT(data)
data[,paste0(vars, "_avge_last2_"):= lapply(.SD, frollmean, n=2),
.SDcols=vars,
by=.(fruit)
]
Update
Here is a more generalized solution for handling the NA(s) at the top of each window (i.e. the partial windows)
First, start with a function that can take a data table (dt), a window size (window), a set of variables (vars), and a set of grouping variables (byvars), and an optional logical indicator partial
f <- function(dt, window, vars, byvars, partial=F) {
res = dt[, lapply(.SD, frollmean, n=window), by=byvars, .SDcols=vars]
if(partial) {
res = rbind(
partials(dt,window-1,vars, byvars),
res[,.SD[window:.N], by=byvars]
)
}
return(res)
}
Add, the optional function partials()
partials <- function(dt,w,vars,byvars) {
rbindlist(lapply(1:w, function(p) {
dt[, lapply(.SD[1:p], function(v) Reduce(`+`, shift(v,0:(p-1)))/p),
.SDcols = vars, by=byvars][, .SD[p:.N], by=byvars]
}))
}
Apply the function
# set the data as data.table
setDT(data)
# define vars of interest
vars = c("turnovers", "effective_kicks")
# ensure the order is correct for rolling mean
setorder(data, match_id, player_id)
# set the window size
n=3
# get the rolling mean, by grouping variable, for each var in `vars`, and add the partials
f(data, window=n, vars=vars, byvars="player_id", partial=T)
There are several problems:
the code in the question does not work with the data provided but rather it gives errors. There is no fruit column in the data and the vars columns don't exist either. To make it run we group by match_id and define vars to include some existing columns.
it is better not to overwrite data but rather use a different name for the output to make debugging easier.
using across causes rollapplyr to be applied separately for each column which is inefficient given that rollapply can process multiple columns at once.
Using columns that actually exist in the data provided and assuming we want to use rollapplyr on the columns named in vars try this which only runs rollapplyr once per group and seems slightly faster.
Also fill=NA is used in place of partial=TRUE it will use a somewhat faster algorithm; however, in that case the first row in each group will have NA's as that is what fill=NA means and also that algorithm won't be used if there are already NA's in the columns to be averaged.
library(dplyr)
library(zoo)
vars <- c("home_team", "away_team")
data_out <- data %>%
group_by(match_id) %>%
data.frame(avg = rollapplyr(.[vars], 2, mean, partial = TRUE)) %>%
ungroup
I find that processing grouped dataframes in dplyer can really slows things down, I'm not sure if it's the best workaround but when I finish grouping I pipe in
%>% as.data.frame()
to get rid of the grouping information, and then do my calculations afterward. It can save a lot of time. If you've previously grouped a large dataset give that a try.
Overview:
I have produced a series of maps (see below) using the R-code below, and I used plot_grid() in the Cowplot package to arrange the plots using the two data frames below called "QuercusRobur 1" and "QuercusRobur2".
Problem:
The plots look really good; however, the text labels are not well aligned. A few labels overlay the top of y-axis called Latitude, and two of the text labels called "A: Urbanisation Index" and "B: Urbanisation Index" are not positioned above their plots, and they also overlay the main titles called "Observation Period 1" and "Observation Period 2".
Does anyone know how to neatly align the plot labels so they are positioned in the top lefthand corner of each plot but also prevent them from overlaying the y-axis or parts of the map (see desired output below)?
If anyone can help, I would be deeply appreciative
R-Code
##Import Packages
library(ggplot2)
library(maps)
library(mapdata)
library(tidyverse)
##Get a map of the UK from maps:
UK <- map_data(map = "world", region = "UK")
head(UK)
dim(UK)
##Produce point data
MapUK<-ggplot(data = UK, aes(x = long, y = lat, group = group)) +
geom_polygon() +
coord_map()
##head
head(QuercusRobur1)
head(QuercusRobur2)
##Remove weird data points
QuercusRobur1<-QuercusRobur1%>%filter(Longitude<=3)
##Observation 1
p1 <- ggplot(QuercusRobur1,
aes(x = Longitude, y = Latitude)) +
geom_polygon(data = UK,
aes(x = long, y = lat, group = group),
inherit.aes = FALSE) +
geom_point() +
coord_map(xlim = c(-10, 5)) + #limits added as there are some points really far away
theme_classic()
Urban1<-p1 +
aes(color = Urbanisation_index) +
scale_color_discrete(name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural"))
Stand1<-p1 +
aes(color = Stand_density_index) +
scale_color_discrete(name = "Stand Density Index",
labels = c("Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"))
Phenology1<-p1 +
aes(color = factor(Phenological_Index)) +
scale_color_discrete(name = "Phenological Index",
labels = c("No indication of autumn timing",
"First autumn tinting",
"Partial autumn tinting (>25% of leaves)",
"Advanced autumn tinting (>75% of leaves)"))
##Observation 2
p2 <- ggplot(QuercusRobur2,
aes(x = Longitude, y = Latitude)) +
geom_polygon(data = UK,
aes(x = long, y = lat, group = group),
inherit.aes = FALSE) +
geom_point() +
coord_map(xlim = c(-10, 5)) + #limits added as there are some points really far away
theme_classic()
Urban2<-p2 +
aes(color = Urbanisation_index) +
scale_color_discrete(name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural"))
Stand2<-p2 +
aes(color = Stand_density_.index) +
scale_color_discrete(name = "Stand Density Index",
labels = c("Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"))
Phenology2<-p2 +
aes(color = factor(Phenological_Index)) +
scale_color_discrete(name = "Phenological Index",
labels = c("No indication of autumn timing",
"First autumn tinting",
"Partial autumn tinting (>25% of leaves)",
"Advanced autumn tinting (>75% of leaves)"))
##Arrange the individual plots into one main plot
plot_grid(Urban1 + ggtitle("Observational Period 1"),
Urban2 + ggtitle("Observational Period 2"),
Stand1,
Stand2,
Phenology1,
Phenology2,
labels=c("A: Urbanisation Index", "B: Urbanisation Index",
"C: Stand Density Index","D: Stand Density Index",
"E: Phenological Index","F: Phenological Index"),
align = "v",
label_fontface="bold",
label_fontfamily="Times New Roman",
label_size = 8,
rel_widths = c(1, 1.3),
ncol = 2,
nrow = 3,
hjust = 0,
label_x = 0.01)
Plot produced from R-code
Desired Output
Data frame - QuercusRobur1
structure(list(Obs_.no = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 28L, 29L, 30L, 31L, 32L, 33L, 34L,
35L, 36L, 37L, 38L, 39L, 44L, 45L, 46L, 47L, 57L, 58L, 59L, 60L,
61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 74L,
75L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 93L,
102L, 103L, 104L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L,
120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L,
131L, 135L, 136L, 137L, 138L, 143L, 144L, 145L, 146L, 147L, 148L,
149L, 150L, 151L, 152L, 153L, 154L, 155L, 158L, 159L, 160L, 161L,
162L, 163L, 164L, 165L, 169L, 170L, 171L, 172L, 177L, 178L, 179L,
180L, 181L, 182L, 183L, 184L, 185L, 186L, 187L, 188L, 189L, 190L,
191L, 192L, 193L, 194L, 195L, 196L, 200L), Date_observed = structure(c(4L,
15L, 6L, 6L, 6L, 6L, 2L, 2L, 8L, 8L, 8L, 8L, 8L, 8L, 6L, 6L,
6L, 6L, 6L, 6L, 11L, 11L, 11L, 11L, 12L, 7L, 7L, 9L, 9L, 9L,
9L, 5L, 5L, 5L, 5L, 14L, 14L, 14L, 14L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 6L, 6L, 5L, 5L, 9L, 9L, 9L, 9L, 3L, 3L, 3L, 3L, 4L, 4L,
1L, 1L, 11L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 3L, 3L, 3L, 3L, 11L,
11L, 11L, 4L, 4L, 4L, 4L, 8L, 8L, 10L, 10L, 10L, 10L, 9L, 9L,
9L, 9L, 3L, 3L, 3L, 3L, 9L, 9L, 9L, 9L, 2L, 2L, 2L, 2L, 13L,
13L, 13L, 13L, 8L, 8L, 8L, 8L, 10L, 10L, 10L, 10L, 3L, 3L, 3L,
3L, 13L), .Label = c("10/1/18", "10/19/18", "10/20/18", "10/21/18",
"10/22/18", "10/23/18", "10/24/18", "10/25/18", "10/26/18", "10/27/18",
"10/28/18", "10/28/19", "10/29/18", "12/9/18", "8/20/18"), class = "factor"),
Latitude = c(51.4175, 52.12087, 52.0269, 52.0269, 52.0269,
52.0269, 52.947709, 52.947709, 51.491811, 51.491811, 52.59925,
52.59925, 52.59925, 52.59925, 51.60157, 51.60157, 52.6888,
52.6888, 52.6888, 52.6888, 50.697802, 50.697802, 50.697802,
50.697802, 53.62417, 50.446841, 50.446841, 53.959679, 53.959679,
53.959679, 53.959679, 51.78375, 51.78375, 51.78375, 51.78375,
51.456965, 51.456965, 51.456965, 51.456965, 51.3651, 51.3651,
51.3651, 51.3651, 52.01182, 52.01182, 52.01182, 52.01182,
50.114277, 50.114277, 51.43474, 51.43474, 51.10676, 51.10676,
51.10676, 51.10676, 50.435984, 50.435984, 50.435984, 50.435984,
51.78666, 51.78666, 52.441088, 52.441088, 52.552344, 49.259471,
49.259471, 49.259471, 49.259471, 50.461625, 50.461625, 50.461625,
50.461625, 51.746642, 51.746642, 51.746642, 51.746642, 52.2501,
52.2501, 52.2501, 52.2501, 52.423336, 52.423336, 52.423336,
52.423336, 53.615575, 53.615575, 53.615575, 53.615575, 51.08474,
51.08474, 51.08474, 53.19329, 53.19329, 53.19329, 53.19329,
55.96785, 55.96785, 56.52664, 56.52664, 56.52664, 56.52664,
51.8113, 51.8113, 51.8113, 51.8113, 52.580157, 52.580157,
52.580157, 52.580157, 50.52008, 50.52008, 50.52008, 50.52008,
51.48417, 51.48417, 51.48417, 51.48417, 54.58243, 54.58243,
54.58243, 54.58243, 52.58839, 52.58839, 52.58839, 52.58839,
52.717283, 52.717283, 52.717283, 52.717283, 50.740764, 50.740764,
50.740764, 50.740764, 52.57937), Longitude = c(-0.32118,
-0.29293, -0.7078, -0.7078, -0.7078, -0.7078, -1.435407,
-1.435407, -3.210324, -3.210324, 1.33011, 1.33011, 1.33011,
1.33011, -3.67111, -3.67111, -3.30909, -3.30909, -3.30909,
-3.30909, -2.11692, -2.11692, -2.11692, -2.11692, -2.43155,
-3.706923, -3.706923, -1.061008, -1.061008, -1.061008, -1.061008,
-0.65046, -0.65046, -0.65046, -0.65046, -2.624917, -2.624917,
-2.624917, -2.624917, 0.70706, 0.70706, 0.70706, 0.70706,
-0.70082, -0.70082, -0.70082, -0.70082, -5.541128, -5.541128,
0.45981, 0.45981, -2.32071, -2.32071, -2.32071, -2.32071,
-4.105617, -4.105617, -4.105617, -4.105617, -0.71433, -0.71433,
-0.176158, -0.176158, -1.337177, -123.107788, -123.107788,
-123.107788, -123.107788, 3.560973, 3.560973, 3.560973, 3.560973,
0.486416, 0.486416, 0.486416, 0.486416, -0.8825, -0.8825,
-0.8825, -0.8825, -1.787563, -1.787563, -1.787563, -1.787563,
-2.432959, -2.432959, -2.432959, -2.432959, -0.73645, -0.73645,
-0.73645, -0.63793, -0.63793, -0.63793, -0.63793, -3.18084,
-3.18084, -3.40313, -3.40313, -3.40313, -3.40313, -0.22894,
-0.22894, -0.22894, -0.22894, -1.948571, -1.948571, -1.948571,
-1.948571, -4.20756, -4.20756, -4.20756, -4.20756, -0.34854,
-0.34854, -0.34854, -0.34854, -5.93229, -5.93229, -5.93229,
-5.93229, -1.96843, -1.96843, -1.96843, -1.96843, -2.410575,
-2.410575, -2.410575, -2.410575, -2.361234, -2.361234, -2.361234,
-2.361234, -1.89325), Altitude = c(5L, 0L, 68L, 68L, 68L,
68L, 104L, 104L, 15L, 15L, 23L, 23L, 23L, 23L, 184L, 184L,
176L, 176L, 176L, 176L, 12L, 12L, 12L, 12L, 178L, 36L, 36L,
11L, 11L, 11L, 11L, 210L, 210L, 210L, 210L, 97L, 97L, 97L,
97L, 23L, 23L, 23L, 23L, 0L, 0L, 0L, 0L, 9L, 9L, 4L, 4L,
200L, 200L, 200L, 200L, 160L, 160L, 160L, 160L, 166L, 166L,
0L, 0L, 0L, 47L, 47L, 47L, 47L, 58L, 58L, 58L, 58L, 43L,
43L, 43L, 43L, 97L, 97L, 97L, 97L, 133L, 133L, 133L, 133L,
123L, 123L, 123L, 123L, 128L, 128L, 128L, 15L, 15L, 15L,
15L, 14L, 14L, 65L, 65L, 65L, 65L, 129L, 129L, 129L, 129L,
140L, 140L, 140L, 140L, 18L, 18L, 18L, 18L, 30L, 30L, 30L,
30L, 19L, 19L, 19L, 19L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
96L, 96L, 96L, 96L, 169L), Species = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Quercus robur", class = "factor"),
Tree_diameter = c(68.8, 10, 98.5, 97, 32.5, 45.1, 847, 817,
62, 71, 140, 111.4, 114.6, 167.1, 29, 40.1, 68, 45, 60, 54,
104, 122, 85, 71, 81, 39.8, 43.6, 20.1, 17.8, 15.6, 12.1,
81.8, 102.5, 75.5, 57.3, 0.3, 0.2, 0.3, 0.3, 70, 36, 53,
44, 31.5, 27.1, 23.3, 22, 69.4, 37.3, 19.9, 14.6, 196, 122,
118, 180, 58.6, 54.1, 58, 61.5, 58.4, 61, 134, 64, 52.2,
170, 114, 127, 158, 147.4, 135.3, 122.9, 104.1, 263, 237,
322, 302, 175, 182, 141, 155, 89, 41, 70, 83, 141, 86.5,
82, 114.5, 129, 127, 143, 125, 92, 68, 90, 24.5, 20.1, 63.7,
39.8, 66.2, 112.4, 124.5, 94.1, 68.6, 74.4, 23.6, 27.7, 22.9,
25.2, 24.2, 54.7, 43, 33.1, 306, 274, 56, 60, 72.5, 128.5,
22, 16, 143, 103, 53, 130, 48.4, 69.8, 6.4, 18.6, 129.2,
41.7, 57.6, 14, 41.7), Urbanisation_index = c(2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 4L, 4L,
4L, 4L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 1L, 1L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 2L), Stand_density_index = c(3L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 4L, 1L, 1L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 3L,
4L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 2L, 3L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 4L, 4L, 4L, 4L, 4L), Canopy_Index = c(85L,
85L, 85L, 75L, 45L, 25L, 75L, 65L, 75L, 75L, 95L, 95L, 95L,
95L, 95L, 65L, 85L, 65L, 95L, 85L, 85L, 85L, 75L, 75L, 65L,
85L, 85L, 75L, 75L, 85L, 65L, 95L, 85L, 95L, 95L, 75L, 75L,
85L, 85L, 85L, 85L, 85L, 75L, 85L, 85L, 85L, 85L, 75L, 75L,
85L, 85L, 65L, 75L, 85L, 75L, 95L, 95L, 95L, 95L, 75L, 65L,
95L, 95L, 55L, 75L, 65L, 75L, 65L, 85L, 95L, 95L, 75L, 95L,
75L, 95L, 65L, 75L, 75L, 85L, 85L, 65L, 95L, 65L, 65L, 65L,
65L, 65L, 65L, 85L, 85L, 75L, 95L, 85L, 85L, 75L, 45L, 55L,
35L, 35L, 25L, 25L, 95L, 85L, 75L, 85L, 85L, 75L, 75L, 65L,
75L, 85L, 65L, 45L, 95L, 95L, 95L, 95L, 65L, 75L, 45L, 35L,
75L, 95L, 95L, 85L, 75L, 65L, 85L, 95L, 75L, 85L, 85L, 95L,
65L), Phenological_Index = c(2L, 4L, 2L, 2L, 4L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 1L, 1L, 1L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 4L, 3L, 2L, 3L,
2L, 2L, 2L, 1L, 3L, 1L, 4L)), class = "data.frame", row.names = c(NA,
-134L))
Data frame - QuercusRobur2
structure(list(X = c(1L, 2L, 3L, 4L, 13L, 14L, 15L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 35L, 36L,
37L, 38L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L,
59L, 63L, 64L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L,
78L, 80L, 89L, 90L, 91L, 95L, 96L, 97L, 98L, 99L, 100L, 101L,
102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L,
113L, 114L, 118L, 119L, 120L, 121L, 126L, 127L, 128L, 129L, 130L,
131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 141L, 142L, 143L,
144L, 148L, 149L, 150L, 151L, 156L, 157L, 158L, 159L, 160L, 161L,
162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L,
173L, 174L, 175L, 179L, 180L, 181L, 182L, 183L, 185L, 187L, 189L,
190L, 191L, 192L, 193L, 194L, 195L, 196L, 208L, 209L, 210L, 212L,
214L, 225L, 226L, 227L, 228L, 229L, 230L, 231L, 242L, 243L, 244L,
245L, 246L, 247L, 248L, 249L, 250L, 251L, 252L, 253L, 254L, 255L,
256L, 257L, 258L, 259L, 260L, 261L), Obs_no = c(1L, 2L, 3L, 4L,
13L, 14L, 15L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 35L, 36L, 37L, 38L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L, 63L, 64L, 68L, 69L, 70L, 71L, 72L,
73L, 74L, 75L, 76L, 77L, 78L, 80L, 89L, 90L, 91L, 95L, 96L, 97L,
98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L,
109L, 110L, 111L, 112L, 113L, 114L, 118L, 119L, 120L, 121L, 126L,
127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L,
138L, 141L, 142L, 143L, 144L, 148L, 149L, 150L, 151L, 156L, 157L,
158L, 159L, 160L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L,
169L, 170L, 171L, 172L, 173L, 174L, 175L, 179L, 180L, 181L, 182L,
183L, 185L, 187L, 189L, 190L, 191L, 192L, 193L, 194L, 195L, 196L,
208L, 209L, 210L, 212L, 214L, 225L, 226L, 227L, 228L, 229L, 230L,
231L, 242L, 243L, 244L, 245L, 246L, 247L, 248L, 249L, 250L, 251L,
252L, 253L, 254L, 255L, 256L, 257L, 258L, 259L, 260L, 261L),
Date_observed = structure(c(9L, 14L, 3L, 3L, 12L, 12L, 10L,
10L, 8L, 8L, 8L, 8L, 11L, 11L, 11L, 11L, 5L, 5L, 9L, 9L,
13L, 13L, 13L, 13L, 8L, 8L, 8L, 8L, 13L, 13L, 13L, 13L, 7L,
7L, 7L, 7L, 6L, 6L, 11L, 11L, 11L, 11L, 11L, 11L, 4L, 4L,
4L, 4L, 12L, 12L, 12L, 12L, 5L, 1L, 1L, 1L, 1L, 5L, 5L, 5L,
5L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 13L, 13L, 13L, 8L, 8L, 8L, 8L, 13L, 13L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
3L, 3L, 3L, 3L, 13L, 13L, 13L, 13L, 10L, 10L, 10L, 10L, 12L,
12L, 12L, 12L, 3L, 3L, 3L, 3L, 13L, 13L, 5L, 5L, 5L, 11L,
11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
9L, 9L, 12L, 12L, 12L, 12L, 8L, 8L, 8L, 5L, 5L, 5L, 5L, 12L,
12L, 12L, 12L, 11L, 11L, 11L, 11L, 13L, 13L, 13L, 13L, 5L,
5L, 5L, 5L), .Label = c("10/23/18", "11/18/18", "11/30/18",
"12/1/18", "12/10/18", "12/12/18", "12/2/18", "12/3/18",
"12/4/18", "12/6/18", "12/7/18", "12/8/18", "12/9/18", "9/10/18"
), class = "factor"), Latitude = c(51.41752, 52.243806, 52.947709,
52.947709, 51.491811, 51.491811, 51.60157, 51.60157, 52.68959,
52.68959, 52.68959, 52.68959, 50.697802, 50.697802, 50.697802,
50.697802, 53.62417, 53.62417, 50.446841, 50.446841, 53.959679,
53.959679, 53.959679, 53.959679, 51.78375, 51.78375, 51.78375,
51.78375, 51.456965, 51.456965, 51.456965, 51.456965, 52.011812,
52.011812, 52.011812, 52.011812, 50.121978, 50.121978, 51.43474,
51.43474, 51.10708, 51.10708, 51.10708, 51.10708, 50.435984,
50.435984, 50.435984, 50.435984, 51.78666, 51.78666, 52.441088,
52.441088, 52.552344, 49.259471, 49.259471, 49.259471, 49.259471,
50.462, 50.462, 50.462, 50.462, 51.746642, 51.746642, 51.746642,
51.746642, 52.2501, 52.2501, 52.2501, 52.2501, 52.42646,
52.42646, 52.42646, 52.42646, 53.615575, 53.615575, 53.615575,
53.615575, 51.08478, 51.08478, 51.08478, 53.19329, 53.19329,
53.19329, 53.19329, 55.968437, 55.968437, 56.52664, 56.52664,
56.52664, 56.52664, 51.8113, 51.8113, 51.8113, 51.8113, 50.52008,
50.52008, 50.52008, 50.52008, 51.48417, 51.48417, 51.48417,
51.48417, 54.58243, 54.58243, 54.58243, 54.58243, 52.58839,
52.58839, 52.58839, 52.58839, 52.717283, 52.717283, 52.717283,
52.717283, 50.740764, 50.740764, 50.740764, 50.740764, 50.733412,
50.733412, 50.79926, 50.79926, 50.79926, 53.675788, 53.675788,
48.35079, 48.35079, 48.35079, 48.35079, 51.36445, 51.36445,
51.36445, 51.36445, 52.122402, 52.122402, 52.122402, 52.16104,
52.16104, 51.88468, 51.88468, 51.88468, 51.88468, 52.34015,
52.34015, 52.34015, 52.026042, 52.026042, 52.026042, 52.026042,
51.319032, 51.319032, 51.319032, 51.319032, 51.51365, 51.51365,
51.51365, 51.51365, 53.43202, 53.43202, 53.43202, 53.43202,
51.50797, 51.50797, 51.50797, 51.50797), Longitude = c(-0.32116,
1.30786, -1.435407, -1.435407, -3.210324, -3.210324, -3.67111,
-3.67111, -3.3081, -3.3081, -3.3081, -3.3081, -2.11692, -2.11692,
-2.11692, -2.11692, -2.43155, -2.43155, -3.706923, -3.706923,
-1.061008, -1.061008, -1.061008, -1.061008, -0.65046, -0.65046,
-0.65046, -0.65046, -2.624917, -2.624917, -2.624917, -2.624917,
-0.70082, -0.70082, -0.70082, -0.70082, -5.555169, -5.555169,
0.45981, 0.45981, -2.32027, -2.32027, -2.32027, -2.32027,
-4.105617, -4.105617, -4.105617, -4.105617, -0.71433, -0.71433,
-0.176158, -0.176158, -1.337177, -123.107788, -123.107788,
-123.107788, -123.107788, -3.5607, -3.5607, -3.5607, -3.5607,
0.486416, 0.486416, 0.486416, 0.486416, -0.8825, -0.8825,
-0.8825, -0.8825, -1.78771, -1.78771, -1.78771, -1.78771,
-2.432959, -2.432959, -2.432959, -2.432959, -0.73626, -0.73626,
-0.73626, -0.63793, -0.63793, -0.63793, -0.63793, -3.179732,
-3.179732, -3.40313, -3.40313, -3.40313, -3.40313, -0.22894,
-0.22894, -0.22894, -0.22894, -4.20756, -4.20756, -4.20756,
-4.20756, -0.34854, -0.34854, -0.34854, -0.34854, -5.93229,
-5.93229, -5.93229, -5.93229, -1.96843, -1.96843, -1.96843,
-1.96843, -2.410575, -2.410575, -2.410575, -2.410575, -2.361234,
-2.361234, -2.361234, -2.361234, -2.014029, -2.014029, -3.19446,
-3.19446, -3.19446, -1.272404, -1.272404, 10.91812, 10.91812,
10.91812, 10.91812, -0.23106, -0.23106, -0.23106, -0.23106,
-0.487443, -0.487443, -0.487443, 0.18702, 0.18702, -0.17853,
-0.17853, -0.17853, -0.17853, -1.27795, -1.27795, -1.27795,
-0.503113, -0.503113, -0.503113, -0.503113, -0.472994, -0.472994,
-0.472994, -0.472994, -3.18722, -3.18722, -3.18722, -3.18722,
-2.27968, -2.27968, -2.27968, -2.27968, -0.25931, -0.25931,
-0.25931, -0.25931), Altitude = c(0, 0, 103.9, 103.9, 15,
15, 184, 184, 176, 176, 176, 176, 12, 12, 12, 12, 178, 178,
36, 36, 11, 11, 11, 11, 210, 210, 210, 210, 97, 97, 97, 97,
0, 0, 0, 0, 68, 68, 4, 4, 200, 200, 200, 200, 160, 160, 160,
160, 165.8, 165.8, 0, 0, 0, 47, 47, 47, 47, 0, 0, 0, 0, 43,
43, 43, 43, 97, 97, 97, 97, 133, 133, 133, 133, 123, 123,
123, 123, 127, 127, 127, 15, 15, 15, 15, 14, 14, 65, 65,
65, 65, 129, 129, 129, 129, 18, 18, 18, 18, 30, 30, 30, 30,
19, 19, 19, 19, 0, 0, 0, 0, 0, 0, 0, 0, 96, 96, 96, 96, 0,
0, 0, 0, 0, 49, 49, 0, 0, 0, 0, 48, 48, 48, 48, 43, 43, 43,
75, 75, 94, 94, 94, 94, 112, 112, 112, 103, 103, 103, 103,
0, 0, 0, 0, 37.5, 37.5, 37.5, 37.5, 29, 29, 29, 29, 63, 63,
63, 63), Species = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Quercus robur", class = "factor"),
Tree_diameter = c(68.8, 300, 847, 817, 62, 71, 29, 40.1,
68, 45, 60, 54, 104, 122, 85, 71, 81, 118, 39.8, 43.6, 19.8,
16.6, 15.1, 11.9, 81.8, 102.5, 75.5, 57.3, 0.3, 0.2, 0.3,
0.3, 99, 85, 74, 68, 82, 51.8, 19.9, 14.6, 196, 122, 118,
180, 58.6, 54.1, 58, 61.5, 58.4, 61, 134, 64, 52.2, 170,
114, 127, 158, 147.4, 135.3, 122.9, 104.1, 263, 237, 322,
302, 173, 186, 144, 155, 89, 41, 68, 83, 141.6, 85.5, 82.8,
114.1, 129, 127, 143, 125, 92, 68, 90, 25, 20, 63.7, 39.8,
66.2, 112.4, 124.5, 94.1, 68.6, 74.4, 24.2, 54.7, 43, 33.1,
306, 274, 56, 60, 72.5, 128.5, 22, 16, 143, 103, 53, 130,
48.4, 69.8, 6.4, 18.6, 129.2, 41.7, 57.6, 14, 320, 352, 120.9,
108.3, 53.2, 274, 85, 52, 43, 38, 37, 219, 215, 216, 175,
85.9, 49.7, 97.1, 40.8, 62.4, 181.5, 149.7, 122, 143.6, 148,
145, 99, 27.5, 32, 54, 54.1, 169, 152, 160, 138, 90.8, 87.9,
77.4, 81.2, 91.7, 62.7, 50, 72.9, 24.8, 61, 88.6, 80.1),
Urbanisation_index = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 1L, 1L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 3L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 1L, 1L, 1L,
1L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("1", "2",
"3", "4"), class = "factor"), Stand_density_.index = structure(c(3L,
4L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L,
4L, 4L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 4L, 4L, 3L, 3L,
3L, 3L, 4L, 3L, 4L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 4L, 4L, 4L, 4L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 2L,
2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L), .Label = c("1",
"2", "3", "4"), class = "factor"), Canopy_Index = c(15L,
95L, 45L, 5L, 5L, 5L, 25L, 15L, 25L, 25L, 35L, 35L, 25L,
35L, 15L, 15L, 15L, 15L, 5L, 5L, 5L, 5L, 5L, 5L, 35L, 35L,
55L, 35L, 5L, 5L, 5L, 5L, 95L, 95L, 95L, 95L, 25L, 25L, 15L,
5L, 25L, 25L, 25L, 25L, 5L, 5L, 5L, 5L, 5L, 5L, 35L, 25L,
5L, 35L, 35L, 25L, 25L, 5L, 5L, 5L, 5L, 35L, 25L, 25L, 25L,
5L, 5L, 15L, 15L, 35L, 65L, 35L, 35L, 25L, 25L, 25L, 25L,
15L, 15L, 5L, 35L, 35L, 45L, 35L, 5L, 15L, 15L, 25L, 5L,
15L, 15L, 5L, 5L, 15L, 5L, 5L, 5L, 5L, 5L, 85L, 5L, 35L,
15L, 5L, 5L, 5L, 25L, 25L, 15L, 35L, 95L, 95L, 95L, 95L,
15L, 15L, 5L, 25L, 25L, 5L, 15L, 15L, 5L, 15L, 5L, 25L, 25L,
25L, 25L, 5L, 5L, 5L, 5L, 25L, 25L, 55L, 35L, 25L, 15L, 15L,
25L, 15L, 45L, 35L, 35L, 15L, 35L, 15L, 15L, 35L, 15L, 25L,
25L, 15L, 15L, 15L, 15L, 5L, 5L, 5L, 5L, 5L, 5L, 15L, 15L
), Phenological_Index = c(4L, 4L, 3L, 4L, 2L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 3L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L)), row.names = c(NA, -165L), class = "data.frame")
How about using subtitle for labeling each individual plot?
## Observation 1
p1 <- ggplot(
QuercusRobur1,
aes(x = Longitude, y = Latitude)
) +
geom_polygon(
data = UK,
aes(x = long, y = lat, group = group),
inherit.aes = FALSE
) +
coord_map(xlim = c(-10, 5)) + # limits added as there are some points really far away
theme_classic()
Urban1 <- p1 +
geom_point(aes(color = factor(Urbanisation_index))) +
scale_color_discrete(
name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural")
) +
labs(subtitle = "A: Urbanisation Index") +
theme(legend.justification = "left")
Stand1 <- p1 +
geom_point(aes(color = factor(Stand_density_index))) +
scale_color_discrete(
name = "Stand Density Index",
labels = c(
"Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"
)
) +
labs(subtitle = "C: Stand Density Index") +
theme(legend.justification = "left")
## Observation 2
p2 <- ggplot(
QuercusRobur2,
aes(x = Longitude, y = Latitude)
) +
geom_polygon(
data = UK,
aes(x = long, y = lat, group = group),
inherit.aes = FALSE
) +
coord_map(xlim = c(-10, 5)) +
theme_classic()
Urban2 <- p2 +
geom_point(aes(color = factor(Urbanisation_index))) +
scale_color_discrete(
name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural")
) +
labs(subtitle = "B: Urbanisation Index") +
theme(legend.justification = "left")
Stand2 <- p2 +
geom_point(aes(color = factor(Stand_density_.index))) +
scale_color_discrete(
name = "Stand Density Index",
labels = c(
"Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"
)
) +
labs(subtitle = "D: Stand Density Index") +
theme(legend.justification = "left")
## Arrange the individual plots into one main plot
plot_grid(
Urban1 + ggtitle("Observational Period 1\n") + theme(plot.title = element_text(hjust = 1.0)),
Urban2 + ggtitle("Observational Period 2\n") + theme(plot.title = element_text(hjust = 1.0)),
Stand1,
Stand2,
align = "hv",
axis = 'tblr',
label_fontface = "bold",
label_fontfamily = "Times New Roman",
label_size = 8,
rel_widths = c(1, 1.3),
ncol = 2,
nrow = 2,
hjust = 0,
label_x = 0.01
)
Edit: remove duplicate axis labels and legends then use egg::ggarrange to combine subplots.
## Observation 1
Urban1 <- p1 +
geom_point(aes(color = factor(Urbanisation_index))) +
scale_color_discrete(
name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural")
) +
labs(subtitle = "A: Urbanisation Index") +
theme(legend.position = "none")
Stand1 <- p1 +
geom_point(aes(color = factor(Stand_density_index))) +
scale_color_discrete(
name = "Stand Density Index",
labels = c(
"Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"
)
) +
labs(subtitle = "C: Stand Density Index") +
theme(legend.position = "none")
## Observation 2
p2 <- ggplot(
QuercusRobur2,
aes(x = Longitude, y = Latitude)
) +
geom_polygon(
data = UK,
aes(x = long, y = lat, group = group),
inherit.aes = FALSE
) +
coord_map(xlim = c(-10, 5)) +
theme_classic() +
ylab("")
Urban2 <- p2 +
geom_point(aes(color = factor(Urbanisation_index))) +
scale_color_discrete(
name = "Urbanisation Index",
labels = c("Urban", "Suburban", "Village", "Rural")
) +
labs(subtitle = "B: Urbanisation Index") +
theme(legend.justification = "left")
Stand2 <- p2 +
geom_point(aes(color = factor(Stand_density_.index))) +
scale_color_discrete(
name = "Stand Density Index",
labels = c(
"Standing alone",
"Within a few trees or close proximity to other trees",
"Within a stand of 10-30 trees",
"Large or woodland"
)
) +
labs(subtitle = "D: Stand Density Index") +
theme(legend.justification = "left")
## Use the `egg` package
library(egg)
ggarrange(
Urban1 + ggtitle("Observational Period 1\n") + theme(plot.title = element_text(hjust = 0.5)),
Urban2 + ggtitle("Observational Period 2\n") + theme(plot.title = element_text(hjust = 0.5)),
Stand1,
Stand2,
nrow = 2,
ncol = 2
)
Using R I've fit an ordered logistic regression to some data. I need to use the fitted values to predict future values. I have a dataset which has 10 different combinations of parameters I want to predict the probabilities for the 'Result' dependant variable (which the model is fit to). I've used 'split' to split into 10 separate dataframes and I want to try to run the predict statement against each of the outputted dataframes. Can someone show me how to:
a. Create a function to run the predict statement, and
b. Loop the function through the 10 dataframes?
c. Output the prediction probabilities for the three result conditions, and bind them to the relevant input dataframe
Here's what I've tried:
# Fit the model
library(MASS)
fit <- polr(Result ~ State + score1+ score2 + history + pastdefault, data = dat, Hess=TRUE)
## Model Summary
summary(fit)
#Take the new data which doesn't have predictions
t13<- dat[ which(dat$Tranche==13), ]
out <- split( t13 , f = r13$Gamen )
#Function
pred <- function(Result, State, score1, score2, history, pastdefault,...){
cbind(out, predict(fit, out, type = "probs"))}
#Use sapply to run prediction
pred.out <- sapply(out, pred)
Further to a suggestion I've tried running the predict statement against the result of splitting the data into 'out' as per code below:
#Predict against 'out' dataframes
newdat1 <- cbind(out, predict(fit, newdata=out, type = "probs"))
However I get the same error message:
in eval(expr, envir, enclos) : object 'State' not found
Here's what the data looks like using head(data):
State score1 score2 Salaries history exp moody Result pastdefault Tranche
1 Minnesota 6.000 17.5 35.948 9 579 1085 moreinfo yes 1
2 New Mexico 4.586 17.2 28.493 11 530 1015 fail No 1
3 Washington 5.906 20.2 36.151 48 494 937 fail No 1
4 West Virginia 6.107 14.8 31.944 17 484 932 moreinfo No 1
5 Louisiana 4.761 16.8 26.461 9 535 1021 pass yes 2
6 Oklahoma 4.845 15.5 28.172 9 536 1027 pass yes 2
Gamen
1 1
2 2
3 3
4 4
5 1
6 2
Here is the result of dput(data):
ture(list(State = structure(c(23L, 31L, 47L, 48L, 18L, 36L,
39L, 42L, 13L, 14L, 16L, 35L, 50L, 1L, 20L, 27L, 30L, 7L, 9L,
34L, 43L, 3L, 4L, 19L, 25L, 26L, 37L, 8L, 29L, 38L, 2L, 21L,
32L, 6L, 10L, 11L, 15L, 24L, 5L, 17L, 33L, 40L, 41L, 46L, 49L,
28L, 12L, 22L, 45L, 32L, 9L, 5L, 44L, 13L, 38L, 19L, 25L, 26L,
37L, 8L), .Label = c("Alabama", "Alaska", "Arizona", "Arkansas",
"California", "Colorado", "Connecticut", "Delaware", "Florida",
"Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa",
"Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts",
"Michigan", "Minnesota", "Mississippi", "Missouri", "Montana",
"Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
"New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma",
"Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
"Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington",
"West Virginia", "Wisconsin", "Wyoming"), class = "factor"),
score1 = c(6, 4.586, 5.906, 6.107, 4.761, 4.845, 7.469, 4.388,
6.136, 5.826, 5.817, 6.162, 6.16, 4.405, 7.245, 5.935, 9.774,
8.817, 5.718, 4.775, 5.222, 4.778, 4.459, 6.428, 5.383, 5.692,
6.436, 7.03, 5.859, 7.109, 8.963, 7.287, 9.623, 5.443, 5.193,
6.078, 5.483, 4.08, 4.992, 5.217, 5.077, 4.797, 4.775, 5.327,
6.93, 5.16, 4.21, 6.994, 6.75, 5.859, 7.109, 8.963, 3.656,
7.287, 9.623, 5.443, 6.078, 5.483, 4.08, 4.992), score2 = c(17.5,
17.2, 20.2, 14.8, 16.8, 15.5, 14.7, 18.6, 17.3, 17.5, 15.1,
16.6, 14.9, 17.2, 17, 14.5, 13.8, 14.4, 19.1, 15.3, 15.7,
19.3, 17.1, 13.8, 15.5, 16.3, 19.9, 16.6, 15.6, 17.1, 17.6,
14.8, 15.2, 18.4, 16.3, 17.9, 15.8, 17.5, 24, 17, 16.2, 16.4,
14.4, 14.6, 15.9, 18.7, 19.1, 20.1, 13.8, 16.6, 15.6, 17.1,
24.3, 17.6, 14.8, 15.2, 24, 17, 16.2, 14.4), Salaries = c(35.948,
28.493, 36.151, 31.944, 26.461, 28.172, 40.729, 32.477, 39.431,
36.785, 34.652, 36.802, 31.285, 31.144, 40.661, 30.922, 46.087,
50.045, 32.588, 26.327, 31.223, 32.175, 28.934, 31.972, 31.189,
28.785, 38.555, 39.076, 34.72, 44.51, 47.951, 40.795, 47.612,
34.571, 32.291, 38.518, 31.511, 26.818, 41.078, 32.257, 30.793,
30.279, 25.994, 33.987, 37.746, 34.836, 29.783, 41.895, 35.406,
28.785, 38.555, 39.076, 29.082, 31.511, 26.818, 41.078, 32.257,
25.994, 33.987, 37.746), history = c(9L, 11L, 48L, 17L, 9L,
9L, 70L, 12L, 13L, 58L, 9L, 23L, 10L, 8L, 64L, 9L, 70L, 81L,
48L, 5L, 47L, 27L, 6L, 68L, 9L, 21L, 51L, 68L, 70L, 70L,
47L, 80L, 74L, 29L, 65L, 57L, 5L, 4L, 45L, 11L, 60L, 58L,
5L, 65L, 9L, 30L, 15L, 11L, 68L, 45L, 11L, 60L, 4L, 58L,
5L, 65L, 9L, 5L, 65L, 80L), exp = c(579L, 530L, 494L, 484L,
535L, 536L, 463L, 543L, 560L, 467L, 557L, 515L, 525L, 538L,
479L, 556L, 478L, 477L, 469L, 592L, 474L, 496L, 523L, 469L,
550L, 536L, 499L, 468L, 491L, 461L, 489L, 477L, 473L, 518L,
448L, 482L, 583L, 540L, 485L, 522L, 454L, 443L, 563L, 468L,
572L, 483L, 511L, 549L, 472L, 526L, 528L, 530L, 563L, 532L,
534L, 536L, 538L, 540L, 542L, 544L), moody = c(1085L, 1015L,
937L, 932L, 1021L, 1027L, 888L, 1040L, 1048L, 882L, 1060L,
975L, 1001L, 1029L, 909L, 1050L, 898L, 908L, 889L, 1107L,
893L, 944L, 1005L, 896L, 1045L, 1009L, 947L, 897L, 935L,
880L, 934L, 907L, 892L, 980L, 854L, 889L, 1099L, 1036L, 902L,
999L, 865L, 844L, 1068L, 896L, 1073L, 917L, 979L, 1033L,
901L, 1002L, 1007L, 1011L, 1076L, 1016L, 900L, 760L, 1029L,
850L, 1038L, 990L), Result = structure(c(3L, 2L, 2L, 3L,
4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 2L, 4L, 3L, 3L, 3L, 3L,
4L, 4L, 2L, 3L, 3L, 3L, 4L, 2L, 4L, 2L, 4L, 2L, 3L, 2L, 4L,
3L, 4L, 4L, 2L, 2L, 3L, 4L, 2L, 4L, 3L, 3L, 4L, 2L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"fail", "moreinfo", "pass"), class = "factor"), pastdefault = structure(c(2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L), .Label = c("No",
"yes"), class = "factor"), Tranche = c(1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 12L,
12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L), Gamen = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L)), .Names = c("State", "score1", "score2",
"Salaries", "history", "exp", "moody", "Result", "pastdefault",
"Tranche", "Gamen"), class = "data.frame", row.names = c(NA,
-60L))