Predict at a finer spatial scale using XGBoost regression - r

I want to make a prediction at a finer spatial scale using XGBoost regression. I created a model at a coarse spatial scale and now I want to apply the model parameters at a finer scale. The issue is that at the finer spatial scale the xgb.DMatrix doesn't include the dependent variable so the predict function returns this error:
Error in predict.xgb.Booster(m, xgb_p, na.rm = TRUE): Feature names stored in `object` and `newdata` are different!
I have seen this post but I still get the same error. Also, I tried to create a data.frame with the independent variables and an extra empty column with the name of the dependent variable, but still the same error.
How can I use the XGB model parameters found in the coarse spatial scale to make predictions at a fine spatial scale?
I have checked the names of the variables in both the coarse and spatial scales and they are the same.
Here is the code:
library(xgboost)
library(raster)
# the dependent and independent variables at the coarse scale
xgb_m <- xgb.DMatrix(data = data.matrix(block.data), label = block.data$ntl)
# that's the model
m = xgb.train(data = xgb_m,
max.depth = 2,
nrounds = 1000,
min_child_weight = 1,
subsample = 0.75,
eta = 0.015,
gamma = 0.5,
colsample_bytree = 1,
objective = "reg:squarederror")
# these are the independent variables at the fine spatial scale
pop = raster(paste0(wd, "pop.tif"))
tirs = raster(paste0(wd, "tirs.tif"))
agbh = raster(paste0(wd, "agbh.tif"))
vars = stack(pop, tirs, agbh)
# the xgb.DMatrix with the independent variables
xgb_p <- xgb.DMatrix(data = data.matrix(vars))
xgb_pred <- predict(m, xgb_p, na.rm = TRUE) # the error
The data.frame (block.data):
block.data = structure(list(x = c(11880750L, 11879250L, 11879750L, 11880250L,
11880750L, 11881250L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11878750L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11879250L, 11879750L, 11880250L, 11880750L, 11881250L,
11881750L, 11882250L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11881750L, 11882250L, 11882750L, 11879250L, 11879750L
), y = c(1802250L, 1801750L, 1801750L, 1801750L, 1801750L, 1801750L,
1801250L, 1801250L, 1801250L, 1801250L, 1801250L, 1800750L, 1800750L,
1800750L, 1800750L, 1800750L, 1800750L, 1800250L, 1800250L, 1800250L,
1800250L, 1800250L, 1800250L, 1800250L, 1799750L, 1799750L, 1799750L,
1799750L, 1799750L, 1799750L, 1799750L, 1799750L, 1799250L, 1799250L
), ntl = c(18.7969169616699, 25.7222957611084, 23.4188251495361,
25.4322757720947, 16.4593601226807, 12.7868213653564, 30.9337253570557,
29.865758895874, 30.4080600738525, 29.5479888916016, 24.3493347167969,
35.2427635192871, 38.989933013916, 34.6536979675293, 29.4607238769531,
30.7469024658203, 34.3946380615234, 42.8660278320312, 34.7930717468262,
30.9516315460205, 32.20654296875, 39.999755859375, 46.6002235412598,
38.6480979919434, 60.5214920043945, 33.1799964904785, 31.8498134613037,
30.9209423065186, 32.2269744873047, 53.7062034606934, 45.5225944519043,
38.3570976257324, 123.040382385254, 73.0528182983398), pop = c(19.6407718658447,
610.009216308594, 654.812622070312, 426.475830078125, 66.3839492797852,
10.6471328735352, 443.848846435547, 602.677429199219, 488.478454589844,
387.470947265625, 58.2341117858887, 413.888488769531, 315.057678222656,
354.082946777344, 602.827758789062, 463.518829345703, 296.713928222656,
923.920593261719, 434.436645507812, 799.562927246094, 404.709564208984,
265.043304443359, 366.697235107422, 399.851684570312, 952.2314453125,
870.356994628906, 673.406616210938, 493.521606445312, 273.841888427734,
371.428619384766, 383.057830810547, 320.986755371094, 991.131225585938,
1148.87768554688), tirs = c(39.7242431640625, 44.9583969116211,
41.4048385620117, 42.6056709289551, 40.0976028442383, 38.7490005493164,
44.2747650146484, 43.5645370483398, 41.6180191040039, 40.3799781799316,
38.8664817810059, 44.9089202880859, 44.414306640625, 44.560977935791,
43.1288986206055, 40.9315185546875, 38.8918418884277, 46.3063850402832,
45.5805702209473, 44.9196586608887, 42.2495613098145, 39.3051452636719,
38.7914810180664, 38.6069412231445, 44.6782455444336, 46.4024772644043,
44.4720573425293, 41.7361183166504, 42.3378067016602, 41.0018348693848,
39.3579216003418, 41.6303863525391, 43.8207550048828, 46.0460357666016
), agbh = c(3.32185006141663, 4.98925733566284, 4.35699367523193,
4.94798421859741, 3.14325952529907, 2.93211793899536, 4.52736520767212,
4.99723243713379, 5.13944292068481, 3.92965626716614, 3.43465113639832,
3.55617475509644, 3.4659411907196, 5.24469566345215, 5.36995029449463,
4.61549234390259, 4.82002925872803, 4.20452928543091, 4.71502685546875,
5.20452785491943, 5.05676746368408, 5.9952244758606, 6.16778612136841,
4.69053316116333, 2.62325501441956, 4.74775457382202, 4.93133020401001,
5.02366256713867, 5.74016952514648, 6.28353786468506, 4.67424774169922,
4.56812858581543, 1.88153350353241, 4.31531000137329)), class = "data.frame", row.names = c(NA,
-34L))
The fine resolution data
vars = new("RasterBrick", file = new(".RasterFile", name = "", datanotation = "FLT4S",
byteorder = "little", nodatavalue = -Inf, NAchanged = FALSE,
nbands = 1L, bandorder = "BIL", offset = 0L, toptobottom = TRUE,
blockrows = 0L, blockcols = 0L, driver = "", open = FALSE),
data = new(".MultipleRasterData", values = structure(c(NA,
NA, NA, NA, 18.7969169616699, NA, NA, NA, NA, NA, 25.7222957611084,
23.4188251495361, 25.4322757720947, 16.4593601226807, 12.7868213653564,
NA, NA, NA, NA, 30.9337253570557, 29.865758895874, 30.4080600738525,
29.5479888916016, 24.3493347167969, NA, NA, NA, 35.2427635192871,
38.989933013916, 34.6536979675293, 29.4607238769531, 30.7469024658203,
34.3946380615234, NA, NA, NA, NA, 42.8660278320312, 34.7930717468262,
30.9516315460205, 32.20654296875, 39.999755859375, 46.6002235412598,
38.6480979919434, NA, NA, 60.5214920043945, 33.1799964904785,
31.8498134613037, 30.9209423065186, 32.2269744873047, 53.7062034606934,
45.5225944519043, 38.3570976257324, NA, 123.040382385254,
73.0528182983398, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
19.6407718658447, NA, NA, NA, NA, NA, 610.009216308594, 654.812622070312,
426.475830078125, 66.3839492797852, 10.6471328735352, NA,
NA, NA, NA, 443.848846435547, 602.677429199219, 488.478454589844,
387.470947265625, 58.2341117858887, NA, NA, NA, 413.888488769531,
315.057678222656, 354.082946777344, 602.827758789062, 463.518829345703,
296.713928222656, NA, NA, NA, NA, 923.920593261719, 434.436645507812,
799.562927246094, 404.709564208984, 265.043304443359, 366.697235107422,
399.851684570312, NA, NA, 952.2314453125, 870.356994628906,
673.406616210938, 493.521606445312, 273.841888427734, 371.428619384766,
383.057830810547, 320.986755371094, NA, 991.131225585938,
1148.87768554688, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
39.7242431640625, NA, NA, NA, NA, NA, 44.9583969116211, 41.4048385620117,
42.6056709289551, 40.0976028442383, 38.7490005493164, NA,
NA, NA, NA, 44.2747650146484, 43.5645370483398, 41.6180191040039,
40.3799781799316, 38.8664817810059, NA, NA, NA, 44.9089202880859,
44.414306640625, 44.560977935791, 43.1288986206055, 40.9315185546875,
38.8918418884277, NA, NA, NA, NA, 46.3063850402832, 45.5805702209473,
44.9196586608887, 42.2495613098145, 39.3051452636719, 38.7914810180664,
38.6069412231445, NA, NA, 44.6782455444336, 46.4024772644043,
44.4720573425293, 41.7361183166504, 42.3378067016602, 41.0018348693848,
39.3579216003418, 41.6303863525391, NA, 43.8207550048828,
46.0460357666016, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
3.32185006141663, NA, NA, NA, NA, NA, 4.98925733566284, 4.35699367523193,
4.94798421859741, 3.14325952529907, 2.93211793899536, NA,
NA, NA, NA, 4.52736520767212, 4.99723243713379, 5.13944292068481,
3.92965626716614, 3.43465113639832, NA, NA, NA, 3.55617475509644,
3.4659411907196, 5.24469566345215, 5.36995029449463, 4.61549234390259,
4.82002925872803, NA, NA, NA, NA, 4.20452928543091, 4.71502685546875,
5.20452785491943, 5.05676746368408, 5.9952244758606, 6.16778612136841,
4.69053316116333, NA, NA, 2.62325501441956, 4.74775457382202,
4.93133020401001, 5.02366256713867, 5.74016952514648, 6.28353786468506,
4.67424774169922, 4.56812858581543, NA, 1.88153350353241,
4.31531000137329, NA, NA, NA, NA, NA, NA), .Dim = c(63L,
4L)), offset = 0, gain = 1, inmemory = TRUE, fromdisk = FALSE,
nlayers = 4L, dropped = NULL, isfactor = c(FALSE, FALSE,
FALSE, FALSE), attributes = list(), haveminmax = TRUE,
min = c(12.7868213653564, 10.6471328735352, 38.6069412231445,
1.88153350353241), max = c(123.040382385254, 1148.87768554688,
46.4024772644043, 6.28353786468506), unit = "", names = c("ntl",
"pop", "tirs", "agbh")), legend = new(".RasterLegend",
type = character(0), values = logical(0), color = logical(0),
names = logical(0), colortable = logical(0)), title = character(0),
extent = new("Extent", xmin = 11878500, xmax = 11883000,
ymin = 1799000, ymax = 1802500), rotated = FALSE, rotation = new(".Rotation",
geotrans = numeric(0), transfun = function ()
NULL), ncols = 9L, nrows = 7L, crs = new("CRS", projargs = NA_character_),
srs = character(0), history = list(), z = list())

The issue you have is that the model is trained on all columns present in block.data, so also on x, y and ntl itself. In your later rasterstack, those variables are not present and thus you get an error.
We get can around this by creating the xgb.DMatrix differently:
#create matrix
xgb_m <- xgb.DMatrix(data = data.matrix(block.data[, c("pop", "tirs", "agbh")]), label = block.data$ntl)
#your model
m = xgb.train(data = xgb_m,
max.depth = 2,
nrounds = 1000,
min_child_weight = 1,
subsample = 0.75,
eta = 0.015,
gamma = 0.5,
colsample_bytree = 1,
objective = "reg:squarederror")
#load rasterstack
library(terra)
pop = rast("pop.tif")
tirs = rast("tirs.tif")
agbh = rast("agbh.tif")
vars <- c(pop, tirs, agbh)
#predict
xgb_p <- xgb.DMatrix(data.matrix(vars))
predict(m, xgb_p)

Related

Extract residuals from an XGBoost regression as a single raster

In R, using the caret and xgboost packages and this tutorial, I am running an XGBoost regression (XGBR) and I want to extract the residuals of the XGBR. I hyper-tuned the model using the caret package and then, using the 'best' model parameters I used the xgboost package to perform the regression.
My dataset has the ntl, pop, tirs, agbh variables stored in data.frame (ntl is the dependent variable while the other three are the independent). Assuming that my XGBR model is called m and my data.frame is called block.data, I did:
library(caret)
library(terra)
library(xgboost)
library(doParallel)
library(dplyr)
library(ggplot2)
library(glue)
library(ModelMetrics)
library(readr)
wd = "path/"
block.data = read.csv(paste0(wd, "block.data.csv"))
block.data = subset(block.data, select = c(ntl, tirs, pop, agbh))
set.seed(1123)
samp <- sample(nrow(block.data), 0.80 * nrow(block.data))
train <- block.data[samp, ]
train_x <- as.matrix(select(train, -ntl))
train_y <- train$ntl
test <- block.data[-samp, ]
test_x <- select(test, -ntl)
test_y <- test$ntl
no_cores <- detectCores() - 1
cl = makePSOCKcluster(no_cores)
registerDoParallel(cl)
# default model
grid_default <- expand.grid(
nrounds = 100,
max_depth = 6,
eta = 0.3,
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1
)
train_control <- caret::trainControl(
method = "none",
verboseIter = FALSE, # no training log
allowParallel = TRUE # FALSE for reproducible results
)
xgb_base <- caret::train(
x = train_x,
y = train_y,
trControl = train_control,
tuneGrid = grid_default,
method = "xgbTree",
verbose = TRUE
)
# hyperparameter tuning
# setting up the maximum number of trees
nrounds <- 1000
# note to start nrounds from 200, as smaller learning rates result in errors so
# big with lower starting points that they'll mess the scales
tune_grid <- expand.grid(
nrounds = seq(from = 200, to = nrounds, by = 50),
eta = c(0.025, 0.05, 0.1, 0.3),
max_depth = c(2, 3, 4, 5, 6),
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1
)
tune_control <- caret::trainControl(
method = "cv", # cross-validation
number = 3, # with n folds
#index = createFolds(tr_treated$Id_clean), # fix the folds
verboseIter = FALSE, # no training log
allowParallel = TRUE # FALSE for reproducible results
)
xgb_tune <- caret::train(
x = train_x,
y = train_y,
trControl = tune_control,
tuneGrid = tune_grid,
method = "xgbTree",
verbose = TRUE
)
tuneplot <- function(x, probs = .90) {
ggplot(x) +
coord_cartesian(ylim = c(quantile(x$results$RMSE, probs = probs), min(x$results$RMSE))) +
theme_bw()
}
tuneplot(xgb_tune)
xgb_tune$bestTune
# find maximum depth
tune_grid2 <- expand.grid(
nrounds = seq(from = 50, to = nrounds, by = 50),
eta = xgb_tune$bestTune$eta,
max_depth = ifelse(xgb_tune$bestTune$max_depth == 2,
c(xgb_tune$bestTune$max_depth:4),
xgb_tune$bestTune$max_depth - 1:xgb_tune$bestTune$max_depth + 1),
gamma = 0,
colsample_bytree = 1,
min_child_weight = c(1, 2, 3),
subsample = 1
)
xgb_tune2 <- caret::train(
x = train_x,
y = train_y,
trControl = tune_control,
tuneGrid = tune_grid2,
method = "xgbTree",
verbose = TRUE
)
tuneplot(xgb_tune2)
xgb_tune2$bestTune
# different values for row and column sampling
tune_grid3 <- expand.grid(
nrounds = seq(from = 50, to = nrounds, by = 50),
eta = xgb_tune$bestTune$eta,
max_depth = xgb_tune2$bestTune$max_depth,
gamma = 0,
colsample_bytree = c(0.4, 0.6, 0.8, 1.0),
min_child_weight = xgb_tune2$bestTune$min_child_weight,
subsample = c(0.5, 0.75, 1.0)
)
xgb_tune3 <- caret::train(
x = train_x,
y = train_y,
trControl = tune_control,
tuneGrid = tune_grid3,
method = "xgbTree",
verbose = TRUE
)
tuneplot(xgb_tune3, probs = .95)
xgb_tune3$bestTune
set.seed(57)
omp_set_num_threads(2) # caret parallel processing threads
# gamma
tune_grid4 <- expand.grid(
nrounds = seq(from = 50, to = nrounds, by = 50),
eta = xgb_tune$bestTune$eta,
max_depth = xgb_tune2$bestTune$max_depth,
gamma = c(0, 0.05, 0.1, 0.5, 0.7, 0.9, 1.0),
colsample_bytree = xgb_tune3$bestTune$colsample_bytree,
min_child_weight = xgb_tune2$bestTune$min_child_weight,
subsample = xgb_tune3$bestTune$subsample
)
xgb_tune4 <- caret::train(
x = train_x,
y = train_y,
trControl = tune_control,
tuneGrid = tune_grid4,
method = "xgbTree",
verbose = TRUE
)
tuneplot(xgb_tune4)
xgb_tune4$bestTune
# Reducing the Learning Rate
tune_grid5 <- expand.grid(
nrounds = seq(from = 100, to = 10000, by = 100),
eta = c(0.01, 0.015, 0.025, 0.05, 0.1),
max_depth = xgb_tune2$bestTune$max_depth,
gamma = xgb_tune4$bestTune$gamma,
colsample_bytree = xgb_tune3$bestTune$colsample_bytree,
min_child_weight = xgb_tune2$bestTune$min_child_weight,
subsample = xgb_tune3$bestTune$subsample
)
xgb_tune5 <- caret::train(
x = train_x,
y = train_y,
trControl = tune_control,
tuneGrid = tune_grid5,
method = "xgbTree",
verbose = TRUE
)
tuneplot(xgb_tune5)
xgb_tune5$bestTune
# Fitting the Model
(final_grid <- expand.grid(
nrounds = xgb_tune5$bestTune$nrounds,
eta = xgb_tune5$bestTune$eta,
max_depth = xgb_tune5$bestTune$max_depth,
gamma = xgb_tune5$bestTune$gamma,
colsample_bytree = xgb_tune5$bestTune$colsample_bytree,
min_child_weight = xgb_tune5$bestTune$min_child_weight,
subsample = xgb_tune5$bestTune$subsample
))
(xgb_model <- caret::train(
x = train_x,
y = train_y,
trControl = train_control,
tuneGrid = final_grid,
method = "xgbTree",
verbose = TRUE
))
stopCluster(cl)
# apply model to the whole data set using xgboost
xgb_m <- xgb.DMatrix(data = data.matrix(block.data), label = block.data$ntl)
m = xgb.train(data = xgb_m,
max.depth = xgb_tune5$bestTune$max_depth,
# watchlist = watchlist,
nrounds = xgb_tune5$bestTune$nrounds,
min_child_weight = xgb_tune5$bestTune$min_child_weight,
subsample = xgb_tune5$bestTune$subsample,
eta = xgb_tune5$bestTune$eta,
gamma = xgb_tune5$bestTune$gamma,
colsample_bytree = xgb_tune5$bestTune$colsample_bytree,
objective = "reg:squarederror")
m
# export xgb residuals
xgb_resids = predict(m, xgb_m, na.rm = TRUE)
sb = c(ntl, pop_res, tirs_res, agbh_res)
xgb_resids = sb$ntl - xgb_resids
plot(xgb_resids)
The plot looks like this:
Obviously, I am doing something very wrong. How can I export the residuals of an XGBR as a single raster?
Here is a very small sample of my dataset:
block.data = structure(list(x = c(11880750L, 11879250L, 11879750L, 11880250L,
11880750L, 11881250L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11878750L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11879250L, 11879750L, 11880250L, 11880750L, 11881250L,
11881750L, 11882250L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11881750L, 11882250L, 11882750L, 11879250L, 11879750L
), y = c(1802250L, 1801750L, 1801750L, 1801750L, 1801750L, 1801750L,
1801250L, 1801250L, 1801250L, 1801250L, 1801250L, 1800750L, 1800750L,
1800750L, 1800750L, 1800750L, 1800750L, 1800250L, 1800250L, 1800250L,
1800250L, 1800250L, 1800250L, 1800250L, 1799750L, 1799750L, 1799750L,
1799750L, 1799750L, 1799750L, 1799750L, 1799750L, 1799250L, 1799250L
), ntl = c(18.7969169616699, 25.7222957611084, 23.4188251495361,
25.4322757720947, 16.4593601226807, 12.7868213653564, 30.9337253570557,
29.865758895874, 30.4080600738525, 29.5479888916016, 24.3493347167969,
35.2427635192871, 38.989933013916, 34.6536979675293, 29.4607238769531,
30.7469024658203, 34.3946380615234, 42.8660278320312, 34.7930717468262,
30.9516315460205, 32.20654296875, 39.999755859375, 46.6002235412598,
38.6480979919434, 60.5214920043945, 33.1799964904785, 31.8498134613037,
30.9209423065186, 32.2269744873047, 53.7062034606934, 45.5225944519043,
38.3570976257324, 123.040382385254, 73.0528182983398), pop = c(19.6407718658447,
610.009216308594, 654.812622070312, 426.475830078125, 66.3839492797852,
10.6471328735352, 443.848846435547, 602.677429199219, 488.478454589844,
387.470947265625, 58.2341117858887, 413.888488769531, 315.057678222656,
354.082946777344, 602.827758789062, 463.518829345703, 296.713928222656,
923.920593261719, 434.436645507812, 799.562927246094, 404.709564208984,
265.043304443359, 366.697235107422, 399.851684570312, 952.2314453125,
870.356994628906, 673.406616210938, 493.521606445312, 273.841888427734,
371.428619384766, 383.057830810547, 320.986755371094, 991.131225585938,
1148.87768554688), tirs = c(39.7242431640625, 44.9583969116211,
41.4048385620117, 42.6056709289551, 40.0976028442383, 38.7490005493164,
44.2747650146484, 43.5645370483398, 41.6180191040039, 40.3799781799316,
38.8664817810059, 44.9089202880859, 44.414306640625, 44.560977935791,
43.1288986206055, 40.9315185546875, 38.8918418884277, 46.3063850402832,
45.5805702209473, 44.9196586608887, 42.2495613098145, 39.3051452636719,
38.7914810180664, 38.6069412231445, 44.6782455444336, 46.4024772644043,
44.4720573425293, 41.7361183166504, 42.3378067016602, 41.0018348693848,
39.3579216003418, 41.6303863525391, 43.8207550048828, 46.0460357666016
), agbh = c(3.32185006141663, 4.98925733566284, 4.35699367523193,
4.94798421859741, 3.14325952529907, 2.93211793899536, 4.52736520767212,
4.99723243713379, 5.13944292068481, 3.92965626716614, 3.43465113639832,
3.55617475509644, 3.4659411907196, 5.24469566345215, 5.36995029449463,
4.61549234390259, 4.82002925872803, 4.20452928543091, 4.71502685546875,
5.20452785491943, 5.05676746368408, 5.9952244758606, 6.16778612136841,
4.69053316116333, 2.62325501441956, 4.74775457382202, 4.93133020401001,
5.02366256713867, 5.74016952514648, 6.28353786468506, 4.67424774169922,
4.56812858581543, 1.88153350353241, 4.31531000137329)), class = "data.frame", row.names = c(NA,
-34L))
My raster layer:
r = new("RasterBrick", file = new(".RasterFile", name = "", datanotation = "FLT4S",
byteorder = "little", nodatavalue = -Inf, NAchanged = FALSE,
nbands = 1L, bandorder = "BIL", offset = 0L, toptobottom = TRUE,
blockrows = 0L, blockcols = 0L, driver = "", open = FALSE),
data = new(".MultipleRasterData", values = structure(c(NA,
NA, NA, NA, 18.7969169616699, NA, NA, NA, NA, NA, 25.7222957611084,
23.4188251495361, 25.4322757720947, 16.4593601226807, 12.7868213653564,
NA, NA, NA, NA, 30.9337253570557, 29.865758895874, 30.4080600738525,
29.5479888916016, 24.3493347167969, NA, NA, NA, 35.2427635192871,
38.989933013916, 34.6536979675293, 29.4607238769531, 30.7469024658203,
34.3946380615234, NA, NA, NA, NA, 42.8660278320312, 34.7930717468262,
30.9516315460205, 32.20654296875, 39.999755859375, 46.6002235412598,
38.6480979919434, NA, NA, 60.5214920043945, 33.1799964904785,
31.8498134613037, 30.9209423065186, 32.2269744873047, 53.7062034606934,
45.5225944519043, 38.3570976257324, NA, 123.040382385254,
73.0528182983398, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
19.6407718658447, NA, NA, NA, NA, NA, 610.009216308594, 654.812622070312,
426.475830078125, 66.3839492797852, 10.6471328735352, NA,
NA, NA, NA, 443.848846435547, 602.677429199219, 488.478454589844,
387.470947265625, 58.2341117858887, NA, NA, NA, 413.888488769531,
315.057678222656, 354.082946777344, 602.827758789062, 463.518829345703,
296.713928222656, NA, NA, NA, NA, 923.920593261719, 434.436645507812,
799.562927246094, 404.709564208984, 265.043304443359, 366.697235107422,
399.851684570312, NA, NA, 952.2314453125, 870.356994628906,
673.406616210938, 493.521606445312, 273.841888427734, 371.428619384766,
383.057830810547, 320.986755371094, NA, 991.131225585938,
1148.87768554688, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
39.7242431640625, NA, NA, NA, NA, NA, 44.9583969116211, 41.4048385620117,
42.6056709289551, 40.0976028442383, 38.7490005493164, NA,
NA, NA, NA, 44.2747650146484, 43.5645370483398, 41.6180191040039,
40.3799781799316, 38.8664817810059, NA, NA, NA, 44.9089202880859,
44.414306640625, 44.560977935791, 43.1288986206055, 40.9315185546875,
38.8918418884277, NA, NA, NA, NA, 46.3063850402832, 45.5805702209473,
44.9196586608887, 42.2495613098145, 39.3051452636719, 38.7914810180664,
38.6069412231445, NA, NA, 44.6782455444336, 46.4024772644043,
44.4720573425293, 41.7361183166504, 42.3378067016602, 41.0018348693848,
39.3579216003418, 41.6303863525391, NA, 43.8207550048828,
46.0460357666016, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
3.32185006141663, NA, NA, NA, NA, NA, 4.98925733566284, 4.35699367523193,
4.94798421859741, 3.14325952529907, 2.93211793899536, NA,
NA, NA, NA, 4.52736520767212, 4.99723243713379, 5.13944292068481,
3.92965626716614, 3.43465113639832, NA, NA, NA, 3.55617475509644,
3.4659411907196, 5.24469566345215, 5.36995029449463, 4.61549234390259,
4.82002925872803, NA, NA, NA, NA, 4.20452928543091, 4.71502685546875,
5.20452785491943, 5.05676746368408, 5.9952244758606, 6.16778612136841,
4.69053316116333, NA, NA, 2.62325501441956, 4.74775457382202,
4.93133020401001, 5.02366256713867, 5.74016952514648, 6.28353786468506,
4.67424774169922, 4.56812858581543, NA, 1.88153350353241,
4.31531000137329, NA, NA, NA, NA, NA, NA), .Dim = c(63L,
4L)), offset = 0, gain = 1, inmemory = TRUE, fromdisk = FALSE,
nlayers = 4L, dropped = NULL, isfactor = c(FALSE, FALSE,
FALSE, FALSE), attributes = list(), haveminmax = TRUE,
min = c(12.7868213653564, 10.6471328735352, 38.6069412231445,
1.88153350353241), max = c(123.040382385254, 1148.87768554688,
46.4024772644043, 6.28353786468506), unit = "", names = c("ntl",
"pop", "tirs", "agbh")), legend = new(".RasterLegend",
type = character(0), values = logical(0), color = logical(0),
names = logical(0), colortable = logical(0)), title = character(0),
extent = new("Extent", xmin = 11878500, xmax = 11883000,
ymin = 1799000, ymax = 1802500), rotated = FALSE, rotation = new(".Rotation",
geotrans = numeric(0), transfun = function ()
NULL), ncols = 9L, nrows = 7L, crs = new("CRS", projargs = NA_character_),
srs = character(0), history = list(), z = list())
I found this solution:
d <- tibble(pred = predict(f, newdata = xs)
, obs = mtcars$mpg) %>%
mutate(resid = pred - obs,
resid_sq = resid^2)
and then I cbind the d with the coordinates from my raster data and I create the residuals raster.

Adding legend to ggplot with geom_line with factor color and manually added line

I can see that there are a lot of questions similar to this, but I cant find solution for my particular problem.
Data:
risk_accum <- structure(list(date = structure(c(1465948800, 1465952400, 1465956000,
1465959600, 1465963200, 1465966800, 1465970400, 1465974000, 1465977600,
1465981200, 1465984800, 1465988400, 1465992000, 1465995600, 1465999200,
1466002800, 1466006400, 1466010000, 1466013600, 1466017200, 1466020800,
1466024400, 1466028000, 1466031600, 1466035200, 1466038800, 1466042400,
1466046000, 1466049600, 1466053200, 1466056800, 1466060400, 1466064000,
1466067600, 1466071200, 1466074800, 1466078400, 1466082000, 1466085600,
1466089200, 1466092800, 1466096400, 1466100000, 1466103600, 1466107200,
1466110800, 1466114400, 1466118000, 1466121600, 1466125200, 1466128800,
1466132400, 1466136000, 1466139600, 1466143200, 1466146800, 1466150400,
1466154000, 1466157600, 1466161200, 1466164800, 1466168400, 1466172000,
1466175600, 1466179200, 1466182800, 1466186400, 1466190000, 1466193600,
1466197200, 1466200800, 1466204400, 1466208000, 1466211600, 1466215200,
1466218800, 1466222400, 1466226000, 1466229600, 1466233200, 1466236800,
1466240400, 1466244000, 1466247600, 1466251200, 1466254800, 1466258400,
1466262000, 1466265600, 1466269200, 1466272800, 1466276400, 1466280000,
1466283600, 1466287200, 1466290800, 1466294400, 1466298000, 1466301600,
1466305200, 1466308800, 1466312400, 1466316000, 1466319600, 1466323200,
1466326800, 1466330400, 1466334000, 1466337600, 1466341200, 1466344800,
1466348400, 1466352000, 1466355600, 1466359200, 1466362800, 1466366400,
1466370000, 1466373600, 1466377200), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), risk = c(NA, NA, NA, 1, 2, 3, 4, 5, 6, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 2, 3, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 2, 3,
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, NA, NA)), .Names = c("date",
"risk"), row.names = c(NA, -120L), class = c("tbl_df", "tbl",
"data.frame"))
And code to generate graph:
#color variable
color_var <- vector(mode = "double",length = length(risk_accum$risk))
color_var[color_var== '0']<-NA
color_var[risk_accum$risk<6] <- "green4"
color_var[risk_accum$risk>=6 & risk_accum$risk<12] <- "yellow2"
color_var[risk_accum$risk>=12] <- "red"
#plot of Effective Blight Hours accumulation
ggplot(risk_accum)+
geom_line(aes(x = date, y = risk), color = color_var)+
scale_y_continuous(name = "EBH accumulation")+
scale_colour_manual(values=c("green", "yellow", "red"))+
geom_line(aes(date, y= 12), linetype= "dotted", size = 0.1)+
theme(axis.title.x = element_blank())
I need to get a legend which would explain the traffic light system (red is danger, etc) and manually added threshold risk line.
Add your color variable to the dataset, map to that variable inside aes, and use scale_*_identity to directly use the colors.
risk_accum$color_var <- NA
risk_accum$color_var[risk_accum$risk<6] <- "green4"
risk_accum$color_var[risk_accum$risk>=6 & risk_accum$risk<12] <- "yellow2"
risk_accum$color_var[risk_accum$risk>=12] <- "red"
ggplot(risk_accum)+
geom_line(aes(x = date, y = risk, color = color_var)) +
scale_y_continuous(name = "EBH accumulation")+
scale_color_identity(guide = 'legend') +
geom_line(aes(date, y= 12), linetype= "dotted", size = 0.1)+
theme(axis.title.x = element_blank())
You can also add your threshold to the legend:
ggplot(risk_accum)+
geom_line(aes(x = date, y = risk, color = color_var)) +
geom_line(aes(date, y= 12, linetype = "threshold"), size = 0.1)+
scale_y_continuous(name = "EBH accumulation")+
scale_color_identity(guide = 'legend') +
scale_linetype_manual(values = 2) +
theme(axis.title.x = element_blank())

How can I get highcharter to represent a forecast object?

This is a follow-on to this question.
I am trying to get the pipeline given in that question to accept a forecast object as input:
Again, using this data:
> dput(t)
structure(c(2, 2, 267822980, 325286564, 66697091, 239352431,
94380295, 1, 126621669, 158555699, 32951026, 23, 108000151, 132505189,
29587564, 120381505, 25106680, 117506099, 22868767, 115940080,
22878163, 119286731, 22881061), .Dim = c(23L, 1L), index = structure(c(1490990400,
1490994000, 1490997600, 1491001200, 1491004800, 1491008400, 1491012000,
1491026400, 1491033600, 1491037200, 1491040800, 1491058800, 1491062400,
1491066000, 1491069600, 1491073200, 1491076800, 1491109200, 1491112800,
1491120000, 1491123600, 1491156000, 1491159600), tzone = "US/Mountain", tclass = c("POSIXct",
"POSIXt")), class = c("xts", "zoo"), .indexCLASS = c("POSIXct",
"POSIXt"), tclass = c("POSIXct", "POSIXt"), .indexTZ = "US/Mountain", tzone = "US/Mountain", .CLASS = "double", .Dimnames = list(
NULL, "count"))
I use
highchart(type = 'stock') %>%
hc_add_series(t) %>%
hc_xAxis(type = 'datetime')
To create
But if I follow this same recipe using
require("forecast")
t.arima <- auto.arima(t)
x <- forecast(t.arima, level = c(95, 80))
highchart(type = 'stock') %>%
hc_add_series(x) %>%
hc_xAxis(type = 'datetime')
I get this error:
Error in as.Date.ts(.) : unable to convert ts time to Date class
How can I show the forecast series along with the historical? I've seen this in the documentation, but don't understand why I'd be getting this error.
JS CONSOLE OUTPUT FOR JK:
DF DATA AFTER RE-INDEXING:
dput(df)
structure(list(Index = structure(c(1490968800, 1490972400, 1490976000,
1490979600, 1490983200, 1490986800, 1490990400, 1491004800, 1491012000,
1491015600, 1491019200, 1491037200, 1491040800, 1491044400, 1491048000,
1491051600, 1491055200, 1491087600, 1491091200, 1491098400, 1491102000,
1491134400, 1491138000, 1491217200, 1491220800, 1491224400, 1491228000,
1491231600, 1491235200, 1491238800, 1491242400, 1491246000, 1491249600,
1491253200, 1491256800, 1491260400, 1491264000, 1491267600), class = c("POSIXct",
"POSIXt")), Data = c(2, 2, 259465771, 315866206, 64582553, 233440220,
91918347, 1, 126563786, 158555699, 32951026, 23, 108000151, 132505189,
29587564, 120381505, 25106680, 117506099, 22868767, 115898351,
22878163, 119285747, 22881061, 157925588, 32447780, 223096830,
281656273, 45406684, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
Fitted = c(102170573.857143, 102170573.857143, 102170573.857143,
102170573.857143, 102170573.857143, 102170573.857143, 102170573.857143,
102170573.857143, 102170573.857143, 102170573.857143, 102170573.857143,
102170573.857143, 102170573.857143, 102170573.857143, 102170573.857143,
102170573.857143, 102170573.857143, 102170573.857143, 102170573.857143,
102170573.857143, 102170573.857143, 102170573.857143, 102170573.857143,
102170573.857143, 102170573.857143, 102170573.857143, 102170573.857143,
102170573.857143, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
`Point Forecast` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 102170573.857143, 102170573.857143, 102170573.857143,
102170573.857143, 102170573.857143, 102170573.857143, 102170573.857143,
102170573.857143, 102170573.857143, 102170573.857143), `Lo 80` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -16003477.5789723,
-16003477.5789723, -16003477.5789723, -16003477.5789723,
-16003477.5789723, -16003477.5789723, -16003477.5789723,
-16003477.5789723, -16003477.5789723, -16003477.5789723),
`Hi 80` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 220344625.293258, 220344625.293258, 220344625.293258,
220344625.293258, 220344625.293258, 220344625.293258, 220344625.293258,
220344625.293258, 220344625.293258, 220344625.293258), `Lo 95` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -78561041.5917782,
-78561041.5917782, -78561041.5917782, -78561041.5917782,
-78561041.5917782, -78561041.5917782, -78561041.5917782,
-78561041.5917782, -78561041.5917782, -78561041.5917782),
`Hi 95` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 282902189.306064, 282902189.306064, 282902189.306064,
282902189.306064, 282902189.306064, 282902189.306064, 282902189.306064,
282902189.306064, 282902189.306064, 282902189.306064)), .Names = c("Index",
"Data", "Fitted", "Point Forecast", "Lo 80", "Hi 80", "Lo 95",
"Hi 95"), row.names = c(NA, -38L), class = "data.frame")
Not sure this is due to the irregular time series.
Anyway, ggfortify:::fortify.forecast is your friend. Why? Because fortify (try to) transform all the R object in data frames. So:
library(highcharter)
library(forecast)
t.arima <- auto.arima(t)
x <- forecast(t, level = c(95, 80))
library(highcharter)
library(ggplot2)
library(ggfortify)
#>
#> Attaching package: 'ggfortify'
#> The following object is masked from 'package:forecast':
#>
#> gglagplot
class(x)
#> [1] "forecast"
df <- fortify(x)
head(df)
#> Index Data Fitted Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 1 1 2 140658844 NA NA NA NA NA
#> 2 3601 2 121734145 NA NA NA NA NA
#> 3 7201 267822980 105355638 NA NA NA NA NA
#> 4 10801 325286564 127214522 NA NA NA NA NA
#> 5 14401 66697091 153863779 NA NA NA NA NA
#> 6 18001 239352431 142136089 NA NA NA NA NA
Now you can:
highchart(type = "stock") %>%
hc_add_series(df, "line", hcaes(Index, Data), name = "Original") %>%
hc_add_series(df, "line", hcaes(Index, Fitted), name = "Fitted") %>%
hc_add_series(df, "line", hcaes(Index, `Point Forecast`), name = "Forecast") %>%
hc_add_series(df, "arearange", hcaes(Index, low = `Lo 80`, high = `Hi 80`), name = "Interval")
As you can see, fortify can't detect the real time too. So you need to transform the Index in the time what you want.
The error
Error in as.Date.ts(.) : unable to convert ts time to Date class
is due to the fact that you have a ts object with a frequency that is not covered by the function as.Date.ts(.). When we see what this function does, this is what we get:
function (x, offset = 0, ...)
{
time.x <- unclass(time(x)) + offset
if (frequency(x) == 1)
as.Date(paste(time.x, 1, 1, sep = "-"))
else if (frequency(x) == 4)
as.Date(paste((time.x + 0.001)%/%1, 3 * (cycle(x) - 1) +
1, 1, sep = "-"))
else if (frequency(x) == 12)
as.Date(paste((time.x + 0.001)%/%1, cycle(x), 1, sep = "-"))
else stop("unable to convert ts time to Date class")
}
This function considers only 3 values for the frequency of a ts object: 1, 4, or 12. When we take a look at the frequency of your object x, we see that its frequency = 0.000277777777777778, so when highcharter calls the function using the ts objects in x it stops and gives you that error.
We have two options on how to "fix" it:
Transform t into a ts object (instead of a xts object) with frequency = 1 before running auto.arima and forecast;
After running auto.arima and forecast, we can create an index for the future dates and transform the ts objects in x into xts objects with the correct index.
I said "fix" because these solutions are not perfect, as we will see.
Option 1
t <- structure(
c(2, 2, 267822980, 325286564, 66697091, 239352431,
94380295, 1, 126621669, 158555699, 32951026, 23,
108000151, 132505189, 29587564, 120381505, 25106680,
117506099, 22868767, 115940080, 22878163, 119286731,
22881061),
.Dim = c(23L, 1L),
index = structure(c(1490990400, 1490994000, 1490997600,
1491001200, 1491004800, 1491008400,
1491012000, 1491026400, 1491033600,
1491037200, 1491040800, 1491058800,
1491062400, 1491066000, 1491069600,
1491073200, 1491076800, 1491109200,
1491112800, 1491120000, 1491123600,
1491156000, 1491159600),
tzone = "US/Mountain",
tclass = c("POSIXct","POSIXt")),
class = c("xts", "zoo"),
.indexCLASS = c("POSIXct","POSIXt"),
tclass = c("POSIXct", "POSIXt"),
.indexTZ = "US/Mountain",
tzone = "US/Mountain",
.CLASS = "double",
.Dimnames = list(NULL, "count"))
require("forecast")
library(highcharter)
# SOLUTION 1
t.tmp <- ts(t, start=1, end = length(t))
t.arima.1 <- auto.arima(t.tmp)
x.1 <- forecast(t.arima.1, level = c(95, 80))
highchart(type = 'stock') %>%
hc_add_series(x.1) %>%
hc_add_series(x.1$x, name = "Original") %>%
hc_add_series(x.1$fitted, name = "Fitted")
The problem with this approach is that we lose the dates (axis, tooltip, etc.).
Option 2, 1st try: Hourly Forecasts
I tried to create an hourly index for the future values, but for some reason Highcharter moves the intervals to the left (or there's some problem with the dates that I can't see/figure out).
Option 2, 2nd try: Daily Forecasts
When I changed it to a daily index for the future values it worked, but it's weird since we have hourly observations and the forecast part of our plot shows "daily forecasts".
Here is the full code:
t <- structure(
c(2, 2, 267822980, 325286564, 66697091, 239352431,
94380295, 1, 126621669, 158555699, 32951026, 23,
108000151, 132505189, 29587564, 120381505, 25106680,
117506099, 22868767, 115940080, 22878163, 119286731,
22881061),
.Dim = c(23L, 1L),
index = structure(c(1490990400, 1490994000, 1490997600,
1491001200, 1491004800, 1491008400,
1491012000, 1491026400, 1491033600,
1491037200, 1491040800, 1491058800,
1491062400, 1491066000, 1491069600,
1491073200, 1491076800, 1491109200,
1491112800, 1491120000, 1491123600,
1491156000, 1491159600),
tzone = "US/Mountain",
tclass = c("POSIXct","POSIXt")),
class = c("xts", "zoo"),
.indexCLASS = c("POSIXct","POSIXt"),
tclass = c("POSIXct", "POSIXt"),
.indexTZ = "US/Mountain",
tzone = "US/Mountain",
.CLASS = "double",
.Dimnames = list(NULL, "count"))
require("forecast")
library(highcharter)
library(xts)
t.arima <- auto.arima(t)
x <- forecast(t.arima, level = c(95, 80))
# Problem
## Time from 'forecast'
time.x <- time(x$mean) # ts variable
time.x # see that frequency = 0.000277777777777778
## Original time
time.t <- time(t) # POSIXct variable, use as.ts to see frequency
as.ts(time.t) # frequency = 1
## Try to transform back to formatted date
as.POSIXct(as.double(time.t), tz = "US/Mountain", origin = "1970-01-01")
as.POSIXct(as.double(time.x), tz = "US/Mountain", origin = "1970-01-01")
#--------------------------------------------------------#
# SOLUTION 1
t.tmp <- ts(t, start=1, end = length(t))
t.arima.1 <- auto.arima(t.tmp)
x.1 <- forecast(t.arima.1, level = c(95, 80))
highchart(type = 'stock') %>%
hc_add_series(x.1) %>%
hc_add_series(x.1$x, name = "Original") %>%
hc_add_series(x.1$fitted, name = "Fitted")
#------------------------------------------------------#
# SOLUTION 2 - With correct dates but wrong plot
## Create new forecast variable
x.2 <- forecast(t.arima.1, level = c(95, 80))
## Take forecast length
forecast.length <- length(time.x)
### Create New Forecast dates (HOUR)
### Since I don't know the exact forecast times, I'll add one HOUR
### for each obs starting from the last date in the original dataset
last.date <- time.t[length(time.t)]
new.forecast.time.hour <- as.POSIXct(last.date) + c((1:forecast.length)*3600)
## Insert date back
x.2$mean <- xts(x.1$mean, order.by = new.forecast.time.hour)
x.2$lower <- xts(x.1$lower, order.by = new.forecast.time.hour)
x.2$upper <- xts(x.1$upper, order.by = new.forecast.time.hour)
### Original Data
x.2$x <- xts(x.1$x, order.by = time.t)
### Fitted
x.2$fitted <- xts(x.1$fitted, order.by = time.t)
# Plot forecasts with correct date
highchart(type = 'stock') %>%
hc_add_series(x.2) %>%
hc_add_series(x.2$x, name = "Original") %>%
hc_add_series(x.2$fitted, name = "Fitted") %>%
hc_xAxis(type = 'datetime')
#------------------------------------------------------#
# SOLUTION 3 - Correct plot but only for daily forecasts
## Create new forecast variable
x.3 <- forecast(t.arima.1, level = c(95, 80))
## Take forecast length
forecast.length <- length(time.x)
### Create New Forecast dates (DAY)
### Since I don't know the exact forecast times, I'll add one DAY
### for each obs starting from the last date in the original dataset
last.date <- time.t[length(time.t)]
new.forecast.time.day <- as.POSIXct(last.date) + c((1:forecast.length)*3600*24)
## Add change from as.POSIXct to as.Date
new.forecast.time.day <- as.Date(new.forecast.time.day)
## Insert date back
x.3$mean <- xts(x.1$mean, order.by = new.forecast.time.day)
x.3$lower <- xts(x.1$lower, order.by = new.forecast.time.day)
x.3$upper <- xts(x.1$upper, order.by = new.forecast.time.day)
### Original Data
x.3$x <- xts(x.1$x, order.by = time.t)
### Fitted
x.3$fitted <- xts(x.1$fitted, order.by = time.t)
# Plot forecasts with correct date
highchart(type = 'stock') %>%
hc_add_series(x.3) %>%
hc_add_series(x.3$x, name = "Original") %>%
hc_add_series(x.3$fitted, name = "Fitted") %>%
hc_xAxis(type = 'datetime')
One other thing: the fitted values on my plots differ from the fitted values on jbkunst's plot because he used forecast directly on t, not on t.arima (just a typo, I believe). This way, my forecasts are based on an Arima model, while his are based on an ETS model.

How to dput() a raster

If I use dput() to output the structure of a raster object created using the raster package, then assigning that structure back into a new object throws an error
Error in datanotation %in% c("LOG1S", "INT1S", "INT2S", "INT4S", "INT1U", :
error in evaluating the argument 'x' in selecting a method for function '%in%': Error: object 'datanotation' not found
Example output from dput to test this on:
rast <- new("RasterLayer", file = new(".RasterFile"
, name = ""
, datanotation = "FLT4S"
, byteorder = "little"
, nodatavalue = -Inf
, NAchanged = FALSE
, nbands = 1L
, bandorder = "BIL"
, offset = 0L
, toptobottom = TRUE
, blockrows = 0L
, blockcols = 0L
, driver = ""
, open = FALSE
)
, data = new(".SingleLayerData"
, values = c(NA, NA, NA, NA, 27.7696047300953, 25.8297302967319, 21.8282877533719,
18.2355885882618, 27.0557882676846, 27.2210269605054, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 27.7812364734848, 27.405183119753,
24.2674419226904, 21.1096354803572, 19.7839120235376, 28.0337762198564,
30.3552042477317, 27.9129238649901, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 27.8602581108286, 25.5695030720577, 19.308317452836,
20.2224030952562, 19.8943689815922, 26.0737945219631, 29.8730429910469,
30.0356550838097, NA, NA, NA, NA, NA, NA, NA, NA, NA, 27.9364248138976,
26.9457930700303, 23.0304323166943, 19.4650798632613, 19.0999036995668,
17.5193560841074, 27.7251998095169, 28.4496104452209, 28.9315408261731,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 26.8544908125766, 25.0566493895284,
19.392461671792, 17.9138961574326, 18.457466509715, 16.2828861956587,
24.3601694045773, 28.1808209395655, 28.9282707782622, 26.6332021683416,
NA, NA, NA, NA, NA, NA, NA, 25.7558302469057, 25.8550702427802,
22.5693001232205, 19.6993922601795, 16.1425172340908, 18.5221217322922,
15.6749250516081, 23.7808882591915, 26.8347423074187, 27.2630654814702,
25.9184967686647, NA, NA, NA, NA, NA, NA, NA, 24.8123372469289,
21.9120014347897, 21.8593245154305, 18.7720082061109, 19.7574885247249,
18.4980326509342, 16.3585539605331, 24.9138993320561, 25.2434828477134,
24.163634092843, 21.0163621891882, 20.415437668758, NA, NA, NA,
NA, NA, NA, 24.1877819407117, 20.6452893546199, 20.1902008603325,
19.3002926063194, 16.8587312480956, 16.2594198755341, 19.2032612963314,
23.627249155838, 20.2610810034085, 20.512646252079, 21.2108132984962,
21.3929956864179, 22.5462104762584, NA, NA, NA, NA, 28.1377507911064,
26.6783600800768, 21.9226216069185, 18.7325546681671, 19.3040954243679,
18.9295032049331, 16.9754437056141, 18.4150075374079, 22.1472527043877,
23.0212426364059, 24.3613220176048, 23.8262550760194, 23.1817611577951,
23.5871416966677, 24.5249361302642, 24.8507563698565, 24.5380700828535,
24.6222669309606, 28.1310406991608, 26.6318516890262, 22.2093701933002,
18.5946021290531, 18.8365649393596, 19.1392679329481, 18.0261774155026,
15.4867865984622, 22.6594382919435, 24.0000969920539, 26.8590549383737,
25.3828920205212, 24.7396876533108, 24.2529425383968, 25.4417776029091,
25.4515553773556, 25.362837214521, 24.9104771169439, NA, 26.0466484214637,
22.5547365784066, 21.385068811716, 21.7011412999039, 21.5908931968994,
17.5042944634609, 16.1420136345859, 22.2389789351034, 24.9668657681713,
27.1669375893459, 26.7413589409561, 26.421134458086, 26.219636989708,
26.3182362528439, 26.4198369697735, 26.3436222765849, NA, NA,
NA, 23.0141531354431, 25.0932401677589, 21.6662460243741, 20.5190520941524,
18.375683158989, 21.0476911567136, 24.5643254483451, 26.328155553503,
26.272891752264, 27.9174179692592, 27.8104921435185, 27.3675508861065,
27.0387799062499, 26.7967626268208, NA, NA, NA, NA, 23.9459211033352,
27.0411266756461, 24.5366020483741, 21.827679045105, 20.9547907819176,
22.0691273897516, 23.4745553057174, 26.3462307150211, 28.3701287602482,
27.3758861302374, 27.3750656061461, 27.962003557149, 27.5345722331493,
NA, NA, NA, NA, NA, NA, 28.2432491874035, 25.6912040459346, 23.8651528078732,
21.6046243626329, 22.9131480337219, 23.819129269607, 26.3568262380137,
28.9188481472128, 28.1497370861287, 27.7360100735352, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 25.045874725646, 23.2493769507419,
23.1730515314323, 24.2038209656421, 28.3416254663092, 28.8959112067936,
NA, NA, NA, NA, NA, NA, NA)
, offset = 0
, gain = 1
, inmemory = TRUE
, fromdisk = FALSE
, isfactor = FALSE
, attributes = list()
, haveminmax = TRUE
, min = 15.4867865984622
, max = 30.3552042477317
, band = 1L
, unit = ""
, names = "MAT_eclp"
)
, legend = new(".RasterLegend"
, type = character(0)
, values = logical(0)
, color = logical(0)
, names = logical(0)
, colortable = logical(0)
)
, title = character(0)
, extent = new("Extent"
, xmin = 832565.530013465
, xmax = 2452565.53001346
, ymin = 383803.949813352
, ymax = 1733803.94981335
)
, rotated = FALSE
, rotation = new(".Rotation"
, geotrans = numeric(0)
, transfun = function ()
NULL
)
, ncols = 18L
, nrows = 15L
, crs = new("CRS"
, projargs = "+proj=aea +lat_1=20 +lat_2=-23 +lat_0=0 +lon_0=25 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"
)
, history = list()
, z = list()
)
Or, another minimal example to dput(), then try assigning output to another object:
library(raster)
r1 <- raster(nrow=10, ncol=10)
values(r1) <- runif(ncell(r1))
dput(r1)
How can I load a raster from the text version and avoid this error?
This is a bug, due to an error in the validity check of the .RasterFile object (part of the RasterLayer). Illustrated here:
x <- new(".RasterFile")
validObject(x)
#Error in datanotation %in% c("LOG1S", "INT1S", "INT2S", "INT4S", "INT1U", :
# object 'datanotation' not found
I fixed this in version 2.5-11 (available from R-Forge in an hour or so: install.packages("raster", repos="http://R-Forge.R-project.org") ).
P.S. why would you want to use dput/dget?

How to change xticks locations and customize legend using levelplot (lattice library)

I am trying to move the position of x-ticks and x-labels from the bottom of the figure to its top.
In addition, my data has a bunch of NAs. Currently, levelplot just remove them and leave them as white space in the plot. I wondering if it is possible to add this NAs to the legend as well.
Any suggestions? Thanks!
Here is my code and its output:
require(lattice)
# see data from dput() below
rownames(data)=data[,1]
data_matrix=as.matrix(data[,2:11])
color = colorRampPalette(rev(c("#D73027", "#FC8D59", "#FEE090", "#FFFFBF", "#E0F3F8", "#91BFDB", "#4575B4")))(100)
levelplot(data_matrix, scale=list(x=list(rot=45)), ylab="Days", xlab="Strains", col.regions = color)
Data
data <-
structure(list(X = structure(1:17, .Label = c("Arcobacter", "Bacillus",
"Bordetella", "Campylobacter", "Chlamydia", "Clostridium ", "Corynebacterium",
"Enterococcus", "Escherichia", "Francisella", "Legionella", "Mycobacterium",
"Pseudomonas", "Rickettsia", "Staphylococcus", "Streptococcus",
"Treponema"), class = "factor"), day.0 = c(NA, -3.823301154,
NA, NA, NA, -3.518606107, NA, NA, NA, NA, NA, -4.859479387, NA,
NA, NA, -2.588402346, -2.668136603), day.2 = c(-4.006281239,
-3.024823788, NA, -5.202804501, NA, -3.237622321, NA, NA, -5.296138823,
-5.105469059, NA, NA, -4.901775198, NA, NA, -2.979144202, -3.050083791
), day.4 = c(-2.880770182, -3.210165554, -4.749097175, -5.209064234,
NA, -2.946480184, NA, -5.264113795, -5.341881713, -4.435780293,
NA, -4.810650076, -4.152531609, NA, NA, -3.106172794, -3.543161966
), day.6 = c(-2.869833226, -3.293283924, -3.831346387, NA, NA,
-3.323947791, NA, NA, NA, NA, NA, -4.397581863, -4.068855504,
NA, NA, -3.27028378, -3.662618619), day.8 = c(-3.873589331, -3.446192193,
-3.616207965, NA, NA, -3.13869325, NA, -5.010807453, NA, NA,
NA, -4.091502649, -4.412399025, -4.681675749, NA, -3.404738625,
-3.955464159), day.15 = c(-5.176583159, -2.512963066, -3.392832457,
NA, NA, -3.194662968, NA, -3.60440455, NA, NA, -4.875554468,
-2.507376205, -4.727255906, -5.27116754, -3.200499549, -3.361296145,
-4.320554841), day.22 = c(-4.550052847, -3.654013004, -3.486879661,
NA, NA, -3.614890858, NA, NA, NA, NA, -4.706690492, -2.200533317,
-4.836957953, NA, -4.390423731, NA, NA), day.29 = c(-4.730006329,
-3.46707372, -3.594457287, NA, NA, -3.800757834, NA, NA, NA,
NA, -4.285154089, -2.121152491, -4.816807055, -5.064577888, -2.945243736,
-4.479177287, -5.226435146), day.43 = c(-4.398680025, -3.144603215,
-3.642065153, NA, NA, -3.8268662, NA, NA, NA, NA, -4.762539208,
-2.156862316, -4.118608495, NA, -4.030291084, -4.678213147, NA
), day.57 = c(-4.689982547, -2.713502214, -3.51279797, NA, -5.069579266,
-3.495580794, NA, NA, NA, NA, -4.515973639, -1.90591075, -4.134826117,
-4.479351427, -3.482134037, -4.538534489, NA)), .Names = c("X",
"day.0", "day.2", "day.4", "day.6", "day.8", "day.15", "day.22",
"day.29", "day.43", "day.57"), class = "data.frame", row.names = c("Arcobacter",
"Bacillus", "Bordetella", "Campylobacter", "Chlamydia", "Clostridium ",
"Corynebacterium", "Enterococcus", "Escherichia", "Francisella",
"Legionella", "Mycobacterium", "Pseudomonas", "Rickettsia", "Staphylococcus",
"Streptococcus", "Treponema"))
Figure
The request to move the labels to the top is pretty easy (after looking at the ?xyplot under the scales section):
levelplot(data_matrix, scale=list(x=list(rot=45,alternating=2)),
ylab="Days", xlab="Strains", col.regions = color)
Trying to get the NA values into the color legend may take a bit more thinking, but it seems as though sensible values for the colorkey arguments for at and col might suffice.
levelplot(data_matrix, scale=list(x=list(rot=45,alternating=2)),
ylab="Days", xlab="Strains", col.regions = color,
colorkey=list(at=as.numeric( factor( c( seq(-5.5, -2, by=0.5),
"NA"))),
labels=as.character( c( seq(-5.5, -2, by=0.5),
"NA")),
col=c(color, "#FFFFFF") ) )

Resources