Extract residuals from an XGBoost regression as a single raster - r

In R, using the caret and xgboost packages and this tutorial, I am running an XGBoost regression (XGBR) and I want to extract the residuals of the XGBR. I hyper-tuned the model using the caret package and then, using the 'best' model parameters I used the xgboost package to perform the regression.
My dataset has the ntl, pop, tirs, agbh variables stored in data.frame (ntl is the dependent variable while the other three are the independent). Assuming that my XGBR model is called m and my data.frame is called block.data, I did:
library(caret)
library(terra)
library(xgboost)
library(doParallel)
library(dplyr)
library(ggplot2)
library(glue)
library(ModelMetrics)
library(readr)
wd = "path/"
block.data = read.csv(paste0(wd, "block.data.csv"))
block.data = subset(block.data, select = c(ntl, tirs, pop, agbh))
set.seed(1123)
samp <- sample(nrow(block.data), 0.80 * nrow(block.data))
train <- block.data[samp, ]
train_x <- as.matrix(select(train, -ntl))
train_y <- train$ntl
test <- block.data[-samp, ]
test_x <- select(test, -ntl)
test_y <- test$ntl
no_cores <- detectCores() - 1
cl = makePSOCKcluster(no_cores)
registerDoParallel(cl)
# default model
grid_default <- expand.grid(
nrounds = 100,
max_depth = 6,
eta = 0.3,
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1
)
train_control <- caret::trainControl(
method = "none",
verboseIter = FALSE, # no training log
allowParallel = TRUE # FALSE for reproducible results
)
xgb_base <- caret::train(
x = train_x,
y = train_y,
trControl = train_control,
tuneGrid = grid_default,
method = "xgbTree",
verbose = TRUE
)
# hyperparameter tuning
# setting up the maximum number of trees
nrounds <- 1000
# note to start nrounds from 200, as smaller learning rates result in errors so
# big with lower starting points that they'll mess the scales
tune_grid <- expand.grid(
nrounds = seq(from = 200, to = nrounds, by = 50),
eta = c(0.025, 0.05, 0.1, 0.3),
max_depth = c(2, 3, 4, 5, 6),
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1
)
tune_control <- caret::trainControl(
method = "cv", # cross-validation
number = 3, # with n folds
#index = createFolds(tr_treated$Id_clean), # fix the folds
verboseIter = FALSE, # no training log
allowParallel = TRUE # FALSE for reproducible results
)
xgb_tune <- caret::train(
x = train_x,
y = train_y,
trControl = tune_control,
tuneGrid = tune_grid,
method = "xgbTree",
verbose = TRUE
)
tuneplot <- function(x, probs = .90) {
ggplot(x) +
coord_cartesian(ylim = c(quantile(x$results$RMSE, probs = probs), min(x$results$RMSE))) +
theme_bw()
}
tuneplot(xgb_tune)
xgb_tune$bestTune
# find maximum depth
tune_grid2 <- expand.grid(
nrounds = seq(from = 50, to = nrounds, by = 50),
eta = xgb_tune$bestTune$eta,
max_depth = ifelse(xgb_tune$bestTune$max_depth == 2,
c(xgb_tune$bestTune$max_depth:4),
xgb_tune$bestTune$max_depth - 1:xgb_tune$bestTune$max_depth + 1),
gamma = 0,
colsample_bytree = 1,
min_child_weight = c(1, 2, 3),
subsample = 1
)
xgb_tune2 <- caret::train(
x = train_x,
y = train_y,
trControl = tune_control,
tuneGrid = tune_grid2,
method = "xgbTree",
verbose = TRUE
)
tuneplot(xgb_tune2)
xgb_tune2$bestTune
# different values for row and column sampling
tune_grid3 <- expand.grid(
nrounds = seq(from = 50, to = nrounds, by = 50),
eta = xgb_tune$bestTune$eta,
max_depth = xgb_tune2$bestTune$max_depth,
gamma = 0,
colsample_bytree = c(0.4, 0.6, 0.8, 1.0),
min_child_weight = xgb_tune2$bestTune$min_child_weight,
subsample = c(0.5, 0.75, 1.0)
)
xgb_tune3 <- caret::train(
x = train_x,
y = train_y,
trControl = tune_control,
tuneGrid = tune_grid3,
method = "xgbTree",
verbose = TRUE
)
tuneplot(xgb_tune3, probs = .95)
xgb_tune3$bestTune
set.seed(57)
omp_set_num_threads(2) # caret parallel processing threads
# gamma
tune_grid4 <- expand.grid(
nrounds = seq(from = 50, to = nrounds, by = 50),
eta = xgb_tune$bestTune$eta,
max_depth = xgb_tune2$bestTune$max_depth,
gamma = c(0, 0.05, 0.1, 0.5, 0.7, 0.9, 1.0),
colsample_bytree = xgb_tune3$bestTune$colsample_bytree,
min_child_weight = xgb_tune2$bestTune$min_child_weight,
subsample = xgb_tune3$bestTune$subsample
)
xgb_tune4 <- caret::train(
x = train_x,
y = train_y,
trControl = tune_control,
tuneGrid = tune_grid4,
method = "xgbTree",
verbose = TRUE
)
tuneplot(xgb_tune4)
xgb_tune4$bestTune
# Reducing the Learning Rate
tune_grid5 <- expand.grid(
nrounds = seq(from = 100, to = 10000, by = 100),
eta = c(0.01, 0.015, 0.025, 0.05, 0.1),
max_depth = xgb_tune2$bestTune$max_depth,
gamma = xgb_tune4$bestTune$gamma,
colsample_bytree = xgb_tune3$bestTune$colsample_bytree,
min_child_weight = xgb_tune2$bestTune$min_child_weight,
subsample = xgb_tune3$bestTune$subsample
)
xgb_tune5 <- caret::train(
x = train_x,
y = train_y,
trControl = tune_control,
tuneGrid = tune_grid5,
method = "xgbTree",
verbose = TRUE
)
tuneplot(xgb_tune5)
xgb_tune5$bestTune
# Fitting the Model
(final_grid <- expand.grid(
nrounds = xgb_tune5$bestTune$nrounds,
eta = xgb_tune5$bestTune$eta,
max_depth = xgb_tune5$bestTune$max_depth,
gamma = xgb_tune5$bestTune$gamma,
colsample_bytree = xgb_tune5$bestTune$colsample_bytree,
min_child_weight = xgb_tune5$bestTune$min_child_weight,
subsample = xgb_tune5$bestTune$subsample
))
(xgb_model <- caret::train(
x = train_x,
y = train_y,
trControl = train_control,
tuneGrid = final_grid,
method = "xgbTree",
verbose = TRUE
))
stopCluster(cl)
# apply model to the whole data set using xgboost
xgb_m <- xgb.DMatrix(data = data.matrix(block.data), label = block.data$ntl)
m = xgb.train(data = xgb_m,
max.depth = xgb_tune5$bestTune$max_depth,
# watchlist = watchlist,
nrounds = xgb_tune5$bestTune$nrounds,
min_child_weight = xgb_tune5$bestTune$min_child_weight,
subsample = xgb_tune5$bestTune$subsample,
eta = xgb_tune5$bestTune$eta,
gamma = xgb_tune5$bestTune$gamma,
colsample_bytree = xgb_tune5$bestTune$colsample_bytree,
objective = "reg:squarederror")
m
# export xgb residuals
xgb_resids = predict(m, xgb_m, na.rm = TRUE)
sb = c(ntl, pop_res, tirs_res, agbh_res)
xgb_resids = sb$ntl - xgb_resids
plot(xgb_resids)
The plot looks like this:
Obviously, I am doing something very wrong. How can I export the residuals of an XGBR as a single raster?
Here is a very small sample of my dataset:
block.data = structure(list(x = c(11880750L, 11879250L, 11879750L, 11880250L,
11880750L, 11881250L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11878750L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11879250L, 11879750L, 11880250L, 11880750L, 11881250L,
11881750L, 11882250L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11881750L, 11882250L, 11882750L, 11879250L, 11879750L
), y = c(1802250L, 1801750L, 1801750L, 1801750L, 1801750L, 1801750L,
1801250L, 1801250L, 1801250L, 1801250L, 1801250L, 1800750L, 1800750L,
1800750L, 1800750L, 1800750L, 1800750L, 1800250L, 1800250L, 1800250L,
1800250L, 1800250L, 1800250L, 1800250L, 1799750L, 1799750L, 1799750L,
1799750L, 1799750L, 1799750L, 1799750L, 1799750L, 1799250L, 1799250L
), ntl = c(18.7969169616699, 25.7222957611084, 23.4188251495361,
25.4322757720947, 16.4593601226807, 12.7868213653564, 30.9337253570557,
29.865758895874, 30.4080600738525, 29.5479888916016, 24.3493347167969,
35.2427635192871, 38.989933013916, 34.6536979675293, 29.4607238769531,
30.7469024658203, 34.3946380615234, 42.8660278320312, 34.7930717468262,
30.9516315460205, 32.20654296875, 39.999755859375, 46.6002235412598,
38.6480979919434, 60.5214920043945, 33.1799964904785, 31.8498134613037,
30.9209423065186, 32.2269744873047, 53.7062034606934, 45.5225944519043,
38.3570976257324, 123.040382385254, 73.0528182983398), pop = c(19.6407718658447,
610.009216308594, 654.812622070312, 426.475830078125, 66.3839492797852,
10.6471328735352, 443.848846435547, 602.677429199219, 488.478454589844,
387.470947265625, 58.2341117858887, 413.888488769531, 315.057678222656,
354.082946777344, 602.827758789062, 463.518829345703, 296.713928222656,
923.920593261719, 434.436645507812, 799.562927246094, 404.709564208984,
265.043304443359, 366.697235107422, 399.851684570312, 952.2314453125,
870.356994628906, 673.406616210938, 493.521606445312, 273.841888427734,
371.428619384766, 383.057830810547, 320.986755371094, 991.131225585938,
1148.87768554688), tirs = c(39.7242431640625, 44.9583969116211,
41.4048385620117, 42.6056709289551, 40.0976028442383, 38.7490005493164,
44.2747650146484, 43.5645370483398, 41.6180191040039, 40.3799781799316,
38.8664817810059, 44.9089202880859, 44.414306640625, 44.560977935791,
43.1288986206055, 40.9315185546875, 38.8918418884277, 46.3063850402832,
45.5805702209473, 44.9196586608887, 42.2495613098145, 39.3051452636719,
38.7914810180664, 38.6069412231445, 44.6782455444336, 46.4024772644043,
44.4720573425293, 41.7361183166504, 42.3378067016602, 41.0018348693848,
39.3579216003418, 41.6303863525391, 43.8207550048828, 46.0460357666016
), agbh = c(3.32185006141663, 4.98925733566284, 4.35699367523193,
4.94798421859741, 3.14325952529907, 2.93211793899536, 4.52736520767212,
4.99723243713379, 5.13944292068481, 3.92965626716614, 3.43465113639832,
3.55617475509644, 3.4659411907196, 5.24469566345215, 5.36995029449463,
4.61549234390259, 4.82002925872803, 4.20452928543091, 4.71502685546875,
5.20452785491943, 5.05676746368408, 5.9952244758606, 6.16778612136841,
4.69053316116333, 2.62325501441956, 4.74775457382202, 4.93133020401001,
5.02366256713867, 5.74016952514648, 6.28353786468506, 4.67424774169922,
4.56812858581543, 1.88153350353241, 4.31531000137329)), class = "data.frame", row.names = c(NA,
-34L))
My raster layer:
r = new("RasterBrick", file = new(".RasterFile", name = "", datanotation = "FLT4S",
byteorder = "little", nodatavalue = -Inf, NAchanged = FALSE,
nbands = 1L, bandorder = "BIL", offset = 0L, toptobottom = TRUE,
blockrows = 0L, blockcols = 0L, driver = "", open = FALSE),
data = new(".MultipleRasterData", values = structure(c(NA,
NA, NA, NA, 18.7969169616699, NA, NA, NA, NA, NA, 25.7222957611084,
23.4188251495361, 25.4322757720947, 16.4593601226807, 12.7868213653564,
NA, NA, NA, NA, 30.9337253570557, 29.865758895874, 30.4080600738525,
29.5479888916016, 24.3493347167969, NA, NA, NA, 35.2427635192871,
38.989933013916, 34.6536979675293, 29.4607238769531, 30.7469024658203,
34.3946380615234, NA, NA, NA, NA, 42.8660278320312, 34.7930717468262,
30.9516315460205, 32.20654296875, 39.999755859375, 46.6002235412598,
38.6480979919434, NA, NA, 60.5214920043945, 33.1799964904785,
31.8498134613037, 30.9209423065186, 32.2269744873047, 53.7062034606934,
45.5225944519043, 38.3570976257324, NA, 123.040382385254,
73.0528182983398, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
19.6407718658447, NA, NA, NA, NA, NA, 610.009216308594, 654.812622070312,
426.475830078125, 66.3839492797852, 10.6471328735352, NA,
NA, NA, NA, 443.848846435547, 602.677429199219, 488.478454589844,
387.470947265625, 58.2341117858887, NA, NA, NA, 413.888488769531,
315.057678222656, 354.082946777344, 602.827758789062, 463.518829345703,
296.713928222656, NA, NA, NA, NA, 923.920593261719, 434.436645507812,
799.562927246094, 404.709564208984, 265.043304443359, 366.697235107422,
399.851684570312, NA, NA, 952.2314453125, 870.356994628906,
673.406616210938, 493.521606445312, 273.841888427734, 371.428619384766,
383.057830810547, 320.986755371094, NA, 991.131225585938,
1148.87768554688, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
39.7242431640625, NA, NA, NA, NA, NA, 44.9583969116211, 41.4048385620117,
42.6056709289551, 40.0976028442383, 38.7490005493164, NA,
NA, NA, NA, 44.2747650146484, 43.5645370483398, 41.6180191040039,
40.3799781799316, 38.8664817810059, NA, NA, NA, 44.9089202880859,
44.414306640625, 44.560977935791, 43.1288986206055, 40.9315185546875,
38.8918418884277, NA, NA, NA, NA, 46.3063850402832, 45.5805702209473,
44.9196586608887, 42.2495613098145, 39.3051452636719, 38.7914810180664,
38.6069412231445, NA, NA, 44.6782455444336, 46.4024772644043,
44.4720573425293, 41.7361183166504, 42.3378067016602, 41.0018348693848,
39.3579216003418, 41.6303863525391, NA, 43.8207550048828,
46.0460357666016, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
3.32185006141663, NA, NA, NA, NA, NA, 4.98925733566284, 4.35699367523193,
4.94798421859741, 3.14325952529907, 2.93211793899536, NA,
NA, NA, NA, 4.52736520767212, 4.99723243713379, 5.13944292068481,
3.92965626716614, 3.43465113639832, NA, NA, NA, 3.55617475509644,
3.4659411907196, 5.24469566345215, 5.36995029449463, 4.61549234390259,
4.82002925872803, NA, NA, NA, NA, 4.20452928543091, 4.71502685546875,
5.20452785491943, 5.05676746368408, 5.9952244758606, 6.16778612136841,
4.69053316116333, NA, NA, 2.62325501441956, 4.74775457382202,
4.93133020401001, 5.02366256713867, 5.74016952514648, 6.28353786468506,
4.67424774169922, 4.56812858581543, NA, 1.88153350353241,
4.31531000137329, NA, NA, NA, NA, NA, NA), .Dim = c(63L,
4L)), offset = 0, gain = 1, inmemory = TRUE, fromdisk = FALSE,
nlayers = 4L, dropped = NULL, isfactor = c(FALSE, FALSE,
FALSE, FALSE), attributes = list(), haveminmax = TRUE,
min = c(12.7868213653564, 10.6471328735352, 38.6069412231445,
1.88153350353241), max = c(123.040382385254, 1148.87768554688,
46.4024772644043, 6.28353786468506), unit = "", names = c("ntl",
"pop", "tirs", "agbh")), legend = new(".RasterLegend",
type = character(0), values = logical(0), color = logical(0),
names = logical(0), colortable = logical(0)), title = character(0),
extent = new("Extent", xmin = 11878500, xmax = 11883000,
ymin = 1799000, ymax = 1802500), rotated = FALSE, rotation = new(".Rotation",
geotrans = numeric(0), transfun = function ()
NULL), ncols = 9L, nrows = 7L, crs = new("CRS", projargs = NA_character_),
srs = character(0), history = list(), z = list())

I found this solution:
d <- tibble(pred = predict(f, newdata = xs)
, obs = mtcars$mpg) %>%
mutate(resid = pred - obs,
resid_sq = resid^2)
and then I cbind the d with the coordinates from my raster data and I create the residuals raster.

Related

Predict at a finer spatial scale using XGBoost regression

I want to make a prediction at a finer spatial scale using XGBoost regression. I created a model at a coarse spatial scale and now I want to apply the model parameters at a finer scale. The issue is that at the finer spatial scale the xgb.DMatrix doesn't include the dependent variable so the predict function returns this error:
Error in predict.xgb.Booster(m, xgb_p, na.rm = TRUE): Feature names stored in `object` and `newdata` are different!
I have seen this post but I still get the same error. Also, I tried to create a data.frame with the independent variables and an extra empty column with the name of the dependent variable, but still the same error.
How can I use the XGB model parameters found in the coarse spatial scale to make predictions at a fine spatial scale?
I have checked the names of the variables in both the coarse and spatial scales and they are the same.
Here is the code:
library(xgboost)
library(raster)
# the dependent and independent variables at the coarse scale
xgb_m <- xgb.DMatrix(data = data.matrix(block.data), label = block.data$ntl)
# that's the model
m = xgb.train(data = xgb_m,
max.depth = 2,
nrounds = 1000,
min_child_weight = 1,
subsample = 0.75,
eta = 0.015,
gamma = 0.5,
colsample_bytree = 1,
objective = "reg:squarederror")
# these are the independent variables at the fine spatial scale
pop = raster(paste0(wd, "pop.tif"))
tirs = raster(paste0(wd, "tirs.tif"))
agbh = raster(paste0(wd, "agbh.tif"))
vars = stack(pop, tirs, agbh)
# the xgb.DMatrix with the independent variables
xgb_p <- xgb.DMatrix(data = data.matrix(vars))
xgb_pred <- predict(m, xgb_p, na.rm = TRUE) # the error
The data.frame (block.data):
block.data = structure(list(x = c(11880750L, 11879250L, 11879750L, 11880250L,
11880750L, 11881250L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11878750L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11879250L, 11879750L, 11880250L, 11880750L, 11881250L,
11881750L, 11882250L, 11879250L, 11879750L, 11880250L, 11880750L,
11881250L, 11881750L, 11882250L, 11882750L, 11879250L, 11879750L
), y = c(1802250L, 1801750L, 1801750L, 1801750L, 1801750L, 1801750L,
1801250L, 1801250L, 1801250L, 1801250L, 1801250L, 1800750L, 1800750L,
1800750L, 1800750L, 1800750L, 1800750L, 1800250L, 1800250L, 1800250L,
1800250L, 1800250L, 1800250L, 1800250L, 1799750L, 1799750L, 1799750L,
1799750L, 1799750L, 1799750L, 1799750L, 1799750L, 1799250L, 1799250L
), ntl = c(18.7969169616699, 25.7222957611084, 23.4188251495361,
25.4322757720947, 16.4593601226807, 12.7868213653564, 30.9337253570557,
29.865758895874, 30.4080600738525, 29.5479888916016, 24.3493347167969,
35.2427635192871, 38.989933013916, 34.6536979675293, 29.4607238769531,
30.7469024658203, 34.3946380615234, 42.8660278320312, 34.7930717468262,
30.9516315460205, 32.20654296875, 39.999755859375, 46.6002235412598,
38.6480979919434, 60.5214920043945, 33.1799964904785, 31.8498134613037,
30.9209423065186, 32.2269744873047, 53.7062034606934, 45.5225944519043,
38.3570976257324, 123.040382385254, 73.0528182983398), pop = c(19.6407718658447,
610.009216308594, 654.812622070312, 426.475830078125, 66.3839492797852,
10.6471328735352, 443.848846435547, 602.677429199219, 488.478454589844,
387.470947265625, 58.2341117858887, 413.888488769531, 315.057678222656,
354.082946777344, 602.827758789062, 463.518829345703, 296.713928222656,
923.920593261719, 434.436645507812, 799.562927246094, 404.709564208984,
265.043304443359, 366.697235107422, 399.851684570312, 952.2314453125,
870.356994628906, 673.406616210938, 493.521606445312, 273.841888427734,
371.428619384766, 383.057830810547, 320.986755371094, 991.131225585938,
1148.87768554688), tirs = c(39.7242431640625, 44.9583969116211,
41.4048385620117, 42.6056709289551, 40.0976028442383, 38.7490005493164,
44.2747650146484, 43.5645370483398, 41.6180191040039, 40.3799781799316,
38.8664817810059, 44.9089202880859, 44.414306640625, 44.560977935791,
43.1288986206055, 40.9315185546875, 38.8918418884277, 46.3063850402832,
45.5805702209473, 44.9196586608887, 42.2495613098145, 39.3051452636719,
38.7914810180664, 38.6069412231445, 44.6782455444336, 46.4024772644043,
44.4720573425293, 41.7361183166504, 42.3378067016602, 41.0018348693848,
39.3579216003418, 41.6303863525391, 43.8207550048828, 46.0460357666016
), agbh = c(3.32185006141663, 4.98925733566284, 4.35699367523193,
4.94798421859741, 3.14325952529907, 2.93211793899536, 4.52736520767212,
4.99723243713379, 5.13944292068481, 3.92965626716614, 3.43465113639832,
3.55617475509644, 3.4659411907196, 5.24469566345215, 5.36995029449463,
4.61549234390259, 4.82002925872803, 4.20452928543091, 4.71502685546875,
5.20452785491943, 5.05676746368408, 5.9952244758606, 6.16778612136841,
4.69053316116333, 2.62325501441956, 4.74775457382202, 4.93133020401001,
5.02366256713867, 5.74016952514648, 6.28353786468506, 4.67424774169922,
4.56812858581543, 1.88153350353241, 4.31531000137329)), class = "data.frame", row.names = c(NA,
-34L))
The fine resolution data
vars = new("RasterBrick", file = new(".RasterFile", name = "", datanotation = "FLT4S",
byteorder = "little", nodatavalue = -Inf, NAchanged = FALSE,
nbands = 1L, bandorder = "BIL", offset = 0L, toptobottom = TRUE,
blockrows = 0L, blockcols = 0L, driver = "", open = FALSE),
data = new(".MultipleRasterData", values = structure(c(NA,
NA, NA, NA, 18.7969169616699, NA, NA, NA, NA, NA, 25.7222957611084,
23.4188251495361, 25.4322757720947, 16.4593601226807, 12.7868213653564,
NA, NA, NA, NA, 30.9337253570557, 29.865758895874, 30.4080600738525,
29.5479888916016, 24.3493347167969, NA, NA, NA, 35.2427635192871,
38.989933013916, 34.6536979675293, 29.4607238769531, 30.7469024658203,
34.3946380615234, NA, NA, NA, NA, 42.8660278320312, 34.7930717468262,
30.9516315460205, 32.20654296875, 39.999755859375, 46.6002235412598,
38.6480979919434, NA, NA, 60.5214920043945, 33.1799964904785,
31.8498134613037, 30.9209423065186, 32.2269744873047, 53.7062034606934,
45.5225944519043, 38.3570976257324, NA, 123.040382385254,
73.0528182983398, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
19.6407718658447, NA, NA, NA, NA, NA, 610.009216308594, 654.812622070312,
426.475830078125, 66.3839492797852, 10.6471328735352, NA,
NA, NA, NA, 443.848846435547, 602.677429199219, 488.478454589844,
387.470947265625, 58.2341117858887, NA, NA, NA, 413.888488769531,
315.057678222656, 354.082946777344, 602.827758789062, 463.518829345703,
296.713928222656, NA, NA, NA, NA, 923.920593261719, 434.436645507812,
799.562927246094, 404.709564208984, 265.043304443359, 366.697235107422,
399.851684570312, NA, NA, 952.2314453125, 870.356994628906,
673.406616210938, 493.521606445312, 273.841888427734, 371.428619384766,
383.057830810547, 320.986755371094, NA, 991.131225585938,
1148.87768554688, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
39.7242431640625, NA, NA, NA, NA, NA, 44.9583969116211, 41.4048385620117,
42.6056709289551, 40.0976028442383, 38.7490005493164, NA,
NA, NA, NA, 44.2747650146484, 43.5645370483398, 41.6180191040039,
40.3799781799316, 38.8664817810059, NA, NA, NA, 44.9089202880859,
44.414306640625, 44.560977935791, 43.1288986206055, 40.9315185546875,
38.8918418884277, NA, NA, NA, NA, 46.3063850402832, 45.5805702209473,
44.9196586608887, 42.2495613098145, 39.3051452636719, 38.7914810180664,
38.6069412231445, NA, NA, 44.6782455444336, 46.4024772644043,
44.4720573425293, 41.7361183166504, 42.3378067016602, 41.0018348693848,
39.3579216003418, 41.6303863525391, NA, 43.8207550048828,
46.0460357666016, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
3.32185006141663, NA, NA, NA, NA, NA, 4.98925733566284, 4.35699367523193,
4.94798421859741, 3.14325952529907, 2.93211793899536, NA,
NA, NA, NA, 4.52736520767212, 4.99723243713379, 5.13944292068481,
3.92965626716614, 3.43465113639832, NA, NA, NA, 3.55617475509644,
3.4659411907196, 5.24469566345215, 5.36995029449463, 4.61549234390259,
4.82002925872803, NA, NA, NA, NA, 4.20452928543091, 4.71502685546875,
5.20452785491943, 5.05676746368408, 5.9952244758606, 6.16778612136841,
4.69053316116333, NA, NA, 2.62325501441956, 4.74775457382202,
4.93133020401001, 5.02366256713867, 5.74016952514648, 6.28353786468506,
4.67424774169922, 4.56812858581543, NA, 1.88153350353241,
4.31531000137329, NA, NA, NA, NA, NA, NA), .Dim = c(63L,
4L)), offset = 0, gain = 1, inmemory = TRUE, fromdisk = FALSE,
nlayers = 4L, dropped = NULL, isfactor = c(FALSE, FALSE,
FALSE, FALSE), attributes = list(), haveminmax = TRUE,
min = c(12.7868213653564, 10.6471328735352, 38.6069412231445,
1.88153350353241), max = c(123.040382385254, 1148.87768554688,
46.4024772644043, 6.28353786468506), unit = "", names = c("ntl",
"pop", "tirs", "agbh")), legend = new(".RasterLegend",
type = character(0), values = logical(0), color = logical(0),
names = logical(0), colortable = logical(0)), title = character(0),
extent = new("Extent", xmin = 11878500, xmax = 11883000,
ymin = 1799000, ymax = 1802500), rotated = FALSE, rotation = new(".Rotation",
geotrans = numeric(0), transfun = function ()
NULL), ncols = 9L, nrows = 7L, crs = new("CRS", projargs = NA_character_),
srs = character(0), history = list(), z = list())
The issue you have is that the model is trained on all columns present in block.data, so also on x, y and ntl itself. In your later rasterstack, those variables are not present and thus you get an error.
We get can around this by creating the xgb.DMatrix differently:
#create matrix
xgb_m <- xgb.DMatrix(data = data.matrix(block.data[, c("pop", "tirs", "agbh")]), label = block.data$ntl)
#your model
m = xgb.train(data = xgb_m,
max.depth = 2,
nrounds = 1000,
min_child_weight = 1,
subsample = 0.75,
eta = 0.015,
gamma = 0.5,
colsample_bytree = 1,
objective = "reg:squarederror")
#load rasterstack
library(terra)
pop = rast("pop.tif")
tirs = rast("tirs.tif")
agbh = rast("agbh.tif")
vars <- c(pop, tirs, agbh)
#predict
xgb_p <- xgb.DMatrix(data.matrix(vars))
predict(m, xgb_p)

which function can I use to add individual name under each star plot

I want to give names to individual starplots in R
stars(norm_datas[, 1:12], full = TRUE,radius = TRUE,len = 1.0, key.loc = c(14,1), labels = abbreviate(case.names(norm_datas)),main = "Provision of Ecosystem services", draw.segments = TRUE, lwd = 0.25, lty = par("lty"), xpd = TRUE).
This is what I tried but it just labeled each star plot as 1, 2, 3.
Kindly help resolve.
structure(list(Type_Garden = c("AG", "AG", "AG"), Pollinators = c(10,
6, 5.5), Flower_abundance = c(384, 435, 499), Climate_regulation = c(1,
7, 2), Crop_area = c(34, 25, 10), Plant_diversity = c(22, 53,
41), Nitrogen_balance = c(0.95, 0.26, NA), Phosphorus_balance = c(0.24,
0.04, NA), Habitat_provision = c(1, 2, 0), Recreation_covid = c(1,
NA, NA), Aesthetic_appreciation = c(3, NA, NA), Reconnection_nature = c(4,
NA, NA), Mental_health = c(1, NA, NA), Physical_health = c(1,
NA, NA)), class = "data.frame", row.names = c(NA, -3L))

How to change the y-axis scale in plot for a forecast object?

I have the following graph made with plot. I basically plotted the outcome of an arima model. The problem, as you can see, is the y-axis. I want to rescale it so that it shows values as integers and not in scientific notation. I already tried with ylim = c(a,b) but it didn't work.
This is the data to plot:
structure(list(method = "ARIMA(1,2,0)", model = structure(list(
coef = c(ar1 = 0.165440211592995), sigma2 = 314372.871343033,
var.coef = structure(0.0387588365491072, .Dim = c(1L, 1L), .Dimnames = list(
"ar1", "ar1")), mask = TRUE, loglik = -201.464633423226,
aic = 406.929266846451, arma = c(1L, 0L, 0L, 0L, 1L, 2L,
0L), residuals = structure(c(0.144002762945477, -0.257594259049227,
169.62992413163, -40.455716409227, 3.98528254071288, 325.669119576814,
-277.933508979317, 161.058607396831, 100.485413762468, 161.981734397248,
-21.1101185099251, 467.511038095663, 167.408540762885, 264.467148159716,
-870.459264535865, 1471.66097350626, 116.971877311758, -159.918791518434,
967.205782005673, -64.1682010133445, -372.385939678148, 352.062155538701,
632.526018003249, 1002.33521590517, 479.534164073812, 461.147699502253,
-1091.4663608196, -614.056109041783), .Tsp = c(1, 28, 1), class = "ts"),
call = arima(x = corona_total$Total_Cases, order = c(1, 2,
0)), series = "corona_total$Total_Cases", code = 0L,
n.cond = 0L, nobs = 26L, model = list(phi = 0.165440211592995,
theta = numeric(0), Delta = c(2, -1), Z = c(1, 2, -1),
a = c(-779, 59138, 53578), P = structure(c(-2.22044604925031e-16,
2.86887593857152e-17, -5.56124814802562e-17, 2.86887593857152e-17,
-3.31423141286073e-17, -1.61722928090181e-32, -5.56124814802562e-17,
-3.75958688714994e-17, -5.56124814802562e-17), .Dim = c(3L,
3L)), T = structure(c(0.165440211592995, 1, 0, 0, 2,
1, 0, -1, 0), .Dim = c(3L, 3L)), V = structure(c(1, 0,
0, 0, 0, 0, 0, 0, 0), .Dim = c(3L, 3L)), h = 0, Pn = structure(c(1,
-5.4830714621183e-18, 1.21812129054869e-17, -5.48307146211831e-18,
-3.31423141286073e-17, -1.84889274661175e-32, 1.21812129054869e-17,
-3.75958688714994e-17, -5.56124814802562e-17), .Dim = c(3L,
3L))), x = structure(c(322, 400, 650, 888, 1128, 1694,
2036, 2502, 3089, 3858, 4636, 5883, 7375, 9172, 10149, 12462,
15113, 17660, 21157, 24747, 27980, 31506, 35713, 41035, 47021,
53578, 59138, 63919), .Tsp = c(1, 28, 1), class = "ts")), class = "Arima"),
level = c(80, 95), mean = structure(c(68571.1220751691, 73201.9225591844,
77829.1955946478, 82455.8850482763, 87082.4779540027, 91709.0548868236,
96335.6291770837, 100962.203030158, 105588.776810904, 110215.350579684,
114841.924346485, 119468.498112958, 124095.071879377, 128721.645645786,
133348.219412195, 137974.793178603, 142601.366945011, 147227.940711419,
151854.514477827, 156481.088244235, 161107.662010643, 165734.235777051,
170360.80954346, 174987.383309868, 179613.957076276, 184240.530842684,
188867.104609092, 193493.6783755, 198120.252141908, 202746.825908316,
207373.399674724, 211999.973441132, 216626.54720754, 221253.120973948,
225879.694740356, 230506.268506765, 235132.842273173, 239759.416039581,
244385.989805989, 249012.563572397, 253639.137338805, 258265.711105213,
262892.284871621, 267518.858638029, 272145.432404437, 276772.006170845,
281398.579937253, 286025.153703662, 290651.72747007, 295278.301236478,
299904.875002886, 304531.448769294, 309158.022535702, 313784.59630211,
318411.170068518, 323037.743834926, 327664.317601334, 332290.891367742,
336917.46513415, 341544.038900558), .Tsp = c(29, 88, 1), class = "ts"),
lower = structure(c(67852.5693904542, 71488.0378850631, 74869.4056219101,
78042.7559156995, 81035.3037876344, 83865.5016552685, 86546.988586515,
89090.4113186268, 91504.3946218833, 93796.1160212266, 95971.6728237902,
98036.3298321095, 99994.6937502293, 101850.840164951, 103608.408563905,
105270.675078771, 106840.60926587, 108320.919172912, 109714.087632186,
111022.401864165, 112247.977899771, 113392.780933102, 114458.642437768,
115447.274680314, 116360.283118863, 117199.177067588, 117965.378927058,
118660.232219392, 119285.008620154, 119840.914142592, 120329.094601246,
120750.640459457, 121106.591147333, 121397.938922313, 121625.632332819,
121790.579335963, 121893.650112513, 121935.679615911, 121917.46988679,
121839.792160025, 121703.388787634, 121508.974997728, 121257.240507039,
120948.851002351, 120584.449504222, 120164.657624752, 119690.076729762,
119161.289014511, 118578.858501069, 117943.331964511, 117255.239794357,
116515.096796942, 115723.402943843, 114880.644070922, 113987.29253211,
113043.807811626, 112050.637097962, 111008.215822666, 109916.968166633,
108777.307536393, 67472.1905761175, 70580.7621429779, 73302.5874546909,
75706.5864702675, 77834.1231526988, 79713.3753855171, 81365.1952663977,
82805.8644073931, 84048.5730632326, 85104.2982790952, 85982.3650762524,
86690.8252744766, 87236.7242194817, 87626.2949833746, 87865.1036821527,
87958.1607268483, 87910.0076619409, 87724.7860890043, 87406.2931723477,
86958.0269138267, 86383.2235034666, 85684.8884462828, 84865.8227394482,
83928.6450686659, 82875.8107702521, 81709.6281410368, 80432.2725549711,
79045.7987518185, 77552.151591525, 75953.1755121868, 74250.6228859222,
72446.1614324952, 70541.3808230744, 68537.798584461, 66436.8653962784,
64239.9698590982, 61948.4427995644, 59563.561168772, 57086.5515820169,
54518.5935412548, 51860.8223759273, 49114.3319330342, 46280.1770432903,
43359.3757867774, 40352.9115785747, 37261.7350923526, 34086.7660377659,
30828.8948056289, 27488.983993257, 24067.8698209688, 20566.3634495346,
16985.2522073028, 13325.3007348137, 9587.25205390016, 5771.82856756041,
1879.73299626436, -2088.35074420535, -6131.75671876397, -10249.8361987147,
-14441.9569333302), .Dim = c(60L, 2L), .Dimnames = list(NULL,
c("80%", "95%")), .Tsp = c(29, 88, 1), class = c("mts",
"ts", "matrix")), upper = structure(c(69289.674759884, 74915.8072333057,
80788.9855673855, 86869.0141808532, 93129.6521203709, 99552.6081183786,
106124.269767652, 112833.994741689, 119673.158999925, 126634.585138142,
133712.175869179, 140900.666393806, 148195.450008524, 155592.451126622,
163088.030260485, 170678.911278435, 178362.124624152, 186134.962249926,
193994.941323469, 201939.774624305, 209967.346121516, 218075.690621001,
226262.976649151, 234527.491939421, 242867.631033688, 251281.88461778,
259768.830291125, 268327.124531608, 276955.495663662, 285652.73767404,
294417.704748202, 303249.306422807, 312146.503267748, 321108.303025584,
330133.757147894, 339221.957677567, 348372.034433833, 357583.15246325,
366854.509725187, 376185.334984769, 385574.885889976, 395022.447212698,
404527.329236203, 414088.866273707, 423706.415304653, 433379.354716939,
443107.083144745, 452889.018392812, 462724.59643907, 472613.270508444,
482554.510211415, 492547.800741645, 502592.64212756, 512688.548533298,
522835.047604926, 533031.679858227, 543277.998104707, 553573.566912819,
563917.962101668, 574310.770264724, 69670.0535742206, 75823.0829753909,
82355.8037346047, 89205.1836262851, 96330.8327553065, 103704.73438813,
111306.06308777, 119118.541652923, 127128.980558576, 135326.402880273,
143701.483616717, 152246.170951439, 160953.419539271, 169816.996308198,
178831.335142237, 187991.425630358, 197292.726228081, 206731.095333834,
216302.735783307, 226004.149574644, 235832.10051782, 245783.58310782,
255855.796347471, 266046.121551069, 276352.103382299, 286771.433544331,
297301.936663213, 307941.557999181, 318688.352692291, 329540.476304445,
340496.176463526, 351553.785449769, 362711.713592006, 373968.443363436,
385322.524084435, 396772.567154431, 408317.241746781, 419955.270910389,
431685.428029961, 443506.533603539, 455417.452301683, 467417.090277392,
479504.392699952, 491678.341489281, 503937.9532303, 516282.277249338,
528710.393836741, 541221.412601694, 553814.470946882, 566488.732651987,
579243.386556237, 592077.645331285, 604990.74433659, 617981.94055032,
631050.511569476, 644195.754673588, 657416.985946874, 670713.539454249,
684084.766467015, 697530.034734447), .Dim = c(60L, 2L), .Dimnames = list(
NULL, c("80%", "95%")), .Tsp = c(29, 88, 1), class = c("mts",
"ts", "matrix")), x = structure(c(322, 400, 650, 888, 1128,
1694, 2036, 2502, 3089, 3858, 4636, 5883, 7375, 9172, 10149,
12462, 15113, 17660, 21157, 24747, 27980, 31506, 35713, 41035,
47021, 53578, 59138, 63919), .Tsp = c(1, 28, 1), class = "ts"),
series = "corona_total$Total_Cases", fitted = structure(c(321.855997237055,
400.257594259049, 480.37007586837, 928.455716409227, 1124.01471745929,
1368.33088042319, 2313.93350897932, 2340.94139260317, 2988.51458623753,
3696.01826560275, 4657.11011850993, 5415.48896190434, 7207.59145923711,
8907.53285184028, 11019.4592645359, 10990.3390264937, 14996.0281226882,
17819.9187915184, 20189.7942179943, 24811.1682010133, 28352.3859396781,
31153.9378444613, 35080.4739819968, 40032.6647840948, 46541.4658359262,
53116.8523004977, 60229.4663608196, 64533.0561090418), .Tsp = c(1,
28, 1), class = "ts"), residuals = structure(c(0.144002762945477,
-0.257594259049227, 169.62992413163, -40.455716409227, 3.98528254071288,
325.669119576814, -277.933508979317, 161.058607396831, 100.485413762468,
161.981734397248, -21.1101185099251, 467.511038095663, 167.408540762885,
264.467148159716, -870.459264535865, 1471.66097350626, 116.971877311758,
-159.918791518434, 967.205782005673, -64.1682010133445, -372.385939678148,
352.062155538701, 632.526018003249, 1002.33521590517, 479.534164073812,
461.147699502253, -1091.4663608196, -614.056109041783), .Tsp = c(1,
28, 1), class = "ts")), class = "forecast")
This is the code I used to make the plot (ignore the dotted exponential curve):
plot(forecast, shaded = TRUE, shadecols=NULL, lambda = NULL, col = 1, fcol = 4, pi.col=1,
pi.lty=2, ylim = NULL, main = "Out-of-Sample Forecast", ylab = "Number of Cases",
xlab = "Days (since 23/03/2020)") + abline(v = 28:29, col= "#FF000033", lty=1, lwd=5)
Output:
Can anyone please help me with this?
I couldn't load your object in my R session, so I'm assuming your plot works like a regular one.
You have 2 options.
Either you set options(scipen = 10) (or some high value), which is a quick fix, but if you need some plots with scientific notation and others without on the same graphics window, this will not work.
You define the axis yourself, with the format you need.
You can use axTicks(2) to get the position of default ticks and then format the labels as you need.
I recommend option 2. Here's a quick example :
x <- seq(1,10, l = 100)
y <- x*1e5
par(mfrow = c(1,2))
plot(x, y, main = "custom axis", yaxt = "n")
ticks <- axTicks(2) # get axis ticks
axis(2, at = ticks, labels = formatC(ticks, format = 'd')) # make axis
plot(x, y, main = "default axis")
Outputs :
You can take a look at other potential options in the answers to this post

Entering data into multiple dataframes

I have two loops, the first looks at a specific protein then the second looks at specific cells. In addition I have 16 tables (all named the protein then "_table"). If I specify the cell type I can get the data to enter into the correct row but if I try paste0(temp_TSPAN, "_table") I get the error incorrect number of subscripts on matrix
Any ideas how I can get my loop to specify the correct table?
Here is the second loop which codes for putting data into the tables:
temp_cell <- xCell_cells[i]
print(temp_TSPAN)
print(temp_cell)
temp_means <- c(mean(xCell_Lum_A_Q1[,temp_cell]),mean(xCell_Lum_A_Q2[,temp_cell]),mean(xCell_Lum_A_Q3[,temp_cell]),mean(xCell_Lum_A_Q4[,temp_cell]),
mean(xCell_Lum_B_Q1[,temp_cell]),mean(xCell_Lum_B_Q2[,temp_cell]),mean(xCell_Lum_B_Q3[,temp_cell]),mean(xCell_Lum_B_Q4[,temp_cell]),
mean(xCell_Her_2_Q1[,temp_cell]),mean(xCell_Her_2_Q2[,temp_cell]),mean(xCell_Her_2_Q3[,temp_cell]),mean(xCell_Her_2_Q4[,temp_cell]),
mean(xCell_Basal_Q1[,temp_cell]),mean(xCell_Basal_Q2[,temp_cell]),mean(xCell_Basal_Q3[,temp_cell]),mean(xCell_Basal_Q4[,temp_cell]),
mean(xCell_Normal_Q1[,temp_cell]),mean(xCell_Normal_Q2[,temp_cell]),mean(xCell_Normal_Q3[,temp_cell]),mean(xCell_Normal_Q4[,temp_cell]))
print(temp_means)
paste0(temp_TSPAN, "_table")[temp_cell,] <- temp_means
temp_means <- c()
}
The entire code
library(dplyr)
library(RColorBrewer)
RNA_seq <- read.table("RNASeq2Norm_expr_BCRA.txt", stringsAsFactors = F)
xCell <- read.table("..../xCell_ES_RNAseq.txt", stringsAsFactors = F)
PAM50 <- read.table("..../PAM50_subtypes.txt", stringsAsFactors = F)
TSPANS <- read.table("..../TSPANS.txt", stringsAsFactors = F)
len_TSPAN <- length(TSPANS$V1)
col <- brewer.pal(4, "Pastel1")
xCell_cells <- rownames(xCell)
#Create table for quartile means to be entered
for (i in seq(1,len_TSPAN)){
temp_TSPAN <- TSPANS$V1[i]
print(temp_TSPAN)
assign(as.character((temp_TSPAN)), value = data.frame(Lum_A_Q1_means = rep(NA, 67), Lum_A_Q2_means = rep(NA,67),
Lum_A_Q3_means = rep(NA, 67), Lum_A_Q4_means = rep(NA,67),
Lum_B_Q1_means = rep(NA, 67), Lum_B_Q2_means = rep(NA,67),
Lum_B_Q3_means = rep(NA, 67), Lum_B_Q4_means = rep(NA,67),
Her_2_Q1_means = rep(NA, 67), Her_2_Q2_means = rep(NA,67),
Her_2_Q3_means = rep(NA, 67), Her_2_Q4_means = rep(NA,67),
Basal_Q1_means = rep(NA, 67), Basal_Q2_means = rep(NA,67),
Basal_Q3_means = rep(NA, 67), Basal_Q4_means = rep(NA,67),
Normal_Q1_means = rep(NA, 67), Normal_Q2_means = rep(NA,67),
Normal_Q3_means = rep(NA, 67), Normal_Q4_means = rep(NA,67),
row.names = xCell_cells))
}
temp_TSPAN <- c()
temp_cell <- c()
#Determine which samples belong to each quartile
for (T in seq(1,len_TSPAN)) {
temp_TSPAN <- TSPANS$V1[T]
print(temp_TSPAN)
Lum_A <- RNA_seq[temp_TSPAN, PAM50$subtype == "LumA"]
Lum_A_Quartiles <- quantile(Lum_A[temp_TSPAN,])
Q1_Lum_A <- Lum_A[,(Lum_A[temp_TSPAN,]) <= Lum_A_Quartiles$`25%`]
Q2_Lum_A <- Lum_A[,(Lum_A[temp_TSPAN,]) > Lum_A_Quartiles$`25%`]
Q2_Lum_A <- Q2_Lum_A[,(Q2_Lum_A[temp_TSPAN,]) <= Lum_A_Quartiles$`50%`]
Q3_Lum_A <- Lum_A[,(Lum_A[temp_TSPAN,]) > Lum_A_Quartiles$`50%`]
Q3_Lum_A <- Q3_Lum_A[,(Q3_Lum_A[temp_TSPAN,]) <= Lum_A_Quartiles$`75%`]
Q4_Lum_A <- Lum_A[,(Lum_A[temp_TSPAN,]) > Lum_A_Quartiles$`75%`]
Lum_B <- RNA_seq[temp_TSPAN, PAM50$subtype == "LumB"]
Lum_B_Quartiles <- quantile(Lum_B[temp_TSPAN,])
Q1_Lum_B <- Lum_B[,(Lum_B[temp_TSPAN,]) <= Lum_B_Quartiles$`25%`]
Q2_Lum_B <- Lum_B[,(Lum_B[temp_TSPAN,]) > Lum_B_Quartiles$`25%`]
Q2_Lum_B <- Q2_Lum_B[,(Q2_Lum_B[temp_TSPAN,]) <= Lum_B_Quartiles$`50%`]
Q3_Lum_B <- Lum_B[,(Lum_B[temp_TSPAN,]) > Lum_B_Quartiles$`50%`]
Q3_Lum_B <- Q3_Lum_B[,(Q3_Lum_B[temp_TSPAN,]) <= Lum_B_Quartiles$`75%`]
Q4_Lum_B <- Lum_B[,(Lum_B[temp_TSPAN,]) > Lum_B_Quartiles$`75%`]
Her_2 <- RNA_seq[temp_TSPAN, PAM50$subtype == "Her2"]
Her_2_Quartiles <- quantile(Her_2[temp_TSPAN,])
Q1_Her_2 <- Her_2[,(Her_2[temp_TSPAN,]) <= Her_2_Quartiles$`25%`]
Q2_Her_2 <- Her_2[,(Her_2[temp_TSPAN,]) > Her_2_Quartiles$`25%`]
Q2_Her_2 <- Q2_Her_2[,(Q2_Her_2[temp_TSPAN,]) <= Her_2_Quartiles$`50%`]
Q3_Her_2 <- Her_2[,(Her_2[temp_TSPAN,]) > Her_2_Quartiles$`50%`]
Q3_Her_2 <- Q3_Her_2[,(Q3_Her_2[temp_TSPAN,]) <= Her_2_Quartiles$`75%`]
Q4_Her_2 <- Her_2[,(Her_2[temp_TSPAN,]) > Her_2_Quartiles$`75%`]
Basal <- RNA_seq[temp_TSPAN, PAM50$subtype == "Basal"]
Basal_Quartiles <- quantile(Basal[temp_TSPAN,])
Q1_Basal <- Basal[,(Basal[temp_TSPAN,]) <= Basal_Quartiles$`25%`]
Q2_Basal <- Basal[,(Basal[temp_TSPAN,]) > Basal_Quartiles$`25%`]
Q2_Basal <- Q2_Basal[,(Q2_Basal[temp_TSPAN,]) <= Basal_Quartiles$`50%`]
Q3_Basal <- Basal[,(Basal[temp_TSPAN,]) > Basal_Quartiles$`50%`]
Q3_Basal <- Q3_Basal[,(Q3_Basal[temp_TSPAN,]) <= Basal_Quartiles$`75%`]
Q4_Basal <- Basal[,(Basal[temp_TSPAN,]) > Basal_Quartiles$`75%`]
Normal <- RNA_seq[temp_TSPAN, PAM50$subtype == "Normal"]
Normal_Quartiles <- quantile(Normal[temp_TSPAN,])
Q1_Normal <- Normal[,(Normal[temp_TSPAN,]) <= Normal_Quartiles$`25%`]
Q2_Normal <- Normal[,(Normal[temp_TSPAN,]) > Normal_Quartiles$`25%`]
Q2_Normal <- Q2_Normal[,(Q2_Normal[temp_TSPAN,]) <= Normal_Quartiles$`50%`]
Q3_Normal <- Normal[,(Normal[temp_TSPAN,]) > Normal_Quartiles$`50%`]
Q3_Normal <- Q3_Normal[,(Q3_Normal[temp_TSPAN,]) <= Normal_Quartiles$`75%`]
Q4_Normal <- Normal[,(Normal[temp_TSPAN,]) > Normal_Quartiles$`75%`]
Lum_A_Q1_samples <- colnames(Q1_Lum_A)
Lum_A_Q2_samples <- colnames(Q2_Lum_A)
Lum_A_Q3_samples <- colnames(Q3_Lum_A)
Lum_A_Q4_samples <- colnames(Q4_Lum_A)
Lum_B_Q1_samples <- colnames(Q1_Lum_B)
Lum_B_Q2_samples <- colnames(Q2_Lum_B)
Lum_B_Q3_samples <- colnames(Q3_Lum_B)
Lum_B_Q4_samples <- colnames(Q4_Lum_B)
Her_2_Q1_samples <- colnames(Q1_Her_2)
Her_2_Q2_samples <- colnames(Q2_Her_2)
Her_2_Q3_samples <- colnames(Q3_Her_2)
Her_2_Q4_samples <- colnames(Q4_Her_2)
Basal_Q1_samples <- colnames(Q1_Basal)
Basal_Q2_samples <- colnames(Q2_Basal)
Basal_Q3_samples <- colnames(Q3_Basal)
Basal_Q4_samples <- colnames(Q4_Basal)
Normal_Q1_samples <- colnames(Q1_Normal)
Normal_Q2_samples <- colnames(Q2_Normal)
Normal_Q3_samples <- colnames(Q3_Normal)
Normal_Q4_samples <- colnames(Q4_Normal)
#Finding enrichment scores for the samples in each quartile
xCell_Lum_A_Q1 <- t(xCell[,Lum_A_Q1_samples])
xCell_Lum_A_Q2 <- t(xCell[,Lum_A_Q2_samples])
xCell_Lum_A_Q3 <- t(xCell[,Lum_A_Q3_samples])
xCell_Lum_A_Q4 <- t(xCell[,Lum_A_Q4_samples])
xCell_Lum_B_Q1 <- t(xCell[,Lum_B_Q1_samples])
xCell_Lum_B_Q2 <- t(xCell[,Lum_B_Q2_samples])
xCell_Lum_B_Q3 <- t(xCell[,Lum_B_Q3_samples])
xCell_Lum_B_Q4 <- t(xCell[,Lum_B_Q4_samples])
xCell_Her_2_Q1 <- t(xCell[,Her_2_Q1_samples])
xCell_Her_2_Q2 <- t(xCell[,Her_2_Q2_samples])
xCell_Her_2_Q3 <- t(xCell[,Her_2_Q3_samples])
xCell_Her_2_Q4 <- t(xCell[,Her_2_Q4_samples])
xCell_Basal_Q1 <- t(xCell[,Basal_Q1_samples])
xCell_Basal_Q2 <- t(xCell[,Basal_Q2_samples])
xCell_Basal_Q3 <- t(xCell[,Basal_Q3_samples])
xCell_Basal_Q4 <- t(xCell[,Basal_Q4_samples])
xCell_Normal_Q1 <- t(xCell[,Normal_Q1_samples])
xCell_Normal_Q2 <- t(xCell[,Normal_Q2_samples])
xCell_Normal_Q3 <- t(xCell[,Normal_Q3_samples])
xCell_Normal_Q4 <- t(xCell[,Normal_Q4_samples])
len_xCell <- length(xCell_cells)
temp_means <- c()
for (i in seq(1, len_xCell)){
temp_cell <- xCell_cells[i]
print(temp_TSPAN)
print(temp_cell)
temp_means <- c(mean(xCell_Lum_A_Q1[,temp_cell]),mean(xCell_Lum_A_Q2[,temp_cell]),mean(xCell_Lum_A_Q3[,temp_cell]),mean(xCell_Lum_A_Q4[,temp_cell]),
mean(xCell_Lum_B_Q1[,temp_cell]),mean(xCell_Lum_B_Q2[,temp_cell]),mean(xCell_Lum_B_Q3[,temp_cell]),mean(xCell_Lum_B_Q4[,temp_cell]),
mean(xCell_Her_2_Q1[,temp_cell]),mean(xCell_Her_2_Q2[,temp_cell]),mean(xCell_Her_2_Q3[,temp_cell]),mean(xCell_Her_2_Q4[,temp_cell]),
mean(xCell_Basal_Q1[,temp_cell]),mean(xCell_Basal_Q2[,temp_cell]),mean(xCell_Basal_Q3[,temp_cell]),mean(xCell_Basal_Q4[,temp_cell]),
mean(xCell_Normal_Q1[,temp_cell]),mean(xCell_Normal_Q2[,temp_cell]),mean(xCell_Normal_Q3[,temp_cell]),mean(xCell_Normal_Q4[,temp_cell]))
print(temp_means)
nm1 <- temp_TSPAN, "_table")
assign(nm1, `[<-`(get(nm1), get(nm1)[temp_cell,], temp_means))
temp_means <- c()
}
}
Sample data
> dput(head(TSPANS))
structure(list(V1 = c("TSPAN1", "TSPAN3", "TSPAN4", "TSPAN6",
"TSPAN8", "TSPAN9")), row.names = c(NA, 6L), class = "data.frame")
> dput(head(PAM50))
structure(list(Sample_ID = c("TCGA.3C.AAAU.01A.11R.A41B.07",
"TCGA.3C.AALI.01A.11R.A41B.07", "TCGA.3C.AALJ.01A.31R.A41B.07",
"TCGA.3C.AALK.01A.11R.A41B.07", "TCGA.4H.AAAK.01A.12R.A41B.07",
"TCGA.5L.AAT0.01A.12R.A41B.07"), subtype = c("LumB", "Her2",
"LumB", "Her2", "LumB", "LumA")), row.names = c("1", "2", "3",
"4", "5", "6"), class = "data.frame")
> dput(xCell[1:5, 1:5])
structure(list(TCGA.3C.AAAU.01A.11R.A41B.07 = c(0.0182278777214451,
0, 0, 0.00312390016077943, 0.136068543973221), TCGA.3C.AALI.01A.11R.A41B.07 = c(0.282595778602895,
0, 0.0600603500818251, 0.0589537608635649, 0.205506668589802),
TCGA.3C.AALJ.01A.31R.A41B.07 = c(0.18283171431184, 0.0941680866198556,
0.146150110122777, 0.0304405814585031, 8.9658687089931e-20
), TCGA.3C.AALK.01A.11R.A41B.07 = c(0.134145304728982, 0.032112973032126,
0.154386799682783, 0, 4.17812708486922e-20), TCGA.4H.AAAK.01A.12R.A41B.07 = c(0.106111324096064,
0.0121130054841642, 0.191944288358642, 0, 0.125099426066817
)), row.names = c("aDC", "Adipocytes", "Astrocytes", "B-cells",
"Basophils"), class = "data.frame")
> dput(RNA_seq[1:5, 1:5])
structure(list(TCGA.3C.AAAU.01A.11R.A41B.07 = c(197.0897, 0,
0, 102.9634, 1.3786), TCGA.3C.AALI.01A.11R.A41B.07 = c(237.3844,
0, 0, 70.8646, 4.3502), TCGA.3C.AALJ.01A.31R.A41B.07 = c(423.2366,
0.9066, 0, 161.2602, 0), TCGA.3C.AALK.01A.11R.A41B.07 = c(191.0178,
0, 0, 62.5072, 1.6549), TCGA.4H.AAAK.01A.12R.A41B.07 = c(268.8809,
0.4255, 3.8298, 154.3702, 3.4043)), row.names = c("A1BG", "A1CF",
"A2BP1", "A2LD1", "A2ML1"), class = "data.frame")
> dput(head(TSPAN1_table))
structure(list(Lum_A_Q1_means = c(NA, NA, NA, NA, NA, NA), Lum_A_Q2_means = c(NA,
NA, NA, NA, NA, NA), Lum_A_Q3_means = c(NA, NA, NA, NA, NA, NA
), Lum_A_Q4_means = c(NA, NA, NA, NA, NA, NA), Lum_B_Q1_means = c(NA,
NA, NA, NA, NA, NA), Lum_B_Q2_means = c(NA, NA, NA, NA, NA, NA
), Lum_B_Q3_means = c(NA, NA, NA, NA, NA, NA), Lum_B_Q4_means = c(NA,
NA, NA, NA, NA, NA), Her_2_Q1_means = c(NA, NA, NA, NA, NA, NA
), Her_2_Q2_means = c(NA, NA, NA, NA, NA, NA), Her_2_Q3_means = c(NA,
NA, NA, NA, NA, NA), Her_2_Q4_means = c(NA, NA, NA, NA, NA, NA
), Basal_Q1_means = c(NA, NA, NA, NA, NA, NA), Basal_Q2_means = c(NA,
NA, NA, NA, NA, NA), Basal_Q3_means = c(NA, NA, NA, NA, NA, NA
), Basal_Q4_means = c(NA, NA, NA, NA, NA, NA), Normal_Q1_means = c(NA,
NA, NA, NA, NA, NA), Normal_Q2_means = c(NA, NA, NA, NA, NA,
NA), Normal_Q3_means = c(NA, NA, NA, NA, NA, NA), Normal_Q4_means = c(NA,
NA, NA, NA, NA, NA)), row.names = c("aDC", "Adipocytes", "Astrocytes",
"B-cells", "Basophils", "CD4+ memory T-cells"), class = "data.frame")
We need to get the value with get an assign using assign
temp_cell <- xCell_cells[i]
print(temp_TSPAN)
print(temp_cell)
temp_means <- c(mean(xCell_Lum_A_Q1[,temp_cell]),mean(xCell_Lum_A_Q2[,temp_cell]),mean(xCell_Lum_A_Q3[,temp_cell]),mean(xCell_Lum_A_Q4[,temp_cell]),
mean(xCell_Lum_B_Q1[,temp_cell]),mean(xCell_Lum_B_Q2[,temp_cell]),mean(xCell_Lum_B_Q3[,temp_cell]),mean(xCell_Lum_B_Q4[,temp_cell]),
mean(xCell_Her_2_Q1[,temp_cell]),mean(xCell_Her_2_Q2[,temp_cell]),mean(xCell_Her_2_Q3[,temp_cell]),mean(xCell_Her_2_Q4[,temp_cell]),
mean(xCell_Basal_Q1[,temp_cell]),mean(xCell_Basal_Q2[,temp_cell]),mean(xCell_Basal_Q3[,temp_cell]),mean(xCell_Basal_Q4[,temp_cell]),
mean(xCell_Normal_Q1[,temp_cell]),mean(xCell_Normal_Q2[,temp_cell]),mean(xCell_Normal_Q3[,temp_cell]),mean(xCell_Normal_Q4[,temp_cell]))
print(temp_means)
nm1 <- temp_TSPAN, "_table")
assign(nm1, `[<-`(get(nm1), get(nm1)[temp_cell,], temp_means))
temp_means <- c()
}
May be the OP is looking for simplified version with
lapply(split(RNA_seq, setNames(PAM50$subtype, PAM50$Sample_ID)[colnames(RNA_seq)]),
function(dat) apply(dat, 1, function(x) {
qnt <- quantile(x)
data.frame(val = names(x), grp = names(qnt)[findInterval(x, qnt)])
apply(xCell[, names(x)], 2, function(y) tapply(y, names(x), FUN = mean))
}))

Control axis labeling in R heatmap

I am trying to create a heatmap in R, but the axis labels (which uses the row.names information of the data frame that is being passed to the heatmap function) is crowding the x-axis, and I can't figure out how to control the labeling.
Here is an example:
vDates = seq.Date(from = as.Date('29-11-2012',
format = '%d-%m-%Y'),
length.out = 203, by = 'day')
dfHeatMap = rdirichlet(length(vDates), runif(15))
row.names(dfHeatMap) = as.character(vDates)
heatmap(t(dfHeatMap), Rowv = NA, Colv = NA,
col = cm.colors(256))
Any suggestions/packages that take care of this issue?
I was able to figure this out by RTFM (more carefully). Initially I was not able to get the labCol and the labRow working. Here is a working example:
library(gtools)
library(ClassDiscovery)
# generate sequence of dates
vDates = seq.Date(from = as.Date('29-11-2012',
format = '%d-%m-%Y'),
length.out = 203, by = 'day')
# generate the random samples
dfHeatMap = as.matrix(rdirichlet(length(vDates), runif(15)))
row.names(dfHeatMap) = as.character(vDates)
# column labels
vDatesNew = rep(as.Date(NA), length(vDates))
vDatesNew[seq(from = 1, to = 203, by = 10)] =
vDates[seq(from = 1, to = 203, by = 10)]
# row labels
labRow = c(NA, NA, 3, NA, NA, 6, NA, NA, 9,
NA, NA, 12, NA, NA, 15)
# draw the heatmap with aspect control
aspectHeatmap(t(dfHeatMap), Rowv = NA, Colv = NA,
col = cm.colors(256), labCol = vDatesNew, labRow = labRow,
margins = c(5, 5), hExp = 1.5, wExp = 4)
I have used the ClassDiscovery package to control the aspect ratio of the heatmap. This is what it looks like:

Resources