Process sets of rasters in parallel using lapp function from terra package - r

I have groups of rasters that I want to run a function on, I think probably using the lapp function from the {terra} package? Here is a simple example using toy data of the 'style' of thing I am hoping to accomplish.
library("terra")
rp10val = 106520
rp20val = 106520
rp50val = 154250
rp100val = 154250
rp200val = 154250
rp500val = 154250
rp1500val = 154250
sopval = 200
rp_10_vul = rast(nrow = 10, ncol = 10, vals = rep(rp10val, 10))
rp_20_vul = rast(nrow = 10, ncol = 10, vals = rep(rp20val, 10))
rp_50_vul = rast(nrow = 10, ncol = 10, vals = rep(rp50val, 10))
rp_100_vul = rast(nrow = 10, ncol = 10, vals = rep(rp100val, 10))
rp_200_vul = rast(nrow = 10, ncol = 10, vals = rep(rp200val, 10))
rp_500_vul = rast(nrow = 10, ncol = 10, vals = rep(rp500val, 10))
rp_1500_vul = rast(nrow = 10, ncol = 10, vals = rep(rp1500val, 10))
sop_tile = rast(nrow = 10, ncol = 10, vals = rep(sopval, 10))
input_raster_group <- c(rp_10_vul, rp_20_vul, rp_50_vul, rp_100_vul,
rp_200_vul, rp_500_vul, rp_1500_vul, sop_tile)
## In real world each of these lists would have rasters with different data in
input_raster_lists <- list(list(input_raster_group),
list(input_raster_group),
list(input_raster_group))
mcmapply(lapp,
input_raster_lists,
function(a,b,c,d,e,f,g,h){a+b+c+d+e+f+g+h},
mc.cores = 2)
## If working on windows, this might be better to try and run as proof of concept
# mapply(lapp,
# input_raster_lists,
# function(a,b,c,d,e,f,g,h){(a+b-c) / (d+e+f+g+h)})

Simplified data to make this easier to read
library("terra")
r10 = rast(nrow = 10, ncol = 10, vals = 10)
r20 = rast(nrow = 10, ncol = 10, vals = 20)
r50 = rast(nrow = 10, ncol = 10, vals = 50)
group <- c(r10, r20, r50)
input <- list(group, group, group)
You can use lapply to compute lists sequentially
x <- lapply(input, \(i) sum(i))
y <- lapply(input, \(i) app(i, sum))
z <- lapply(input, \(i) lapp(i, function(a,b,c){a+b+c}))
To use parallelization you could use e.g. parallel::parLapply or, as in your case, parallel::mcmapply.
SpatRaster objects hold a pointer (reference) to a C++ object that cannot be passed to a worker. Therefore you would need to use wrap and unwrap as I show below. I use proxy=TRUE to not force values to memory.
library(parallel)
inp <- lapply(input, \(x) wrap(x, proxy=TRUE))
f <- \(i) { unwrap(i) |> sum() |> wrap(proxy=TRUE)}
b <- mcmapply(f, inp)
out <- lapply(b, unwrap)
This approach may be useful in some cases, e.g. when you have to do many simulations on a relatively small raster that is memory.
In most cases you would do parallelization because you are dealing with large rasters that are on disk. In that case, you could just send the filenames to the workers, and create the SpatRasters there (and write the output to disk).
There is more discussion here

Related

Using app function from {terra} package on raster stacks? (in parallel)

I have four high resolution rasters for a country. I have split each raster into tiles and done some other processing to them. I now want to apply a function to each cell, of each 'stack' of the raster tiles, to produce one set of output tiles. The function is a little complex. I have tried to synthesise some data below to reproduce my current approach. It works (ish) but I'm convinced that there's a better way to do this. To use parallel processing on my unix box, I simply swap mapply for mcmapply, but I haven't done that in the example below as I presume many will be working on Windows machines. I'd welcome ideas on my approach and particularly optimisation.
library("terra")
library("glue")
## Make some toy data
dir.create("temp_folder")
dir.create("result_folder")
x <- rast(ncols = 10, nrows = 10)
a <- rast(ncol = 100, nrow = 100)
some_values <- as.integer(runif(10000, min = 1, max = 100))
ind <- which(some_values %in% sample(some_values, 15))
some_values[ind] <- NA
values(a) <- some_values
a_tiles <- makeTiles(a, x, glue("temp_folder/tile_a_{1:100}.tif"), overwrite = TRUE)
b <- rast(ncol = 100, nrow = 100)
some_values <- as.integer(runif(10000, min = 1, max = 100))
ind <- which(some_values %in% sample(some_values, 15))
some_values[ind] <- NA
values(b) <- some_values
b_tiles <- makeTiles(b, x, glue("temp_folder/tile_b_{1:100}.tif"), overwrite = TRUE)
c <-rast(ncol = 100, nrow = 100)
some_values <- as.integer(runif(10000, min = 1, max = 100))
ind <- which(some_values %in% sample(some_values, 15))
some_values[ind] <- NA
values(c) <- some_values
c_tiles <- makeTiles(c, x, glue("temp_folder/tile_c_{1:100}.tif"), overwrite = TRUE)
d <- rast(ncol = 100, nrow = 100)
some_values <- as.integer(runif(10000, min = 1, max = 100))
ind <- which(some_values %in% sample(some_values, 15))
some_values[ind] <- NA
values(d) <- some_values
d_tiles <- makeTiles(d, x, glue("temp_folder/tile_d_{1:100}.tif"), overwrite = TRUE)
## Outer function so that this can be used in parallel ? But maybe this is a silly way to do it?
outer_function <- function(a_tiles, b_tiles, c_tiles, d_tiles, output_files) {
one_a_tile <- rast(unlist(a_tiles))
one_b_tile <- rast(unlist(b_tiles))
one_c_tile <- rast(unlist(c_tiles))
one_d_tile <- rast(unlist(d_tiles))
output_file <- output_files
# I replace any NAs with 0 as an NA will break my 'if' statement of the inner_function.
# I get Error in if (z["a"] <= z["b"]) { : missing value where TRUE/FALSE needed
one_a_tile[is.na(one_a_tile)] <- 0
one_b_tile[is.na(one_b_tile)] <- 0
one_c_tile[is.na(one_c_tile)] <- 0
one_d_tile[is.na(one_d_tile)] <- 0
z <- sds(one_a_tile, one_b_tile, one_c_tile, one_d_tile)
## Inner function that actually does the work I want doing
inner_function <- function(z) {
names(z) <- c('a', 'b', 'c', 'd')
if (z['a'] <= z['b']) {
y <- rowSums(cbind((z['c'] + z['a'] * 10),
(z['c'] + z['a'] * 20)))
}
if (z['a'] >= z['b']) {
y <- rowSums(cbind((z['c'] + z['a'] * 40),
(z['c'] + z['a'] * 10)))
}
if (z['a'] == z['b']) {
y <- rowSums(cbind((z['c'] + z['a'] * 60),
(z['c'] + z['a'] * 10)))
}
y <- ifelse(y == 0, NA, y)
return(y)
}
app(z,
inner_function,
filename = output_file,
overwrite = TRUE,
wopt = list(datatype = "INT4U"))
return(output_file)
}
results <- mapply(outer_function,
a_tiles = a_tiles,
b_tiles = b_tiles,
c_tiles = c_tiles,
d_tiles = d_tiles,
output_files = output_files <- glue("result_folder/result_tile_{1:length(d_tiles)}.tif"))
names(results) <- NULL
unlink("temp_folder", recursive = TRUE)
unlink("result_folder", recursive = TRUE)

build matrix in a for loop automatically in R

Suppose I have a code like this
probv=c(0.5,0.1,0.2,0.3)
N=c(1,2,3,4)
g1=matrix(rbinom(n = 10, size = N[1], prob = probv[1]), nrow=5)
g2=matrix(rbinom(n = 10, size = N[2], prob = probv[2]), nrow=5)
g3=matrix(rbinom(n = 10, size = N[3], prob = probv[3]), nrow=5)
g4=matrix(rbinom(n = 10, size = N[4], prob = probv[4]), nrow=5)
I want to use a for loop
for i in (1:J)
{......} J=4 in this case
use one line function to return the same output like this, I want to know
how I create a matrix g_ in the loop
which is also benefit for me when I increase the length
of my vector into 5,6,7......
for example N=c(1,2,3,4,5) probv=c(0.5,0.1,0.2,0.3,0.5)
I do not change my code to create another matrix called g5.The code can create it and I just need to change my input to achieve my goal
Thanks Akrun
what is my N is a three dimensional array, I want to map the last dimension of it? How to change in the map method?
probv=c(0.5,0.1,0.2,0.3)
N=array(1:24,c(3,2,4))
g1=matrix(rbinom(n = 10, size = N[,,1], prob = probv[1]), nrow=5)
g2=matrix(rbinom(n = 10, size = N[,,2], prob = probv[2]), nrow=5)
g3=matrix(rbinom(n = 10, size = N[,,3], prob = probv[3]), nrow=5)
g4=matrix(rbinom(n = 10, size = N[,,4], prob = probv[4]), nrow=5)
We can use Map to loop over the 'N' and 'probv' vector, get the corresponding values into rbinom and create a matrix. It returns a list of matrices
lst1 <- Map(function(x, y) matrix(rbinom(n = 10,
size = x, prob = y), nrow = 5), N, probv)
Or using for loop
lst2 <- vector('list', length(N))
for(i in seq_along(N)) {
lst2[[i]] <- matrix(rbinom(n = 10, size = N[i], prob = probv[i]), nrow = 5)
}
names(lst2) <- paste0("g", seq_along(lst2))
For the updated question to extract from an array
mnLength <- min(length(probv), dim(N)[3])
lst2 <- vector('list', mnLength)
for(i in seq_len(mnLength)) {
lst2[[i]] <- matrix(rbinom(n = 10, size = N[,,i], prob = probv[i]), nrow = 5)
}
names(lst2) <- paste0("g", seq_along(lst2))
lst2$g1
lst2$g2

Plot heatmaps of multiple data frames using a slider in R

I have multiple data.frames and each one of them represent the pairwise interactions of individuals at different time points.
Here is an example of how my data.frames look.
df1 <- matrix(data = rexp(9, rate = 10), nrow = 3, ncol = 3)
df2 <- matrix(data = rexp(16, rate = 10), nrow = 4, ncol = 4)
df3 <- matrix(data = rexp(4, rate = 10), nrow = 2, ncol = 2)
I would like to plot them as it is pointed in this page (https://plotly.com/r/sliders/)
where with a slider I can move from one heatmap to the other.
I have tried so far with plotly but I have not succeeded. Any help is highly appreciated.
I am struggling for long with this issue. I might be a bit blind at this point so please forgive me if the question is stupid.
Following the Sine Wave Slider example on https://plotly.com/r/sliders/ this can be achieved like so. The first step of my approach involves converting the matrices to dataframes with columns x, y, z. Second instead of lines we plot heatmaps.
df1 <- matrix(data = rexp(9, rate = 10), nrow = 3, ncol = 3)
df2 <- matrix(data = rexp(16, rate = 10), nrow = 4, ncol = 4)
df3 <- matrix(data = rexp(4, rate = 10), nrow = 2, ncol = 2)
library(tibble)
library(tidyr)
library(plotly)
# Make dataframes
d <- lapply(list(df1, df2, df3), function(d) {
d %>%
as_tibble(.colnames = seq(ncol(.))) %>%
rowid_to_column("x") %>%
pivot_longer(-x, names_to = "y", values_to = "z") %>%
mutate(y = stringr::str_extract(y, "\\d"),
y = as.numeric(y))
})
aval <- list()
for(step in seq_along(d)){
aval[[step]] <-list(visible = FALSE,
name = paste0('v = ', step),
x = d[[step]]$x,
y = d[[step]]$y,
z = d[[step]]$z)
}
aval[1][[1]]$visible = TRUE
steps <- list()
fig <- plot_ly()
for (i in seq_along(aval)) {
fig <- add_trace(fig, x = aval[i][[1]]$x, y = aval[i][[1]]$y, z = aval[i][[1]]$z, visible = aval[i][[1]]$visible,
name = aval[i][[1]]$name, type = "heatmap")
fig
step <- list(args = list('visible', rep(FALSE, length(aval))), method = 'restyle')
step$args[[2]][i] = TRUE
steps[[i]] = step
}
fig <- fig %>%
layout(sliders = list(list(active = 0,
currentvalue = list(prefix = "Heatmap: "),
steps = steps)))
fig

Methods for iteratively changing matrices in a set in R

Currently I am working on an R project where I iteratively have to make a lot of small changes to the final output, which is stored in an self made Class. The calculation time of the problem becomes very large if the amount of iterations increase. Unfortunately, a more vectorized version of the code is not possible, because the future values to be changed depend on the current changes.
A small example of the problem that I encouter is given below. For the calculations using the Example_Class the calculations are significantly longer then for the instance where just a matrix is used. Are their methods available to speed up the calculations in R? Or should I look at extionsion to for example C++?
Example_Class <- setClass(
"Example",
slots = c(
slot_1 = "matrix",
slot_2 = "matrix"
),
prototype=list(
slot_1 = matrix(1, ncol = 1, nrow = 7),
slot_2 = matrix(1, ncol = 4, nrow = 7)
)
)
Example <- Example_Class()
example_matrix_1 <- matrix(1, ncol = 1, nrow = 7)
example_matrix_2 <- matrix(1, ncol = 4, nrow = 7)
example_list <- list(example_matrix_1, example_matrix_2)
profile <- microbenchmark::microbenchmark(
example_matrix_2[3,3] <- (example_matrix_2[3,3] + 1)/2,
example_list[[2]][3,3] <- (example_list[[2]][3,3] + 1)/2,
Example#slot_2[3,3] <- (Example#slot_2[3,3] + 1)/2,
times = 1000
)
profile

R function with loop append avoiding for (using lapply instead)

I have heard that it is not recommended to use for loops in R mainly because it is slow. I have heard that I should use lapply instead because it's calling C for efficiency.
Question: Would it be possible to show me how to transform the following example into a lapply efficient code (or any other apply sapply from the same family)?
myFun <- function(loop){
result = data.frame() #init new df
for(iteration in 1:loop){
generateRnorm1 = matrix(data = rnorm(n = 1000000), nrow = 10000, ncol = 10000)
generateRnorm2 = matrix(data = rnorm(n = 1000000), nrow = 10000, ncol = 10000)
iterationResult = sum(generateRnorm1, generateRnorm2)
bindIterationResult = cbind(iteration, iterationResult)
result = rbind(result, bindIterationResult)
}
return(result)
}
test = myFun(loop = 10)
Here is an lapply method:
myFun2 <- function(loop){
generateRnorm1 = matrix(data = rnorm(n = 1000000), nrow = 10000, ncol = 10000)
generateRnorm2 = matrix(data = rnorm(n = 1000000), nrow = 10000, ncol = 10000)
sum(generateRnorm1, generateRnorm2)
}
# run function over 1:10
myList <- lapply(seq.int(10), myFun2)
# rbind the resulting list
result2 <- do.call(rbind, myList)
Note that there isn't much (if any) speed increase, because the body of your function takes a long time to execute. This swamps any potential speed up with lapply.
On my computer, both methods take about 20 seconds to run.

Resources