parallelizing pixel-wise regression in R - r

I'm not familiar with R, and I want to speed up calculation while doing pixel-wise regression over two large datasets(abot 4GB each) in R, but I got the error Error in clusterR(gim_mod, calc, args = list(fun = coeff)) : cluster error.
Can anyone tell me what's wrong in my code and help me out. here are my codes that got an error:
gim_mod <- stack(gimms_dis_re,modis_re)
coeff <- function(x){
if (is.na(x[1])){
NA
}
else {
lm(x[1:156] ~ x[157:312])$coefficients
}
}
beginCluster(n = 5)
coef_gm <- clusterR(gim_mod,calc, args = list(fun = coeff))
endCluster()
the gimms_dis_re and modis_re are two Rasterstacks that each contains 156 Rasterlayers, and I want to do pixel-wise regression over them.

The function used in calc should return the same number of values for each cell. Your function returns an NA when there is only one number; but two values when there is not.
The below works for me (minimal example data).
Example data
library(raster)
r <- raster(nrow=10, ncol=10)
set.seed(321)
s1 <- lapply(1:12, function(i) setValues(r, rnorm(ncell(r), i, 3)))
s2 <- lapply(1:12, function(i) setValues(r, rnorm(ncell(r), i, 3)))
s1 <- stack(s1)
s2 <- stack(s2)
s1[1:5] = NA
Regression of values in one RasterStack with another
s <- stack(s1, s2)
fun <- function(x) {
if (is.na(x[1])) {
c(NA, NA)
} else {
lm(x[1:12] ~ x[13:24])$coefficients
}
}
# works without cluster
x <- calc(s, fun)
# and with cluster
beginCluster(n = 2)
g <- clusterR(s, calc, args = list(fun = fun))
endCluster()

Related

parallelizing lapply with parLapply does not recognize objects even though I suppied them

I am trying to parallelize something with parLapply. I am exporting all necessary information to the cores, but somehow I am getting an error saying that it cannot find the object 'market_time' (first line of the function that is called in parLapply. However, this object is just a column of the data table 'dt' that I export to the cores.
library('data.table')
library('numDeriv')
library('snow')
cores=detectCores()
cl <- makeCluster(cores[1], type = 'PSOCK')
markets <- unique(dt[, market_time])
R = 10000
nu_p <- rnorm(n = R, -2, 0.5)
nu_xr <- rnorm(n = R, 2, 0.5)
nu_xm <- rnorm(n = R, 2, 0.5)
nu_xj <- rnorm(n = R, 2, 0.5)
clusterExport(cl,c('dt','nu_p','nu_xr','nu_xm','nu_xj')
temp <- parLapply(cl, markets,calc_mc_w, dt=dt,nu_p=nu_p,nu_xr= nu_xr,
nu_xm=nu_xm,nu_xj=nu_xj)
where the function calc_mc_w calls:
calc_mc_w <- function(m, dt,nu_p,nu_xr,nu_xm,nu_xj){
dt_mkt = dt[market_time==m,]
market_time <- dt_mkt[, market_time]
x_m <- dt_mkt[, x_m]
x_j <- dt_mkt[, x_j]
x_r <- dt_mkt[, x_r]
p <- as.matrix(dt_mkt[, p])
xi <- dt_mkt[, xi]
p <- as.matrix(dt_mkt[, p])
jacobian <- jacobian(function(x){calc_shares(x, x_m, x_j, x_r, xi, nu_p,
nu_xm, nu_xj, nu_xr,
market_time)},p)
output <- dt_mkt[,c('prod','market','time','retailer')]
#Get a system of equations with as many equations as unknowns
retailers = unique(dt_mkt[, retailer])
temp <- lapply(retailers,calc_mc_w_r,dt_mkt = dt_mkt, jacobian = jacobian)
temp <- rbindlist(temp)
output <- merge(output,temp,by.x = c('prod','retailer'),
by.y = c('prod','retailer'), allow.cartesian=TRUE)
output
}
calc_mc_w_r <- function(r, dt_mkt, jacobian){
dt_r = dt_mkt[retailer == r,]
result <- dt_r[,c('prod','retailer')]
rows = (dt_mkt[,'retailer']== r)
jacobian_r = jacobian[rows,rows]
result <- result[,mc_w := solve(jacobian_r, dt_r[,shares]+ jacobian_r %*% dt_r[,p])]
result
}
The error I get is:
Error in checkForRemoteErrors(val) :
2 nodes produced errors; first error: object 'market_time' not found
If instead, I do not export the data table dt, but instead each column of it, I get the same error but just for 'jacobian' which is something that I calculate in the function (I do not want to calculate it across the whole dataset as it is super costly, which is why I just want to calculate it on each subset).

Extracting p-values from lineair regression on raster image through calc function (in R)

I basically have a followup question to one answered a few years ago about conducting linear regression on a raster stack. (See Linear regression on raster images - lm complains about NAs)
I did the linear regression and calculated the trends with values from $coefficients but now would like to know the associated p-values (one for each raster pixel).
However, calc complains unable to find an inherited method for function ‘calc’ for signature ‘"integer", "function"’
You can reproduce this error with the following code:
library(raster)
names = c('...','...','...','...','...')
s <- stack(names)
y <- values(s)
x <- log(c(10,20,30,40,50))
funa <- function(y) {
if(all(is.na(y))) {
c(NA, NA)
} else {
summary(lm(y ~ x))$coefficients
}
}
r <- calc(s, funa)
I can understand that calc doesn't know how the translate the output of summary to a new raster stack. So I've tried reshaping the output of lm to other forms, for example with the "broom" package, to no avail however. Now calc complains that: Error in is.infinite(v) : default method not implemented for type 'list' even when I try to force the output to a data.frame or as.numeric. Like this for example
library(broom)
funlm <- function(y) {
if(all(is.na(y))) {
c(NA, NA)
} else {
as.data.frame(glance(lm(y ~ x)))
}
}
r <- calc(s, funlm)
Please help
Here is an example, based on the example in ?calc
# create data
library(raster)
r <- raster(nrow=10, ncol=10)
s1 <- lapply(1:12, function(i) setValues(r, rnorm(ncell(r), i, 3)))
s2 <- lapply(1:12, function(i) setValues(r, rnorm(ncell(r), i, 3)))
s1 <- stack(s1)
s2 <- stack(s2)
# regression of values in one brick (or stack) with another, extract p-values
s <- stack(s1, s2)
fun <- function(x) {
if (all(is.na(x))) {
return(c(NA, NA))
}
m <- lm(x[1:12] ~ x[13:24])
summary(m)$coefficients[,4]
}
x1 <- calc(s, fun)

Assign values to overlay in R?

My problem is when assigning values to overlay.
library(raster)
beginCluster(10)
r <- raster(ncol=10, nrow=10)
r1 <- init(r, fun=runif)
r2 <- init(r, fun=runif)
s=stack(r1,r2,r2,r1,r2,r1)
wi=c(3,5,7)
fun1 = function(x) {overlay(x, fun=function(x) movingFun(x, fun=mean, n=3))}
vm = clusterR(s, fun1, progress = "text")
no problem!
but when I assign n to wi it did not work
for(i in 1:3) {
fun1 = function(x) {overlay(x, fun=function(x) movingFun(x, fun=mean, n=wi[i]))}
vm = clusterR(s, fun1, progress = "text")
}
I got this error
cannot use this formula, probably because it is not vectorized"
Everything inside the function has to be passed to it - it doesn't pick anything up from your environment because of the way the cluster operates.
So pass wi and i to your function:
fun2 = function(x, wi, i) {
overlay(x,
fun=function(x) movingFun(x, fun=mean, n=wi[i]))}
and list them as args in the call to clusterR:
for(i in 1:3){
vm = clusterR(s, fun2, list(wi, i), progress = "text")
}

Adamic Adar coefficient for Link Prediction in R

Hey guys I'm trying to write an R function that computes a summation. Specifically program the following formula:
formula
This is what I have but i can't figure out why it won't work or what I am doing wrong. Note this function resembles the Adamic Adar scoring coefficient.
Please note that the original data is called "fblog" and has 192 vertices.
#Scoring function
library(sand)
nv <- vcount(fblog)
ncn2 <- numeric()
upgrade_graph(fblog)
A2 <- get.adjacency(fblog)
for(i in (1:(nv-1))){
ni <- neighborhood(fblog, 1, i)
nj <- neighborhood(fblog, 1, (i+1):nv)
nbhd.ij <- mapply(intersect, ni, nj, SIMPLIFY=FALSE)
for(i in unlist(nbhd.ij)) {
k_deg = unlist(lapply(nbhd.ij, length))
temp = 1/(log(k_deg))
}
ncn2 <- c(ncn2, temp)
}

Raster linear and conditional regression using raster stacks by month in R

I have two raster stacks and I want to carry out a refression analysis. If each raster in each stack was a month in the year (6 data points would be three months in two years i.e. January, February and March for two different years), how do I calculate the slope using the indices such that the result generates 3 slope rasters (one for each month) please?
#First raster track
r <- raster(ncol=10, nrow=10)
r[]=1:ncell(r)
S <- stack(r,r,r,r,r,r)
#Second raster stack
r1 <- raster(ncol=10, nrow=10)
r1[]=1:ncell(r1)
N <- stack(r1,r1,r1,r1,r1,r1)
#combine both raster stacks
s <- stack(S,N)
#function to calculate slope
fun=function(x) { if (is.na(x[1])){ NA } else { lm(x[7:12] ~ x[1:6] )$coefficients [2]}}
#apply function
slope <- calc(s, fun)
Result should be 3 rasters.
A second question:
If I wanted to do a conditional regression using a third raster stack, what would the codes be?
Try fun with 1:12
fun(1:12)
# Error in model.frame.default(formula = x[6:12] ~ x[1:6], drop.unused.levels = TRUE) :
# variable lengths differ (found for 'x[1:6]')
it should be
fun=function(x) { if (is.na(x[1])){ NA } else { lm(x[7:12] ~ x[1:6] )$coefficients [2]}}
Working example
library(raster)
r <- raster(ncol=10, nrow=10)
set.seed(99)
s <- stack(sapply(1:12, function(i) setValues(r, runif(ncell(r)))))
fun <- function(x) { if (is.na(x[1])){ NA } else { lm(x[7:12] ~ x[1:6] )$coefficients [2]}}
slope <- calc(s, fun)
For the three slopes:
fun3 <- function(x) {
r <- rep(NA, 3)
if (!is.na(x[1])) {
r[1] <- lm(x[3:4] ~ x[1:2] )$coefficients[2]
r[2] <- lm(x[7:8] ~ x[5:6] )$coefficients[2]
r[3] <- lm(x[11:12] ~ x[9:10] )$coefficients[2]
}
r
}
slope3 <- calc(s, fun3)

Resources