I have two raster stacks and I want to carry out a regression analysis. Each raster in each stack is a month of the year (6 layers would be three months in two years, i.e. January, February and March for two different years). How do I calculate the slope using the indices such that the result generates 3 slope rasters (one for each month)?
#First raster stack
r <- raster(ncol=10, nrow=10)
r[]=1:ncell(r)
S <- stack(r,r,r,r,r,r)
#Second raster stack
r1 <- raster(ncol=10, nrow=10)
r1[]=1:ncell(r1)
N <- stack(r1,r1,r1,r1,r1,r1)
#combine both raster stacks
s <- stack(S,N)
#function to calculate slope
fun <- function(x) { if (is.na(x[1])) { NA } else { lm(x[6:12] ~ x[1:6])$coefficients[2] } }
#apply function
slope <- calc(s, fun)
Result should be 3 rasters.
A second question:
If I wanted to do a conditional regression using a third raster stack, what would the code be?
Try fun with 1:12
fun(1:12)
# Error in model.frame.default(formula = x[6:12] ~ x[1:6], drop.unused.levels = TRUE) :
# variable lengths differ (found for 'x[1:6]')
It should be:
fun <- function(x) { if (is.na(x[1])) { NA } else { lm(x[7:12] ~ x[1:6])$coefficients[2] } }
Working example
library(raster)
r <- raster(ncol=10, nrow=10)
set.seed(99)
s <- stack(sapply(1:12, function(i) setValues(r, runif(ncell(r)))))
fun <- function(x) { if (is.na(x[1])) { NA } else { lm(x[7:12] ~ x[1:6])$coefficients[2] } }
slope <- calc(s, fun)
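Note that the check on x[1] only catches cells where the first layer is NA. If, with real data, NAs can occur in any layer (an assumption about your data, not needed for the random example above), a more defensive variant could be (fun_na is just an illustrative name):
fun_na <- function(x) {
  # skip the cell if any of the 12 values is missing
  if (any(is.na(x))) {
    NA
  } else {
    lm(x[7:12] ~ x[1:6])$coefficients[2]
  }
}
slope <- calc(s, fun_na)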
For the three slopes:
fun3 <- function(x) {
  r <- rep(NA, 3)
  if (!is.na(x[1])) {
    r[1] <- lm(x[3:4] ~ x[1:2])$coefficients[2]
    r[2] <- lm(x[7:8] ~ x[5:6])$coefficients[2]
    r[3] <- lm(x[11:12] ~ x[9:10])$coefficients[2]
  }
  r
}
slope3 <- calc(s, fun3)
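Note that the indices in fun3 assume the twelve layers are grouped four per month (the two predictor layers followed by the two response layers for that month). If the combined stack instead follows the s <- stack(S, N) layout from the question, with layers 1-6 from S and 7-12 from N, each ordered January, February, March for year 1 and then year 2 (an assumption about the layer order), the per-month indices would look like this sketch (fun3b is a made-up name):
fun3b <- function(x) {
  out <- rep(NA, 3)
  if (!is.na(x[1])) {
    out[1] <- lm(x[c(7, 10)] ~ x[c(1, 4)])$coefficients[2]  # January
    out[2] <- lm(x[c(8, 11)] ~ x[c(2, 5)])$coefficients[2]  # February
    out[3] <- lm(x[c(9, 12)] ~ x[c(3, 6)])$coefficients[2]  # March
  }
  out
}
slope3b <- calc(s, fun3b)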
Related
I am trying to run corLocal on 2 stacks (average temperature and day of year for spring, over a 17-year period, i.e. 17 tiff files for temperature and 17 tiff files for day of year). I've used the following line:
p <- corLocal(stack1, stack2, method="kendall") ## or pearson
I would like to get the p-value and Sen's slope value as 2 separate rasters, but I am not sure what my output is; it ranges between -0.5 and 0.5.
Example data
library(terra)
set.seed(0)
s <- r <- rast(ncol=10, nrow=10, nlyr=17)
values(r) <- runif(ncell(r) * nlyr(r))
values(s) <- runif(ncell(s) * nlyr(s))
sr <- sds(r,s)
To get the Kendall correlation coefficient for each cell (across the 17 layers):
ken <- lapp(sr, \(x, y) {
  out <- rep(NA, nrow(x))
  for (i in 1:nrow(x)) {
    out[i] <- cor(x[i,], y[i,], method="kendall", use="complete.obs")
  }
  out
})
And to get the p-value:
pken <- lapp(sr, \(x, y) {
  out <- rep(NA, nrow(x))
  for (i in 1:nrow(x)) {
    out[i] <- cor.test(x[i,], y[i,], method="kendall", use="complete.obs")$p.value
  }
  out
})
For completeness: the corLocal method (called focalPairs in "terra") can be used to compute the focal correlation between layers.
library(terra)
r <- rast(system.file("ex/logo.tif", package="terra"))
set.seed(0)
r[[1]] <- flip(r[[1]], "horizontal")
r[[2]] <- flip(r[[2]], "vertical") + init(rast(r,1), runif)
r[[3]] <- init(rast(r,1), runif)
Kendall correlation coefficient and p-value
x <- focalPairs(r, w=5, \(x, y) cor(x, y, "kendall", use="complete.obs"))
y <- focalPairs(r, w=5, \(x, y) cor.test(x, y, method="kendall", use="complete.obs")$p.value)
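To look at the two results together (a plotting convenience only; x holds the correlation coefficients and y the p-values):
plot(c(x, y))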
library(raster)
library(rnaturalearth)
library(terra)
r <- raster::getData('CMIP5', var='tmin', res=10, rcp=45, model='HE', year=70)
r <- r[[1]]
shp <- rnaturalearth::ne_countries()
newcrs <- "+proj=robin +datum=WGS84"
r <- rast(r)
shp <- vect(shp)
r_pr <- terra::project(r, newcrs)
shp_pr <- terra::project(shp, newcrs)
For every country in shp_pr, I want to normalise the underlying raster
on a scale of 0-1. This means dividing a cell by the sum of all the cells within a country boundary and repeating it for all the countries. I am doing this as follows:
country_vec <- shp$sovereignt
temp_ls <- list()
for (c in seq_along(country_vec)) {
  country_ref <- country_vec[c]
  if (country_ref == "Antarctica") { next }
  shp_ct <- shp[shp$sovereignt == country_ref]
  r_country <- terra::crop(r, shp_ct) # crop to the extent of the country boundary
  r_country <- terra::extract(r_country, shp_ct, xy=TRUE)
  r_country$score_norm <- r_country$he45tn701 / sum(na.omit(r_country$he45tn701))
  r_country_norm_rast <- rasterFromXYZ(r_country[, c("x", "y", "score_norm")])
  temp_ls[[c]] <- r_country_norm_rast
  rm(shp_ct, r_country, r_country_norm_rast)
}
m <- do.call(merge, temp_ls)
I wondered if this is the most efficient/right way to do this (i.e. without any for loop) and whether anyone has any suggestions?
Somewhat updated and simplified example data (there is no need to project the data):
library(terra)
library(geodata)
r <- geodata::cmip6_world("HadGEM3-GC31-LL", "585", "2061-2080", "tmin", 10, ".")[[1]]
v <- world(path=".")
v$ID <- 1:nrow(v)
Solution
z <- rasterize(v, r, "ID", touches=TRUE)
zmin <- zonal(r, z, min, na.rm=TRUE, as.raster=TRUE)
zmax <- zonal(r, z, max, na.rm=TRUE, as.raster=TRUE)
x <- (r - zmin) / (zmax - zmin)
Note that the above normalizes the cell values for each country between 0 and 1.
To transform the data such that the values add up to 1 (by country), you can do:
z <- rasterize(v, r, "ID", touches=TRUE)
zsum <- zonal(r, z, sum, na.rm=TRUE, as.raster=TRUE)
x <- r / zsum
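As a quick sanity check (using the x and z objects from above), the values should now sum to approximately 1 within each country:
# per-country sums of the transformed raster; each should be ~1
check <- zonal(x, z, sum, na.rm=TRUE)
head(check)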
I'm not familiar with R, and I want to speed up the calculation while doing pixel-wise regression over two large datasets (about 4 GB each) in R, but I got the error Error in clusterR(gim_mod, calc, args = list(fun = coeff)) : cluster error.
Can anyone tell me what's wrong with my code and help me out? Here is the code that produces the error:
gim_mod <- stack(gimms_dis_re,modis_re)
coeff <- function(x) {
  if (is.na(x[1])) {
    NA
  } else {
    lm(x[1:156] ~ x[157:312])$coefficients
  }
}
beginCluster(n = 5)
coef_gm <- clusterR(gim_mod,calc, args = list(fun = coeff))
endCluster()
The gimms_dis_re and modis_re objects are two RasterStacks that each contain 156 RasterLayers, and I want to do pixel-wise regression over them.
The function used in calc should return the same number of values for each cell. Your function returns a single NA in one case, but two values (the regression coefficients) in the other.
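Applied to the question's 312-layer stack, a sketch of the corrected function (assuming, as in the question, that layers 1-156 are gimms and 157-312 are modis) would be:
coeff <- function(x) {
  if (is.na(x[1])) {
    c(NA, NA)  # two values, matching the length of the coefficients
  } else {
    lm(x[1:156] ~ x[157:312])$coefficients
  }
}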
The below works for me (minimal example data).
Example data
library(raster)
r <- raster(nrow=10, ncol=10)
set.seed(321)
s1 <- lapply(1:12, function(i) setValues(r, rnorm(ncell(r), i, 3)))
s2 <- lapply(1:12, function(i) setValues(r, rnorm(ncell(r), i, 3)))
s1 <- stack(s1)
s2 <- stack(s2)
s1[1:5] = NA
Regression of values in one RasterStack with another
s <- stack(s1, s2)
fun <- function(x) {
  if (is.na(x[1])) {
    c(NA, NA)
  } else {
    lm(x[1:12] ~ x[13:24])$coefficients
  }
}
# works without cluster
x <- calc(s, fun)
# and with cluster
beginCluster(n = 2)
g <- clusterR(s, calc, args = list(fun = fun))
endCluster()
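Both x and g contain two layers (the intercept and the slope), which can be named and plotted as a quick check:
names(x) <- c("intercept", "slope")
plot(x)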
I basically have a followup question to one answered a few years ago about conducting linear regression on a raster stack. (See Linear regression on raster images - lm complains about NAs)
I did the linear regression and calculated the trends from the $coefficients values, but now I would like to know the associated p-values (one for each raster pixel).
However, calc complains: unable to find an inherited method for function ‘calc’ for signature ‘"integer", "function"’
You can reproduce this error with the following code:
library(raster)
names = c('...','...','...','...','...')
s <- stack(names)
y <- values(s)
x <- log(c(10,20,30,40,50))
funa <- function(y) {
  if (all(is.na(y))) {
    c(NA, NA)
  } else {
    summary(lm(y ~ x))$coefficients
  }
}
r <- calc(s, funa)
I can understand that calc doesn't know how to translate the output of summary to a new raster stack. So I've tried reshaping the output of lm into other forms, for example with the "broom" package, to no avail. Now calc complains that Error in is.infinite(v) : default method not implemented for type 'list', even when I try to force the output to a data.frame or as.numeric, like this for example:
library(broom)
funlm <- function(y) {
  if (all(is.na(y))) {
    c(NA, NA)
  } else {
    as.data.frame(glance(lm(y ~ x)))
  }
}
r <- calc(s, funlm)
Please help
Here is an example, based on the example in ?calc
# create data
library(raster)
r <- raster(nrow=10, ncol=10)
s1 <- lapply(1:12, function(i) setValues(r, rnorm(ncell(r), i, 3)))
s2 <- lapply(1:12, function(i) setValues(r, rnorm(ncell(r), i, 3)))
s1 <- stack(s1)
s2 <- stack(s2)
# regression of values in one brick (or stack) with another, extract p-values
s <- stack(s1, s2)
fun <- function(x) {
  if (all(is.na(x))) {
    return(c(NA, NA))
  }
  m <- lm(x[1:12] ~ x[13:24])
  summary(m)$coefficients[,4]
}
x1 <- calc(s, fun)
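The result x1 has two layers, the p-values of the intercept and of the slope term; renaming them makes that explicit:
names(x1) <- c("p_intercept", "p_slope")
plot(x1)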
I'm writing a function for forecasting sales based on trend and seasonality. I use dummy variables to represent seasonality and a time variable for the trend. Here is the code:
forecast <- function(data, time, fn) {
  n <- length(data)
  seasonal <- factor(cycle(data))
  new_data <- seq(from=cycle(data)[n]+1, length=fn, by=1)
  new_seasonal <- factor(new_data)
  trend <- rep(0, n)
  new_trend <- rep(0, n)
  if (time > 0) {
    trend <- vector()
    new_trend <- vector()
    ttrend <- seq(from=1, to=n, by=1)
    tnew_trend <- seq(from=n+1, length=fn, by=1)
    for (i in 1:time) {
      trend <- cbind(trend, ttrend^i)
      new_trend <- cbind(new_trend, tnew_trend^i)
    }
    model_trend <- lm(data ~ seasonal + trend)
  } else {
    model_trend <- lm(data ~ seasonal)
  }
  df <- data.frame(new_seasonal, new_trend)
  p <- predict(model_trend, df)
}
forecast(data = dane.ts[,"SALES"], time=2, fn=5)
However, I get a warning:
Warning message:
'newdata' had 5 rows but variable(s) found have 104 rows
It seems that something is wrong with this function. I would appreciate any help.
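One likely cause of the warning (a sketch, not a full fix): the model formula refers to the free-standing vectors seasonal and trend, so predict() does not find variables with those names in the new data frame (which has columns new_seasonal and new_trend) and falls back to the original 104 observations. Fitting from a data frame and giving the new data the same column names avoids this. The objects y, df, fit and nd below are illustrative, not taken from the question:
# minimal illustration with a made-up monthly series
y <- ts(rnorm(24, mean = 100, sd = 10), frequency = 12)
n <- length(y)
df <- data.frame(sales = as.numeric(y), seasonal = factor(cycle(y)), trend = 1:n)
fit <- lm(sales ~ seasonal + trend, data = df)
future <- (n + 1):(n + 5)
nd <- data.frame(seasonal = factor(((future - 1) %% 12) + 1, levels = levels(df$seasonal)),
                 trend = future)
predict(fit, newdata = nd)  # 5 predictions, no warning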