Converting WRF netCDF files to raster stacks (Mercator projection) in R

I have WRF output netCDF files of dimensions 1499 x 749 x 91 (lon x lat x time), produced with a "Mercator" projection over the Horn of Africa. I would like to convert the netCDF files into a raster stack for further analysis. I have tried different options, but none worked: the values end up in the wrong locations. Any help is much appreciated.
Here is the code:
library(ncdf4)
library(raster)
ro_rast <- nc_open("wrf_CAM0_daily_pre.nc")
pre <- ncvar_get(ro_rast, "pre")
lon <- ro_rast$dim$lon$vals
lat <- ro_rast$dim$lat$vals
time <- ro_rast$dim$ncl2$vals
rm(ro_rast)
r1_brick <- brick(pre, xmn = min(lat), xmx = max(lat), ymn = min(lon), ymx = max(lon),
                  crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0"))
names(r1_brick) <- seq(as.Date('2018-06-01'), as.Date('2018-08-31'), 'days')  # layer names as dates
par(mar = c(2, 2, 2, 2))
cam1_mean <- t(calc(r1_brick, sum))  # seasonal sum of precipitation
cam1 <- flip(cam1_mean, direction = 2)
library(akima)  # interpolation
lonlat_reg <- expand.grid(lon = seq(min(lon), max(lon), length.out = 1499),
                          lat = seq(min(lat), max(lat), length.out = 749))
test <- interp(x = as.vector(lon), y = as.vector(lat), z = as.vector(pre),
               xo = unique(lonlat_reg[, "lon"]), yo = unique(lonlat_reg[, "lat"]),
               duplicate = "error", linear = FALSE, extrap = FALSE)
test <- interp(x = as.vector(lon), y = as.vector(lat), z = as.vector(pre),
               nx = 1499, ny = 749, linear = FALSE, extrap = FALSE)
# turn into a raster
test_ras <- raster(test)

The standard approach would be
library(raster)
b <- brick("wrf_CAM0_daily_pre.nc")
If that does not work, can you point us to the file you are using?
I get this error message (you should have added that to your question):
Error in .rasterObjectFromCDF(x, type = objecttype, band = band, ...) :
cells are not equally spaced; you should extract values as points
I checked the file, and in this case the raster is not a regular grid: the size of the cells changes with latitude. The file does not provide the x and y values of the coordinate reference system used. So the best you can do is extract the values as points, as you were doing, using the interface of ncdf4 or another package. You cannot then directly make a RasterBrick, but you can create one with rasterize or interpolate.
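For example, a minimal sketch of the point-based route, assuming the 1D lon/lat dimension values from your code (the target grid size is an arbitrary choice):
library(ncdf4)
library(raster)
nc <- nc_open("wrf_CAM0_daily_pre.nc")
pre <- ncvar_get(nc, "pre")
lon <- nc$dim$lon$vals
lat <- nc$dim$lat$vals
nc_close(nc)
# one point per cell, first time step (expand.grid matches R's column-major order)
pts <- expand.grid(lon = lon, lat = lat)
pts$z <- as.vector(pre[, , 1])
# rasterize the points onto a regular target grid
r <- raster(extent(min(lon), max(lon), min(lat), max(lat)),
            ncols = 1499, nrows = 749, crs = "+proj=longlat +datum=WGS84")
r <- rasterize(pts[, c("lon", "lat")], r, field = pts$z, fun = mean)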

Related

akima interpolation for irregular grid

I am trying to interpolate an irregular raster grid to a regular grid using the akima library in R. However, after I define the regular grid and interpolate the values onto it, the resulting raster ends up in the wrong position. I'm doing something wrong but I don't see where. If anyone has a solution (or knows a different approach), please let me know. Thank you very much.
library(raster)
library(akima)
library(rgdal)
library(sp)
# download the file
url <- 'https://downloads.psl.noaa.gov/Datasets/NARR/Derived/monolevel/air.2m.mon.ltm.nc'
file <- paste0(getwd(), "/airtemp.nc")
download.file(url, file, quiet = TRUE, mode = "wb") # less than 4 mb
# define the grid edges according to https://psl.noaa.gov/data/gridded/data.narr.monolevel.html
y <- c(12.2, 14.3, 57.3, 54.5)
x <- c(-133.5, -65.1, -152.9, -49.4)
xym <- cbind(x, y)
p = Polygon(xym)
ps = Polygons(list(p),1)
sps = SpatialPolygons(list(ps))
# create a spatial grid to 0.3 cell size
xy <- makegrid(sps, cellsize = 0.3)
xy$first <- 1
names(xy) <- c('x', 'y', 'first')
coordinates(xy) <- ~x+y
gridded(xy) <- TRUE
# read the netcdf file and extract the values
cape <- brick(file)[[1]] #get the first layer only
rp <- rasterToPoints(cape)
rp <- na.exclude(rp)
# inverse-project from the NARR Lambert conformal conic CRS to lon/lat
r2 <- project(rp[, 1:2],
              '+proj=lcc +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs',
              inv = TRUE, use_ob_tran = TRUE)
# add the transformed coordinates back
rp[, 1:2] <- r2
rp <- as.data.frame(rp)
# create a spatial points object and plot it
coordinates(rp) <- ~x+y
spplot(rp, scales = list(draw = TRUE))
# interpolate the points to the coordinates (takes a while)
akima.sp <- interpp(x = coordinates(rp)[, 1], y = coordinates(rp)[, 2],
                    z = rp@data[, names(rp)[1]],
                    xo = coordinates(xy)[, 1],
                    yo = coordinates(xy)[, 2],
                    linear = FALSE, extrap = FALSE)
# create a raster file
r.a <- rasterFromXYZ(as.matrix(data.frame(akima.sp)))
plot(r.a)

Some issues related to simulating from rLGCP and fitting point process models

I am trying to generate points from the rLGCP function. I assume that the presence of these points in the window is governed by two covariates, ras1 and ras2. Hence I need to compute log-lambda.
rm(list= ls(all=T))
#Libraries
library(spatstat)
library(raster)
library(maptools)
library(fields)
Creating the domain D and two rasters
D <- c(300, 300) # square domain D of side 300
Win <- owin(xrange =c(0, D[1]), yrange =c(0,D[2]))
spatstat.options(npixel=c(D[1],D[2]))
ext <- extent(Win$xrange, Win$yrange) # Extent of the rasters
# First raster ras1
par(mfrow=c(1,1))
ras1 <- raster()
extent(ras1) <- ext
res(ras1) <- 10
names(ras1) <- 'Radiation sim'
crs(ras1) <- "+proj=lcc +lat_1=48 +lat_2=33 +lon_0=-100 +datum=WGS84"
values(ras1) <- matrix(c(seq(from = 0, to = 50, length.out = 200),
                         seq(from = 50, to = 100, length.out = 100),
                         seq(from = 100, to = 150, length.out = 200),
                         seq(from = 150, to = 200, length.out = 200),
                         seq(from = 200, to = 290, length.out = 200)),
                       nrow = 30, ncol = 30)
ras1
plot(ras1, asp=1)
# Second Raster ras2
ras2 <- raster()
extent(ras2) <- ext
res(ras2) <- 10
names(ras2) <- 'Precipitation sim'
crs(ras2) <- "+proj=lcc +lat_1=48 +lat_2=33 +lon_0=-100 +datum=WGS84"
values(ras2) <- matrix(c(seq(from = 0, to = 200, length.out = 500),
                         seq(from = 400, to = 893, length.out = 20),
                         seq(from = 200, to = 300, length.out = 300),
                         seq(from = 300, to = 400, length.out = 80)))
ras2
plot(ras2, asp=1)
Rasters.group <- stack(ras1, ras2)
plot(Rasters.group)
graphics.off()
From rasters to im objects
im.ras1 <- as.im.RasterLayer(ras1); summary(im.ras1)
im.ras2 <- as.im.RasterLayer(ras2); summary(im.ras2)
covar.list <- list(Radiation.sim=im.ras1, Precipitation.sim=im.ras2)
# plot .im object
par(mfrow=c(1,2))
image.plot(list(x=im.ras1$xcol, y=im.ras1$yrow, z=t(im.ras1$v)), main= "Radiation sim", asp=1)
image.plot(list(x=im.ras2$xcol, y=im.ras2$yrow, z=t(im.ras2$v)), main= "Precipitation sim", asp=1)
Now I can compute log-lambda:
#normalization
norm.im.ras1 <- (im.ras1 - summary(im.ras1)$mean) / sd(im.ras1); summary(norm.im.ras1)
norm.im.ras2 <- (im.ras2 - summary(im.ras2)$mean) / sd(im.ras2); summary(norm.im.ras2)
#Compute log-lambda
log.lambda <- norm.im.ras1 + 2*norm.im.ras2
summary(log.lambda)
The result displays very weak values:
Pixel values
range = [-4.657923, 10.94624]
integral = -9.678445e-12
mean = -1.075383e-16
When I try to simulate from rLGCP
gen.lgcp <- rLGCP("matern", mu=log.lambda, var=0.5, scale=0.05, nu=1)
Error: could not allocate a vector of size 181.9 MB
I tried to get around that with
log.lambda0 <- as.im(solutionset(log.lambda>0))
gen.lgcp <- rLGCP("matern", mu=log.lambda0, var=0.5, scale=0.05, nu=1)
summary(gen.lgcp)
This lets me move forward, but further on I did not get relevant results.
#Thinning
image.plot(list(x=log.lambda$xcol, y=log.lambda$yrow, z=t(log.lambda$v)), main= "log.lambda", asp=1)
samp.lgcp <- rthin(gen.lgcp, P = seq(from = 0.02, to = 0.2, length.out = gen.lgcp$n))
points(samp.lgcp$x, samp.lgcp$y, type = 'p', cex = 0.2, lwd = 1, col = 'white')
#point pattern
pts.locations <- as.data.frame(cbind(longitude=samp.lgcp$x, latitude=samp.lgcp$y))
ppp.lgcp <- ppp(pts.locations$longitude, pts.locations$latitude,
                window = owin(xrange = range(pts.locations[, 1]),
                              yrange = range(pts.locations[, 2])))
plot(ppp.lgcp)
# extract the covariate values at each sampled point
cov.value <- extract(Rasters.group, pts.locations)
cov.value <- as.data.frame(cov.value)
presence.data <- data.frame(pts.locations, cov.value, presence = rep(1, nrow(cov.value)))
### Choosing absence point pattern
abs.region <- crop(Virtual.species.domaine, extent(25.28486 , 162.2897 ,181.7417 , 280.7651 ))
im.abs.region <- as.im.RasterLayer(abs.region)
abs.points <- rasterToPoints(abs.region)
ppp.abs.points <- ppp(abs.points[, 1], abs.points[, 2],
                      window = owin(xrange = range(abs.points[, 1]),
                                    yrange = range(abs.points[, 2])))
plot(ppp.abs.points)
cov.value.abs <- extract(Rasters.group, abs.points[,1:2])
absence.data <- data.frame(abs.points[,1:2], cov.value.abs, presence=rep(0, nrow(abs.points)))
colnames(absence.data)[1:2] <- c("longitude", "latitude")
head(absence.data)
# Get database for LGCP
LGCP.Data.Set <- rbind(presence.data, absence.data)
#' Model
#' we will use non-stationary formula
covar.formula <- as.formula(paste("~", paste(names(LGCP.Data.Set[,3:4]), collapse = "+")))
#Quadrature scheme
Q.lgcp <- quadscheme(ppp.lgcp, ppp.abs.points, method = 'grid')
plot(Q.lgcp)
Warning message:
In countingweights(id, areas) :
some tiles with positive area do not contain any quadrature points: relative error = 94.2%
# Inhomogenous poisson process Model
fit.ipp <- ppm(Q.lgcp, trend = covar.formula, covariates = LGCP.Data.Set[,3:4])
summary(fit.ipp)
Warning message:
glm.fit: algorithm did not converge
What is going wrong?
My goal is to evaluate the model and then predict with:
prediction.ipp <- predict.ppm(fit.ipp, log.lambda, type = 'intensity')
This is a very long and un-focused question but I will try to help.
After constructing the image log.lambda, you say the result displays "very weak values". What do you mean? The image values were assigned as a sequence of values ranging from 0 to 200, and then standardised to have mean zero and standard deviation 1. How is this "weak"?
You then call rLGCP using this image as the mean of the log intensity. The values of log.lambda range from about -4 to +10. This means that the desired intensity will range from exp(-4) to exp(+10), that is, about 0.01 to 20,000 points per square unit. The window is 300 by 300 units. Thus a very large number of random points must be generated, and this fails because of memory limits. (The expected number of points is integral(exp(log.lambda)).)
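You can check this directly in spatstat (a one-line sketch using the log.lambda image from your code):
integral(exp(log.lambda))  # expected number of points; here astronomically large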
You then change log.lambda to another image which takes only the values 0 and 1.
The next body of code takes a raster image (of "absence" pixels) and tries to construct a quadrature scheme using the "absence" pixels as dummy points. This is not what quadscheme is designed for: its dummy points should be sparse.
You don't need to construct a quadrature scheme to use ppm. You could just do something like:
covs <- solist(A = im.ras1, B = im.ras2)
ppm(ppp.lgcp ~ A + B, data = covs)
If you really want to construct a quadrature scheme, I suggest you use the function pixelquad instead. Just do pixelquad(ppp.lgcp, im.abs.region) or similar. Then use ppm.
Since the data were generated by a Cox process, it would be more appropriate to use kppm rather than ppm.
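For example, a minimal sketch (assuming the ppp.lgcp pattern and the covar.list images from your code; the covmodel mirrors the matern settings you passed to rLGCP):
fit.kppm <- kppm(ppp.lgcp ~ Radiation.sim + Precipitation.sim,
                 data = covar.list, clusters = "LGCP",
                 covmodel = list(model = "matern", nu = 1))
summary(fit.kppm)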
See the spatstat book for further information.

Kriging not working in RStudio: method not applicable (st_crs)

I have been working on an exercise in RStudio. I want to do a kriging interpolation, but I get an error that I have not been able to solve.
I'd appreciate it if someone could tell me how to fix it.
Here is my code:
#importing libraries
library(raster)
library(sf)
library(sp)
library(rgdal)
library(gstat)
library(tmap)
library(tidyverse)
# reading csv data
id <- "1u18s9g15USnzNhj-t2EEFYHsW6GZEIgd" # google file ID
silver_data <- read.csv(sprintf("https://docs.google.com/uc?id=%s&export=download", id))
#creating spatial dataframe
silver_2 <- st_as_sf(silver_data, coords = c("long", "lat"))
st_crs(silver_2) <- 4326
#coordinates transformation
silver_utm2 <- st_transform(silver_2, crs=9155)
# empirical variogram
ve <- variogram(silver ~ 1, silver_utm2, cutoff = 5000, width = 200)
plot(ve, plot.numbers = TRUE, asp = 1)
# candidate variogram model
vt <- vgm(psill = 350, model = "Sph", range = 2000, nugget = 0)
plot(ve, pl = TRUE, model = vt)
# automatic fit of the model to the empirical variogram
va <- fit.variogram(ve, vt)
plot(ve, pl = TRUE, model = va)
#empty raster in order to create grid for kriging interpolation
z <- raster(as(silver_utm2, "Spatial"), ncols = 200, nrows = 80)
# using raster to create grid
grilla2 <- rasterToPoints(z, spatial = TRUE)
gridded(grilla2) <- TRUE
grilla2 <- as(grilla2, "SpatialPixels")
# using kriging function
ok <- krige(silver~1, locations=silver_utm2, newdata=grilla2, model=va)
The error that appears:
> ok <- krige(silver~1, locations=silver_utm2, newdata=grilla2, model=va)
Error in UseMethod("st_crs<-") :
  no applicable method for 'st_crs<-' applied to an object of class "c('SpatialPixels', 'SpatialPoints', 'Spatial', 'SpatialVector')"
The tutorial I'm following is https://rstudio-pubs-static.s3.amazonaws.com/416462_1463d00750c54fce89955a925eaa4957.html, but the data are not the same.
st_crs() is a function from the sf package and is designed to operate on sf objects. As your code stands, grilla2 is an sp object; convert it to sf before using krige():
grilla2 <- sf::st_as_sf(grilla2)
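With that conversion in place, the original call should run (a minimal sketch, assuming the grid inherited its CRS from the raster template built on silver_utm2):
ok <- krige(silver ~ 1, locations = silver_utm2, newdata = grilla2, model = va)
plot(ok["var1.pred"])  # quick look at the kriged predictions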

Using H2oBinomialModel to predict outcomes in a raster stack

I use machine learning algorithms for species distribution modeling. This involves fitting a model in R and applying the model to a raster stack of environmental predictors using the predict() function. This is relatively straightforward for functions such as randomForest, glm, and maxent. However, I would like to use the best model selected using the h2o.automl function in the package h2o to predict occurrence probabilities across a stack of environmental predictors. Now, it is possible to convert raster stacks into data frames and then use these for h2o prediction. However, this tends to eat up a lot of RAM for large raster stacks. Below is an example using some publicly available low-resolution data.
library(sp)
library(raster)
library(maptools)
library(rgdal)
library(dismo)
library(h2o)
bioclim.data <- getData(name = "worldclim",
                        var = "bio",
                        res = 2.5)
obs.data <- read.csv("https://raw.githubusercontent.com/jcoliver/learn-r/gh-pages/data/Carnegiea-gigantea-GBIF.csv")
obs.data <- obs.data[!is.na(obs.data$latitude), ]
obs.data <- obs.data[, c("longitude", "latitude")]
obs.sp<-SpatialPoints(obs.data, proj4string = CRS("+proj=longlat +datum=WGS84 +no_defs"))
# Determine geographic extent of our data
max.lat <- ceiling(max(obs.data$latitude))
min.lat <- floor(min(obs.data$latitude))
max.lon <- ceiling(max(obs.data$longitude))
min.lon <- floor(min(obs.data$longitude))
geographic.extent <- extent(x = c(min.lon, max.lon, min.lat, max.lat))
bioclim.data <- crop(x = bioclim.data, y = geographic.extent)
background <- randomPoints(mask = bioclim.data,
                           n = nrow(obs.data),
                           ext = geographic.extent,
                           extf = 1.25)
colnames(background) <- c("longitude", "latitude")
background.sp <- SpatialPoints(background, proj4string = CRS("+proj=longlat +datum=WGS84 +no_defs"))
presence <- data.frame(Presence = rep(1, times = nrow(obs.data)), extract(bioclim.data, obs.sp))
absence <- data.frame(Presence = rep(0, times = nrow(obs.data)), extract(bioclim.data, background.sp))
df<-rbind(presence, absence)
df$Presence<-as.factor(df$Presence)
h2o.init(max_mem_size = "4g")
set.seed(1234)
df_h2o <- as.h2o(df)
splits <- h2o.splitFrame(df_h2o, c(0.7, 0.15), seed = 1234)
train <- h2o.assign(splits[[1]], "train")
valid <- h2o.assign(splits[[2]], "valid")
test <- h2o.assign(splits[[3]], "test")
y <- "Presence"
x <- setdiff(names(df_h2o), y)
aml <- h2o.automl(x = x,
                  y = y,
                  training_frame = train,
                  leaderboard_frame = valid,
                  max_runtime_secs = 60)  # default is 3600 secs (1 hour); more time, more accurate
pred <- as.data.frame(predict(aml, test))  # this works: predicted probabilities of presence vs. absence
pred_rstr <- predict(aml, bioclim.data)    # this doesn't work: newdata must be an H2OFrame
biodf <- as.data.frame(bioclim.data)       # forces the raster stack into a data frame; works, but only for low-resolution rasters
bio_h2o <- as.h2o(biodf)
pred2 <- as.data.frame(predict(aml, bio_h2o))
pred_rstr <- bioclim.data[[1]]             # pull one raster from the stack to serve as a template
values(pred_rstr) <- as.vector(pred2$p1)   # set raster values to the predicted probabilities
plot(pred_rstr)
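One way to keep memory bounded for larger rasters is to run the same data-frame round trip block by block, so only one block of rows is in RAM at a time. A hedged sketch (this is not an official h2o raster interface; it reuses aml and bioclim.data from above):
pred_rstr <- raster(bioclim.data)  # empty template with the stack's geometry
bs <- blockSize(bioclim.data)      # row blocks suggested by the raster package
out <- rep(NA_real_, ncell(pred_rstr))
for (i in seq_len(bs$n)) {
  vals <- as.data.frame(getValues(bioclim.data, row = bs$row[i], nrows = bs$nrows[i]))
  p <- as.data.frame(predict(aml, as.h2o(vals)))  # predict one block; all-NA rows (e.g. ocean) may need filtering
  cells <- cellFromRow(pred_rstr, bs$row[i]:(bs$row[i] + bs$nrows[i] - 1))
  out[cells] <- p$p1                              # probability of presence
}
values(pred_rstr) <- out
plot(pred_rstr)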

How can I include the temporal aspect when generating pseudo-absence data?

I have 608 observations of satellite-tagged turtles. I want to model these with environmental data which includes sea surface temperature, current speed, wind speed etc. Of course, both the tagging and environmental data vary spatially and temporally. I have generated pseudo-absence data using the code below which I adapted from here. However, it has now occurred to me that the data points I have generated are only spatial samples. Is there some way I can edit this code to sample temporally as well so that my resulting csv has a date/time for each point so that I can match it up to my environmental data? Alternatively, is there a different package I could try that would allow me to do this?
dir.create(path = "data")
library("sp")
library("raster")
library("maptools")
library("rgdal")
library("dismo")
bioclim.data <- getData(name = "worldclim",
                        var = "bio",
                        res = 2.5,
                        path = "data/")
# Read in observations
obs.data <- read.csv(file = "data/Presence.csv")
# Determine geographic extent of data
max.lat <- ceiling(max(obs.data$Latitude))
min.lat <- floor(min(obs.data$Latitude))
max.lon <- ceiling(max(obs.data$Longitude))
min.lon <- floor(min(obs.data$Longitude))
geographic.extent <- extent(x = c(min.lon, max.lon, min.lat, max.lat))
# Use the bioclim data files for sampling resolution
bil.files <- list.files(path = "data/wc2-5",
                        pattern = "\\.bil$",
                        full.names = TRUE)
# only need one file, so use the first one in the list of .bil files
mask <- raster(bil.files[1])
# Randomly sample points (same number as our observed points)
background <- randomPoints(mask = mask,             # provides resolution of sampling points
                           n = nrow(obs.data),      # number of random points
                           ext = geographic.extent, # spatially restricts sampling
                           extf = 1.25)             # expands sampling a little bit
write.csv(background, "pseudo-absence.csv")
I solved this issue by generating random times with the code below and merging the resulting .csv with the one above.
#ADD TIMES
time.start <- as.POSIXct('2014-12-01T01:00:00z', format = "%Y-%m-%dT%H:%M:%S")
time.end <- as.POSIXct('2015-04-30T01:00:00z', format = "%Y-%m-%dT%H:%M:%S")
seconds <- as.numeric(difftime(time.end, time.start, units = "secs"))
# Option with runif()
v <- round(runif(6000, 0, seconds))
# Option with sample()
v <- sample(1:seconds, 6000, replace = TRUE)
time.uniform <- time.start + v
write.csv(time.uniform, "time.csv")
tag<-read.csv("pseudo-absence.csv")
time<- read.csv("time.csv")
myfulldata = merge(tag,time)
write.csv(myfulldata, "pseudo-absence_with_time.csv")
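An in-memory alternative to the csv round trip is to attach one random time per pseudo-absence point directly, so the rows pair one-to-one (a sketch assuming the background and seconds objects from the code above):
tag <- as.data.frame(background)
tag$time <- time.start + round(runif(nrow(tag), 0, seconds))
write.csv(tag, "pseudo-absence_with_time.csv", row.names = FALSE)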
