Related
I am processing netCDF data for multiple years. The netCDF is for air pollutant data, and the latitude and longitude are provided as separate variables, not as part of the original grid.
LINK TO DATE: Sample Netcdf
These netCDF files provide Level 2 Nitrogen Dioxide data and they are downloaded from NASA Earthdata portal. The satellite is Sentinel-5P and the instrument is TROPOMI.
So when processing this data, you have to create variables for NO2, latitude and longitude. I am trying to create raster layers, and then save them as GeoTIFF files for my research.
The problem here is related to the fact that I don't know how best to create these rasters. The latitude and longitude is not equally spaced throughout the dataset, and I haven't figured out a way to accurately create these images. I created a model grid using the number of rows and columns provided by the netCDF files. In the list of variables this is called the scanline and ground_pixel, but when I plot it the cells in the final image don't look right.
This is how I upload the data:
## Open the netcdf
ncname <- no2files$filename[m]
ncfname <- paste(ncname, sep = "")
nc <- nc_open(ncfname)
## Get the necessary variables.
no2tc <-ncvar_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column")
lat <- ncvar_get(nc, "PRODUCT/latitude")
lon <- ncvar_get(nc, "PRODUCT/longitude")
qa <- ncvar_get(nc, "PRODUCT/qa_value")
fillvalue = ncatt_get(nc, "DETAILED_RESULTS/nitrogendioxide_total_column",
"_FillValue")
mfactor <- ncatt_get(nc, "DETAILED_RESULTS/nitrogendioxide_total_column",
"multiplication_factor_to_convert_to_molecules_percm2")
fillvalue_qa = ncatt_get(nc,"PRODUCT/qa_value",
"_FillValue")
no2tc[no2tc == fillvalue$value] <- NA
no2tc <- no2tc * mfactor$value
qa[qa == fillvalue_qa$value] <- NA
nc_close(nc)
# rm(ncfname)
no2vec <- as.vector(no2tc)
latvec <- as.vector(lat)
lonvec <- as.vector(lon)
qavec <- as.vector(qa)
dfsat <- data.frame(no2vec, lonvec, latvec)
dfqa <- data.frame(qavec,lonvec,latvec)
colnames(dfsat) <- c('z', 'x', 'y')
colnames(dfqa) <- c('z', 'x', 'y')
df <- rbind(df, dfsat)
dfqa <- rbind(df,dfqa)
rm(lat,lon,no2tc,qa,latvec,lonvec,no2vec,qavec)
This is how I currently create the raster:
## Create the raster. The ncol = 3245 and now = 450 are from the scanline and ground_pixel variables.
e <- extent(-180,180,-90,90)
r <- raster(e, ncol = 3245, nrow = 450)
xx <- rasterize(df[, 2:3], r, df[, 1], fun = mean)
qa_raster <- rasterize(dfqa[, 2:3], r, df[, 1], fun = mean)
crs(xx) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
crs(qa_raster) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
## Crop and plot the raster
## change shapefile coordinate system
# border <- spTransform(ontario, crs(xx))
aoi <- spTransform(ontario_buffer, crs(xx))
## Mask values with qa < 0.5 (this is the recommended value)
xx[qa_raster < 0.5 & xx < 0] <- NA
## This is the final plot
plot_tif <- crop(xx, extent(aoi))
### Use this if you want to view the plot.
mask_tif <- mask(plot_tif,aoi)
# plot(mask_tif)
# final <- plot(border,add=TRUE)
## Plot the raster
filename <- paste(i,".tif",sep="")
writeRaster(mask_tif,filename = filename,"GTiff", overwrite=TRUE)
The end results looks something like this:
Then I tried another method I found online, but you have to set a resolution. I COULD do this, but I just want to plot the cells AS THEY ARE, without any modification.
ncfname <- "S5P_OFFL_L2__NO2____20200107T173517_20200107T191647_11582_01_010302_20200109T103930.nc"
nc <- ncdf4::nc_open(ncfname)
mfactor = ncdf4::ncatt_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column","multiplication_factor_to_convert_to_molecules_percm2")
fillvalue = ncdf4::ncatt_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column","_FillValue")
my_unit = ncdf4::ncatt_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column","units")
my_product_name = ncdf4::ncatt_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column", "long_name")
mfactor <- mfactor$value
fillvalue <- fillvalue$value
vals <- ncdf4::ncvar_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column")
lat <- ncdf4::ncvar_get(nc, "PRODUCT/latitude")
lon <- ncdf4::ncvar_get(nc, "PRODUCT/longitude")
vals[vals == fillvalue] <- NA
vals_df = NULL
vals_df <- rbind(vals_df, data.frame(lat = as.vector(lat), lon = as.vector(lon), vals = as.vector(vals)))
pts <- vals_df
sp::coordinates(pts) <- ~lon + lat
my_projection <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
sp::proj4string(pts) <- sp::CRS(my_projection)
my_aoi <- ontario
crs_test <- raster::compareCRS(pts, my_aoi)
my_aoi <- sp::spTransform(my_aoi, CRS = as.character(raster::crs(pts)))
p <- methods::as(raster::extent(my_aoi), "SpatialPolygons")
sp::proj4string(p) <- sp::CRS(my_projection)
pts <- raster::crop(pts, p)
extent_distance_vertical <- geosphere::distm(c(raster::extent(pts)[1], raster::extent(pts)[3]), c(raster::extent(pts)[1], raster::extent(pts)[4]),
fun = geosphere::distHaversine)
vertical_mid_distance <- (raster::extent(pts)[4] - raster::extent(pts)[3])/2
lat_mid <- raster::extent(pts)[3] + vertical_mid_distance
horizontal_distance <- raster::extent(pts)[2] - raster::extent(pts)[1]
if (horizontal_distance > 180) {
one_degree_horizontal_distance <- geosphere::distm(c(1,
lat_mid), c(2, lat_mid), fun = geosphere::distHaversine)
extent_distance_horizontal <- one_degree_horizontal_distance *
horizontal_distance
} else {
extent_distance_horizontal <-
geosphere::distm(c(raster::extent(pts)[1],
lat_mid),
c(raster::extent(pts)[2], lat_mid),
fun = geosphere::distHaversine)
}
my_res <- 20000
ncol_rast <- as.integer(extent_distance_horizontal/my_res)
nrow_rast <- as.integer(extent_distance_vertical/my_res)
print(paste0("Create raster file from points"))
rast <- raster::raster(nrows = nrow_rast, ncols = ncol_rast,
crs = as.character(raster::crs(pts)), ext = raster::extent(pts),
vals = NULL)
final <- raster::rasterize(pts, rast, pts$vals, fun = mean)
final <- raster::mask(final, my_aoi)
sp::plot(final)
How can I create these raster layers accurately? Thanks!
With the example file
f <- "S5P_OFFL_L2__NO2____20200107T173517_20200107T191647_11582_01_010302_20200109T103930.nc"
You can do
library(terra)
r <- rast(f, paste0("/PRODUCT/", c("longitude", "latitude", "nitrogendioxide_tropospheric_column")))
r
#class : SpatRaster
#dimensions : 4172, 450, 3 (nrow, ncol, nlyr)
#resolution : 1, 1 (x, y)
#extent : -0.5, 449.5, -0.5, 4171.5 (xmin, xmax, ymin, ymax)
#coord. ref. :
#sources : longitude
# latitude
# nitrogendioxide_tropospheric_column
#varnames : longitude (pixel center longitude)
# latitude (pixel center latitude)
# nitrogendioxide_tropospheric_column (Tropospheric vertical column of nitrogen dioxide)
#names : longitude, latitude, nitrogendi~ric_column
#unit : degrees_east, degrees_north, mol m-2
#time : 2020-01-07
plot(r, nr=1)
The plot illustrates that the data are not organized as regular raster data (it they were, there would be a N-S gradient for latitude, and a E-W gradient for longitude). Also see plot(r$longitude, r$latitude). You can treat them as points in stead:
dp <- as.data.frame(r)
p <- vect(dp, geom=c("longitude", "latitude"))
Plotting takes a while, as there are > 1.5 million points, so I take a sample
plot(p[sample(nrow(p), 10000)], "nitrogendioxide_tropospheric_column")
If you want your data organized as a regular raster, you can use rasterize
x <- rast(res=1/6)
x <- rasterize(p, x, "nitrogendioxide_tropospheric_column", fun=mean)
plot(x > 0)
And you can get Ontario like this
can <- vect(raster::getData("GADM", country="CAN", level=1))
ontario <- can[can$NAME_1=="Ontario", ]
x <- crop(x, ontario)
x <- mask(x, ontario)
plot(x)
I want to calculate the share (%) of pixels classified as 1 from a list of files. For a single image the code works well, however, when I try to write it in a for loop R tells me named numeric(0) for all files.
How do I get what I want?
Single Image:
ras <- raster("path") # binary product
ras_df <- as.data.frame(ras) # creates data frame
ras_table <- table(ras_df$file) # creates table
share_suit_hab <- ras_table[names(ras_table)==1]/sum(ras_table[names(ras_table)]) # number of pixels with value 1 divided by sum of pixels with value 0 and 1 = share of suitable habitat (%)
print(share_suit_hab)
> ras
class : RasterLayer
dimensions : 1000, 1000, 1e+06 (nrow, ncol, ncell)
resolution : 2165.773, 2463.182 (x, y)
extent : -195054.2, 1970719, 2723279, 5186461 (xmin, xmax, ymin, ymax)
crs : +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
source : C:/Users/name/MASTERARBEIT/BASELINE/Eastern Arctic/Summer_EA_Output/ct/2006/cis_SGRDREA_20060703_pl_a.tif
names : cis_SGRDREA_20060703_pl_a
values : 0, 1 (min, max)
For Loop:
list_ct <- list.dirs("path")
i=0
for(year in list_ct){
ct_files_list <- list.files(year, recursive = FALSE, pattern = "\\.tif$", full.names = FALSE)
ct_file_df <- as.data.frame(paste0("path", i, "/", ct_files_list))
ct_file_df <- as.data.frame(matrix(unlist(ct_file_df), nrow= length(unlist(ct_file_df[1]))))
ct_table <- table(ct_file_df[, 1])
stored <- ct_table[names(ct_table)==1]/sum(ct_table[names(ct_table)])
print(stored)
}
This is the final code which is running perfectly!
list_ct <- list.dirs("path", recursive = FALSE)
stored <- list()
for (year in seq_along(list_ct)){
ct_file_list <- list.files(list_ct[year], recursive=FALSE, pattern = ".tif$", full.names = FALSE)
tmp <- list()
for (i in seq_along(ct_file_list)){
ct_file_df <- raster(paste0(list_ct[year], "/", ct_file_list[i])) %>% as.data.frame()
# do calculations
tmp[[i]] <- sum(ct_file_df[,1], na.rm=TRUE) / length(ct_file_df[!is.na(ct_file_df)[],1])
names(tmp)[i] <- paste0(list_ct[year], "/", ct_file_list[i])
print(tmp[i])
}
stored[[year]] <- tmp
names(stored)[year] <- paste0(list_ct[year])
}
Could you add a reproducible example (data incl.)?
You probably need to replace numeric(0) simply by 0. Numeric(0) does
not mean 0, it means a numeric vector of length zero (i.e., empty). I'm guessing you're probably assigning numeric(0)+1 which is still a numeric vector of 0.
Edit:
You have a folder containing multiple folders which each include 1 or more tif files. You want to loop through each of these folders, importing the tif(s) file, do a calculation, save the result.
In the following, my path contains 5 folders named '2006','2007','2008','2009' and '2010'. Each of these "year"-folders contain an .xlsx file. Each .xlsx file contains 1 column (here, you just need to select the right one in your data frame). This column has the same name in all excel files, "col1", and contains values between 0 and 1. Then this will work:
library(dplyr)
library(readxl)
#
list_ct <- list.dirs("mypath", recursive = FALSE)
stored <- list()
for (year in seq_along(list_ct)){
ct_file_list <- list.files(list_ct[year], recursive=FALSE, pattern = ".xlsx$", full.names = FALSE)
tmp <- list()
for (i in seq_along(ct_file_list)){
ct_file_df <- read_excel(paste0(list_ct[year], "/", ct_file_list[i])) %>% as.data.frame()
# do calculations ..
tmp[[i]] <- sum(ct_file_df$col1) / length(ct_file_df$col1)
names(tmp)[i] <- paste0(list_ct[year], "/", ct_file_list[i])
print(tmp[i])
}
stored[[year]] <- tmp
names(stored)[year] <- paste0(list_ct[year])
}
Instead of using "read_excel", you just use raster() like you did with the single file. Hope you can use the answer.
Example data
library(raster)
s <- stack(system.file("external/rlogo.grd", package="raster"))
s <- s > 200
#plot(s)
If your actual data is all for the same area (and the raster data have the same extent and resolution, you want to create a RasterStack (using the filenames) and use freq as below
f <- freq(s)
f
#$red
# value count
#[1,] 0 3975
#[2,] 1 3802
#$green
# value count
#[1,] 0 3915
#[2,] 1 3862
#$blue
# value count
#[1,] 0 3406
#[2,] 1 4371
Followed by
sapply(f, function(x) x[2,2]/sum(x[,2]))
# red.count green.count blue.count
# 0.4888775 0.4965925 0.5620419
If you cannot make a RasterStack you can make a list and lapply and continue as above, or use sapply and do this
ss <- as.list(s)
x <- sapply(ss, freq)
x[4,] / colSums(x[3:4, ])
#[1] 0.4888775 0.4965925 0.5620419
If you insist on a loop
res <- rep(NA, length(ss))
for (i in 1:length(ss)) {
# r <- raster(ss[i]) # if these were filenames
r <- ss[[i]] # here we extract from the list
x <- freq(r)[,2]
res[i] <- x[2] / sum(x)
}
res
# 0.4888775 0.4965925 0.5620419
Thank you!
This is working perfectly for all files of one year!
library(raster)
s_list <- list.files("C:/Users/OneDrive - wwfgermany/MASTERARBEIT/BASELINE/Eastern Arctic/Summer_EA_Output/area_calc/ct/2006/", full.names = T)
s <- raster::stack(s_list)
f <- freq(s, useNA = 'no')
f
ct_avg <- sapply(f, function(x) x[2,2]/sum(x[,2]))
ct_avg__mean <- mean(ct_avg)
ct_avg__mean
However, when I want to write it in another loop, to get one value per year as a final result in the end, I end up with an error saying subscript out of bounds. This is the code I am using:
setwd("C:/Users/MASTERARBEIT/BASELINE/Eastern Arctic/Summer_EA_Output/area_calc/ct/")
list_ct <- list.dirs("C:/Users/MASTERARBEIT/BASELINE/Eastern Arctic/Summer_EA_Output/area_calc/ct/")
i=0
for (year in list_ct) {
s_list <- list.files(year, recursive = FALSE, pattern = "\\.tif$", full.names = FALSE)
s <- raster::stack(s_list)
f <- freq(s, useNA = 'no')
f
ct_avg <- sapply(f, function(x) x[2,2]/sum(x[,2]))
ct_avg__mean <- mean(ct_avg)
ct_avg__mean
}
I have this dataframe in R:
library(raster)
# create a random dataframe with yearly values for each column
df <- data.frame(year = seq(1981,2012), a = runif(32,1,33), b = rnorm(32, 6, 18), c = rnorm(32, 3, 12),
d = rnorm(32, 0, 18))
and then this multilayer raster:
rs <- stack()
for (i in 1:1:32){
xy <- matrix(rnorm(400),20,20)
# Turn the matrix into a raster
rast <- raster(xy)
# Give it lat/lon coords for 20-30°E, 43-49°N
extent(rast) <- c(20,30,43,49)
rs <- addLayer(rs, rast)
}
# create a Z field for raster just created
years <- seq(as.Date("1981-01-01"), as.Date("2012-12-31"), by = "years")
aa <- setZ(rs, years)
names(rs) <- years
My question is: how would it be possible to obtain five rasters representing the correlation (let's say Spearman) between each column in dataframe df and the raster stack rs?
Thank you all for your help!
I am not sure what exactly you want to do. There are 32 values in each column of df, and 32 layers with 400 values in the RasterStack,
Perhaps you are looking for the correlation of the columns in df and the mean value of the layers? That you can do like this:
Your data
set.seed(0)
df <- data.frame(year = seq(1981,2012), a=runif(32,1,33), b=rnorm(32, 6, 18), c=rnorm(32, 3, 12), d=rnorm(32, 0, 18))
r <- raster(nrow=20, ncol=20, ext=extent(20,30,43,49))
rs <- stack(lapply(1:32, function(i) setValues(r, rnorm(400,20,20))))
years <- seq(as.Date("1981-01-01"), as.Date("2012-12-31"), by = "years")
names(rs) <- years
Solution
x <- cellStats(rs, mean)
sapply(2:5, function(i) cor(x, df[,i]))
#[1] 0.123391584 -0.007801092 -0.124336155 0.060774465
Well, I figured out a solution; don't know if is the best but I think is working.
Here is the example for column a from df; I created a dummy raster layer for each row in column a; after that, I used corLocal to have the correlation:
### create a raster layer for each row (year) for column 'a' in df
rs.r <- stack()
library(data.table)
### extract x and y coordinates for raster rs to create a raster stack
cord <- rasterToPoints(rs[[1]], spatial = F)
cord<- cord[,1:2]
head(cord)
### create a raster where each layer is the value in column a from df
year.s <- unique(df$year)
for (i in 1:length(df$year)){
print(df$year[i])
re <- df$a[df$year==year.s[i]]
c <- data.table(x = cord[,1], y = cord[,2], tt = re)
m <- rasterFromXYZ(c)
crs(m) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 "
rs.r <- addLayer(rs.r, m)
crs(rs.r) <-" +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
}
names(rs.r) <- df$year ### set the names for the layers
ext <- extent(rs)
rs.r <- setExtent(rs.r, ext)
rs.r<- projectRaster(rs.r, rs,method = 'ngb')
spplot(corLocal(rs.r, rs, 'spearman'))
I'm trying to calculate the SPI from CHIRPS monthly mean precipitation data, because it's too large I cut it down to my area of interest and here it is: https://www.dropbox.com/s/jpwcg8j5bdc5gq6/chirps_mensual_v1.nc?dl=0
I did this to open it:
require(utils)
require(colorRamps)
require(RNetCDF)
require(rasterVis)
require(rgdal)
library(ncdf4)
library(raster)
datos2 <- nc_open("Datos/chirps_mensual_v1.nc")
ppt_array <- ncvar_get(datos2, "precip")
#I'm only taking complete years so I took out two months from 2018
ppt_mes <- ppt_array[ , ,1:444]
I know there is a SPI library but I don't know how should I format the data in order to use it. So I tried to do it without the function by fitting the gamma distribution but I dont' know how to do it for this data base.
Does anyone know how to calculate SPI either with the function or by fitting the distribution?
I don't think the SPI package is doing what you (or anyone) thinks it is doing. If you use debug(spi) and step through the code, you'll see that in one step it fits a empirical cumulative distribution function (with ecdf()) to the first two and last rows of data. Why the first two and last rows? I have no clue, but whoever wrote this package also used a for loop to do t() to a matrix. Not to mention that I think it should use a Gamma distribution or Pearson III distribution not ecdf() (according to Guttman, N.B. (1999) Accepting the standardized precipitation index: a calculation algorithm. JAWRA Journal of the American Water Resources Association, 35, 311–322.).
At the end I made it by using the SPI library, the result will be a value for each month in each grid point, if you want to calculate the value over a specific area I made that too but I can share it if you want it too:
Also, this one I made it using CRU data but you can adjust it:
#spei cru 1x1
rm(list=ls(all=TRUE)); dev.off()
require(utils)
require(RNetCDF)
require(rasterVis)
require(rgdal)
library(ncdf4)
require(SPEI)
########################################################################################################
prec <- open.nc("pre_mensual.nc")
lon <- length(var.get.nc(prec, "lon"))
lat <- length(var.get.nc(prec, "lat"))
lon1 <- var.get.nc(prec, "lon")
lat1 <- var.get.nc(prec, "lat")
ppt <- var.get.nc(prec, "pre")
ppt <- ppt[ , ,109:564] #31 18 456 (1980-2017)
anio = 456/12
###########################################################################################################
#Reshape data
precip <- sapply(1:dim(ppt)[3], function(x)t(ppt[,,x]))
############################################################################################################
#This is for SPI-6, you can use either of them
spi_6 <- array(list(),(lon*lat))
for (i in 1:(lon*lat)) {
spi_6[[i]] <- spi(precip[i,], scale=6, na.rm=TRUE)
}
#############################################################################################################
#Go back to an array form
sapply(spi_6, '[[',2 )->matriz_ppt
ppt_6 <- array(aperm(matriz_ppt, c(2,1),c(37,63,456)));spi_c <- array(t(ppt_6), dim=c(37,63,456))
#############################################################################################################
#Save to netcdf
for(i in 1:456) {
nam <- paste("SPI", i, sep = "")
assign(nam,raster((spi_c[ , ,i]), xmn=min(lon1), xmx=max(lon1), ymn=min(lat1), ymx=max(lat1), crs=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0")) )
}
gpcc_spi <- stack(mget(paste0("SPI", 1:456)))
outfile <- "spi6_cru_1980_2017.nc"
crs(gpcc_spi) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
writeRaster(gpcc_spi, outfile, overwrite=TRUE, format="CDF", varname="SPEI", varunit="units",longname="SPEI CRU", xname="lon", yname="lat")
It's not the most stylish way to calculate it but it does work. :)
EDIT: If you want to calculate the SPI/SPEI over an area this is what I did:
library(SPEI)
library(ncdf4)
library(raster)
#
pre_nc <- nc_open("pre_1971_2017_Vts4.nc")
pre <- ncvar_get(pre_nc, "pre")
pre <- pre[, , 109:564] #This is for the time I'm interested in
lats <- ncvar_get(pre_nc, "lat")
lons <- ncvar_get(pre_nc, "lon")
times <- 0:467
# Read mask
#This is a mask you need to create that adjusts to your region of interest
#It consist of a matrix of 0's and 1's, the 1's are placed in the area
#you are interested in
mask1 <- nc_open("cuenca_IV_CDO_05_final.nc")
m1 <- ncvar_get(mask1, "Band1")
m1[m1 == 0] <- NA
#
# Apply mask to data
#
pre1 <- array(NA, dim=dim(pre))
#
for(lon in 1:length(lons)){
for(lat in 1:length(lats)){
pre1[lon,lat,] <- pre[lon,lat,]*m1[lon,lat]
}
}
#
# Mean over the area of interest
#
mean_pre1 <- apply(pre1,c(3),mean, na.rm=TRUE)
# Calculate SPI/SPEI
spi1 <- matrix(data= NA, nrow = 456, ncol = 48)
for (i in 1:48) {
spi1[,i] <- spi(data=ts(mean_pre1,freq=12),scale= i)$fitted
}
#This calculates SPI/SPEI-1 to SPI/SPEI-48, you can change it
# Save
#
write.table(spi1,'spi_1980_2017.csv',sep=';',row.names=FALSE)
We have a raster which represents the ordinal date corresponding to the start of growing season. That is, each pixel value in the raster lies between 1:365, representing the ordinal date.
I have also calculated cumulative growing degree days for all 365 days in the corresponding year. These data are loaded into R as a raster stack with 365 layers.
My goal is to randomly sample geographic locations on the start of growing season layer. I then hope to extract the value of cumulative growing degree days from those same coordinates, but only from the growing degree days stack's layer which corresponds to the start of season pixel value.
For example, if the start of season at a given pixel was the 100th day of the year, I would like to extract the growing degree days from that location on the 100th day of the year (nlayers = 100).
I have been attempting to write a function to accomplish this, but I can't seem to get it to work right. I would like to end up with a data frame or matrix showing my x location, y location, start of season day, and GDD for that day. Instead of many GDD values in one column, I get many columns of one GDD value.
It seems the problem is in my use of extract. I've experimented with the arguments nl, layer, and indexing the x argument with [[]]. They seem to produce the same result. Here's a simplified code with only 5 days to consider, and the function I am trying to construct.
Any help/suggestions is appreciated!
#============================================================
library(raster)
SOST <- raster()
SOST[] <- 1:5
r1 <- r2 <- r3 <- r4 <- r5 <- raster()
r1[] <- 10
r2[] <- 20
r3[] <- 30
r4[] <- 40
r5[] <- 50
GDD <- stack(r1,r2,r3,r4,r5)
getGDD <- function(sost, gdd, n){set.seed(232)
samp <- sampleRandom(sost, n, xy = TRUE,
na.rm = TRUE)
df <- data.frame('x'=as.numeric(), 'y'=
as.numeric(), 'SOST'=as.numeric(),
'GDD'=as.numeric())
df.temp <- data.frame('x' = samp[1:n,1], 'y' =
samp[1:n,2], 'SOST' = samp[,3],'GDD' =
extract(gdd, samp[1:n,1:2], nl = samp[1:n,3]))
df <- rbind(df, df.temp)
return(df)
}
getGDD(sost = SOST, gdd = GDD, n = 5)
It doesn't seem like this gathered a lot of attention, but I was able to solve it. Using the sample posted in the original question, the easiest solution is the stackSelect function. This was pointed out to me by Robert Hijmans on R-sig-geo.
x <- stackSelect(GDD, SOST)
set.seed(232)
samp <- sampleRandom(SOST, 5, xy = TRUE, na.rm = TRUE)[, -3]
extract(x, samp)
This works great if your data have the same extent and resolution. However, I failed to mention and include that my data do not align perfectly. Thus, as far as I know, I still need to create a function. With a little more thought, I was able to come up with the following example and function and solve the problem.
library(raster)
#SOST and GDD simulations with same ncell and extents as actual data:
SOST <- raster(nrow = 3991, ncol = 3025, xmn = 688635, xmx = 779385,
ymn = 4276125, ymx = 4395855, crs = "+proj=utm +zone=11 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0")
SOST[] <- 1:5
r1 <- r2 <- r3 <- r4 <- r5 <- raster(nrow = 3951, ncol = 2995, xmn = 688620.2, xmx = 779377.8, ymn = 4276121, ymx = 4395848, crs = "+proj=utm +zone=11 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0")
r1[] <- 10
r2[] <- 20
r3[] <- 30
r4[] <- 40
r5[] <- 50
GDD <- stack(r1,r2,r3,r4,r5)
getGDD <- function(sost, gdd, n){
set.seed(232)
samp <- sampleRandom(sost, size = n, xy = TRUE)
extr <- NULL
for(i in 1:n){
extr[i] <- extract(gdd[[samp[i,3]]], cbind(as.matrix(samp[i,1]),
as.matrix(samp[i,2])))
}
out <- data.frame(x = samp[,1], y = samp[,2], 'SOST' = samp[,3], 'GDD' = extr)
return(out)
}
test <- getGDD(sost = SOST, gdd = GDD, n = 5)
test