create netcdf in R with multiples variables - r

I want to create a netcdf with 11 or more different variables. For that I was trying to write it within a loop, but it is not working. My code would be something like that:
#Defining names and dimensions
nam_cwt <- c("N","NE","E","SE","S","SW","W","NW","C","A","U") #number of variables
CWTvar <- paste("var_",nam_cwt,sep="")
data_cwt <- list()
mat_cwt <- array()
dimX <- dim.def.ncdf("longitude", "degrees_east", Longvector)
dimY <- dim.def.ncdf("latitude", "degrees_north", Latvector)
dimT <- dim.def.ncdf("time","days since 1961-01-01",1:length(my.date), unlim=TRUE)
missval <- -999
#Creating and filling the netcdf file
for (i in 1:length(nam_cwt)){
#Getting every matrix of elements
data_cwt <- lapply(cwt_out,function(x) x[[1]][[i]][[2]])
dmatrix <- unlist(data_cwt)
mat_cwt <- array(dmatrix,dim=c(144,length(my.date),25))
tmat_cwt <- aperm(mat_cwt,c(1,3,2))
CWTvar[[i]] <- var.def.ncdf(nam_cwt[i],"days",list(dimX,dimY,dimT), ,missval,longname=nam_cwt[i])
ncfile <- create.ncdf("nctypes.nc",CWTvar)
put.var.ncdf(ncfile,CWTvar[i],tmat_cwt)
}
The problem is that I am not sure if I should use var.add.ncdf (instead put.var.ncdf)..
any idea about that???
How can I create and write the file within the loop??
Any help will be helpful!

Here's how I would do this: create a netcdf file first with one of the variables, then add and fill the others in a loop.
library(ncdf)
#Defining names and dimensions
nam_cwt <- c("N","NE","E","SE","S","SW","W","NW","C","A","U") #number of variables
CWTvar <- paste("var_",nam_cwt,sep="")
# values and length of dimensions
Longvector = -180:180
Latvector = -90:90
Datevector = 1:10
x= length(Longvector)
y= length(Latvector)
z= length(Datevector)
# define dimensions
dimX <- dim.def.ncdf("longitude", "degrees_east", Longvector)
dimY <- dim.def.ncdf("latitude", "degrees_north", Latvector)
dimT <- dim.def.ncdf("time","days since 1961-01-01",1:length(Datevector), unlim=TRUE)
# set missing value
missval <- -9999
# create the file with first variable so dimensions are set
CWTvar1 <- var.def.ncdf(name=nam_cwt[1],"days",list(dimX,dimY,dimT), missval=missval)
ncfile <- create.ncdf("nctypes.nc",vars=CWTvar1)
# open newly created file for writing data
mync = open.ncdf(con='nctypes.nc', write=T)
# create some data for first variable
mydata = array(data=runif(n=x*y*z, min=0, max=10), dim=c(x,y,z))
# add data to ncdf file
put.var.ncdf(nc=mync, varid='N', vals=mydata)
# now add all other variables with the same dimensions
for (i in 2:length(nam_cwt)){
# generate new data
mydata = array(data=runif(n=x*y*z, min=0, max=10)*i, dim=c(x,y,z))
# create new variable
CWTvar <- var.def.ncdf(name=nam_cwt[i],"days",list(dimX,dimY,dimT) ,missval=missval)
# add new variable to existing file
mync = var.add.ncdf(nc=mync, v=CWTvar)
# add data to variable in file
put.var.ncdf(nc=mync,varid=CWTvar$name,vals=mydata)
}
close.ncdf(mync)
# check file
newnc = open.ncdf(con='nctypes.nc')
par(mfrow=c(4,3))
for (i in 1:length(nam_cwt)){
zz = get.var.ncdf(newnc, varid=nam_cwt[i])
image(x=Longvector, y=Latvector, z=zz[,,1])
}
close.ncdf(newnc)

Related

Looped raster extractions from a SpatialPointsDataFrame; each row needs to extract from a corresponding raster indicated in a column

I am attempting to make a loop that extracts values from NDVI rasters for multiple shapefiles. The shapefiles are animal GPS locations that include a date and a time. The shapefiles include random locations that have been generated from the population's range and I am therefore assigning each random location that didn't have a date, a date from one of the GPS locations in a 5:1 ratio. I have a function that finds the closest date to the date for each GPS location or random point and stores it in the dataframe as "x$NDVIfile" The code all up to this point but I think the problem is here specifically:
for(j in length(nrow(x))){
a <- raster(paste0("E:/RSF_GIS/HabitatVariables/NDVI/",
x$NDVIfile[j], ".tif"))
x$ndvi[j] <- raster::extract(a, x[j,]) # extract each row based on the closest NDVI file
}
I want to extract from each row in my data the raster from the directory that corresponds to "x$NDVIfile[j]". My current outcome looks like this. All the extracted NDVI values are the same for each dataframe and I have the suspicion that the first raster referenced is the only raster being extracted from for each dataframe :
dataframe x
Here is the entire code :
RSF_dir <- list.files("E:/RSF_GIS/RSF_files",
pattern = "*.shp",
full.names = TRUE)
ndvi_dir = list.files("E:/RSF_GIS/HabitatVariables/NDVI",
pattern = "*.tif",
full.names = FALSE)
ndvi_dir.df <- tools::file_path_sans_ext(basename(ndvi_dir))
ndvi_dir.df <- as.Date(ndvi_dir.df)
ndvi_dir.df <- as.data.frame(ndvi_dir.df)
for (i in 1:length(RSF_dir)) {
x <- rgdal::readOGR(RSF_dir[i])
x <- as.data.frame(x)
nona <- x$Acqst_T[!is.na(x$Acqst_T)] # make a list of non- NA values
nona <- rep.int(nona, times = 6) # have the list repeat itself 6 times (5:1 = random_location:GPS_fix)
x$Acqst_T <- dplyr::coalesce(x$Acqst_T, nona) # and assign to NAs
x$date <- as.Date(x$Acqst_T, format = '%Y-%m-%d %H:%M:%S')
min_distances <- as.numeric(x$date)- matrix(rep(as.numeric(ndvi_dir.df$ndvi_dir),nrow(x)),ncol=length(ndvi_dir.df$ndvi_dir),byrow=T)
min_distances <- as.data.frame(t(min_distances))
closest <- sapply(min_distances,function(o) { # function to find the closest NDVI date for each GPS fix
w <- which(o==min(o[o>0])); # (MOD09Q1 collects imagery every 8 days)
ifelse(length(w)==0,NA,w)
})
x$NDVIfile <- as.Date(ndvi_dir.df$ndvi_dir[closest])
x <- SpatialPointsDataFrame(data.frame(x$coords.x1, x$coords.x2), x, proj4string=veg_INREV#proj4string)
for(j in length(nrow(x))){
a <- raster(paste0("E:/RSF_GIS/HabitatVariables/NDVI/",
x$NDVIfile[j], ".tif"))
x$ndvi[j] <- raster::extract(a, x[j,]) # extract each row based on the closest NDVI file
}
writeOGR(obj= x, dsn="E:/RSF_GIS/RSF_files/trial",
layer=(paste0(tools::file_path_sans_ext(basename(RSF_dir[i])))), driver="ESRI Shapefile", overwrite_layer = TRUE)
}
Thanks!
Answering my own question, the solution was simply a change in code indicating the iterations of the loop:
for(j in length(nrow(x))){
...
}
should have been instead
for(j in 1:nrow(x)){
...
}

Create new netcdf with mean temperature as variable from two netcdf with max and min temperature as variable in R

I have two netcdf files with same characteristics (lat, lon, time period), one with max temperature as variable, the other one minimum temperature. I would like to create a new netcdf file exactly like the original ones, but instead of max and min temperature, I need the mean temperature as variable.
I am quite new with R and manipulating netcdf files, I would highly appreciate any help!
The code and the two files out of which I need to compute the mean are at this link. The only code is below this text.
The code is working well, but the resulting netcdf file is not correct (dimensions etc. are differing).
Thanks a lot!
# Compute mean
library(ncdf4)
library(raster)
library(rgdal)
library(lubridate)
library(geosphere)
library(reshape2)
library(ggplot2)
setwd("~/")
nc_max <- nc_open('Obs_tas_max_1983-2005.nc')
nc_min <- nc_open('Obs_tas_min_1983-2005.nc')
X <- list()
h <- 1
lat <- ncvar_get(nc_max, "lat");
nlat <- nrow(lat)
lon <- ncvar_get(nc_max, "lon");
nlon <- nrow(lon)
# lat <- as.vector(lat);
# lon <- as.vector(lon)
t_max <- ncvar_get(nc_max, "tas_max");
t_min <- ncvar_get(nc_min, "tas_min");
tas <- (t_max+t_min)/2
sw <- as.character('tas_max')
time <- ncvar_get(nc_max, "time");
ndays <- length(time)
v <- matrix(NA, length(lat), ndays)
# for (t in 1:ndays) {
# x <- ncvar_get(nc_max, sw, start = c(1, 1, t), count = c(nlon, nlat, 1))
# x <- as.vector(x)
# v[ ,t] <- x
X[[h]] <- (v)
# h <- h + 1
# nc_close(nc_max)
# nc_close(nc_min)
X <- do.call(rbind, X) # Time on the rows, coordinates on the columns
dt <- seq.Date(as.Date("1983-01-01"), as.Date("2005-12-31"), 'day')
m <- month(dt)
d <- day(dt)
idx <- !(m == 2 & d == 29)
X <- X[idx, ]
dim_idx <- ncdim_def(name='Index',
units='m',
longname='idx',
vals= 1:nrow(X))
dim_time <- ncdim_def('time',
units='days from 1983-01-01',
longname='time',
calendar="standard", vals=1:idx)
varLat <- ncvar_def(name='lat',
units='degrees_north',
dim=list(dim_idx),
missval=NA,
longname='latitude',
prec='double'
)
varLon <- ncvar_def(name='lon',
units='degrees_east',
dim=list(dim_idx),
missval=NA,
longname='latitude',
prec='double'
)
varX <- ncvar_def(name='tas',
units= 'degrees Celsius',
dim=list(dim_time, dim_idx),
missval=NA,
longname='Temperature'
)
vars <- list(varLat, varLon, varX)
outputfile <- paste('tas', '.nc', sep = '_')
con <- nc_create(outputfile, vars)
ncvar_put(con, varLat, lat)
ncvar_put(con, varLon, lon)
ncvar_put(con, varX, X)
nc_close(con)
I know you are asking for R help, but I think this might be done in a few lines of nco.
Something like this:
# change variable name to be the same in each netcdf file
ncrename -v tas_max,tas Obs_tas_max_1983-2005.nc tmax.nc
ncrename -v tas_min,tas Obs_tas_min_1983-2005.nc tmin.nc
# ensemble average of tas variable across the two files
ncea tmax.nc tmin.nc tas_out.nc
# change the long_name attribute
ncatted -O -a long_name,tas,o,c,Temperature tas_out.nc

Using loops to extract coordinates, match them and write to file

I am trying to use for loops (or the apply function as an alternative) to extract coordinates from a data.frame, search for the closest point within the E-OBS gridded dataset, extract the temperature-data for time x1-x2 and write it to another excel file.
While the code works to extract single data points, I seem unable to include this code within a loop and to add the results potentially next to the input-coordinates.
library(sp)
library(raster)
library(ncdf4)
#Coordinates
df
ID site E N
1 1 site_place_date1 7.558758 47.81004
2 2 site_place_date2 7.582749 47.63411
3 3 site_place_date3 7.607968 48.01475
4 4 site_place_date4 7.644660 47.67139
... ... ... ...`
Set coordinates of target point MANUALLY:
lon <- 7.558758 # longitude of location
lat <- 47.81004 # latitude of location
#Mean daily temperature
ncin <- nc_open("tg_0.25deg_reg_v17.0.nc")
print(ncin)
t <- ncvar_get(ncin,"time")
tunits <- ncatt_get(ncin,"time","units")nt <- dim(t)
nt
obsoutput <- ncvar_get(ncin,
start= c(which.min(abs(ncin$dim$longitude$vals - lon)), # look for closest long
which.min(abs(ncin$dim$latitude$vals - lat)), # look for closest lat
1),
count=c(1,1,-1))
DataMeanT <- data.frame(DateN= t, MeanDailyT = obsoutput)
nc_close(ncin)
head(DataMeanT)
#check if there are NAs =999
summary(DataMeanT)
Data = DataMeanT
Data$Date = as.Date(Data$DateN,origin="20000-01-01")
Data$Year = format(Data$Date,"%Y")
Data$Month = format(Data$Date,"%m")
head(Data)
Data$YearMonth = format(Data$Date, format="%Y-%b")
Data_annual = aggregate(("T_AnnualMean" = MeanDailyT) ~ Year,data = Data, FUN = mean,na.action = na.pass)
names(Data_annual)[2] <- "AirT"
head(Data_annual)
#Export table
write.table(Data_annual, "Site_AirTemp.csv", row.names = FALSE, append = FALSE, col.names = TRUE, sep = ", ", quote = TRUE)
The aim is to run the script as part of a loop for all coordinates in df and to write the temperature data to a new data-table with information on site-ID or alternatively into the next columns of df.
Simply wrap your entire process in a defined method and use an apply function to pass in lon/lat coordinates. One great candidate is mapply or its wrapper Map to iterate elementwise between both vectors of df$E and df$N. Also, a third argument, df$site, is passed into method for unique CSV names as right now the same file will be overwritten.
Below some non-assignment lines such as head or summary are removed since they do nothing inside a method. Also context managers, within and with are used to avoid repetition of Data$ for more streamlined data manipulation. The Map call writes to file AND builds a list of aggregated data frames for use later.
Function
my_function <- function(lon, lat, site) {
# Mean daily temperature
ncin <- nc_open("tg_0.25deg_reg_v17.0.nc")
print(ncin)
t <- ncvar_get(ncin,"time")
tunits <- ncatt_get(ncin,"time","units")nt <- dim(t)
# look for closest lon and lat
obsoutput <- ncvar_get(ncin,
start = c(which.min(abs(ncin$dim$longitude$vals - lon)),
which.min(abs(ncin$dim$latitude$vals - lat)),
1),
count = c(1,1,-1))
DataMeanT <- data.frame(DateN = t, MeanDailyT = obsoutput)
nc_close(ncin)
Data <- within(DataMeanT, {
Date <- as.Date(DateN, origin="2000-01-01")
Year <- format(Date,"%Y")
Month <- format(Date,"%m")
YearMonth <- format(Date, format="%Y-%b")
})
Data_annual <- with(Data, aggregate(list("AirT" = MeanDailyT), list(Year=Year),
FUN = mean, na.action = na.pass))
# Export table
write.table(Data_annual, paste0("Site_AirTemp_", site, "_.csv"), row.names=FALSE,
append = FALSE, col.names = TRUE, sep = ", ", quote = TRUE)
# SAVE AGGREGATED DATA FRAME
return(Data_annual)
}
Call
# ITERATE THROUGH EACH LON/LAT PAIR ELEMENTWISE
df_list <- Map(my_function, df$E, df$N, df$site)
# df_list <- mapply(my_function, df$E, df$N, df$site, SIMPLIFY=FALSE) # EQUIVALENT CALL
You can probably do:
library(raster)
b <- brick("tg_0.25deg_reg_v17.0.nc")
e <- extract(b, df[, c('E', 'N')])

Converting a raster to a csv in R

I wish to convert a raster to a csv file. I have tried to convert a raster to a dataframe on one file just to see if it works. I have tried using:
as.data.frame( rasterToPoints(species) )
but I get an error when I try to write "species" to a csv :
Error in as.data.frame.default(x[[i]], optional = TRUE) :
cannot coerce class "structure("RasterLayer", package = "raster")" to a data.frame
This is my code (I need to convert multiple rasters to csv (see the loop))
#start loop
file.names <- dir(path, pattern=".csv")
for(i in 1:length(file.names)){
file<- read.csv(file.name[i], header = TRUE, stringsAsFactors=FALSE)
#subsetting each file and renaming column header names
sub.file<-subset(file, select = c('Matched.Scientific.Name', 'Vernacular.Name...matched', 'Latitude...processed', 'Longitude...processed'))
names(sub.file) <- c('species', 'name', 'Lat','Lon')
#turn into a SpatialPointsDataFrame
coordinates(sub.file) <- ~ Lon + Lat
proj4string(sub.file) <- '+init=EPSG:4326'
plot(sub.file, axes=TRUE)
#converting to BNG
sub.file.BNG <- spTransform(sub.file, '+init=EPSG:27700')
plot(sub.file.BNG, axes=TRUE)
#creating template raster
template <- raster(xmn=400000, xmx=600000, ymn=200000, ymx=800000, res=25000, crs='+init=EPSG:27700')
#point data > presence grid
species <- rasterize(sub.file.BNG, template, field=1)
plot(species)
# UK wide
template <- raster(xmn=-200000, xmx=700000, ymn=0, ymx=1250000, res=25000, crs='+init=EPSG:27700')
# use that to turn species point data into a presence grid
species <- rasterize(sub.file, template, field=1)
plot(species)
#converting a raster>dataframe>csv?????
as.data.frame( rasterToPoints(species) )
}
Always provide some example data when asking a question.
library(raster)
f <- system.file("external/test.grd", package="raster")
r <- raster(f)
To get the cell values
x <- as.data.frame(r)
head(x, 2)
# test
#1 NA
#2 NA
To get the cell coordinates and values, only for cells that are not NA
x <- rasterToPoints(r)
head(x, 2)
# x y test
#[1,] 181180 333740 633.686
#[2,] 181140 333700 712.545
To get the cell coordinates and values, only for all cells (including NA)
x <- cbind(coordinates(r), v=values(r))
head(x, 2)
# x y v
#[1,] 178420 333980 NA
#[2,] 178460 333980 NA
Whichever one you choose, you can then do
write.csv(x, "test.csv")
The mistake you made is that you did not assign the result of as.data.frame to a variable, and then tried to write the RasterLayer with write.csv. That is an error, and you get
write.csv(r)
#Error in as.data.frame.default(x[[i]], optional = TRUE) :
# cannot coerce class ‘structure("RasterLayer", package = "raster")’ to a
# data.frame
By the way, if you have multiple rasters, you may want to combine them first
s <- stack(r, r, r)
x <- rasterToPoints(s)
head(x, 2)
# x y test.1 test.2 test.3
#[1,] 181180 333740 633.686 633.686 633.686
#[2,] 181140 333700 712.545 712.545 712.545
write.csv(x, "test.csv")
Assuming your raster is "species"
species<- raster("C:/.../species.tif")
To perform this conversion, it is necessary to take the values of each pixel: X coordinates (1), Y coordinates (2) and own values of each cell (3).
# don't run these lines
#(1) = coordinates (species) [, 1]
#(2) = coordinates (species) [, 2]
#(3) = values (species)
Having these expressions we can add them to a dataframe as follows
dat<- data.frame("X"=coordinates(species)[,1],"Y"=coordinates(species)
[,2],"Values"=values(species))

R: Creating a new raster/shape file from calculations done using variables from a shape file and a raster

I am currently trying to create a new raster or shape file based on a conditional calculation that needs to be done over every value in a shape value based on a value in a raster file. I don't usually work with raster and shape files, so I am pretty out of my element here. I'm asking this in general terms, but here is the data I am using so hopefully it will give a better understanding of what I am trying to accomplish:
rast_norm <- ftp://prism.nacse.org/normals_4km/tmean/PRISM_tmean_30yr_normal_4kmM2_04_bil.zip
shp_probs <- ftp://ftp.cpc.ncep.noaa.gov/GIS/us_tempprcpfcst/seastemp_201603.zip
The main objective is to take the probability associated with each point (latitude and longitude) in shp_probs and multiply it by the value that corresponds to the same latitude and longitude in rast_norm, along with some other calculations afterward. If I had two data.tables, I could do something like the following:
dt1 <- data.table(col1 = c(0:3), col2 = c(1:4)*11, factor1 = sqrt(c(285:288))
# # Output # #
# col1 col2 factor1
# 0 11 16.88194
# 1 22 16.91153
# 2 33 16.94107
# 3 44 16.97056
dt2 <- data.table(col1 = c(0:3), col2 = c(1:4)*11, factor2 = abs(sin(c(1:4))))
# # Output # #
# col1 col2 factor1
# 0 11 0.8414710
# 1 22 0.9092974
# 2 33 0.1411200
# 3 44 0.7568025
dt3 <- merge(dt1, dt2, by = c("col1", "col2"))
dt3$factor1 <- dt3$factor1 * dt3$factor2
dt3$factor2 <- NULL
# # Output # #
# col1 col2 factor1
# 0 11 14.205665
# 1 22 15.377615
# 2 33 2.390725
# 3 44 12.843364
Easy-peasy using data tables. But I am at a loss trying to do this with a Raster and a SpatialPolygonsDataFrame. Here's what I have so far to read in and clean up the files:
# Importing the "rast_norm" file, the first listed above with a link
rast_norm <- "/my/file/path/PRISM_tmean_30yr_normal_4kmM2_04_bil.zip"
zipdirec <- "/my/zip/directory"
unzip(rast_norm, exdir = zipdirec)
# Get the correct file from the file list
rast_norm <- list.files(zipdirec, full.names = TRUE, pattern = ".bil")
rast_norm <- rast_norm[!grepl("\\.xml", rast_norm)]
# Convert to raster
rast_norm <- raster(rast_norm)
Plotting rast_norm on its own gives this map.
# Importing the "shp_probs" file, the second listed above with a link
shp_probs <- "/my/file/path/seastemp_201603.zip"
zipdirec <- "/my/zip/directory"
unzip(shp_probs, exdir = zipdirec, overwrite = TRUE)
# Get the correct file from the list of file names and find the layer name
layer_name <- list.files(zipdirec, pattern = "lead14")
layer_name <- layer_name[grepl(".shp", layer_name)]
layer_name <- layer_name[!grepl("\\.xml", layer_name)]
layer_name <- do.call("rbind", strsplit(layer_name, "\\.shp"))[,1]
layer_name <- unique(layer_name)
# Use the layer name to read in the shape file
shp_probs <- readOGR(shp_probs, layer = layer_name)
names_levels <- paste0(shp_probs$Cat, shp_probs$Prob)
names_levels <- gsub("Below", "-", names_levels)
names_levels <- gsub("Above", "+", names_levels)
names_levels <- as.integer(names_levels)
shp_probs#data$id <- names_levels
shp_probs <- as(shp_probs, "SpatialPolygons")
# Create a data frame of values to use in conjunction with the existing id's
weights <- data.table(id = shp_probs$id, weight = shp_probs$id)
weights$weight <- c(.80, .80, .10, .10, .10, .10, .10, .10, .80, .10, .10, .10, .10, .10)
shp_probs <- SpatialPolygonsDataFrame(otlk_sp, weights, match.ID = FALSE)
Plotting shp_probs on its own gives this map.
I now want to take the probabilities that are associated with the shp_probs file and multiply it by the amounts of rainfall associated with the rast_norm file and multiply again by the weight associated with the probability in the shp_probs file.
I really don't know what to do and any help would be very much appreciated. How do I extract all of the corresponding data points for matching latitudes and longitudes? I think if I knwo that, I will know what to do.
Thank you, in advance.
Assuming that you want to perform this calculation for each grid cell of your raster, you can do something like this:
Download/read data, and add weight column. Note that here I've just used random weights, since your example seems to assign 14 weights to 7 polygons. Also, I'm not sure what purpose your id column serves, so I've skipped that part.
library(raster)
library(rgdal)
download.file('ftp://prism.nacse.org/normals_4km/tmean/PRISM_tmean_30yr_normal_4kmM2_04_bil.zip',
fr <- tempfile(), mode='wb')
download.file('ftp://ftp.cpc.ncep.noaa.gov/GIS/us_tempprcpfcst/seastemp_201603.zip',
fs <- tempfile(), mode='wb')
unzip(fr, exdir=tempdir())
unzip(fs, exdir=tempdir())
r <- raster(file.path(tempdir(), 'PRISM_tmean_30yr_normal_4kmM2_04_bil.bil'))
s <- readOGR(tempdir(), 'lead14_Apr_temp')
s$weight <- runif(length(s))
Perform spatial overlay of the coordinates of the raster cells and the polygons. (Alternatively, you could use raster::rasterize twice to convert the Prob and id fields to rasters, and then multiplied the three rasters.)
xy <- SpatialPoints(coordinates(r), proj4string=crs(r))
o <- over(xy, s)
Create a new raster with the same extent/dimensions as the original raster, and assign the appropriate values to its cells.
r2 <- raster(r)
r2[] <- r[] * o$Prob * o$weight
With these random data, the result looks something like this:

Resources