I am currently trying to extract rainfall data from Mexico from the CHIRPS database. The goal is to get a comprehensive database of monthly rainfall over a period of 15 years. This involves merging a lot of columns after extracting the data from .tif-files which contain the information on weather conditions in a specific months.
CVE_ENT and CVE_MUN are the two variables that later on help me to identify individual municipalities.
Running my code in R and looking at the resulting data frame everything looks fine. However, as soon as I try to extract it to become a .dta or .csv file, I get the following error message:
fwrite(vextractall, file="rainfall55.csv")
Error in fwrite(vextractall, file = "rainfall55.csv") :
Column 4's length (1) is not the same as column 1's length (2456)
The error occurs for multiple other scenarios e.g. if using write.dta.
Anyone who possibly knows on what I am missing out on?
Thanks a lot in advance.
#Data get methods#
tif.raster1 <- raster('chirps-v2.0.1999.01.tif')
crs.LL <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84"
tif.raster1 <- projectRaster(tif.raster1, crs = crs.LL)
mexico2shp <- readOGR(dsn='GIS Mexican Municipalities', layer='Mexican Municipalities')
tif.raster1 <- crop(tif.raster1, extent(mexico2shp))
vras.tif1 <- velox(tif.raster1)
iters <- nrow(mexico2shp)
#mapping function#
vextractall <- vras.tif1$extract(mexico2shp, fun=mean)
mexicomm <- as.data.frame(mexico2shp)
vextractall <- as.data.frame(vextractall)
iters <- nrow(mexico2shp)
x <- foreach(a=1:iters) %do% {
if(is.na(vextractall[a,1])) {
ext <- raster::extract(tif.raster1, mexico2shp[a,], fun=mean)
vextractall[a,1] <- ext[1,1]
}
}
vextractall<-as.data.frame(vextractall)
vextractall$CVE_ENT <- mexicomm[,c("CVE_ENT")]
vextractall$CVE_MUN <- mexicomm[,c("CVE_MUN")]
vextractall<-vextractall[,c(ncol(vextractall), 1:(ncol(vextractall)-1))]
vextractall<-vextractall[,c(ncol(vextractall), 1:(ncol(vextractall)-1))]
vextractall <- plyr::rename(vextractall, c("V1"="Milimeters011999"))
tif.raster1 <- raster('chirps-v2.0.1999.02.tif')
tif.raster1 <- crop(tif.raster1, extent(mexico2shp))
vras.tif1 <- velox(tif.raster1)
crs.LL <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84"
tif.raster1 <- projectRaster(tif.raster1, crs = crs.LL)
vextract2 <- vras.tif1$extract(mexico2shp, fun=mean)
vextract2 <- as.data.frame(vextract2)
iters <- nrow(mexico2shp)
foreach(a=1:iters) %do% {
if(is.na(vextract2[a,1])) {
ext <- raster::extract(tif.raster1, mexico2shp[a,], fun=mean)
vextract2[a,1] <- ext[1,1]
}
}
vextractall<-as.data.frame(vextractall)
vextract2<-as.data.frame(vextract2)
vextractall$Milimeters021999 <- vextract2
Related
Since a couple of weeks ive been trying to get a NetCDF file into a dataframe. Even though Im succesfull in extracting the variables/dimensions and plotting a slice from the ncdf file, all the data is husseled when cbinding it into a dataframe and then plotting it. The data is weather data from Copernicus, with data for each longitude and latitude point in the world. The ultimate goal here is to raster the dataframe to be able to categorize weather per raster over time.
The data can be retrieved from https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels?tab=overview.
My code looks like this:
library(ncdf4)
library(raster)
library(ggplot2)
##dataset, 5 augustus
###bron: https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels?tab=overview
weatherpress <- nc_open("C:/Users/heinj/OneDrive/Documenten/Universiteit/Master/Master Thesis/dataset/allecoors5augweer.nc")
{
sink('WeatherPressure5aug.txt')
print(weatherpress)
sink()
}
print(weatherpress)
long<- ncvar_get(weatherpress, "longitude")
lat <- ncvar_get(weatherpress, "latitude")
tijd <- ncvar_get(weatherpress, "time")
rain <- ncvar_get(weatherpress,"crwc")
temperature <- ncvar_get(weatherpress, "t") #temperature
Uwind <- ncvar_get(weatherpress, "u") #u wind
Vwind <- ncvar_get(weatherpress, "v") # v wind
#removal NA's
fillvaluecrwc <- ncatt_get(weatherpress, "crwc", "_FillValue")
rain[rain == fillvaluecrwc$value] <- NA
rain <- na.omit(rain)
fillvaluet <- ncatt_get(weatherpress, "t", "_FillValue")
temperature[temperature == fillvaluet$value] <- NA
temperature <- na.omit(temperature)
fillvalueu <- ncatt_get(weatherpress, "u", "_FillValue")
Uwind[Uwind == fillvalueu$value] <- NA
Uwind <- na.omit(Uwind)
fillvaluev <- ncatt_get(weatherpress, "v", "_FillValue")
Vwind[Vwind == fillvaluev$value] <- NA
Vwind <- na.omit(Vwind)
min(temperature)
#correcting longitude
nc_close(weatherpress)
#plotje
temperature_slice <- temperature[, ,1]
r_temperature <- raster(t(temperature_slice), xmn=min(long),
xmx=max(long), ymn=min(lat), ymx=max(lat),
crs=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
plot(r_temperature)
#binding into a dataframe
weather <- cbind(long, lat, rain, tijd, temperature, Uwind, Vwind)
weather <- as.data.frame(weather)
View(weather)
#subset temperature
only_temperature <- weather
only_temperature <- subset(only_temperature[,c(1:2, 5)])
head(only_temperature)
summary(only_temperature)
##ggplot###
ggplot(as.data.frame(poging1.df), aes(x = long, y = lat)) +
geom_raster(aes(fill = temperature))
### rastering ###
only_temp <- only_temperature
r = raster(xmn = min(long), xmx=max(long),
ymn=min(lat), ymx=max(lat), res = 10)
p = as(r#extent, 'SpatialPolygons')
cordi <- only_temp[c("long", "lat")]
coordinates(cordi) <- ~long + lat
only_temp <- SpatialPointsDataFrame(cordi, only_temp)
meanr <- rasterize(only_temp, r, "temperature", fun = mean)
plot(meanr)
where the ggplot and meanr plot look different from the r_plot (the correct one) - with the temperature belonging to Antartica going dimensional through the plot.
Does anybody know where my problem lies?
Thanks in advance!
If your goal is to make a raster, you can use the terra package like this:
library(terra)
f <- "allecoors5augweer.nc"
r <- rast(f)
Or, to get a single variable, t in this case:
r <- rast(f, "t")
Also see sds(f). Alternatively, you can use raster::brick(f). To inspect the data, you can do
plot(r)
Just to answer your question about making a data.frame from a ncdf file (which does not seem to be necessary for your goals). It may depend a bit on the file at hand (for example, are there sub-datasets in the file, and how do you want to treat those --- it would have been very useful if you had shared a file). You can use tidync for any nc file, but if it is spatial (gridded) data, it may be easiest to use raster or terra. With terra you can do
library(terra)
f <- "allecoors5augweer.nc"
r <- rast(f)
d <- as.data.frame(r)
I have a raster layer that has values as factors, and an attribute table. I would like to convert this to a SpatialPixelsDataFrame however when I try:
library(sp)
library(sf)
library(rgdal)
library(raster)
library(FedData)
library(rasterVis)
create_factor_rast <- function(base_rast,Spatial_polygon,fieldname) {
nam <- unique(Spatial_polygon[[fieldname]])
nam_df <- data.frame(ID = 1:length(nam), nam = nam)
idfieldname = paste0(fieldname,"ID")
Spatial_polygon[[idfieldname]] <- nam_df$ID[match(Spatial_polygon[[fieldname]],nam_df$nam)]
new_raster <- rasterize(x=Spatial_polygon, y=base_rast, field = idfieldname)
new_raster <- ratify(new_raster)
# # Create levels
rat <- levels(new_raster)[[1]]
rat$names <- nam_df$nam
rat$IDs <- nam_df$ID
levels(new_raster) <- rat
new_raster
}
spft <- "+proj=lcc +lat_1=40 +lat_2=43 +lat_0=39.83333333333334 +lon_0=-100 +x_0=500000.0000000002 +y_0=0 +datum=NAD83 +units=us-ft +no_defs"
SSURGO.dl <- FedData::get_ssurgo(template=c('NE043'), label = "Tau",raw.dir = "./SSURGO_r/RAW/SSURGO",
extraction.dir = paste0("./SSURGO_R/EXTRACTIONS/", "soils","/SSURGO"))
SSURGO.dl_shp<-SSURGO.dl$spatial
unlink('SSURGO_r',recursive=TRUE)
# extract the muaggatt table
muaggatt<-SSURGO.dl[["tabular"]][["muaggatt"]]
# get only the fields needed, must have mukey to do the join
muaggatt <- muaggatt[c("mukey","hydgrpdcd","drclassdcd")]
SSURGO_merge <- merge(SSURGO.dl_shp,muaggatt,by.x = "MUKEY", by.y="mukey")
# Convert merged SSURGO polygon to Stateplane feet
SSURGO_merge <- sf::st_transform(SSURGO_merge,crs(spft))
SSURGO_merge <- as(SSURGO_merge,"Spatial")
grid_cell_size <- 100 # ~30 meters
shp_extent <- extent(SSURGO_merge)
r <- raster(SSURGO_merge,resolution = grid_cell_size,crs=crs(SSURGO_merge))
r.drclassdcd <- create_factor_rast(r, SSURGO_merge,'drclassdcd')
rasterVis::levelplot(r.drclassdcd)
r.drclassdcd_sp <- as(r.drclassdcd,"SpatialPixelsDataFrame")
The last line gives the following error:
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'x' in selecting a method for function 'as.factor': undefined columns selected
Is there a way to get a pixel dataframe from the raster layer with factors converted back to the characters in the attribute field?
I have a dataframe:
In total more than 3 million rows and 1800 species (scientific name)
The code below creates an empty raster at 0.5 degree scale..
library(raster)
ext <- extent(-180.0, 180, -90.0, 90.0)
gridsize <- 0.5
tempraster<- raster(ext, res=gridsize)
crs(tempraster) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"`
#and then the code below fills that raster with x y data one species at a time, creating an ascii as per the scientific name, with a 1 where the species is and a 0 where it is not.
selection<-animals
spp <- unique(animals$scientific_name)
result <- list()
for (i in 1:length(spp)) {
spi <- selection[selection$scientific_name == spp[i], c("lon", "lat")]
fname <- paste0(spp[i], ".asc")
result[[i]] <- rasterize(spi, tempraster, fun="count", filename=fname, background = 0, overwrite = TRUE)}
I would like to adjust this code so that instead of the resulting ascii having a 1 where the species is, it uses the value from the total column. Unfortunately I am a beginner at for loops and other functions so am asking for any help.
rasterize() function has field argument, so you can call it like this:
result[[i]] <- rasterize(spi, tempraster,
field=selection[selection$scientific_name == spp[i], "total"],
fun=sum,
filename=fname, background = 0,
overwrite = TRUE)
I'm trying to calculate the SPI from CHIRPS monthly mean precipitation data, because it's too large I cut it down to my area of interest and here it is: https://www.dropbox.com/s/jpwcg8j5bdc5gq6/chirps_mensual_v1.nc?dl=0
I did this to open it:
require(utils)
require(colorRamps)
require(RNetCDF)
require(rasterVis)
require(rgdal)
library(ncdf4)
library(raster)
datos2 <- nc_open("Datos/chirps_mensual_v1.nc")
ppt_array <- ncvar_get(datos2, "precip")
#I'm only taking complete years so I took out two months from 2018
ppt_mes <- ppt_array[ , ,1:444]
I know there is a SPI library but I don't know how should I format the data in order to use it. So I tried to do it without the function by fitting the gamma distribution but I dont' know how to do it for this data base.
Does anyone know how to calculate SPI either with the function or by fitting the distribution?
I don't think the SPI package is doing what you (or anyone) thinks it is doing. If you use debug(spi) and step through the code, you'll see that in one step it fits a empirical cumulative distribution function (with ecdf()) to the first two and last rows of data. Why the first two and last rows? I have no clue, but whoever wrote this package also used a for loop to do t() to a matrix. Not to mention that I think it should use a Gamma distribution or Pearson III distribution not ecdf() (according to Guttman, N.B. (1999) Accepting the standardized precipitation index: a calculation algorithm. JAWRA Journal of the American Water Resources Association, 35, 311–322.).
At the end I made it by using the SPI library, the result will be a value for each month in each grid point, if you want to calculate the value over a specific area I made that too but I can share it if you want it too:
Also, this one I made it using CRU data but you can adjust it:
#spei cru 1x1
rm(list=ls(all=TRUE)); dev.off()
require(utils)
require(RNetCDF)
require(rasterVis)
require(rgdal)
library(ncdf4)
require(SPEI)
########################################################################################################
prec <- open.nc("pre_mensual.nc")
lon <- length(var.get.nc(prec, "lon"))
lat <- length(var.get.nc(prec, "lat"))
lon1 <- var.get.nc(prec, "lon")
lat1 <- var.get.nc(prec, "lat")
ppt <- var.get.nc(prec, "pre")
ppt <- ppt[ , ,109:564] #31 18 456 (1980-2017)
anio = 456/12
###########################################################################################################
#Reshape data
precip <- sapply(1:dim(ppt)[3], function(x)t(ppt[,,x]))
############################################################################################################
#This is for SPI-6, you can use either of them
spi_6 <- array(list(),(lon*lat))
for (i in 1:(lon*lat)) {
spi_6[[i]] <- spi(precip[i,], scale=6, na.rm=TRUE)
}
#############################################################################################################
#Go back to an array form
sapply(spi_6, '[[',2 )->matriz_ppt
ppt_6 <- array(aperm(matriz_ppt, c(2,1),c(37,63,456)));spi_c <- array(t(ppt_6), dim=c(37,63,456))
#############################################################################################################
#Save to netcdf
for(i in 1:456) {
nam <- paste("SPI", i, sep = "")
assign(nam,raster((spi_c[ , ,i]), xmn=min(lon1), xmx=max(lon1), ymn=min(lat1), ymx=max(lat1), crs=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0")) )
}
gpcc_spi <- stack(mget(paste0("SPI", 1:456)))
outfile <- "spi6_cru_1980_2017.nc"
crs(gpcc_spi) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
writeRaster(gpcc_spi, outfile, overwrite=TRUE, format="CDF", varname="SPEI", varunit="units",longname="SPEI CRU", xname="lon", yname="lat")
It's not the most stylish way to calculate it but it does work. :)
EDIT: If you want to calculate the SPI/SPEI over an area this is what I did:
library(SPEI)
library(ncdf4)
library(raster)
#
pre_nc <- nc_open("pre_1971_2017_Vts4.nc")
pre <- ncvar_get(pre_nc, "pre")
pre <- pre[, , 109:564] #This is for the time I'm interested in
lats <- ncvar_get(pre_nc, "lat")
lons <- ncvar_get(pre_nc, "lon")
times <- 0:467
# Read mask
#This is a mask you need to create that adjusts to your region of interest
#It consist of a matrix of 0's and 1's, the 1's are placed in the area
#you are interested in
mask1 <- nc_open("cuenca_IV_CDO_05_final.nc")
m1 <- ncvar_get(mask1, "Band1")
m1[m1 == 0] <- NA
#
# Apply mask to data
#
pre1 <- array(NA, dim=dim(pre))
#
for(lon in 1:length(lons)){
for(lat in 1:length(lats)){
pre1[lon,lat,] <- pre[lon,lat,]*m1[lon,lat]
}
}
#
# Mean over the area of interest
#
mean_pre1 <- apply(pre1,c(3),mean, na.rm=TRUE)
# Calculate SPI/SPEI
spi1 <- matrix(data= NA, nrow = 456, ncol = 48)
for (i in 1:48) {
spi1[,i] <- spi(data=ts(mean_pre1,freq=12),scale= i)$fitted
}
#This calculates SPI/SPEI-1 to SPI/SPEI-48, you can change it
# Save
#
write.table(spi1,'spi_1980_2017.csv',sep=';',row.names=FALSE)
We have a raster which represents the ordinal date corresponding to the start of growing season. That is, each pixel value in the raster lies between 1:365, representing the ordinal date.
I have also calculated cumulative growing degree days for all 365 days in the corresponding year. These data are loaded into R as a raster stack with 365 layers.
My goal is to randomly sample geographic locations on the start of growing season layer. I then hope to extract the value of cumulative growing degree days from those same coordinates, but only from the growing degree days stack's layer which corresponds to the start of season pixel value.
For example, if the start of season at a given pixel was the 100th day of the year, I would like to extract the growing degree days from that location on the 100th day of the year (nlayers = 100).
I have been attempting to write a function to accomplish this, but I can't seem to get it to work right. I would like to end up with a data frame or matrix showing my x location, y location, start of season day, and GDD for that day. Instead of many GDD values in one column, I get many columns of one GDD value.
It seems the problem is in my use of extract. I've experimented with the arguments nl, layer, and indexing the x argument with [[]]. They seem to produce the same result. Here's a simplified code with only 5 days to consider, and the function I am trying to construct.
Any help/suggestions is appreciated!
#============================================================
library(raster)
SOST <- raster()
SOST[] <- 1:5
r1 <- r2 <- r3 <- r4 <- r5 <- raster()
r1[] <- 10
r2[] <- 20
r3[] <- 30
r4[] <- 40
r5[] <- 50
GDD <- stack(r1,r2,r3,r4,r5)
getGDD <- function(sost, gdd, n){set.seed(232)
samp <- sampleRandom(sost, n, xy = TRUE,
na.rm = TRUE)
df <- data.frame('x'=as.numeric(), 'y'=
as.numeric(), 'SOST'=as.numeric(),
'GDD'=as.numeric())
df.temp <- data.frame('x' = samp[1:n,1], 'y' =
samp[1:n,2], 'SOST' = samp[,3],'GDD' =
extract(gdd, samp[1:n,1:2], nl = samp[1:n,3]))
df <- rbind(df, df.temp)
return(df)
}
getGDD(sost = SOST, gdd = GDD, n = 5)
It doesn't seem like this gathered a lot of attention, but I was able to solve it. Using the sample posted in the original question, the easiest solution is the stackSelect function. This was pointed out to me by Robert Hijmans on R-sig-geo.
x <- stackSelect(GDD, SOST)
set.seed(232)
samp <- sampleRandom(SOST, 5, xy = TRUE, na.rm = TRUE)[, -3]
extract(x, samp)
This works great if your data have the same extent and resolution. However, I failed to mention and include that my data do not align perfectly. Thus, as far as I know, I still need to create a function. With a little more thought, I was able to come up with the following example and function and solve the problem.
library(raster)
#SOST and GDD simulations with same ncell and extents as actual data:
SOST <- raster(nrow = 3991, ncol = 3025, xmn = 688635, xmx = 779385,
ymn = 4276125, ymx = 4395855, crs = "+proj=utm +zone=11 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0")
SOST[] <- 1:5
r1 <- r2 <- r3 <- r4 <- r5 <- raster(nrow = 3951, ncol = 2995, xmn = 688620.2, xmx = 779377.8, ymn = 4276121, ymx = 4395848, crs = "+proj=utm +zone=11 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0")
r1[] <- 10
r2[] <- 20
r3[] <- 30
r4[] <- 40
r5[] <- 50
GDD <- stack(r1,r2,r3,r4,r5)
getGDD <- function(sost, gdd, n){
set.seed(232)
samp <- sampleRandom(sost, size = n, xy = TRUE)
extr <- NULL
for(i in 1:n){
extr[i] <- extract(gdd[[samp[i,3]]], cbind(as.matrix(samp[i,1]),
as.matrix(samp[i,2])))
}
out <- data.frame(x = samp[,1], y = samp[,2], 'SOST' = samp[,3], 'GDD' = extr)
return(out)
}
test <- getGDD(sost = SOST, gdd = GDD, n = 5)
test