Extraction from Land Cover Raster Data - r

I am trying to extract grassland values from a historical land use and land cover database created by USGS. I have some issues with the Raster package and getValues option. The tiff file is too large to add with this post, but it is available online.
The data is available under Land-use and Land-cover Backcasting.
This is my code:
install.packages("raster")
install.packages("rastervis")
install.packages("RCurl")
install.packages("R.utils")
install.packages("rgdal")
install.packages("sp")
install.packages("maptools")
install.packages("tibble")
install.packages("ggplot2")
install.packages("gridExtra")
library(R.utils)
library(rgdal)
library(sp)
library(maptools)
library(raster)
library(rasterVis)
library(RCurl)
library(R.utils)
library(rgdal)
library('rgdal')
library('raster')
library("tibble")
library('ggplot2')
Landcover file in tiff format:
Landcover1 <- raster ("CONUS_Backcasting_y1938.tif")
USA counties file:
USA_county <- readOGR("UScounties",layer="UScounties")
These two files are not in the same projection, so projection:
newprojection <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84
+towgs84=0,0,0"
projected_raster_landcover1 <- projectRaster(Landcover1, crs =
newprojection)
Now, I want to extract the land cover data only for the grassland (there are total 17 land classes, and grassland is coded as '11')
Landcover1_values <- extract(x = projected_raster_landcover1,
y = USA_county)
But when I use getValues to extract the grassland,
Landcover1_values_count<- lapply(Landcover1_values, FUN = function(x) {
length(which(getValues(x) == 11)) })
it shows error:
**Error in (function (classes, fdef, mtable) :
unable to find an inherited method for function ‘getValues’ for signature ‘"numeric", "missing", "missing"’**
I thought it is for NA, but I could not get how to solve the problem.

extract returns a vector or a matrix while getValues needs a raster as input. It's why you have this error.
Therefore, this should work in your case:
Landcover1_values_count <- sum(Landcover1_values == 11, na.rm = T)
Having said that, I am not sure about the use of extract in your workflow. I think what you are looking for is to mask your raster. So I suggest you use:
Landcover1_values <- mask(projected_raster_landcover1, USA_county)
Landcover1_values_count <- sum(Landcover1_values[,] == 11, na.rm = T)
EDIT
According to your comment, what you really want is to perform a zonal statistics (the number of pixels labeled as grasslands (11) in each county). Here are some steps on how to do it:
library(raster)
library(plyr)
# Function for efficient zonal stats using data.table, source: https://stat.ethz.ch/pipermail/r-sig-geo/2013-February/017475.html
myZonal <- function (x, z, stat, digits = 0, na.rm = TRUE,
...) {
library(data.table)
fun <- match.fun(stat)
vals <- getValues(x)
zones <- round(getValues(z), digits = digits)
rDT <- data.table(vals, z=zones)
setkey(rDT, z)
rDT[, lapply(.SD, fun, na.rm = TRUE), by=z]
}
# Add an ID field to the shapefile
USA_county#data$ID <- c(1:length(USA_county#data[,1]))
# Crop raster to 'zone' shapefile extent
r <- crop(projected_raster_landcover1, extent(USA_county))
# Reclassify raster in binary raster with 1 for grasslands and 0 for all others values
r[r != 11] <- 0
r[r == 11] <- 1
# Rasterize shapefile using ID field
zone <- rasterize(USA_county, r, field="ID", dataType = "INT1U") # Change dataType if nrow(USA_county) > 255 to INT2U or INT4U
# Zonal stats
Zstat <- data.frame(myZonal(r, zone, "sum"))
colnames(Zstat) <- c("ID", "Grassland")
# Merge data
USA_county#data <- plyr::join(USA_county#data, Zstat, by="ID")
# Show results
USA_county#data

Related

How to correctly get a NCDF file into a data.frame?

Since a couple of weeks ive been trying to get a NetCDF file into a dataframe. Even though Im succesfull in extracting the variables/dimensions and plotting a slice from the ncdf file, all the data is husseled when cbinding it into a dataframe and then plotting it. The data is weather data from Copernicus, with data for each longitude and latitude point in the world. The ultimate goal here is to raster the dataframe to be able to categorize weather per raster over time.
The data can be retrieved from https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels?tab=overview.
My code looks like this:
library(ncdf4)
library(raster)
library(ggplot2)
##dataset, 5 augustus
###bron: https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels?tab=overview
weatherpress <- nc_open("C:/Users/heinj/OneDrive/Documenten/Universiteit/Master/Master Thesis/dataset/allecoors5augweer.nc")
{
sink('WeatherPressure5aug.txt')
print(weatherpress)
sink()
}
print(weatherpress)
long<- ncvar_get(weatherpress, "longitude")
lat <- ncvar_get(weatherpress, "latitude")
tijd <- ncvar_get(weatherpress, "time")
rain <- ncvar_get(weatherpress,"crwc")
temperature <- ncvar_get(weatherpress, "t") #temperature
Uwind <- ncvar_get(weatherpress, "u") #u wind
Vwind <- ncvar_get(weatherpress, "v") # v wind
#removal NA's
fillvaluecrwc <- ncatt_get(weatherpress, "crwc", "_FillValue")
rain[rain == fillvaluecrwc$value] <- NA
rain <- na.omit(rain)
fillvaluet <- ncatt_get(weatherpress, "t", "_FillValue")
temperature[temperature == fillvaluet$value] <- NA
temperature <- na.omit(temperature)
fillvalueu <- ncatt_get(weatherpress, "u", "_FillValue")
Uwind[Uwind == fillvalueu$value] <- NA
Uwind <- na.omit(Uwind)
fillvaluev <- ncatt_get(weatherpress, "v", "_FillValue")
Vwind[Vwind == fillvaluev$value] <- NA
Vwind <- na.omit(Vwind)
min(temperature)
#correcting longitude
nc_close(weatherpress)
#plotje
temperature_slice <- temperature[, ,1]
r_temperature <- raster(t(temperature_slice), xmn=min(long),
xmx=max(long), ymn=min(lat), ymx=max(lat),
crs=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
plot(r_temperature)
#binding into a dataframe
weather <- cbind(long, lat, rain, tijd, temperature, Uwind, Vwind)
weather <- as.data.frame(weather)
View(weather)
#subset temperature
only_temperature <- weather
only_temperature <- subset(only_temperature[,c(1:2, 5)])
head(only_temperature)
summary(only_temperature)
##ggplot###
ggplot(as.data.frame(poging1.df), aes(x = long, y = lat)) +
geom_raster(aes(fill = temperature))
### rastering ###
only_temp <- only_temperature
r = raster(xmn = min(long), xmx=max(long),
ymn=min(lat), ymx=max(lat), res = 10)
p = as(r#extent, 'SpatialPolygons')
cordi <- only_temp[c("long", "lat")]
coordinates(cordi) <- ~long + lat
only_temp <- SpatialPointsDataFrame(cordi, only_temp)
meanr <- rasterize(only_temp, r, "temperature", fun = mean)
plot(meanr)
where the ggplot and meanr plot look different from the r_plot (the correct one) - with the temperature belonging to Antartica going dimensional through the plot.
Does anybody know where my problem lies?
Thanks in advance!
If your goal is to make a raster, you can use the terra package like this:
library(terra)
f <- "allecoors5augweer.nc"
r <- rast(f)
Or, to get a single variable, t in this case:
r <- rast(f, "t")
Also see sds(f). Alternatively, you can use raster::brick(f). To inspect the data, you can do
plot(r)
Just to answer your question about making a data.frame from a ncdf file (which does not seem to be necessary for your goals). It may depend a bit on the file at hand (for example, are there sub-datasets in the file, and how do you want to treat those --- it would have been very useful if you had shared a file). You can use tidync for any nc file, but if it is spatial (gridded) data, it may be easiest to use raster or terra. With terra you can do
library(terra)
f <- "allecoors5augweer.nc"
r <- rast(f)
d <- as.data.frame(r)

Converting raster layer with values as factors to SpatialPixelsDataFrame

I have a raster layer that has values as factors, and an attribute table. I would like to convert this to a SpatialPixelsDataFrame however when I try:
library(sp)
library(sf)
library(rgdal)
library(raster)
library(FedData)
library(rasterVis)
create_factor_rast <- function(base_rast,Spatial_polygon,fieldname) {
nam <- unique(Spatial_polygon[[fieldname]])
nam_df <- data.frame(ID = 1:length(nam), nam = nam)
idfieldname = paste0(fieldname,"ID")
Spatial_polygon[[idfieldname]] <- nam_df$ID[match(Spatial_polygon[[fieldname]],nam_df$nam)]
new_raster <- rasterize(x=Spatial_polygon, y=base_rast, field = idfieldname)
new_raster <- ratify(new_raster)
# # Create levels
rat <- levels(new_raster)[[1]]
rat$names <- nam_df$nam
rat$IDs <- nam_df$ID
levels(new_raster) <- rat
new_raster
}
spft <- "+proj=lcc +lat_1=40 +lat_2=43 +lat_0=39.83333333333334 +lon_0=-100 +x_0=500000.0000000002 +y_0=0 +datum=NAD83 +units=us-ft +no_defs"
SSURGO.dl <- FedData::get_ssurgo(template=c('NE043'), label = "Tau",raw.dir = "./SSURGO_r/RAW/SSURGO",
extraction.dir = paste0("./SSURGO_R/EXTRACTIONS/", "soils","/SSURGO"))
SSURGO.dl_shp<-SSURGO.dl$spatial
unlink('SSURGO_r',recursive=TRUE)
# extract the muaggatt table
muaggatt<-SSURGO.dl[["tabular"]][["muaggatt"]]
# get only the fields needed, must have mukey to do the join
muaggatt <- muaggatt[c("mukey","hydgrpdcd","drclassdcd")]
SSURGO_merge <- merge(SSURGO.dl_shp,muaggatt,by.x = "MUKEY", by.y="mukey")
# Convert merged SSURGO polygon to Stateplane feet
SSURGO_merge <- sf::st_transform(SSURGO_merge,crs(spft))
SSURGO_merge <- as(SSURGO_merge,"Spatial")
grid_cell_size <- 100 # ~30 meters
shp_extent <- extent(SSURGO_merge)
r <- raster(SSURGO_merge,resolution = grid_cell_size,crs=crs(SSURGO_merge))
r.drclassdcd <- create_factor_rast(r, SSURGO_merge,'drclassdcd')
rasterVis::levelplot(r.drclassdcd)
r.drclassdcd_sp <- as(r.drclassdcd,"SpatialPixelsDataFrame")
The last line gives the following error:
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'x' in selecting a method for function 'as.factor': undefined columns selected
Is there a way to get a pixel dataframe from the raster layer with factors converted back to the characters in the attribute field?

changing extent() to aggregate over a specfic grid of a RasterLayer

I am currently working with some data from Switzerland with 100x100 m resolution. I would like to aggregate the data to 1x1km from km-squares based on the national grid (with coordinates f.i. x = 500000, y = 130000), differing from the original extent of my RasterLayer object. I have provided some code for a RasterLayer based on the original extent (=r) and a RasterLayer with a new extent based on national grid cells in Switzerland (=r.agg) and plotted r.agg over r (the latter of, which I coloured in red to make the borders more obvious).
library(raster)
ncol <- 3677 # same as original RasterLayer
nrow <- 2261 # same as original RasterLayer
## create raster (with original extent)
r <- raster(ncol = ncol, nrow = nrow) # dimensions of raster
mat <- matrix(runif(ncol*nrow, 0,2), ncol = ncol, nrow = nrow) # write data
# create simplified country-borders / buffer
mat [0:100, 0:ncol] <- NA
mat [0:nrow, 0:100] <- NA
mat [(nrow - 100):nrow, 0:ncol] <- NA
mat [0:nrow, (ncol -100):ncol] <- NA
r[] <- mat #write data to raster
extent(r) <- matrix(c(479950, 73950, 847650, 300050), nrow = 2) # define extent
proj4string(r) <- "+proj=somerc +lat_0=46.95240555555556
+lon_0=7.439583333333333
+k_0=1 +x_0=600000 +y_0=200000
+ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0
+units=m +no_defs"
## create raster (with customised extent)
r.agg <- r # prepare aggregatable RasterLayer
extent(r.agg) <- matrix(c(480000, 74000, 847700, 300100), nrow = 2)
r.agg <- aggregate(r.agg, fact = 10, fun = mean)
par(bg = 'darkgrey')
plot(r, col = "red", legend = FALSE)
plot(r.agg, add = TRUE)
However, I am not entirely sure to interpret the results correctly:
The plotted "r.agg" over "r" shows a red line which I don't quite understand.
Option A (desired outcome): extent() allows me to aggregate over the national grid and aggregate over the spatially correct 100x100m information
Option B (which I hope does not happen): extent() will let me aggregate over the national grid, but using the data from the original grid and thus not the one from the correct 100x100 squares.
I didn't find the help page for extent() particularly helpful and would be glad to receive some clarification. If Option B is the case (or I misunderstood something completely), I would be really happy if someone could help me to figure this out. Thanks a lot in advance and sorry for my weird phrasing, I am new to analyzing spatial data.
The two rasters do not align, so you cannot rely on aggregate alone.
You can do
library(raster)
r <- raster(ncol = 3677, nrow = 2261, ext=extent(479950, 847650, 73950, 300050))
crs(r) <- "+proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=600000 +y_0=200000
+ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs"
r.agg <- raster(nrow=227, ncol=368, ext=extent(480000, 848000, 73100, 300100))
x <- resample(r, r.agg)
Or, to have more control
r.low <- disaggregate(r.agg, 10)
r.low <- resample(r, r.low)
ra <- aggregate(r.low, 10, mean)

R Standardized Precipitation Index .nc file

I'm trying to calculate the SPI from CHIRPS monthly mean precipitation data, because it's too large I cut it down to my area of interest and here it is: https://www.dropbox.com/s/jpwcg8j5bdc5gq6/chirps_mensual_v1.nc?dl=0
I did this to open it:
require(utils)
require(colorRamps)
require(RNetCDF)
require(rasterVis)
require(rgdal)
library(ncdf4)
library(raster)
datos2 <- nc_open("Datos/chirps_mensual_v1.nc")
ppt_array <- ncvar_get(datos2, "precip")
#I'm only taking complete years so I took out two months from 2018
ppt_mes <- ppt_array[ , ,1:444]
I know there is a SPI library but I don't know how should I format the data in order to use it. So I tried to do it without the function by fitting the gamma distribution but I dont' know how to do it for this data base.
Does anyone know how to calculate SPI either with the function or by fitting the distribution?
I don't think the SPI package is doing what you (or anyone) thinks it is doing. If you use debug(spi) and step through the code, you'll see that in one step it fits a empirical cumulative distribution function (with ecdf()) to the first two and last rows of data. Why the first two and last rows? I have no clue, but whoever wrote this package also used a for loop to do t() to a matrix. Not to mention that I think it should use a Gamma distribution or Pearson III distribution not ecdf() (according to Guttman, N.B. (1999) Accepting the standardized precipitation index: a calculation algorithm. JAWRA Journal of the American Water Resources Association, 35, 311–322.).
At the end I made it by using the SPI library, the result will be a value for each month in each grid point, if you want to calculate the value over a specific area I made that too but I can share it if you want it too:
Also, this one I made it using CRU data but you can adjust it:
#spei cru 1x1
rm(list=ls(all=TRUE)); dev.off()
require(utils)
require(RNetCDF)
require(rasterVis)
require(rgdal)
library(ncdf4)
require(SPEI)
########################################################################################################
prec <- open.nc("pre_mensual.nc")
lon <- length(var.get.nc(prec, "lon"))
lat <- length(var.get.nc(prec, "lat"))
lon1 <- var.get.nc(prec, "lon")
lat1 <- var.get.nc(prec, "lat")
ppt <- var.get.nc(prec, "pre")
ppt <- ppt[ , ,109:564] #31 18 456 (1980-2017)
anio = 456/12
###########################################################################################################
#Reshape data
precip <- sapply(1:dim(ppt)[3], function(x)t(ppt[,,x]))
############################################################################################################
#This is for SPI-6, you can use either of them
spi_6 <- array(list(),(lon*lat))
for (i in 1:(lon*lat)) {
spi_6[[i]] <- spi(precip[i,], scale=6, na.rm=TRUE)
}
#############################################################################################################
#Go back to an array form
sapply(spi_6, '[[',2 )->matriz_ppt
ppt_6 <- array(aperm(matriz_ppt, c(2,1),c(37,63,456)));spi_c <- array(t(ppt_6), dim=c(37,63,456))
#############################################################################################################
#Save to netcdf
for(i in 1:456) {
nam <- paste("SPI", i, sep = "")
assign(nam,raster((spi_c[ , ,i]), xmn=min(lon1), xmx=max(lon1), ymn=min(lat1), ymx=max(lat1), crs=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0")) )
}
gpcc_spi <- stack(mget(paste0("SPI", 1:456)))
outfile <- "spi6_cru_1980_2017.nc"
crs(gpcc_spi) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
writeRaster(gpcc_spi, outfile, overwrite=TRUE, format="CDF", varname="SPEI", varunit="units",longname="SPEI CRU", xname="lon", yname="lat")
It's not the most stylish way to calculate it but it does work. :)
EDIT: If you want to calculate the SPI/SPEI over an area this is what I did:
library(SPEI)
library(ncdf4)
library(raster)
#
pre_nc <- nc_open("pre_1971_2017_Vts4.nc")
pre <- ncvar_get(pre_nc, "pre")
pre <- pre[, , 109:564] #This is for the time I'm interested in
lats <- ncvar_get(pre_nc, "lat")
lons <- ncvar_get(pre_nc, "lon")
times <- 0:467
# Read mask
#This is a mask you need to create that adjusts to your region of interest
#It consist of a matrix of 0's and 1's, the 1's are placed in the area
#you are interested in
mask1 <- nc_open("cuenca_IV_CDO_05_final.nc")
m1 <- ncvar_get(mask1, "Band1")
m1[m1 == 0] <- NA
#
# Apply mask to data
#
pre1 <- array(NA, dim=dim(pre))
#
for(lon in 1:length(lons)){
for(lat in 1:length(lats)){
pre1[lon,lat,] <- pre[lon,lat,]*m1[lon,lat]
}
}
#
# Mean over the area of interest
#
mean_pre1 <- apply(pre1,c(3),mean, na.rm=TRUE)
# Calculate SPI/SPEI
spi1 <- matrix(data= NA, nrow = 456, ncol = 48)
for (i in 1:48) {
spi1[,i] <- spi(data=ts(mean_pre1,freq=12),scale= i)$fitted
}
#This calculates SPI/SPEI-1 to SPI/SPEI-48, you can change it
# Save
#
write.table(spi1,'spi_1980_2017.csv',sep=';',row.names=FALSE)

get coordinates of a patch in a raster map (raster package in R)

I have a raster map with many patches (clumps of continguous cells with the same value). What I need to do is to obtain the coordinates of the center (or close to the center) of each patch.
I am very unexperienced with raster package but it seems I can get coordinates only if I know the position of the cells in the map. Is there any way to get coordinates giving a value of the cells instead? Thank you
If by patch you mean clumps, Raster package allows you to find , and isolate, clumps. Taking the clump() raster package example, and extending it:
library(raster)
library(igraph)
detach("package:coin", unload=TRUE)
r <- raster(ncols=12, nrows=12)
set.seed(0)
r[] <- round(runif(ncell(r))*0.7 )
rc <- clump(r)
clump_id <- getValues(rc)
xy <- xyFromCell(rc,1:ncell(rc))
df <- data.frame(xy, clump_id, is_clump = rc[] %in% freq(rc, useNA = 'no')[,1])
df[df$is_clump == T, ]
plot(r)
plot(rc)
text(df[df$is_clump == T, 1:2], labels = df[df$is_clump == T, 3])
May not be as interesting as you could expect.
You do it all over with directions = 4
rc <- clump(r, directions = 4)
clump_id <- getValues(rc)
xy <- xyFromCell(rc,1:ncell(rc))
df <- data.frame(xy, clump_id, is_clump = rc[] %in% freq(rc, useNA = 'no')[,1])
df[df$is_clump == T, ]
to get
and maybe clump 'centroids'
dfm <- ddply(df[df$is_clump == T, ], .(clump_id), summarise, xm = mean(x), ym = mean(y))
plot(rc)
text(dfm[, 2:3], labels = dfm$clump_id)
Notes:
There will be an error if you try to use clump() without first
detach modeltools library. modeltools is called by coin and maybe
other statistical libraries.
You could take the mean of the coordinates of each patch:
# some dummy data
m <- matrix(c(
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,4,4,0,
0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,4,4,0,
0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,4,4,0,
0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,2,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), nrow=20, byrow=T)
# create a raster
r <- raster(m)
# convert raster to points
p <- data.frame(rasterToPoints(r))
# filter out packground
p <- p[p$layer > 0,]
# for each patch calc mean coordinates
sapply(split(p[, c("x", "y")], p$layer), colMeans)

Resources