Calculate agreement of two maps at zonal level in R

I am trying to calculate the agreement between two different cropland maps at the zonal/grid level.
I first created rasters that show the cropland agreement between a classified raster and validation data:
agreement<- sample_raster==raster_sampled
crop_agreement <- sample_raster==1 & raster_sampled==1
I then prepared the tiles for zonal analysis by:
tiles_poly$id<- as.double(tiles_poly$id)
tiles_poly$zone <- tiles_poly$id
tiles<-fasterize(st_as_sf(tiles_poly),raster_aoi, field="id")
This works fine.
However, when I'm trying to calculate the agreement at zonal level, I get an error.
zonal_crop_agreement<- zonal(crop_agreement,tiles,fun='sum', na.rm=TRUE)
Error in dimnames(x) <- dn :
length of 'dimnames' [2] not equal to array extent
As far as I understand, the error arises because the number of columns differs between the two rasters. Yet when I print the lengths of tiles and crop_agreement, they have the same length.
I therefore think the error might be due to fun = 'sum'. I also do not exactly understand what that argument does.
Does someone have an idea what the problem could be?
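For reference, here is a minimal self-contained example (dummy rasters, not my actual data) of what I think zonal() with fun = 'sum' computes:
library(raster)
vals <- raster(matrix(1, nrow = 4, ncol = 4))          # e.g. a 0/1 agreement raster, here all 1s
zones <- raster(matrix(rep(1:2, each = 8), nrow = 4))  # two zones of 8 cells each
zonal(vals, zones, fun = 'sum', na.rm = TRUE)
# returns one row per zone with the summed cell values, here 8 for each zone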
The complete code is here:
## LIBRARIES ##
library(raster)
library(rgdal)     # readOGR()
library(sf)        # st_as_sf()
library(fasterize) # fasterize()
## INPUT DATA GOES HERE ##
test_name<- "220624_cropland" # use this to name the test. The exported files will have this name
tiles_poly<-readOGR("C:/data_cropland_burkina/Grid_50km.shp") #the tile grid
raster_aoi <- raster("C:/data_cropland_burkina/empty_raster.tif") #sets the resolution and area of interest
k_rast<-raster("C:/data_cropland_burkina/Cropland_Burkina_Copernicus.tif") # this is the classified raster I am analyzing
#k_rast <- rast_class
## This is to convert points into raster ##
sample_points<-readOGR("C:/Users/louis/OneDrive/Desktop/data_cropland_burkina/validation_points_poly0.shp", stringsAsFactors = FALSE) #the validation points
sample_points$class_2 <- as.numeric(sample_points$item) # make sure the class column is numeric
sample_raster<- fasterize(st_as_sf(sample_points), raster_aoi, field="class_2")
## crop the raster (makes the script run faster) ##
#aoi<- crop(raster_aoi,k_rast)
plot(k_rast)
#plot(sample_points, add=TRUE) # optional: overlay the validation points
crop_aoi <- function(x) {crop(x,raster_aoi)}
k_rast <- crop_aoi(k_rast)
sample_raster<-crop_aoi(sample_raster)
## ONLY USE THIS IF THE RASTER DOES NOT HAVE THE SAME RESOLUTION AS THE RASTER_AOI ##
#resample all rasters
resize_aoi<- function(x){raster::resample(x,raster_aoi,method="ngb")}
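# (note: method = "ngb" is nearest-neighbour resampling, which keeps categorical class values intact)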
k_rast<-resize_aoi(k_rast)
## Reclassify if the raster is binary; otherwise comment this block out. ##
m <- c(0, 2, 1, 1)
rclmat <- matrix(m, ncol=2, byrow=TRUE)
k_rast <- reclassify(k_rast, rclmat)
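# (note on rclmat: with a two-column matrix, reclassify() reads each row as (is, becomes),
# so here 0 -> 2 and 1 -> 1, which lines the binary map up with the sample coding of 1 = crop, 2 = non-crop)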
## get the unique values from each raster ##
k_rast_vals<-sort(unique(na.omit(getValues(k_rast))))
sample_vals<-unique(na.omit(getValues(sample_raster)))
## extract the crop and non-crop pixels from the sample ##
sample_crop<- sample_raster==1
sample_non_crop<- sample_raster==2
sample_raster_binary<-sample_raster>0
## reduce the classified raster to only the pixels covered by the sample ##
raster_sampled<- k_rast*sample_raster_binary
## Calculate the confusion matrix ##
crop_comp <- function(x) cellStats((raster_sampled==x) * sample_crop, stat='sum') # parentheses needed: == binds more loosely than *
non_crop_comp <- function(x) cellStats((raster_sampled==x) * sample_non_crop, stat='sum')
results_crop <- rbind(sapply(k_rast_vals,crop_comp))
colnames(results_crop)<- k_rast_vals
results_non_crop<- rbind(sapply(k_rast_vals,non_crop_comp))
colnames(results_non_crop)<- k_rast_vals
results<- rbind(results_crop, results_non_crop)
rownames(results) <- c("crop_ref", "non_crop_ref")
results<-cbind(results, total = rowSums(results))
results<-rbind(results, total = colSums(results))
results
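## A quick overall-accuracy figure can be read off the matrix (my own addition; it assumes the
## classified values are 1 = crop and 2 = non-crop after the reclassification above):
overall_accuracy <- (results["crop_ref", "1"] + results["non_crop_ref", "2"]) / results["total", "total"]
overall_accuracy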
## ------SECTION 2- Zonal Analysis - show agreement at the grid level------- ##
## Create rasters that show cropland agreement between the classified raster and validation data ##
agreement<- sample_raster==raster_sampled
crop_agreement <- sample_raster==1 & raster_sampled==1
## prepare the tiles for zonal analysis ##
tiles_poly$id<- as.double(tiles_poly$id)
tiles_poly$zone <- tiles_poly$id
tiles<-fasterize(st_as_sf(tiles_poly),raster_aoi, field="id")
## calculate agreement at the zonal/grid level (i.e. statistics on the cell values of one raster within the zones defined by the other dataset - validation data & input crop maps) ##
## the next line calculates the zonal crop agreement, taking the crop-agreement raster created above and the prepared tiles as input
## na.rm = TRUE removes missing values (cells coded NA) from the data
## fun = 'sum' adds up the cell values within each zone; for a 0/1 agreement raster that is the number of agreeing cells per tile
zonal_crop_agreement<- zonal(crop_agreement,tiles,fun='sum', na.rm=TRUE)
zonal_crop_ref <- zonal(sample_crop,tiles,fun= 'sum', na.rm=TRUE)
zonal_crop_agreement<- data.frame(cbind(zonal_crop_agreement, zonal_crop_ref[,2]))
colnames(zonal_crop_agreement)[3]<- "crop_ref"
zonal_crop_agreement<- data.frame(cbind(zonal_crop_agreement,zonal(raster_sampled==1,tiles,fun="sum",na.rm=TRUE)[,2]))
colnames(zonal_crop_agreement)[4]<- "crop_class"
zonal_crop_agreement<- data.frame(cbind(zonal_crop_agreement,zonal(sample_raster,tiles,fun="count",na.rm=TRUE)[,2]))
colnames(zonal_crop_agreement)[5]<- "samp_pxls"
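## For debugging the zonal() error, a quick alignment check (my own addition): compareRaster()
## throws an error if the two rasters differ in extent, number of rows/columns or CRS
compareRaster(crop_agreement, tiles)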

Related

Iterate through Incremental Raster Values in R to Create New Raster Datasets

Apologies if this has been asked before, but I could not find a solution to my problem, which I believe is rather simple. I have a single source raster dataset containing continuous floating-point values ranging between 100 and 500. What I would like to do is loop through this source raster in increments of 50 to export/create new raster datasets of all values that are lower than the increment. For example, I have the following R code (using the raster library) to specify the raster and identify the increments. I would like to develop a way to automatically create 9 output raster datasets containing the values less than or equal to each increment. I can't seem to get there. Can anyone help? TIA!
#Trying to iteratively create new raster datasets
#Based on increments of Source Raster
library(raster)
setwd("C:/Path/To/Folder")
r=raster("Source_Raster.tif") #Raster is floating between 100 and 500
#Create a list of increments I would like to use
increments <- seq(100, 500, 50) # avoid naming a variable 'list' (it masks base::list)
#This creates the following sequence:
# 100 150 200 250 300 350 400 450 500
###THIS IS WHERE I STRUGGLE####
# I would like to use the sequence to create
# new raster datasets that only include values
# from the source raster that are less than or equal to each increment
# for example, the first output raster will contain values less than
# or equal to the first increment (100)
r100 <- calc(r, fun=function(x){ x[x > 100] <- NA; return(x)} )
After you have determined the break points, you can use lapply to create each raster layer. In this example, r_list is the final output with 9 raster layers.
library(raster)
set.seed(145)
# Create example raster
r <- raster(matrix(runif(100, min = 100, max = 500), ncol = 10))
# Create break points
brk <-seq(100, 500, 50)
# Conduct the operation: create nine rasters, each keeping only the values smaller than or equal to the corresponding break point
r_list <- lapply(brk, function(x){
  temp <- r
  temp[temp > x] <- NA # values above the break point become NA
  return(temp)
})
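If each increment should also be written to disk (the question mentions exporting the datasets), a sketch along these lines should work; the file names are just illustrative:
for (i in seq_along(r_list)) {
  writeRaster(r_list[[i]], filename = paste0("raster_le_", brk[i], ".tif"), overwrite = TRUE)
}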

Using R intersections to create a polygons-inside-a-polygon key using two shapefile layers

The data
I have two shapefiles marking the boundaries of national and provincial electoral constituencies in Pakistan.
The objective
I am attempting to use R to create a key that will generate a list of which provincial-level constituencies are "contained within" or otherwise intersecting with which national-level constituencies, based on their coordinates in this data. For example, NA-01 corresponds with PA-01, PA-02, PA-03; NA-02 corresponds with PA-04 and PA-05, etc. (The key will ultimately be used to link separate dataframes containing electoral results at the national and provincial level; that part I've figured out.)
I have only basic/intermediate R skills learned largely through trial and error and no experience working with GIS data outside of R.
The attempted solution
The closest solution I could find for this problem comes from this guide to calculating intersection areas in R. However, I have been unable to successfully replicate any of the three proposed approaches (either the questioner's use of a general TRUE/FALSE report on intersections, or the more precise calculations of area of overlap).
The code
# import map files
NA_map <- readOGR(dsn = "./National_Constituency_Boundary", layer = "National_Constituency_Boundary")
PA_map <- readOGR(dsn = "./Provincial_Constituency_Boundary", layer = "Provincial_Constituency_Boundary")
# Both are now SpatialPolygonsDataFrame objects of 273 and 577 elements, respectively.
# If relevant, I used spdplyr to tweak some of the data attribute names (for use later when joining to the electoral dataframes):
NA_map <- NA_map %>%
  rename(constituency_number = NA_Cons,
         district_name = District,
         province = Province)
PA_map <- PA_map %>%
  rename(province = PROVINCE,
         district_name = DISTRICT,
         constituency_number = PA)
# calculate intersections, take one
Results <- gIntersects(NA_map, PA_map, byid = TRUE)
# this creates a large matrix of 157,521 elements
rownames(Results) <- NA_map@data$constituency_number
colnames(Results) <- PA_map@data$constituency_number
Attempting to add the rowname/colname labels, however, gives me the error message:
Error in dimnames(x) <- dn :
length of 'dimnames' [1] not equal to array extent
Without the rowname/colname labels, I'm unable to read the overlay matrix, and unsure how to filter them so as to produce a list of only TRUE intersections that would help make a NA-PA key.
I also attempted to replicate the other two proposed solutions for calculating exact area of overlap:
# calculate intersections, take two
pi <- intersect(NA_map, PA_map)
# this generates a SpatialPolygons object with 273 elements
areas <- data.frame(area=sapply(pi@polygons, FUN = function(x) {slot(x, 'area')}))
# this calculates the area of intersection but has no other variables
row.names(areas) <- sapply(pi@polygons, FUN=function(x) {slot(x, 'ID')})
This generates the error message:
Error in `row.names<-.data.frame`(`*tmp*`, value = c("2", "1", "4", "5", :
duplicate 'row.names' are not allowed
In addition: Warning message:
non-unique value when setting 'row.names': ‘1’
So that when I attempt to attach areas to attributes info with
attArrea <- spCbind(pi, areas)
I get the error message
Error in spCbind(pi, areas) : row names not identical
Attempting the third proposed method:
# calculate intersections, take three
pi <- st_intersection(NA_map, PA_map)
Produces the error message:
Error in UseMethod("st_intersection") :
no applicable method for 'st_intersection' applied to an object of class "c('SpatialPolygonsDataFrame', 'SpatialPolygons', 'Spatial', 'SpatialPolygonsNULL', 'SpatialVector')"
I understand that my SPDF maps can't be used for this third approach, but it wasn't clear from the description what steps would be needed to transform them and attempt this method.
The plea for help
Any suggestions on corrections necessary to use any of these approaches, or pointers towards some other method of figuring this, would be greatly appreciated. Thanks!
Here is some example data
library(raster)
p <- shapefile(system.file("external/lux.shp", package="raster"))
p1 <- aggregate(p, by="NAME_1")
p2 <- p[, 'NAME_2']
So we have p1 with regions, and p2 with lower level divisions.
Now we can do
x <- intersect(p1, p2)
# or x <- union(p1, p2)
data.frame(x)
Which should be (and is) the same as the original
data.frame(p)[, c('NAME_1', 'NAME_2')]
To get the area of the polygons, you can do
x$area <- area(x) / 1000000 # divide to get km2
There are likely to be many "slivers", very small polygons because of slight variations in borders. That might not matter to you.
But another approach could be matching by centroid:
y <- p2
e <- extract(p1, coordinates(p2))
y$NAME_1 <- e$NAME_1
data.frame(y)
Your code isn't self-contained, so I didn't try to replicate the errors you report.
However, getting the 'key' you want is very simple using the sf package (which is intended to supersede rgeos, rgdal and sp in the near future). See here:
library(sf)
# Download shapefiles
national.url <- 'https://data.humdata.org/dataset/5d48a142-1f92-4a65-8ee5-5d22eb85f60f/resource/d85318cb-dcc0-4a59-a0c7-cf0b7123a5fd/download/national-constituency-boundary.zip'
provincial.url <- 'https://data.humdata.org/dataset/137532ad-f4a9-471e-8b5f-d1323df42991/resource/c84c93d7-7730-4b97-8382-4a783932d126/download/provincial-constituency-boundary.zip'
download.file(national.url, destfile = file.path(tempdir(), 'national.zip'))
download.file(provincial.url, destfile = file.path(tempdir(), 'provincial.zip'))
# Unzip shapefiles
unzip(file.path(tempdir(), 'national.zip'), exdir = file.path(tempdir(), 'national'))
unzip(file.path(tempdir(), 'provincial.zip'), exdir = file.path(tempdir(), 'provincial'))
# Read map files
NA_map <- st_read(dsn = file.path(tempdir(), 'national'), layer = "National_Constituency_Boundary")
PA_map <- st_read(dsn = file.path(tempdir(), 'provincial'), layer = "Provincial_Constituency_Boundary")
# Get sparse list representation of intersections
intrs.sgpb <- st_intersects(NA_map, PA_map)
length(intrs.sgpb) # One list element per national constituency
# [1] 273
print(intrs.sgpb[[1]]) # Indices of provincial constituencies intersecting with the first national constituency
# [1] 506 522 554 555 556
print(PA_map$PROVINCE[intrs.sgpb[[1]][1]]) # Province of the first provincial constituency intersecting with the first national constituency
# [1] KHYBER PAKHTUNKHWA
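If it helps, the sparse list can be turned into a long-format lookup key with one row per national/provincial pair (an extra sketch; the column names are just illustrative):
key <- data.frame(national = rep(seq_along(intrs.sgpb), lengths(intrs.sgpb)),
                  provincial = unlist(intrs.sgpb))
head(key) # row indices into NA_map and PA_map; join on these to attach constituency numbers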

Raster in R: Create Zonal Count of specific cell values without reclassification

I would like to know if there is a way to create zonal statistics for RasterLayer objects, specifically the count of a given cell value (e.g. a land-use class) in R, without having to reclassify the whole raster. The solution should be memory efficient in order to work on large raster files, i.e. no extraction of the values into a matrix in R is desired.
Below is an example of how I have handled it until now. In this case I reclassify the original raster to hold only 1 for the value of interest and missings for all other values.
My proposed solution creates both redundant data and additional processing steps to get me to my initial goal. I thought something like zonal(r1[r1==6],r2,"count") would work, but obviously it does not (see below).
# generate reproducible Raster
library("raster")
## RASTER 1 (e.g. land-use classes)
r1 <- raster( crs="+proj=utm +zone=31")
extent(r1) <- extent(0, 100, 0, 100)
res(r1) <- c(5, 5)
values(r1) <- sample(10, ncell(r1), replace=TRUE)
plot(r1)
## RASTER 2 (containing zones of interest)
r2 <- raster( crs="+proj=utm +zone=31")
extent(r2) <- extent(0, 100, 0, 100)
res(r2) <- c(5, 5)
values(r2) <- c(rep(1,100),rep(2,100),rep(3,100),rep(4,100))
plot(r2)
# (1) ZONAL STATISTICS
# a. how many cells per zone (independent of specific cell value)
zonal(r1,r2,"count")
# b. how many cells per zone of specific value 6
zonal(r1[r1==6],r2,"count")
# -> fails
# with reclassification
r1.reclass <- reclassify(r1,
                         matrix(c(1, 5, NA,
                                  5.5, 6.5, 1, # class of interest
                                  6.5, 10, NA),
                                ncol=3, byrow=TRUE),
                         include.lowest=TRUE # include the lowest value from the table
)
zonal(r1.reclass,r2,"count")
You can use raster::match:
zonal(match(r1, 6),r2, "count")
As you can see from plot(match(r1, 6)), it only returns raster cells which hold the desired value(s). All other cells are NA.
r1[r1==6] as used in your attempt unfortunately returns a plain vector of cell values, not a raster, and therefore cannot be used in zonal.
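Note that match() also accepts a vector of values, so a combined count for several classes at once should work along these lines (a sketch reusing r1 and r2 from above):
zonal(match(r1, c(5, 6)), r2, "count") # cells per zone holding value 5 or 6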

Why is subscript out of bounds in adehabitatHR getverticeshr (puechabonsp data)

Hope someone can help. I have a large dataset from which I have generated 10 estUDs with the same grid and h value = 200. Here is a subset of just two of them. I can visualise them using image(liud), but when I try to use the function getverticeshr I get an error that the subscript is out of bounds. I have tried changing the grid and the value of h to no avail. I wonder if it is something to do with the way I am combining them into and out of a list?
library(adehabitatHR)
#combine all Ud's into one dataset
liud <- list(Y2889a, Y2889b)
class(liud) <- "estUDm"
image(liud)#plot all est ud's
v<-getverticeshr(liud)
I have reproduced the error with the puechabonsp dataset below
library(adehabitatHR)
## Load the data
data(puechabonsp)
loc <- puechabonsp$relocs
## have a look at the data
head(as.data.frame(loc))
## the first column of this data frame is the ID
## Estimation of UD for each of the animals (two here as an example)
udBrock <- kernelUD(loc[as.data.frame(loc)[,1]=="Brock",], grid=200)
udCalou <- kernelUD(loc[as.data.frame(loc)[,1]=="Calou",], grid=200)
liud <- list(udBrock, udCalou)
class(liud) <- "estUDm"
image(liud)#plot all est ud's
v<-getverticeshr(liud)
Thanks for your comment Chris, I should have explained my dataset. I have 10 animals and have generated random points based on recorded polygons for each animal. I have run this 100 times per animal. My aim is to generate a mean utilised distribution for each animal based on all 100 runs. So far I have used this code:
xybat <- subset(bat.master, bat.master$id == "Y2889a", select = x:loopno)
#change to spatial points
xy <- xybat[1:2]   # first two columns saved as coords
df <- xybat[-1:-3] # remove columns not needed for the UD
SPDF <- SpatialPointsDataFrame(coords=xy, data=df) # combine df and xy
udHR <- kernelUD(SPDF, h = 200, grid = habitat, kern = "epa")
## I would proceed using the raster packages
ud <- stack(lapply(udHR, raster))
## You can now check the first one
plot(ud[[1]])
## or at all of them
#plot(ud)
## take the mean
plot(udm <- mean(ud))
## now you can either proceed in raster and calculate your isopleths or convert it back to a estUD, this is a bit of a hack and not the nicest way to do it
Y2889a <- udHR[[1]]
Y2889a@grid <- as(udm, "GridTopology")
So if I follow your suggestion and run the kernelUD function on the whole dataset, I still need to stack each animal's UDs separately and then combine them into an estUDm, and I am back to the same problem. I hope you can help me come up with a solution.
Best wishes,
Simone
This is basically just a generalisation of my previous answer to multiple animals; maybe it is useful:
library(adehabitatHR)
library(raster)
## generate some dummy data for 15 animals, each with 10 replications
pts <- replicate(15, SpatialPointsDataFrame(coords=cbind(rnorm(1000), rnorm(1000)),
                                            data=data.frame(id=rep(1:10, each=100))))
## generate uds
uds <- lapply(pts, function(x) kernelUD(x, h = "href", same4all = TRUE, kern = "bivnorm"))
udsr <- lapply(uds, function(x) stack(lapply(x, raster)))
## You can now check the first one
plot(udsr[[1]][[1]])
## or at all 10 uds of the first animal
plot(udsr[[1]])
## take the mean
udsm <- lapply(udsr, mean)
## go back to adehabitat
for (i in seq_along(udsm)) {
  uds[[i]] <- uds[[i]][[1]]
  uds[[i]]@grid <- as(udsm[[i]], "GridTopology")
}
## now you can work with udHR as if it were a HR estimate
iso95 <- lapply(uds, getverticeshr, percent=95)
## plot first animal
plot(iso95[[1]])
## plot second animal
plot(iso95[[2]])
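Other isopleth levels work the same way, e.g.:
iso50 <- lapply(uds, getverticeshr, percent=50)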

How to combine estUDs into a single layer

I have some data for 10 animals from which I have generated some random points. Each data set I have replicated 100 times. Below I have separated out one animal from the data and generated kernelUDs for each rep. I would now like some way to combine the UDs to produce a summed estimated density map, from which I can then go on and measure 50% and 90% home ranges along with other metrics.
bat.master <- read.csv("C:/Users/Sim/Dropbox/Wallington GIS/bat.master")
names(bat.master)
# subset data frame to 1st bat only
bat1 <- bat.master$id == "Y2889a" # logical index (not used below; subset() does the filtering)
xybat1 <- subset(bat.master, bat.master$id == "Y2889a", select = x:loopno)
# change to spatial points
xy <- xybat1[1:2]   # first two columns saved as coords
df <- xybat1[-1:-3] # drop the columns not needed for the UD
SPDF <- SpatialPointsDataFrame(coords=xy, data=df) # combine df and xy
ud1 <- kernelUD(SPDF, h = "href", same4all = TRUE, kern = "bivnorm")
Not sure if I understood your question right, but you could try something like this:
library(adehabitatHR)
## generate some dummy data
SPDF <- SpatialPointsDataFrame(coords=cbind(rnorm(1000), rnorm(1000)),
data=data.frame(id=rep(1:10, each=100)))
udHR <- kernelUD(SPDF, h = "href", same4all = TRUE, kern = "bivnorm")
## I would proceed using the raster packages
library(raster)
ud1 <- stack(lapply(udHR, raster))
## You can now check the first one
plot(ud1[[1]])
## or at all of them
plot(ud1)
## take the mean
plot(udm <- mean(ud1))
## now you can either proceed in raster and calculate your isopleths or convert it back to a estUD, this is a bit of a hack and not the nicest way to do it
udHR <- udHR[[1]]
udHR@grid <- as(udm, "GridTopology")
## now you can work with udHR as if it were a HR estimate
plot(getverticeshr(udHR, percent=95))
plot(getverticeshr(udHR, percent=50), add=TRUE)
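A quick sanity check on the averaged surface (my addition, assuming the raster values are densities per map unit): a proper UD should integrate to roughly 1, so the cell values times the cell area should sum to about 1:
sum(values(udm), na.rm=TRUE) * prod(res(udm)) # should be close to 1 for a proper UD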
