Error when using Aggregate function in stars raster extract - r

I'm trying to extract the mean of the raster values for each polygon in a shapefile using stars. Unfortunately, I get the following error when running the code below:
library(sf)
library(stars)
library(dplyr)
library(exactextractr)

SA2_data <- st_read("/tmp/SA2_2016_GDA94_data/SA2_2016_AUST.shp") # SA2 regions for Australia
SA2_data <- SA2_data[SA2_data$STE_CODE16 == '1', ]                # keep the state of NSW
SA2_data <- SA2_data %>% dplyr::filter(!st_is_empty(.))           # drop empty geometries
current_SOC <- read_stars("/tmp/current_soc_june2021_data/Current_SOC30_mean_June2021a.tif") # carbon stocks
SA2_data <- st_transform(SA2_data, crs = st_crs(current_SOC))     # use the same CRS in both files

### Extracting the sum of current SOC mean for each SA2 code
data_fill <- NULL
data_mean <- SA2_data %>%
  st_drop_geometry() %>%
  dplyr::select(SA2_MAIN16)

for (i in 1:nrow(data_mean)) {  # fails at i = 98
  data <- data_mean$SA2_MAIN16[i]
  extract_current <- aggregate(current_SOC, SA2_data[i, ], sum, na.rm = TRUE, exact = TRUE)
  extract_current <- extract_current %>%
    st_as_sf() %>%
    st_drop_geometry()
  extract_current <- as.data.frame(extract_current)
  data <- cbind(data[1], extract_current)
  data_fill <- rbind(data_fill, data)
  print(i)
}
At some point (polygon number 98 in the loop), I receive the following error:
Error in CPL_read_gdal(as.character(x), as.character(options), as.character(driver), :
negative length vectors are not allowed
I initially tried to do this for all of SA2_data in one run (without any loop) and received the same message. I've read that the process could be running out of memory, but that doesn't make much sense, since the polygon is not that big.
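For what it's worth, the fallback I'm considering is exactextractr (already loaded above). This is a rough sketch, not yet tested on the full dataset; exact_extract() wants a Raster* or terra object, so the GeoTIFF is re-read with raster():
library(raster)
soc_raster <- raster("/tmp/current_soc_june2021_data/Current_SOC30_mean_June2021a.tif")
# coverage-weighted sum per polygon; NA cells are excluded
SA2_data$soc_sum <- exact_extract(soc_raster, SA2_data, 'sum')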
The data sources for these files can be found here:
SA2 shapefile: https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/1270.0.55.001July%202016?OpenDocument
current_SOC: https://datasets.seed.nsw.gov.au/dataset/soil-carbon-sequestration-potential-with-enhanced-vegetation-cover-over-nsw
Appreciate any comment on this. Thanks!

Related

Filling road network data gaps

I have a motorway network with count points that can be matched to road links. However, they only match around half the OSM links. The network is unidirectional, and it should be possible to assign data from joining links to the missing links.
I currently have a rather ugly and long solution based on a WHILE loop that sequentially fills in the connecting links. However, I think a more elegant solution might be possible using an sfnetwork or spatial lines network. The packages stplanr, sfnetworks and dodgr closely match what I want to do, but all seem to focus on routing and origin-destination data.
Below is a reproducible example that uses a small area of the UK motorway network, removes a random sample of half the links, and generates flow and speed data for the remaining half.
How do I fill in the missing links with data from either end of the missing links?
library(tidyverse)
library(mapview)
library(sf)
library(osmdata)

## define area to import osm data
x_max <- -2.31
x_min <- -2.38
y_max <- 51.48
y_min <- 51.51

## create a data frame to set up polygon generation
df <- data.frame(X = c(x_min, x_max, x_max, x_min),
                 Y = c(y_max, y_max, y_min, y_min))

## generate a polygon of the area
rd_area <- df %>%
  st_as_sf(coords = c("X", "Y"), crs = 4326) %>%
  dplyr::summarise(geometry = st_combine(geometry)) %>%
  st_cast("POLYGON")

## get osm geometry for motorway links for the defined area
x <- opq(bbox = rd_area) %>%
  add_osm_feature(key = 'highway', value = c('motorway', 'motorway_link')) %>%
  osmdata_sf()

## extract line geometry, generate a unique segment ID and drop excess columns
rdz <- x$osm_lines %>%
  mutate(seg_id = paste0("L", sprintf("%02d", 1:NROW(bicycle)))) %>%
  select(seg_id)

## pretend we only have traffic counts and speeds for half the links
osm_dat <- rdz[c(3, 4, 5, 7, 11, 14, 15), ]

## links without data
osm_nodat <- filter(rdz, !seg_id %in% osm_dat$seg_id)

## visualise links with data and without
mapview(osm_dat, color = "green") + mapview(osm_nodat, color = "red")

## make up some data to work with
pretend_counts <- st_centroid(osm_dat)

## assign some random annual average daily flow and speed averages
pretend_counts$aadt <- sample(200:600, nrow(pretend_counts))
pretend_counts$speed <- sample(40:80, nrow(pretend_counts))
Here is one quick and elegant solution, from the Cyipt project: https://github.com/cyipt/cyipt/blob/master/scripts/prep_data/get_traffic.R
It uses code from the get.aadt.class function and Voronoi polygons to give the flows and speeds to the nearest roads. However, it doesn't distribute, i.e. split, the flows where one link meets two, and it sometimes results in opposing directions having the same flows and speeds.
library(dismo) ## dismo package for Voronoi polygon generation

# Make Voronoi polygons and convert to sf
voronoi <- dismo::voronoi(x = st_coordinates(pretend_counts))
voronoi <- as(voronoi, "sf")
st_crs(voronoi) <- st_crs(pretend_counts)

# Find intersections of roads with Voronoi polygons
inter <- st_intersects(osm_nodat, voronoi)

# Get aadt and speed values (mean of the intersecting Voronoi cells)
osm_nodat$aadt <- sapply(seq_len(nrow(osm_nodat)),
                         function(x) round(mean(pretend_counts$aadt[inter[[x]]]), 0))
osm_nodat$speed <- sapply(seq_len(nrow(osm_nodat)),
                          function(x) round(mean(pretend_counts$speed[inter[[x]]]), 0))

# Combine links with and without data
all_osm <- as.data.frame(rbind(osm_dat, osm_nodat))
st_geometry(all_osm) <- all_osm$geometry
flows <- dplyr::select(all_osm, aadt)
mapview(flows)
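For the literal question, filling each missing link from the links joining either end, here is a minimal sketch of my own (not from the Cyipt script; it assumes st_touches() adjacency is an acceptable stand-in for "joining links" and simply averages outward until no more links can be filled):
filled <- rdz
filled$aadt <- NA_real_
filled$speed <- NA_real_
# copy the known values onto the matching segments
filled$aadt[match(osm_dat$seg_id, filled$seg_id)] <- pretend_counts$aadt
filled$speed[match(osm_dat$seg_id, filled$seg_id)] <- pretend_counts$speed
touching <- st_touches(filled) # sparse adjacency list of the links
repeat {
  missing <- which(is.na(filled$aadt))
  if (length(missing) == 0) break
  new_aadt <- sapply(missing, function(i) mean(filled$aadt[touching[[i]]], na.rm = TRUE))
  new_speed <- sapply(missing, function(i) mean(filled$speed[touching[[i]]], na.rm = TRUE))
  if (all(is.nan(new_aadt))) break # no neighbour of any missing link has data yet
  filled$aadt[missing] <- ifelse(is.nan(new_aadt), NA, new_aadt)
  filled$speed[missing] <- ifelse(is.nan(new_speed), NA, new_speed)
}
Unlike the Voronoi approach, this only ever propagates values along connected links, though it still doesn't split flows at junctions.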

sp::over(): does the point belong to one of the polygons identified with an OGRGeoJSON file?

I'm trying to get a boolean vector where, for example, v[i] = 1 tells me whether the i-th point (a latitude/longitude pair from a train data frame) falls within one of the geographical areas identified by an OGRGeoJSON file.
The OGR file is structured roughly like this:
District 1: 24 polygons
District 2: 4 polygons
District 3: 27 polygons
District 4: 18 polygons
District 5: 34 polygons
That's what I tried to do. However, the results are not correct, because the polygon that is generated is a mix of all the various areas present in the OGR file.
library(rgdal)
library(httr)
library(sp)

r <- GET('https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=GeoJSON')
nyc_neighborhoods <- readOGR(content(r, 'text'), 'OGRGeoJSON', verbose = F)

# New York City polygon, built by flattening all the coordinates into one Polygon
pol_lat <- c(nyc_neighborhoods_df$lat)
pol_long <- c(nyc_neighborhoods_df$long)
xy <- cbind(pol_lat, pol_long)
p <- Polygon(xy)
ps <- Polygons(list(p), 1)
pol <- SpatialPolygons(list(ps))

# Points to analyse (pairs of coordinates)
ny_lat <- c(train$pickup_latitude, train$dropoff_latitude)
ny_long <- c(train$pickup_longitude, train$dropoff_longitude)
ny_coord <- cbind(ny_lat, ny_long)
pts <- SpatialPoints(ny_coord)

# Query: does each point fall inside NYC or not?
over(pts, pol, returnList = TRUE)
How can I fix this to get the correct result?
sp is an older package which is being phased out in favor of the newer "Simple Features" sf package. Let me know if you are open to using the pipe operator %>% from the magrittr package, as it works nicely with the sf package (as do dplyr and purrr).
Using sf, you could do:
library(sf)
# Replace this with the path to the geojson file
geojson_path <- "path/to/file.geojson"
boroughs <- sf::st_read(dsn = geojson_path, stringsAsFactors = FALSE)
Now let's make a very simple spatial point object to stand in for the "train" data.
# Make test data.frame
test_df <- data.frame(
  # Random test point I chose, a couple of blocks from Central Park
  a = "manhattan_point",
  y = 40.771959,
  x = -73.964128,
  stringsAsFactors = FALSE
)

# Turn the test_df into a spatial object
test_point <- sf::st_as_sf(
  test_df,
  # The coords argument tells st_as_sf which columns store the
  # longitude and latitude it uses to associate a spatial point
  # with each row of the data.frame
  coords = c("x", "y"),
  crs = 4326 # WGS84
)
Now we are ready to determine what polygon(s) our point falls in:
# Get the sparse binary predicate. This will give a list with as
# many elements as there are spatial objects in the first argument
# (here test_point, which has 1 element). It also has attributes
# detailing what the relationship is (intersection, in our case).
sparse_bin_pred <- sf::st_intersects(test_point, boroughs)

# Output the boro_name values that matched. The purrr package
# offers more intuitive ways to do this, but lapply works:
lapply(
  sparse_bin_pred,
  function(x) boroughs$boro_name[x]
)
That last part outputs:
[[1]]
[1] "Manhattan"

Extract WORLDCLIM data using R for a single country

I want to extract WorldClim data for minimum and maximum temperature for a single country, India, using R, and save it as a dataset (to use with my own dataset, which contains crop yields at the district level).
I have gone through several posts and can see that this can be done easily in R; however, the posts I have tried to follow differ in their commands and sequences, and I am getting confused.
(https://gis.stackexchange.com/questions/259478/worldclim-data-na-for-my-coordinates, https://gis.stackexchange.com/questions/227585/using-r-to-extract-data-from-worldclim)
What I have tried is as follows.
library(raster)
library(sp)

r <- getData('CMIP5', var = 'tmin', res = 10, rcp = 45, model = 'HE', year = 70)
r <- r[[c(1, 12)]]
values <- extract(r, points)
df <- cbind.data.frame(coordinates(points), values)
head(df)
However, I can run only the first two lines; the line values <- extract(r, points) gives the error:
Error in (function (classes, fdef, mtable) :
unable to find an inherited method for function ‘extract’ for signature ‘"RasterStack", "function"’
Any suggestions?
Here is a solution. (The error above arises because no object named points was ever created, so R finds the base graphics function points(); hence the "function" in the method signature.)
library(raster)
library(sp)
library(rgeos)
library(rgdal)
library(sf)

r <- getData('CMIP5', var = 'tmin', res = 10, rcp = 45, model = 'HE', year = 70)

# Using zonal statistics
# (replace the path with your own, e.g. "F:\\Kriging in R\\India Shape files\\2011_Dist.shp")
poly <- shapefile("F:\\Kriging in R\\India Shape files\\2011_Dist.shp")
plot(poly)

# This will take considerable time
ex <- extract(r, poly, fun = mean, na.rm = TRUE, df = TRUE, weights = TRUE)
write.csv(cbind(poly$DISTRICT, ex), "Worldclim.csv", row.names = F)

# Using centroids
# (again, point dsn at your own folder of shapefiles)
nc <- st_read(dsn = "F:\\Kriging in R\\India Shape files", layer = "2011_Dist")
# view the attributes and the first 6 attribute values of the data
head(nc)
sp_cent <- gCentroid(as(nc, "Spatial"), byid = TRUE)
values <- extract(r, sp_cent)
write.csv(cbind(as.data.frame(nc$DISTRICT), as.data.frame(values)),
          "Worldclim_centroids.csv", row.names = F)
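If you don't have your own districts shapefile, a boundary can also be fetched with getData() itself. A sketch using the GADM level-2 (district) boundaries; NAME_2 is the GADM district-name column:
library(raster)
india <- getData('GADM', country = 'IND', level = 2) # district boundaries
r <- getData('CMIP5', var = 'tmin', res = 10, rcp = 45, model = 'HE', year = 70)
# mean tmin per district; this will also take considerable time
ex <- extract(r, india, fun = mean, na.rm = TRUE, df = TRUE)
write.csv(cbind(india$NAME_2, ex), "Worldclim_GADM.csv", row.names = FALSE)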

Population density within polygons

So, I have some questions regarding the raster package in R. I have a raster with estimated population at each grid point, and a shapefile with polygons of regions. I want to find the coordinates of the neighborhood with the highest population density within each region. Suppose that each neighborhood is a homogeneous square of 5 by 5 grid points.
The following toy example mimics my problem.
library(raster)
library(maptools)
library(sf)    # for st_as_sf()
library(dplyr) # for filter()/select()/%>%

set.seed(123)
data(wrld_simpl)
wrld_simpl <- st_as_sf(wrld_simpl)
contr_c_am <- wrld_simpl %>%
  filter(SUBREGION == 13) %>%
  filter(FIPS != "MX") %>%
  select(NAME)

# Create a raster of population (sorry for the bad example spatial distribution)
r <- raster(xmn = -180, xmx = 180, ymn = -90, ymx = 90, res = 0.1)
values(r) <- runif(ncell(r), 0, 100)

# keep only raster around the region of interest
r_small <- crop(r, extent(contr_c_am))
plot(r_small)
plot(st_geometry(contr_c_am), add = T)
raster_contr_c_am <- rasterize(contr_c_am, r)
raster_contr_c_am is the population grid, and the name of the region is saved as an attribute.
Somehow I need to filter only grid points from one region, and probably use some function like focal() to find the total nearby population:
focal(raster_contr_c_am, matrix(1, 5, 5), sum, pad = T, padValue = 0)
Then I need to find which grid point has the highest value within each region and save its coordinates.
I hope my explanation is not too confusing.
Thanks for any help!
Here's an example that iterates over the shape defining the region, then uses the raster values within the region and the focal() function to find the maximum.
library(raster)
library(maptools)
library(sf)
library(dplyr)

set.seed(123)
data(wrld_simpl)
wrld_simpl <- st_as_sf(wrld_simpl)
contr_c_am <- wrld_simpl %>%
  filter(SUBREGION == 13) %>%
  filter(FIPS != "MX") %>%
  select(NAME)

# Create a raster of population (sorry for the bad example spatial distribution)
r <- raster(xmn = -180, xmx = 180, ymn = -90, ymx = 90, res = 0.1)
values(r) <- runif(ncell(r), 0, 100)

# keep only raster around the region of interest
r_small <- crop(r, extent(contr_c_am))
raster_contr_c_am <- rasterize(contr_c_am, r_small)

# function to find the max raster value in a region using focal()
findMax <- function(region, raster) {
  tt <- trim(mask(raster, region))                   # focus on the region
  ff <- focal(tt, w = matrix(1/25, nc = 5, nr = 5))  # 5x5 moving average
  maximumCell <- which.max(ff)                       # find the maximum cell id
  maximumvalue <- maxValue(ff)                       # find the maximum value
  maximumx <- xFromCell(ff, maximumCell)             # get the coordinates
  maximumy <- yFromCell(ff, maximumCell)
  # return a data frame
  data.frame(maximumx, maximumy, maximumvalue)
}

numberOfShapes <- nrow(contr_c_am)
ll <- lapply(1:numberOfShapes, function(s) findMax(region = contr_c_am[s, ], raster = r_small))
merged <- do.call(rbind, ll)
maxpoints <- st_as_sf(merged, coords = c('maximumx', 'maximumy'), crs = crs(contr_c_am))

library(mapview) # optional but nice visualization - select layers to see if things look right
mapview(maxpoints) + mapview(r_small) + mapview(contr_c_am)
I've made an sf object so that it can be plotted with the other spatial objects; the mapview call above lets you check visually that the maxima land where they should.

Using R intersections to create a polygons-inside-a-polygon key using two shapefile layers

The data
I have two shapefiles marking the boundaries of national and provincial electoral constituencies in Pakistan.
The objective
I am attempting to use R to create a key that will generate a list of which provincial-level constituencies are "contained within" or otherwise intersecting with which national-level constituencies, based on their coordinates in this data. For example, NA-01 corresponds with PA-01, PA-02, PA-03; NA-02 corresponds with PA-04 and PA-05, etc. (The key will ultimately be used to link separate dataframes containing electoral results at the national and provincial level; that part I've figured out.)
I have only basic/intermediate R skills learned largely through trial and error and no experience working with GIS data outside of R.
The attempted solution
The closest solution I could find for this problem comes from this guide to calculating intersection areas in R. However, I have been unable to successfully replicate any of the three proposed approaches (either the questioner's use of a general TRUE/FALSE report on intersections, or the more precise calculations of area of overlap).
The code
# import map files
NA_map <- readOGR(dsn = "./National_Constituency_Boundary", layer = "National_Constituency_Boundary")
PA_map <- readOGR(dsn = "./Provincial_Constituency_Boundary", layer = "Provincial_Constituency_Boundary")
# Both are now SpatialPolygonsDataFrame objects of 273 and 577 elements, respectively.

# If relevant, I used spdplyr to tweak some of the attribute names
# (for use later when joining to the electoral data frames):
NA_map <- NA_map %>%
  rename(constituency_number = NA_Cons,
         district_name = District,
         province = Province)
PA_map <- PA_map %>%
  rename(province = PROVINCE,
         district_name = DISTRICT,
         constituency_number = PA)

# calculate intersections, take one
Results <- gIntersects(NA_map, PA_map, byid = TRUE)
# this creates a large matrix of 157,521 elements
rownames(Results) <- NA_map@data$constituency_number
colnames(Results) <- PA_map@data$constituency_number
Attempting to add the rowname/colname labels, however, gives me the error message:
Error in dimnames(x) <- dn :
length of 'dimnames' [1] not equal to array extent
Without the rowname/colname labels I'm unable to read the overlay matrix, and I'm unsure how to filter it so as to produce a list of only the TRUE intersections that would help make an NA-PA key.
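One observation that may help (a sketch, not tested against the real shapefiles): the matrix has 157,521 = 273 × 577 cells, and since assigning 273 row names fails, the rows must correspond to the 577 provincial constituencies, i.e. to the second argument of gIntersects(). Swapping the labels and then using which() with arr.ind = TRUE enumerates just the TRUE pairs:
rownames(Results) <- PA_map@data$constituency_number
colnames(Results) <- NA_map@data$constituency_number
idx <- which(Results, arr.ind = TRUE)
# one row per TRUE intersection, labelled by constituency
key <- data.frame(provincial = rownames(Results)[idx[, 1]],
                  national = colnames(Results)[idx[, 2]])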
I also attempted to replicate the other two proposed solutions for calculating the exact area of overlap:
# calculate intersections, take two
pi <- intersect(NA_map, PA_map)
# this generates a SpatialPolygons object with 273 elements
areas <- data.frame(area = sapply(pi@polygons, FUN = function(x) slot(x, 'area')))
# this calculates the area of intersection but has no other variables
row.names(areas) <- sapply(pi@polygons, FUN = function(x) slot(x, 'ID'))
This generates the error message:
Error in `row.names<-.data.frame`(`*tmp*`, value = c("2", "1", "4", "5", :
duplicate 'row.names' are not allowed
In addition: Warning message:
non-unique value when setting 'row.names': ‘1’
So when I attempt to attach areas to the attribute data with
attArrea <- spCbind(pi, areas)
I get the error message
Error in spCbind(pi, areas) : row names not identical
Attempting the third proposed method:
# calculate intersections, take three
pi <- st_intersection(NA_map, PA_map)
Produces the error message:
Error in UseMethod("st_intersection") :
no applicable method for 'st_intersection' applied to an object of class "c('SpatialPolygonsDataFrame', 'SpatialPolygons', 'Spatial', 'SpatialPolygonsNULL', 'SpatialVector')"
I understand that my SPDF maps can't be used for this third approach, but it wasn't clear from the description what steps would be needed to transform them and attempt this method.
The plea for help
Any suggestions on corrections necessary to use any of these approaches, or pointers towards some other method of figuring this, would be greatly appreciated. Thanks!
Here is some example data
library(raster)
p <- shapefile(system.file("external/lux.shp", package="raster"))
p1 <- aggregate(p, by="NAME_1")
p2 <- p[, 'NAME_2']
So we have p1 with regions, and p2 with lower level divisions.
Now we can do
x <- intersect(p1, p2)
# or x <- union(p1, p2)
data.frame(x)
Which should be (and is) the same as the original
data.frame(p)[, c('NAME_1', 'NAME_2')]
To get the area of the polygons, you can do
x$area <- area(x) / 1000000 # divide to get km2
There are likely to be many "slivers", very small polygons because of slight variations in borders. That might not matter to you.
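If the slivers do matter, one option is to drop intersections below an area threshold computed above; the 0.01 km² cutoff here is arbitrary and purely illustrative:
x <- x[x$area > 0.01, ] # keep only pieces larger than 0.01 km2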
But another approach could be matching by centroid:
y <- p2
e <- extract(p1, coordinates(p2))
y$NAME_1 <- e$NAME_1
data.frame(y)
Your code isn't self-contained, so I didn't try to replicate the errors you report.
However, getting the 'key' you want is very simple using the sf package (which is intended to supersede rgeos, rgdal and sp in the near future). See here:
library(sf)

# Download shapefiles
national.url <- 'https://data.humdata.org/dataset/5d48a142-1f92-4a65-8ee5-5d22eb85f60f/resource/d85318cb-dcc0-4a59-a0c7-cf0b7123a5fd/download/national-constituency-boundary.zip'
provincial.url <- 'https://data.humdata.org/dataset/137532ad-f4a9-471e-8b5f-d1323df42991/resource/c84c93d7-7730-4b97-8382-4a783932d126/download/provincial-constituency-boundary.zip'
download.file(national.url, destfile = file.path(tempdir(), 'national.zip'))
download.file(provincial.url, destfile = file.path(tempdir(), 'provincial.zip'))

# Unzip shapefiles
unzip(file.path(tempdir(), 'national.zip'), exdir = file.path(tempdir(), 'national'))
unzip(file.path(tempdir(), 'provincial.zip'), exdir = file.path(tempdir(), 'provincial'))

# Read map files
NA_map <- st_read(dsn = file.path(tempdir(), 'national'), layer = "National_Constituency_Boundary")
PA_map <- st_read(dsn = file.path(tempdir(), 'provincial'), layer = "Provincial_Constituency_Boundary")

# Get sparse list representation of intersections
intrs.sgpb <- st_intersects(NA_map, PA_map)

length(intrs.sgpb) # One list element per national constituency
# [1] 273
print(intrs.sgpb[[1]]) # Indices of provincial constituencies intersecting the first national constituency
# [1] 506 522 554 555 556
print(PA_map$PROVINCE[intrs.sgpb[[1]]][1]) # Province of the first intersecting provincial constituency
# [1] KHYBER PAKHTUNKHWA
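To turn the sparse list into the national-to-provincial key itself, one more step suffices. A sketch, assuming the identifier columns are NA_Cons and PA, as in the question's rename() calls:
key <- data.frame(
  national = rep(NA_map$NA_Cons, lengths(intrs.sgpb)),
  provincial = PA_map$PA[unlist(intrs.sgpb)]
)
head(key) # one row per national-provincial intersection pair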
