SO gurus!
I am trying to count the densities of surviving units in different grid cells.
I have two shapefiles with points from the two survey periods in question (one before and one after the mortality event). The aim is to see whether there is a difference in survival rates and to link the proportion of survivors to a climatic variable obtained from the raster value of the corresponding grid cell. In the code snippet below I have created a random raster and shapefiles.
packs = c('raster', 'rgdal', 'spatstat', 'sp' ,'dplyr')
sapply(packs, FUN = 'require', character.only = TRUE)
xy <- matrix(rnorm(1024),32,32) #Creating the desired raster
image(xy)
rast <- raster(xy)
extent(rast) <- c(36,37,-3,-2)
projection(rast) <- CRS("+proj=longlat +datum=WGS84")
points <- runifpoint(n =4000, c(36,37,-3,-2)) # Creating the points
x <- points$x
y <- points$y
values <- c(rep(1, 900), rep(0, 3100))
xy <- cbind(x, y)
points <- cbind(x, y, values)
points <- data.frame(points)
shp <- SpatialPointsDataFrame(coords = xy, data = data.frame(values) ) # creating shpfiles
projection(shp) <- CRS("+proj=longlat +datum=WGS84")
subs <- filter(points, values == 1)
suxy <- select(subs, x,y)
shpsub <- SpatialPointsDataFrame(coords = suxy, data = data.frame(subs$values)) # creating shpfiles
projection(shpsub) <- CRS("+proj=longlat +datum=WGS84")
When I attempt to extract the points, I use the following lines of code:
shp <- spTransform(shp, projection(rast)) # make sure they have same transformation
shpsub <- spTransform(shpsub, projection(rast))
XY <- xyFromCell(rast, cell = 1:ncell(rast))
v <- as.data.frame(rast) #Extract values from raster
XY <- data.frame(XY, v) # Creating a data frame containing coord., cellno and value
XY$cell <- c(1:ncell(rast))
cells <- cellFromXY(rast,shp) # find which cells the points are in
cells <- rle(cells) # returns a value and a length, fast for counting
cellsfound <- cellFromXY(rast,shpsub)
cellsfound <- rle(cellsfound)
Proportion <- data.frame(cell = cells$values, shp = cells$lengths)
test <- data.frame(cell = rep(NA,NROW(Proportion)), shpsub = rep(NA, NROW(Proportion)))
test$cell <- c(cellsfound$values, rep(NA, nrow(test) - length(cellsfound$values)))
test$shpsub <- c(cellsfound$lengths, rep(NA, NROW(test) - length(cellsfound$lengths)))
Proportion <- full_join(Proportion, test, by = "cell")
test.Proportion <- mutate(Proportion, Proportion = shpsub/shp) #Calculating Proportion
XY <- left_join(XY, test.Proportion, by = "cell") # Adding Proportion to coord and cell no.
# XY.m <- summarise(XY, )  # incomplete; not used below
XY <- na.omit(XY) ; XY <- XY[,-4]
As I see it, using rle() returns the same cells multiple times instead of counting the number of points within each individual cell, as was my intention. Can anyone please explain how to do this in a way that retrieves the number of occurrences in each individual cell?
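One direction I have been experimenting with, though I am not sure it is the idiomatic approach, is to tabulate the cell indices directly with table() instead of rle(); a minimal sketch using the objects defined above:
cells.all   <- cellFromXY(rast, shp)      # cell index of every point
cells.alive <- cellFromXY(rast, shpsub)   # cell index of every surviving point
counts.all   <- as.data.frame(table(cell = cells.all),   stringsAsFactors = FALSE)
counts.alive <- as.data.frame(table(cell = cells.alive), stringsAsFactors = FALSE)
names(counts.all)[2]   <- "n.total"
names(counts.alive)[2] <- "n.alive"
prop <- merge(counts.all, counts.alive, by = "cell", all.x = TRUE)  # one row per occupied cell
prop$n.alive[is.na(prop$n.alive)] <- 0
prop$proportion <- prop$n.alive / prop$n.total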
I have this dataframe in R:
library(raster)
# create a random dataframe with yearly values for each column
df <- data.frame(year = seq(1981,2012), a = runif(32,1,33), b = rnorm(32, 6, 18), c = rnorm(32, 3, 12),
d = rnorm(32, 0, 18))
and then this multilayer raster:
rs <- stack()
for (i in 1:32) {
  xy <- matrix(rnorm(400), 20, 20)
  # Turn the matrix into a raster
  rast <- raster(xy)
  # Give it lat/lon coords for 20-30°E, 43-49°N
  extent(rast) <- c(20, 30, 43, 49)
  rs <- addLayer(rs, rast)
}
# create a Z field for raster just created
years <- seq(as.Date("1981-01-01"), as.Date("2012-12-31"), by = "years")
aa <- setZ(rs, years)
names(rs) <- years
My question is: how would it be possible to obtain five rasters representing the correlation (let's say Spearman) between each column in dataframe df and the raster stack rs?
Thank you all for your help!
I am not sure what exactly you want to do. There are 32 values in each column of df, and 32 layers with 400 values in the RasterStack.
Perhaps you are looking for the correlation of the columns in df and the mean value of the layers? That you can do like this:
Your data
set.seed(0)
df <- data.frame(year = seq(1981,2012), a=runif(32,1,33), b=rnorm(32, 6, 18), c=rnorm(32, 3, 12), d=rnorm(32, 0, 18))
r <- raster(nrow=20, ncol=20, ext=extent(20,30,43,49))
rs <- stack(lapply(1:32, function(i) setValues(r, rnorm(400,20,20))))
years <- seq(as.Date("1981-01-01"), as.Date("2012-12-31"), by = "years")
names(rs) <- years
Solution
x <- cellStats(rs, mean)
sapply(2:5, function(i) cor(x, df[,i]))
#[1] 0.123391584 -0.007801092 -0.124336155 0.060774465
Well, I figured out a solution; I don't know if it is the best, but I think it works.
Here is the example for column a from df; I created a dummy raster layer for each row in column a, and after that I used corLocal to compute the correlation:
### create a raster layer for each row (year) for column 'a' in df
rs.r <- stack()
library(data.table)
### extract x and y coordinates for raster rs to create a raster stack
cord <- rasterToPoints(rs[[1]], spatial = FALSE)
cord <- cord[, 1:2]
head(cord)
### create a raster where each layer is the value in column a from df
year.s <- unique(df$year)
for (i in 1:length(df$year)) {
  print(df$year[i])
  re <- df$a[df$year == year.s[i]]
  c <- data.table(x = cord[, 1], y = cord[, 2], tt = re)
  m <- rasterFromXYZ(c)
  crs(m) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
  rs.r <- addLayer(rs.r, m)
  crs(rs.r) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
}
names(rs.r) <- df$year ### set the names for the layers
ext <- extent(rs)
rs.r <- setExtent(rs.r, ext)
rs.r<- projectRaster(rs.r, rs,method = 'ngb')
spplot(corLocal(rs.r, rs, 'spearman'))
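As an aside, since values(rs) gives one row per cell containing the 32 layer values, the per-pixel Spearman correlation against a single column (say a) can probably also be computed directly on the value matrix; a rough sketch, not tested against the real data:
vals <- values(rs)                        # matrix: one row per cell, one column per layer
cor.a <- apply(vals, 1, function(v) cor(v, df$a, method = "spearman"))
r.cor.a <- setValues(raster(rs), cor.a)   # empty raster with the geometry of rs, filled with correlations
plot(r.cor.a)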
Is there a way to generate regularly spaced (e.g., 500 meters apart) points within a polygon using R? I have been trying to use the sp package but can't seem to define a set of points that are spaced a certain distance apart from one another. My aim is to generate the points, then extract their lat/long coordinates into a new dataframe. Any help would be much appreciated! Thanks
Quite straightforward and almost out of the box.
As the OP did not share data, buckle up, put your seats in a vertical position and let us fly to Paris. There, we will adapt a geosphere function, and with its help we will divide Paris' shape into lon/lat coordinates that are 500 meters apart (vertically and horizontally).
# Load necessary libraries.
library(raster)
library(geosphere)
library(tidyverse)
library(sp)
# This is an adapted version of geosphere's destPoint() function that works with
# changing d (distance).
destPoint_v <- function(x, y, b, d, a = 6378137, f = 1/298.257223563, ...) {
  r <- list(...)$r
  if (!is.null(r)) {
    return(.old_destPoint(x, y, b, d, r = r))
  }
  b <- as.vector(b)
  d <- as.vector(d)
  x <- as.vector(x)
  y <- as.vector(y)
  p <- cbind(x, y, b, d)
  r <- .Call("_geodesic", as.double(p[, 1]), as.double(p[, 2]),
             as.double(p[, 3]), as.double(p[, 4]),
             as.double(a), as.double(f),
             PACKAGE = "geosphere")
  r <- matrix(r, ncol = 3, byrow = TRUE)
  colnames(r) <- c("lon", "lat", "finalbearing")
  return(r[, 1:2, drop = FALSE])
}
# Data can be downloaded from
# http://osm13.openstreetmap.fr/~cquest/openfla/export/communes-20190101-shp.zip
# or
# https://www.data.gouv.fr/en/datasets/decoupage-administratif-communal-francais-issu-d-openstreetmap/
# ("Export simple de janvier 2019 (225Mo)")
# Load shapefile.
# shp <- raster::shapefile("Dropbox/work/crema/communes-20190101-shp/communes-20190101.shp")
# Extract Paris.
paris <- shp[shp$nom == "Paris", ]
# Set distance of points in meters.
dist <- 500
# Extract bounding box from Paris' SpatialPolygonDataFrame.
bbox <- raster::extent(paris)
# Calculate number of points on the vertical axis.
ny <- ceiling(geosphere::distGeo(p1 = c(bbox@xmin, bbox@ymin),
                                 p2 = c(bbox@xmin, bbox@ymax)) / dist)
# Calculate maximum number of points on the horizontal axis.
# This needs to be calculated for the lowermost and uppermost horizontal lines
# as the distance between latitudinal lines varies when the longitude changes.
nx <- ceiling(max(geosphere::distGeo(p1 = c(bbox@xmin, bbox@ymin),
                                     p2 = c(bbox@xmax, bbox@ymin)) / dist,
                  geosphere::distGeo(p1 = c(bbox@xmin, bbox@ymax),
                                     p2 = c(bbox@xmax, bbox@ymax)) / dist))
# Create result data frame with number of points on vertical axis.
df <- data.frame(ny = 1:ny)
# Calculate coordinates along the vertical axis.
pts <- geosphere::destPoint(p = c(bbox@xmin, bbox@ymin),
                            b = 0, d = dist * (1:ny - 1))
df$x <- pts[, 1]
df$y <- pts[, 2]
# Add points on horizontal axis.
df <- tidyr::crossing(nx = 1:nx, df)
# Calculate coordinates.
pts <- destPoint_v(df$x, df$y, b = 90, dist * (df$nx - 1))
# Turn coordinates into SpatialPoints.
pts <- SpatialPoints(cbind(pts[, 1], pts[, 2]), proj4string = CRS(proj4string(paris)))
# Cut to boundaries of Paris.
result <- raster::intersect(pts, paris)
# Plot result.
plot(result)
title("Paris in Points")
Kind of looks like a fish, doesn't it?
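And since the aim was a data frame of coordinates, the retained points can be pulled out with coordinates(); the column names below are my own choice:
result_df <- as.data.frame(coordinates(result))   # lon/lat of the points kept inside Paris
names(result_df) <- c("lon", "lat")
head(result_df)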
Here is a way to do it, assuming you have a lon/lat polygon, by first transforming it to a planar CRS (not as nifty as Roman's solution with destPoint).
Packages and example data
library(raster)
library(rgdal)
p <- shapefile(system.file("external/lux.shp", package="raster"))[1,]
Transform to planar crs (pick one that matches your data!)
putm <- spTransform(p, "+proj=utm +zone=32 +datum=WGS84")
Create a raster with 500 m resolution, rasterize the polygon and transform to points
r <- raster(putm, res=500)
r <- rasterize(putm, r)
pts <- rasterToPoints(r, spatial=TRUE)
Transform the points to lon/lat and plot the results
pts_lonlat <- spTransform(pts, "+proj=longlat +datum=WGS84")
result <- coordinates(pts_lonlat)
plot(p)
points(result, pch="+", cex=.5)
(looks like an elephant)
I wrote the following script to produce the raw data for plotting the above map. The problem is, for 550,000 data points, this takes about 2 hours to run on a relatively powerful machine. I'm new to R, however, and I'm wondering if there are any optimized functions I can take advantage of?
The basic idea is that, given a set of geospatial data, you split the set into 200 rows, and split each row into a bunch of squares. You then calculate the total of a value in each square of a row. The approach I've taken below is to take the "upper left" point of a square, calculate the latitude/longitude of the square's edges, exclude all points not within those bounds, and then sum what remains. Is there a better way without using a solution like PostGIS?
all.data <- read.csv("FrederictonPropertyTaxDiffCleanedv3.csv", header=TRUE,
stringsAsFactors=FALSE)
all.data$X <- as.numeric(all.data$X)
all.data$Y <- as.numeric(all.data$Y)
startEnd <- function(lats, lngs) {
  # Find the "upper left" (NW) and "bottom right" (SE) coordinates of a set of data.
  #
  # Args:
  #   lats: A list of latitude coordinates
  #   lngs: A list of longitude coordinates
  #
  # Returns:
  #   A list of values corresponding to the northwest-most and southeast-most coordinates

  # Convert to real number and remove NA values
  lats <- na.omit(as.numeric(lats))
  lngs <- na.omit(as.numeric(lngs))

  topLat <- max(lats)
  topLng <- min(lngs)
  botLat <- min(lats)
  botLng <- max(lngs)

  return(c(topLat, topLng, botLat, botLng))
}
startEndVals <- startEnd(all.data$Y, all.data$X)
startLat <- startEndVals[1]
endLat <- startEndVals[3]
startLng <- startEndVals[2]
endLng <- startEndVals[4]
num_intervals = 200.0
interval <- (startEndVals[1] - startEndVals[3]) / num_intervals
# testLng <- -66.6462379307115
# testLat <- 45.9581234392
# Prepare the data to be sent in
data <- all.data[,c("Y", "X", "levy2014_ha")]
sumInsideSquare <- function(pointLat, pointLng, interval, data) {
  # Sum all the values that fall within a square on a map given a point,
  # an interval of the map, and data that contains lat, lng and the values
  # of interest
  colnames(data) <- c("lat", "lng", "value")

  # Data east of point
  data <- data[data$lng > pointLng, ]
  # Data west of point + interval
  data <- data[data$lng < pointLng + interval, ]
  # Data north of point + interval (down)
  data <- data[data$lat > pointLat - interval, ]
  # Data south of point
  data <- data[data$lat < pointLat, ]

  # Clean remaining data
  data <- na.omit(data)

  return(sum(data$value))
}
# Debugging
# squareSumTemp <- sumInsideSquare(testLat, testLng, interval, data)
# Given a start longitude and an end longitude, calculate an array of values
# corresponding to the sums for that latitude
calcSumLat <- function(startLng, endLng, lat, interval, data) {
  row <- c()
  lng <- startLng
  while (lng < endLng) {
    row <- c(row, sumInsideSquare(lat, lng, interval, data))
    lng <- lng + interval
  }
  return(row)
}
# Debugging
# rowTemp <- calcSumLat(startLng, endLng, testLat, interval, data)
# write.csv(rowTemp, file = "Temp.csv", row.names = FALSE)
# Get each line of data to plot
lat <- startLat
rowCount <- 1
all.sums <- list()
while (lat > endLat) {
  col <- calcSumLat(startLng, endLng, lat, interval, data)
  all.sums[[as.character(rowCount)]] <- col
  lat <- lat - interval
  rowCount <- rowCount + 1
}
# Convert to data frame
all.sums.frame <- data.frame(all.sums)
# Save to disk so I don't have to run it again
write.csv(all.sums.frame, file = "Levy2014Sums200.csv", row.names = FALSE)
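A different idea I have been toying with, though I have not verified that it reproduces exactly the same grid, is to let raster's rasterize() do the binning and summing in one call, using the objects defined above (the grid above has square cells of side interval, so the number of columns is implied by the longitude range rather than fixed at 200):
library(raster)
library(sp)
pts <- all.data[!is.na(all.data$X) & !is.na(all.data$Y), ]
coordinates(pts) <- ~ X + Y                       # promote to SpatialPointsDataFrame
r <- raster(xmn = startLng, xmx = endLng, ymn = endLat, ymx = startLat,
            resolution = interval)                # cells of side `interval`
sums <- rasterize(pts, r, field = "levy2014_ha", fun = sum)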
Ended up finding a solution to this myself. The key to it was using the foreach package with the doParallel package so it could take advantage of all the cores on my computer. There is a great guide on it here: http://www.r-bloggers.com/a-brief-foray-into-parallel-processing-with-r/
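For reference, a rough sketch (untested) of what that parallel version can look like; it reuses calcSumLat()/sumInsideSquare() and the objects defined in the script above:
library(foreach)
library(doParallel)
cl <- makeCluster(detectCores() - 1)
registerDoParallel(cl)
lats <- seq(startLat, endLat, by = -interval)     # one latitude per row of squares
all.sums <- foreach(lat = lats, .combine = rbind,
                    .export = "sumInsideSquare") %dopar% {
  calcSumLat(startLng, endLng, lat, interval, data)
}
stopCluster(cl)
write.csv(as.data.frame(all.sums), file = "Levy2014Sums200.csv", row.names = FALSE)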
I want to calculate distances between points in two different datasets. I don't want to calculate the distance between all pairs of points - just the distance to the nearest point of dataset B.
Some examples:
Dataset A - Persons
http://pastebin.com/HbaeqACi
Dataset B - Waterfeatures:
http://pastebin.com/UdDvNtHs
Dataset C - City:
http://pastebin.com/nATnkMRk
So...I want to calculate the distance of each person to the nearest waterfeature point.
I've already tried to work with the rgeos package, and after struggling with some projection errors, I got it to work. But this calculates (at least I assume so) all distances to every point, whereas, as already said, I'm only interested in the distance to the nearest waterfeature point.
# load csv files
persons = read.csv("persons.csv", header = TRUE)
water = read.csv("water.csv", header = TRUE)
# change dataframes to SpatialPointDataFrame and assign a projection
library(sp)
library(rgeos)
coordinates(persons) <- c("POINT_X", "POINT_Y")
proj4string(persons) <- CRS("+proj=utm +datum=WGS84")
coordinates(water) <- c("POINT_X", "POINT_Y")
proj4string(water) <- CRS("+proj=utm +datum=WGS84")
# use rgoes package to calculate the distance
distance <- gDistance(persons, water, byid=TRUE)
# works, but calculates a huge number of distances
Is there any parameter I've missed, or do I need to use another package or function? I've also looked at spatstat, which is able to calculate the distance to the nearest neighbor, but not between two different datasets: http://hosho.ees.hokudai.ac.jp/~kubo/Rdoc/library/spatstat/html/nndist.html
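One thing I have considered, though I am not sure it is the intended approach, is simply collapsing that matrix to its per-person minima; here I assume the columns of the matrix correspond to the persons (check with dim(d)) and that the data are in a projected CRS, since gDistance works with planar distances:
d <- gDistance(persons, water, byid = TRUE)   # full distance matrix
nearest.dist  <- apply(d, 2, min)             # distance from each person to the nearest water feature
nearest.index <- apply(d, 2, which.min)       # index of that nearest water feature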
Edit:
The complete R-Script including plotting of the datasets:
library(RgoogleMaps)
library(ggplot2)
library(ggmap)
library(sp)
library(fossil)
#load data
persons = read.csv("person.csv", header = TRUE, stringsAsFactors=FALSE)
water = read.csv("water.csv", header =TRUE, stringsAsFactors=FALSE)
city = read.csv("city.csv", header =TRUE)
# plot data
persons_ggplot2 <- persons
city_ggplot2 <- city
water_ggplot2 <- water
gc <- geocode('new york, usa')
center <- as.numeric(gc)
G <- ggmap(get_googlemap(center = center, color = 'bw', scale = 1, zoom = 11, maptype = "terrain", frame=T), extent="device")
G1 <- G + geom_point(aes(x=POINT_X, y=POINT_Y ),data=city, shape = 22, color="black", fill = "yellow", size = 4) + geom_point(aes(x=POINT_X, y=POINT_Y ),data=persons, shape = 8, color="red", size=2.5) + geom_point(aes(x=POINT_X, y=POINT_Y ),data=water_ggplot2, color="blue", size=1)
plot(G1)
#### calculate distance
# Generate unique coordinates dataframe
UniqueCoordinates <- data.frame(unique(persons[,4:5]))
UniqueCoordinates$Id <- formatC((1:nrow(UniqueCoordinates)), width=3,flag=0)
# Generate a function that looks for the closest waterfeature for each id coordinates
NearestW <- function(id) {
  tmp <- UniqueCoordinates[UniqueCoordinates$Id == id, 1:2]
  WaterFeatures <- rbind(tmp, water[, 2:3])
  tmp1 <- earth.dist(WaterFeatures, dist = TRUE)[1:(nrow(WaterFeatures) - 1)]
  tmp1 <- which.min(tmp1)
  tmp1 <- water[tmp1, 1]
  tmp1 <- data.frame(tmp1, WaterFeature = tmp)
  return(tmp1)
}
#apply to each id and the merge
CoordinatesWaterFeature <- ldply(UniqueCoordinates$Id, NearestW)
persons <- merge(persons, CoordinatesWaterFeature, by.x=c(4,5), by.y=c(2,3))
What about writing a function that looks for the nearest waterfeature for every person?
#requires function earth.dist from "fossil" package
require(fossil)
#load data
persons = read.csv("person.csv", header = TRUE, stringsAsFactors=FALSE)
water = read.csv("water.csv", header =TRUE, stringsAsFactors=FALSE)
#Generate unique coordinates dataframe
UniqueCoordinates <- data.frame(unique(persons[,4:5]))
UniqueCoordinates$Id <- formatC((1:nrow(UniqueCoordinates)), width=3,flag=0)
#Generate a function that looks for the closest waterfeature for each id coordinates
NearestW <- function(id) {
  tmp <- UniqueCoordinates[UniqueCoordinates$Id == id, 1:2]
  WaterFeatures <- rbind(tmp, water[, 2:3])
  tmp1 <- earth.dist(WaterFeatures, dist = TRUE)[1:(nrow(WaterFeatures) - 1)]
  tmp1 <- min(tmp1)
  tmp1 <- data.frame(tmp1, WaterFeature = tmp)
  return(tmp1)
}
#apply to each id and the merge
CoordinatesWaterFeature <- ldply(UniqueCoordinates$Id, NearestW)
persons <- merge(persons, CoordinatesWaterFeature, by.x=c(4,5), by.y=c(2,3))
NOTE: I've added a stringsAsFactors parameter to the original read.csv; it makes the merging easier at the end.
NOTE: Column tmp1 gives the distance to the nearest water feature (earth.dist returns kilometers).
Maybe I'm a little too late, but you can use spatstat to compute distances between two different datasets. The command is nncross. The arguments you have to use are two objects of type ppp, which you can create using the as.ppp() function.
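A minimal sketch of that approach, assuming the raw persons/water data frames (with their POINT_X/POINT_Y columns) and planar, projected coordinates, since spatstat distances are Euclidean:
library(spatstat)
# Build a common observation window covering both point sets
win <- owin(xrange = range(c(persons$POINT_X, water$POINT_X)),
            yrange = range(c(persons$POINT_Y, water$POINT_Y)))
pp.persons <- ppp(persons$POINT_X, persons$POINT_Y, window = win)
pp.water   <- ppp(water$POINT_X, water$POINT_Y, window = win)
# One row per person: distance to, and index of, the nearest water feature
nn <- nncross(pp.persons, pp.water)
head(nn)   # columns "dist" and "which"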
We have a raster which represents the ordinal date corresponding to the start of growing season. That is, each pixel value in the raster lies between 1:365, representing the ordinal date.
I have also calculated cumulative growing degree days for all 365 days in the corresponding year. These data are loaded into R as a raster stack with 365 layers.
My goal is to randomly sample geographic locations on the start of growing season layer. I then hope to extract the value of cumulative growing degree days from those same coordinates, but only from the growing degree days stack's layer which corresponds to the start of season pixel value.
For example, if the start of season at a given pixel was the 100th day of the year, I would like to extract the growing degree days from that location on the 100th day of the year (nlayers = 100).
I have been attempting to write a function to accomplish this, but I can't seem to get it to work right. I would like to end up with a data frame or matrix showing my x location, y location, start of season day, and GDD for that day. Instead of many GDD values in one column, I get many columns of one GDD value.
It seems the problem is in my use of extract. I've experimented with the arguments nl, layer, and indexing the x argument with [[]]; they seem to produce the same result. Here's a simplified example with only 5 days to consider, and the function I am trying to construct.
Any help/suggestions are appreciated!
#============================================================
library(raster)
SOST <- raster()
SOST[] <- 1:5
r1 <- r2 <- r3 <- r4 <- r5 <- raster()
r1[] <- 10
r2[] <- 20
r3[] <- 30
r4[] <- 40
r5[] <- 50
GDD <- stack(r1,r2,r3,r4,r5)
getGDD <- function(sost, gdd, n) {
  set.seed(232)
  samp <- sampleRandom(sost, n, xy = TRUE, na.rm = TRUE)
  df <- data.frame('x' = as.numeric(), 'y' = as.numeric(),
                   'SOST' = as.numeric(), 'GDD' = as.numeric())
  df.temp <- data.frame('x' = samp[1:n, 1], 'y' = samp[1:n, 2],
                        'SOST' = samp[, 3],
                        'GDD' = extract(gdd, samp[1:n, 1:2], nl = samp[1:n, 3]))
  df <- rbind(df, df.temp)
  return(df)
}
getGDD(sost = SOST, gdd = GDD, n = 5)
It doesn't seem like this gathered a lot of attention, but I was able to solve it. Using the sample posted in the original question, the easiest solution is the stackSelect function. This was pointed out to me by Robert Hijmans on R-sig-geo.
x <- stackSelect(GDD, SOST)
set.seed(232)
samp <- sampleRandom(SOST, 5, xy = TRUE, na.rm = TRUE)[, -3]
extract(x, samp)
This works great if your data have the same extent and resolution. However, I failed to mention that my data do not align perfectly, so as far as I know I still need to create a function. With a little more thought, I was able to come up with the following example and function to solve the problem.
library(raster)
#SOST and GDD simulations with same ncell and extents as actual data:
SOST <- raster(nrow = 3991, ncol = 3025, xmn = 688635, xmx = 779385,
ymn = 4276125, ymx = 4395855, crs = "+proj=utm +zone=11 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0")
SOST[] <- 1:5
r1 <- r2 <- r3 <- r4 <- r5 <- raster(nrow = 3951, ncol = 2995, xmn = 688620.2, xmx = 779377.8, ymn = 4276121, ymx = 4395848, crs = "+proj=utm +zone=11 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0")
r1[] <- 10
r2[] <- 20
r3[] <- 30
r4[] <- 40
r5[] <- 50
GDD <- stack(r1,r2,r3,r4,r5)
getGDD <- function(sost, gdd, n) {
  set.seed(232)
  samp <- sampleRandom(sost, size = n, xy = TRUE)
  extr <- NULL
  for (i in 1:n) {
    extr[i] <- extract(gdd[[samp[i, 3]]],
                       cbind(as.matrix(samp[i, 1]), as.matrix(samp[i, 2])))
  }
  out <- data.frame(x = samp[, 1], y = samp[, 2], 'SOST' = samp[, 3], 'GDD' = extr)
  return(out)
}
test <- getGDD(sost = SOST, gdd = GDD, n = 5)
test