Extracting random points from a raster within a grid cell - r

I would like to get non-NA values extracted from random coordinates of a raster within each grid cell.
An example of a raster
library(raster)
r <- raster(ncol = 10, nrow = 10, xmx = -80, xmn = -150, ymn = 20, ymx = 60)
values(r) <- runif(ncell(r))
An example of a grid
grid <- raster(extent(r))
res(grid) <- 15
proj4string(grid)<- proj4string(r)
gridpolygon <- rasterToPolygons(grid)
plot(r)
plot(gridpolygon, add = T)
How can I extract a value with random coordinates for each raster portions inside each grid cells?
I am really new at this kind of stuff so any suggestions will be very welcome.
Thanks.

You didn't specify all the condition for sampling, so I'm going by some assumptions here.
One can sample a point per grid polygon and extract the value. Here's how you can do it in one go and hope for the best:
# pick random points per each grid cell and plot
set.seed(357)
pickpts <- sapply(gridpolygon#polygons, spsample, n = 1, type = "random")
sapply(pickpts, plot, add = TRUE)
# extract values of raster cells at specified points
sapply(pickpts, FUN = extract, x = r)
Or you can do it in a loop and sample until you get a non-NA value.
N <- length(gridpolygon#polygons)
result <- rep(NA, times = N)
for (i in 1:N) {
message(sprintf("Trying polygon %d", i))
pl <- gridpolygon#polygons[[i]]
candval <- result[i] # start with NA
# sample until you get a non-NA hit
while (is.na(candval)) {
pickpoint <- spsample(pl, n = 1, type = "random")
candval <- extract(x = r, y = pickpoint)
}
result[i] <- candval
}
result
[1] 0.4235214 0.6081435 0.9126583 0.1710365 0.7788590 0.9413206 0.8589753
[8] 0.0376722 0.9662231 0.1421353 0.0804440 0.1969363 0.1519467 0.1398272
[15] 0.4783207

Related

R function to convert polygon (sf, wkt) into mask (matrix, array)

I have an image stored as matrix with grayscale for each pixel.
On this image I use SLIC algorithm to divide it into areas.
So I get a simple feature (sf) with polygons, I am able to extract in well-known-text (wkt).
But what I really need is a matrix/mask (same dimension as my pixel-image-matrix) storing the id of the polygon each pixel belongs to. For example the pixel image[1,2] belongs to polygon 5, then mask[1,2] <- 5.
I add some code to give example of my porblem (for a random "image"):
mat <- array(runif(10000, min=0, max=500), dim=c(100,100))
# SLIC
library(supercells);
library(sf);
library(terra);
# make spatial raster from matrix
raster <- rast(mat);
rasterSLIC <- supercells(raster, k = 50, compactness = 1, dist_fun = "euclidean", avg_fun = "mean");
plot(raster);
plot(st_geometry(rasterSLIC), add = TRUE, lwd = 0.2);
point <- st_cast(rasterSLIC$geometry[2], to="POINT");
coord <- st_coordinates(point);
# what I want:
goal <- array(c(1,1,1,2,2,1,2,3,3), dim=c(3,3));
image(goal);
goal;
I would like to have something that helps me turning coords into such a mask/matrix I gave a small example for in goal.
You can use terra::rasterize
Example data
library(terra)
# polygons
f <- system.file("ex/lux.shp", package="terra")
v <- vect(f)
# arbitrary raster
r <- rast(v, res=.01)
Solution:
rid <- rasterize(v, r, 1:nrow(r))
#or
v$ID <- 1:nrow(v)
rid <- rasterize(v, r, "ID")
Illustration
plot(rid, type="classes")
text(v)
lines(v)
To get the a matrix of the raster values you can do
m <- as.matrix(rid, wide=TRUE)
With your more specific example, you could do
library(supercells);
library(terra)
set.seed(1)
mat <- array(runif(10000, min=0, max=500), dim=c(100,100))
r <- rast(mat)
SLIC <- supercells(r, k = 50, compactness = 1, dist_fun = "euclidean", avg_fun = "mean");
x <- rasterize(SLIC, r, "supercells")
xm <- as.matrix(x, wide=TRUE)
plot(x);
s <- vect(SLIC)
lines(s)

select raster within a specified distance from polygon boundary

I want to select raster cells that are within a certain distance (for e.g. 1 km or 5 km) from the boundary of a polygon. I ultimately want to take an average of only those raster cells that are within the specified distance from the boundary of shapefile inwards.
The way I thought I would approach is to create a negative buffer inwards, and subtract the original polygon and the buffer. Then mask and crop the raster using the new polygon and take the average.
Here's sample data demonstrating what I want to do.
library(raster)
# raster
r <- raster(xmn=1035792, xmx= 1116792, ymn=825303.6, ymx=937803.6, resolution = 12.5,crs = "+init=epsg:3174")
r <- setValues(r, 0)
# polygon
x <- c(1199999, 1080000, 1093067, 1090190, 1087977, 1070419, 1180419)
y <- c(957803.6,937803.6, 894366.9, 872153.9, 853703.0, 825353.6, 805353.6)
poly.lake <- SpatialPolygons(list(Polygons(list(Polygon(data.frame(x,y))), ID = 1)))
r <- mask(r, poly.lake)
r <- crop(r, poly.lake)
plot(poly.lake)
plot(r, add = T)
Instead of taking average of the resulting raster r, I only want to average raster cells which are within a certain specified distance from the boundary.
The example data but using "terra"
library(terra)
r <- rast(xmin=1035792, xmax= 1116792, ymin=825303.6, ymax=937803.6, resolution = 125, crs = "epsg:3174")
values(r) <- 1:ncell(r)
# polygon
x <- c(1199999, 1080000, 1093067, 1090190, 1087977, 1070419, 1180419)
y <- c(957803.6,937803.6, 894366.9, 872153.9, 853703.0, 825353.6, 805353.6)
p <- vect(cbind(x, y), "polygons", crs = "epsg:3174")
r <- mask(r, p)
r <- crop(r, p)
You can now take the internal buffer of p
b <- buffer(p, -10000)
x <- mask(r, b, inverse=TRUE)
global(x, mean,na.rm=T)
# mean
#lyr.1 296549.9
Or you can take both sides like this
bb <- buffer(as.lines(p), 10000)
y <- mask(r, bb)
global(y, mean,na.rm=T)
# mean
#lyr.1 296751.3
So there is a slight difference between these two approaches; I think because the first uses inverse=TRUE; I would go with the second approach.
Your drawing (and Chris' answer) suggests that you only want the distance to the western border. In that case, you can first find the start and end nodes you need (from 2 to 6)
plot(p)
points(p)
text(as.points(p), pos=2)
Select the segments in between these nodes and create a line type SpatVector.
g <- geom(p)
k <- vect(g[2:6,], "lines", crs=crs(p))
lines(k, col="red", lwd=2)
And now do as above.
bk <- buffer(k, 10000)
z <- mask(r, bk)
global(z, mean,na.rm=T)
# mean
#lyr.1 297747
If you wanted to get the part of buffer bk that is inside the original polygon p you can do
bki <- intersect(bk, p)
To complete the plot
polys(bk, lty=3, border=NA, col=adjustcolor("light blue", alpha.f = 0.4))
lines(bki, lty=3)
Finding which segments of a polygon to buffer was what puzzled me, and this seems a decent approach cast_poly_to_subsegments. Taking your poly.lake as poly_sf:
geom <- lapply(
1:(length(st_coordinates(poly_sf)[, 1]) - 1),
function(i) {
rbind(
as.numeric(st_coordinates(poly_sf)[i, 1:2]),
as.numeric(st_coordinates(poly_sf)[i + 1, 1:2])
)
}
+ ) |>
st_multilinestring() |>
st_sfc(crs=st_crs(rt)) |>
st_cast('LINESTRING')
gives us
which is a little surprising, the 'green and red', that I assumed would be 'green'. It is wound clockwise so the desired segments to buffer are 4 & 5.
lns_buf4 <- st_buffer(st_geometry(geom)[4], 1000, singleSide = TRUE)
lns_buf5 <- st_buffer(st_geometry(geom)[5], 1000, singleSide= TRUE)
lns_buf5_neg <- st_buffer(st_geometry(geom)[5], -1000, singleSide= TRUE)
plot(st_geometry(geom), col = c('red', 'yellow', 'blue', 'green'))
plot(lns_buf4, col = 'black', add = TRUE)
plot(lns_buf5, col = 'green', add = TRUE)
plot(lns_buf5_neg, col = 'blue', add = TRUE)
Whether +/-1000 is sufficient is a further intersection test between the buffer poly(s) and the other boundary. If the desired sampling area is not rectangular, steps can be taken to construct a sampling polygon from the buffer and intersection.
#library(lwgeom)
# on poly_sf
new_line <- draw(x = 'line', col ='blue', lwd = 2, n = 10)
lns_buf5_10k_neg <- st_buffer(st_geometry(geom)[5], -10000, singleSide= TRUE)
new_line_sf <- st_as_sf(new_line, crs = st_crs(lns_buf5_10k_neg))
buf5_nline_split <- lwgeom::st_split(lns_buf5_10k_neg, new_line_sf$geometry)
irreg_smp_area <- st_collection_extract(buf5_nline_split)[1]
Though I'm happy to see it all done in terra.

Compile multiple raster extractions to one table

I have 3 rasters in which I have extracted data from using a polyline from a shapefile. Currently, I have the extraction as 3 separate lists. Is there A way I can do an extraction from all three rasters and compile them to one table with different columns for the data from each raster?
This is the current code I am using
Harney_Transects <- readOGR(dsn = ".", layer = "Transect_HN")
MeanTreeHeightHarneyBefore=raster('HN_TrMean_B_Clip.tif')
ScanAngleHarneyBefore= raster('HNScanAngle_B_Clip.tif')
MeanShrubHeightHarneyBefore= raster('HN_MeanShrub_B_Clip.tif')
Extraction_Shrub_Harney= extract(MeanShrubHeightHarneyBefore,Harney_Transects)
Extraction_Tree_Harney= extract(MeanTreeHeightHarneyBefore,Harney_Transects)
Extraction_ScanAngle_Harney= extract(ScanAngleHarneyBefore,Harney_Transects)
In short, you can stack() all the rasters you want to extract data from, and extract from the stack.
Here's a fully reproducible example using two rasters and a SpatialLines object, like you have in your question. Skip to the last code chunk for a direct answer to your question.
library(sp)
library(raster)
# function to generate random rasters
gen_raster <- function(){
r <- raster(nrows = 10, ncols = 10, res = 1,
xmn = 0, xmx = 10, ymn = 0, ymx = 10,
vals = rnorm(100, 5, 1))
return(r)
}
# generate 2 random rasters
r1 <- gen_raster()
r2 <- gen_raster()
# view
par(mfrow = c(1,2))
plot(r1, main = "raster 1"); plot(r2, main = "raster 2")
dev.off()
# generate transect (`SpatialLines` object)
m <- as.matrix(data.frame(x = 5.5, y = seq(0, 10, 1)))
l <- list(Lines(Line(m), "m"))
l <- SpatialLines(l)
# view the transect
plot(r1, main = "raster 1 with transect"); lines(l)
Running extract on the stacked rasters returns a list with a matrix in it. The last thing you'll want is to pull this out as a data.frame, which is a bit tricky.
rs <- stack(r1, r2) # stack any amount of rasters to extract from
re <- extract(rs, l) # extract at locations `l`
do.call(rbind.data.frame, re) # convert to data.frame
layer.1 layer.2
1 4.586890 5.115136
2 4.780503 5.093281
3 6.877302 3.337345
4 5.913230 3.755099
5 4.907834 4.887160
6 5.576908 5.386136
7 3.572350 5.225392
8 4.778727 5.391765
9 6.600041 4.205841
10 6.946321 5.544172
The names of the columns are the names of the raster layers in the stack. You can access these names with names(rs), and modify them with names(rs) <- c("new_name_1", "new_name_2").

Extraction of pixel values in lines to each number of pixels

In my example I create a raster:
require(raster); require(sp)
## Raster Raster creation
r <- raster(nc=10, nr=10)
r <- setValues(r, round(runif(ncell(r))* 255))
After, I make pixels values extraction by selection of coordinates:
x <- c(-150)
y <- c(-80)
p <- data.frame(x,y)
pontos <- SpatialPoints(p)
p$cel <- cellFromXY(r, pontos)
p$col <- colFromCell(r, p$cel)
p$row <- rowFromCell(r, p$cel)
p
plot(r)
text(r)
points(pontos, pch = 4, col = 2)
But, I'd like to find a way to extract the value of the pixels in which I would select a coordinate and the function would perform the extraction of the pixels of entire horizontal lines of the raster to every two pixels from the given coordinate. For example, I choose xy(-150,-80) coordinates but my function below returns values only for the first line and need lines 4, 7 and 10 too.
require(plyr)
vals2cols <- ldply(1:nrow(p),
function(ir){
getValuesBlock(r,
col = p$col[ir],
ncols = 10,
row = p$row[ir],
nrows = 1)
}# end fun
)
df <- data.frame(p, vals2cols)
df
This is possible?

Counting spatialpoints in gridcells

SO-gurues!
I am trying to count the densities of surviving units in different gridcells.
I have two shapefiles with points from the two survey periods in question (one before and one after the mortality event). What I intend is to see whether there is a difference in survival rates and link the proportion of survival to any climatic variable obtained from the raster value of the desired grid. In the code snippet below I have created some random raster and shapefiles.
packs = c('raster', 'rgdal', 'spatstat', 'sp' ,'dplyr')
sapply(packs, FUN = 'require', character.only = TRUE)
xy <- matrix(rnorm(1024),32,32) #Creating the desired raster
image(xy)
rast <- raster(xy)
extent(rast) <- c(36,37,-3,-2)
projection(rast) <- CRS("+proj=longlat +datum=WGS84")
points <- runifpoint(n =4000, c(36,37,-3,-2)) # Creating the points
x <- points$x
y <- points$y
values <- c(rep(1, 900), rep(0, 3100))
xy <- cbind(x, y)
points <- cbind(x, y, values)
points <- data.frame(points)
shp <- SpatialPointsDataFrame(coords = xy, data = data.frame(values) ) # creating shpfiles
projection(shp) <- CRS("+proj=longlat +datum=WGS84")
subs <- filter(points, values == 1)
suxy <- select(subs, x,y)
shpsub <- SpatialPointsDataFrame(coords = suxy, data = data.frame(subs$values)) # creating shpfiles
projection(shpsub) <- CRS("+proj=longlat +datum=WGS84")
When I attempt to extract the points I use the following lines of code
shp <- spTransform(shp, projection(rast)) # make sure they have same transformation
shpsub <- spTransform(shpsub, projection(rast))
XY <- xyFromCell(rast, cell = 1:ncell(rast))
v <- as.data.frame(rast) #Extract values from raster
XY <- data.frame(XY, v) # Creating a data frame containing coord., cellno and value
XY$cell <- c(1:ncell(rast))
cells <- cellFromXY(rast,shp) # find which cells the points are in
cells <- rle(cells) # returns a value and a length, fast for counting
cellsfound <- cellFromXY(rast,shpsub)
cellsfound <- rle(cellsfound)
Proportion <- data.frame(cell = cells$values, shp = cells$lengths)
test <- data.frame(cell = rep(NA,NROW(Proportion)), shpsub = rep(NA, NROW(Proportion)))
test$cell <- c(cellsfound$values, rep(NA, nrow(test) - length(cellsfound$values)))
test$shpsub <- c(cellsfound$lengths, rep(NA, NROW(test) - length(cellsfound$lengths)))
Proportion <- full_join(Proportion, test, by = "cell")
test.Proportion <- mutate(Proportion, Proportion = shpsub/shp) #Calculating Proportion
XY <- left_join(XY, test.Proportion, by = "cell") # Adding Proportion to coord and cell no.
XY.m <- summarise(XY, )
XY <- na.omit(XY) ; XY <- XY[,-4]
As I see it. Using rle() returns the same cells multiple times instead of counting the no of points within each individual cell as was my intention. Can anyone please explain me how to do this in a way that retrieves the information on the number of occurrences in the individual cells?

Resources