Identify nearest neighbor in grid in R (spatial) - r

I would like to create a square grid and identify the grid cells that border a set of other grid cell for which a binary variable takes a 1. In the following example, I would like to generate a vector of cell ids that border id g13 and g24:
require(sp)
grid <- GridTopology(c(0,0), c(1,1), c(5,5))
polys <- as(grid, "SpatialPolygons")
centroids <- coordinates(polys)
id <- names(polys)
tr <- ifelse(id == "g13" | id == "g24", 1, 0)
ex <- SpatialPolygonsDataFrame(polys, data = data.frame(id = id, tr = tr, row.names = row.names(polys)))
plot(ex)
text(coordinates(polys), labels = row.names(polys))
Such that it outputs a vector for all matching g13 as (g7, g8, g9, g12, g14, g17, g18, g19) and one matching g24 as (g18, g19, g20, g23, g24, g25). Any and all thoughts greatly appreciated.

rgeos::gTouches is perfect for this:
library(rgeos)
adj <- gTouches(polys, polys[which(ex$tr==1)], byid=TRUE)
apply(adj, 1, which)
# $g13
# g7 g8 g9 g12 g14 g17 g18 g19
# 7 8 9 12 14 17 18 19
#
# $g24
# g18 g19 g20 g23 g25
# 18 19 20 23 25
And, because everyone loves pictures:
plot(ex, col=ifelse(seq_along(ex) %in% c(unlist(adj), which(ex$tr==1)), 'gray', NA))
text(coordinates(polys), labels=row.names(polys))

Related

How can I join elements (columns from dataframes) from two lists by row names using R?

I need help please. I have two lists: the first contains ndvi time series for distinct points, the second contains precipitation time series for the same plots (plots are in the same order in the two lists).
I need to combine the two lists. I want to add the column called precipitation from one list to the corresponding ndvi column from the other list respecting the dates (represented here by letters in the row names) to a posterior analises of correlation between columns. However, both time series of ndvi and precipitation have distinct lenghts and distinct dates.
I created the two lists to be used as example of my dataset. However, in my actual dataset the row names are monthly dates in the format "%Y-%m-%d".
library(tidyverse)
set.seed(100)
# First variable is ndvi.mon1 (monthly ndvi)
ndvi.mon1 <- vector("list", length = 3)
for (i in seq_along(ndvi.mon1)) {
aux <- data.frame(ndvi = sample(randu$x,
sample(c(seq(1,20, 1)),1),
replace = T))
ndvi.mon1[i] <- aux
ndvi.mon1 <- ndvi.mon1 %>% map(data.frame)
rownames(ndvi.mon1[[i]]) <- sample(letters, size=seq(letters[1:as.numeric(aux %>% map(length))]) %>% length)
}
# Second variable is precipitation
precipitation <- vector("list", length = 3)
for (i in seq_along(ndvi.mon1)){
prec_aux <- data.frame(precipitation = sample(randu$x*500,
26,
replace = T))
row.names(prec_aux) <- seq(letters[1:as.numeric(prec_aux %>% map(length))])
precipitation[i] <- prec_aux
precipitation <- precipitation %>% map(data.frame)
rownames(precipitation[[i]]) <- letters[1:(as.numeric(precipitation[i] %>% map(dim) %>% map(first)))]
}
Can someone help me please?
Thank you!!!
Marcio.
Maybe like this?
library(dplyr)
library(purrr)
precipitation2 <- precipitation %>%
map(rownames_to_column) %>%
map(rename, precipitation = 2)
ndvi.mon2 <- ndvi.mon1 %>%
map(rownames_to_column) %>%
map(rename, ndvi = 2)
purrr::map2(ndvi.mon2, precipitation2, left_join, by = "rowname")
[[1]]
rowname ndvi precipitation
1 k 0.354886 209.7415
2 x 0.596309 103.3700
3 r 0.978769 403.8775
4 l 0.322291 354.2630
5 c 0.831722 348.9390
6 s 0.973205 273.6030
7 h 0.949827 218.6430
8 y 0.443353 61.9310
9 b 0.826368 8.3290
10 d 0.337308 291.2110
The below will return a list of data.frames, that have been merged, using rownames:
lapply(seq_along(ndvi.mon1), function(i) {
merge(
x = data.frame(date = rownames(ndvi.mon1[[i]]), ndvi = ndvi.mon1[[i]][,1]),
y = data.frame(date = rownames(precipitation[[i]]), precip = precipitation[[i]][,1]),
by="date"
)
})
Output:
[[1]]
date ndvi precip
1 b 0.826368 8.3290
2 c 0.831722 348.9390
3 d 0.337308 291.2110
4 h 0.949827 218.6430
5 k 0.354886 209.7415
6 l 0.322291 354.2630
7 r 0.978769 403.8775
8 s 0.973205 273.6030
9 x 0.596309 103.3700
10 y 0.443353 61.9310
[[2]]
date ndvi precip
1 g 0.415824 283.9335
2 k 0.573737 311.8785
3 p 0.582422 354.2630
4 y 0.952495 495.4340
[[3]]
date ndvi precip
1 b 0.656463 332.5700
2 c 0.347482 94.7870
3 d 0.215425 431.3770
4 e 0.063100 499.2245
5 f 0.419460 304.5190
6 g 0.712057 226.7125
7 h 0.666700 284.9645
8 i 0.778547 182.0295
9 k 0.902520 82.5515
10 l 0.593219 430.6630
11 m 0.788715 443.5345
12 n 0.347482 132.3950
13 q 0.719538 79.1835
14 r 0.911370 100.7025
15 s 0.258743 309.3575
16 t 0.940644 142.3725
17 u 0.626980 335.4360
18 v 0.167640 390.4915
19 w 0.826368 63.3760
20 x 0.937211 439.8685

Calculating the distance between coordinates R

We have a set of 50 csv files from participants, currently being read into a list as
file_paths <- fs::dir_ls("data")
file_paths
file_contents <- list ()
for (i in seq_along (file_paths)) {
file_contents[[i]] <- read_csv(
file = file_paths[[i]]
)
}
dt <- set_names(file_contents, file_paths)
My data looks like this:
level time X Y Type
1 1 355. -10.6 22.36 P
1 1 371. -33 24.85 O
1 2 389. -10.58 17.23 P
1 2 402. -16.7 30.46 O
1 3 419. -29.41 17.32 P
1 4 429. -10.28 26.36 O
2 5 438. -26.86 32.98 P
2 6 451. -21 17.06 O
2 7 463. -21 32.98 P
2 8 474. -19.9 17.06 O
We have 70 sets of coordinates per csv.
Time does not matter for this, but I would like to split up by the level column at some stage.
For every 'P' I want to compare it to 'O' and get the distance between coordinates.The first P will always match with the first O and so on.
For now, I have them split into two different lists, though this may be the complete wrong way to do it! I'm having trouble figuring out how to take all of these csv files and get the distances for all of them, the list seems to cause issues with most functions (like dist)
Here is how I've pulled the right information so far
for (i in seq_along (dt)) {
pLoc[[i]] <- dplyr::filter(dt[[i]], grepl("P", type))
oLoc[[i]] <- dplyr::filter(dt[[i]], grepl("o", type))
pX[[i]] <- pLoc[[i]] %>% pull(as.numeric(headX))
pY[[i]] <- pLoc[[i]] %>% pull(as.numeric(headY))
pCoordinates[[i]] <- cbind(pX[[i]], pY[[i]])
}
[EDITED] Following comments, here is how you can do it with the raster library:
library(raster)
library(dplyr)
df = data.frame(
x = c(10, 20 ,15,9),
y = c(45,34,54,24),
type = c("P","O","P","O")
)
df = cbind(df[df$type=="P",] %>%
dplyr::select(-type) %>%
dplyr::rename(xP = x,
yP = y),
df[df$type=="O",] %>%
dplyr::select(-type) %>%
dplyr::rename(xO = x,
yO = y))
The following could probably be achieved more efficiently with some form of the apply() function:
v = c()
for(i in 1:nrow(df)){
dist = raster::pointDistance(lonlat = F,
p1 = c(df$xP[i],df$yP[i]),
p2 = c(df$xO[i],df$yO[i]))
v = c(v,dist)
}
df$dist = v
print(df)
xP yP xO yO dist
1 10 45 20 34 14.86607
3 15 54 9 24 30.59412

Using approx function within tapply or by in R

I have a temperature profiler (tp) data for date, depth and temperature. The depth for each date is not exactly the same so I need to unify it to the same depth and set the temperature for that depth by linear approximation. I was able to do this with a loop using ‘approx’ function (see first part of the enclosed code). But I know that I should do it better without a loop (considering I will have about 600,000 rows). I tried to do it with ‘by’ function but was not successful transforming the result (list) into a data frame or matrix (see second part of the code).
Keep in mind that length of the rounded depth is not always the same as in the example.
Rounded depth is in Depth2 column, interpulated temperature is put in Temp2
What is the ‘right’ way to solve this?
# create df manually
tp <- data.frame(Date=double(31), Depth=double(31), Temperature=double(31))
tp$Date[1:11] <- '2009-12-17' ; tp$Date[12:22] <- '2009-12-18'; tp$Date[23:31] <- '2009-12-19'
tp$Depth <- c(24.92,25.50,25.88,26.33,26.92,27.41,27.93,28.37,28.82,29.38,29.92,25.07,25.56,26.06,26.54,27.04,27.53,28.03,28.52,29.02,29.50,30.01,25.05,25.55,26.04,26.53,27.02,27.52,28.01,28.53,29.01)
tp$Temperature <- c(19.08,19.06,19.06,18.87,18.67,17.27,16.53,16.43,16.30,16.26,16.22,17.62,17.43,17.11,16.72,16.38,16.28,16.20,16.15,16.13,16.11,16.08,17.54,17.43,17.32,17.14,16.89,16.53,16.28,16.20,16.13)
# create rounded depth column
tp$Depth2 <- round(tp$Depth)
# loop on date to calculate linear approximation for rounded depth
dtgrp <- tp[!duplicated(tp[,1]),1]
for (i in dtgrp) {
x1 <- tp[tp$Date == i, "Depth"]
y1 <- tp[tp$Date == i, "Temperature"]
x2 <- tp[tp$Date == i, "Depth2"]
tpa <- approx(x=x1,y=y1,xout=x2, rule=2)
tp[tp$Date == i, "Temp2"] <- tpa$y
}
# reduce result to rounded depth
tp1 <- tp[!duplicated(tp[,-c(2:3)]),-c(2:3)]
# not part of the question, but the end need is for a matrix, so this complete it:
library(reshape2)
tpbydt <- acast(tp1, Date~Depth2, value.var="Temp2")
# second part: I tried to use the by function (instead of loop) but got lost when tring to convert it to data frame or matrix
rdpth <- function(x1,y1,x2) {
tpa <- approx(x=x1,y=y1,xout=x2, rule=2)
return(tpa)
}
tp2 <- by(tp, tp$Date,function(tp) rdpth(tp$Depth,tp$Temperature,tp$Depth2), simplify = TRUE)
Very close with by call but remember it returns a list of objects. Therefore, consider building a list of data frames to be row binded at very end:
df_list <- by(tp, tp$Date, function(sub) {
tpa <- approx(x=sub$Depth, y=sub$Temperature, xout=sub$Depth2, rule=2)
df <- unique(data.frame(Date = sub$Date,
Depth2 = sub$Depth2,
Temp2 = tpa$y,
stringsAsFactors = FALSE))
return(df)
})
tp2 <- do.call(rbind, unname(df_list))
tp2
# Date Depth2 Temp2
# 1 2009-12-17 25 19.07724
# 2 2009-12-17 26 19.00933
# 5 2009-12-17 27 18.44143
# 7 2009-12-17 28 16.51409
# 9 2009-12-17 29 16.28714
# 11 2009-12-17 30 16.22000
# 12 2009-12-18 25 17.62000
# 21 2009-12-18 26 17.14840
# 4 2009-12-18 27 16.40720
# 6 2009-12-18 28 16.20480
# 8 2009-12-18 29 16.13080
# 10 2009-12-18 30 16.08059
# 13 2009-12-19 25 17.54000
# 22 2009-12-19 26 17.32898
# 41 2009-12-19 27 16.90020
# 61 2009-12-19 28 16.28510
# 81 2009-12-19 29 16.13146
And if you reset row.names, this is exactly identical to your tp1 output:
identical(data.frame(tp1, row.names = NULL),
data.frame(tp2, row.names = NULL))
# [1] TRUE

Extract values from raster and append to existing dataframe

I am trying to extract values from a rasterstack and append those to an existing dataframe. The values are a collection of metrics (PatchStat from r package SDMtools) which I am able to extract into list format, but I am stuck trying to bind the values to my existing dataframe.
Input data:
library(sp)
library(sf)
library(raster)
library(dplyr)
library(SDMTools)
mydata <- read.table(header=TRUE, text = "
animal X Y ord.year
1 pb_20414 157978.9 2323819 2009168
2 pb_20414 156476.3 2325586 2009168
3 pb_06817 188512.0 2299679 2006263
4 pb_06817 207270.9 2287248 2006264")
# add rasters
s <- stack(system.file("external/rlogo.grd", package="raster"))
names(s) <- c('masie_ice_r00_v01_2009168_4km', 'masie_ice_r00_v01_2006263_4km', 'masie_ice_r00_v01_2006264_4km')
# Create sp object
projection <-CRS('+proj=stere +lat_0=90 +lat_ts=60 +lon_0=-80 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +units=m + datum=WGS84 +no_defs +towgs84=0,0,0') # matches MASIE raster
coords <- cbind(mydata$X, mydata$Y)
mydata.sp <- SpatialPointsDataFrame(coords = coords, data = mydata, proj4string = projection)
# Create sf object
mydata.sf <- st_as_sf(mydata)
mydata.buf30 <- st_buffer(mydata.sf, 30000)
My goal is to match each GPS point (X,Y) with the correct GeoTIFF by date (mydata$ord.year), crop the raster to a (spatially explicit) 30 km buffer, run PatchStat in program SDMtools for R, and append the results to the original dataframe. The catch is that PatchStat results are provided in a dataframe, so I am having trouble matching those results to my existing dataframe.
Here is an example of results provided when I run PatchStat:
patchID n.cell n.core.cell n.edges.perimeter n.edges.internal area core.area perimeter
2 3 73 13 86 206 73 13 86
perim.area.ratio shape.index frac.dim.index core.area.index
2 1.178082 2.388889 1.430175 0.1780822
Here is what I have been able to do so far:
# separate date component of TIF name to correspond to mydata$ord.year
stack <- list()
date<-vector()
for (i in 1:length(rasterlist)) {
stack[[i]]<-raster(rasterlist[i])
tt<-unlist(strsplit(names(stack[[i]]), "[_]"))
date[i]<-tt[which(nchar(tt)==max(nchar(tt)))]
}
st <- stack(stack) # Create rasterstack object
# crop raster to buffer
mydata.sp <- as(mydata.sf, 'Spatial') # back to sp object
# pull raster data from GeoTIFF that corresponds to ordinal date
pat <- list()
for (i in 1:nrow(mydata.sp)) {
st2<-st[[which(date==mydata.sp$ord.year[i])]]
GeoCrop <- raster::crop(st2, mydata.sp[i,])
GeoCrop_mask <- raster::mask(GeoCrop, mydata.sp[i,])
pat[[i]] <- PatchStat(GeoCrop_mask)}
Additionally, I have eliminated one of the two land cover types so that each element in the list has only one row:
pat2 <- lapply(pat, `[`, -1,) # remove first row in each list element so only one row remains (using program plyr for R)
Now, I would like to match these rows to my original dataframe, so that pat2[[1]] is appended to mydata.sp[1,] like this (assuming a,b, and c are columns of metadata within my original SpatialPointsDataFrame). I would like all the columns of data from PatchStat added but to save time and space, I only included the first three here:
a b c PatchID n.cell n.core.cell
1 2 3 3 73 13
Note: If possible, I would love for this whole process to be included in the for loop to minimize room for error and also processing time.
Thanks so much!
Thanks for your effort to provide example data. But it is still incomplete (it refers to files that we do not have. You could to this
library(raster)
library(SDMTools)
s <- stack(system.file("external/rlogo.grd", package="raster"))
s <- round(s / 50) # to have fewer patches
names(s) <- c('masie_ice_r00_v01_2009168_4km', 'masie_ice_r00_v01_2006263_4km', 'masie_ice_r00_v01_2006264_4km')
df <- data.frame(ord.year=c("2009168", "2009168", "2006263", "2006264"))
pts <- SpatialPoints(cbind(c(20,40,60,80), c(20,40,60,20)))
crs(pts) <- crs(s)
pts <- SpatialPointsDataFrame(pts, df)
Make a buffer
b <- buffer(pts, 15, dissolve=FALSE)
Get matching names
nms <- names(s)
nms <- gsub('masie_ice_r00_v01_', '', nms)
nms <- gsub('_4km', '', nms)
Loop to match names, and put results in a list
p <- list()
for (i in 1:length(b)) {
j <- which(b$ord.year[i] == nms)
r <- s[[j]]
z <- crop(r, b[i,])
z <- mask(z, b[i,])
p[[i]] <- PatchStat(z)
}
Note that each element of p has a data.frame with multiple rows and columns.
p[[1]]
#patchID n.cell n.core.cell n.edges.perimeter n.edges.internal area core.area perimeter perim.area.ratio shape.index frac.dim.index core.area.index
#1 1 53 5 68 144 53 5 68 1.2830189 2.266667 1.427207 0.09433962
#2 2 123 8 182 310 123 8 182 1.4796748 3.956522 1.586686 0.06504065
#3 3 149 31 190 406 149 31 190 1.2751678 3.800000 1.543074 0.20805369
#4 4 54 2 114 102 54 2 114 2.1111111 3.800000 1.679578 0.03703704
#5 5 337 206 146 1202 337 206 146 0.4332344 1.972973 1.236172 0.61127596
If you only want the first rows
pp <- t(sapply(p, function(i) i[1,]))
Combining this with the orginal data.frame is now trivial
dfpp <- cbind(df, pp)
Well I did this very ugly thing and got what I wanted. But I don't like it. If anyone has a better idea I'd love to hear it!
# Change objects to df
pat2 <- lapply(pat, `[`, -1,) # remove first row in each list element
library(plyr) # ldply command
pat3 <- ldply (pat2, data.frame)
pat4 <- bind_cols(pb, pat3)

Colours plot vector based

I have a vector z like this
z <- as.numeric(as.factor(c("A","B","C","D","E","F","G","H")))
and for different days a data frame df like this
df[[1]]
ID LON LAT
A 1 1
B 10 14
C 12 13
df[[2]]
ID LON LAT
A 2 3
B 11 18
D 12 13
df[[3]]
ID LON LAT
A 13 1
E 10 14
D 12 13
where the IDs are the ones in z but can be different for every day.
I have assigned a colour to each element of the vector
range01 <- function(x)(x-min(x))/diff(range(x))
rainbow(7)
cRamp <- function(x){
cols <- colorRamp(rainbow(7))(range01(x))
apply(cols, 1, function(xt)rgb(xt[1], xt[2], xt[3], maxColorValue=255))
}
and what I would like to do is to plot for every day my df with the colours cRamp(z) but I am no able to link the ID value in every df with the one in z
Here is my code
for (i in 1:length(myfiles)){
plot(df[[i]]$LON,df[[i]]$LAT, col = cRamp(z))
map(add=T,col="saddlebrown",interior = FALSE)
legend("topleft", legend=c(unique(df[[i]]$ID)), col=cRamp(z))
}
but the colour for e.g. ID A are not the same for every day!
Many thanks
Maybe something like this:
z <- LETTERS[1:7]
df <- list(
data.frame(ID=LETTERS[1:3],
LON=c(1,10,12),
LAT=c(1,14,13)),
data.frame(ID=LETTERS[3:5],
LON=c(2,11,18),
LAT=c(2,9,20))
)
layout(t(1:2))
for (i in 1:2){
plot(df[[i]]$LON, df[[i]]$LAT,
col = rainbow(length(z))[match(df[[i]]$ID,z)],
pch=16)
legend("topleft",
legend=z,
col=rainbow(length(z)),
pch=16)
}

Resources