I want to compute distances from a data frame source to a data frame destination in R. Both data frames have coordinates but my code returns the following error:
Error: "src" should contain coordinates.
Below is my code
randompoints_df <- randompts %>%
as.data.frame()
bus_stops_df <- bus_stops %>%
dplyr::select(id = stop_name, stop_lon, stop_lat) %>%
as.data.frame()
t0 <- Sys.time()
distancetable <- osrm::osrmTable(src = randompoints_df, dst = bus_stops_df)
I tried selecting only the id, lon and lat columns in my data to ensure it had coordinates but the src parameter still brought the same error. Any help in fixing this would be appreciated.
osrmTable() accepts sf & sp objects or dataframes and matrices with exactly 2 columns, WGS84 lon & lat.
library(sf)
library(osrm)
# some sample data
stops <- read.csv(text = "lon,lat,name
24.7653725,59.4426444,Reisisadama D-terminal
24.7514720,59.4438342,Linnahall
24.7601144,59.4446286,Reisisadama A-terminal
24.7655886,59.4425595,Reisisadama D-terminal
24.7696412,59.4413391,Uus-Sadama
24.7593333,59.4401405,Siimeoni",header = T, )
# 3 columns, numeric lon/lat
str(stops)
#> 'data.frame': 6 obs. of 3 variables:
#> $ lon : num 24.8 24.8 24.8 24.8 24.8 ...
#> $ lat : num 59.4 59.4 59.4 59.4 59.4 ...
#> $ name: chr "Reisisadama D-terminal" "Linnahall" "Reisisadama A-terminal" "Reisisadama D-terminal" ...
# osrmTable() with 3-column tables - fails
osrmTable(stops[1:3,], stops[4:6,])
#> Error: "src" should contain coordinates.
# osrmTable() with 2-column tables - OK
osrmTable(stops[1:3,c("lon", "lat")], stops[4:6,c("lon", "lat")])
#> $durations
#> 4 5 6
#> 1 0.0 1.1 1.0
#> 2 3.1 3.0 2.3
#> 3 3.1 3.1 2.3
#>
#> $sources
#> lon lat
#> 1 24.76541 59.44259
#> 2 24.75145 59.44378
#> 3 24.76006 59.44473
#>
#> $destinations
#> lon lat
#> 4 24.76555 59.44261
#> 5 24.76971 59.44126
#> 6 24.75928 59.44008
# osrmTable() with sf objects - OK
stops_sf <- st_as_sf(stops, coords = c("lon", "lat"), crs = "WGS84")
osrmTable(stops_sf[1:3,], stops_sf[4:6,])
#> $durations
#> 4 5 6
#> 1 0.0 1.1 1.0
#> 2 3.1 3.0 2.3
#> 3 3.1 3.1 2.3
#>
#> $sources
#> lon lat
#> 1 24.76541 59.44259
#> 2 24.75145 59.44378
#> 3 24.76006 59.44473
#>
#> $destinations
#> lon lat
#> 4 24.76555 59.44261
#> 5 24.76971 59.44126
#> 6 24.75928 59.44008
Created on 2023-01-31 with reprex v2.0.2
Related
I have a dataframe from almost all zipcodes of Germany.
# German Zip
Germany <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/a012325ef8a9fa33aaa943dbc3db4ba9/raw/8616404bece8d405553d36380c7242fab37043d9/zipcodes.germany.csv", sep = ";")
head(Germany)
id loc_id zipcode name lat lon
1 1 14308 19348 Berge bei Perleberg 53.23746 11.870770
2 2 22537 85309 Pörnbach 48.61670 11.466700
3 3 106968 24790 Osterrönfeld Heidkrug, Gemeinde Osterrönfeld 54.27536 9.737535
4 4 18324 98646 Hildburghausen 50.43950 10.723922
5 5 16590 27336 Frankenfeld, Aller 52.76951 9.430780
6 6 19092 19294 Karenz 53.23012 11.343840
and a dataframe of particular places/locations in Germany, e.g. blood donation center, both with their respective longitude and latitude information:
# German Blood Donation
Blooddonation <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/95cc459b81f2bc6bec2f2b46d1f6273a/raw/2b1c77fe5cf1203ca105b7f61019bb390335db8e/LocationsUpdate.csv", sep=",")
head(Blooddonation)
title zip lat lon
1 Haema Blutspendezentrum Dresden-World Trade Center 01067 51.04807 13.7238
2 Octapharma Plasmaspende Dresden 01067 51.04932 13.73557
3 Haema Dresden Elbepark 01139 51.08232 13.696
4 DRK-Blutspendedienst Dresden 01307 51.05294 13.78027
5 Haema Blutspendezentrum Dresden-Fetscherplatz 01307 51.04654 13.77047
6 Haema Blutspendezentrum Görlitz 02826 51.15275 14.98878
How can I find the number of neighbour locations (blood donation centers) within a radius of e.g. 10km, 20km from each zipcode in Germany and store the result as a variable in my Germany dataframe.
Is there a tidyverse (tidy) solution such that the results are stored as variable in a dataframe?
With sf and distance matrix:
library(dplyr)
library(sf)
ger_sf <- st_as_sf(Germany, coords = c("lon", "lat"), crs = "WGS84")
bd_sf <- st_as_sf(Blooddonation, coords = c("lon", "lat"), crs = "WGS84")
# distance matrix in km with units dropped
# rows: locations from Germany
# cols: locations from Blooddonation
distm_km <- st_distance(ger_sf, bd_sf) %>%
units::set_units("km") %>%
units::drop_units()
distm_km[1:8, 1:8]
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
#> [1,] 274.3081 274.5550 270.0329 275.6236 275.9407 314.4278 313.1224 234.2907
#> [2,] 315.0837 315.6358 317.3197 317.6389 316.6712 378.3292 378.1703 404.2936
#> [3,] 448.2405 448.6026 444.0099 450.0901 450.2696 495.4185 494.1030 415.7224
#> [4,] 221.6435 222.4717 220.9114 225.5819 224.7243 310.0170 309.1489 291.4043
#> [5,] 351.1441 351.7419 347.3852 354.0903 353.9269 420.9728 419.6516 352.0349
#> [6,] 291.9424 292.2731 287.6897 293.6504 293.8668 339.1699 337.8463 260.3578
#> [7,] 272.3777 272.5296 268.1349 273.2452 273.6690 303.6681 302.3973 222.6707
#> [8,] 158.5451 159.3540 156.3434 162.4375 161.8107 246.2483 245.1566 210.5743
dim(distm_km)
#> [1] 17367 248
# rowSums() to count values matching condition across each row in the matrix
Germany <- Germany %>%
mutate(within10km = rowSums(distm_km <= 10),
within20km = rowSums(distm_km <= 20))
Results :
as_tibble(Germany)
#> # A tibble: 17,367 × 8
#> id loc_id zipcode name lat lon withi…¹ withi…²
#> <int> <int> <int> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 1 14308 19348 Berge bei Perleberg 53.2 11.9 0 0
#> 2 2 22537 85309 Pörnbach 48.6 11.5 0 1
#> 3 3 106968 24790 Osterrönfeld Heidkrug, Geme… 54.3 9.74 0 0
#> 4 4 18324 98646 Hildburghausen 50.4 10.7 0 1
#> 5 5 16590 27336 Frankenfeld, Aller 52.8 9.43 0 0
#> 6 6 19092 19294 Karenz 53.2 11.3 0 1
#> 7 7 144118 19395 Wendisch Priborn Tönchow 53.3 12.3 0 0
#> 8 8 16355 99628 Eßleben-Teutleben 51.1 11.5 0 0
#> 9 9 25953 38486 Wenze 52.6 11.1 0 0
#> 10 10 21836 72622 Nürtingen 48.6 9.35 0 0
#> # … with 17,357 more rows, and abbreviated variable names ¹within10km,
#> # ²within20km
Inuput:
library(httr)
library(stringr)
Germany <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/a012325ef8a9fa33aaa943dbc3db4ba9/raw/8616404bece8d405553d36380c7242fab37043d9/zipcodes.germany.csv", sep = ";")
Blooddonation <- GET('https://www.blutspenden.de/blutspendedienste/#') %>%
content(as = "text") %>%
str_match("var instituionsmap_data = '(.*)'") %>%
.[, 2] %>%
jsonlite::parse_json(simplifyVector = T) %>%
select(title, street, number, zip, city, lat, lon)
Created on 2023-01-15 with reprex v2.0.2
An answer hint might be :
library(geosphere)
withinKM=10
Germany$within10KM=0
for (i in 1:300) # test only the first 300 zipcode
{
count=0
for (k in 1:nrow(Blooddonation))
{
dis=distm(c(Germany[i,'lon'], Germany[i,'lat']),
c( as.numeric(Blooddonation[k,'lon']), as.numeric(Blooddonation[k,'lat'])), fun = distHaversine)/1000
if (dis<withinKM) count=count+1
}
Germany$within10KM=count
}
I have a list of many dataframes, all of the same format. For each member of this list, I would like to generate a spatial extent, and store it with that dataframe (this data is all lat/long data, and I am using functions from the terra package to analyze it). I am not super experienced with working with lists, and so I took the following stab at trying to generate it:
library(terra)
library(dplyr)
lat_1 <- c(23.2, 14.5, 28.6)
lon_1 <- c(12.1, 8.5, 2.2)
lat_2 <- c(89.3, 94.4, 72.3)
lon_2 <- c(45.2, 47, 48.5)
coords_1 <- data.frame(lon_1, lat_1)
coords_2 <- data.frame(lon_2, lat_2)
list_coords <- list(coords_1, coords_2)
write_extent <- function(lon, lat) {
max_lat <- ceiling(max(lat))
min_lat <- floor(min(lat))
max_lon <- ceiling(max(lon))
min_lon <- floor(min(lon))
extent <- extent(x = c(max_lat, min_lat, max_lon, min_lon))
}
However, this function has errors, and I can't conceptualize how I can store the spatial extent that corresponds with each member of the list with that specific list- should I be using mutate()? Should I not be designing a function and rather be using lapply?
You can do this a couple of different ways. First, you'll want to make the data frames have the same column names for longitude and latitude lon and lat, but that's arbitrary. Once you've done that, then one way is to produce a new list where each element of the list has both a data frame and an extent object:
library(terra)
library(raster)
library(dplyr)
lat_1 <- c(23.2, 14.5, 28.6)
lon_1 <- c(12.1, 8.5, 2.2)
lat_2 <- c(89.3, 94.4, 72.3)
lon_2 <- c(45.2, 47, 48.5)
coords_1 <- data.frame(lon = lon_1, lat = lat_1)
coords_2 <- data.frame(lon = lon_2, lat = lat_2)
list_coords <- list(coords_1, coords_2)
write_extent <- function(lon, lat) {
max_lat <- ceiling(max(lat))
min_lat <- floor(min(lat))
max_lon <- ceiling(max(lon))
min_lon <- floor(min(lon))
extent <- extent(x = min_lat, xmax=max_lat, ymin = min_lon, ymax=max_lon)
extent
}
res <- lapply(list_coords, function(x){
list(data=x, extent = write_extent(x$lon, x$lat))
})
res
#> [[1]]
#> [[1]]$data
#> lon lat
#> 1 12.1 23.2
#> 2 8.5 14.5
#> 3 2.2 28.6
#>
#> [[1]]$extent
#> class : Extent
#> xmin : 14
#> xmax : 29
#> ymin : 2
#> ymax : 13
#>
#>
#> [[2]]
#> [[2]]$data
#> lon lat
#> 1 45.2 89.3
#> 2 47.0 94.4
#> 3 48.5 72.3
#>
#> [[2]]$extent
#> class : Extent
#> xmin : 72
#> xmax : 95
#> ymin : 45
#> ymax : 49
In the output above, you could get the data for the first object with res[[1]]$data and the extent for the first object with res[[1]]$extent. Or you could get a list of all the extents with lapply(res, function(x)x$extent). Another option would be to store the extent as an attribute of the data. This way, it always follows the data around:
res <- lapply(list_coords, function(x){
e <- write_extent(x$lon, x$lat)
attr(x, "extent") <- e
x
})
res
#> [[1]]
#> lon lat
#> 1 12.1 23.2
#> 2 8.5 14.5
#> 3 2.2 28.6
#>
#> [[2]]
#> lon lat
#> 1 45.2 89.3
#> 2 47.0 94.4
#> 3 48.5 72.3
You don't see the extent when you print the data frame, but you can retrieve it either for a single data frame with:
attr(res[[1]], "extent")
#> class : Extent
#> xmin : 14
#> xmax : 29
#> ymin : 2
#> ymax : 13
Or for all of them with:
lapply(res, function(x)attr(x, "extent"))
#> [[1]]
#> class : Extent
#> xmin : 14
#> xmax : 29
#> ymin : 2
#> ymax : 13
#>
#> [[2]]
#> class : Extent
#> xmin : 72
#> xmax : 95
#> ymin : 45
#> ymax : 49
Created on 2022-12-08 by the reprex package (v2.0.1)
While I think it's a bit less conventional to set object attributes, this answer suggests it is not bad practice to do so.
I am creating a nested sampling design.
Is it possible to calculate the distance between each and every one of my 200 sampling locations (longitude/latitude)?
I would like to calculate where the distances occur on the lag line (e.g how many samples are separated by 1m,10m,100m etc) to check that there is sufficient amount of points at each distance.
Is this possible in r or any other free software?
Let's use sf::st_distance() suggested by #nniloc
Let's prepare sample occurrence data:
occ <- rgbif::occ_data(
scientificName = "Calystegia pulchra",
country = "GB",
hasCoordinate = TRUE
)
occ <- head(occ$data, 220) |>
sf::st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), crs = 4326) |>
subset(select = c("key", "scientificName"))
Let's create a distance matrix
m <- sf::st_distance(occ)
m[1:4, 1:4]
#> Units: [m]
#> [,1] [,2] [,3] [,4]
#> [1,] 0.0 127215.11 202758.86 763395.9
#> [2,] 127215.1 0.00 98557.85 681999.6
#> [3,] 202758.9 98557.85 0.00 583484.0
#> [4,] 763395.9 681999.59 583484.00 0.0
and function, which takes the row, and calculates how much entries where the distance > ...
how_much <- function(matrix = m, row = 1, distance = 100000) {
length(which({{matrix}}[{{row}},] > units::as_units({{distance}}, "m")))
}
how_much(m, 2, 100000)
#> [1] 195
Let's add it to our occurrence data:
occ |>
dplyr::mutate(row_number = dplyr::row_number()) |>
dplyr::rowwise() |>
dplyr::mutate(dist_200000 = how_much(m, row_number, 200000))
#> Simple feature collection with 220 features and 4 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -7.978369 ymin: 49.97314 xmax: 1.681262 ymax: 57.90633
#> Geodetic CRS: WGS 84
#> # A tibble: 220 × 5
#> # Rowwise:
#> key scientificName geometry row_number dist_20…¹
#> * <chr> <chr> <POINT [°]> <int> <int>
#> 1 3320924569 Calystegia pulchra Brum… (-0.17902 51.49553) 1 196
#> 2 3437494613 Calystegia pulchra Brum… (-1.962333 51.78564) 2 158
#> 3 3785877810 Calystegia pulchra Brum… (-2.541045 52.5979) 3 146
#> 4 3352641836 Calystegia pulchra Brum… (-6.189535 57.41207) 4 171
#> 5 3338086543 Calystegia pulchra Brum… (-2.302697 53.20301) 5 154
#> 6 3352720276 Calystegia pulchra Brum… (-3.052632 54.8225) 6 147
#> 7 3352736637 Calystegia pulchra Brum… (-1.614114 55.37932) 7 164
#> 8 3384449063 Calystegia pulchra Brum… (-4.681922 57.14801) 8 135
#> 9 3421262904 Calystegia pulchra Brum… (-6.19056 57.42103) 9 171
#> 10 3392248456 Calystegia pulchra Brum… (-6.159327 56.11731) 10 158
#> # … with 210 more rows, and abbreviated variable name ¹dist_200000
Regards,
Grzegorz
Created on 2022-10-04 with reprex v2.0.2
I have two sets of geo location points i.e. latitude and longitude.
control <-
data.frame(
id = c("110000308033", "110000308042", "110000308060", "110000308346", "110000308505",
"110000308541", "110000308612", "110000309684", "110000309773", "110000309835"),
latitude = c(42.20227, 42.19802, 42.19251, 42.13690, 42.65253, 42.24066, 42.60008,
42.62743, 42.72361, 42.70060),
longitude = c(-72.606052, -72.600913, -72.609814, -72.542205, -73.110606, -73.358297,
-72.378388, -71.156079, -70.9629610, -71.16304)
)
treatment <-
data.frame(
id = c("110000308881", "110000310556", "110000314570", "110000316024", "110000324845"),
latitude = c(42.61366, 42.16657, 45.36801, 41.62371, 43.30851 ),
longitude = c(-71.633782, -71.212503, -68.510184, -72.043135, -73.63481 )
)
I want to find the shortest distance from control unit to treatment unit for each of the control unit. Also how can I show them on a US map. I would appreciate any help.
Please find one possible solution using sf, units, dplyr and tmap libraries
Reprex
Computing the distance matrix (distance in km)
library(sf)
library(units)
library(dplyr)
library(tmap)
# Convert the two dataframes into 'sf' objects
control_sf <- st_as_sf(control, coords = c("longitude", "latitude"), crs = 4326)
treatment_sf <- st_as_sf(treatment, coords = c("longitude", "latitude"), crs = 4326)
# Compute a distance matrix (distance in km)
# rows = control
# columns = treatment
Distances_df <- control_sf %>%
st_distance(., treatment_sf) %>%
set_units(., km) %>%
drop_units() %>%
round(., 1) %>%
as.data.frame() %>%
setNames(., treatment$id) %>%
mutate(control = control$id) %>%
relocate(control)
Distances_df
#> control 110000308881 110000310556 110000314570 110000316024
#> 1 110000308033 92.0 114.9 481.6 79.4
#> 2 110000308042 91.9 114.5 481.6 78.8
#> 3 110000308060 92.8 115.2 482.6 78.7
#> 4 110000308346 91.5 109.7 483.6 70.5
#> 5 110000308505 120.9 164.9 475.8 144.3
#> 6 110000308541 147.5 176.9 521.6 128.6
#> 7 110000308612 61.0 107.2 436.4 112.0
#> 8 110000309684 39.1 51.5 371.0 133.4
#> 9 110000309773 56.2 65.2 353.4 151.3
#> 10 110000309835 39.7 59.5 364.6 140.0
#> 110000324845
#> 1 148.9
#> 2 149.6
#> 3 149.7
#> 4 157.9
#> 5 84.5
#> 6 120.9
#> 7 129.1
#> 8 215.4
#> 9 226.7
#> 10 212.0
Visualization (control in blue and treatment in red)
tmap_mode("view")
#> tmap mode set to interactive viewing
tm_shape(control_sf) +
tm_dots(col = "blue")+
tm_shape(treatment_sf) +
tm_dots(col = "red")
Created on 2022-01-14 by the reprex package (v2.0.1)
I have a dataframe with values for multiple macro variables. When i compute log of the values and then the log differences it changes the variables into lists, causing problems with my script later on.
Example code:
#Compute log of relevant macrovariables
macro[,c("hp", "unem", "m1", "inc")] <- log(macro[,c("hp", "unem", "m1", "inc")])
colnames(macro)[2:5] <- paste(colnames(macro)[2:5], "log", sep = "_")
#Computing log differences
macro$ldiff_hp <- c(-diff(macro$hp_log), na.omit)
Im trying to unlist the columns and convert them to numeric with either of the following:
#Alternative 1
macro[,15:19]<- unlist(as.numeric(macro[,15:19]))
#Alternative 2
macro[,15:19] <- sapply(macro[,15:19],as.numeric)
It gives me the following error output:
> macro[,15:19]<- unlist(as.numeric(macro[,15:19]))
Error in unlist(as.numeric(macro[, 15:19])) :
(list) object cannot be coerced to type 'double'
Using the economics dataset from ggplot2 as example data and making use of dplyrs lag function the log differenced vars can be computed like so:
library(ggplot2)
library(dplyr)
macro <- ggplot2::economics
vars <- c("uempmed", "psavert")
vars_log <- paste(vars, "log", sep = "_")
vars_ldiff <- paste(vars, "ldiff", sep = "_")
#Compute log of relevant macrovariables
macro[, vars_log] <- sapply(macro[, vars], log)
# Lag values
macro[, vars_ldiff] <- sapply(macro[, vars_log], dplyr::lag)
# First Difference of logs
macro[, vars_ldiff] <- macro[, vars_log] - macro[, vars_ldiff]
macro
#> # A tibble: 574 x 10
#> date pce pop psavert uempmed unemploy uempmed_log psavert_log
#> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1967-07-01 507. 198712 12.6 4.5 2944 1.50 2.53
#> 2 1967-08-01 510. 198911 12.6 4.7 2945 1.55 2.53
#> 3 1967-09-01 516. 199113 11.9 4.6 2958 1.53 2.48
#> 4 1967-10-01 512. 199311 12.9 4.9 3143 1.59 2.56
#> 5 1967-11-01 517. 199498 12.8 4.7 3066 1.55 2.55
#> 6 1967-12-01 525. 199657 11.8 4.8 3018 1.57 2.47
#> 7 1968-01-01 531. 199808 11.7 5.1 2878 1.63 2.46
#> 8 1968-02-01 534. 199920 12.3 4.5 3001 1.50 2.51
#> 9 1968-03-01 544. 200056 11.7 4.1 2877 1.41 2.46
#> 10 1968-04-01 544 200208 12.3 4.6 2709 1.53 2.51
#> # ... with 564 more rows, and 2 more variables: uempmed_ldiff <dbl>,
#> # psavert_ldiff <dbl>
Created on 2020-03-23 by the reprex package (v0.3.0)