Errors attempting to apply a function to data frame rows in R

I wrote a very simple function (which works well) that returns the timezone given a set of coordinates:
library(XML)
findTZ <- function(lon, lat, date = Sys.Date()) {
  apiurl <- sprintf("https://maps.googleapis.com/maps/api/timezone/%s?location=%s,%s&timestamp=%d&sensor=%s",
                    "xml", lat, lon, as.numeric(as.POSIXct(date)), "false")
  TZ <- xmlParse(readLines(apiurl))[["string(//time_zone_id)"]]
  return(TZ)
}
findTZ(-112.86, 53.61) # example
However, when I try to run the function on a list of coordinates in a data frame, I get an error: Error in file(con, "r") : invalid 'description' argument
Any hints at what I'm getting wrong here? It seems like it should be very simple.
Here's the very basic data I'm testing on:
DF <- data.frame(
  longitude = c(-122, -112, -102),
  latitude = c(54, 53, 52)
)
DF$timezone = findTZ(lon=DF$longitude, lat=DF$latitude)
Thank you for any pointers!
EDIT / ADDITION
After implementing the answer from @Floo0, I tried applying the same approach to another function that calculates sunrise/sunset times from the same location data (and whose results I want in local time, hence the time-zone function).
Here's the sunrise function:
library(maptools)
SSun <- function(lon, lat, date, deg = 0, dir, tzone) {
  # deg = solar depth: rise/set = 0, civil = 6, nautical = 12, astronomical = 18
  # dir = direction: sunrise = "dawn", sunset = "dusk"
  # tzone = time zone of the output, NOT of the location
  siteX <- SpatialPoints(matrix(c(lon, lat), nrow = 1),
                         proj4string = CRS("+proj=longlat +datum=WGS84"))
  dateX <- as.POSIXct(date, tz = tzone)
  duskX <- crepuscule(siteX, dateX, solarDep = deg, direction = dir, POSIXct.out = TRUE)
  duskX <- duskX$time  # keep only the date-time, discard day_frac
  return(duskX)
}
SSun(-112.86, 53.61, "2016-09-25", deg=0, dir="dawn", tzone="America/Edmonton") # example
And the updated timezone function:
library(tidyverse); library(xml2)
findTZ <- function(lon, lat, date = Sys.Date()){
  apiurl <- sprintf("https://maps.googleapis.com/maps/api/timezone/%s?location=%s,%s&timestamp=%d&sensor=%s",
                    "xml", lat, lon, as.numeric(as.POSIXct(date)), "false")
  read_xml(apiurl) %>% xml_find_first(".//time_zone_id") %>% xml_text
}
findTZ(-112.86, 53.61) # example
And the code I used to call both functions:
DF %>% mutate(date = as.POSIXct(date),
              TZ = map2_chr(longitude, latitude, findTZ),
              sunrise = SSun(longitude, latitude, date, deg = 0, dir = "dawn", tzone = TZ))
I feel like I must be misunderstanding how this works. Any insights?

You can do the following (using xml2 instead of XML, as I find it easier to use):
require(xml2)
findTZ <- function(lon, lat, date = Sys.Date()){
  apiurl <- sprintf("https://maps.googleapis.com/maps/api/timezone/%s?location=%s,%s&timestamp=%d&sensor=%s",
                    "xml", lat, lon, as.numeric(as.POSIXct(date)), "false")
  read_xml(apiurl) %>% xml_find_first(".//time_zone_id") %>% xml_text
}
To loop through your test data you can use:
require(tidyverse)
DF %>% mutate(TZ = map2_chr(longitude, latitude, findTZ))
Which gives you:
longitude latitude TZ
1 -122 54 America/Vancouver
2 -112 53 America/Edmonton
3 -102 52 America/Regina
As @Rich Scriven correctly points out, you need to loop through the data somewhere: the original call fails because sprintf() builds one URL per row, and readLines() can only open a single connection (hence the invalid 'description' argument). With map2_chr that loop is simply "hidden" inside the call.
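The same reasoning applies to the SSun() call in the edit: as.POSIXct(tz = ...) accepts only a single time zone, so SSun() also has to be applied row by row rather than to whole columns. A minimal sketch with purrr's pmap (a hedged suggestion, untested against the API; the date is hard-coded for illustration, and sunrise comes back as a list-column because each row may be in a different zone):
DF %>%
  mutate(TZ = map2_chr(longitude, latitude, findTZ),
         sunrise = pmap(list(longitude, latitude, TZ),
                        ~ SSun(..1, ..2, "2016-09-25", deg = 0,
                               dir = "dawn", tzone = ..3)))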

Consider mapply to pass each element-wise pair of values into the function and return a vector:
DF$timezones <- mapply(findTZ, DF$longitude, DF$latitude)
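A small base-R variation on the same idea: Vectorize() wraps mapply for you, giving a vectorised version of the function that can be called like any other:
findTZv <- Vectorize(findTZ)
DF$timezones <- findTZv(DF$longitude, DF$latitude)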

Related

Convert DMS coordinates to decimal degrees in R

I have the following coordinates in DMS format. I need to convert them to decimal degrees.
# Libraries
> library(sp)
> library(magrittr)
# Latitude & Longitude as strings
> lat <- '21d11m24.32s'
> lng <- '104d38m26.88s'
I tried:
> lat_d <- char2dms(lat, chd='d', chm='m', chs='s') %>% as.numeric()
> lng_d <- char2dms(lng, chd='d', chm='m', chs='s') %>% as.numeric()
> print(c(lat_d, lng_d))
[1] 21.18333 104.63333
Although close, this result is different from the output I get from this website. According to this site, the correct output should be:
Latitude: 21.190089
Longitude: 104.6408
It seems that sp::char2dms and as.numeric are rounding the coordinates. I noticed this issue when converting a large batch of DMS coordinates using this method because the number of unique values decreases drastically after the conversion.
You are right! To tell you the truth, I hadn't noticed this problem.
To get around this, here is a solution using the measurements package:
REPREX:
install.packages("measurements")
library(measurements)
lat <- conv_unit('21 11 24.32', from = "deg_min_sec", to = "dec_deg")
long <- conv_unit('104 38 26.88' , from = "deg_min_sec", to = "dec_deg")
print(c(lat, long))
#> [1] "21.1900888888889" "104.6408"
Created on 2021-10-07 by the reprex package (v2.0.1)
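conv_unit() should also accept a whole vector of DMS strings at once (worth verifying on your version of measurements, given the large batches mentioned above):
dms <- c('21 11 24.32', '104 38 26.88')
conv_unit(dms, from = "deg_min_sec", to = "dec_deg")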
Edit from OP
This can also be solved by adding 'N' or 'S' to latitude and 'E' or 'W' to longitude.
# Add character to lat & long strings
> lat_d <- char2dms(paste0(lat,'N'), chd='d', chm='m', chs='s') %>% as.numeric()
> lng_d <- char2dms(paste0(lng,'W'), chd='d', chm='m', chs='s') %>% as.numeric()
> print(c(lat_d, lng_d))
[1] 21.19009 -104.64080

Error in do.ply(i) : task 1 failed - "could not find function "%>%"" in R parallel programming

Every time I run the script, it gives me this error: Error in { : task 1 failed - "could not find function "%>%""
I have already checked every post on this forum about this error and tried the suggested fixes, but none of them works.
Please advise any solution.
Please note: I have only 2 cores on my PC.
My code is as follows:
library(dplyr) # For basic data manipulation
library(ncdf4) # For creating NetCDF files
library(tidync) # For easily dealing with NetCDF data
library(ggplot2) # For visualising data
library(doParallel) # For parallel processing
MHW_res_grid <- readRDS("C:/Users/SUDHANSHU KUMAR/Desktop/MTech Project/R/MHW_result.Rds")
# Function for creating arrays from data.frames
df_acast <- function(df, lon_lat){
  # Force grid
  res <- df %>%
    right_join(lon_lat, by = c("lon", "lat")) %>%
    arrange(lon, lat)
  # Convert date values to integers if they are present
  if(lubridate::is.Date(res[1,4])) res[,4] <- as.integer(res[,4])
  # Create array
  res_array <- base::array(res[,4], dim = c(length(unique(lon_lat$lon)),
                                            length(unique(lon_lat$lat))))
  dimnames(res_array) <- list(lon = unique(lon_lat$lon),
                              lat = unique(lon_lat$lat))
  return(res_array)
}
# Wrapper function for last step before data are entered into NetCDF files
df_proc <- function(df, col_choice){
  # Determine the correct array dimensions
  lon_step <- mean(diff(sort(unique(df$lon))))
  lat_step <- mean(diff(sort(unique(df$lat))))
  lon <- seq(min(df$lon), max(df$lon), by = lon_step)
  lat <- seq(min(df$lat), max(df$lat), by = lat_step)
  # Create full lon/lat grid
  lon_lat <- expand.grid(lon = lon, lat = lat) %>%
    data.frame()
  # Acast only the desired column
  dfa <- plyr::daply(df[c("lon", "lat", "event_no", col_choice)],
                     c("event_no"), df_acast, .parallel = T, lon_lat = lon_lat)
  return(dfa)
}
# We must now run this function on each column of data we want to add to the NetCDF file
doParallel::registerDoParallel(cores = 2)
prep_dur <- df_proc(MHW_res_grid, "duration")
prep_max_int <- df_proc(MHW_res_grid, "intensity_max")
prep_cum_int <- df_proc(MHW_res_grid, "intensity_cumulative")
prep_peak <- df_proc(MHW_res_grid, "date_peak")
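A likely cause, offered as a hedged sketch rather than a verified fix: the workers started by registerDoParallel() are fresh R sessions that do not inherit the packages attached in your session, so %>% is undefined inside the parallel loop. plyr's .paropts argument forwards options such as .packages to foreach, which then loads them on each worker; in df_proc the daply call would become:
dfa <- plyr::daply(df[c("lon", "lat", "event_no", col_choice)],
                   c("event_no"), df_acast, .parallel = T,
                   .paropts = list(.packages = c("dplyr", "lubridate")),
                   lon_lat = lon_lat)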

Using R to process google earth engine data

I want to download the daily tmax from NASA for a given lat/lon (https://developers.google.com/earth-engine/datasets/catalog/NASA_NEX-DCP30_ENSEMBLE_STATS)
using the following tutorial https://jesjehle.github.io/earthEngineGrabR/index.html
library(devtools)
install_github('JesJehle/earthEngineGrabR')
library(earthEngineGrabR)
ee_grab_install() # had to install Anaconda before doing this step.
test_data <- ee_grab(data = ee_data_collection(datasetID = "NASA/NEX-DCP30_ENSEMBLE_STATS",
                                               timeStart = "1980-01-01",
                                               timeEnd = "1980-01-02",
                                               bandSelection = "tasmax"),
                     targetArea = system.file("data/territories.shp", package = "earthEngineGrabR"))
Error: With the given product argument no valid data could be requested.
In addition: Warning message:
Error on Earth Engine servers for data product: NASA-NEX-DCP30_ENSEMBLE_STATS_s-mean_t-mean_1980-01-01to2005-12-31
Error in py_call_impl(callable, dots$args, dots$keywords): EEException: Collection.first: Error in map(ID=historical_195001):
Image.select: Pattern 'tasmax' did not match any bands.
I would like to know how to specify the band so that I do not get this error, and, instead of using a shapefile as the target area, how do I download tmax data for a single lat/lon (9.55, 78.59)?
You might use rgee to accomplish this. Currently, rgee has a function called rgee::ee_extract that works similarly to raster::extract().
library(rgee)
library(sf)
# 1. Load a geometry
y <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE) %>%
  st_transform(4326)
## Move that geometry from local to earth engine
ee_y <- sf_as_ee(y)
# 2. Load your ImageCollection
x <- ee$ImageCollection("NASA/NEX-DCP30_ENSEMBLE_STATS")$
  filterDate("1980-01-01", "1980-01-02")$
  map(function(img) img$select("tasmax_mean"))
## calculate the nominal scale
scale <- x$first()$projection()$nominalScale()$getInfo()
# 3. Extract values
tasmax_mean_data <- ee_extract(x = x,
                               y = y,
                               fun = ee$Reducer$mean(),
                               scale = scale,
                               id = "FIPS")
# 4. Merge results with the sf object
ee_nc_tasmax <- merge(y, tasmax_mean_data, by = "FIPS")
plot(ee_nc_tasmax['historical_198001'])
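For the single-lat/lon part of the question, a hedged sketch (it assumes rgee has been initialised with ee_Initialize(), and that ee_extract accepts any sf object, points included; note that sf expects lon/lat order):
# 5. Extract at a single point, e.g. (9.55, 78.59)
pt <- st_sf(geometry = st_sfc(st_point(c(78.59, 9.55)), crs = 4326))
tmax_pt <- ee_extract(x = x, y = pt, fun = ee$Reducer$mean(), scale = scale)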

How to exactly reproduce historical ORS-Isochrones? [duplicate]

I am working with the gmapsdistance package in R. I have my API key, and I am familiar with the functions within the package.
However, I would like to work out a problem in the reverse direction. Instead of just finding the Time, Distance, and Status between vectors of lat/longs, I would like to input a lat/long and draw a region of all points that could be driven to in 3 hours or less. Then I'd like to draw this on a Google map.
To start, it would be great to use Marimar, FL: 25.9840, -80.2821.
Does anyone have experience with that type of problem?
As suggested in the comments, you can sign up to a service like Travel Time Platform (which I'm using in this example) and use their API to get the possible destinations given a starting point.
Then you can plot this on a map using Google Maps (in my googleway package)
appId <- "TravelTime_APP_ID"
apiKey <- "TravelTime_API_KEY"
mapKey <- "GOOGLE_MAPS_API_KEY"
library(httr)
library(googleway)
library(jsonlite)
location <- c(25.9840, -80.2821)
driveTime <- 2 * 60 * 60  # travel time in seconds (a 2-hour example)
## London example
## location <- c(51.507609, -0.128315)
## sign up to http://www.traveltimeplatform.com/ and get an API key
## and use their 'Time Map' API
url <- "http://api.traveltimeapp.com/v4/time-map"
requestBody <- paste0('{
  "departure_searches": [
    {
      "id": "test",
      "coords": {"lat": ', location[1], ', "lng": ', location[2], '},
      "transportation": {"type": "driving"},
      "travel_time": ', driveTime, ',
      "departure_time": "2017-05-03T08:00:00z"
    }
  ]
}')
res <- httr::POST(url = url,
                  httr::add_headers('Content-Type' = 'application/json'),
                  httr::add_headers('Accept' = 'application/json'),
                  httr::add_headers('X-Application-Id' = appId),
                  httr::add_headers('X-Api-Key' = apiKey),
                  body = requestBody,
                  encode = "json")
res <- jsonlite::fromJSON(as.character(res))
pl <- lapply(res$results$shapes[[1]]$shell, function(x){
  googleway::encode_pl(lat = x[['lat']], lon = x[['lng']])
})
df <- data.frame(polyline = unlist(pl))
df_marker <- data.frame(lat = location[1], lon = location[2])
google_map(key = mapKey) %>%
  add_markers(data = df_marker) %>%
  add_polylines(data = df, polyline = "polyline")
If you want to render in leaflet and use a free isochrone service, this is a pretty neat option. There is a limit of a 2-hour drive, though.
devtools::install_github("tarakc02/rmapzen")
library(rmapzen)
Sys.setenv(MAPZEN_KEY = "") # get for free at https://mapzen.com/
marimar <- mz_geocode("Marimar, FL")
isos <- mz_isochrone(
  marimar,
  costing_model = mz_costing$auto(),
  contours = mz_contours(c(60 * 2))  # 2 hours, in minutes
)
library(leaflet)
leaflet(as_sp(isos)) %>%
  addProviderTiles("CartoDB.DarkMatter") %>%
  addPolygons(color = ~paste0("#", color), weight = 1)

Using R to identify nearest point to a location and calculate the distance between them along a network/road

I have a series of locations (Points_B) and would like to find the closest point to each of them from a different set of points (Points_A), plus the distance between them in km. I can do this as the crow flies but cannot work out how to do the same along a road network (the 'Roads' object in the code). The code I have so far is as follows:
library(sp)
library(rgdal)
library(rgeos)
download.file("https://dl.dropboxusercontent.com/u/27869346/Road_Shp.zip", "Road_Shp.zip")
#2.9mb
unzip("Road_Shp.zip")
Roads <- readOGR(".", "Subset_Roads_WGS")
Points_A <- data.frame(ID = c("A","B","C","D","E","F","G","H","I","J","K","L"), ID_Lat = c(50.91487, 50.92848, 50.94560, 50.94069, 50.92275, 50.94109, 50.92288, 50.92994, 50.92076, 50.90496, 50.89203, 50.88757), ID_Lon = c(-1.405821, -1.423619, -1.383509, -1.396910, -1.441801, -1.459088, -1.466626, -1.369458, -1.340104, -1.360153, -1.344662, -1.355842))
rownames(Points_A) <- Points_A$ID
Points_B <- data.frame(Code = 1:30, Code_Lat = c(50.92658, 50.92373, 50.93785, 50.92274, 50.91056, 50.88747, 50.90940, 50.91328, 50.91887, 50.92129, 50.91326, 50.91961, 50.91653, 50.90910, 50.91432, 50.93742, 50.91848, 50.93196, 50.94209, 50.92080, 50.92127, 50.92538, 50.88418, 50.91648, 50.91224, 50.92216, 50.90526, 50.91580, 50.91203, 50.91774), Code_Lon = c(-1.417311, -1.457155, -1.400106, -1.374250, -1.335896, -1.362710, -1.360263, -1.430976, -1.461693, -1.417107, -1.426709, -1.439435, -1.429997, -1.413220, -1.415046, -1.440672, -1.392502, -1.459934, -1.432446, -1.357745, -1.374369, -1.458929, -1.365000, -1.426285, -1.403963, -1.344068, -1.340864, -1.399607, -1.407266, -1.386722))
rownames(Points_B) <- Points_B$Code
Points_A_SP <- SpatialPoints(Points_A[,2:3])
Points_B_SP <- SpatialPoints(Points_B[,2:3])
Distances <- (gDistance(Points_A_SP, Points_B_SP, byid=TRUE))*100
Points_B$Nearest_Points_A_CF <- colnames(Distances)[apply(Distances,1,which.min)]
Points_B$Distance_Points_A_CF <- apply(Distances,1,min)
The output I am after is two additional columns in 'Points_B': 1) the ID of the nearest Points_A object along the road network, and 2) the distance along the network in km. Any help would be appreciated. Thanks.
I've been working on this kind of problem all day. Try mapdist() in the ggmap package and see if this works:
library(dplyr)
library(ggmap)
#Your data
Points_A <- data.frame(ID = c("A","B","C","D","E","F","G","H","I","J","K","L"), ID_Lat = c(50.91487, 50.92848, 50.94560, 50.94069, 50.92275, 50.94109, 50.92288, 50.92994, 50.92076, 50.90496, 50.89203, 50.88757), ID_Lon = c(-1.405821, -1.423619, -1.383509, -1.396910, -1.441801, -1.459088, -1.466626, -1.369458, -1.340104, -1.360153, -1.344662, -1.355842))
Points_B <- data.frame(Code = 1:30, Code_Lat = c(50.92658, 50.92373, 50.93785, 50.92274, 50.91056, 50.88747, 50.90940, 50.91328, 50.91887, 50.92129, 50.91326, 50.91961, 50.91653, 50.90910, 50.91432, 50.93742, 50.91848, 50.93196, 50.94209, 50.92080, 50.92127, 50.92538, 50.88418, 50.91648, 50.91224, 50.92216, 50.90526, 50.91580, 50.91203, 50.91774), Code_Lon = c(-1.417311, -1.457155, -1.400106, -1.374250, -1.335896, -1.362710, -1.360263, -1.430976, -1.461693, -1.417107, -1.426709, -1.439435, -1.429997, -1.413220, -1.415046, -1.440672, -1.392502, -1.459934, -1.432446, -1.357745, -1.374369, -1.458929, -1.365000, -1.426285, -1.403963, -1.344068, -1.340864, -1.399607, -1.407266, -1.386722))
#Combine coords into one field (mapdist was doing something funny with the commas so I had to specify "%2C" here)
Points_A$COORD <- paste(Points_A$ID_Lat, Points_A$ID_Lon, sep = "%2C")
Points_B$COORD <- paste(Points_B$Code_Lat, Points_B$Code_Lon, sep = "%2C")
#use expand grid to generate all combos
get_directions <- expand.grid(Start = Points_A$COORD,
                              End = Points_B$COORD,
                              stringsAsFactors = F,
                              KEEP.OUT.ATTRS = F) %>%
  left_join(select(Points_A, COORD, ID), by = c("Start" = "COORD")) %>%
  left_join(select(Points_B, COORD, Code), by = c("End" = "COORD"))
#make a base dataframe
route_df <- mapdist(from = get_directions$Start[1],
                    to = get_directions$End[1],
                    mode = "driving") %>%
  mutate(Point_A = get_directions$ID[1],
         Point_B = get_directions$Code[1])
#get the rest in a for-loop
start <- Sys.time()
for(i in 2:nrow(get_directions)){
  get_route <- mapdist(from = get_directions$Start[i],
                       to = get_directions$End[i],
                       mode = "driving") %>%
    mutate(Point_A = get_directions$ID[i],
           Point_B = get_directions$Code[i])
  route_df <- rbind(route_df, get_route)  # append to the running results
  Sys.sleep(time = 1)  # so Google doesn't get mad at you for speed
  end <- Sys.time()
  print(paste(i, "of", nrow(get_directions),
              round(i/nrow(get_directions), 4) * 100, "%", sep = " "))
  print(end - start)
}
#save if you want
write.csv(route_df, "route_df.csv", row.names = F)
#Route Evaluation
closest_point <- route_df %>%
  group_by(Point_B) %>%  # nearest Points_A for each Points_B, per the question
  filter(km == min(km)) %>%
  ungroup()
I'm still kind of new at this so there may be a better way to do the data wrangling. Hope this helps & good luck
The packages igraph, osrm, and walkalytics all seem to provide this functionality these days, with mode-specific routing networks available to varying degrees.
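For example, a hedged sketch with osrm (it assumes osrm >= 3.3, where measure = "distance" is available; by default it queries the public demo OSRM server, which routes by car and may throttle a 12 x 30 request):
library(osrm)
src <- data.frame(id = Points_A$ID, lon = Points_A$ID_Lon, lat = Points_A$ID_Lat)
dst <- data.frame(id = Points_B$Code, lon = Points_B$Code_Lon, lat = Points_B$Code_Lat)
dd <- osrmTable(src = src, dst = dst, measure = "distance")
# rows of dd$distances are Points_A, columns are Points_B; values are in metres
Points_B$Nearest_Points_A_Net <- Points_A$ID[apply(dd$distances, 2, which.min)]
Points_B$Distance_Points_A_Net <- apply(dd$distances, 2, min) / 1000  # km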
