I am trying to use the Google Maps Geocoding API to retrieve the coordinates for a list of addresses (2,289 addresses in total), i.e. to pull out the latitude and longitude of each address.
# ADDRESS : a list of the N addresses to be geocoded
# LON / LAT : two matrices, size [N x 1], initialised to contain only 0
Address <- as.matrix(Coordinates$Origin)
LON = matrix(0, length(Address), 1)
LAT = matrix(0, length(Address), 1)
View(LAT)
for (i in seq(1, length(Address))){
  APIstring = c("https://maps.googleapis.com/maps/api/geocode/json?address=",
                Address[i], ",&key=[*key removed*]")
  res = GET(APIstring)
  tmp = fromJSON(content(res, as = "text"))
  LAT[i] = tmp$results$geometry$location$lat
  LON[i] = tmp$results$geometry$location$lng
}
Error in parse_url(url) : length(url) == 1 is not TRUE
Likely, your problem arises because c() builds a character vector rather than pasting the pieces into a single string, so GET() receives a URL of length 3 instead of 1. You might want to build and handle the URLs one by one, and could do so using functional programming (faster than your loop, too). How about the following, based on the suggestion of @Limey?
library(tidyverse)
library(httr)     # for GET()
library(jsonlite) # for fromJSON()
get_lon_and_lat <- function(.address){
  .api_string <- paste0(
    "https://maps.googleapis.com/maps/api/geocode/json?address=",
    .address,
    ",&key=[*insert key here*]")
  .res <- GET(.api_string)
  .res <- fromJSON(content(.res, as = "text"))
  .out <- data.frame(
    lat = .res$results$geometry$location$lat,
    lon = .res$results$geometry$location$lng
  )
  return(.out)
}
result_lat_lon <- lapply(Address, get_lon_and_lat) %>%
  bind_rows()
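If some of the 2,289 requests fail (network hiccups, unmatched addresses), the whole lapply() call aborts. A minimal sketch, assuming the same setup as above, using purrr::possibly() (attached with the tidyverse) so a failed request yields NA rows instead:
# Sketch: a failure-tolerant wrapper around the helper above
safe_get_lon_and_lat <- purrr::possibly(
  get_lon_and_lat,
  otherwise = data.frame(lat = NA_real_, lon = NA_real_)
)
result_lat_lon <- lapply(Address, safe_get_lon_and_lat) %>%
  bind_rows()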
PS: You might want to remove your API key from your question for security reasons.
Every time I run the script, it gives me the error: Error in { : task 1 failed - "could not find function "%>%""
I have already checked every related post on this forum and tried to apply the suggested fixes, but none of them works.
Please advise any solution.
Please note: I have only 2 cores on my PC.
My code is as follows:
library(dplyr) # For basic data manipulation
library(ncdf4) # For creating NetCDF files
library(tidync) # For easily dealing with NetCDF data
library(ggplot2) # For visualising data
library(doParallel) # For parallel processing
MHW_res_grid <- readRDS("C:/Users/SUDHANSHU KUMAR/Desktop/MTech Project/R/MHW_result.Rds")
# Function for creating arrays from data.frames
df_acast <- function(df, lon_lat){
  # Force grid
  res <- df %>%
    right_join(lon_lat, by = c("lon", "lat")) %>%
    arrange(lon, lat)
  # Convert date values to integers if they are present
  if(lubridate::is.Date(res[1,4])) res[,4] <- as.integer(res[,4])
  # Create array
  res_array <- base::array(res[,4], dim = c(length(unique(lon_lat$lon)),
                                            length(unique(lon_lat$lat))))
  dimnames(res_array) <- list(lon = unique(lon_lat$lon),
                              lat = unique(lon_lat$lat))
  return(res_array)
}
# Wrapper function for last step before data are entered into NetCDF files
df_proc <- function(df, col_choice){
  # Determine the correct array dimensions
  lon_step <- mean(diff(sort(unique(df$lon))))
  lat_step <- mean(diff(sort(unique(df$lat))))
  lon <- seq(min(df$lon), max(df$lon), by = lon_step)
  lat <- seq(min(df$lat), max(df$lat), by = lat_step)
  # Create full lon/lat grid
  lon_lat <- expand.grid(lon = lon, lat = lat) %>%
    data.frame()
  # Acast only the desired column
  dfa <- plyr::daply(df[c("lon", "lat", "event_no", col_choice)],
                     c("event_no"), df_acast, .parallel = T, lon_lat = lon_lat)
  return(dfa)
}
# We must now run this function on each column of data we want to add to the NetCDF file
doParallel::registerDoParallel(cores = 2)
prep_dur <- df_proc(MHW_res_grid, "duration")
prep_max_int <- df_proc(MHW_res_grid, "intensity_max")
prep_cum_int <- df_proc(MHW_res_grid, "intensity_cumulative")
prep_peak <- df_proc(MHW_res_grid, "date_peak")
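One likely cause, sketched below rather than verified: with .parallel = TRUE, plyr::daply() hands the work to foreach workers, and those workers do not inherit the packages attached in the main session, so %>% from dplyr is undefined there. plyr's .paropts argument forwards options to foreach, so the daply() call inside df_proc() could load the required packages on each worker:
# Sketch (assumes the error comes from worker sessions missing dplyr/lubridate):
# .paropts is forwarded to foreach(), whose .packages option loads packages
# on every parallel worker before the tasks run.
dfa <- plyr::daply(df[c("lon", "lat", "event_no", col_choice)],
                   c("event_no"), df_acast, .parallel = TRUE,
                   .paropts = list(.packages = c("dplyr", "lubridate")),
                   lon_lat = lon_lat)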
I have more than 3,500 origins and more than 3,500 destinations, connected by more than 54,000 links and 24,000 nodes. I am modeling a real street network (the Chicago metropolitan area) in R using igraph and cppRouting. The following code performs an "all or nothing" traffic assignment (AON), which has to be executed more than 40 times to reach equilibrium in the network. Each AON execution currently takes more than 10 minutes, which is too long. I would appreciate any suggestion besides parallel computing to reduce the execution time of the following code:
demand_matrix <- demand_matrix[order(demand_matrix$ORG, demand_matrix$DEST), ]
tic()
for (i in 1:length(unique(demand_matrix$ORG))){
  # I think I have to iterate over every origin
  org <- unique(demand_matrix$ORG)[i]
  destinations <- demand_matrix$DEST[demand_matrix$ORG == org]
  demand <- demand_matrix[demand_matrix$ORG == org, 2:3]
  # The igraph version is also included here, which requires even more time to run:
  # destinations <- demand_matrix$DEST[demand_matrix$ORG == org]
  # sht_path <- unlist(shortest_paths(network_igraph, from = c(org), to = c(destinations),
  #                                   mode = c("out"), weights = resolved.Network[[5]]$t0,
  #                                   output = c("epath")), recursive = FALSE)
  # sht_path <- sapply(sht_path, as_ids)
  # The procedure with cppRouting:
  sht_path <- get_multi_paths(network_cpprouting_graph, from = org, to = destinations, long = TRUE)
  sht_path$end <- c(sht_path$node[2:nrow(sht_path)], 0)
  sht_path <- sht_path[sht_path$from != sht_path$node, ]
  sht_path$paste <- paste(sht_path$end, sht_path$node)
  edge_id_node_sequence <- as.integer(unlist(strsplit(sht_path$paste, split = " ")))
  sht_path$edge_ids <- get.edge.ids(network_igraph, edge_id_node_sequence)
  # I changed the sequence of nodes in the shortest path to edge ids.
  sht_path$to <- as.integer(sht_path$to) # "to" is character; converting it to integer makes left_join faster
  sht_path <- left_join(sht_path, demand, by = c("to" = "DEST"))
  V2[sht_path$edge_ids] <- V2[sht_path$edge_ids] + sht_path$TRIPS # add traffic to each link (the goal is to calculate each link's volume)
}
The demand matrix has more than 4e6 non-zero values, and I tried to calculate the shortest paths with get_path_pair for all origin-destination pairs at once, but it never finished and I had to restart my laptop. I have only 8 GB of RAM.
I then tried to get the shortest paths for only 8e5 pairs at a time (I divided my matrix into 5 sections), but the third section almost never finished.
length_group <- min(nrow(demand_matrix)/4, 800000)
path_pair <- get_path_pair(Graph = test_net, from = demand_matrix$ORG[1:length_group], to = demand_matrix$DEST[1:length_group], long = TRUE)
path_pair <- rbind(path_pair, get_path_pair(Graph = test_net, from = demand_matrix$ORG[(length_group+1):(2*length_group)], to = demand_matrix$DEST[(length_group+1):(2*length_group)], long = TRUE))
path_pair <- rbind(path_pair, get_path_pair(Graph = test_net, from = demand_matrix$ORG[((2*length_group)+1):(3*length_group)], to = demand_matrix$DEST[((2*length_group)+1):(3*length_group)], long = TRUE))
path_pair <- rbind(path_pair, get_path_pair(Graph = test_net, from = demand_matrix$ORG[((3*length_group)+1):(4*length_group)], to = demand_matrix$DEST[((3*length_group)+1):(4*length_group)], long = TRUE))
path_pair <- rbind(path_pair, get_path_pair(Graph = test_net, from = demand_matrix$ORG[((4*length_group)+1):(5*length_group)], to = demand_matrix$DEST[((4*length_group)+1):(5*length_group)], long = TRUE))
path_pair <- rbind(path_pair, get_path_pair(Graph = test_net, from = demand_matrix$ORG[((5*length_group)+1):nrow(demand_matrix)], to = demand_matrix$DEST[((5*length_group)+1):nrow(demand_matrix)], long = TRUE))
If I understand correctly, demand_matrix contains all possible combinations between origin and destination nodes? (3500² = 12 250 000)
Since cppRouting functions are vectorized, why not try:
get_multi_paths(graph, origin, dest, long = TRUE)
with origin and dest being your origin and destination nodes, each of length ~3500.
get_multi_paths is the equivalent of the get_distance_matrix function: it uses the main property of Dijkstra's algorithm, namely that a single run finds the shortest paths from an origin node to all other nodes. So the full Dijkstra algorithm is run N times, with N being the number of origin nodes.
On the other hand, the get_*_pair functions run Dijkstra's algorithm once per pair, with a stopping criterion: the search stops when the destination node is reached. With 3500² pairs you basically increase the runtime by a factor of ~1500 (not 3500, because each per-pair search is aborted early).
If you have memory issues, splitting all the combinations into smaller chunks is a good strategy. However, I suggest you split the origin nodes into 10 chunks, then run get_multi_paths between each origin chunk and all the destination nodes. At each iteration, you can aggregate the result to obtain the cumulated flow on each node of the network.
Finally, try to use lapply() and data.table::rbindlist() instead of multiple rbind() calls.
EDIT: If you want to accumulate traffic on edges, here is a piece of code:
library(data.table)
# `or` is the vector of origin nodes (I assume of length 3500)
# `dest` is the vector of destination nodes
chunk_size = 350
test <- lapply(seq(1, 3500, chunk_size), function(x){
  print(x)
  res = get_multi_paths(graph, or[x:(x+chunk_size-1)],
                        dest,
                        long = TRUE)
  setDT(res)
  # eventually merge demand for each trip (origin-destination)
  # reconstruct edges (by reference, using data.table)
  res[, edge_from := c(node[-1], NA), .(from, to)]
  # aggregate demand on each edge
  res <- res[!is.na(edge_from), .(traffic = sum(demand)), .(edge_from, node)]
  gc()
  return(res)
})
test <- rbindlist(test)
test <- test[, .(traffic = sum(traffic)), .(edge_from, node)]
Of course, you can modify chunk_size depending on your available memory.
Here is my sample dataset (called origAddress):
lat lng
1.436316 103.8299
1.375093 103.8516
1.369347 103.8398
1.367353 103.8426
I have many more rows of latitude and longitude values (330 in total), and I would like to find the address for each. I have used this for loop to do that:
for(i in 1:nrow(origAddress))
{
  # Print("Working...")
  result <- google_reverse_geocode(location = c(origAddress$lat[i], origAddress$lng[i]),
                                   key = key,
                                   location_type = "rooftop")
  if(is.null(result) || length(dim(result)) < 2 || !nrow(result)) next
  origAddress$venadd <- geocode_address(result)
}
It works for the first three or four rows but then returns the same address as the first row, although the latitude and longitude values are definitely different. I have looked at other Stack Overflow questions (here) and tried to copy their approach, with similarly bad results.
Please help!
It looks like the calls to google_reverse_geocode can return more than one address for each lat/longitude pair, so you could be overwriting data in your output data frame; note also that your loop assigns to the whole venadd column instead of to row i.
Also, I am not sure your if statement is evaluating properly.
Here is my attempt at your problem:
library(googleway)
origAddress <- read.table(header = TRUE, text = "lat lng
1.436316 103.8299
1.375093 103.8516
1.369347 103.8398
1.367353 103.8426")
# add the output column
origAddress$venadd <- NA
for(i in 1:nrow(origAddress))
{
  # Print("Working...")
  result <- google_reverse_geocode(location = c(origAddress$lat[i], origAddress$lng[i]),
                                   key = key,
                                   location_type = "rooftop")
  # add a slight pause so as not to exceed the request rate limit
  Sys.sleep(1)
  if(result$status == "OK"){
    # multiple addresses can be returned by a geocode request; this picks the first one
    origAddress$venadd[i] <- result$results$formatted_address[1]
    # use this to collect all addresses instead:
    # paste(result$results$formatted_address, collapse = " ")
  }
}
Since the call to google_reverse_geocode already returns the address, I just pull the first address from the result, saving an extra call to the internet (a performance improvement). Also, since the call returns a status, I check for "OK" and, if present, save the first address.
Hope this helps.
My problem is as detailed below:
My input data is of the format as given in the small example below:
USERID LONGITUDE LATITUDE
1 -8.79659 55.879554
2 -6.874743 56.87896
3 -3.874743 58.87896
4 -10.874743 80.87896
I have used the following code to reverse geocode the latitudes and longitudes:
dset <- as.data.frame(dataset[,2:3])
dset <- na.omit(dset)
library(ggmap)
location <- dset
nrow(location)
locaddr <- matrix(0, nrow(location), 1)
location <- as.matrix(location)
for (i in 1:nrow(location))
{
  locaddr[i,] <- revgeocode(location[i,], output = c("address"), messaging = FALSE,
                            sensor = FALSE, override_limit = FALSE)
}
Now, certain longitude-latitude pairs return NA from the Google Maps API, but when this happens the for loop is terminated for some reason. I would like to circumvent this and continue processing the remaining data points. One idea I had was the following pseudocode:
if i = nrow(location)
  continue
else
  repeat revgeocode for-loop here
end-for
end-if
Kindly advise how this can be done or if there is a better way to do this.
Thank you in advance for your time and help.
No need to use a for loop here. I recommend using lapply() to avoid side effects and pre-allocation problems:
locaddr <- lapply(seq(nrow(location)), function(i){
  revgeocode(location[i,],
             output = c("address"),
             messaging = FALSE,
             sensor = FALSE,
             override_limit = FALSE)
})
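Note that lapply() by itself will still abort if revgeocode() throws an error for a point it cannot resolve, which was the original complaint. A minimal sketch, assuming the same ggmap setup, that wraps each call in tryCatch() so a failed lookup yields NA and the remaining points are still processed:
locaddr <- lapply(seq(nrow(location)), function(i){
  tryCatch(
    revgeocode(location[i,],
               output = c("address"),
               messaging = FALSE,
               sensor = FALSE,
               override_limit = FALSE),
    error = function(e) NA_character_  # keep going when a lookup fails
  )
})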
I am trying to use Google Maps and R to calculate travel times per transit between an origin and a destination.
The guidelines for the search can be found at: https://developers.google.com/maps/documentation/directions/#TravelModes
When I submit the latitude and the longitude of the origin and destination as literals, things work fine.
For instance, the following R code executes correctly and we obtain the distance and trip duration (the output of the search is in JSON format and is converted to an R object with fromJSON):
library(rjson)
library(gooJSON)
route <- url('http://maps.googleapis.com/maps/api/directions/json?origin=51.13854,4.384575&destination=51.13156,4.387118&region=be&sensor=false&mode=transit&departure_time=1372665319')
route_file <- file("route_file.json")
L <- readLines(route,-1)
writeLines(L, route_file)
close(route)
routesR_zone1_to_zone20 <- fromJSON( file = route_file )
routesR_zone1_to_zone20$routes[[1]][[3]][[1]]$distance$value/1000
routesR_zone1_to_zone20$routes[[1]][[3]][[1]]$duration$value/60
However, what I am really interested in is to repeat this operation for thousands of origin-destination pairs. The longitude and the latitude of the origins and destinations then become variables.
For instance:
> lat_or
[1] 51.13854
> long_or
[1] 4.384575
> lat_des
[1] 51.13156
> long_des
[1] 4.387118
> route <- url('http://maps.googleapis.com/maps/api/directions/json?origin=lat_or,long_or&destination=lat_des,long_des&region=be&sensor=false&mode=transit&departure_time=1372665319')
> route_file <- file("route_file.json")
> L <- readLines(route,-1)
> writeLines(L, route_file)
> close(route)
> routesR_zone1_to_zone20 <- fromJSON( file = route_file )
> routesR_zone1_to_zone20
$routes
list()
$status
[1] "NOT_FOUND"
Thus, although the coordinates are the same as in the previous example, this time no route is found.
I suppose the problem is that, when the URL is accessed, lat_or etc. are not "translated" into the corresponding numeric values, and that Google tries to calculate the route between the literals "lat_or,long_or" and "lat_des,long_des".
Does anyone have a suggestion on how to circumvent the problem?
Standard text processing:
lat_or <- 51.13854
long_or <- 4.384575
lat_des <- 51.13156
long_des <- 4.387118
route <- url(paste0("http://maps.googleapis.com/maps/api/directions/json?origin=",lat_or,",",long_or,"&destination=",lat_des,",",long_des,"&region=be&sensor=false&mode=transit&departure_time=1372665319") )
route_file <- file("route_file.json")
L <- readLines(route,-1)
writeLines(L, route_file)
close(route)
routesR_zone1_to_zone20 <- fromJSON( file = route_file )
routesR_zone1_to_zone20$routes[[1]][[3]][[1]]$distance$value/1000
#[1] 1.161
routesR_zone1_to_zone20$routes[[1]][[3]][[1]]$duration$value/60
#[1] 11.73333
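To repeat this for the thousands of origin-destination pairs mentioned in the question, the same text processing can be wrapped in a helper and mapped over coordinate vectors. A minimal sketch; lat_or_vec, long_or_vec, lat_des_vec and long_des_vec are hypothetical numeric vectors, one element per pair:
# Sketch: build one URL per pair and parse the JSON response with rjson
get_transit_route <- function(lat_or, long_or, lat_des, long_des){
  u <- paste0("http://maps.googleapis.com/maps/api/directions/json?origin=",
              lat_or, ",", long_or,
              "&destination=", lat_des, ",", long_des,
              "&region=be&sensor=false&mode=transit&departure_time=1372665319")
  fromJSON(paste(readLines(u, warn = FALSE), collapse = ""))
}
# hypothetical coordinate vectors, one element per origin-destination pair
routes <- Map(get_transit_route, lat_or_vec, long_or_vec, lat_des_vec, long_des_vec)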