Mapdist: Error is.character(from) - r

My dataset includes a column "pickup" corresponding to the starting coordinates and a "dropoff" for the ending coordinates, of a trip. Like:
pickup dropoff
40.77419,-73.872608 40.78055,-73.955042
40.7737,-73.870721 40.757007,-73.971953
I want to calculate the shortest route suggested by Google Maps, and saved the calculations in a new column. This is what I'm doing:
X$GoogleDist <- mapdist(from= list(X$pickup),
to = list(X$dropoff),
mode = "driving" ,
output = "simple", messaging = FALSE, sensor = FALSE,
language = "en-EN", override_limit = FALSE)
Which gives me the following error:
Error: is.character(from) is not TRUE

You could do
library(ggmap)
X <- read.table(header=TRUE, text="pickup dropoff
40.77419,-73.872608 40.78055,-73.955042
40.7737,-73.870721 40.757007,-73.971953")
X <- as.data.frame(lapply(X, function(x) sapply(as.character(x), function(y) URLencode(y, T) ) ), stringsAsFactors = F)
rownames(X) <- NULL
res <- mapdist(from= X$pickup,
to = X$dropoff,
mode = "driving" ,
output = "simple", messaging = FALSE, sensor = FALSE,
language = "en-EN", override_limit = FALSE)
cbind(X, res)
# pickup dropoff from to m km miles seconds minutes hours
# 1 40.77419%2C-73.872608 40.78055%2C-73.955042 40.77419%2C-73.872608 40.78055%2C-73.955042 12805 12.805 7.957027 1212 20.20 0.3366667
# 2 40.7737%2C-73.870721 40.757007%2C-73.971953 40.7737%2C-73.870721 40.757007%2C-73.971953 14038 14.038 8.723213 1437 23.95 0.3991667
Your columns are probably of type factor (check with str(X)). mapdist needs character vectors (check ?mapdist). So you have to convert the columns using as.character beforehand. Also, when using geo coordinates, I think you got to URL encode them. I.e. the comma , becomes %2C. Otherwise it didn`t work for me...

Related

How to to calculate the shortest path in R efficiently?

I have more than 3500 origins and more than 3500 destinations that are connected by more than 54000 links with 24000 nodes. I am modeling a real street network (Chicago Metropolitan Area) in R using Igraph and CppRouting. The following code is called "all or nothing traffic assignment (AON)" which has to be executed more than 40 times to reach the equilibrium in the network. Now it takes more than 10 minutes for each AON execution. It is too much time. I appreciate any suggestion besides parallel computing to reduce the execution time of the following source code:
demand_matrix <- demand_matrix[order(demand_matrix$ORG ,demand_matrix$DEST) ,]
tic()
for (i in 1:length(unique(demand_matrix$ORG))){
#I think I have to iterate on every origin
org <- unique(demand_matrix$ORG)[i]
destinations <- demand_matrix$DEST[demand_matrix$ORG == org ]
demand <- demand_matrix[demand_matrix$ORG == org,2:3]
#the igraph function is also included here which requires more time to run!
#destinations <- demand_matrix$DEST[demand_matrix$ORG == org]
#sht_path <- unlist(shortest_paths(network_igraph,from =c (org) , to = c(destinations) , mode = c("out"), weights = resolved.Network[[5]]$t0,output = c("epath")),recursive = FALSE)
#sht_path <- sapply(sht_path , as_ids)
#the procedures with cppRouting
sht_path <- get_multi_paths(network_cpprouting_graph , from = org , to = destinations ,long = TRUE)
sht_path$end <- c(sht_path$node[2:nrow(sht_path)],0)
sht_path <-sht_path[sht_path$from != sht_path$node , ]
sht_path$paste <- paste(sht_path$end , sht_path$node)
edge_id_node_sequence <- as.integer(unlist(strsplit(sht_path$paste , split = " ")))
sht_path$edge_ids <- get.edge.ids(network_igraph , edge_id_node_sequence)
###I changed the sequence of nodes to edge ids in shortest path.
sht_path$to <- as.integer(sht_path$to) #I just found that "to" is character and changing it to integer would result lower time in left_join function
sht_path <-left_join(sht_path , demand,by = c("to" = "DEST"))
V2[sht_path$edge_ids] <- V2[sht_path$edge_ids] + sht_path$TRIPS #adding traffic to each link (that is what is all about, the goal is to calculate each link volume)
}
The demand Matrix has more the 4e6 none-zero values and I tried to calculate the shortest path with get_path_pair with all origin-destination Pairs, but it never ended and I restarted my Laptop. I have only 8GB of rams.
I tried to have the shortest paths with only 8e5 pairs each time (divided my matrix to 5 sections) the third section almost never ended.
length_group <- min(nrow(demand_matrix)/4,800000)
path_pair <- get_path_pair(Graph = test_net , from = demand_matrix$ORG[1:length_group],to = demand_matrix$DEST[1:length_group], long = TRUE)
path_pair <- rbind(path_pair , get_path_pair(Graph = test_net , from = demand_matrix$ORG[(length_group+1):(2*length_group)],to = demand_matrix$DEST[(length_group+1):(2*length_group)],long = TRUE))
path_pair <- rbind(path_pair , get_path_pair(Graph = test_net , from = demand_matrix$ORG[((2*length_group)+1):(3*length_group)],to = demand_matrix$DEST[((2*length_group)+1):(3*length_group)],long = TRUE))
path_pair <- rbind(path_pair , get_path_pair(Graph = test_net , from = demand_matrix$ORG[((3*length_group)+1):(4*length_group)],to = demand_matrix$DEST[((3*length_group)+1):(4*length_group)],long = TRUE))
path_pair <- rbind(path_pair , get_path_pair(Graph = test_net , from = demand_matrix$ORG[((4*length_group)+1):(5*length_group)],to = demand_matrix$DEST[((4*length_group)+1):(5*length_group)],long = TRUE))
path_pair <- rbind(path_pair , get_path_pair(Graph = test_net , from = demand_matrix$ORG[((5*length_group)+1):nrow(demand_matrix)],to = demand_matrix$DEST[((5*length_group)+1):nrow(demand_matrix)],long = TRUE))
If I understand correctly, demand_matrix is all possible combination between origin and destination nodes ? (3500² = 12 250 000)
Since cppRouting functions are vectorized, why not try :
get_multi_path(graph, origin, dest, long=TRUE)
with origin and dest your origin and destination nodes, with length of ~ 3500.
get_multi_path is the equivalent of get_distance_matrix function, it use the main property of Dijkstra's algorithm : finding shortest path between an origin node "n" and all nodes. So, full Dijkstra algorithm is runned N times, with N being origin length.
On the other hand, get_*_pair functions run Dijkstra's algorithm with a stopping criterion : when destination node is reached. So you basically increase runtime by a factor of ~1500 (not 3500, because Dijkstra's algorithm is aborted in the last option)
If you have memory issues, splitting all combinations in smaller chunks is the good strategy. However, I suggest you to split origin nodes by 10, then run get_multi_path between origin chunk and all destination nodes. At each iteration, you can aggregate the result to have the cumulated flow for each node of the network.
Finally, try to use lapply() and data.table::rbindlist() instead of multiple rbind() calls.
EDIT : If you want to accumulate traffic on edges, here is a piece of code :
library(data.table)
# or are origin nodes (I assume of length 3500)
# dest are destination nodes
chunk_size = 350
test <- lapply(seq(1,3500, chunk_size), function(x){
print(x)
res = get_multi_paths(graph, or[x:(x+chunk_size-1)] ,
dest,
long = TRUE)
setDT(res)
# eventually merge demand for each trip (origin-destination)
# reconstruct edges (by reference using data.table)
res[,edge_from := c(node[-1], NA),.(from,to)]
# aggregate demand on each edge
res <- res[!is.na(edge_from),.(traffic = sum(demand)),.(edge_from,node)]
gc()
return(res)
})
test <- rbindlist(test)
test <- test[,.(traffic = sum(traffic)),.(edge_from,node)]
Of course, you can modify chunk_size depending your available memory.

How to read a .MAP file extension in R?

Is there a simple way to read a file of .MAP extension in R? I have tried a few options below but had no success. Here is a .MAP file for a reproducible example.
context: For some odd reason, the spatial regionalization used in health planning policies in Brazil is only available in this format. I would like to convert it to geopackage so we can add it to the geobr package.
# none of these options work
mp <- sf::st_read("./se_mapas_2013/se_regsaud.MAP")
mp <- rgdal::readGDAL("./se_mapas_2013/se_regsaud.MAP")
mp <- rgdal::readOGR("./se_mapas_2013/se_regsaud.MAP")
mp <- raster::raster("./se_mapas_2013/se_regsaud.MAP")
mp <- stars::read_stars("./se_mapas_2013/se_regsaud.MAP")
ps. there is a similar question on SO focused on Python, unfortunately unanswered
UPDATE
We have found a publication that uses a custom function that reads the .MAP file. See example below. However, it returns a "polylist" object. Is there a simple way to convert it to a simple feature?
original custom function
read.map = function(filename){
zz=file(filename,"rb")
#
# header of .map
#
versao = readBin(zz,"integer",1,size=2) # 100 = versao 1.00
#Bounding Box
Leste = readBin(zz,"numeric",1,size=4)
Norte = readBin(zz,"numeric",1,size=4)
Oeste = readBin(zz,"numeric",1,size=4)
Sul = readBin(zz,"numeric",1,size=4)
geocodigo = ""
nome = ""
xleg = 0
yleg = 0
sede = FALSE
poli = list()
i = 0
#
# repeat of each object in file
#
repeat{
tipoobj = readBin(zz,"integer",1,size=1) # 0=Poligono, 1=PoligonoComSede, 2=Linha, 3=Ponto
if (length(tipoobj) == 0) break
i = i + 1
Len = readBin(zz,"integer",1,size=1) # length byte da string Pascal
geocodigo[i] = readChar(zz,10)
Len = readBin(zz,"integer",1,size=1) # length byte da string Pascal
nome[i] = substr(readChar(zz,25),1,Len)
xleg[i] = readBin(zz,"numeric",1,size=4)
yleg[i] = readBin(zz,"numeric",1,size=4)
numpontos = readBin(zz,"integer",1,size=2)
sede = sede || (tipoobj = 1)
x=0
y=0
for (j in 1:numpontos){
x[j] = readBin(zz,"numeric",1,size=4)
y[j] = readBin(zz,"numeric",1,size=4)
}
# separate polygons
xInic = x[1]
yInic = y[1]
for (j in 2:numpontos){
if (x[j] == xInic & y[j] == yInic) {x[j]=NA; y[j] = NA}
}
poli[[i]] = c(x,y)
dim(poli[[i]]) = c(numpontos,2)
}
class(poli) = "polylist"
attr(poli,"region.id") = geocodigo
attr(poli,"region.name") = nome
attr(poli,"centroid") = list(x=xleg,y=yleg)
attr(poli,"sede") = sede
attr(poli,"maplim") = list(x=c(Oeste,Leste),y=c(Sul,Norte))
close(zz)
return(poli)
}
using original custom function
mp <- read.map("./se_mapas_2013/se_regsaud.MAP")
class(mp)
>[1] "polylist"
# plot
plot(attributes(mp)$maplim, type='n', asp=1, xlab=NA, ylab=NA)
title('Map')
lapply(mp, polygon, asp=T, col=3)
The problems were: use of readChar with trailing nul bytes - changed to readBin(); 8-bit characters that rawToChar() would not accept (on my UTF-8 system); multiple slivers in some files that needed dropping; and some others. I added the edited read.map() function above to maptools, but with a different name and not exported. So now (with maptools rev 370 from https://r-forge.r-project.org/R/?group_id=943 when build completes):
library(maptools)
o <- maptools:::readMAP2polylist("se_regsaud.MAP")
oo <- maptools:::.makePolylistValid(o)
ooo <- maptools:::.polylist2SpP(oo, tol=.Machine$double.eps^(1/4))
rn <- row.names(ooo)
df <- data.frame(ID=rn, row.names=rn, stringsAsFactors=FALSE)
res <- SpatialPolygonsDataFrame(ooo, data=df)
library(sf)
res_sf <- st_as_sf(res)
res_sf
plot(st_geometry(res_sf))
This approach re-uses the maptools code dating back almost twenty years, with minor edits to handle subsequent changes in reading binary files, and fixing slivers.
EDIT: looks like this doesn't work generally across all files so proper conversion to sf would need a deeper look.
Here's a quick stab at resurrection. It might be incorrect to cumulatively sum to get the multi linestrings, I tested with se_municip.MAP and it only had NAs as the closing row of each ring. If it potentially has non-connected multi-rings (multipolygon) then this approach won't work completely.
x <- read.map("se_municip.MAP")
df <- setNames(as.data.frame(do.call(rbind, x)), c("x", "y"))
df$region.name <- rep(attr(x, "region.name"), unlist(lapply(x, nrow)))
## in case there are multi-rings
df$linestring_id <- cumsum(c(0, diff(is.na(df$x))))
df$polygon_id <- as.integer(factor(df$region.name))
df <- df[!is.na(df$x), ]
sfx <- sfheaders::sf_polygon(df, x = "x", y = "y", linestring_id = "linestring_id", polygon_id = "polygon_id", keep = TRUE)
#sf::st_crs(sfx) <- sf::st_crs(<whatever it is probably 4326>)
plot(sf::st_geometry(sfx), reset = FALSE)
maps::map(add = TRUE)
Interesting that you came across an official version of a forgotten legacy!
(BTW can I publish the data sets in a package?)

Difficulty in downloading TCGA data

I am trying to download the TCGA data but I am getting this error:
Error in summarizeMaf(maf = maf, anno = clinicalData, chatty =
verbose): Tumor_Sample_Barcode column not found in provided clinical
data. Rename column containing sample names to Tumor_Sample_Barcode if
necessary.
This is my code:
library("TCGAbiolinks")
library("tidyverse")
library(maftools)
query <- GDCquery( project = "TCGA-LIHC",
data.category = "Clinical",
file.type = "xml",
legacy = FALSE)
GDCdownload(query,directory = ".")
clinical <- GDCprepare_clinic(query, clinical.info = "patient",directory = ".")
#getting the survival time of event data
survival_data <- as_tibble(clinical[,c("days_to_last_followup","days_to_death","vital_status","bcr_patient_barcode","patient_id")])
survival_data <- filter(survival_data,!is.na(days_to_last_followup)|!is.na(days_to_death)) #not both NA
survival_data <- filter(survival_data,!is.na(days_to_last_followup)|days_to_last_followup>0 &is.na(days_to_death)|days_to_death > 0 ) #ensuring positive values
survival_data <- survival_data[!duplicated(survival_data$patient_id),] #ensuring no duplicates
dim(survival_data) #should be 371
maf <- GDCquery_Maf("LIHC", pipelines = "muse")
#maf <- GDCquery_Maf("LIHC", pipelines = "somaticsniper")
#clin <- GDCquery_clinic("TCGA-LIHC","clinical")
#print(clin )
laml = read.maf(
maf,
clinicalData = clinical,
removeDuplicatedVariants = TRUE,
useAll = TRUE,
gisticAllLesionsFile = NULL,
gisticAmpGenesFile = NULL,
gisticDelGenesFile = NULL,
gisticScoresFile = NULL,
cnLevel = "all",
cnTable = NULL,
isTCGA = TRUE,
vc_nonSyn = NULL,
verbose = TRUE
)
You should have: a) loaded with library(maftools) and b) included what was printed out before that error message:
-Validating
-Silent variants: 18306
-Summarizing
--Possible FLAGS among top ten genes:
TTN
MUC16
OBSCN
FLG
-Processing clinical data
Available fields in provided annotations..
[1] "bcr_patient_barcode" "additional_studies"
[3] "tissue_source_site" "patient_id"
# snipped remaining 78 column names
Notice that the first column is not named "Tumor_Sample_Barcode", so you need to follow the helpful error message directions and rename the appropriate column which appears to be the first one:
ns. After doing so I get:
-Validating
-Silent variants: 18306
-Summarizing
--Possible FLAGS among top ten genes:
TTN
MUC16
OBSCN
FLG
-Processing clinical data
-Finished in 1.911s elapsed (2.470s cpu)

Gmapsdistance function not accepting departure times

I am attempting to give a series of routes to gmapsdistance function. This has previously worked fine with other routes but is not accepting the departure_time arguments with this combination of lat and long.
I have attempted simply entering in a future time in character format ("20:00:00") but get the same error.
route_1 <- c(c(57.14748,-2.0954), c(51.12788,-4.25714))
route_2 <- c(c(55.81875,-4.02555), c(51.4721,-0.45273))
route_3 <- c(c(54.96566,-5.0152), c(55.86568,-4.25714))
route_4 <- c(c(51.12788,-4.25714), c(51.38867,0.193838))
route_5 <- c(c(55.86568,-4.25714), c(51.12788,-4.25714))
route_6 <- c(c(51.4721,-0.45273), c(51.12788,-4.25714))
result <- gmapsdistance(origin = route_6[[1]], route_6[[2]],
destination = route_6[[3]], route_6[[4]],
traffic_model = "pessimistic",
mode = "driving",
dep_date = as.character(Sys.Date()),
dep_time = as.character(Sys.time() + 60*10),
key=key,
combinations = "all",
avoid = "")``
Error received is strangely:
The departure time has to be some time in the future!
This is not related to setting departure_time in the past like the error indicates. It actually looks like it's stemming from an invalid format for origin & destination and for dep_time.
According to the library's docs, the coordinates should be formatted as follows:
"38.1621328+24.0029257"
And departure time should only contain a time, not a date+time. E.g.:
"20:40:00"
Please try running the full code below (with your API key):
library("gmapsdistance")
set.api.key("AIza...")
route_6 <- c(c("51.4721+-0.45273"), c("51.12788+-4.25714"))
results = gmapsdistance(origin = route_6[[1]],
destination = route_6[[2]],
traffic_model = "pessimistic",
mode = "driving",
dep_date = as.character(Sys.Date()),
dep_time = as.character(format(Sys.time() + 60*10, "%H:%M:%S")),
key=get.api.key(),
combinations = "all",
avoid = "")
results
Which for me as of now it returns:
$Time
[1] 16842
$Distance
[1] 337527
$Status
[1] "OK"
Hope this helps!

How to exactly reproduce historical ORS-Isochrones? [duplicate]

I am working with the gmapsdistance package in R. I have my API key, and I am familiar with the functions within the package.
However, I would like to work out a problem in the reverse direction. Instead of just finding the Time, Distance, and Status between lat/longs are vectors of lat/longs, I would like to input a lat/long, and draw a region of all points that could be driven to in 3 hours or less. Then I'd like to draw this on a Google map.
To start, it would be great to use Marimar, FL: 25.9840, -80.2821.
Does anyone have experience with that type of problem?
As suggested in the comments, you can sign up to a service like Travel Time Platform (which I'm using in this example) and use their API to get the possible destinations given a starting point.
Then you can plot this on a map using Google Maps (in my googleway package)
appId <- "TravelTime_APP_ID"
apiKey <- "TravelTime_API_KEY"
mapKey <- "GOOGLE_MAPS_API_KEY"
library(httr)
library(googleway)
library(jsonlite)
location <- c(25.9840, -80.2821)
driveTime <- 2 * 60 * 60
## London example
## location <- c(51.507609, -0.128315)
## sign up to http://www.traveltimeplatform.com/ and get an API key
## and use their 'Time Map' API
url <- "http://api.traveltimeapp.com/v4/time-map"
requestBody <- paste0('{
"departure_searches" : [
{"id" : "test",
"coords": {"lat":', location[1], ', "lng":', location[2],' },
"transportation" : {"type" : "driving"} ,
"travel_time" : ', driveTime, ',
"departure_time" : "2017-05-03T08:00:00z"
}
]
}')
res <- httr::POST(url = url,
httr::add_headers('Content-Type' = 'application/json'),
httr::add_headers('Accept' = 'application/json'),
httr::add_headers('X-Application-Id' = appId),
httr::add_headers('X-Api-Key' = apiKey),
body = requestBody,
encode = "json")
res <- jsonlite::fromJSON(as.character(res))
pl <- lapply(res$results$shapes[[1]]$shell, function(x){
googleway::encode_pl(lat = x[['lat']], lon = x[['lng']])
})
df <- data.frame(polyline = unlist(pl))
df_marker <- data.frame(lat = location[1], lon = location[2])
google_map(key = mapKey) %>%
add_markers(data = df_marker) %>%
add_polylines(data = df, polyline = "polyline")
If you want to render in leaflet and use a free isochrone service, this is a pretty neat option. There is a limit of 2 hours drive away though.
devtools::install_github("tarakc02/rmapzen")
library(rmapzen)
Sys.setenv(MAPZEN_KEY = "") # get for free at https://mapzen.com/
marimar <- mz_geocode("Marimar, FL")
isos <- mz_isochrone(
marimar,
costing_model = mz_costing$auto(),
contours = mz_contours(c(60 * 2)) # 2 hours
)
library(leaflet)
leaflet(as_sp(isos)) %>%
addProviderTiles("CartoDB.DarkMatter") %>%
addPolygons(color = ~paste0("#", color), weight = 1)

Resources