My code runs very slowly on my laptop, and I have access to a Windows 2012 Server x64 with 256 GB of RAM.
I have the server set up running R and this code works there, but 48 hours of runtime only got it to 25%.
From what I have learned, this is because it only uses one core.
I'm currently exploring a foreach loop, but getting nowhere slowly.
library("sp")
library("rgeos")
library("geosphere")
library("gdistance")
# Data
dna <- data.frame(cbind(rnorm(400) * 2 + 13, rnorm(400) + 48))
dna$ID <- seq.int(nrow(dna))
match <- data.frame(cbind(rnorm(4000) * 2 + 13, rnorm(4000) + 48))
match$ID <- seq.int(nrow(match))
## Set row id
RID2 <- 1
# create output table
tablelength <- nrow(dna)
match1 <- data.frame(UPRN = rep(0, tablelength), Long = rep(0, tablelength),
                     Lats = rep(0, tablelength), MatchID = rep(0, tablelength),
                     Longm = rep(0, tablelength), Latsm = rep(0, tablelength),
                     distance = rep(0, tablelength))
# start loop
for (RID2 in dna[,3]) {
  # Set UPRN and Exchange Name
  Name <- paste(dna[RID2, 3])
  set1 <- data.frame(dna[RID2, 1:2])
  set2 <- data.frame(match[, 1:2])
  set1sp <- SpatialPoints(set2)
  set2sp <- SpatialPoints(set1)
  set1$ID <- apply(gDistance(set1sp, set2sp, byid = TRUE), 1, which.min)
  ID <- paste(apply(gDistance(set1sp, set2sp, byid = TRUE), 1, which.min))
  # insert row
  match1[RID2, ] <- c(Name, set1[, 1], set1[, 2], paste(match[ID, 3]), set2[ID, 1], set2[ID, 2],
                      distVincentyEllipsoid(c(set1[, 1], set1[, 2]), c(set2[ID, 1], set2[ID, 2]),
                                            a = 6378137, b = 6356752.3142, f = 1/298.257223563))
  remove(set1, set2, set1sp, set2sp)
}
The output is what I am looking for, but ideally with a sub-1-day runtime (it is currently at around 8 days).
This works for me, and cuts calculation time (on your sample data) in half on my machine.
set.seed(123)
# Data
dna <- data.frame(cbind(rnorm(400) * 2 + 13, rnorm(400) + 48))
dna$ID <- seq.int(nrow(dna))
match <- data.frame(cbind(rnorm(4000) * 2 + 13, rnorm(4000) + 48))
match$ID <- seq.int(nrow(match))
###
library( sf )
library( data.table )
dna.sf <- st_as_sf( x = dna,
coords = c( "X1", "X2"),
crs = "+proj=longlat +datum=WGS84" )
match.sf <- st_as_sf( x = match,
coords = c( "X1", "X2"),
crs = "+proj=longlat +datum=WGS84" )
#create data.tables
setDT(dna)
setDT(match)
#add suffixes to identify columns later (after join)
setnames(dna, names(dna), paste0(names(dna),".dna"))
setnames(match, names(match), paste0(names(match),".match"))
#create distance matrix
m <- round( st_distance( dna.sf, match.sf ), digits = 0 )
colnames( m ) <- match.sf$ID
rownames( m ) <- dna.sf$ID
#get colname of min to nearest (remember, colname = match-ID ;-) )
dna$nearest <- apply( m, 1, which.min )
#get the min distance
dna$dist <- apply( m, 1, min )
#now left-join to get the coordinates of match, use data.table for speed
result <- match[dna, on = c("ID.match==nearest") ]
The results seem to be the same as when using your 'old' method, but calculation time is roughly cut in half (7.5 -> 4 secs)
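If match grows much larger, building the full distance matrix can itself become the bottleneck; sf also has st_nearest_feature(), which returns the same nearest-neighbour indices without materialising the matrix. A sketch of that variant (not benchmarked, otherwise slotting into the same join as above):
#alternative: nearest index without building the full distance matrix
dna$nearest <- st_nearest_feature( dna.sf, match.sf )
#distance to that nearest match only, element by element
dna$dist <- st_distance( dna.sf, match.sf[ dna$nearest, ], by_element = TRUE )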
You can already get a good speed boost by simply optimizing the code and removing redundant parts. For example, this is more or less twice as fast on the test data, and is easily parallelizable.
library("sp")
library("rgeos")
library("geosphere")
library("gdistance")
# Data
dna <- data.frame(cbind(rnorm(400) * 2 + 13, rnorm(400) + 48))
dna$ID <- seq.int(nrow(dna))
match <- data.frame(cbind(rnorm(40000) * 2 + 13, rnorm(40000) + 48))
match$ID <- seq.int(nrow(match))
##Set row id
RID2 <- 1
#create output table
tablelength <- nrow(dna)
matchlist <- list()
set2 <- match[,1:2]
set1sp <- SpatialPoints(set2)
for (RID2 in dna[,3]) {
  set1 <- dna[RID2, 1:2]
  set2sp <- SpatialPoints(set1)
  ID <- which.min(gDistance(set1sp, set2sp, byid = TRUE))
  # insert row
  matchlist[[RID2]] <- data.frame(UPRN = dna[RID2, 3],
                                  Long = set1[, 1],
                                  Lats = set1[, 2],
                                  matchid = match[ID, 3],
                                  Longm = set2[ID, 1],
                                  Latsm = set2[ID, 2],
                                  distance = distVincentyEllipsoid(set1, set2[ID, ],
                                                                   a = 6378137, b = 6356752.3142,
                                                                   f = 1/298.257223563))
}
match1 <- data.table::rbindlist(matchlist)
Thank you all for your input; I will be reading through the different styles to further my R learning.
I have used a solution posted in the Reddit thread I also made at the same time.
require(foreach)
require(doParallel)
# set up a cluster with 4 workers and register it as the parallel backend
cl <- makeCluster(4)
registerDoParallel(cl)
# for each row of dna, return the row index of the nearest point in match
temp <- foreach(I = 1:nrow(dna), .combine = "c", .packages = c("rgeos", "sp")) %dopar% {
  return(c(which.min(
    gDistance(
      SpatialPoints(data.frame(dna[I, 1:2])),
      SpatialPoints(data.frame(match[, 1:2])),
      byid = TRUE
    ))))
}
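For completeness, the foreach call above only returns the nearest-match row indices, so the cluster still has to be stopped and the output table rebuilt from temp afterwards. A rough, untested sketch using the same column names as my original loop (distance filled in vectorised with geosphere):
library(geosphere)
# shut the workers down once the indices are back
stopCluster(cl)
# temp holds, for each row of dna, the row number of its nearest point in match
match1 <- data.frame(UPRN    = dna$ID,
                     Long    = dna[, 1],
                     Lats    = dna[, 2],
                     MatchID = match$ID[temp],
                     Longm   = match[temp, 1],
                     Latsm   = match[temp, 2])
match1$distance <- distVincentyEllipsoid(as.matrix(match1[, c("Long", "Lats")]),
                                         as.matrix(match1[, c("Longm", "Latsm")]),
                                         a = 6378137, b = 6356752.3142, f = 1/298.257223563)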
https://old.reddit.com/r/rstats/comments/aebamb/how_do_i_use_all_the_cores_on_a_server_to_match/
Again thank you for the help :-D
Related
I've been stuck on this for days: I want to pull data from the Binance API, which is well over ten thousand obs, but R only returns at most 1500 obs per request.
I have been advised to use a loop, but it hasn't helped so far.
Any help would be greatly appreciated!
library(httr)
library(jsonlite)
library(lubridate)
library(data.table)  # for rbindlist()
library(binancer)    # assumed source of binance_klines()
# api description:
# https://github.com/binance-exchange/binance-official-api-docs/blob/master/rest-api.md
# (see the #klinecandlestick-data section)
options(stringsAsFactors = FALSE)
url <- "https://api.binance.com"
path <- "/api/v3/exchangeInfo"
raw.result <- GET(url = url, path = path)
not.cool.data <- rawToChar(raw.result$content)
list1 <- fromJSON(not.cool.data)
list <- list1$symbols$symbol
klines2 <- rbindlist(lapply(
c('LTCTUSD', 'LTCBNB'),
binance_klines,
interval = '30m',
start_time = '2017-01-01',
end_time = '2021-01-08'
))
names(klines2)
sapply(klines2, function(x) length(unique(x)))
klines2
df.1 <- list.files(pattern = "2017-2021")
df.1_r <- vector(mode = "integer",
                 length = length(klines2))
tickling <- unique(klines2$symbol)
tickling
low <- c()
high <- c()
for (symbol in tickling) {
  look.at <- klines2$symbol == symbol
  low <- append(low, min(klines2$low[look.at]))
  high <- append(high, max(klines2$high[look.at]))
}
tickling
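A common way around the per-request row cap that the code above keeps hitting is to request the data in smaller date windows and bind the pieces together. A rough, untested sketch, assuming binance_klines (presumably from the binancer package, given the rbindlist call above) accepts start_time/end_time exactly as used above:
library(data.table)
library(binancer)  # assumed source of binance_klines()
# one calendar month of 30-minute klines is at most 31 * 48 = 1488 rows,
# which stays under the ~1500-row cap mentioned above
starts <- seq(as.Date("2017-01-01"), as.Date("2021-01-08"), by = "month")
ends   <- c(starts[-1] - 1, as.Date("2021-01-08"))
klines2 <- rbindlist(lapply(seq_along(starts), function(i) {
  rbindlist(lapply(c("LTCTUSD", "LTCBNB"), binance_klines,
                   interval   = "30m",
                   start_time = format(starts[i]),
                   end_time   = format(ends[i])))
}))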
I have 977 obs in top500Stocks, which contains the names of 977 stocks.
head(top500Stocks,10)
Symbol
1 RELIANCE
2 TCS
3 HDFCBANK
4 INFY
5 HINDUNILVR
6 HDFC
7 ICICIBANK
8 KOTAKBANK
9 SBIN
10 BAJFINANCE
and in stocksRetData I have the Date, OHLC, Adj.Close, Volume and Ret of each stock from top500Stocks:
head(stocksRetData[[1]],3)
Date Open High Low Close Adj.Close Volume Ret
1 20000103 28.18423 29.86935 28.18423 38.94457 29.86935 28802010 0.000
2 20000104 30.66445 32.26056 29.82188 42.06230 32.26056 61320457 0.080
3 20000105 30.45677 34.16522 30.45677 43.71014 33.52440 173426953 0.039
Now, for a given lookbackPeriod and holdPeriod, I am trying to run the function below, but it takes about 1 minute. How can I make it faster? Because I have to run it for multiple lookbackPeriod and holdPeriod values, it will take forever to complete.
library(zoo)    # rollapply()
library(dplyr)  # lead(), bind_rows()

CalC.MOD_MScore.Ret.High <- function(lookbackPeriod, holdPeriod, fnoStocks,
                                     stocksRetData, totalTestPeriod) {
#We go through each stock and calculate Modified mscores where we give more importance to recent data
WeeklyData <- list()
wmean <- function(x, k) mean(seq(k)/k * x)
for (i in 1:nrow(fnoStocks)){
out <- stocksRetData[[i]]
out <- tail(out,totalTestPeriod)
if (nrow(out)==totalTestPeriod){
tempDF <- transform(out, wtMean = rollapply(Ret, lookbackPeriod, wmean,
k = lookbackPeriod, align = "right",
fill = NA))
tempDF <- transform(tempDF, ExitVal = rollapply(lead(High, holdPeriod),
holdPeriod, max,
align = "right",
fill = NA))
tempDF$NWeekRet <- (tempDF$ExitVal - tempDF$Adj.Close ) / tempDF$Adj.Close
tempDF <- tempDF[!is.na(tempDF$wtMean),]
tempDF <- tempDF[!is.na(tempDF$ExitVal),]
tempDF$StockName = fnoStocks[i,1]
tempDF$WeekNum = c((lookbackPeriod):(nrow(tempDF)+lookbackPeriod-1))
WeeklyData[[i]] <- data.frame(
StockName = tempDF$StockName,
WeekNum = tempDF$WeekNum,
M_Score = tempDF$wtMean,
NWeekRet = tempDF$NWeekRet,
stringsAsFactors = FALSE
)
}
}# i ends here
return(bind_rows(WeeklyData))
}
This takes more than a minute to complete.
a <- CalC.MOD_MScore.Ret.High(4,14,fnoStocks = top500Stocks, stocksRetData = stocksRetData, 2000)
First of all, I wouldn't suggest using for-loops in R. I would rewrite your loop with an lapply, like this:
CalC.MOD_MScore.Ret.High <- function(lookbackPeriod, holdPeriod, fnoStocks,
stocksRetData, totalTestPeriod) {
#We go through each stock and calculate Modified mscores where we give more importance to recent data
wmean <- function(x, k) mean(seq(k)/k * x)
WeeklyData <- lapply(1:nrow(fnoStocks), function(i) {
out <- stocksRetData[[i]]
out <- tail(out,totalTestPeriod)
if(nrow(out)!=totalTestPeriod) return(NULL)
tempDF <- transform(out, wtMean = rollapply(Ret, lookbackPeriod, wmean,
k = lookbackPeriod, align = "right",
fill = NA))
tempDF <- transform(tempDF, ExitVal = rollapply(lead(High, holdPeriod),
holdPeriod, max,
align = "right",
fill = NA))
tempDF$NWeekRet <- (tempDF$ExitVal - tempDF$Adj.Close ) / tempDF$Adj.Close
tempDF <- tempDF[!is.na(tempDF$wtMean),]
tempDF <- tempDF[!is.na(tempDF$ExitVal),]
tempDF$StockName = fnoStocks[i,1]
tempDF$WeekNum = c((lookbackPeriod):(nrow(tempDF)+lookbackPeriod-1))
data.frame(
StockName = tempDF$StockName,
WeekNum = tempDF$WeekNum,
M_Score = tempDF$wtMean,
NWeekRet = tempDF$NWeekRet,
stringsAsFactors = FALSE
)
})
return(bind_rows(WeeklyData))
}
Having an lapply makes it easier to apply parallelization tools to it.
You can have a look at the parallel package. With this package, you can parallelize and make use of multiple cores on your machine. To do so, you need to set up a cluster, which produces some overhead, but I think it will pay off in your case. Set up a cluster via cl <- parallel::makeCluster(parallel::detectCores()). The detectCores() function gets the number of available cores on your machine. Then you can edit the lapply to
WeeklyData <- parallel::parLapply(cl = cl, 1:nrow(fnoStocks), function(i) {
...
})
After all your calculations have finished, call parallel::stopCluster(cl) to stop the cluster.
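Putting it together, a minimal sketch of the surrounding setup (assuming rollapply comes from zoo and lead/bind_rows from dplyr, which each worker then needs to load; the function's own arguments are captured by the closure and shipped to the workers automatically):
library(parallel)
cl <- makeCluster(detectCores())
# make sure every worker has the packages used inside the function
clusterEvalQ(cl, {
  library(zoo)    # rollapply()
  library(dplyr)  # lead(), bind_rows()
})
# inside CalC.MOD_MScore.Ret.High, swap the lapply() call for:
# WeeklyData <- parLapply(cl, 1:nrow(fnoStocks), function(i) { ... })
a <- CalC.MOD_MScore.Ret.High(4, 14, fnoStocks = top500Stocks,
                              stocksRetData = stocksRetData, 2000)
stopCluster(cl)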
I have the following function:
# relies on gtools::combinations(), signal::butter()/filtfilt(), and the
# hilbert(), angle() and std() helpers already loaded in my environment
CFC_GLM <- function(data, frequency_bins){
adj_mat <- matrix(0, nrow = dim(data)[1], ncol = dim(data)[1])
bf_filters <- list()
combs <- combinations(length(frequency_bins), 2, repeats.allowed = T)
all_adj_mat <- list()
for(z in 1:length(frequency_bins)){
bf_filters[[z]] <- butter(3, c(frequency_bins[[z]][1]/1200,
frequency_bins[[z]][2]/1200), type = "pass")
}
for(f in 1:nrow(combs)){
for(i in 1:dim(data)[1]){
for(j in 1:dim(data)[1]){
sensor_1 <- data[i,]
sensor_2 <- data[j,]
sensor_1_filt = filtfilt(bf_filters[[combs[f,1]]], sensor_1)
sensor_2_filt = filtfilt(bf_filters[[combs[f,2]]], sensor_2)
a_y <- abs(hilbert(sensor_2_filt, 1200))
a_x <- abs(hilbert(sensor_1_filt, 1200))
theta_x <- angle(hilbert(sensor_1_filt, 1200)) %% 2*pi
a_x_norm <- (a_x - mean(a_x))/std(a_x)
a_y_norm <- (a_y - mean(a_y))/std(a_y)
theta_x_norm <- (theta_x - mean(theta_x))/std(theta_x)
fit <- lm(a_y_norm ~ sin(theta_x_norm) + cos(theta_x_norm) +
a_x_norm)
summ <- summary(fit)
r <- sqrt(summ$r.squared)
adj_mat[i,j] <- r
}
}
all_adj_mat[[f]] <- adj_mat
}
return(all_adj_mat)
}
Just to summarize: the function takes a matrix of signals (246 sensors by 2400 samples), performs some signal processing, and then fits a GLM between every possible pair of sensors. This process is repeated for 4 frequency bandwidths and their combinations (within- and cross-frequency coupling). Right now this code is terribly inefficient and takes a really long time to run. Is there a way to vectorize/parallelize this function? I have researched this question extensively and cannot seem to find an answer.
I am not sure whether to parallelize some of the tasks within the function or to make the whole function callable by parApply (vectorized). My intuition is the latter, but I am not sure how to approach this. Any help is greatly appreciated.
Reproducible Example
test_data <- structure(c(-347627.104358097, 821947.421444641, 496824.676355433,
-178091.364312102, -358842.250713998, 234666.210462063,
-1274153.04141668,
1017066.42839987, -158388.137875357, 191691.279588641,
-16231.2106151229,
378249.600546794, 1080850.88212858, -688841.640871254,
-616713.991288002,
639401.465180969, -1625802.44142751, 472370.867686569,
-631863.239075449,
-598755.248911174, 276422.966753179, -44010.9403226763,
1569374.08537143,
-1138797.2585617, -824232.849278583, 955783.332556046,
-1943384.98409094,
-54443.829280377, -1040354.44654998, -1207674.05255178,
496481.331429747,
-417435.356472725, 1886817.1254085, -1477199.59091112,
-947353.716505171,
1116336.49812969, -2173805.84111182, -574875.152250742,
-1343996.2219146,
-1492260.06197604, 626856.67540728, -713761.48191904, 1987730.27341334,
-1673384.77863935, -968522.886481198, 1089458.71433614,
-2274932.19262517,
-1096749.79392427, -1520842.86946059, -1390794.61065106,
669864.477272507,
-906096.822125892, 1863506.59188299, -1720956.06310511,
-889359.420058576,
885300.628410276, -2224340.54992297, -1619386.88041896,
-1570131.07127786,
-934848.556063722, 644671.113108699, -973418.329437102,
1541962.53750178,
-1636863.31666018, -728992.972371437, 551297.997356909,
-2026413.5471505,
-2129730.49230266, -1511423.25789691, -236962.889589694,
580683.399845852,
-906261.700784793, 1080101.95011954, -1455931.89179814,
-518630.187846405,
158846.288141661, -1715610.22092989, -2601349.5081924,
-1380068.64260811,
541310.557194977, 509125.333244057, -711696.682554995,
551748.792106809,
-1222430.29467688, -293847.487823853, -215078.751157158,
-1354005.89576504,
-2997647.23289805, -1220136.14918605, 1231169.98678596,
455388.081391798,
-415489.975542684, 32724.7895795912, -980848.930757441,
-86618.5594163355,
-506333.915891838, -1022235.58829567, -3279232.01820961,
-1076344.95091665,
1696655.88400158), .Dim = c(10L, 10L))
frequency_bins <- list(band1 = c(2,4), band2 = c(4,12), band3 =
c(12,30), band4 = c(30,100))
system.time(test_result <- CFC_GLM(test_data, frequency_bins))
user system elapsed
1.839 0.009 1.849
I'm not sure how to include the result in a manageable way. Sorry for the naivety. This is only with 10 sensors by 10 samples, to have a manageable test set.
Right off the bat I would suggest predeclaring the length of your lists.
bf_filters <- rep(list(NA), length(frequency_bins))
all_adj_mat <- rep(list(NA), nrow(combs))
#this is your function to be applied to each (i, j) pair
i_j_fun <- function(perms) {
  sensor_1_filt <- filtfilt(bf_filters[[combs[perms[3], 1]]], data[perms[1], ])
  sensor_2_filt <- filtfilt(bf_filters[[combs[perms[3], 2]]], data[perms[2], ])
  a_y <- abs(hilbert(sensor_2_filt, 1200))
  a_x <- abs(hilbert(sensor_1_filt, 1200))
  theta_x <- angle(hilbert(sensor_1_filt, 1200)) %% 2*pi
  a_x_norm <- (a_x - mean(a_x))/std(a_x)
  a_y_norm <- (a_y - mean(a_y))/std(a_y)
  theta_x_norm <- (theta_x - mean(theta_x))/std(theta_x)
  fit <- lm(a_y_norm ~ sin(theta_x_norm) + cos(theta_x_norm) + a_x_norm)
  summ <- summary(fit)
  r <- sqrt(summ$r.squared)
  return(r)
}
Your i and j for loops can be turned into a function and used with apply.
#perms acts like the nested i and j for loops
#(repeats.allowed = TRUE keeps the i == j pairs, matching the original loops)
perms <- permutations(dim(data)[1], 2, seq_len(dim(data)[1]), repeats.allowed = TRUE)
for (f in 1:nrow(combs)) {
  #append the current band-pair index f as a third column for i_j_fun
  all_adj_mat[[f]] <- matrix(apply(cbind(perms, f), 1, i_j_fun),
                             nrow = dim(data)[1], ncol = dim(data)[1], byrow = TRUE)
}
That should do it.
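If you also want to use multiple cores, the same per-pair function can be handed to parallel::parApply. A rough sketch, assuming data, bf_filters, combs, perms, all_adj_mat and i_j_fun exist as above, that butter/filtfilt come from signal and permutations/combinations from gtools, and that whatever provides hilbert/angle/std is likewise loaded on each worker:
library(parallel)
cl <- makeCluster(detectCores() - 1)
# every worker needs the packages and objects referenced inside i_j_fun
clusterEvalQ(cl, { library(signal); library(gtools) })
clusterExport(cl, c("data", "bf_filters", "combs", "perms", "i_j_fun"))
for (f in 1:nrow(combs)) {
  all_adj_mat[[f]] <- matrix(parApply(cl, cbind(perms, f), 1, i_j_fun),
                             nrow = dim(data)[1], ncol = dim(data)[1], byrow = TRUE)
}
stopCluster(cl)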
The following code runs a loop, but the problem is the speed; it takes several hours to finish and I am looking for an alternative so that I don't have to wait so long.
Basically, the code does the following calculations:
1.-It calculates the mean of the values of the 60 days.
2.-It gets the standard deviation of the values of the 60 days.
3.-It gets the Max of the values of the 60 days.
4.-It gets the Min of the values of the 60 days.
5.-Then with the previous calculations the code "smooths" the peaks up and down.
6.-Then the code simply get the means from 60, 30, 15 and 7 Days.
So the purpose of these code is to remove the peaks of the data using the method already mentioned.
Here is the code:
options(stringsAsFactors=F)
DAT <- data.frame(ITEM = "x", CLIENT = as.numeric(1:100000), matrix(sample(1:1000, 60, replace=T), ncol=60, nrow=100000, dimnames=list(NULL,paste0('DAY_',1:60))))
DATT <- DAT
nRow <- nrow(DAT)
TMP <- NULL
for(iROW in 1:nRow){#iROW <- 1
print(c(iROW,nRow))
Demand <- NULL
for(iCOL in 3:ncol(DAT)){#iCOL <- 1
Demand <- c(Demand,DAT[iROW,iCOL])
}
ww <- which(!is.na(Demand))
if(length(ww) > 0){
Average <- round(mean(Demand[ww]),digits=4)
DesvEst <- round(sd(Demand,na.rm=T),digits=4)
Max <- round(Average + (1 * DesvEst),digits=4)
Min <- round(max(Average - (1 * DesvEst), 0),digits=4)
Demand <- round(ifelse(is.na(Demand), Demand, ifelse(Demand > Max, Max, ifelse(Demand < Min, Min, Demand))))
Prom60 <- round(mean(Demand[ww]),digits=4)
Prom30 <- round(mean(Demand[intersect(ww,(length(Demand) - 29):length(Demand))]),digits=4)
Prom15 <- round(mean(Demand[intersect(ww,(length(Demand) - 14):length(Demand))]),digits=4)
Prom07 <- round(mean(Demand[intersect(ww,(length(Demand) - 6):length(Demand))]),digits=4)
}else{
Average <- DesvEst <- Max <- Min <- Prom60 <- Prom30 <- Prom15 <- Prom07 <- NA
}
DAT[iROW,3:ncol(DAT)] <- Demand
TMP <- rbind(TMP, cbind(DAT[iROW,], Average, DesvEst, Max, Min, Prom60, Prom30, Prom15, Prom07))
}
DAT <- TMP
If one runs your code (with a smaller number of rows) through a profiler, one sees that the main issue is the rbind at the end, followed by the c mentioned by @Riverarodrigoa:
We can focus on these two by creating numeric matrices of suitable size and working with those. Only at the end is the final data.frame created:
options(stringsAsFactors=F)
N <- 1000
set.seed(42)
DAT <- data.frame(ITEM = "x",
CLIENT = as.numeric(1:N),
matrix(sample(1:1000, 60, replace=T), ncol=60, nrow=N, dimnames=list(NULL,paste0('DAY_',1:60))))
nRow <- nrow(DAT)
TMP <- matrix(0, ncol = 8, nrow = N,
dimnames = list(NULL, c("Average", "DesvEst", "Max", "Min", "Prom60", "Prom30", "Prom15", "Prom07")))
DemandMat <- as.matrix(DAT[,3:ncol(DAT)])
for(iROW in 1:nRow){
Demand <- DemandMat[iROW, ]
ww <- which(!is.na(Demand))
if(length(ww) > 0){
Average <- round(mean(Demand[ww]),digits=4)
DesvEst <- round(sd(Demand,na.rm=T),digits=4)
Max <- round(Average + (1 * DesvEst),digits=4)
Min <- round(max(Average - (1 * DesvEst), 0),digits=4)
Demand <- round(ifelse(is.na(Demand), Demand, ifelse(Demand > Max, Max, ifelse(Demand < Min, Min, Demand))))
Prom60 <- round(mean(Demand[ww]),digits=4)
Prom30 <- round(mean(Demand[intersect(ww,(length(Demand) - 29):length(Demand))]),digits=4)
Prom15 <- round(mean(Demand[intersect(ww,(length(Demand) - 14):length(Demand))]),digits=4)
Prom07 <- round(mean(Demand[intersect(ww,(length(Demand) - 6):length(Demand))]),digits=4)
}else{
Average <- DesvEst <- Max <- Min <- Prom60 <- Prom30 <- Prom15 <- Prom07 <- NA
}
DemandMat[iROW, ] <- Demand
TMP[iROW, ] <- c(Average, DesvEst, Max, Min, Prom60, Prom30, Prom15, Prom07)
}
DAT <- cbind(DAT[,1:2], DemandMat, TMP)
For 1000 rows this takes about 0.2 s instead of over 4 s. For 10,000 rows I get 2 s instead of 120 s.
Obviously, this is not really pretty code. One could do this much more nicely using the tidyverse or data.table. I just find it worth noting that for loops are not necessarily slow in R; dynamically growing data structures, however, are.
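For what it's worth, once the demand values sit in a matrix, the per-row work can be done with no loop at all. A rough sketch that replaces the loop, using DemandMat as defined above (not profiled, and ignoring the all-NA-row case that the loop handles explicitly):
Average <- round(rowMeans(DemandMat, na.rm = TRUE), 4)
DesvEst <- round(apply(DemandMat, 1, sd, na.rm = TRUE), 4)
Max     <- round(Average + DesvEst, 4)
Min     <- round(pmax(Average - DesvEst, 0), 4)
# clip every row into [Min, Max]; NAs stay NA
Clipped <- round(pmin(pmax(DemandMat, Min), Max))
Prom60 <- round(rowMeans(Clipped, na.rm = TRUE), 4)
Prom30 <- round(rowMeans(Clipped[, (ncol(Clipped) - 29):ncol(Clipped)], na.rm = TRUE), 4)
Prom15 <- round(rowMeans(Clipped[, (ncol(Clipped) - 14):ncol(Clipped)], na.rm = TRUE), 4)
Prom07 <- round(rowMeans(Clipped[, (ncol(Clipped) -  6):ncol(Clipped)], na.rm = TRUE), 4)
DAT <- cbind(DAT[, 1:2], Clipped, Average, DesvEst, Max, Min, Prom60, Prom30, Prom15, Prom07)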
I am querying Google Maps Roads API which only accepts a maximum of 100 coordinate pairs per request. Some of my input linestrings from routes, however, contain more than 100 segments.
I've written a sample loop which sends a request to the API, but it is limited to lines with under 100 segments by if (nrow(routes$mat) <= 100) {...}, where routes$mat is the matrix of coordinate pairs.
library(curl)
library(jsonlite)

for (i in 1:length(routes)) {
  if (nrow(routes$mat) <= 100) {
    mat <- paste(apply(routes$mat, 1, paste, collapse = ","), collapse = "|")
    a <- "https://roads.googleapis.com/v1/snapToRoads?path="
    b <- mat
    c <- "&interpolate=false&key=YOUR_API_KEY"
    request <- paste(a, b, c, sep = "")
    con <- curl(request)
    open(con)
    out <- readLines(con)
    mydf <- fromJSON(out)
    close(con)
    output <- cbind(mydf$snappedPoints$location$longitude, mydf$snappedPoints$location$latitude)
  }
}
My objective is to send all of the lines to the API - regardless of their length, but this means sending them piecemeal.
How would one adjust this loop to send only 100 rows at a time if mat is longer than 100, and then concatenate the results into a single output?
So for example, if nrow(mat) = 250, there would be 3 outputs, the first with 100, the second with 100, and the third with 50.
I always find doing this kind of loop a bit messy, but sometimes it's needed.
For this answer I'm using my googleway package which handles the API call for you. I'm also using the tram_route data that comes with it. There are 55 rows, so I'm iterating every 10, but with a larger data set you just increase the by = 10 value.
library(googleway)
set_key("roads_api_key", api = "roads")
n <- nrow(tram_route)
subsets <- c(seq(1, n, by = 10), n)
iters <- length(subsets) - 1
## set up a data.frame to store the results
df_result <- data.frame(latitude = numeric(n),
longitude = numeric(n))
for (i in 1:iters) {
  if (i == iters) {
    idx <- subsets[i]:subsets[i+1]
  } else {
    idx <- subsets[i]:(subsets[i+1] - 1)
  }
  print(idx)
  res <- google_snapToRoads(df_path = tram_route[idx, ],
                            lat = "shape_pt_lat",
                            lon = "shape_pt_lon")
  df_result[idx, ] <- res$snappedPoints$location
}
head(df_result)
# latitude longitude
# 1 -37.81436 144.9386
# 2 -37.81330 144.9415
# 3 -37.81274 144.9429
# 4 -37.81268 144.9430
# 5 -37.81314 144.9439
# 6 -37.81351 144.9443
And proof, if proof be needed:
set_key("map_api_key")
df_result$colour <- "blue"
google_map() %>%
add_markers(tram_route, lat = "shape_pt_lat", lon = "shape_pt_lon") %>%
add_markers(df_result, colour = "colour")
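For reference, the same chunking idea also works with the raw curl/jsonlite approach from the question. A rough, untested sketch, keeping routes$mat and the YOUR_API_KEY placeholder as in the question (snap_chunk is just a hypothetical helper name):
library(curl)
library(jsonlite)
# hypothetical helper: snap one block of at most 100 coordinate pairs
snap_chunk <- function(m) {
  path <- paste(apply(m, 1, paste, collapse = ","), collapse = "|")
  request <- paste0("https://roads.googleapis.com/v1/snapToRoads?path=", path,
                    "&interpolate=false&key=YOUR_API_KEY")
  out <- readLines(curl(request), warn = FALSE)
  mydf <- fromJSON(paste(out, collapse = ""))
  cbind(mydf$snappedPoints$location$longitude,
        mydf$snappedPoints$location$latitude)
}
mat <- routes$mat
# e.g. nrow(mat) = 250 gives chunks of 100, 100 and 50
chunks <- split(seq_len(nrow(mat)), ceiling(seq_len(nrow(mat)) / 100))
output <- do.call(rbind, lapply(chunks, function(idx) snap_chunk(mat[idx, , drop = FALSE])))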