Fastest Implementation of Dijkstra Algorithm in R - r

I have a below R code. The code works fine in computing shortest path between minimal input. If my input is big the code throws error in my coding environment.
Is there a way to fine tune the below code to achieve faster implementation. Suggestions/Corrections are highly appreciable.
Note : I am trying to implement without external packages
input <- suppressWarnings(readLines(stdin(), n=7))
5 6
1 2 30
1 3 10
5 2 40
3 5 20
5 1 30
5 4 20
l1 <- unlist(strsplit(input[1]," "))
cities <- as.numeric(l1[1])
roads <- as.numeric(l1[2])
L <- matrix(100000000,cities,roads)
#L <- format(L,scientific = FALSE)
distance <- function(inp){
f <- unlist(strsplit(input[inp+1]," "))
m <- as.numeric(f[1])
n <- as.numeric(f[2])
L[m,n] <<- as.numeric(f[3])
}
invisible(mapply(distance,seq_along(1:roads )))
if(cities > dim(L)[2])
{
cat("NOT POSSIBLE")
} else {
n=length(L[,1])
v=1
dest=n
cost=L
dijkstra = function(n, v, cost, dest) {
dest = numeric(n)
flag = numeric(n)
prev = numeric(n)
dstv <- function(i){
prev[i] <<- -1
dest[i] <<- cost[v, i]
}
invisible(mapply(dstv,seq_along(1:n)))
count = 2
while (count <= n) {
min = 100000000
destw <- function(w) {
if (dest[w] < min && !flag[w]) {
min <<- dest[w]
u <<- w
}
}
invisible(mapply(destw,seq_along(1:n)))
flag[u] = 1
count = count + 1
destu <- function(w) {
if ((dest[u] + cost[u, w] < dest[w]) && !flag[w]) {
dest[w] <<- dest[u] + cost[u, w]
prev[w] <<- u
}
}
invisible(mapply(destu,seq_along(1:n)))
}
return(prev)
}
savepath = function(f, x) {
path = x
while (f[x] != -1) {
path = c(path, f[x])
x = f[x]
savepath(f, x)
}
path = c(path, 1)
return(path)
}
prev = dijkstra(n,v,cost,dest)
path = sort(savepath(prev,dest))
}

Related

Error running identical function when comparing two vectors

I have a vector (length 8) and change one element randomly in the vector each time the for loop run. I want to check my new generated vector with a fixed vector (Destination) and save the valid case in a counting variable (Time_to_Destination). However, when I use identical() function, there was no saved recorded in Time_to_Destination. Is there anything wrong with my code?
I tried identical(), all.equal()
If I commented out if(identical()), the variable Time_to_Destination was counted
rm(list = ls())
library(igraph)
#Plot the graph
graph_MC <- graph(edge = c(1,2,1,3,2,3,2,4,3,4), n = 4, directed = F)
plot(graph_MC)
#Initial position
X_0 <- c(1,2,3,1)
Destination <- c(3,1,2,3)
X <- X_0
Time_to_destination <- 0
#I would like to see how many time I arrive to the Destination, which have a valid set of color
for(i in 1:10000){
#Generate a random node
a <- as.integer(runif(1,1,4))
#Generate a random color of the node
q <- as.integer(runif(1,1,3))
test_vector <- X
test_vector[a] = q
#Check if the new set of color is valid
if (a == 1) {
if ((q != test_vector[2]) & (q != test_vector[3])) {
X = test_vector
if (identical(X, Destination)){
Time_to_destination = Time_to_destination + 1
}
}
}
if (a == 2) {
if ((q != test_vector[1]) & (q != test_vector[3]) & (q != test_vector[4])) {
X = test_vector
if (identical(X, Destination)){
Time_to_destination = Time_to_destination + 1
}
}
}
if (a == 3) {
if ((q != test_vector[1]) & (q != test_vector[2]) & (q != test_vector[4])) {
X = test_vector
if (identical(X, Destination)){
Time_to_destination = Time_to_destination + 1
}
}
}
if (a == 4) {
if ((q != test_vector[3]) & (q != test_vector[2])) {
X = test_vector
if (identical(X, Destination)){
Time_to_destination = Time_to_destination + 1
}
}
}
}
Time_to_destination

How to solve a problem using "break" statement in R?

I am building a simple ant colony optimization code in R, but I have a problem in compiling a function to obtain the optimum route for each ant using the "break" statement. There is always appear an error saying that "missing value where TRUE/FALSE needed" in my looping. Here is the code
rm(list = ls())
x = c(11.7057,17.4151,1.4992,14.9609,9.5711)
y = c(11.1929,10.7112,17.0964,12.2228,6.7928)
n = length(x)
m = 20
t = matrix(0.0001,ncol=n,nrow=n)
beta = 1
alpha = 5
miter = 100
d = matrix(c(rep(0,n*n)),ncol=n,byrow=FALSE)
for (i in 1:n){
for (j in 1:n){
d[i,j] = sqrt((x[i]-x[j])^2+(y[i]-y[j])^2)
}
}
d
h = matrix(c(rep(0,n*n)),ncol=n,byrow=FALSE)
for (i in 1:n){
for (j in 1:n){
if (d[i,j]==0){
h[i,j]=0
}
else{
h[i,j]=1/d[i,j]
}
}
}
h
antour <- function(a1,a2,a3,a4,a5,a6,a7){
for (i in 1:m){
mh = h
for (j in 1:n-1){
a = start_places[i,j]
mh[,c(a)]=0
temp = (t[c(a),]^alpha)*(mh[c(a),]^beta)
q = sum(temp)
p = (1/q)*temp
r = runif(1)
s = 0
for (k in 1:n){
s = s+p[k]
start_places[i,j+1] = k
if (r <= s){
break
}
print(start_places)
}
}
}
new_places = start_places
}
for (i in 1:miter){
start_places = matrix(c(rep(1,m)),ncol=1)
tour = antour(a1=start_places,a2=m,a3=n,a4=h,a5=t,a6=alpha,a7=beta)
}
I expect that in the looping process, the start_places[i,j+1]=k when the value of r <= s and obtain the optimum route for each ant, but the actual output is an error always appears as follows
output is Error in if (r <= s) { : missing value where TRUE/FALSE needed

R - Error executing user-defined function

I am trying to build a function that takes 2 arguments and uses those 2 arguments inside a replicate funtion
SPM <- function(bilhetes, N){
total_bilhetes <- 12012000
total_bilhetes_premios <- 3526450
premios <- c(0,5,10,15,20,25,50,100,300,1000,27000,108000,288000)
premios_bilhetes <- c(8485550,1895000,496800*2,88800*3,55200*4,16800*4,7920*6,5030*6,950*5,950,30,10,10)
probs <- premios_bilhetes/total_bilhetes
vector_ganhos <- c()
ganho <- 0
replicate(N, function(bilhetes) {
total_bilhetes1 <- total_bilhetes
premios_bilhetes1 <- premios_bilhetes
probs1 <- probs
for (i in c(1:bilhetes)) {
A <- sample(x = premios,replace = T,size = 1, prob = probs1)
ganho <- ganho - 5 + A
if (A == 0) {
premios_bilhetes1[1] <- premios_bilhetes1[1] - 1
} else if (A == 5) {
premios_bilhetes1[2] <- premios_bilhetes1[2] - 1
} else if (A == 10) {
premios_bilhetes1[3] <- premios_bilhetes1[3] - 1
} else if (A == 15) {
premios_bilhetes1[4] <- premios_bilhetes1[4] - 1
} else if (A == 20) {
premios_bilhetes1[5] <- premios_bilhetes1[5] - 1
} else if (A == 25) {
premios_bilhetes1[6] <- premios_bilhetes1[6] - 1
} else if (A == 50) {
premios_bilhetes1[7] <- premios_bilhetes1[7] - 1
} else if (A == 100) {
premios_bilhetes1[8] <- premios_bilhetes1[8] - 1
} else if (A == 300) {
premios_bilhetes1[9] <- premios_bilhetes1[9] - 1
} else if (A == 1000) {
premios_bilhetes1[10] <- premios_bilhetes1[10] - 1
} else if (A == 27000) {
premios_bilhetes1[11] <- premios_bilhetes1[11] - 1
} else if (A == 108000) {
premios_bilhetes1[12] <- premios_bilhetes1[12] - 1
} else {
premios_bilhetes1[13] <- premios_bilhetes1[13] - 1
}
total_bilhetes1 <- total_bilhetes1 - 1
probs1 <- premios_bilhetes1/(total_bilhetes1)
}
vector_ganhos[length(vector_ganhos)+1] = ganho
})
return(vector_ganhos)
}
when I try to run it, e.g., SPM(bilhetes = 5, N = 100) I get:
Error in SPM(bilhetes = 5, N = 100) : could not find function "SPM"
I looked in another question and someone mentioned "sourcing" the function. I tried it, and this was the output:
> source("SPM")
Error in file(filename, "r", encoding = encoding) :
cannot open the connection
In addition: Warning message:
In file(filename, "r", encoding = encoding) :
I'm rather new to R, so I'm probably making a dumb mistake.
Can someone help?

Creating Subset of Vector Adds Null Values

I'm trying to create a mergeSort algorithm in R. While I think I have the method down, the first time I create the vector e, I end up with a vector of length 4 instead of 2. This causes the error below:
"Error in if (a[2] < a[1]) { : missing value where TRUE/FALSE needed"
For some reason, R is adding an extra 2 elements to what should be a two-element vector. If anyone has an explanation for this, that would be wonderful. (Please limit advice to solving this error. I want to figure out how to do the rest on my own.)
Code:
addLeftOver <- function(buffer, array, index) {
j <- length(buffer)
for(i in array[index:length(array)]) {
buffer[j] = i
print(i)
j <- j+1
}
return(buffer)
}
mergeSort <- function(a) {
len <- length(a)
print(a)
print(len)
browser()
if(len<=2) { #base case
if(len==1) {
return(a)
}
if(a[2]<a[1]) {
return(c(a[2],a[1]))
}
return(a)
}
print(len/2)
b <- mergeSort(a[1:(len/2)]) #recursion
e <- mergeSort(a[(len/2)+1:len]) #recursion
indexB <- 1
indexE <- 1
buffer
for(i in 1:len) {
if(e[indexC]<b[indexB]) {
buffer[index] <- e[indexE]
index <- index + 1
indexE <- indexE + 1
} else {
buffer[index] <- b[indexB]
index <- index + 1
indexB <- indexB + 1
}
if(indexB==len/2) {
buffer = addLeftover(buffer, e, indexE)
return(buffer)
}
if(indexE==len/2) {
buffer = addLeftover(buffer, b, indexB)
return(buffer)
}
}
}
sumArray <- c(6,4,2,7,8,1,3,5)
print(mergeSort(sumArray))
The addLeftOver() function's purpose is to copy the remnants of one of the two vectors (b or e) to the buffer when the end of the other has been reached.
Main error can be fixed with parenthesis. e <- mergeSort(a[(len/2)+1:len]) should be e <- mergeSort(a[((len/2)+1:len])
That and some other bugs were fixed.
Working code:
addLeftOver <- function(buffer, array, index)
{
j <- length(buffer)+1
for(i in array[index:length(array)]) {
buffer[j] = i
j <- j+1
}
return(buffer)
}
mergeSort <- function(a)
{
len <- length(a)
if(len<=2)
{
if(len==1)
{
return(a)
}
if(a[2]<a[1])
{
return(c(a[2],a[1]))
}
return(a)
}
half <- as.integer(len/2)
b <- mergeSort(a[1:(half)])
e <- mergeSort(a[((half)+1):len])
index <- 1
indexB <- 1
indexE <- 1
buffer <- c(0)
for(i in 1:len)
{
if(e[indexE]<b[indexB])
{
buffer[index] <- e[indexE]
index <- index + 1
indexE <- indexE + 1
}
else
{
buffer[index] <- b[indexB]
index <- index + 1
indexB <- indexB + 1
}
if(indexB==(length(b))+1)
{
buffer = addLeftover(buffer, e, indexE)
return(buffer)
}
if(indexE==(length(e))+1)
{
buffer = addLeftover(buffer, b, indexB)
return(buffer)
}
}
}

Improving run time for R with nested for loops

My reproducible R example:
f = runif(1500,10,50)
p = matrix(0, nrow=1250, ncol=250)
count = rep(0, 1250)
for(i in 1:1250) {
ref=f[i]
for(j in 1:250) {
p[i,j] = f[i + j - 1] / ref-1
if(p[i,j] == "NaN") {
count[i] = count[i]
}
else if(p[i,j] > (0.026)) {
count[i] = (count[i] + 1)
ref = f[i + j - 1]
}
}
}
To be more precise, I have a set of 600 f-series and this code runs 200 times for each f-series. Currently I am doing the iterations in loops and most of the operations are element-wise. My random variables are f, the condition if(p[i,j] > (0.026)), and the number 0.026 in itself.
One can drastically reduce the run-time by vectorizing my code and using functions, specifically the apply family, but I am rusty with apply and looking for some advice to proceed in the right direction.
It is quite easy to put for loops in Rcpp. I just copy-pasted your code to Rcpp and haven't checked the validity. In case of discrepancy, let me know. fCpp returns the list of p and c.
cppFunction('List fCpp(NumericVector f) {
const int n=1250;
const int k=250;
NumericMatrix p(n, k);
NumericVector c(n);
for(int i = 0; i < n; i++) {
double ref=f[i];
for(int j = 0; j < k; j++) {
p(i,j) = f[i+j+1]/ref-1;
if(p(i,j) == NAN){
c[i]=c[i];
}
else if(p(i,j) > 0.026){
c[i] = c[i]+1;
ref = f[i+j+1];
}
}
}
return List::create(p, c);
}')
Benchmark
set.seed(1)
f = runif(1500,10,50)
f1 <- function(f){
p = matrix(0, nrow=1250, ncol=250)
count = rep(0, 1250)
for(i in 1:1250) {
ref=f[i]
for(j in 1:250) {
p[i,j] = f[i + j - 1] / ref-1
if(p[i,j] == "NaN") {
count[i] = count[i]
}
else if(p[i,j] > (0.026)) {
count[i] = (count[i] + 1)
ref = f[i + j - 1]
}
}
}
list(p, count)
}
microbenchmark::microbenchmark(fCpp(f), f1(f), times=10L, unit="relative")
Unit: relative
expr min lq mean median uq max neval
fCpp(f) 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 10
f1(f) 785.8484 753.7044 734.4243 764.5883 718.0868 644.9022 10
Values returned by fCpp(f) and f1(f) are essentially identical, apart from column 1 of p matrix returned by f1 is filled with 0s.
system.time(a <- f1(f))[3]
#elapsed
# 2.8
system.time(a1 <- fCpp(f))[3]
#elapsed
# 0
all.equal( a[[1]], a1[[1]])
#[1] "Mean relative difference: 0.7019406"
all.equal( a[[2]], a1[[2]])
#[1] TRUE
Here is an implementation using while, although it is taking much longer than nested for loops which is a bit counter intuitive.
f1 <- function() {
n <- 1500
d <- 250
f = runif(n,1,5)
f = embed(f, d)
f = f[-(n-d+1),]
count = rep(0, n-d)
for(i in 1:(n-d)) {
tem <- f[i,]/f[i,1] - 1
ti <- which(t[-d] > 0.026)[1]
while(ti < d & !is.na(ti)) {
ti.plus = ti+1
tem[ti.plus:d] = f[i, ti.plus:d] / tem[ti]
count[i] = count[i] + 1
ti <- ti + which(tem[ti.plus:d-1] > 0.026)[1]
}
f[i] = tem
}
list(f, count)
}
system.time(f1())
#elapsed
#6.365
#ajmartin, your logic was better and reduced the number of iterations I was attempting. Here is the improved version of your code in R:
f1 <- function() {
n <- 1500
d <- 250
f = runif(n,1,5)
count = rep(0, n-d)
for(i in 1:(n-d)) {
tem <- f[i:(i+d-1)] / f[i] - 1
ind = which(tem>0.026)[1]
while(length(which(tem>0.026))){
count[i] = count[i] + 1
tem[ind:d] = f[ind:d] / tem[ind] - 1
ind = ind - 1 + (which(tem[ind:d] > 0.026)[1])
}
}
list(f, count)
}
system.time(f1())[3]
# elapsed
# 0.09
Implementing this in Rcpp will further reduce system-time but I can't install Rtools as my current machine does not have admin rights. Meanwhile this helps.

Resources