Creating Subset of Vector Adds Null Values - r

I'm trying to create a mergeSort algorithm in R. While I think I have the method down, the first time I create the vector e, I end up with a vector of length 4 instead of 2. This causes the error below:
"Error in if (a[2] < a[1]) { : missing value where TRUE/FALSE needed"
For some reason, R is adding an extra 2 elements to what should be a two-element vector. If anyone has an explanation for this, that would be wonderful. (Please limit advice to solving this error. I want to figure out how to do the rest on my own.)
Code:
addLeftOver <- function(buffer, array, index) {
j <- length(buffer)
for(i in array[index:length(array)]) {
buffer[j] = i
print(i)
j <- j+1
}
return(buffer)
}
mergeSort <- function(a) {
len <- length(a)
print(a)
print(len)
browser()
if(len<=2) { #base case
if(len==1) {
return(a)
}
if(a[2]<a[1]) {
return(c(a[2],a[1]))
}
return(a)
}
print(len/2)
b <- mergeSort(a[1:(len/2)]) #recursion
e <- mergeSort(a[(len/2)+1:len]) #recursion
indexB <- 1
indexE <- 1
buffer
for(i in 1:len) {
if(e[indexC]<b[indexB]) {
buffer[index] <- e[indexE]
index <- index + 1
indexE <- indexE + 1
} else {
buffer[index] <- b[indexB]
index <- index + 1
indexB <- indexB + 1
}
if(indexB==len/2) {
buffer = addLeftover(buffer, e, indexE)
return(buffer)
}
if(indexE==len/2) {
buffer = addLeftover(buffer, b, indexB)
return(buffer)
}
}
}
sumArray <- c(6,4,2,7,8,1,3,5)
print(mergeSort(sumArray))
The addLeftOver() function's purpose is to copy the remnants of one of the two vectors (b or e) to the buffer when the end of the other has been reached.

Main error can be fixed with parenthesis. e <- mergeSort(a[(len/2)+1:len]) should be e <- mergeSort(a[((len/2)+1:len])
That and some other bugs were fixed.
Working code:
addLeftOver <- function(buffer, array, index)
{
j <- length(buffer)+1
for(i in array[index:length(array)]) {
buffer[j] = i
j <- j+1
}
return(buffer)
}
mergeSort <- function(a)
{
len <- length(a)
if(len<=2)
{
if(len==1)
{
return(a)
}
if(a[2]<a[1])
{
return(c(a[2],a[1]))
}
return(a)
}
half <- as.integer(len/2)
b <- mergeSort(a[1:(half)])
e <- mergeSort(a[((half)+1):len])
index <- 1
indexB <- 1
indexE <- 1
buffer <- c(0)
for(i in 1:len)
{
if(e[indexE]<b[indexB])
{
buffer[index] <- e[indexE]
index <- index + 1
indexE <- indexE + 1
}
else
{
buffer[index] <- b[indexB]
index <- index + 1
indexB <- indexB + 1
}
if(indexB==(length(b))+1)
{
buffer = addLeftover(buffer, e, indexE)
return(buffer)
}
if(indexE==(length(e))+1)
{
buffer = addLeftover(buffer, b, indexB)
return(buffer)
}
}
}

Related

Fastest Implementation of Dijkstra Algorithm in R

I have a below R code. The code works fine in computing shortest path between minimal input. If my input is big the code throws error in my coding environment.
Is there a way to fine tune the below code to achieve faster implementation. Suggestions/Corrections are highly appreciable.
Note : I am trying to implement without external packages
input <- suppressWarnings(readLines(stdin(), n=7))
5 6
1 2 30
1 3 10
5 2 40
3 5 20
5 1 30
5 4 20
l1 <- unlist(strsplit(input[1]," "))
cities <- as.numeric(l1[1])
roads <- as.numeric(l1[2])
L <- matrix(100000000,cities,roads)
#L <- format(L,scientific = FALSE)
distance <- function(inp){
f <- unlist(strsplit(input[inp+1]," "))
m <- as.numeric(f[1])
n <- as.numeric(f[2])
L[m,n] <<- as.numeric(f[3])
}
invisible(mapply(distance,seq_along(1:roads )))
if(cities > dim(L)[2])
{
cat("NOT POSSIBLE")
} else {
n=length(L[,1])
v=1
dest=n
cost=L
dijkstra = function(n, v, cost, dest) {
dest = numeric(n)
flag = numeric(n)
prev = numeric(n)
dstv <- function(i){
prev[i] <<- -1
dest[i] <<- cost[v, i]
}
invisible(mapply(dstv,seq_along(1:n)))
count = 2
while (count <= n) {
min = 100000000
destw <- function(w) {
if (dest[w] < min && !flag[w]) {
min <<- dest[w]
u <<- w
}
}
invisible(mapply(destw,seq_along(1:n)))
flag[u] = 1
count = count + 1
destu <- function(w) {
if ((dest[u] + cost[u, w] < dest[w]) && !flag[w]) {
dest[w] <<- dest[u] + cost[u, w]
prev[w] <<- u
}
}
invisible(mapply(destu,seq_along(1:n)))
}
return(prev)
}
savepath = function(f, x) {
path = x
while (f[x] != -1) {
path = c(path, f[x])
x = f[x]
savepath(f, x)
}
path = c(path, 1)
return(path)
}
prev = dijkstra(n,v,cost,dest)
path = sort(savepath(prev,dest))
}

Why does a piece of code that works on its own not work in parallel in R?

I wrote a piece of code (appended below) that works fine when I run it in serial, but when I use the foreach and doparallel libraries in R, I get an error code that reads: " task 1 failed - "missing value where TRUE/FALSE needed"
Everything inside the for each loop works on its own, and on a smaller batch, I can run it serially and it works.
ListOfColumns <- colnames(tempdata)
foreach(i = 1:nSubsets,
.export = ls(globalenv())) %dopar% {
DoubleTempData <- get(paste0("Subset", i))
DoubleTempData <- subset(DoubleTempData, select = -c(subset))
RowCounter <- 2
ColumnFigurer <- 2
LastCATEGORYIndicator <- "THERE IS NO CATEGORY, ONLY ZUUL"
while (RowCounter <= nrow(DoubleTempData)) {
print(paste("Checking row ", RowCounter))
RowChecker <- max(1, RowCounter - 5)
while (RowChecker < RowCounter) {
print(paste("Checking row",
RowCounter,
"against row",
RowChecker))
if (DoubleTempData$CATEGORY[RowChecker] == DoubleTempData$CATEGORY[RowCounter])
{
print("The rows match!")
while (ColumnFigurer > 0) {
if (DoubleTempData$CATEGORY[RowCounter] != LastCATEGORYIndicator) {
ColumnFigurer <- 2
}
print(paste ("Checking Iteration", ColumnFigurer))
if (ColumnFigurer * length(ListOfColumns) <= length(colnames(DoubleTempData)))
{
print(paste("Iteration", ColumnFigurer, " exists"))
CellChecker <-
((ColumnFigurer - 1) * length(ListOfColumns) + 1)
if (is.na(DoubleTempData[[RowChecker, CellChecker]])) {
print(paste("Current value is NA. Writing in new value."))
ColumnCounter <- 1
while (ColumnCounter <= length(ListOfColumns)) {
DoubleTempData[[RowChecker, (ColumnFigurer - 1) * length(ListOfColumns) +
ColumnCounter]] <-
DoubleTempData[[RowCounter, ColumnCounter]]
ColumnCounter <- ColumnCounter + 1
}
DoubleTempData <- DoubleTempData[-RowCounter]
LastCATEGORYIndicator <-
DoubleTempData$CATEGORY[RowChecker]
RowCounter <- max(2, RowCounter - 1)
ColumnFigurer <- ColumnFigurer + 1
break
}
else
{
print(paste(
"Current value is not NA, increasing iteration count."
))
ColumnFigurer <- ColumnFigurer + 1
}
}
if (ColumnFigurer * length(ListOfColumns) > length(colnames(DoubleTempData)))
{
print(paste(
"Iteration ",
ColumnFigurer,
"does not exist, adding iteration."
))
ColumnAdder <- 1
while (ColumnAdder <= length(ListOfColumns)) {
NewColumnName <-
paste0(ListOfColumns[ColumnAdder], "_", ColumnFigurer)
DoubleTempData[, NewColumnName] <- NA
ColumnAdder <- ColumnAdder + 1
}
}
}
}
RowChecker <- RowChecker + 1
}
RowCounter <- RowCounter + 1
}
assign(paste0("Subset", i), DoubleTempData)
}
For example, here is a sample of a randomly generated Subset1 that I ran, with about 70 observations and 7 columns (one of which gets dropped by the program as intended):
[![enter image description here][1]][1]
[1]: https://i.stack.imgur.com/Jlytj.png
It then outputs a dataset with 9 observations, and 60 columns:

Error running identical function when comparing two vectors

I have a vector (length 8) and change one element randomly in the vector each time the for loop run. I want to check my new generated vector with a fixed vector (Destination) and save the valid case in a counting variable (Time_to_Destination). However, when I use identical() function, there was no saved recorded in Time_to_Destination. Is there anything wrong with my code?
I tried identical(), all.equal()
If I commented out if(identical()), the variable Time_to_Destination was counted
rm(list = ls())
library(igraph)
#Plot the graph
graph_MC <- graph(edge = c(1,2,1,3,2,3,2,4,3,4), n = 4, directed = F)
plot(graph_MC)
#Initial position
X_0 <- c(1,2,3,1)
Destination <- c(3,1,2,3)
X <- X_0
Time_to_destination <- 0
#I would like to see how many time I arrive to the Destination, which have a valid set of color
for(i in 1:10000){
#Generate a random node
a <- as.integer(runif(1,1,4))
#Generate a random color of the node
q <- as.integer(runif(1,1,3))
test_vector <- X
test_vector[a] = q
#Check if the new set of color is valid
if (a == 1) {
if ((q != test_vector[2]) & (q != test_vector[3])) {
X = test_vector
if (identical(X, Destination)){
Time_to_destination = Time_to_destination + 1
}
}
}
if (a == 2) {
if ((q != test_vector[1]) & (q != test_vector[3]) & (q != test_vector[4])) {
X = test_vector
if (identical(X, Destination)){
Time_to_destination = Time_to_destination + 1
}
}
}
if (a == 3) {
if ((q != test_vector[1]) & (q != test_vector[2]) & (q != test_vector[4])) {
X = test_vector
if (identical(X, Destination)){
Time_to_destination = Time_to_destination + 1
}
}
}
if (a == 4) {
if ((q != test_vector[3]) & (q != test_vector[2])) {
X = test_vector
if (identical(X, Destination)){
Time_to_destination = Time_to_destination + 1
}
}
}
}
Time_to_destination

R - Error executing user-defined function

I am trying to build a function that takes 2 arguments and uses those 2 arguments inside a replicate funtion
SPM <- function(bilhetes, N){
total_bilhetes <- 12012000
total_bilhetes_premios <- 3526450
premios <- c(0,5,10,15,20,25,50,100,300,1000,27000,108000,288000)
premios_bilhetes <- c(8485550,1895000,496800*2,88800*3,55200*4,16800*4,7920*6,5030*6,950*5,950,30,10,10)
probs <- premios_bilhetes/total_bilhetes
vector_ganhos <- c()
ganho <- 0
replicate(N, function(bilhetes) {
total_bilhetes1 <- total_bilhetes
premios_bilhetes1 <- premios_bilhetes
probs1 <- probs
for (i in c(1:bilhetes)) {
A <- sample(x = premios,replace = T,size = 1, prob = probs1)
ganho <- ganho - 5 + A
if (A == 0) {
premios_bilhetes1[1] <- premios_bilhetes1[1] - 1
} else if (A == 5) {
premios_bilhetes1[2] <- premios_bilhetes1[2] - 1
} else if (A == 10) {
premios_bilhetes1[3] <- premios_bilhetes1[3] - 1
} else if (A == 15) {
premios_bilhetes1[4] <- premios_bilhetes1[4] - 1
} else if (A == 20) {
premios_bilhetes1[5] <- premios_bilhetes1[5] - 1
} else if (A == 25) {
premios_bilhetes1[6] <- premios_bilhetes1[6] - 1
} else if (A == 50) {
premios_bilhetes1[7] <- premios_bilhetes1[7] - 1
} else if (A == 100) {
premios_bilhetes1[8] <- premios_bilhetes1[8] - 1
} else if (A == 300) {
premios_bilhetes1[9] <- premios_bilhetes1[9] - 1
} else if (A == 1000) {
premios_bilhetes1[10] <- premios_bilhetes1[10] - 1
} else if (A == 27000) {
premios_bilhetes1[11] <- premios_bilhetes1[11] - 1
} else if (A == 108000) {
premios_bilhetes1[12] <- premios_bilhetes1[12] - 1
} else {
premios_bilhetes1[13] <- premios_bilhetes1[13] - 1
}
total_bilhetes1 <- total_bilhetes1 - 1
probs1 <- premios_bilhetes1/(total_bilhetes1)
}
vector_ganhos[length(vector_ganhos)+1] = ganho
})
return(vector_ganhos)
}
when I try to run it, e.g., SPM(bilhetes = 5, N = 100) I get:
Error in SPM(bilhetes = 5, N = 100) : could not find function "SPM"
I looked in another question and someone mentioned "sourcing" the function. I tried it, and this was the output:
> source("SPM")
Error in file(filename, "r", encoding = encoding) :
cannot open the connection
In addition: Warning message:
In file(filename, "r", encoding = encoding) :
I'm rather new to R, so I'm probably making a dumb mistake.
Can someone help?

Autoconversion from number to NULL

I am trying to generate a vector of random numbers based on a finite random variable X
With probGen function I generate a variable X, l1 is the first line and l2 is the second one.
And at this point if(sum1 >= U) I recive this error Error in if (sum1 >= U) { : argument is of length zero
This is my code:
probGen=function(n)
{
v=vector()
k=sample(1:n,1)
v=rep(0,k)
for(i in 1:n)
{
aux=sample(1:k,1)
v[aux]=v[aux]+1
}
vfinal=vector()
klen=0
for(i in 1:k)
{
if(v[i]!=0) klen=klen+1
}
for(i in 1:k)
{
if(v[i]!=0)
vfinal=c(vfinal,rep(1/(klen*v[i]),v[i]))
}
vfinal=sample(vfinal)
return (vfinal)
}
n=22
l1=c(1:n)
l2=probGen(n)
l1
l2
simVar=function(l1,l2)
{
variante=vector()
U=runif(1,0,1)
for(i in 1:length(l1))
{
sum1=1-1
for(j in 1:i-1)
{
if(i-1>=1)
{
sum1=sum1+l2[j]
}
}
sum2=0.0
for(j in 1:i)
{
sum2=sum2+l2[j]
}
if(sum1 >= U)
{
if(U<sum2)
{
variante=c(variante,l1[i])
}
}
}
return (variante)
}
varR=simVar(l1,l2)
varR
Any idea?
Thanks!
The for(j in 1:i-1) near the top of the code for simVar is evaluating as (1:i)-1, resulting in a zero j which produces a NA value of sum1. Use for(j in 1:(i-1)) instead.

Resources