Number of comparisons in mergesort in R - r

i am writing a merge sort in R. I have 2 functions. The first is mergelist()
mergelist <- function(a, b)
{
al <- length(a$data)
bl <- length(b$data)
r <- numeric(al + bl)
numberOfComparisions <-0
ai <- 1
bi <- 1
j <- 1
while((ai <= al) && (bi <= bl))
{
if(a$data[ai]<b$data[bi])
{
r[j] <- a$data[ai]
ai <- ai + 1
numberOfComparisions = numberOfComparisions + 1
}
else
{
r[j] <- b$data[bi]
bi <- bi + 1
numberOfComparisions = numberOfComparisions + 1
}
j <- j + 1
}
if(ai<=al)
r[j:(al+bl)] <- a$data[ai:al]
else if(bi <= bl)
r[j:(al+bl)] <- b$data[bi:bl]
returnList <- list(number=numberOfComparisions + a$number + b$number , data = r)
return(returnList)
}
This methods takes in 2 sorted lists as parameter a and b and returns a sorted list with the number attribute of the list telling me how many comparisions there have been made.
I also have this method mergesort()
mergesort <- function(x)
{
l <- length(x)
if(l > 1) {
p <- ceiling(1/2)
a <- mergesort(x[1:p])
b <- mergesort(x[(p+1):l])
return(mergelist(a,b))
}
lister <- list(number=0, data = x)
return(lister)
}
Which takes in a vector x and sorts it. It is returning a list just like mergelist() with the number attribute being the same as in mergelist.
Now my problem is that i have following example: mergesort(c(11,10,9,15,6,12,17,8,19,7)) ,
Which should return
$number [1] 22
$data [1] 6 7 8 9 10 11 12 15 17 19
But it returns
$number
[1] 30
$data
[1] 6 7 8 9 10 11 12 15 17 19
So this means it counts a comparision where it shouldn´t. I don´t know where. Can somebody tell me what i am doing wrong?

Shouldn't your code
p <- ceiling(**1**/2)
be like this
p <- ceiling(**l**/2)

Related

Using for-loop to fill vector in R

I'm simplifying this question, for clarity.
Say I want to create a vector with a number of elements with the value zero. I then want to use a for-loop to replace every element with its own index number inside the vector.
Can this be done?
K <- 11
p <- rep(0, K + 1)
for (k in 0:K) {
p[k+1] <- .... ?
}
Here's one solution if I understand your expected output:
K <- 11
p <- rep(0, K + 1)
for (k in 0:K) {
p[k + 1] <- k
}
Which returns:
[1] 0 1 2 3 4 5 6 7 8 9 10 11

Error in if (a[i][j] > 4) { : missing value where TRUE/FALSE needed

Find the number of entries in each row which are greater than 4.
set.seed(75)
aMat <- matrix( sample(10, size=60, replace=T), nr=6)
rowmax=function(a)
{
x=nrow(a)
y=ncol(a)
i=1
j=1
z=0
while (i<=x) {
for(j in 1:y) {
if(!is.na(a[i][j])){
if(a[i][j]>4){
z=z+1
}
}
j=j+1
}
print(z)
i=i+1
}
}
rowmax(aMat)
It is showing the error. I don't want to apply in built function
You could do this easier counting the x that are greater than 4 using length.
rowmax2 <- function(x) apply(x, 1, function(x) {x <- na.omit(x);length(x[x > 4])})
rowmax2(aMat)
# [1] 8 7 8 7 4 3
If you wanted to do this absolutely without any shortcut you could use two for loops. 1 for each row and another for each value in the row.
rowmax = function(a) {
y=nrow(a)
result <- numeric(y)
for(j in seq_len(y)) {
count = 0
for(val in a[j, ]) {
if(!is.na(val) && val > 4)
count = count + 1
}
result[j] <- count
}
return(result)
}
rowmax(aMat)
#[1] 8 7 8 7 4 3
If you wanted to do this using in-built functions in base R you could use rowSums.
rowSums(aMat > 4, na.rm = TRUE)
#[1] 8 7 8 7 4 3
There are several errors in you code:
You should put z <- 0 inside while loop
You should use a[i,j] for the matrix indexing, rather than a[i][j]
Below is a version after fixing the problems
rowmax <- function(a) {
x <- nrow(a)
y <- ncol(a)
i <- 1
j <- 1
while (i <= x) {
z <- 0
for (j in 1:y) {
if (!is.na(a[i, j])) {
if (a[i, j] > 4) {
z <- z + 1
}
}
j <- j + 1
}
print(z)
i <- i + 1
}
}
and then we get
> rowmax(aMat)
[1] 8
[1] 7
[1] 8
[1] 7
[1] 4
[1] 3
A concise approach to make it is using rowSums, e.g.,
rowSums(aMat, na.rm = TRUE)

I am trying to create a Collatz sequence with while loop in R. What am i doing wrong in this while loop here?

I am trying to create a Collatz sequence with while loop in R.
vector <- NULL
n <- 10
while (n != 1) {
if (n %% 2 == 0) {
n <- n / 2
} else {
n <- (n * 3) + 1
}
vector <- c(vector, cbind(n))
print(vector)
}
After running the code, I got:
[1] 5
[1] 5 16
[1] 5 16 8
[1] 5 16 8 4
[1] 5 16 8 4 2
[1] 5 16 8 4 2 1
How do I do it such that it only shows the last row? What went wrong in my code?
Thanks for any help on this.
You have several ways to deal with print(vector)
add if condition before print(vector), i.e.,
vector <- NULL
n <- 10
while (n != 1) {
if (n %% 2 == 0) {
n <- n / 2
} else {
n <- (n * 3) + 1
}
vector <- c(vector, cbind(n))
if (n==1) print(vector)
}
move it outside while loop, i.e.,
vector <- NULL
n <- 10
while (n != 1) {
if (n %% 2 == 0) {
n <- n / 2
} else {
n <- (n * 3) + 1
}
vector <- c(vector, cbind(n))
}
print(vector)

Merge-sorting in R using lists instead of vectors

So i've written this basic code that sorts a list using the well-known merge-sorting algorithm, i've defined two functions mergelists that compares and merges the elements and mergesort that divides the list into single elements:
mergelists <- function(a,b) {
al <- length(a)
bl <- length(b)
r <- numeric(al+bl)
ai <- 1
bi <- 1
j <- 1
while((ai<=al) && (bi<=bl)) {
if(a[ai]<b[bi]) {
r[j] <- a[ai]
ai <- ai+1
} else {
r[j] <- b[bi]
bi <- bi+1
}
j <- j+1
}
if(ai<=al) r[j:(al+bl)] <- a[ai:al]
else if(bi<=bl) r[j:(al+bl)] <- b[bi:bl]
return(r)
}
mergesort <- function(x) {
l <- length(x)
if(l>1) {
p <- ceiling(l/2)
a <- mergesort(x[1:p])
b <- mergesort(x[(p+1):l])
return(mergelists(a,b))
}
return(x)
}
this seems to work fine for the examples i used so far, for example:
> mergesort(c(11,10,9,15,6,12,17,8,19,7))
[1] 6 7 8 9 10 11 12 15 17 19
now for the sake of some research i'm doing, i want to change this code to work with R-lists and not vectors, the lists are usually defined as following:
> list(number=10,data=c(10,5,8,2))
$number
[1] 10
$data
[1] 10 5 8 2
data represents here the vector and number is the number of comparaisons.
After the change i imagine that the program should give me something like this:
>mergelists(list(number=8,data=c(1,3,5,8,9,10)),list(number=5,data=c(2,4,6,7)))
$number
[1] 20
$data
[1] 1 2 3 4 5 6 7 8 9 10
> mergesort(c(11,10,9,15,6,12,17,8,19,7))
$number
[1] 22
$data
[1] 6 7 8 9 10 11 12 15 17 19
the 20 here is basically 8 + 5 + 7, because 7 comparaisons would be necessary to merge the two sorted lists, but i don't know how to do this because i have a little experience with R-lists. i would appreciate your help. Thanks.
The starting point for any vector vec is list(number = 0, data = vec), where number is 0 because it as taken 0 comparisons to start with an unsorted vector.
You first need to modify mergelists to deal with two lists, simply by adding the indexing and then reforming the list at the end.
mergelists <- function(a,b) {
firstn <- a$number + b$number
a <- a$data
b <- b$data
al <- length(a)
bl <- length(b)
r <- numeric(al+bl)
ai <- 1
bi <- 1
j <- 1
while((ai<=al) && (bi<=bl)) {
if(a[ai]<b[bi]) {
r[j] <- a[ai]
ai <- ai+1
} else {
r[j] <- b[bi]
bi <- bi+1
}
j <- j+1
}
if(ai<=al) r[j:(al+bl)] <- a[ai:al]
else if(bi<=bl) r[j:(al+bl)] <- b[bi:bl]
return(list(number = firstn + j - 1L, data = r))
}
mergelists(list(number=8,data=c(1,3,5,8,9,10)), list(number=5,data=c(2,4,6,7)))
# $number
# [1] 20
# $data
# [1] 1 2 3 4 5 6 7 8 9 10
Now that you have the "base function" defined, you need the calling function to generate the enhanced vector (list) and pass it accordingly. This function can easily be improved for efficiency, but I think its recursive properties are sound.
mergesort <- function(x) {
# this first guarantees that if called with a vector, it is list-ified,
# but if called with a list (i.e., every other time in the recursion),
# the argument is untouched
if (! is.list(x)) x <- list(number = 0, data = x)
l <- length(x$data)
if (l > 1) {
p <- ceiling(l/2)
# the `within(...)` trick is a sneaky trick, can easily be
# handled with pre-assignment/subsetting
a <- mergesort(within(x, { data <- data[1:p]; }))
b <- mergesort(within(x, { data <- data[(p+1):l]; }))
return(mergelists(a,b))
}
return(x)
}
mergesort(c(11,10,9,15,6,12,17,8,19,7))
# $number
# [1] 22
# $data
# [1] 6 7 8 9 10 11 12 15 17 19

multiple next statements in function

I'm trying to run a function much more complex but equal to:
A <<- 5
B <<- 5
table <- data.frame(A,B)
backup <- table
test <- function(A,B){
while(A > 0){
r <- runif(1,0,1)
if ((r >= 0)&(r <= 0.5)){
A <- A + 1
B <- B - 1
} else if ((r > 0.5)&(r<=1)){
A <- A - 1
B <- B + 1
}
tab <- cbind(A,B)
table <<- rbind(table,tab)
if (nrow(table) == 10) {
break
} else if ((A > 0) & (nrow(table) != 10)) {
next
} else {if ((A == 0) & (nrow(table != 10)) { #pointing to error here?
A <- 5
B <- 5
table <- backup
next
}
}
}
So what I want this function to do is stop when the when number of rows of the table (= the number of times the function ran) is equal to a certain value, in this case 10. But A cannot take a value below 0. If A reaches 0 before the number of rows of the table is 10 the whole process has to start again, with the same inputvalues as before.
However, this function does not work. I think it's because I use multiple next statements, is that correct?
Thanks!
I think you're on the right track... just a few recommendations
I think this accomplishes what you're trying to achieve a little cleaner. I second Carl's suggestion of avoiding the global operator '<<-' when possible and passing the objects through to the function as arguments or parameters. I also second Justin's suggestion of avoiding the break command in favor of placing the return() call smartly. To accomplish this, I put your 10 (stopping criteria) directly into the while loop. I included it as a parameter n so that you can experiment with different stopping criteria. Hope this helps :)
test <- function(A,B,n){
A0 <- A
B0 <- B
tab <- data.frame(A,B)
while(A > 0 & nrow(tab) < n){
r <- runif(1,0,1)
if (r <= 0.5){
A <- A + 1
B <- B - 1
} else {
A <- A - 1
B <- B + 1
}
tab1 <- cbind(A,B)
tab <- rbind(tab,tab1)
if(nrow(tab)<n & A==0){
A<-5
B<-5
tab <- cbind(A0,B0)
names(tab) <- c('A', 'B')
print(paste('found A==0 in less than ', n, ' iterations', sep=''))
}
}
return(tab)
}
Testing function...
## running function
A <- 5
B <- 5
testDF <- test(A,B,10)
> testDF
A B
1 5 5
2 6 4
3 5 5
4 6 4
5 7 3
6 8 2
7 9 1
8 10 0
9 9 1
10 10 0

Resources