R median function from scratch - r

I am a R beginner and I tried to make a median function from scratch.
Here is my code:
mymedian <- function(x) {
len <- length(x)
sorted <- sort(x)
if (len %% 2 == 0) {
med1 <- sorted[len / 2]
med2 <- sorted[(len + 1) %/% 2]
result <- sorted[med1 + med2 / 2]
return(result)
} else {
result <- sorted[(len + 1)/2]
return(result)
}
}
Im getting "NA" output. I couldn't find where the problem is.

Main issue is you're trying to index your sorted vector with a non-integer (e.g., 168.5). Compare your function to this:
mymedian <- function(x){
len <- length(x)
sorted <-sort(x)
if(len%%2==0){
i <- len/2
med1<-sorted[i]
med2 <- sorted[i+1]
result <- sum(med1,med2)/2
return(result)
}else{
result<-sorted[(len+1)/2]
return(result)
}
}

Related

avoid R loop and parallelize with snow

I have a large loop that will take too long (~100 days). I'm hoping to speed it up with the snow library, but I'm not great with apply statements. This is only part of the loop, but if I can figure this part out, the rest should be straightforward. I'm ok with a bunch of apply statements or loops, but one apply statement using a function to get object 'p' would be ideal.
Original data
dim(m1) == x x # x >>> 0
dim(m2) == y x # y >>> 0, y > x, y > x-10
dim(mout) == x x
thresh == x-10 #specific to my data, actual number probably unimportant
len(v1) == y #each element is a random integer, min==1, max==thresh
len(v2) == y #each element is a random integer, min==1, max==thresh
Original loop
p <- rep(NA,y)
for (k in 1:y){
mout <- m1 * matrix(m2[k,],x,x)
mout <- mout/sum(mout)
if (v1[k] < thresh + 1){
if(v2[k] < thresh + 1){
p[k] <- out[v1[k],v2[k]]
}
if(v2[k] > thresh){
p[k] <- sum(mout[v1[k],(thresh+1):x])
}
}
#do stuff with object 'p'
}
library(snow)
dostuff <- function(k){
#contents of for-loop
mout <- m1 * matrix(m2[k,],x,x)
mout <- mout/sum(mout)
if (v1[k] < thresh + 1){
if(v2[k] < thresh + 1){
p <- out[v1[k],v2[k]]
}
if(v2[k] > thresh){
p <- sum(mout[v1[k],(thresh+1):x])
}
}
#etc etc
return(list(p,
other_vars))
}
exports = c('m1',
'm2',
'thresh',
'v1',
'x' ,
'v2')
cl = makeSOCKcluster(4)
clusterExport(cl,exports)
loop <- as.array(1:y)
out <- parApply(cl,loop,1,dostuff)
p <- rep(NA,y)
for(k in 1:y){
p[k] <- out[[k]][[1]]
other_vars[k] <- out[[k]][[2]]
}

Error in seq.default(a, length = max(0, b - a - 1)) : length must be non-negative number

I tried running the code below.
set.seed(307)
y<- rnorm(200)
h2=0.3773427
t=seq(-3.317670, 2.963407, length.out=500)
fit=density(y, bw=h2, n=1024, kernel="epanechnikov")
integrate.xy(fit$x, fit$y, min(fit$x), t[407])
However, i recived the following message:
"Error in seq.default(a, length = max(0, b - a - 1)) :
length must be non-negative number"
I am not sure what's wrong.
I do not encounter any problem when i use t[406] or t[408] as follow:
integrate.xy(fit$x, fit$y, min(fit$x), t[406])
integrate.xy(fit$x, fit$y, min(fit$x), t[408])
Does anyone know what's the problem and how to fix it? Appreciate your help please. Thanks!
I went through the source code for the integrate.xy function, and there seems to be a bug relating to the usage of the xtol argument.
For reference, here is the source code of integrate.xy function:
function (x, fx, a, b, use.spline = TRUE, xtol = 2e-08)
{
dig <- round(-log10(xtol))
f.match <- function(x, table) match(signif(x, dig), signif(table,
dig))
if (is.list(x)) {
fx <- x$y
x <- x$x
if (length(x) == 0)
stop("list 'x' has no valid $x component")
}
if ((n <- length(x)) != length(fx))
stop("'fx' must have same length as 'x'")
if (is.unsorted(x)) {
i <- sort.list(x)
x <- x[i]
fx <- fx[i]
}
if (any(i <- duplicated(x))) {
n <- length(x <- x[!i])
fx <- fx[!i]
}
if (any(diff(x) == 0))
stop("bug in 'duplicated()' killed me: have still multiple x[]!")
if (missing(a))
a <- x[1]
else if (any(a < x[1]))
stop("'a' must NOT be smaller than min(x)")
if (missing(b))
b <- x[n]
else if (any(b > x[n]))
stop("'b' must NOT be larger than max(x)")
if (length(a) != 1 && length(b) != 1 && length(a) != length(b))
stop("'a' and 'b' must have length 1 or same length !")
else {
k <- max(length(a), length(b))
if (any(b < a))
stop("'b' must be elementwise >= 'a'")
}
if (use.spline) {
xy <- spline(x, fx, n = max(1024, 3 * n))
if (xy$x[length(xy$x)] < x[n]) {
if (TRUE)
cat("working around spline(.) BUG --- hmm, really?\n\n")
xy$x <- c(xy$x, x[n])
xy$y <- c(xy$y, fx[n])
}
x <- xy$x
fx <- xy$y
n <- length(x)
}
ab <- unique(c(a, b))
xtol <- xtol * max(b - a)
BB <- abs(outer(x, ab, "-")) < xtol
if (any(j <- 0 == apply(BB, 2, sum))) {
y <- approx(x, fx, xout = ab[j])$y
x <- c(ab[j], x)
i <- sort.list(x)
x <- x[i]
fx <- c(y, fx)[i]
n <- length(x)
}
ai <- rep(f.match(a, x), length = k)
bi <- rep(f.match(b, x), length = k)
dfx <- fx[-c(1, n)] * diff(x, lag = 2)
r <- numeric(k)
for (i in 1:k) {
a <- ai[i]
b <- bi[i]
r[i] <- (x[a + 1] - x[a]) * fx[a] + (x[b] - x[b - 1]) *
fx[b] + sum(dfx[seq(a, length = max(0, b - a - 1))])
}
r/2
}
The value given to the xtol argument, is being overwritten in the line xtol <- xtol * max(b - a). But the value of the dig variable is calculated based on the original value of xtol, as given in the input to the function. Because of this mismatch, f.match function, in the line bi <- rep(f.match(b, x), length = k), returns no matches between x and b (i.e., NA). This results in the error that you have encountered.
A simple fix, at least for the case in question, would be to remove the xtol <- xtol * max(b - a) line. But, you should file a bug report with the maintainer of this package, for a more rigorous fix.

switch statement help in R

I've got the following code in R:
func.time <- function(n){
times <- c()
for(i in 1:n){
r <- 1 #x is the room the mouse is in
X <- 0 #time, starting at 0
while(r != 5){
if(r == 1){
r <- sample(c(2,3),1) }
else if(r == 2){
r <- sample(c(1,3), 1) }
else if(r == 3){
r <- sample(c(1,2,4,5), 1) }
else if (r == 4){
r <- sample(c(3,5), 1) }
X <- X + 1
}
times <- c(X, times)
}
mean(times)
}
func.time(10000)
It works fine, but I've been told that using switch() can speed it up seeing as I've got so many if else statements but I can't seem to get it to work, any help is appreciated in advance.
Edit
I've tried this:
func.time <- function(n) {
times <- c()
for(i in 1:n) {
r <- 1 #x is the room the mouse is in
X <- 0 #time, starting at 0
while(r != 5) {
switch(r, "1" = sample(c(2,3), 1),
"2" = sample(c(1,3), 1),
"3" = sample(c(1,2,4,5), 1),
"4" = sample(c(3,5)))
X <- X + 1
}
times <- c(X, times)
}
mean(times)
}
func.time(10000)
But it was a basic attempt, I'm not sure I've understood the switch() method properly.
I though Dominic's assessment was very useful but when I went to examine the edit it was being held up on what I thought was an incorrect basis. So I decided to just fix the code. When usign a numeric argument to the EXPR parameter you do not use the item=value formalism but rather just put in the expressions:
func.time <- function(n){times <- c()
for(i in 1:n){; r <- 1; X <- 0
while(r != 5){
r <- switch(r,
sample(c(2,3), 1) , # r=1
sample(c(1,3), 1) , # r=2
sample(c(1,2,4,5), 1), #r=3
sample(c(3,5), 1) ) # r=4
X <- X + 1 }
times <- c(X, times) }
mean(times) }
func.time(1000)
#[1] 7.999
For another example of how to use switch with a numeric argument to EXPR, consider my answer to this question: R switch statement with varying outputs throwing error

looping through a matrix with a function

I'd like to perform this function on a matrix 100 times. How can I do this?
v = 1
m <- matrix(0,10,10)
rad <- function(x) {
idx <- sample(length(x), size=1)
flip = sample(0:1,1,rep=T)
if(flip == 1) {
x[idx] <- x[idx] + v
} else if(flip == 0) {
x[idx] <- x[idx] - v
return(x)
}
}
This is what I have so far but doesn't work.
for (i in 1:100) {
rad(m)
}
I also tried this, which seemed to work, but gave me an output of like 5226 rows for some reason. The output should just be a 10X10 matrix with changed values depending on the conditions of the function.
reps <- unlist(lapply(seq_len(100), function(x) rad(m)))
Ok I think I got it.
The return statement in your function is only inside a branch of an if statement, so it returns a matrix with a probability of ~50% while in the other cases it does not return anything; you should change the code function into this:
rad <- function(x) {
idx <- sample(length(x), size=1)
flip = sample(0:1,1,rep=T)
if(flip == 1) {
x[idx] <- x[idx] + v
} else if(flip == 0) {
x[idx] <- x[idx] - v
}
return(x)
}
Then you can do:
for (i in 1:n) {
m <- rad(m)
}
Note that this is semantically equal to:
for (i in 1:n) {
tmp <- rad(m) # return a modified verion of m (m is not changed yet)
# and put it into tmp
m <- tmp # set m equal to tmp, then in the next iteration we will
# start from a modified m
}
When you run rad(m) is not do changes on m.
Why?
It do a local copy of m matrix and work on it in the function. When function end it disappear.
Then you need to save what function return.
As #digEmAll write the right code is:
for (i in 1:100) {
m <- rad(m)
}
You don't need a loop here. The whole operation can be vectorized.
v <- 1
m <- matrix(0,10,10)
n <- 100 # number of random replacements
idx <- sample(length(m), n, replace = TRUE) # indices
flip <- sample(c(-1, 1), n, replace = TRUE) # subtract or add
newVal <- aggregate(v * flip ~ idx, FUN = sum) # calculate new values for indices
m[newVal[[1]]] <- m[newVal[[1]]] + newVal[[2]] # add new values

Summing specific data from column in r

Im trying to creat a function in r that will let me sum data from the last three rows into a new column (i.e. if im looking at day 4 = days 3+2+1)
This is what i've worked out so far, however it doesnt work.
S3<- function(x){
res <- numeric(nrow(x))
for (i in 1:nrow(x)){
res[i] <- i
if (i > 3) {
res[i] <- x[i-3,10]
} else {
res[i] <- x[i,10]
}
}
x$PP3 <- res
return(x)
}
Use this:
x$PP3 <- x[,10] + c(0,head(x[,10],-1)) + c(0,0,head(x[,10],-2))
If you want a function:
S3 <- function(x, j=10){
within(x, PP3 <- x[,j] + c(0,head(x[,j],-1)) + c(0,0,head(x[,j],-2)))
}

Resources