R foreach loops on 3D array data - r

Some background: I need to work out the distance from some point to each cell in a 3D grid, then apply a function to this distance. I need to do this for multiple points, and add the functions values in each cell for all the points. I can do this using the following code for points located at (x,y,z):
x <- c(1,2,3,4,5)
y <- x
z <- x
radius <- c(0.4,0.5,0.6,0.7,0.8)
numsphere <- length(x)
radius_buffer <- 0.2
xvox <- seq((min(x)-1),(max(x)+2),0.5)
yvox <- xvox
zvox <- xvox
probability_array <<- array(0,dim=c(length(xvox),length(yvox),length(zvox)))
for (j in 1:length(yvox)){ # for every y element
for (i in 1:length(xvox)){ # for every x element
for (k in length(zvox):1){ # for every z element
for (n in 1:numsphere){ # for the total number of points
dist_sd <- ((xvox[i]-x[n])^2+(yvox[j]-y[n])^2+(zvox[k]-z[n])^2)^0.5
probability_array[i,j,k] <- probability_array[i,j,k] +
round(exp(-1*(dist_sd-radius[n])^2/(2*radius_buffer^2)),3)
}
}
}
}
The output is an array and the plotted result looks like this:
probability_array <- probability_array/max(probability_array)
contour3d(probability_array,level=c(0.2,0.8,0.9),x=xvox,y=yvox,z=zvox,color = c("aquamarine","gold","darkorange"),alpha = c(0.1,0.2,0.5),add=T)
I have tried to parallelise this because it seems ideal for it, but can't get it to work.
I've tried:
cl<-makeCluster(detectCores(),type="SOCK")
registerDoSNOW(cl)
for (j in 1:length(yvox)){
for (i in 1:length(xvox)){
for(k in length(zvox):1){
probability_array[i,j,k] <- foreach(n=1:numsphere, .combine='+') %dopar% {
dist_sd <- ((xvox[i]-x[n])^2+(yvox[j]-y[n])^2+(zvox[k]-z[n])^2)^0.5
round(exp(-1*(dist_sd-radius[n])^2/(2*radius_buffer^2)),3)
}
}
}
}
and things like:
r <- foreach(j=1:length(yvox)) %:% foreach(i=1:length(xvox)) %:% foreach(k=length(zvox):1) %:% foreach(n=1:numsphere, .combine='+') %do% {
dist_sd <- ((xvox[i]-x[n])^2+(yvox[j]-y[n])^2+(zvox[k]-z[n])^2)^0.5
probability_array[i,j,k] <- probability_array[i,j,k] + round(exp(-1*(dist_sd-radius[n])^2/(2*radius_buffer^2)),3)
probability_array[i,j,k]
}
But I'm missing something important. Any help would be greatly appreciated.
Cheers

When parallelizing computations, because of the overhead it introduces,
it is preferable to run large pieces of computations in parallel,
rather than small -- outer loops, rather than inner loops.
In this case, however, there is no need to parallelize the computations:
you can just vectorize them.
# 3-dimensional analogue of row() and col()
dim3 <- function( a, i ) {
stopifnot( length(dim(a)) == 3 )
r <- a
if( i == 1 ) { r[] <- rep(1:dim(a)[1], dim(a)[2] * dim(a)[3]) }
if( i == 2 ) { r[] <- rep(1:dim(a)[2], each = dim(a)[1], times = dim(a)[3]) }
if( i == 3 ) { r[] <- rep(1:dim(a)[3], each = dim(a)[1] * dim(a)[2]) }
r
}
probability_array <- array(0,dim=c(length(xvox),length(yvox),length(zvox)))
i <- dim3(probability_array,1)
j <- dim3(probability_array,2)
k <- dim3(probability_array,3)
for (n in 1:numsphere){
dist_sd <- sqrt(
(xvox[i]-x[n])^2 + (yvox[j]-y[n])^2 + (zvox[k]-z[n])^2
)
probability_array <- probability_array +
# Rounding intermediate results looks suspicious
round(exp(-1*(dist_sd-radius[n])^2/(2*radius_buffer^2)),3)
}

Related

How to find the smallest circumcircle of an irregular polygon on R project?

I was wondering about how to find the smallest circumcircle of an irregular polygon. I've worked with spatial polygons in R.
I want to reproduce some of the fragstats metrics in a vector mode because I had hard times with the package 'landscapemetrics' for a huge amount of data. In specific I would like to implement the circle (http://www.umass.edu/landeco/research/fragstats/documents/Metrics/Shape%20Metrics/Metrics/P11%20-%20CIRCLE.htm). So far, I could not find the formula or script for the smallest circumcircle.
All your comments are more than welcome.
Than you
As I mentioned in a comment, I don't know of existing R code for this, but a brute force search should be fast enough if you don't have too many points that need to be in the circle. I just wrote this one. The center() function is based on code from Wikipedia for drawing a circle around a triangle; circumcircle() is the function you want, found by brute force search through all circles that pass through 2 or 3 points in the set. On my laptop it takes about 4 seconds to handle 100 points. If you have somewhat bigger sets, you can probably get tolerable results by translating to C++, but it's an n^4 growth rate, so you'll need a better solution
for a really large set.
center <- function(D) {
if (NROW(D) == 0)
matrix(numeric(), ncol = 2)
else if (NROW(D) == 1)
D
else if (NROW(D) == 2) {
(D[1,] + D[2,])/2
} else if (NROW(D) == 3) {
B <- D[2,] - D[1,]
C <- D[3,] - D[1,]
Dprime <- 2*(B[1]*C[2] - B[2]*C[1])
if (Dprime == 0) {
drop <- which.max(c(sum((B-C)^2), sum(C^2), sum(B^2)))
center(D[-drop,])
} else
c((C[2]*sum(B^2) - B[2]*sum(C^2))/Dprime,
(B[1]*sum(C^2) - C[1]*sum(B^2))/Dprime) + D[1,]
} else
center(circumcircle(D))
}
radius <- function(D, U = center(D))
sqrt(sum((D[1,] - U)^2))
circumcircle <- function(P) {
n <- NROW(P)
if (n < 3)
return(P)
P <- P[sample(n),]
bestset <- NULL
bestrsq <- Inf
# Brute force search
for (i in 1:(n-1)) {
for (j in (i+1):n) {
D <- P[c(i,j),]
U <- center(D)
rsq <- sum((D[1,] - U)^2)
if (rsq >= bestrsq)
next
failed <- FALSE
for (k in (1:n)[-j][-i]) {
Pk <- P[k,,drop = FALSE]
if (sum((Pk - U)^2) > rsq) {
failed <- TRUE
break
}
}
if (!failed) {
bestset <- c(i,j)
bestrsq <- rsq
}
}
}
# Look for the best 3 point set
for (i in 1:(n-2)) {
for (j in (i+1):(n-1)) {
for (l in (j+1):n) {
D <- P[c(i,j,l),]
U <- center(D)
rsq <- sum((D[1,] - U)^2)
if (rsq >= bestrsq)
next
failed <- FALSE
for (k in (1:n)[-l][-j][-i]) {
Pk <- P[k,,drop = FALSE]
if (sum((Pk - U)^2) > rsq) {
failed <- TRUE
break
}
}
if (!failed) {
bestset <- c(i,j,l)
bestrsq <- rsq
}
}
}
}
P[bestset,]
}
showP <- function(P, ...) {
plot(P, asp = 1, type = "n", ...)
text(P, labels = seq_len(nrow(P)))
}
showD <- function(D) {
U <- center(D)
r <- radius(D, U)
theta <- seq(0, 2*pi, len = 100)
lines(U[1] + r*cos(theta), U[2] + r*sin(theta))
}
n <- 100
P <- cbind(rnorm(n), rnorm(n))
D <- circumcircle(P)
showP(P)
showD(D)
This shows the output

avoid R loop and parallelize with snow

I have a large loop that will take too long (~100 days). I'm hoping to speed it up with the snow library, but I'm not great with apply statements. This is only part of the loop, but if I can figure this part out, the rest should be straightforward. I'm ok with a bunch of apply statements or loops, but one apply statement using a function to get object 'p' would be ideal.
Original data
dim(m1) == x x # x >>> 0
dim(m2) == y x # y >>> 0, y > x, y > x-10
dim(mout) == x x
thresh == x-10 #specific to my data, actual number probably unimportant
len(v1) == y #each element is a random integer, min==1, max==thresh
len(v2) == y #each element is a random integer, min==1, max==thresh
Original loop
p <- rep(NA,y)
for (k in 1:y){
mout <- m1 * matrix(m2[k,],x,x)
mout <- mout/sum(mout)
if (v1[k] < thresh + 1){
if(v2[k] < thresh + 1){
p[k] <- out[v1[k],v2[k]]
}
if(v2[k] > thresh){
p[k] <- sum(mout[v1[k],(thresh+1):x])
}
}
#do stuff with object 'p'
}
library(snow)
dostuff <- function(k){
#contents of for-loop
mout <- m1 * matrix(m2[k,],x,x)
mout <- mout/sum(mout)
if (v1[k] < thresh + 1){
if(v2[k] < thresh + 1){
p <- out[v1[k],v2[k]]
}
if(v2[k] > thresh){
p <- sum(mout[v1[k],(thresh+1):x])
}
}
#etc etc
return(list(p,
other_vars))
}
exports = c('m1',
'm2',
'thresh',
'v1',
'x' ,
'v2')
cl = makeSOCKcluster(4)
clusterExport(cl,exports)
loop <- as.array(1:y)
out <- parApply(cl,loop,1,dostuff)
p <- rep(NA,y)
for(k in 1:y){
p[k] <- out[[k]][[1]]
other_vars[k] <- out[[k]][[2]]
}

Repeating a function multiple times using a for loop

A have code that creates a random graph in the form of a matrix. Now I would like it to create many, say m, random graphs so the output is m matrices. I am trying to do this with a for loop. This would be my preferred method however I am open to other suggestions (apply family?). Here is my code, where n is the number of nodes/vertices the graph has and beta is the amount of preferential attachment (keep this between 0 and 1.5)
multiplerandomgraphs <- function(n, beta, m) {
for(k in 1:m) {
randomgraph <- function(n, beta) {
binfunction <- function(y) {
L <- length(y)
x <- c(0, cumsum(y))
U <- runif(1, min = 0 , max = sum(y))
for(i in 1:L) {
if(x[i] <= U && x[i+1] > U){
return(i)
}
}
}
mat <- matrix(0,n,n)
mat[1,2] <- 1
mat[2,1] <- 1
for(i in 3:n) {
degvect <- colSums(mat[ , (1:(i-1))])
degvect <- degvect^(beta)
j <- binfunction(degvect)
mat[i,j] <- 1
mat[j,i] <- 1
}
return(mat)
}
}
}
You can define your randomgraph function as randomgraph <- function(i, n, beta) {} with the body the same as your definition, leaves the parameter i as a dummy parameter. And then use apply function as listOfMatrix <- lapply(1:m, randomgraph, n, beta) which return a list of matrix.

looping through a matrix with a function

I'd like to perform this function on a matrix 100 times. How can I do this?
v = 1
m <- matrix(0,10,10)
rad <- function(x) {
idx <- sample(length(x), size=1)
flip = sample(0:1,1,rep=T)
if(flip == 1) {
x[idx] <- x[idx] + v
} else if(flip == 0) {
x[idx] <- x[idx] - v
return(x)
}
}
This is what I have so far but doesn't work.
for (i in 1:100) {
rad(m)
}
I also tried this, which seemed to work, but gave me an output of like 5226 rows for some reason. The output should just be a 10X10 matrix with changed values depending on the conditions of the function.
reps <- unlist(lapply(seq_len(100), function(x) rad(m)))
Ok I think I got it.
The return statement in your function is only inside a branch of an if statement, so it returns a matrix with a probability of ~50% while in the other cases it does not return anything; you should change the code function into this:
rad <- function(x) {
idx <- sample(length(x), size=1)
flip = sample(0:1,1,rep=T)
if(flip == 1) {
x[idx] <- x[idx] + v
} else if(flip == 0) {
x[idx] <- x[idx] - v
}
return(x)
}
Then you can do:
for (i in 1:n) {
m <- rad(m)
}
Note that this is semantically equal to:
for (i in 1:n) {
tmp <- rad(m) # return a modified verion of m (m is not changed yet)
# and put it into tmp
m <- tmp # set m equal to tmp, then in the next iteration we will
# start from a modified m
}
When you run rad(m) is not do changes on m.
Why?
It do a local copy of m matrix and work on it in the function. When function end it disappear.
Then you need to save what function return.
As #digEmAll write the right code is:
for (i in 1:100) {
m <- rad(m)
}
You don't need a loop here. The whole operation can be vectorized.
v <- 1
m <- matrix(0,10,10)
n <- 100 # number of random replacements
idx <- sample(length(m), n, replace = TRUE) # indices
flip <- sample(c(-1, 1), n, replace = TRUE) # subtract or add
newVal <- aggregate(v * flip ~ idx, FUN = sum) # calculate new values for indices
m[newVal[[1]]] <- m[newVal[[1]]] + newVal[[2]] # add new values

for loop creating vector r

I am trying to create a function to calculate the Box-Cox transformation in R, where you iterate values of lambda (lambdas) in a formula to maximize L. What I ultimately want is a vector of L, such that for all i in lambda, there is a corresponding L value.
y <- c(256,256,231,101,256,213,241,246,207,143,287,240,262,234,146,255,184,161,252,229,283,132,218,113,194,237,181,262,104)
df <- 28
n=29
lambdas <- seq(-3,3,0.001)
L <- c(rep(NA,length(lambdas)))
for(i in lambdas) {
if(i != 0) {
yprime <- (((y^i)-1)/i)
} else
{ yprime <- log(y)
}
st2 <- var(yprime)
L <- (((-df/2)*(log(st2))) + ((i-1)*(df/n)*(sum(log(y)))))
}
What I typically end up with L as a vector of 1, with the final iteration calculated.
Use seq_along to generate an index for lambdas[] and L[]
for(i in seq_along(lambdas)) {
if(i != 0) {
yprime <- (((y^lambdas[i])-1)/lambdas[i])
} else {
yprime <- log(y)
}
st2 <- var(yprime)
L[i] <- (((-df/2)*(log(st2))) + ((lambdas[i]-1)*(df/n)*(sum(log(y)))))
}
plot(L)

Resources