I am in the process of transforming a traditional loop to a foreach loop for solving the shoelace formula problem in R; however, I am not getting the right accumulation with the foreach loop.
library("foreach")
x = c(0, 4, 4, 0)
# coordinates of points
y = c(0, 0, 4, 4)
# coordinates of points
points <- length(x)
area <- 0
# Accumulates area in the loop
i <- 0
j <- points
# using foreach loop
area <- foreach(i = seq(x), .combine = "+") %do% {
(x[[j]] + x[[i]]) * (y[[j]] - y[[i]])
j <- i
}
area # 10
This is just 1 + 2 + 3 + 4. It has not taken into account the points in x and y.
# using traditional loop
area <- vector("list", length(x))
for (i in seq_along(x)) {
area[[i]] <- (x[[j]] + x[[i]]) * (y[[j]] - y[[i]])
j <- i
}
area
# [[1]]
# [1] 0
# [[2]]
# [1] 0
# [[3]]
# [1] -32
# [[4]]
# [1] 0
The sum is 32 units, which is correct.
What am I doing wrong with the foreach loop? Thank you.
foreach is returning the last calculated expression, as in regular functions.
So, you can do:
area <- foreach(i = seq(x)) %do% {
j0 <- j
j <- i
(x[[j0]] + x[[i]]) * (y[[j0]] - y[[i]])
}
Related
Just some smaller changes which do not need to be considered.
This for loop may be helpful.
1. Run all of your codes
s <- 60000
t <- 20
mu <- function(x, t) {
A <- .00022
B <- 2.7*10^(-6)
c <- 1.124
mutemp <- A + B*c^(x + t)
out <- ifelse(t <= 2, 0.9^(2 - t)*mutemp, mutemp)
out}
f <- function(x) (s - x - 0.05*(0.04*x + 1810.726 - mu(40, t)*(s - x)))
2. Run the for loop below for iteration
2.1 Predefine the length of the outcome. In your case is 400 (t/0.05 = 400).
output <- vector(mode = "numeric", length = t/0.05)
2.2 Run through the for loop from 1 to 400. Save each uniroot result to step 2.1, and then reassign both s and t accordingly.
for (i in 1:400) {
output[i] <- uniroot(f, lower=0.1, upper=100000000)$root
s <- output[i]
t <- 20 - i * 0.05
}
3. Inspect the result
output
Hope this is helpful.
You could use vapply on a defined t sequence.
s <- 6e4
tseq <- seq.int(19.95, 0, -.05)
x <- vapply(tseq, \(t) {
s <<- uniroot(\(x) (s - x - 0.05*(0.04*x + 1810.726 - mu(40, t)*(s - x))), lower=0.1, upper=100000000)$root
}, numeric(1L))
Note, that <<- changes s in the global environment, and at the end gets the last value.
s
# [1] 2072.275
res <- cbind(t=tseq, x)
head(res)
# t x
# [1,] 19.95 59789.92
# [2,] 19.90 59580.25
# [3,] 19.85 59371.01
# [4,] 19.80 59162.18
# [5,] 19.75 58953.77
# [6,] 19.70 58745.77
To cross validation for CCLE (Cancer Cell Line Encyclopedia) drug data I tried to convert the following codes from matlab to R. However, I was unsuccessful. Matlab codes work fine and can create both a *cross.mat that is a group of 10 fold CV data for each data set and a *data.mat that is the grouped data of 10 times of CV of each data set.
I will be appreciate if you can help me find my mistake.
#This function is about 10-fold cross-validation data grouping
getcrossMatrixs <- function(MM){
library(pracma)
N <- nnz(MM)
zeroM <- matrix(0L, nrow = dim(MM)[1], ncol = dim(MM)[2])
D <- randperm(N)
first <- floor(N/10)
w = which(MM != 0, arr.ind=TRUE);
nrows=w[,1]; ncols=w[,2]
crossdata <- list()
for (i in 1:10) {
crossdata[[i]] <- zeroM
}
for (i in 1:10){
for (j in (1+(i-1)*first):(i*first)){
crossdata[[i]][c(nrows[D[j]]),c(ncols[D[j]]) ] <- MM[c(nrows[D[j]]),c(ncols[D[j]])]
}
}
k <- (N-(10*first))
i <- 10*first+1
for (j in 1:k){
crossdata[[j]][c(nrows[D[i]]),c(ncols[D[i]]) ] <- MM[c(nrows[D[i]]),c(ncols[D[i]])]
i <- i+1
}
}
#The following lines is the main for calling above function.
library(foreach)
n.cores <- parallel::detectCores()
my.cluster <- parallel::makeCluster(
n.cores,
type = "PSOCK"
)
print(my.cluster)
#> socket cluster with 16 nodes on host 'localhost'
doParallel::registerDoParallel(cl = my.cluster)
foreach::getDoParRegistered()
#> [1] TRUE
CCLEdata <- list()
#MM<-matrix(read_csv("MM.csv", col_names = FALSE, show_col_types = FALSE), rownames.force = NA)
MM <- matrix(seq(0, 4.5, length.out = 11784), nrow = 491) #datamatrix like CCLE drug activity area sensitivity matrrix(491*24)
foreach(i = 1:10) %dopar% {
CCLEcross <- getcrossMatrixs(MM)
CCLEdata[[i]] <- CCLEcross
}
#> [[1]]
#> NULL
#>
#> [[2]]
#> NULL
#>
#> [[3]]
#> NULL
#>
#> [[4]]
#> NULL
#>
#> [[5]]
#> NULL
#>
#> [[6]]
#> NULL
#>
#> [[7]]
#> NULL
#>
#> [[8]]
#> NULL
#>
#> [[9]]
#> NULL
#>
#> [[10]]
#> NULL
Created on 2022-08-29 with reprex v2.0.2
Actually when I use the original CCLE dataset the error is changing in the main.R:
Error in { : task 1 failed - "is.numeric(x) || is.complex(x) is not TRUE"
or
Error in { :
task 1 failed - "attempt to select less than one element in integerOneIndex"
%These are from Matlab
function [crossdata] = getcrossMatrixs(MM)
N = nnz(MM(:));
zeroM = zeros(size(MM));
D = randperm(N);
first = floor(N/10);
[nrows,ncols] = find(MM);
crossdata = {};
for i = 1:10
crossdata{i} = zeroM;
end
for i = 1:10
for j = 1+(i-1)*first:i*first
crossdata{i}(nrows(D(j)),ncols(D(j))) = MM(nrows(D(j)),ncols(D(j)));
end
end
k=N -10*first ;
i=10*first+1;
for j=1:k
crossdata{j}(nrows(D(i)),ncols(D(i))) = MM(nrows(D(i)),ncols(D(i)));
i=i+1;
end
end
load('MM.mat')
parfor i=1:10
[CCLEcross] = getcrossMatrixs(MM);
CCLEdata{i}=CCLEcross;
end
I didn't look too closely to figure out what was wrong. I based this function on the Matlab function supplied. Note that for this particular example, going parallel is more expensive due to overhead. Parallel will provide performance with large enough matrices and/or more samples.
library(parallel)
MM <- matrix(seq(0, 4.5, length.out = 11784), nrow = 491)
getcrossMatrixs <- function(MM, parts = 10L) {
D <- sample(which(MM != 0))
first <- length(D) %/% parts
last <- length(D) %% parts
idx <- c(0L, cumsum(c(rep(first + 1L, last), rep(first, parts - last))))
mZero <- matrix(0, nrow(MM), ncol(MM))
lapply(1:parts, function(i, m) {m[D[(idx[i] + 1L):idx[i + 1L]]] <- MM[D[(idx[i] + 1L):idx[i + 1L]]]; m}, mZero)
}
reps <- 10L
clust <- makeCluster(min(detectCores() - 1L, reps))
clusterExport(clust, c("getcrossMatrixs", "MM"))
CCLEdata <- parLapply(clust, 1:reps, function(x) getcrossMatrixs(MM))
stopCluster(clust)
# check that each set of matrices returned has all elements of MM
identical(rep(list(MM), reps), lapply(1:reps, function(i) Reduce("+", CCLEdata[[i]], matrix(0, nrow(MM), ncol(MM)))))
#> [1] TRUE
And here's a cleaned-up version of the Matlab function:
function [crossdata] = getcrossMatrixs(MM)
idx = find(MM);
N = length(nrows);
zeroM = zeros(size(MM));
idx = idx(randperm(N));
first = floor(N/10);
crossdata = cell(10, 1);
for i = 1:10
crossdata{i} = zeroM;
end
for i = 1:10
j = 1 + (i - 1)*first:i*first;
crossdata{i}(idx(j)) = MM(idx(j));
end
k = N - 10*first;
j = 10*first + 1;
for i = 1:k
crossdata{i}(idx(j)) = MM(idx(j));
j = j + 1;
end
end
In an earlier question (R: Logical Conditions Not Being Respected), I learned how to make the following simulation :
Step 1: Keep generating two random numbers "a" and "b" until both "a" and "b" are greater than 12
Step 2: Track how many random numbers had to be generated until it took for Step 1 to be completed
Step 3: Repeat Step 1 and Step 2 100 times
res <- matrix(0, nrow = 0, ncol = 3)
for (j in 1:100){
a <- rnorm(1, 10, 1)
b <- rnorm(1, 10, 1)
i <- 1
while(a < 12 | b < 12) {
a <- rnorm(1, 10, 1)
b <- rnorm(1, 10, 1)
i <- i + 1
}
x <- c(a,b,i)
res <- rbind(res, x)
}
head(res)
[,1] [,2] [,3]
x 12.14232 12.08977 399
x 12.27158 12.01319 1695
x 12.57345 12.42135 302
x 12.07494 12.64841 600
x 12.03210 12.07949 82
x 12.34006 12.00365 782
Question: Now, I am trying to make a slight modification to the above code - Instead of "a" and "b" being produced separately, I want them to be produced "together" (in math terms: "a" and "b" were being produced from two independent univariate normal distributions, now I want them to come from a bivariate normal distribution).
I tried to modify this code myself:
library(MASS)
Sigma = matrix(
c(1,0.5, 0.5, 1), # the data elements
nrow=2, # number of rows
ncol=2, # number of columns
byrow = TRUE) # fill matrix by rows
res <- matrix(0, nrow = 0, ncol = 3)
for (j in 1:100){
e_i = data.frame(mvrnorm(n = 1, c(10,10), Sigma))
e_i$i <- 1
while(e_i$X1 < 12 | e_i$X2 < 12) {
e_i = data.frame(mvrnorm(n = 1, c(10,10), Sigma))
e_i$i <- i + 1
}
x <- c(e_i$X1, e_i$X2 ,i)
res <- rbind(res, x)
}
res = data.frame(res)
But this is producing the following error:
Error in while (e_i$X1 < 12 | e_i$X2 < 12) { : argument is of length
zero
If I understand your code correctly you are trying to see how many samples occur before both values are >=12 and doing that for 100 trials? This is the approach I would take:
library(MASS)
for(i in 1:100){
n <- 1
while(any((x <- mvrnorm(1, mu=c(10,10), Sigma=diag(0.5, nrow=2)+0.5))<12)) n <- n+1
if(i==1) res <- data.frame("a"=x[1], "b"=x[2], n)
else res <- rbind(res, data.frame("a"=x[1], "b"=x[2], n))
}
Here I am assigning the results of a mvrnorm to x within the while() call. In that same call, it evaluates whether either are less than 12 using the any() function. If that evaluates to FALSE, n (the counter) is increased and the process repeated. Once TRUE, the values are appended to your data.frame and it goes back to the start of the for-loop.
Regarding your code, the mvrnorm() function is returning a vector, not a matrix, when n=1 so both values go into a single variable in the data.frame:
data.frame(mvrnorm(n = 1, c(10,10), Sigma))
Returns:
mvrnorm.n...1..c.10..10...Sigma.
1 9.148089
2 10.605546
The matrix() function within your data.frame() calls, along with some tweaks to your use of i, will fix your code:
library(MASS)
Sigma = matrix(
c(1,0.5, 0.5, 1), # the data elements
nrow=2, # number of rows
ncol=2, # number of columns
byrow = TRUE) # fill matrix by rows
res <- matrix(0, nrow = 0, ncol = 3)
for (j in 1:10){
e_i = data.frame(matrix(mvrnorm(n = 1, c(10,10), Sigma), ncol=2))
i <- 1
while(e_i$X1[1] < 12 | e_i$X2[1] < 12) {
e_i = data.frame(matrix(mvrnorm(n = 1, c(10,10), Sigma), ncol=2))
i <- i + 1
}
x <- c(e_i$X1, e_i$X2 ,i)
res <- rbind(res, x)
}
res = data.frame(res)
I would like to write a code that generates 3 x 1 vector y according to following rule (The small numbers are selected for simplicity):
Here x is a 3 x 1 vector. According to the rule, for an update of y, I need sum of all y’s.
An attemp to code with an arbitrary x:
x <- c(2,3,1)
y <- c(0,0,0)
for(i in 1:5){
for(j in 1:3){
y[j] <- x[j] + y[j] + sum(y)
}
}
This code is not appropriate because it computes sum(b) term by term.
The inner loop indicates something like this:
y[1] = x[1] + 0 = 2
y[2] = x[2] + 2 = 5
y[3] = x[3] + 2 + 5 = 8
It is not appropriate because sum(y) term contains one term for y[1], two terms for y[2], three terms for y[3]. But I think sum(y) should be 2 + 5 + 8 = 15 for each iteration, y[1], y[2], y[3], according to the rule given above. Moreover this procedure should be repeated for a certain times (here 5 times shown by the outer loop). At each time of outer loop, only one sum(y) term will be computed for all three iteration of inner loop and it will be put as sum(y) term for each j.
How should I code this?
You are over-complicating this. Vectorize the inner-loop away:
> x <- c(2,3,1)
> y <- c(0,0,0)
> for(j in 1:5) y <- x + y + sum(y)
> y
[1] 682 687 677
This approach only computes sum(y) once per iteration, which is what you seem to want. As an added benefit, adding vectors in a single operation is much faster than adding them component-wise in a loop.
Maybe this will work
myfun <- function(x, y, i) {
y[i] <- x[i] + sum(y)
if (i < length(x)) {
myfun(x, y, i+1)
} else {
return(y)
}
}
x <- c(2, 3, 1)
y <- rep(0, length(x))
myfun(x, y, 1)
# [1] 2 5 8
x <- c(2, 3, 1, 5)
y <- rep(0, length(x))
myfun(x, y, 1)
# [1] 2 5 8 20
I want to reduce time and memory usage (I previously used outer for this but it consumes more memory than I have) by reducing the iterations to create a symmetric matrix, that is sol[i, j] is the same as sol[j, i].
My code so far:
# Prepare input
subss <- list(a = c(1, 2, 4), b = c(1, 2, 3), c = c(4, 5))
A <- matrix(runif(25), ncol = 5, nrow = 5)
# Pre allocate memory
sol <- matrix(nrow = length(subss), ncol = length(subss),
dimnames = list(names(subss), names(subss)))
x <- 0
for (i in seq_along(subss)) {
# Omit for the subsets I already calculated ?
for (j in seq_along(subss)) {
x <- x + 1
message(x)
# The function I use here might result in a NA
sol[i, j] <- mean(A[subss[[i]], subss[[j]]])
sol[j, i] <- sol[i, j] # Will overwrite when it shouldn't
}
}
Will use 9 iterations, how can I avoid them and do just 6 iterations?
I need to calculate the symmetric values, so this question doesn't apply. Also this other one doesn't work either because there might be many combinations and at some point it can't allocate the vector in memory.
A for loop will usually be slower than outer. Try byte-compiling the loop or implement it in Rcpp.
subss <- list(a = c(1, 2, 4), b = c(1, 2, 3), c = c(4, 5))
set.seed(42)
A <- matrix(runif(25), ncol = 5, nrow = 5)
#all combinations of indices
ij <- combn(seq_along(subss), 2)
#add all i = j
ij <- matrix(c(ij, rep(seq_along(subss), each = 2)), nrow = 2)
#preallocate
res <- numeric(ncol(ij))
#only one loop
for (k in seq_len(ncol(ij))) {
message(k)
res[k] <- mean(A[subss[[ij[1, k]]], subss[[ij[2, k]]]])
}
#1
#2
#3
#4
#5
#6
#create symmetric sparse matrix
library(Matrix)
sol <- sparseMatrix(i = ij[1,], j = ij[2,],
x = res, dims = rep(length(subss), 2),
symmetric = TRUE, index1 = TRUE)
#3 x 3 sparse Matrix of class "dsCMatrix"
#
#[1,] 0.7764715 0.6696987 0.7304413
#[2,] 0.6696987 0.6266553 0.6778936
#[3,] 0.7304413 0.6778936 0.5161089
I found a way with plain for loops:
x <- 0
for (i in seq_along(subss)) {
for (j in seq_len(i)) { # or for (j in 1:i) as proposed below
x <- x + 1
message(x)
sol[i, j] <- mean(A[subss[[i]], subss[[j]]])
sol[j, i] <- sol[i, j]
}
}
for (i in 1:length(subss)) {
for (j in 1:i) {
message(i, ' ', j, ' - ', mean(A[subss[[i]], subss[[j]]]) ) # Check iterations and value
sol2[i, j] <- sol2[j, i] <- mean(A[subss[[i]], subss[[j]]])
}
}
I checked your script values and aren't symmetric:
1 1 - 0.635455905252861
1 2 - 0.638608284398086
1 3 - 0.488700995299344
2 1 - 0.568414432255344
2 2 - 0.602851431118324
2 3 - 0.516099992596234
3 1 - 0.595461705311512
3 2 - 0.656920690399905
3 3 - 0.460815121419728
Mine values (same as #Llopis):
1 2 - 0.638608284398086
1 3 - 0.488700995299344
2 2 - 0.602851431118324
2 3 - 0.516099992596234
3 2 - 0.656920690399905
3 3 - 0.460815121419728