Calculation in a loop in R - r

Consider the following data:
library(Benchmarking)
x <- c(2, 3, 8)
y <- c(1, 5, 10)
k <- 25
d <- data.frame(x,y,k)
x <- c(5, 2, 3, 4)
y <- c(3, 5, 9, 10)
k <- 30
d2 <- data.frame(x,y,k)
d3 <- replicate(3, rbind(d, d2[sample(seq_len(nrow(d2)), 3), , drop=FALSE]), simplify = FALSE)
So now I have a list of 3 data.frame. For each of these 3 data.frame I want to perform the following calculation:
e1 <- with(subset(d3[[1]], k == 25), dea(d3[[1]]$x, d3[[1]]$y, XREF = x, YREF = y))
e2 <- with(subset(d3[[1]], k == 30), dea(d3[[1]]$x, d3[[1]]$y, XREF = x, YREF = y))
we1 <- weighted.mean(eff(e1), d3[[1]]$y)
we2 <- weighted.mean(eff(e2), d3[[1]]$y)
va <- we2/we1
But instead of using that code three times, where I change [[1]] to [[2]] and [[3]], can I instead use a loop, where it does the calculation for all 3 data.frame and create a new data.frame where it list va for the three calculations?

We create a function and then loop through the list and apply the function
f1 <- function(dat) {
d1 <- subset(dat, k == 25)
e1 <- with(d1, dea(dat$x, dat$y, XREF = x, YREF = y))
d2 <- subset(dat, k == 30)
e2 <- with(d2, dea(dat$x, dat$y, XREF = x, YREF = y))
we1 <- weighted.mean(eff(e1), dat$y)
we2 <- weighted.mean(eff(e2), dat$y)
we2/we1
}
sapply(d3, f1)

Related

R Getting numeric matrix from predict()

I have the following code:
fit_lm=lm(z~x+y)
mix <- 2
max <- 12
miy <- 2
may <- 12
griddf <- expand.grid(x = seq(mix,max, length.out = 10),
y = seq( miy,may,length.out = 10))
Prediction_data <- data.frame(griddf)
colnames(Prediction_data) <- c("x", "y")
coordinates(Prediction_data ) <- ~ x + y
terrain_lm <- predict(fit_lm, Prediction_data)
I want that terrain_lm is a numeric matrix, in such a way, that I can use
fig <- plot_ly()
fig <- fig %>% add_surface(terrain_lm)
but I get a 1d array with 100 elements.
The result of predict is a vector. You need to add it to the x and y values and then use xtabs to transform into a suitable matrix for a surface plot.
library(plotly)
#test data
x <- runif(20, 4, 10)
y <- runif(20, 3, 6)
z <- 3*x+y +runif(20, 0, 2)
fit_lm <- lm(z~x+y)
mix <- 2
max <- 12
miy <- 2
may <- 12
griddf <- expand.grid(x = seq(mix,max, length.out = 10),
y = seq( miy,may,length.out = 10))
terrain_lm <- data.frame(griddf)
terrain_lm$z <- predict(fit_lm, terrain_lm)
fig <- plot_ly(z = ~xtabs(z ~ x + y, data = terrain_lm))
fig <- fig %>% add_surface()

Convert for loops into foreach loops

I want to make the code below more efficient by using the foreach package. I tried it for a very long time but I don't manage to get the same result as when using the for-loops. I would like to use a nested foreach-loop including parallelization... And as output I would like to have two matrices with dim [R,b1] I would be very grateful for some suggestions!!
n <- c(100, 300, 500)
R <- 100
b0 <- 110
b1 <- seq(0.01, 0.1, length.out = 100)
## all combinations of n and b1
grid <- expand.grid(n, b1)
names(grid) <- c("n", "b1")
calcPower <- function( R, b0, grid) {
cl <- makeCluster(3)
registerDoParallel(cl)
## n and b1 coefficients
n <- grid$n
b1 <- grid$b1
## ensures reproducibility
set.seed(2020)
x <- runif(n, 18, 80)
x.dich <- factor( ifelse( x < median( x), 0, 1))
## enables to store two outputs
solution <- list()
## .options.RNG ensures reproducibility
res <- foreach(i = 1:R, .combine = rbind, .inorder = TRUE, .options.RNG = 666) %dorng% {
p.val <- list()
p.val.d <- list()
for( j in seq_along(b1)) {
y <- b0 + b1[j] * x + rnorm(n, 0, sd = 10)
mod.lm <- lm( y ~ x)
mod.lm.d <- lm( y ~ x.dich)
p.val <- c( p.val, ifelse( summary(mod.lm)$coef[2,4] <= 0.05, 1, 0))
p.val.d <- c( p.val.d, ifelse( summary(mod.lm.d)$coef[2,4] <= 0.05, 1, 0))
}
solution[[1]] <- p.val
solution[[2]] <- p.val.d
return(solution)
}
dp.val <- matrix( unlist(res[,1], use.names = FALSE), R, length(b1), byrow = TRUE)
dp.val.d <- matrix( unlist(res[,2], use.names = FALSE), R, length(b1), byrow = TRUE)
stopCluster(cl)
df <- data.frame(
effectS = b1,
power = apply( dp.val, 2, function(x){ mean(x) * 100}),
power.d = apply( dp.val.d, 2, function(x){ mean(x) * 100}),
n = factor(n))
return(df)
}
## simulation for different n
tmp <- with(grid,
by( grid, n,
calcPower, R = R, b0 = b0))
## combines the 3 results
df.power <- rbind(tmp[[1]], tmp[[2]], tmp[[3]])
I created a foreach loop in following code. There had to be some changes made. It is a lot easier to return a list then a matrix in foreach, since it's combined with rbind. Especially when you want to return multiple ones. My solution here is to save everything in a list and afterwards transform it into a matrix of length 100.
Note: there is one mistake in your code. summary( mod.lm.d)$coef[2,4] does not exist. I changed it to [2]. Adjust to your needing
solution <- list()
df2<-foreach(i = 1:R, .combine = rbind, .inorder=TRUE) %dopar%{
set.seed(i)
p.val <- list()
p.val.d <- list()
counter <- list()
for( j in seq_along(b1)){
x <- sort( runif(n, 18, 80))
x.dich <- factor( ifelse( x < median(x), 0, 1))
y <- b0 + b1[j] * x + rnorm( n, 0, sd = 10)
mod.lm <- lm( y ~ x)
mod.lm.d <- lm( y ~ x.dich)
p.val <- c(p.val, ifelse( summary( mod.lm)$coef[2] <= 0.05, 1, 0))
p.val.d <- c(p.val.d, ifelse( summary( mod.lm.d)$coef[2] <= 0.05, 1, 0))
counter <- c(counter, j)
}
solution[[1]] <- p.val
solution[[2]] <- p.val.d
solution[[3]] <- counter
return(solution)
}
dp.val <- unlist(df2[,1], use.names = FALSE)
dp.val.d <- unlist(df2[,2], use.names = FALSE)
dp.val.matr <- matrix(dp.val, R, length(b1))
dp.val.d.matr <- matrix(dp.val.d, R, length(b1))
stopCluster(cl)
for your comment:
A foreach does work with a normal for loop. Minimal reproducible example:
df<-foreach(i = 1:R, .combine = cbind, .inorder=TRUE) %dopar%{
x <- list()
for(j in 1:3){
x <- c(x,j)
}
return(x)
}

loop with certain values is not working

I just need help for the first loop! I would like to run the loop for each certain value of m (see first line in code) but its running only for 1:10? The outcome shoud be stored in the last rows msediff1 to msediff100! Also i need the graphics for each value of m!Thanks in advance!
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
n <- 150
x1 <- rnorm(n = n, mean = 10, sd = 4)
R <- 100 # Number of reps
results.true <- matrix(NA , ncol = 2, nrow = R)
colnames(results.true) <- c("beta0.hat", "beta1.hat")
results.diff <- matrix(NA, ncol = 2, nrow = R)
colnames(results.diff) <- c("beta0.hat", "betadiff.hat")
sigma <- 1.2
beta <- c(1.2)
X <- cbind(x1)
if (m==1){d0 <- .7071; d <- c(-.7071)}
if (m==2){d0 = .8090; d = c(-.5,-.309)}
if (m==3){d0 = .8582; d = c(-.3832,-.2809,-.1942) }
if (m==4){d0 = .8873; d = c(-.3090,-.2464,-.1901,-.1409)}
if (m==5){d0 <- .9064; d <- c(-.2600,-.2167,-.1774,-.1420,-.1103)}
if (m==6){d0 = .92; d = c(-.2238,-.1925,-.1635,-.1369,-.1126,-.0906)}
if (m==7){d0 = .9302; d = c(-.1965,-.1728,-.1506,-.1299,-.1107,-.093,-.0768)}
if (m==8){d0 = .9380; d = c(-.1751,-.1565,-.1389,-.1224,-.1069,-.0925,-.0791,-.0666)}
if (m==9){d0 = .9443; d = c(-.1578,-.1429,-.1287,-.1152,-.1025,-.0905,-.0792,-.0687,-.0538)}
if (m==10){d0 <- .9494;
d <- c(-.1437, -.1314, -.1197, -.1085, -.0978, -.0877, -.0782, -.0691, -.0606, -.0527)}
if (m==25){d0 <- 0.97873;
d <- c(-0.06128, -0.05915, -0.05705, -0.05500, -0.05298, -0.05100, -0.04906, -0.04715, -0.04528, -0.04345, -0.04166, -0.03990, -0.03818, -0.03650, -0.03486, -0.03325, -0.03168, -0.03015, -0.02865, -0.02719,
-0.02577, -0.02438, -0.02303, -0.02171, -0.02043) }
if (m==50) {d0 <- 0.98918;
d <- c(-0.03132, -0.03077, -0.03023, -0.02969, -0.02916, -0.02863, -0.02811, -0.02759, -0.02708, -0.02657, -0.02606, -0.02556, -0.02507, -0.02458, -0.02409, -0.02361, -0.02314, -0.02266, -0.02220, -0.02174, -0.02128, -0.02083, -0.02038, -0.01994, -0.01950, -0.01907, -0.01864, -0.01822, -0.01780, -0.01739,-0.01698,-0.01658,-0.01618,-0.01578,-0.01539,-0.01501,-0.01463,-0.01425,-0.01388,-0.01352,
-0.01316,-0.01280,-0.01245,-0.01210,-0.01176,-0.01142,-0.01108,-0.01075,-0.01043,-0.01011) }
if (m==100) { d0 <- 0.99454083;
d <- c(-0.01583636,-0.01569757,-0.01555936,-0.01542178,-0.01528478,-0.01514841,-0.01501262,-0.01487745,-0.01474289,-0.01460892,
-0.01447556,-0.01434282,-0.01421067,-0.01407914,-0.01394819,-0.01381786,-0.01368816,-0.01355903,-0.01343053,-0.01330264,
-0.01317535,-0.01304868,-0.01292260,-0.01279714,-0.01267228,-0.01254803,-0.01242439,-0.01230136,-0.01217894,-0.01205713,
-0.01193592,-0.01181533,-0.01169534,-0.01157596,-0.01145719,-0.01133903,-0.01122148,-0.01110453,-0.01098819,-0.01087247,
-0.01075735,-0.01064283,-0.01052892,-0.01041563,-0.01030293,-0.01019085,-0.01007937,-0.00996850,-0.00985823,-0.00974857,
-0.00963952,-0.00953107,-0.00942322,-0.00931598,-0.00920935,-0.00910332,-0.00899789,-0.00889306,-0.00878884,-0.00868522,
-0.00858220,-0.00847978,-0.00837797,-0.00827675,-0.00817614,-0.00807612,-0.00797670,-0.00787788,-0.00777966,-0.00768203,
-0.00758500,-0.00748857,-0.00739273,-0.00729749,-0.00720284,-0.00710878,-0.00701532,-0.00692245,-0.00683017,-0.00673848,
-0.00664738,-0.00655687,-0.00646694,-0.00637761,-0.00628886,-0.00620070,-0.00611312,-0.00602612,-0.00593971,-0.00585389,
-0.00576864,-0.00568397,-0.00559989,-0.00551638,-0.00543345,-0.00535110,-0.00526933,-0.00518813,-0.00510750,-0.00502745) }
for(r in 1:R){
u <- rnorm(n = n, mean = 0, sd = sigma)
y <- X%*%beta + u
yy = d0* y[(m+1):n]; Xd <- d0* x1[(m+1):n];
for (i in 1:m) { yy <- yy + d[i]* y[(m+1-i):(n-i) ]
Xd = Xd + d[i]* x1[(m+1-i):(n-i)] }
reg.true <- lm(y ~ x1)
reg.diff <- lm(yy ~ Xd)
results.true[r, ] <- coef(reg.true)
results.diff[r, ] <- coef(reg.diff)
}
results.true
results.diff
beta
apply(results.true, MARGIN = 2, FUN = mean)
apply(results.diff, MARGIN = 2, FUN = mean)
co <- 2
dens.true <- density(results.true[, co])
dens.diff <- density(results.diff[, co])
win.graph()
plot(dens.true,
xlim = range(c(results.true[, co], results.diff[, co])),
ylim = range(c(dens.true$y, dens.diff$yy)),
main = "beta estimation true vs. diff", lwd = 2,)
lines(density(results.diff[, co]), col = "red", lwd = 2)
abline(v = beta, col = "blue", lwd = 2)
legend(x=1.24,y=12,c("outcome true","outcome diff"),lty=c(1,1),col =c("black","red") )
legend(x=1.12,y=12,c("m=",m))
#Mean Squared Error
mse=mean(reg.true$residuals^2)
if (m==1) {msediff1=mean(reg.diff$residuals^2)}
if (m==2) {msediff2=mean(reg.diff$residuals^2)}
if (m==3) {msediff3=mean(reg.diff$residuals^2)}
if (m==4) {msediff4=mean(reg.diff$residuals^2)}
if (m==5) {msediff5=mean(reg.diff$residuals^2)}
if (m==6) {msediff6=mean(reg.diff$residuals^2)}
if (m==7) {msediff7=mean(reg.diff$residuals^2)}
if (m==8) {msediff8=mean(reg.diff$residuals^2)}
if (m==9) {msediff9=mean(reg.diff$residuals^2)}
if (m==10) {msediff10=mean(reg.diff$residuals^2)}
if (m==25) {msediff25=mean(reg.diff$residuals^2)}
if (m==50) {msediff50=mean(reg.diff$residuals^2)}
if (m==100) {msediff100=mean(reg.diff$residuals^2)}
}
I can see an error in the code.
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
As soon as the loop starts, m is changed. It's not what's in the first line anymore...
Try, for (ind in 1:length(unique(m))){ if that's not the intention.

IDW parameters in R

I want to perform IDW interpolation using R using the idw command from the gstat package. I have this data:
#settings
library(gstat)
library(dplyr)
library(sp)
library(tidyr)
id_rep <- rep(c(1,2), 20)
f <- rep(c(930,930.2), each=20)
perc <- rep(c(90, 80), each=10)
x <- sample(1:50, 40)
y <- sample(50:100, 40)
E <- runif(40)
df <- data.frame(id_rep, perc, x,y, f, E)
df_split <- split(df, list(df$id_rep, df$perc, df$f), drop = TRUE, sep="_")
#grid
x.range <- range(df$x)
y.range <- range(df$y)
grid <- expand.grid(x = seq(x.range[1], x.range[2], by=1),
y = seq(y.range[1], y.range[2], by=1))
coordinates(grid) <- ~x + y
#interpolation
lst_interp_idw <- lapply(df_split, function(X) {
coordinates(X) <- ~x + y
E_idw <- idw(E~ 1, X, grid, idp=1, nmax=3) %>% as.data.frame()
df_interp <- select(E_idw, x,y,E_pred=var1.pred)
df_interp
})
df_interp_idw <- bind_rows(lst_interp_idw, .id = "interact") %>%
separate(interact, c("id_rep", "perc", "f"), sep = "\\_")
Now I want to perform each run with different idp and nmax parameters within certain values​ (idp from 1 to 3 by 0.5, and nmax 3 to 6 by 1) and get out a data frame with columns for each combination of idp and nmax values. I try with two for loops but it doesn't work.
EDIT
the code that doesn't work is:
idp = seq(from = 1, to = 3, by = 0.5)
nmax = seq(from = 3, to = 6, by = 1)
...
for(i in idp) {
for(j in nmax)
{ E_idw= idw(E ~ 1, X, grid, nmax = i, idp = j)
}
}
...
Here is a way how to store the result of every iteration in a list.
#settings
#install.packages("gstat")
library(gstat)
library(dplyr)
library(sp)
library(tidyr)
id_rep <- rep(c(1,2), 20)
f <- rep(c(930,930.2), each=20)
perc <- rep(c(90, 80), each=10)
x <- sample(1:50, 40)
y <- sample(50:100, 40)
E <- runif(40)
df <- data.frame(id_rep, perc, x,y, f, E)
df_split <- split(df, list(df$id_rep, df$perc, df$f), drop = TRUE, sep="_")
#grid
x.range <- range(df$x)
y.range <- range(df$y)
grid <- expand.grid(x = seq(x.range[1], x.range[2], by=1),
y = seq(y.range[1], y.range[2], by=1))
coordinates(grid) <- ~x + y
# ==============================================
# NEW function
# ==============================================
idp = seq(from = 1, to = 3, by = 0.5)
nmax = seq(from = 3, to = 6, by = 1)
#interpolation
lst_interp_idw <- lapply(df_split, function(X) {
coordinates(X) <- ~x + y
df_interp <- vector(length(idp)*length(nmax), mode = "list" )
k <- 0
for(i in idp) {
for(j in nmax) {
print(paste(i, j))
# Iterator
k <- k + 1
E_idw= idw(E ~ 1, X, grid, nmax = i, idp = j) %>% as.data.frame()
df_interp[[k]] <- select(E_idw, x,y,E_pred=var1.pred)
}
}
return(df_interp)
})
# ==============================================
Some plausibility checks (lapply is applied to 8 list elements and 20 variations are calculated):
length(lst_interp_idw) # 8
length(lst_interp_idw[[1]]) #20
length(lst_interp_idw[[1]]) #20
It should be easy for you to adapt the last line of your code
df_interp_idw <- bind_rows(lst_interp_idw, .id = "interact") %>%
separate(interact, c("id_rep", "perc", "f"), sep = "\\_")
to format the output in the desired format. This highly depends on how you want to present the different interpolation alternatives.

Subset list, keep names

I have list out like this:
u <- list(a = list(b = 1, c = 2),
x = list(k = list(ka = 1, kb = 3),
l = list(la = 1, la = 4)))
v <- list(a = list(b = 1, c = 2),
x = list(m = list(ma = 5, mb = 8),
n = list(na = 5, nb = 8)))
w <- list(a = list(b = 1, c = 2),
x = list(o = list(oa = 4, ob = 1),
p = list(pa = 8, pb = 0)))
out <- list(u, v, w)
I would like to create another list where there are elements k, l, m, n, o, p and names of the list elements are preserved. I found a solution, but looks sub-optimal:
x <- lapply(out, function(y) y[['x']])
o <- list()
for (a in x) {
o <- c(o, a)
}
> str(o, max.level = 1)
List of 6
$ k:List of 2
$ l:List of 2
$ m:List of 2
$ n:List of 2
$ o:List of 2
$ p:List of 2
Is there a better way?
The loop could be replaced with unlist:
res <- unlist( lapply(out,"[[","x"), recursive=FALSE)
identical(res,o)
# [1] TRUE
My lapply is the same as in the OP; it's just a shortcut.
As #akrun suggested, you could more closely mirror the OP's loop with
do.call("c", lapply(out, '[[', 'x'))

Resources