Does preallocation of R list improve loop run time? how? - r

I am running a simulation in R, in which the outputs should be stored in numeric vectors in a variable of the type list. However, I am wondering why when I preallocated the list with numeric vectors, the computational time remains the same instead of reducing. My code is similar to the following hypothetical cases in which I have to use nested loops and store the results in the list.
Here is the code for the case without preallocation:
n_times <- 5000
my_list <- list()
Sys.time()
start_time <- Sys.time()
for( i in 1:n_times){
for (j in 1:10){
df <- data.frame(y = rnorm(n = 200, mean = sample.int(10,1), sd = 4),
x1 = rnorm(n = 200, mean = sample.int(10,1), sd = 1),
x2 = rnorm(n = 200, mean = sample.int(10,1), sd = 4))
model <- lm(y ~ x1 + x2, data = df)
my_list[[as.character(j)]][i] <- summary(model)$r.squared
}
}
end_time <- Sys.time()
end_time - start_time
and here is the code for the case with preallocation:
# number of times the simulation to be run
n_times <- 5000
# preallocating the list of length 10 with numeric vectors of length n_times
my_list <- replicate(10, vector("numeric", n_times), simplify = F)
names(my_list) <- as.character(1:10)
Sys.time()
start_time <- Sys.time()
for( i in 1:n_times){
for (j in 1:10){
df <- data.frame(y = rnorm(n = 200, mean = sample.int(10,1), sd = 4),
x1 = rnorm(n = 200, mean = sample.int(10,1), sd = 1),
x2 = rnorm(n = 200, mean = sample.int(10,1), sd = 4))
model <- lm(y ~ x1 + x2, data = df)
my_list[[as.character(j)]][i] <- summary(model)$r.squared
}
}
end_time <- Sys.time()
end_time - start_time

I think preallocating a list with just 5000 * 10 elements doesn't take much time , after profiling you code most time goes to lm and data.farme creations , see below

Related

Loop a function in r to create a new table

I have a dataframe in r and want to perform the levene's/ variance test on multiple variables with two groups and save all results in a table. I have tried to do this using a for() loop and sapply() but I get neither working:
df <- data.frame(
x = rnorm(100, 0, 1),
y = rnorm(100, 50, 1),
z = rnorm(100, 70, 2),
group = rep(c(0,1), each = 50)
)
varlist <- c("x","y","z")
res.var <- character(length(varlist))
res.f <- numeric(length(varlist))
res.p <- numeric(length(varlist))
Option 1)
for(i in seq_along(varlist)) {
form <- substitute(i ~ group, list(i = as.name(varlist)))
result <- var.test(
formula = form,
data = df)
res.var[i] <- varlist[i]
res.f[i] <- result$estimate
res.p[i] <- result$p.value
}
Option 2:
sapply(varlist, function(x) {
form <- substitute(i ~ group, list(i = as.name(varlist)))
result <- var.test(
formula = form,
data = df)
res.var[i] <- varlist[i]
res.f[i] <- result$estimate
res.p[i] <- result$p.value
})
Maybe there's an easier way to that this. I'd be glad for any help ;o) Thank you in advance.

Why does function return NULL?

A beginner in R over here, so apologies for the basic question.
Why does ATE return a null vector instead of saving the values of the difference of the means?
fun.cluster <- function(M, N){
set.seed(02139)
J <- 1:M # vector J_i
df <- as.data.frame(matrix(data=1:N, nrow = N, ncol = 1)) #data frame of all original values
df$cluster <- cut(df$V1, M, labels = 1:M) #breaking the dataframe into clusters
df$cluster <- as.numeric(df$cluster)
Y1 <- as.vector(sample(J, 5)) # assigning treatment
df$treatment <- ifelse(df$cluster %in% Y1, df$treatment <- 1, df$treatment <- 0)
#Inducing intracluster correlation:
mu_0j <- runif(n = 50, min = -1, max = 1)
df$V1[df$treatment==0] <- mu_0j
mu_1j <- runif(n=50, min = -0.5, max = 1.5)
df$V1[df$treatment==0] <- mu_1j
# drawing values
y_0i <- rnorm(n = 50, mean = mu_0j, sd = 1)
y_1i <- rnorm(n = 50, mean = mu_1j, sd = 1)
D_i <- as.vector(c(y_0i, y_1i))
# calculating ATE:
ATE[i] <- mean(y_1i - y_0i)
}
ATE <- c()
for(i in 1:10){
fun.cluster(M = 10, N = 100)
}

Count with Varying Levels Parameters in R

I want R to count how many times my simulated ARIMA data conform to ARIMA(1,0,0) which I have achieved with:
library(forecast)
library(forecast)
cnt <- 0
num <- 60
phi <- 0.8
for(i in 1:10) {
epselon <- rnorm(num, mean=0, sd=1)
ar1 <- arima.sim(n = num, model=list(ar=phi, order = c(1, 0, 0)),
sd=1)
ar2 <- auto.arima(ar1)
if(all(arimaorder(ar2) == c(1, 0, 0))) cnt <- cnt + 1}
cnt
The above is just for a single case when sd=1, n=60, and ar=0.8.
I want a case when I have varying levels of N <- c(15, 20), SD <- c(1, 2) ^ 2, and phi = c(0.8, 0.9) for sample size, standard diviation and AR parameter respectively.
I have traid this:
library(forecast)
N <- c(15, 20)
SD <- c(1, 2) ^ 2
phi = c(0.8, 0.9)
## generate all combos
all_combos <- expand.grid(N = N, SD = SD, phi = phi)
epselon = function(n) rnorm(n, mean = 0, sd = SD)
## create function
fx_arima <- function(n, SD, phi) {
cnt <- 0
num <- 60
phi <- 0.8
for(i in 1:10) {
epselon <- rnorm(num, mean=0, sd=1)
ar1 <- arima.sim(n = num, model=list(ar=phi, order = c(1, 0, 0)), sd=1)
ar2 <- auto.arima(ar1)
if(all(arimaorder(ar2) == c(1, 0, 0))) cnt <- cnt + 1}
cnt
}
## find arima for all combos using Map
set.seed(123L)
res = Map(fx_arima, all_combos[["N"]], all_combos[["SD"]],
all_combos[["phi"]])
## or a little bit more work:
set.seed(123L)
res2 = by(all_combos, all_combos["N"],
function(DF) {
res = mapply(fx_arima, DF[["N"]], DF[["SD"]], DF[["phi"]])
colnames(res) = paste("SD", DF[["SD"]], "phi", DF[["phi"]], sep = "_")
res
})
res2
## write to csv
Map(function(file, DF) write.csv(DF, paste0("N_", file, ".csv")),
names(res2), res2)
which I mirror from arima.sim() function with varying: sample sizes, phi values and sd values and R Count How Many Time auto.arima() Confirmarima.sim() to be True
I got this error message
Error in `colnames<-`(`*tmp*`, value = c("SD_1_phi_0.2", "SD_4_phi_0.2", : attempt to set 'colnames' on an object with less than two dimensions
Traceback:
How can I solve this such that will have my result to show in varying form suuch that first row will be the label while the second row will be the count itself. The result will in two sheets; the first will be for 'N=15' and the second will be for 'N=20'.
If I understood your problem correctly, the error comes from function colnames because your function does not return a "pure" matrix-like object. If, instead, you use the function names in your last chunk of code as follows:
res2 = by(all_combos, all_combos["N"],
function(DF) {
res = mapply(fx_arima, DF[["N"]], DF[["SD"]], DF[["phi"]])
names(res) = paste("SD", DF[["SD"]], "phi", DF[["phi"]], sep = "_")
return(res)
})
res2
You will get:
> res2
N: 15
SD_1_phi_0.8 SD_4_phi_0.8 SD_1_phi_0.9 SD_4_phi_0.9
1 3 7 5
---------------------------------------------------------------------------
N: 20
SD_1_phi_0.8 SD_4_phi_0.8 SD_1_phi_0.9 SD_4_phi_0.9
3 4 5 2
With elements accessible by name and index:
> res2$`15`["SD_1_phi_0.8"]
SD_1_phi_0.8
1
> res2$`15`[1]
SD_1_phi_0.8
1

arima.sim() function with varying: sample sizes, phi values and sd values

I want to simulate ARIMA(1,1,0) with varying:
sample sizes
phi values
standard deviation values.
I admire how the bellow r code is simulating just one ARIMA(1,1,0) which I want to follow the format to simulate many ARIMA(1,1,0) with varying sample sizes, phi values and standard deviation values
wn <- rnorm(10, mean = 0, sd = 1)
ar <- wn[1:2]
for (i in 3:10){
ar<- arima.sim(n=10,model=list(ar=-0.7048,order=c(1,1,0)),start.innov=4.1,n.start=1,innov=wn)
}
I have asked a similar question here and given a good answer based on my question, but now I see that arima.sim() function is indispensable in simulating ARIMA time series and therefore want to incorporate it into my style of simulating ARIMA time series.
I come up with this trial that uses arima.sim() function to simulate N=c(15, 20) ARIMA(1,1,0) time series with varying sample sizes, standard deviation values and phi values by first generating N random number and then using the initial two random number to be the first two ARIMA(1,1,0). The 3rd to **n**th are the made to followARIMA(1,1,0)`.
Here is what I have tried bellow:
N <- c(15L, 20L)
SD = c(1, 2) ^ 2
phi = c(0.2, 0.4)
res <- vector('list', length(N))
names(res) <- paste('N', N, sep = '_')
set.seed(123L)
for (i in seq_along(N)){
res[[i]] <- vector('list', length(SD))
names(res[[i]]) <- paste('SD', SD, sep = '_')
ma <- matrix(NA_real_, nrow = N[i], ncol = length(phi))
for (j in seq_along(SD)){
wn <- rnorm(N[i], mean = 0, sd = SD[j])
ar[[1:2, ]] <- wn[[1:2]]
for (k in 3:N[i]){
ar[k, ] <- arima.sim(n=N[[i]],model=list(ar=phi[[k]],order=c(1,1,0)),start.innov=4.1,n.start=1,innov=wn)
}
colnames(ar) <- paste('ar_theta', phi, sep = '_')
res[[i]][[j]] <- ar
}
}
res1 <- lapply(res, function(dat) do.call(cbind, dat))
sapply(names(res1), function(nm) write.csv(res1[[nm]],
file = paste0(nm, ".csv"), row.names = FALSE, quote = FALSE))
The last two lines write the time series data in .csv and save it in my working directory.
Here may be a method using Map. Please edit your post to include expected output if this does not meet your requirements.
N <- c(15L, 20L)
SD <- c(1, 2) ^ 2
phi = c(0.2, 0.4)
## generate all combos
all_combos <- expand.grid(N = N, SD = SD, phi = phi)
## create function
fx_arima <- function(n, SD, phi) {
arima.sim(n = n,
model=list(ar=phi, order = c(1, 1, 0)),
start.innov = 4.1,
n.start = 1,
rand.gen = function(n) rnorm(n, mean = 0, sd = SD))[-1L]
}
## find arima for all combos using Map
set.seed(123L)
res = Map(fx_arima, all_combos[["N"]], all_combos[["SD"]], all_combos[["phi"]])
## or a little bit more work:
set.seed(123L)
res2 = by(all_combos, all_combos["N"],
function(DF) {
res = mapply(fx_arima, DF[["N"]], DF[["SD"]], DF[["phi"]])
colnames(res) = paste("SD", DF[["SD"]], "phi", DF[["phi"]], sep = "_")
res
})
res2
## write to csv
Map(function(file, DF) write.csv(DF, paste0("N_", file, ".csv")), names(res2), res2)

How can I add an event with matrix data in ode solver

I have a differential equation model that is running on a network of interactions. Nodes connect to food and can take food at a rate dependent on the size of the food (see first chunk of code).
changes <- function(t, state_a, parameters){
with(as.list(c(state_a, parameters)),{
r <- rowSums(n_mat * food)
dN <- matrix(r * state_a,3,1)
list(c(dN))
})
}
food <- c(0,0.2,0.5)
n_vec <- c(0,0,1,1,0,0,0,1,0)
n_mat <- matrix(n_vec, 3 ,3)
times <- seq(0, 10, by = 1)
state_a <- runif(3, 0, 1000)
parameters <- c(n_mat, food)
out <- ode (y = state_a,
times = times,
func = changes, parms = parameters)
However, I'd like to be able to change the size of the food over time, whilst the differential equations are runnning. For example, if the food looks like the below code (where each row is a timepoint and each column is a food source). It looks like this is possible with using events in the ode solver, but I can't figure out how to do this when I have a matrix of parameters to change, rather than just a single parameter. Is there a good way to do this?
food <- rep(c(0,0.6,0.1,0.4,0.2,0.1,0.2), 6)
food <- matrix(food[1:30],10,3)
colnames(food) <- 1:3
rownames(food) <- 1:10
Below is a working example of ode events where only a single parameter is being changed:
derivs <- function(t, var, parms) {
list(dvar = rep(0, 2))
}
yini <- c(v1 = 1, v2 = 2)
times <- seq(0, 10, by = 0.1)
eventdat <- data.frame(var = c("v1", "v2", "v2", "v1"),
time = c(1, 1, 5, 9) ,
value = c(1, 2, 3, 4),
method = c("add", "mult", "rep", "add"))
eventdat
out <- vode(func = derivs, y = yini, times = times, parms = NULL,
events = list(data = eventdat))
New, but not working code:
calc_food_mat <- function(t, food_df){
return(food_df[which(food_df$time == floor(t)),2] + ((food_df[which(food_df$time == ceiling(t)),2] - food_df[which(food_df$time == floor(t)),2]) * (t - floor(t))))
}
changes <- function(t, state_a, parameters){
with(as.list(c(t, state_a, parameters)),{
food <- calc_food_mat(t, food_df)
r <- rowSums((n_mat * food)[drop = FALSE])
dN <- r * state_a
list(c(dN))
})
}
seasonl <- 40
foodsize <- 4000
foods <- 3
food_seq <- append(seq(foodsize/5, foodsize, foodsize/5), rev(seq(foodsize/5, foodsize, foodsize/5)))
start <- round(runif(foods, -0.5, seasonl - length(food_seq) + 0.5))
food_mat <- matrix(0, foods, seasonl)
for (i in 1:length(start)){
food_mat[i,(start[i]+1):(start[i]+length(food_seq))] <- food_seq
}
food_mat <- data.frame(food_mat)
colnames(food_mat) <- 1:seasonl
rownames(food_mat) <- 1:foods
food_df <- food_mat %>%
gather (key = time, value = resources)
n_vec <- c(0,0,1,1,0,0,0,1,0)
n_mat <- matrix(n_vec, 3 ,3)
times <- seq(0, 40, by = 1)
state_a <- runif(3, 0, 1000)
parameters <- c(n_mat, food_df)
out <- ode (y = state_a,
times = times,
func = changes, parms = parameters)

Resources