For in function in R for different groups of rows - r

I have the following R objects:
y <- sample(c(0,2,2),1000,replace=T)
X <- matrix(runif(2000,0,2),ncol=2,byrow=T)*2
XX = t(X)%*%X
XY = as.numeric(t(X)%*%y)
YY = as.numeric(t(y)%*%y)
How can I run it to get several XX, XY and YY objects calculated with the first 10 rows, others with the rows 11 to 20, etc...?? Any ideas with a for in loop?
Thank you!

We can create lists to store different outputs. Create a sequence with a step of 10 and calculate the result in for loop.
len <- length(y)/10
XX_list <- vector('list', len)
XY_list <- vector('list', len)
YY_list <- vector('list', len)
vals <- seq(1, length(y), 10)
for(i in seq_along(vals)) {
inds <- vals[i]:(vals[i] + 9)
XX_list[[i]] <- t(X[inds, ]) %*% X[inds, ]
XY_list[[i]] = as.numeric(t(X[inds, ])%*% y[inds])
YY_list[[i]] = as.numeric(t(y[inds])%*% y[inds])
}

Related

How to make a double loop for glm, using two data frames (one for dependent and another for independent variables)?

I feel that I am just little bit off with my code but cannot figure out how to make it work. I am trying to use all the columns in one data frame as an independent variable and all the columns in another as dependent (to run multiple single variable models) I'll greatly appreciate any suggestions.
A <- as.data.frame( matrix(rnorm(1:(250*4)), ncol = 4) )
colnames(a) <- paste0("A", 1:ncol(a))
B <- as.data.frame( matrix(rnorm(1:(250*6)), ncol = 6) )
model_<-list()
results_<-list()
for (i in 1:ncol(A)){
for (j in 1:ncol(B)){
model_<-glm(A[,i]~B[,j], family=quasipoisson){
results_<-lapply(model_, function(x) anova(x, test="F"))
}
}
}
You can initialise the list with fixed length. Keep track of an index to store data in a list.
A <- as.data.frame( matrix(abs(rnorm(1:(250*4))), ncol = 4) )
colnames(A) <- paste0("A", 1:ncol(A))
B <- as.data.frame( matrix(abs(rnorm(1:(250*6))), ncol = 6) )
model_<- vector('list', ncol(A) * ncol(B))
results_<- vector('list', ncol(A) * ncol(B))
k <- 1
for (i in 1:ncol(A)){
for (j in 1:ncol(B)){
model_[[k]] <-glm(A[,i]~B[,j], family=quasipoisson)
results_[[k]] <-anova(model_[[k]], test="F")
k <- k + 1
}
}

Optimizing an R code with For Loop with matrix

I need to convert the following code to one with for loop, what is the easiest way to do it?
set.seed(123)
iter <- 1000
s1 <- 2
mat1 <- matrix(data = rcauchy(iter*s1,0,1),nrow = iter,ncol = s1)
sets1 <- apply(mat1,1,median)
hist(sets1)
s2 <- 5
mat2 <- matrix(data = rcauchy(iter*s2,0,1),nrow = iter,ncol = s2)
sets2 <- apply(mat2,1,median)
hist(sets2)
s3 <- 10
mat3 <- matrix(data = rcauchy(iter*s3,0,1),nrow = iter,ncol = s3)
sets3 <- apply(mat3,1,median)
hist(sets3)
s4 <-20
mat4 <- matrix(data = rcauchy(iter*s4,0,1),nrow = iter,ncol = s4)
sets4 <- apply(mat4,1,median)
hist(sets4)
I tried the following:
set.seed(1234)
iter <- 1000
size <- c(2,5,10,20)
for(i in 2:size){
for (j in 1:iter){
mat[] <- matrix(data = rcauchy(i*j,0,1),nrow=iter,ncol=i)
s <- apply(mat,1,median)
hist(s)
}
}
But it does not work, please help
The easies way is to wrap the creation of the matrix into a lapply function.
set.seed(123)
iter <- 1000
size <- c(2,5,10,20)
returnmatrix<-lapply(size, function(i){
mat<-matrix(data = rcauchy(i*iter,0,1),nrow=iter,ncol=i)
s <- apply(mat,1,median)
hist(s, main=paste("Histogram when S=", i))
mat
})
The lapply function will plot the histograms and will return the matrixes as list if additional processing is desired.

how can i get list of all partition with K dimension R

i'm using this code:
library("partitions")
x <- c(2,4,6)
parts <- listParts(length(x))
out <- rapply(parts, function(ii) x[ii], how="replace")
to calculate list vector of all partition, but i would be like list of partition with k dimension, for example:
k=2
{(2),(4,6)}{(4),(2,6)}{(6),(2,4)}
Maybe there are better ways of doing this but the following does what you want.
library(partitions)
funParts <- function(x, k){
parts <- listParts(length(x))
res <- lapply(parts, function(inx) sapply(inx, function(i) x[i]))
res <- unlist(res, recursive = FALSE)
res <- res[sapply(res, length) <= k]
unique(res)
}
x <- c(2,4,6)
k <- 2
funParts(x, 2)
funParts(x, 1)
funParts(4:10, 3)

For loops for nested variables within function in R

I would like to iterate through vectors of values and calculate something for every value while being within a function environment in R. For example:
# I have costs for 3 companies
c <- c(10, 20, 30)
# I have the same revenue across all 3
r <- 100
# I want to obtain the profits for all 3 within one variable
result <- list()
# I could do this in a for loop
for(i in 1:3){
result[i] <- r - c[i]
}
Now lets assume I have a model that is very long and I define everything as a function which is to be solved with various random draws for the costs.
# Random draws
n <- 1000
r <- rnorm(n, mean = 100, sd = 10)
c1 <- rnorm(n, mean = 10, sd = 1)
c2 <- rnorm(n, mean = 20, sd = 2)
c3 <- rnorm(n, mean = 30, sd = 3)
X <- data.frame(r, c1, c2, c3)
fun <- function(x){
r <- x[1]
c <- c(x[2], x[3], x[4])
for(i in 1:3){
result[i] <- r - c[i]
}
return(result)
}
I could then evaluate the result for all draws by iterating through the rows of randomly sampled input data.
for(j in 1:n){
x <- X[j,]
y <- fun(x)
}
In this example, the output variable y would entail the nested result variable which comprises of the results for all 3 companies. However, my line of thinking results in an error and I think it has to do with the fact that I try to return a nested variable? Hence my question how you guys would approach something like this.
I would suggest rethinking your coding approach. This is a very un-R-like way of doing things.
For example, the first for loop can be written much more succinctly as
x <- c(10, 20, 30)
r <- 100
result <- lapply(-x, `+`, r)
Then fun becomes something like
fun <- function(x) lapply(-x[-1], `+`, x[1])
To then operate over the rows of a data.frame (which is what you seem to do in the last step), you can use something like
apply(X, 1, fun)
where the MARGIN = 1 argument in apply ensures that you are applying a function per row (as opposed to per column).
Here's an approach using your function and a for loop:
# Random draws
n <- 1000
r <- rnorm(n, mean = 100, sd = 10)
c1 <- rnorm(n, mean = 10, sd = 1)
c2 <- rnorm(n, mean = 20, sd = 2)
c3 <- rnorm(n, mean = 30, sd = 3)
X <- data.frame(r, c1, c2, c3)
result <- list()
fun <- function(x){
r <- x[[1]]
c <- c(x[[2]], x[[3]], x[[4]])
for(i in 1:3){
result[i] <- r - c[i]
}
return(result)
}
# Create a list to store results
profits <- rep(rep(list(1:3)),nrow(X))
# Loop throuhg each row of dataframe and store in profits.
for(i in 1:nrow(X)){
profits_temp <-
fun(list(X[i,"r"],X[i,"c1"],X[i,"c2"],X[i,"c3"]))
for(j in 1:3)
profits[[i]][[j]] <- profits_temp[[j]]
}
# Eye results
profits[[1]]
#> [1] 93.23594 81.25731 70.27699
profits[[2]]
#> [1] 80.50516 69.27517 63.36439

Conditional replacement of values in an array

I want to modify an array but with an element-by-element condition. This is what I want to do
vector <- runif(18, 0,1)
xx <- array(vector, dim=c(2,3,3))
for (i in 1:2) {
for (j in 1:3) {
xx[i,j,1] <- ifelse(xx[i,j,1]<0.5,1,xx[i,j,1])
xx[i,j,2] <- ifelse(xx[i,j,2]<0.4,1.5,xx[i,j,2])
xx[i,j,3] <- ifelse(xx[i,j,3]<0.2,2,xx[i,j,3])
}
}
Is there a more efficient way to do it?
Thanks
Not sure what you mean by efficient but this avoids looping:
vector <- runif(18, 0,1)
xx <- array(vector, dim=c(2,3,3))
xx
xx[,,1][xx[,,1]<.5] <- 1
xx[,,2][xx[,,2]<.4] <- 1.5
xx[,,3][xx[,,3]<.2] <- 2
Try it online!
There are two ways that you could simplify this double loop
Option 1:
vector <- runif(18, 0,1)
xx <- array(vector, dim=c(2,3,3))
xx[,,1][xx[,,1]<.5] = 1
xx[,,2][xx[,,2]<.4] = 1.5
xx[,,3][xx[,,3]<.2] = 2
You still have to write one line for each condition, though.
The second way is to use lapply, but in this case you have to create three vectors: index, threshhold, substitution
idx = 1:3
thr = c(.5, .4, .2)
sb = c(1, 1.5, 2)
lapply(idx, function(k){
xx[,,k][ xx[,,k]< thr[x] ] <<- sb[k]
})

Resources