for loop with lists R - r

I want to create two lists of data frames in a for loop, but I cannot use assign:
dat <- data.frame(name = c(rep("a", 10), rep("b", 13)),
x = c(1,3,4,4,5,3,7,6,5,7,8,6,4,3,9,1,2,3,5,4,6,3,1),
y = c(1.1,3.2,4.3,4.1,5.5,3.7,7.2,6.2,5.9,7.3,8.6,6.3,4.2,3.6,9.7,1.1,2.3,3.2,5.7,4.8,6.5,3.3,1.2))
a <- dat[dat$name == "a",]
b <- dat[dat$name == "b",]
samp <- vector(mode = "list", length = 100)
h <- list(a,b)
hname <- c("a", "b")
for (j in 1:length(h)) {
for (i in 1:100) {
samp[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
assign(paste("samp", hname[j], sep="_"), samp[[i]])
}
}
Instead of lists named samp_a and samp_b I get vectors which contain the result of the 100th sample. I want to get a list samp_a and samp_b, which have all the different samples for dat[dat$name == a,] and dat[dat$name == a,].
How could I do this?

How about creating two different lists and avoiding using assign:
Option 1:
# create empty list
samp_a <-list()
samp_b <- list()
for (j in seq(h)) {
# fill samp_a list
if(j == 1){
for (i in 1:100) {
samp_a[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
}
# fill samp_b list
} else if(j == 2){
for (i in 1:100) {
samp_b[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
}
}
}
You could use assign too, shorter answer:
Option 2:
for (j in seq(hname)) {
l = list()
for (i in 1:100) {
l[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
}
assign(paste0('samp_', hname[j]), l)
rm(l)
}

You could easily use an lapply for this using the rep function. Unless you want a random x, paired with a random y. This will maintain the existing paired order.
dat <- data.frame(name = c(rep("a", 10), rep("b", 13)),
x = c(1,3,4,4,5,3,7,6,5,7,8,6,4,3,9,1,2,3,5,4,6,3,1),
y = c(1.1,3.2,4.3,4.1,5.5,3.7,7.2,6.2,5.9,7.3,8.6,6.3,4.2,3.6,9.7,1.1,2.3,3.2,5.7,4.8,6.5,3.3,1.2))
a <- dat[dat$name == "a",]
b <- dat[dat$name == "b",]
h <- list(a,b)
hname <- c("a", "b")
testfunc <- function(df) {
#df[sample(nrow(df), nrow(df)*0.5), ] #gives you the values in your data frame
sample(nrow(df), nrow(df)*0.5) # just gives you the indices
}
lapply(h, testfunc) # This gives you the standard lapply format, and only gives one a, and one b
samp <- lapply(rep(h, 100), testfunc) # This shows you how to replicate the function n times, giving you 100 a and 100 b data.frames in a list
samp_a <- samp[c(TRUE, FALSE)] # Applies a repeating T/F vector, selecting the odd data.frames, which in this case are the `a` frames.
samp_b <- samp[c(FALSE, TRUE)] # And here, the even data.frames, which are the `b` frames.

Related

How to make a double loop for glm, using two data frames (one for dependent and another for independent variables)?

I feel that I am just little bit off with my code but cannot figure out how to make it work. I am trying to use all the columns in one data frame as an independent variable and all the columns in another as dependent (to run multiple single variable models) I'll greatly appreciate any suggestions.
A <- as.data.frame( matrix(rnorm(1:(250*4)), ncol = 4) )
colnames(a) <- paste0("A", 1:ncol(a))
B <- as.data.frame( matrix(rnorm(1:(250*6)), ncol = 6) )
model_<-list()
results_<-list()
for (i in 1:ncol(A)){
for (j in 1:ncol(B)){
model_<-glm(A[,i]~B[,j], family=quasipoisson){
results_<-lapply(model_, function(x) anova(x, test="F"))
}
}
}
You can initialise the list with fixed length. Keep track of an index to store data in a list.
A <- as.data.frame( matrix(abs(rnorm(1:(250*4))), ncol = 4) )
colnames(A) <- paste0("A", 1:ncol(A))
B <- as.data.frame( matrix(abs(rnorm(1:(250*6))), ncol = 6) )
model_<- vector('list', ncol(A) * ncol(B))
results_<- vector('list', ncol(A) * ncol(B))
k <- 1
for (i in 1:ncol(A)){
for (j in 1:ncol(B)){
model_[[k]] <-glm(A[,i]~B[,j], family=quasipoisson)
results_[[k]] <-anova(model_[[k]], test="F")
k <- k + 1
}
}

R - lapply on deep nested elements of list

I have a large list that stored measurements (a product of other lapply() runs). I now want to gather these measurements and calculate median/mean/sd etc but I don't know how to access them. The structure of this list is like this:
foo[[i]][[j]][[k]][[1]]
foo[[i]][[j]][[k]][[2]]$bar
I can't figure out a function that would return e.g. mean of $bar (but not of $x) and keep relation the values of the indices i,j,k.
A sample list can be generated with the following R code:
library(purrr)
metrics <- function(y){
tt10r <- median(y)
list(y, flatten(list(bar = tt10r)))
}
example_list <- list()
for (i in 1:10)
{
v <- list()
for (j in 1:10)
{
w <- 1:10
v[j] <- list(w)
}
example_list[[i]] <- v
}
foo <- list()
for (i in 1:length(example_list))
{
u <- list()
values <- list()
for (j in 1:length(example_list[[i]]))
{
u[[j]] <- lapply(example_list[[i]][[j]], function(x) mean(x))
values[[j]] <- lapply(u[[j]], function(x) metrics(x))
}
foo[[i]] <- values
}
The following code works nicely, but I am not sure if it is efficient (loops!). Gives the anticipated result:
final <- matrix(nrow = tail(cumsum(unlist(lapply(foo, function(x) lengths(x) -2))), n=1), ncol = 3)
final <- data.frame(final)
j=1
i=1
all_js <- c(0, cumsum(lengths(foo)))
starts <- c(0, cumsum(unlist(lapply(foo, function(x) lengths(x) -2)))) + 1
ends <- c(0, cumsum(unlist(lapply(foo, function(x) lengths(x) -2))))
for (i in 1:length(foo))
{
a <- foo[[i]]
for (j in 1:length(a))
{
b <- a[[j]]
data <- unlist(lapply(lapply(b[1], '[', 2), '[[', 1))
for (k in 2:c(length(b)-2))
{
data <- rbind(data,unlist(lapply(lapply(b[k], '[', 2), '[[', 1)))
}
row.names(data) <- NULL
colnames(final) <- c("i", "j", colnames(data))
first <- starts[all_js[i] + j]
last <- ends[all_js[i] + j+1]
final[first:last,] <- data.frame(cbind(i = i, j = j, data))
}
}

Loop for value matching won't work across data frames for multiple instances

Can anyone tell me what’s preventing this loop from running?
For each row i, in column 3 of the data frame ‘depth.df’, the loop preforms a mathematical function, using a second data frame, 'linker.df' (it multiplies i by a constant / a value from linker.df which is found by matching the value of i.
If I run the loop for a single instance of i, (lets say its = 50) it runs fine:
cor.depth <- function(depth.df){
result <- seq(from=1, to=(nrow(depth.df)))
x <- 8971
for(i in 1:nrow(depth.df)){
result[i] <- depth.df[i,3]*(x /( linker.df [i,2][ linker.df [i,1] == 50]))
return(result)
}
}
>97,331
but if I run it to loop over each instance of i, it always returns an error:
cor.depth <- function(depth.df){
result <- seq(from=1, to=(nrow(depth.df)))
x <- 8971
for(i in 1:nrow(depth.df)){
result[i] <- depth.df[i,3]*(x /( linker.df [i,2][ linker.df [i,1] %in% depth.df[i,3]]))
return(result)
}
}
Error in result[i] <- depth.df[i, 3] * (all_SC_bins/(depth.ea.bin.all[, :
replacement has length zero
EDIT
Here is a reproducible data set provided to illustrate data structure and issue
#make some data as an example
#make some data as an example
linker.data <- sample(x=40:50, replace = FALSE)
linker.df <- data.frame(
X = linker.data
, Y = sample(x=2000:3000, size = 11, replace = TRUE)
)
depth.df <- data.frame(
X = sample(x=9000:9999, size = 300, replace = TRUE)
, Y = sample(x=c("A","G","T","C"), size = 300, replace = TRUE)
, Z = sample(linker.data, size = 300, replace = TRUE)
)
cor.depth <- function(depth.df){
result <- seq(from=1, to=(nrow(depth.df)))
x <- 8971
for(i in 1:nrow(depth.df)){
result[i] <- depth.df[i,3]*(x /( linker.df [i,2][ linker.df [i,1] %in% depth.df[i,3]]))
return(result)
}
}
Error emerges because denominator returns integer(0) or numeric(0) or a FALSE result on most rows. Your loop attempts to find exact row number, i, where both dataframes' respective X and Z match. Likely, you intended where any of the rows match which would entail using a second, nested loop with an if conditional on matches.
cor.depth <- function(depth.df){
result <- seq(from=1, to=(nrow(depth.df)))
x <- 8971
for(i in 1:nrow(depth.df)){
for (j in 1:nrow(linker.df)){
if (linker.df[j,1] == depth.df[i,3]) {
result[i] <- depth.df[i,3]*(x /( linker.df[j,2]))
}
}
}
return(result)
}
Nonetheless, consider merge a more efficient, vectorized approach which matches any rows between both sets on ids. The setNames below renames columns to avoid duplicate headers:
mdf <- merge(setNames(linker.df, paste0(names(linker.df), "_l")),
setNames(depth.df, paste0(names(depth.df), "_d")),
by.x="X_l", by.y="Z_d")
mdf$result <- mdf$X_l * (8971 / mdf$Y_l)
And as comparison, the two approaches would be equivalent:
depth.df$result <- cor.depth(depth.df)
depth.df <- with(depth.df, depth.df[order(Z),]) # ORDER BY Z
mdf <- with(mdf, mdf[order(X_l),]) # ORDER BY X_L
all.equal(depth.df$result, mdf$result)
# [1] TRUE

Extract data by for loop

I have a large number of matrix, calls train$X which is a binary data with 1 and 0
I want to extract and make another two lists which contains 1 as list1 and 0 as list2 by using for loop
my R code is not working
X <- c(0,1,0,1,0,1)
Y <- c(1,1,1,1,1,0)
train<- as.matrix (cbind(X,Y))
list1 <- list()
list2 <- list()
for(i in 1:length(train)) {
if(train[i]== 1)
list1 = train[i]
else
list2 = train[i]
}
Therefore
list1 contains (1,1,1,1,1,1,1)
list2 contains (0,0,0,0)
I don't think the best way is a loop, it isn't a list that you want but a vector object, I propose to use == on a matrix as following :
X <- c(0,1,0,1,0,1)
Y <- c(1,1,1,1,1,0)
train <- cbind(X,Y)
v1 <- train[train == 1]
v2 <- train[train == 0]
If you really want a loop :
v1 <- c() # It is not necessary
v2 <- c() # It is not necessary
for(i in 1:nrow(train)){
for(j in 1:ncol(train)){
if(train[i, j] == 1){
v1 <- c(v1, train[i, j])
}else{
v2 <- c(v2, train[i, j])
}
}
}
A last solution but not least :
v1 <- rep(1, sum(train == 1))
v2 <- rep(0, sum(train == 0))
So their is a lot of differents solutions to do it.

apply function in R

I want to apply a user defined function over a matrix object.
I don't have desired results my input is a 4x4 matrix and I want to get as output a 4x4 matrix with the transformation defined in mapfun function
Where is my error?
Thanks in advance
mapfun <- function(val){
if (val == 1){
res <- "A" }
else{
if (val == 2){
res <- "B"
} else
{
if (val == 3){
res <- "C"
} else
{
res <- "D"
}
}
}
return(res)
}
mat1 <- matrix(sample(c(1,2,3,4), 16, replace=T, prob=c(0.25,0.25,0.25,0.25)),
nrow=4, ncol=4)
mat2 <- apply(mat1, 1, FUN=mapfun)`
We can just the numeric values in 'mat1' as index to replace the corresponding 'LETTERS'. The output will be a vector which can be converted back to a matrix by assigning dim.
`dim<-`(LETTERS[mat1], dim(mat1))
Regarding the Warning message in the mapfun, it would be better to use ifelse instead of if/else as we are dealing with a vector of length greater than 1 in each row of 'mat1'.
mapfun <- function(val){
ifelse(val == 1, 'A',
ifelse(val==2, 'B',
ifelse(val==3, 'C', 'D')))
}
apply(mat1, 1, mapfun)
This could probably work:
mat1[] <- mapply(mapfun,mat1)
Note that this will modify mat1, so you could make a copy of mat1 named mat2 and apply the function to mat2:
mat2<- mat1
mat2[] <- mapply(mapfun,mat1)
You could try this (fun) solution as well. First you coerce input value to a character vector and then use repeatedly replace function and return the result as a matrix again. Maybe there is a way to trick replace function so that it would be used only once.
mapfun <- function(val) {
res <- as.character(val)
res <- replace(x = res, list = which(res == "1"), values = "A")
res <- replace(x = res, list = which(res == "2"), values = "B")
res <- replace(x = res, list = which(res == "3"), values = "C")
res <- replace(x = res, list = which(res == "4"), values = "D")
return(matrix(res, ncol = 4)) # ncol(val)
}
mat1 <- matrix(sample(c(1,2,3,4), 16, replace=T, prob=c(0.25,0.25,0.25,0.25)),
nrow=4, ncol=4)
mat1
mapfun(mat1)

Resources