R Loop over triple nested list - r

I have got a triple nested list in R. The structure is like my testlist down below:
df_yes = data.frame(replicate(2,sample(0:130,30,rep=TRUE)))
df_no = data.frame(replicate(2,sample(0:130,20,rep=TRUE)))
s1 = list(df_yes, df_no)
names(s1) = c("yes", "no")
df_yes = data.frame(replicate(2,sample(0:130,25,rep=TRUE)))
df_no = data.frame(replicate(2,sample(0:130,15,rep=TRUE)))
s2 = list(df_yes, df_no)
names(s2) = c("yes", "no")
DJF = list(s1, s2)
names(DJF) = c("s1", "s2")
df_yes = data.frame(replicate(2,sample(0:130,60,rep=TRUE)))
df_no = data.frame(replicate(2,sample(0:130,10,rep=TRUE)))
s1 = list(df_yes, df_no)
names(s1) = c("yes", "no")
df_yes = data.frame(replicate(2,sample(0:530,25,rep=TRUE)))
df_no = data.frame(replicate(2,sample(0:230,15,rep=TRUE)))
s2 = list(df_yes, df_no)
names(s2) = c("yes", "no")
JJA = list(s1, s2)
names(JJA) = c("s1", "s2")
total_list = list(DJF, JJA)
names(total_list) = c("DJF", "JJA")
I want to add now $x3 and $x4 in the yes and no dataframes. The content should be the first number of x1 for x3) and the first number of x2 for x4.
I know how to do that with a single dataframe or a simple nested list:
df1 = total_list$DJF$s1$yes
df1$x3 = substr(df1$X1, 1,1)
df1$x4 = substr(df1$X2, 1,1)
Or with a loop in a normal list:
for(i in 1:length(df)){
df[[i]]$v3 = substr(df[[i]][,1], 1,1)}
But how can I access a triple nested list with a loop? Do I have to make a double loop with 2 variables, like [[i]][[k]][[1]]?

this will not the most suitable solution,hope this will work
N <- names(total_list)
for (i in 1:length(N)) {
name1 <- N[i]
product1 = total_list[[name1]]
K <- names(product1)
for (n in 1:length(K)) {
name2 <- K[n]
product2 = product1[[n]]
dfnames = names(product2)
for (l in dfnames) {
df_t = product2[[l]]
df_t$x3 = substr(df_t$X1, 1,1)
df_t$x4 = substr(df_t$X2, 1,1)
df_t$x3 <- as.numeric(df_t$x3)
df_t$x4 <- as.numeric(df_t$x4)
total_list[[name1]][[name2]][[l]] <- df_t
}
}
}

Related

Demography package issue with aggregating data

# Function to construct a mortality demogdata object from HMD
hmd.mx <- function(country, username, password, label=country){
path <- paste("https://www.mortality.org/hmd/", country, "/STATS/", "Mx_1x1.txt", sep = "")
}
userpwd <- paste(username, ":", password, sep = "")
txt <- RCurl::getURL(path, userpwd = userpwd)
con <- textConnection(txt)
mx <- try(utils::read.table(con, skip = 2, header = TRUE, na.strings = "."),TRUE)
close(con)
if(class(mx)=="try-error")
stop("Connection error at www.mortality.org. Please check username, password and country label.")
path <- paste("https://www.mortality.org/hmd/", country, "/STATS/", "Exposures_1x1.txt", sep = "")
userpwd <- paste(username, ":", password, sep = "")
txt <- RCurl::getURL(path, userpwd = userpwd)
con <- textConnection(txt)
pop <- try(utils::read.table(con, skip = 2, header = TRUE, na.strings = "."),TRUE)
close(con)
if(class(pop)=="try-error")
stop("Exposures file not found at www.mortality.org")
obj <- list(type="mortality",label=label,lambda=0)
obj$year <- sort(unique(mx[, 1]))
#obj$year <- ts(obj$year, start=min(obj$year))
n <- length(obj$year)
m <- length(unique(mx[, 2]))
obj$age <- mx[1:m, 2]
obj$rate <- obj$pop <- list()
for (i in 1:n.mort)
{ obj$rate[[i]] <- matrix(mx[, i + 2], nrow = m, ncol = n)
obj$rate[[i]][obj$rate[[i]] < 0] <- NA
obj$pop[[i]] <- matrix(pop[, i + 2], nrow = m, ncol = n)
obj$pop[[i]][obj$pop[[i]] < 0] <- NA
dimnames(obj$rate[[i]]) <- dimnames(obj$pop[[i]]) <- list(obj$age, obj$year)
}
names(obj$pop) = names(obj$rate) <- tolower(mnames)
obj$age <- as.numeric(as.character(obj$age))
if (is.na(obj$age[m])) {
obj$age[m] <- 2 * obj$age[m - 1] - obj$age[m - 2] }
return(structure(obj, class = "demogdata"))
}
Above is the code that we are using to import our population data into r.
NLdata <- hmd.mx(country = "NLD",username = "username",password="password")
This would be the specific code to obtain the Dutch data.
Would anyone happen to know how to add multiple countries into one, and put that data into one dataframe (same format as the demography data packages that we download)? So for example the mortality rates for the (Netherlands + France + Norway) / 3 into one package.
You can try this code. However I could not run your demography package. So you might need to edit the code a bit. Perhaps someone else can fill in the second part? I saw that no one has reacted yet.
C1 <- data.frame(Year = 1980:2018, value1 = rnorm(39), value2 = rnorm(39), Cat =rbinom(39,1,0.5), Country = "France")
C2 <- data.frame(Year = 1980:2018, value1 = rnorm(39), value2 = rnorm(39), Cat =rbinom(39,1,0.5),Country = "England")
C3 <- data.frame(Year = 1970:2018, value1 = rnorm(49), value2 = rnorm(49), Cat =rbinom(49,1,0.5),Country = "Netherlands")
C1 <- split(C1, C1$Cat)
C2 <- split(C2, C2$Cat)
C3 <- split(C3, C3$Cat)
list_all <- list(rbind(C1[[1]],C2[[1]],C3[[1]]),rbind(C1[[2]],C2[[2]],C3[[2]]))
Final_list <- lapply(list_all, function(x) x %>% group_by(Year) %>% summarise(Val1 = mean(value1), Val2 = mean(value2), Country = "All") %>% as.data.frame)

Loop or batch process through a list of CSV files in R

I would like to loop through a list of CSV files:
Macro <- read.csv("P:/R/R_Input/JWN_Input.csv")
Macro <- read.csv("P:/R/R_Input/BBY_Input.csv")
...
That also output to a respective CSV file:
write.csv(a, "P:/Model_Output/JWN.csv", row.names = F, na="")
write.csv(a, "P:/Model_Output/BBY.csv", row.names = F, na="")
...
The above two items are the only unique input/out. The body of the code is below. I am attempting to essentially batch process the input/output CSV files using the body of code below.
Macro <- read.csv("P:/R/R_Input/JWN_Input.csv")
# train set up
ctrl <- caret::trainControl(method = "timeslice", initialWindow = 8, horizon = 1,
fixedWindow = FALSE, savePredictions = TRUE)
# Loads all variable names from Macro and Macro2
vars_macro = names(Macro)[!names(Macro) %in% c("qtrs", "y", "s1", "s2", "s3")]
vars_macro2 = names(Macro2)[!names(Macro2) %in% c("y", "s1", "s2", "s3")]
vars_macro3 = names(Macro3)[!names(Macro3) %in% c("y", "s1", "s2", "s3")]
vars = c(vars_macro, vars_macro2, vars_macro3)
# run lm
lst = foreach(var = vars) %dopar% {
if (var %in% vars_macro)
foo <- function(start, mod_formula) {
myfit <- caret::train(mod_formula, data = Macro[start:14, ,drop = FALSE],
method = "lm", trControl = ctrl)
c(myfit$pred) ## return; drop dimension as a vector
}
if (var %in% vars_macro2)
foo <- function(start, mod_formula) {
myfit <- caret::train(mod_formula, data = Macro2[start:14, ,drop = FALSE],
method = "lm", trControl = ctrl)
c(myfit$pred) ## return; drop dimension as a vector
}
if (var %in% vars_macro3)
foo <- function(start, mod_formula) {
myfit <- caret::train(mod_formula, data = Macro3[start:14, ,drop = FALSE],
method = "lm", trControl = ctrl)
c(myfit$pred) ## return; drop dimension as a vector
}
f = formula(paste0("y ~ ", var, "+ s1 + s2 + s3"))
Forecast <- sapply(1:6, foo, mod_formula = f)
F9 <- c(Forecast[[1,1]][1])
F10 <- c(Forecast[[1,1]][2], Forecast[[1,2]][1])
F11 <- c(Forecast[[1,1]][3], Forecast[[1,2]][2], Forecast[[1,3]][1])
F12 <- c(Forecast[[1,1]][4], Forecast[[1,2]][3], Forecast[[1,3]][2],
Forecast[[1,4]][1])
F13 <- c(Forecast[[1,1]][5], Forecast[[1,2]][4], Forecast[[1,3]][3],
Forecast[[1,4]][2], Forecast[[1,5]][1])
F14 <- c(Forecast[[1,1]][6], Forecast[[1,2]][5], Forecast[[1,3]][4],
Forecast[[1,4]][3], Forecast[[1,5]][2], Forecast[[1,6]][1])
A <-c((mean(F9)/Macro[9:9,2:2]-1), (mean(F10)/Macro[10:10,2:2]-1),
(mean(F11)/Macro[11:11,2:2]-1), (mean(F12)/Macro[12:12,2:2]-1),
(mean(F13)/Macro[13:13,2:2]-1),(mean(F14)/Macro[14:14,2:2]-1))
Temp <- mean(abs(A[0:5]))
P <-c((mean(F9)/Macro[9:9,2:2]-1), (mean(F10)/Macro[10:10,2:2]-1),
(mean(F11)/Macro[11:11,2:2]-1), (mean(F12)/Macro[12:12,2:2]-1),
(mean(F13)/Macro[13:13,2:2]-1),(mean(F14)/Macro[14:14,2:2]-1),
Temp,(mean(F14)/(1+mean(A[3:5])))/Macro[14:14,2:2]-1)
#E <- scales::percent(P)
C <- c(mean(F9),mean(F10),mean(F11), mean(F12), mean(F13), mean(F14),
"abs error",mean(F14)/(1+mean(P[3:5])))
data.frame(C, P)
}
# Summary
model_error = as.character(sapply(lst, function(elt) elt$P[7]))
forecasts = as.numeric(as.character(sapply(lst, function(elt) elt$C[8])))
delta = as.character(sapply(lst, function(elt) elt$P[8]))
df = data.frame(Card = vars, Model_Avg_Error = model_error,
Forecast = forecasts, Delta = delta)
df$blankVar = NA
df_macro1 = df[df$Card %in% vars_macro,]
df_macro1$blankVar = NA
df_macro2 = df[df$Card %in% vars_macro2,]
df_macro2 = df_macro2[order(df_macro2$Model_Avg_Error),]
df_macro2$blankVar = NA
df_macro3 = df[df$Card %in% vars_macro3,]
df_macro3 = df_macro3[order(df_macro3$Model_Avg_Error),]
df_macro3$blankVar = NA
df_macro4 = df[df$Card %in% names(Macro4),]
df_macro4 = df_macro4[order(df_macro4$Model_Avg_Error),]
df = df[order(df$Model_Avg_Error),]
a = cbind.fill(df_macro1, df_macro2, df_macro3, df, df_macro4)
# save
write.csv(a, "P:/Model_Output/JWN.csv", row.names = F, na="")
Just create a list of the filenames and loop through them like this:
Files = c("JWN", "BBY")
for(f in Files) {
InFile = paste("P:/R/R_Input/", f, "_Input.csv", sep="")
OutFile = paste("P:/R/R_Input/", f, "_Output.csv", sep="")
Macro <- read.csv(InFile)
## All of that other code
write.csv(a, OutFile, row.names = F, na="")
}
Addendum based on comments:
Original Poster got an error:
Error in file(con, "w") : all connections are in use"
This was addressed by adding closeAllConnections() immediately after the write.csv statement.

R: Get index names while looping through df elements

Say, I have a data frame and I need to do something with its cells and remember what cells I have changed. One way is to loop through indices with two for-loops. But is there a way to do this with one loop?
Perfectly I need something like this:
changes = data.frame(Row = character(), Col = character())
for (cell in df){
if (!(is.na(df))){
cell = do.smt(cell)
temp = list(Row = get.row(cell), Col = get.col(cell))
changes = rbind(changes,temp)
}
}
Example of what I need:
df = data.frame(A = c(1,2,3), B = c(4,5,6), C = c(7,8,9))
rownames(df) = c('a','b','c')
changes = data.frame(Row = NA, Col = NA)
for (i in rownames(df)){
for (j in colnames(df)) {
if (df[i,j] > 5) {
df[i,j] = 0
temp = list(Row = i, Col = j)
changes = rbind(changes, temp)
}
}
}
This gets rid of both loops
df = data.frame(A = c(1,2,3), B = c(4,5,6), C = c(7,8,9))
rownames(df) = c('a','b','c')
changes <- which(df > 5, arr.ind=TRUE)
df[changes] <- 0
If you want the format exactly as specified you can sort that out with
changes <- data.frame(changes,row.names=NULL)
changes$row <- rownames(df)[changes$row]
changes$col <- colnames(df)[changes$col]
and its a simple matter of sorting if you're concerned that the order of the rows matches your example output

Apply a function based on column name in data.tables R

I'm looking to apply a user define function based on the name given to a column
dt <- data.table(gr_id = 1, id = seq(1,10),min_c = runif(10,10,30),
ml_c = runif(10,30,50),mx_c = runif(10,50,100),
min_t = runif(10,10,20),ml_t = runif(10,20,25),
mx_t = runif(10,25,30))
I would like to apply a function which calculates (min(min)+min(ml))/mx for both "c" columns and "t" columns. Currently, I've done as follows. However, becomes hard when I want to add more columns (lets say, "a")
dt[,{
temp1 = min(min_c)
temp2 = min(ml_c)
temp3 = min(mx_c)
score_c = (temp1+temp2)/temp3
temp4 = min(min_t)
temp5 = min(ml_t)
temp6 = min(mx_t)
score_t = (temp4+temp5)/temp6
list(score_c = score_c,
score_t = score_t)
},by = gr_id
]
I think this will work. the basic idea is using get.
# the original code could be simplified to:
dt[, .(
score_c = (min(min_c) + min(ml_c)) / min(mx_c),
score_t = (min(min_t) + min(ml_t)) / min(mx_t)
), by = gr_id]
#
# gr_id score_c score_t
# 1: 1 0.9051556 1.28054
# using `get`
cols <- c('c', 't')
dt[, {
res <- lapply(cols, function(i){
vars <- paste(c('min', 'ml', 'mx'), i, sep = '_')
(min(get(vars[1])) + min(get(vars[2]))) / min(get(vars[3]))
})
names(res) <- paste('score', cols, sep = '_')
res
}, by = gr_id]
# gr_id score_c score_t
# 1: 1 0.9051556 1.28054

R function to combine lists but prioritize the values in one of them

I'm trying to make a function to combine multiple lists, usually between 2 and 4, that will weed out duplicates and hopefully (if possible) prioritize the values of one of the lists. Is this possible? It's better explained with code:
PassOpts <- function(in1 = list(), in2 = list(), in3 = list(), in4 = list(){
c(in1, in2, in3, in4)
}
opts1 <- list(a = 1, b = 2, c = 4)
opts2 <- list(a = 1, b = 2, c = 4)
opts3 <- list(a = 5, b = 10)
combinedOpts <- PassOpts(opts1, opts2, opts3)
Ideally what I want is for it to be possible to 'prioritize' the list that is the most different from the rest, so in this case I would want for combinedOpts to be a list of a = 5, b = 10, c = 4. I'm using it as a way to set and combine default and also user input options.
Thanks
**Solved, ended up doing this as I realized the latest input (i.e. with 3 inputs in3) would be the one I want to use as default, so did as follows
PassOpts <- function(in1 = list(), in2 = list(), in3 = list(), in4 = list()){
if(length(in4) != 0){
in4Names <- names(in4)
rList <- in4
temp <- c(in1,in2,in3)
tempNames <- names(temp)
for(i in 1:length(tempNames)){
nam <- tempNames[i]
if(!(nam %in% in4Names)){
in4Names <- c(in4Names,nam)
rList[nam] <- temp[nam]
}
}
}else if(length(in3) != 0){
in3Names <- names(in3)
rList <- in3
temp <- c(in1,in2)
tempNames <- names(temp)
for(i in 1:length(tempNames)){
nam <- tempNames[i]
if(!(nam %in% in3Names)){
in3Names <- c(in3Names, nam)
rList[nam] <- temp[nam]
}
}
}else if(length(in2) != 0){
in2Names <- names(in2)
rList <- in2
temp <- in1
tempNames <- names(temp)
for(i in 1:length(tempNames)){
nam <- tempNames[i]
if(!(nam %in% in2Names)){
in2Names <- c(in2Names, nam)
rList[nam] <- temp[nam]
}
}
}else{
return(in1)
}
return(rList)
}
Looks likes you are looking of most unique number.
Here is how I would do:
1. aggregate input lists
2. find out the most unique one for each key
PassOpts <- function(listOfList){
resList = list()
# reduce lists by key
for (l in listOfList){
for (i in 1:length(l)){
key = names(l[i])
value = l[[i]]
resList[[key]] = c(resList[[key]], value)
}
}
# found most diffent one for each key
findDiff <- function(elements){
countTable = table(elements)
minCount = min(countTable)
return(names(countTable)[countTable == minCount])
}
return(lapply(resList, FUN=findDiff))
}
opts1 <- list(a = 1, b = 2, c = 4)
opts2 <- list(a = 1, b = 2, c = 4)
opts3 <- list(a = 5, b = 10)
combinedOpts <- PassOpts(list(opts1, opts2, opts3))

Resources