Merge List containing Lists with the same structure - r

I have a list containing multiple lists which all have the same structure:
# Every entry has the same layout (a data frame, a nested list holding another
# data frame, and a character vector); only the repeated label differs.
ls <- lapply(c(one = "one", two = "two"), function(label) {
  list(
    df = data.frame(
      var1_1 = c(1, 1, 1),
      var1_2 = c("a", "a", "a")
    ),
    ls = list(
      n_df_1 = data.frame(
        var3_1 = c("x", "x", "x"),
        var3_2 = c(4, 4, 4)
      )
    ),
    name = rep(label, 3)
  )
})
I want to merge all these nested lists like stated here: Merge Two Lists in R
It does exactly what I want if I do this:
merged <- mapply(c, ls[[1]], ls[[2]], SIMPLIFY = FALSE)
The problem is that the main list (ls) doesn't always contain exactly two nested lists. How can I make this code more modular?
I tried to make a vector containing all indexes of the nested lists:
sapply(seq_along(ls), function(x) paste0("ls[[", x, "]]"))
Which output this:
[1] "ls[[1]]" "ls[[2]]"
I thought I could unquote this character vector so that R sees its elements as objects, but I can't figure out how to do that (if it's even possible). I looked at tidy eval for this, but I don't know if this is the way to do it.
Any suggestions?

You can use Reduce to do it on an arbitrary number of list elements (l1 below is the list from the question; see DATA), i.e.
# Fold the list pairwise: each step concatenates the same-position elements of
# the running result and the next sub-list.
# Map(f, ...) is mapply(f, ..., SIMPLIFY = FALSE).
Reduce(function(acc, nxt) Map(c, acc, nxt), l1)
which gives,
$df
$df$var1_1
[1] 1 1 1
$df$var1_2
[1] a a a
Levels: a
$df$var1_1
[1] 1 1 1
$df$var1_2
[1] a a a
Levels: a
$ls
$ls$n_df_1
var3_1 var3_2
1 x 4
2 x 4
3 x 4
$ls$n_df_1
var3_1 var3_2
1 x 4
2 x 4
3 x 4
$name
[1] "one" "one" "one" "two" "two" "two"
DATA:
dput(l1)
list(one = list(df = structure(list(var1_1 = c(1, 1, 1), var1_2 = structure(c(1L,
1L, 1L), .Label = "a", class = "factor")), class = "data.frame", row.names = c(NA,
-3L)), ls = list(n_df_1 = structure(list(var3_1 = structure(c(1L,
1L, 1L), .Label = "x", class = "factor"), var3_2 = c(4, 4, 4)), class = "data.frame", row.names = c(NA,
-3L))), name = c("one", "one", "one")), two = list(df = structure(list(
var1_1 = c(1, 1, 1), var1_2 = structure(c(1L, 1L, 1L), .Label = "a", class = "factor")), class = "data.frame", row.names = c(NA,
-3L)), ls = list(n_df_1 = structure(list(var3_1 = structure(c(1L,
1L, 1L), .Label = "x", class = "factor"), var3_2 = c(4, 4, 4)), class = "data.frame", row.names = c(NA,
-3L))), name = c("two", "two", "two")))

Related

Naming N sublists in R

This was my first question for naming a list that contained lists that contained a dataframe:
Now my list contains even more lists, but at the end a dataframe:
So for every list level, I do have a vector, for the names these are the vectors:
# Number of grid points per parameter.
spaces <- 5

# Lower / upper bound of every parameter.
low_mu0 <- 0.005
high_mu0 <- 0.01
low_sigma_e <- 0.0001
high_sigma_e <- 0.001
low_tau0 <- 10
high_tau0 <- 20
low_phi1 <- 0.5
high_phi1 <- 2
low_phi2 <- -0.5
high_phi2 <- 0.5
low_sigma_n <- 0.0001
high_sigma_n <- 0.001
low_c0 <- -0.1
high_c0 <- 0.1
low_c1 <- -0.1
high_c1 <- 0.1

# Evenly spaced grid of `spaces` values between each pair of bounds.
range_mu0 <- seq(from = low_mu0, to = high_mu0, length.out = spaces)
range_sigma_e <- seq(from = low_sigma_e, to = high_sigma_e, length.out = spaces)
range_tau0 <- seq(from = low_tau0, to = high_tau0, length.out = spaces)
range_phi1 <- seq(from = low_phi1, to = high_phi1, length.out = spaces)
range_phi2 <- seq(from = low_phi2, to = high_phi2, length.out = spaces)
range_sigma_n <- seq(from = low_sigma_n, to = high_sigma_n, length.out = spaces)
range_c0 <- seq(from = low_c0, to = high_c0, length.out = spaces)
range_c1 <- seq(from = low_c1, to = high_c1, length.out = spaces)
For naming each level I tried this:
a2 <- setNames(lapply(a, function(x)
setNames(x, nm = paste("mu =", range_mu0))),
setNames(x, paste("sigma_e =", range_sigma_e)),
setNames(x, paste("tau =", range_tau0)),
setNames(x, paste("phi1 =", range_phi1)),
setNames(x, paste("phi2 =", range_phi2)),
setNames(x, paste("sigma_n =", range_sigma_n)),
setNames(x, paste("c0 =", range_c0)),
paste("c1 =", range_c1))
But this gives me an error. Is there a general way to name every level, extending the approach from my first question?
Thanks in advance!
data
the data is kinda massive, so I put it here
I would do this recursively. I'm using the two-level list from your previous question but it works for deeper lists as well.
# Recursively name the levels of a nested list.
#
# lst          nested list; one nesting level is consumed per entry of
#              level.names.
# level.names  named list; the name of entry k is the label prefix for level k
#              and its values are pasted after " = " to form the element names.
#
# Returns lst with the names of each covered level replaced; anything below the
# last covered level is returned unchanged.
set_names <- function(lst, level.names) {
  # No labels left: this is the deepest level, hand it back as it is.
  if (length(level.names) == 0) {
    return(lst)
  }
  prefix <- names(level.names)[1]
  values <- level.names[[1]]
  names(lst) <- paste(prefix, "=", values)
  # Descend one level, passing on the labels that are still unused.
  lapply(lst, set_names, level.names[-1])
}
# One entry per nesting level: the entry name becomes the label prefix and the
# values become the right-hand side of each "prefix = value" name.
lev.names <- list(
  y0 = seq(from = 10, to = 20, length.out = 3),
  sigma_e = seq(from = 0, to = 1, length.out = 3)
)
lst <- set_names(lst, lev.names)
# Show the second-level names found under every first-level element.
sapply(lst, names)
# y0 = 10 y0 = 15 y0 = 20
# [1,] "sigma_e = 0" "sigma_e = 0" "sigma_e = 0"
# [2,] "sigma_e = 0.5" "sigma_e = 0.5" "sigma_e = 0.5"
# [3,] "sigma_e = 1" "sigma_e = 1" "sigma_e = 1"
Data:
lst <- list(list(structure(list(period = 1:10, y = c(NA, 10, 10, 10,
10, 10, 10, 10, 10, 10)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 10, 10.7793541570746,
10.8146083527869, 10.8792522203673, 11.736784713809, 11.9672428168036,
11.3347121995003, 10.9912857735535, 10.7684547885036)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 10, 11.5587083141491,
11.6292167055737, 11.7585044407346, 13.4735694276179, 13.9344856336071,
12.6694243990006, 11.9825715471071, 11.5369095770071)), class = "data.frame", row.names = c(NA,
-10L))), list(structure(list(period = 1:10, y = c(NA, 15, 15,
15, 15, 15, 15, 15, 15, 15)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 15, 15.7793541570746,
15.8146083527869, 15.8792522203673, 16.736784713809, 16.9672428168036,
16.3347121995003, 15.9912857735535, 15.7684547885036)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 15, 16.5587083141491,
16.6292167055737, 16.7585044407346, 18.4735694276179, 18.9344856336071,
17.6694243990006, 16.9825715471071, 16.5369095770071)), class = "data.frame", row.names = c(NA,
-10L))), list(structure(list(period = 1:10, y = c(NA, 20, 20,
20, 20, 20, 20, 20, 20, 20)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 20, 20.7793541570746,
20.8146083527868, 20.8792522203673, 21.736784713809, 21.9672428168036,
21.3347121995003, 20.9912857735535, 20.7684547885036)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 20, 21.5587083141491,
21.6292167055737, 21.7585044407346, 23.4735694276179, 23.9344856336071,
22.6694243990006, 21.9825715471071, 21.5369095770071)), class = "data.frame", row.names = c(NA,
-10L))))
With the original data, we need a nested lapply with setNames
# Name all eight nesting levels of `a`, outermost (mu) to innermost (c1).
# local() keeps the helper objects out of the calling environment.
a2 <- local({
  level_labels <- list(
    paste("mu =", range_mu0),
    paste("sigma_e =", range_sigma_e),
    paste("tau =", range_tau0),
    paste("phi1 =", range_phi1),
    paste("phi2 =", range_phi2),
    paste("sigma_n =", range_sigma_n),
    paste("c0 =", range_c0),
    paste("c1 =", range_c1)
  )
  # Name the children first (while labels for deeper levels remain), then name
  # the current level with the first label set.
  name_levels <- function(node, labels) {
    if (length(labels) > 1) {
      node <- lapply(node, name_levels, labels[-1])
    }
    setNames(node, labels[[1]])
  }
  name_levels(a, level_labels)
})
-output

How to get a data frame into a graph, when dealing with words and numbers?

This is my data frame
id product cost
1 Milk 3
2 egg 2
3 coffee 4
4 tea 2
5 sugar 3
I am trying to work out how to plot each product against its cost. My current issue is that the plot fails because the product column is not numeric.
Thanks!
Based on your comment, you can use ggplot2 to create a barplot. Like this:
library(ggplot2)
# Horizontal bar chart: one bar per product, bar length taken directly from cost.
ggplot(df, aes(product, cost)) +
  geom_col() +
  coord_flip()
data
df <- structure(list(id = c(1, 2, 3, 4, 5), product = structure(c(3L, 2L, 1L, 5L, 4L), .Label = c("coffee", "egg", "Milk", "sugar", "tea"), class = "factor"), cost = c(3, 2, 4, 2, 3)), class = "data.frame", row.names = c(NA, -5L))
We define the column "product" as a factor, like so:
# Example data: five products and the cost of each.
data <- data.frame(id = c(1, 2, 3, 4, 5),
product = c("Milk", "egg", "coffee", "tea", "sugar"),
cost = c(3, 2, 4, 2, 3)
)
# Convert product to a factor so that it can be used on the x axis, and plot
# cost on the y axis; type = "p" requests points.
plot(x = as.factor(data$product),
y = data$cost,
type = "p"
)

Return row(i) if two columns match

I have two datasets:
df1
ID paddock cow ID
90/123 10 09/123
90/124 11 09/124
90/125 11 09/124
df2
ID paddock
09/123 20
09/124 21
I would like to match df1$cowID with df2$ID and return df2$paddock for whatever row matches. My current code is as follows:
dt <- ifelse(df1$cowID %in% df2$ID, df2$paddock[i], NA)
But I'm getting a return error. Could someone direct me in the right direction please? Thanks in advance!
You might consider joining the datasets.
# Keep every row of df1 and attach the df2 row whose ID equals df1$cow_ID.
# `by` maps the key column of df1 (name on the left) to the key column of df2.
# Both tables have a `paddock` column, so the result carries paddock.x (df1)
# and paddock.y (df2).
dplyr::left_join(df1, df2, by = c("cow_ID" = "ID"))
You should probably use match :
# match() gives, for each df1 row, the position of its cow_ID in df2$ID (NA if
# absent); indexing df2's paddock with it lines the values up with df1.
df1[["df2_paddock"]] <- df2[["paddock"]][match(df1[["cow_ID"]], df2[["ID"]])]
df1
# ID paddock cow_ID df2_paddock
#1 90/123 10 09/123 20
#2 90/124 11 09/124 21
data
df1 <- structure(list(ID = structure(1:2, .Label = c("90/123", "90/124"
), class = "factor"), paddock = 10:11, cow_ID = structure(1:2, .Label = c("09/123",
"09/124"), class = "factor")), class = "data.frame", row.names = c(NA, -2L))
df2 <- structure(list(ID = structure(1:2, .Label = c("09/123", "09/124"
), class = "factor"), paddock = 20:21), class = "data.frame",
row.names = c(NA, -2L))
You can do that by joining the two dataframes and getting the column that you want.
Using Base R
# Example data: df1$cow_ID holds the keys that have to be looked up in df2$ID.
df1 <- data.frame(
  ID = c("90/123", "90/124"),
  paddock = c(10, 11),
  cow_ID = c("09/123", "09/124")
)
df2 <- data.frame(
  ID = c("09/123", "09/124"),
  paddock = c(20, 21)
)
# Join df1$cow_ID to df2$ID, then keep only the paddock column that came from
# df2. Both inputs have a `paddock` column, so merge() tells them apart with
# the suffixes (.x = df1, .y = df2).
merge(df1, df2, by.x = "cow_ID", by.y = "ID", suffixes = c(".x", ".y"))["paddock.y"]
# paddock.y
# 20
# 21
Using Dplyr
library(dplyr)
# Example data: df1$cow_ID holds the keys that have to be looked up in df2$ID.
df1 <- data.frame(
  ID = c("90/123", "90/124"),
  paddock = c(10, 11),
  cow_ID = c("09/123", "09/124")
)
df2 <- data.frame(
  ID = c("09/123", "09/124"),
  paddock = c(20, 21)
)
# Join df1$cow_ID to df2$ID, then keep only the paddock column that came from
# df2. Note: the dplyr argument is `suffix` (singular), unlike merge()'s
# `suffixes`.
df1 %>%
  inner_join(df2, by = c("cow_ID" = "ID"), suffix = c(".x", ".y")) %>%
  select(paddock.y) %>%
  rename(paddock = paddock.y)
# paddock
# 20
# 21
If you would like to use ifelse(), maybe you can use the following code to make it
with(df2,ifelse(ID %in% df1$cow_ID,paddock,NA))
such that
> with(df2,ifelse(ID %in% df1$cow_ID,paddock,NA))
[1] 20 21
DATA
df1 <- structure(list(ID = structure(1:3, .Label = c("90/123", "90/124",
"90/125"), class = "factor"), paddock = c(10, 11, 11), cow_ID = structure(c(1L,
2L, 2L), .Label = c("09/123", "09/124"), class = "factor")), class = "data.frame", row.names = c(NA,
-3L))
df2 <- structure(list(ID = structure(1:2, .Label = c("09/123", "09/124"
), class = "factor"), paddock = c(20, 21)), class = "data.frame", row.names = c(NA,
-2L))

How to cast nested list to matrix like or tabular like object?

I have nested list data that needs to be put into a particular output representation: either a matrix-like object, or the nested lists exported directly as csv files. I tried several general approaches, but exporting the nested list did not go well, so I am looking for a solution that casts the nested list to a matrix-like or tabular object that holds the data in the desired way. Maybe I could hold the nested list data in a data.table, but I am not sure about this. Can anyone tell me how to do this sort of manipulation easily? How can I achieve a clean, well-structured representation of nested list data? Any ideas? Thanks a lot
mini example :
output of custom function:
AcceptedList <- list(
A_accepted = data.frame(pos.start=c(1,6,16), pos.stop=c(4,12,23), pos.ID=c("A1","A2","A3"), pos.score=c(11,8,13)),
B_accepted = data.frame(pos.start=c(7,19,31), pos.stop=c(13,28,43), pos.ID=c("B3","B6","B7"), pos.score=c(12,5,7)),
C_accepted = data.frame(pos.start=c(5,21,36), pos.stop=c(11,29,42), pos.ID=c("C2","C4","C9"), pos.score=c(7,13,9))
)
RejectedList <- list(
A_rejected = data.frame(pos.start=c(6,25,40), pos.stop=c(12,33,49), pos.ID=c("A2","A5","A8"), pos.score=c(8,4,7)),
B_rejected = data.frame(pos.start=c(15,19,47), pos.stop=c(18,28,55), pos.ID=c("B4","B6","B9"), pos.score=c(10,5,14)),
C_rejected = data.frame(pos.start=c(13,21,36,53), pos.stop=c(19,29,42,67), pos.ID=c("C3","C4","C9","C12"), pos.score=c(4,13,9,17))
)
so I implement this function to further manipulate output one more step :
# Split every data frame in `mlist` by score.
#
# mlist      list of data frames, each with a numeric `pos.score` column.
# threshold  rows with pos.score >= threshold go to "up", the others to "down".
#
# Returns a list with the same names as `mlist`; each element is the result of
# split(), i.e. a list of data frames keyed by "down" / "up".
func <- function(mlist, threshold) {
  lapply(mlist, function(tbl) {
    direction <- ifelse(tbl$pos.score >= threshold, "up", "down")
    split(tbl, direction)
  })
}
#example
.res_accepted <- func(AcceptedList, 9)
.res_rejected <- func(RejectedList, 9)
I am having a hard time working out how to cast the nested lists .res_accepted and .res_rejected to a matrix-like object. Ideally I would export the nested lists as csv files, but I failed to export them in the desired way. How can I make this happen?
ultimately, desired list of csv files with desired named as follows:
A_accepted_up.csv
A_accepted_down.csv
A_rejected_up.csv
A_rejected_down.csv
B_accepted_up.csv
B_accepted_down.csv
B_rejected_up.csv
B_rejected_down.csv
C_accepted_up.csv
C_accepted_down.csv
C_rejected_up.csv
C_rejected_down.csv
The point is, nested list returned by my custom functions, so I intend to either directly export them or cast them into matrix like object as well. Any idea for this sort of manipulation ? Thanks:)
This returns a data.frame DF of the data. No packages are used.
# Stack every accepted / rejected data frame into one; rbind builds row names
# such as "A_accepted.1" from the list names.
both <- do.call("rbind", c(AcceptedList, RejectedList))
cn <- c("letter", "accepted", "seq")
# Turn "A_accepted.1" into "A.accepted.1" and parse the three dot-separated
# parts into the columns named in `cn`, placed in front of the data.
DF <- cbind(
  read.table(text = chartr("_", ".", rownames(both)), sep = ".", col.names = cn),
  both)
# Same cut-off as func(mlist, threshold = 9) in the question: a score >= 9 is
# "up", anything lower is "down" (also correct for non-integer scores).
DF <- transform(DF, updown = ifelse(pos.score >= 9, "up", "down"))
giving:
> DF
letter accepted seq pos.start pos.stop pos.ID pos.score updown
A_accepted.1 A accepted 1 1 4 A1 11 up
A_accepted.2 A accepted 2 6 12 A2 8 down
A_accepted.3 A accepted 3 16 23 A3 13 up
B_accepted.1 B accepted 1 7 13 B3 12 up
B_accepted.2 B accepted 2 19 28 B6 5 down
B_accepted.3 B accepted 3 31 43 B7 7 down
C_accepted.1 C accepted 1 5 11 C2 7 down
C_accepted.2 C accepted 2 21 29 C4 13 up
C_accepted.3 C accepted 3 36 42 C9 9 up
A_rejected.1 A rejected 1 6 12 A2 8 down
A_rejected.2 A rejected 2 25 33 A5 4 down
A_rejected.3 A rejected 3 40 49 A8 7 down
B_rejected.1 B rejected 1 15 18 B4 10 up
B_rejected.2 B rejected 2 19 28 B6 5 down
B_rejected.3 B rejected 3 47 55 B9 14 up
C_rejected.1 C rejected 1 13 19 C3 4 down
C_rejected.2 C rejected 2 21 29 C4 13 up
C_rejected.3 C rejected 3 36 42 C9 9 up
C_rejected.4 C rejected 4 53 67 C12 17 up
This will write DF out in separate files:
# Write one csv per (letter, accepted, updown) group. The file name is built
# from the group's key values; the first three columns (letter, accepted, seq)
# are left out of the file.
junk <- by(DF, DF[c("letter", "accepted", "updown")], function(grp) {
  out_file <- sprintf("%s_%s_%s.csv", grp$letter[1], grp$accepted[1], grp$updown[1])
  write.csv(grp[-(1:3)], out_file)
})
or this will write out the data frames in .res_accepted -- .res_rejected could be handled similarly:
# For every element of .res_accepted, write each of its data frames to
# "<element name>_<data frame name>.csv".
junk <- lapply(names(.res_accepted), function(set_name) {
  parts <- .res_accepted[[set_name]]
  targets <- paste0(set_name, "_", names(parts), ".csv")
  mapply(write.csv, parts, targets)
})
Note: The poster changed the data after this answer already had appeared. The output above corresponds to the original data; however, it should also work for the revised data. The original data was:
AcceptedList <-
structure(list(foo_accepted = structure(list(pos.start = c(1,
6, 16), pos.stop = c(4, 12, 23), pos.ID = structure(1:3, .Label = c("A1",
"A2", "A3"), class = "factor"), pos.score = c(11, 8, 13)), .Names = c("pos.start",
"pos.stop", "pos.ID", "pos.score"), row.names = c(NA, -3L), class = "data.frame"),
bar_accepted = structure(list(pos.start = c(7, 19, 31), pos.stop = c(13,
28, 43), pos.ID = structure(1:3, .Label = c("B3", "B6", "B7"
), class = "factor"), pos.score = c(12, 5, 7)), .Names = c("pos.start",
"pos.stop", "pos.ID", "pos.score"), row.names = c(NA, -3L
), class = "data.frame"), cat_accepted = structure(list(pos.start = c(5,
21, 36), pos.stop = c(11, 29, 42), pos.ID = structure(1:3, .Label = c("C2",
"C4", "C9"), class = "factor"), pos.score = c(7, 13, 9)), .Names = c("pos.start",
"pos.stop", "pos.ID", "pos.score"), row.names = c(NA, -3L
), class = "data.frame")), .Names = c("foo_accepted", "bar_accepted",
"cat_accepted"))
RejectedList <-
structure(list(foo_rejected = structure(list(pos.start = c(6,
25, 40), pos.stop = c(12, 33, 49), pos.ID = structure(1:3, .Label = c("A2",
"A5", "A8"), class = "factor"), pos.score = c(8, 4, 7)), .Names = c("pos.start",
"pos.stop", "pos.ID", "pos.score"), row.names = c(NA, -3L), class = "data.frame"),
bar_rejected = structure(list(pos.start = c(15, 19, 47),
pos.stop = c(18, 28, 55), pos.ID = structure(1:3, .Label = c("B4",
"B6", "B9"), class = "factor"), pos.score = c(10, 5,
14)), .Names = c("pos.start", "pos.stop", "pos.ID", "pos.score"
), row.names = c(NA, -3L), class = "data.frame"), cat_rejected = structure(list(
pos.start = c(13, 21, 36, 53), pos.stop = c(19, 29, 42,
67), pos.ID = structure(c(2L, 3L, 4L, 1L), .Label = c("C12",
"C3", "C4", "C9"), class = "factor"), pos.score = c(4,
13, 9, 17)), .Names = c("pos.start", "pos.stop", "pos.ID",
"pos.score"), row.names = c(NA, -4L), class = "data.frame")),
.Names = c("foo_rejected",
"bar_rejected", "cat_rejected"))

Merge contents within list of list by duplicate name

Given a list of lists as such, is there an elegant way to convert original to treated? I'm using simple values like 1,2,3, but values could be data frames or whatever. The goal is not to de-dupe the contents of each unique named, simply to de-dupe names by merging contents.
original = structure(list(name1 = structure(list(one = 1, two = 2, three = 3), .Names = c("one",
"two", "three")), name2 = structure(list(a = 9), .Names = "a"),
name1 = structure(list(four = 4, five = 5, six = 6), .Names = c("four",
"five", "six")), name2 = structure(list(b = 8), .Names = "b")), .Names = c("name1",
"name2", "name1", "name2"))
treated = structure(list(name1 = structure(list(one = 1, two = 2, three = 3,
four = 4, five = 5, six = 6), .Names = c("one", "two", "three",
"four", "five", "six")), name2 = structure(list(a = 9, b = 8), .Names = c("a",
"b"))), .Names = c("name1", "name2"))
Here is a solution using plyr.
# library() stops with an error if plyr is missing; require() would only
# return FALSE and let the script fail later.
library(plyr)
lnames <- names(original)
ulnames <- unique(lnames)
# Collect, per unique name, all sub-lists stored under that name.
treated <- plyr::llply(ulnames, function(x) original[lnames == x])
# Flatten each group by one level. The outer names are dropped first;
# otherwise unlist() prefixes them and produces "name1.one" instead of "one".
treated <- plyr::llply(treated, function(grp) unlist(unname(grp), recursive = FALSE))
names(treated) <- ulnames

Resources