This was my first question for naming a list that contained lists that contained a dataframe:
Now my list contains even more lists, but at the end a dataframe:
So for every list level, I do have a vector, for the names these are the vectors:
spaces = 5;
low_mu0 = 0.005; high_mu0 = 0.01;
low_sigma_e = 0.0001; high_sigma_e = 0.001;
low_tau0 = 10; high_tau0 = 20;
low_phi1 = 0.5; high_phi1 = 2;
low_phi2 = -0.5; high_phi2 = 0.5;
low_sigma_n = 0.0001; high_sigma_n = 0.001;
low_c0 = -0.1; high_c0 = 0.1;
low_c1 = -0.1; high_c1 = 0.1
range_mu0 <- seq(low_mu0, high_mu0, length.out = spaces)
range_sigma_e <- seq(low_sigma_e, high_sigma_e, length.out = spaces)
range_tau0 <- seq(low_tau0, high_tau0, length.out = spaces)
range_phi1 <- seq(low_phi1, high_phi1, length.out = spaces)
range_phi2 <- seq(low_phi2, high_phi2, length.out = spaces)
range_sigma_n <- seq(low_sigma_n, high_sigma_n, length.out = spaces)
range_c0 <- seq(low_c0, high_c0, length.out = spaces)
range_c1 <- seq(low_c1, high_c1, length.out = spaces)
For naming each level I tried this:
a2 <- setNames(lapply(a, function(x)
setNames(x, nm = paste("mu =", range_mu0))),
setNames(x, paste("sigma_e =", range_sigma_e)),
setNames(x, paste("tau =", range_tau0)),
setNames(x, paste("phi1 =", range_phi1)),
setNames(x, paste("phi2 =", range_phi2)),
setNames(x, paste("sigma_n =", range_sigma_n)),
setNames(x, paste("c0 =", range_c0)),
paste("c1 =", range_c1))
But gives me an error, is there any way to have a general naming version of this question?
Thanks in advance!
data
the data is kinda massive, so I put it here
I would do this recursively. I'm using the two-level list from your previous question but it works for deeper lists as well.
set_names <- function(lst, level.names) {
if (length(level.names) > 0) {
names(lst) <- paste(names(level.names)[1], '=', level.names[[1]])
lapply(lst, set_names, level.names[-1])
} else { # deepest level
lst
}
}
# store level names in a list
lev.names <- list('y0'=seq(10, 20, length.out=3),
'sigma_e'=seq(0, 1, length.out=3))
lst <- set_names(lst, lev.names)
sapply(lst, names)
# y0 = 10 y0 = 15 y0 = 20
# [1,] "sigma_e = 0" "sigma_e = 0" "sigma_e = 0"
# [2,] "sigma_e = 0.5" "sigma_e = 0.5" "sigma_e = 0.5"
# [3,] "sigma_e = 1" "sigma_e = 1" "sigma_e = 1"
Data:
lst <- list(list(structure(list(period = 1:10, y = c(NA, 10, 10, 10,
10, 10, 10, 10, 10, 10)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 10, 10.7793541570746,
10.8146083527869, 10.8792522203673, 11.736784713809, 11.9672428168036,
11.3347121995003, 10.9912857735535, 10.7684547885036)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 10, 11.5587083141491,
11.6292167055737, 11.7585044407346, 13.4735694276179, 13.9344856336071,
12.6694243990006, 11.9825715471071, 11.5369095770071)), class = "data.frame", row.names = c(NA,
-10L))), list(structure(list(period = 1:10, y = c(NA, 15, 15,
15, 15, 15, 15, 15, 15, 15)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 15, 15.7793541570746,
15.8146083527869, 15.8792522203673, 16.736784713809, 16.9672428168036,
16.3347121995003, 15.9912857735535, 15.7684547885036)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 15, 16.5587083141491,
16.6292167055737, 16.7585044407346, 18.4735694276179, 18.9344856336071,
17.6694243990006, 16.9825715471071, 16.5369095770071)), class = "data.frame", row.names = c(NA,
-10L))), list(structure(list(period = 1:10, y = c(NA, 20, 20,
20, 20, 20, 20, 20, 20, 20)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 20, 20.7793541570746,
20.8146083527868, 20.8792522203673, 21.736784713809, 21.9672428168036,
21.3347121995003, 20.9912857735535, 20.7684547885036)), class = "data.frame", row.names = c(NA,
-10L)), structure(list(period = 1:10, y = c(NA, 20, 21.5587083141491,
21.6292167055737, 21.7585044407346, 23.4735694276179, 23.9344856336071,
22.6694243990006, 21.9825715471071, 21.5369095770071)), class = "data.frame", row.names = c(NA,
-10L))))
With the original data, we need a nested lapply with setNames
a2 <- setNames(lapply(a, function(x1)
setNames(lapply(x1, function(x2)
setNames(lapply(x2, function(x3)
setNames(lapply(x3, function(x4)
setNames(lapply(x4, function(x5)
setNames(lapply(x5, function(x6)
setNames(lapply(x6, function(x7)
setNames(x7, paste("c1 =", range_c1))
), paste("c0 =", range_c0))
), paste("sigma_n =", range_sigma_n))
), paste("phi2 =", range_phi2))
), paste("phi1 =", range_phi1))
), paste("tau =", range_tau0))
), paste("sigma_e =", range_sigma_e))
), paste("mu =", range_mu0))
-output
Related
I have the below 2 dataframes:
lc2 <- structure(list(group = 1:3, sumpct = c(13, 32, 54)), class = "data.frame", row.names = c(NA,
-3L))
note this is for the "likelihood to click" bar (see image), where "extremely/somewhat likely" is
13%, neutral is 32, and extremely/somewhat unlikely is 54)
and
le2 <- structure(list(e = 1:3, t = c(13, 38, 48)), class = "data.frame", row.names = c(NA,
-3L))
note similarly this code above is for "likelihood to enroll" bar below.
But I want to create this:
lc2 <- structure(list(group = 1:3, sumpct = c(13, 32, 54)),
class = "data.frame", row.names = c(NA, -3L))
le2 <- structure(list(e = 1:3, t = c(13, 38, 48)),
class = "data.frame", row.names = c(NA, -3L))
lc2$type <- "click"
le2$type <- "enroll"
colnames(lc2) <- c("group", "pct", "type")
colnames(le2) <- c("group", "pct", "type")
library(data.table)
library(ggplot2)
dt <- rbindlist(list(lc2, le2))
dt[, group := as.factor(group)]
ggplot(dt, aes(x = type, y = pct, fill = group)) +
geom_bar(stat = "identity") +
geom_text(aes(label=scales::percent(pct/100)), position = position_stack(vjust = .5))+
theme_classic() +
coord_flip()
I have a list containing multiple lists which all have the same structure:
ls <- list(
one = list(df = data.frame(var1_1 = c(1, 1, 1),
var1_2 = c('a', 'a', 'a')),
ls = list(n_df_1 = data.frame(var3_1 = c('x', 'x', 'x'),
var3_2 = c(4, 4, 4))),
name = c("one", "one", "one")),
two = list(df = data.frame(var1_1 = c(1, 1, 1),
var1_2 = c('a', 'a', 'a')),
ls = list(n_df_1 = data.frame(var3_1 = c('x', 'x', 'x'),
var3_2 = c(4, 4, 4))),
name = c("two", "two", "two")))
I want to merge all these nested lists like stated here: Merge Two Lists in R
It does exactly what want if I do this:
merged <- mapply(c, ls[[1]], ls[[2]], SIMPLIFY = FALSE)
The problem is, is that the main list (ls) doesn't always have only two nested lists. How can I make this code more modular?
I tried to make a vector containing all indexes of the nested lists:
sapply(seq_along(ls), function(x) paste0("ls[[", x, "]]"))
Which output this:
[1] "ls[[1]]" "ls[[2]]"
I thought I could unquote these character vector so that R sees them as object. But I can't figure out how to do that (if it's even possible). I looked at tidy eval for this, but I'm don't know if this is the way to do it.
Any suggestions?
You can use Reduce to do it on an abstract number of list elements, i.e.
Reduce(function(...)Map(c, ...), l1) #Map = mapply(..., simplify = FALSE)
which gives,
$df
$df$var1_1
[1] 1 1 1
$df$var1_2
[1] a a a
Levels: a
$df$var1_1
[1] 1 1 1
$df$var1_2
[1] a a a
Levels: a
$ls
$ls$n_df_1
var3_1 var3_2
1 x 4
2 x 4
3 x 4
$ls$n_df_1
var3_1 var3_2
1 x 4
2 x 4
3 x 4
$name
[1] "one" "one" "one" "two" "two" "two"
DATA:
dput(l1)
list(one = list(df = structure(list(var1_1 = c(1, 1, 1), var1_2 = structure(c(1L,
1L, 1L), .Label = "a", class = "factor")), class = "data.frame", row.names = c(NA,
-3L)), ls = list(n_df_1 = structure(list(var3_1 = structure(c(1L,
1L, 1L), .Label = "x", class = "factor"), var3_2 = c(4, 4, 4)), class = "data.frame", row.names = c(NA,
-3L))), name = c("one", "one", "one")), two = list(df = structure(list(
var1_1 = c(1, 1, 1), var1_2 = structure(c(1L, 1L, 1L), .Label = "a", class = "factor")), class = "data.frame", row.names = c(NA,
-3L)), ls = list(n_df_1 = structure(list(var3_1 = structure(c(1L,
1L, 1L), .Label = "x", class = "factor"), var3_2 = c(4, 4, 4)), class = "data.frame", row.names = c(NA,
-3L))), name = c("two", "two", "two")))
I have a panel (cross-sectional time series) dataset. For each group (defined by (NAICS2, occ_type) in time ym) I have many variables. For each variable I would like to subtract each group's first (dplyr::first) value from every value of that group.
Ultimately I am trying to take the Euclidean difference between the vector of each row 's group's first entry, (i.e. sqrt(c_1^2 + ... + c_k^2).
I was able to create the a column equal to the first entries for each group:
df2 <- df %>%
group_by(ym, NAICS2, occ_type) %>%
distinct(ym, NAICS2, occ_type, .keep_all = T) %>%
arrange(occ_type, NAICS2, ym) %>%
select(group_cols(), ends_with("_scf")) %>%
mutate_at(vars(-group_cols(), ends_with("_scf")),
list(first = dplyr::first))
I then tried to include variations of f.diff = . - dplyr::first(.) in the list, but none of those worked. I googled the dot notation for a while as well as first and lag in dplyr timeseries but have not been able to resolve this yet.
Ideally, I unite all variables into a vector for each row first and then take the difference.
df2 <- df %>%
group_by(ym, NAICS2, occ_type) %>%
distinct(ym, NAICS2, occ_type, .keep_all = T) %>%
arrange(occ_type, NAICS2, ym) %>%
select(group_cols(), ends_with("_scf")) %>%
unite(vector, c(-group_cols(), ends_with("_scf")), sep = ',') %>%
# TODO: DISTANCE_BETWEEN_ENTRY_AND_FIRST
mutate(vector.diff = ???)
I expect the output to be a numeric column that contains a distance measure of how different each group's row vector is from its initial row vector.
Here is a sample of the data:
structure(list(ym = c("2007-01-01", "2007-02-01"), NAICS2 = c(0L,
0L), occ_type = c("is_middle_manager", "is_middle_manager"),
Administration_scf = c(344, 250), Agriculture..Horticulture..and.the.Outdoors_scf = c(11,
17), Analysis_scf = c(50, 36), Architecture.and.Construction_scf = c(57,
51), Business_scf = c(872, 585), Customer.and.Client.Support_scf = c(302,
163), Design_scf = c(22, 17), Economics..Policy..and.Social.Studies_scf = c(7,
7), Education.and.Training_scf = c(77, 49), Energy.and.Utilities_scf = c(25,
28), Engineering_scf = c(90, 64), Environment_scf = c(19,
19), Finance_scf = c(455, 313), Health.Care_scf = c(105,
71), Human.Resources_scf = c(163, 124), Industry.Knowledge_scf = c(265,
174), Information.Technology_scf = c(467, 402), Legal_scf = c(21,
17), Maintenance..Repair..and.Installation_scf = c(194, 222
), Manufacturing.and.Production_scf = c(176, 174), Marketing.and.Public.Relations_scf = c(139,
109), Media.and.Writing_scf = c(18, 20), Personal.Care.and.Services_scf = c(31,
16), Public.Safety.and.National.Security_scf = c(14, 7),
Religion_scf = c(0, 0), Sales_scf = c(785, 463), Science.and.Research_scf = c(52,
24), Supply.Chain.and.Logistics_scf = c(838, 455), total_scf = c(5599,
3877)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L), groups = structure(list(ym = c("2007-01-01",
"2007-02-01"), NAICS2 = c(0L, 0L), occ_type = c("is_middle_manager",
"is_middle_manager"), .rows = list(1L, 2L)), row.names = c(NA,
-2L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
Enchanté.
EDIT: Solution
As pointed out by MartineJ and emilliman5, nodes should be uniquely labelled (below).
library("riverplot")
nodes<-structure(list(ID = c("2011+", "2011-", "2016+", "2016-"), x = c(20,
20, 30, 30), y = c(50, 40, 50, 40)), class = "data.frame", row.names = c(NA,
-4L))
edges<-structure(list(N1 = c("2011+", "2011-", "2011+", "2011-"), N2 =
c("2016+", "2016-", "2016-", "2016+"), Value = c(461, 7, 0, 46)), class =
"data.frame", row.names = c(NA, -4L))
river <- makeRiver(nodes,edges)
riverplot(river)
I've been toying to plot a Sankey diagram/riverplot (using the riverplot package) of how cancer registrations evolve over time, though this code has bought me little success so far. Could anyone possibly direct me on the faults of this code?
Warning message: In checkedges(x2$edges, names(x2)) : duplicated edge information, removing 1 edges
Here is the suspect code:
library(“riverplot”)
edges<-structure(list(N1 = c("+", "-", "+", "-"), N2 = c("+", "-", "-", "+"), Value = c(664L, 50L, 0L, 46L)), .Names = c("N1", "N2", "Value"), class = "data.frame", row.names = c(NA, -4L))
nodes = data.frame(ID = unique(c(edges$N1, edges$N2)), stringsAsFactors = FALSE)
nodes$x = c(1,2)
rownames(nodes) = nodes$ID
rp <- list(nodes=nodes, edges=edges)
class(rp) <- c(class(rp), "riverplot")
plot(rp)
And the data, which is included in code:
N1 N2 Value
+ + 664
- - 50
+ - 0
- + 46
Eternally grateful.
It looks like you're using the same value multiple times in N1 (and in N2). Try to make them all different (per column) and try again, f.i.:
N1: plus1 minus1 plus2 minus2
If you want to show only + and -: in makeRiver, there is an option **node_labels **
Your nodes need to be named uniquely and then use the nodes$labels to change it back:
library(riverplot)
edges<-structure(list(N1 = c("+", "-", "+", "-"), N2 = c("+", "-", "-", "+"), Value = c(664L, 50L, 0L, 46L)), .Names = c("N1", "N2", "Value"), class = "data.frame", row.names = c(NA, -4L))
edges$N1 <- paste0(edges$N1, "a")
edges$N2 <- paste0(edges$N2, "b")
nodes = data.frame(ID = unique(c(edges$N1, edges$N2)), stringsAsFactors = FALSE)
nodes$x = c(1,1,2,2)
nodes$labels <- as.character(substr(nodes$ID, 1, 1))
rownames(nodes) = nodes$ID
rp <- list(nodes=nodes, edges=edges)
class(rp) <- c(class(rp), "riverplot")
plot(rp)
I have two lists of data frames: listA and listB. How to get a list of merged dataframes (listC)?
dfA1 <- data.frame(x1 = c("a", "b"), y1 = c(1, 2), row.names = c("1", "2"))
dfA2 <- data.frame(x1 = c("c", "d"), y1 = c(3, 4), row.names = c("1", "3"))
dfB1 <- data.frame(x2 = c("c", "d"), y2 = c(3, 4), row.names = c("1", "2"))
dfB2 <- data.frame(x2 = c("e", "f"), y2 = c(5, 6), row.names = c("2", "3"))
listA <- list(dfA1, dfA2) # first input list
listB <- list(dfB1, dfB2) # second input list
m1 <- merge(dfA1, dfB1, by = 0, all = T)
m2 <- merge(dfA2, dfB2, by = 0, all = T)
listC <- list(m1, m2) # desired output list
Found following solution:
listC <- mapply(function(x, y) merge(x, y, by = 0, all = T), x = listA, y = listB, SIMPLIFY = F)