Merge contents within list of list by duplicate name - r

Given a list of lists like the one below, is there an elegant way to convert original to treated? I'm using simple values like 1, 2, 3, but the values could be data frames or anything else. The goal is not to de-dupe the contents of each uniquely named element, but simply to de-dupe the names by merging the contents.
original = structure(list(name1 = structure(list(one = 1, two = 2, three = 3), .Names = c("one",
"two", "three")), name2 = structure(list(a = 9), .Names = "a"),
name1 = structure(list(four = 4, five = 5, six = 6), .Names = c("four",
"five", "six")), name2 = structure(list(b = 8), .Names = "b")), .Names = c("name1",
"name2", "name1", "name2"))
treated = structure(list(name1 = structure(list(one = 1, two = 2, three = 3,
four = 4, five = 5, six = 6), .Names = c("one", "two", "three",
"four", "five", "six")), name2 = structure(list(a = 9, b = 8), .Names = c("a",
"b"))), .Names = c("name1", "name2"))

Here is a solution using plyr.
# Merge the elements of `original` that share a name into one list per name.
# library() fails loudly if plyr is missing; require() would only return FALSE.
library(plyr)
lnames <- names(original)
ulnames <- unique(lnames)
# Collect the elements belonging to each unique name. The outer names are
# dropped so that unlist() below keeps the inner names as they are ("one")
# instead of prefixing them ("name1.one").
treated <- plyr::llply(ulnames, function(x) unname(original[lnames == x]))
# Flatten exactly one level so that the inner values themselves (which may be
# data frames or other objects) are left untouched.
treated <- plyr::llply(treated, unlist, recursive = FALSE)
names(treated) <- ulnames

Related

ggplot boxplot not plotting when supplying breaks

I have 7 values that I am trying to plot using geom_boxplot, and I wish to show only certain values in the legend, so I am using the breaks argument of the scale_linetype_manual function. However, the boxplots for the values left out of the legend are then not plotted at all:
Boxplot using scale_linetype_manual
However, if I use the same code with scale_linetype_discrete it works fine and plots and only gives me the required values in the legend. But, I can't control the linetype in the function using the values argument. Is there a way to add values to the scale_linetype_discrete function?
Boxplot using scale_linetype_discrete
EDIT - UPDATED with fake data + code
> head(debug)
# A tibble: 6 × 4
subj time cond y
<dbl> <chr> <chr> <dbl>
1 1 one one_A 2
2 1 two two_A 1
3 1 two two_B 5
4 1 two two_C 0
5 1 three three_A 4
6 1 four four_A 4
> dput(debug)
structure(list(subj = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4), time = c("one",
"two", "two", "two", "three", "four", "four", "one", "two", "two",
"two", "three", "four", "four", "one", "two", "two", "two", "three",
"four", "four", "one", "two", "two", "two", "three", "four",
"four"), cond = c("one_A", "two_A", "two_B", "two_C", "three_A",
"four_A", "four_B", "one_A", "two_A", "two_B", "two_C", "three_A",
"four_A", "four_B", "one_A", "two_A", "two_B", "two_C", "three_A",
"four_A", "four_B", "one_A", "two_A", "two_B", "two_C", "three_A",
"four_A", "four_B"), y = c(2, 1, 5, 0, 4, 4, 2, 2, 4, 3, 0, 1,
5, 3, 1, 5, 4, 2, 0, 4, 4, 0, 0, 4, 2, 1, 5, 5)), row.names = c(NA,
-28L), spec = structure(list(cols = list(subj = structure(list(), class = c("collector_double",
"collector")), time = structure(list(), class = c("collector_character",
"collector")), cond = structure(list(), class = c("collector_character",
"collector")), y = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), problems = <pointer: 0x600004737380>, class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
# scale_linetype_discrete version: every box is drawn and only the listed
# conditions appear in the legend (the goal is to also set linetype values).
ggplot() +
  geom_boxplot(
    data = debug,
    mapping = aes(x = time, y = y, linetype = cond),
    show.legend = TRUE
  ) +
  scale_x_discrete(
    labels = c("one", "two", "three", "four"),
    limits = c("one", "two", "three", "four")
  ) +
  scale_linetype_discrete(
    name = "My legend",
    labels = c("two_A", "two_B", "two_C", "four_A", "four_B"),
    breaks = c("two_A", "two_B", "two_C", "four_A", "four_B")
  )
# scale_linetype_manual version: 'one' and 'three' should still be plotted,
# just not shown in the legend.
ggplot() +
  geom_boxplot(
    data = debug,
    mapping = aes(x = time, y = y, linetype = cond),
    show.legend = TRUE
  ) +
  scale_x_discrete(
    labels = c("one", "two", "three", "four"),
    limits = c("one", "two", "three", "four")
  ) +
  scale_linetype_manual(
    name = "My legend",
    # Seven unnamed linetypes, alternating solid / dashed.
    values = rep(c("solid", "dashed"), length.out = 7),
    labels = c("two_A", "two_B", "two_C", "four_A", "four_B"),
    breaks = c("two_A", "two_B", "two_C", "four_A", "four_B")
  )
I'm not sure why your code does not work. But one option to fix your issue would be to use a named vector to assign linetypes to the categories of your cond variable:
library(ggplot2)
# Named vector: each level of `cond` is mapped explicitly to its linetype.
lty <- c(
  four_A = "solid",
  four_B = "dashed",
  one_A = "solid",
  three_A = "dashed",
  two_A = "solid",
  two_B = "dashed",
  two_C = "solid"
)
ggplot() +
  geom_boxplot(data = debug, mapping = aes(x = time, y = y, linetype = cond)) +
  scale_x_discrete(
    labels = c("one", "two", "three", "four"),
    limits = c("one", "two", "three", "four")
  ) +
  # `breaks` restricts which conditions are listed in the legend.
  scale_linetype_manual(
    values = lty,
    breaks = c("two_A", "two_B", "two_C", "four_A", "four_B"),
    name = "My legend"
  )

How to get a data frame into a graph, when dealing with words and numbers?

This is my data frame
id product cost
1 Milk 3
2 egg 2
3 coffee 4
4 tea 2
5 sugar 3
I am trying to work out how to get the product and its cost into a graph; my current issue is that it will not plot because the product column is not numeric.
Thanks!
Based on your comment, you can use ggplot2 to create a barplot. Like this:
library(ggplot2)
# Horizontal bar chart of cost per product.
# geom_col() takes the bar heights directly from the `y` aesthetic, which is
# what geom_bar(stat = "identity") was being used for.
ggplot(data = df, aes(x = product, y = cost)) +
  geom_col() +
  coord_flip()
data
# Example data: five products with their costs; `product` is a factor whose
# levels are listed explicitly.
df <- data.frame(
  id = c(1, 2, 3, 4, 5),
  product = factor(
    c("Milk", "egg", "coffee", "tea", "sugar"),
    levels = c("coffee", "egg", "Milk", "sugar", "tea")
  ),
  cost = c(3, 2, 4, 2, 3)
)
We define the column "product" as a factor, like so:
# Build the example data frame with id, product and cost columns.
data <- data.frame(id = c(1, 2, 3, 4, 5),
product = c("Milk", "egg", "coffee", "tea", "sugar"),
cost = c(3, 2, 4, 2, 3)
)
# Convert `product` to a factor in the call itself so that plot() receives a
# categorical x, and plot cost against it.
# NOTE(review): with a factor x, base plot() presumably draws one box per
# level rather than points, so `type = "p"` may have no effect -- confirm.
plot(x = as.factor(data$product),
y = data$cost,
type = "p"
)

Merge List containing Lists with the same structure

I have a list containing multiple lists which all have the same structure:
# Example input: a named list whose elements all share the same structure
# (a data frame, a nested list holding a data frame, and a character vector
# repeating the element's own name).
ls <- lapply(
  c(one = "one", two = "two"),
  function(label) {
    list(
      df = data.frame(
        var1_1 = rep(1, 3),
        var1_2 = rep("a", 3)
      ),
      ls = list(
        n_df_1 = data.frame(
          var3_1 = rep("x", 3),
          var3_2 = rep(4, 3)
        )
      ),
      name = rep(label, 3)
    )
  }
)
I want to merge all these nested lists like stated here: Merge Two Lists in R
It does exactly what I want if I do this:
# Concatenate the matching elements of the first two sub-lists.
# Map(f, ...) is mapply(f, ..., SIMPLIFY = FALSE).
merged <- Map(c, ls[[1]], ls[[2]])
The problem is that the main list (ls) doesn't always have only two nested lists. How can I make this code more modular?
I tried to make a vector containing all indexes of the nested lists:
# Build one "ls[[i]]" string per sub-list. vapply() guarantees a character
# vector even when `ls` is empty, where sapply() would return list().
vapply(seq_along(ls), function(x) paste0("ls[[", x, "]]"), character(1))
Which output this:
[1] "ls[[1]]" "ls[[2]]"
I thought I could unquote these character vectors so that R sees them as objects, but I can't figure out how to do that (if it's even possible). I looked at tidy eval for this, but I don't know if this is the way to do it.
Any suggestions?
You can use Reduce to do it on an abstract number of list elements, i.e.
# Fold over the sub-lists pairwise: at each step, concatenate the matching
# elements of the accumulated result and the next sub-list.
# Map(f, ...) is mapply(f, ..., SIMPLIFY = FALSE).
Reduce(function(acc, nxt) Map(c, acc, nxt), l1)
which gives,
$df
$df$var1_1
[1] 1 1 1
$df$var1_2
[1] a a a
Levels: a
$df$var1_1
[1] 1 1 1
$df$var1_2
[1] a a a
Levels: a
$ls
$ls$n_df_1
var3_1 var3_2
1 x 4
2 x 4
3 x 4
$ls$n_df_1
var3_1 var3_2
1 x 4
2 x 4
3 x 4
$name
[1] "one" "one" "one" "two" "two" "two"
DATA:
dput(l1)
list(one = list(df = structure(list(var1_1 = c(1, 1, 1), var1_2 = structure(c(1L,
1L, 1L), .Label = "a", class = "factor")), class = "data.frame", row.names = c(NA,
-3L)), ls = list(n_df_1 = structure(list(var3_1 = structure(c(1L,
1L, 1L), .Label = "x", class = "factor"), var3_2 = c(4, 4, 4)), class = "data.frame", row.names = c(NA,
-3L))), name = c("one", "one", "one")), two = list(df = structure(list(
var1_1 = c(1, 1, 1), var1_2 = structure(c(1L, 1L, 1L), .Label = "a", class = "factor")), class = "data.frame", row.names = c(NA,
-3L)), ls = list(n_df_1 = structure(list(var3_1 = structure(c(1L,
1L, 1L), .Label = "x", class = "factor"), var3_2 = c(4, 4, 4)), class = "data.frame", row.names = c(NA,
-3L))), name = c("two", "two", "two")))

Add a new element (tag) to a list in R

I have a dataset that gets generated using the GET method (API Call).
# Issue the GET request, then extract the content of the response.
dataset <- GET("www.ttttyyyyzzzz.com")
contents <- content(dataset)
I extract the data by using the following command
contents<-contents$response$data. This creates a list. A list of list of list.
This is how the list looks on RStudio.
In terms of looking at this as a dataframe, I have 8 rows and 42 columns. What I really want to do is create a 43rd column called 'Year' and add it to each of the 8 high-level elements.
Therefore, the list would now be "List of 43" for all 8 of my elements.
#Ronak asked for a dput - I have created a mock up for the sake of this exercise. With the following command, I want to add a 4th element to my main list called 'p' with a common value of 25 across all lists.
structure(list(m = 1, n = 2, o = 3, structure(list(m = 3, n = 4,
o = 5), .Names = c("m", "n", "o")), structure(list(m = 6,
n = 9, o = 8), .Names = c("m", "n", "o"))), .Names = c("m",
"n", "o", "", ""))
If I got the aim well, we can use:
# Append p = 25 to every top-level element of `contents`.
plyr::llply(
  contents,
  function(element) rlist::list.append(element, p = 25)
)
So we can add a named list element by doing
# lapply() forwards `p = 25` to c(), i.e. it evaluates c(element, p = 25)
# for every element of `lst`.
lapply(lst, c, p = 25)
#$m
# p
# 1 25
#$n
# p
# 2 25
#$o
# p
# 3 25
#[[4]]
#[[4]]$m
#[1] 3
#[[4]]$n
#[1] 4
#[[4]]$o
#[1] 5
#[[4]]$p
#[1] 25
#...
#...
Or with purrr::map
# Same operation with purrr, written with an explicit function instead of
# the formula shorthand.
purrr::map(lst, function(element) c(element, p = 25))
data
# Mock-up: three named scalars followed by two unnamed sub-lists.
lst <- list(
  m = 1,
  n = 2,
  o = 3,
  list(m = 3, n = 4, o = 5),
  list(m = 6, n = 9, o = 8)
)
Here is an option with base R
# Call c(element, p = 25) for each element of `lst1`; the length-one `p` is
# recycled across all elements. SIMPLIFY = FALSE keeps the result a list.
mapply(c, lst1, p = 25, SIMPLIFY = FALSE)
#$m
# p
# 1 25
#$n
# p
# 2 25
#$o
# p
# 3 25
#[[4]]
#[[4]]$m
#[1] 3
#[[4]]$n
#[1] 4
#[[4]]$o
#[1] 5
#[[4]]$p
#[1] 25
#...
data
# Mock-up: three named scalars followed by two unnamed sub-lists.
lst1 <- list(
  m = 1,
  n = 2,
  o = 3,
  list(m = 3, n = 4, o = 5),
  list(m = 6, n = 9, o = 8)
)

Merge two lists with dataframes into one list with merged dataframes

I have two lists of data frames: listA and listB. How to get a list of merged dataframes (listC)?
# Example inputs: two parallel lists of data frames that are to be merged
# pairwise on their row names.
dfA1 <- data.frame(x1 = c("a", "b"), y1 = c(1, 2), row.names = c("1", "2"))
dfA2 <- data.frame(x1 = c("c", "d"), y1 = c(3, 4), row.names = c("1", "3"))
dfB1 <- data.frame(x2 = c("c", "d"), y2 = c(3, 4), row.names = c("1", "2"))
dfB2 <- data.frame(x2 = c("e", "f"), y2 = c(5, 6), row.names = c("2", "3"))
listA <- list(dfA1, dfA2) # first input list
listB <- list(dfB1, dfB2) # second input list
# by = 0 merges on row names; all = TRUE keeps rows present in only one input.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
m1 <- merge(dfA1, dfB1, by = 0, all = TRUE)
m2 <- merge(dfA2, dfB2, by = 0, all = TRUE)
listC <- list(m1, m2) # desired output list
Found following solution:
# Merge the i-th data frame of listA with the i-th data frame of listB on
# their row names (by = 0), keeping unmatched rows from both (all = TRUE).
# Map() always returns a list; TRUE is spelled out because `T`/`F` are
# ordinary variables that can be reassigned.
listC <- Map(function(x, y) merge(x, y, by = 0, all = TRUE), x = listA, y = listB)

Resources