Appending lists with corresponding values from tibbles - r

Suppose my data is organized as a list of tibbles, with a corresponding tibble that provides further info. Thus, row "a" in infos refers to tibble "a" in the list.
list_in <- list(a=tibble(I=c(6:10),
II=c(2:6),
III=letters[1:5]),
b=tibble(I=c(1:5),
II=c(2:6),
III=letters[2:6]),
c=tibble(I=c(7:11),
II=c(3:7),
III=letters[5:9]))
infos <- tibble(id=c("a","b","c"),
weights=c(1:3),
grades=letters[4:6])
In order to do further calculations, is there a way to use lapply or a loop to append to list_in, so that list_out also contains the corresponding values from infos? The expected output would look like this:
# install.packages("rlist")
library(rlist)
list_out <- list((list.append(list_in$a, weights=infos$weights[1], grades=infos$grades[1])),
(list.append(list_in$b, weights=infos$weights[2], grades=infos$grades[2])),
(list.append(list_in$c, weights=infos$weights[3], grades=infos$grades[3])))
but this way to get there feels very awkward and only works for very small data sets.
Thanks in advance!

You can use lapply and c() to append each tibble with the corresponding row of infos.
# For every name in list_in, look up the infos row with the same id, drop its
# id column, and concatenate the remaining columns onto the tibble's columns.
# c() combines both into one plain list per element.
list_out2 <- lapply(names(list_in), function(nm) {
  extra <- infos[infos$id == nm, -1]
  c(list_in[[nm]], extra)
})
all.equal(list_out, list_out2)
# [1] TRUE
list_out2
[[1]]
[[1]]$I
[1] 6 7 8 9 10
[[1]]$II
[1] 2 3 4 5 6
[[1]]$III
[1] "a" "b" "c" "d" "e"
[[1]]$weights
[1] 1
[[1]]$grades
[1] "d"
[[2]]
[[2]]$I
[1] 1 2 3 4 5
[[2]]$II
[1] 2 3 4 5 6
[[2]]$III
[1] "b" "c" "d" "e" "f"
[[2]]$weights
[1] 2
[[2]]$grades
[1] "e"
[[3]]
[[3]]$I
[1] 7 8 9 10 11
[[3]]$II
[1] 3 4 5 6 7
[[3]]$III
[1] "e" "f" "g" "h" "i"
[[3]]$weights
[1] 3
[[3]]$grades
[1] "f"

You can do a left_join between the tibble in the list and the extra info:
# Attach the matching row of `infos` to the tibble stored under name `n` in
# `list_in`: tag every row with the id, join on it, then drop the helper
# column again.
append_info <- function(n) {
  tagged <- mutate(list_in[[n]], id = n)
  joined <- left_join(tagged, infos, by = 'id')
  select(joined, -id)
}
lapply(names(list_in), append_info)

Using Map
Map(c, list_in, split(infos[-1], infos$id))
-output
$a
$a$I
[1] 6 7 8 9 10
$a$II
[1] 2 3 4 5 6
$a$III
[1] "a" "b" "c" "d" "e"
$a$weights
[1] 1
$a$grades
[1] "d"
$b
$b$I
[1] 1 2 3 4 5
$b$II
[1] 2 3 4 5 6
$b$III
[1] "b" "c" "d" "e" "f"
$b$weights
[1] 2
$b$grades
[1] "e"
$c
$c$I
[1] 7 8 9 10 11
$c$II
[1] 3 4 5 6 7
$c$III
[1] "e" "f" "g" "h" "i"
$c$weights
[1] 3
$c$grades
[1] "f"

Related

Append vector not giving names

In RStudio, I am looking to create a vector of country names. They are stored in column 1 of my data set. countryvec gives the factor names
"Australia Australia ..."
x just gives the names of Russia, country 36, country ends up being
1,1,...,2,2,...,4,4.. etc.
They are also not in order, 3 ends up between 42 and 43. How do I make the numbers the factors?
gdppc=read.xlsx("H:/dissertation/ALL/YAS.xlsx",sheetIndex = 1,startRow = 1)
countryvec=gdppc[,1]
country=c()
for (j in 1:43){
x=rep(countryvec[j],25)
country=append(country,x)
}
You need to retrieve the levels attribute
set.seed(7)
v <- factor(letters[rbinom(20, 10, .5)])
> c(v)
[1] 6 4 2 2 3 5 3 6 2 4 2 3 5 2 4 2 4 1 6 3
> levels(v)[v]
[1] "h" "e" "c" "c" "d" "f" "d" "h" "c" "e" "c" "d" "f" "c" "e" "c" "e" "a" "h" "d"
You'll probably need to modify the code inside the loop to:
x <- rep(levels(countryvec)[countryvec][j], 25)
Or convert the vector prior to the loop:
countryvec <- levels(countryvec)[countryvec]

Filtering list in R

I have a list
A <- c(1,2,3,4,5,6,7,8,9,10)
B <- c("a" ,"b", "c" ,"d","b" ,"f" ,"g" ,"a" ,"b" ,"a")
C <- c(25, 26, 27, 28, 29, 30, 31, 32, 10, 15)
mylist <- list(A,B,C)
mylist
[[1]]
[1] 1 2 3 4 5 6 7 8 9 10
[[2]]
[1] "a" "b" "c" "d" "b" "f" "g" "a" "b" "a"
[[3]]
[1] 25 26 27 28 29 30 31 32 10 15
I would like to select all components A,B,C of the list where second component B has value "a" or "b" .
Sample output
mylist
[[1]]
[1] 1 2 5 8 9 10
[[2]]
[1] "a" "b" "b" "a" "b" "a"
[[3]]
[1] 25 26 29 32 10 15
How can I do that? Note that each component has the same length.
To stay with a list, why not simply:
lapply(mylist, `[`, is.element(B, letters[1:2]))
#[[1]]
#[1] 1 2 5 8 9 10
#[[2]]
#[1] "a" "b" "b" "a" "b" "a"
#[[3]]
#[1] 25 26 29 32 10 15
I would go with a data.frame or data.table for this use case:
Using your original list (with a 10 added to A to have the same number of entries as B and C):
>df <- data.frame(A=mylist[[1]],B=mylist[[2]],C=mylist[[3]],stringsAsFactors=F)
> df[df$B %in% c("a","b"),]
A B C
1 1 a 25
2 2 b 26
5 5 b 29
8 8 a 32
9 9 b 10
10 10 a 15
This will subset the data.frame by where B values are a or b. If you build your list at first, you may avoid the list step and build the data.frame directly.
If you really want a list at end:
> as.list(df[df$B %in% c("a","b"),])
$A
[1] 1 2 5 8 9 10
$B
[1] "a" "b" "b" "a" "b" "a"
$C
[1] 25 26 29 32 10 15
If you wish to avoid the named entries, use unname: as.list(unname(df[..]))
Here is a simple solution.
First, I create mylist :
mylist <- list(1:10, letters[1:10], 25:15)
Then I create a function which returns TRUE if the condition is TRUE and FALSE otherwise
> filt <- function(x) {
+ x[2] %in% c("a", "b")
+ }
>
Then I use sapply to apply the function to mylist and I select only the components I need :
> mylist[sapply(mylist, filt) == TRUE]
[[1]]
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"

Unlist data frame column preserving information from other column

I have a data frame which consists of two column: a character vector col1 and a list column, col2.
# Example data: one label per row plus a list of ragged numeric vectors.
myVector <- LETTERS[1:4]
myList <- list(
  c(1, 4, 6, 7),
  c(2, 7, 3),
  c(5, 5, 3, 9, 6),
  c(7, 9)
)
# Start from the row index, then attach the remaining columns one at a time;
# assigning the list with `$<-` stores it as a list column (one vector per row).
myDataFrame <- data.frame(row = c(1, 2, 3, 4))
myDataFrame$col1 <- myVector
myDataFrame$col2 <- myList
myDataFrame
# row col1 col2
# 1 1 A 1, 4, 6, 7
# 2 2 B 2, 7, 3
# 3 3 C 5, 5, 3, 9, 6
# 4 4 D 7, 9
I want to unlist my col2 still keeping for each element of the vectors in the list the information stored in col1. To phrase it differently, in commonly used data frame reshape terminology: the "wide" list column should be converted to a "long" format.
Then at the end of the day I want two vectors of length equal to length(unlist(myDataFrame$col2)). In code:
# unlist myList
unlist.col2 <- unlist(myDataFrame$col2)
unlist.col2
# [1] 1 4 6 7 2 7 3 5 5 3 9 6 7 9
# unlist myVector to obtain
# unlist.col1 <- ???
# unlist.col1
# [1] A A A A B B B C C C C C D D
I can't think of any straightforward way to get it.
You may also use unnest from package tidyr:
library(tidyr)
unnest(myDataFrame, col2)
# row col1 col2
# (dbl) (chr) (dbl)
# 1 1 A 1
# 2 1 A 4
# 3 1 A 6
# 4 1 A 7
# 5 2 B 2
# 6 2 B 7
# 7 2 B 3
# 8 3 C 5
# 9 3 C 5
# 10 3 C 3
# 11 3 C 9
# 12 3 C 6
# 13 4 D 7
# 14 4 D 9
You can use the "data.table" to expand the whole data.frame, and extract the column of interest.
library(data.table)
## expand the entire data.frame (uncomment to see)
# as.data.table(myDataFrame)[, unlist(col2), by = list(row, col1)]
## expand and select the column of interest:
as.data.table(myDataFrame)[, unlist(col2), by = list(row, col1)]$col1
# [1] "A" "A" "A" "A" "B" "B" "B" "C" "C" "C" "C" "C" "D" "D"
In newer versions of R, you can now use the lengths function instead of the sapply(list, length) approach. The lengths function is considerably faster.
with(myDataFrame, rep(col1, lengths(col2)))
# [1] "A" "A" "A" "A" "B" "B" "B" "C" "C" "C" "C" "C" "D" "D"
Here, the idea is to first get the length of each list element using sapply and then use rep to replicate the col1 with that length
l1 <- sapply(myDataFrame$col2, length)
unlist.col1 <- rep(myDataFrame$col1, l1)
unlist.col1
#[1] "A" "A" "A" "A" "B" "B" "B" "C" "C" "C" "C" "C" "D" "D"
Or, as suggested by @Ananda Mahto, the above could also be done with vapply
with(myDataFrame, rep(col1, vapply(col2, length, 1L)))
#[1] "A" "A" "A" "A" "B" "B" "B" "C" "C" "C" "C" "C" "D" "D"

Convert or transform the list

Input list is:
$A
[1] 25
$B
[1] 22
$C
[1] 25
$D
[1] 26
----
Need to convert this to
$25
[1] "A" "C"
$22
[1] "B"
$26
[1] "D"
How do I change the grouping? Please help me.
If your list is called "L" (example below), try:
L <- list(A = 25, B = 22, C = 25, D = 26)
split(names(L), unlist(L))
# $`22`
# [1] "B"
#
# $`25`
# [1] "A" "C"
#
# $`26`
# [1] "D"
You could also try with(stack(L), split(as.character(ind), values)).

Matching values from two vectors in R

I have two vectors:
A <- c(1,3,5,6,4,3,2,3,3,3,3,3,4,6,7,7,5,4,4,3) # 7 unique values
B <- c("a","b","c","d","e","f","g") # 7 different values
I would like to match the values of B to A such that the smallest value in A gets the first value from B and continued on to the largest.
The above example would be:
A: 1 3 5 6 4 3 2 3 3 3 3 3 4 6 7 7 5 4 4 3
assigned: a c e f d c b c c c c c d f g g e d d c
Try this:
A <- c(1,3,5,6,4,3,2,3,3,3,3,3,4,6,7,7,5,4,4,3)
B <- letters[1:7]
B[match(A, sort(unique(A)))]
# [1] "a" "c" "e" "f" "d" "c" "b" "c" "c" "c" "c" "c" "d" "f" "g"
# [16] "g" "e" "d" "d" "c"
Another option that handles the general case that @JoshO'Brien addresses would be
B[as.numeric(factor(A))]
# [1] "a" "c" "e" "f" "d" "c" "b" "c" "c" "c" "c" "c" "d"
# [14] "f" "g" "g" "e" "d" "d" "c"
A2<-ifelse(A > 4, A + 1, A)
# [1] 1 3 6 7 4 3 2 3 3 3 3 3 4 7 8 8 6 4 4 3
B[as.numeric(factor(A2))]
# [1] "a" "c" "e" "f" "d" "c" "b" "c" "c" "c" "c" "c" "d"
# [14] "f" "g" "g" "e" "d" "d" "c"
However, the following benchmark shows that this method is slower than @JoshOBrien's.
library(microbenchmark)
B <- make.unique(rep(letters, length.out=1000))
A <- sample(seq_along(B), replace=TRUE)
# Look up each value of A among A's sorted distinct values and use the
# resulting positions to index B.
unique_sort_match <- function() {
  positions <- match(A, sort(unique(A)))
  B[positions]
}
# Same mapping, using the numeric codes of factor(A) as the index into B.
factor_as.numeric <- function() {
  codes <- as.numeric(factor(A))
  B[codes]
}
bm<-microbenchmark(unique_sort_match(), factor_as.numeric(), times=1000L)
plot(bm)
To elaborate on the comments in @Josh's answer:
If A does in fact represent a permutation of the elements of B (i.e., where a 1 in A represents the first element of B, a 4 in A represents the 4th element of B, etc.), then, as @Matthew Plourde points out, you would want to simply use A as your index into B:
B[A]
If A does not represent a permutation of B, then you should use the method suggested by @Josh.

Resources