I have a list
# Three parallel vectors of length 10, bundled into an unnamed list.
A <- as.numeric(1:10)
B <- c("a", "b", "c", "d", "b", "f", "g", "a", "b", "a")
C <- c(25:32, 10, 15)
mylist <- list(A, B, C)
mylist
[[1]]
[1] 1 2 3 4 5 6 7 8 9 10
[[2]]
[1] "a" "b" "c" "d" "b" "f" "g" "a" "b" "a"
[[3]]
[1] 25 26 27 28 29 30 31 32 10 15
I would like to select, from every component A, B, C of the list, the elements at the positions where the second component B has the value "a" or "b".
Sample output
mylist
[[1]]
[1] 1 2 5 8 9 10
[[2]]
[1] "a" "b" "b" "a" "b" "a"
[[3]]
[1] 25 26 29 32 10 15
How can I do that? Note that all components have the same length.
To stay with a list, why not simply:
# Build the position mask from the list's own second component (rather than
# the free variable B), then apply the same mask to every component.
lapply(mylist, `[`, mylist[[2]] %in% c("a", "b"))
#[[1]]
#[1] 1 2 5 8 9 10
#[[2]]
#[1] "a" "b" "b" "a" "b" "a"
#[[3]]
#[1] 25 26 29 32 10 15
I would go with a data.frame or data.table for this use case:
Using your original list (with a 10 added to A to have the same number of entries as B and C):
> # Build a data.frame from the three list components; keep B as character.
> # FALSE is spelled out because the shorthand F is an ordinary, reassignable variable.
> df <- data.frame(A = mylist[[1]], B = mylist[[2]], C = mylist[[3]],
+                  stringsAsFactors = FALSE)
> # Keep only the rows whose B value is "a" or "b".
> df[df$B %in% c("a", "b"), ]
A B C
1 1 a 25
2 2 b 26
5 5 b 29
8 8 a 32
9 9 b 10
10 10 a 15
This will subset the data.frame by where B values are a or b. If you build your list at first, you may avoid the list step and build the data.frame directly.
If you really want a list at end:
> as.list(df[df$B %in% c("a","b"),])
$A
[1] 1 2 5 8 9 10
$B
[1] "a" "b" "b" "a" "b" "a"
$C
[1] 25 26 29 32 10 15
If you wish to avoid the named entries, use unname: as.list(unname(df[..]))
Here is a simple solution.
First, I create mylist :
# Example list with three components of equal length (10 elements each);
# 25:16 is used because 25:15 would have 11 elements.
mylist <- list(1:10, letters[1:10], 25:16)
Then I create a function which returns TRUE if the condition is TRUE and FALSE otherwise
# filt(x): TRUE when the second element of x is "a" or "b", FALSE otherwise.
> filt <- function(x) {
+ x[2] %in% c("a", "b")  # inspects only x[2], not every element of x
+ }
>
Then I use sapply to apply the function to each component of mylist and keep only the components for which it returns TRUE (that is, the components whose second element is "a" or "b"):
> mylist[vapply(mylist, filt, logical(1))]  # one TRUE/FALSE per component; no need for == TRUE
[[1]]
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"
Related
Suppose my data is organised as a list of tibbles, together with a separate tibble that provides further information about each of them. Row "a" in infos thus refers to tibble "a" in the list.
# One tibble per id; every tibble has the same three columns I, II and III.
list_in <- list(
  a = tibble(I = 6:10, II = 2:6, III = letters[1:5]),
  b = tibble(I = 1:5, II = 2:6, III = letters[2:6]),
  c = tibble(I = 7:11, II = 3:7, III = letters[5:9])
)
# Lookup table: one row per list element, keyed by id.
infos <- tibble(
  id = c("a", "b", "c"),
  weights = 1:3,
  grades = letters[4:6]
)
In order to do further calculations, is there a way to use lapply or a loop to append list_in, so that list_out also contains the corresponding values from infos? The expected output would look like this:
# install.packages("rlist")
library(rlist)
# Expected result, built by hand: one list.append() call per tibble, each
# adding the weights and grades values from the same position in infos.
list_out <- list((list.append(list_in$a, weights=infos$weights[1], grades=infos$grades[1])),
(list.append(list_in$b, weights=infos$weights[2], grades=infos$grades[2])),
(list.append(list_in$c, weights=infos$weights[3], grades=infos$grades[3])))
but this way to get there feels very awkward and only works for very small data sets.
Thanks in advance!
You can use lapply and c() to append each tibble with the corresponding row of infos.
# For every name in list_in, look up the infos row with that id, drop its
# first column (id) and concatenate the remaining columns onto the tibble.
list_out2 <- lapply(names(list_in), function(key) {
  extra <- infos[infos$id == key, -1]
  c(list_in[[key]], extra)
})
all.equal(list_out, list_out2)
# [1] TRUE
list_out2
[[1]]
[[1]]$I
[1] 6 7 8 9 10
[[1]]$II
[1] 2 3 4 5 6
[[1]]$III
[1] "a" "b" "c" "d" "e"
[[1]]$weights
[1] 1
[[1]]$grades
[1] "d"
[[2]]
[[2]]$I
[1] 1 2 3 4 5
[[2]]$II
[1] 2 3 4 5 6
[[2]]$III
[1] "b" "c" "d" "e" "f"
[[2]]$weights
[1] 2
[[2]]$grades
[1] "e"
[[3]]
[[3]]$I
[1] 7 8 9 10 11
[[3]]$II
[1] 3 4 5 6 7
[[3]]$III
[1] "e" "f" "g" "h" "i"
[[3]]$weights
[1] 3
[[3]]$grades
[1] "f"
You can do a left_join between the tibble in the list and the extra info:
# Append the matching row of a lookup table to one tibble of a named list.
#
# n        Name of the list element to process; also the id to look up.
# tibbles  Named list of tibbles (defaults to the global list_in).
# info     Lookup table with an id column (defaults to the global infos).
#
# Returns tibbles[[n]] with the non-id columns of info joined on.
append_info <- function(n, tibbles = list_in, info = infos) {
  tibbles[[n]] %>%
    mutate(id = n) %>%               # temporary join key
    left_join(info, by = "id") %>%
    select(-id)                      # drop the temporary key again
}
lapply(names(list_in), append_info)
Using Map
# Pair each tibble with its own info row by name: split() orders its groups by
# sorted id, so re-index by names(list_in) to keep the pairing correct even
# when list_in is not in alphabetical order.
Map(c, list_in, split(infos[-1], infos$id)[names(list_in)])
-output
$a
$a$I
[1] 6 7 8 9 10
$a$II
[1] 2 3 4 5 6
$a$III
[1] "a" "b" "c" "d" "e"
$a$weights
[1] 1
$a$grades
[1] "d"
$b
$b$I
[1] 1 2 3 4 5
$b$II
[1] 2 3 4 5 6
$b$III
[1] "b" "c" "d" "e" "f"
$b$weights
[1] 2
$b$grades
[1] "e"
$c
$c$I
[1] 7 8 9 10 11
$c$II
[1] 3 4 5 6 7
$c$III
[1] "e" "f" "g" "h" "i"
$c$weights
[1] 3
$c$grades
[1] "f"
I'm trying to learn how to work with the sjlabelled package in R, and labelled data more generally. I'm trying to overwrite some values in a labelled column without losing the labels.
Here's the output for what I tried for a very simple example:
> library(dplyr)
> library(sjlabelled)
>
> df <- data.frame(col1 = c(1:3, 1:3),
+ col2 = c(seq(11, 33, 11), 11, 12, 13))
>
> df <- df %>%
+ set_labels(col1, labels = c("a" = 1, "b" = 2, "c" = 3)) %>%
+ set_labels(col2, labels = c("A" = 11, "B" = 12, "C" = 13, "D" = 22, "E" = 33))
>
> df
col1 col2
1 1 11
2 2 22
3 3 33
4 1 11
5 2 12
6 3 13
>
> get_labels(df)
$col1
[1] "a" "b" "c"
$col2
[1] "A" "B" "C" "D" "E"
>
> df <- df %>%
+ mutate(col1 = ifelse(col1 > 1 & col2 < 33, 2, as_labelled(col1)))
>
> df
col1 col2
1 1 11
2 2 22
3 3 33
4 1 11
5 2 12
6 2 13
>
> get_labels(df)
$col1
NULL
$col2
[1] "A" "B" "C" "D" "E"
I have used as_labelled to preserve labels in other situations, such as when using rbind for data frames with labelled data, but it isn't working here.
Are either of the following possible using sjlabelled or a similar approach:
a) overwriting values with other values which had already been assigned a label so that labels are preserved and the overwritten value has its label overwritten with the corresponding label (e.g. any values that are overwritten as '2' in the example will now be labelled as "b")?
b) overwriting values with values which weren't in the column (e.g. NA) without losing the labels from the values which were already labelled (e.g. with the example, overwriting '1' and '2', with NA but keeping '3' labelled as "c")?
Thank you.
We could assign as
# Overwrite the selected values in place via logical subassignment; as the
# check below shows, the value labels on col1 are kept (the ifelse() version
# in the question returned a column without them).
df$col1[with(df, col1 > 1 & col2 < 33)] <- 2
-checking
> get_labels(df)
$col1
[1] "a" "b" "c"
$col2
[1] "A" "B" "C" "D" "E"
> df
col1 col2
1 1 11
2 2 22
3 3 33
4 1 11
5 2 12
6 2 13
In RStudio, I am looking to create a vector of country names. They are stored in column 1 of my data set. countryvec gives the factor names:
"Australia Australia ..."
x just gives the names of Russia, country 36, country ends up being
1,1,...,2,2,...,4,4.. etc.
They are also not in order: 3 ends up between 42 and 43. How do I get the country names (the factor labels) instead of these numbers?
# Read the first sheet of the workbook, starting at row 1.
gdppc=read.xlsx("H:/dissertation/ALL/YAS.xlsx",sheetIndex = 1,startRow = 1)
# Column 1 holds the country names (a factor, per the description above).
countryvec=gdppc[,1]
# Build `country` by repeating each of the first 43 entries 25 times.
# NOTE(review): 43 and 25 are presumably the number of countries and of
# observations per country -- confirm against the data.
country=c()
for (j in 1:43){
x=rep(countryvec[j],25)  # 25 copies of the j-th entry
country=append(country,x)  # NOTE(review): appending factor values here appears to be what yields the integer codes
}
You need to retrieve the levels attribute
# Reproducible example factor: 20 binomial draws used as indices into letters.
set.seed(7)
v <- factor(letters[rbinom(n = 20, size = 10, prob = 0.5)])
> as.integer(v)  # integer codes of the factor; c(v) returns a factor since R 4.1.0
[1] 6 4 2 2 3 5 3 6 2 4 2 3 5 2 4 2 4 1 6 3
> levels(v)[v]
[1] "h" "e" "c" "c" "d" "f" "d" "h" "c" "e" "c" "d" "f" "c" "e" "c" "e" "a" "h" "d"
You'll probably need to modify the code inside the loop to:
x <- rep(levels(countryvec)[countryvec][j], 25)
Or convert the vector prior to the loop:
# Replace the factor by its labels. For a factor this gives the same result as
# levels(countryvec)[countryvec], and it also works if the column was already
# read in as character.
countryvec <- as.character(countryvec)
I have a data frame which consists of two column: a character vector col1 and a list column, col2.
# Labels, one per row.
myVector <- c("A", "B", "C", "D")
# Ragged numeric vectors, one per row; stored below as a list column.
myList <- list(
  c(1, 4, 6, 7),
  c(2, 7, 3),
  c(5, 5, 3, 9, 6),
  c(7, 9)
)
# Start from the row index, then attach the character and list columns.
myDataFrame <- data.frame(row = as.numeric(seq_along(myVector)))
myDataFrame$col1 <- myVector
myDataFrame$col2 <- myList
myDataFrame
# row col1 col2
# 1 1 A 1, 4, 6, 7
# 2 2 B 2, 7, 3
# 3 3 C 5, 5, 3, 9, 6
# 4 4 D 7, 9
I want to unlist my col2 still keeping for each element of the vectors in the list the information stored in col1. To phrase it differently, in commonly used data frame reshape terminology: the "wide" list column should be converted to a "long" format.
Then at the end of the day I want two vectors of length equal to length(unlist(myDataFrame$col2)). In code:
# unlist myList
unlist.col2 <- unlist(myDataFrame$col2)
unlist.col2
# [1] 1 4 6 7 2 7 3 5 5 3 9 6 7 9
# unlist myVector to obtain
# unlist.col1 <- ???
# unlist.col1
# [1] A A A A B B B C C C C C D D
I can't think of any straightforward way to get it.
You may also use unnest from package tidyr:
library(tidyr)
unnest(myDataFrame, col2)
# row col1 col2
# (dbl) (chr) (dbl)
# 1 1 A 1
# 2 1 A 4
# 3 1 A 6
# 4 1 A 7
# 5 2 B 2
# 6 2 B 7
# 7 2 B 3
# 8 3 C 5
# 9 3 C 5
# 10 3 C 3
# 11 3 C 9
# 12 3 C 6
# 13 4 D 7
# 14 4 D 9
You can use the "data.table" to expand the whole data.frame, and extract the column of interest.
library(data.table)
## expand the entire data.frame (uncomment to see)
# as.data.table(myDataFrame)[, unlist(col2), by = list(row, col1)]
## expand and select the column of interest:
as.data.table(myDataFrame)[, unlist(col2), by = list(row, col1)]$col1
# [1] "A" "A" "A" "A" "B" "B" "B" "C" "C" "C" "C" "C" "D" "D"
In newer versions of R, you can now use the lengths function instead of the sapply(list, length) approach. The lengths function is considerably faster.
with(myDataFrame, rep(col1, lengths(col2)))
# [1] "A" "A" "A" "A" "B" "B" "B" "C" "C" "C" "C" "C" "D" "D"
Here, the idea is to first get the length of each list element using sapply and then use rep to replicate the col1 with that length
l1 <- sapply(myDataFrame$col2, length)
unlist.col1 <- rep(myDataFrame$col1, l1)
unlist.col1
#[1] "A" "A" "A" "A" "B" "B" "B" "C" "C" "C" "C" "C" "D" "D"
Or, as suggested by @Ananda Mahto, the above could also be done with vapply
with(myDataFrame, rep(col1, vapply(col2, length, 1L)))
#[1] "A" "A" "A" "A" "B" "B" "B" "C" "C" "C" "C" "C" "D" "D"
Input list is:
$A
[1] 25
$B
[1] 22
$C
[1] 25
$D
[1] 26
----
Need to convert this to
$25
[1] "A" "C"
$22
[1] "B"
$26
[1] "D"
How do I change the grouping? Please help me.
If your list is called "L" (example below), try:
# Example input: a named list holding one number per name.
L <- list(
  A = 25,
  B = 22,
  C = 25,
  D = 26
)
# Group the names by their value; the result is keyed by value.
split(x = names(L), f = unlist(L))
# $`22`
# [1] "B"
#
# $`25`
# [1] "A" "C"
#
# $`26`
# [1] "D"
You could also try with(stack(L), split(as.character(ind), values)).