Appending the lists of two lists of lists - r

I have two lists as follows:
list1 <- list(c(`0` = 0L, `25` = 0L, `100` = 1L, `250` = 1L, `500` = 1L,
`1000` = 1L, Infinity = 3L), c(`0` = 0L, `25` = 0L, `100` = 1L,
`250` = 1L, `500` = 1L, Infinity = 4L))
list2 <- list(c(`0` = 0L, `25` = 0L, `100` = 0L, `250` = 2L, `500` = 1L,
`1000` = 1L, Infinity = 3L), c(`0` = 0L, `25` = 0L, `100` = 1L,
`250` = 1L, `500` = 1L, Infinity = 4L))
I would like to append list2[[1]] to list1[[1]] and append list2[[2]] to list1[[2]]. So that:
list_out <- list(c(`0` = 0L, `25` = 0L, `100` = 1L, `250` = 1L, `500` = 1L,
`1000` = 1L, Infinity = 3L, `0` = 0L, `25` = 0L, `100` = 0L, `250` = 2L, `500` = 1L,
`1000` = 1L, Infinity = 3L), c(`0` = 0L, `25` = 0L, `100` = 1L,
`250` = 1L, `500` = 1L, Infinity = 4L, `0` = 0L, `25` = 0L, `100` = 1L,
`250` = 1L, `500` = 1L, Infinity = 4L))
Could anyone explain how I should do this?

You can use lapply and c().
# seq_along() is safe when list1 is empty; 1:length(list1) would yield c(1, 0).
lapply(seq_along(list1), function(x) c(list1[[x]], list2[[x]]))
Or mapply with append or c:
# SIMPLIFY = FALSE guarantees a list result; without it mapply() would
# collapse the output to a matrix if all the vectors shared one length.
mapply(append, list1, list2, SIMPLIFY = FALSE)
Output
[[1]]
0 25 100 250 500 1000 Infinity 0
0 0 1 1 1 1 3 0
25 100 250 500 1000 Infinity
0 0 2 1 1 3
[[2]]
0 25 100 250 500 Infinity 0 25
0 0 1 1 1 4 0 0
100 250 500 Infinity
1 1 1 4
Check if it's identical to your list_out:
# seq_along() instead of 1:length() — identical result, safe on empty lists.
identical(lapply(seq_along(list1), function(x) c(list1[[x]], list2[[x]])), list_out)
[1] TRUE
identical(mapply(append, list1, list2), list_out)
[1] TRUE

Here is another base R solution.
Map(c, list1, list2)
identical(Map(c, list1, list2), list_out)
#[1] TRUE

Another option is map2
library(purrr)
map2(list1, list2, c)

You can use the base function append:
> append(list1[[1]], list2[[1]])
0 25 100 250 500 1000 Infinity 0 25 100
0 0 1 1 1 1 3 0 0 0
250 500 1000 Infinity
2 1 1 3
> append(list1[[2]], list2[[2]])
0 25 100 250 500 Infinity 0 25 100 250
0 0 1 1 1 4 0 0 1 1
500 Infinity
1 4

Related

Filtering rows based on dynamic column count & column name in R

I am having a data frame named inputDf which have the binary values in all the columns other than Rating column.
inputDf <- structure(list(Q1 = c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L), Q2 = c(1L,
1L, 1L, 1L, 1L, 0L, 1L, 0L), Q3 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), Q4 = c(1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L), Q5 = c(1L, 1L, 1L,
1L, 1L, 0L, 0L, 1L), Q6 = c(1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L),
Q7 = c(1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L), Q8 = c(1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L), Q9 = c(1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L
), Q10 = c(0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L), Q11 = c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), Q12 = c(1L, 1L, 1L, 1L, 1L, 1L,
0L, 1L), Rating = c(7L, 7L, 6L, 5L, 6L, 6L, 7L, 5L), RatingBinary = c(1L,
1L, 1L, 0L, 1L, 1L, 1L, 0L)), row.names = c(13L, 17L, 26L,
71L, 72L, 55L, 56L, 69L), class = "data.frame")
I am having another similar data frame named combinationDf
combinationDf <- structure(list(Q1 = c(0L, 0L), Q2 = c(0L, 0L), Q3 = 1:0, Q4 = c(1L,
1L), Q5 = c(0L, 0L), Q6 = c(0L, 0L), Q7 = c(0L, 0L), Q8 = c(0L,
0L), Q9 = c(0L, 0L), Q10 = c(0L, 0L), Q11 = c(1L, 1L), Q12 = 0:1), row.names = 1:2, class =
"data.frame")
The problem statement: for every combination of 1's in each row of combinationDf, I need to filter the matching rows from inputDf.
I implemented the logic by hard-coding the number of columns to be considered for filtering out the data.
# NOTE(review): growing a data frame with rbind() inside a loop copies it
# each iteration (O(n^2)); fine for small data, slow at scale.
finalDf <- data.frame()
# NOTE(review): 1:nrow() misbehaves when nrow() is 0; seq_len(nrow(...)) is safer.
for(i in 1:nrow(combinationDf)){
# Column positions of the 1s in row i of combinationDf ...
ind <- which(combinationDf[i,] == 1)
# ... converted to column names ("Q3", "Q4", ...).
ind <- paste("Q",ind, sep = "")
sample <- inputDf %>%
# NOTE(review): eval(parse()) hard-codes exactly three columns — this is the
# generalization problem the question asks about; it also breaks if a row
# has fewer than three 1s (ind[3] is NA).
dplyr::filter(eval(parse(text=ind[1])) == 1 & eval(parse(text=ind[2])) == 1 & eval(parse(text=ind[3])) == 1) %>%
as.data.frame()
# NOTE(review): <<- writes to the enclosing/global environment; plain <-
# suffices in a top-level for loop.
finalDf <<- rbind(finalDf,sample)
}
However, I'm looking for the general code to filter out the data using N # of columns. i.e, the above code works for filtering using 3 columns. If I need to filter based on 4 columns, I need to add a condition. To overcome that, I used the code below,
sample <- inputDf %>%
dplyr::filter(as.logical(paste(paste0("eval(parse(text = ind[", 1:length(ind), "])) == 1"), collapse = " & "))) %>%
as.data.frame()
This snippet doesn't filter the rows as expected. Can anyone point out the mistake I have made in the above code, or suggest a better approach to achieve the same result?
It may make sense to subset and then do a semi join for filtering
# Accumulate, for each combination row, the inputDf rows that match it.
finalDf <- data.frame()
for (i in seq_len(nrow(combinationDf))) {
  # Keep only the columns that are 1 in row i; semi_join() then keeps the
  # rows of inputDf whose values agree on exactly those columns.
  sample <- inputDf %>%
    semi_join(combinationDf %>% slice(i) %>% select(where(~ .x == 1)))
  # Bug fix: the original assigned to finalDf2, so finalDf never grew and
  # finalDf2 only ever held the last iteration's rows.
  finalDf <- rbind(finalDf, sample)
}
At each loop iteration we select all the columns that are 1 and then just join to extract the matching values from inputDf. This will work with any number of columns. Another way of expressing this without the loop in dplyr is
# Loop-free version: process combinationDf one row at a time via group_map().
combinationDf %>%
# Give every row its own group id so each group is a single row.
group_by(id=1:n()) %>%
# Outer ~.x is the current one-row group; the inner ~.x==1 is a separate
# where() lambda over column values — the two .x's shadow each other.
group_map(~.x %>%
select(where(~.x==1)) %>%
# magrittr dot: the 1-valued columns of this row, used as the join keys.
semi_join(inputDf, .)
) %>%
# Stack the per-row matches into one data frame.
bind_rows()
This may be more readable.
Base R approach :
Use apply in rowwise fashion to go through each row in combinationDf.
Get the column names which has value as 1 in a row.
Subset those columns in inputDf and select rows where all the values are 1.
Combine the list of dataframes into one dataframe.
# Row-wise over combinationDf: for each row x, keep the inputDf rows whose
# values are 1 in every column where x is 1, then stack all the pieces.
# NOTE(review): apply() coerces the data frame to a matrix; that is safe
# here because every column is integer, but it would break with mixed types.
result <- do.call(rbind, apply(combinationDf, 1, function(x)
inputDf[rowSums(inputDf[names(x)[x == 1]] != 1) == 0, ]))
# Drop the compound row names produced by rbind().
rownames(result) <- NULL
result
# Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 Q11 Q12 Rating RatingBinary
#1 0 1 1 1 1 1 1 1 1 0 1 1 7 1
#2 1 1 1 1 1 1 1 1 1 1 1 1 7 1
#3 1 1 1 1 1 1 1 1 1 0 1 1 6 1
#4 1 1 1 1 1 1 1 1 1 1 1 1 5 0
#5 1 1 1 1 1 1 1 1 1 1 1 1 6 1
#6 0 1 1 1 1 1 1 1 1 0 1 1 7 1
#7 1 1 1 1 1 1 1 1 1 1 1 1 7 1
#8 1 1 1 1 1 1 1 1 1 0 1 1 6 1
#9 1 1 1 1 1 1 1 1 1 1 1 1 5 0
#10 1 1 1 1 1 1 1 1 1 1 1 1 6 1
combinationDf %>%
  # Build one "Qi==1 & Qj==1 & ..." filter expression string per row.
  # Bug fix: index names(x) (the 12 Q columns of the row) rather than
  # names(inputDf) (14 names) — the original logical index was silently
  # recycled past position 12.
  apply(1, function(x) paste0(names(x)[x == 1], "==1", collapse = "&")) %>%
  # NOTE(review): eval(parse()) on constructed strings is fragile; the
  # semi_join()/rowSums() approaches above avoid it entirely.
  lapply(function(x) filter(inputDf, eval(parse(text = x)))) %>%
  Reduce(rbind, .)

Split data from matrix with names

I have a 42×14 matrix (a 5×14 example is shown below). Is there an approach by which I can split the data into 42 individual vectors and, at the same time, name them subject1 to subject42?
#expected result (I need subject1 to subject5)
subject1 <- structure(list(`1` = 0L, `2` = 0L, `3` = 1L, `4` = 1L, `5` = 0L,
`6` = 0L, `7` = 1L, `8` = 1L, `9` = 0L, `10` = 1L, `11` = 1L,
`12` = 0L, `13` = 1L, `14` = 0L), row.names = c(NA, -1L), class = c("data.table",
"data.frame"))
structure(list(`1` = c(0L, 1L, 0L, 1L, 0L), `2` = c(0L, 1L, 1L,
0L, 1L), `3` = c(1L, 0L, 0L, 0L, 1L), `4` = c(1L, 1L, 1L, 1L,
0L), `5` = c(0L, 0L, 0L, 0L, 0L), `6` = c(0L, 0L, 1L, 1L, 1L),
`7` = c(1L, 1L, 0L, 1L, 1L), `8` = c(1L, 0L, 1L, 0L, 1L),
`9` = c(0L, 0L, 1L, 0L, 0L), `10` = c(1L, 1L, 0L, 1L, 1L),
`11` = c(1L, 1L, 1L, 0L, 0L), `12` = c(0L, 0L, 1L, 1L, 0L
), `13` = c(1L, 0L, 0L, 0L, 1L), `14` = c(0L, NA, NA, 1L,
0L)), row.names = c(NA, -5L), class = c("data.table", "data.frame"
))
You can use list2env assuming your matrix is named x.
# asplit(x, 1) splits the matrix into a list of its rows; setNames() labels
# them subject1..subjectN; list2env() binds each as a variable in the
# global environment.
list2env(setNames(asplit(x, 1),
paste0("subject", seq_len(nrow(x)))), globalenv())
ls()
#[1] "subject1" "subject2" "subject3" "subject4" "subject5" "x"
subject1
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14
# 0 0 1 1 0 0 1 1 0 1 1 0 1 0
In case you want to remove also NA:
# Same idea, but Filter(f = Negate(is.na), row) drops the NA cells of each
# row before the subject variables are created.
list2env(setNames(lapply(asplit(x, 1), Filter, f = Negate(is.na)),
paste0("subject", seq_len(nrow(x)))), globalenv())
subject2
# 1 2 3 4 5 6 7 8 9 10 11 12 13
# 1 1 0 1 0 0 1 0 0 1 1 0 0
You want each row as one element in the list, right?
Might be prettier solutions out there, but this would do the trick:
# Split each row of `your.data` into its own list element, dropping NA
# cells, then name the elements subject1, subject2, ...
l <- lapply(
  seq_len(nrow(your.data)),  # seq_len() is safe when nrow() is 0; 1:nrow() is not
  function(i) {
    keep <- !is.na(your.data[i, ])  # logical mask of non-NA cells in row i
    your.data[i, which(keep)]
  }
)
names(l) <- paste0("subject", seq_along(l))

How do I convert this adjacency matrix into a graph object?

I have a matrix that represents social interaction data on a CSV, which looks like below:
`0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
0 0 29 1 0 1 9 3 0 1 4
1 1 0 0 1 3 1 0 1 1 1
2 1 1 0 13 4 0 1 1 15 0
3 3 0 1 0 1 1 7 1 1 1
4 1 0 1 98 0 1 1 1 1 2
5 2 5 1 1 3 0 2 0 1 5
6 1 1 0 0 12 1 0 2 1 1
7 1 1 0 1 0 1 9 0 1 2
8 1 1 17 13 145 1 39 1 0 1
9 88 23 1 5 1 2 1 7 1 0
I am new to social network analysis, so I am not sure of my terminology, but this seems like a weighted adjacency matrix to me, as we can say from this that student 1 has had 29 interactions with student 0 in the last year. I had this object stored as a data-frame in my RStudio, but when I ran the following code, I received the below error:
> fn <- graph_from_adjacency_matrix(output, weighted = T)
Error in mde(x) : 'list' object cannot be coerced to type 'double'
I've tried converting it to matrix, but that does not seem to work either. Any help concerning this would be really appreciated.
You need to convert your data.frame to matrix first and then apply graph_from_adjacency_matrix, e.g.,
g <- graph_from_adjacency_matrix(as.matrix(df),weighted = TRUE)
and plot(g) gives
Data
> dput(df)
structure(list(``0`` = c(0L, 1L, 1L, 3L, 1L, 2L, 1L, 1L, 1L,
88L), ``1`` = c(29L, 0L, 1L, 0L, 0L, 5L, 1L, 1L, 1L, 23L), ``2`` = c(1L,
0L, 0L, 1L, 1L, 1L, 0L, 0L, 17L, 1L), ``3`` = c(0L, 1L, 13L,
0L, 98L, 1L, 0L, 1L, 13L, 5L), ``4`` = c(1L, 3L, 4L, 1L, 0L,
3L, 12L, 0L, 145L, 1L), ``5`` = c(9L, 1L, 0L, 1L, 1L, 0L, 1L,
1L, 1L, 2L), ``6`` = c(3L, 0L, 1L, 7L, 1L, 2L, 0L, 9L, 39L, 1L
), ``7`` = c(0L, 1L, 1L, 1L, 1L, 0L, 2L, 0L, 1L, 7L), ``8`` = c(1L,
1L, 15L, 1L, 1L, 1L, 1L, 1L, 0L, 1L), ``9`` = c(4L, 1L, 0L, 1L,
2L, 5L, 1L, 2L, 1L, 0L)), class = "data.frame", row.names = c("0",
"1", "2", "3", "4", "5", "6", "7", "8", "9"))

Find the count of a specific value per column where that count is greater than a given frequency in R

I'm trying to get the frequency distribution for a list if it's over a certain number. In my data, I have multiple columns and I want to generate a code that identifies the frequency of "0" in each column where "0" is greater than 3.
My dataset is like this:
a b c d e f g h
0 1 0 1 1 1 1 1
2 0 0 0 0 0 0 0
0 1 2 2 2 1 0 1
0 0 0 0 1 0 0 0
1 0 2 1 1 0 0 0
1 1 0 0 1 0 0 0
0 1 2 2 2 2 2 2
```
The output of the code that I need is :
```
Variable Frequency
a 4
c 4
f 4
g 5
h 4
```
So this will show the number of "0" values in each column of the data frame, but only where that count is greater than 3.
Thank you.
You can use colSums to count number of 0's in each column and subset the values which are greater than 3.
subset(stack(colSums(df == 0, na.rm = TRUE)), values > 3)
tidyverse way would be :
library(dplyr)
df %>%
summarise(across(.fns = ~sum(. == 0, na.rm = TRUE))) %>%
tidyr::pivot_longer(cols = everything()) %>%
filter(value > 3)
# name value
# <chr> <int>
#1 a 4
#2 c 4
#3 f 4
#4 g 5
#5 h 4
data
df <- structure(list(a = c(0L, 2L, 0L, 0L, 1L, 1L, 0L), b = c(1L, 0L,
1L, 0L, 0L, 1L, 1L), c = c(0L, 0L, 2L, 0L, 2L, 0L, 2L), d = c(1L,
0L, 2L, 0L, 1L, 0L, 2L), e = c(1L, 0L, 2L, 1L, 1L, 1L, 2L), f = c(1L,
0L, 1L, 0L, 0L, 0L, 2L), g = c(1L, 0L, 0L, 0L, 0L, 0L, 2L), h = c(1L,
0L, 1L, 0L, 0L, 0L, 2L)), class = "data.frame", row.names = c(NA, -7L))

reshaping data (a faster way)

I came across a table of freq. counts today I had to expand into a data frame of raw values. I was able to do it but was wondering if there's a faster way using the reshape package or data.table?
The original table looked like this:
i1 i2 i3 i4 m f
1 0 0 0 0 22 29
2 1 0 0 0 30 50
3 0 1 0 0 13 15
4 0 0 1 0 1 6
5 1 1 0 0 24 67
6 1 0 1 0 5 12
7 0 1 1 0 1 2
8 1 1 1 0 10 22
9 0 0 0 1 10 7
10 1 0 0 1 27 30
11 0 1 0 1 14 4
12 0 0 1 1 1 0
13 1 1 0 1 54 63
14 1 0 1 1 8 10
15 0 1 1 1 8 6
16 1 1 1 1 57 51
Here's an easy grab of the data using dput:
dat <- structure(list(i1 = c(0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 1L, 1L, 0L, 1L), i2 = c(0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L), i3 = c(0L, 0L, 0L, 1L, 0L, 1L,
1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L), i4 = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), m = c(22L, 30L,
13L, 1L, 24L, 5L, 1L, 10L, 10L, 27L, 14L, 1L, 54L, 8L, 8L, 57L
), f = c(29L, 50L, 15L, 6L, 67L, 12L, 2L, 22L, 7L, 30L, 4L, 0L,
63L, 10L, 6L, 51L)), .Names = c("i1", "i2", "i3", "i4", "m",
"f"), class = "data.frame", row.names = c(NA, -16L))
My approach(s) to reshape the data (is there a faster way?):
#step 1: method 1 (in this case binding and stacking uses less code than reshape)
dat2 <- data.frame(rbind(dat[,1:4], dat[, 1:4]),
sex = rep(c('m', 'f'), each=16),
n = c(dat$m, dat$f))
dat2
#step 1: method 2
dat3 <- reshape(dat, direction = "long", idvar = 1:4,
varying = list(c("m", "f")),
v.names = c("n"),
timevar = "sex",
times = c("m", "f"))
rownames(dat3) <- 1:nrow(dat3)
dat3 <- data.frame(dat3)
dat3$sex <- as.factor(dat3$sex)
all.equal(dat3, dat2) #just to show both method 1 and 2 give the same data frame
#step 2
dat4 <- dat2[rep(seq_len(nrow(dat2)), dat2$n), 1:5]
rownames(dat4) <- 1:nrow(dat4)
dat4
I assume this is a common problem as when you want to take a table from an article and reproduce it, it requires some unpacking. I am finding myself doing this more and more and want to make sure I'm being efficient.
Here is a one-liner.
dat2 <- ddply(dat, 1:4, summarize, sex = c(rep('m', m), rep('f', f)))
And here's a base R one-liner.
dat2 <- cbind(dat[c(rep(1:nrow(dat), dat$m), rep(1:nrow(dat), dat$f)),1:4],
sex=c(rep("m",sum(dat$m)), rep("f", sum(dat$f))))
Or, a little more generally:
d1 <- dat[,1:4]
d2 <- as.matrix(dat[,5:6])
dat2 <- cbind(d1[rep(rep(1:nrow(dat), ncol(d2)), d2),],
sex=rep(colnames(d2), colSums(d2)))
Given that nobody has posted a data.table solution (as suggested in the original question)
library(data.table)
DT <- as.data.table(dat)
DT[,list(sex = rep(c('m','f'),c(m,f))), by= list(i1,i2,i3,i4)]
Or, even more succinctly
DT[,list(sex = rep(c('m','f'),c(m,f))), by= 'i1,i2,i3,i4']
I would use melt for the first step and ddply for the second.
library(reshape2)
library(plyr)
d <- ddply(
melt(dat, id.vars=c("i1","i2","i3","i4"), variable.name="sex"),
c("i1","i2","i3","i4","sex"),
summarize,
id=rep(1,value)
)
d$id <- cumsum(d$id)

Resources