Transpose a list of dataframes like dataframes with headers - r

I have a list of dataframes and my goal is to transpose them and bind them into one. How could I do this? Below is my list:
$pri
$pri$x
a b
1 1 3
2 2 4
$pri$y
a b c
1 1 3 5
2 2 4 6
$sec
$sec$w
a b
1 7 9
2 8 10
$sec$z
a b c d
1 11 13 15 17
2 12 14 16 18
I aim the output like this
"col1" "col2"
a ; 1 ; 2
b ; 3 ; 4
a ; 1 ; 2
b ; 3 ; 4
c ; 5 ; 6
a ; 7 ; 8
b ; 9 ; 10
a ; 11 ; 12
b ; 13 ; 14
c ; 15 ; 16
d ; 17 ; 18

library(purrr)
pri <-
list(
x = data.frame(a = 1:2, b = 3:4),
y = data.frame(a = 1:2, b = 3:4, c = 5:6)
)
sec <-
list(
w = data.frame(a = 7:8, b = 9:10),
z = data.frame(a = 11:12, b = 13:14, c = 15:16, d = 17:18)
)
list(pri = pri, sec = sec) %>% flatten() %>% map(t) %>% reduce(rbind)
#> [,1] [,2]
#> a 1 2
#> b 3 4
#> a 1 2
#> b 3 4
#> c 5 6
#> a 7 8
#> b 9 10
#> a 11 12
#> b 13 14
#> c 15 16
#> d 17 18
Created on 2020-03-12 by the reprex package (v0.3.0)

Assuming your data given like this (According to your question):
frame_list <- list(pri = list(x = structure(list(a = 1:2, b = 3:4), class = "data.frame", row.names = c(NA,
-2L)), y = structure(list(a = 1:2, b = 3:4, c = 5:6), class = "data.frame", row.names = c(NA,
-2L))), sec = list(w = structure(list(a = 7:8, b = 9:10), class = "data.frame", row.names = c(NA,
-2L)), z = structure(list(a = 11:12, b = 13:14, c = 15:16, d = 17:18), class = "data.frame", row.names = c(NA,
-2L))))
then you can do:
# Bind all nested data frames side by side and transpose, so that every
# original column becomes one row of the resulting matrix.
df <- t(do.call('cbind', unlist(frame_list, recursive = FALSE)))
# Restore the plain column names as row names. Taking them directly from the
# data frames (instead of stripping the "pri.x." style prefix with a regex)
# also works when the list names are longer than one character or when the
# column names themselves contain dots.
rownames(df) <- unlist(
  lapply(unlist(frame_list, recursive = FALSE), names),
  use.names = FALSE
)
Note: The output will be a matrix. If you need to convert it to a dataframe, you can use data.frame, but this will change the rownames by appending a number to each of them to make them unique.
Output:
[,1] [,2]
a 1 2
b 3 4
a 1 2
b 3 4
c 5 6
a 7 8
b 9 10
a 11 12
b 13 14
c 15 16
d 17 18
In case you want it into a dataframe, then you can do:
# Transpose the column-bound data frames and convert the matrix to a data
# frame; the value columns are named X1, X2, ... by data.frame().
df <- data.frame(
  t(do.call('cbind', unlist(frame_list, recursive = FALSE))),
  stringsAsFactors = FALSE
)
# Store the original column names in their own column. They are read straight
# from the data frames rather than recovered from the row names with a regex,
# which only worked for one-character inner list names.
df$newcol <- unlist(
  lapply(unlist(frame_list, recursive = FALSE), names),
  use.names = FALSE
)
# Drop the (prefixed, made-unique) row names in favour of the default 1..n.
rownames(df) <- NULL
Output:
X1 X2 newcol
1 1 2 a
2 3 4 b
3 1 2 a
4 3 4 b
5 5 6 c
6 7 8 a
7 9 10 b
8 11 12 a
9 13 14 b
10 15 16 c
11 17 18 d

you could also solve your problem using base R functions as follows:
dfs <- list(pri = list(x = structure(list(a = 1:2, b = 3:4), class = "data.frame", row.names = c(NA,
-2L)), y = structure(list(a = 1:2, b = 3:4, c = 5:6), class = "data.frame", row.names = c(NA,
-2L))), sec = list(w = structure(list(a = 7:8, b = 9:10), class = "data.frame", row.names = c(NA,
-2L)), z = structure(list(a = 11:12, b = 13:14, c = 15:16, d = 17:18), class = "data.frame", row.names = c(NA,
-2L))))
t(Reduce(cbind, unlist(dfs, FALSE)))
# [,1] [,2]
# a 1 2
# b 3 4
# a 1 2
# b 3 4
# c 5 6
# a 7 8
# b 9 10
# a 11 12
# b 13 14
# c 15 16
# d 17 18

Related

Reshaping wide to long with every n columns

Suppose I have a dataframe:
dw <- read.table(header=T, text='
ID q1 q2 q3 q4 q5 ...q10
A 10 6 50 10 bA
B 12 5 70 11 bB
C 20 7 20 8 bC
D 22 8 22 9 bD
')
I would like to move every 2 columns after 'ID' to new rows so it looks like:
ID q1 q2
A 10 6
B 12 5
C 20 7
D 22 8
A 50 10
B 70 11
C 20 8
D 22 9
....
pivot_longer seems to move every single column instead of multiple columns?
It seems that you are not concerned with the column names (other than ID), and that they are all the same class. For this, we can "pivot" manually, perhaps without the safeguards or power of pivot_longer, but without its requirements as well.
The first step is to make sure that class won't be an issue; because you have some strings in there, we need to convert all to character:
dw[-1] <- lapply(dw[-1], as.character)
After that, we can manually extract every two (non-ID) columns and combine with ID:
cols <- seq_along(dw)[-1]
list_of_frames <- lapply(split(cols, cols %/% 2), function(ind) setNames(dw[,c(1, ind)], c("ID", "q1", "q2")))
list_of_frames
# $`1`
# ID q1 q2
# 1 A 10 6
# 2 B 12 5
# 3 C 20 7
# 4 D 22 8
# $`2`
# ID q1 q2
# 1 A 50 10
# 2 B 70 11
# 3 C 20 8
# 4 D 22 9
# $`3`
# ID q1 q2
# 1 A bA zA
# 2 B bB zB
# 3 C bC zC
# 4 D bD zD
This can be easily combined with several methods, choose one of:
data.table::rbindlist(list_of_frames)
dplyr::bind_rows(list_of_frames)
do.call(rbind, list_of_frames)
# ID q1 q2
# 1 A 10 6
# 2 B 12 5
# 3 C 20 7
# 4 D 22 8
# 5 A 50 10
# 6 B 70 11
# 7 C 20 8
# 8 D 22 9
# 9 A bA zA
# 10 B bB zB
# 11 C bC zC
# 12 D bD zD
Data
dw <- structure(list(ID = c("A", "B", "C", "D"), q1 = c("10", "12", "20", "22"), q2 = c("6", "5", "7", "8"), q3 = c("50", "70", "20", "22"), q4 = c("10", "11", "8", "9"), q5 = c("bA", "bB", "bC", "bD"), q6 = c("zA", "zB", "zC", "zD")), row.names = c(NA, -4L), class = "data.frame")
Another option:
data.frame(ID = dw$ID,
q1 = unlist(dw[,seq(2, ncol(dw), 2)], use.names = FALSE),
q2 = unlist(dw[,seq(3, ncol(dw), 2)], use.names = FALSE))
With data:
dw <- structure(list(ID = c("A", "B", "C", "D"),
q1 = c(10L, 12L, 20L, 22L),
q2 = c(6L, 5L, 7L, 8L),
q3 = c(50L, 70L, 20L, 22L),
q4 = c(10L, 11L, 8L, 9L),
q5 = c("bA", "bB", "bC", "bD"),
q6 = c("cc", "dd", "ee", "ff"))
, class = "data.frame", row.names = c(NA, -4L))
data.frame(ID = dw$ID,
q1 = unlist(dw[,seq(2, ncol(dw), 2)], use.names = FALSE),
q2 = unlist(dw[,seq(3, ncol(dw), 2)], use.names = FALSE))
#> ID q1 q2
#> 1 A 10 6
#> 2 B 12 5
#> 3 C 20 7
#> 4 D 22 8
#> 5 A 50 10
#> 6 B 70 11
#> 7 C 20 8
#> 8 D 22 9
#> 9 A bA cc
#> 10 B bB dd
#> 11 C bC ee
#> 12 D bD ff
Or more generally:
n <- 3L # operate on every 3 columns
data.frame(
setNames(
c(
list(dw[,1]),
lapply(
2:(n + 1L),
function(i) unlist(dw[,seq(i, ncol(dw), n)], TRUE, FALSE)
)
),
names(dw)[1:(n + 1L)]
)
)
#> ID q1 q2 q3
#> 1 A 10 6 50
#> 2 B 12 5 70
#> 3 C 20 7 20
#> 4 D 22 8 22
#> 5 A 10 bA cc
#> 6 B 11 bB dd
#> 7 C 8 bC ee
#> 8 D 9 bD ff
The melt(...) method for data.table allows for melting groups of columns. Using dw from @r2evans's answer:
library(data.table)
setDT(dw)
result <- melt(dw, measure.vars = list(seq(2, ncol(dw), 2), seq(3, ncol(dw), 2)))
result[, variable:=NULL]
result
## ID value1 value2
## 1: A 10 6
## 2: B 12 5
## 3: C 20 7
## 4: D 22 8
## 5: A 50 10
## 6: B 70 11
## 7: C 20 8
## 8: D 22 9
## 9: A bA zA
## 10: B bB zB
## 11: C bC zC
## 12: D bD zD
melt(...) introduces a column variable which keeps track of the location of the original columns in the wide dataset. You don't seem to care about that so it's removed. If there are indeed different classes (integer, character) melt(...) will take care of that with a warning.

How to stack rows and create a new variable in R

Here is a small sample of my data
AB AN AQ AP AA
1 O1 N 12 13
2 K1 B 22 16
I want to generate this table
AB AN AQ New AP
1 O1 N 1 12
1 O1 N 2 13
2 K1 B 1 22
2 K1 B 2 16
The logic is to stack the same data in AB, AN and AQ, then generate a new column which gets 1, followed by AP.
Below this row, the same data is repeated, but the New column gets 2, followed by AA. So the new column takes the values 1 and 2.
An option with reshape from base R
names(df)[4:5] <- paste0("AP", 1:2)
reshape(df, direction = "long", varying = 4:5, sep= "", timevar = "New")
# AB AN AQ New AP id
#1.1 1 O1 N 1 12 1
#2.1 2 K1 B 1 22 2
#1.2 1 O1 N 2 13 1
#2.2 2 K1 B 2 16 2
data
df <- structure(list(AB = 1:2, AN = c("O1", "K1"), AQ = c("N", "B"),
AP = c(12L, 22L), AA = c(13L, 16L)),
class = "data.frame", row.names = c(NA, -2L))
You can get the data in long format and then generate a new column based on unique column values.
library(dplyr)
library(tidyr)
df %>%
pivot_longer(cols = c(AP, AA),
values_to = 'AP',
names_to = 'New') %>%
mutate(New = match(New, unique(New)))
# AB AN AQ New AP
# <int> <chr> <chr> <int> <int>
#1 1 O1 N 1 12
#2 1 O1 N 2 13
#3 2 K1 B 1 22
#4 2 K1 B 2 16
data
df <- structure(list(AB = 1:2, AN = c("O1", "K1"), AQ = c("N", "B"),
AP = c(12L, 22L), AA = c(13L, 16L)),
class = "data.frame", row.names = c(NA, -2L))

How do you find if a number is between a range of multiple mins and max numbers

In R I have:
DataSet1
A
1
4
13
19
22
DataSet2
(min)B (max)C
4 6
8 9
12 15
16 18
I am looking to set up a binary column D based on whether A is between B and C.
So D would added to dataset 1 and calculated as follows:
A D
1 0
4 1
13 1
19 0
22 0
I have tried using the InRange function, but it only checks against a single row of B and C rather than all of the intervals.
Any help would be much appreciated.
enter image description here
Here is one option using fuzzy_left_join
library(fuzzyjoin)
library(dplyr)
# Left-join every A to the intervals of df2 that contain it, treating both
# bounds as inclusive (B <= A <= C) so that a value equal to the maximum C
# also counts as "between".
# Rows without a matching interval get NA for B and C, which maps to D = 0.
df1 %>%
  fuzzy_left_join(df2,
    by = c("A" = "B", "A" = "C"),
    match_fun = list(`>=`, `<=`)
  ) %>%
  mutate(D = ifelse(is.na(B) & is.na(C), 0, 1))
A B C D
1 1 NA NA 0
2 4 4 6 1
3 13 12 15 1
4 19 NA NA 0
5 22 NA NA 0
Data
df1 <- structure(list(A = c(1L, 4L, 13L, 19L, 22L)), class = "data.frame", row.names = c(NA, -5L))
df2 <- structure(list(B = c(4L, 8L, 12L, 16L), C = c(6L, 9L, 15L, 18L)), class = "data.frame", row.names = c(NA, -4L))
Here's a way using sapply from base R -
# Flag each value of A with 1L when it lies inside at least one closed
# interval [B, C] of df2, and with 0L otherwise (unary + turns the logical
# from any() into an integer).
# vapply() pins the result to an integer vector, so a zero-row df1 yields
# integer(0) rather than the empty list sapply() would return.
df1$D <- vapply(df1$A, function(x) {
  +any(x >= df2$B & x <= df2$C)
}, integer(1))
df1
A D
1 1 0
2 4 1
3 13 1
4 19 0
5 22 0

How to append group row into dataframe

I have this df1:
A B C
1 2 3
5 7 9
where A B C are columns names.
I have another df2 with one column:
A
1
2
3
4
I would like to append df2 for each column of df1, creating this final dataframe:
A B C
1 2 3
5 7 9
1 1 1
2 2 2
3 3 3
4 4 4
is it possible to do it?
data.frame(sapply(df1, c, unlist(df2)), row.names = NULL)
# A B C
#1 1 2 3
#2 5 7 9
#3 1 1 1
#4 2 2 2
#5 3 3 3
#6 4 4 4
DATA
df1 = structure(list(A = c(1L, 5L), B = c(2L, 7L), C = c(3L, 9L)), .Names = c("A",
"B", "C"), class = "data.frame", row.names = c(NA, -2L))
df2 = structure(list(A = 1:4), .Names = "A", class = "data.frame", row.names = c(NA,
-4L))
We can replicate df2 for the number of columns of df1, unname it, then rbind it.
rbind(df1, unname(rep(df2, ncol(df1))))
# A B C
# 1 1 2 3
# 2 5 7 9
# 3 1 1 1
# 4 2 2 2
# 5 3 3 3
# 6 4 4 4
Data:
df1 <- structure(list(A = c(1L, 5L), B = c(2L, 7L), C = c(3L, 9L)), .Names = c("A",
"B", "C"), class = "data.frame", row.names = c(NA, -2L))
df2 <- structure(list(A = 1:4), .Names = "A", row.names = c(NA, -4L), class = "data.frame")
We can use base R methods
# Repeat the single column of df2 once per column of df1 (ncol(df1) rather
# than a hard-coded 3, so the snippet keeps working when df1 changes width),
# label the copies with df1's column names, and stack them under df1.
rbind(
  df1,
  setNames(
    as.data.frame(do.call(cbind, rep(list(df2$A), ncol(df1)))),
    names(df1)
  )
)
# A B C
#1 1 2 3
#2 5 7 9
#3 1 1 1
#4 2 2 2
#5 3 3 3
#6 4 4 4
data
df1 <- structure(list(A = c(1L, 5L), B = c(2L, 7L), C = c(3L, 9L)), .Names = c("A",
"B", "C"), class = "data.frame", row.names = c(NA, -2L))
df2 <- structure(list(A = 1:4), .Names = "A", class = "data.frame",
row.names = c(NA, -4L))
Here is a base R method with rbind, rep, and setNames:
rbind(dat, setNames(data.frame(rep(dat1, ncol(dat))), names(dat)))
A B C
1 1 2 3
2 5 7 9
3 1 1 1
4 2 2 2
5 3 3 3
6 4 4 4
Edit: turns out data.frame isn't necessary:
rbind(dat, setNames(rep(dat1, ncol(dat)), names(dat)))
will work.
data
dat <-
structure(list(A = c(1L, 5L), B = c(2L, 7L), C = c(3L, 9L)), .Names = c("A",
"B", "C"), class = "data.frame", row.names = c(NA, -2L))
dat1 <-
structure(list(A = 1:4), .Names = "A", row.names = c(NA, -4L),
class = "data.frame")
I just love R, here is yet another Base R solution but with mapply:
data.frame(mapply(c, df1, df2))
Result:
A B C
1 1 2 3
2 5 7 9
3 1 1 1
4 2 2 2
5 3 3 3
6 4 4 4
Note:
No need to deal with colnames like almost all the other solutions... The key to why this works is that "mapply calls FUN for the values of ... [each element]
(re-cycled to the length of the longest...[element]" (See ?mapply). In other words, df2$A is recycled to however many columns df1 has.
Data:
df1 = structure(list(A = c(1L, 5L), B = c(2L, 7L), C = c(3L, 9L)), .Names = c("A",
"B", "C"), class = "data.frame", row.names = c(NA, -2L))
df2 = structure(list(A = 1:4), .Names = "A", row.names = c(NA, -4L), class = "data.frame")
Data:
df1 <- data.frame(A=c(1,5),
B=c(2,7),
C=c(3,9))
df2 <- data.frame(A=c(1,2,3,4))
Solution:
df2 <- matrix(rep(df2$A, ncol(df1)), ncol=ncol(df1))
colnames(df2) <- colnames(df1)
rbind(df1,df2)
Result:
A B C
1 1 2 3
2 5 7 9
3 1 1 1
4 2 2 2
5 3 3 3
6 4 4 4
A solution from purrr, which uses map_dfc to loop through all columns in df1 to combine all the elements with df2$A.
library(purrr)
map_dfc(df1, ~c(., df2$A))
# A tibble: 6 x 3
A B C
<int> <int> <int>
1 1 2 3
2 5 7 9
3 1 1 1
4 2 2 2
5 3 3 3
6 4 4 4
Data
df1 <- structure(list(A = c(1L, 5L), B = c(2L, 7L), C = c(3L, 9L)), .Names = c("A",
"B", "C"), class = "data.frame", row.names = c(NA, -2L))
df2 <- structure(list(A = 1:4), .Names = "A", class = "data.frame",
row.names = c(NA, -4L))
By analogy with #useR's excellent Base R answer, here's a tidyverse solution:
library(purrr)
map2_df(df1, df2, c)
A B C
1 1 2 3
2 5 7 9
3 1 1 1
4 2 2 2
5 3 3 3
6 4 4 4
Here are a few other (less desirable) options from when I first answered this question.
library(dplyr)
bind_rows(df1, df2 %>% mutate(B=A, C=A))
Or, if we want to dynamically get the number of columns and their names from df1:
bind_rows(df1,
df2[,rep(1,ncol(df1))] %>% setNames(names(df1)))
And one more Base R method:
rbind(df1, setNames(df2[,rep(1,ncol(df1))], names(df1)))
For the sake of completeness, here is data.table approach which doesn't require to handle column names:
library(data.table)
setDT(df1)[, lapply(.SD, c, df2$A)]
A B C
1: 1 2 3
2: 5 7 9
3: 1 1 1
4: 2 2 2
5: 3 3 3
6: 4 4 4
Note that the OP has described df2 to consist only of one column.
There is also a base R version of this approach:
data.frame(lapply(df1, c, df2$A))
A B C
1 1 2 3
2 5 7 9
3 1 1 1
4 2 2 2
5 3 3 3
6 4 4 4
This is similar to d.b's approach but doesn't require dealing with column names.

Appending list items

I have a list of some length(let's say 1000). Each element of the list is another list of length = 2. Each element of the new list is a data.table. The second element of each list might be an empty data.table.
I need to rbind() all the data.frames that are in the first position of the list. I am currently doing the following:
# Accumulate the first and second element of every entry of myList by
# row-binding them one at a time onto two initially empty data.tables.
DT1 <- data.table()
DT2 <- data.table()
# seq_along() makes the loop a no-op for an empty myList, whereas
# 1:length(myList) would iterate over c(1, 0).
for (i in seq_along(myList)) {
  DT1 <- rbind(DT1, myList[[i]][[1]])
  DT2 <- rbind(DT2, myList[[i]][[2]])
}
This works, but it is too slow. Is there a way I can avoid the for-loop?
Thank you in advance!
data table has a dedicated fast function: rbindlist
Cf: http://www.inside-r.org/packages/cran/data.table/docs/rbindlist
Edited:
Here is an example of code
library(data.table)
srcList=list(list(DT1=data.table(X=0),DT2=NULL),list(DT1=data.table(X=2),data.table(Y=3)))
# first have a list for all DT1s
DT1.list= lapply(srcList, FUN=function(el){el$DT1})
rbindlist(DT1.list)
X
1: 0
2: 2
Do this:
do.call("rbind", lapply(df.list, "[[", 1)) # for first list element
# x y
# 1 1 10
# 2 2 20
# 3 3 30
# 4 4 40
# 5 5 50
# 6 6 60
do.call("rbind", lapply(df.list, "[[", 2)) # for second list element
# x y
# 1 1 30
# 2 2 40
# 3 3 50
# 4 4 70
# 5 5 80
# 6 6 90
DATA
df.list=list(list(structure(list(x = 1:3, y = c(10, 20, 30)), .Names = c("x",
"y"), row.names = c(NA, -3L), class = "data.frame"), structure(list(
x = 1:3, y = c(30, 40, 50)), .Names = c("x", "y"), row.names = c(NA,
-3L), class = "data.frame")), list(structure(list(x = 4:6, y = c(40,
50, 60)), .Names = c("x", "y"), row.names = c(NA, -3L), class = "data.frame"),
structure(list(x = 4:6, y = c(70, 80, 90)), .Names = c("x",
"y"), row.names = c(NA, -3L), class = "data.frame")))
# df.list
# [[1]]
# [[1]][[1]]
# x y
# 1 1 10
# 2 2 20
# 3 3 30
# [[1]][[2]]
# x y
# 1 1 30
# 2 2 40
# 3 3 50
# [[2]]
# [[2]][[1]]
# x y
# 1 4 40
# 2 5 50
# 3 6 60
# [[2]][[2]]
# x y
# 1 4 70
# 2 5 80
# 3 6 90

Resources