tidyverse: binding list elements of same dimension - r

Using reduce(bind_cols), list elements of the same dimension can be combined. However, I would like to know how to combine only the elements of the same dimension (perhaps with the dimension specified in some way) from a list whose elements may have different dimensions.
library(tidyverse)
df1 <- data.frame(A1 = 1:10, A2 = 10:1)
df2 <- data.frame(B = 11:30)
df3 <- data.frame(C = 31:40)
ls1 <- list(df1, df3)
ls1
[[1]]
A1 A2
1 1 10
2 2 9
3 3 8
4 4 7
5 5 6
6 6 5
7 7 4
8 8 3
9 9 2
10 10 1
[[2]]
C
1 31
2 32
3 33
4 34
5 35
6 36
7 37
8 38
9 39
10 40
ls1 %>%
reduce(bind_cols)
A1 A2 C
1 1 10 31
2 2 9 32
3 3 8 33
4 4 7 34
5 5 6 35
6 6 5 36
7 7 4 37
8 8 3 38
9 9 2 39
10 10 1 40
ls2 <- list(df1, df2, df3)
ls2
[[1]]
A1 A2
1 1 10
2 2 9
3 3 8
4 4 7
5 5 6
6 6 5
7 7 4
8 8 3
9 9 2
10 10 1
[[2]]
B
1 11
2 12
3 13
4 14
5 15
6 16
7 17
8 18
9 19
10 20
11 21
12 22
13 23
14 24
15 25
16 26
17 27
18 28
19 29
20 30
[[3]]
C
1 31
2 32
3 33
4 34
5 35
6 36
7 37
8 38
9 39
10 40
ls2 %>%
reduce(bind_cols)
Error: Can't recycle `..1` (size 10) to match `..2` (size 20).
Run `rlang::last_error()` to see where the error occurred.
Question
I am looking for a function that combines the data.frames in a list, taking the number of rows as an argument so that only data.frames with that many rows are bound together.

One option could be:
# Group the data frames of `ls2` by their row count, then column-bind the
# members of each group. The result is a list named by row count.
map(split(ls2, map_int(ls2, NROW)), bind_cols)
$`10`
A1 A2 C
1 1 10 31
2 2 9 32
3 3 8 33
4 4 7 34
5 5 6 35
6 6 5 36
7 7 4 37
8 8 3 38
9 9 2 39
10 10 1 40
$`20`
B
1 11
2 12
3 13
4 14
5 15
6 16
7 17
8 18
9 19
10 20
11 21
12 22
13 23
14 24
15 25
16 26
17 27
18 28
19 29
20 30

You can use -
# Row index running up to the height of the tallest data frame in the list.
n <- seq_len(max(vapply(ls2, nrow, integer(1))))
# Indexing a data frame past its last row yields NA-filled rows, so every
# element is padded to the same height before being column-bound.
res <- do.call(cbind, lapply(ls2, function(d) d[n, , drop = FALSE]))
res
# A1 A2 B C
#1 1 10 11 31
#2 2 9 12 32
#3 3 8 13 33
#4 4 7 14 34
#5 5 6 15 35
#6 6 5 16 36
#7 7 4 17 37
#8 8 3 18 38
#9 9 2 19 39
#10 10 1 20 40
#NA NA NA 21 NA
#NA.1 NA NA 22 NA
#NA.2 NA NA 23 NA
#NA.3 NA NA 24 NA
#NA.4 NA NA 25 NA
#NA.5 NA NA 26 NA
#NA.6 NA NA 27 NA
#NA.7 NA NA 28 NA
#NA.8 NA NA 29 NA
#NA.9 NA NA 30 NA
A little-bit shorter with purrr::map_dfc
purrr::map_dfc(ls2, `[`, n, , drop = FALSE)

We can use cbind.fill from rowr
library(rowr)
do.call(cbind.fill, c(ls2, fill = NA))

A base R option using tapply + sapply
# Group the list elements by their number of rows and column-bind each group.
# vapply() pins the grouping vector to integer even for an empty list.
tapply(
  ls2,
  vapply(ls2, nrow, integer(1)),
  function(x) do.call(cbind, x)
)
gives
$`10`
A1 A2 C
1 1 10 31
2 2 9 32
3 3 8 33
4 4 7 34
5 5 6 35
6 6 5 36
7 7 4 37
8 8 3 38
9 9 2 39
10 10 1 40
$`20`
B
1 11
2 12
3 13
4 14
5 15
6 16
7 17
8 18
9 19
10 20
11 21
12 22
13 23
14 24
15 25
16 26
17 27
18 28
19 29
20 30

You may also use if inside reduce if you want to combine similar elements of list (case: when first item in list has priority)
df1 <- data.frame(A1 = 1:10, A2 = 10:1)
df2 <- data.frame(B = 11:30)
df3 <- data.frame(C = 31:40)
ls1 <- list(df1, df3)
ls2 <- list(df1, df2, df3)
library(tidyverse)
reduce(ls2, ~if(nrow(.x) == nrow(.y)){bind_cols(.x, .y)} else {.x})
#> A1 A2 C
#> 1 1 10 31
#> 2 2 9 32
#> 3 3 8 33
#> 4 4 7 34
#> 5 5 6 35
#> 6 6 5 36
#> 7 7 4 37
#> 8 8 3 38
#> 9 9 2 39
#> 10 10 1 40
Created on 2021-06-09 by the reprex package (v2.0.0)

Here's another tidyverse option.
We're creating a dummy ID in each data.frame based on the row_number(), then joining all data.frames by the dummy ID, and then dropping the dummy ID.
ls2 %>%
map(., ~mutate(.x, id = row_number())) %>%
reduce(full_join, by = "id") %>%
select(-id)
This gives us:
A1 A2 B C
1 1 10 11 31
2 2 9 12 32
3 3 8 13 33
4 4 7 14 34
5 5 6 15 35
6 6 5 16 36
7 7 4 17 37
8 8 3 18 38
9 9 2 19 39
10 10 1 20 40
11 NA NA 21 NA
12 NA NA 22 NA
13 NA NA 23 NA
14 NA NA 24 NA
15 NA NA 25 NA
16 NA NA 26 NA
17 NA NA 27 NA
18 NA NA 28 NA
19 NA NA 29 NA
20 NA NA 30 NA

We can also use Reduce function from base R:
lst <- list(df1, df2, df3)
# First give every data frame a row-index key (`id`) to merge on.
# seq_len() is used so a zero-row data frame gets an empty key, not c(1, 0).
ls2 <- lapply(lst, \(x) {
  x$id <- seq_len(nrow(x))
  x
})
# Fold left to right: merge the next data frame into the accumulated result
# only when both have the same number of rows; otherwise keep the accumulated
# result unchanged (so the first element decides which row count is kept).
Reduce(
  function(x, y) {
    if (nrow(x) == nrow(y)) {
      merge(x, y, by = "id")
    } else {
      x
    }
  },
  ls2
)
id A1 A2 C
1 1 1 10 31
2 2 2 9 32
3 3 3 8 33
4 4 4 7 34
5 5 5 6 35
6 6 6 5 36
7 7 7 4 37
8 8 8 3 38
9 9 9 2 39
10 10 10 1 40

Related

Merge matrices by column names (partially common; differing columns set to NA) in R

I would like to merge two datasets like this:
data1 <- data.frame (id=paste("id",1:10, sep=""), a=1:10, b= 11:20, d=21:30)
data2 <- data.frame(id=paste("id", 11:20, sep=""), a=1:10, b=11:20, e= 21:30)
The merged data will have all common columns and merged by column "id".
id a b d e
1 1 1 11 21 NA
2 2 2 12 22 NA
3 3 3 13 23 NA
4 4 4 14 24 NA
5 5 5 15 25 NA
6 6 6 16 26 NA
7 7 7 17 27 NA
8 8 8 18 28 NA
9 9 9 19 29 NA
10 10 10 20 30 NA
11 11 1 11 NA 21
12 12 2 12 NA 22
13 13 3 13 NA 23
14 14 4 14 NA 24
15 15 5 15 NA 25
16 16 6 16 NA 26
17 17 7 17 NA 27
18 18 8 18 NA 28
19 19 9 19 NA 29
20 20 10 20 NA 30
You can see that the missing columns are added and their values are set to NA. The manual workaround looks like this, but I am looking for an elegant way to do it in R when a large number of variables are mismatched:
data1$e=NA
data2$d = NA
rbind(data1, data2)
I'd just use dplyr::bind_rows() instead of rbind(). That'll give you the behavior you want with no additional lifting.
data1 <- data.frame (id=1:10, a=1:10, b= 11:20, d=21:30)
data2 <- data.frame(id=11:20, a=1:10, b=11:20, e= 21:30)
dplyr::bind_rows(data1, data2)
#> id a b d e
#> 1 1 1 11 21 NA
#> 2 2 2 12 22 NA
#> 3 3 3 13 23 NA
#> 4 4 4 14 24 NA
#> 5 5 5 15 25 NA
#> 6 6 6 16 26 NA
#> 7 7 7 17 27 NA
#> 8 8 8 18 28 NA
#> 9 9 9 19 29 NA
#> 10 10 10 20 30 NA
#> 11 11 1 11 NA 21
#> 12 12 2 12 NA 22
#> 13 13 3 13 NA 23
#> 14 14 4 14 NA 24
#> 15 15 5 15 NA 25
#> 16 16 6 16 NA 26
#> 17 17 7 17 NA 27
#> 18 18 8 18 NA 28
#> 19 19 9 19 NA 29
#> 20 20 10 20 NA 30
Created on 2021-03-25 by the reprex package (v1.0.0)

Repeat the first two rows for each id two times

I would like to repeat the first two rows for each id two times. I don't know how to do that. Does anyone have a suggestion?
id <- rep(1:4,each=6)
scored <- c(12,13,NA,NA,NA,NA,14,20,NA,NA,NA,NA,23,56,NA,NA,NA,NA, 45,78,NA,NA,NA,NA)
df <- data.frame(id,scored)
df
id scored
1 1 12
2 1 13
3 1 NA
4 1 NA
5 1 NA
6 1 NA
7 2 14
8 2 20
9 2 NA
10 2 NA
11 2 NA
12 2 NA
13 3 23
14 3 56
15 3 NA
16 3 NA
17 3 NA
18 3 NA
19 4 45
20 4 78
21 4 NA
22 4 NA
23 4 NA
24 4 NA
>
I want it to look like:
df
id score
1 1 12
2 1 13
3 1 12
4 1 13
5 1 12
6 1 13
7 2 14
8 2 20
9 2 14
10 2 20
11 2 14
12 2 20
13 3 23
14 3 56
15 3 23
16 3 56
17 3 23
18 3 56
19 4 45
20 4 78
21 4 45
22 4 78
23 4 45
24 4 78
>
..................................................
..................................................
..................................................
We can do a group by rep on the non-NA elements of 'scored'
library(dplyr)
df %>%
group_by(id) %>%
mutate(scored = rep(scored[!is.na(scored)], length.out = n()))
# A tibble: 24 x 2
# Groups: id [4]
# id scored
# <int> <dbl>
# 1 1 12
# 2 1 13
# 3 1 12
# 4 1 13
# 5 1 12
# 6 1 13
# 7 2 14
# 8 2 20
# 9 2 14
#10 2 20
# … with 14 more rows

R recode multiple variables following same rules

data=data.frame("x1"=c(1:10),
"x2"=c(1:4,4,6:10),
"x3"=c(1:3,2:5,5:7),
"x4"=c(21:30),
"x5"=c(35:44))
recode=c("x1","x2","x3")
data <- data[recode %in% c(4,5)] <- NA
I want to store a specific set of variables for example above I store x1,x2,x3 in 'recode'. Then I want to change all the values for all variables in recode such that any value of 4 or 5 is set to NA.
We need to use replace with lapply
data[recode] <- lapply(data[recode], function(x) replace(x, x %in% 4:5, NA))
data
# x1 x2 x3 x4 x5
#1 1 1 1 21 35
#2 2 2 2 22 36
#3 3 3 3 23 37
#4 NA NA 2 24 38
#5 NA NA 3 25 39
#6 6 6 NA 26 40
#7 7 7 NA 27 41
#8 8 8 NA 28 42
#9 9 9 6 29 43
#10 10 10 7 30 44
Or with dplyr
library(dplyr)
data %>%
mutate_at(vars(recode), ~ na_if(., 4)) %>%
mutate_at(vars(recode), ~ na_if(., 5))
# x1 x2 x3 x4 x5
#1 1 1 1 21 35
#2 2 2 2 22 36
#3 3 3 3 23 37
#4 NA NA 2 24 38
#5 NA NA 3 25 39
#6 6 6 NA 26 40
#7 7 7 NA 27 41
#8 8 8 NA 28 42
#9 9 9 6 29 43
#10 10 10 7 30 44
One dplyr possibility could be:
data %>%
mutate_at(vars(recode), ~ replace(., . %in% 4:5, NA))
x1 x2 x3 x4 x5
1 1 1 1 21 35
2 2 2 2 22 36
3 3 3 3 23 37
4 NA NA 2 24 38
5 NA NA 3 25 39
6 6 6 NA 26 40
7 7 7 NA 27 41
8 8 8 NA 28 42
9 9 9 6 29 43
10 10 10 7 30 44
Use Map().
data[recode] <- Map(function(x) ifelse(x %in% c(4, 5), NA, x), data[recode])
data
# x1 x2 x3 x4 x5
# 1 1 1 1 21 35
# 2 2 2 2 22 36
# 3 3 3 3 23 37
# 4 NA NA 2 24 38
# 5 NA NA 3 25 39
# 6 6 6 NA 26 40
# 7 7 7 NA 27 41
# 8 8 8 NA 28 42
# 9 9 9 6 29 43
# 10 10 10 7 30 44

How to fill blank data using several data frames

I have two data frames as follows:
df1<- read.table( text="id time group class income
12 NA NA NA NA
17 NA NA NA NA
19 NA NA NA NA
36 NA NA NA NA
14 NA NA NA NA
15 NA NA NA NA
8 NA NA NA NA
22 NA NA NA NA
33 NA NA NA NA
11 NA NA NA NA
",h=T)
The second data frame is as follows:
df2<- read.table( text="id time group class income age
17 3 A 1 2 12
11 6 A 3 12 11
36 9 E 2 11 23
19 19 E 2 13 13
8 23 M 2 14 14
15 11 M 1 16 13
12 3 P 1 15 15
14 3 Q 2 13 13
33 4 Z 3 19 13
22 5 G 3 20 11",h=T)
I want to get the following table using ids
df3<- read.table( text="id time group class income
12 23 P 1 15
17 3 A 1 2
19 3 E 2 13
36 5 E 2 11
14 3 Q 2 13
15 3 M 1 16
8 23 M 2 14
22 5 G 3 20
33 4 Z 3 19
11 6 A 3 12",h=T)
I am sorry, but I was unsuccessful to find some possible solutions to show my attempt. I appreciate your help.
Here's a dplyr solution assuming you want the order of id from df1 with most of the data from df2:
library(dplyr)
df1 %>%
select(id) %>%
left_join(df2) %>%
select(-age)
Joining, by = "id"
id time group class income
1 12 3 P 1 15
2 17 3 A 1 2
3 19 19 E 2 13
4 36 9 E 2 11
5 14 3 Q 2 13
6 15 11 M 1 16
7 8 23 M 2 14
8 22 5 G 3 20
9 33 4 Z 3 19
10 11 6 A 3 12

Quartiles by group saved as new variable in data frame

I have data that look something like this:
id <- c(1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,9)
yr <- c(1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1,2,3)
gr <- c(3,4,5,3,4,5,3,4,5,4,5,6,4,5,6,4,5,6,5,6,7,5,6,7,5,6,7)
x <- c(33,48,31,41,31,36,25,38,28,17,39,53,60,60,19,39,34,47,20,28,38,15,17,49,48,45,39)
df <- data.frame(id,yr,gr,x)
id yr gr x
1 1 1 3 33
2 1 2 4 48
3 1 3 5 31
4 2 1 3 41
5 2 2 4 31
6 2 3 5 36
7 3 1 3 25
8 3 2 4 38
9 3 3 5 28
10 4 1 4 17
11 4 2 5 39
12 4 3 6 53
13 5 1 4 60
14 5 2 5 60
15 5 3 6 19
16 6 1 4 39
17 6 2 5 34
18 6 3 6 47
19 7 1 5 20
20 7 2 6 28
21 7 3 7 38
22 8 1 5 15
23 8 2 6 17
24 8 3 7 49
25 9 1 5 48
26 9 2 6 45
27 9 3 7 39
I would like to create a new variable in the data frame that contains the quantiles of "x" computed within each unique combination of "yr" and "gr". That is, rather than finding the quantiles of "x" based on all 27 rows of data in the example, I would like to compute the quantiles by two grouping variables: yr and gr. For instance, the quantiles of "x" when yr = 1 and gr = 3, yr = 1 and gr = 4, etc.
Once these values are computed, I would like them to be appended to the data frame as a single column, say "x_quant".
I am able to split the data into the separate groups I need, and I know how to calculate quantiles, but I am having trouble combining the two steps in a way that is amenable to creating a new column in the existing data frame.
Any help y'all can provide would be greatly appreciated! Thank you much!
~kj
# Turn "yr" and "gr" into a single sortable key column.
df$y <- paste(df$yr, "", df$gr)
# Sort by the key so that the rows of each group are contiguous and appear in
# the same order as the groups produced by split().
df.ordered <- df[order(df$y), ]
grp <- split(df.ordered, df.ordered$y)
grp
# For each group, compute the default quantiles of x (0, 25, 50, 75, 100 %)
# and collapse them into one string, separated by two spaces.
x_quant <- vapply(
  grp,
  function(g) paste(quantile(g$x), collapse = "  "),
  character(1)
)
# Repeat each group's string once per row of that group, using the actual
# group sizes rather than assuming every group has exactly three rows.
x_quant <- rep(unname(x_quant), times = vapply(grp, nrow, integer(1)))
# Append the quantile strings to the data frame under a descriptive name.
df.ordered$xq_0_25_50_75_100 <- x_quant
df.ordered$y <- NULL
df <- df.ordered
df
Output:
> # turn "yr" and "gr" into sortable column
> df$y <- paste(df$yr,"",df$gr)
> df.ordered <- df[order(df$y),] #sort df based on group
> grp <- split(df.ordered,df.ordered$y);grp
$`1 3`
id yr gr x y
1 1 1 3 33 1 3
4 2 1 3 41 1 3
7 3 1 3 25 1 3
$`1 4`
id yr gr x y
10 4 1 4 17 1 4
13 5 1 4 60 1 4
16 6 1 4 39 1 4
$`1 5`
id yr gr x y
19 7 1 5 20 1 5
22 8 1 5 15 1 5
25 9 1 5 48 1 5
$`2 4`
id yr gr x y
2 1 2 4 48 2 4
5 2 2 4 31 2 4
8 3 2 4 38 2 4
$`2 5`
id yr gr x y
11 4 2 5 39 2 5
14 5 2 5 60 2 5
17 6 2 5 34 2 5
$`2 6`
id yr gr x y
20 7 2 6 28 2 6
23 8 2 6 17 2 6
26 9 2 6 45 2 6
$`3 5`
id yr gr x y
3 1 3 5 31 3 5
6 2 3 5 36 3 5
9 3 3 5 28 3 5
$`3 6`
id yr gr x y
12 4 3 6 53 3 6
15 5 3 6 19 3 6
18 6 3 6 47 3 6
$`3 7`
id yr gr x y
21 7 3 7 38 3 7
24 8 3 7 49 3 7
27 9 3 7 39 3 7
> # get quantiles and turn results into string
> q <- vector('list')
> for (i in 1:length(grp)) {
+ a <- quantile(grp[[i]]$x)
+ q[i] <- paste(a[1],"",a[2],"",a[3],"",a[4],"",a[5])
+ }
> x_quant <- unlist(sapply(q, `[`, 1))
> x_quant <- rep(x_quant,each=3)
> # append quantile back to data frame
> df.ordered$xq_0_25_50_75_100 <- x_quant
> df.ordered$y <- NULL
> df <- df.ordered
> df
id yr gr x xq_0_25_50_75_100
1 1 1 3 33 25 29 33 37 41
4 2 1 3 41 25 29 33 37 41
7 3 1 3 25 25 29 33 37 41
10 4 1 4 17 17 28 39 49.5 60
13 5 1 4 60 17 28 39 49.5 60
16 6 1 4 39 17 28 39 49.5 60
19 7 1 5 20 15 17.5 20 34 48
22 8 1 5 15 15 17.5 20 34 48
25 9 1 5 48 15 17.5 20 34 48
2 1 2 4 48 31 34.5 38 43 48
5 2 2 4 31 31 34.5 38 43 48
8 3 2 4 38 31 34.5 38 43 48
11 4 2 5 39 34 36.5 39 49.5 60
14 5 2 5 60 34 36.5 39 49.5 60
17 6 2 5 34 34 36.5 39 49.5 60
20 7 2 6 28 17 22.5 28 36.5 45
23 8 2 6 17 17 22.5 28 36.5 45
26 9 2 6 45 17 22.5 28 36.5 45
3 1 3 5 31 28 29.5 31 33.5 36
6 2 3 5 36 28 29.5 31 33.5 36
9 3 3 5 28 28 29.5 31 33.5 36
12 4 3 6 53 19 33 47 50 53
15 5 3 6 19 19 33 47 50 53
18 6 3 6 47 19 33 47 50 53
21 7 3 7 38 38 38.5 39 44 49
24 8 3 7 49 38 38.5 39 44 49
27 9 3 7 39 38 38.5 39 44 49
>

Resources