I have several files, each containing a variable that is named differently but shares the common string “type_category”, e.g., type_category_lifestyle_characterstics, type_category_uniqueness, etc. The idea is to go through these files and rename each such variable to type_category. Below are examples of the data frames:
df1 <- data.frame(id = c(1,2,3), type_category_lifestyle_characterstics = c(5,6,7), rating = c(1,3,4))
df2 <- data.frame(id = c(9,5,3), type_category_uniqueness = c(4,6,1), rating = c(2,7,4))
Thanks in advance
We can get the datasets in a list
library(dplyr)
library(purrr)
out <- map(mget(ls(pattern = '^df\\d+$')), ~ .x %>%
rename_with(~ "type_category",
starts_with("type_category")))
-output
out
$df1
id type_category rating
1 1 5 1
2 2 6 3
3 3 7 4
$df2
id type_category rating
1 9 4 2
2 5 6 7
3 3 1 4
We could use setNames with lapply:
my_list <- list(df1, df2)
colnames <- c("id","type_category","rating")
lapply(my_list, setNames, colnames)
output:
[[1]]
id type_category rating
1 1 5 1
2 2 6 3
3 3 7 4
[[2]]
id type_category rating
1 9 4 2
2 5 6 7
3 3 1 4
Base R
Once you got them in a list, you can use lapply to change the variable names in all of them
# Example inputs: each data frame carries exactly one column whose name
# contains "type_category".
df1 <- data.frame(
  id = c(1, 2, 3),
  type_category_lifestyle_characterstics = c(5, 6, 7),
  rating = c(1, 3, 4)
)
df2 <- data.frame(
  id = c(9, 5, 3),
  type_category_uniqueness = c(4, 6, 1),
  rating = c(2, 7, 4)
)

# For every data frame in the list, rename each column whose name contains
# "type_category" (case-insensitive) to exactly "type_category".
# lapply() returns a new list; df1 and df2 themselves are not modified.
lapply(list(df1, df2), function(df) {
  is_type_col <- grepl("type_category", names(df), ignore.case = TRUE)
  names(df)[is_type_col] <- "type_category"
  df
})
#> [[1]]
#> id type_category rating
#> 1 1 5 1
#> 2 2 6 3
#> 3 3 7 4
#>
#> [[2]]
#> id type_category rating
#> 1 9 4 2
#> 2 5 6 7
#> 3 3 1 4
Just note that you would need to assign the result back to a list.
data.table
Since you tagged data.table, this allows you to change the names in place and no extra assignment is necessary
library(data.table)
dt1 <- data.table::data.table(id = c(1,2,3), type_category_lifestyle_characterstics = c(5,6,7), rating = c(1,3,4))
dt2 <- data.table::data.table(id = c(9,5,3), type_category_uniqueness = c(4,6,1), rating = c(2,7,4))
# Rename, by reference, every column of dt1 and dt2 whose name contains
# "type_category" (case-insensitive) to exactly "type_category".
# invisible() suppresses printing of the list of NULLs that lapply() returns;
# the useful effect is the renaming done by setnames() on each data.table.
invisible(
lapply(list(dt1, dt2),
function(dt){
# Take the current names from a copy of dt.
# NOTE(review): presumably the copy keeps nms_old from changing when
# setnames() later updates dt by reference -- confirm.
nms_old <- names(data.table::copy(dt))
nms_new <- data.table::copy(nms_old)
# Overwrite only the matching positions; all other names stay as they were.
nms_new[grepl(pattern = "type_category",
x = nms_old,
ignore.case = TRUE)] <- "type_category"
# old and new are full-length name vectors, matched position by position.
data.table::setnames(dt, old = nms_old, new = nms_new)
# Nothing to return: the function is called only for its renaming effect.
return(NULL)
})
)
dt1
#> id type_category rating
#> 1: 1 5 1
#> 2: 2 6 3
#> 3: 3 7 4
dt2
#> id type_category rating
#> 1: 9 4 2
#> 2: 5 6 7
#> 3: 3 1 4
Related
I have a list of 15 tibbles. The tibbles are in either one of 3 formats with different numbers of columns. What I am trying to do is to create a loop over all of the tibbles that will change each tibble depending on its structure. Essentially, I have the names of each tibble in each group stored in a separate vector. I'm trying to use case_when to call when the name of the tibble is in the group vector.
I'm pretty new to R and coding in general so any help is appreciated!
library(tidyverse)
#Example of what my data looks like
df.1 <- tibble(x = 1:5, y = 1, z = 4:8)
df.2 <- tibble(x= 1:8, y = 2, z = 5, r = 7, d = 9)
df.3 <- tibble(x = 1:4, y = 3, z = 8, r = 2, d = 8)
df.4 <- tibble(x=1:3, y = 1, z = 0, r = 4)
df.list <- list(df.1, df.2, df.3, df.4)
names(df.list) <- c("a", "b", "c", "d")
#I have groups like these which I'm using to index by each Tibble's name
group1 <- c("a")
group2 <- c("b", "c")
group3 <- c("d")
#Here's the for loop I try to run to rearrange each tibble based on it's grouping
# Attempt: for each tibble in df.list, pick the select()/rename() pipeline
# that matches the group its list name belongs to, and store the result back.
# This is the code that raises "Can't subset columns past the end".
# NOTE(review): the reported error (Location 4 on a 3-column tibble) suggests
# that every right-hand side is evaluated for every tibble, not only the one
# whose condition is TRUE -- confirm against the case_when() documentation.
for(i in seq_along(df.list)){
df.list[[i]] <- case_when(
# group1: keep columns 3 and 2, renamed to zz / yy
names(df.list[i])%in%group1 ~ df.list[[i]] %>%
dplyr::select(c(3, 2)) %>%
rename("yy" = "y", "zz" = "z"),
# group2: keep columns 3 and 4, renamed to zz / rr
# NOTE(review): this condition indexes df.list[1] while the other two use
# df.list[i] -- looks like a typo; confirm.
names(df.list[1])%in%group2 ~ df.list[[i]] %>%
dplyr::select(c(3,4)) %>%
rename("zz" = "z", "rr" = "r"),
# group3: keep columns 1, 4 and 3; only z is renamed (to zz)
names(df.list[i])%in%group3 ~ df.list[[i]] %>%
dplyr::select(c(1, 4, 3)) %>%
rename("zz"= "z")
)
}
From this, I expect each of my tibbles to have only the columns I selected for that group and have those columns renamed. I get an error like:
Error in `dplyr::select()`:
! Can't subset columns past the end.
ℹ Location 4 doesn't exist.
ℹ There are only 3 columns.
case_when/ifelse/if_else require all of their arguments to be of the same length and type, which is not the case here. We may need an if/else if/else chain inside the loop instead.
library(dplyr)
# Reshape each tibble in df.list according to the group its list name
# belongs to. Only the branch that matches is evaluated, so a pipeline that
# selects column 4 never runs on a 3-column tibble. A name found in none of
# the groups leaves its tibble unchanged.
for (i in seq_along(df.list)) {
  grp_name <- names(df.list)[i]
  tmp <- df.list[[i]]

  if (grp_name %in% group1) {
    # group1: columns 3 and 2, renamed to zz / yy
    tmp <- rename(dplyr::select(tmp, c(3, 2)), "yy" = "y", "zz" = "z")
  } else if (grp_name %in% group2) {
    # group2: columns 3 and 4, renamed to zz / rr
    tmp <- rename(dplyr::select(tmp, c(3,4)), "zz" = "z", "rr" = "r")
  } else if (grp_name %in% group3) {
    # group3: columns 1, 4 and 3; only z is renamed
    tmp <- rename(dplyr::select(tmp, c(1, 4, 3)), "zz"= "z")
  }

  df.list[[i]] <- tmp
}
-output
> df.list
$a
# A tibble: 5 × 2
zz yy
<int> <dbl>
1 4 1
2 5 1
3 6 1
4 7 1
5 8 1
$b
# A tibble: 8 × 2
zz rr
<dbl> <dbl>
1 5 7
2 5 7
3 5 7
4 5 7
5 5 7
6 5 7
7 5 7
8 5 7
$c
# A tibble: 4 × 2
zz rr
<dbl> <dbl>
1 8 2
2 8 2
3 8 2
4 8 2
$d
# A tibble: 3 × 3
x r zz
<int> <dbl> <dbl>
1 1 4 0
2 2 4 0
3 3 4 0
It is also possible to do this with key/mapping dataset
library(tibble)
library(purrr)
key_dat <- tibble(col1 = c("a", "b", "c", "d"),
col2 = list(c(3, 2), c(3, 4), c(3, 4), c(1, 4, 3)),
col3 = list(c(yy = "y", zz = "z"), c(zz = "z", rr = "r"),
c(zz = "z", rr = "r"), c(zz = "z")))
imap(df.list, ~
{
ind <- match(.y, key_dat$col1)
.x %>%
dplyr::select(all_of(key_dat$col2[[ind]])) %>%
rename(!!! key_dat$col3[[ind]])
}
)
-output
$a
# A tibble: 5 × 2
zz yy
<int> <dbl>
1 4 1
2 5 1
3 6 1
4 7 1
5 8 1
$b
# A tibble: 8 × 2
zz rr
<dbl> <dbl>
1 5 7
2 5 7
3 5 7
4 5 7
5 5 7
6 5 7
7 5 7
8 5 7
$c
# A tibble: 4 × 2
zz rr
<dbl> <dbl>
1 8 2
2 8 2
3 8 2
4 8 2
$d
# A tibble: 3 × 3
x r zz
<int> <dbl> <dbl>
1 1 4 0
2 2 4 0
3 3 4 0
As the question is about case_when, we may need to make sure that each of the expressions in case_when can work for each of the list elements. To do that, wrap with any_of in select as well as use rename_with and any_of
for(i in seq_along(df.list)){
tmp <- df.list[[i]]
nm1 <- names(df.list)[i]
tmp1 <- case_when(
nm1 %in% group1 ~ list(
tmp %>%
dplyr::select(any_of(c(3, 2))) %>%
rename_with(~ strrep(.x, 2), any_of(c("y", "z"))))
,
nm1 %in% group2 ~ list(
tmp %>%
dplyr::select(any_of(c(3,4))) %>%
rename_with(~ strrep(.x, 2), any_of(c("z", "r")))),
nm1 %in% group3 ~ list(
tmp %>%
dplyr::select(any_of(c(1, 4, 3))) %>%
rename_with(~ strrep(.x, 2), any_of(c("z"))))
)
df.list[[i]] <- tmp1[[1]]
}
-output
> df.list
$a
# A tibble: 5 × 2
zz yy
<int> <dbl>
1 4 1
2 5 1
3 6 1
4 7 1
5 8 1
$b
# A tibble: 8 × 2
zz rr
<dbl> <dbl>
1 5 7
2 5 7
3 5 7
4 5 7
5 5 7
6 5 7
7 5 7
8 5 7
$c
# A tibble: 4 × 2
zz rr
<dbl> <dbl>
1 8 2
2 8 2
3 8 2
4 8 2
$d
# A tibble: 3 × 3
x r zz
<int> <dbl> <dbl>
1 1 4 0
2 2 4 0
3 3 4 0
I would like to combine two dataframes using crossing, but some have the same columnnames. For that, I would like to add "_nameofdataframe" to these columns. Here are some reproducible dataframes (dput below):
> df1
person V1 V2 V3
1 A 1 3 3
2 B 4 4 5
3 C 2 1 1
> df2
V2 V3
1 2 5
2 1 6
3 1 2
When I run the following code it will return duplicated column names:
library(tidyr)
crossing(df1, df2, .name_repair = "minimal")
#> # A tibble: 9 × 6
#> person V1 V2 V3 V2 V3
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A 1 3 3 1 2
#> 2 A 1 3 3 1 6
#> 3 A 1 3 3 2 5
#> 4 B 4 4 5 1 2
#> 5 B 4 4 5 1 6
#> 6 B 4 4 5 2 5
#> 7 C 2 1 1 1 2
#> 8 C 2 1 1 1 6
#> 9 C 2 1 1 2 5
As you can see, the returned column names are duplicated. My desired output should look like this:
person V1 V2_df1 V3_df1 V2_df2 V3_df2
1 A 1 3 3 1 2
2 A 1 3 3 1 6
3 A 1 3 3 2 5
4 B 4 4 5 1 2
5 B 4 4 5 1 6
6 B 4 4 5 2 5
7 C 2 1 1 1 2
8 C 2 1 1 1 6
9 C 2 1 1 2 5
So I was wondering if anyone knows a more automatic way to give the duplicated column names a name like in the desired output above with crossing?
dput of df1 and df2:
df1 <- structure(list(person = c("A", "B", "C"), V1 = c(1, 4, 2), V2 = c(3,
4, 1), V3 = c(3, 5, 1)), class = "data.frame", row.names = c(NA,
-3L))
df2 <- structure(list(V2 = c(2, 1, 1), V3 = c(5, 6, 2)), class = "data.frame", row.names = c(NA,
-3L))
As you probably know, the .name_repair parameter can take a function. The problem is crossing() only passes that function one argument, a vector of the concatenated column names() of both data frames. So we can't easily pass the names of the data frame objects to it. It seems to me that there are two solutions:
Manually add the desired suffix to an anonymous function.
Create a wrapper function around crossing().
1. Manually add the desired suffix to an anonymous function
We can simply supply the suffixes as the default value of an extra argument of the anonymous function passed to .name_repair, e.g. suffix = c("_df1", "_df2").
crossing(
df1,
df2,
.name_repair = \(x, suffix = c("_df1", "_df2")) {
names_to_repair <- names(which(table(x) == 2))
x[x %in% names_to_repair] <- paste0(
x[x %in% names_to_repair],
rep(
suffix,
each = length(unique(names_to_repair))
)
)
x
}
)
# person V1 V2_df1 V3_df1 V2_df2 V3_df2
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 A 1 3 3 1 2
# 2 A 1 3 3 1 6
# 3 A 1 3 3 2 5
# 4 B 4 4 5 1 2
# 5 B 4 4 5 1 6
# 6 B 4 4 5 2 5
# 7 C 2 1 1 1 2
# 8 C 2 1 1 1 6
# 9 C 2 1 1 2 5
The disadvantage of this is that there is room for error when typing the suffix, or that we might forget to change it if we change the names of the data frames.
Also note that we are checking for names which appear twice. If one of your original data frames already has broken (duplicated) names then this function will also rename those columns. But I think it would be unwise to try to do any type of join if either data frame did not have unique column names.
2. Create a wrapper function around crossing()
This might be more in the spirit of the tidyverse. The crossing() docs to which you linked state that crossing() is a wrapper around expand_grid(). The source for expand_grid() shows that it is basically a wrapper which uses map() to apply vctrs::vec_rep() to some inputs. So if we want to add another function to the call stack, there are two ways I can think of:
Using deparse(substitute())
# Cross two data frames with crossing(), suffixing clashing column names
# with "_<expression passed as df_1>" / "_<expression passed as df_2>".
#
# df_1, df_2: the data frames to cross. The suffixes are taken from the
#   unevaluated argument expressions via deparse(substitute()).
# Returns: the result of crossing(df_1, df_2) with repaired names.
crossing_fix_names <- function(df_1, df_2) {
  label_1 <- deparse(substitute(df_1))
  label_2 <- deparse(substitute(df_2))
  suffixes <- paste0("_", c(label_1, label_2))

  # Name-repair callback: receives the concatenated column names of both
  # inputs and appends a suffix to every name that occurs exactly twice.
  repair_names <- function(x, suffix = suffixes) {
    clashing <- names(which(table(x) == 2))
    needs_fix <- x %in% clashing
    # rep(..., each = k) yields k copies of the first suffix followed by k
    # copies of the second, matching the df_1-then-df_2 order of x.
    x[needs_fix] <- paste0(
      x[needs_fix],
      rep(suffix, each = length(unique(clashing)))
    )
    x
  }

  crossing(df_1, df_2, .name_repair = repair_names)
}
# Output the same as above
crossing_fix_names(df1, df2)
The disadvantage of this is that deparse(substitute()) is ugly and can occasionally have surprising behaviour. The advantage is we do not need to remember to manually add the suffixes.
Using match.call()
# Cross two data frames with crossing(), suffixing clashing column names
# with "_<argument as written in the call>".
#
# df_1, df_2: the data frames to cross. The suffixes are read from the
#   matched call (match.call()), i.e. from how the caller spelled the
#   arguments.
# Returns: the result of crossing(df_1, df_2) with repaired names.
crossing_fix_names2 <- function(df_1, df_2) {
  call_parts <- as.list(match.call())
  arg_labels <- c(call_parts$df_1, call_parts$df_2)
  suffixes <- paste0("_", arg_labels)

  # Name-repair callback: receives the concatenated column names of both
  # inputs and appends a suffix to every name that occurs exactly twice.
  repair_names <- function(x, suffix = suffixes) {
    clashing <- names(which(table(x) == 2))
    needs_fix <- x %in% clashing
    # k copies of the first suffix, then k copies of the second, where k is
    # the number of clashing names.
    x[needs_fix] <- paste0(
      x[needs_fix],
      rep(suffix, each = length(unique(clashing)))
    )
    x
  }

  crossing(df_1, df_2, .name_repair = repair_names)
}
# Also the same output
crossing_fix_names2(df1, df2)
As we don't have the drawbacks of deparse(substitute()) and we don't have to manually specify the suffix, I think this is probably the best approach.
test for the condition using dputs :
colnames(df1) %in% colnames(df2)
[1] FALSE FALSE TRUE TRUE
rename
colnames(df2) <- paste0(colnames(df2), '_df2')
then cbind
cbind(df1,df2)
person V1 V2 V3 V2_df2 V3_df2
1 A 1 3 3 2 5
2 B 4 4 5 1 6
3 C 2 1 1 1 2
not so elegant, but usefully discernible later.
This question already has answers here:
Dictionary style replace multiple items
(11 answers)
Closed 1 year ago.
Another thread solved a similar problem very nicely
But what I would like to do is get rid of some redundancy in my similar problem.
Using their example:
df <- data.frame(name = rep(letters[1:3], each = 3), foo=rep(1:9),var1 = letters[1:3], var2 = rep(3:5, each = 3))
creates:
df
name foo var1 var2
1 a 1 a 3
2 a 2 a 3
3 a 3 a 3
4 b 4 b 4
5 b 5 b 4
6 b 6 b 4
7 c 7 c 5
8 c 8 c 5
9 c 9 c 5
But what do I need to do to replace multiple characters with unique values?
a=1
b=2
c=3
I tried:
df[,c(4,6)] <- lapply(df[,c(4,6)], function(x) replace(x,x %in% "a", 1),
replace(x,x %in% "b", 2),
replace(x,x %in% "c", 3))
and
z<- c("a","b","c")
y<- c(1,2,3)
df[,c(1,3)] <- lapply(df[,c(1,3)], function(x) replace(x,x %in% z, y))
But neither seems to work.
Thanks.
You can use dplyr::recode
df <- data.frame(name = rep(letters[1:3], each = 3), foo=rep(1:9),var1 = letters[1:3], var2 = rep(3:5, each = 3))
library(dplyr, warn.conflicts = FALSE)
df %>%
mutate(across(c(name, var1), ~ recode(., a = 1, b = 2, c = 3)))
#> name foo var1 var2
#> 1 1 1 1 3
#> 2 1 2 2 3
#> 3 1 3 3 3
#> 4 2 4 1 4
#> 5 2 5 2 4
#> 6 2 6 3 4
#> 7 3 7 1 5
#> 8 3 8 2 5
#> 9 3 9 3 5
Created on 2021-10-19 by the reprex package (v2.0.1)
Across will apply the function defined by ~ recode(., a = 1, b = 2, c = 3) to both name and var1.
Using ~ and . is another way to define a function in across. This function is equivalent to the one defined by function(x) recode(x, a = 1, b = 2, c = 3), and you could use that code in across instead of the ~ form and it would give the same result. The only name I know for this is what it's called in ?across, which is "purrr-style lambda function", because the purrr package was the first to use formulas to define functions in this way.
If you want to see the actual function created by the formula, you can look at rlang::as_function(~ recode(., a = 1, b = 2, c = 3)), although it's a little more complex than the one above to support the use of ..1, ..2 and ..3 which are not used here.
Now that R supports the easier way of defining functions below, this purrr-style function is maybe no longer useful, it's just an old habit to write it that way.
df <- data.frame(name = rep(letters[1:3], each = 3), foo=rep(1:9),var1 = letters[1:3], var2 = rep(3:5, each = 3))
library(dplyr, warn.conflicts = FALSE)
df %>%
mutate(across(c(name, var1), \(x) recode(x, a = 1, b = 2, c = 3)))
#> name foo var1 var2
#> 1 1 1 1 3
#> 2 1 2 2 3
#> 3 1 3 3 3
#> 4 2 4 1 4
#> 5 2 5 2 4
#> 6 2 6 3 4
#> 7 3 7 1 5
#> 8 3 8 2 5
#> 9 3 9 3 5
Created on 2021-10-19 by the reprex package (v2.0.1)
A simple for loop would do the trick:
# Replace every cell of df equal to z[i] with the matching y[i], one mapping
# at a time. seq_along(z) is used instead of 1:length(z) so that an empty z
# runs zero iterations rather than iterating over c(1, 0).
for (i in seq_along(z)) {
  df[df == z[i]] <- y[i]
}
df
name foo var1 var2
1 1 1 1 3
2 1 2 2 3
3 1 3 3 3
4 2 4 1 4
5 2 5 2 4
6 2 6 3 4
7 3 7 1 5
8 3 8 2 5
9 3 9 3 5
You could use a lookup vector combined with apply:
z <- c("a","b","c")
y <- c(1,2,3)
lookup <- setNames(y, z)
df[,c(1,3)] <- apply(df[,c(1,3)], 2, function(x) lookup[x])
df
This returns
name foo var1 var2
1 1 1 1 3
2 1 2 2 3
3 1 3 3 3
4 2 4 1 4
5 2 5 2 4
6 2 6 3 4
7 3 7 1 5
8 3 8 2 5
9 3 9 3 5
If you are open to a tidyverse approach:
library(tidyverse)
df_new <- df %>%
mutate(across(c(var1, name), ~case_when(. == 'a' ~ 1,
. == 'b' ~ 2,
. == 'c' ~ 3)))
df_new
name foo var1 var2
1 1 1 1 3
2 1 2 2 3
3 1 3 3 3
4 2 4 1 4
5 2 5 2 4
6 2 6 3 4
7 3 7 1 5
8 3 8 2 5
9 3 9 3 5
Note, this code works only if you change all values of your column. E.g. if there was a "d" in your var1 column that you don't turn into a number, it would be changed to NA.
# Import data: df => data.frame
df <- data.frame(name = rep(letters[1:3], each = 3), foo=rep(1:9),var1 = letters[1:3], var2 = rep(3:5, each = 3))
# Function performing a mapping replacement:
# replaceMultipleValues => function()
#
# Every cell of `df` whose value (compared as a string) equals an element of
# `mapFrom` is replaced by the element of `mapTo` at the same position; all
# other cells keep their value. Each column of the result is then re-typed
# with type.convert().
#
# Arguments:
#   df      => data.frame whose cells are recoded
#   mapFrom => vector of values to look for (matched as character)
#   mapTo   => vector of replacement values, parallel to mapFrom
# Returns:
#   data.frame with the dimensions and dimnames of df
replaceMultipleValues <- function(df, mapFrom, mapTo){
  # Extract the values in the data.frame, column by column, as strings:
  # dfVals => character vector
  # Converting each column with as.character() (instead of a bare unlist())
  # guarantees a character vector even when every column is numeric, so the
  # lookup below is always by name and never by position; factor columns are
  # flattened by label.
  dfVals <- unlist(lapply(df, as.character), use.names = FALSE)
  # Get all values in the mapping & data
  # and assign a name to them: tmp1 => named character vector
  tmp1 <- c(
    setNames(mapTo, mapFrom),
    setNames(dfVals, dfVals)
  )
  # Extract the unique values; mapping entries come first, so they win over
  # the identity entries built from the data:
  # valueMap => named character vector
  valueMap <- tmp1[!(duplicated(names(tmp1)))]
  # Recode the values, coerce vectors to appropriate
  # types: res => data.frame
  # as.is = TRUE is passed explicitly: leaving it unspecified makes
  # type.convert() emit a warning on R >= 4.0.
  res <- type.convert(
    data.frame(
      matrix(
        valueMap[dfVals],
        nrow = nrow(df),
        ncol = ncol(df),
        dimnames = dimnames(df)
      )
    ),
    as.is = TRUE
  )
  # Explicitly define the returned object: data.frame => env
  return(res)
}
# Recode values in data.frame:
# res => data.frame
res <- replaceMultipleValues(
df,
c("a", "b", "c"),
c("1", "2", "3")
)
# Print data.frame to console:
# data.frame => stdout(console)
res
Consider the following named vector vec and tibble df:
vec <- c("1" = "a", "2" = "b", "3" = "c")
df <- tibble(col = rep(1:3, c(4, 2, 5)))
df
# # A tibble: 11 x 1
# col
# <int>
# 1 1
# 2 1
# 3 1
# 4 1
# 5 2
# 6 2
# 7 3
# 8 3
# 9 3
# 10 3
# 11 3
I would like to replace the values in the col column with the corresponding named values in vec.
I'm looking for a tidyverse approach, that doesn't involve converting vec as a tibble.
I tried the following, without success:
df %>%
mutate(col = map(
vec,
~ str_replace(col, names(.x), .x)
))
Expected output:
# A tibble: 11 x 1
col
<chr>
1 a
2 a
3 a
4 a
5 b
6 b
7 c
8 c
9 c
10 c
11 c
You could use col :
df$col1 <- vec[as.character(df$col)]
Or in mutate :
library(dplyr)
df %>% mutate(col1 = vec[as.character(col)])
# col col1
# <int> <chr>
# 1 1 a
# 2 1 a
# 3 1 a
# 4 1 a
# 5 2 b
# 6 2 b
# 7 3 c
# 8 3 c
# 9 3 c
#10 3 c
#11 3 c
We can also use data.table
library(data.table)
setDT(df)[, col1 := vec[as.character(col)]]
I have 2 dataframes in R (df1, df2).
A C D
1 1 1
2 2 2
df2 as
A B C
1 1 1
2 2 2
How can I merge these 2 dataframes to produce the following output?
A B C D
2 1 2 1
4 2 4 2
Columns are sorted and column values are added. Both DFs have same number of rows. Thank you in advance.
Code to create DF:
# Example inputs: A and C are shared, D exists only in df1, B only in df2.
df1 <- data.frame(A = 1:2, C = 1:2, D = 1:2)
df2 <- data.frame(A = 1:2, B = 1:2, C = 1:2)

nm1 <- names(df1)
nm2 <- names(df2)
nm <- intersect(nm1, nm2) # column names present in both frames

if (length(nm) > 0) {
  # Shared names exist: bind the df1-only columns, the element-wise sum of
  # the shared columns, and the df2-only columns, in that order.
  cbind(
    df1[!(nm1 %in% nm2)],
    df1[nm] + df2[nm],
    df2[!(nm2 %in% nm1)]
  )
} else {
  # Nothing in common: simply place the two frames side by side.
  cbind(df1, df2)
}
# D A C B
#1 1 2 2 1
#2 2 4 4 2
You can try:
library(tidyverse)
list(df2, df1) %>%
map(rownames_to_column) %>%
bind_rows %>%
group_by(rowname) %>%
summarise_all(sum, na.rm = TRUE)
# A tibble: 2 x 5
rowname A B C D
<chr> <int> <int> <int> <int>
1 1 2 1 2 1
2 2 4 2 4 2
By using left_join() from dplyr you won't lose the column
library(tidyverse)
dat1 <- tibble(a = 1:10,
b = 1:10,
c = 1:10)
dat2 <- tibble(c = 1:10,
d = 1:10,
e = 1:10)
left_join(dat1, dat2, by = "c")
#> # A tibble: 10 x 5
#> a b c d e
#> <int> <int> <int> <int> <int>
#> 1 1 1 1 1 1
#> 2 2 2 2 2 2
#> 3 3 3 3 3 3
#> 4 4 4 4 4 4
#> 5 5 5 5 5 5
#> 6 6 6 6 6 6
#> 7 7 7 7 7 7
#> 8 8 8 8 8 8
#> 9 9 9 9 9 9
#> 10 10 10 10 10 10
Created on 2019-01-16 by the reprex package (v0.2.1)
allnames <- sort(unique(c(names(df1), names(df2))))
df3 <- data.frame(matrix(0, nrow = nrow(df1), ncol = length(allnames)))
names(df3) <- allnames
df3[,allnames %in% names(df1)] <- df3[,allnames %in% names(df1)] + df1
df3[,allnames %in% names(df2)] <- df3[,allnames %in% names(df2)] + df2
df3
A B C D
1 2 1 2 1
2 4 2 4 2
Here is a fun base R method with Reduce.
Reduce(cbind,
list(Reduce("+", list(df1[intersect(names(df1), names(df2))],
df2[intersect(names(df1), names(df2))])), # sum results
df1[setdiff(names(df1), names(df2))], # in df1, not df2
df2[setdiff(names(df2), names(df1))])) # in df2, not df1
This returns
A C D B
1 2 2 1 1
2 4 4 2 2
This assumes that both df1 and df2 have columns that are not present in the other. If this is not true, you'd have to adjust the list.
Note also that you could replace Reduce with do.call in both places and you'd get the same result.