Calculate all possible interactions in model_matrix - r

I'm simulating data with a varying number of variables. As part of this, I need to calculate a model matrix with all possible interactions. See the following reprex for an example. I am able to get all two-way interactions by specifying the formula as ~ .*.. However, this particular dataset has 3 variables (ndim <- 3). I can get all two- and three-way interactions by specifying the formula as ~ .^3. The issue is that there may be 4+ variables that I need to handle, so I would like to be able to generalize this. I have tried specifying the formula as ~ .^ndim, but this throws an error.
Is there a way to define the power in the formula with a variable?
library(tidyverse)
library(mvtnorm)
library(modelr)
ndim <- 3
data <- rmvnorm(100, mean = rep(0, ndim)) %>%
as_tibble(.name_repair = ~ paste0("dim_", seq_len(ndim)))
model_matrix(data, ~ .*.)
#> # A tibble: 100 x 7
#> `(Intercept)` dim_1 dim_2 dim_3 `dim_1:dim_2` `dim_1:dim_3`
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 -0.775 0.214 0.111 -0.166 -0.0857
#> 2 1 1.25 -0.0636 1.40 -0.0794 1.75
#> 3 1 1.07 -0.361 0.976 -0.384 1.04
#> 4 1 2.08 0.381 0.593 0.793 1.24
#> 5 1 -0.197 0.382 -0.257 -0.0753 0.0506
#> 6 1 0.266 -1.82 0.00411 -0.485 0.00109
#> 7 1 3.09 2.57 -0.612 7.96 -1.89
#> 8 1 2.03 0.247 0.112 0.501 0.226
#> 9 1 -0.397 0.204 1.55 -0.0810 -0.614
#> 10 1 0.597 0.335 0.533 0.200 0.319
#> # … with 90 more rows, and 1 more variable: `dim_2:dim_3` <dbl>
model_matrix(data, ~ .^3)
#> # A tibble: 100 x 8
#> `(Intercept)` dim_1 dim_2 dim_3 `dim_1:dim_2` `dim_1:dim_3`
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 -0.775 0.214 0.111 -0.166 -0.0857
#> 2 1 1.25 -0.0636 1.40 -0.0794 1.75
#> 3 1 1.07 -0.361 0.976 -0.384 1.04
#> 4 1 2.08 0.381 0.593 0.793 1.24
#> 5 1 -0.197 0.382 -0.257 -0.0753 0.0506
#> 6 1 0.266 -1.82 0.00411 -0.485 0.00109
#> 7 1 3.09 2.57 -0.612 7.96 -1.89
#> 8 1 2.03 0.247 0.112 0.501 0.226
#> 9 1 -0.397 0.204 1.55 -0.0810 -0.614
#> 10 1 0.597 0.335 0.533 0.200 0.319
#> # … with 90 more rows, and 2 more variables: `dim_2:dim_3` <dbl>,
#> # `dim_1:dim_2:dim_3` <dbl>
model_matrix(data, ~.^ndim)
#> Error in terms.formula(object, data = data): invalid power in formula
Created on 2019-02-15 by the reprex package (v0.2.1)

You can use as.formula with paste0 in model_matrix:
model_matrix(data, as.formula(paste0("~ .^", ndim)))

Related

Subtract all columns from each other R

I am looking for a nice tidy/dplyr approach to compute the difference between all possible pair of columns (including repeats e.g A-B & B-A) in a dataframe.
I start with df and would like to end with end_df:
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.2.1
#> Warning: package 'tibble' was built under R version 4.2.1
df <- tibble(A = rnorm(1:10),
B = rnorm(1:10),
C = rnorm(1:10))
print(df)
#> # A tibble: 10 × 3
#> A B C
#> <dbl> <dbl> <dbl>
#> 1 -0.292 1.27 0.783
#> 2 -1.11 0.254 -0.410
#> 3 2.05 1.67 1.35
#> 4 1.31 0.0329 -1.29
#> 5 -1.67 -0.379 -0.696
#> 6 -1.02 -0.686 1.43
#> 7 -0.291 -0.0728 0.336
#> 8 -0.507 0.350 1.70
#> 9 -0.707 0.961 -0.493
#> 10 0.0459 -0.299 -0.0113
end_df <- df %>%
mutate( "A-B" = A-B,
"A-C" = A-C,
"B-A" = B-A,
"B-C" = B-C,
"C-A" = C-A,
"C-B" = C-B)
print(end_df)
#> # A tibble: 10 × 9
#> A B C `A-B` `A-C` `B-A` `B-C` `C-A` `C-B`
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 -0.292 1.27 0.783 -1.56 -1.08 1.56 0.482 1.08 -0.482
#> 2 -1.11 0.254 -0.410 -1.37 -0.703 1.37 0.664 0.703 -0.664
#> 3 2.05 1.67 1.35 0.380 0.702 -0.380 0.321 -0.702 -0.321
#> 4 1.31 0.0329 -1.29 1.28 2.60 -1.28 1.33 -2.60 -1.33
#> 5 -1.67 -0.379 -0.696 -1.29 -0.975 1.29 0.317 0.975 -0.317
#> 6 -1.02 -0.686 1.43 -0.334 -2.44 0.334 -2.11 2.44 2.11
#> 7 -0.291 -0.0728 0.336 -0.218 -0.627 0.218 -0.409 0.627 0.409
#> 8 -0.507 0.350 1.70 -0.857 -2.20 0.857 -1.35 2.20 1.35
#> 9 -0.707 0.961 -0.493 -1.67 -0.215 1.67 1.45 0.215 -1.45
#> 10 0.0459 -0.299 -0.0113 0.345 0.0572 -0.345 -0.288 -0.0572 0.288
Created on 2022-09-05 by the reprex package (v2.0.1)
You can get a list of all of the pairs of names, then create a list of new columns computed from the original dataframe, and then bind them:
# Build every ordered pair of distinct column names. expand.grid() converts
# character inputs to factors unless told otherwise, so keep them as plain
# strings to make the name-based lookups below explicit.
pairs <- expand.grid(names(df), names(df), stringsAsFactors = FALSE) %>%
  filter(Var1 != Var2)
# One single-column tibble per pair (named "x-y"), appended to the original data.
map2(pairs$Var1, pairs$Var2, function(x, y) as_tibble_col(df[[x]] - df[[y]], str_c(x, "-", y))) %>%
  bind_cols(df, .)
# # A tibble: 10 × 9
# A B C `B-A` `C-A` `A-B` `C-B` `A-C` `B-C`
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 0.199 0.110 0.0148 -0.0895 -0.184 0.0895 -0.0948 0.184 0.0948
# 2 -0.851 -0.413 0.338 0.438 1.19 -0.438 0.751 -1.19 -0.751
# 3 -1.13 0.112 -1.97 1.24 -0.835 -1.24 -2.08 0.835 2.08
# 4 0.597 -2.89 -2.32 -3.49 -2.92 3.49 0.572 2.92 -0.572
# 5 -1.10 0.0953 0.996 1.19 2.09 -1.19 0.900 -2.09 -0.900
# 6 0.0191 0.500 1.17 0.481 1.15 -0.481 0.667 -1.15 -0.667
# 7 0.416 0.949 -0.865 0.533 -1.28 -0.533 -1.81 1.28 1.81
# 8 1.84 -1.66 -1.39 -3.50 -3.23 3.50 0.267 3.23 -0.267
# 9 0.406 -1.48 -1.33 -1.89 -1.74 1.89 0.149 1.74 -0.149
# 10 0.393 -0.491 -0.139 -0.884 -0.532 0.884 0.352 0.532 -0.352

How can I keep old columns and rename new columns when using `mutate` with `across`

When I mutate across data, the columns selected by .cols are replaced by the results of the mutation. How can I perform this operation whilst:
Keeping the columns selected by .cols in the output
Appropriately & automatically renaming the columns created by mutate?
For example:
require(dplyr)
#> Loading required package: dplyr
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
require(magrittr)
#> Loading required package: magrittr
set.seed(7337)
## Create arbitrary tibble
myTibble <- tibble(x = 1:10,
y = runif(10),
z = y * pi)
## I can mutate across these columns
mutate(myTibble, across(everything(), multiply_by, 2))
#> # A tibble: 10 x 3
#> x y z
#> <dbl> <dbl> <dbl>
#> 1 2 1.78 5.58
#> 2 4 0.658 2.07
#> 3 6 0.105 0.331
#> 4 8 1.75 5.50
#> 5 10 1.33 4.19
#> 6 12 1.02 3.20
#> 7 14 1.20 3.75
#> 8 16 0.00794 0.0250
#> 9 18 0.108 0.340
#> 10 20 1.74 5.45
## I can subsequently rename these columns
mutate(myTibble, across(everything(), multiply_by, 2)) %>%
rename_with(paste0, everything(), "_double")
#> # A tibble: 10 x 3
#> x_double y_double z_double
#> <dbl> <dbl> <dbl>
#> 1 2 1.78 5.58
#> 2 4 0.658 2.07
#> 3 6 0.105 0.331
#> 4 8 1.75 5.50
#> 5 10 1.33 4.19
#> 6 12 1.02 3.20
#> 7 14 1.20 3.75
#> 8 16 0.00794 0.0250
#> 9 18 0.108 0.340
#> 10 20 1.74 5.45
## But how can I achieve this (without the fuss of creating & joining an additional table):
# A tibble: 10 x 6
# x y z x_double y_double z_double
# <int> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 0.313 0.982 2 0.625 1.96
# 2 2 0.759 2.39 4 1.52 4.77
# 3 3 0.705 2.22 6 1.41 4.43
# 4 4 0.573 1.80 8 1.15 3.60
# 5 5 0.599 1.88 10 1.20 3.77
# 6 6 0.0548 0.172 12 0.110 0.344
# 7 7 0.571 1.80 14 1.14 3.59
# 8 8 0.621 1.95 16 1.24 3.90
# 9 9 0.709 2.23 18 1.42 4.46
# 10 10 0.954 3.00 20 1.91 5.99
Created on 2021-09-16 by the reprex package (v2.0.1)
Use the .names argument of across
across names its outputs using the argument .names, which is an argument passed to glue::glue(). This is a string in which "{.col}" and "{.fn}" are replaced by the names of your columns (specified by .cols) and functions (specified by .fns)
The default value for .names is NULL, which is equivalent to "{.col}". This means that every mutated column is assigned the same name as its counterpart in .cols, which effectively 'overwrites' these columns in the output.
To produce your desired table you would need to do:
require(dplyr)
#> Loading required package: dplyr
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
require(magrittr)
#> Loading required package: magrittr
set.seed(7337)
## Create arbitrary tibble
myTibble <- tibble(x = 1:10,
y = runif(10),
z = y * pi)
# Keep the original columns and add doubled copies named "<col>_double".
# The multiplier is passed inside the lambda because forwarding extra
# arguments through across()'s `...` is deprecated as of dplyr 1.1.0.
mutate(myTibble, across(everything(), ~ multiply_by(.x, 2), .names = "{.col}_double"))
#> # A tibble: 10 x 6
#> x y z x_double y_double z_double
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0.889 2.79 2 1.78 5.58
#> 2 2 0.329 1.03 4 0.658 2.07
#> 3 3 0.0527 0.165 6 0.105 0.331
#> 4 4 0.875 2.75 8 1.75 5.50
#> 5 5 0.666 2.09 10 1.33 4.19
#> 6 6 0.509 1.60 12 1.02 3.20
#> 7 7 0.598 1.88 14 1.20 3.75
#> 8 8 0.00397 0.0125 16 0.00794 0.0250
#> 9 9 0.0541 0.170 18 0.108 0.340
#> 10 10 0.868 2.73 20 1.74 5.45
Created on 2021-09-16 by the reprex package (v2.0.1)
In this way, you can use across with .fns and .names to do quite a lot:
# Apply several named functions per column; "{.fn}" expands to the list names.
# The constant 2 is supplied inside each lambda rather than through across()'s
# `...`, which is deprecated as of dplyr 1.1.0.
mutate(myTibble, across(everything(),
  .fns = list(double = ~ multiply_by(.x, 2), half = ~ divide_by(.x, 2)),
  .names = "{.col}_{.fn}"))
#> # A tibble: 10 x 9
#> x y z x_double x_half y_double y_half z_double z_half
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0.889 2.79 2 0.5 1.78 0.444 5.58 1.40
#> 2 2 0.329 1.03 4 1 0.658 0.165 2.07 0.517
#> 3 3 0.0527 0.165 6 1.5 0.105 0.0263 0.331 0.0827
#> 4 4 0.875 2.75 8 2 1.75 0.437 5.50 1.37
#> 5 5 0.666 2.09 10 2.5 1.33 0.333 4.19 1.05
#> 6 6 0.509 1.60 12 3 1.02 0.255 3.20 0.800
#> 7 7 0.598 1.88 14 3.5 1.20 0.299 3.75 0.939
#> 8 8 0.00397 0.0125 16 4 0.00794 0.00199 0.0250 0.00624
#> 9 9 0.0541 0.170 18 4.5 0.108 0.0271 0.340 0.0850
#> 10 10 0.868 2.73 20 5 1.74 0.434 5.45 1.36

Purrr's Modify-In Function

I'm trying to use purrr's modify_in to modify elements of a list. An example of the list:
tib_list <- map(1:3, ~ tibble(col_one = runif(5),
col_two = runif(5), col_three = runif(5)))
Let's say I want to change elements 2 and 3 of the list to unselect col_one. I imagined doing this:
# Attempt: drop col_one from elements 2..n by passing several positions at once.
modify_in(tib_list, 2:length(tib_list), ~ select(.x, -col_one))
But this yields an error. I then thought of doing something like this, but this ends up duplicating the list
# Attempt: call modify_in once per position; each call returns the whole list.
map(1:3, ~ modify_in(tib_list, .x, ~ select(.x, -col_one)))
I think you wanted to use modify_at which lets you specify either element names or positions. modify_in allows us to use only one position like purrr::pluck.
library(tidyverse)
tib_list <- map(1:3, ~ tibble(col_one = runif(5), col_two = runif(5), col_three = runif(5)))
modify_at(tib_list, c(2,3), ~ select(.x, -col_one))
#> [[1]]
#> # A tibble: 5 x 3
#> col_one col_two col_three
#> <dbl> <dbl> <dbl>
#> 1 0.190 0.599 0.824
#> 2 0.214 0.172 0.106
#> 3 0.236 0.666 0.584
#> 4 0.373 0.903 0.252
#> 5 0.875 0.196 0.643
#>
#> [[2]]
#> # A tibble: 5 x 2
#> col_two col_three
#> <dbl> <dbl>
#> 1 0.513 0.113
#> 2 0.893 0.377
#> 3 0.275 0.675
#> 4 0.529 0.612
#> 5 0.745 0.405
#>
#> [[3]]
#> # A tibble: 5 x 2
#> col_two col_three
#> <dbl> <dbl>
#> 1 0.470 0.789
#> 2 0.181 0.289
#> 3 0.680 0.213
#> 4 0.772 0.114
#> 5 0.314 0.895
Created on 2021-08-27 by the reprex package (v0.3.0)
We can use modify_in with one position, but supplying a vector such as c(2,3) would mean that we want to access the third element of the second parent element in a nested list. This is why we see the error below.
# works
modify_in(tib_list, 2, ~ select(.x, -col_one))
#> [[1]]
#> # A tibble: 5 x 3
#> col_one col_two col_three
#> <dbl> <dbl> <dbl>
#> 1 0.109 0.697 0.0343
#> 2 0.304 0.645 0.851
#> 3 0.530 0.786 0.600
#> 4 0.708 0.0324 0.605
#> 5 0.898 0.232 0.567
#>
#> [[2]]
#> # A tibble: 5 x 2
#> col_two col_three
#> <dbl> <dbl>
#> 1 0.766 0.157
#> 2 0.0569 0.0422
#> 3 0.943 0.0850
#> 4 0.947 0.0806
#> 5 0.761 0.297
#>
#> [[3]]
#> # A tibble: 5 x 3
#> col_one col_two col_three
#> <dbl> <dbl> <dbl>
#> 1 0.878 0.864 0.540
#> 2 0.168 0.745 0.120
#> 3 0.943 0.338 0.535
#> 4 0.353 0.478 0.204
#> 5 0.267 0.669 0.478
# doesn't work
modify_in(tib_list, c(2,3), ~ select(.x, -col_one))
#> Error in UseMethod("select"): no applicable method for 'select' applied to an object of class "c('double', 'numeric')"
I never used modify_in, but you could use
library(purrr)
library(dplyr)
tib_list %>%
imap(~ if (.y > 1) { select(.x, -col_one) } else { .x })
to get
[[1]]
# A tibble: 5 x 3
col_one col_two col_three
<dbl> <dbl> <dbl>
1 0.710 0.189 0.644
2 0.217 0.946 0.955
3 0.590 0.770 0.0180
4 0.135 0.101 0.888
5 0.640 0.645 0.346
[[2]]
# A tibble: 5 x 2
col_two col_three
<dbl> <dbl>
1 0.267 0.926
2 0.456 0.0902
3 0.659 0.707
4 0.421 0.0451
5 0.801 0.220
[[3]]
# A tibble: 5 x 2
col_two col_three
<dbl> <dbl>
1 0.437 0.649
2 0.256 0.466
3 0.331 0.594
4 0.586 0.558
5 0.625 0.444
We can use modify_if
modify_if(tib_list,.f = ~ .x %>% select(-col_one),
.p = seq_along(tib_list) != 1)
-output
[[1]]
# A tibble: 5 x 3
col_one col_two col_three
<dbl> <dbl> <dbl>
1 0.819 0.666 0.384
2 0.183 0.549 0.0211
3 0.374 0.240 0.252
4 0.359 0.913 0.792
5 0.515 0.402 0.217
[[2]]
# A tibble: 5 x 2
col_two col_three
<dbl> <dbl>
1 0.696 0.0269
2 0.433 0.147
3 0.235 0.743
4 0.589 0.748
5 0.635 0.851
[[3]]
# A tibble: 5 x 2
col_two col_three
<dbl> <dbl>
1 0.707 0.976
2 0.0966 0.130
3 0.574 0.572
4 0.854 0.680
5 0.819 0.582

Performing a linear model in R of a single response with a single predictor from a large dataframe and repeat for each column

It might not be very clear from the title but what I wish to do is:
I have a dataframe df with, say, 200 columns and the first 80 columns are response variables (y1, y2, y3, ...) and the rest of 120 are predictors (x1, x2, x3, ...).
I wish to compute a linear model for each pair – lm(yi ~ xi, data = df).
Many problems and solutions I have looked through online have either a fixed response vs many predictors or the other way around, using lapply() and its related functions.
Could anyone who is familiar with it point me to the right step?
use tidyverse
library(tidyverse)
library(broom)
df <- mtcars
y <- names(df)[1:3]
x <- names(df)[4:7]
result <- expand_grid(x, y) %>%
rowwise() %>%
mutate(frm = list(reformulate(x, y)),
model = list(lm(frm, data = df)))
result$model <- purrr::set_names(result$model, nm = paste0(result$y, " ~ ", result$x))
result$model[1:2]
#> $`mpg ~ hp`
#>
#> Call:
#> lm(formula = frm, data = df)
#>
#> Coefficients:
#> (Intercept) hp
#> 30.09886 -0.06823
#>
#>
#> $`cyl ~ hp`
#>
#> Call:
#> lm(formula = frm, data = df)
#>
#> Coefficients:
#> (Intercept) hp
#> 3.00680 0.02168
map_df(result$model, tidy)
#> # A tibble: 24 x 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) 30.1 1.63 18.4 6.64e-18
#> 2 hp -0.0682 0.0101 -6.74 1.79e- 7
#> 3 (Intercept) 3.01 0.425 7.07 7.41e- 8
#> 4 hp 0.0217 0.00264 8.23 3.48e- 9
#> 5 (Intercept) 21.0 32.6 0.644 5.25e- 1
#> 6 hp 1.43 0.202 7.08 7.14e- 8
#> 7 (Intercept) -7.52 5.48 -1.37 1.80e- 1
#> 8 drat 7.68 1.51 5.10 1.78e- 5
#> 9 (Intercept) 14.6 1.58 9.22 2.93e-10
#> 10 drat -2.34 0.436 -5.37 8.24e- 6
#> # ... with 14 more rows
map_df(result$model, glance)
#> # A tibble: 12 x 12
#> r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.602 0.589 3.86 45.5 1.79e- 7 1 -87.6 181. 186.
#> 2 0.693 0.683 1.01 67.7 3.48e- 9 1 -44.6 95.1 99.5
#> 3 0.626 0.613 77.1 50.1 7.14e- 8 1 -183. 373. 377.
#> 4 0.464 0.446 4.49 26.0 1.78e- 5 1 -92.4 191. 195.
#> 5 0.490 0.473 1.30 28.8 8.24e- 6 1 -52.7 111. 116.
#> 6 0.504 0.488 88.7 30.5 5.28e- 6 1 -188. 382. 386.
#> 7 0.753 0.745 3.05 91.4 1.29e-10 1 -80.0 166. 170.
#> 8 0.612 0.599 1.13 47.4 1.22e- 7 1 -48.3 103. 107.
#> 9 0.789 0.781 57.9 112. 1.22e-11 1 -174. 355. 359.
#> 10 0.175 0.148 5.56 6.38 1.71e- 2 1 -99.3 205. 209.
#> 11 0.350 0.328 1.46 16.1 3.66e- 4 1 -56.6 119. 124.
#> 12 0.188 0.161 114. 6.95 1.31e- 2 1 -196. 398. 402.
#> # ... with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
Created on 2020-12-11 by the reprex package (v0.3.0)

Can I overlook a missing variable in a summing part of a function?

This is a shortened version of my real df. I have a function (called: calc) which creates a new variable called 'total', for simplicity this adds up three variables: a, b, c. When I add a dataframe, to that function, that does not feature one variable (say c) so only has a & b, the function falls over. Is there a 'function' / simple way that counts the variables regardless if they are missing?
# Adds a `total` column to `x` as the sum of its columns a, b and c.
calc <- function(x) {
  mutate(x, total = a + b + c)
}
data.2 has two columns a & b with many rows of values, but when running that in the function it cannot find c so does not calculate.
new.df <- calc(data.2)
Many thanks.
If you want to perform a row-wise sum or mean, rowSums() and rowMeans() have an na.rm argument which you can use to ignore NA values.
library(dplyr)
# Row-wise total of whichever of the columns a, b, c are present in `x`.
# any_of() silently skips absent columns (the range a:c errors when c is
# missing); na.rm = TRUE ignores NA values within the columns that do exist.
calc <- function(x) {
  x %>%
    mutate(total = rowSums(select(., any_of(c("a", "b", "c"))), na.rm = TRUE))
}
In the general case, if you cannot find a function that gives you an out-of-the-box solution, you can replace the NA values with 0 and then perform the operation that you want.
# Replaces NA values in columns a through c with 0, then adds `total` as
# a + b + c. All three columns must be present in `x`.
calc <- function(x) {
  x %>%
    mutate(
      # The replacement value goes inside the lambda: passing it through
      # across()'s `...` is deprecated as of dplyr 1.1.0.
      across(a:c, ~ tidyr::replace_na(.x, 0)),
      total = a + b + c
    )
}
You can use rowwise() and c_across() with any_of() (or any other tidyselect function) from dplyr (>= 1.0.0).
library(dplyr)
df <- data.frame(a = rnorm(10), b = rnorm(10))
dfc <- data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10))
# Adds `total`: the per-row sum over whichever of the columns a, b, c exist
# in `x`. The row-wise grouping is removed again before returning.
calc <- function(x) {
  by_row <- rowwise(x)
  summed <- mutate(by_row, total = sum(c_across(any_of(c("a", "b", "c")))))
  ungroup(summed)
}
calc(df)
#> # A tibble: 10 x 3
#> a b total
#> <dbl> <dbl> <dbl>
#> 1 -0.884 0.851 -0.0339
#> 2 -1.56 -0.464 -2.02
#> 3 -0.884 0.815 -0.0689
#> 4 -1.46 -0.259 -1.71
#> 5 0.211 -0.528 -0.317
#> 6 1.85 0.190 2.04
#> 7 -1.31 -0.921 -2.23
#> 8 0.450 0.394 0.845
#> 9 -1.14 0.428 -0.714
#> 10 -1.11 0.417 -0.698
calc(dfc)
#> # A tibble: 10 x 4
#> a b c total
#> <dbl> <dbl> <dbl> <dbl>
#> 1 -0.0868 0.632 1.81 2.36
#> 2 0.568 -0.523 0.240 0.286
#> 3 -0.0325 0.377 -0.437 -0.0921
#> 4 0.660 0.456 1.28 2.39
#> 5 -0.123 1.75 -1.03 0.599
#> 6 0.641 1.39 0.902 2.93
#> 7 0.266 0.520 0.904 1.69
#> 8 -1.53 0.319 0.439 -0.776
#> 9 0.942 0.468 -1.69 -0.277
#> 10 0.254 -0.600 -0.196 -0.542
If you want to be able to generalize beyond those 3 variables you can use any tidyselect methodology.
df <- data.frame(a = rnorm(10), b = rnorm(10))
dfc <- data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10))
# Adds `total`: the per-row sum over every column of `x`. The row-wise
# grouping is removed again before returning.
calc <- function(x) {
  by_row <- rowwise(x)
  summed <- mutate(by_row, total = sum(c_across(everything())))
  ungroup(summed)
}
calc(df)
#> # A tibble: 10 x 3
#> a b total
#> <dbl> <dbl> <dbl>
#> 1 0.775 1.17 1.95
#> 2 -1.05 1.21 0.155
#> 3 2.07 -0.264 1.81
#> 4 1.11 0.793 1.90
#> 5 -0.700 -0.216 -0.916
#> 6 -1.04 -1.03 -2.07
#> 7 -0.525 1.60 1.07
#> 8 0.354 0.828 1.18
#> 9 0.126 0.110 0.236
#> 10 -0.0954 -0.603 -0.698
calc(dfc)
#> # A tibble: 10 x 4
#> a b c total
#> <dbl> <dbl> <dbl> <dbl>
#> 1 -0.616 0.767 0.0462 0.196
#> 2 -0.370 -0.538 -0.186 -1.09
#> 3 0.337 1.11 -0.700 0.751
#> 4 -0.993 -0.531 -0.984 -2.51
#> 5 0.0538 1.50 -0.0808 1.47
#> 6 -0.907 -1.54 -0.734 -3.18
#> 7 -1.65 -0.242 1.43 -0.455
#> 8 -0.166 0.447 -0.281 -0.000524
#> 9 0.0637 -0.0185 0.754 0.800
#> 10 1.81 -1.09 -2.15 -1.42
Created on 2020-09-10 by the reprex package (v0.3.0)

Resources