I'm trying to use dplyr::mutate to change a dynamic column with conditions using other columns dynamically.
I've got this bit of code:
d <- mtcars %>% tibble
fld_name <- "mpg"
other_fld_name <- "cyl"
d <- d %>% mutate(!!fld_name := ifelse(!!other_fld_name < 5,NA,!!fld_name))
which sets mpg to
mpg
<chr>
1 mpg
2 mpg
3 mpg
4 mpg
5 mpg
6 mpg
7 mpg
8 mpg
9 mpg
10 mpg
it seems to select the field on the LHS of assignment operator, but just pastes the field name on the RHS.
Removing the unquotes on the RHS yields the same result.
Any help is much appreciated.
use get to retreive column value instead
library(tidyverse)
d <- mtcars %>% tibble
fld_name <- "mpg"
other_fld_name <- "cyl"
d %>% mutate(!!fld_name := ifelse(get(other_fld_name) < 5 ,NA, get(fld_name)))
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 NA 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 NA 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 NA 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # ... with 22 more rows
Created on 2021-06-22 by the reprex package (v2.0.0)
We can also use ensym function to quote variable name stored as string and unquote it with !! like the following:
library(rlang)
d <- mtcars %>% tibble
fld_name <- "mpg"
other_fld_name <- "cyl"
d %>%
mutate(!!ensym(fld_name) := ifelse(!!ensym(other_fld_name) < 5, NA, !!ensym(fld_name)))
# A tibble: 32 x 11
mpg cyl disp hp drat wt qsec vs am gear carb
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
3 NA 4 108 93 3.85 2.32 18.6 1 1 4 1
4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
8 NA 4 147. 62 3.69 3.19 20 1 0 4 2
9 NA 4 141. 95 3.92 3.15 22.9 1 0 4 2
10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
# ... with 22 more rows
We could also use .data
library(dplyr)
d %>%
mutate(!! fld_name := case_when(.data[[other_fld_name]] >=5 ~
.data[[fld_name]]))
-output
# A tibble: 32 x 11
mpg cyl disp hp drat wt qsec vs am gear carb
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
3 NA 4 108 93 3.85 2.32 18.6 1 1 4 1
4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
8 NA 4 147. 62 3.69 3.19 20 1 0 4 2
9 NA 4 141. 95 3.92 3.15 22.9 1 0 4 2
10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
# … with 22 more rows
data
d <- mtcars %>%
as_tibble
fld_name <- "mpg"
other_fld_name <- "cyl"
Related
Before, I used group_by_at to group by a vector of strings or by NULL:
library(tidyverse)
grouping_1 <- c("cyl", "vs")
grouping_2 <- NULL
mtcars %>% group_by_at(grouping_1)
mtcars %>% group_by_at(grouping_2)
The help of group_by_at indicates that the function is superseded and that across should be used instead. But, grouping by NULL gives an error
mtcars %>% group_by(across(grouping_1)) # this works
mtcars %>% group_by(across(grouping_2)) # this gives an error
For me, group_by_at used in the way described has been useful because in my functions I can use the same code without checking every time whether the grouping argument is empty (NULL) or not.
It is still ok to use syms to splice strings into group_by using !!!.
library(tidyverse)
grouping_1 <- c("cyl", "vs")
grouping_2 <- NULL
sym_gr_1 <- syms(grouping_1)
sym_gr_2 <- syms(grouping_2)
mtcars %>% group_by(!!! sym_gr_1) # this works
#> # A tibble: 32 x 11
#> # Groups: cyl, vs [5]
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # … with 22 more rows
mtcars %>% group_by(!!! sym_gr_2) # this works
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # … with 22 more rows
Created on 2020-06-20 by the reprex package (v0.3.0)
Using dplyr::across() another option (on top of the official way to do with all_of as posted in the answer below) is to wrap the strings containing the variable names in c(). This even works, when the object is NULL. However, results in a note, reminding use to better use all_of.
grouping_1 <- c("cyl", "vs")
grouping_2 <- NULL
mtcars %>% group_by(across(c(grouping_1)))
#> Note: Using an external vector in selections is ambiguous.
#> ℹ Use `all_of(grouping_1)` instead of `grouping_1` to silence this message.
#> ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
#> This message is displayed once per session.
#> # A tibble: 32 x 11
#> # Groups: cyl, vs [5]
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # … with 22 more rows
mtcars %>% group_by(across(c(grouping_2)))
#> Note: Using an external vector in selections is ambiguous.
#> ℹ Use `all_of(grouping_2)` instead of `grouping_2` to silence this message.
#> ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
#> This message is displayed once per session.
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # … with 22 more rows
Created on 2021-05-30 by the reprex package (v0.3.0)
Using all_of:
library(tidyverse)
mtcars %>% group_by(across(all_of(grouping_1))) # this works
mtcars %>% group_by(across(all_of(grouping_2))) # this works
This question already has answers here:
Why does summarize or mutate not work with group_by when I load `plyr` after `dplyr`?
(2 answers)
Closed 2 years ago.
I know this question has answers in multiple places, but I am unable to figure out where I am going wrong. Suppose I want to find the sum of hp for each group in cyl:
mtcars%>%
group_by(cyl) %>%
mutate(
sum_hp = sum(hp)
)
sum_hp is giving me 4694 for every value. I want the sum for each value of cyl.
It could be a case of plyr::mutate masking dplyr::mutate when both the packages are loaded. We can specify dplyr::<functionname> to correct this
library(dplyr)
mtcars%>%
group_by(cyl) %>%
dplyr::mutate(sum_hp = sum(hp))
# A tibble: 32 x 12
# Groups: cyl [3]
# mpg cyl disp hp drat wt qsec vs am gear carb sum_hp
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 856
# 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 856
# 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 909
# 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 856
# 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 2929
# 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 856
# 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 2929
# 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 909
# 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 909
#10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 856
# … with 22 more rows
If we use plyr::mutate, the OP's output can be reproduced
mtcars%>%
group_by(cyl) %>%
plyr::mutate(
sum_hp = sum(hp)
)
# A tibble: 32 x 12
# Groups: cyl [3]
# mpg cyl disp hp drat wt qsec vs am gear carb sum_hp
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 4694
# 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 4694
# 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 4694
# 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 4694
# 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 4694
# 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 4694
# 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 4694
# 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 4694
# 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 4694
#10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 4694
# … with 22 more rows
I am trying to write a custom function that uses rlang's non-standard evaluation to group a dataframe by more than one variable.
This is what I've-
library(rlang)
# function definition
tryfn <- function(data, groups, ...) {
# preparing data
df <- dplyr::group_by(data, !!!rlang::enquos(groups))
print(head(df))
# applying some function `.f` on df that absorbs `...`
# .f(df, ...)
}
This works with a single grouping variable-
# works
tryfn(mtcars, am)
#> # A tibble: 6 x 11
#> # Groups: am [2]
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
But if try to use more than one grouping variable, this doesn't work-
# doesn't work
tryfn(mtcars, c(am, cyl))
#> Error: Column `c(am, cyl)` must be length 32 (the number of rows) or one, not 64
# doesn't work
tryfn(mtcars, list(am, cyl))
#> Error: Column `list(am, cyl)` must be length 32 (the number of rows) or one, not 2
We could parse as an expression with enexpr and use !!!
tryfn <- function(data, groups, ...) {
groups <- as.list(rlang::enexpr(groups))
groups <- if(length(groups) > 1) groups[-1] else groups
group_by(data, !!!groups)
}
-testing
tryfn(mtcars, am)
# A tibble: 32 x 11
# Groups: am [2]
# mpg cyl disp hp drat wt qsec vs am gear carb
# * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
# 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
# 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
# 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
# 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
# 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
# 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
# 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
# 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
# … with 22 more rows
tryfn(mtcars, c(am, cyl))
# A tibble: 32 x 11
# Groups: am, cyl [6]
# mpg cyl disp hp drat wt qsec vs am gear carb
# * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
# 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
# 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
# 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
# 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
# 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
# 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
# 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
# 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
# … with 22 more rows
I would like to change some of the variables from numerical to factor types, leaving other types as they are. I know how to do this one variable at a time, but I would like to automate the process for larger datasets.
I've changed variables in the mtcars dataset one by one, copying and pasting the code. I've used mapply to successfully automate this, but I've only managed to do it on a subset of mtcars. I'm not sure how I would keep the entire dataset intact with the new variable types, though. Reprex below.
#before
as_tibble(mtcars)
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # ... with 22 more rows
#copy + paste job
mtcars$cyl <- factor(as.character(mtcars$cyl))
mtcars$hp <- factor(as.character(mtcars$hp))
mtcars$vs <- factor(as.character(mtcars$vs))
#after
as_tibble(mtcars)
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <fct> <dbl> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # ... with 22 more rows
Created on 2019-05-17 by the reprex package (v0.2.1)
I managed to change the variable types successfully. I would hate to do this something like 30-50 times though. What are some ways to automate this? Thank you.
library(dplyr)
as_tibble(mtcars) %>%
mutate_at(.vars = vars(cyl, hp, vs),
.funs = ~ factor(as.character(.)))
Hope this helps.
Using base R:
vars_to_make_f <- c("cyl", "hp", "vs")
mtcars[vars_to_make_f] <-
lapply(mtcars[vars_to_make_f], function(x) as.factor(as.character(x)))
mtcars
# A tibble: 32 x 11
mpg cyl disp hp drat wt qsec vs am gear carb
<dbl> <fct> <dbl> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl>
1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
# ... with 22 more rows
You can use mutate_at:
mtcars %>%
mutate_at(c("cyl","hp","vs"),function(x) factor(as.character(x)))
Or use purrr modify_at:
mtcars %>%
modify_at(c("cyl","hp","vs"),function(x) factor(as.character(x)))
An option is mutate_at. The as.factor(as.character is not needed, we can directly convert to factor. But, the reverse route would be `factor -> character -> numeric)
library(dplyr)
mtcars %>%
as_tibble %>%
mutate_at(vars(cyl, hp, vs), factor)
# A tibble: 32 x 11
# mpg cyl disp hp drat wt qsec vs am gear carb
# <dbl> <fct> <dbl> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl>
# 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
# 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
# 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
# 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
# 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
# 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
# 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
# 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
# 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
# … with 22 more rows
I have a number of dataframes and a series of changes I want to make to each of them. For this example, let to the desired change be simply making each data frame a tibble using as_tibble. I know there are various ways of doing this, but I'd like to do this using purrr:walk.
For data frames df1 and df2,
df1 <- mtcars
df2 <- mtcars
I'd like to do the equivalent of
df1 %<>% as_tibble
df2 %<>% as_tibble
using walk. My attempt:
library(tidyverse)
walk(c(df1, df2), ~ assign(deparse(substitute(.)), as_tibble(.)))
This runs but does not make the desired change:
is_tibble(df1)
#> [1] FALSE
Here is how you can combine assign with walk (see the comments the code for more explanation)-
library(tidyverse)
# data
df1 <- mtcars
df2 <- mtcars
# creating tibbles
# this creates a list of objects with names ("df1", "df2")
tibble::lst(df1, df2) %>%
purrr::walk2(
.x = names(.), # names to assign
.y = ., # object to be assigned
.f = ~ assign(x = .x,
value = tibble::as.tibble(.y),
envir = .GlobalEnv)
)
# checking the newly created tibbles
df1
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # ... with 22 more rows
df2
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # ... with 22 more rows
Created on 2018-11-13 by the reprex package (v0.2.1)