Change the column values within dplyr pipes - r

I want to change the values of a column, referring to it by its index, using a pipe -
require(dplyr)
mtcars[, 1] = 4 * mtcars[,2]
I was wondering whether the above calculation can be done using a pipe.

You can use magrittr and %<>%:
library(dplyr)
library(magrittr)
# Work on a copy so the built-in mtcars data set is left untouched.
df1 <- mtcars
# %<>% pipes df1 into mutate() and assigns the result back to df1.
# across(1, ...) targets the first column by position; the lambda ignores the
# column's current values and returns 4 times the second column of df1.
df1 %<>%
  mutate(across(1, ~ df1[[2]] * 4))
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 24 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 24 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 16 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 24 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 32 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 24 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 32 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 16 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 16 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 24 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # ... with 22 more rows

Another option could be:
# %<>% pipes mtcars through mutate_at() and assigns the result back to mtcars.
# The formula pulls the second column and multiplies it by 4; the result
# replaces the first column, as the output below shows (mpg == 4 * cyl).
# NOTE(review): `!!` presumably forces select(., 2) to be evaluated against the
# piped-in data before the formula is applied — confirm against the rlang docs.
mtcars %<>%
mutate_at(vars(1), ~ !!select(., 2) %>% pull() * 4)
mpg cyl disp hp drat wt qsec vs am gear carb
1 24 6 160.0 110 3.90 2.620 16.46 0 1 4 4
2 24 6 160.0 110 3.90 2.875 17.02 0 1 4 4
3 16 4 108.0 93 3.85 2.320 18.61 1 1 4 1
4 24 6 258.0 110 3.08 3.215 19.44 1 0 3 1
5 32 8 360.0 175 3.15 3.440 17.02 0 0 3 2
6 24 6 225.0 105 2.76 3.460 20.22 1 0 3 1
7 32 8 360.0 245 3.21 3.570 15.84 0 0 3 4
8 16 4 146.7 62 3.69 3.190 20.00 1 0 4 2
9 16 4 140.8 95 3.92 3.150 22.90 1 0 4 2
10 24 6 167.6 123 3.92 3.440 18.30 1 0 4 4

Related

Calculate means including all factor levels but one

Using the dataframe mtcars I would like to add the column qsec_control which is calculated as the mean(qsec) of all rows that don't have the same cyl as the current row (e.g. if cyl == 6, it would take mean(qsec[cyl != 6])).
The question feels somewhat dumb, but I can't figure out how to do this.
This solution groups by cyl, then uses dplyr::cur_group_rows() to index into mtcars$qsec:
library(dplyr)

# For every cyl group, take the mean of qsec over all rows of mtcars that are
# NOT in that group: cur_group_rows() gives the group's row positions, and the
# negative index drops them from the full qsec column.
mtcars %>%
  group_by(cyl) %>%
  mutate(
    qsec_control = mean(mtcars$qsec[-cur_group_rows()])
  ) %>%
  ungroup()
# A tibble: 32 × 12
mpg cyl disp hp drat wt qsec vs am gear carb qsec_cont…¹
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 17.8
2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 17.8
3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 17.2
4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 17.8
5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 18.7
6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 17.8
7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 18.7
8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 17.2
9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 17.2
10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 17.8
# … with 22 more rows, and abbreviated variable name ¹​qsec_control
Replicating zephryl's answer in data.table:
library(data.table)
# Load mtcars into the workspace and convert it to a data.table in place.
data(mtcars)
setDT(mtcars)
# Same idea as the dplyr version: per cyl group, drop the group's own rows
# (-.I) from the full table and average qsec over what is left; := stores the
# result in a new qsec_control column.
mtcars[, qsec_control := mtcars[-.I, mean(qsec)] , by = .(cyl)]
# Show the first rows, including the new column.
head(mtcars)
mpg cyl disp hp drat wt qsec vs am gear carb cyl2 qsec_control
1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 6 17.81280
2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 6 17.81280
3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 4 17.17381
4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 6 17.81280
5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 8 18.68611
6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 6 17.81280

missing information in tibble [duplicate]

This question already has answers here:
How do I name the "row names" column in r
(2 answers)
Closed 1 year ago.
Taking the 'mtcars' data and moving it into a tibble
data<- tibble(mtcars)
Noted that the first column which was the vehicle make has now become in the tibble just a numeric list 1,2,3 etc
Fairly new to R so is there a way to tell a tibble to keep the format of the original data?
Tibbles don't support row names; to keep them, you can store them in a new column.
library(dplyr)
library(tibble)
# Move the row names into a regular column called "make" first, then convert
# to a tibble so the car names survive the conversion.
mtcars %>%
  rownames_to_column(var = "make") %>%
  as_tibble()
# make mpg cyl disp hp drat wt qsec vs am gear carb
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 Mazda … 21 6 160 110 3.9 2.62 16.5 0 1 4 4
# 2 Mazda … 21 6 160 110 3.9 2.88 17.0 0 1 4 4
# 3 Datsun… 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
# 4 Hornet… 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
# 5 Hornet… 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
# 6 Valiant 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
# 7 Duster… 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
# 8 Merc 2… 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
# 9 Merc 2… 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#10 Merc 2… 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
# … with 22 more rows
Since tibbles don't keep row names, you can use dplyr::as_tibble, which has an option to create a column from the row names:
as_tibble(mtcars, rownames = "names_car")
Output:
names_car mpg cyl disp hp drat wt qsec vs am gear carb
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Mazda RX4 21 6 160 110 3.9 2.62 16.5 0 1 4 4
2 Mazda RX4 Wag 21 6 160 110 3.9 2.88 17.0 0 1 4 4
3 Datsun 710 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
4 Hornet 4 Drive 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
5 Hornet Sportabout 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
6 Valiant 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
7 Duster 360 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
8 Merc 240D 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
9 Merc 230 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
10 Merc 280 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4

create a duplicate column using tidyeval [duplicate]

This question already has answers here:
Pass a string as variable name in dplyr::filter
(4 answers)
Closed 2 years ago.
I am trying to create a duplicate column using tidyeval. In each loop the name of the column to duplicate varies and is obtained using a regular expression. For example,
library(tidyverse)
a <- str_subset(string = names(mtcars), pattern = "^a")
a
# am
to get the column to be duplicated.
Then I have no idea how to use the string here to duplicate the column (to a new column a2). Tried various combinations from the code below, but struggling to get my head around tidy evaluations.
# a <- enquo(a)
mtcars %>%
as_tibble() %>%
mutate(a2 := {{a}})
# mutate(a2 := !!a)
# mutate(a2 := vars(!!!a))
# # A tibble: 32 x 12
# mpg cyl disp hp drat wt qsec vs am gear carb am2
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
# 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 am
# 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 am
# 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 am
# 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 am
# 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 am
# 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 am
# 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 am
# 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 am
# 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 am
# 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 am
(I am looking for am2 to be a copy of am here, so 1 and 0 in each row, not "am")
If only one column is selected, e.g. am
# Name of the column to duplicate, held as a string.
a <- "am"
# "{a}2" builds the new column's name ("am2") from the string in `a`.
# .data[[a]] looks the source column up by name in the data only, so a wrong
# name raises an error instead of silently picking up a variable of that name
# from the calling environment (which !!sym(a) would do).
mtcars %>%
  mutate("{a}2" := .data[[a]])
# mpg cyl disp hp drat wt qsec vs am gear carb am2
# 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 1
# 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 1
# 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 1
# 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 0
# 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 0
# 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 0
# 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 0
# 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 0
If there are more than one columns selected, e.g. mpg and cyl, you can use the .names argument in across().
# Names of the columns to duplicate.
a <- c("mpg", "cyl")
# identity() returns each selected column unchanged; .names appends "2" to the
# original column name, giving mpg2 and cyl2.
mtcars %>%
  mutate(
    across(all_of(a), identity, .names = "{col}2")
  )
# mpg cyl disp hp drat wt qsec vs am gear carb mpg2 cyl2
# 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 21.0 6
# 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 21.0 6
# 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 22.8 4
# 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 21.4 6
# 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 18.7 8
# 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 18.1 6
# 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 14.3 8
# 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 24.4 4

Why does the base R `print()` function require a tibble?

Why does the base R print() function require a tibble when using the n = X argument? It appears this is true from the examples below.
This does NOT work
library(tidyverse)
mtcars %>% print(n = 20)
#> Error in print.default(m, ..., quote = quote, right = right, max = max) :
#> invalid 'na.print' specification
This does work
mtcars %>% as_tibble() %>% print(n = 20)
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> 11 17.8 6 168. 123 3.92 3.44 18.9 1 0 4 4
#> 12 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3
#> 13 17.3 8 276. 180 3.07 3.73 17.6 0 0 3 3
#> 14 15.2 8 276. 180 3.07 3.78 18 0 0 3 3
#> 15 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4
#> 16 10.4 8 460 215 3 5.42 17.8 0 0 3 4
#> 17 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4
#> 18 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1
#> 19 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2
#> 20 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1
#> # ... with 12 more rows
Your first example is equivalent to print(mtcars, n=20) -- which also fails.
Because mtcars is a data.frame your call dispatches on print.data.frame. And as args(print.data.frame) will tell you, there is no n= argument in it.
In short, you got confused between a specific dispatch (I presume print.tbl) with a more generic approach.
So a better title for the question might be 'Why does only the print method for tibbles have a n argument' -- for general use we commonly just invoke head as in
R> head(mtcars)
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
R>
which of course also works in a pipelined expression.

How do I selectively change variable data type automatically in the tidyverse?

I would like to change some of the variables from numerical to factor types, leaving other types as they are. I know how to do this one variable at a time, but I would like to automate the process for larger datasets.
I've changed variables in the mtcars dataset one by one, copying and pasting the code. I've used mapply to successfully automate this, but I've only managed to do it on a subset of mtcars. I'm not sure how I would keep the entire dataset intact with the new variable types, though. Reprex below.
#before
as_tibble(mtcars)
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # ... with 22 more rows
#copy + paste job
mtcars$cyl <- factor(as.character(mtcars$cyl))
mtcars$hp <- factor(as.character(mtcars$hp))
mtcars$vs <- factor(as.character(mtcars$vs))
#after
as_tibble(mtcars)
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <fct> <dbl> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # ... with 22 more rows
Created on 2019-05-17 by the reprex package (v0.2.1)
I managed to change the variable types successfully. I would hate to do this something like 30-50 times though. What are some ways to automate this? Thank you.
library(dplyr)
# Convert the listed columns to factors and leave every other column as is.
# across() replaces the superseded mutate_at(); going through as.character()
# keeps the factor levels in the same order as the copy-and-paste version.
as_tibble(mtcars) %>%
  mutate(across(c(cyl, hp, vs), ~ factor(as.character(.x))))
Hope this helps.
Using base R:
# Columns to convert from numeric to factor.
vars_to_make_f <- c("cyl", "hp", "vs")
# Replace only those columns; lapply() returns one converted column per name,
# so the rest of mtcars is left untouched.
mtcars[vars_to_make_f] <- lapply(
  mtcars[vars_to_make_f],
  function(col) as.factor(as.character(col))
)
mtcars
# A tibble: 32 x 11
mpg cyl disp hp drat wt qsec vs am gear carb
<dbl> <fct> <dbl> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl>
1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
# ... with 22 more rows
You can use mutate_at:
# Convert the named columns to factors; all_of() selects them from a character
# vector, and across() replaces the superseded mutate_at().
mtcars %>%
  mutate(
    across(all_of(c("cyl", "hp", "vs")), function(x) factor(as.character(x)))
  )
Or use purrr modify_at:
# Apply the factor conversion to the three named columns only.
mtcars %>%
  modify_at(
    c("cyl", "hp", "vs"),
    ~ factor(as.character(.x))
  )
An option is mutate_at. The as.factor(as.character(...)) step is not needed; we can convert directly to factor. The reverse route, however, would be factor -> character -> numeric.
library(dplyr)
# factor() is applied to the numeric columns directly, so no as.character()
# step is needed; across() replaces the superseded mutate_at().
mtcars %>%
  as_tibble() %>%
  mutate(across(c(cyl, hp, vs), factor))
# A tibble: 32 x 11
# mpg cyl disp hp drat wt qsec vs am gear carb
# <dbl> <fct> <dbl> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl>
# 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
# 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
# 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
# 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
# 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
# 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
# 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
# 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
# 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
# … with 22 more rows

Resources