Related
I have this dataset:
structure(list(ID = c(1, 2, 3, 4, 6, 7), V = c(0, 0, 1, 1,
1, 0), Mus = c(1, 0, 1, 1, 1, 0), R = c(1, 0, 1, 1, 1, 1),
E = c(1, 0, 0, 1, 0, 0), S = c(1, 0, 1, 1, 1, 0), t = c(0,
0, 0, 1, 0, 0), score = c(1, 0.4, 1, 0.4, 0.4, 0.4)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"), na.action = structure(c(`5` = 5L,
`12` = 12L, `15` = 15L, `21` = 21L, `22` = 22L, `23` = 23L, `34` = 34L,
`44` = 44L, `46` = 46L, `52` = 52L, `56` = 56L, `57` = 57L, `58` = 58L
), class = "omit"))
I would like to make new assignment on the score column, in this way:
For each ID, if the number 1 occurs more than 3 times in its row, then the last column (score) should be 1.
For each ID, if the number 1 occurs exactly 3 times in its row, then the last column (score) should be 0.4.
For each ID, if the number 1 occurs fewer than 3 times in its row, then the last column (score) should be 0.
Could you please suggest a way to do this via a for loop, dplyr, map, or the apply functions?
Thanks
This should work - calculating the number of 1s in the new ones column then applying the conditions using case_when:
library(tidyverse)
df |>
rowwise() |>
mutate(ones = sum(c_across(V:t)),
score = case_when(
ones > 3 ~ 1,
ones == 3 ~ 0.4,
ones < 3 ~ 0
))
#> # A tibble: 6 × 9
#> # Rowwise:
#> ID V Mus R E S t score ones
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0 1 1 1 1 0 1 4
#> 2 2 0 0 0 0 0 0 0 0
#> 3 3 1 1 1 0 1 0 1 4
#> 4 4 1 1 1 1 1 1 1 6
#> 5 6 1 1 1 0 1 0 1 4
#> 6 7 0 0 1 0 0 0 0 1
To make it tidier, you can use sum(c_across(V:t)) directly in case_when to not need a new variable (though it would repeat the calculation each time):
df |>
rowwise() |>
mutate(score = case_when(
sum(c_across(V:t)) > 3 ~ 1,
sum(c_across(V:t)) == 3 ~ 0.4,
sum(c_across(V:t)) < 3 ~ 0
))
I am working with data imported from SPSS using the haven package, imported using read_sav().
The data exists in columns of class haven_labelled, which is somewhat similar to a factor in that it contains a value and a label but is different in other ways.
I want to recode the values in the data and associated label values.
Here is an example:
library(haven)
library(dplyr)
library(labelled)
library(tidyr)
x <- structure(list(q0015_0001 = structure(c(3, 5, NA, 3, 1, 2, NA, NA, 3, 4, 2, NA, 2, 2, 4, NA,
4, 3, 3, 3, 3, 2, NA, NA, 2), label = "Menu Options/Variety", format.spss = "F8.2", labels =
c(`Very Dissatisfied` = 1, Dissatisfied = 2, Neutral = 3, Satisfied = 4, `Very Satisfied` = 5),
class = c("haven_labelled", "vctrs_vctr", "double")), q0015_0002 = structure(c(4, 4, NA, 5, 3, 3,
NA, NA, 3, 4, 2, NA, 5, 2, 4, NA, 4, 3, 4, 4, 4, 4, NA, NA, 2), label = "Cleanliness", format.spss
= "F8.2", labels = c(`Very Dissatisfied` = 1, Dissatisfied = 2, Neutral = 3, Satisfied = 4, `Very
Satisfied` = 5), class = c("haven_labelled", "vctrs_vctr", "double")), q0015_0003 =
structure(c(2, 2, NA, 3, 1, 2, NA, NA, 3, 4, 3, NA, 4, 3, 4, NA, 3, 2, 4, 4, 2, 2, NA, NA, 1),
label = "Taste and Quality of Food", format.spss = "F8.2", labels = c(`Very Dissatisfied` = 1,
Dissatisfied = 2, Neutral = 3, Satisfied = 4, `Very Satisfied` = 5), class = c("haven_labelled",
"vctrs_vctr", "double"))), row.names = c(NA, -25L), class = c("tbl_df", "tbl", "data.frame"),
label = "File created by user")
x
# A tibble: 25 x 3
# q0015_0001 q0015_0002 q0015_0003
# <dbl+lbl> <dbl+lbl> <dbl+lbl>
# 1 3 [Neutral] 4 [Satisfied] 2 [Dissatisfied]
# 2 5 [Very Satisfied] 4 [Satisfied] 2 [Dissatisfied]
# 3 NA NA NA
# 4 3 [Neutral] 5 [Very Satisfied] 3 [Neutral]
# 5 1 [Very Dissatisfied] 3 [Neutral] 1 [Very Dissatisfied]
# 6 2 [Dissatisfied] 3 [Neutral] 2 [Dissatisfied]
# 7 NA NA NA
# 8 NA NA NA
# 9 3 [Neutral] 3 [Neutral] 3 [Neutral]
#10 4 [Satisfied] 4 [Satisfied] 4 [Satisfied]
# ... with 15 more rows
To illustrate the column structure better
x$q0015_0001
#<labelled<double>[25]>: Menu Options/Variety
# [1] 3 5 NA 3 1 2 NA NA 3 4 2 NA 2 2 4 NA 4 3 3 3 3 2 NA NA 2
#
#Labels:
# value label
# 1 Very Dissatisfied
# 2 Dissatisfied
# 3 Neutral
# 4 Satisfied
# 5 Very Satisfied
The data include values from 1 to 5, each with a corresponding label (i.e., 1 = "Very Dissatisfied", etc.). haven_labelled allows numeric or character values.
I wish to change the values from c(1, 2, 3, 4, 5) to c(-2, -1, 0, 1, 2) but preserve the labels in the same order (i.e., -2 = "Very Dissatisfied", etc.).
Label
Old Value
New Value
Very Dissatisfied
1
-2
Dissatisfied
2
-1
Neutral
3
0
Satisfied
4
1
Very Satisfied
5
2
The closest I have come is using dplyr::recode(). The labelled package is supposed to extend the dplyr::recode() method to work with labelled vectors [1], but I haven't noticed a difference with/without it being loaded.
dplyr::recode(x$q0015_0001,`1` = -2, `2` = -1, `3` = 0, `4` = 1, `5` = 2)
#<labelled<double>[25]>: Menu Options/Variety
# [1] 0 2 NA 0 -2 -1 NA NA 0 1 -1 NA -1 -1 1 NA 1 0 0 0 0 -1 NA NA -1
#
#Labels:
# value label
# 1 Very Dissatisfied
# 2 Dissatisfied
# 3 Neutral
# 4 Satisfied
# 5 Very Satisfied
Notice that the values in the data changed as expected (3 became 0, 5 became 2, etc.) but not the label values. This means that if you were to attempt to use as_factor (the labelled vector equivalent to as.factor from the haven package) to reference the labels instead of the values, the labels will be incorrect. The effect on the data is further illustrated when viewing the values and labels together.
x %>%
mutate(across(starts_with("q0015"),
~recode(., `1` = -2, `2` = -1, `3` = 0, `4` = 1, `5` = 2)))
# A tibble: 25 x 3
#q0015_0001 q0015_0002 q0015_0003
#<dbl+lbl> <dbl+lbl> <dbl+lbl>
#1 0 1 [Very Dissatisfied] -1
#2 2 [Dissatisfied] 1 [Very Dissatisfied] -1
#3 NA NA NA
#4 0 2 [Dissatisfied] 0
#5 -2 0 -2
#6 -1 0 -1
#7 NA NA NA
#8 NA NA NA
#9 0 0 0
#10 1 [Very Dissatisfied] 1 [Very Dissatisfied] 1 [Very Dissatisfied]
# ... with 15 more rows
As shown, the labels still map to the old values. In the recoded version, 1 and 2 are positive scores but still map to Very Dissatisfied/Dissatisfied, while -2, -1 and 0 are not recognized as labelled values.
Question
How may I recode labelled vectors such that the data values and label values are updated together and labels are preserved/mapped to the new values?
It's ugly AF, but it does the job. Problem is that setting value labels is not straightforward. Package labelled offers functions for it, but these aren't "tidyverse-ready", i.e. they don't work within a mutate, nor do they allow for selecting variables with tidyselect helpers like starts_with.
However, set_value_labels allows for passing a list where each list element carries the name of the variable you want to apply labels to, and the labels themselves are provided as a named vector:
x |>
mutate(across(starts_with("q0015"),
~dplyr::recode(., `1` = -2, `2` = -1, `3` = 0, `4` = 1, `5` = 2))) |>
set_value_labels(.labels = rep(list(c("Very Dissatisfied" = -2,
"Dissatisfied" = -1,
"Neutral" = 0,
"Satisfied" = 1,
"Very Satisfied" = 2)),
x |>
select(starts_with("q0015")) |>
ncol()) |>
setNames(nm = x |>
select(starts_with("q0015")) |>
names()))
which gives:
# A tibble: 25 × 3
q0015_0001 q0015_0002 q0015_0003
<dbl+lbl> <dbl+lbl> <dbl+lbl>
1 0 [Neutral] 1 [Satisfied] -1 [Dissatisfied]
2 2 [Very Satisfied] 1 [Satisfied] -1 [Dissatisfied]
3 NA NA NA
4 0 [Neutral] 2 [Very Satisfied] 0 [Neutral]
5 -2 [Very Dissatisfied] 0 [Neutral] -2 [Very Dissatisfied]
6 -1 [Dissatisfied] 0 [Neutral] -1 [Dissatisfied]
7 NA NA NA
8 NA NA NA
9 0 [Neutral] 0 [Neutral] 0 [Neutral]
10 1 [Satisfied] 1 [Satisfied] 1 [Satisfied]
# … with 15 more rows
# ℹ Use `print(n = ...)` to see more rows
I was curious and checked with the package developer of the labelled package, and an alternative would be to write a small function for recoding and relabeling a single variable and then run this function within across:
https://github.com/larmarange/labelled/issues/126
I am working with a data set where I have to recode variables so that Never and Rarely =0, Sometimes and Always as 1, and Not Applicable as NA. For reference, the numbering scheme for the code is that 1=Never, 2=Rarely, 3=Sometimes, 4=Always, and 5= Not Applicable. Should I change the numeric variables before renaming them or change the character variables into numeric ones? I'm at an impasse and could use help on what code to use.
The problem
You have a vector (or a data frame column) x with values 1 through 5, eg:
x <- c(1,2,3,4,5,4,3,2,1)
You want to recode 1 and 2 to 0, 3 and 4 to 1, and 5 to NA.
Solution in base R
# Named lookup vector: the names are the old codes, the values the new codes.
values <- c(`1` = 0, `2` = 0, `3` = 1, `4` = 1, `5` = NA)
# Look codes up by *name* (as.character) rather than by position, so the
# recode does not depend on the old codes happening to be 1..length(values).
# A code with no entry in `values` becomes NA instead of being silently
# dropped, so the result always has the same length as `x`.
x <- unname(values[as.character(x)])
[1] 0 0 1 1 NA 1 1 0 0
Solution with dplyr::recode()
values <- list(`1` = 0, `2` = 0, `3` = 1, `4` = 1, `5` = NA_real_)
x <- dplyr::recode(x, !!!values)
[1] 0 0 1 1 NA 1 1 0 0
I have a dataframe apcd_hud_ex. I want to take some column names (e.g. x2014_03_15), and change the value of the columns based on the current value of the columns, the parsed date in the column names, and another column in the dataframe (SMOKEFREE_DATE). I can do it in a loop over the columns, but I would really like to know how to do it with dplyr and mutate. Any help would be much appreciated!
apcd_hud_ex = structure(list(studyid = 1:5, SMOKEFREE_DATE = structure(c(16283,
16283, 16071, 16071, 16648), class = "Date"), x2014_03_15 = c(1,
1, 1, 0, 1), x2014_04_15 = c(1, 1, 1, 1, 1), x2014_05_15 = c(1,
1, 1, 1, 1), x2014_06_15 = c(1, 1, 1, 1, 1), x2014_07_15 = c(1,
1, 1, 1, 1), x2014_08_15 = c(1, 1, 1, 1, 1), x2014_09_15 = c(1,
1, 1, 1, 1), x2014_10_15 = c(1, 1, 1, 1, 1), x2014_11_15 = c(1,
1, 1, 1, 1), x2014_12_15 = c(1, 1, 1, 1, 1), x2015_01_15 = c(1,
1, 1, 1, 1)), row.names = c(NA, -5L), class = c("tbl_df", "tbl",
"data.frame"))
> apcd_hud_ex
# A tibble: 5 x 13
studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15 x2014_07_15 x2014_08_15 x2014_09_15 x2014_10_15
<int> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2014-08-01 1 1 1 1 1 1 1 1
2 2 2014-08-01 1 1 1 1 1 1 1 1
3 3 2014-01-01 1 1 1 1 1 1 1 1
4 4 2014-01-01 0 1 1 1 1 1 1 1
5 5 2015-08-01 1 1 1 1 1 1 1 1
# ... with 3 more variables: x2014_11_15 <dbl>, x2014_12_15 <dbl>, x2015_01_15 <dbl>
>
#function for loop
#' Recode one insurance value relative to the smoke-free date
#'
#' @param SFdate Smoke-free date; passed to `as.Date()` with the format
#'   `"%Y-%m-%d"`.
#' @param insValue Current value of the column; a value of 0 is kept as 0.
#' @param insDate Date associated with the column; passed to `as.Date()`.
#' @return 0 when `insValue` is 0; otherwise 1 when `insDate` is earlier than
#'   `SFdate`, and 2 when it is not.
assign_PHRes_enrollIns_fn <- function(SFdate, insValue, insDate) {
  before_smokefree <- as.Date(insDate) < as.Date(SFdate, "%Y-%m-%d")
  if_else(
    insValue == 0,
    0,
    if_else(before_smokefree, 1, 2)
  )
}
#vectorized function
assign_PHRes_enrollIns_fn_vec <- Vectorize(assign_PHRes_enrollIns_fn)
dateCols = names(apcd_hud_ex)[which(names(apcd_hud_ex) == "x2014_03_15"):which(names(apcd_hud_ex) == "x2015_01_15")]
This loop over the column names (dateCols) works:
# Recode every date column in place, one column at a time.
# seq_along() is used instead of 1:length() so that an empty `dateCols`
# yields zero iterations rather than iterating over c(1, 0).
for (i in seq_along(dateCols)) {
  dateCol <- dateCols[i]
  # Column names look like "xYYYY_MM_DD": characters 2-5 are the year,
  # 7-8 the month and 10-11 the day.
  insDate <- as.Date(
    paste0(
      str_sub(dateCol, 2, 5), "/",
      str_sub(dateCol, 7, 8), "/",
      str_sub(dateCol, 10, 11)
    ),
    "%Y/%m/%d"
  )
  apcd_hud_ex[, dateCol] <- assign_PHRes_enrollIns_fn_vec(
    apcd_hud_ex[, "SMOKEFREE_DATE"],
    apcd_hud_ex[, dateCol],
    insDate
  )
}
Now the manipulated dataframe looks like this, which is what I want:
> apcd_hud_ex
# A tibble: 5 x 13
studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15 x2014_07_15 x2014_08_15 x2014_09_15 x2014_10_15
<int> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2014-08-01 1 1 1 1 1 2 2 2
2 2 2014-08-01 1 1 1 1 1 2 2 2
3 3 2014-01-01 2 2 2 2 2 2 2 2
4 4 2014-01-01 0 2 2 2 2 2 2 2
5 5 2015-08-01 1 1 1 1 1 1 1 1
# ... with 3 more variables: x2014_11_15 <dbl>, x2014_12_15 <dbl>, x2015_01_15 <dbl>
However, I would like to learn how to do this with dynamic programming and dplyr. I've tried 2 functions:
# Attempt 1: recode all columns named in `dateCols` inside a single mutate().
#   df       - data frame holding SMOKEFREE_DATE and the date-named columns
#   dateCols - column names of the form "xYYYY_MM_DD" (a character vector in
#              the call shown below)
# Returns the result of the mutate() call (`df1`).
# NOTE: as reported right after this function, calling it fails with
# "The LHS of `:=` must be a string or a symbol".
newInsValCols_fn1 <- function(df,dateCols){
# Parse the date encoded in each column name: characters 2-5 are the year,
# 7-8 the month, 10-11 the day.
insDate = as.Date(paste0(str_sub(dateCols,2,5),"/",str_sub(dateCols,7,8),"/",str_sub(dateCols,10,11)),"%Y/%m/%d")
# `{{dateCols}}` is used both as the name on the left of `:=` and as the value
# being tested; `dateCols` is the whole vector of names, not a single column.
df1 <- df %>%
mutate({{dateCols}} := if_else({{dateCols}} == 0,
0,
if_else(as.Date(insDate) < as.Date(SMOKEFREE_DATE,"%Y-%m-%d"),
1,
2)))
return(df1)
}
newInsValCols_fn1(apcd_hud_ex,dateCols)
Which gives error:
Error: The LHS of `:=` must be a string or a symbol
So I tried using symbols:
# Attempt 2: same as attempt 1, but the column names are first converted to
# symbols with syms() and unquoted with `!!` on the left of `:=`.
#   df       - data frame holding SMOKEFREE_DATE and the date-named columns
#   dateCols - column names of the form "xYYYY_MM_DD"
# Returns the result of the mutate() call (`df1`).
# NOTE: as reported right after this function, calling it fails with the same
# error: "The LHS of `:=` must be a string or a symbol".
newInsValCols_fn2 <- function(df,dateCols){
# One symbol per column name; the result holds all of them, not a single symbol.
dateCols_syms = syms(dateCols)
# Parse the date encoded in each column name: characters 2-5 are the year,
# 7-8 the month, 10-11 the day.
insDate = as.Date(paste0(str_sub(dateCols,2,5),"/",str_sub(dateCols,7,8),"/",str_sub(dateCols,10,11)),"%Y/%m/%d")
df1 <- df %>%
mutate(!!dateCols_syms := if_else({{dateCols}} == 0,
0,
if_else(as.Date(insDate) < as.Date(SMOKEFREE_DATE,"%Y-%m-%d"),
1,
2)))
return(df1)
}
newInsValCols_fn2(apcd_hud_ex,dateCols)
which gives the same error:
Error: The LHS of `:=` must be a string or a symbol
I also tried using !!! instead of !!, but that resulted in the following error:
Error: The LHS of `:=` can't be spliced with `!!!`
Something in my understanding is lacking.
Here's how I'd do it with dplyr.
library(dplyr)
library(lubridate)
apcd_hud_ex %>%
mutate(across(
starts_with('x'),
~ case_when(. == 0 ~ 0,
ymd(gsub('x', '', cur_column())) < SMOKEFREE_DATE ~ 1,
TRUE ~ 2)
))
#> # A tibble: 5 x 13
#> studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15 x2014_07_15 x2014_08_15 x2014_09_15 x2014_10_15 x2014_11_15 x2014_12_15 x2015_01_15
#> <int> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2014-08-01 1 1 1 1 1 2 2 2 2 2 2
#> 2 2 2014-08-01 1 1 1 1 1 2 2 2 2 2 2
#> 3 3 2014-01-01 2 2 2 2 2 2 2 2 2 2 2
#> 4 4 2014-01-01 0 2 2 2 2 2 2 2 2 2 2
#> 5 5 2015-08-01 1 1 1 1 1 1 1 1 1 1 1
You can use pivot_longer to have just one column to modify, which is an alternative to mutate(across()).
You can use case_when to have multiple conditions, so you do not need to nest multiple if statements. The value will be the one of the first true statement.
library(tidyverse)
apcd_hud_ex <- structure(list(studyid = 1:5, SMOKEFREE_DATE = structure(c(
16283,
16283, 16071, 16071, 16648
), class = "Date"), x2014_03_15 = c(
1,
1, 1, 0, 1
), x2014_04_15 = c(1, 1, 1, 1, 1), x2014_05_15 = c(
1,
1, 1, 1, 1
), x2014_06_15 = c(1, 1, 1, 1, 1), x2014_07_15 = c(
1,
1, 1, 1, 1
), x2014_08_15 = c(1, 1, 1, 1, 1), x2014_09_15 = c(
1,
1, 1, 1, 1
), x2014_10_15 = c(1, 1, 1, 1, 1), x2014_11_15 = c(
1,
1, 1, 1, 1
), x2014_12_15 = c(1, 1, 1, 1, 1), x2015_01_15 = c(
1,
1, 1, 1, 1
)), row.names = c(NA, -5L), class = c(
"tbl_df", "tbl",
"data.frame"
))
apcd_hud_ex %>%
pivot_longer(starts_with("x")) %>%
mutate(
insDate = name %>% str_remove("^x") %>% str_replace_all("_", "-") %>% as.Date(),
value = case_when(
value == 0 ~ 0,
insDate < SMOKEFREE_DATE ~ 1,
insDate >= SMOKEFREE_DATE ~ 2
)
) %>%
select(-insDate) %>%
pivot_wider()
#> # A tibble: 5 × 13
#> studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15
#> <int> <date> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2014-08-01 1 1 1 1
#> 2 2 2014-08-01 1 1 1 1
#> 3 3 2014-01-01 2 2 2 2
#> 4 4 2014-01-01 0 2 2 2
#> 5 5 2015-08-01 1 1 1 1
#> # … with 7 more variables: x2014_07_15 <dbl>, x2014_08_15 <dbl>,
#> # x2014_09_15 <dbl>, x2014_10_15 <dbl>, x2014_11_15 <dbl>, x2014_12_15 <dbl>,
#> # x2015_01_15 <dbl>
Created on 2022-05-05 by the reprex package (v2.0.0)
I was wondering if anyone could point me in the direction of how I would go about recoding multiple variables with the same rules. I have the following df bhs1:
structure(list(bhs1_1 = c(NA, 1, NA, 2, 1, 2), bhs1_2 = c(NA,
2, NA, 2, 1, 1), bhs1_3 = c(NA, 1, NA, 2, 2, 2), bhs1_4 = c(NA,
2, NA, 1, 1, 1), bhs1_5 = c(NA, 1, NA, 1, 2, 2), bhs1_6 = c(NA,
1, NA, 2, 1, 2), bhs1_7 = c(NA, 1, NA, 1, 2, 1), bhs1_8 = c(NA,
2, NA, 2, 2, 2), bhs1_9 = c(NA, 1, NA, 2, 1, 1), bhs1_10 = c(NA,
2, NA, 1, 2, 2), bhs1_11 = c(NA, 2, NA, 2, 2, 1), bhs1_12 = c(NA,
2, NA, 2, 1, 1), bhs1_13 = c(NA, 1, NA, 1, 2, 2), bhs1_14 = c(NA,
2, NA, 2, 1, 1), bhs1_15 = c(NA, 1, NA, 2, 2, 2), bhs1_16 = c(NA,
2, NA, 2, 2, 2), bhs1_17 = c(NA, 2, NA, 2, 2, 1), bhs1_18 = c(NA,
1, NA, 1, 2, 1), bhs1_19 = c(NA, 1, NA, 2, 1, 2), bhs1_20 = c(NA,
2, NA, 2, 1, 1)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
There are two transformation rules, for half of the data set, e.g.,:
(bhs1_2, bhs1_4, bhs1_7, bhs1_9, bhs1_11, bhs1_12, bhs1_14, bhs1_16, bhs1_17,
bhs1_18, bhs1_20)
(if_else(1, 1, 0))
and
(bhs1_1, bhs1_3, bhs1_5, bhs1_6, bhs1_8, bhs1_10, bhs1_13,
bhs1_15, bhs1_19)
(if_else(2, 1, 0))
Is there an elegant way to write code to meet this use case? If so, can someone please point me in the right direction and/or provide me with a sample?
Here's a solution using dplyr
library(dplyr)
case1 <- vars(bhs1_2, bhs1_4, bhs1_7, bhs1_9, bhs1_11, bhs1_12, bhs1_14, bhs1_16, bhs1_17,
bhs1_18, bhs1_20)
case2 <- vars(bhs1_1, bhs1_3, bhs1_5, bhs1_6, bhs1_8, bhs1_10, bhs1_13,
bhs1_15, bhs1_19)
result <- df %>%
mutate_at(case1, ~ (. == 1) * 1L) %>%
mutate_at(case2, ~ (. == 2) * 1L)
Note - I skipped the ifelse statement - I'm just testing for your condition, then converting the TRUE/FALSE responses to numbers by multiplying by 1. I'm also not sure how you want NAs to be handled; with this approach they are not recoded and simply stay NA (because a comparison such as NA == 1 evaluates to NA).
If you aren't familiar with the pipe operator (%>%), it takes the result of the previous function, and sets it as the first argument of the next function. It's designed to improve code legibility by avoiding lots of function nesting.
We can create the column names of interest, then convert to binary (as.integer) from the logical expression
case1 <- c("bhs1_2", "bhs1_4", "bhs1_7", "bhs1_9", "bhs1_11", "bhs1_12",
"bhs1_14", "bhs1_16", "bhs1_17", "bhs1_18", "bhs1_20")
case2 <- c("bhs1_1", "bhs1_3", "bhs1_5", "bhs1_6", "bhs1_8",
"bhs1_10", "bhs1_13", "bhs1_15", "bhs1_19")
library(dplyr)
library(magrittr)
# `%<>%` pipes `df1` through the whole chain and assigns the result back to
# `df1`; only the first pipe needs to be the assignment pipe.
# across() + all_of() replaces mutate_at(vars(...), funs(...)): funs() is
# deprecated, and all_of() makes it explicit that `case1` / `case2` are
# character vectors of column names defined outside the data frame.
# The logical comparison is converted to 0/1 with as.integer(); NA stays NA.
df1 %<>%
  mutate(across(all_of(case1), ~ as.integer(. == 1))) %>%
  mutate(across(all_of(case2), ~ as.integer(. == 2)))
df1
# A tibble: 6 x 20
# bhs1_1 bhs1_2 bhs1_3 bhs1_4 bhs1_5 bhs1_6 bhs1_7 bhs1_8 bhs1_9 bhs1_10
# <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
#1 NA NA NA NA NA NA NA NA NA NA
#2 0 0 0 0 0 0 1 1 1 1
#3 NA NA NA NA NA NA NA NA NA NA
#4 1 0 1 1 0 1 1 1 0 0
#5 0 1 1 1 1 0 0 1 1 1
#6 1 1 1 1 1 1 1 1 1 1
# ... with 10 more variables: bhs1_11 <int>, bhs1_12 <int>, bhs1_13 <int>,
# bhs1_14 <int>, bhs1_15 <int>, bhs1_16 <int>, bhs1_17 <int>, bhs1_18 <int>,
# bhs1_19 <int>, bhs1_20 <int>
Or an efficient option would be to use data.table
library(data.table)
setDT(df1)[, (case1) := lapply(.SD, function(x) as.integer(x == 1 )),
.SDcols = case1
][, (case2) := lapply(.SD, function(x) as.integer(x == 2)),
.SDcols = case2][]
NOTE This doesn't assume that all the values are of the same
You can use a very fast base R way of doing this as below:
# Columns that should become 1 when the answer is 1 (0 otherwise).
# These are the columns listed for the first rule in the question; the two
# sets must not overlap, otherwise a column would be transformed twice.
case1 <- c("bhs1_2", "bhs1_4", "bhs1_7", "bhs1_9", "bhs1_11", "bhs1_12",
           "bhs1_14", "bhs1_16", "bhs1_17", "bhs1_18", "bhs1_20")
# Columns that should become 1 when the answer is 2 (0 otherwise).
case2 <- c("bhs1_1", "bhs1_3", "bhs1_5", "bhs1_6", "bhs1_8", "bhs1_10",
           "bhs1_13", "bhs1_15", "bhs1_19")
# Both lines rely on every non-missing value being either 1 or 2.
dat[case1] <- abs(dat[case1] - 2)  # 1 -> 1, 2 -> 0, NA stays NA
dat[case2] <- dat[case2] - 1       # 1 -> 0, 2 -> 1, NA stays NA
A simple ifelse can be helpful, considering the OP wants NA to be converted based on the specified rules:
case1 = c("bhs1_2", "bhs1_4", "bhs1_7", "bhs1_9", "bhs1_11", "bhs1_12",
"bhs1_14", "bhs1_16", "bhs1_17", "bhs1_18", "bhs1_20")
case2 = c("bhs1_1", "bhs1_3", "bhs1_5", "bhs1_6", "bhs1_8", "bhs1_10",
"bhs1_13", "bhs1_15", "bhs1_19")
df[case1] = ifelse(!is.na(df[case1]) & df[case1]==1,1,0)
df[case2] = ifelse(!is.na(df[case2]) & df[case2]==2,1,0)
#Test solution
df[1:7]
# bhs1_1 bhs1_2 bhs1_3 bhs1_4 bhs1_5 bhs1_6 bhs1_7
# 1 0 0 0 0 0 0 0
# 2 0 0 0 0 0 0 1
# 3 0 0 0 0 0 0 0
# 4 1 0 1 1 0 1 1
# 5 0 1 1 1 1 0 0
# 6 1 1 1 1 1 1 1
**Updated:** If NAs are to be left as is, then the solution can be:
df[case1] = ifelse(df[case1]==1,1,0)
df[case2] = ifelse(df[case2]==2,1,0)
df[1:7]
# bhs1_1 bhs1_2 bhs1_3 bhs1_4 bhs1_5 bhs1_6 bhs1_7
# 1 NA NA NA NA NA NA NA
# 2 0 0 0 0 0 0 1
# 3 NA NA NA NA NA NA NA
# 4 1 0 1 1 0 1 1
# 5 0 1 1 1 1 0 0
# 6 1 1 1 1 1 1 1