How to rename suffix in column - r

I would like to replace 2003 with x, 2004 with y, and 2005 with z, in the column suffixes.
For example, I would like to transform:
In:
Here's the reproducible example:
structure(list(id = c(1, 1, 1, 1, 1), xd_2004 = c(1, 1, 1, 1,
1), xd_2003 = c(1, 1, 1, 1, 1), xe_2004 = c(1, 1, 1, 1, 1), xe_2003 = c(1,
1, 1, 1, 1), xd_2005 = c(1, 1, 1, 1, 1), xe_2005 = c(1, 1, 1,
1, 1)), class = "data.frame", row.names = c(NA, -5L))

We may also use a named vector in str_replace_all
library(dplyr)
library(stringr)
# str_replace_all() accepts a named character vector: each name is a pattern
# and the corresponding value is its replacement.
df %>%
  rename_with(
    function(nm) str_replace_all(nm, c("2003" = "x", "2004" = "y", "2005" = "z"))
  )
-output
id xd_y xd_x xe_y xe_x xd_z xe_z
1 1 1 1 1 1 1 1
2 1 1 1 1 1 1 1
3 1 1 1 1 1 1 1
4 1 1 1 1 1 1 1
5 1 1 1 1 1 1 1

Using rename_with we could do:
library(dplyr)
library(stringr)
# One rename pass per year; each pass rewrites every column name containing
# that year.
df %>%
  rename_with(function(nm) str_replace_all(nm, "2004", "y")) %>%
  rename_with(function(nm) str_replace_all(nm, "2003", "x")) %>%
  rename_with(function(nm) str_replace_all(nm, "2005", "z"))
id xd_y xd_x xe_y xe_x xd_z xe_z
1 1 1 1 1 1 1 1
2 1 1 1 1 1 1 1
3 1 1 1 1 1 1 1
4 1 1 1 1 1 1 1
5 1 1 1 1 1 1 1

Related

How do you make a new factor column based on other columns in r?

I have a data set that looks like this
ID Group 1 Group 2 Group 3 Group 4
1 1 0 1 0
2 0 1 1 1
3 1 1 0 0
.
.
.
100 0 1 0 1
I want to make another column, let's say Group 5, where if Group 1 is 1 then Group 5 would be 1. If Group 2 = 1, then Group 5 = 2. If Group 3 = 1, then Group 5 = 3, and if Group 4 = 1, then Group 5 = 4. How do I do this?
I tried these lines of code, but I seem to be missing something.
Group5 <- data.frame(Group1, Group2, Group3, Group4, stringsAsFactors=FALSE)
df$Group5 <- with(finalmerge, ifelse(Group1 %in% c("1", "0"),
"1", ""))
Any advice would be helpful, thanks in advance.
You could use which.max(), and apply this to each row.
# For each row, find the position of the first group column holding the
# largest value (the first 1 for 0/1 indicator data). The ID column is dropped
# first so positions line up with the group numbers.
group_cols <- as.matrix(df[, -1])
first_group <- apply(group_cols, 1, which.max)
# which.max() returns 1 for a row of all zeros, which would wrongly label a
# row that belongs to no group as group 1; mark those rows as NA instead.
first_group[rowSums(group_cols == 1) == 0] <- NA
df["Group_5"] <- first_group
Output:
ID Group_1 Group_2 Group_3 Group_4 Group_5
1 1 0 0 0 1 4
2 2 0 1 0 0 2
3 3 0 0 1 0 3
4 4 1 0 0 0 1
Input:
df = structure(list(ID = c(1, 2, 3, 4), Group_1 = c(0, 0, 0, 1), Group_2 = c(0,
1, 0, 0), Group_3 = c(0, 0, 1, 0), Group_4 = c(1, 0, 0, 0)), class = "data.frame", row.names = c(NA,
-4L))

Mutate multiple columns using the dplyr framework

I have a dataframe apcd_hud_ex. I want to take some column names (e.g. x2014_03_15), and change the value of the columns based on the current value of the columns, the parsed date in the column names, and another column in the dataframe (SMOKEFREE_DATE). I can do it in a loop over the columns, but I would really like to know how to do it with dplyr and mutate. Any help would be much appreciated!
apcd_hud_ex = structure(list(studyid = 1:5, SMOKEFREE_DATE = structure(c(16283,
16283, 16071, 16071, 16648), class = "Date"), x2014_03_15 = c(1,
1, 1, 0, 1), x2014_04_15 = c(1, 1, 1, 1, 1), x2014_05_15 = c(1,
1, 1, 1, 1), x2014_06_15 = c(1, 1, 1, 1, 1), x2014_07_15 = c(1,
1, 1, 1, 1), x2014_08_15 = c(1, 1, 1, 1, 1), x2014_09_15 = c(1,
1, 1, 1, 1), x2014_10_15 = c(1, 1, 1, 1, 1), x2014_11_15 = c(1,
1, 1, 1, 1), x2014_12_15 = c(1, 1, 1, 1, 1), x2015_01_15 = c(1,
1, 1, 1, 1)), row.names = c(NA, -5L), class = c("tbl_df", "tbl",
"data.frame"))
> apcd_hud_ex
# A tibble: 5 x 13
studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15 x2014_07_15 x2014_08_15 x2014_09_15 x2014_10_15
<int> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2014-08-01 1 1 1 1 1 1 1 1
2 2 2014-08-01 1 1 1 1 1 1 1 1
3 3 2014-01-01 1 1 1 1 1 1 1 1
4 4 2014-01-01 0 1 1 1 1 1 1 1
5 5 2015-08-01 1 1 1 1 1 1 1 1
# ... with 3 more variables: x2014_11_15 <dbl>, x2014_12_15 <dbl>, x2015_01_15 <dbl>
>
#function for loop
# Recode an indicator value relative to a smoke-free date.
#   SFdate   - smoke-free date; passed through as.Date() with "%Y-%m-%d"
#   insValue - current indicator value
#   insDate  - date the indicator refers to; passed through as.Date()
# Result: 0 where insValue is 0; otherwise 1 where insDate is earlier than
# SFdate and 2 where it is not.
assign_PHRes_enrollIns_fn <- function(SFdate, insValue, insDate) {
  if_else(
    insValue == 0,
    0,
    if_else(as.Date(insDate) < as.Date(SFdate, "%Y-%m-%d"), 1, 2)
  )
}
#vectorized function
assign_PHRes_enrollIns_fn_vec <- Vectorize(assign_PHRes_enrollIns_fn)
dateCols = names(apcd_hud_ex)[which(names(apcd_hud_ex) == "x2014_03_15"):which(names(apcd_hud_ex) == "x2015_01_15")]
This loop over the column names (dateCols) works:
# Recode every date column in place. Looping over the names themselves avoids
# 1:length(dateCols), which would iterate over c(1, 0) if dateCols were empty.
for (dateCol in dateCols) {
  # Column names look like "x2014_03_15": drop the leading "x", parse the rest.
  insDate <- as.Date(str_sub(dateCol, 2), "%Y_%m_%d")
  apcd_hud_ex[, dateCol] <- assign_PHRes_enrollIns_fn_vec(
    apcd_hud_ex[, "SMOKEFREE_DATE"],
    apcd_hud_ex[, dateCol],
    insDate
  )
}
Now the manipulated dataframe looks like this, which is what I want:
> apcd_hud_ex
# A tibble: 5 x 13
studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15 x2014_07_15 x2014_08_15 x2014_09_15 x2014_10_15
<int> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2014-08-01 1 1 1 1 1 2 2 2
2 2 2014-08-01 1 1 1 1 1 2 2 2
3 3 2014-01-01 2 2 2 2 2 2 2 2
4 4 2014-01-01 0 2 2 2 2 2 2 2
5 5 2015-08-01 1 1 1 1 1 1 1 1
# ... with 3 more variables: x2014_11_15 <dbl>, x2014_12_15 <dbl>, x2015_01_15 <dbl>
However, I would like to learn how to do this with dynamic programming and dplyr. I've tried 2 functions:
newInsValCols_fn1 <- function(df,dateCols){
insDate = as.Date(paste0(str_sub(dateCols,2,5),"/",str_sub(dateCols,7,8),"/",str_sub(dateCols,10,11)),"%Y/%m/%d")
df1 <- df %>%
mutate({{dateCols}} := if_else({{dateCols}} == 0,
0,
if_else(as.Date(insDate) < as.Date(SMOKEFREE_DATE,"%Y-%m-%d"),
1,
2)))
return(df1)
}
newInsValCols_fn1(apcd_hud_ex,dateCols)
Which gives error:
Error: The LHS of `:=` must be a string or a symbol
So I tried using symbols:
newInsValCols_fn2 <- function(df,dateCols){
dateCols_syms = syms(dateCols)
insDate = as.Date(paste0(str_sub(dateCols,2,5),"/",str_sub(dateCols,7,8),"/",str_sub(dateCols,10,11)),"%Y/%m/%d")
df1 <- df %>%
mutate(!!dateCols_syms := if_else({{dateCols}} == 0,
0,
if_else(as.Date(insDate) < as.Date(SMOKEFREE_DATE,"%Y-%m-%d"),
1,
2)))
return(df1)
}
newInsValCols_fn2(apcd_hud_ex,dateCols)
which gives the same error:
Error: The LHS of `:=` must be a string or a symbol
I also tried using !!! instead of !!, but that resulted in the following error:
Error: The LHS of `:=` can't be spliced with `!!!`
Something in my understanding is lacking.
Here's how I'd do it with dplyr.
library(dplyr)
library(lubridate)
# Recode every column whose name starts with "x". cur_column() gives the name
# of the column currently being processed, from which the date is parsed.
apcd_hud_ex %>%
  mutate(
    across(
      starts_with('x'),
      ~ case_when(
        .x == 0 ~ 0,
        ymd(gsub('x', '', cur_column())) < SMOKEFREE_DATE ~ 1,
        TRUE ~ 2
      )
    )
  )
#> # A tibble: 5 x 13
#> studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15 x2014_07_15 x2014_08_15 x2014_09_15 x2014_10_15 x2014_11_15 x2014_12_15 x2015_01_15
#> <int> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2014-08-01 1 1 1 1 1 2 2 2 2 2 2
#> 2 2 2014-08-01 1 1 1 1 1 2 2 2 2 2 2
#> 3 3 2014-01-01 2 2 2 2 2 2 2 2 2 2 2
#> 4 4 2014-01-01 0 2 2 2 2 2 2 2 2 2 2
#> 5 5 2015-08-01 1 1 1 1 1 1 1 1 1 1 1
You can use pivot_longer to have just one column to modify, which is an alternative to mutate(across()).
You can use case_when to have multiple conditions, so you do not need to nest multiple if statements. The value will be the one of the first true statement.
library(tidyverse)
apcd_hud_ex <- structure(list(studyid = 1:5, SMOKEFREE_DATE = structure(c(
16283,
16283, 16071, 16071, 16648
), class = "Date"), x2014_03_15 = c(
1,
1, 1, 0, 1
), x2014_04_15 = c(1, 1, 1, 1, 1), x2014_05_15 = c(
1,
1, 1, 1, 1
), x2014_06_15 = c(1, 1, 1, 1, 1), x2014_07_15 = c(
1,
1, 1, 1, 1
), x2014_08_15 = c(1, 1, 1, 1, 1), x2014_09_15 = c(
1,
1, 1, 1, 1
), x2014_10_15 = c(1, 1, 1, 1, 1), x2014_11_15 = c(
1,
1, 1, 1, 1
), x2014_12_15 = c(1, 1, 1, 1, 1), x2015_01_15 = c(
1,
1, 1, 1, 1
)), row.names = c(NA, -5L), class = c(
"tbl_df", "tbl",
"data.frame"
))
# Reshape to one row per (studyid, date column), recode the single value
# column, then reshape back to the original wide layout.
apcd_hud_ex %>%
  pivot_longer(starts_with("x")) %>%
  mutate(
    # "x2014_03_15" -> "2014-03-15" -> Date
    insDate = as.Date(str_replace_all(str_remove(name, "^x"), "_", "-")),
    value = case_when(
      value == 0 ~ 0,
      insDate < SMOKEFREE_DATE ~ 1,
      insDate >= SMOKEFREE_DATE ~ 2
    )
  ) %>%
  select(-insDate) %>%
  pivot_wider()
#> # A tibble: 5 × 13
#> studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15
#> <int> <date> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2014-08-01 1 1 1 1
#> 2 2 2014-08-01 1 1 1 1
#> 3 3 2014-01-01 2 2 2 2
#> 4 4 2014-01-01 0 2 2 2
#> 5 5 2015-08-01 1 1 1 1
#> # … with 7 more variables: x2014_07_15 <dbl>, x2014_08_15 <dbl>,
#> # x2014_09_15 <dbl>, x2014_10_15 <dbl>, x2014_11_15 <dbl>, x2014_12_15 <dbl>,
#> # x2015_01_15 <dbl>
Created on 2022-05-05 by the reprex package (v2.0.0)

How to build a string variable to capture multi-column info

I have a df that looks like this:
It can be built using this code:
structure(list(ID = c(1, 2, 3, 4, 5), Pass = c(0, 1, 1, 1, 1),
Math = c(0, 0, 1, 1, 1), ELA = c(0, 1, 0, 1, 0), PE = c(0,
0, 1, 1, 1)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
Here Pass indicates whether a student passed any test. Now I want to build a new variable, Result, to capture each student's test results like the following. What should I do?
Try the base R code below
# Build, for every row of df, a comma-separated list of the columns (all
# columns after the first two) whose value is 1.
# - which(..., arr.ind = TRUE) returns the (row, col) position of each 1.
# - tapply() groups the matching column names by row and joins each group with
#   toString(). The factor lists every row number as a level, so rows without
#   any 1 are kept and come back as NA.
q <- with(data.frame(which(df[-(1:2)] == 1, arr.ind = TRUE)),
tapply(names(df[-(1:2)])[col], factor(row, levels = 1:nrow(df)), toString))
# Rows with no matching column (NA) get "Not Pass"; the rest get "Pass: <list>".
df$Result <- ifelse(is.na(q), "Not Pass", paste0("Pass: ", q))
which gives
> df
# A tibble: 5 x 6
ID Pass Math ELA PE Result
<dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 1 0 0 0 0 Not Pass
2 2 1 0 1 0 Pass: ELA
3 3 1 1 0 1 Pass: Math, PE
4 4 1 1 1 1 Pass: Math, ELA, PE
5 5 1 1 0 1 Pass: Math, PE
Using dplyr with rowwise
library(dplyr)
library(stringr)
# Work one row at a time: when Pass is non-zero, list the names of the
# Math:PE columns that are non-zero in that row; otherwise report 'Not pass'.
df1 %>%
  rowwise() %>%
  mutate(
    Result = if (as.logical(Pass)) {
      str_c(
        'Pass: ',
        toString(names(select(., Math:PE))[as.logical(c_across(Math:PE))])
      )
    } else {
      'Not pass'
    }
  ) %>%
  ungroup()
# A tibble: 5 x 6
# ID Pass Math ELA PE Result
# <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#1 1 0 0 0 0 Not pass
#2 2 1 0 1 0 Pass: ELA
#3 3 1 1 0 1 Pass: Math, PE
#4 4 1 1 1 1 Pass: Math, ELA, PE
#5 5 1 1 0 1 Pass: Math, PE
data
df1 <- structure(list(ID = c(1, 2, 3, 4, 5), Pass = c(0, 1, 1, 1, 1),
Math = c(0, 0, 1, 1, 1), ELA = c(0, 1, 0, 1, 0), PE = c(0,
0, 1, 1, 1)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
Here's one solution:
library(dplyr)
library(magrittr)
library(stringr)
library(tidyr) # provides pivot_longer() / pivot_wider(), used below
df <- structure(list(ID = c(1, 2, 3, 4, 5), Pass = c(0, 1, 1, 1, 1),
Math = c(0, 0, 1, 1, 1), ELA = c(0, 1, 0, 1, 0), PE = c(0,
0, 1, 1, 1)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
# 1. Long format: one row per (ID, subject) with the 0/1 flag in `done`.
df %<>% pivot_longer(cols = -c(ID, Pass), names_to = "sub", values_to = "done")
# 2. Per ID, keep the subject name where done == 1 and NA elsewhere, then
#    collapse everything into one string such as "NA, ELA, NA".
df %<>% group_by(ID) %>% mutate(Result = paste0(ifelse(done == 1, sub, NA), collapse = ", ")) %>% ungroup()
# 3. Back to wide format; Result is constant within an ID so it is kept as is.
df %<>% pivot_wider(names_from = sub, values_from = done)
# 4. Strip the "NA" placeholders (and the separators after them) and add the
#    prefix. NOTE: the pattern would also hit a subject name containing "NA".
df %<>% mutate(Result = paste0("Pass: ", str_replace_all(Result, "NA[, ]*", "")))
# 5. Nothing left after the prefix means no subject was passed; otherwise trim
#    a trailing comma left behind by step 4.
df %<>% mutate(Result = ifelse(str_detect(Result, "Pass: $"), "Not pass", str_replace_all(Result, ",[\\s]*$", "")))
df
# # A tibble: 5 x 6
# ID Pass Result Math ELA PE
# <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
# 1 1 0 Not pass 0 0 0
# 2 2 1 Pass: ELA 0 1 0
# 3 3 1 Pass: Math, PE 1 0 1
# 4 4 1 Pass: Math, ELA, PE 1 1 1
# 5 5 1 Pass: Math, PE 1 0 1
I can provide an explanation of what the code is doing if necessary.

R: What is an efficient way to recode variables? How do I prorate means?

I was wondering if anyone could point me in the direction of how I would go about recoding multiple variables with the same rules. I have the following df bhs1:
structure(list(bhs1_1 = c(NA, 1, NA, 2, 1, 2), bhs1_2 = c(NA,
2, NA, 2, 1, 1), bhs1_3 = c(NA, 1, NA, 2, 2, 2), bhs1_4 = c(NA,
2, NA, 1, 1, 1), bhs1_5 = c(NA, 1, NA, 1, 2, 2), bhs1_6 = c(NA,
1, NA, 2, 1, 2), bhs1_7 = c(NA, 1, NA, 1, 2, 1), bhs1_8 = c(NA,
2, NA, 2, 2, 2), bhs1_9 = c(NA, 1, NA, 2, 1, 1), bhs1_10 = c(NA,
2, NA, 1, 2, 2), bhs1_11 = c(NA, 2, NA, 2, 2, 1), bhs1_12 = c(NA,
2, NA, 2, 1, 1), bhs1_13 = c(NA, 1, NA, 1, 2, 2), bhs1_14 = c(NA,
2, NA, 2, 1, 1), bhs1_15 = c(NA, 1, NA, 2, 2, 2), bhs1_16 = c(NA,
2, NA, 2, 2, 2), bhs1_17 = c(NA, 2, NA, 2, 2, 1), bhs1_18 = c(NA,
1, NA, 1, 2, 1), bhs1_19 = c(NA, 1, NA, 2, 1, 2), bhs1_20 = c(NA,
2, NA, 2, 1, 1)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
There are two transformation rules, for half of the data set, e.g.,:
(bhs1_2, bhs1_4, bhs1_7, bhs1_9, bhs1_11, bhs1_12, bhs1_14, bhs1_16, bhs1_17,
bhs1_18, bhs1_20)
(if_else(1, 1, 0))
and
(bhs1_1, bhs1_3, bhs1_5, bhs1_6, bhs1_8, bhs1_10, bhs1_13,
bhs1_15, bhs1_19)
(if_else(2, 1, 0))
Is there an elegant way to write code to meet this use case? If so, can someone please point me in the right direction and/or provide me with a sample?
Here's a solution using dplyr
library(dplyr)
# Columns scored 1 when the answer is 1.
case1 <- c(
  "bhs1_2", "bhs1_4", "bhs1_7", "bhs1_9", "bhs1_11", "bhs1_12",
  "bhs1_14", "bhs1_16", "bhs1_17", "bhs1_18", "bhs1_20"
)
# Columns scored 1 when the answer is 2.
case2 <- c(
  "bhs1_1", "bhs1_3", "bhs1_5", "bhs1_6", "bhs1_8", "bhs1_10",
  "bhs1_13", "bhs1_15", "bhs1_19"
)
# across() replaces the superseded mutate_at(); all_of() selects by the
# character vectors above. The comparison gives TRUE/FALSE (NA stays NA) and
# multiplying by 1L turns it into 1/0.
result <- df %>%
  mutate(
    across(all_of(case1), ~ (.x == 1) * 1L),
    across(all_of(case2), ~ (.x == 2) * 1L)
  )
Note - I skipped the ifelse statement - I'm just testing for your condition, then converted the TRUE/FALSE responses to numbers by multiplying by 1. I'm also not sure how you want NAs to be handled, but this is ignoring them.
If you aren't familiar with the pipe operator (%>%), it takes the result of the previous function, and sets it as the first argument of the next function. It's designed to improve code legibility by avoiding lots of function nesting.
We can create the column names of interest, then convert to binary (as.integer) from the logical expression
case1 <- c("bhs1_2", "bhs1_4", "bhs1_7", "bhs1_9", "bhs1_11", "bhs1_12",
           "bhs1_14", "bhs1_16", "bhs1_17", "bhs1_18", "bhs1_20")
case2 <- c("bhs1_1", "bhs1_3", "bhs1_5", "bhs1_6", "bhs1_8",
           "bhs1_10", "bhs1_13", "bhs1_15", "bhs1_19")
library(dplyr)
library(magrittr)
# across() + all_of() replaces mutate_at(vars(...), funs(...)); funs() is
# deprecated. A single %<>% pipes df1 through mutate() and assigns the result
# back to df1. NA answers stay NA because as.integer(NA) is NA.
df1 %<>%
  mutate(
    across(all_of(case1), ~ as.integer(.x == 1)),
    across(all_of(case2), ~ as.integer(.x == 2))
  )
df1
# A tibble: 6 x 20
# bhs1_1 bhs1_2 bhs1_3 bhs1_4 bhs1_5 bhs1_6 bhs1_7 bhs1_8 bhs1_9 bhs1_10
# <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
#1 NA NA NA NA NA NA NA NA NA NA
#2 0 0 0 0 0 0 1 1 1 1
#3 NA NA NA NA NA NA NA NA NA NA
#4 1 0 1 1 0 1 1 1 0 0
#5 0 1 1 1 1 0 0 1 1 1
#6 1 1 1 1 1 1 1 1 1 1
# ... with 10 more variables: bhs1_11 <int>, bhs1_12 <int>, bhs1_13 <int>,
# bhs1_14 <int>, bhs1_15 <int>, bhs1_16 <int>, bhs1_17 <int>, bhs1_18 <int>,
# bhs1_19 <int>, bhs1_20 <int>
Or an efficient option would be to use data.table
library(data.table)
# Convert df1 to a data.table by reference, then overwrite each column group
# in place with its 0/1 recode.
setDT(df1)
df1[, (case1) := lapply(.SD, function(col) as.integer(col == 1)), .SDcols = case1]
df1[, (case2) := lapply(.SD, function(col) as.integer(col == 2)), .SDcols = case2]
# Trailing [] prints the updated table.
df1[]
NOTE This doesn't assume that all the values are of the same
You can use a very fast base R way of doing this as below:
# Column groups as given in the question. The two sets must not overlap,
# otherwise a column would be transformed twice.
case1 <- c("bhs1_2", "bhs1_4", "bhs1_7", "bhs1_9", "bhs1_11", "bhs1_12",
           "bhs1_14", "bhs1_16", "bhs1_17", "bhs1_18", "bhs1_20")
case2 <- c("bhs1_1", "bhs1_3", "bhs1_5", "bhs1_6", "bhs1_8", "bhs1_10",
           "bhs1_13", "bhs1_15", "bhs1_19")
# Answers are coded 1 or 2, so plain arithmetic does the recode:
# case1: 1 -> 1, 2 -> 0
dat[case1] <- abs(dat[case1] - 2)
# case2: 1 -> 0, 2 -> 1
dat[case2] <- dat[case2] - 1
A simple ifelse can be helpful, considering OP wants NA to be converted based on the specified rules:
# Columns scored 1 when the answer is 1.
case1 <- c(
  "bhs1_2", "bhs1_4", "bhs1_7", "bhs1_9", "bhs1_11", "bhs1_12",
  "bhs1_14", "bhs1_16", "bhs1_17", "bhs1_18", "bhs1_20"
)
# Columns scored 1 when the answer is 2.
case2 <- c(
  "bhs1_1", "bhs1_3", "bhs1_5", "bhs1_6", "bhs1_8", "bhs1_10",
  "bhs1_13", "bhs1_15", "bhs1_19"
)
# A missing answer fails the !is.na() check, so it is recoded to 0.
df[case1] <- ifelse(df[case1] == 1 & !is.na(df[case1]), 1, 0)
df[case2] <- ifelse(df[case2] == 2 & !is.na(df[case2]), 1, 0)
# Test solution
df[1:7]
# bhs1_1 bhs1_2 bhs1_3 bhs1_4 bhs1_5 bhs1_6 bhs1_7
# 1 0 0 0 0 0 0 0
# 2 0 0 0 0 0 0 1
# 3 0 0 0 0 0 0 0
# 4 1 0 1 1 0 1 1
# 5 0 1 1 1 1 0 0
# 6 1 1 1 1 1 1 1
**Updated:** If NAs are to be left as is, then the solution can be:
# Without an is.na() guard the comparison is NA for missing answers, and
# ifelse() passes that NA through unchanged.
df[case1] <- ifelse(df[case1] == 1, 1, 0)
df[case2] <- ifelse(df[case2] == 2, 1, 0)
# Show the first seven columns.
df[1:7]
# bhs1_1 bhs1_2 bhs1_3 bhs1_4 bhs1_5 bhs1_6 bhs1_7
# 1 NA NA NA NA NA NA NA
# 2 0 0 0 0 0 0 1
# 3 NA NA NA NA NA NA NA
# 4 1 0 1 1 0 1 1
# 5 0 1 1 1 1 0 0
# 6 1 1 1 1 1 1 1

How to replace some values in a row based on condition in R

i am new to R and hence going to ask a not-so-difficult question. I have a df which is about 200 columns and 1000 rows. It looks a bit like this
For each row, I need to determine whether there are more 2s than 0s, or vice versa. If there are more 2s, convert the 0s in that row to -1; if there are more 0s, convert the 2s to -1.
I tried:
ifelse((rowSums(b=="0") > rowSums(b=="2")) , apply(b, 1 , function(b) b[b== "2" , ] <- "-1"), apply(b, 1 , function(b) b[b== "0" , ] <- "-1"))
but it gives the error:
Error in b[b == "2", ] <- "-1" : incorrect number of subscripts on
matrix
Any help and suggestion are most welcome!
We can use apply
# Process each row: whichever of 0 and 2 occurs less often is replaced by -1;
# a row with equal counts is returned untouched. apply() returns one column
# per input row, so t() restores the original orientation.
t(apply(b, 1, FUN = function(row) {
  n_two <- sum(row == 2)
  n_zero <- sum(row == 0)
  if (n_two > n_zero) {
    replace(row, row == 0, -1)
  } else if (n_zero > n_two) {
    replace(row, row == 2, -1)
  } else {
    row
  }
}))
# ind1 ind2 ind3 ind4 ind5 ind6 ind7 ind8 ind9 ind10 ind11 ind12 ind13 ind14 ind15 ind16 ind17 ind18 ind19 ind20
#M8 -1 2 2 2 -1 2 2 1 1 -1 1 1 1 1 1 1 1 1 1 2
#M9 2 2 2 2 2 2 2 -1 -1 2 1 1 1 1 1 1 1 1 -1 1
#M17 1 1 -1 1 1 1 1 1 1 1 2 2 2 2 2 -1 -1 -1 -1 2
#M19 0 -1 0 0 0 0 -1 0 0 0 1 -1 1 -1 -1 1 1 1 1 1
Or we can do this based on rowSums
# Count 0s and 2s per row once, then flag which value is in the majority.
n_zero <- rowSums(b == 0)
n_two <- rowSums(b == 2)
more_zero <- n_zero > n_two
more_two <- n_two > n_zero
# Using two strict comparisons (rather than negating one) leaves rows with
# equal counts untouched instead of converting their 0s.
# The row flags recycle down the columns of the logical matrix, so each flag
# lines up with its own row.
b[b == 0 & more_two] <- -1
b[b == 2 & more_zero] <- -1
b
# ind1 ind2 ind3 ind4 ind5 ind6 ind7 ind8 ind9 ind10 ind11 ind12 ind13 ind14 ind15 ind16 ind17 ind18 ind19 ind20
#M8 -1 2 2 2 -1 2 2 1 1 -1 1 1 1 1 1 1 1 1 1 2
#M9 2 2 2 2 2 2 2 -1 -1 2 1 1 1 1 1 1 1 1 -1 1
#M17 1 1 -1 1 1 1 1 1 1 1 2 2 2 2 2 -1 -1 -1 -1 2
#M19 0 -1 0 0 0 0 -1 0 0 0 1 -1 1 -1 -1 1 1 1 1 1
data
b <- structure(list(ind1 = c(0, 2, 1, 0), ind2 = c(2, 2, 1, 2),
ind3 = c(2,
2, -1, 0), ind4 = c(2, 2, 1, 0), ind5 = c(0, 2, 1, 0), ind6 = c(2,
2, 1, 0), ind7 = c(2, 2, 1, -1), ind8 = c(1, 0, 1, 0), ind9 = c(1,
0, 1, 0), ind10 = c(0, 2, 1, 0), ind11 = c(1, 1, 2, 1), ind12 = c(1,
1, 2, -1), ind13 = c(1, 1, 2, 1), ind14 = c(1, 1, 2, -1), ind15 = c(1,
1, 2, -1), ind16 = c(1, 1, 0, 1), ind17 = c(1, 1, -1, 1), ind18 = c(1,
1, -1, 1), ind19 = c(1, 0, 0, 1), ind20 = c(2, 1, 2, 1)),
.Names = c("ind1",
"ind2", "ind3", "ind4", "ind5", "ind6", "ind7", "ind8", "ind9",
"ind10", "ind11", "ind12", "ind13", "ind14", "ind15", "ind16",
"ind17", "ind18", "ind19", "ind20"), row.names = c("M8", "M9",
"M17", "M19"), class = "data.frame")
# NA-tolerant variant: in each row of b, whichever of 0 and 2 occurs less
# often is replaced by -1; rows with equal counts are left unchanged.
# apply() over rows returns one column per input row, so t() is needed to get
# back the original row/column layout.
t(apply(b, 1, function(x) {
  n_two <- sum(x == 2, na.rm = TRUE)
  n_zero <- sum(x == 0, na.rm = TRUE)
  if (n_two > n_zero) {
    # which() drops NA comparisons, so missing cells are never indexed.
    x[which(x == 0)] <- -1
  } else if (n_zero > n_two) {
    x[which(x == 2)] <- -1
  }
  x
}))

Resources