How to create design matrix in r - r

I have two factors. factor A have 2 level, factor B have 3 level.
How to create the following design matrix?
factorA1 factorA2 factorB1 factorB2 factorB3
[1,] 1 0 1 0 0
[2,] 1 0 0 1 0
[3,] 1 0 0 0 1
[4,] 0 1 1 0 0
[5,] 0 1 0 1 0
[6,] 0 1 0 0 1

You have a couple of options:
Use base and piece it together yourself:
(iris.dummy<-with(iris,model.matrix(~Species-1)))
(IRIS<-data.frame(iris,iris.dummy))
Or use the ade4 package as follows:
dummy <- function(df) {
require(ade4)
ISFACT <- sapply(df, is.factor)
FACTS <- acm.disjonctif(df[, ISFACT, drop = FALSE])
NONFACTS <- df[, !ISFACT,drop = FALSE]
data.frame(NONFACTS, FACTS)
}
dat <-data.frame(eggs = c("foo", "foo", "bar", "bar"),
ham = c("red","blue","green","red"), x=rnorm(4))
dummy(dat)
## x eggs.bar eggs.foo ham.blue ham.green ham.red
## 1 0.3365302 0 1 0 0 1
## 2 1.1341354 0 1 1 0 0
## 3 2.0489741 1 0 0 1 0
## 4 1.1019108 1 0 0 0 1

Assuming your data in in a data.frame called dat, let's say the two factors are given as in this example:
> dat <- data.frame(f1=sample(LETTERS[1:3],20,T),f2=sample(LETTERS[4:5],20,T),id=1:20)
> dat
f1 f2 id
1 C D 1
2 B E 2
3 B E 3
4 A D 4
5 C E 5
6 C E 6
7 C D 7
8 B E 8
9 C D 9
10 A D 10
11 B E 11
12 C E 12
13 B D 13
14 B E 14
15 A D 15
16 C E 16
17 C D 17
18 C D 18
19 B D 19
20 C D 20
> dat$f1
[1] C B B A C C C B C A B C B B A C C C B C
Levels: A B C
> dat$f2
[1] D E E D E E D E D D E E D E D E D D D D
Levels: D E
You can use outer to get a matrix as you showed, for each factor:
> F1 <- with(dat, outer(f1, levels(f1), `==`)*1)
> colnames(F1) <- paste("f1",sep="=",levels(dat$f1))
> F1
f1=A f1=B f1=C
[1,] 0 0 1
[2,] 0 1 0
[3,] 0 1 0
[4,] 1 0 0
[5,] 0 0 1
[6,] 0 0 1
[7,] 0 0 1
[8,] 0 1 0
[9,] 0 0 1
[10,] 1 0 0
[11,] 0 1 0
[12,] 0 0 1
[13,] 0 1 0
[14,] 0 1 0
[15,] 1 0 0
[16,] 0 0 1
[17,] 0 0 1
[18,] 0 0 1
[19,] 0 1 0
[20,] 0 0 1
Now do the same for the second factor:
> F2 <- with(dat, outer(f2, levels(f2), `==`)*1)
> colnames(F2) <- paste("f2",sep="=",levels(dat$f2))
And cbind them to get the final result:
> cbind(F1,F2)

model.matrix is the process that lm and others use in the background to convert for you.
dat <- data.frame(f1=sample(LETTERS[1:3],20,T),f2=sample(LETTERS[4:5],20,T),id=1:20)
dat
model.matrix(~dat$f1 + dat$f2)
It creates the INTERCEPT variable as a column of 1's, but you can easily remove that if you need.
model.matrix(~dat$f1 + dat$f2)[,-1]
Edit: Now i see that this is essentially the same as one of the other comments, but more concise.

Expanding and generalizing #Ferdinand.kraft's answer:
dat <- data.frame(
f1 = sample(LETTERS[1:3], 20, TRUE),
f2 = sample(LETTERS[4:5], 20, TRUE),
row.names = paste0("id_", 1:20))
covariates <- c("f1", "f2") # in case you have other columns that you don't want to include in the design matrix
design <- do.call(cbind, lapply(covariates, function(covariate){
apply(outer(dat[[covariate]], unique(dat[[covariate]]), FUN = "=="), 2, as.integer)
}))
rownames(design) <- rownames(dat)
colnames(design) <- unlist(sapply(covariates, function(covariate) unique(dat[[covariate]])))
design <- design[, !duplicated(colnames(design))] # duplicated colnames happen sometimes
design
# C A B D E
# id_1 1 0 0 1 0
# id_2 0 1 0 1 0
# id_3 0 0 1 1 0
# id_4 1 0 0 1 0
# id_5 0 1 0 1 0
# id_6 0 1 0 0 1
# id_7 0 0 1 0 1

Model matrix only allows what it calls "dummy" coding for the first factor in a formula.
If the intercept is present, it plays that role. To get the desired effect of a redundant index matrix (where you have a 1 in every column for the corresponding factor level and 0 elsewhere), you can lie to model.matrix() and pretend there's an extra level. Then trim off the intercept column.
> a=rep(1:2,3)
> b=rep(1:3,2)
> df=data.frame(A=a,B=b)
> # Lie and pretend there's a level 0 in each factor.
> df$A=factor(a,as.character(0:2))
> df$B=factor(b,as.character(0:3))
> mm=model.matrix (~A+B,df)
> mm
(Intercept) A1 A2 B1 B2 B3
1 1 1 0 1 0 0
2 1 0 1 0 1 0
3 1 1 0 0 0 1
4 1 0 1 1 0 0
5 1 1 0 0 1 0
6 1 0 1 0 0 1
attr(,"assign")
[1] 0 1 1 2 2 2
attr(,"contrasts")
attr(,"contrasts")$A
[1] "contr.treatment"
attr(,"contrasts")$B
[1] "contr.treatment"
> # mm has an intercept column not requested, so kill it
> dm=as.matrix(mm[,-1])
> dm
A1 A2 B1 B2 B3
1 1 0 1 0 0
2 0 1 0 1 0
3 1 0 0 0 1
4 0 1 1 0 0
5 1 0 0 1 0
6 0 1 0 0 1
> # You can also add interactions
> mm2=model.matrix (~A*B,df)
> dm2=as.matrix(mm2[,-1])
> dm2
A1 A2 B1 B2 B3 A1:B1 A2:B1 A1:B2 A2:B2 A1:B3 A2:B3
1 1 0 1 0 0 1 0 0 0 0 0
2 0 1 0 1 0 0 0 0 1 0 0
3 1 0 0 0 1 0 0 0 0 1 0
4 0 1 1 0 0 0 1 0 0 0 0
5 1 0 0 1 0 0 0 1 0 0 0
6 0 1 0 0 1 0 0 0 0 0 1

Things get complicated with model.matrix() again if we add a covariate x and interactions of x with factors.
a=rep(1:2,3)
b=rep(1:3,2)
x=1:6
df=data.frame(A=a,B=b,x=x)
# Lie and pretend there's a level 0 in each factor.
df$A=factor(a,as.character(0:2))
df$B=factor(b,as.character(0:3))
mm=model.matrix (~A + B + A:x + B:x,df)
print(mm)
(Intercept) A1 A2 B1 B2 B3 A0:x A1:x A2:x B1:x B2:x B3:x
1 1 1 0 1 0 0 0 1 0 1 0 0
2 1 0 1 0 1 0 0 0 2 0 2 0
3 1 1 0 0 0 1 0 3 0 0 0 3
4 1 0 1 1 0 0 0 0 4 4 0 0
5 1 1 0 0 1 0 0 5 0 0 5 0
6 1 0 1 0 0 1 0 0 6 0 0 6
So mm has an intercept, but now A:x interaction terms have an unwanted level A0:x
If we reintroduce x as as a separate term, we will cancel that unwanted level
mm2=model.matrix (~ x + A + B + A:x + B:x, df)
print(mm2)
(Intercept) x A1 A2 B1 B2 B3 x:A1 x:A2 x:B1 x:B2 x:B3
1 1 1 1 0 1 0 0 1 0 1 0 0
2 1 2 0 1 0 1 0 0 2 0 2 0
3 1 3 1 0 0 0 1 3 0 0 0 3
4 1 4 0 1 1 0 0 0 4 4 0 0
5 1 5 1 0 0 1 0 5 0 0 5 0
6 1 6 0 1 0 0 1 0 6 0 0 6
We can get rid of the unwanted intercept and the unwanted bare x term
dm2=as.matrix(mm2[,c(-1,-2)])
print(dm2)
A1 A2 B1 B2 B3 x:A1 x:A2 x:B1 x:B2 x:B3
1 1 0 1 0 0 1 0 1 0 0
2 0 1 0 1 0 0 2 0 2 0
3 1 0 0 0 1 3 0 0 0 3
4 0 1 1 0 0 0 4 4 0 0
5 1 0 0 1 0 5 0 0 5 0
6 0 1 0 0 1 0 6 0 0 6

Related

Count occurences of teams in matrix in R

Have a 1000*16 matrix from a simulation with team names as characters. I want to count number of occurrences per team in all 16 columns.
I know I could do apply(test, 2, table) but that makes the data hard to work with afterward since all teams is not included in every column.
If you have a vector that is all the unique team names you could do something like this. I'm counting occurrences here via column to ensure that not every team (in this case letter) is not included.
set.seed(15)
letter_mat <- matrix(
sample(
LETTERS,
size = 1000*16,
replace = TRUE
),
ncol = 16,
nrow = 1000
)
output <- t(
apply(
letter_mat,
1,
function(x) table(factor(x, levels = LETTERS))
)
)
head(output)
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
[1,] 1 2 0 1 1 1 1 0 0 0 1 0 0 0 0 1 1 1 1 1 0 1 1 0 0 1
[2,] 0 1 0 2 2 1 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 2 2 1
[3,] 1 1 0 0 1 0 1 2 1 0 0 0 0 0 1 0 1 0 1 1 0 0 3 0 1 1
[4,] 0 1 0 0 0 1 0 0 0 2 0 1 0 0 1 1 1 1 2 0 2 3 0 0 0 0
[5,] 2 1 0 0 0 0 0 2 0 2 1 1 1 0 0 2 0 2 1 0 0 1 0 0 0 0
[6,] 0 0 0 0 0 1 3 1 0 0 0 0 1 1 3 0 1 0 0 1 0 0 0 1 0 3

R to recode variables if the categorical variable's frequency lower than an defined value

Here is an example for the dataset (d):
rs3 rs4 rs5 rs6
1 0 0 0
1 0 1 0
0 0 0 0
2 0 1 0
0 0 0 0
0 2 0 1
0 2 NA 1
0 2 2 1
NA 1 2 1
To check the frequency of the SNP genotype (0,1,2), we can use the table command
table (d$rs3)
The output would be
0 1 2
5 2 1
Here we want to recode the variables if the genotype 2's frequency is <3, the recoded output should be
rs3 rs4 rs5 rs6
1 0 0 0
1 0 1 0
0 0 0 0
1 0 1 0
0 0 0 0
0 2 0 1
0 2 NA 1
0 2 1 1
NA 1 1 1
I have 70000SNPs that need to check and recode. How to use the for loop or other method to do that in R?
Here's another possible (vectorized) solution
indx <- colSums(d == 2, na.rm = TRUE) < 3 # Select columns by condition
d[indx][d[indx] == 2] <- 1 # Inset 1 when the subset by condition equals 2
d
# rs3 rs4 rs5 rs6
# 1 1 0 0 0
# 2 1 0 1 0
# 3 0 0 0 0
# 4 1 0 1 0
# 5 0 0 0 0
# 6 0 2 0 1
# 7 0 2 NA 1
# 8 0 2 1 1
# 9 NA 1 1 1
We can try
d[] <- lapply(d, function(x)
if(sum(x==2, na.rm=TRUE) < 3) replace(x, x==2, 1) else x)
d
# rs3 rs4 rs5 rs6
#1 1 0 0 0
#2 1 0 1 0
#3 0 0 0 0
#4 1 0 1 0
#5 0 0 0 0
#6 0 2 0 1
#7 0 2 NA 1
#8 0 2 1 1
#9 NA 1 1 1
Or the same methodology can be used in dplyr
library(dplyr)
d %>%
mutate_each(funs(if(sum(.==2, na.rm=TRUE) <3)
replace(., .==2, 1) else .))

How to convert two factors to adjacency matrix in R?

I have a data frame with two columns (key and value) where each column is a factor:
df = data.frame(gl(3,4,labels=c('a','b','c')), gl(6,2))
colnames(df) = c("key", "value")
key value
1 a 1
2 a 1
3 a 2
4 a 2
5 b 3
6 b 3
7 b 4
8 b 4
9 c 5
10 c 5
11 c 6
12 c 6
I want to convert it to adjacency matrix (in this case 3x6 size) like:
1 2 3 4 5 6
a 1 1 0 0 0 0
b 0 0 1 1 0 0
c 0 0 0 0 1 1
So that I can run clustering on it (group keys that have similar values together) with either kmeans or hclust.
Closest that I was able to get was using model.matrix( ~ value, df) which results in:
(Intercept) value2 value3 value4 value5 value6
1 1 0 0 0 0 0
2 1 0 0 0 0 0
3 1 1 0 0 0 0
4 1 1 0 0 0 0
5 1 0 1 0 0 0
6 1 0 1 0 0 0
7 1 0 0 1 0 0
8 1 0 0 1 0 0
9 1 0 0 0 1 0
10 1 0 0 0 1 0
11 1 0 0 0 0 1
12 1 0 0 0 0 1
but results aren't grouped by key yet.
From another side I can collapse this dataset into groups using:
aggregate(df$value, by=list(df$key), unique)
Group.1 x.1 x.2
1 a 1 2
2 b 3 4
3 c 5 6
But I don't know what to do next...
Can someone help to solve this?
An easy way to do it in base R:
res <-table(df)
res[res>0] <-1
res
value
#key 1 2 3 4 5 6
# a 1 1 0 0 0 0
# b 0 0 1 1 0 0
# c 0 0 0 0 1 1

How can I suppress (not print) line numbers?

How can I suppress (not print) line numbers?
Code reads:
dd<-data.frame(a=gl(2,3),b=gl(3,1,6) )
model.matrix( ~a + b + a*b, dd )
Tries:
> dd<-data.frame(a=gl(2,3),b=gl(3,1,6) )
> model.matrix( ~a + b + a*b, dd )
(Intercept) a2 b2 b3 a2:b2 a2:b3
1 1 0 0 0 0 0
2 1 0 1 0 0 0
3 1 0 0 1 0 0
4 1 1 0 0 0 0
5 1 1 1 0 1 0
6 1 1 0 1 0 1
attr(,"assign")
[1] 0 1 2 2 3 3
attr(,"contrasts")
attr(,"contrasts")$a
[1] "contr.treatment"
attr(,"contrasts")$b
[1] "contr.treatment"
> cat(model.matrix( ~a + b + a*b, dd ))
1 1 1 1 1 1 0 0 0 1 1 1 0 1 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1
> model.matrix( ~ a + b + a*b, dd )
(Intercept) a2 b2 b3 a2:b2 a2:b3
1 1 0 0 0 0 0
2 1 0 1 0 0 0
3 1 0 0 1 0 0
4 1 1 0 0 0 0
5 1 1 1 0 1 0
6 1 1 0 1 0 1
attr(,"assign")
[1] 0 1 2 2 3 3
attr(,"contrasts")
attr(,"contrasts")$a
[1] "contr.treatment"
attr(,"contrasts")$b
[1] "contr.treatment"
> dd<-data.frame(a=gl(2,3),b=gl(3,1,6) )
> print(model.matrix( ~a + b + a*b, dd , rowNames=False))
(Intercept) a2 b2 b3 a2:b2 a2:b3
1 1 0 0 0 0 0
2 1 0 1 0 0 0
3 1 0 0 1 0 0
4 1 1 0 0 0 0
5 1 1 1 0 1 0
6 1 1 0 1 0 1
attr(,"assign")
[1] 0 1 2 2 3 3
attr(,"contrasts")
attr(,"contrasts")$a
[1] "contr.treatment"
attr(,"contrasts")$b
[1] "contr.treatment"
> print(model.matrix( ~a + b + a*b, dd , colNames=False))
(Intercept) a2 b2 b3 a2:b2 a2:b3
1 1 0 0 0 0 0
2 1 0 1 0 0 0
3 1 0 0 1 0 0
4 1 1 0 0 0 0
5 1 1 1 0 1 0
6 1 1 0 1 0 1
attr(,"assign")
[1] 0 1 2 2 3 3
attr(,"contrasts")
attr(,"contrasts")$a
[1] "contr.treatment"
attr(,"contrasts")$b
[1] "contr.treatment"
It is unfortunate that there doesn't seem to be any way to suppress row names when printing matrices, isn't it? One option is to coerce to data.frame and use the row.names argument of print.data.frame():
dd <- data.frame(a=gl(2,3),b=gl(3,1,6));
print(as.data.frame(model.matrix( ~a + b + a*b, dd )),row.names=F);
## (Intercept) a2 b2 b3 a2:b2 a2:b3
## 1 0 0 0 0 0
## 1 0 1 0 0 0
## 1 0 0 1 0 0
## 1 1 0 0 0 0
## 1 1 1 0 1 0
## 1 1 0 1 0 1
You can save the result of model.matrix, then change rownames to empty characters.
dd<-data.frame(a=gl(2,3),b=gl(3,1,6) )
mm <- model.matrix( ~a + b + a*b, dd )
mm
# (Intercept) a2 b2 b3 a2:b2 a2:b3
#1 1 0 0 0 0 0
#2 1 0 1 0 0 0
#3 1 0 0 1 0 0
#4 1 1 0 0 0 0
#5 1 1 1 0 1 0
#6 1 1 0 1 0 1
#attr(,"assign")
#[1] 0 1 2 2 3 3
#attr(,"contrasts")
#attr(,"contrasts")$a
#[1] "contr.treatment"
#attr(,"contrasts")$b
#[1] "contr.treatment"
rownames(mm) <- rep("", 6)
#rownames(mm) <- rep("", nrow(mm)) #more general
mm
# (Intercept) a2 b2 b3 a2:b2 a2:b3
# 1 0 0 0 0 0
# 1 0 1 0 0 0
# 1 0 0 1 0 0
# 1 1 0 0 0 0
# 1 1 1 0 1 0
# 1 1 0 1 0 1
#attr(,"assign")
#[1] 0 1 2 2 3 3
#attr(,"contrasts")
#attr(,"contrasts")$a
#[1] "contr.treatment"
#attr(,"contrasts")$b
#[1] "contr.treatment"
There is no print.matrix method, so the appropriate help page to consult is ?print.default where there really is no argument for suppressing row or column names, so I would suggest something like this:
( matrix( model.matrix( ~a + b + a*b, dd ) , nrow(dd)) )
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 0 0 0 0 0
[2,] 1 0 1 0 0 0
[3,] 1 0 0 1 0 0
[4,] 1 1 0 0 0 0
[5,] 1 1 1 0 1 0
[6,] 1 1 0 1 0 1
Another option is to build a function that performs as you desired, and if you wanted only the rownames or colnames to be suppressed, you could make the logic more elaborate:
> print.noRowCol <- function(x) {dimnames(x)<- NULL; print(x)}
> print.noRowCol (model.matrix( ~a + b + a*b, dd ) )
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 0 0 0 0 0
[2,] 1 0 1 0 0 0
[3,] 1 0 0 1 0 0
[4,] 1 1 0 0 0 0
[5,] 1 1 1 0 1 0
[6,] 1 1 0 1 0 1
attr(,"assign")
[1] 0 1 2 2 3 3
attr(,"contrasts")
attr(,"contrasts")$a
[1] "contr.treatment"
attr(,"contrasts")$b
[1] "contr.treatment"

Transform data frame

I have a questionnaire with an open-ended question like "Please name up to ten animals", which gives me the following data frame (where each letter stands for an animal):
nrow <- 1000
list <- vector("list", nrow)
for(i in 1:nrow){
na <- rep(NA, sample(1:10, 1))
list[[i]] <- sample(c(letters, na), 10, replace=FALSE)
}
df <- data.frame()
df <- rbind(df, do.call(rbind, list))
head(df)
# V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
# 1 r <NA> a j w e i h u z
# 2 t o e x d v <NA> z n c
# 3 f y e s n c z i u k
# 4 y <NA> v j h z p i c q
# 5 w s v f <NA> c g b x e
# 6 p <NA> a h v x k z o <NA>
How can I transform this data frame to look like the following data frame? Remember that I don't actually know the column names.
r <- 1000
c <- length(letters)
t1 <- matrix(rbinom(r*c,1,0.5),r,c)
colnames(t1) <- letters
head(t1)
# a b c d e f g h i j k l m n o p q r s t u v w x y z
# [1,] 0 1 0 1 0 0 0 1 0 0 1 1 1 1 0 0 0 1 0 1 0 1 1 0 1 0
# [2,] 1 1 1 1 0 1 0 1 1 1 1 0 1 0 0 0 1 1 1 0 0 1 0 1 0 1
# [3,] 0 1 0 0 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0
# [4,] 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 1 0 1 0 1 1 0 0
# [5,] 1 0 1 1 1 1 1 1 1 0 1 1 0 0 0 0 1 1 0 1 1 0 0 1 0 0
# [6,] 1 1 0 1 1 0 0 1 0 0 1 0 0 0 0 0 1 1 1 0 0 0 1 1 0 1
td <- data.frame(t(apply(df, 1, function(x) as.numeric( unique(unlist(df)) %in% x))))
colnames (td) <- unique(unlist(df))
letters could be replaced with a vector of animal names colnames(t1).
You can do the following using tidyr which could be much faster than other approaches, though I like the approach by #germcd very much. You may need to tinker with the select, removing NAs as well as a blank space, which may be an artifact of the simulated data you provided:
require(tidyr)
## Add an ID for each record:
df$id <- 1:nrow(df)
out <- (df %>%
gather(column, animal, -id) %>%
filter(animal != " ") %>%
spread(animal, column)
)
head(out)
This code gathers the unnamed columns into a long format, removes any empty columns or missing data, and then spreads by the unique values of the animal column. This also has the potentially desirable property of preserving the column order in which the animals were named. If it's not desirable then you could easily convert the resulting animal columns to numeric:
out_num <- out
out_num[,-1] <- as.numeric((!is.na(out[,-1])))
head(out_num)
You can try mtabulate from the "qdapTools" package:
library(qdapTools)
head(mtabulate(as.data.frame(t(df))))
# c d i l m o r v x y a f s t k p u b h j n q e g w z
# 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# 2 0 1 0 0 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0
# 3 0 0 1 0 0 0 1 0 1 1 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0
# 4 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0
# 5 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 1 0 1 1 0 1 1 0 0 0 0
# 6 0 0 0 0 1 0 0 0 0 0 0 0 1 1 1 0 1 1 0 1 0 1 0 0 0 0
There are, of course, many other options.
For example, cSplit_e from my "splitstackshape" package (with the downside that inefficiently, you need to paste the values together first before you can split them):
library(splitstackshape)
library(dplyr)
As ones and zeroes:
df %>%
mutate(combined = apply(., 1, function(x) paste(na.omit(x), collapse = ","))) %>%
cSplit_e("combined", ",", mode = "binary", type = "character", fill = 0) %>%
select(starts_with("combined_")) %>%
head
# combined_a combined_b combined_c combined_d combined_e combined_f combined_g combined_h combined_i
# 1 0 0 1 1 0 0 0 0 1
# 2 1 0 0 1 0 1 0 0 0
# 3 1 0 0 0 0 0 0 0 1
# 4 0 1 1 0 0 0 0 1 1
# 5 0 1 0 1 0 0 0 1 0
# 6 0 1 0 0 0 0 0 0 0
# combined_j combined_k combined_l combined_m combined_n combined_o combined_p combined_q combined_r
# 1 0 0 1 1 0 1 0 0 1
# 2 0 0 0 1 0 0 0 0 0
# 3 0 1 0 0 0 0 1 0 1
# 4 1 0 1 0 1 0 0 0 0
# 5 0 1 0 0 1 0 1 1 1
# 6 1 1 0 1 0 0 0 1 0
# combined_s combined_t combined_u combined_v combined_w combined_x combined_y combined_z
# 1 0 0 0 1 0 1 1 0
# 2 1 1 0 0 0 0 0 0
# 3 0 1 1 0 0 1 1 0
# 4 0 0 1 0 0 0 1 0
# 5 1 0 0 0 0 0 0 0
# 6 1 1 1 0 0 0 0 0
As the original values:
df %>%
mutate(combined = apply(., 1, function(x) paste(na.omit(x), collapse = ","))) %>%
cSplit_e("combined", ",", mode = "value", type = "character", fill = "") %>%
select(starts_with("combined_")) %>%
head
# combined_a combined_b combined_c combined_d combined_e combined_f combined_g combined_h combined_i
# 1 c d i
# 2 a d f
# 3 a i
# 4 b c h i
# 5 b d h
# 6 b
# combined_j combined_k combined_l combined_m combined_n combined_o combined_p combined_q combined_r
# 1 l m o r
# 2 m
# 3 k p r
# 4 j l n
# 5 k n p q r
# 6 j k m q
# combined_s combined_t combined_u combined_v combined_w combined_x combined_y combined_z
# 1 v x y
# 2 s t
# 3 t u x y
# 4 u y
# 5 s
# 6 s t u
Alternatively, you can use "reshape2":
library(reshape2)
## The values
dcast(melt(as.matrix(df), na.rm = TRUE),
Var1 ~ value, value.var = "value")
## ones and zeroes
dcast(melt(as.matrix(df), na.rm = TRUE),
Var1 ~ value, value.var = "value", fun.aggregate = length)

Resources