Count occurrences of distinct values across multiple columns and groups - r

I've got a dataframe like the one below (in the actual dataset the number of rows are a few thousands and I 've got more than 300 variables):
df <- data.frame (Gr = c("A","A","A","B","B","B","B","B","B"),
Var1 = c("a","b","c","e","a","a","c","e","b"),
Var2 = c("a","a","a","d","b","b","c","a","e"),
Var3 = c("e","a","b",NA,"a","b","c","d","a"),
Var4 = c("e",NA,"a","e","a","b","d","c",NA))
which returns:
Gr Var1 Var2 Var3 Var4
1 A a a e e
2 A b a a <NA>
3 A c a b a
4 B e d <NA> e
5 B a b a a
6 B a b b b
7 B c c c d
8 B e a d c
9 B b e a <NA>
and would like to get number of occurrences of each value (a,b,c,d,e, and NA) in each variable and in each group. Hence, the output should look something like the following:
df1 <- data.frame(Vars = c("Var1","Var2","Var3","Var4"),
a = c(1,3,1,1),
b = c(1,0,1,0),
c = c(1,0,0,0),
d = c(0,0,0,0),
e = c(0,0,1,1),
na = c(0,0,0,1))
df2 <- data.frame(Vars = c("Var1","Var2","Var3","Var4"),
a = c(2,1,2,1),
b = c(0,2,1,1),
c = c(1,1,1,1),
d = c(0,1,1,1),
e = c(2,1,0,1),
na = c(0,0,1,1))
output <- list(df1,df2)
names(output) <- c("A","B")
which looks like:
$A
Vars a b c d e na
1 Var1 1 1 1 0 0 0
2 Var2 3 0 0 0 0 0
3 Var3 1 1 0 0 1 0
4 Var4 1 0 0 0 1 1
$B
Vars a b c d e na
1 Var1 2 0 1 0 2 0
2 Var2 1 2 1 1 1 0
3 Var3 2 1 1 1 0 1
4 Var4 1 1 1 1 1 1
I haven't been able to make any considerable progress so far, and a tidyverse solution is preferred.

We may use mtabulate after spliting
library(qdapTools)
lapply(split(df[-1], df$Gr), mtabulate)
If we need the na count as well
lapply(split(replace(df[-1], is.na(df[-1]), "na"), df$Gr), mtabulate)
-output
$A
a b c e na
Var1 1 1 1 0 0
Var2 3 0 0 0 0
Var3 1 1 0 1 0
Var4 1 0 0 1 1
$B
a b c d e na
Var1 2 1 1 0 2 0
Var2 1 2 1 1 1 0
Var3 2 1 1 1 0 1
Var4 1 1 1 1 1 1
Or using tidyverse
library(dplyr)
library(tidyr)
df %>%
pivot_longer(cols = -Gr, names_to = "Vars") %>%
pivot_wider(names_from = value, values_from = value,
values_fn = length, values_fill = 0) %>%
{split(.[-1], .$Gr)}
-output
$A
# A tibble: 4 × 7
Vars a e b `NA` c d
<chr> <int> <int> <int> <int> <int> <int>
1 Var1 1 0 1 0 1 0
2 Var2 3 0 0 0 0 0
3 Var3 1 1 1 0 0 0
4 Var4 1 1 0 1 0 0
$B
# A tibble: 4 × 7
Vars a e b `NA` c d
<chr> <int> <int> <int> <int> <int> <int>
1 Var1 2 2 1 0 1 0
2 Var2 1 1 2 0 1 1
3 Var3 2 0 1 1 1 1
4 Var4 1 1 1 1 1 1

A NA save base R approach using colSums
val <- sort(unique(unlist(df[-1])), na.last=T)
as.list(lapply(split(df[-1], df$Gr), function(dlist)
data.frame(sapply(val, function(x)
colSums(dlist == x | (is.na(dlist) & is.na(x)), na.rm=T)), check.names=F)))
$A
a b c d e NA
Var1 1 1 1 0 0 0
Var2 3 0 0 0 0 0
Var3 1 1 0 0 1 0
Var4 1 0 0 0 1 1
$B
a b c d e NA
Var1 2 1 1 0 2 0
Var2 1 2 1 1 1 0
Var3 2 1 1 1 0 1
Var4 1 1 1 1 1 1

reshape2::recast(df,Gr+variable~value,length,id.var = 'Gr')
Gr variable a b c d e NA
1 A Var1 1 1 1 0 0 0
2 A Var2 3 0 0 0 0 0
3 A Var3 1 1 0 0 1 0
4 A Var4 1 0 0 0 1 1
5 B Var1 2 1 1 0 2 0
6 B Var2 1 2 1 1 1 0
7 B Var3 2 1 1 1 0 1
If you must split them:
split(reshape2::recast(df,Gr+variable~value,length,id.var = 'Gr'), ~Gr)
$A
Gr variable a b c d e NA
1 A Var1 1 1 1 0 0 0
2 A Var2 3 0 0 0 0 0
3 A Var3 1 1 0 0 1 0
4 A Var4 1 0 0 0 1 1
$B
Gr variable a b c d e NA
5 B Var1 2 1 1 0 2 0
6 B Var2 1 2 1 1 1 0
7 B Var3 2 1 1 1 0 1
8 B Var4 1 1 1 1 1 1
in base R:
ftable(cbind(df[1], stack(replace(df, is.na(df),'na'), -1)),col.vars = 2)
values a b c d e na
Gr ind
A Var1 1 1 1 0 0 0
Var2 3 0 0 0 0 0
Var3 1 1 0 0 1 0
Var4 1 0 0 0 1 1
B Var1 2 1 1 0 2 0
Var2 1 2 1 1 1 0
Var3 2 1 1 1 0 1
Var4 1 1 1 1 1 1

Related

how to add columns iteratively for recoding with semi-modified names

I would use this dataset as an example
BEZ <- c("A","A","A","A","B","B","B")
var <- c("B","B","B","B","B","B","B")
bar <- c("B","B","B","B","B","B","B")
Bez1 <- c("A","A","A","A","B","B","B")
var1 <- c("B","B","B","B","B","B","B")
bar1 <- c("B","B","B","B","B","B","B")
dat <- data.frame(BEZ, var, bar, Bez1, var1, bar1)
the tricky thing that I would like to do is use a method (loops, map(), apply(), dplyr functions, and so on) to create aside the already existing new column where based on the respective row value is converted into a number.
Excepeted result
BEZ BEZ_num var var_num bar bar_num Bez1 BEZ1_num var1 var1_num bar1 bar1_num
A 0 B 1 B 1 A 0 B 1 B 1
A 0 B 1 B 1 A 0 B 1 B 1
A 0 B 1 C 2 A 0 B 1 A 0
A 0 B 1 B 1 A 0 C 2 B 1
B 1 B 1 B 1 B 1 C 2 C 2
B 1 B 1 B 1 A 0 B 1 B 1
B 1 B 1 B 1 A 0 B 1 B 1
This is more or less the idea I would like to hit. Any suggestions?
Thanks
Using factor
library(dplyr)
dat %>%
mutate(across(everything(), ~ as.integer(factor(.x))-1, .names = "{.col}_num"))
-output
BEZ var bar Bez1 var1 bar1 BEZ_num var_num bar_num Bez1_num var1_num bar1_num
1 A B B A B B 0 0 0 0 0 0
2 A B B A B B 0 0 0 0 0 0
3 A B B A B B 0 0 0 0 0 0
4 A B B A B B 0 0 0 0 0 0
5 B B B B B B 1 0 0 1 0 0
6 B B B B B B 1 0 0 1 0 0
7 B B B B B B 1 0 0 1 0 0
See in the comments. The provided data frame and the expected output do not match. But I think we could use mutate(across..) with the .names argument combined with case_when:
library(dplyr)
dat %>%
mutate(across(everything(), ~case_when(
. == "A" ~ "0",
. == "B" ~ "1",
. == "C" ~ "2"), .names = "{col}_num"))
BEZ var bar Bez1 var1 bar1 BEZ_num var_num bar_num Bez1_num var1_num bar1_num
1 A B B A B B 0 1 1 0 1 1
2 A B B A B B 0 1 1 0 1 1
3 A B B A B B 0 1 1 0 1 1
4 A B B A B B 0 1 1 0 1 1
5 B B B B B B 1 1 1 1 1 1
6 B B B B B B 1 1 1 1 1 1
7 B B B B B B 1 1 1 1 1 1
Using a for loop in base R:
dat2 <- dat[, 1, drop = FALSE]
for (col in names(dat)) {
dat2[[col]] <- dat[[col]]
dat2[[paste0(col, "_num")]] <- match(dat[[col]], LETTERS) - 1
}
dat2
# BEZ BEZ_num var var_num bar bar_num Bez1 Bez1_num var1 var1_num bar1 bar1_num
# 1 A 0 B 1 B 1 A 0 B 1 B 1
# 2 A 0 B 1 B 1 A 0 B 1 B 1
# 3 A 0 B 1 B 1 A 0 B 1 B 1
# 4 A 0 B 1 B 1 A 0 B 1 B 1
# 5 B 1 B 1 B 1 B 1 B 1 B 1
# 6 B 1 B 1 B 1 B 1 B 1 B 1
# 7 B 1 B 1 B 1 B 1 B 1 B 1
Or a (slightly convoluted) approach using dplyr::across():
library(dplyr)
dat %>%
mutate(
across(BEZ:bar1, list(TMP = identity, num = \(x) match(x, LETTERS) - 1)),
.keep = "unused"
) %>%
rename_with(\(x) gsub("_TMP$", "", x))
# same output as above
Or finally, if you don't care about the order of the output columns, you could also use dplyr::across() with the .names argument:
dat %>%
mutate(across(
BEZ:bar1,
\(x) match(x, LETTERS) - 1,
.names = "{.col}_num"
))
# BEZ var bar Bez1 var1 bar1 BEZ_num var_num bar_num Bez1_num var1_num bar1_num
# 1 A B B A B B 0 1 1 0 1 1
# 2 A B B A B B 0 1 1 0 1 1
# 3 A B B A B B 0 1 1 0 1 1
# 4 A B B A B B 0 1 1 0 1 1
# 5 B B B B B B 1 1 1 1 1 1
# 6 B B B B B B 1 1 1 1 1 1
# 7 B B B B B B 1 1 1 1 1 1
To add two further options:
With dplyr v.1.1.0 we can use consecutive_id():
library(dplyr) # v.1.1.0
dat %>%
mutate(across(everything(),
~ consecutive_id(.x)-1,
.names = "{.col}_num"))
#> BEZ var bar Bez1 var1 bar1 BEZ_num var_num bar_num Bez1_num var1_num bar1_num
#> 1 A B B A B B 0 0 0 0 0 0
#> 2 A B B A B B 0 0 0 0 0 0
#> 3 A B B A B B 0 0 0 0 0 0
#> 4 A B B A B B 0 0 0 0 0 0
#> 5 B B B B B B 1 0 0 1 0 0
#> 6 B B B B B B 1 0 0 1 0 0
#> 7 B B B B B B 1 0 0 1 0 0
Similar we could use data.table::rleid():
dat %>%
mutate(across(everything(),
~ data.table::rleid(.x)-1,
.names = "{.col}_num"))
#> BEZ var bar Bez1 var1 bar1 BEZ_num var_num bar_num Bez1_num var1_num bar1_num
#> 1 A B B A B B 0 0 0 0 0 0
#> 2 A B B A B B 0 0 0 0 0 0
#> 3 A B B A B B 0 0 0 0 0 0
#> 4 A B B A B B 0 0 0 0 0 0
#> 5 B B B B B B 1 0 0 1 0 0
#> 6 B B B B B B 1 0 0 1 0 0
#> 7 B B B B B B 1 0 0 1 0 0
Created on 2023-02-03 with reprex v2.0.2

Identify the column name of the last occurrence of a value in R data frame

I have a dataset like below with columns of 1s and 0s. I would like to add a final column that identifies the column name of the final occurrence of 0 per row.
have = data.frame(a = c(1,0,1,1,0,0,1,1,1,0),
b = c(1,0,1,1,1,0,1,1,0,0),
c = c(0,0,0,1,0,1,1,1,1,0),
d = c(1,0,1,1,0,0,0,1,0,1),
e = c(1,1,1,1,1,1,1,1,1,1))
> have
a b c d e
1 1 1 0 1 1
2 0 0 0 0 1
3 1 1 0 1 1
4 1 1 1 1 1
5 0 1 0 0 1
6 0 0 1 0 1
7 1 1 1 0 1
8 1 1 1 1 1
9 1 0 1 0 1
10 0 0 0 1 1
I would like the output to look like this where the final column specifies the column name of the last occurring 0 and if one does not exist return NA.
> want
a b c d e last_0
1 1 1 0 1 1 c
2 0 0 0 0 1 d
3 1 1 0 1 1 c
4 1 1 1 1 1 <NA>
5 0 1 0 0 1 d
6 0 0 1 0 1 d
7 1 1 1 0 1 d
8 1 1 1 1 1 <NA>
9 1 0 1 0 1 d
10 0 0 0 1 1 c
I've tried using max.col but it returns the last column name if a zero does not exist. Any other solutions? A dplyr solution is preferred.
> have$last_0 = names(have)[max.col(have == 0, ties.method = "last")]
> have
a b c d e last_0
1 1 1 0 1 1 c
2 0 0 0 0 1 d
3 1 1 0 1 1 c
4 1 1 1 1 1 e
5 0 1 0 0 1 d
6 0 0 1 0 1 d
7 1 1 1 0 1 d
8 1 1 1 1 1 e
9 1 0 1 0 1 d
10 0 0 0 1 1 c
Here is an approach with purrr::pmap:
library(dplyr);library(purrr)
have %>%
mutate(want = pmap_chr(cur_data(),
~ tail(c(NA,names(which(c(...)==0))),1)))
a b c d e want
1 1 1 0 1 1 c
2 0 0 0 0 1 d
3 1 1 0 1 1 c
4 1 1 1 1 1 <NA>
5 0 1 0 0 1 d
6 0 0 1 0 1 d
7 1 1 1 0 1 d
8 1 1 1 1 1 <NA>
9 1 0 1 0 1 d
10 0 0 0 1 1 c
purrr:pmap is a very useful function because it will work row wise on data and it comes in various flavors so you can control what returns. You can refer to the entire row of data with c(...).
If you wanted to apply the procedure to only a subset of columns, you might use dplyr::select:
have %>%
mutate(want = pmap_chr(cur_data() %>% select(a,b,c),
~ tail(c(NA,names(which(c(...)==0))),1)))
a b c d e want
1 1 1 0 1 1 c
2 0 0 0 0 1 c
3 1 1 0 1 1 c
4 1 1 1 1 1 <NA>
5 0 1 0 0 1 c
6 0 0 1 0 1 b
7 1 1 1 0 1 <NA>
8 1 1 1 1 1 <NA>
9 1 0 1 0 1 b
10 0 0 0 1 1 c
We can use max.col and then replace those elements that doesn't have any 0 to NA
have$last_0 <- names(have)[(NA^!rowSums(have == 0)) * max.col(have == 0, 'last')]
-output
have
a b c d e last_0
1 1 1 0 1 1 c
2 0 0 0 0 1 d
3 1 1 0 1 1 c
4 1 1 1 1 1 <NA>
5 0 1 0 0 1 d
6 0 0 1 0 1 d
7 1 1 1 0 1 d
8 1 1 1 1 1 <NA>
9 1 0 1 0 1 d
10 0 0 0 1 1 c
Here is my approach:
library(dplyr)
have %>%
purrr::pmap_dfr(\(...) tibble(...,
last_0 = which(c(...) == 0) %>%
names %>%
last))
Returns:
# A tibble: 10 x 6
a b c d e last_0
<dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 1 1 0 1 1 c
2 0 0 0 0 1 d
3 1 1 0 1 1 c
4 1 1 1 1 1 NA
5 0 1 0 0 1 d
6 0 0 1 0 1 d
7 1 1 1 0 1 d
8 1 1 1 1 1 NA
9 1 0 1 0 1 d
10 0 0 0 1 1 c
We can use pmax
have$last_0 <- names(have)[replace(do.call(pmax, data.frame((have == 0) * col(have))), rowSums(have) == ncol(have), NA)]
Another base R option using max.col
have$last_0 <- replace(names(have)[max.col(1 - have, "last")], rowSums(have) == ncol(have), NA)
such that
> have
a b c d e last_0
1 1 1 0 1 1 c
2 0 0 0 0 1 d
3 1 1 0 1 1 c
4 1 1 1 1 1 <NA>
5 0 1 0 0 1 d
6 0 0 1 0 1 d
7 1 1 1 0 1 d
8 1 1 1 1 1 <NA>
9 1 0 1 0 1 d
10 0 0 0 1 1 c
You can also use the following solution, however it may sound a bit verbose. As most of the possible "row_wise" operations had already been suggested, I thought I would try something I had never done before:
library(dplyr)
library(tidyr)
have %>%
mutate(id = row_number()) %>%
pivot_longer(-id, names_to = "Last_0", values_to = "val") %>%
group_by(id) %>%
arrange(desc(val), .by_group = TRUE) %>%
slice_tail(n = 1) %>%
mutate(Last_0 = ifelse(val == 1, "NA", Last_0)) %>%
select(Last_0) %>%
bind_cols(have) %>%
relocate(Last_0, .after = last_col())
# A tibble: 10 x 7
# Groups: id [10]
id a b c d e Last_0
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 1 1 1 0 1 1 c
2 2 0 0 0 0 1 d
3 3 1 1 0 1 1 c
4 4 1 1 1 1 1 NA
5 5 0 1 0 0 1 d
6 6 0 0 1 0 1 d
7 7 1 1 1 0 1 d
8 8 1 1 1 1 1 NA
9 9 1 0 1 0 1 d
10 10 0 0 0 1 1 c
Though the strategy used by dear #akrun is fantabulous, but you can simply store the value calculated by you inside a temp variable and replace it like this. This can be done in dplyr only using cur_data()
library(dplyr)
have %>% mutate(last_0 = {xx <- names(.)[max.col(cur_data() == 0, ties.method = 'last')];
replace(xx, rowSums(cur_data() == 0) == 0, NA)})
#> a b c d e last_0
#> 1 1 1 0 1 1 c
#> 2 0 0 0 0 1 d
#> 3 1 1 0 1 1 c
#> 4 1 1 1 1 1 <NA>
#> 5 0 1 0 0 1 d
#> 6 0 0 1 0 1 d
#> 7 1 1 1 0 1 d
#> 8 1 1 1 1 1 <NA>
#> 9 1 0 1 0 1 d
#> 10 0 0 0 1 1 c
Created on 2021-06-10 by the reprex package (v2.0.0)
A minimalist base R approach (that actually uses nothing but a grep-search for the condition):
data.frame( have,
last_0=unlist(apply( have, 1, function(x){
sol <- grep(0,x);
if( length(sol > 0 )){
colnames(have)[sol[length(sol)]]
}else{
NA } } )) )
a b c d e last_0
1 1 1 0 1 1 c
2 0 0 0 0 1 d
3 1 1 0 1 1 c
4 1 1 1 1 1 <NA>
5 0 1 0 0 1 d
6 0 0 1 0 1 d
7 1 1 1 0 1 d
8 1 1 1 1 1 <NA>
9 1 0 1 0 1 d
10 0 0 0 1 1 c

define a new column

I want to define a column which is 1 for rows with col3==loop it is 2 after col3==loop until they get to a row with col3==loop or col3==a or col3==b. otherwise 0.
example
group1 group2 col3
1 1 a
1 1 loop
1 1 d
1 1 c
1 1 b
1 1 f
1 2 loop
1 2 a
1 2 loop
1 2 f
2 1 loop
2 1 g
group1 group2 col3 loop
1 1 a 0
1 1 loop 1
1 1 d 2
1 1 c 2
1 1 b 2
1 1 f 0
1 2 loop 1
1 2 a 2
1 2 h 0
1 2 f 0
2 1 b 0
2 1 g 0
let me know if it is not clear
The following code does what the question asks for. It uses index vectors to assign 1 and 2 to the appropriate places in the new column. I have read in the expected output and created column loop2 to compare the result with the posted column loop. Change this after checking if the answer is right.
i1 <- which(df1$col3 == "loop")
i2 <- which(df1$col3 %in% c("a", "b"))
df1$loop2 <- 0
df1$loop2[i1] <- 1
k <- Map(`:`, (i1 + 1), i2[findInterval(i1, i2) + 1])
df1$loop2[unlist(k)] <- 2
df1
# group1 group2 col3 loop loop2
#1 1 1 a 0 0
#2 1 1 loop 1 1
#3 1 1 d 2 2
#4 1 1 c 2 2
#5 1 1 b 2 2
#6 1 1 f 0 0
#7 1 2 loop 1 1
#8 1 2 a 2 2
#9 1 2 h 0 0
#10 1 2 f 0 0
#11 2 1 b 0 0
#12 2 1 g 0 0
Data.
df1 <- read.table(text = "
group1 group2 col3 loop
1 1 a 0
1 1 loop 1
1 1 d 2
1 1 c 2
1 1 b 2
1 1 f 0
1 2 loop 1
1 2 a 2
1 2 h 0
1 2 f 0
2 1 b 0
2 1 g 0
", header = TRUE)

dataframes in R with multi-level rows and columns

Let's say I have the following data frame.
> df = data.frame(rowsA = sample(c('A','B','C'), 100, replace=TRUE),
rowsB = sample(c('D','E','F'), 100, replace=TRUE),
colsA = sample(c('G','H','I'), 100, replace=TRUE),
colsB = sample(c('J','K','L'), 100, replace=TRUE))
> head(df)
rowsA rowsB colsA colsB
1 B E I L
2 A E G J
3 A E H K
4 A D I J
5 C F G J
6 A F G J
Is it possible to create a multi-level table of counts?
In excel, it is possible with the PivotTable functionality
I think it possible in python in pandas with the df.columns.levels method.
I also figured out how to do multi-level rows only in R with dplyr (but haven't figured out multi-level columns)
df %>%
group_by(rowsA, rowsB, colsA) %>%
summarise(count = n()) %>%
spread(colsA, count)
# A tibble: 9 x 5
# Groups: rowsA, rowsB [9]
rowsA rowsB G H I
* <fctr> <fctr> <int> <int> <int>
1 A D 5 3 1
2 A E 1 2 1
3 A F 5 8 NA
4 B D 5 5 5
5 B E 2 4 6
6 B F 4 6 5
7 C D 2 6 NA
8 C E 6 5 3
9 C F 4 3 3
Paste the columns that goes to the header into one column, then reshape it, in which way you have a contingency table with the same meaning as the multi level count:
library(dplyr); library(tidyr)
df %>%
unite(header, c('colsA', 'colsB')) %>%
count(rowsA, rowsB, header) %>%
spread(header, n, fill = 0)
# A tibble: 9 x 11
# rowsA rowsB G_J G_K G_L H_J H_K H_L I_J I_K I_L
#* <fctr> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 A D 1 0 0 0 3 1 1 1 0
#2 A E 2 0 0 1 1 0 0 0 1
#3 A F 5 0 0 3 2 1 0 1 1
#4 B D 0 1 1 1 0 3 1 1 1
#5 B E 2 2 1 3 1 1 0 3 1
#6 B F 1 1 2 3 3 0 1 2 1
#7 C D 0 2 3 1 2 0 4 3 2
#8 C E 2 2 2 1 2 0 0 1 1
#9 C F 1 0 1 2 0 1 2 1 2
Or if you are OK with a table/array/matrix as result, you can use xtabs, (borrowed from this answer), which essentially gives a 4-d array but with ftable, it can be displayed as you need:
ftable(xtabs(data = df), row.vars = 1:2, col.vars = 3:4)
# colsA G H I
# colsB J K L J K L J K L
#rowsA rowsB
#A D 1 0 0 0 3 1 1 1 0
# E 2 0 0 1 1 0 0 0 1
# F 5 0 0 3 2 1 0 1 1
#B D 0 1 1 1 0 3 1 1 1
# E 2 2 1 3 1 1 0 3 1
# F 1 1 2 3 3 0 1 2 1
#C D 0 2 3 1 2 0 4 3 2
# E 2 2 2 1 2 0 0 1 1
# F 1 0 1 2 0 1 2 1 2

Counting existing permutations in R

I have a large dataset with columns IDNum, Var1, Var2, Var3, Var4, Var5, Var6. The variables are boolean with value either 0 or 1. Each row could be one of 64 different possible permutations. I would like to count the number of rows corresponding to each permutation present. Is there an efficient way to write this in R?
aggregate can do this. Here's a shorter example:
r <- function() rbinom(10, 1, .5)
d <- data.frame(IDNum=1:10, Var1=r(), Var2=r())
d
IDNum Var1 Var2
1 1 0 1
2 2 0 1
3 3 0 0
4 4 1 0
5 5 1 1
6 6 0 0
7 7 1 1
8 8 1 0
9 9 0 1
10 10 0 1
Now to count the number of each combination:
> aggregate(d$IDNum, d[-1], FUN=length)
Var1 Var2 x
1 0 0 2
2 1 0 2
3 0 1 4
4 1 1 2
The values in d$IDNum aren't actually used here, but something must be passed to the length function. The values in d$IDNum for each combination are passed to length to get the count.
This will give a slightly different result and will list out all the possibilities regardless of whether they are present or not. Example data:
nam <- c("IDNum",paste0("Var",1:6))
n <- 5
set.seed(23)
dat <- setNames(data.frame(1:n,replicate(6,sample(0:1,n,replace=TRUE))),nam)
# IDNum Var1 Var2 Var3 Var4 Var5 Var6
#1 1 1 0 1 0 1 1
#2 2 0 1 1 1 0 1
#3 3 0 1 0 1 0 1
#4 4 1 1 0 1 1 0
#5 5 1 1 1 1 0 1
Count em up:
data.frame(table(dat[-1]))
# Var1 Var2 Var3 Var4 Var5 Var6 Freq
#1 0 0 0 0 0 0 0
#...
#28 1 1 0 1 1 0 1
#...
#43 0 1 0 1 0 1 1
#...
#47 0 1 1 1 0 1 1
#48 1 1 1 1 0 1 1
#...
#54 1 0 1 0 1 1 1
#...
#64 1 1 1 1 1 1 0
You can as well using the count function in dplyr:
library(dplyr)
r <- function() rbinom(10, 1, .5)
d <- data.frame(IDNum=1:10, Var1=r(), Var2=r())
d
d %>% count(Var1, Var2)
Output:
Var1 Var2 n
1 0 0 3
2 0 1 3
3 1 0 1
4 1 1 3

Resources