R - Common users across months - r

I have a transaction table with the following columns:
TransactionId UserId YearMonth Group
What I am trying to accomplish is to get unique users across different months.
Eg:
YearMonth Group UsersCountMonth1 UsersCountMonth2 UsersCountMonth3
201301 A 1000 900 800
201301 B 1200 940 700
201302 B 1300 1140 900
201303 A 1200 970 706
Basically Month1 and Month2 are the incremental months based on YearMonth value for the record.
I am using this result to perform retention analysis.

I remember you were looking for a possibility to analyze subscription cohorts, yesterday. So I guess you can do
library(tidyverse)
# Reproducible toy transaction log: n rows drawn with replacement from
# 20 user ids, the 12 month starts of 2016 and two groups.
set.seed(1)
n <- 100
df <- data.frame(
  user = sample(1:20, n, replace = TRUE),
  transDate = sample(
    seq(as.Date("2016-01-01"), as.Date("2016-12-31"), "1 month"),
    n,
    replace = TRUE
  ),
  group = sample(LETTERS[1:2], n, replace = TRUE)
)
# Inclusive count of calendar months from `d1` to `d2`: two dates in the
# same month give 1, dates in consecutive months give 2, and so on.
# Adapted from
# http://stackoverflow.com/questions/1995933/number-of-months-between-two-dates
diffmonth <- function(d1, d2) {
  # Running month number of a date (12 * year component + month component
  # of its POSIXlt representation).
  month_index <- function(when) {
    parts <- as.POSIXlt(as.Date(when, origin = "1900-01-01"))
    parts$year * 12 + parts$mon
  }
  first_index <- month_index(d1)
  last_index <- month_index(d2)
  last_index - first_index + 1L
}
# Cohort table: a user's cohort within a group is the date of their first
# transaction there; `month` is the inclusive month offset from that date.
# NOTE: n() counts rows (transactions). To count unique users per cell,
# summarise with n_distinct(user) instead.
df %>%
  group_by(user, group) %>%
  mutate(cohort = min(transDate), month = diffmonth(cohort, transDate)) %>%
  # Fuse cohort date and group into one label, e.g. "2016-01-01_A"
  unite(cohort, cohort, group, remove = TRUE) %>%
  group_by(month, cohort) %>%
  summarise(n = n()) %>%
  # One column per month offset; cohort/offset pairs with no rows become 0
  spread(month, n, fill = 0, drop = FALSE)
# # A tibble: 16 × 12
# cohort `1` `2` `3` `4` `5` `6` `7` `8` `9` `10` `11`
# * <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2016-01-01_A 5 1 0 1 1 1 1 0 2 0 0
# 2 2016-02-01_A 1 0 0 0 0 0 0 0 1 0 1
# 3 2016-02-01_B 4 1 2 1 0 1 2 0 1 1 0
# 4 2016-03-01_A 5 0 3 1 2 2 2 0 1 2 0
# 5 2016-03-01_B 4 0 0 0 2 0 1 0 0 0 0
# 6 2016-04-01_A 4 0 2 1 0 1 0 2 1 0 0
# 7 2016-04-01_B 1 0 0 0 0 0 0 0 0 0 0
# 8 2016-05-01_A 2 0 2 2 0 0 2 0 0 0 0
# 9 2016-05-01_B 1 0 0 1 0 0 2 0 0 0 0
# 10 2016-06-01_A 1 0 2 0 0 1 0 0 0 0 0
# 11 2016-06-01_B 4 0 0 0 0 1 1 0 0 0 0
# 12 2016-07-01_A 1 0 1 0 0 0 0 0 0 0 0
# 13 2016-08-01_B 4 1 1 0 0 0 0 0 0 0 0
# 14 2016-09-01_A 1 0 0 0 0 0 0 0 0 0 0
# 15 2016-10-01_B 1 0 0 0 0 0 0 0 0 0 0
# 16 2016-12-01_A 3 0 0 0 0 0 0 0 0 0 0

Related

How to convert a dataset where some subjects chose multiple answers into a dummy variables format?

I have this example dataset
df <- data.frame(subjects = 1:12,
Why_are_you_not_happy =
c(1,2,"1,2,5",5,1,2,"3,4",3,2,"1,5",3,4),
why_are_you_sad =
c("1,2,3",1,2,3,"4,5,3",2,1,4,3,1,1,1) )
And would like to convert it into a dummy variables format (based on the 5 answers of each question). Can someone guide me through an effective way ? thanks.
You can separate_rows for multiple choices, convert to dummy and summarise by subjects (to get one row per subjects, with all their choices).
library(fastDummies)
library(tidyr)
library(dplyr)
# Build one row per subject with a 0/1 column for every answer given.
df %>%
# Split the comma-separated answers of both questions into separate rows
separate_rows(Why_are_you_not_happy, why_are_you_sad) %>%
# One indicator column per observed answer; the two source columns are removed
dummy_cols(c("Why_are_you_not_happy", "why_are_you_sad"),
remove_selected_columns = TRUE) %>%
# Collapse back to one row per subject: max() is 1 if any of the subject's
# rows carried that answer
group_by(subjects) %>%
summarise(across(everything(), max))
output
# A tibble: 12 × 11
subjects Why_are_you…¹ Why_a…² Why_a…³ Why_a…⁴ Why_a…⁵ why_a…⁶ why_a…⁷ why_a…⁸ why_a…⁹ why_a…˟
<int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 1 1 0 0 0 0 1 1 1 0 0
2 2 0 1 0 0 0 1 0 0 0 0
3 3 1 1 0 0 1 0 1 0 0 0
4 4 0 0 0 0 1 0 0 1 0 0
5 5 1 0 0 0 0 0 0 1 1 1
6 6 0 1 0 0 0 0 1 0 0 0
7 7 0 0 1 1 0 1 0 0 0 0
8 8 0 0 1 0 0 0 0 0 1 0
9 9 0 1 0 0 0 0 0 1 0 0
10 10 1 0 0 0 1 1 0 0 0 0
11 11 0 0 1 0 0 1 0 0 0 0
12 12 0 0 0 1 0 1 0 0 0 0

Assign an NA value to a numeric variable using IF statement in R

I have a function that calculates the difference between rows (based on the same columns) in 2 datasets. Here is a sample and function
#################
## Sample ##
#################
# data frame for recipients
IDr= c(seq(1,4))
Blood_type_r=c("A","B","AB","O")
data_R=data.frame(IDr,Blood_type_r,A=rep(0,4),B=c(rep(0,3),1),C=c(rep(1,3),0),D=rep(1,4),E=c(rep(0,2),rep(1,1),0),stringsAsFactors=FALSE)
data_R
IDr Blood_type_r A B C D E
1 1 A 0 0 1 1 0
2 2 B 0 0 1 1 0
3 3 AB 0 0 1 1 1
4 4 O 0 1 0 1 0
# data frame for donors
IDd= c(seq(1,8))
Blood_type_d= c(rep("A", each=2),rep("B", each=2),rep("AB", each=2),rep("O", each=2))
WD= c(rep(0.25, each=2),rep(0.125, each=2),rep(0.125, each=2),rep(0.5, each=2))
data_D=data.frame(IDd,Blood_type_d,A=c(rep(0,6),1,1),B=c(rep(0,6),1,1),C=c(rep(1,7),0),D=rep(1,8),E=c(rep(0,6),rep(1,1),0),WD,stringsAsFactors=FALSE)
data_D
IDd Blood_type_d A B C D E WD
1 1 A 0 0 1 1 0 0.250
2 2 A 0 0 1 1 0 0.250
3 3 B 0 0 1 1 0 0.125
4 4 B 0 0 1 1 0 0.125
5 5 AB 0 0 1 1 0 0.125
6 6 AB 0 0 1 1 0 0.125
7 7 O 1 1 1 1 1 0.500
8 8 O 1 1 0 1 0 0.500
# function
# Mismatch of each row of D against row `i` of R, keeping acceptable rows.
#   D, R       tables compared column by column
#   i          row of R that every row of D is compared with
#   threshold  largest mismatch total that is still returned
# Returns the column-wise differences D - R[i, ] with negative values set
# to 0, plus a `mismatch` row total, restricted to rows whose total is at
# most `threshold`.
soustraction.i <- function(D, R, i, threshold) {
  donors <- as.data.frame(D)
  recipient <- as.data.frame(R)[i, ]
  gaps <- map2_df(donors, recipient, `-`)
  gaps[gaps < 0] <- 0
  gaps$mismatch <- rowSums(gaps)
  keep <- which(gaps$mismatch <= threshold)
  gaps[keep, ]
}
soustraction.i(data_D[,3:7],data_R[,3:7],1,3)
# A tibble: 8 x 6
A B C D E mismatch
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 0 0
2 0 0 0 0 0 0
3 0 0 0 0 0 0
4 0 0 0 0 0 0
5 0 0 0 0 0 0
6 0 0 0 0 0 0
7 1 1 0 0 1 3
8 1 1 0 0 0 2
What I want to do is this: when I set my threshold to 0 and the mismatch is greater than 0, I do not want to lose these patients; instead I want to keep them and assign an NA value. For example, if I set the threshold to 0 I currently get
soustraction.i(data_D[,3:7],data_R[,3:7],1,0)
# A tibble: 6 x 6
A B C D E mismatch
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 0 0
2 0 0 0 0 0 0
3 0 0 0 0 0 0
4 0 0 0 0 0 0
5 0 0 0 0 0 0
6 0 0 0 0 0 0
I am losing 2 patients, to which I would instead like to assign an NA value. So the output would be
# Desired output
# A tibble: 8 x 6
A B C D E mismatch
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 0 0
2 0 0 0 0 0 0
3 0 0 0 0 0 0
4 0 0 0 0 0 0
5 0 0 0 0 0 0
6 0 0 0 0 0 0
7 1 1 0 0 1 NA
8 1 1 0 0 0 NA
Here is what i tried so far and it gives me a warning and not doing the right thing
# Asker's attempt, kept exactly as posted: it does NOT produce the desired
# NA values (see the notes inside).
soustraction.j=function(D,R,i,threshold){
D=as.data.frame(D)
R=as.data.frame(R)
dif=map2_df(D, R[i,], `-`)
dif[dif<0] = 0
dif$mismatch=rowSums(dif)
if(threshold==0){
# PROBLEM: `if` needs a single TRUE/FALSE, but `dif$mismatch > 0` has one
# element per row, so only the first row decides the branch. This is the
# warning quoted after the call below; newer R versions raise an error here.
if(dif$mismatch > 0){
# Would overwrite the whole column with NA, not only the offending rows
dif$mismatch= NA
}
}else{
# Any non-zero threshold still drops the rows above the threshold
dif=dif[which(dif$mismatch <= threshold),]
}
return(dif)
}
soustraction.j(data_D[,3:7],data_R[,3:7],1,0)
# A tibble: 8 x 6
A B C D E mismatch
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 0 0
2 0 0 0 0 0 0
3 0 0 0 0 0 0
4 0 0 0 0 0 0
5 0 0 0 0 0 0
6 0 0 0 0 0 0
7 1 1 0 0 1 3
8 1 1 0 0 0 2
#Warning message:
#In if (dif$mismatch > 0) { :
# the condition has length > 1 and only the first element will be used
Thank you in advance for your help
Here is a dplyr solution. It should work for when threshold == 0 and generalize to other threshold values:
# Mismatch of each row of D against row `i` of R; no row is dropped.
# Returns the column-wise differences D - R[i, ] with negatives set to 0,
# plus a `mismatch` row total that is NA wherever it exceeds `threshold`.
soustraction.i <- function(D, R, i, threshold) {
  donors <- as.data.frame(D)
  recipients <- as.data.frame(R)
  gaps <- map2_df(donors, recipients[i, ], `-`)
  gaps[gaps < 0] <- 0
  gaps$mismatch <- rowSums(gaps)
  gaps %>%
    mutate(
      mismatch = case_when(
        mismatch > threshold ~ NA_real_,
        TRUE ~ mismatch
      )
    )
}
Output:
soustraction.i(data_D[,3:7],data_R[,3:7],1,0)
# A tibble: 8 x 6
A B C D E mismatch
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 0 0
2 0 0 0 0 0 0
3 0 0 0 0 0 0
4 0 0 0 0 0 0
5 0 0 0 0 0 0
6 0 0 0 0 0 0
7 1 1 0 0 1 NA
8 1 1 0 0 0 NA
EDIT
Here's one example of a "dplyr-ized" version of your function
# Mismatch of each row of D against row `i` of R, written with dplyr verbs.
#   D, R       tables compared column by column
#   i          row of R that every row of D is compared with
#   threshold  largest mismatch total that is reported
# Returns a plain (ungrouped) tibble holding the differences D - R[i, ]
# with negatives set to 0, plus a `mismatch` row total that is NA wherever
# it exceeds `threshold`.
soustraction.i <- function(D, R, i, threshold) {
  D <- as_tibble(D)
  R <- as_tibble(R)
  map2_df(D, R[i, ], `-`) %>%
    # Clip negative differences to zero
    mutate(across(everything(), ~ pmax(.x, 0))) %>%
    # rowSums() is vectorised, so no rowwise() grouping is needed and the
    # result is not left as a rowwise tibble
    mutate(mismatch = rowSums(across(everything()))) %>%
    mutate(mismatch = if_else(mismatch > threshold, NA_real_, mismatch))
}
Instead of taking subset of rows you can assign them to NA where mismatch is greater than threshold.
# Mismatch of each row of D against row `i` of R; no row is dropped.
# Returns the column-wise differences D - R[i, ] with negatives set to 0,
# plus a `mismatch` row total that is NA wherever it exceeds `threshold`.
soustraction.i <- function(D, R, i, threshold) {
  donors <- as.data.frame(D)
  recipient <- as.data.frame(R)[i, ]
  gaps <- purrr::map2_df(donors, recipient, `-`)
  gaps[gaps < 0] <- 0
  totals <- rowSums(gaps)
  # Blank out totals above the threshold instead of removing their rows
  totals[totals > threshold] <- NA
  gaps$mismatch <- totals
  gaps
}
You can check the output :
soustraction.i(data_D[,3:7],data_R[,3:7],1,3)
# A tibble: 8 x 6
# A B C D E mismatch
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 0 0 0 0 0 0
#2 0 0 0 0 0 0
#3 0 0 0 0 0 0
#4 0 0 0 0 0 0
#5 0 0 0 0 0 0
#6 0 0 0 0 0 0
#7 1 1 0 0 1 3
#8 1 1 0 0 0 2
soustraction.i(data_D[,3:7],data_R[,3:7],1,0)
# A tibble: 8 x 6
# A B C D E mismatch
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 0 0 0 0 0 0
#2 0 0 0 0 0 0
#3 0 0 0 0 0 0
#4 0 0 0 0 0 0
#5 0 0 0 0 0 0
#6 0 0 0 0 0 0
#7 1 1 0 0 1 NA
#8 1 1 0 0 0 NA

convert one factor column to multiple dichotomous columns in r

I have a dataset with PatientID and their diagnoses, and they are as follows :
Id Diagnoses
1 Nerve conditions (e.g., Multiple sclerosis, myasthenia gravis, Guillain-Barre syndrome, demyelinating polyneuropathy)
2 Gastrointestinal conditions (e.g., irritable bowl disease, ulcerative colitis, Chron's disease),Heart conditions,High blood pressure,Migraines/headaches
3 Heart conditions,Traumatic brain injury
4 Chronic pain,Heart conditions,Post-traumatic Stress Disorder (PTSD),Traumatic brain injury
5 Anxiety,Chronic pain,Depression,Sleep apnea
6 High blood pressure
7 High blood pressure
How can I split the Diagnoses column as follows :
Id Anxiety Depression Nerve conditions Sleep apnea Chronic Diseases AND SO ON....
1 0 0 0 1 1
2 1 1 1 1 1
3 1 1 1 1 0
4 0 0 1 1 1
5 1 0 0 0 1
6 1 1 1 0 1
7 1 1 0 1 0
I have tried this code, but I did not get the result:
# Asker's attempt (reported as not giving the desired result): split the
# diagnoses into rows, then try to separate a `Q2.3` column into named
# columns.
df %>%
  separate_rows(Diagnoses, sep = ",") %>%
  separate(Q2.3, into = c("Anxiety", "Depression", "THE REST OF CONDITIONS"), sep = ":\\s*") %>%
  mutate(anxiety1 = str_c("Anxiety", Anxiety))
I appreciate your help.
Does this work:
library(stringr)
library(dplyr)
library(tidyr)
# Remove every parenthesised aside, e.g. " (PTSD)" or " (e.g., ...)". The
# pattern stops at the first ")" so diagnoses listed after an aside are
# kept, and the commas inside an aside no longer act as separators.
df %>%
  mutate(Diagnoses = str_remove_all(Diagnoses, "\\s*\\([^)]*\\)")) %>%
  # One row per (Id, diagnosis), counted, then spread to indicator columns
  separate_rows(Diagnoses, sep = ",") %>%
  count(Id, Diagnoses, name = "Cnt") %>%
  pivot_wider(
    id_cols = Id,
    names_from = Diagnoses,
    values_from = Cnt,
    values_fill = list(Cnt = 0)
  )
# Expected result for the sample data, 7 rows x 12 columns
# (column headers abbreviated here):
#   Id Nerve Gastro Heart HighBP Migraines TBI ChronicPain PTSD Anxiety Depression SleepApnea
#    1     1      0     0      0         0   0           0    0       0          0          0
#    2     0      1     1      1         1   0           0    0       0          0          0
#    3     0      0     1      0         0   1           0    0       0          0          0
#    4     0      0     1      0         0   1           1    1       0          0          0
#    5     0      0     0      0         0   0           1    0       1          1          1
#    6     0      0     0      1         0   0           0    0       0          0          0
#    7     0      0     0      1         0   0           0    0       0          0          0
>

Create a new dataframe with the all possible combinations

Having a dataframe like this:
# `next` is a reserved word in R, so it has to be back-quoted when used as
# an argument name; data.frame() then stores the column as `next.`.
data.frame(previous = c(1,2,2,1,3,3), `next` = c(1,1,2,3,1,3), id = c(1,2,3,4,5,6))
How is it possible to extract a data frame which checks the previous and next columns and creates 9 new columns, each of which is 1 only if that combination of previous and next exists? For example, if previous is 2 and next is 1, the combination is 2 1 and the corresponding column receives a 1.
Example of expected output:
# Expected result: col<previous>_<next> is 1 exactly when the row holds
# that combination. `next` is a reserved word and must be back-quoted.
data.frame(previous = c(1,2,2,1,3,3), `next` = c(1,1,2,3,1,3),
col1_1 = c(1,0,0,0,0,0),
col1_2 = c(0,0,0,0,0,0),
col1_3 = c(0,0,0,1,0,0),
col2_1 = c(0,1,0,0,0,0),
col2_2 = c(0,0,1,0,0,0),
col2_3 = c(0,0,0,0,0,0),
col3_1 = c(0,0,0,0,1,0),
col3_2 = c(0,0,0,0,0,0),
col3_3 = c(0,0,0,0,0,1), id = c(1,2,3,4,5,6))
You could use expand.grid to get all the combinations.
Assuming your data frame is called df and the column next is actually called next. to avoid clashing with the keyword next:
# One indicator column per (previous, next.) pair; the pairs are enumerated
# by expand.grid(), whose first column varies fastest.
pairs <- expand.grid(1:3, 1:3)
as.data.frame(apply(pairs, 1, function(pair) {
  hit <- pair[1] == df$previous & pair[2] == df$next.
  as.numeric(hit)
}))
#> V1 V2 V3 V4 V5 V6 V7 V8 V9
#> 1 1 0 0 0 0 0 0 0 0
#> 2 0 1 0 0 0 0 0 0 0
#> 3 0 0 0 0 1 0 0 0 0
#> 4 0 0 0 0 0 0 1 0 0
#> 5 0 0 1 0 0 0 0 0 0
#> 6 0 0 0 0 0 0 0 0 1
A step-by-step approach might be the following one. I have renamed the next column to next1 to avoid problems:
# Every possible (previous, next1) combination
AllComb <- expand.grid(unique(df$previous), unique(df$next1))
# All-zero indicator matrix: one row per row of df, one column per combination
myframe <- matrix(0, nrow = nrow(df), ncol = nrow(AllComb))
colnames(myframe) <- paste0("col_", AllComb$Var1, "_", AllComb$Var2)
# Visit every ROW of df: the bound must be nrow(df), not ncol(df),
# otherwise only the first three rows are ever filled in
for (id_row in seq_len(nrow(df))) {
  # Column name for this row's combination
  Word <- paste0("col_", df$previous[id_row], "_", df$next1[id_row])
  Colindex <- which(colnames(myframe) == Word) # Finding column index
  myframe[id_row, Colindex] <- 1 # Flag the combination for this row
}
dfRes <- cbind(previous = df$previous, "next" = df$next1, myframe, id = df$id)
# previous next col_1_1 col_2_1 col_3_1 col_1_2 col_2_2 col_3_2 col_1_3 col_2_3 col_3_3 id
# [1,] 1 1 1 0 0 0 0 0 0 0 0 1
# [2,] 2 1 0 1 0 0 0 0 0 0 0 2
# [3,] 2 2 0 0 0 0 1 0 0 0 0 3
# [4,] 1 3 0 0 0 0 0 0 1 0 0 4
# [5,] 3 1 0 0 1 0 0 0 0 0 0 5
# [6,] 3 3 0 0 0 0 0 0 0 0 1 6
Inside a by you could use a switch, because your values are nicely consecutive 1:3. Finally we merge to get the result.
tmp <- by(dat, dat$next., function(x) {
  x1 <- x$previous
  # One-hot encode `previous` (values 1:3) for the rows of this next. group
  o <- `colnames<-`(
    t(vapply(x1, function(z)
      switch(z, c(1, 0, 0), c(0, 1, 0), c(0, 0, 1)), numeric(3))),
    # Label columns <previous>_<next>; all rows of x share one next. value
    paste(1:3, x$next.[1], sep = "_"))
  cbind(x, col = o)
})
# Full outer merge of the per-group pieces; columns a group lacks become NA
res <- Reduce(function(...) merge(..., all = TRUE), tmp)
res[is.na(res)] <- 0 ## set NA to zero if wanted
Result
res[order(res$id),] ## order by ID if needed
# previous next. id col.1_1 col.2_1 col.3_1 col.1_2 col.2_2 col.3_2 col.1_3 col.2_3 col.3_3
# 1 1 1 1 1 0 0 0 0 0 0 0 0
# 3 2 1 2 0 1 0 0 0 0 0 0 0
# 4 2 2 3 0 0 0 0 1 0 0 0 0
# 2 1 3 4 0 0 0 0 0 0 1 0 0
# 5 3 1 5 0 0 1 0 0 0 0 0 0
# 6 3 3 6 0 0 0 0 0 0 0 0 1
Data
dat <- structure(list(previous = c(1, 2, 2, 1, 3, 3), next. = c(1, 1,
2, 3, 1, 3), id = c(1, 2, 3, 4, 5, 6)), class = "data.frame", row.names = c(NA,
-6L))
Note: next as column name is not particularly a good idea, since it has a special meaning in R.
Here is a tidyverse approach:
library(tidyr)
library(dplyr)
df %>%
rowid_to_column() %>%
complete(previous, nxt) %>%
unite(col , previous, nxt, sep = "_", remove = FALSE) %>%
pivot_wider(names_from = col, values_from = rowid, values_fn = list(rowid = ~1), values_fill = list(rowid = 0)) %>%
na.omit() %>%
arrange(id)
# A tibble: 6 x 12
previous nxt id `1_1` `1_2` `1_3` `2_1` `2_2` `2_3` `3_1` `3_2` `3_3`
<dbl> <dbl> <dbl> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 1 1 1 1 0 0 0 0 0 0 0 0
2 2 1 2 0 0 0 1 0 0 0 0 0
3 2 2 3 0 0 0 0 1 0 0 0 0
4 1 3 4 0 0 1 0 0 0 0 0 0
5 3 1 5 0 0 0 0 0 0 1 0 0
6 3 3 6 0 0 0 0 0 0 0 0 1
This is another tidyverse solution that differs a little from (and is maybe more concise than) @H1's.
library(dplyr)
library(tidyr)
df %>%
mutate(n = 1) %>%
complete(id, previous, next., fill = list(n = 0)) %>%
unite(col, previous, next.) %>%
pivot_wider(names_from = col, names_prefix = "col", values_from = n) %>%
right_join(df)
# # A tibble: 6 x 12
# id col1_1 col1_2 col1_3 col2_1 col2_2 col2_3 col3_1 col3_2 col3_3 previous next.
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 1 0 0 0 0 0 0 0 0 1 1
# 2 2 0 0 0 1 0 0 0 0 0 2 1
# 3 3 0 0 0 0 1 0 0 0 0 2 2
# 4 4 0 0 1 0 0 0 0 0 0 1 3
# 5 5 0 0 0 0 0 0 1 0 0 3 1
# 6 6 0 0 0 0 0 0 0 0 1 3 3
You can try the code below
# Add a 9-column indicator matrix `col` labelled <Previous>_<Next>.
# expand.grid() varies its first column fastest, so the label for
# (Previous, Next) sits at position Previous + (Next - 1) * 3.
dfout <- within(df,
  col <- `colnames<-`(
    t(vapply(Previous + (Next - 1) * 3,
             function(v) replace(rep(0, 9), v, 1),
             numeric(9))),
    do.call(paste, c(expand.grid(1:3, 1:3), sep = "_"))))
such that
> dfout
Previous Next id col.1_1 col.2_1 col.3_1 col.1_2 col.2_2 col.3_2 col.1_3 col.2_3 col.3_3
1 1 1 1 1 0 0 0 0 0 0 0 0
2 2 1 2 0 1 0 0 0 0 0 0 0
3 2 2 3 0 0 0 0 1 0 0 0 0
4 1 3 4 0 0 0 0 0 0 1 0 0
5 3 1 5 0 0 1 0 0 0 0 0 0
6 3 3 6 0 0 0 0 0 0 0 0 1

Convert from long to wide format from categorical data

Having categorical data like this:
data.frame(id = c(1,2,3,4,5), stock1 = c(1,2,0,1,2), stock2 = c(0,1,0,1,1), end = c(0,1,3,0,3), start = c(2,3,0,1,0))
id stock1 stock2 end start
1 1 1 0 0 2
2 2 2 1 1 3
3 3 0 0 3 0
4 4 1 1 0 1
5 5 2 1 3 0
How is it possible to convert them from long to wide format in which every column will show if exist or not with specific name?
Example of expected output:
# Expected layout: <column>_<value> is 1 when that column holds that value
data.frame(id = c(1,2,3,4,5), stock1_0 = c(0,0,1,0,0), stock1_1 = c(1,0,0,1,0), stock1_2 = c(0,1,0,0,1), stock2_0 = c(1,0,1,0,0), stock2_1 = c(0,1,0,1,1), end_0 = c(1,0,0,1,0), end_1 = c(0,1,0,0,0), end_3 = c(0,0,1,0,1), start_0 = c(0,0,1,0,1), start_1 = c(0,0,0,1,0), start_2 = c(1,0,0,0,0), start_3 = c(0,1,0,0,0))
id stock1_0 stock1_1 stock1_2 stock2_0 stock2_1 end_0 end_1 end_3 start_0 start_1 start_2 start_3
1 1 0 1 0 1 0 1 0 0 0 0 1 0
2 2 0 0 1 0 1 0 1 0 0 0 0 1
3 3 1 0 0 1 0 0 0 1 1 0 0 0
4 4 0 1 0 0 1 1 0 0 0 1 0 0
5 5 0 0 1 0 1 0 0 1 1 0 0 0
You could use model.matrix.
# One-hot encode every column except the first (id) with model.matrix().
data.frame(dat[1],
  do.call(cbind, lapply(seq_along(dat)[-1], function(x) {
    f <- as.factor(dat[[x]])
    # Suffix each indicator with the level it encodes (0, 1, 3, ...),
    # not with its position among the levels
    `colnames<-`(model.matrix(~ f - 1),
                 paste(names(dat[x]), levels(f), sep = "_"))
  })))
# id stock1_0 stock1_1 stock1_2 stock2_0 stock2_1 end_0 end_1 end_3 start_0
# 1 1 0 1 0 1 0 1 0 0 0
# 2 2 0 0 1 0 1 0 1 0 0
# 3 3 1 0 0 1 0 0 0 1 1
# 4 4 0 1 0 0 1 1 0 0 0
# 5 5 0 0 1 0 1 0 0 1 1
# start_1 start_2 start_3
# 1 0 1 0
# 2 0 0 1
# 3 0 0 0
# 4 1 0 0
# 5 0 0 0
Data:
dat <- structure(list(id = c(1, 2, 3, 4, 5), stock1 = c(1, 2, 0, 1,
2), stock2 = c(0, 1, 0, 1, 1), end = c(0, 1, 3, 0, 3), start = c(2,
3, 0, 1, 0)), class = "data.frame", row.names = c(NA, -5L))
library(data.table)
setDT(df)
dcast(melt(df, 'id'),
id ~ paste0(variable, '_', value),
fun.aggregate = length)
# id end_0 end_1 end_3 start_0 start_1 start_2 start_3 stock1_0
# 1: 1 1 0 0 0 0 1 0 0
# 2: 2 0 1 0 0 0 0 1 0
# 3: 3 0 0 1 1 0 0 0 1
# 4: 4 1 0 0 0 1 0 0 0
# 5: 5 0 0 1 1 0 0 0 0
# stock1_1 stock1_2 stock2_0 stock2_1
# 1: 1 0 1 0
# 2: 0 1 0 1
# 3: 0 0 1 0
# 4: 1 0 0 1
# 5: 0 1 0 1
One way would be to get data in long format, combine column name with value and get the data back in wide format.
library(dplyr)
library(tidyr)
# Long -> label -> wide: build one 0/1 column per column/value combination.
df %>%
# Stack every column except id into name/value pairs
pivot_longer(cols = -id) %>%
# Fuse each pair into a single label in `name`, e.g. "stock1_2"
unite(name, name, value) %>%
# Re-create `value` as the indicator to spread
mutate(value = 1) %>%
# One column per label; labels a row does not have are filled with 0
pivot_wider(values_fill = list(value = 0))
# A tibble: 5 x 13
# id stock1_1 stock2_0 end_0 start_2 stock1_2 stock2_1 end_1 start_3 stock1_0 end_3 start_0 start_1
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 1 1 1 1 0 0 0 0 0 0 0 0
#2 2 0 0 0 0 1 1 1 1 0 0 0 0
#3 3 0 1 0 0 0 0 0 0 1 1 1 0
#4 4 1 0 1 0 0 1 0 0 0 0 0 1
#5 5 0 0 0 0 1 1 0 0 0 1 1 0

Resources