Decide if variables are equal by rank and id - r

DF<-data.frame(id=c(1,1,1,2,2,2),rank=c("1","2","3","1","2","3"),code=c("A","B","B","B","B","A"))
DF
id rank code
1 A1 1 A
2 A1 2 B
3 A1 3 B
4 B2 1 B
5 B2 2 B
6 B2 3 A
Desired output:
id rank code type1 type2 type3
1 A1 1 A aa MIX MIX
2 A1 2 B NA MIX MIX
3 A1 3 B NA NA MIX
4 B2 1 B bb bb MIX
5 B2 2 B NA bb MIX
6 B2 3 A NA NA MIX
All is grouped by id
type1 gets code where rank = 1.
type2 gets code where rank = 1-2. If code is different in rank 1 and 2, then MIX
type3 gets code where rank = 1-3. etc. etc.
Anyone? :)

If the column 'code' is factor, convert to character with as.character or use type.convert (automatically), then grouped by 'id', create the conditions with case_when to create the columns, 'type1', 'type2' and 'type3'
library(dplyr)
DF %>%
type.convert(as.is = TRUE) %>%
group_by(id) %>%
mutate(type1 = case_when(rank == 1
~ strrep(tolower(code), 2)),
type2 = case_when(rank %in% 1:2 & all(c(1, 2) %in% rank) &
n_distinct(code[rank %in% 1:2]) == 1
~ strrep(tolower(code), 2),
rank %in% 1:2 & all(c(1, 2) %in% rank) &
n_distinct(code[rank %in% 1:2]) > 1 ~
"MIX"),
type3 = case_when(rank %in% 1:3 & all(c(1, 2, 3) %in% rank) &
n_distinct(code[rank %in% 1:3]) == 1 ~
strrep(tolower(code), 2), rank %in% 1:3 &
all(c(1, 2, 3) %in% rank) & n_distinct(code[rank %in% 1:3]) > 1 ~
"MIX")) %>%
ungroup
-output
# A tibble: 7 × 6
id rank code type1 type2 type3
<int> <int> <chr> <chr> <chr> <chr>
1 1 1 A aa MIX MIX
2 1 2 B <NA> MIX MIX
3 1 3 B <NA> <NA> MIX
4 2 1 B bb bb MIX
5 2 2 B <NA> bb MIX
6 2 3 A <NA> <NA> MIX
7 3 1 A aa <NA> <NA>
data
DF <- data.frame(id=c(1,1,1,2,2,2,3),
rank=c("1","2","3","1","2","3","1"),
code=c("A","B","B","B","B","A","A"))

With a slight modification to my answer from your previous question
maxtype=3
do.call(
rbind,
by(DF,list(DF$id),function(x){
y=list()
for (i in 1:maxtype) {
tmp=rep(NA,nrow(x))
idx=as.numeric(x$rank)<=i
if (length(unique(x$code[idx]))==1) {
tmp[idx]=paste0(rep(tolower(x$code[1]),2),collapse="")
} else {
tmp[idx]="MIX"
}
y[[paste0("type",i)]]=tmp
}
cbind(x,y)
})
)
id rank code type1 type2 type3
1.1 1 1 A aa MIX MIX
1.2 1 2 B <NA> MIX MIX
1.3 1 3 B <NA> <NA> MIX
2.4 2 1 B bb bb MIX
2.5 2 2 B <NA> bb MIX
2.6 2 3 A <NA> <NA> MIX
Also note that your id column is different in DF and your output.

Related

R: creating combinations of elements within a group and adding up numbers associated with combinations in a new data frame

I have the following dataset:
Letter ID Number
A A1 1
A A2 2
A A3 3
B B1 1
B B2 2
B B3 3
B B4 4
My aim is first to create all possible combinations of IDs within the same "Letter" group. For example, for the letter A, it would be only three combinations: A1-A2,A2-A3,and A1-A3. The same IDs ordered differently don't count as a new combination, so for example A1-A2 is the same as A2-A1.
Then, within those combinations, I want to add up the numbers from the "Number" column associated with those IDs. So for the combination A1-A2, which are associated with 1 and 2 in the "Number" column, this would result in the number 1+2=3.
Finally, I want to place the ID combinations, added numbers and original Letter in a new data frame. Something like this:
Letter Combination Add.Number
A A1-A2 3
A A2-A3 5
A A1-A3 4
B B1-B2 3
B B2-B3 5
B B3-B4 7
B B1-B3 4
B B2-B4 6
B B1-B4 5
How can I do this in R, ideally using the package dplyr?
library(dplyr)
letter <- c("A","A","A","B","B","B","B")
df <-
data.frame(letter) %>%
group_by(letter) %>%
mutate(
number = row_number(),
id = paste0(letter,number)
)
df %>%
full_join(df,by = "letter") %>%
filter(number.x < number.y) %>%
mutate(
combination = paste0(id.x,"-",id.y),
add_number = number.x + number.y) %>%
select(letter,combination,add_number)
# A tibble: 9 x 3
# Groups: letter [2]
letter combination add_number
<chr> <chr> <int>
1 A A1-A2 3
2 A A1-A3 4
3 A A2-A3 5
4 B B1-B2 3
5 B B1-B3 4
6 B B1-B4 5
7 B B2-B3 5
8 B B2-B4 6
9 B B3-B4 7
In base R, using combn:
df <- data.frame(
Letter = c("A","A","A","B","B","B","B"),
Id = c("A1","A2","A3","B1","B2","B3","B4"),
Number = c(1,2,3,1,2,3,4))
# combinations
l<-lapply(split(df$Id, df$Letter) ,function(x)
setNames(data.frame(t(combn(x,2))), c("L1","L2")))
n<-lapply(split(df$Number, df$Letter) ,function(x)
setNames(data.frame(t(combn(x,2))), c("N1","N2")))
# rbind all
result <- do.call(rbind, mapply(cbind, Letter=names(l), l, n, SIMPLIFY = F))
result$combination <- paste(result$L1, result$L2, sep="-")
result$sum = result$N1 + result$N2
result
#> Letter L1 L2 N1 N2 combination sum
#> A.1 A A1 A2 1 2 A1-A2 3
#> A.2 A A1 A3 1 3 A1-A3 4
#> A.3 A A2 A3 2 3 A2-A3 5
#> B.1 B B1 B2 1 2 B1-B2 3
#> B.2 B B1 B3 1 3 B1-B3 4
#> B.3 B B1 B4 1 4 B1-B4 5
#> B.4 B B2 B3 2 3 B2-B3 5
#> B.5 B B2 B4 2 4 B2-B4 6
#> B.6 B B3 B4 3 4 B3-B4 7

decide 1 or 2 or both grouped by 2 variables

Question updated 9/10 !
DF<-data.frame(id=c(1,1,1,2,2,2),rank=c("1","2","3","1","2","3"),code=c("A","B","B","B","B","A"))
DF
id rank code
1 A1 1 A
2 A1 2 B
3 A1 3 B
4 B2 1 B
5 B2 2 B
6 B2 3 A
Desired output:
id rank code type1 type2 type3
1 A1 1 A aa MIX MIX
2 A1 2 B NA MIX MIX
3 A1 3 B NA NA MIX
4 B2 1 B bb bb MIX
5 B2 2 B NA bb MIX
6 B2 3 A NA NA MIX
All is grouped by id
type1 gets code where rank = 1.
type2 gets code where rank = 1-2. If code is different in rank 1 and 2, then MIX
type3 gets code where rank = 1-3. etc. etc.
Anyone? :)
Here's a dplyr solution using ifelse and a temporary column to reduce boilerplate:
library(dplyr)
DF %>%
group_by(id) %>%
mutate(a = code[rank == 1],
type1 = ifelse(rank > 1, NA,
ifelse(all(code[!(rank > 1)] == a[1]), a[1], "MIX")),
type2 = ifelse(rank > 2, NA,
ifelse(all(code[!(rank > 2)] == a[1]), a[1], "MIX")),
type3 = ifelse(rank > 3, NA,
ifelse(all(code[!(rank > 3)] == a[1]), a[1], "MIX"))) %>%
select(-a)
#> # A tibble: 6 x 6
#> # Groups: id [2]
#> id rank code type1 type2 type3
#> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 1 1 A A MIX MIX
#> 2 1 2 B NA MIX MIX
#> 3 1 3 B NA NA MIX
#> 4 2 1 B B B MIX
#> 5 2 2 B NA B MIX
#> 6 2 3 A NA NA MIX
Using dplyr with case_when statements:
DF %>%
group_by(id) %>%
mutate(type2_grp = if_else(rank <= 2, 1, 0),
type3_grp = if_else(rank <= 3, 1, 0)) %>%
mutate(type1 = case_when(rank == 1 ~ code)) %>%
group_by(id, type2_grp) %>%
mutate(type2 = case_when(type2_grp == 1 & length(unique(code)) > 1 ~ "MIX",
type2_grp == 1 & code == "A" ~ "A",
type2_grp == 1 & code == "B" ~ "B")) %>%
group_by(id, type3_grp) %>%
mutate(type3 = case_when(type3_grp == 1 & length(unique(code)) > 1 ~ "MIX",
type3_grp == 1 & code == "A" ~ "A",
type3_grp == 1 & code == "B" ~ "B")) %>%
ungroup() %>%
select(-type2_grp, -type3_grp)
Which creates:
# A tibble: 6 x 6
id rank code type1 type2 type3
<dbl> <chr> <chr> <chr> <chr> <chr>
1 1 1 A A MIX MIX
2 1 2 B NA MIX MIX
3 1 3 B NA NA MIX
4 2 1 B B B MIX
5 2 2 B NA B MIX
6 2 3 A NA NA MIX
A base R solution for an arbitrary number of "type" columns
maxtype=3
do.call(rbind,
by(DF,list(DF$id),function(x){
y=list()
for (i in 1:maxtype) {
tmp=rep(NA,nrow(x))
idx=as.numeric(x$rank)<=i
if (length(unique(x$code[idx]))==1) {
tmp[idx]=x$code[1]
} else {
tmp[idx]="MIX"
}
y[[paste0("type",i)]]=tmp
}
cbind(x,y)
})
)
id rank code type1 type2 type3
1.1 1 1 A A MIX MIX
1.2 1 2 B <NA> MIX MIX
1.3 1 3 B <NA> <NA> MIX
2.4 2 1 B B B MIX
2.5 2 2 B <NA> B MIX
2.6 2 3 A <NA> <NA> MIX
Assuming DF is sorted by id then rank, your type columns for each id will be an upper triangular matrix of "MIX" subset with an upper triangular matrix of the first code value for as many rows as it appears.
A data.table solution:
library(data.table)
DF <- data.frame(id=c(1,1,1,2,2,2),rank=c("1","2","3","1","2","3"),code=c("A","B","B","B","B","A"))
setDT(DF)[, `:=`(rank = factor(rank), code = factor(code))]
maxRank <- nlevels(DF$rank)
naLvl <- nlevels(DF$code) + 2L
mTri <- matrix(nlevels(DF$code) + 1L, nrow = maxRank, ncol = maxRank)
mTri[lower.tri(mTri)] <- naLvl
typeMat <- function(rank, code) {
firstrep <- rle(code)[[1]][1]
mSubTri <- matrix(naLvl, nrow = firstrep, ncol = firstrep)
mSubTri[upper.tri(mSubTri, diag = TRUE)] <- code[1]
mOut <- mTri
mOut[1:firstrep, 1:firstrep] <- mSubTri
return(mOut[rank,, drop = FALSE])
}
DF <- cbind(DF, as.data.table(do.call(rbind, DF[, (type = list(list(typeMat(as.integer(rank), as.integer(code))))), by = id]$V1)))
typeCols <- 4:(3 + maxRank)
DF[, (typeCols) := lapply(.SD, function(x) {factor(x, levels = 1:naLvl, labels = c(levels(code), "MIX", NA), exclude = NULL)}), .SDcols = typeCols]
setnames(DF, 4:(3 + maxRank), paste0("type", 1:maxRank))
> DF
id rank code type1 type2 type3
1: 1 1 A A MIX MIX
2: 1 2 B <NA> MIX MIX
3: 1 3 B <NA> <NA> MIX
4: 2 1 B B B MIX
5: 2 2 B <NA> B MIX
6: 2 3 A <NA> <NA> MIX

Conditionally copy contents of one column to another [duplicate]

This question already has answers here:
How to join (merge) data frames (inner, outer, left, right)
(13 answers)
Closed 2 years ago.
I want to add extra columns depending on values of code which are defined in VAR
DF <- data.frame(id = c(1:5), code = c("A","B","C","D","E"), sub = c("A1","B1","C1","D1","E1"))
id code sub
1 1 A A1
2 2 B B1
3 3 C C1
4 4 D D1
5 5 E E1
VAR <- c("A","B")
How result should be:
id code sub AB ABsub
1 1 A A1 A A1
2 2 B B1 B B1
3 3 C C1 <NA> <NA>
4 4 D D1 <NA> <NA>
5 5 E E1 <NA> <NA>
Or using dplyr:
library(dplyr)
DF<-data.frame(id=c(1:5),code=c("A","B","C","D","E"),sub=c("A1","B1","C1","D1","E1"), stringsAsFactors = FALSE)
VAR<-c("A","B")
DF <- DF %>%
mutate(AB = ifelse(code %in% {{VAR}}, code, NA_character_)) %>%
mutate(ABsub = ifelse(code == AB, sub, NA_character_))
with:
> DF
id code sub AB ABsub
1 1 A A1 A A1
2 2 B B1 B B1
3 3 C C1 <NA> <NA>
4 4 D D1 <NA> <NA>
5 5 E E1 <NA> <NA>
Also works if VAR would equal c("A", "B", "C") but we do not know if that is what you are after.
A simple base R option using merge + subset
merge(DF,subset(DF,code %in% VAR),by = "id",all = TRUE)
such that
> merge(DF,subset(DF,code %in% VAR),by = "id",all = TRUE)
id code.x sub.x code.y sub.y
1 1 A A1 A A1
2 2 B B1 B B1
3 3 C C1 <NA> <NA>
4 4 D D1 <NA> <NA>
5 5 E E1 <NA> <NA>
A dplyr solution with across():
library(dplyr)
DF %>%
mutate(across(-id, ~ replace(.x, !(code %in% VAR), NA), .names = "AB{col}"))
# id code sub ABcode ABsub
# 1 1 A A1 A A1
# 2 2 B B1 B B1
# 3 3 C C1 <NA> <NA>
# 4 4 D D1 <NA> <NA>
# 5 5 E E1 <NA> <NA>
or with left_join():
DF %>%
filter(code %in% VAR) %>%
left_join(DF, ., by = "id", suffix = c("", "AB"))
# id code sub codeAB subAB
# 1 1 A A1 A A1
# 2 2 B B1 B B1
# 3 3 C C1 <NA> <NA>
# 4 4 D D1 <NA> <NA>
# 5 5 E E1 <NA> <NA>
Note: If you have multiple columns in your real data, you don't need to type
mutate(Col1 = ifelse(...), Col2 = ifelse(...), etc.)
one by one.
Here's a solution
ABsub <- ifelse(DF$code %in% VAR, DF$code, NA)
cbind(DF, ABsub)

Using lapply to transpose part of a column and add it as new columns to a data frame

I've been searching for some clarity on this one, but cannot find something that applies to my case, I constructed a DF very similar to this one (but with considerably more data, over a million rows in total)
Key1 <- c("A", "B", "C", "A", "C", "B", "B", "C", "A", "C")
Key2 <- c("A1", "B1", "C1", "A2", "C2", "B2", "B3", "C3", "A3", "C4")
NumVal <- c(2, 3, 1, 4, 6, 8, 2, 3, 1, 0)
DF1 <- as.data.frame(cbind(Key1, Key2, NumVal), stringsAsFactors = FALSE) %>% arrange(Key2)
ConsId <- c(1:10)
DF1 <- cbind(DF1, ConsId)
Now, what I want to do is to add lets say 3 new columns (in real life I need 12, but in order to be more graphic in this toy example we'll use 3) to the data frame, where each row corresponds to the values of $NumVal with the same $Key1 and greater than or equal $ConsId to the ones in each row and filling the remaining spaces with NA's, here is the expected result in case I wasn't very clear:
Key1 Key2 NumVal ConsId V1 V2 V3
A A1 2 1 2 4 1
A A2 4 2 4 1 NA
A A3 1 3 1 NA NA
B B1 3 4 3 8 2
B B2 8 5 8 2 NA
B B3 2 6 2 NA NA
C C1 1 7 1 6 3
C C2 6 8 6 3 0
C C3 3 9 3 0 NA
C C4 0 10 0 NA NA
Now I'm using a do.call(rbind), and even tough it works fine, it takes way too long for my real data with a bit over 1 million rows (around 6 hrs), I also tried with the bind_rows dplyr function but it took a bit longer so I stuck with the do.call option, here's an example of the code I'm using:
# Function
TranspNumVal <- function(i){
Id <- DF1[i, "Key1"]
IdCons <- DF1[i, "ConsId"]
myvect <- as.matrix(filter(DF1, Id == Key1, ConsId >= IdCons) %>% select(NumVal))
Result <- as.data.frame(t(myvect[1:3]))
return(Result)
}
# Applying the function to the entire data frame
DF2 <- do.call(rbind, lapply(1:NROW(DF1), function(i) TranspNumVal(i)))
DF3 <- cbind(DF1, DF2)
Maybe changing the class is causing the code to be so inefficient, or maybe I'm just not finding a better way to vectorize my problem (you don't want to know how long it took with a nested loop), I'm fairly new to R and have just started fooling around with dplyr, so I'm open to any suggestion about how to optimize my code
We can use dplyr::lead
DF1 %>%
group_by(Key1) %>%
mutate(
V1 = NumVal,
V2 = lead(NumVal, n = 1),
V3 = lead(NumVal, n = 2))
## A tibble: 10 x 7
## Groups: Key1 [3]
# Key1 Key2 NumVal ConsId V1 V2 V3
# <chr> <chr> <chr> <int> <chr> <chr> <chr>
# 1 A A1 2 1 2 4 1
# 2 A A2 4 2 4 1 NA
# 3 A A3 1 3 1 NA NA
# 4 B B1 3 4 3 8 2
# 5 B B2 8 5 8 2 NA
# 6 B B3 2 6 2 NA NA
# 7 C C1 1 7 1 6 3
# 8 C C2 6 8 6 3 0
# 9 C C3 3 9 3 0 NA
#10 C C4 0 10 0 NA NA
Explanation: We group entries by Key1 and then use lead to shift NumVal values for columns V2 and V3. V1 is simply a copy of NumVal.
A dplyr pipeline.
First utility function will filter a (NumVal) based on the values of b (ConsId):
myfunc1 <- function(a,b) {
n <- length(b)
lapply(seq_along(b), function(i) a[ b >= b[i] ])
}
Second utility function converts a ragged list into a data.frame. It works with arbitrary number of columns to append, but we've limited it to 3 based on your requirements:
myfunc2 <- function(x, ncols = 3) {
n <- min(ncols, max(lengths(x)))
as.data.frame(do.call(rbind, lapply(x, `length<-`, n)))
}
Now the pipeline:
dat %>%
group_by(Key1) %>%
mutate(lst = myfunc1(NumVal, ConsId)) %>%
ungroup() %>%
bind_cols(myfunc2(.$lst)) %>%
select(-lst) %>%
arrange(Key1, ConsId)
# # A tibble: 10 × 7
# Key1 Key2 NumVal ConsId V1 V2 V3
# <chr> <chr> <int> <int> <int> <int> <int>
# 1 A A1 2 1 2 4 1
# 2 A A2 4 2 4 1 NA
# 3 A A3 1 3 1 NA NA
# 4 B B1 3 4 3 8 2
# 5 B B2 8 5 8 2 NA
# 6 B B3 2 6 2 NA NA
# 7 C C1 1 7 1 6 3
# 8 C C2 6 8 6 3 0
# 9 C C3 3 9 3 0 NA
# 10 C C4 0 10 0 NA NA
After grouping by 'Key1', use shift (from data.table) to get the next value of 'NumVal' in a list, convert it to tibble and unnest the nested list elements to individual columns of the dataset. By default, shift fill NA at the end.
library(data.table)
library(tidyverse)
DF1 %>%
group_by(Key1) %>%
mutate(new = shift(NumVal, 0:(n()-1), type = 'lead') %>%
map(~
as.list(.x) %>%
set_names(paste0("V", seq_along(.))) %>%
as_tibble)) %>%
unnest %>%
select(-V4)
# A tibble: 10 x 7
# Groups: Key1 [3]
# Key1 Key2 NumVal ConsId V1 V2 V3
# <chr> <chr> <dbl> <int> <dbl> <dbl> <dbl>
# 1 A A1 2 1 2 4 1
# 2 A A2 4 2 4 1 NA
# 3 A A3 1 3 1 NA NA
# 4 B B1 3 4 3 8 2
# 5 B B2 8 5 8 2 NA
# 6 B B3 2 6 2 NA NA
# 7 C C1 1 7 1 6 3
# 8 C C2 6 8 6 3 0
# 9 C C3 3 9 3 0 NA
#10 C C4 0 10 0 NA NA
data
DF1 <- data.frame(Key1, Key2, NumVal, stringsAsFactors = FALSE) %>%
arrange(Key2)
DF1$ConsId <- 1:10

Element of vector to different columns of data frame

I have a df:
group number id
1 A abcd 1
2 A abcd 2
3 A abcd 3
4 A efgh 4
5 A efgh 5
6 B abcd 1
7 B abcd 2
8 B abcd 3
9 B abcd 9
10 B ijkl 10
I want to make it like this:
group number data1 data2 data3 data4 Length
1 A abcd 1 2 3 3
2 A efgh 4 5 2
3 B abcd 1 2 3 9 4
4 B ijkl 10 1
I am sorry I can only make it to df2 like this:
group number data Length
1 A abcd c(1,2,3) 3
2 A efgh c(4,5) 2
3 B abcd c(1,2,3,9) 4
4 B ijkl 10 1
My code is here:
library(tidyverse)
df <- data.frame (group = c(rep('A',5),rep("B",5)),
number = c(rep('abcd',3),rep('efgh',2),rep('abcd',4),rep('ijkl',1)),
id = c(1,2,3,4,5,1,2,3,9,10))
df2 <- df %>%
group_by(group,number) %>%
nest() %>%
mutate(data=map(data,~unlist(.x, recursive = TRUE, use.names = FALSE)),
Length= map(data, ~length(.x)))
Please feel free to start with df or df2, with(out) any package is fine.
You can change the name count to length(also, I perfer make the 'space' to NA, If want to change it , df2[is.na(df2)]='')
Option 1
df <- data.frame (group = c(rep('A',5),rep("B",5)),
number = c(rep('abcd',3),rep('efgh',2),rep('abcd',4),rep('ijkl',1)),
id = c(1,2,3,4,5,1,2,3,9,10))
df2 <- df %>%
group_by(group,number) %>%
mutate(data=toString(id),count=n())
library(splitstackshape)
cSplit(df2, 3, drop = TRUE,sep=',')
group number count data_1 data_2 data_3 data_4
1: A abcd 3 1 2 3 NA
2: A efgh 2 4 5 NA NA
3: B abcd 4 1 2 3 9
4: B ijkl 1 10 NA NA NA
Option 2
library(dplyr)
library(tidyr)
df2 <- df %>%
group_by(group,number) %>%
summarise(data=toString(id),count=n())%>%separate_rows(data)%>% mutate(Col = paste0("data", 1:n()))%>%spread(Col, data)
df2
# A tibble: 4 x 8
# Groups: group [2]
group number count data1 data2 data3 data4 data5
* <fctr> <fctr> <int> <chr> <chr> <chr> <chr> <chr>
1 A abcd 3 1 2 3 <NA> <NA>
2 A efgh 2 <NA> <NA> <NA> 4 5
3 B abcd 4 1 2 3 9 <NA>
4 B ijkl 1 <NA> <NA> <NA> <NA> 10
I must give it to you blindly but that should work or be close :
library(tidyverse)
df %>%
group_by(group,number) %>%
mutate(key = paste0("data",row_number()),length = n()) %>%
ungroup %>%
spread(key,id,"")
To make it work from your nested data I think you have to change these vectors into 1 line data.frames of same col numbers and names , then use unnest, much more complicated! :)
In base R
temp = split(df, paste(df$group, df$number))
columns = max(sapply(temp, NROW))
do.call(rbind, lapply(temp, function(a)
cbind(group = a$group[1],
number = a$number[1],
setNames(data.frame(t(a$id[1:columns])), paste0("data", 1:columns)),
length = length(a$id))
))
# group number data1 data2 data3 data4 length
#A abcd A abcd 1 2 3 NA 3
#A efgh A efgh 4 5 NA NA 2
#B abcd B abcd 1 2 3 9 4
#B ijkl B ijkl 10 NA NA NA 1
Here is an option using data.table
library(data.table)
dcast(setDT(df), group + number~ paste0("data", rowid(group, number)),
value.var = 'id', fill = 0)[,
length := Reduce(`+`, lapply(.SD, `>`, 0)), .SDcols = data1:data4][]
# group number data1 data2 data3 data4 length
#1: A abcd 1 2 3 0 3
#2: A efgh 4 5 0 0 2
#3: B abcd 1 2 3 9 4
#4: B ijkl 10 0 0 0 1
This is a variation of akrun's data.table answer which does compute Length before reshaping from long to wide format and uses the prefix parameter in the call to rowid():
library(data.table)
data.table(df)[, Length := .N, by = .(group, number)][
, dcast(.SD, group + number + Length ~ rowid(group, number, prefix = "data"),
value.var = "id")]
group number Length data1 data2 data3 data4
1: A abcd 3 1 2 3 NA
2: A efgh 2 4 5 NA NA
3: B abcd 4 1 2 3 9
4: B ijkl 1 10 NA NA NA
For pretty printing, the NA values can be converted into white space:
data.table(df)[, Length := .N, by = .(group, number)][
, dcast(.SD, group + number + Length ~ rowid(group, number, prefix = "data"),
as.character, value.var = "id", fill = "")]
group number Length data1 data2 data3 data4
1: A abcd 3 1 2 3
2: A efgh 2 4 5
3: B abcd 4 1 2 3 9
4: B ijkl 1 10

Resources