Summing consecutive values, broken up by a specific value, in R

I'm having trouble figuring out how to group variables in dplyr to achieve the desired result. I have an experimental dataset set up like this:
subject task_phase block_number trial_number ResponseCorrect
<chr> <chr> <dbl> <dbl> <dbl>
1 268301377 1 1 2 1
2 268301377 1 1 3 1
3 268301377 1 1 4 1
4 268301377 1 2 2 -1
5 268301377 1 2 3 1
6 268301377 1 2 4 1
7 268301377 1 3 2 1
8 268301377 1 3 3 -1
9 268301377 1 3 4 1
10 268301377 2 1 50 1
11 268301377 2 1 51 1
12 268301377 2 1 52 1
13 268301377 2 2 37 -1
14 268301377 2 2 38 1
15 268301377 2 2 39 1
16 268301377 2 3 41 -1
17 268301377 2 3 42 -1
18 268301377 2 3 43 1
I'm hoping to sum the consecutive "correct" responses, and to have this tally "reset" each time there was an incorrect response:
subject task_phase block_number trial_number ResponseCorrect ConsecutiveCorrect
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 268301377 1 1 1 1 1
2 268301377 1 1 2 1 2
3 268301377 1 1 3 1 3
4 268301377 1 2 1 -1 0
5 268301377 1 2 2 1 1
6 268301377 1 2 3 1 2
7 268301377 1 3 1 1 1
8 268301377 1 3 2 -1 0
9 268301377 1 3 3 1 1
10 268301377 2 1 1 1 1
11 268301377 2 1 2 1 2
12 268301377 2 1 3 1 3
13 268301377 2 2 1 -1 0
14 268301377 2 2 2 1 1
15 268301377 2 2 3 1 2
16 268301377 2 3 1 -1 0
17 268301377 2 3 2 -1 0
18 268301377 2 3 3 1 1
I originally thought I could do something along the lines of df %>% group_by(subject, task_phase, block_number, ResponseCorrect) %>% mutate(ConsecutiveCorrect = cumsum(ResponseCorrect)), and that almost works. But it doesn't give a consecutive value: it just sums up the total number of correct responses per block. I'm essentially trying to use the -1s as break points that start the summation over again.
Is there a grouping function (Tidyverse or otherwise) that I'm not aware of that could do something along these lines?

You could try
library(dplyr)
data %>%
group_by(
subject,
task_phase,
block_number,
grp = lag(cumsum(ResponseCorrect == -1), default = 0)
) %>%
mutate(ConsecutiveCorrect = ifelse(ResponseCorrect == -1, 0, cumsum(ResponseCorrect))) %>%
ungroup() %>%
select(-grp)
which returns
# A tibble: 18 x 6
subject task_phase block_number trial_number ResponseCorrect ConsecutiveCorrect
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 268301377 1 1 2 1 1
2 268301377 1 1 3 1 2
3 268301377 1 1 4 1 3
4 268301377 1 2 2 -1 0
5 268301377 1 2 3 1 1
6 268301377 1 2 4 1 2
7 268301377 1 3 2 1 1
8 268301377 1 3 3 -1 0
9 268301377 1 3 4 1 1
10 268301377 2 1 50 1 1
11 268301377 2 1 51 1 2
12 268301377 2 1 52 1 3
13 268301377 2 2 37 -1 0
14 268301377 2 2 38 1 1
15 268301377 2 2 39 1 2
16 268301377 2 3 41 -1 0
17 268301377 2 3 42 -1 0
18 268301377 2 3 43 1 1
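The key piece is the grp helper: cumsum(ResponseCorrect == -1) increments at every incorrect response, and lag(..., default = 0) delays that increment by one row, so each -1 row stays grouped with the run it terminates and the next group only starts on the following row. A minimal sketch of the intermediate columns on a single made-up block (my illustration, not part of the original answer):
library(dplyr)
# one block's responses: two runs of correct answers broken by a -1
tibble(ResponseCorrect = c(1, 1, -1, 1, 1)) %>%
  mutate(
    breaks = cumsum(ResponseCorrect == -1),  # 0 0 1 1 1
    grp    = lag(breaks, default = 0)        # 0 0 0 1 1
  )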

An option with data.table. Grouped by 'subject', 'task_phase', and 'block_number', get the run-length id (rleid) of 'ResponseCorrect', take the rowid of that sequence, and multiply by a logical vector so that elements corresponding to -1 return 0 (FALSE -> 0) while the rest keep their value (TRUE -> 1).
library(data.table)
setDT(df)[, ConsecutiveCorrect := rowid(rleid(ResponseCorrect)) *
(ResponseCorrect == 1), by = .(subject, task_phase, block_number)]
-output
df
subject task_phase block_number trial_number ResponseCorrect ConsecutiveCorrect
1: 268301377 1 1 2 1 1
2: 268301377 1 1 3 1 2
3: 268301377 1 1 4 1 3
4: 268301377 1 2 2 -1 0
5: 268301377 1 2 3 1 1
6: 268301377 1 2 4 1 2
7: 268301377 1 3 2 1 1
8: 268301377 1 3 3 -1 0
9: 268301377 1 3 4 1 1
10: 268301377 2 1 50 1 1
11: 268301377 2 1 51 1 2
12: 268301377 2 1 52 1 3
13: 268301377 2 2 37 -1 0
14: 268301377 2 2 38 1 1
15: 268301377 2 2 39 1 2
16: 268301377 2 3 41 -1 0
17: 268301377 2 3 42 -1 0
18: 268301377 2 3 43 1 1
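To see the intermediate pieces on a single block's vector (a hedged illustration, not from the original answer): rleid() gives each run of identical values its own id, rowid() counts the position within each of those runs, and multiplying by the logical (x == 1) zeroes out the runs of -1.
library(data.table)
x <- c(1, 1, -1, 1, 1)
rleid(x)                    # 1 1 2 3 3  -> run ids
rowid(rleid(x))             # 1 2 1 1 2  -> position within each run
rowid(rleid(x)) * (x == 1)  # 1 2 0 1 2  -> -1 rows become 0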
data
df <- structure(list(subject = c(268301377L, 268301377L, 268301377L,
268301377L, 268301377L, 268301377L, 268301377L, 268301377L, 268301377L,
268301377L, 268301377L, 268301377L, 268301377L, 268301377L, 268301377L,
268301377L, 268301377L, 268301377L), task_phase = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
block_number = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), trial_number = c(2L, 3L,
4L, 2L, 3L, 4L, 2L, 3L, 4L, 50L, 51L, 52L, 37L, 38L, 39L,
41L, 42L, 43L), ResponseCorrect = c(1L, 1L, 1L, -1L, 1L,
1L, 1L, -1L, 1L, 1L, 1L, 1L, -1L, 1L, 1L, -1L, -1L, 1L)),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18"))

Related

R Only Keep Rows up to a certain condition

I have a dataframe as follows
head(data)
subject block trial timeLeft timeRight stim1 stim2 Chosen
1 1 13 0 0 0 2 1 2
2 1 13 1 0 1 3 2 2
3 1 13 3 0 0 3 1 1
4 1 13 4 2 0 2 3 3
5 1 13 6 1 1 1 3 1
6 1 13 7 2 2 2 1 1
...
454 1006 14 0 0 0 6 5 5
455 1006 14 1 0 0 6 4 6
456 1006 14 3 0 1 4 5 4
457 1006 14 4 1 1 4 5 4
458 1006 14 6 1 2 6 4 6
My objective is to group by subject and block and to keep only the rows prior to and including the row where both timeLeft and timeRight are 0.
In this case the output would be
subject block trial timeLeft timeRight stim1 stim2 Chosen
1 1 13 0 0 0 2 1 2
2 1 13 1 0 1 3 2 2
3 1 13 3 0 0 3 1 1
...
454 1006 14 0 0 0 6 5 5
455 1006 14 1 0 0 6 4 6
Thank you in advance!
here is the structure of the data
'data.frame': 64748 obs. of 8 variables:
$ subject : num 1 1 1 1 1 1 1 1 1 1 ...
$ block : int 13 13 13 13 13 13 13 13 13 13 ...
$ trial : int 0 1 3 4 6 7 9 10 12 13 ...
$ timeLeft : int 0 0 0 2 1 2 2 1 3 4 ...
$ timeRight: int 0 1 0 0 1 2 1 3 4 4 ...
$ stim1 : int 2 3 3 2 1 2 2 3 2 2 ...
$ stim2 : int 1 2 1 3 3 1 3 1 1 1 ...
$ Chosen : int 2 2 1 3 1 1 2 1 2 2 ...
You may do this with the help of a custom function -
library(dplyr)
select_rows <- function(timeLeft, timeRight) {
inds <- which(timeLeft == 0 & timeRight == 0)
if(length(inds) >= 2) inds[1]:inds[2]
else 0
}
data %>%
group_by(subject, block) %>%
slice(select_rows(timeLeft, timeRight)) %>%
ungroup
# subject block trial timeLeft timeRight stim1 stim2 Chosen
# <int> <int> <int> <int> <int> <int> <int> <int>
#1 1 13 0 0 0 2 1 2
#2 1 13 1 0 1 3 2 2
#3 1 13 3 0 0 3 1 1
#4 1006 14 0 0 0 6 5 5
#5 1006 14 1 0 0 6 4 6
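To make the helper's behaviour explicit (a small hedged check using subject 1's vectors from the data above, with select_rows() defined as above): which() finds the positions where both times are 0, and the function returns the span from the first such row through the second one; when a group has fewer than two such rows it returns 0, so slice(0) returns no rows for that group.
timeLeft  <- c(0, 0, 0, 2, 1, 2)
timeRight <- c(0, 1, 0, 0, 1, 2)
which(timeLeft == 0 & timeRight == 0)  # 1 3
select_rows(timeLeft, timeRight)       # 1 2 3 -> rows kept by slice()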
If the data is huge you may also do this with data.table -
library(data.table)
setDT(data)[, .SD[select_rows(timeLeft, timeRight)], .(subject, block)]
data
It is easier to help if you provide data in a reproducible format
data <- structure(list(subject = c(1L, 1L, 1L, 1L, 1L, 1L, 1006L, 1006L,
1006L, 1006L, 1006L), block = c(13L, 13L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 14L), trial = c(0L, 1L, 3L, 4L, 6L, 7L, 0L,
1L, 3L, 4L, 6L), timeLeft = c(0L, 0L, 0L, 2L, 1L, 2L, 0L, 0L,
0L, 1L, 1L), timeRight = c(0L, 1L, 0L, 0L, 1L, 2L, 0L, 0L, 1L,
1L, 2L), stim1 = c(2L, 3L, 3L, 2L, 1L, 2L, 6L, 6L, 4L, 4L, 6L
), stim2 = c(1L, 2L, 1L, 3L, 3L, 1L, 5L, 4L, 5L, 5L, 4L), Chosen = c(2L,
2L, 1L, 3L, 1L, 1L, 5L, 6L, 4L, 4L, 6L)), class = "data.frame", row.names =
c("1", "2", "3", "4", "5", "6", "454", "455", "456", "457", "458"))
If you want to keep all rows up to and including the last row where both timeLeft and timeRight are 0, you can try it this way.
Data
subject block trial timeLeft timeRight stim1 stim2 Chosen
1 1 13 0 0 0 2 1 2
2 1 13 1 0 1 3 2 2
3 1 13 3 0 0 3 1 1
4 1 13 4 2 0 2 3 3
5 1 13 6 1 1 1 3 1
6 1 13 7 2 2 2 1 1
7 1006 14 0 0 1 6 5 5
8 1006 14 0 0 0 6 5 5
9 1006 14 1 0 0 6 4 6
10 1006 14 3 0 1 4 5 4
11 1006 14 4 1 1 4 5 4
12 1006 14 6 1 2 6 4 6
I added one more row for subject 1006, so that its first row is not a 0,0 row.
Code
df %>%
group_by(subject) %>%
mutate(key = max(which((timeLeft == 0 & timeRight ==0)))) %>%
slice(1:key)
subject block trial timeLeft timeRight stim1 stim2 Chosen key
<int> <int> <int> <int> <int> <int> <int> <int> <int>
1 1 13 0 0 0 2 1 2 3
2 1 13 1 0 1 3 2 2 3
3 1 13 3 0 0 3 1 1 3
4 1006 14 0 0 1 6 5 5 3
5 1006 14 0 0 0 6 5 5 3
6 1006 14 1 0 0 6 4 6 3
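One hedged side note (mine, not the answerer's): because key is a full-length column, 1:key only uses its first element and R raises a "numerical expression has n elements: only the first element is used" warning for every group. Wrapping it in first() gives the same rows without the warnings, and grouping by block as well matches what the question asks for:
df %>%
  group_by(subject, block) %>%
  mutate(key = max(which(timeLeft == 0 & timeRight == 0))) %>%
  slice(seq_len(first(key))) %>%
  ungroup()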
You can filter for only rows that meet the condition and then group
data %>%
filter(timeLeft > 0 & timeRight > 0) %>%
group_by(subject, block)

Combining Using Two Columns

A very basic question about merging interchangeable columns.
Say I have a table
subject stim1 stim2 Chosen Tchosen
<dbl> <int> <int> <int> <int>
1 1 1 2 1 4
2 1 1 2 2 15
3 1 1 3 1 2
4 1 1 3 3 13
5 1 2 1 1 2
6 1 2 1 2 13
7 1 2 3 2 3
where stim1 and stim2 are interchangeable (stim1 = 1, stim2 = 2 is equivalent to stim1 = 2, stim2 = 1).
What is the simplest way to merge the data so that Tchosen is summed across the two equivalent rows (while Chosen, and the grouping by subject, are kept distinct)?
Desired output
subject stim1 stim2 Chosen Tchosen
<dbl> <int> <int> <int> <int>
1 1 1 2 1 6
2 1 1 2 2 28
3 1 1 3 1 4
4 1 1 3 3 28
5 1 2 3 2 3
6 1 2 3 3 12...
Thank you
Here is a data.table approach. I could not reproduce your desired output, since it contains values that are not in your sample data.
library(data.table)
DT <- fread(" subject stim1 stim2 Chosen Tchosen
1 1 2 1 4
1 1 2 2 15
1 1 3 1 2
1 1 3 3 13
1 2 1 1 2
1 2 1 2 13
1 2 3 2 3")
# Switch values of stim2 and stim1 if stim2 < stim1
DT[stim2 < stim1, `:=`(stim1 = stim2, stim2 = stim1)]
# Now summarise and sum
DT[, .(Tchosen = sum(Tchosen, na.rm = TRUE)), by = .(subject,stim1, stim2, Chosen)]
# subject stim1 stim2 Chosen Tchosen
# 1: 1 1 2 1 6
# 2: 1 1 2 2 28
# 3: 1 1 3 1 2
# 4: 1 1 3 3 13
# 5: 1 2 3 2 3
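One detail worth calling out (my note, not part of the original answer): in `:=`(stim1 = stim2, stim2 = stim1) both right-hand sides are evaluated before either column is overwritten, so the values are genuinely swapped rather than stim2 being copied into both columns. A quick hedged check:
library(data.table)
tmp <- data.table(stim1 = c(2L, 3L), stim2 = c(1L, 1L))
tmp[stim2 < stim1, `:=`(stim1 = stim2, stim2 = stim1)]
tmp
#    stim1 stim2
# 1:     1     2
# 2:     1     3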
A base R option using merge + rowSums
transform(
merge(df,
df,
by.x = c("subject", "stim1", "stim2", "Chosen"),
by.y = c("subject", "stim2", "stim1", "Chosen"),
all.x = TRUE
),
Tchosen = rowSums(cbind(Tchosen.x, Tchosen.y), na.rm = TRUE)
)
which gives
subject stim1 stim2 Chosen Tchosen.x Tchosen.y Tchosen
1 1 1 2 1 4 2 6
2 1 1 2 2 15 13 28
3 1 1 3 1 2 NA 2
4 1 1 3 3 13 NA 13
5 1 2 1 1 2 4 6
6 1 2 1 2 13 15 28
7 1 2 3 2 3 NA 3
where NA exists probably due to the incomplete data in your post.
Data
> dput(df)
structure(list(subject = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), stim1 = c(1L,
1L, 1L, 1L, 2L, 2L, 2L), stim2 = c(2L, 2L, 3L, 3L, 1L, 1L, 3L
), Chosen = c(1L, 2L, 1L, 3L, 1L, 2L, 2L), Tchosen = c(4L, 15L,
2L, 13L, 2L, 13L, 3L)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7"))
You can use pmin/pmax to sort the stim1 and stim2 columns and calculate the sum for each group.
aggregate(Tchosen~., transform(df, stim1 = pmin(stim1, stim2),
stim2 = pmax(stim1, stim2)), sum)
# subject stim1 stim2 Chosen Tchosen
#1 1 1 2 1 6
#2 1 1 3 1 2
#3 1 1 2 2 28
#4 1 2 3 2 3
#5 1 1 3 3 13
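This works because base transform() evaluates every argument against the original data frame, so stim2 = pmax(stim1, stim2) still sees the untouched stim1; the same two expressions inside dplyr::mutate() are applied sequentially and would give a different result. A small hedged comparison:
df0 <- data.frame(stim1 = 2, stim2 = 1)
# base R: both expressions see the original columns
transform(df0, stim1 = pmin(stim1, stim2), stim2 = pmax(stim1, stim2))
#   stim1 stim2
# 1     1     2
# dplyr: the second expression sees the already-updated stim1
dplyr::mutate(df0, stim1 = pmin(stim1, stim2), stim2 = pmax(stim1, stim2))
#   stim1 stim2
# 1     1     1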

How to refill a column with the help of 2 other columns?

I have data based on 3 groups: SAMPN, PERNO, loop.
There are 2 columns, mode1 and mode2, and a column called int.
SAMPN PERNO loop mode1 mode2 int
1 1 1 1 2 NA
1 1 1 2 1 NA
1 1 1 3 2 0
1 2 1 3 2 NA
1 2 1 1 1 2
2 2 1 3 2 NA
2 2 1 1 3 NA
2 2 1 3 1 0
2 2 2 1 2 NA
2 2 2 3 1 2
SAMPN is the family index, PERNO is the index of persons in each family, and loop is the tour of each person. The last row of each loop for each person is 0 or 2, and the rest of the loop is NA. For each family, each person, and each loop, I want to copy the column mode1 into int if the last row of the loop is 0, and copy mode2 if the last row of the loop is 2.
output
SAMPN PERNO loop mode1 mode2 int
1 1 1 1 2 1
1 1 1 2 1 2
1 1 1 3 2 3
1 2 1 3 2 2
1 2 1 1 1 1
2 2 1 3 2 3
2 2 1 1 3 1
2 2 1 3 1 3
2 2 2 1 2 2
2 2 2 3 1 1
The first 3 rows are the loop of the first person in the first family; I filled that loop with mode1 because the third row was 0, and so on.
Here's a way using dplyr
df <- read.table(h=T,text="SAMPN PERNO loop mode1 mode2 int
1 1 1 1 2 NA
1 1 1 2 1 NA
1 1 1 3 2 0
1 2 1 3 2 NA
1 2 1 1 1 2
2 2 1 3 2 NA
2 2 1 1 3 NA
2 2 1 3 1 0
2 2 2 1 2 NA
2 2 2 3 1 2")
library(dplyr)
df %>%
group_by(loop, SAMPN, PERNO) %>%
mutate(int = if(last(int) == 0) mode1 else mode2) %>%
ungroup()
#> # A tibble: 10 x 6
#> SAMPN PERNO loop mode1 mode2 int
#> <int> <int> <int> <int> <int> <int>
#> 1 1 1 1 1 2 1
#> 2 1 1 1 2 1 2
#> 3 1 1 1 3 2 3
#> 4 1 2 1 3 2 2
#> 5 1 2 1 1 1 1
#> 6 2 2 1 3 2 3
#> 7 2 2 1 1 3 1
#> 8 2 2 1 3 1 3
#> 9 2 2 2 1 2 2
#> 10 2 2 2 3 1 1
If you have more values than just 0 and 2, switch could be a good alternative:
df %>%
group_by(loop, SAMPN, PERNO) %>%
mutate(int = switch(
as.character(last(int)),
`0` = mode1,
`2` = mode2)) %>%
ungroup()
# same output!
We can also use case_when
library(dplyr)
df %>%
group_by(loop, SAMPN, PERNO) %>%
mutate(int = case_when(rep(last(int) == 0, n()) ~ mode1, TRUE ~mode2))
# A tibble: 10 x 6
# Groups: loop, SAMPN, PERNO [4]
# SAMPN PERNO loop mode1 mode2 int
# <int> <int> <int> <int> <int> <int>
# 1 1 1 1 1 2 1
# 2 1 1 1 2 1 2
# 3 1 1 1 3 2 3
# 4 1 2 1 3 2 2
# 5 1 2 1 1 1 1
# 6 2 2 1 3 2 3
# 7 2 2 1 1 3 1
# 8 2 2 1 3 1 3
#9 2 2 2 1 2 2
#10 2 2 2 3 1 1
data
df <- structure(list(SAMPN = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L), PERNO = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), loop = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L), mode1 = c(1L, 2L, 3L, 3L,
1L, 3L, 1L, 3L, 1L, 3L), mode2 = c(2L, 1L, 2L, 2L, 1L, 2L, 3L,
1L, 2L, 1L), int = c(NA, NA, 0L, NA, 2L, NA, NA, 0L, NA, 2L)),
class = "data.frame", row.names = c(NA,
-10L))

R - Insert Missing Numbers in A Sequence by Group's Max Value

I'd like to insert missing numbers in the index column following these conditions:
Partitioned by multiple columns
The minimum value is always 1
The maximum value is always the maximum for the group and type
Current Data:
group type index vol
A 1 1 200
A 1 2 244
A 1 5 33
A 2 2 66
A 2 3 2
A 2 4 199
A 2 10 319
B 1 4 290
B 1 5 188
B 1 6 573
B 1 9 122
Desired Data:
group type index vol
A 1 1 200
A 1 2 244
A 1 3 0
A 1 4 0
A 1 5 33
A 2 1 0
A 2 2 66
A 2 3 2
A 2 4 199
A 2 5 0
A 2 6 0
A 2 7 0
A 2 8 0
A 2 9 0
A 2 10 319
B 1 1 0
B 1 2 0
B 1 3 0
B 1 4 290
B 1 5 188
B 1 6 573
B 1 7 0
B 1 8 0
B 1 9 122
I've just added in spaces between the partitions for clarity.
Hope you can help out!
You can do the following
library(dplyr)
library(tidyr)
my_df %>%
group_by(group, type) %>%
complete(index = 1:max(index), fill = list(vol = 0))
# group type index vol
# 1 A 1 1 200
# 2 A 1 2 244
# 3 A 1 3 0
# 4 A 1 4 0
# 5 A 1 5 33
# 6 A 2 1 0
# 7 A 2 2 66
# 8 A 2 3 2
# 9 A 2 4 199
# 10 A 2 5 0
# 11 A 2 6 0
# 12 A 2 7 0
# 13 A 2 8 0
# 14 A 2 9 0
# 15 A 2 10 319
# 16 B 1 1 0
# 17 B 1 2 0
# 18 B 1 3 0
# 19 B 1 4 290
# 20 B 1 5 188
# 21 B 1 6 573
# 22 B 1 7 0
# 23 B 1 8 0
# 24 B 1 9 122
With group_by you specify the groups you indicated with the white space. With complete you specify which column should be completed and then what values should be filled in for the remaining columns (the default would be NA).
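As a minimal hedged illustration of what complete() does within one group (using just the index and vol columns of group A, type 1): it expands index to the full requested sequence and fills the columns you don't mention using the fill list (or NA by default).
library(tidyr)
d <- data.frame(index = c(1, 2, 5), vol = c(200, 244, 33))
complete(d, index = 1:max(index), fill = list(vol = 0))
#   index vol
# 1     1 200
# 2     2 244
# 3     3   0
# 4     4   0
# 5     5  33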
Data
my_df <-
structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("A", "B"), class = "factor"),
type = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L),
index = c(1L, 2L, 5L, 2L, 3L, 4L, 10L, 4L, 5L, 6L, 9L),
vol = c(200L, 244L, 33L, 66L, 2L, 199L, 319L, 290L, 188L, 573L, 122L)),
class = "data.frame", row.names = c(NA, -11L))
One dplyr and tidyr possibility could be:
df %>%
group_by(group, type) %>%
complete(index = full_seq(1:max(index), 1), fill = list(vol = 0))
group type index vol
<fct> <int> <dbl> <dbl>
1 A 1 1 200
2 A 1 2 244
3 A 1 3 0
4 A 1 4 0
5 A 1 5 33
6 A 2 1 0
7 A 2 2 66
8 A 2 3 2
9 A 2 4 199
10 A 2 5 0
11 A 2 6 0
12 A 2 7 0
13 A 2 8 0
14 A 2 9 0
15 A 2 10 319
16 B 1 1 0
17 B 1 2 0
18 B 1 3 0
19 B 1 4 290
20 B 1 5 188
21 B 1 6 573
22 B 1 7 0
23 B 1 8 0
24 B 1 9 122

Longest consecutive count of the same value per group

I have a data.frame as below and I want to add a variable describing the longest consecutive count of 1s in the VALUE variable observed in the group (i.e. the longest run of consecutive rows with VALUE == 1 per group).
GROUP_ID VALUE
1 0
1 1
1 1
1 1
1 1
1 0
2 1
2 1
2 0
2 1
2 1
2 1
3 1
3 0
3 1
3 0
So the output would look like this:
GROUP_ID VALUE CONSECUTIVE
1 0 4
1 1 4
1 1 4
1 1 4
1 1 4
1 0 4
2 1 3
2 1 3
2 0 3
2 1 3
2 1 3
2 1 3
3 1 1
3 0 1
3 1 1
3 0 1
Any help would be greatly appreciated!
Using dplyr:
library(dplyr)
dat %>%
group_by(GROUP_ID) %>%
mutate(CONSECUTIVE = {rl <- rle(VALUE); max(rl$lengths[rl$values == 1])})
which gives:
# A tibble: 16 x 3
# Groups: GROUP_ID [3]
GROUP_ID VALUE CONSECUTIVE
<int> <int> <int>
1 1 0 4
2 1 1 4
3 1 1 4
4 1 1 4
5 1 1 4
6 1 0 4
7 2 1 3
8 2 1 3
9 2 0 3
10 2 1 3
11 2 1 3
12 2 1 3
13 3 1 1
14 3 0 1
15 3 1 1
16 3 0 1
Or with data.table:
library(data.table)
setDT(dat) # convert to a 'data.table'
dat[, CONSECUTIVE := {rl <- rle(VALUE); max(rl$lengths[rl$values == 1])}
, by = GROUP_ID][]
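One hedged caveat for both of these: if a group contains no 1s at all, rl$lengths[rl$values == 1] is empty and max() returns -Inf with a warning. A guarded helper (my addition, not in the original answer) could default to 0 instead:
max_run_of_ones <- function(x) {
  rl <- rle(x)
  len <- rl$lengths[rl$values == 1]
  if (length(len) == 0) 0L else max(len)
}
dat %>%
  group_by(GROUP_ID) %>%
  mutate(CONSECUTIVE = max_run_of_ones(VALUE))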
We can use ave with rle to get the maximum number of consecutive 1s for each group (GROUP_ID).
df$Consecutive <- ave(df$VALUE, df$GROUP_ID, FUN = function(x) {
y <- rle(x == 1)
max(y$lengths[y$values])
})
df
# GROUP_ID VALUE Consecutive
#1 1 0 4
#2 1 1 4
#3 1 1 4
#4 1 1 4
#5 1 1 4
#6 1 0 4
#7 2 1 3
#8 2 1 3
#9 2 0 3
#10 2 1 3
#11 2 1 3
#12 2 1 3
#13 3 1 1
#14 3 0 1
#15 3 1 1
#16 3 0 1
Here is another option with data.table
library(data.table)
library(dplyr)
setDT(df1)[, CONSECUTIVE := max(table(na_if(rleid(VALUE)*VALUE, 0))), .(GROUP_ID)]
df1
# GROUP_ID VALUE CONSECUTIVE
# 1: 1 0 4
# 2: 1 1 4
# 3: 1 1 4
# 4: 1 1 4
# 5: 1 1 4
# 6: 1 0 4
# 7: 2 1 3
# 8: 2 1 3
# 9: 2 0 3
#10: 2 1 3
#11: 2 1 3
#12: 2 1 3
#13: 3 1 1
#14: 3 0 1
#15: 3 1 1
#16: 3 0 1
data
df1 <- structure(list(GROUP_ID = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L), VALUE = c(0L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L)), class = "data.frame", row.names = c(NA,
-16L))
