Assigning NAs to rows with conditional statement in r - r

I'm trying to assign NAs to the first two rows of each event, with the following conditional statement:
If the first day of each event has a value of "variable" = 0, check the day before. If the day before (last day of previous event) has a "variable" > 0, then assign NAs to the first two rows of the event having "variable" = 0 on the first day. If the day before has a "variable" = 0, do nothing.
Here is an example:
# Example data: one row per day, tagged with the event the day belongs to.
day <- seq_len(16)
event <- c(1, 1, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7)
variable <- c(0, 0, 5, 0, 0, 0, 10, 0, 1, 1, 0, 0, 0, 0, 0, 0)
A <- data.frame(day = day, event = event, variable = variable)
day event variable
1 1 1 0
2 2 1 0
3 3 2 5
4 4 3 0
5 5 4 0
6 6 4 0
7 7 4 10
8 8 5 0
9 9 5 1
10 10 5 1
11 11 6 0
12 12 6 0
13 13 6 0
14 14 7 0
15 15 7 0
16 16 7 0
And this is how it should look:
day event variable
1 1 1 0
2 2 1 0
3 3 2 5
4 4 3 NA
5 5 4 0
6 6 4 0
7 7 4 10
8 8 5 NA
9 9 5 NA
10 10 5 1
11 11 6 NA
12 12 6 NA
13 13 6 0
14 14 7 0
15 15 7 0
16 16 7 0
Note: It doesn't matter if event 1 has to be assigned with NAs
I tried to do this with if conditions, but it is not working well. Any ideas? Thanks in advance!

EDIT: New example data from OP
library(data.table)
event2 <- c(1, 2, 2, 3, 4, 4, 4, 4, 4, 5, 5)
variable2 <- c(140, 0, 69, 569, 28, 0, 0, 0, 100, 0, 0)
desire_output <- c(140, NA, NA, 569, 28, 0, 0, 0, 100, NA, NA)
A2 <- data.frame(event2, variable2, desire_output)
setDT(A2)
# Flag the first (1) and second (2) row of every event; later rows get NA.
# Both fifelse() branches are integer so the types always agree.
A2[, first_days_event := fifelse(seq_len(.N) <= 2L, seq_len(.N), NA_integer_),
   by = .(event2)]
# Start from a copy of variable2 and blank out the first two rows of every
# event that starts at 0 right after a row with a value above 0.
A2[, result := {
  v <- variable2
  # seq_len(.N)[-1] is empty for a one-row table, whereas 2:.N would be c(2, 1).
  for (i in seq_len(.N)[-1]) {
    starts_event <- !is.na(first_days_event[i]) && first_days_event[i] == 1L
    # isTRUE() keeps an NA in variable2 from reaching `if` as a condition.
    if (starts_event && isTRUE(variable2[i] == 0) &&
        isTRUE(variable2[i - 1] > 0)) {
      v[i] <- NA_real_
      # Only touch the next row when it exists and is the second day of the
      # event; indexing past the last row would give NA and break `if`.
      if (i < .N && isTRUE(first_days_event[i + 1] == 2L)) {
        v[i + 1] <- NA_real_
      }
    }
  }
  v
}]
A2
#> event2 variable2 desire_output first_days_event result
#> 1: 1 140 140 1 140
#> 2: 2 0 NA 1 NA
#> 3: 2 69 NA 2 NA
#> 4: 3 569 569 1 569
#> 5: 4 28 28 1 28
#> 6: 4 0 0 2 0
#> 7: 4 0 0 NA 0
#> 8: 4 0 0 NA 0
#> 9: 4 100 100 NA 100
#> 10: 5 0 NA 1 NA
#> 11: 5 0 NA 2 NA
I would use this simple loop solution. You just need to create a flag indicating the first two days of each event.
library(data.table)
day <- 1:16
event <- c(1, 1, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7)
variable <- c(0, 0, 5, 0, 0, 0, 10, 0, 1, 1, 0, 0, 0, 0, 0, 0)
A <- data.frame(day, event, variable)
setDT(A)
# Flag the first (1) and second (2) row of every event; later rows get NA.
# Both fifelse() branches are integer so the types always agree.
A[, first_days_event := fifelse(seq_len(.N) <= 2L, seq_len(.N), NA_integer_),
  by = .(event)]
# Start from a copy of `variable` (not zeros) so that row 1 and every row the
# loop does not touch keep their original value.
A[, result := {
  v <- variable
  # seq_len(.N)[-1] is empty for a one-row table, whereas 2:.N would be c(2, 1).
  for (i in seq_len(.N)[-1]) {
    starts_event <- !is.na(first_days_event[i]) && first_days_event[i] == 1L
    # Rule: the event starts at 0 and the previous day was above 0.
    # isTRUE() keeps an NA in `variable` from reaching `if` as a condition.
    if (starts_event && isTRUE(variable[i] == 0) &&
        isTRUE(variable[i - 1] > 0)) {
      v[i] <- NA_real_
      # Only touch the next row when it exists and is the second day of the
      # event; indexing past the last row would give NA and break `if`.
      if (i < .N && isTRUE(first_days_event[i + 1] == 2L)) {
        v[i + 1] <- NA_real_
      }
    }
  }
  v
}]
A
#> day event variable first_days_event result
#> 1: 1 1 0 1 0
#> 2: 2 1 0 2 0
#> 3: 3 2 5 1 5
#> 4: 4 3 0 1 NA
#> 5: 5 4 0 1 0
#> 6: 6 4 0 2 0
#> 7: 7 4 10 NA 10
#> 8: 8 5 0 1 NA
#> 9: 9 5 1 2 NA
#> 10: 10 5 1 NA 1
#> 11: 11 6 0 1 NA
#> 12: 12 6 0 2 NA
#> 13: 13 6 0 NA 0
#> 14: 14 7 0 1 0
#> 15: 15 7 0 2 0
#> 16: 16 7 0 NA 0

Here is a potential tidyverse approach.
You can store the last value of a group in a temporary column last_var and use lag to move to the first row of the following group for comparison.
Note that the default in lag will determine if variable in event 1 is 0 or NA.
The final mutate will evaluate the row if within the first 2 rows of the group, and check last_var to determine if should set to NA or leave alone.
Edit: The ifelse also needs to check whether the variable on the first day of the event is 0.
library(tidyverse)
# Set `variable` to NA on the first two rows of every event that starts at 0
# when the row just before the event (last row of the previous event) is > 0.
A %>%
group_by(event) %>%
# last_var: last(variable) on the final row of each event, 0 on every other row
mutate(last_var = ifelse(row_number() == n(), last(variable), 0)) %>%
ungroup %>%
# ungrouped lag: each row now holds the previous row's last_var (0 for row 1),
# so the first row of an event carries the previous event's final value
mutate(last_var = lag(last_var, default = 0)) %>%
group_by(event) %>%
# rows 1-2 of the event become NA when the carried value is > 0 and the event's first value is 0
mutate(variable = ifelse(row_number() <= 2 & first(last_var) > 0 & first(variable) == 0, NA, variable)) %>%
# drop the helper column; the result is still grouped by event
select(-last_var)
Output
# A tibble: 16 x 3
# Groups: event [7]
day event variable
<int> <dbl> <dbl>
1 1 1 0
2 2 1 0
3 3 2 5
4 4 3 NA
5 5 4 0
6 6 4 0
7 7 4 10
8 8 5 NA
9 9 5 NA
10 10 5 1
11 11 6 NA
12 12 6 NA
13 13 6 0
14 14 7 0
15 15 7 0
16 16 7 0
With the second data frame included in the comments:
Output
# A tibble: 11 x 3
# Groups: event [5]
event variable desire_output
<dbl> <dbl> <dbl>
1 1 140 140
2 2 NA NA
3 2 NA NA
4 3 569 569
5 4 28 28
6 4 0 0
7 4 0 0
8 4 0 0
9 4 100 100
10 5 NA NA
11 5 NA NA

Related

How to update a value in a specific column in R

Here is a part of the sample data :
# Sample data: whitespace-separated text with a header row.
dat <- read.table(header = TRUE, text = " ID Time B1 T1 Q1 W1 M1
1 12 12 0 12 11 9
1 13 0 1 NA NA NA
2 10 12 0 6 7 8
2 14 0 1 NA NA NA
1 16 16A 0 1 2 4
1 14 0 1 NA NA NA
2 14 16A 0 5 6 7
2 7 0 1 NA NA NA
1 7 20 0 5 8 0
1 7 0 1 NA NA NA
2 9 20 0 7 8 1
2 9 0 1 NA NA NA
")
I want to update the value 1 in column T1 for repeated IDs: the first repeat of an ID should get a value of 1, the second repeat a value of 2, the third repeat a value of 3, and so on. I also want to replace NA with blank cells. Here is the expected outcome:
ID Time B1 T1 Q1 W1 M1
1 12 12 0 12 11 9
1 13 0 1
2 10 12 0 6 7 8
2 14 0 1
1 16 16A 0 1 2 4
1 14 0 2
2 14 16A 0 5 6 7
2 7 0 2
1 7 20 0 5 8 0
1 7 0 3
2 9 20 0 7 8 1
2 9 0 3
You could use an ifelse across with cumsum per group like this:
library(dplyr)
# Within each (ID, B1) group, replace every T1 == 1 with the running count of
# ones seen so far; all other T1 values are kept as they are.
dat %>%
  group_by(ID, B1) %>%
  mutate(T1 = ifelse(T1 == 1, cumsum(T1), T1))
#> # A tibble: 12 × 7
#> # Groups: ID, B1 [8]
#> ID Time B1 T1 Q1 W1 M1
#> <int> <int> <chr> <int> <int> <int> <int>
#> 1 1 12 12 0 12 11 9
#> 2 1 13 0 1 NA NA NA
#> 3 2 10 12 0 6 7 8
#> 4 2 14 0 1 NA NA NA
#> 5 1 16 16A 0 1 2 4
#> 6 1 14 0 2 NA NA NA
#> 7 2 14 16A 0 5 6 7
#> 8 2 7 0 2 NA NA NA
#> 9 1 7 20 0 5 8 0
#> 10 1 7 0 3 NA NA NA
#> 11 2 9 20 0 7 8 1
#> 12 2 9 0 3 NA NA NA
Created on 2023-01-14 with reprex v2.0.2
With data.table
library(data.table)
# Convert in place, then number the T1 == 1 rows within each (ID, B1) group.
setDT(dat)
dat[T1 == 1, T1 := cumsum(T1), by = .(ID, B1)]
-output
> dat
ID Time B1 T1 Q1 W1 M1
1: 1 12 12 0 12 11 9
2: 1 13 0 1 NA NA NA
3: 2 10 12 0 6 7 8
4: 2 14 0 1 NA NA NA
5: 1 16 16A 0 1 2 4
6: 1 14 0 2 NA NA NA
7: 2 14 16A 0 5 6 7
8: 2 7 0 2 NA NA NA
9: 1 7 20 0 5 8 0
10: 1 7 0 3 NA NA NA
11: 2 9 20 0 7 8 1
12: 2 9 0 3 NA NA NA

Replace row value in a data frame group by the smallest value in that group

I have the following data set:
# Three groups, each observed at times 0..5.
time <- rep(c(0, 1, 2, 3, 4, 5), times = 3)
value <- c(10, 8, 6, 5, 3, 2, 12, 10, 6, 5, 4, 2, 20, 15, 16, 9, 2, 2)
group <- rep(c(1, 2, 3), each = 6)
data <- data.frame(time = time, value = value, group = group)
I want to create a new column called data$diff that is equal to data$value minus the value of data$value when data$time == 0 within each group.
I am beginning with the following code
for(i in 1:nrow(data)){
for(n in 1:max(data$group)){
if(data$group[i] == n) {
data$diff[i] <- ???????
}
}
}
But cannot figure out what to put in place of the question marks. The desired output would be this table: https://i.stack.imgur.com/1bAKj.png
Any thoughts are appreciated.
Since in your example data$time == 0 is always the first element of the group, you can use this data.table approach.
library(data.table)
setDT(data)
# Per group: first value of the group minus each value of the group.
data[, diff := head(value, 1L) - value, by = "group"]
In case that data$time == 0 is not the first element in each group you can use this:
# Per group: the value on the row where time == 0 minus each value of the group.
data[, diff := value[time==0] - value, by = group]
Output:
> data
time value group diff
1: 0 10 1 0
2: 1 8 1 2
3: 2 6 1 4
4: 3 5 1 5
5: 4 3 1 7
6: 5 2 1 8
7: 0 12 2 0
8: 1 10 2 2
9: 2 6 2 6
10: 3 5 2 7
11: 4 4 2 8
12: 5 2 2 10
13: 0 20 3 0
14: 1 15 3 5
15: 2 16 3 4
16: 3 9 3 11
17: 4 2 3 18
18: 5 2 3 18
Here is a base R approach.
# Base R: ave() hands each group's row indices to FUN, which returns the
# group's time == 0 value minus every value in that group.
within(data, {
  diff <- ave(seq_along(value), group, FUN = function(idx) {
    group_values <- value[idx]
    group_values[time[idx] == 0] - group_values
  })
})
Output
time value group diff
1 0 10 1 0
2 1 8 1 2
3 2 6 1 4
4 3 5 1 5
5 4 3 1 7
6 5 2 1 8
7 0 12 2 0
8 1 10 2 2
9 2 6 2 6
10 3 5 2 7
11 4 4 2 8
12 5 2 2 10
13 0 20 3 0
14 1 15 3 5
15 2 16 3 4
16 3 9 3 11
17 4 2 3 18
18 5 2 3 18
Here is a short way to do it with dplyr.
library(dplyr)
# Per group: the value at time == 0 minus each value of the group.
data %>%
group_by(group) %>%
# which(time == 0) gives the position(s) of the time-zero row inside the group
mutate(diff = value[which(time == 0)] - value)
Which gives
# Groups: group [3]
time value group diff
<dbl> <dbl> <dbl> <dbl>
1 0 10 1 0
2 1 8 1 2
3 2 6 1 4
4 3 5 1 5
5 4 3 1 7
6 5 2 1 8
7 0 12 2 0
8 1 10 2 2
9 2 6 2 6
10 3 5 2 7
11 4 4 2 8
12 5 2 2 10
13 0 20 3 0
14 1 15 3 5
15 2 16 3 4
16 3 9 3 11
17 4 2 3 18
18 5 2 3 18
library(dplyr)
# Baseline (time == 0) value of each group, one row per group.
# Columns are picked by name so a changed column order cannot break the lookup.
vals2use <- data %>%
  filter(time == 0) %>%
  select(group, value4diff = value)
# Attach the baseline to every row of its group; the key is named explicitly
# and TRUE is spelled out (T is an ordinary, reassignable variable).
dataNew <- merge(data, vals2use, by = "group", all = TRUE)
dataNew$diff <- dataNew$value4diff - dataNew$value
# Keep the original columns plus the new difference, dropping the helper.
dataNew <- dataNew[, c("group", "time", "value", "diff")]
dataNew
group time value diff
1 1 0 10 0
2 1 1 8 2
3 1 2 6 4
4 1 3 5 5
5 1 4 3 7
6 1 5 2 8
7 2 0 12 0
8 2 1 10 2
9 2 2 6 6
10 2 3 5 7
11 2 4 4 8
12 2 5 2 10
13 3 0 20 0
14 3 1 15 5
15 3 2 16 4
16 3 3 9 11
17 3 4 2 18
18 3 5 2 18

R Configure Data With Data.Table

# Example data: Student, Grade and Pass are the inputs; the remaining three
# columns hold the desired results.
data <- data.frame(
  Student = c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5),
  Grade = c(5, 6, 7, 3, 4, 5, 4, 5, 6, 8, 9, 10, 2, 3, 4),
  Pass = c(NA, 0, 1, 0, 1, 1, 0, 1, 0, 0, NA, NA, 0, 0, 0),
  NEWPass = c(0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0),
  GradeNEWPass = c(7, 7, 7, 4, 4, 4, 5, 5, 5, 10, 10, 10, 4, 4, 4),
  GradeBeforeNEWPass = c(6, 6, 6, 3, 3, 3, 4, 4, 4, 10, 10, 10, 4, 4, 4)
)
I have a data.frame called data. It has column names Student, Grade and Pass. I wish to do this:
NEWPass: Take Pass and, for every Student, fill in NA values with the previous value. If the first value is NA, then put a zero. The result should then be a running maximum.
GradeNEWPass: Take the lowest value of Grade that a Student got a one in NEWPass. If a Student did not get a one in NEWPass, this equals to the maximum grade.
GradeBeforeNEWPass: Take the value of Grade BEFORE a Student got a one in NEWPass. If a Student did not get a one in NEWPass, this equals to the maximum grade.
__
Attempts:
# Asker's attempt: running maximum of Pass within each Student.
# NOTE(review): Pass contains NA in the example data; cummax() presumably
# carries NA forward from the first NA it meets — confirm.
setDT(data)[, NEWPassTry := cummax(Pass), by = Student]
# Multiply the flag by Grade, so rows where NEWPassTry is 0 become 0.
data$GradeNEWPass = data$NEWPassTry * data$Grade
# Per-student minimum of that product.
data[, GradeNEWPass := min(GradeNEWPass), by = Student]
Not pretty, admittedly, but your logic includes words like "if any ... for a student", so it's a group-wise conditional, not a row-wise conditional.
library(magrittr) # just for %>% for breakout, not required
# NOTE(review): `mydata` is not defined in the snippets shown; presumably a
# data.table holding the same columns as `data` — confirm.
mydata %>%
# NEWPass2: per Student, carry the last non-NA Pass forward (0 when the first
# row is NA), then take the running maximum.
.[, NEWPass2 :=
cummax(fifelse(seq_len(.N) == 1 & is.na(Pass), 0,
zoo::na.locf(Pass, na.rm = FALSE))), by = .(Student) ] %>%
# GradeNEWPass2: smallest Grade among rows with NEWPass2 > 0, or the
# student's maximum Grade when there is no such row.
.[, GradeNEWPass2 :=
if (any(NEWPass2 > 0)) min(Grade[ NEWPass2 > 0 ]) else max(Grade),
by = .(Student) ] %>%
# GradeBeforeNEWPass2: Grade on the row just before the first NEWPass2 > 0,
# used only when the first row is 0 and a positive row exists; otherwise the
# student's maximum Grade.
.[, GradeBeforeNEWPass2 :=
if (NEWPass2[1] == 0 && any(NEWPass2 > 0)) Grade[ which(NEWPass2 > 0)[1] - 1 ] else max(Grade),
by = .(Student) ]
# Student Grade Pass NEWPass GradeNEWPass GradeBeforeNEWPass NEWPass2 GradeNEWPass2 GradeBeforeNEWPass2
# 1: 1 5 NA 0 7 6 0 7 6
# 2: 1 6 0 0 7 6 0 7 6
# 3: 1 7 1 1 7 6 1 7 6
# 4: 2 3 0 0 4 3 0 4 3
# 5: 2 4 1 1 4 3 1 4 3
# 6: 2 5 1 1 4 3 1 4 3
# 7: 3 4 0 0 5 4 0 5 4
# 8: 3 5 1 1 5 4 1 5 4
# 9: 3 6 0 1 5 4 1 5 4
# 10: 4 8 0 0 10 10 0 10 10
# 11: 4 9 NA 0 10 10 0 10 10
# 12: 4 10 NA 0 10 10 0 10 10
# 13: 5 2 0 0 4 4 0 4 4
# 14: 5 3 0 0 4 4 0 4 4
# 15: 5 4 0 0 4 4 0 4 4
I'm using magrittr::%>% solely to break it out into stages of computation, it is not required.
We can use data.table methods. Grouped by 'Student', create an index ('i1') where the 'Pass' is 1 and not an NA, then get the first position of 1 with which and head ('i2'), while calculating the max of 'Grade' ('mx'), then create the three columns based on the indexes ('v1' - get the cumulative maximum of the binary, 'v2' - if there are any 1s, then subset the 'Grade' with the index 'i2' or else return 'mx', similarly 'v3'- the index is subtracted 1 to get the 'Grade' value
library(data.table)
# Per Student, build the three result columns in one grouped assignment.
setDT(data)[, c('NEWPass1', 'GradeNEWPass1', 'GradeBeforeNEWPass1') :={
# rows where Pass is exactly 1 (an NA Pass counts as FALSE)
i1 <- Pass == 1 & !is.na(Pass)
# position of the first such row; zero-length when there is none
i2 <- head(which(i1), 1)
# fallback value: the student's maximum Grade
mx <- max(Grade, na.rm = TRUE)
# running maximum of the 0/1 flag (unary + turns the logical into integer)
v1 <- cummax(+(i1))
# Grade at the first pass, or the maximum when the student never passes
v2 <- if(any(i1)) Grade[i2] else mx
# Grade one row before the first pass, clamped to row 1 when the pass is on the first row
v3 <- if(any(i1)) Grade[max(1, i2-1)] else mx
.(v1, v2, v3)}, Student]
data
# Student Grade Pass NEWPass GradeNEWPass GradeBeforeNEWPass NEWPass1 GradeNEWPass1 GradeBeforeNEWPass1
# 1: 1 5 NA 0 7 6 0 7 6
# 2: 1 6 0 0 7 6 0 7 6
# 3: 1 7 1 1 7 6 1 7 6
# 4: 2 3 0 0 4 3 0 4 3
# 5: 2 4 1 1 4 3 1 4 3
# 6: 2 5 1 1 4 3 1 4 3
# 7: 3 4 0 0 5 4 0 5 4
# 8: 3 5 1 1 5 4 1 5 4
# 9: 3 6 0 1 5 4 1 5 4
#10: 4 8 0 0 10 10 0 10 10
#11: 4 9 NA 0 10 10 0 10 10
#12: 4 10 NA 0 10 10 0 10 10
#13: 5 2 0 0 4 4 0 4 4
#14: 5 3 0 0 4 4 0 4 4
#15: 5 4 0 0 4 4 0 4 4

Dealing dataframes with conditional statements

Following the previous two questions:
removing the first 3 rows of a group with conditional statement in r
Assigning NAs to rows with conditional statement in r
I'm having some trouble with my code. Instead of deleting rows, I want to assign NAs to every event whose first row has a Value higher than 2. So, if the first row of an event has a Value higher than 2, I want to assign NA to that row and to the next two rows of that event. If the event has no more rows, just assign NAs to the rows the event has.
Here is an example, with a column showing the desired output.
# Example data: Event ids, the observed Value, and the wanted result.
Event <- rep(c(1, 2, 3, 4, 5, 6, 7), times = c(5, 4, 2, 1, 1, 3, 4))
Value <- c(1, 0, 8, 0, 8, 8, 7, 1, 10, 4, 0, 1, 10, 3, 0, 0, NA, NA, 5, 0)
Desire_output <- c(
  1, 0, 8, 0, 8, NA, NA, NA, 10, NA,
  NA, 1, NA, NA, NA, NA, NA, NA, 5, 0
)
AAA <- data.frame(
  Event = Event,
  Value = Value,
  Desire_output = Desire_output
)
Event Value Desire_output
1 1 1 1
2 1 0 0
3 1 8 8
4 1 0 0
5 1 8 8
6 2 8 NA
7 2 7 NA
8 2 1 NA
9 2 10 10
10 3 4 NA
11 3 0 NA
12 4 1 1
13 5 10 NA
14 6 3 NA
15 6 0 NA
16 6 0 NA
17 7 NA NA
18 7 NA NA
19 7 5 5
20 7 0 0
Note: If an event start with an NA, do nothing (like in event 7).
please let me know if you have an idea, and thanks in advance for your time.
Here's a dplyr pipe to do that:
library(dplyr)
# For every Event whose first row has a Value higher than 2, set Value2 to NA
# on that row and on the next two rows of the same Event.
AAA %>%
  group_by(Event) %>%
  mutate(
    # TRUE only on the first row of an event, and only when that row's Value
    # is strictly above 2 (the stated rule is "higher than 2", not ">= 2");
    # an NA first value never triggers it.
    bad = row_number() == 1 & !is.na(Value) & Value > 2,
    # spread the flag to the two following rows of the same event
    bad = bad | lag(bad, default = FALSE) | lag(bad, 2, default = FALSE),
    # rows that are already NA are reported as bad too (they stay NA)
    bad = bad | is.na(Value),
    Value2 = if_else(bad, NA_real_, Value)
  ) %>%
  ungroup()
# # A tibble: 20 x 5
# Event Value Desire_output bad Value2
# <dbl> <dbl> <dbl> <lgl> <dbl>
# 1 1 1 1 FALSE 1
# 2 1 0 0 FALSE 0
# 3 1 8 8 FALSE 8
# 4 1 0 0 FALSE 0
# 5 1 8 8 FALSE 8
# 6 2 8 NA TRUE NA
# 7 2 7 NA TRUE NA
# 8 2 1 NA TRUE NA
# 9 2 10 10 FALSE 10
# 10 3 4 NA TRUE NA
# 11 3 0 NA TRUE NA
# 12 4 1 1 FALSE 1
# 13 5 10 NA TRUE NA
# 14 6 3 NA TRUE NA
# 15 6 0 NA TRUE NA
# 16 6 0 NA TRUE NA
# 17 7 NA NA TRUE NA
# 18 7 NA NA TRUE NA
# 19 7 5 5 FALSE 5
# 20 7 0 0 FALSE 0
I updated the data with
AAA$Desire_output[9] <- 10
since it was inconsistent with your displayed frame (and the display made more sense).

Count consecutive strings of zeroes and ones over multiple groups

There have been several discussions about counting consecutive strings of zeroes and ones (or other values) using functions like rle or cumsum. I have played around with these functions, but I can't easily figure out how to get them to apply to my specific problem.
I am working with ecological presence/absence data ("pres.abs" = 1 or 0) organized by time ("year") and location ("id"). For each location id, I would like to separately calculate the length of consecutive ones and zeroes through time. Where these cannot be calculated, I want to return "NA".
Below is a sample of what the data looks like (first 3 columns) and the output I am hoping to achieve (last 2 columns). Ideally, this would be a pretty fast function avoiding for-loops since the real data frame contains ~15,000 rows.
# Ten years of presence/absence observations at each of three sites.
year <- rep(1:10, times = 3)
id <- rep(c(1, 2, 3), each = 10)
pres.abs.id.1 <- c(0, 0, 0, 1, 1, 1, 0, 0, 1, 1) # site 1 through time
pres.abs.id.2 <- c(1, 1, 0, 1, 0, 0, 1, 0, 0, 0) # site 2 through time
pres.abs.id.3 <- c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1) # site 3 through time
pres.abs <- c(pres.abs.id.1, pres.abs.id.2, pres.abs.id.3)
dat <- data.frame(id = id, year = year, pres.abs = pres.abs)
# Desired output columns, written out by hand.
dat$cumul.zeroes <- c(
  1, 2, 3, NA, NA, NA, 1, 2, NA, NA,
  NA, NA, 1, NA, 1, 2, NA, 1, 2, 3,
  1, 2, 3, 4, 5, NA, NA, NA, NA, NA
)
dat$cumul.ones <- c(
  NA, NA, NA, 1, 2, 3, NA, NA, 1, 2,
  1, 2, NA, 1, NA, NA, 1, NA, NA, NA,
  NA, NA, NA, NA, NA, 1, 2, 3, 4, 5
)
> dat
id year pres.abs cumul.zeroes cumul.ones
1 1 1 0 1 NA
2 1 2 0 2 NA
3 1 3 0 3 NA
4 1 4 1 NA 1
5 1 5 1 NA 2
6 1 6 1 NA 3
7 1 7 0 1 NA
8 1 8 0 2 NA
9 1 9 1 NA 1
10 1 10 1 NA 2
11 2 1 1 NA 1
12 2 2 1 NA 2
13 2 3 0 1 NA
14 2 4 1 NA 1
15 2 5 0 1 NA
16 2 6 0 2 NA
17 2 7 1 NA 1
18 2 8 0 1 NA
19 2 9 0 2 NA
20 2 10 0 3 NA
21 3 1 0 1 NA
22 3 2 0 2 NA
23 3 3 0 3 NA
24 3 4 0 4 NA
25 3 5 0 5 NA
26 3 6 1 NA 1
27 3 7 1 NA 2
28 3 8 1 NA 3
29 3 9 1 NA 4
30 3 10 1 NA 5
Thanks very much for your help.
Here's a base R way using rle and sequence:
# Running length of each run of identical pres.abs values, restarted per id.
dat <- within(dat, {
  # ave() writes each group's result back to that group's own rows, so the
  # counts stay aligned even when the rows are not sorted by id
  # (unlist(lapply(split(...))) returns them in id order instead).
  cumul.counts <- ave(
    seq_along(pres.abs), id,
    FUN = function(i) sequence(rle(pres.abs[i])$lengths)
  )
  # keep the count only on rows of the matching state
  cumul.zeroes <- replace(cumul.counts, pres.abs == 1, NA)
  cumul.ones <- replace(cumul.counts, pres.abs == 0, NA)
  # the helper must not become a column of dat
  rm(cumul.counts)
})
# id year pres.abs cumul.ones cumul.zeroes
# 1 1 1 0 NA 1
# 2 1 2 0 NA 2
# 3 1 3 0 NA 3
# 4 1 4 1 1 NA
# 5 1 5 1 2 NA
# 6 1 6 1 3 NA
# 7 1 7 0 NA 1
# 8 1 8 0 NA 2
# 9 1 9 1 1 NA
# 10 1 10 1 2 NA
# 11 2 1 1 1 NA
# 12 2 2 1 2 NA
# 13 2 3 0 NA 1
# 14 2 4 1 1 NA
# 15 2 5 0 NA 1
# 16 2 6 0 NA 2
# 17 2 7 1 1 NA
# 18 2 8 0 NA 1
# 19 2 9 0 NA 2
# 20 2 10 0 NA 3
# 21 3 1 0 NA 1
# 22 3 2 0 NA 2
# 23 3 3 0 NA 3
# 24 3 4 0 NA 4
# 25 3 5 0 NA 5
# 26 3 6 1 1 NA
# 27 3 7 1 2 NA
# 28 3 8 1 3 NA
# 29 3 9 1 4 NA
# 30 3 10 1 5 NA
Here's one option with dplyr:
# library() stops with an error when dplyr is missing; require() would only
# return FALSE and let the pipeline fail later with a less clear message.
library(dplyr)
dat %>%
  # x increases by one every time pres.abs changes, so (id, x) identifies one
  # run of identical values within one site
  group_by(id, x = cumsum(c(0, diff(pres.abs)) != 0)) %>%
  mutate(
    # position within the run, kept only for runs of the matching state
    cumul.zeros = ifelse(pres.abs != 0, NA_integer_, row_number()),
    cumul.ones = ifelse(pres.abs == 0, NA_integer_, row_number())
  ) %>%
  ungroup() %>%
  select(-x)
#Source: local data frame [30 x 5]
#
# id year pres.abs cumul.zeros cumul.ones
#1 1 1 0 1 NA
#2 1 2 0 2 NA
#3 1 3 0 3 NA
#4 1 4 1 NA 1
#5 1 5 1 NA 2
#6 1 6 1 NA 3
#7 1 7 0 1 NA
#8 1 8 0 2 NA
#9 1 9 1 NA 1
#10 1 10 1 NA 2
#11 2 1 1 NA 1
#12 2 2 1 NA 2
#13 2 3 0 1 NA
#14 2 4 1 NA 1
#15 2 5 0 1 NA
#16 2 6 0 2 NA
#17 2 7 1 NA 1
#18 2 8 0 1 NA
#19 2 9 0 2 NA
#20 2 10 0 3 NA
#21 3 1 0 1 NA
#22 3 2 0 2 NA
#23 3 3 0 3 NA
#24 3 4 0 4 NA
#25 3 5 0 5 NA
#26 3 6 1 NA 1
#27 3 7 1 NA 2
#28 3 8 1 NA 3
#29 3 9 1 NA 4
#30 3 10 1 NA 5

Resources