complete dplyr with missing ID - r

I understand how to use complete() from tidyr (tidyverse) to complete a data frame.
In the example the documentation gives:
df <- tibble(
  group = c(1:2, 1, 2),
  item_id = c(1:2, 2, 3),
  item_name = c("a", "a", "b", "b"),
  value1 = c(1, NA, 3, 4),
  value2 = 4:7
)
df
#> # A tibble: 4 × 5
#> group item_id item_name value1 value2
#> <dbl> <dbl> <chr> <dbl> <int>
#> 1 1 1 a 1 4
#> 2 2 2 a NA 5
#> 3 1 2 b 3 6
#> 4 2 3 b 4 7
Is there a way of adding a group and completing? e.g. add a group 3 and complete the table.
For example, I have a df which I populate in a for loop to make a plot. The df is like so:
variant Location Position variable value protein Mutation.type
FANCI_L605F FANCI chr15:89828441_C/T B 0.45 L605F nonsynonymous_SNV
PLCG2_R953* PLCG2 chr16:81969788_C/T B 0.87 R953* stopgain
STAT3_R278C STAT3 chr17:40486033_G/A B 0.38 R278C nonsynonymous_SNV
FANCI_L605F FANCI chr15:89828441_C/T C 0.45 L605F nonsynonymous_SNV
PLCG2_R953* PLCG2 chr16:81969788_C/T C 0.87 R953* stopgain
STAT3_R278C STAT3 chr17:40486033_G/A C 0.38 R278C nonsynonymous_SNV
I also have a vector of possible variable names:
all_var <- c("A", "B", "C")
I have worked out how to add any missing variables (I think):
new_df <- complete(df, variable = all_var, Position)
> new_df
variant Location Position variable value protein Mutation.type
NA NA chr15:89828441_C/T A NA NA NA
NA NA chr16:81969788_C/T A NA NA NA
NA NA chr17:40486033_G/A A NA NA NA
FANCI_L605F FANCI chr15:89828441_C/T B 0.45 L605F nonsynonymous_SNV
PLCG2_R953* PLCG2 chr16:81969788_C/T B 0.87 R953* stopgain
STAT3_R278C STAT3 chr17:40486033_G/A B 0.38 R278C nonsynonymous_SNV
FANCI_L605F FANCI chr15:89828441_C/T C 0.45 L605F nonsynonymous_SNV
PLCG2_R953* PLCG2 chr16:81969788_C/T C 0.87 R953* stopgain
STAT3_R278C STAT3 chr17:40486033_G/A C 0.38 R278C nonsynonymous_SNV
How do I now complete the variant, Location, protein, and Mutation.type columns?

You can add a row specifying the new group and then use complete() to fill in the combinations, i.e.
library(dplyr)
library(tidyr)
df %>%
  add_row(group = 3) %>%
  complete(group, nesting(item_id, item_name)) %>%
  drop_na(item_id)
# A tibble: 12 x 5
group item_id item_name value1 value2
<dbl> <dbl> <chr> <dbl> <int>
1 1 1 a 1 4
2 1 2 a NA NA
3 1 2 b 3 6
4 1 3 b NA NA
5 2 1 a NA NA
6 2 2 a NA 5
7 2 2 b NA NA
8 2 3 b 4 7
9 3 1 a NA NA
10 3 2 a NA NA
11 3 2 b NA NA
12 3 3 b NA NA
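Applying the same idea to the variant data from the question: complete variable against a nesting() of the descriptive columns, so variant, Location, protein and Mutation.type travel together with each Position and only value is left NA for the added variable levels. A sketch, assuming df and all_var are as defined in the question:
library(tidyr)
new_df <- complete(df,
                   variable = all_var,
                   nesting(variant, Location, Position, protein, Mutation.type))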

Related

How to Filter by group and move all values to new column if any value in any of the affected columns is greater than 5 in R

I have a data frame like this:
dt <- tibble(
  TRIAL = c("A", "A", "A", "B", "B", "B", "C", "C", "C", "D", "D", "D"),
  RL = c(1, NA, 3, 1, 6, 3, 2, 3, 1, 0, 1.5, NA),
  SL = c(6, 1.5, 1, 0, 0, 1, 1, 2, 0, 1, 1.5, NA),
  HC = c(0, 1, 5, 6, 7, 8, 9, 3, 4, 5, 4, 2)
)
# A tibble: 12 x 4
TRIAL RL SL HC
<chr> <dbl> <dbl> <dbl>
1 A 1 6 0
2 A NA 1.5 1
3 A 3 1 5
4 B 1 0 6
5 B 6 0 7
6 B 3 1 8
7 C 2 1 9
8 C 3 2 3
9 C 1 0 4
10 D 0 1 5
11 D 1.5 1.5 4
12 D NA NA 2
I want to group the data frame by TRIAL and check the values of RL and SL by group: if any value in either column is greater than 5, then move all of that group's RL and SL values to RLCT and SLCT respectively.
# A tibble: 12 x 6
TRIAL HC RLCT SLCT SL RL
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A 0 1 6 NA NA
2 A 1 NA 1.5 NA NA
3 A 5 3 1 NA NA
4 B 6 1 0 NA NA
5 B 7 6 0 NA NA
6 B 8 3 1 NA NA
7 C 9 NA NA 1 3
8 C 3 NA NA 3 5
9 C 4 NA NA 1 1
10 D 5 NA NA 1 0
11 D 4 NA NA 1.5 1.5
12 D 2 NA NA NA NA
When I run the code below, I do not get the expected output.
dt0 <- dt %>%
  mutate(RLCT = NA,
         SLCT = NA) %>%
  group_by(TRIAL) %>%
  filter(!any(RL > 5.0 | SL > 5.0))

dt1 <- dt %>%
  group_by(TRIAL) %>%
  filter(any(RL > 5.0 | SL > 5.0)) %>%
  mutate(RLCT = RL,
         SLCT = SL) %>%
  rbind(dt0, .) %>%
  mutate(RL = ifelse(!is.na(RLCT), NA, RL),
         SL = ifelse(!is.na(SLCT), NA, SL)) %>%
  arrange(TRIAL)
This is what I get
# A tibble: 9 x 6
# Groups: TRIAL [3]
TRIAL RL SL HC RLCT SLCT
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A NA NA 0 1 6
2 A NA NA 1 NA 1.5
3 A NA NA 5 3 1
4 B NA NA 6 1 0
5 B NA NA 7 6 0
6 B NA NA 8 3 1
7 C 2 1 9 NA NA
8 C 3 2 3 NA NA
9 C 1 0 4 NA NA
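The reason group D disappears is the NA handling in any(): for a group that contains NA values and no value above 5, any(RL > 5.0 | SL > 5.0) returns NA rather than FALSE, and filter() drops rows whose condition is NA, so group D survives neither the dt0 nor the dt1 filter. A quick illustration with group D's RL values:
any(c(0, 1.5, NA) > 5)                # NA  -> dropped by both filters
any(c(0, 1.5, NA) > 5, na.rm = TRUE)  # FALSE -> what the check should return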
You can define a column to store the condition, then change RL and SL with ifelse() inside across().
dt %>%
  group_by(TRIAL) %>%
  mutate(cond = any(RL > 5.0 | SL > 5.0, na.rm = TRUE),
         across(c(RL, SL), ~ ifelse(cond, ., NA), .names = "{.col}CT"),
         across(c(RL, SL), ~ ifelse(!cond, ., NA)),
         cond = NULL)
Result:
# A tibble: 12 x 6
# Groups: TRIAL [4]
TRIAL RL SL HC RLCT SLCT
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A NA NA 0 1 6
2 A NA NA 1 NA 1.5
3 A NA NA 5 3 1
4 B NA NA 6 1 0
5 B NA NA 7 6 0
6 B NA NA 8 3 1
7 C 2 1 9 NA NA
8 C 3 2 3 NA NA
9 C 1 0 4 NA NA
10 D 0 1 5 NA NA
11 D 1.5 1.5 4 NA NA
12 D NA NA 2 NA NA
With dplyr, you could use group_modify():
library(dplyr)
dt %>%
  group_by(TRIAL) %>%
  group_modify(~ {
    if (any(select(.x, c(RL, SL)) > 5, na.rm = TRUE)) {
      rename_with(.x, ~ paste0(.x, 'CT'), c(RL, SL))
    } else {
      .x
    }
  })
Output
# A tibble: 12 × 6
# Groups: TRIAL [4]
TRIAL RLCT SLCT HC RL SL
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A 1 6 0 NA NA
2 A NA 1.5 1 NA NA
3 A 3 1 5 NA NA
4 B 1 0 6 NA NA
5 B 6 0 7 NA NA
6 B 3 1 8 NA NA
7 C NA NA 9 2 1
8 C NA NA 3 3 2
9 C NA NA 4 1 0
10 D NA NA 5 0 1
11 D NA NA 4 1.5 1.5
12 D NA NA 2 NA NA

Group_by id and count the consecutive NA's and then restart counting when a new series of NA's is encountered

I have a dataframe like this:
df <- data_frame(id = c(rep('A', 10), rep('B', 10)),
                 value = c(1:3, rep(NA, 2), 1:2, rep(NA, 3), 1, rep(NA, 4), 1:3, rep(NA, 2)))
I need to count the number of consecutive NA's in the value column. The count needs to be grouped by id, and it needs to restart at 1 every time a new NA or a new series of NA's is encountered. The expected output should look like this:
df$expected_output <- c(rep(NA, 3), 1:2, rep(NA, 2), 1:3, NA, 1:4, rep(NA, 3), 1:2)
If anyone can give me a dplyr solution that would also be great :)
I've tried a few things but nothing is giving any sort of sensible result. Thanks in advance!
A solution using dplyr and data.table.
library(dplyr)
library(data.table)
df2 <- df %>%
  group_by(id) %>%
  mutate(info = rleid(value)) %>%
  group_by(id, info) %>%
  mutate(expected_output = row_number()) %>%
  ungroup() %>%
  mutate(expected_output = ifelse(!is.na(value), NA, expected_output)) %>%
  select(-info)
df2
# # A tibble: 20 x 3
# id value expected_output
# <chr> <dbl> <int>
# 1 A 1 NA
# 2 A 2 NA
# 3 A 3 NA
# 4 A NA 1
# 5 A NA 2
# 6 A 1 NA
# 7 A 2 NA
# 8 A NA 1
# 9 A NA 2
# 10 A NA 3
# 11 B 1 NA
# 12 B NA 1
# 13 B NA 2
# 14 B NA 3
# 15 B NA 4
# 16 B 1 NA
# 17 B 2 NA
# 18 B 3 NA
# 19 B NA 1
# 20 B NA 2
We can use rle() to get the lengths of runs that are or are not NA, then use purrr::map2() to build the growing count with seq() for the NA runs and fill the other runs with NA values using rep().
library(tidyverse)
count_na <- function(x) {
  r <- rle(is.na(x))
  consec <- map2(r$lengths, r$values, ~ if (.y) seq(.x) else rep(NA, .x))
  unlist(consec)
}

df %>%
  mutate(expected_output = count_na(value))
#> # A tibble: 20 × 3
#> id value expected_output
#> <chr> <dbl> <int>
#> 1 A 1 NA
#> 2 A 2 NA
#> 3 A 3 NA
#> 4 A NA 1
#> 5 A NA 2
#> 6 A 1 NA
#> 7 A 2 NA
#> 8 A NA 1
#> 9 A NA 2
#> 10 A NA 3
#> 11 B 1 NA
#> 12 B NA 1
#> 13 B NA 2
#> 14 B NA 3
#> 15 B NA 4
#> 16 B 1 NA
#> 17 B 2 NA
#> 18 B 3 NA
#> 19 B NA 1
#> 20 B NA 2
Here is a solution using rle:
x <- rle(is.na(df$value))
df$new[is.na(df$value)] <- sequence(x$lengths[x$values])
# A tibble: 20 x 3
id value new
<chr> <dbl> <int>
1 A 1 NA
2 A 2 NA
3 A 3 NA
4 A NA 1
5 A NA 2
6 A 1 NA
7 A 2 NA
8 A NA 1
9 A NA 2
10 A NA 3
11 B 1 NA
12 B NA 1
13 B NA 2
14 B NA 3
15 B NA 4
16 B 1 NA
17 B 2 NA
18 B 3 NA
19 B NA 1
20 B NA 2
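Note that this rle() call runs over the whole value column and ignores id; it works here only because no NA run crosses the boundary between the A and B rows. A per-group variant of the same idea, sketched with base ave() (the anonymous helper is ad hoc, not from the original answer):
df$new <- ave(df$value, df$id, FUN = function(v) {
  x <- rle(is.na(v))
  out <- rep(NA_integer_, length(v))
  out[is.na(v)] <- sequence(x$lengths[x$values])  # restart count within each NA run
  out
})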
Yet another solution:
library(tidyverse)
df %>%
  mutate(aux = data.table::rleid(value)) %>%
  group_by(id, aux) %>%
  mutate(eout = ifelse(is.na(value), row_number(), NA_real_)) %>%
  ungroup() %>%
  select(-aux)
#> # A tibble: 20 × 4
#> id value expected_output eout
#> <chr> <dbl> <int> <dbl>
#> 1 A 1 NA NA
#> 2 A 2 NA NA
#> 3 A 3 NA NA
#> 4 A NA 1 1
#> 5 A NA 2 2
#> 6 A 1 NA NA
#> 7 A 2 NA NA
#> 8 A NA 1 1
#> 9 A NA 2 2
#> 10 A NA 3 3
#> 11 B 1 NA NA
#> 12 B NA 1 1
#> 13 B NA 2 2
#> 14 B NA 3 3
#> 15 B NA 4 4
#> 16 B 1 NA NA
#> 17 B 2 NA NA
#> 18 B 3 NA NA
#> 19 B NA 1 1
#> 20 B NA 2 2

How to apply value in first row to all subsequent rows by participant using the tidyverse [duplicate]

This question already has answers here:
Replace NA with previous or next value, by group, using dplyr
(5 answers)
Replacing NAs with latest non-NA value
(21 answers)
Closed 2 years ago.
Just say we run a study where participants are measured on some outcome variable four times each. At the start of testing they provide their age and sex. Here is some toy data to illustrate.
set.seed(1)
sex <- NA
age <- NA
df <- data.frame(id = factor(rep(1:4, each = 4)),
                 time = rep(1:4, times = 4),
                 sex = as.vector(sapply(0:3, function(i) sex[i*4 + 1:4] <- c(sample(c("m", "f"), 1, replace = T), rep(NA, 3)))),
                 age = as.vector(sapply(0:3, function(i) age[i*4 + 1:4] <- c(sample(18:75, 1, replace = T), rep(NA, 3)))),
                 outcome = round(rnorm(16), 2),
                 stringsAsFactors = F)
Here is what the data looks like
df
# output
# id time sex age outcome
# 1 1 m 29 0.33
# 1 2 <NA> NA -0.82
# 1 3 <NA> NA 0.49
# 1 4 <NA> NA 0.74
# 2 1 m 70 0.58
# 2 2 <NA> NA -0.31
# 2 3 <NA> NA 1.51
# 2 4 <NA> NA 0.39
# 3 1 f 72 -0.62
# 3 2 <NA> NA -2.21
# 3 3 <NA> NA 1.12
# 3 4 <NA> NA -0.04
# 4 1 f 56 -0.02
# 4 2 <NA> NA 0.94
# 4 3 <NA> NA 0.82
# 4 4 <NA> NA 0.59
Now what I want to do is use the tidyverse to copy the values of the demographic variables, at present only in the first row of each participant's data, to all of their rows.
At present all I could come up with was
df %>%
  group_by(id) %>%                # group by id
  distinct(sex) %>%               # shrink to unique values for each id
  dplyr::filter(!is.na(sex)) %>%  # remove the NAs
  left_join(df, by = "id")
Which yields the output
# A tibble: 16 x 6
# Groups: id [4]
# sex.x id time sex.y age outcome
# <chr> <fct> <int> <chr> <int> <dbl>
# 1 m 1 1 m 29 0.33
# 2 m 1 2 NA NA -0.82
# 3 m 1 3 NA NA 0.49
# 4 m 1 4 NA NA 0.74
# 5 m 2 1 m 70 0.580
# 6 m 2 2 NA NA -0.31
# 7 m 2 3 NA NA 1.51
# 8 m 2 4 NA NA 0.39
# 9 f 3 1 f 72 -0.62
# 10 f 3 2 NA NA -2.21
# 11 f 3 3 NA NA 1.12
# 12 f 3 4 NA NA -0.04
# 13 f 4 1 f 56 -0.02
# 14 f 4 2 NA NA 0.94
# 15 f 4 3 NA NA 0.82
# 16 f 4 4 NA NA 0.59
Now I would consider this partially successful because the first row in each participant's sex.x column has now been applied to all their other rows, but I really don't like that there are now two sex columns.
Now I could easily add some more functions to the chain that remove the superfluous sex.y column and rename the sex.x column to its original form, but this seems a bit clunky.
Can anyone suggest how to do this better?
You can fill the sex value for each id:
library(dplyr)
df %>% group_by(id) %>% tidyr::fill(sex)
# id time sex age outcome
# <fct> <int> <chr> <int> <dbl>
# 1 1 1 m 51 -1.54
# 2 1 2 m NA -0.93
# 3 1 3 m NA -0.290
# 4 1 4 m NA -0.01
# 5 2 1 f 40 2.4
# 6 2 2 f NA 0.76
# 7 2 3 f NA -0.8
# 8 2 4 f NA -1.15
# 9 3 1 m 60 -0.290
#10 3 2 m NA -0.3
#11 3 3 m NA -0.41
#12 3 4 m NA 0.25
#13 4 1 m 31 -0.89
#14 4 2 m NA 0.44
#15 4 3 m NA -1.24
#16 4 4 m NA -0.22
You could also fill the age value: df %>% group_by(id) %>% tidyr::fill(sex, age).
PS - I get different numbers from the same seed value though.
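An alternative that avoids fill(), sketched here rather than taken from the answer, is to broadcast the first non-missing value per group with mutate() and across():
library(dplyr)
df %>%
  group_by(id) %>%
  mutate(across(c(sex, age), ~ .x[!is.na(.x)][1])) %>%  # first non-NA value per id
  ungroup()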

How to gather every 4 columns, but variables may have different lengths

I have a data frame that is organized as a pattern of 4 columns repeated 145 times. The lengths of the column blocks are not the same.
I would like to reshape the data frame so that I have just 4 columns. I also have to spread some of the data.
Here is an example of what my data looks like:
df <- data.frame(
  id = c(rep("M", 8), rep(NA, 2)),
  day = c(rep(seq(1:4), 2), rep(NA, 2)),
  parameter = c(rep("glu", 4), rep("lac", 4), rep(NA, 2)),
  value = c(rep(2, 4), rep(0.5, 4), rep(NA, 2)),
  id1 = c(rep("v", 10)),
  day1 = c(rep(1, 5), rep(2, 3), rep(1, 2)),
  parameter1 = c(rep("glu", 8), rep("lac", 2)),
  value1 = c(rep(2, 8), rep(5, 2)))
This is the result I want:
ideal.df <- data.frame(
  id = c(rep("M", 10), rep("v", 10)),
  day = c(rep(seq(1:4), 2), rep(NA, 2), rep(1, 5), rep(2, 3), rep(1, 2)),
  glu = c(rep(2, 4), rep(NA, 6), rep(1, 8), rep(NA, 2)),
  lac = c(rep(0.5, 4), rep(NA, 6), rep(5, 2), rep(NA, 8))
)
The expected output seems very strange. Below is some very ugly example code to generate a result that seems more logical,
but apparently isn’t what the OP wants. Could the OP please explain why the result should be as posted (ideal.df) rather
than what is shown here (df2)?
Given data:
df <- data.frame(
  id = c(rep("M", 8), rep(NA, 2)),
  day = c(rep(seq(1:4), 2), rep(NA, 2)),
  parameter = c(rep("glu", 4), rep("lac", 4), rep(NA, 2)),
  value = c(rep(2, 4), rep(0.5, 4), rep(NA, 2)),
  id1 = c(rep("v", 10)),
  day1 = c(rep(1, 5), rep(2, 3), rep(1, 2)),
  parameter1 = c(rep("glu", 8), rep("lac", 2)),
  value1 = c(rep(2, 8), rep(5, 2)))
df
#> id day parameter value id1 day1 parameter1 value1
#> 1 M 1 glu 2.0 v 1 glu 2
#> 2 M 2 glu 2.0 v 1 glu 2
#> 3 M 3 glu 2.0 v 1 glu 2
#> 4 M 4 glu 2.0 v 1 glu 2
#> 5 M 1 lac 0.5 v 1 glu 2
#> 6 M 2 lac 0.5 v 2 glu 2
#> 7 M 3 lac 0.5 v 2 glu 2
#> 8 M 4 lac 0.5 v 2 glu 2
#> 9 <NA> NA <NA> NA v 1 lac 5
#> 10 <NA> NA <NA> NA v 1 lac 5
Result of simple processing of the data:
df2 <- data.frame(id = c(df$id, df$id1), day = c(df$day, df$day1))
df2$glu <- c(ifelse(df$parameter=="glu",df$value,NA), ifelse(df$parameter1=="glu",df$value1,NA))
df2$lac <- c(ifelse(df$parameter=="lac",df$value,NA), ifelse(df$parameter1=="lac",df$value1,NA))
df2
#> id day glu lac
#> 1 1 1 2 NA
#> 2 1 2 2 NA
#> 3 1 3 2 NA
#> 4 1 4 2 NA
#> 5 1 1 NA 0.5
#> 6 1 2 NA 0.5
#> 7 1 3 NA 0.5
#> 8 1 4 NA 0.5
#> 9 NA NA NA NA
#> 10 NA NA NA NA
#> 11 1 1 2 NA
#> 12 1 1 2 NA
#> 13 1 1 2 NA
#> 14 1 1 2 NA
#> 15 1 1 2 NA
#> 16 1 2 2 NA
#> 17 1 2 2 NA
#> 18 1 2 2 NA
#> 19 1 1 NA 5.0
#> 20 1 1 NA 5.0
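For reference, the "stack the two 4-column blocks" step above can also be written with dplyr verbs. A sketch, assuming the blocks always come in id/day/parameter/value order; bind_rows() also keeps the id labels that c() collapses to integer codes when the id columns are factors (as in the df2 output above):
library(dplyr)
df2_tidy <- bind_rows(
  df %>% select(id, day, parameter, value),
  df %>% select(id = id1, day = day1, parameter = parameter1, value = value1)
) %>%
  mutate(glu = if_else(parameter == "glu", value, NA_real_),
         lac = if_else(parameter == "lac", value, NA_real_)) %>%
  select(id, day, glu, lac)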
Desired result:
ideal.df <- data.frame(
  id = c(rep("M", 10), rep("v", 10)),
  day = c(rep(seq(1:4), 2), rep(NA, 2), rep(1, 5), rep(2, 3), rep(1, 2)),
  glu = c(rep(2, 4), rep(NA, 6), rep(1, 8), rep(NA, 2)),
  lac = c(rep(0.5, 4), rep(NA, 6), rep(5, 2), rep(NA, 8))
)
ideal.df
#> id day glu lac
#> 1 M 1 2 0.5
#> 2 M 2 2 0.5
#> 3 M 3 2 0.5
#> 4 M 4 2 0.5
#> 5 M 1 NA NA
#> 6 M 2 NA NA
#> 7 M 3 NA NA
#> 8 M 4 NA NA
#> 9 M NA NA NA
#> 10 M NA NA NA
#> 11 v 1 1 5.0
#> 12 v 1 1 5.0
#> 13 v 1 1 NA
#> 14 v 1 1 NA
#> 15 v 1 1 NA
#> 16 v 2 1 NA
#> 17 v 2 1 NA
#> 18 v 2 1 NA
#> 19 v 1 NA NA
#> 20 v 1 NA NA

R - Replace missing values with highest of 4 previous values

This is a variation of the last-observation-carried-forward problem in a vector with some missing values. Instead of filling in NA values with the last non-NA observation, I would like to fill in each NA with the highest value among the 4 observations preceding it. If all 4 preceding observations are also NA, the NA should be retained. I would also appreciate it if this can be done by group in a data frame/data table.
Example:
Original DF:
ID Week Value
a 1 5
a 2 1
a 3 NA
a 4 NA
a 5 3
a 6 4
a 7 NA
b 1 NA
b 2 NA
b 3 NA
b 4 NA
b 5 NA
b 6 1
b 7 NA
Output DF:
ID Week Value
a 1 5
a 2 1
a 3 5
a 4 5
a 5 3
a 6 4
a 7 4
b 1 NA
b 2 NA
b 3 NA
b 4 NA
b 5 NA
b 6 1
b 7 1
lag() shifts the column by n steps and lets you peek at previous values. pmax() is the element-wise maximum and lets you pick the highest value across each set of lagged observations.
To abstract away the notion of 4 and maintain vectorized performance, you may use quasiquotation from rlang: http://dplyr.tidyverse.org/articles/programming.html#quasiquotation
It can look a little cryptic at first but is very precise and expressive.
df <- readr::read_table(
" ID Week Value
a 1 5
a 2 1
a 3 NA
a 4 NA
a 5 3
a 6 4
a 7 NA
b 1 NA
b 2 NA
b 3 NA
b 4 NA
b 5 NA
b 6 1
b 7 NA")
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
df %>%
  group_by(ID) %>%
  mutate(
    Value = if_else(is.na(Value), pmax(lag(Value, 1), lag(Value, 2), lag(Value, 3), lag(Value, 4), na.rm = TRUE), Value)
  )
#> # A tibble: 14 x 3
#> # Groups: ID [2]
#> ID Week Value
#> <chr> <int> <int>
#> 1 a 1 5
#> 2 a 2 1
#> 3 a 3 5
#> 4 a 4 5
#> 5 a 5 3
#> 6 a 6 4
#> 7 a 7 4
#> 8 b 1 NA
#> 9 b 2 NA
#> 10 b 3 NA
#> 11 b 4 NA
#> 12 b 5 NA
#> 13 b 6 1
#> 14 b 7 1
# or if you are an rlang ninja
library(purrr)
pmax_lag_n <- function(column, n) {
  column <- enquo(column)
  1:n %>%
    map(~ quo(lag(!!column, !!.x))) %>%
    { quo(pmax(!!!., na.rm = TRUE)) }
}

df %>%
  group_by(ID) %>%
  mutate(Value = if_else(is.na(Value), !!pmax_lag_n(Value, 4), Value))
#> # A tibble: 14 x 3
#> # Groups: ID [2]
#> ID Week Value
#> <chr> <int> <int>
#> 1 a 1 5
#> 2 a 2 1
#> 3 a 3 5
#> 4 a 4 5
#> 5 a 5 3
#> 6 a 6 4
#> 7 a 7 4
#> 8 b 1 NA
#> 9 b 2 NA
#> 10 b 3 NA
#> 11 b 4 NA
#> 12 b 5 NA
#> 13 b 6 1
#> 14 b 7 1
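If the quasiquotation version feels heavy, the same lag/pmax idea can be wrapped in an ordinary helper function; max_prev_n below is a hypothetical name, sketched here rather than taken from the answer:
library(dplyr)
library(purrr)
# element-wise max of the n previous values, ignoring NAs
max_prev_n <- function(x, n = 4) {
  lags <- map(seq_len(n), ~ dplyr::lag(x, .x))  # list of shifted copies of x
  do.call(pmax, c(lags, na.rm = TRUE))
}
df %>%
  group_by(ID) %>%
  mutate(Value = if_else(is.na(Value), max_prev_n(Value, 4), Value))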
Define function Max which accepts a vector x and returns NA if all its elements are NA. Otherwise, if the last value is NA it returns the maximum of all non-NA elements and if the last value is not NA then it returns it.
Also define na.max which runs Max on a rolling window of length n (given by the second argument to na.max -- default 5).
Finally apply na.max to Value by ID using ave.
library(zoo)
Max <- function(x) {
  last <- tail(x, 1)
  if (all(is.na(x))) NA
  else if (is.na(last)) max(x, na.rm = TRUE)
  else last
}

na.max <- function(x, n = 5) rollapplyr(x, n, Max, partial = TRUE)
transform(DF, Value = ave(Value, ID, FUN = na.max))
giving:
ID Week Value
1 a 1 5
2 a 2 1
3 a 3 5
4 a 4 5
5 a 5 3
6 a 6 4
7 a 7 4
8 b 1 NA
9 b 2 NA
10 b 3 NA
11 b 4 NA
12 b 5 NA
13 b 6 1
14 b 7 1
Note: Input DF in reproducible form:
Lines <- "
ID Week Value
a 1 5
a 2 1
a 3 NA
a 4 NA
a 5 3
a 6 4
a 7 NA
b 1 NA
b 2 NA
b 3 NA
b 4 NA
b 5 NA
b 6 1
b 7 NA"
DF <- read.table(text = Lines, header = TRUE)
