How to gather every 4 columns, but variables may have different length - r

I have a data frame that is organized with 4 columns that repeats 145. The length of the columns are not the same.
I would like reshape the data frame so that I have just 4 columns. I also have to spread some data.
Here is an example of what my data looks like:
df<- data.frame(
id = c(rep("M",8), rep(NA, 2)),
day =c(rep(seq(1:4),2), rep(NA,2)),
parameter= c(rep("glu",4), rep("lac",4), rep(NA,2)),
value = c(rep(2,4), rep(0.5,4), rep(NA,2)),
id1 =c(rep("v",10)),
day1= c(rep(1,5), rep(2,3), rep(1,2)),
parameter1 = c(rep("glu", 8), rep("lac", 2)),
value1 = c(rep(2,8), rep(5,2)))
This is the result I want:
ideal.df<-data.frame(id =c(rep("M",10), rep("v", 10)),
day = c(rep(seq(1:4),2), rep(NA,2), rep(1,5), rep(2,3), rep(1,2)),
glu = c(rep(2,4), rep(NA,6), rep(1,8), rep(NA,2) ),
lac = c( rep(0.5,4), rep(NA,6),rep(5,2), rep(NA,8))
)

The expected output seems very strange. Below is some very ugly example code to generate a result that seems more logical,
but apparently isn’t what the OP wants. Could the OP please explain why the result should be as posted (ideal.df) rather
than what is shown here (df2)?
Given data:
df<- data.frame(
id = c(rep("M",8), rep(NA, 2)),
day =c(rep(seq(1:4),2), rep(NA,2)),
parameter= c(rep("glu",4), rep("lac",4), rep(NA,2)),
value = c(rep(2,4), rep(0.5,4), rep(NA,2)),
id1 =c(rep("v",10)),
day1= c(rep(1,5), rep(2,3), rep(1,2)),
parameter1 = c(rep("glu", 8), rep("lac", 2)),
value1 = c(rep(2,8), rep(5,2)))
df
#> id day parameter value id1 day1 parameter1 value1
#> 1 M 1 glu 2.0 v 1 glu 2
#> 2 M 2 glu 2.0 v 1 glu 2
#> 3 M 3 glu 2.0 v 1 glu 2
#> 4 M 4 glu 2.0 v 1 glu 2
#> 5 M 1 lac 0.5 v 1 glu 2
#> 6 M 2 lac 0.5 v 2 glu 2
#> 7 M 3 lac 0.5 v 2 glu 2
#> 8 M 4 lac 0.5 v 2 glu 2
#> 9 <NA> NA <NA> NA v 1 lac 5
#> 10 <NA> NA <NA> NA v 1 lac 5
Result of simple processing of the data:
df2 <- data.frame(id = c(df$id, df$id1), day = c(df$day, df$day1))
df2$glu <- c(ifelse(df$parameter=="glu",df$value,NA), ifelse(df$parameter1=="glu",df$value1,NA))
df2$lac <- c(ifelse(df$parameter=="lac",df$value,NA), ifelse(df$parameter1=="lac",df$value1,NA))
df2
#> id day glu lac
#> 1 1 1 2 NA
#> 2 1 2 2 NA
#> 3 1 3 2 NA
#> 4 1 4 2 NA
#> 5 1 1 NA 0.5
#> 6 1 2 NA 0.5
#> 7 1 3 NA 0.5
#> 8 1 4 NA 0.5
#> 9 NA NA NA NA
#> 10 NA NA NA NA
#> 11 1 1 2 NA
#> 12 1 1 2 NA
#> 13 1 1 2 NA
#> 14 1 1 2 NA
#> 15 1 1 2 NA
#> 16 1 2 2 NA
#> 17 1 2 2 NA
#> 18 1 2 2 NA
#> 19 1 1 NA 5.0
#> 20 1 1 NA 5.0
Desired result:
ideal.df<-data.frame(id =c(rep("M",10), rep("v", 10)),
day = c(rep(seq(1:4),2), rep(NA,2), rep(1,5), rep(2,3), rep(1,2)),
glu = c(rep(2,4), rep(NA,6), rep(1,8), rep(NA,2) ),
lac = c( rep(0.5,4), rep(NA,6),rep(5,2), rep(NA,8))
)
ideal.df
#> id day glu lac
#> 1 M 1 2 0.5
#> 2 M 2 2 0.5
#> 3 M 3 2 0.5
#> 4 M 4 2 0.5
#> 5 M 1 NA NA
#> 6 M 2 NA NA
#> 7 M 3 NA NA
#> 8 M 4 NA NA
#> 9 M NA NA NA
#> 10 M NA NA NA
#> 11 v 1 1 5.0
#> 12 v 1 1 5.0
#> 13 v 1 1 NA
#> 14 v 1 1 NA
#> 15 v 1 1 NA
#> 16 v 2 1 NA
#> 17 v 2 1 NA
#> 18 v 2 1 NA
#> 19 v 1 NA NA
#> 20 v 1 NA NA

Related

How to Filter by group and move all values to new column if any value in any of the affected columns is greater than 5 in R

I have a Datafaame like this:
dt <- tibble(
TRIAL = c("A", "A", "A", "B", "B", "B", "C", "C", "C","D","D","D"),
RL = c(1, NA, 3, 1, 6, 3, 2, 3, 1, 0, 1.5, NA),
SL = c(6, 1.5, 1, 0, 0, 1, 1, 2, 0, 1, 1.5, NA),
HC = c(0, 1, 5, 6,7, 8, 9, 3, 4, 5, 4, 2)
)
# A tibble: 12 x 4
TRIAL RL SL HC
<chr> <dbl> <dbl> <dbl>
1 A 1 6 0
2 A NA 1.5 1
3 A 3 1 5
4 B 1 0 6
5 B 6 0 7
6 B 3 1 8
7 C 2 1 9
8 C 3 2 3
9 C 1 0 4
10 D 0 1 5
11 D 1.5 1.5 4
12 D NA NA 2
I want to group the data frame by TRIAL and have the values in RL and SL checked by group, if the value in either of the column is greater than 5 then move all values for RL and SL for that particular group to RLCT and SLCT respectively.
# A tibble: 12 x 6
TRIAL HC RLCT SLCT SL RL
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A 0 1 6 NA NA
2 A 1 NA 1.5 NA NA
3 A 5 3 1 NA NA
4 B 6 1 0 NA NA
5 B 7 6 0 NA NA
6 B 8 3 1 NA NA
7 C 9 NA NA 1 3
8 C 3 NA NA 3 5
9 C 4 NA NA 1 1
10 D 5 NA NA 1 0
11 D 4 NA NA 1.5 1.5
12 D 2 NA NA NA NA
When I run the below code, I did not get the expected output
dt0 <- dt %>%
mutate(RLCT = NA,
SLCT = NA) %>%
group_by(TRIAL) %>%
filter(!any(RL > 5.0 | SL > 5.0))
dt1 <- dt %>%
group_by(TRIAL) %>%
filter(any(RL > 5.0 | SL > 5.0)) %>%
mutate(RLCT = RL,
SLCT = SL) %>%
rbind(dt0, .) %>%
mutate(RL = ifelse(!is.na(RLCT), NA, RL),
SL = ifelse(!is.na(SLCT), NA, SL)) %>% arrange(TRIAL)
This is what I get
# A tibble: 9 x 6
# Groups: TRIAL [3]
TRIAL RL SL HC RLCT SLCT
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A NA NA 0 1 6
2 A NA NA 1 NA 1.5
3 A NA NA 5 3 1
4 B NA NA 6 1 0
5 B NA NA 7 6 0
6 B NA NA 8 3 1
7 C 2 1 9 NA NA
8 C 3 2 3 NA NA
9 C 1 0 4 NA NA
You can define a column to storage the condition, and change RL and SL with ifelse inside across.
dt %>%
group_by(TRIAL) %>%
mutate(cond = any(RL > 5.0 | SL > 5.0, na.rm = TRUE),
across(c(RL, SL), ~ ifelse(cond, ., NA), .names = "{.col}CT"),
across(c(RL, SL), ~ ifelse(!cond, ., NA)),
cond = NULL)
Result:
# A tibble: 12 x 6
# Groups: TRIAL [4]
TRIAL RL SL HC RLCT SLCT
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A NA NA 0 1 6
2 A NA NA 1 NA 1.5
3 A NA NA 5 3 1
4 B NA NA 6 1 0
5 B NA NA 7 6 0
6 B NA NA 8 3 1
7 C 2 1 9 NA NA
8 C 3 2 3 NA NA
9 C 1 0 4 NA NA
10 D 0 1 5 NA NA
11 D 1.5 1.5 4 NA NA
12 D NA NA 2 NA NA
With dplyr, you could use group_modify():
library(dplyr)
dt %>%
group_by(TRIAL) %>%
group_modify(~ {
if(any(select(.x, c(RL, SL)) > 5, na.rm = TRUE)) {
rename_with(.x, ~ paste0(.x, 'CT'), c(RL, SL))
} else {
.x
}
})
Output
# A tibble: 12 × 6
# Groups: TRIAL [4]
TRIAL RLCT SLCT HC RL SL
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A 1 6 0 NA NA
2 A NA 1.5 1 NA NA
3 A 3 1 5 NA NA
4 B 1 0 6 NA NA
5 B 6 0 7 NA NA
6 B 3 1 8 NA NA
7 C NA NA 9 2 1
8 C NA NA 3 3 2
9 C NA NA 4 1 0
10 D NA NA 5 0 1
11 D NA NA 4 1.5 1.5
12 D NA NA 2 NA NA

complete dplyr with missing ID

I understand how to use complete from tidyverse tidyverse complete a dataframe
In the example they give:
df <- tibble(
group = c(1:2, 1, 2),
item_id = c(1:2, 2, 3),
item_name = c("a", "a", "b", "b"),
value1 = c(1, NA, 3, 4),
value2 = 4:7
)
df
#> # A tibble: 4 × 5
#> group item_id item_name value1 value2
#> <dbl> <dbl> <chr> <dbl> <int>
#> 1 1 1 a 1 4
#> 2 2 2 a NA 5
#> 3 1 2 b 3 6
#> 4 2 3 b 4 7
Is there a way of adding a group and completing? e.g. add a group 3 and complete the table.
For example, I have a df which I populate in a for loop to make a plot. The df is like so:
variant Location Position variable value protein Mutation.type
FANCI_L605F FANCI chr15:89828441_C/T B 0.45 L605F nonsynonymous_SNV
PLCG2_R953* PLCG2 chr16:81969788_C/T B 0.87 R953* stopgain
STAT3_R278C STAT3 chr17:40486033_G/A B 0.38 R278C nonsynonymous_SN
FANCI_L605F FANCI chr15:89828441_C/T C 0.45 L605F nonsynonymous_SNV
PLCG2_R953* PLCG2 chr16:81969788_C/T C 0.87 R953* stopgain
STAT3_R278C STAT3 chr17:40486033_G/A C 0.38 R278C nonsynonymous_SNV
I also have a vector of possible variable names:
all_var<-c("A","B","C")
I have worked out how to add any missing variables (I think):
new_df<-complete(df,variable=all_var,Position)
>new_df
variant Location Position variable value protein Mutation.type
NA NA chr15:89828441_C/T A NA NA NA
NA NA chr16:81969788_C/T A NA NA NA
NA NA chr17:40486033_G/A A NA NA NA
FANCI_L605F FANCI chr15:89828441_C/T B 0.45 L605F nonsynonymous_SNV
PLCG2_R953* PLCG2 chr16:81969788_C/T B 0.87 R953* stopgain
STAT3_R278C STAT3 chr17:40486033_G/A B 0.38 R278C nonsynonymous_SN
FANCI_L605F FANCI chr15:89828441_C/T C 0.45 L605F nonsynonymous_SNV
PLCG2_R953* PLCG2 chr16:81969788_C/T C 0.87 R953* stopgain
STAT3_R278C STAT3 chr17:40486033_G/A C 0.38 R278C nonsynonymous_SNV
How do I now complete the variant,Location, protein, Mutation.Type?
You can use add a row by specifying the group and use complete() to complete the combinations, i.e.
library(dplyr)
library(tidyr)
df %>%
add_row(group = 3) %>%
complete(group, nesting(item_id, item_name)) %>%
drop_na(item_id)
# A tibble: 12 x 5
group item_id item_name value1 value2
<dbl> <dbl> <chr> <dbl> <int>
1 1 1 a 1 4
2 1 2 a NA NA
3 1 2 b 3 6
4 1 3 b NA NA
5 2 1 a NA NA
6 2 2 a NA 5
7 2 2 b NA NA
8 2 3 b 4 7
9 3 1 a NA NA
10 3 2 a NA NA
11 3 2 b NA NA
12 3 3 b NA NA

Vectorialised column addition [duplicate]

This question already has answers here:
How can I automatically create n lags in a timeseries?
(3 answers)
Closed 1 year ago.
Given this tibble:
tibble(x = c(1:9))
I want to add a column x_lag_1 = c(NA,1:8), a column x_lag_2 = c(NA,NA,1:7), etc.
Up to x_lag_n.
This can be quick with data.table:
library(data.table)
n <- seq(4)
setDT(df)[, paste0('x_lag_', n) := shift(x, n)]
df
x x_lag_1 x_lag_2 x_lag_3 x_lag_4
1: 1 NA NA NA NA
2: 2 1 NA NA NA
3: 3 2 1 NA NA
4: 4 3 2 1 NA
5: 5 4 3 2 1
6: 6 5 4 3 2
7: 7 6 5 4 3
8: 8 7 6 5 4
9: 9 8 7 6 5
You may use map_dfc to add n new columns.
library(dplyr)
library(purrr)
df <- tibble(x = c(1:9))
n <- 3
bind_cols(df, map_dfc(seq_len(n), ~df %>%
transmute(!!paste0('x_lag', .x) := lag(x, .x))))
# x x_lag1 x_lag2 x_lag3
# <int> <int> <int> <int>
#1 1 NA NA NA
#2 2 1 NA NA
#3 3 2 1 NA
#4 4 3 2 1
#5 5 4 3 2
#6 6 5 4 3
#7 7 6 5 4
#8 8 7 6 5
#9 9 8 7 6
Edit 2: Reworked the answer to contemplate the case of a grouped df.
library(tidyverse)
set.seed(123)
df <- tibble(group = sample(letters[1:3], 30, replace = TRUE), x = c(1:30))
formulas <- seq(3, 12, 3) %>%
map(~ as.formula(str_glue("~lag(.,n={.x})"))) %>%
set_names(str_c("lag", seq(3, 12, 3)))
df %>%
summarise(x, across(x, lst(!!!formulas)))
#> # A tibble: 30 × 5
#> x x_lag3 x_lag6 x_lag9 x_lag12
#> <int> <int> <int> <int> <int>
#> 1 1 NA NA NA NA
#> 2 2 NA NA NA NA
#> 3 3 NA NA NA NA
#> 4 4 1 NA NA NA
#> 5 5 2 NA NA NA
#> 6 6 3 NA NA NA
#> 7 7 4 1 NA NA
#> 8 8 5 2 NA NA
#> 9 9 6 3 NA NA
#> 10 10 7 4 1 NA
#> # … with 20 more rows
df %>%
group_by(group) %>%
summarise(x, across(x, lst(!!!formulas)), .groups = "drop")
#> # A tibble: 30 × 6
#> group x x_lag3 x_lag6 x_lag9 x_lag12
#> <chr> <int> <int> <int> <int> <int>
#> 1 a 10 NA NA NA NA
#> 2 a 13 NA NA NA NA
#> 3 a 16 NA NA NA NA
#> 4 a 19 10 NA NA NA
#> 5 a 20 13 NA NA NA
#> 6 a 21 16 NA NA NA
#> 7 a 22 19 10 NA NA
#> 8 a 27 20 13 NA NA
#> 9 b 4 NA NA NA NA
#> 10 b 6 NA NA NA NA
#> # … with 20 more rows
Created on 2021-12-30 by the reprex package (v2.0.1)

Group_by id and count the consective NA's and then restart counting when a new series of NA's is encountered

I have a dataframe like this:
df <- data_frame(id = c(rep('A', 10), rep('B', 10)),
value = c(1:3, rep(NA, 2), 1:2, rep(NA, 3), 1, rep(NA, 4), 1:3, rep(NA, 2)))
I need to count the number of consective NA's in the value column. The count needs to be grouped by ID, and it needs to restart at 1 every time a new NA or new series of NA's is encountered. The exptected output should look like this:
df$expected_output <- c(rep(NA, 3), 1:2, rep(NA, 2), 1:3, NA, 1:4, rep(NA, 3), 1:2)
If anyone can give me a dplyr solution that would also be great :)
I've tried a few things but nothing is giving any sort of sensical result. Thanks in advance^!
A solution using dplyr and data.table.
library(dplyr)
library(data.table)
df2 <- df %>%
group_by(id) %>%
mutate(info = rleid(value)) %>%
group_by(id, info) %>%
mutate(expected_output = row_number()) %>%
ungroup() %>%
mutate(expected_output = ifelse(!is.na(value), NA, expected_output)) %>%
select(-info)
df2
# # A tibble: 20 x 3
# id value expected_output
# <chr> <dbl> <int>
# 1 A 1 NA
# 2 A 2 NA
# 3 A 3 NA
# 4 A NA 1
# 5 A NA 2
# 6 A 1 NA
# 7 A 2 NA
# 8 A NA 1
# 9 A NA 2
# 10 A NA 3
# 11 B 1 NA
# 12 B NA 1
# 13 B NA 2
# 14 B NA 3
# 15 B NA 4
# 16 B 1 NA
# 17 B 2 NA
# 18 B 3 NA
# 19 B NA 1
# 20 B NA 2
We can use rle to get length of groups that are or are not na, and use purrr::map2 to apply seq if they are NA and get the growing count or just fill in with NA values using rep.
library(tidyverse)
count_na <- function(x) {
r <- rle(is.na(x))
consec <- map2(r$lengths, r$values, ~ if (.y) seq(.x) else rep(NA, .x))
unlist(consec)
}
df %>%
mutate(expected_output = count_na(value))
#> # A tibble: 20 × 3
#> id value expected_output
#> <chr> <dbl> <int>
#> 1 A 1 NA
#> 2 A 2 NA
#> 3 A 3 NA
#> 4 A NA 1
#> 5 A NA 2
#> 6 A 1 NA
#> 7 A 2 NA
#> 8 A NA 1
#> 9 A NA 2
#> 10 A NA 3
#> 11 B 1 NA
#> 12 B NA 1
#> 13 B NA 2
#> 14 B NA 3
#> 15 B NA 4
#> 16 B 1 NA
#> 17 B 2 NA
#> 18 B 3 NA
#> 19 B NA 1
#> 20 B NA 2
Here is a solution using rle:
x <- rle(is.na(df$value))
df$new[is.na(df$value)] <- sequence(x$lengths[x$values])
# A tibble: 20 x 3
id value new
<chr> <dbl> <int>
1 A 1 NA
2 A 2 NA
3 A 3 NA
4 A NA 1
5 A NA 2
6 A 1 NA
7 A 2 NA
8 A NA 1
9 A NA 2
10 A NA 3
11 B 1 NA
12 B NA 1
13 B NA 2
14 B NA 3
15 B NA 4
16 B 1 NA
17 B 2 NA
18 B 3 NA
19 B NA 1
20 B NA 2
Yet another solution:
library(tidyverse)
df %>%
mutate(aux =data.table::rleid(value)) %>%
group_by(id, aux) %>%
mutate(eout = ifelse(is.na(value), row_number(), NA_real_)) %>%
ungroup %>% select(-aux)
#> # A tibble: 20 × 4
#> id value expected_output eout
#> <chr> <dbl> <int> <dbl>
#> 1 A 1 NA NA
#> 2 A 2 NA NA
#> 3 A 3 NA NA
#> 4 A NA 1 1
#> 5 A NA 2 2
#> 6 A 1 NA NA
#> 7 A 2 NA NA
#> 8 A NA 1 1
#> 9 A NA 2 2
#> 10 A NA 3 3
#> 11 B 1 NA NA
#> 12 B NA 1 1
#> 13 B NA 2 2
#> 14 B NA 3 3
#> 15 B NA 4 4
#> 16 B 1 NA NA
#> 17 B 2 NA NA
#> 18 B 3 NA NA
#> 19 B NA 1 1
#> 20 B NA 2 2

R - Replace missing values with highest of 4 previous values

This is a variation of the last observation carried forward problem in a vector with some missing values. Instead of filling in NA values with the last non NA observation, I would like to fill in NA values with the highest value in the 4 observations preceding it. If all 4 observations preceding are also NA, the NA missing value should be retained. Would also appreciate it this can be done by groups in a data frame/data table.
Example:
Original DF:
ID Week Value
a 1 5
a 2 1
a 3 NA
a 4 NA
a 5 3
a 6 4
a 7 NA
b 1 NA
b 2 NA
b 3 NA
b 4 NA
b 5 NA
b 6 1
b 7 NA
Output DF:
ID Week Value
a 1 5
a 2 1
a 3 5
a 4 5
a 5 3
a 6 4
a 7 4
b 1 NA
b 2 NA
b 3 NA
b 4 NA
b 5 NA
b 6 1
b 7 1
lag shifts the column by n steps and lets you peek at previous values. pmax is element-wise maximum and lets to pick the highest value for each set/row of the observations.
To abstract away notion of 4 and maintain vectorized performance, you may use quasiquotes from rlang: http://dplyr.tidyverse.org/articles/programming.html#quasiquotation
It can look a little cryptic at first but is very precise and expressive.
df <- readr::read_table(
" ID Week Value
a 1 5
a 2 1
a 3 NA
a 4 NA
a 5 3
a 6 4
a 7 NA
b 1 NA
b 2 NA
b 3 NA
b 4 NA
b 5 NA
b 6 1
b 7 NA")
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
df %>%
group_by(ID) %>%
mutate(
Value = if_else(is.na(Value), pmax(lag(Value, 1), lag(Value, 2), lag(Value, 3), lag(Value, 4), na.rm = TRUE), Value)
)
#> # A tibble: 14 x 3
#> # Groups: ID [2]
#> ID Week Value
#> <chr> <int> <int>
#> 1 a 1 5
#> 2 a 2 1
#> 3 a 3 5
#> 4 a 4 5
#> 5 a 5 3
#> 6 a 6 4
#> 7 a 7 4
#> 8 b 1 NA
#> 9 b 2 NA
#> 10 b 3 NA
#> 11 b 4 NA
#> 12 b 5 NA
#> 13 b 6 1
#> 14 b 7 1
# or if you are an rlang ninja
library(purrr)
pmax_lag_n <- function(column, n) {
column <- enquo(column)
1:n %>%
map(~quo(lag(!!column, !!.x))) %>%
{ quo(pmax(!!!., na.rm = TRUE)) }
}
df %>%
group_by(ID) %>%
mutate(Value = if_else(is.na(Value), !!pmax_lag_n(Value, 4), Value))
#> # A tibble: 14 x 3
#> # Groups: ID [2]
#> ID Week Value
#> <chr> <int> <int>
#> 1 a 1 5
#> 2 a 2 1
#> 3 a 3 5
#> 4 a 4 5
#> 5 a 5 3
#> 6 a 6 4
#> 7 a 7 4
#> 8 b 1 NA
#> 9 b 2 NA
#> 10 b 3 NA
#> 11 b 4 NA
#> 12 b 5 NA
#> 13 b 6 1
#> 14 b 7 1
Define function Max which accepts a vector x and returns NA if all its elements are NA. Otherwise, if the last value is NA it returns the maximum of all non-NA elements and if the last value is not NA then it returns it.
Also define na.max which runs Max on a rolling window of length n (given by the second argument to na.max -- default 5).
Finally apply na.max to Value by ID using ave.
library(zoo)
Max <- function(x) {
last <- tail(x, 1)
if (all(is.na(x))) NA
else if (is.na(last)) max(x, na.rm = TRUE)
else last
}
na.max <- function(x, n = 5) rollapplyr(x, n, Max, partial = TRUE)
transform(DF, Value = ave(Value, ID, FUN = na.max))
giving:
ID Week Value
1 a 1 5
2 a 2 1
3 a 3 5
4 a 4 5
5 a 5 3
6 a 6 4
7 a 7 4
8 b 1 NA
9 b 2 NA
10 b 3 NA
11 b 4 NA
12 b 5 NA
13 b 6 1
14 b 7 1
Note: Input DF in reproducible form:
Lines <- "
ID Week Value
a 1 5
a 2 1
a 3 NA
a 4 NA
a 5 3
a 6 4
a 7 NA
b 1 NA
b 2 NA
b 3 NA
b 4 NA
b 5 NA
b 6 1
b 7 NA"
DF <- read.table(text = Lines, header = TRUE)

Resources