Filter data for all event - r

I have a dataset df like this, which is the data collected from individuals using a repeating instrument:
ID <- c('A1', 'A1', 'A1', 'A1', 'A2', 'A2', 'A2', 'A2', 'A2', 'A2', 'A3', 'A3', 'A3', 'A3', 'A4', 'A4', 'A4', 'A4', 'A4', 'A4', 'A4', 'A5', 'A5', 'A5', 'A5', 'A5', 'A5')
day_stat <- c(2, 1, 1, 2, 2, 2, 2, 1, 1, 1, 2, NA, NA, NA, NA, NA, 1, 1, 1, NA, NA, 1, 2, 2, 2, 1, NA)
adm_dat <- c(NA, NA, NA, NA, NA, NA, NA, '2020-10-12', NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, '2020-10-18', NA, NA)
adm_ever <- c(NA, NA, NA, 1, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA)
df <- data.frame(ID, day_stat, adm_dat, adm_ever)
I am trying to filter the data like this:
df1 = df %>% filter(day_stat==1 | adm_dat!= NA | adm_ever==1)
Current result (not wanted):
Desired Output:
If one of these filter conditions is true for an ID, then keep all event data of that ID.

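Comparing with NA (adm_dat != NA) always returns NA, so test for missing values with is.na instead; to keep every row of an ID in which any row meets a condition, wrap the condition in any() after group_by: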
library(dplyr)
# Keep every row of an ID as soon as one of its rows meets any condition.
# Comparisons such as `day_stat == 1` yield NA for missing values, so
# `na.rm = TRUE` makes any() return a definite TRUE/FALSE for each group.
df %>%
  group_by(ID) %>%
  filter(
    any(day_stat == 1 | !is.na(adm_dat) | adm_ever == 1, na.rm = TRUE)
  ) %>%
  # Drop the grouping so later steps are not silently applied per ID.
  ungroup()
# ID day_stat adm_dat adm_ever
# <chr> <dbl> <chr> <dbl>
# 1 A1 2 NA NA
# 2 A1 1 NA NA
# 3 A1 1 NA NA
# 4 A1 2 NA 1
# 5 A2 2 NA NA
# 6 A2 2 NA NA
# 7 A2 2 NA NA
# 8 A2 1 2020-10-12 NA
# 9 A2 1 NA NA
#10 A2 1 NA 2
# … with 13 more rows

We can use data.table
library(data.table)
# Return all rows of each ID for which at least one row meets a condition.
# `if (...) .SD` emits the rows of the group when the test is TRUE and
# nothing otherwise; `na.rm = TRUE` is needed because `if` cannot branch
# on NA. Note that setDT() converts `df` to a data.table by reference.
setDT(df)[
  ,
  if (any(day_stat == 1 | !is.na(adm_dat) | adm_ever == 1, na.rm = TRUE)) .SD,
  by = ID
]

Related

reshaping multiple columns in R, based on name values

Df <- data.frame(prop1 = c(NA, NA, NA, "French", NA, NA,NA, "-29 to -20", NA, NA, NA, "Pop", NA, NA, NA, "French", "-29 to -20", "Pop"),
prop1_rank = c(NA, NA, NA, 0, NA, NA,NA, 11, NA, NA, NA, 1, NA, NA, NA, 40, 0, 2),
prop2 = c(NA, NA, NA, "Spanish", NA, NA,NA, "-19 to -10", NA, NA, NA, "Rock", NA, NA, NA, "Spanish", "-19 to -10", "Rock"),
prop2_rank = c(NA, NA, NA, 10, NA, NA,NA, 4, NA, NA, NA, 1, NA, NA, NA, 1, 0, 2),
initOSF1 = c(NA, NA, NA, NA, NA, "French", NA,NA,NA, "-29 to -20", NA, NA, NA, "Pop", NA, NA, NA, NA),
initOSF1_freq = c(NA, NA, NA, NA, NA, 66, NA,NA,NA, 0, NA, NA, NA, 14, NA, NA, NA, NA),
initOSF2 = c(NA, NA, NA, NA, NA, "Spanish", NA,NA,NA, "-19 to -10", NA, NA, NA, "Rock", NA, NA, NA, NA),
initOSF2_freq = c(NA, NA, NA, NA, NA, 0, NA,NA,NA, 6, NA, NA, NA, 14, NA, NA, NA, NA))
Df
I would like to organize this into
3 columns consisting: c("propositions", "ranks", "freqs"),
where,
Propositions column has the values: "French", "Spanish", "-29 to -20", "19 to -10", "Pop", "Rock", and having a separate columns for the rank values e.g., 0 for French, 10 for Spanish, etc., and frequency values e.g., 66 for French, 0 for Spanish, etc.
This is not an easy one. Probably a better solution exists:
library(tidyverse)
library(data.table)
# Reshape Df into long form with one row per proposition.
setDT(Df) %>%
# Keep only the columns whose names contain 'prop', 'rank' or 'freq'.
select(contains(c('prop', 'rank', 'freq'))) %>%
# Drop rows in which every remaining column is NA.
filter(!if_all(everything(), is.na)) %>%
# Stack the columns matched by each of the three patterns; per the printed
# result they end up in value1, value2 and value3.
melt(measure.vars = patterns(c('prop.$', 'rank$', 'freq'))) %>%
# Start a new group at every non-missing value1 entry.
group_by(gr=cumsum(!is.na(value1)))%>%
# In groups with more than one row keep the non-NA entries of each column;
# single-row groups are passed through unchanged.
summarise(across(-variable, ~if(length(.x)>1) na.omit(.x) else .x))
# A tibble: 12 x 4
gr value1 value2 value3
<int> <chr> <dbl> <dbl>
1 1 French 0 66
2 2 -29 to -20 11 0
3 3 Pop 1 14
4 4 French 40 NA
5 5 -29 to -20 0 NA
6 6 Pop 2 NA
7 7 Spanish 10 0
8 8 -19 to -10 4 6
9 9 Rock 1 14
10 10 Spanish 1 NA
11 11 -19 to -10 0 NA
12 12 Rock 2 NA

dplyr::starts_with and ends_with not subsetting based on arguments

I want to select a number of variables based on their names to transform them. The variable names all start with inq and end with 7, 8, 10, 13:15. This is not working for me... Apologies if this is obvious, but I cannot get it to work. Am I using the wrong functions, putting my functions and arguments together wrong, or something else?
A reproducible example:
structure(list(inq1_1 = c(NA, 7, 5, 1, 1, 6, 5, 2, NA, NA), inq1_2 = c(NA,
7, 5, 1, 1, 6, 5, 5, NA, NA), inq1_3 = c(NA, 6, 4, 2, 1, 5, 2,
1, NA, NA), inq1_4 = c(NA, 6, 6, 1, 1, 6, 5, 1, NA, NA), inq1_5 = c(NA,
7, 3, 1, 1, 6, 2, 1, NA, NA), inq1_6 = c(NA, 7, 4, 4, 2, 7, 2,
4, NA, NA), inq1_7 = c(NA, 2, 4, 6, 7, 3, 1, 7, NA, NA), inq1_8 = c(NA,
1, NA, 2, 7, 2, 1, 4, NA, NA), inq1_9 = c(NA, 4, 6, 3, 1, 3,
7, 1, NA, NA), inq1_10 = c(NA, 3, 5, 7, 4, 4, 2, 7, NA, NA),
inq1_11 = c(NA, 5, 4, 7, 1, 6, 7, 6, NA, NA), inq1_12 = c(NA,
7, 5, 7, 4, 6, 7, 2, NA, NA), inq1_13 = c(NA, 3, 4, 6, 4,
3, 4, 4, NA, NA), inq1_14 = c(NA, 3, 2, 4, 4, 2, 1, 4, NA,
NA), inq1_15 = c(NA, 2, 2, 3, 5, 2, 4, 4, NA, NA), inqfinal_1 = c(5,
NA, 3, NA, NA, NA, NA, NA, NA, NA), inqfinal_2 = c(5, NA,
3, NA, NA, NA, NA, NA, NA, NA), inqfinal_3 = c(6, NA, 3,
NA, NA, NA, NA, NA, NA, NA), inqfinal_4 = c(5, NA, 3, NA,
NA, NA, NA, NA, NA, NA), inqfinal_5 = c(5, NA, 3, NA, NA,
NA, NA, NA, NA, NA), inqfinal_6 = c(6, NA, 3, NA, NA, NA,
NA, NA, NA, NA), inqfinal_7 = c(4, NA, 3, NA, NA, NA, NA,
NA, NA, NA), inqfinal_8 = c(2, NA, 3, NA, NA, NA, NA, NA,
NA, NA), inqfinal_9 = c(5, NA, 3, NA, NA, NA, NA, NA, NA,
NA), inqfinal_10 = c(4, NA, 3, NA, NA, NA, NA, NA, NA, NA
), inqfinal_11 = c(6, NA, 4, NA, NA, NA, NA, NA, NA, NA),
inqfinal_12 = c(6, NA, 4, NA, NA, NA, NA, NA, NA, NA), inqfinal_13 = c(4,
NA, 3, NA, NA, NA, NA, NA, NA, NA), inqfinal_14 = c(2, NA,
2, NA, NA, NA, NA, NA, NA, NA), inqfinal_15 = c(2, NA, 2,
NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
I am trying to become tidy and utilising dplyr as per the code below:
# select specific columns
sf_df %>% select(starts_with("inq"),
ends_with(7, 8, 10, 13:15)) %>% view(title = "test")
Alas, I get the following error:
Error in ends_with(7, 8, 10, 13:15) : unused argument (13:15)
14. .f(.x[[i]], ...)
13. map(.x[sel], .f, ...)
12. map_if(ind_list, is_helper, eval_tidy)
11. vars_select_eval(.vars, quos)
10. tidyselect::vars_select(names(.data), !!!quos(...))
9. select.data.frame(., starts_with("inq"), ends_with(7, 8, 10, 13:15))
8. select(., starts_with("inq"), ends_with(7, 8, 10, 13:15))
7. function_list[[i]](value)
6. freduce(value, `_function_list`)
5. `_fseq`(`_lhs`)
4. eval(quote(`_fseq`(`_lhs`)), env, env)
3. eval(quote(`_fseq`(`_lhs`)), env, env)
2. withVisible(eval(quote(`_fseq`(`_lhs`)), env, env))
1. sf_df %>% select(starts_with("inq"), ends_with(7, 8, 10, 13:15)) %>% view(title = "test")
Any help would be greatly appreciated! Thank you in advance.
Cheers,
Atanas.
A better option would be matches, which matches a regex pattern against the column name. Here, it matches the pattern 'inq' at the beginning (^) of the column name and one of the listed numbers at the end ($) of the column name
# Select columns that start with "inq" and whose suffix after the last
# underscore is exactly one of 7, 8, 10, 13, 14 or 15. Anchoring on "_"
# keeps names such as "inq1_17" or "inq1_110" from matching by accident.
sf_df %>%
  select(matches("^inq.*_(7|8|10|13|14|15)$"))
# A tibble: 10 x 12
# inq1_7 inq1_8 inq1_10 inq1_13 inq1_14 inq1_15 inqfinal_7 inqfinal_8 inqfinal_10 inqfinal_13 inqfinal_14 inqfinal_15
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 NA NA NA NA NA NA 4 2 4 4 2 2
# 2 2 1 3 3 3 2 NA NA NA NA NA NA
# 3 4 NA 5 4 2 2 3 3 3 3 2 2
# 4 6 2 7 6 4 3 NA NA NA NA NA NA
# 5 7 7 4 4 4 5 NA NA NA NA NA NA
# 6 3 2 4 3 2 2 NA NA NA NA NA NA
# 7 1 1 2 4 1 4 NA NA NA NA NA NA
# 8 7 4 7 4 4 4 NA NA NA NA NA NA
# 9 NA NA NA NA NA NA NA NA NA NA NA NA
#10 NA NA NA NA NA NA NA NA NA NA NA NA
Note that passing both starts_with and ends_with to select may not give the result you expect. All 30 column names in the OP's dataset start with 'inq', so starts_with alone already returns every column, and adding ends_with only takes the union (an OR match) of the two selections, e.g.
sf_df %>%
select(starts_with("inq"), ends_with("5")) %>%
ncol
#[1] 30 # returns 30 columns
Columns whose names do not end in 5 are not removed.
Nor does this depend on the order of the arguments, as
sf_df %>%
select(ends_with("5"), starts_with("inq")) %>%
ncol
#[1] 30
Now, if we use only ends_with
sf_df %>%
select(ends_with("5")) %>%
ncol
#[1] 4
Based on the example, all columns starts with 'inq', so, ends_with alone would be sufficient for a single string match as the documentation for ?ends_with specifies
match - A string.
and not multiple strings
where the Usage is
starts_with(match, ignore.case = TRUE, vars = peek_vars())

r - merge rows in group while replacing NAs [duplicate]

This question already has answers here:
Collapsing rows where some are all NA, others are disjoint with some NAs
(5 answers)
Closed 4 years ago.
I was trying to find the answer to this but couldn't. If there is an answer I apologize and will immediately delete my question.
I'm trying to merge several rows into one (this calculation should be done separately on groups, in this case variable id can be used to group), so that no NA values are left.
# initial dataframe
df_start <- data.frame(
id = c("as", "as", "as", "as", "as", "bs", "bs", "bs", "bs", "bs"),
b = c(NA, NA, NA, NA, "A", NA, NA, 6, NA, NA),
c = c(2, NA, NA, NA, NA, 7, NA, NA, NA, NA),
d = c(NA, 4, NA, NA, NA, NA, 8, NA, NA, NA),
e = c(NA, NA, NA, 3, NA, NA, NA, NA, "B", NA),
f = c(NA, NA, 5, NA, NA, NA, NA, NA, NA, 10))
# desired output
df_end <- data.frame(id = c("as", "bs"),
b = c("A", 6),
c = c(2, 7),
d = c(4, 8),
e = c(3,"B"),
f = c(5, 10))
No need to delete the question, it may be helpful to some users. This summarises each group to the first non NA occurrence for each column.
library(dplyr)
df_start <- data.frame(
id = c("as", "as", "as", "as", "as", "bs", "bs", "bs", "bs", "bs"),
b = c(NA, NA, NA, NA, "A", NA, NA, 6, NA, NA),
c = c(2, NA, NA, NA, NA, 7, NA, NA, NA, NA),
d = c(NA, 4, NA, NA, NA, NA, 8, NA, NA, NA),
e = c(NA, NA, NA, 3, NA, NA, NA, NA, "B", NA),
f = c(NA, NA, 5, NA, NA, NA, NA, NA, NA, 10))
# Collapse each id to a single row holding the first non-missing value of
# every column. across() replaces the superseded summarise_all().
df_start %>%
  group_by(id) %>%
  summarise(across(everything(), ~ first(na.omit(.x))))
Output:
# A tibble: 2 x 6
id b c d e f
<fct> <fct> <dbl> <dbl> <fct> <dbl>
1 as A 2. 4. 3 5.
2 bs 6 7. 8. B 10.
You will, of course, lose data if a column has more than one non-NA value within a group, because only the first one is kept.
Hope this helps, Using dplyr
library(dplyr)

# Convert every column to character so numbers and letters can be pasted
# together, then replace the missing values with a blank. Assigning into
# df_start[] keeps it a data frame instead of coercing it to a matrix.
df_start[] <- lapply(df_start, as.character)
df_start[is.na(df_start)] <- " "

# Concatenate each column within id; trimws() strips the leading and
# trailing blanks left by the replaced NAs. A lambda inside across() is used
# because funs() is deprecated, and the result is assigned with `<-`.
df <- df_start %>%
  group_by(id) %>%
  summarise(across(everything(), ~ trimws(paste(.x, collapse = ""))))

How to delete rows that contain certain numbers

I'm working on a table that contains a lot of NAs and answers by numbering
and it looks like this
structure(list(ID = c(101, 102, 103, 104, 105, 106, 107, 108, 109, 110), a = c(NA, 9, NA, NA, NA, NA, NA, NA, NA, NA), b = c(NA, 10, 9, 9, NA, NA, 2, NA, NA,NA), c = c(NA, NA, NA, 9, 1, NA, NA, 4, 11, 9), d = c(NA, NA, NA, NA, 8, NA, NA, 7, 9, 9), e = c(NA, NA, NA, NA, 9, NA, NA, 8, NA, 9), f = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), g = c(NA, NA, NA, NA, NA, NA, NA, 9, NA, NA)), .Names = c("ID", "a", "b", "c", "d", "e", "f", "g"), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"))
And what I am trying to do is delete rows that only contains number 9.
In this case ID 103, 104, 110 would be the case. I want those 3 rows to be removed.
I tried the code below
df1[rowSums(df1[-1]==9)==0,]
But, with having NAs in the table, it only reproduces NA table.
Please help :( !
You can use apply to check for the whole row:
# Keep a row when it has no observed values at all, or when at least one of
# its observed (non-NA) values differs from 9.
df1[apply(df1[, -1], 1, function(row) {
  observed <- row[!is.na(row)]
  length(observed) == 0 || any(observed != 9)
}), ]
# ID a b c d e f g
# 1 101 NA NA NA NA NA NA NA
# 2 102 9 10 NA NA NA NA NA
# 5 105 NA NA 1 8 9 NA NA
# 6 106 NA NA NA NA NA NA NA
# 7 107 NA 2 NA NA NA NA NA
# 8 108 NA NA 4 7 8 NA 9
# 9 109 NA NA 11 9 NA NA NA
I use na.omit to get rid of the NA-values in each row and then check if all the remaining values are equal to 9.
There's probably a much more efficient way, but the following works, I believe:
# The first apply() gives 1 for rows whose non-NA entries all equal 9 (rows
# with no non-NA entries also give 1); the second factor is FALSE for rows
# made up entirely of NA. Their product is non-zero only for rows that hold
# nothing but 9s, and the outer `!` keeps every other row.
df1[!(apply(df1[-1] == 9, 1, prod, na.rm = TRUE) * !apply(is.na(df1[-1]), 1, prod)), ]
You can use the na.rm argument to ignore the NAs:
df1[rowSums(df1[-1]==9, na.rm = TRUE) == 0, ]
But also note that this code will only keep the rows that don't have any 9, which isn't exactly what you are asking for in the question.
edit after comment:
in that case simply flip:
# Keep rows with at least one non-NA value other than 9. Rows that are
# entirely NA contain no 9 either, so they are kept explicitly; without the
# second condition their count of non-9 values is 0 and they would be lost.
df1[rowSums(df1[-1] != 9, na.rm = TRUE) > 0 | rowSums(!is.na(df1[-1])) == 0, ]

Conditionally replace elements of a vector based on an index

It's best explained with an example.
I have a vector, or column from data.frame named vec:
vec <- c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA)
I would like a vectorized process (not a for loop) to change the three trailing NA when a 1 is observed.
The end vector would be:
c(NA, NA, 1, 1, 1, 1, NA, 1, 1, 1, 1, NA, NA, NA)
If we had:
vec <- c(NA, NA, 1, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA)
The end vector would look like:
c(NA, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, NA)
A very badly written solution is:
# Loop version: an element becomes 1 when it, or any of the (up to) three
# elements before it, is non-NA in the original vector. The loop reads from
# `vec` throughout and iterates with seq_along(), so it needs no extra
# package and no second variable.
vec2 <- vec
for (i in seq_along(vec)) {
  if (!is.na(vec[i])) vec2[i] <- 1
  if (i > 3) {
    if (!is.na(vec[i - 1])) vec2[i] <- 1
    if (!is.na(vec[i - 2])) vec2[i] <- 1
    if (!is.na(vec[i - 3])) vec2[i] <- 1
  }
  if (i == 3) {
    if (!is.na(vec[i - 1])) vec2[i] <- 1
    if (!is.na(vec[i - 2])) vec2[i] <- 1
  }
  if (i == 2) {
    if (!is.na(vec[i - 1])) vec2[i] <- 1
  }
}
Another option:
`[<-`(vec,c(outer(which(vec==1),1:3,"+")),1)
# [1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
Although the above works with the examples, it stretches the length of vec if a 1 is found in the last positions. Better to make a simple check and wrap into a function:
#' Fill the positions that follow each 1 with 1
#'
#' @param vec A vector in which occurrences of 1 mark the start of a run.
#' @param n Number of positions after each 1 to overwrite (default 3).
#' @return `vec`, with unchanged length, in which up to `n` elements after
#'   every 1 have been set to 1.
threeNAs <- function(vec, n = 3L) {
  ones <- which(vec == 1)
  # seq_len() rather than 1:n so that n = 0 leaves the vector untouched.
  ind <- c(outer(ones, seq_len(n), "+"))
  # Drop targets past the end; assigning to them would lengthen the vector.
  ind <- ind[ind <= length(vec)]
  vec[ind] <- 1
  vec
}
Another fast solution:
# Each position holding a 1 is repeated three times and the offsets 1:3 are
# recycled across the repeats, giving the three indices after every 1.
# Indices past the end are dropped so that vec keeps its length.
idx <- rep(which(vec == 1), each = 3) + 1:3
vec[idx[idx <= length(vec)]] <- 1
which gives:
> vec
[1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
Benchmarking is only really useful when done on larger datasets. A benchmark with a 10k larger vector and the several posted solutions:
library(microbenchmark)
# Time each posted solution on the 14-element example repeated 1e4 times.
# Every expression rebuilds vec first, so that setup is part of each timing.
microbenchmark(ans.jaap = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec[rep(which(vec == 1), each = 3) + c(1:3)] <- 1},
ans.989 = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
r <- which(vec==1);
vec[c(mapply(seq, r, r+3))] <- 1},
ans.sotos = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec[unique(as.vector(t(sapply(which(vec == 1), function(i) seq(i+1, length.out = 3)))))] <- 1},
ans.gregor = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec[is.na(vec)] <- 0;
n <- length(vec);
vec <- vec + c(0, vec[1:(n-1)]) + c(0, 0, vec[1:(n-2)]) + c(0, 0, 0, vec[1:(n-3)]);
vec[vec == 0] <- NA},
ans.moody = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
output <- sapply(1:length(vec),function(i){any(!is.na(vec[max(0,i-3):i]))});
output[output] <- 1;
output[output==0] <- NA},
ans.nicola = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
`[<-`(vec,c(outer(which(vec==1),1:3,"+")),1)})
which gives the following benchmark:
Unit: microseconds
expr min lq mean median uq max neval cld
ans.jaap 1778.905 1937.414 3064.686 2100.595 2257.695 86233.593 100 a
ans.989 87688.166 89638.133 96992.231 90986.269 93326.393 182431.366 100 c
ans.sotos 125344.157 127968.113 132386.664 130117.438 132951.380 214460.174 100 d
ans.gregor 4036.642 5824.474 10861.373 6533.791 7654.587 87806.955 100 b
ans.moody 173146.810 178369.220 183698.670 180318.799 184000.062 264892.878 100 e
ans.nicola 966.927 1390.486 1723.395 1604.037 1904.695 3310.203 100 a
What really is 'vectorised', if not a loop written in a C-language?
Here's a C++ loop that benchmarks well.
vec <- c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA)
library(Rcpp)
# Compile fixVec(): returns a copy of its argument in which up to three NA
# values following each 1 are replaced by 1.
cppFunction('NumericVector fixVec(NumericVector myVec) {
  // A NumericVector shares memory with the R vector it wraps, so work on a
  // clone; writing into myVec directly would also change the vector held
  // by the caller.
  NumericVector out = clone(myVec);
  int n = out.size();
  // 0: no 1 seen yet; 1 to 3: the next NA may still be filled; 4: used up.
  int foundCount = 0;
  for (int i = 0; i < n; i++) {
    if (out[i] == 1) foundCount = 1;
    if (ISNA(out[i])) {
      // Logical && rather than bitwise & to combine the two comparisons.
      if (foundCount >= 1 && foundCount <= 3) {
        out[i] = 1;
        foundCount++;
      }
    }
  }
  return out;
}')
fixVec(vec)
# [1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
Benchmarks
library(microbenchmark)
# Compare the two fastest R answers with the compiled fixVec() on the
# example vector repeated 1e4 times. The R answers use offsets 0:3 here, so
# the position of each 1 is assigned as well as the three after it.
microbenchmark(
ans.jaap = {
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec[rep(which(vec == 1), each = 4) + c(0:3)] <- 1
},
ans.nicola = {
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
`[<-`(vec,c(outer(which(vec==1),0:3,"+")),1)
},
ans.symbolix = {
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec <- fixVec(vec)
}
)
# Unit: microseconds
# expr min lq mean median uq max neval
# ans.jaap 2017.789 2264.318 2905.2437 2579.315 3588.4850 4667.249 100
# ans.nicola 1242.002 1626.704 3839.4768 2095.311 3066.4795 81299.962 100
# ans.symbolix 504.577 533.426 838.5661 718.275 966.9245 2354.373 100
# Check that fixVec() and the rep()-based indexing give identical results
# on the same repeated input.
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4)
vec <- fixVec(vec)
vec2 <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4)
vec2[rep(which(vec2 == 1), each = 4) + c(0:3)] <- 1
identical(vec, vec2)
# [1] TRUE
The following code does what you asked for. It involves "shifting" the vector and then adding the shifted versions
# Shift-and-add: replace NA by 0, then add the vector to copies of itself
# moved 1, 2 and 3 places to the right. A non-zero total marks a position
# that holds a 1 or lies within three places after one.
vec[is.na(vec)] <- 0
n <- length(vec)
# Pad with k zeros in front and drop the last k values. head() and min()
# keep this well defined when the vector has k or fewer elements, where
# vec[1:(n - k)] would index backwards.
shift_right <- function(x, k) c(rep(0, min(k, n)), head(x, max(n - k, 0)))
vec <- vec + shift_right(vec, 1) + shift_right(vec, 2) + shift_right(vec, 3)
# Set the hits to 1 before turning zeros into NA, so that neither logical
# index ever contains NA.
vec[vec != 0] <- 1
vec[vec == 0] <- NA
# vec | 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0 ,0, 0
# c(0, vec[1:(n-1)]) | + 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0 ,0, 0
# c(0, 0, vec[1:(n-2)]) | + 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0 ,0
# c(0,0,0,vec[1:(n-3)]) | + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0
# |-------------------------------------------
# | 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0
A non-Vectorized solution, but nevertheless, another option using base R,
# For every position holding a 1, build the three following indices with
# seq(), flatten and de-duplicate them, and set those elements to 1.
# NOTE(review): indices past the end of vec are not filtered out here, so a
# 1 in the last three positions would lengthen the vector.
vec[unique(as.vector(t(sapply(which(vec == 1), function(i) seq(i+1, length.out = 3)))))] <- 1
vec
#[1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
# NOTE(review): vec1 is not defined in this snippet; presumably it is the
# second example vector from the question (with an extra 1 at position 5);
# confirm before running.
vec1[unique(as.vector(t(sapply(which(vec1 == 1), function(i) seq(i+1, length.out = 3)))))] <- 1
vec1
#[1] NA NA 1 1 1 1 1 1 1 1 1 NA NA NA
How about this:
# Indices r, r + 1, r + 2 and r + 3 for every position r that holds a 1.
# outer() also copes with a vector that contains no 1 at all (mapply()
# returns a list there, which is not a valid subscript), and indices beyond
# the end are dropped so vec is never lengthened.
r <- which(vec == 1)
idx <- c(outer(r, 0:3, "+"))
vec[idx[idx <= length(vec)]] <- 1
Examples:
vec <- c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA)
#[1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
vec <- c(NA, NA, 1, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA)
#[1] NA NA 1 1 1 1 1 1 1 1 1 NA NA NA
With sapply, any, and is.na:
# TRUE where the element itself or any of the three before it is non-NA.
# seq_along() handles an empty vec (1:length(vec) would iterate over 1, 0),
# vapply() guarantees a logical result, and the window starts at index 1.
output <- vapply(
  seq_along(vec),
  function(i) any(!is.na(vec[max(1, i - 3):i])),
  logical(1)
)
# Recode TRUE/FALSE as 1/NA.
output <- as.numeric(output)
output[output == 0] <- NA

Resources