I have an R tibble called my_data which is composed of either (1) zeros, or (2) numbers between zero and one:
> my_data
# A tibble: 30 x 40
s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 0 0 0.969 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 0 0 0 0
# ... with 20 more rows, and 27 more variables: s14 <dbl>, s15 <dbl>, s16 <dbl>,
# s17 <dbl>, s18 <dbl>, s19 <dbl>, s20 <dbl>, s21 <dbl>, s22 <dbl>, s23 <dbl>,
# s24 <dbl>, s25 <dbl>, s26 <dbl>, s27 <dbl>, s28 <dbl>, s29 <dbl>, s30 <dbl>,
# s31 <dbl>, s32 <dbl>, s33 <dbl>, s34 <dbl>, s35 <dbl>, s36 <dbl>, s37 <dbl>,
# s38 <dbl>, s39 <dbl>, s40 <dbl>
I want to replace all the non-zero numbers in my_data (such as 0.969 in column s7) with 1s at a certain probability where the numbers themselves are the probabilities, otherwise they get replaced with 0s. For example, there is a 0.969 probability that the number 0.969 (in the column named s7) will be replaced by 1, and a 0.031 probability that it will be replaced by 0.
I tried this but it doesn't work:
# Doesn't work:
my_data %>%
mutate_all(function(x) {
case_when(x == 0 ~ 0,
x > 0 ~ sample(0:1, size = 1, prob = c(1 - x, x)))
})
How do I do this? Should I be using purrr::map() (how?) or something else? Thank you!
Here is the dput() of my_data:
structure(list(s1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), s2 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0.956159271283707, 0), s3 = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.982878394164842,
0, 0, 0, 0, 0.982878394164842), s4 = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0.959674748019852, 0, 0, 0, 0, 0, 0, 0,
0, 0.959674748019852, 0, 0, 0, 0, 0), s5 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.876892497722416,
0, 0, 0, 0, 0, 0.876892497722416, 0, 0), s6 = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0.989641778880238, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0.989641778880238, 0, 0, 0, 0, 0), s7 = c(0.969355168732184,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.969355168732184,
0, 0, 0, 0, 0, 0, 0, 0), s8 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0.991517098892877, 0, 0, 0, 0, 0, 0, 0, 0, 0.991517098892877,
0, 0, 0, 0.991517098892877, 0.991517098892877), s9 = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.929770500656618, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.929770500656618, 0, 0), s10 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), s11 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.993637560789263,
0), s12 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0.949340969426271, 0, 0, 0.949340969426271,
0, 0), s13 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.919919910704918, 0), s14 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.937896138681889,
0, 0, 0.937896138681889, 0, 0, 0, 0, 0), s15 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.991517098892877, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.991517098892877), s16 = c(0.956159271283707,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.956159271283707, 0, 0,
0, 0, 0, 0, 0, 0.956159271283707, 0, 0, 0, 0, 0, 0, 0.956159271283707,
0.956159271283707), s17 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0.597187792371775, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0.597187792371775, 0), s18 = c(0.975209130375021, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.975209130375021, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.975209130375021), s19 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), s20 = c(0.937234650859115, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.937234650859115, 0, 0, 0,
0, 0, 0.937234650859115, 0, 0, 0, 0, 0, 0, 0, 0), s21 = c(0.929770500656618,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.929770500656618,
0, 0, 0, 0, 0, 0.929770500656618, 0, 0, 0, 0, 0, 0, 0.929770500656618,
0), s22 = c(0.929770500656618, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.929770500656618, 0, 0, 0, 0,
0, 0, 0, 0.929770500656618), s23 = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0.921445826350068), s24 = c(0.919919910704918, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), s25 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.993637560789263, 0),
s26 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.942968974602997, 0.942968974602997
), s27 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.959674748019852,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), s28 = c(0.999498946154851,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), s29 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0.988925875658174, 0), s30 = c(0, 0.975209130375021,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0.975209130375021, 0), s31 = c(0.986350500013957,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.986350500013957, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.986350500013957
), s32 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.942968974602997,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.942968974602997),
s33 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.927760110879459,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), s34 = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.919919910704918,
0, 0, 0, 0, 0, 0, 0, 0, 0.919919910704918, 0, 0, 0.919919910704918,
0, 0), s35 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.956159271283707,
0, 0, 0, 0, 0, 0, 0, 0.956159271283707, 0, 0, 0, 0, 0, 0,
0.956159271283707, 0), s36 = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0.991517098892877, 0, 0.991517098892877, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.991517098892877, 0.991517098892877,
0), s37 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0.919919910704918, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.919919910704918,
0), s38 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), s39 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0.999972102622884, 0, 0, 0, 0, 0), s40 = c(0.942968974602997,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0.942968974602997, 0, 0, 0, 0, 0, 0, 0, 0)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -30L))
You are trying to sample from a binomial distribution. Fortunately rbinom is vectorized regarding its prob parameter and you can avoid any R loops (for, apply, Vectorize, etc.).
m <- as.matrix(DF)
set.seed(42) #for reproducibility
m[m != 0] <- rbinom(sum(m != 0), 1, m[m != 0])
You could try :
library(tidyverse)
as_tibble(apply(df, c(1,2), function(x) sample(c(0,1),1,prob=c(1-x,x))))
It's usually discouraged to convert from matrix to data.frame but here it seems you really have a matrix formatted as a data.frame so I went for it.
To avoid the conversion you could do:
df %>% mutate_all(~ map_dbl(.,~sample(c(0,1),1,prob=c(1-.x,.x))))
The following will test the value before sampling, but I'm not sure if it'll be much faster or any faster:
df %>% mutate_all(~ map_if(.,~. != 0, ~sample(c(0,1),1,prob=c(1-.x,.x))) %>% unlist)
I would use runif:
df %>%
map_df(~ if_else(runif(length(.x)) < .x, 1, 0))
If you really want to use your custom function (with case_when) you can do
df %>%
rowwise() %>%
mutate_all(function(x) {
case_when(x == 0 ~ 0L,
x > 0 ~ sample(0:1, size = 1, prob = c(1 - x, x)))
})
Or
f = function(x) {
case_when(x == 0 ~ 0L,
x > 0 ~ sample(0:1, size = 1, prob = c(1 - x, x))) }
f = Vectorize(f)
df %>% mutate_all(f)
You had 2 issues with your approach.
1) Your function wasn't vectorized and was considering the whole columns of probabilities. The error was Error in mutate_impl(.data, dots) :
Evaluation error: incorrect number of probabilities. Using rowwise or vectorising your function will solve this.
2) case_when didn't return values of the same type. The error was Error in mutate_impl(.data, dots) :
Evaluation error: must be type double, not integer. Using 0L insted of 0 will solve this.
I have a matrix full of 1's and 0's. The columns represent samples and the rows represent chromosomes.
I would like to keep all rows that have consecutive 1's in them (ie at least two consecutive rows with a 1 in it). This has to be restricted per chromosome (so that consecutive 1's between two chromosomes is not counted).
I would like to do this for each column in the matrix.
My data is as follows:
chr leftPos OC_030_ST.res OC_031_WG.res
1 4324 0 1
1 23433 1 1
1 34436 1 0
1 64755 1 1
3 234 1 0
3 354 0 1
4 1666 0 1
4 4565 0 1
5 34777 1 1
7 2345 1 1
7 4567 1 1
and the output should be:
chr leftPos OC_030_ST.res OC_031_WG.res
1 4324 0 1
1 23433 1 1
1 34436 1 0
1 64755 1 0
3 234 0 0
3 354 0 0
4 1666 0 1
4 4565 0 1
5 34777 0 0
7 2345 1 1
7 4567 1 1
I don't know how to compare consecutive rows according to chromosome. I imagine I could group by dplyr and somehow compare rows but the comparison is a bit beyond me.
EDIT
Using dput actual data
structure(list(chr = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), leftPos = c(240000,
1080000, 1200000, 1320000, 1440000, 1800000, 2400000, 2520000,
3120000, 3360000, 3480000, 3600000, 3720000, 4200000, 4560000,
4920000, 5040000, 5160000, 5280000, 6000000, 7080000, 7200000,
7320000, 7440000, 7560000, 7680000, 7800000, 8280000, 8400000,
8520000, 8760000, 9120000, 9720000, 9840000, 9960000, 10080000,
10200000, 10320000, 10440000, 10560000, 10800000, 11040000, 11160000,
11280000, 11400000, 11520000, 11760000, 11880000, 12000000, 12120000
), chr.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), leftPos.res = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), OC_AH_026C.res = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), OC_AH_026C.1.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_AH_026C.2.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_AH_084C.res = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), OC_AH_086C.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_AH_086C.1.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_AH_086C.2.res = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), OC_AH_086C.3.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), OC_AH_088C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_AH_094C.res = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), OC_AH_094C.1.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_AH_094C.2.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_AH_094C.3.res = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), OC_AH_094C.4.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_AH_094C.5.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_AH_094C.6.res = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), OC_AH_094C.7.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_AH_096C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_AH_100C.res = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), OC_AH_100C.1.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_AH_127C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_AH_133C.res = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), OC_ED_008C.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_ED_008C.1.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0), OC_ED_008C.2.res = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0), OC_ED_008C.3.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_ED_016C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_ED_031C.res = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), OC_ED_036C.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_GS_001C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_QE_062C.res = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), OC_RS_010C.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_RS_027C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_RS_027C.1.res = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), OC_RS_027C.2.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_SH_051C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_ST_014C.res = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), OC_ST_014C.1.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_ST_020C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_ST_024C.res = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), OC_ST_033C.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_ST_034C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_ST_034C.1.res = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), OC_ST_034C.2.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_ST_035C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_ST_036C.res = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), OC_ST_040C.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_WG_002C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), OC_WG_005C.res = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), OC_WG_006C.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OC_WG_019C.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), Type.res = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ZSSLX.10457.FastSeqA.BloodDMets_16AF_AHMMH.s_1.r_1.fq.gz.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), ZSSLX.10457.FastSeqB.BloodDMets_13AF_AHMMH.s_1.r_1.fq.gz.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), ZSSLX.10457.FastSeqC.BloodDMets_16AF_AHMMH.s_1.r_1.fq.gz.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0), ZSSLX.10457.FastSeqD.BloodDMets_27AF_AHMMH.s_1.r_1.fq.gz.res = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0), Means.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
sd.res = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), count = 1:50), .Names = c("chr",
"leftPos", "chr.res", "leftPos.res", "OC_AH_026C.res", "OC_AH_026C.1.res",
"OC_AH_026C.2.res", "OC_AH_084C.res", "OC_AH_086C.res", "OC_AH_086C.1.res",
"OC_AH_086C.2.res", "OC_AH_086C.3.res", "OC_AH_088C.res", "OC_AH_094C.res",
"OC_AH_094C.1.res", "OC_AH_094C.2.res", "OC_AH_094C.3.res", "OC_AH_094C.4.res",
"OC_AH_094C.5.res", "OC_AH_094C.6.res", "OC_AH_094C.7.res", "OC_AH_096C.res",
"OC_AH_100C.res", "OC_AH_100C.1.res", "OC_AH_127C.res", "OC_AH_133C.res",
"OC_ED_008C.res", "OC_ED_008C.1.res", "OC_ED_008C.2.res", "OC_ED_008C.3.res",
"OC_ED_016C.res", "OC_ED_031C.res", "OC_ED_036C.res", "OC_GS_001C.res",
"OC_QE_062C.res", "OC_RS_010C.res", "OC_RS_027C.res", "OC_RS_027C.1.res",
"OC_RS_027C.2.res", "OC_SH_051C.res", "OC_ST_014C.res", "OC_ST_014C.1.res",
"OC_ST_020C.res", "OC_ST_024C.res", "OC_ST_033C.res", "OC_ST_034C.res",
"OC_ST_034C.1.res", "OC_ST_034C.2.res", "OC_ST_035C.res", "OC_ST_036C.res",
"OC_ST_040C.res", "OC_WG_002C.res", "OC_WG_005C.res", "OC_WG_006C.res",
"OC_WG_019C.res", "Type.res", "ZSSLX.10457.FastSeqA.BloodDMets_16AF_AHMMH.s_1.r_1.fq.gz.res",
"ZSSLX.10457.FastSeqB.BloodDMets_13AF_AHMMH.s_1.r_1.fq.gz.res",
"ZSSLX.10457.FastSeqC.BloodDMets_16AF_AHMMH.s_1.r_1.fq.gz.res",
"ZSSLX.10457.FastSeqD.BloodDMets_27AF_AHMMH.s_1.r_1.fq.gz.res",
"Means.res", "sd.res", "count"), row.names = c(NA, 50L), class = "data.frame")
Here's a solution applying a function across chr values using the by = argument to data.table. Non-adjacent sequences are located using rle(). Should be fast too.
First, here is the data as I input it:
df <- read.table(textConnection(
"chr leftPos OC_030_ST.res OC_031_WG.res
1 4324 0 1
1 23433 1 1
1 34436 1 0
1 64755 1 1
3 234 1 0
3 354 0 1
4 1666 0 1
4 4565 0 1
5 34777 1 1
7 2345 1 1
7 4567 1 1"), header = TRUE)
Then the code to process the result:
# function to take an integer vector and make non-consecutive 1s into 0s
convertNonRuns <- function(booleanVec) {
rleVals <- rle(booleanVec)
makeZeroIndex1 <- which(rleVals$lengths == 1 & rleVals$values == 1)
makeZeroIndex2 <- sapply(makeZeroIndex1, function(x) cumsum(rleVals$lengths[1:x])[x])
if (length(makeZeroIndex2))
booleanVec[makeZeroIndex2] <- 0L
as.integer(booleanVec)
}
require(data.table)
dt <- data.table(df)
# use data.table's by command to convert runs within chr(omosome)
dt[, c("OC_030_ST.res", "OC_031_WG.res") :=
list(convertNonRuns(OC_030_ST.res), convertNonRuns(OC_031_WG.res)),
by = chr]
dt
## chr leftPos OC_030_ST.res OC_031_WG.res
## 1: 1 4324 0 1
## 2: 1 23433 1 1
## 3: 1 34436 1 0
## 4: 1 64755 1 0
## 5: 3 234 0 0
## 6: 3 354 0 0
## 7: 4 1666 0 1
## 8: 4 4565 0 1
## 9: 5 34777 0 0
## 10: 7 2345 1 1
## 11: 7 4567 1 1
Added
For the newly added dput data, this will work:
# select all variables OC*.res
varnamesToChange <- names(dt)[grep("^OC.*\\.res$", names(dt))]
dt[, varnamesToChange := lapply(varnamesToChange, function(x) dt[[x]]), by = chr]
I am using data.table version 1.9.6.
data.table solution, building on my initial ave solution, which is also below:
library(data.table)
setDT(dat)
for (nam in names(dat)[3:4]) {
dat[,
c(nam) := ((length((get(nam)==1)[get(nam)]) >= 2) & get(nam)==1)+0L,
by=list(chr, cumsum(get(nam)==0))
]
}
# chr leftPos OC_030_ST.res OC_031_WG.res
# 1: 1 4324 0 1
# 2: 1 23433 1 1
# 3: 1 34436 1 0
# 4: 1 64755 1 0
# 5: 3 234 0 0
# 6: 3 354 0 0
# 7: 4 1666 0 1
# 8: 4 4565 0 1
# 9: 5 34777 0 0
#10: 7 2345 1 1
#11: 7 4567 1 1
And my attempt using ave with a custom function:
fun <- function(x,grp,limit=2) {
runs <- ave(
x==1,
list(grp,cumsum(x==0)),
FUN=function(g) length(g[g]) >= limit
)
as.numeric(runs & x==1)
}
lapply(dat[3:4], fun, grp=dat$chr)
#$OC_030_ST.res
# [1] 0 1 1 1 0 0 0 0 0 1 1
#
#$OC_031_WG.res
# [1] 1 1 0 0 0 0 1 1 0 1 1
To overwrite your original data:
dat[3:4] <- lapply(dat[3:4], fun, grp=dat$chr)
f0(colNr,df) contains the row numbers in which the column df[,colNr] should change to 0. g(df) is the converted data frame.
f0 <- function( colNr, df )
{
col <- df[,colNr]
n1 <- which( col == 1 ) # The `1`-rows.
d0 <- which( diff(col) == 0 ) # Consecutive entries are equal.
dc0 <- which( diff(df[,1]) == 0 ) # Same chromosome.
m <- intersect( n1-1, intersect( d0, dc0 ) )
return ( setdiff( 1:nrow(df), union(m,m+1) ) )
}
g <- function( df )
{
for ( i in 3:ncol(df) ) { df[f0(i,df),i] <- 0 }
return ( df )
}
.
Example 1:
> df
chr leftPos OC_030_ST.res OC_031_WG.res
1 1 4324 0 1
2 1 23433 1 1
3 1 34436 1 0
4 1 64755 1 1
5 3 234 1 0
6 3 354 0 1
7 4 1666 0 1
8 4 4565 0 1
9 5 34777 0 1
10 7 2345 1 1
11 7 4567 1 1
> g(df)
chr leftPos OC_030_ST.res OC_031_WG.res
1 1 4324 0 1
2 1 23433 1 1
3 1 34436 1 0
4 1 64755 1 0
5 3 234 0 0
6 3 354 0 0
7 4 1666 0 1
8 4 4565 0 1
9 5 34777 0 0
10 7 2345 1 1
11 7 4567 1 1
>
Example 2:
> df
chr leftPos OC_030_ST.res OC_031_WG.res
1 1 4324 0 1
2 1 23433 1 1
3 1 34436 1 0
4 1 64755 1 1
5 3 234 1 0
6 3 354 1 1
7 4 1666 0 1
8 4 4565 1 1
9 5 34777 0 0
10 5 1234 1 0
11 7 2345 1 1
12 7 4567 1 1
> g(df)
chr leftPos OC_030_ST.res OC_031_WG.res
1 1 4324 0 1
2 1 23433 1 1
3 1 34436 1 0
4 1 64755 1 0
5 3 234 1 0
6 3 354 1 0
7 4 1666 0 1
8 4 4565 0 1
9 5 34777 0 0
10 5 1234 0 0
11 7 2345 1 1
12 7 4567 1 1
>
A simple trick can be to compare the original data set, say df, with its own copy df[-1,], which essentially takes the first row off.
Comparing (columnswise) df$OC_030_ST.res == df[-1,]$OC_030_ST.res (likewise for the others) gives back a true table where each element is being compared with its next one.
Perhaps you can make the next piece into a function and apply that per column per chromosome:
rand <- c(0,0,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0)
first=T
keep <- vector(length=length(rand),'numeric')
for (i in 1:length(rand)){
if (first == T){first=F;if ((rand[i] == 1) & (rand[i+1] == 1)){keep[i] <- 1}} #check if first is 1 and had neigbour 1
else if (rand[i] == 0){keep[i] <- 0} # if 0 than keep = 0
else if (i == length(rand)){if (rand[i-1] == 1){keep[i] <- 1}} #if last than check if 1 and neighbour is 1 than keep = 1
else if ((rand[i-1]==1) | (rand[i+1]==1)){keep[i] <- 1} #if 1 and has neighbour 1 than keep =1
}
Output:
[1] 0 0 0 1 1 1 0 0 0 0 0 0 1 1 1 0 0 1 1 0