I looked for previous posts and found this answer to a similar question from Aug '15. The code is:
# Fit a logistic regression of y on x1, x2 and x3 (binomial family).
logit <- glm(y ~ x1 + x2 + x3, family = "binomial")
# With no newdata, predict() returns the linear predictor for the fitted rows.
predict(logit)
# Linear predictor for the rows of `data`: b0 + b1x1 + b2x2 + b3x3.
# `data` should be a data frame holding the predictor columns x1, x2 and x3.
pred <- predict(logit, newdata = data)
# Probability that y = 1 for each observation. plogis() is the inverse logit;
# unlike exp(pred) / (1 + exp(pred)) it does not give NaN when pred is large.
probs <- plogis(pred)
What I don't understand is how I get the newdata=data part of the code.
How do I specify the data in the newdata part of the code?
How do I assign these predicted values to a variable?
Sample data
structure(list(CustomerID = 1:400, binary_depvar = c(1L, NA,
1L, NA, 1L, NA, NA, NA, 0L, NA, 0L, NA, 0L, NA, 1L, 1L, 1L, NA,
NA, NA, NA, 1L, NA, NA, 1L, NA, NA, NA, NA, 1L, 1L, NA, 0L, 1L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L, 1L, NA, 0L, NA,
NA, 1L, NA, NA, 1L, NA, 1L, 1L, 0L, 1L, 0L, 0L, 0L, NA, NA, NA,
1L, NA, 0L, NA, NA, NA, 0L, 1L, NA, 0L, 0L, NA, 1L, NA, 1L, NA,
NA, 1L, 1L, 1L, NA, NA, NA, 1L, 0L, NA, NA, 0L, NA, NA, NA, NA,
0L, 1L, NA, NA, NA, NA, 0L, 0L, NA, NA, NA, 0L, 1L, NA, 0L, NA,
NA, 1L, NA, 0L, NA, 1L, NA, NA, 1L, NA, NA, 1L, 1L, 0L, NA, NA,
NA, 1L, 1L, NA, NA, NA, 1L, NA, 1L, NA, NA, NA, NA, 1L, NA, NA,
NA, 1L, NA, NA, 0L, 1L, 1L, 1L, NA, 0L, NA, NA, NA, NA, 1L, NA,
0L, 0L, NA, 0L, 0L, NA, NA, 0L, 1L, 1L, 0L, 1L, 1L, NA, NA, NA,
NA, NA, NA, NA, NA, 1L, 1L, 0L, 1L, NA, NA, 0L, NA, NA, NA, 1L,
NA, NA, NA, NA, NA, NA, 0L, 1L, 0L, 0L, 0L, 1L, 1L, NA, 0L, NA,
NA, 1L, 1L, 0L, 0L, 0L, NA, 0L, 0L, 1L, NA, 0L, NA, 0L, 1L, NA,
0L, 1L, 1L, 1L, 1L, NA, 0L, NA, NA, NA, NA, NA, NA, NA, NA, NA,
0L, 1L, NA, 0L, 0L, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 1L, 1L, 0L, NA, 1L, 0L, NA, 1L, NA, NA, 1L, 1L, 1L,
NA, 0L, 1L, 1L, 1L, 1L, NA, 0L, 0L, 1L, NA, NA, 1L, 1L, 0L, 1L,
NA, NA, NA, 0L, 1L, 1L, 1L, 0L, 0L, NA, 1L, 1L, NA, NA, NA, NA,
NA, NA, 0L, NA, 1L, 0L, 1L, NA, 1L, 0L, 0L, 1L, NA, NA, 1L, 1L,
1L, NA, 0L, 1L, 1L, NA, NA, 1L, NA, 0L, NA, NA, 1L, NA, NA, NA,
NA, 1L, NA, 0L, NA, 0L, NA, 0L, 1L, 1L, NA, 0L, 1L, NA, NA, 1L,
1L, 1L, NA, 1L, 0L, NA, NA, 0L, 0L, NA, NA, NA, 1L, 1L, NA, NA,
0L, 1L, NA, NA, 1L, 0L, 1L, NA, NA, NA, NA, NA, 1L, 1L, 0L, 0L,
NA, NA, NA, NA, 1L, NA, 1L, 0L, 1L, NA, NA, NA, 0L, 0L), binary_A = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L),
binary_B = c(1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L,
1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L,
0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L,
0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L),
binary_C = c(1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L,
0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L,
1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L),
categ_A = c(4L, 4L, 1L, 1L, 37L, 4L, 17L, 55L, 7L, 4L, 62L,
11L, 56L, 38L, 39L, 13L, 62L, 10L, 13L, 6L, 4L, 7L, 57L,
1L, 9L, 69L, 22L, 17L, 13L, 6L, 7L, 7L, 13L, 7L, 27L, 12L,
4L, 7L, 13L, 62L, 25L, 17L, 17L, 19L, 27L, 7L, 7L, 13L, 17L,
7L, 27L, 4L, 38L, 37L, 13L, 1L, 37L, 33L, 13L, 44L, 22L,
53L, 17L, 17L, 38L, 2L, 1L, 19L, 19L, 11L, 31L, 4L, 57L,
37L, 13L, 30L, 17L, 13L, 17L, 27L, 11L, 53L, 7L, 25L, 20L,
6L, 6L, 7L, 7L, 41L, 7L, 7L, 62L, 12L, 4L, 53L, 13L, 53L,
37L, 5L, 4L, 1L, 57L, 1L, 2L, 37L, 17L, 39L, 53L, 17L, 38L,
22L, 62L, 12L, 5L, 1L, 6L, 1L, 1L, 4L, 1L, 53L, 37L, 5L,
4L, 4L, 4L, 27L, 17L, 22L, 4L, 7L, 6L, 52L, 2L, 46L, 20L,
11L, 48L, 53L, 19L, 13L, 19L, 57L, 27L, 1L, 33L, 17L, 7L,
53L, 37L, 37L, 36L, 1L, 37L, 17L, 47L, 55L, 33L, 11L, 34L,
13L, 1L, 57L, 17L, 53L, 27L, 48L, 41L, 7L, 11L, 7L, 62L,
17L, 4L, 1L, 19L, 27L, 27L, 37L, 13L, 5L, 41L, 62L, 27L,
38L, 48L, 11L, 27L, 46L, 13L, 37L, 17L, 3L, 7L, 4L, 1L, 10L,
1L, 2L, 5L, 37L, 34L, 6L, 2L, 4L, 33L, 2L, 47L, 7L, 3L, 4L,
1L, 6L, 13L, 13L, 31L, 13L, 24L, 1L, 7L, 7L, 4L, 55L, 11L,
4L, 19L, 4L, 1L, 37L, 27L, 17L, 13L, 4L, 13L, 19L, 26L, 62L,
5L, 24L, 38L, 27L, 2L, 8L, 19L, 4L, 38L, 1L, 13L, 4L, 4L,
17L, 54L, 4L, 17L, 17L, 2L, 11L, 13L, 17L, 4L, 6L, 8L, 9L,
38L, 40L, 17L, 70L, 11L, 50L, 14L, 7L, 8L, 7L, 17L, 17L,
62L, 1L, 4L, 17L, 4L, 4L, 6L, 38L, 17L, 4L, 53L, 59L, 13L,
7L, 17L, 4L, 7L, 13L, 7L, 38L, 24L, 20L, 17L, 4L, 4L, 13L,
7L, 7L, 4L, 19L, 7L, 7L, 38L, 62L, 4L, 17L, 17L, 19L, 36L,
17L, 47L, 13L, 13L, 2L, 36L, 26L, 25L, 1L, 2L, 4L, 4L, 27L,
27L, 19L, 41L, 53L, 11L, 62L, 37L, 47L, 37L, 13L, 1L, 27L,
17L, 24L, 11L, 17L, 17L, 27L, 62L, 38L, 38L, 7L, 17L, 53L,
37L, 7L, 17L, 7L, 7L, 4L, 11L, 26L, 13L, 4L, 7L, 38L, 24L,
37L, 12L, 1L, 17L, 25L, 26L, 19L, 25L, 33L, 27L, 53L, 5L,
27L, 7L, 62L, 4L, 1L, 1L, 25L, 5L, 62L, 47L, 4L, 7L, 48L,
12L, 17L, 18L, 7L, 9L, 37L, 63L, 37L, 46L, 1L), categ_B = c(0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 3L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 3L, 1L,
0L, 0L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L,
1L, 3L, 0L, 2L, 1L, 0L, 1L, 0L, 2L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L,
1L, 0L, 2L, 0L, 0L, 0L, 2L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 2L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L,
1L, 0L, 3L, 0L, 3L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 0L,
0L, 0L, 3L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L,
0L, 3L, 0L, 0L, 2L, 0L, 1L, 1L, 1L, 2L, 3L, 1L, 0L, 3L, 0L,
0L, 0L, 1L, 3L, 0L, 3L, 0L, 0L, 0L, 1L, 3L, 0L, 0L, 0L, 0L,
0L, 0L, 3L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 2L, 3L, 1L, 3L, 0L,
0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L,
1L, 0L, 1L, 2L, 3L, 0L, 1L, 1L, 0L, 3L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 1L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L,
0L, 1L, 1L, 0L, 3L, 1L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 1L,
3L, 0L, 3L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 2L, 1L, 0L, 1L,
0L, 3L, 0L, 0L, 3L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 0L,
2L, 0L, 0L, 0L, 1L, 0L, 3L, 0L, 3L, 0L, 1L, 0L, 3L, 0L, 3L,
0L, 1L, 1L, 0L, 0L, 0L, 0L, 3L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 3L, 0L, 2L, 0L, 3L, 0L, 3L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 3L, 3L, 0L, 0L), binary_D = c(1L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L,
0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L), categ_C = c(2L, 1L, 1L,
1L, 6L, 3L, 9L, 8L, 3L, 2L, 0L, 2L, 0L, 3L, 0L, 0L, 10L,
3L, 5L, 8L, 4L, 6L, 0L, 2L, 2L, 0L, 6L, 6L, 0L, 2L, 1L, 8L,
7L, 3L, 3L, 5L, 2L, 2L, 9L, 0L, 6L, 4L, 6L, 7L, 3L, 0L, 0L,
4L, 0L, 4L, 0L, 1L, 7L, 8L, 6L, 1L, 6L, 6L, 7L, 10L, 6L,
7L, 4L, 3L, 3L, 2L, 2L, 3L, 5L, 1L, 5L, 5L, 8L, 7L, 2L, 6L,
5L, 8L, 6L, 4L, 5L, 5L, 7L, 8L, 3L, 4L, 6L, 8L, 2L, 0L, 3L,
4L, 0L, 4L, 3L, 7L, 4L, 8L, 10L, 1L, 5L, 1L, 0L, 1L, 1L,
3L, 0L, 7L, 6L, 2L, 4L, 7L, 0L, 4L, 5L, 3L, 5L, 1L, 2L, 1L,
1L, 6L, 6L, 2L, 1L, 3L, 3L, 1L, 4L, 4L, 2L, 3L, 2L, 0L, 2L,
7L, 5L, 4L, 10L, 9L, 5L, 5L, 9L, 0L, 0L, 1L, 0L, 7L, 10L,
7L, 3L, 0L, 1L, 1L, 9L, 0L, 2L, 5L, 4L, 3L, 2L, 0L, 1L, 0L,
4L, 7L, 6L, 0L, 3L, 0L, 0L, 0L, 0L, 2L, 1L, 2L, 0L, 9L, 3L,
0L, 6L, 9L, 0L, 8L, 0L, 6L, 8L, 9L, 0L, 8L, 1L, 3L, 4L, 1L,
0L, 4L, 2L, 2L, 1L, 0L, 0L, 3L, 5L, 3L, 2L, 2L, 0L, 3L, 8L,
4L, 2L, 2L, 2L, 9L, 0L, 0L, 3L, 0L, 0L, 1L, 6L, 7L, 1L, 7L,
2L, 3L, 0L, 1L, 2L, 3L, 0L, 0L, 7L, 5L, 0L, 1L, 0L, 0L, 5L,
4L, 5L, 6L, 1L, 2L, 1L, 5L, 2L, 2L, 5L, 4L, 2L, 4L, 4L, 2L,
5L, 6L, 3L, 0L, 8L, 8L, 3L, 1L, 7L, 2L, 6L, 10L, 2L, 0L,
5L, 5L, 9L, 5L, 7L, 5L, 2L, 0L, 0L, 1L, 2L, 6L, 4L, 4L, 2L,
1L, 3L, 3L, 0L, 0L, 4L, 4L, 9L, 1L, 0L, 4L, 6L, 8L, 1L, 3L,
1L, 1L, 1L, 8L, 5L, 0L, 2L, 0L, 8L, 5L, 9L, 4L, 1L, 2L, 0L,
3L, 0L, 0L, 0L, 0L, 0L, 2L, 5L, 0L, 4L, 2L, 1L, 4L, 3L, 6L,
1L, 0L, 0L, 4L, 0L, 7L, 9L, 0L, 9L, 8L, 1L, 5L, 0L, 3L, 3L,
9L, 0L, 10L, 0L, 0L, 2L, 0L, 7L, 8L, 7L, 2L, 0L, 6L, 7L,
4L, 4L, 0L, 6L, 2L, 4L, 5L, 0L, 7L, 3L, 1L, 10L, 6L, 5L,
2L, 10L, 0L, 2L, 0L, 1L, 5L, 5L, 4L, 3L, 3L, 3L, 8L, 1L,
0L, 3L, 3L, 5L, 6L, 1L, 6L, 6L, 0L, 1L, 0L, 0L, 6L, 10L,
2L), categ_D = c(1L, 2L, 4L, 8L, 5L, 2L, 4L, 5L, 4L, 3L,
4L, 3L, 6L, 2L, 3L, 3L, 7L, 2L, 4L, 7L, 8L, 3L, 8L, 4L, 10L,
2L, 5L, 2L, 1L, 8L, 3L, 2L, 3L, 2L, 2L, 4L, 2L, 6L, 3L, 1L,
9L, 5L, 4L, 3L, 5L, 8L, 2L, 4L, 5L, 2L, 5L, 2L, 4L, 6L, 7L,
1L, 6L, 3L, 3L, 9L, 5L, 2L, 8L, 3L, 6L, 3L, 8L, 3L, 5L, 3L,
4L, 4L, 5L, 2L, 1L, 7L, 5L, 5L, 6L, 5L, 1L, 1L, 1L, 7L, 4L,
5L, 7L, 9L, 3L, 3L, 2L, 2L, 1L, 4L, 1L, 9L, 2L, 8L, 3L, 4L,
1L, 6L, 2L, 2L, 2L, 2L, 1L, 8L, 5L, 1L, 3L, 3L, 1L, 3L, 4L,
3L, 2L, 2L, 2L, 2L, 2L, 7L, 5L, 2L, 2L, 3L, 4L, 5L, 7L, 4L,
1L, 3L, 5L, 3L, 5L, 4L, 3L, 1L, 7L, 9L, 6L, 6L, 6L, 3L, 3L,
3L, 2L, 5L, 3L, 3L, 2L, 4L, 9L, 5L, 4L, 3L, 6L, 5L, 6L, 3L,
8L, 6L, 2L, 2L, 9L, 1L, 2L, 9L, 3L, 2L, 1L, 1L, 3L, 4L, 8L,
3L, 4L, 6L, 2L, 3L, 3L, 10L, 6L, 2L, 3L, 3L, 7L, 2L, 6L,
9L, 5L, 3L, 2L, 2L, 2L, 3L, 4L, 4L, 3L, 3L, 1L, 4L, 5L, 1L,
4L, 3L, 1L, 1L, 5L, 3L, 5L, 3L, 1L, 7L, 1L, 6L, 2L, 2L, 1L,
4L, 4L, 2L, 2L, 7L, 3L, 7L, 4L, 2L, 2L, 3L, 2L, 7L, 5L, 2L,
5L, 6L, 3L, 1L, 9L, 7L, 4L, 3L, 1L, 5L, 1L, 1L, 3L, 1L, 3L,
10L, 3L, 3L, 8L, 3L, 5L, 3L, 5L, 3L, 4L, 5L, 3L, 9L, 4L,
2L, 3L, 8L, 5L, 8L, 2L, 9L, 5L, 4L, 4L, 6L, 6L, 2L, 1L, 6L,
7L, 6L, 7L, 2L, 8L, 7L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 5L,
2L, 2L, 6L, 4L, 3L, 3L, 4L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 2L,
5L, 1L, 1L, 4L, 1L, 6L, 9L, 4L, 2L, 1L, 2L, 2L, 9L, 1L, 9L,
2L, 2L, 1L, 5L, 4L, 4L, 4L, 9L, 8L, 5L, 1L, 5L, 10L, 6L,
8L, 2L, 3L, 5L, 7L, 1L, 4L, 3L, 4L, 3L, 3L, 7L, 1L, 6L, 5L,
3L, 3L, 1L, 7L, 2L, 2L, 5L, 1L, 1L, 5L, 3L, 6L, 6L, 4L, 3L,
2L, 4L, 7L, 4L, 3L, 8L, 2L, 1L, 4L, 2L, 5L, 3L, 1L, 2L, 2L,
4L, 9L, 1L, 4L, 4L, 6L, 5L, 5L, 5L, 4L, 8L, 3L, 6L, 1L, 6L,
6L, 7L, 3L), categ_E = c(4L, 4L, 1L, 1L, 9L, 3L, 1L, 10L,
1L, 8L, 8L, 1L, 1L, 1L, 9L, 1L, 10L, 1L, 5L, 2L, 5L, 6L,
2L, 1L, 7L, 9L, 6L, 5L, 1L, 3L, 1L, 4L, 6L, 6L, 7L, 7L, 1L,
6L, 8L, 1L, 9L, 6L, 9L, 1L, 4L, 5L, 4L, 6L, 8L, 4L, 5L, 5L,
8L, 8L, 1L, 3L, 9L, 7L, 7L, 7L, 6L, 6L, 7L, 5L, 8L, 1L, 1L,
7L, 1L, 4L, 9L, 6L, 9L, 1L, 6L, 2L, 6L, 6L, 8L, 1L, 1L, 7L,
5L, 8L, 5L, 4L, 3L, 8L, 5L, 8L, 3L, 7L, 9L, 4L, 5L, 3L, 7L,
7L, 8L, 1L, 1L, 1L, 2L, 1L, 4L, 6L, 6L, 3L, 1L, 5L, 6L, 7L,
8L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 10L, 9L, 1L, 5L, 5L,
7L, 5L, 1L, 6L, 7L, 1L, 6L, 2L, 1L, 7L, 7L, 1L, 8L, 10L,
1L, 6L, 6L, 4L, 7L, 5L, 2L, 7L, 1L, 9L, 8L, 7L, 9L, 6L, 7L,
7L, 9L, 8L, 2L, 7L, 9L, 7L, 1L, 4L, 7L, 5L, 8L, 8L, 2L, 7L,
1L, 4L, 9L, 1L, 7L, 5L, 5L, 8L, 5L, 9L, 8L, 5L, 2L, 1L, 6L,
4L, 1L, 1L, 8L, 10L, 1L, 6L, 5L, 5L, 5L, 6L, 5L, 7L, 1L,
1L, 3L, 6L, 9L, 1L, 5L, 3L, 9L, 4L, 9L, 6L, 1L, 1L, 1L, 3L,
8L, 8L, 8L, 3L, 1L, 3L, 7L, 2L, 1L, 9L, 5L, 5L, 7L, 5L, 1L,
8L, 6L, 6L, 6L, 1L, 7L, 6L, 3L, 5L, 2L, 3L, 9L, 1L, 1L, 7L,
7L, 7L, 7L, 6L, 4L, 5L, 4L, 1L, 4L, 4L, 7L, 6L, 1L, 5L, 9L,
7L, 3L, 3L, 6L, 4L, 10L, 5L, 7L, 4L, 5L, 8L, 6L, 6L, 6L,
1L, 6L, 7L, 5L, 1L, 2L, 4L, 8L, 4L, 3L, 8L, 1L, 5L, 10L,
2L, 3L, 1L, 9L, 1L, 5L, 6L, 4L, 9L, 3L, 6L, 4L, 2L, 4L, 4L,
5L, 5L, 3L, 8L, 7L, 7L, 8L, 8L, 4L, 7L, 7L, 7L, 6L, 5L, 8L,
6L, 7L, 3L, 9L, 5L, 1L, 3L, 1L, 7L, 1L, 1L, 7L, 8L, 8L, 9L,
2L, 6L, 10L, 5L, 9L, 6L, 5L, 7L, 7L, 2L, 4L, 6L, 4L, 5L,
8L, 1L, 10L, 3L, 7L, 8L, 9L, 3L, 6L, 2L, 7L, 3L, 5L, 1L,
5L, 1L, 7L, 8L, 2L, 1L, 5L, 2L, 4L, 8L, 5L, 8L, 2L, 7L, 6L,
6L, 4L, 1L, 6L, 6L, 1L, 2L, 1L, 6L, 1L, 3L, 4L, 7L, 8L, 10L,
7L, 3L, 7L, 6L, 1L, 7L, 2L, 1L, 10L, 5L), binary_E = c(1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L,
1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L), percentA = c(5.2, 5.28,
7.71, 3.78, 0, 4.16, 0, 6.42, 2.56, 2.69, 2.15, 3.95, 0,
2.06, 0, 5.45, 7.2, 1.51, 3.57, 12.74, 3.51, 5.06, 8.44,
8.43, 5.59, 6.85, 7.37, 7.37, 1.51, 5.23, 4.69, 1.91, 7.34,
5.32, 7.62, 5.06, 6.48, 7.38, 28.16, 3.66, 8.37, 4.01, 10.6,
6.46, 8.63, 3.76, 6.09, 8.03, 3.78, 7.44, 3.67, 10.24, 7.4,
3, 0, 8.88, 15.17, 6.92, 0, 4.24, 4.2, 10.17, 14.73, 9.22,
6.27, 4.11, 8.43, 8.56, 4.05, 0, 0, 3.5, 4.39, 13.19, 13.01,
6.9, 4.79, 7.23, 14.28, 15.67, 8.76, 3.86, 4.58, 6.51, 18.31,
3.92, 7.79, 6.94, 6.94, 8.76, 4.83, 10.54, 0, 0, 5.62, 0,
1.06, 4.13, 6.52, 4.17, 6.89, 4.25, 3.51, 6.4, 4.83, 5.01,
4.69, 3.54, 4.67, 9.5, 1.75, 0, 16.63, 2.6, 1.43, 3.59, 10.26,
6.15, 0, 5.81, 0, 7.28, 0, 8.91, 7.33, 4.89, 2.7, 13.14,
92.59, 11.03, 7.75, 5.49, 3.31, 7.82, 5.57, 0, 10.03, 7.35,
6.36, 8.94, 7.35, 0, 6.85, 3.85, 4.19, 4.48, 7.43, 10.47,
5.11, 9.42, 3.42, 4.74, 0.89, 6.12, 6.46, 11.31, 0, 4.19,
4.76, 5.86, 8.23, 17.76, 7.69, 18.39, 10.72, 12.73, 7.08,
9.44, 4.14, 26.5, 3.72, 9.47, 12.66, 29.22, 7.64, 3.9, 13.01,
3.53, 10.75, 8.26, 4.8, 14.54, 3.7, 14.15, 8.47, 7.33, 3.78,
5.78, 8.88, 0, 19.19, 16.91, 6.57, 6.36, 7.6, 6.52, 5.55,
9.05, 7.3, 2.81, 5.09, 5.25, 8.22, 7.31, 8.73, 5.27, 4.85,
2.31, 15.63, 0, 8.22, 9.19, 7.05, 6.47, 5.53, 0, 5.03, 0,
0, 6.77, 12.47, 5.71, 0, 6.55, 11.59, 25.13, 1.8, 7.35, 4.66,
7.56, 4.08, 2.98, 6.7, 8.91, 3.3, 5.85, 5.74, 8.73, 13.9,
9.97, 4.27, 10.56, 35.68, 6.56, 5.85, 35.95, 6.24, 1.95,
6.63, 4.23, 4.1, 4.03, 24.37, 4.68, 12.24, 2.41, 0, 3.47,
11.93, 4.15, 6.6, 8.17, 4.65, 4.09, 0, 1.44, 10.95, 0, 7.94,
4.38, 4.18, 1.93, 5.67, 9.42, 0, 13.1, 2.72, 7.25, 2, 5.27,
3.35, 2.12, 9.26, 7.11, 35.44, 2.06, 4.77, 4.24, 12.55, 8.71,
4.38, 5.42, 3.14, 2.7, 10.94, 0, 6.8, 2.47, 14.33, 4.6, 14.06,
5.82, 0, 14.25, 10.57, 9.29, 10.65, 5.58, 4.85, 2.33, 4.03,
6.41, 24.69, 7.61, 2.82, 6.78, 0, 0, 1.51, 0, 6.9, 3.97,
7.7, 0, 3.17, 8.43, 5.52, 5.68, 10.05, 15.86, 1.73, 4.1,
1.38, 5.35, 6.07, 5.68, 10.33, 10.42, 9.44, 0, 0, 7.21, 4.78,
0, 6.26, 7.39, 18.2, 3.44, 5.22, 3.41, 4.73, 10.21, 16.28,
8.51, 12.14, 4.94, 14.65, 3.23, 0, 4.2, 3.13, 4, 10.47, 2.81,
8.22, 0, 3.44, 0, 13.32, 6.93, 0, 3.3, 9.18, 6.71, 8.35,
0, 8.32, 5.43, 12.38, 5.15, 5.74, 0, 2.75, 10.21, 1.58, 8.86,
13.95, 7.1, 11.47, 11.72, 7.09, 0, 2.92, 7.55, 6.58, 14.32,
21.71, 4.01, 1.81, 2.39), percentB = c(43.97, 59.38, 43.37,
46.67, 13.04, 65.31, 43.82, 28.73, 32.44, 25.18, 34.82, 38.13,
19.15, 43.89, 100, 29.47, 43.16, 29.19, 37.42, 29.36, 47.94,
42.58, 18.11, 49.25, 40.12, 36.99, 39.65, 37.06, 48.05, 33.33,
43.79, 41.32, 41.1, 33.91, 25.45, 41.82, 45.68, 40.54, 37.92,
41.08, 34.3, 32.72, 46.55, 48.28, 39.66, 45.79, 42.67, 35.38,
48, 35.3, 45.96, 44.21, 36.27, 43.53, 45, 39.82, 22.19, 29.79,
70, 51.2, 53.52, 36.62, 22.75, 16.53, 46.78, 40.96, 53.79,
19.44, 28.05, 61.04, 42.86, 57.29, 32.62, 42.19, 34.15, 36.31,
41.66, 46.28, 33.92, 31.95, 40, 20.63, 45.59, 29.08, 24.84,
45.41, 39.32, 47.63, 33.85, 24.15, 43.13, 21.05, 12.5, 12.5,
42.67, 0, 63.06, 30.23, 46.15, 39.64, 41.34, 38.05, 38.82,
43.42, 52.62, 34.43, 54.7, 26.82, 36.86, 52.41, 43.85, 60.71,
33.33, 48.48, 47.56, 44.35, 39.47, 50, 57.32, 35.78, 41.67,
36.9, 5.26, 36.52, 44.6, 35.29, 47.31, 38.29, 0, 45.89, 45.83,
40.8, 47.42, 46.03, 31.22, 14.29, 36.08, 31.9, 23.04, 24.71,
42.52, 50, 29.71, 34.34, 43.46, 46.86, 51.31, 45.45, 42.34,
40, 27.54, 38.1, 42.5, 36.51, 35.26, 30.52, 54.05, 56, 28.3,
44.44, 33.03, 42.6, 36.01, 41.35, 34.07, 23.91, 41.32, 43.68,
44.44, 36.47, 46.33, 26.25, 23.33, 43.81, 52.12, 53.94, 8.45,
34.56, 43.2, 29.19, 38.1, 36.3, 53.57, 21.32, 17.11, 38.03,
42.95, 27.22, 41.67, 28.57, 47.37, 30.35, 42.65, 40, 45.25,
41.83, 38.55, 34.99, 30.34, 62.98, 32.14, 42.11, 45.65, 45.14,
40.41, 46.61, 15.47, 48.48, 37.01, 82.86, 42.96, 28.23, 34.36,
44.44, 33.22, 77.78, 41.79, 22.86, 42.59, 27.18, 46.81, 50.94,
38.75, 33.11, 25.73, 0, 52.11, 36.36, 34.89, 35.81, 37.84,
35.93, 43.75, 38.28, 20.68, 33.33, 34.38, 30.27, 8.33, 34.44,
15.16, 28.36, 25, 41.12, 39.76, 33.33, 31.65, 35.32, 39.81,
37.53, 52.73, 29.24, 26.12, 43.21, 29.29, 51.08, 32.14, 28.02,
29.41, 53.47, 27.27, 47.06, 20, 54.42, 12.5, 49.18, 36.85,
23.23, 38.55, 52.93, 39.24, 34.21, 26.73, 40.76, 26.09, 28.57,
50.67, 34.54, 39.07, 30.13, 39.77, 48.85, 46.15, 33.47, 56.52,
37.93, 32.35, 36.04, 45.29, 51.4, 40.51, 37.56, 42.61, 32.9,
41.07, 22.86, 37.86, 35.21, 37.35, 53.69, 31.5, 49.79, 29.55,
47.69, 29.09, 48.27, 41.15, 32.54, 31.95, 37.7, 53.6, 33.33,
20.41, 16.44, 45.65, 41.06, 33.33, 35.98, 36.77, 64.52, 39.26,
32.84, 40.96, 36.23, 44.71, 27.13, 39.65, 30.51, 43.96, 36.36,
35.71, 45.12, 40.27, 36.09, 40.41, 32.83, 28.19, 30.13, 21.51,
15, 28.57, 52.14, 22.66, 40.28, 35.24, 49.43, 25.54, 36.11,
41.62, 38.57, 32.06, 42.7, 29.61, 35.95, 32.78, 23.79, 31.71,
41.92, 2.94, 52.45, 47.82, 44.73, 45.32, 40.85, 51.83, 30.77,
31.24, 22.22, 11.74, 43.11, 42.86, 29.84, 47.89, 27.11, 34.91,
23.4, 38.36, 38.81, 36.31, 45.63, 41.51, 45.83, 36.5, 41.28,
37.28, 28.09, 51.72, 28.96, 34.5, 25.92, 41.56, 48.15, 50.1,
63.28, 49.39, 25, 55.1, 43.3, 8.08, 18.52), percentC = c(0.4,
4.69, 9.18, 0, 0, 6.12, 1.12, 0.67, 0.73, 3.08, 13.77, 0.43,
0, 2.26, 0, 0.97, 0.71, 0.62, 0.33, 0, 8.67, 3.57, 0, 3.14,
2.69, 0, 0.39, 0.94, 2.34, 10.42, 11.85, 3.4, 0, 2.3, 1.82,
2, 1.13, 0, 20.84, 1.29, 0.56, 0.89, 0.24, 3.45, 20.11, 3.18,
0.52, 35.35, 3.45, 1.53, 0.51, 9.5, 0.19, 2.52, 0, 0.3, 0.3,
0, 0, 10.47, 7.75, 0.33, 0, 0.45, 0.11, 1.2, 1.23, 8.33,
0, 23.38, 0, 0.75, 1.55, 1.56, 0.41, 0.83, 1.11, 2.66, 0,
0, 4.8, 0, 7.7, 0.49, 0.62, 4.03, 0.2, 0.38, 1.56, 0.52,
3.57, 0, 0, 0, 0, 0, 0.9, 0, 2.2, 2.16, 0.62, 1.33, 0.36,
1.84, 4.33, 0.55, 16.87, 0, 2.4, 9.65, 0, 0, 0.69, 6.06,
0.81, 1.66, 0, 2, 2.44, 0.83, 12.5, 1.47, 0, 0.34, 1.88,
0.59, 2.34, 1.46, 0, 1.43, 2.08, 1.35, 4.57, 3.17, 0.37,
0, 0.63, 0.31, 0.81, 0, 0.36, 0, 2.96, 0.54, 0.64, 4.02,
1.39, 2.02, 4.5, 0, 1.45, 1.07, 1.25, 0.74, 0.25, 2.28, 2.7,
4, 0.94, 0, 1.59, 0.59, 15.46, 6.02, 6.31, 0, 0, 0.99, 0.65,
2.35, 10.45, 1.67, 0, 0, 6.26, 1.52, 0.7, 0, 2.34, 1.16,
3.17, 2.42, 9.52, 0, 3.95, 1.43, 11.74, 0, 0, 0, 5.26, 13.33,
2.05, 2.29, 0.23, 0.58, 7.49, 5.64, 0.29, 0.88, 0.29, 0,
3.26, 25.4, 15.17, 1.73, 0.55, 0, 2.01, 0, 0.31, 1.88, 0.61,
2.96, 0.68, 0, 0.71, 0, 0, 4.01, 0, 4, 2.5, 0, 3.4, 12.5,
1.1, 4.24, 3.88, 2.43, 10.81, 0.85, 0, 4.31, 2.92, 0, 3.91,
0.43, 0, 0.75, 0.26, 0.84, 0, 0, 8.12, 0, 0.4, 1.37, 3.74,
0.87, 2.42, 0.26, 0, 1.54, 0.4, 4.98, 0, 0.6, 0, 1.81, 0,
0, 0, 1.13, 0, 0, 0.65, 2.02, 6.75, 9.14, 2.53, 25.54, 0.04,
7.82, 0, 7.14, 5.33, 3.65, 5.3, 1.95, 0.8, 0.71, 0, 20.15,
0, 0, 1.24, 3.9, 9.34, 4.2, 0, 9.58, 1.32, 0.92, 0.77, 0,
0.73, 1.41, 8.43, 2.29, 5.61, 6.83, 2.27, 5.09, 0, 5.31,
0.52, 1.83, 0.54, 2.19, 5.63, 0, 0, 35.62, 0, 4.35, 0, 0.53,
0.32, 0, 1.7, 1.49, 0.1, 0, 0.33, 0.33, 3.05, 5.08, 0, 0,
2.38, 1.83, 10.74, 1.38, 0.38, 0.33, 0.34, 3.33, 5.4, 11.43,
57.14, 3.45, 0, 16.67, 23.5, 0.76, 1.45, 0, 1.95, 0, 2.52,
4.55, 0, 2.87, 0.43, 1.17, 1.69, 0.8, 0, 1.4, 0.27, 0.27,
6.24, 6.81, 2.51, 7.69, 4.35, 0, 0, 0, 0, 0.14, 3.31, 0,
3.37, 0, 0.8, 0, 0.71, 4.51, 0.17, 0, 1.83, 4.18, 1.78, 1.87,
6.9, 0.26, 11.37, 2, 1.93, 0, 3.45, 3.39, 0.56, 0, 5.51,
2.3, 0, 0), percentD = c(0.93, 0, 0.86, 0, 0, 0.27, 0, 0,
0, 0, 0, 0, 0.33, 0, 0, 0, 0.75, 0, 1, 0, 0, 0, 0, 0.29,
1, 0, 0, 1, 0, 1, 0.75, 1, 0, 0, 0, 1, 1, 0.43, 0, 0.78,
0, 0.71, 0, 0, 0, 0, 0.29, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0.81, 0, 0.56, 0, 1, 0, 1, 0, 0, 0, 0.8, 0, 0.43, 0,
0, 0, 1, 1, 0, 0, 0.8, 0.67, 1, 0, 0, 0, 0.82, 0, 0, 0, 0.07,
1, 0, 0, 0, 1, 0, 0, 0.25, 0, 0, 0, 0.25, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0.75, 0, 0.98, 0, 0.9, 0, 0.67, 0,
0, 0.95, 0.67, 1, 0, 0, 0.33, 0, 0, 0.78, 0, 0.96, 0, 0,
0, 0, 0, 0, 0.78, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0.25,
0, 0, 0, 1, 1, 0, 0, 0.07, 0, 0.3, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0.08, 0, 0, 0, 0,
0, 0, 0, 0.88, 0, 0.58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0.99, 0, 0, 0.75, 0, 0, 0.22, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0.86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 0, 0.79, 1, 0, 1, 0, 0.95, 0, 0, 0, 1, 0, 1, 0,
0, 0, 0.13, 0, 0.93, 0, 0, 0, 0, 0.05, 1, 0, 0.93, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0.78,
0.25, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.67, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0.99, 0, 0, 0, 0, 1, 0, 0, 0.8, 0,
0, 0, 0, 0, 0.8, 0, 0, 0, 0, 0, 0.24, 0, 0, 1, 0, 0, 0.45,
0, 0, 1, 1, 0, 0.44, 0, 0, 0, 0, 0, 0, 0.4, 0, 0, 0, 1, 0,
0.57, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0.49, 0, 0, 1, 0, 0, 0,
0.92, 0, 0.73, 0, 0, 0, 0, 0.9, 0, 0, 0, 1, 0, 1, 0.96, 0,
0, 0, 0, 0)), .Names = c("CustomerID", "binary_depvar", "binary_A",
"binary_B", "binary_C", "categ_A", "categ_B", "binary_D", "categ_C",
"categ_D", "categ_E", "binary_E", "percentA", "percentB", "percentC",
"percentD"), class = "data.frame", row.names = c(NA, -400L))
I have a dataset which contains 'hits' at each position in a genome. I want to normalize it in a very specific way:
1. When the column df$HC contains the value 'HC',
2. Take the value from df$pos which contains the position in bp,
3. Sum up df$Hits +/-1000bp away from the one in question e.g. if df$pos = 3000, add up hits where df$pos>=2000 and <=4000,
4. Divide every df$Hits value for those 2000 positions by the total worked out in step 3.
So, for each 2000bp patch around each instance of 'HC' (most values in the HC column are NA and don't need to be normalized), each hit is divided by the total number of hits in that patch.
I guess I might be able to do this by subsetting each block of 2000bp around each 'HC' and processing them separately, but there are ~3000 'HC' positions.
Edit: Due to regions where 'HC+/-1000bp' regions overlap, I think now that I need to extract and process each region separately, so regions of overlap would be repeated in each subset.
Thanks for any help with this, it's so confusing I have a headache!
dput sample dataframe (due to the character limit it only contains 1000 lines, so try a smaller window than 2000bp):
structure(list(chr = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "chr1", class = "factor"), pos = 1:1000, Hits = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 11L,
2L, 0L, 0L, 2L, 0L, 8L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 64L, 1L,
0L, 2L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 7L, 0L, 0L, 2L,
0L, 0L, 0L, 3L, 0L, 0L, 1L, 0L, 1L, 40L, 2L, 0L, 29L, 0L, 0L,
0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 7L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L,
0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), HC = structure(c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), .Label = "HC", class = "factor")), .Names = c("chr",
"pos", "Hits", "HC"), class = "data.frame", row.names = c(NA,
-1000L))
A smaller sample dataset and expected output:
# Toy dataset: 10 positions, a read count (Hits) per position, and a single
# 'HC' marker at position 6 (NA everywhere else).
pos <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
Hits <- c(0, 1, 1, 2, 2, 3, 2, 2, 1, 1)
HC <- c(NA, NA, NA, NA, NA, 'HC', NA, NA, NA, NA)
df <- data.frame(pos, Hits, HC)
# Total hits in the +/-3 bp window around the HC position (pos 3 to 9) = 13.
# Expected output: every Hits value inside that window divided by 13 (rounded
# to 3 decimals); positions 1, 2 and 10 lie outside the window and are unchanged:
Hits <- c(0, 1, 0.077, 0.154, 0.154, 0.231, 0.154, 0.154, 0.077, 1)
Okay, this should cover at least the simplified problem:
# Normalise Hits inside a +/- n row window around every row flagged 'HC':
# each value in the window is divided by the window's total, so the values
# in a window sum to 1. Rows outside every window are left untouched.
n <- 3
# Keep an untouched copy of the counts: window totals must always be computed
# from the raw values, otherwise overlapping windows would re-normalise values
# that an earlier iteration already divided.
hits_raw <- df[['Hits']]
hits_norm <- hits_raw
len <- length(hits_raw)
# %in% (rather than ==) yields FALSE for NA markers instead of propagating NA.
for (i in which(df[['HC']] %in% 'HC')) {
  # Clamp the window to the valid row range at either end of the data.
  ran <- max(i - n, 1):min(i + n, len)
  # na.rm = TRUE so a single missing count does not turn the whole window NA.
  s <- sum(hits_raw[ran], na.rm = TRUE)
  # A window with a zero total would give 0 / 0 = NaN; leave it unchanged.
  if (s != 0) {
    hits_norm[ran] <- hits_raw[ran] / s
  }
}
# Write the column back once instead of replacing it on every iteration.
df[['Hits']] <- hits_norm
fiddle