R Creating a p-value matrix with missing values - r
I have a dataframe with many missing values (NAs). I want to create a correlation matrix together with a matrix of p-values, as shown in this link: Link
I created the correlation matrix like this:
as.data.frame(round(cor(df, use = "pairwise.complete.obs", method = c("spearman")), 1))
Now I am trying to create a matrix that shows the p-value for each correlation. I have used this code successfully for other dataframes, which include fewer NAs.
# Build a symmetric matrix of pairwise correlation-test p-values.
#
# mat: a matrix, or anything as.matrix() can convert (such as a data frame),
#      holding one variable per column.
#
# Returns a square matrix with one row and one column per column of mat,
# labelled with the column names of mat. The diagonal is 0 and every
# off-diagonal cell holds the p-value reported by cor.test() for that pair.
#
# Errors raised by cor.test() are not caught here, so a single failing pair
# aborts the whole call.
cor.mtest <- function(mat) {
  mat <- as.matrix(mat)
  n_vars <- ncol(mat)
  var_names <- colnames(mat)

  p_values <- matrix(NA, nrow = n_vars, ncol = n_vars)
  diag(p_values) <- 0

  # Visit each unordered pair once and mirror the result across the diagonal.
  for (first in 1:(n_vars - 1)) {
    for (second in (first + 1):n_vars) {
      pair_p <- cor.test(mat[, first], mat[, second])$p.value
      p_values[second, first] <- pair_p
      p_values[first, second] <- pair_p
    }
  }

  rownames(p_values) <- var_names
  colnames(p_values) <- var_names
  p_values
}
p.mat <- cor.mtest(df)
But now I am getting an error:
Error in cor.test.default(mat[, i], mat[, j]) : not enough finite
observations
I also tried to use the "Hmisc" package for the rcorr-function. But the package does not load correctly. Any idea how to solve this?
structure(list(V1 = c(21L, 18L, 11L, 20L, 17L, 18L, 20L, 23L,
10L, 25L, 11L, 24L, 13L, 17L, 30L, 12L, 24L, 27L, 19L, 24L, 14L,
14L, 10L, 21L, 12L, 14L, 19L, 19L, 16L, 15L, 25L, 15L, 20L, 18L,
21L, 9L, 18L, 10L, 21L, 17L, 15L, 6L, 21L, 27L, 16L, 15L, 20L,
12L, 20L, 11L, 17L, 14L, 22L, 14L, 18L, 17L, 19L, 18L, 16L, 13L,
11L, 19L, 14L, 9L, 13L, 13L, 8L, 7L, 29L, 14L, 16L, 13L, 8L,
28L, 12L, 33L, 20L, 13L, 12L, 14L, 16L, 15L, 23L, 19L, 20L, 23L,
21L, 14L, 12L, 30L, 11L, 12L, 14L, 13L, 15L, 13L, 6L, 15L, 19L,
15L, 18L, 23L, 19L, 11L, 18L, 9L, 18L, 17L, 15L, 8L, 13L, 8L,
20L, 17L, 25L, 11L, 25L, 19L, 13L, 15L, 15L, 15L, 12L, 16L, 20L,
13L, 24L, 12L, 23L, 21L, 15L, 18L, 14L, 20L, 21L, 20L, 19L, 21L,
11L, 24L, 12L, 15L, 16L, 26L, 8L, 19L, 19L, 12L, 13L, 20L, 23L,
11L, 17L, 17L, 11L, 19L, 17L, 15L, 14L, 13L, 14L, 20L, 22L, 21L,
17L, 17L, 16L, 14L, 11L, 7L, 21L, 15L, 15L, 17L, 11L, 15L, 18L,
13L, 23L, 16L, 16L, 23L, 12L, 16L, 15L, 8L, 19L, 14L, 18L, 13L,
17L, 16L, 25L, 14L, 22L, 14L, 14L, 18L, 9L, 11L), V2 = c(1L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L,
3L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 0L,
2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 0L, 1L,
0L, 1L, 1L, 1L, 1L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 2L, 0L, 0L, 0L, 1L, 0L, 2L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 2L, 1L, 1L, 2L, 1L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 3L, 0L, 0L, 1L, 0L), V3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
0L), V6 = c(5L, 2L, 0L, 3L, 3L, 1L, 2L, 5L, 0L, 3L, 0L, 3L, 4L,
0L, 7L, 3L, 6L, 2L, 1L, 6L, 0L, 0L, 3L, 1L, 0L, 1L, 1L, 0L, 1L,
2L, 4L, 1L, 5L, 3L, 0L, 3L, 0L, 0L, 2L, 3L, 0L, 1L, 6L, 3L, 1L,
0L, 1L, 1L, 2L, 1L, 1L, 2L, 3L, 3L, 3L, 0L, 2L, 5L, 2L, 1L, 2L,
2L, 0L, 1L, 0L, 2L, 0L, 1L, 4L, 3L, 2L, 3L, 1L, 2L, 2L, 4L, 1L,
0L, 0L, 6L, 1L, 3L, 4L, 1L, 2L, 1L, 3L, 3L, 0L, 4L, 1L, 0L, 0L,
2L, 1L, 1L, 0L, 2L, 1L, 2L, 4L, 2L, 2L, 1L, 1L, 2L, 5L, 5L, 2L,
2L, 2L, 1L, 1L, 3L, 5L, 1L, 2L, 5L, 3L, 4L, 0L, 1L, 2L, 1L, 5L,
4L, 2L, 3L, 3L, 3L, 0L, 3L, 0L, 2L, 1L, 3L, 1L, 4L, 3L, 2L, 0L,
3L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 3L, 1L, 2L, 1L, 8L, 2L, 4L, 5L, 2L, 3L, 2L, 1L, 4L, 2L, 1L,
0L, 1L, 1L, 4L, 2L, 6L, 4L, 2L, 2L, 1L, 0L, 1L, 0L, 5L, 3L, 2L,
1L, 2L, 2L, 0L, 2L, 4L, 2L, 2L, 1L, 0L, 1L), V40 = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), V29 = c(1L, 0L, 0L, 0L, 2L, 0L, 2L,
1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 1L, 0L, 1L, 1L, 0L, 2L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 2L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 2L, 0L, 0L, 1L, 0L, 2L, 0L,
0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 2L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 2L, 0L, 0L, 2L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 2L, 0L, 0L, 1L, 1L, 2L, 1L, 0L,
1L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), V56 = c(0.2, 0, 0, 8.5, 3.1, 0.1, 4.5, 26.6, 1, 0, 0, 1.5,
3.7, 0, 0, 0.3, 10.8, 0.5, 0, 2.7, 0, 0, 8.8, 0, 0, 0, 0.4, 0,
0, 0, 0, 16.4, 4.2, 3.9, 3.5, 3.1, 0, 9, 16, 0, 0, 6, 0, 7.9,
0, 3.2, 0.9, 0, 4.2, 0, 1.2, 0, 0, 1.1, 0, 0, 0.2, 0, 0, 0, 0,
13.1, 0, 0.3, 0.1, 0.6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0.1,
0, 0, 0, 0, 3.6, 2, 30.3, 0, 0, 0, 0, 0.3, 0, 4.2, 0, 2.6, 0,
4.8, 0, 0, 0, 2.2, 0.5, 0, 0, 0, 0, 0, 2.9, 0, 2.9, 0.4, 2.4,
0, 0, 11.5, 6.3, 0, 0, 0.2, 16.3, 0, 0, 0.2, 0, 5, 0, 0, 0, 0,
0.7, 4.8, 0, 1.8, 0.1, 0, 0, 0, 0, 0, 0, 0, 0.4, 1.4, 1.2, 0,
0, 1.4, 0, 1.1, 0, 1.7, 0.1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1.6,
0, 2.5, 0, 0.5, 0, 1.4, 0.3, 0, 0, 0.1, 0, 12, 0, 0, 4.9, 4.8,
0.2, 0.9, 1.6, 7.8, 0, 0, 0, 0, 0.6, 2.8, 0, 2.2, 0, 0, 2.8,
0, 0.6, 0.3, 0, 9.9, 2.8, 0.8, 0.1), V62 = c(28.8, 19.5, 26,
29.8, 13, 7.1, 22.6, 11, 21.2, 0.1, 31.7, 7.2, 5.3, 18.4, -1.4,
0.9, 3.2, 5, 31.9, 8.7, 7.9, 30.6, 7.9, 17.2, 24.7, 26.1, 22,
29, -6.3, 30.9, 5.7, 11.7, 28.1, 22.9, 12.2, 29.7, 2.7, 5.5,
19.7, 17.8, 24, 28.6, 24.4, 20, 29.1, 13.7, 8.7, 12, 8.8, 10.4,
9.7, 10, 19.6, -0.5, 25.6, 17.9, 14.2, 12, 3.6, 2.9, 5.9, 26.7,
8.7, 20.9, 0.8, 10.5, 14.3, 19.5, -0.3, 28.8, 26.5, 4.9, -0.5,
23.8, -1.3, 12.1, 2.4, 17.2, 22.1, 23.5, 17, -0.9, 19.3, 4.9,
20.1, 12.2, 10.8, 31.6, 26.1, 2.5, 26.7, 7.5, 8.2, 11.8, 22.3,
28.3, 21.4, 25.4, -0.4, 11.4, 27, 9.3, 23.6, 19.9, 23.5, 19.2,
6.7, 18.9, 2.8, 28, 9.6, 15.2, 13.1, 0, 22.7, 5.7, 3, 4.7, 9.9,
21.9, -1.6, 19, 11, 17.2, 12.9, 27.4, 21.5, 14.3, 4.5, 6.1, 23.1,
-0.1, 5.1, 18.7, 3.7, 10.1, 22.6, 16.1, 7.9, 0.9, 30.8, 2.6,
30.3, 25.9, 20.5, 5.2, 26.9, 22.9, 24.8, 19.6, 10.7, 14.9, 21.9,
24.5, 21, 11.3, 1.5, 17.6, -8.8, 5.3, -1.2, 29.1, 22.6, 6.7,
24.6, 22.2, 1.9, 12.8, 19.6, 20.5, 15, 2.9, 27.2, 16.5, -1.4,
17.1, 8.2, 16, 4.2, 6.6, 19.8, -4.8, 21.7, 27.7, 4.3, 0.4, 25.4,
27.2, 28.7, 17.9, 22.7, 8.9, 22.1, 16.3, 5.4, 15.3, 9.9, 30.2,
14.7, 14.2), V73 = c(NA, NA, NA, -0.09275986, NA, NA, 0.52943606,
NA, NA, NA, 0.39573934, NA, NA, 0.06665112, NA, NA, NA, NA, 0.09889552,
NA, NA, 0.52411667, NA, NA, 0.0786277, 0.39117113, NA, 0.30804176,
NA, 0.4984171, NA, NA, 0.69054695, 0.61838979, NA, 0.49298138,
NA, NA, NA, NA, NA, 0.44718356, NA, 0.24114516, 0.00855375, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0.31341432, NA, NA, NA, NA, NA,
NA, 0.38816502, NA, 0.69810769, NA, NA, NA, 0.46607416, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.39012246, NA, NA, NA, NA,
0.42507386, NA, NA, -0.26830461, NA, NA, 0.29439447, NA, NA,
NA, 0.18582551, -0.00246774, 0.33244636, 0.26097549, NA, NA,
0.56932173, NA, 0.33573443, NA, NA, NA, NA, NA, NA, 0.74612433,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 0.02980432, NA, NA, NA, NA, NA, NA, 0.60470877,
NA, NA, 0.29230953, NA, -0.11296095, 0.09783287, NA, NA, 0.32181372,
NA, NA, NA, NA, NA, NA, 0.3255947, 0.4099077, NA, NA, NA, NA,
NA, NA, 0.42345733, 0.29293533, NA, 0.52832981, NA, NA, NA, NA,
NA, NA, NA, 0.55373453, NA, NA, NA, NA, NA, NA, NA, 0.4070331,
NA, 0.30780722, 0.59547858, NA, NA, 0.66333634, NA, 0.38209532,
NA, NA, NA, NA, NA, NA, NA, NA, 0.35778449, NA, NA), V77 = c(NA,
NA, 0.45406227, NA, 0.87348132, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 0.78536916, NA, -0.01870051, NA, NA, NA, NA,
NA, NA, -0.00150528, NA, NA, NA, NA, -0.49992833, NA, NA, NA,
NA, NA, NA, NA, -0.12002325, -0.16249647, NA, 0.51132754, NA,
NA, NA, -0.20643247, 0.59529347, NA, 0.32442411, NA, NA, NA,
NA, NA, NA, NA, 0.80611793, NA, NA, NA, NA, NA, NA, NA, 0.75247001,
0.65079036, NA, NA, 0.29773326, -0.2164507, NA, NA, 0.36336748,
NA, NA, NA, NA, 0.49664945, NA, NA, NA, 0.35610758, NA, NA, NA,
0.3734933, NA, 0.58752714, NA, NA, NA, -0.38266847, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.28871445, NA, 0.05455121,
NA, NA, NA, NA, NA, NA, 0.0408944, NA, NA, NA, NA, NA, NA, 0.87592639,
NA, NA, NA, NA, NA, NA, NA, 0.28923257, NA, NA, NA, -0.16730842,
NA, -0.122933, 0.25704385, NA, NA, NA, NA, NA, NA, NA, 0.92475694,
NA, NA, NA, 0.15886697, 0.51925536, NA, NA, 0.25372613, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.89195925,
NA, NA, NA, -0.60877514, NA, 0.33866615, NA, NA, 0.60955791,
NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.05461735, NA, NA, 0.33697054,
NA, -0.12079077, -0.14805299, -0.24541818, NA, 0.36340054, NA
), V81 = c(NA, NA, -0.08490089, NA, 0.0555794, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, -0.22856711, NA, -0.57790508,
NA, NA, NA, NA, NA, NA, 0.04856018, NA, NA, NA, NA, -0.38039271,
NA, NA, NA, NA, NA, NA, NA, -0.63132241, -0.35266074, NA, 0.01961822,
NA, NA, NA, -0.34551275, -0.39085104, NA, -0.27725445, NA, NA,
NA, NA, NA, NA, NA, -0.21599455, NA, NA, NA, NA, NA, NA, NA,
-0.19924471, -0.18365343, NA, NA, -0.53484587, -0.32543563, NA,
NA, -0.19992419, NA, NA, NA, NA, -0.18500223, NA, NA, NA, -0.12990151,
NA, NA, NA, -0.39083879, NA, -0.59264661, NA, NA, NA, 0.13154274,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.23261324,
NA, -0.03944042, NA, NA, NA, NA, NA, NA, -0.22193873, NA, NA,
NA, NA, NA, NA, -0.20022085, NA, NA, NA, NA, NA, NA, NA, 0.08615186,
NA, NA, NA, -0.74607469, NA, 0.23032189, 0.0449706, NA, NA, NA,
NA, NA, NA, NA, -0.04848046, NA, NA, NA, -0.6370161, -0.02900035,
NA, NA, -0.23145663, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 0.14884929, NA, NA, NA, 0.22450133,
NA, 0.24769837, NA, NA, -0.29667428, NA, NA, NA, NA, NA, NA,
NA, NA, NA, -0.03071992, NA, NA, 0.07786378, NA, 0.23027039,
-0.20214392, -0.3032353, NA, -0.47432158, NA), V89 = c(0.0834995,
0.00066815, NA, NA, NA, NA, NA, NA, 0.02511399, NA, NA, NA, 0.052432,
NA, NA, NA, -0.14814967, NA, NA, NA, NA, NA, -0.33114922, 0.34514567,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.19468406, NA, NA, NA,
-0.38972029, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.23425484,
NA, -0.11003854, NA, -0.26367322, NA, NA, 0.29238575, 0.07886438,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, -0.15248164, NA, -0.15641155, NA, NA, -0.08752716, NA,
NA, NA, NA, 0.34809891, NA, NA, NA, NA, NA, NA, NA, -0.27401719,
NA, NA, NA, NA, NA, -0.32273288, NA, 0.02669399, NA, 0.0727079,
0.08290143, NA, -0.16476099, NA, NA, NA, NA, -0.1029079, -0.11614262,
NA, NA, -0.14913232, NA, -0.29380582, -0.537503, 0.11869562,
NA, NA, NA, -0.17315201, NA, 0.10272535, 0.0932595, 0.0793467,
-0.0845297, NA, NA, NA, -0.02889606, NA, NA, NA, NA, NA, 0.15552849,
0.04599214, NA, 0.19864881, NA, NA, NA, NA, NA, -0.11474285,
NA, NA, NA, 0.10901186, NA, NA, NA, 0.13339891, NA, 0.07056403,
NA, NA, NA, NA, NA, NA, NA, -0.25760406, 0.2062942, -0.00981489,
0.3282743, 0.06509166, NA, NA, NA, -0.26049214, NA, -0.13281234,
NA, 0.32791015, -0.13518787, NA, NA, NA, NA, NA, NA, NA, NA,
0.05660112, NA, NA, 0.12368526, -0.15672689, NA, -0.42175072,
NA, NA, NA, NA, NA, -0.22635573), V90 = c(-0.04245051, 0.3507695,
NA, NA, NA, NA, NA, NA, 0.32893767, NA, NA, NA, -0.35288827,
NA, NA, NA, -0.02734148, NA, NA, NA, NA, NA, -0.01271804, -0.26617777,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.37528838, NA, NA,
NA, 0.14921273, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.46296948,
NA, -0.20223671, NA, 0.12754582, NA, NA, 0.05006781, 0.22653775,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, -0.26092513, NA, 0.54215354, NA, NA, -0.23136087, NA,
NA, NA, NA, -0.04596987, NA, NA, NA, NA, NA, NA, NA, 0.14239809,
NA, NA, NA, NA, NA, 0.11650203, NA, 0.17058915, NA, -0.18403288,
0.10295627, NA, -0.15530088, NA, NA, NA, NA, -0.45405281, -0.10929859,
NA, NA, 0.14782657, NA, -0.15852471, -0.05266618, -0.18175069,
NA, NA, NA, -0.11917474, NA, 0.16136416, -0.14499177, -0.17504283,
0.13272865, NA, NA, NA, -0.17429991, NA, NA, NA, NA, NA, -0.22030747,
0.29022488, NA, 0.05889091, NA, NA, NA, NA, NA, 0.30446594, NA,
NA, NA, 0.23796595, NA, NA, NA, 0.14051101, NA, -0.05704354,
NA, NA, NA, NA, NA, NA, NA, 0.25256272, -0.14193822, 0.06924969,
0.00445279, 0.29815696, NA, NA, NA, 0.25643083, NA, 0.35649173,
NA, -0.25180143, -0.05787895, NA, NA, NA, NA, NA, NA, NA, NA,
0.03069952, NA, NA, -0.18662018, -0.15144552, NA, 0.06595208,
NA, NA, NA, NA, NA, 0.32091592)), .Names = c("V1", "V2", "V3",
"V6", "V40", "V29", "V56", "V62", "V73", "V77", "V81", "V89",
"V90"), class = "data.frame", row.names = c(NA, -200L))
This error happens because cor.test() (with its default Pearson method) needs at least 3 observations in which both values are non-NA for each pair of columns it compares.
To solve this, you may want to set p-value = NA when you find an error like this. You can use this variation of the function:
# Pairwise correlation-test p-values that tolerate sparsely observed columns.
#
# Arguments:
#   mat  A numeric matrix, or an object as.matrix() can convert to one
#        (e.g. a data frame); each column is one variable.
#   ...  Further arguments forwarded to cor.test(), e.g.
#        method = "spearman". With no extra arguments cor.test() runs with
#        its defaults.
#
# Returns:
#   An ncol(mat) x ncol(mat) symmetric numeric matrix labelled with the
#   column names of mat on both dimensions. The diagonal is 0. An
#   off-diagonal cell holds the p-value of cor.test() for that pair of
#   columns, or NA when cor.test() raised an error for the pair (for
#   instance "not enough finite observations").
cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat <- matrix(NA_real_, n, n)
  diag(p.mat) <- 0
  # seq_len() rather than 1:(n - 1): with a single column, 1:0 counts down
  # and would index columns that do not exist.
  for (i in seq_len(max(n - 1L, 0L))) {
    for (j in (i + 1L):n) {
      # Best effort per pair: an error from cor.test() becomes NA for this
      # cell (on both sides of the diagonal) instead of aborting the matrix.
      p.mat[i, j] <- p.mat[j, i] <- tryCatch(
        cor.test(mat[, i], mat[, j], ...)$p.value,
        error = function(e) NA_real_
      )
    }
  }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}
Related
Why does ggplot (.predict) not plot in R /rms package?
Please find My Data of w and w1 at the bottom of this page. I have a Predictor (w$test and w1$test) which is the quantity of positive lymph nodes per total lymph node yield, i.e. ranging between 0 and 1. I have produced two models - each representing two different disease stages. I wish to plot them together but all I get is this: The plot is produced with this code: library(ggplot2) library(rms) library(ggsci) d <- datadist(w) j <- options(datadist="d") d1 <- datadist(w1) j1 <- options(datadist="d1") model <- cph(Surv(os.neck,mors)~rcs(test),data=w) model1 <- cph(Surv(os.neck,mors)~rcs(test),data=w1) ggplot(Predict(model1, fun=exp)) + scale_x_continuous(limits = c(0,0.80)) out <- bind_rows(fortify(Predict(model, fun=exp)), fortify(Predict(model1, fun=exp)), .id = "model") ggplot(as.data.frame(out), aes(x = test)) + geom_ribbon(aes(fill = model, ymin = lower, ymax = upper), alpha = .05) + geom_line(aes(y = yhat, col = model)) + scale_color_jco(name="", labels = c("A", "B")) + scale_fill_jco(name="", labels = c("A", "B")) + geom_segment(aes(x = 0, y = 1, xend = 0.55, yend = 1), lty="dashed", size=0.1, alpha=0.75) As you can see, the plot is cut around 0.35 on the x-axis. I don't get why and I want the plot to continue as there are several w$test and w1$test values greater than 0.35. Please note that this code is produced from a dput() of 30 samples and not the entire cohort. When I look at View(out), I realize that there are only 400 entries - 200 from each of model and model1. It seems that entry number 200 of each corresponds to the test-value cut-off of 0.35. Please see here: How can I make the plot complete according to all test-values? 
My data w and w1 w1 <- structure(list(sex = c(1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L), mors = c(1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L), os = c(26.01, 138.68, 8.41, 29.63, 10, 19.59, 22.17, 63.52, 21.44, 27.87, 40.81, 64.15, 43.24, 8.14, 17.01, 23.16, 24.38, 25.61, 29.59, 29.9, 44.7, 44.52, 64.65, 93.06, 102.88, 140.79, 157.07, 34.1, 81.15, 133.42, 24.57, 2.35, 3.44, 3.98, 4.8 ), os.beh = c(20.9, 138.68, NA, 20.24, 4.7, 13.01, 16.1, 45.17, 15.56, 20.24, NA, 45.47, 42.32, 2.49, 12.26, 19, 17.02, 18.6, NA, 20.83, 31.28, 39.86, 45.34, 67.02, 96.45, NA, NA, 32.99, 77.73, 131.98, 17.38, 0.79, 0.5, 2.23, 2.33), os.neck = c(18.2, 138.68, 5.42, 19.55, 6.6, 13.01, 16.1, 45.17, 14.29, 20.24, 28.85, 45.47, 42.32, 4.99, 11.73, 16.36, 17.02, 18.6, 20.53, 20.83, 31.28, 31.51, 45.31, 67.02, 73.07, 99.98, 112.03, 32.99, 80.46, 131.98, 17.38, 0.79, 2.04, 2.23, 2.3), rfs.neck = c(11.07, 10.32, 4.44, 17.25, 5.39, 5.49, 7.03, 33.61, 12.71, 5.49, 16.92, 14.52, 13.37, 4.14, 9.36, 11.53, 8.8, 9.59, 16.53, 8.34, 8.28, 18.17, 29.6, 10.32, 7.13, 22.51, 43.93, 24.74, 12.85, 28.94, NA, NA, NA, NA, NA), rfs.neck.tsite = c(11.07, 10.32, NA, NA, NA, NA, 7.03, 33.61, NA, NA, NA, NA, NA, 4.14, 9.36, 11.53, 8.8, 9.59, 16.53, 8.34, 8.28, 18.17, 29.6, 10.32, 7.13, 22.51, 43.93, 24.74, 12.85, 28.94, NA, NA, NA, NA, NA), rfs.neck.nsite = c(11.07, 10.32, 4.44, 17.25, NA, NA, 7.03, 33.61, 12.71, 5.49, 16.92, 14.52, 13.37, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), rfs.neck.msite = c(11.07, 10.32, 4.44, 17.25, 5.39, 5.49, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), recidiv.tsite = c(1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 0L, 0L, 0L, 0L, 0L), recidiv.nsite = c(1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), n.fjernet = c(19L, 7L, 28L, 2L, 15L, 12L, 19L, 17L, 9L, 5L, 6L, 33L, 10L, 27L, 34L, 28L, 14L, NA, 8L, 11L, 14L, 19L, 5L, 38L, 5L, 8L, 10L, 55L, 22L, 8L, 16L, 18L, 6L, 23L, 5L), n.sygdom = c(2L, 0L, 2L, 0L, 9L, 1L, 1L, 1L, 0L, 1L, 0L, 4L, 0L, 4L, 0L, 0L, 0L, NA, 2L, 1L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 2L, 1L), stadie = c(1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L), test = c(0.105263157894737, 0, 0.0714285714285714, 0, 0.6, 0.0833333333333333, 0.0526315789473684, 0.0588235294117647, 0, 0.2, 0, 0.121212121212121, 0, 0.148148148148148, 0, 0, 0, NA, 0.25, 0.0909090909090909, 0, 0, 0, 0, 0.4, 0, 0, 0, 0, 0, 0, 0, 0.333333333333333, 0.0869565217391304, 0.2)), .Names = c("sex", "mors", "os", "os.beh", "os.neck", "rfs.neck", "rfs.neck.tsite", "rfs.neck.nsite", "rfs.neck.msite", "recidiv.tsite", "recidiv.nsite", "n.fjernet", "n.sygdom", "stadie", "test"), row.names = c(3L, 4L, 5L, 12L, 29L, 40L, 59L, 61L, 69L, 74L, 78L, 82L, 86L, 95L, 101L, 108L, 109L, 113L, 115L, 116L, 120L, 121L, 128L, 130L, 134L, 139L, 141L, 144L, 150L, 153L, 156L, 159L, 164L, 165L, 166L), class = "data.frame") w <- structure(list(sex = c(1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L), mors = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), os = c(16.56, 12.03, 12.08, 18.28, 17.28, 20.86, 23.48, 38.27, 58.63, 96.18, 47.84, 25.7, 27.01, 45.38, 50.04, 70.21, 7.69, 13.26, 15.2, 15.79, 15.74, 15.29, 18.59, 17.24, 22.35, 26.6, 31.85, 31.94, 31.62, 33.52, 34.2, 55.92, 55.92, 67.27, 80.17 ), os.beh = c(NA, 7.28, NA, 11.17, 4.93, 
64.33, 15.77, 26.94, 40.77, 69.09, 31.7, 17.05, 15.16, 32.3, 34.46, 49.81, 4.9, 5.47, 8.73, 9.92, 10.05, 10.77, 12.48, 12.52, 14.82, 18.19, 21.45, 27.05, NA, 27.01, 24.28, 40.11, 51.39, 62.11, 76.28 ), os.neck = c(10.97, 8.02, 8.77, 11.66, 12.55, 13.8, 15.77, 26.94, 40.77, 69.06, 46.82, 17.05, 18.76, 32.3, 34.46, 49.81, 4.9, 8.61, 9.92, 9.92, 10.05, 10.51, 12.48, 12.52, 14.82, 15.87, 21.45, 22.14, 22.97, 23.26, 24.28, 40.11, 40.11, 47.08, 52.14), rfs.neck = c(8.21, 6.7, 5.36, 7.72, 3.71, 5.39, 8.61, 18.46, 9.56, 19.29, 12.42, 11.01, 18.14, 26.05, 15.87, 9.46, 3.81, 7.79, 8.34, 8.61, 8.28, 9.79, 6.21, 5.36, 7.49, 9.56, 16.07, 4.63, 13.31, 12.68, 20.67, 21.59, 30.16, 22.21, 0), rfs.neck.tsite = c(8.21, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 11.01, 18.14, 26.05, 15.87, 9.46, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ), rfs.neck.nsite = c(8.21, 6.7, 5.36, 7.72, 3.71, 5.39, 8.61, 18.46, 9.56, 19.29, 12.42, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), rfs.neck.msite = c(8.21, 6.7, 5.36, 7.72, 3.71, 5.39, 8.61, 18.47, 9.56, 19.29, 12.42, 11.01, 18.14, 26.06, 15.87, 9.46, 3.81, 7.79, 8.35, 8.61, 8.28, 9.79, 6.21, 5.36, 7.49, 9.56, 16.07, 4.63, 13.31, 12.68, 20.67, 21.59, 30.16, 22.21, 0), recidiv.tsite = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), recidiv.nsite = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), n.fjernet = c(15L, 7L, 12L, 57L, 6L, 27L, 18L, 11L, 24L, 9L, 25L, 9L, 13L, 19L, 8L, 10L, 33L, 23L, 10L, 3L, 15L, 15L, 3L, 6L, 16L, 9L, 9L, 13L, 10L, 12L, 20L, 30L, 16L, 16L, NA), n.sygdom = c(2L, 1L, 6L, 6L, 0L, 0L, 9L, 0L, 0L, 0L, 0L, 2L, 3L, 0L, 0L, 0L, 2L, 1L, 0L, 2L, 1L, 4L, 1L, 2L, 4L, 3L, 2L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, NA), stadie = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 
4L, 4L, 3L, 4L, 3L, 3L, 3L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 5L, 3L, 3L, 4L, 4L, 4L, 4L), test = c(0.133333333333333, 0.142857142857143, 0.5, 0.105263157894737, 0, 0, 0.5, 0, 0, 0, 0, 0.222222222222222, 0.230769230769231, 0, 0, 0, 0.0606060606060606, 0.0434782608695652, 0, 0.666666666666667, 0.0666666666666667, 0.266666666666667, 0.333333333333333, 0.333333333333333, 0.25, 0.333333333333333, 0.222222222222222, 0, 0, 0.166666666666667, 0, 0, 0.0625, 0, NA)), .Names = c("sex", "mors", "os", "os.beh", "os.neck", "rfs.neck", "rfs.neck.tsite", "rfs.neck.nsite", "rfs.neck.msite", "recidiv.tsite", "recidiv.nsite", "n.fjernet", "n.sygdom", "stadie", "test"), row.names = c(2L, 6L, 7L, 8L, 9L, 10L, 11L, 14L, 15L, 17L, 18L, 22L, 23L, 24L, 25L, 26L, 28L, 31L, 34L, 35L, 36L, 37L, 38L, 39L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L ), class = "data.frame")
Markus is right, and the way to overcome that is to define your own range values: Predict(model1,test=seq(0,0.6,by=0.1)) test yhat lower upper 1 0.0 -0.4295911 -0.754044179 -0.1051379 2 0.1 0.6336235 0.027948982 1.2392981 3 0.2 0.7307858 0.175765821 1.2858057 4 0.3 0.6062680 -0.001284515 1.2138206 5 0.4 0.4817503 -0.453891190 1.4173919 6 0.5 0.3572326 -0.994418951 1.7088842 7 0.6 0.2327149 -1.562760195 2.0281900 So: out <- bind_rows(fortify(Predict(model,test=seq(0,0.6,by=0.01), fun=exp)), fortify(Predict(model1,test=seq(0,0.6,by=0.01), fun=exp)), .id = "model") ggplot(as.data.frame(out), aes(x = test)) + geom_ribbon(aes(fill = model, ymin = lower, ymax = upper), alpha = .05) + geom_line(aes(y = yhat, col = model)) + scale_color_jco(name="", labels = c("A", "B")) + scale_fill_jco(name="", labels = c("A", "B")) + geom_segment(aes(x = 0, y = 1, xend = 0.55, yend = 1), lty="dashed", size=0.1, alpha=0.75) gives
How can I add specific value on x-axis in ggsurvplot/survminer in R?
I want 56 to show on the x-axis, but I can't figure it out. I have the following script. I have tried to add the following to the script xlim = c(seq(0,100, by=10),56) but that does not seem to work. I have tried to google it and I have read on R-documentation. I hope you can help. library(survival) library(survminer) library(ggplot2) fit <- survfit(Surv(p$time.recur.months, p$recurrence) ~ p$simpson.grade, conf.type="log", data=p) j <- ggsurvplot( fit, data = p, fun="cumhaz", risk.table = TRUE, pval = TRUE, pval.coord = c(0, 0.25), conf.int = F, legend.labs=c("Simpson Grade 1" ,"Simpson Grade 2", "Simpson Grade 3", "Simpson Grade 4"), size=c(0.7,0.7,0.7,0.7), xlim = c(0,100), alpha=c(0.7), break.time.by = 10, xlab="Time in months", #ylab="Survival probability", ggtheme = theme_gray(), risk.table.y.text.col = T, risk.table.y.text = TRUE, ylim=c(0,0.5), cumevents=T, palette="Set1" ) # My Data p <- structure(list(recurrence = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), time.recur.months = c(NA, NA, NA, NA, NA, NA, 92L, NA, NA, NA, 74L, NA, NA, NA, 2L, 8L, NA, NA, NA, NA, 58L, NA, NA, NA, NA, NA, 3L, NA, 4L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 39L, NA, NA, NA, NA, 15L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 12L, 56L, 57L, NA, NA, 49L, 17L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5L, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 9L, NA, 89L, NA, NA, NA, 8L, 6L, 8L, 4L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 60L, NA, NA, 38L, NA, NA, NA, NA, NA, 90L, NA, 58L, 54L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 53L, NA, NA, 124L, NA, NA, NA, NA, NA, NA, 7L, NA), simpson.grade = c(3L, 1L, 1L, 2L, 4L, 1L, 1L, 1L, 2L, 1L, 4L, 1L, 1L, 2L, 1L, 2L, 1L, 4L, 2L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 4L, 3L, 1L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 4L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 4L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 2L, 4L, 4L, 1L, 4L, 4L, 1L, 2L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 1L, 4L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 3L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 2L, 4L, 1L, 4L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L)), .Names = c("recurrence", "time.recur.months", "simpson.grade"), class = "data.frame", row.names = c(NA, -176L))
j is a ggsurvplot object, which is in turn a list of other objects. You can change the ggplot object at j$plot. The following will add 56 to the x-axis labels: j$plot <- j$plot + scale_x_continuous(breaks = sort(c(seq(0, 100, 10), 56))) j Personally I don't think it's a good look, as I expect evenly spaced breaks along the x-axis to match the tables below. If you want to draw attention to the position 56, I would suggest a vertical line and/or annotated label instead: j$plot <- j$plot + geom_vline(xintercept = 56, linetype = "dashed") + annotate("text", x = 56, y = 0, label = "56", hjust = -0.5) j
Change label name in ggsurvplot
I have attached My Data below. I wish to relabel "Cumulative number of events", which seem to be the default text. I would like it to read: "Cumulative number of recurrences". I can't seem to figure out how to change it - is it even possible to change the text? My graph looks like this: The graph was computed with this library(survival) library(survminer) library(ggplot2) fit <- survfit(Surv(p$time.recur.months, p$recurrence) ~ p$simpson.grade, conf.type="log", data=p) j <- ggsurvplot( fit, data = p, fun="cumhaz", risk.table = TRUE, pval = TRUE, pval.coord = c(0, 0.25), conf.int = F, legend.labs=c("Simpson Grade 1" ,"Simpson Grade 2", "Simpson Grade 3", "Simpson Grade 4"), size=c(0.7,0.7,0.7,0.7), xlim = c(0,100), alpha=c(0.7), break.time.by = 10, xlab="Time in months", #ylab="Survival probability", ggtheme = theme_gray(), risk.table.y.text.col = T, risk.table.y.text = TRUE, ylim=c(0,0.5), cumevents=T, palette="Set1" ) My Data p <- structure(list(recurrence = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), time.recur.months = c(NA, NA, NA, NA, NA, NA, 92L, NA, NA, NA, 74L, NA, NA, NA, 2L, 8L, NA, NA, NA, NA, 58L, NA, NA, NA, NA, NA, 3L, NA, 4L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 39L, NA, NA, NA, NA, 15L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 12L, 56L, 57L, NA, NA, 49L, 17L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, 5L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 9L, NA, 89L, NA, NA, NA, 8L, 6L, 8L, 4L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 60L, NA, NA, 38L, NA, NA, NA, NA, NA, 90L, NA, 58L, 54L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 53L, NA, NA, 124L, NA, NA, NA, NA, NA, NA, 7L, NA), simpson.grade = c(3L, 1L, 1L, 2L, 4L, 1L, 1L, 1L, 2L, 1L, 4L, 1L, 1L, 2L, 1L, 2L, 1L, 4L, 2L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 4L, 3L, 1L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 4L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 4L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 2L, 4L, 4L, 1L, 4L, 4L, 1L, 2L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 1L, 4L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 3L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 2L, 4L, 1L, 4L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L)), .Names = c("recurrence", "time.recur.months", "simpson.grade"), class = "data.frame", row.names = c(NA, -176L))
To change title for cumevents table you need to use argument cumevents.title. ggsurvplot(fit, p, fun = "cumhaz", risk.table = TRUE, cumevents = TRUE, pval = TRUE, pval.coord = c(0, 0.25), conf.int = FALSE, legend.labs = paste("Simpson Grade", 1:4), xlab = "Time in months", cumevents.title = "Cumulative number of recurrences", size = rep(0.7, 4), xlim = c(0, 100), ylim = c(0, 0.5), alpha = 0.7, break.time.by = 10, ggtheme = theme_gray(), risk.table.y.text.col = TRUE, risk.table.y.text = TRUE, palette = "Set1")
R Normalizing a dataset in a specific way
I have a dataset which contains 'hits' at each position in a genome. I want to normalize it in a very specific way: When the column df$HC contains the value 'HC', Take the value from df$pos which contains the position in bp, Sum up df$Hits +/-1000bp away from the one in question e.g. if df$pos = 3000, add up hits where df$pos>=2000 and <=4000, Divide every df$Hits value for those 2000 positions by the total worked out in step 3. So, each 2000bp patch around each instance of 'HC' (most values in the HC column are NA and don't need to be normalized), has each hit divided by the total number of hits in that patch. I guess I might be able to do this by subsetting each block of 2000bp around each 'HC' and processing them separately, but there are ~3000 'HC' positions. Edit: Because some 'HC+/-1000bp' regions overlap, I think now that I need to extract and process each region separately, so regions of overlap would be repeated in each subset. Thanks for any help with this, it's so confusing I have a headache! 
dput sample dataframe (due to the character limit it only contains 1000 lines, so try a smaller window than 2000bp): structure(list(chr = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "chr1", class = "factor"), pos = 1:1000, Hits = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 11L, 2L, 0L, 0L, 2L, 0L, 8L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 64L, 1L, 0L, 2L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 7L, 0L, 0L, 2L, 0L, 0L, 0L, 3L, 0L, 0L, 1L, 0L, 1L, 40L, 2L, 0L, 29L, 0L, 0L, 
0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 7L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), HC = structure(cabel = "HC", class = "factor")), .Names = c("chr", "pos", "Hits", "HC"), class = "data.frame", row.names = c(NA, -1000L)) A smaller sample dataset and expected output: pos <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) Hits <- c(0, 1, 1, 2, 2, 3, 2, 2, 1, 1) HC <- c(NA, NA, NA, NA, NA, 'HC', NA, NA, NA, NA) df <- data.frame(pos, Hits, HC) #total hits in a +/-3bp window around HC = 13 #divide each read in the window by 13: Hits <- c(0, 1, 0.077, 0.154, 0.154, 0.231, 0.154, 0.154, 0.077, 1)
Okay, this should cover at least the simplified problem: n <- 3 len <- length(df[['Hits']]) for(i in which(df[['HC']] %in% 'HC')){ ran <- max(i-n,1):min(i+n,len) reg <- df[['Hits']][ran] s <- sum(reg) reg <- reg / s df[['Hits']] <- replace(df[['Hits']],ran,reg) } fiddle
Select only rows if its value in a particular column is 'NA' in R
I'm trying to create a subset of data that contains only the rows with missing data in one of my columns. The data: data<-structure(list(ID = c(1, 2, 3, 4, 7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 27, 28, 29, 31, 34, 37, 38, 39, 40, 41), QnSinV1 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), QnSinV2 = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), QnSinV3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), QnSize = c(0.032140423, 0.017620319, NA, -0.093448167, -0.051090375, 0.001188913, NA, -0.144868599, -0.000260992, 0.008502255, -0.00346349, 0.017208373, 0.004301855, 0.004420431, -0.007564124, NA, 0.174388101, -0.142412328, 0.064935852, -0.052174354, NA, 0.005180317, 0.05728222, 0.041215822, -0.002449455, -0.040942923, -0.082284946, -0.173656321, 0.022723036, -0.061326436 ), QnWt = c(15.8, 16.5, 11.9, 13.7, 15, 15.3, 13.7, 15.8, 16.3, 15.9, 15.1, 14.5, 14.4, 15.7, 14.4, 13.3, 14.8, 15.1, 15.1, 14.7, 15.8, 17.8, 16.4, 13.4, 15.1, 14.8, 14.2, 12.7, 17.9, 16.2), QnWtLsCL = c(NA, 0.503030303, 0.596638655, NA, 0.446666667, 0.509803922, 0.408759124, 0.462025316, 0.552147239, 0.509433962, 0.456953642, 0.455172414, 0.506944444, NA, 0.486111111, 0.473684211, 0.513513514, 0.516556291, 0.582781457, 0.537414966, 0.474683544, 0.43258427, 0.432926829, NA, 0.569536424, 0.445945946, 0.485915493, 0.543307087, NA, 0.543209877), ClaustPer = c(NA, 1L, 2L, NA, 3L, 0L, 2L, 0L, 1L, 0L, 0L, 0L, 1L, NA, 0L, 7L, 1L, 0L, 1L, 0L, 1L, 2L, 2L, NA, 2L, 3L, 2L, 2L, NA, 0L), QnSurvCL = c(0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L), ColWtCL = c(NA, 11.7, 7.3, NA, 9.1, 11.1, 9.6, 11.2, 9, 11.2, 12, 11, 10.9, NA, 9.9, 8.6, 10.8, 10.9, 8.7, 10.8, 11.6, 13.7, 10.8, NA, 9.3, 9.6, 
9.8, 8.7, NA, 11.1), ColWtCL_6 = c(NA, 57.1, 45, NA, 73.6, NA, NA, NA, 43.8, NA, NA, 71.1, NA, NA, 53.7, NA, 84.4, NA, NA, NA, 56, 56.1, NA, NA, 59.4, NA, 45.7, NA, NA, NA), ColGrowthCL_6 = c(NA, 4.88034188, 6.164383562, NA, 8.087912088, NA, NA, NA, 4.866666667, NA, NA, 6.463636364, NA, NA, 5.424242424, NA, 7.814814815, NA, NA, NA, 4.827586207, 4.094890511, NA, NA, 6.387096774, NA, 4.663265306, NA, NA, NA), QnSurvCL_6 = c(NA, 1L, NA, NA, 1L, NA, NA, NA, 1L, NA, NA, 1L, NA, NA, 1L, 0L, 1L, NA, NA, NA, 1L, 1L, NA, NA, 1L, NA, 1L, NA, NA, NA), IR = c(-0.1919695, 0.0214441, NA, 0.0886954, 0.4221713, 0.0869788, 0.2716466, 0.0289674, -0.0291414, -0.1739616, -0.0215773, -0.1473209, 0.0370336, 0.254584, 0.0332632, -0.0203844, 0.1524175, -0.051451, -0.0612144, 0.1617955, 0.0354173, 0.0904954, 0.3344705, 0.0990583, 0.1985931, 0.0419539, -0.0159598, 0.1159526, -0.0057495, -0.1811458), SH = c(1.2064, 1.1093, NA, 0.922, 0.643, 0.9284, 0.7225, 0.9866, 1.0804, 1.2226, 1.0315, 1.1953, 1.007, 0.6991, 1.0264, 1.0265, 0.8865, 1.1184, 1.094, 0.829, 1.0142, 0.9824, 0.6793, 0.9188, 0.7853, 1.0352, 1.0648, 0.9654, 1.0366, 1.2044), HL = c(0.3774, 0.4349, NA, 0.5091, 0.6187, 0.5168, 0.6405, 0.4691, 0.4555, 0.3444, 0.4908, 0.3819, 0.4846, 0.6256, 0.4638, 0.4778, 0.5219, 0.433, 0.447, 0.564, 0.4899, 0.4612, 0.6542, 0.5162, 0.5549, 0.4928, 0.4471, 0.4959, 0.4523, 0.3511), MLH = c(0.534090909090909, 0.5, NA, 0.40506329113924, 0.298507462686567, 0.410958904109589, 0.293103448275862, 0.442105263157895, 0.48, 0.554347826086957, 0.453488372093023, 0.535353535353535, 0.443298969072165, 0.304878048780488, 0.457446808510638, 0.455555555555556, 0.397849462365591, 0.494252873563218, 0.48314606741573, 0.377777777777778, 0.457446808510638, 0.445652173913043, 0.3, 0.412371134020619, 0.354838709677419, 0.464646464646465, 0.474226804123711, 0.43010752688172, 0.46078431372549, 0.541666666666667)), .Names = c("ID", "QnSinV1", "QnSinV2", "QnSinV3", "QnSize", "QnWt", "QnWtLsCL", "ClaustPer", "QnSurvCL", 
"ColWtCL", "ColWtCL_6", "ColGrowthCL_6", "QnSurvCL_6", "IR", "SH", "HL", "MLH"), row.names = c(1L, 2L, 3L, 4L, 7L, 9L, 10L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 20L, 21L, 22L, 23L, 24L, 25L, 27L, 28L, 29L, 31L, 34L, 37L, 38L, 39L, 40L, 41L), class = "data.frame") My guess (which doesn't work): test<-subset(data, data$ColWtCL_6=='NA') test
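You can also do it without subset(). Comparing with =='NA' fails because NA is a missing-value marker rather than the string 'NA', so the comparison never yields TRUE. To select NA values, use the function is.na(). data[is.na(data$ColWtCL_6),] Or with subset() subset(data,is.na(ColWtCL_6))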
A tidyverse approach (package dplyr): test <- data %>% filter(is.na(ColWtCL_6)) If you want to filter based on NAs in multiple columns, consider using the function filter_at() together with a selection helper that picks the columns to test and a predicate wrapper (all_vars() or any_vars()) that states the filtering condition. Example 1: select rows of data with NA in all columns starting with Col: test <- data %>% filter_at(vars(starts_with("Col")), all_vars(is.na(.))) Example 2: select rows of data with NA in at least one of the columns starting with Col: test <- data %>% filter_at(vars(starts_with("Col")), any_vars(is.na(.))) This page of the tidyverse documentation is very helpful: https://dplyr.tidyverse.org/reference/filter_all.html
Here's another solution to find NAs across all columns in a dataframe using dplyr: library(dplyr) # get column names colnms <- colnames(df) # filter df %>% filter_at(vars(all_of(colnms)), any_vars(is.na(.)))