Why does ggplot (.predict) not plot in R /rms package? - r

Please find My Data of w and w1 at the bottom of this page. I have a Predictor (w$test and w1$test) which is the quantity of positive lymph nodes per total lymph node yield, i.e. ranging between 0 and 1.
I have produced two models, each representing a different disease stage. I wish to plot them together, but all I get is this:
The plot is produced with this code:
# Reproduction script: fit one Cox model per data set (w, w1) with a
# restricted cubic spline on `test`, then overlay the two predicted curves
# (exponentiated via fun=exp) with their confidence bands in one ggplot.
library(ggplot2)
library(rms)
library(ggsci)
# Register distribution summaries of the predictors for rms.
d <- datadist(w)
j <- options(datadist="d")
d1 <- datadist(w1)
# NOTE(review): this second options() call replaces the datadist set two lines
# above before either model is fitted, so both cph() calls below run with
# datadist = "d1" -- confirm that this is intended.
j1 <- options(datadist="d1")
model <- cph(Surv(os.neck,mors)~rcs(test),data=w)
model1 <- cph(Surv(os.neck,mors)~rcs(test),data=w1)
# Single-model plot; the x scale is limited to [0, 0.80].
ggplot(Predict(model1, fun=exp)) + scale_x_continuous(limits = c(0,0.80))
# Stack the two prediction tables; `.id = "model"` adds a column identifying
# which of the two inputs each row came from.
# NOTE(review): bind_rows() is a dplyr function and no library(dplyr) is
# visible in this snippet -- presumably attached elsewhere; verify.
out <- bind_rows(fortify(Predict(model, fun=exp)), fortify(Predict(model1, fun=exp)), .id = "model")
ggplot(as.data.frame(out), aes(x = test)) +
# Confidence band per model.
geom_ribbon(aes(fill = model, ymin = lower, ymax = upper), alpha = .05) +
# Predicted curve per model.
geom_line(aes(y = yhat, col = model)) +
scale_color_jco(name="", labels = c("A", "B")) +
scale_fill_jco(name="", labels = c("A", "B")) +
# Dashed horizontal reference line at y = 1 from x = 0 to x = 0.55.
geom_segment(aes(x = 0, y = 1, xend = 0.55, yend = 1), lty="dashed", size=0.1, alpha=0.75)
As you can see, the plot is cut around 0.35 on the x-axis. I don't get why and I want the plot to continue as there are several w$test and w1$test values greater than 0.35. Please note that this code is produced from a dput() of 30 samples and not the entire cohort.
When I look at View(out), I realize that there are only 400 rows - 200 from model and 200 from model1. It seems that row number 200 of each model corresponds to the test-value cut-off of 0.35. Please see here:
And
How can I make the plot complete according to all test-values?
My data w and w1
w1 <- structure(list(sex = c(1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L), mors = c(1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L),
os = c(26.01, 138.68, 8.41, 29.63, 10, 19.59, 22.17, 63.52,
21.44, 27.87, 40.81, 64.15, 43.24, 8.14, 17.01, 23.16, 24.38,
25.61, 29.59, 29.9, 44.7, 44.52, 64.65, 93.06, 102.88, 140.79,
157.07, 34.1, 81.15, 133.42, 24.57, 2.35, 3.44, 3.98, 4.8
), os.beh = c(20.9, 138.68, NA, 20.24, 4.7, 13.01, 16.1,
45.17, 15.56, 20.24, NA, 45.47, 42.32, 2.49, 12.26, 19, 17.02,
18.6, NA, 20.83, 31.28, 39.86, 45.34, 67.02, 96.45, NA, NA,
32.99, 77.73, 131.98, 17.38, 0.79, 0.5, 2.23, 2.33), os.neck = c(18.2,
138.68, 5.42, 19.55, 6.6, 13.01, 16.1, 45.17, 14.29, 20.24,
28.85, 45.47, 42.32, 4.99, 11.73, 16.36, 17.02, 18.6, 20.53,
20.83, 31.28, 31.51, 45.31, 67.02, 73.07, 99.98, 112.03,
32.99, 80.46, 131.98, 17.38, 0.79, 2.04, 2.23, 2.3), rfs.neck = c(11.07,
10.32, 4.44, 17.25, 5.39, 5.49, 7.03, 33.61, 12.71, 5.49,
16.92, 14.52, 13.37, 4.14, 9.36, 11.53, 8.8, 9.59, 16.53,
8.34, 8.28, 18.17, 29.6, 10.32, 7.13, 22.51, 43.93, 24.74,
12.85, 28.94, NA, NA, NA, NA, NA), rfs.neck.tsite = c(11.07,
10.32, NA, NA, NA, NA, 7.03, 33.61, NA, NA, NA, NA, NA, 4.14,
9.36, 11.53, 8.8, 9.59, 16.53, 8.34, 8.28, 18.17, 29.6, 10.32,
7.13, 22.51, 43.93, 24.74, 12.85, 28.94, NA, NA, NA, NA,
NA), rfs.neck.nsite = c(11.07, 10.32, 4.44, 17.25, NA, NA,
7.03, 33.61, 12.71, 5.49, 16.92, 14.52, 13.37, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), rfs.neck.msite = c(11.07, 10.32, 4.44, 17.25,
5.39, 5.49, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA), recidiv.tsite = c(1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L), recidiv.nsite = c(1L,
1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), n.fjernet = c(19L, 7L, 28L, 2L, 15L, 12L,
19L, 17L, 9L, 5L, 6L, 33L, 10L, 27L, 34L, 28L, 14L, NA, 8L,
11L, 14L, 19L, 5L, 38L, 5L, 8L, 10L, 55L, 22L, 8L, 16L, 18L,
6L, 23L, 5L), n.sygdom = c(2L, 0L, 2L, 0L, 9L, 1L, 1L, 1L,
0L, 1L, 0L, 4L, 0L, 4L, 0L, 0L, 0L, NA, 2L, 1L, 0L, 0L, 0L,
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 2L, 1L), stadie = c(1L,
2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 1L, 1L), test = c(0.105263157894737, 0, 0.0714285714285714,
0, 0.6, 0.0833333333333333, 0.0526315789473684, 0.0588235294117647,
0, 0.2, 0, 0.121212121212121, 0, 0.148148148148148, 0, 0,
0, NA, 0.25, 0.0909090909090909, 0, 0, 0, 0, 0.4, 0, 0, 0,
0, 0, 0, 0, 0.333333333333333, 0.0869565217391304, 0.2)), .Names = c("sex",
"mors", "os", "os.beh", "os.neck", "rfs.neck", "rfs.neck.tsite",
"rfs.neck.nsite", "rfs.neck.msite", "recidiv.tsite", "recidiv.nsite",
"n.fjernet", "n.sygdom", "stadie", "test"), row.names = c(3L,
4L, 5L, 12L, 29L, 40L, 59L, 61L, 69L, 74L, 78L, 82L, 86L, 95L,
101L, 108L, 109L, 113L, 115L, 116L, 120L, 121L, 128L, 130L, 134L,
139L, 141L, 144L, 150L, 153L, 156L, 159L, 164L, 165L, 166L), class = "data.frame")
w <- structure(list(sex = c(1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L), mors = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
os = c(16.56, 12.03, 12.08, 18.28, 17.28, 20.86, 23.48, 38.27,
58.63, 96.18, 47.84, 25.7, 27.01, 45.38, 50.04, 70.21, 7.69,
13.26, 15.2, 15.79, 15.74, 15.29, 18.59, 17.24, 22.35, 26.6,
31.85, 31.94, 31.62, 33.52, 34.2, 55.92, 55.92, 67.27, 80.17
), os.beh = c(NA, 7.28, NA, 11.17, 4.93, 64.33, 15.77, 26.94,
40.77, 69.09, 31.7, 17.05, 15.16, 32.3, 34.46, 49.81, 4.9,
5.47, 8.73, 9.92, 10.05, 10.77, 12.48, 12.52, 14.82, 18.19,
21.45, 27.05, NA, 27.01, 24.28, 40.11, 51.39, 62.11, 76.28
), os.neck = c(10.97, 8.02, 8.77, 11.66, 12.55, 13.8, 15.77,
26.94, 40.77, 69.06, 46.82, 17.05, 18.76, 32.3, 34.46, 49.81,
4.9, 8.61, 9.92, 9.92, 10.05, 10.51, 12.48, 12.52, 14.82,
15.87, 21.45, 22.14, 22.97, 23.26, 24.28, 40.11, 40.11, 47.08,
52.14), rfs.neck = c(8.21, 6.7, 5.36, 7.72, 3.71, 5.39, 8.61,
18.46, 9.56, 19.29, 12.42, 11.01, 18.14, 26.05, 15.87, 9.46,
3.81, 7.79, 8.34, 8.61, 8.28, 9.79, 6.21, 5.36, 7.49, 9.56,
16.07, 4.63, 13.31, 12.68, 20.67, 21.59, 30.16, 22.21, 0),
rfs.neck.tsite = c(8.21, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 11.01, 18.14, 26.05, 15.87, 9.46, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), rfs.neck.nsite = c(8.21, 6.7, 5.36, 7.72, 3.71, 5.39,
8.61, 18.46, 9.56, 19.29, 12.42, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), rfs.neck.msite = c(8.21, 6.7, 5.36, 7.72, 3.71,
5.39, 8.61, 18.47, 9.56, 19.29, 12.42, 11.01, 18.14, 26.06,
15.87, 9.46, 3.81, 7.79, 8.35, 8.61, 8.28, 9.79, 6.21, 5.36,
7.49, 9.56, 16.07, 4.63, 13.31, 12.68, 20.67, 21.59, 30.16,
22.21, 0), recidiv.tsite = c(1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), recidiv.nsite = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), n.fjernet = c(15L, 7L, 12L, 57L, 6L, 27L,
18L, 11L, 24L, 9L, 25L, 9L, 13L, 19L, 8L, 10L, 33L, 23L,
10L, 3L, 15L, 15L, 3L, 6L, 16L, 9L, 9L, 13L, 10L, 12L, 20L,
30L, 16L, 16L, NA), n.sygdom = c(2L, 1L, 6L, 6L, 0L, 0L,
9L, 0L, 0L, 0L, 0L, 2L, 3L, 0L, 0L, 0L, 2L, 1L, 0L, 2L, 1L,
4L, 1L, 2L, 4L, 3L, 2L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, NA),
stadie = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L,
3L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 4L, 3L, 4L, 4L, 4L, 4L, 4L,
3L, 5L, 3L, 3L, 4L, 4L, 4L, 4L), test = c(0.133333333333333,
0.142857142857143, 0.5, 0.105263157894737, 0, 0, 0.5, 0,
0, 0, 0, 0.222222222222222, 0.230769230769231, 0, 0, 0, 0.0606060606060606,
0.0434782608695652, 0, 0.666666666666667, 0.0666666666666667,
0.266666666666667, 0.333333333333333, 0.333333333333333,
0.25, 0.333333333333333, 0.222222222222222, 0, 0, 0.166666666666667,
0, 0, 0.0625, 0, NA)), .Names = c("sex", "mors", "os", "os.beh",
"os.neck", "rfs.neck", "rfs.neck.tsite", "rfs.neck.nsite", "rfs.neck.msite",
"recidiv.tsite", "recidiv.nsite", "n.fjernet", "n.sygdom", "stadie",
"test"), row.names = c(2L, 6L, 7L, 8L, 9L, 10L, 11L, 14L, 15L,
17L, 18L, 22L, 23L, 24L, 25L, 26L, 28L, 31L, 34L, 35L, 36L, 37L,
38L, 39L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L
), class = "data.frame")

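Markus is right: by default, Predict() only evaluates the predictor over the plotting range stored in the datadist object rather than over the full observed range. The way to overcome that is to define your own range of values: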
Predict(model1,test=seq(0,0.6,by=0.1))
test yhat lower upper
1 0.0 -0.4295911 -0.754044179 -0.1051379
2 0.1 0.6336235 0.027948982 1.2392981
3 0.2 0.7307858 0.175765821 1.2858057
4 0.3 0.6062680 -0.001284515 1.2138206
5 0.4 0.4817503 -0.453891190 1.4173919
6 0.5 0.3572326 -0.994418951 1.7088842
7 0.6 0.2327149 -1.562760195 2.0281900
So:
# Overlay the predicted curves of `model` and `model1` on a common, explicit
# grid of `test` values so the curves are not cut off at the default
# prediction range.
#
# Expects `model` and `model1` (the rms::cph fits from the question) to exist,
# and ggplot2, rms and ggsci to be attached.

# One shared grid keeps both curves on identical x values; widen the upper
# bound if predictions are wanted beyond 0.6.
test_grid <- seq(0, 0.6, by = 0.01)

# bind_rows() lives in dplyr, which the question never attaches, so it is
# namespace-qualified here. `.id = "model"` labels the rows "1" and "2".
out <- dplyr::bind_rows(
  fortify(Predict(model, test = test_grid, fun = exp)),
  fortify(Predict(model1, test = test_grid, fun = exp)),
  .id = "model"
)

ggplot(as.data.frame(out), aes(x = test)) +
  geom_ribbon(aes(fill = model, ymin = lower, ymax = upper), alpha = .05) +
  geom_line(aes(y = yhat, col = model)) +
  scale_color_jco(name = "", labels = c("A", "B")) +
  scale_fill_jco(name = "", labels = c("A", "B")) +
  # The reference line at y = 1 is a fixed annotation. Drawing it with
  # geom_segment(aes(<constants>)) would repeat it once per row of `out`,
  # stacking copies until `alpha` has no visible effect.
  # `linewidth` needs ggplot2 >= 3.4; on older versions use `size` instead.
  annotate(
    "segment",
    x = 0, y = 1, xend = 0.55, yend = 1,
    linetype = "dashed", linewidth = 0.1, alpha = 0.75
  )
gives

Related

R Creating a p-value matrix with missing values

I have a dataframe with many missing values (NAs). I want to create a correlation matrix with a p-value matrix as shown in this link: Link
I created the correlation matrix like this:
as.data.frame(round(cor(df, use = "pairwise.complete.obs", method = c("spearman")), 1))
Now I am trying to create a matrix that shows the p-values for each correlation. I have used this code successfully for other dataframes, which include fewer NAs.
# Build a square matrix of p-values, one per pair of columns of `mat`.
#
# mat: a matrix, or anything as.matrix() accepts (e.g. a data frame).
# Returns an ncol(mat) x ncol(mat) matrix whose [i, j] and [j, i] entries hold
# the p-value of cor.test() (called with its default settings) for columns i
# and j; the diagonal is set to 0 and row/column names are copied from `mat`.
# Errors raised by cor.test() are not handled and stop the whole computation.
cor.mtest <- function(mat) {
mat <- as.matrix(mat)
n <- ncol(mat)
# Start with all-NA, then mark each column against itself as p = 0.
p.mat<- matrix(NA, n, n)
diag(p.mat) <- 0
# Visit each unordered pair of columns once (i < j).
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j])
# Mirror the value so the result is symmetric.
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
}
}
colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
p.mat
}
p.mat <- cor.mtest(df)
But now I am getting an error:
Error in cor.test.default(mat[, i], mat[, j]) : not enough finite
observations
I also tried to use the "Hmisc" package for the rcorr-function. But the package does not load correctly. Any idea how to solve this?
structure(list(V1 = c(21L, 18L, 11L, 20L, 17L, 18L, 20L, 23L,
10L, 25L, 11L, 24L, 13L, 17L, 30L, 12L, 24L, 27L, 19L, 24L, 14L,
14L, 10L, 21L, 12L, 14L, 19L, 19L, 16L, 15L, 25L, 15L, 20L, 18L,
21L, 9L, 18L, 10L, 21L, 17L, 15L, 6L, 21L, 27L, 16L, 15L, 20L,
12L, 20L, 11L, 17L, 14L, 22L, 14L, 18L, 17L, 19L, 18L, 16L, 13L,
11L, 19L, 14L, 9L, 13L, 13L, 8L, 7L, 29L, 14L, 16L, 13L, 8L,
28L, 12L, 33L, 20L, 13L, 12L, 14L, 16L, 15L, 23L, 19L, 20L, 23L,
21L, 14L, 12L, 30L, 11L, 12L, 14L, 13L, 15L, 13L, 6L, 15L, 19L,
15L, 18L, 23L, 19L, 11L, 18L, 9L, 18L, 17L, 15L, 8L, 13L, 8L,
20L, 17L, 25L, 11L, 25L, 19L, 13L, 15L, 15L, 15L, 12L, 16L, 20L,
13L, 24L, 12L, 23L, 21L, 15L, 18L, 14L, 20L, 21L, 20L, 19L, 21L,
11L, 24L, 12L, 15L, 16L, 26L, 8L, 19L, 19L, 12L, 13L, 20L, 23L,
11L, 17L, 17L, 11L, 19L, 17L, 15L, 14L, 13L, 14L, 20L, 22L, 21L,
17L, 17L, 16L, 14L, 11L, 7L, 21L, 15L, 15L, 17L, 11L, 15L, 18L,
13L, 23L, 16L, 16L, 23L, 12L, 16L, 15L, 8L, 19L, 14L, 18L, 13L,
17L, 16L, 25L, 14L, 22L, 14L, 14L, 18L, 9L, 11L), V2 = c(1L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L,
3L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 0L,
2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 0L, 1L,
0L, 1L, 1L, 1L, 1L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 2L, 0L, 0L, 0L, 1L, 0L, 2L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 2L, 1L, 1L, 2L, 1L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 3L, 0L, 0L, 1L, 0L), V3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
0L), V6 = c(5L, 2L, 0L, 3L, 3L, 1L, 2L, 5L, 0L, 3L, 0L, 3L, 4L,
0L, 7L, 3L, 6L, 2L, 1L, 6L, 0L, 0L, 3L, 1L, 0L, 1L, 1L, 0L, 1L,
2L, 4L, 1L, 5L, 3L, 0L, 3L, 0L, 0L, 2L, 3L, 0L, 1L, 6L, 3L, 1L,
0L, 1L, 1L, 2L, 1L, 1L, 2L, 3L, 3L, 3L, 0L, 2L, 5L, 2L, 1L, 2L,
2L, 0L, 1L, 0L, 2L, 0L, 1L, 4L, 3L, 2L, 3L, 1L, 2L, 2L, 4L, 1L,
0L, 0L, 6L, 1L, 3L, 4L, 1L, 2L, 1L, 3L, 3L, 0L, 4L, 1L, 0L, 0L,
2L, 1L, 1L, 0L, 2L, 1L, 2L, 4L, 2L, 2L, 1L, 1L, 2L, 5L, 5L, 2L,
2L, 2L, 1L, 1L, 3L, 5L, 1L, 2L, 5L, 3L, 4L, 0L, 1L, 2L, 1L, 5L,
4L, 2L, 3L, 3L, 3L, 0L, 3L, 0L, 2L, 1L, 3L, 1L, 4L, 3L, 2L, 0L,
3L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 3L, 1L, 2L, 1L, 8L, 2L, 4L, 5L, 2L, 3L, 2L, 1L, 4L, 2L, 1L,
0L, 1L, 1L, 4L, 2L, 6L, 4L, 2L, 2L, 1L, 0L, 1L, 0L, 5L, 3L, 2L,
1L, 2L, 2L, 0L, 2L, 4L, 2L, 2L, 1L, 0L, 1L), V40 = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), V29 = c(1L, 0L, 0L, 0L, 2L, 0L, 2L,
1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 1L, 0L, 1L, 1L, 0L, 2L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 2L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 2L, 0L, 0L, 1L, 0L, 2L, 0L,
0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 2L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 2L, 0L, 0L, 2L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 2L, 0L, 0L, 1L, 1L, 2L, 1L, 0L,
1L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), V56 = c(0.2, 0, 0, 8.5, 3.1, 0.1, 4.5, 26.6, 1, 0, 0, 1.5,
3.7, 0, 0, 0.3, 10.8, 0.5, 0, 2.7, 0, 0, 8.8, 0, 0, 0, 0.4, 0,
0, 0, 0, 16.4, 4.2, 3.9, 3.5, 3.1, 0, 9, 16, 0, 0, 6, 0, 7.9,
0, 3.2, 0.9, 0, 4.2, 0, 1.2, 0, 0, 1.1, 0, 0, 0.2, 0, 0, 0, 0,
13.1, 0, 0.3, 0.1, 0.6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0.1,
0, 0, 0, 0, 3.6, 2, 30.3, 0, 0, 0, 0, 0.3, 0, 4.2, 0, 2.6, 0,
4.8, 0, 0, 0, 2.2, 0.5, 0, 0, 0, 0, 0, 2.9, 0, 2.9, 0.4, 2.4,
0, 0, 11.5, 6.3, 0, 0, 0.2, 16.3, 0, 0, 0.2, 0, 5, 0, 0, 0, 0,
0.7, 4.8, 0, 1.8, 0.1, 0, 0, 0, 0, 0, 0, 0, 0.4, 1.4, 1.2, 0,
0, 1.4, 0, 1.1, 0, 1.7, 0.1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1.6,
0, 2.5, 0, 0.5, 0, 1.4, 0.3, 0, 0, 0.1, 0, 12, 0, 0, 4.9, 4.8,
0.2, 0.9, 1.6, 7.8, 0, 0, 0, 0, 0.6, 2.8, 0, 2.2, 0, 0, 2.8,
0, 0.6, 0.3, 0, 9.9, 2.8, 0.8, 0.1), V62 = c(28.8, 19.5, 26,
29.8, 13, 7.1, 22.6, 11, 21.2, 0.1, 31.7, 7.2, 5.3, 18.4, -1.4,
0.9, 3.2, 5, 31.9, 8.7, 7.9, 30.6, 7.9, 17.2, 24.7, 26.1, 22,
29, -6.3, 30.9, 5.7, 11.7, 28.1, 22.9, 12.2, 29.7, 2.7, 5.5,
19.7, 17.8, 24, 28.6, 24.4, 20, 29.1, 13.7, 8.7, 12, 8.8, 10.4,
9.7, 10, 19.6, -0.5, 25.6, 17.9, 14.2, 12, 3.6, 2.9, 5.9, 26.7,
8.7, 20.9, 0.8, 10.5, 14.3, 19.5, -0.3, 28.8, 26.5, 4.9, -0.5,
23.8, -1.3, 12.1, 2.4, 17.2, 22.1, 23.5, 17, -0.9, 19.3, 4.9,
20.1, 12.2, 10.8, 31.6, 26.1, 2.5, 26.7, 7.5, 8.2, 11.8, 22.3,
28.3, 21.4, 25.4, -0.4, 11.4, 27, 9.3, 23.6, 19.9, 23.5, 19.2,
6.7, 18.9, 2.8, 28, 9.6, 15.2, 13.1, 0, 22.7, 5.7, 3, 4.7, 9.9,
21.9, -1.6, 19, 11, 17.2, 12.9, 27.4, 21.5, 14.3, 4.5, 6.1, 23.1,
-0.1, 5.1, 18.7, 3.7, 10.1, 22.6, 16.1, 7.9, 0.9, 30.8, 2.6,
30.3, 25.9, 20.5, 5.2, 26.9, 22.9, 24.8, 19.6, 10.7, 14.9, 21.9,
24.5, 21, 11.3, 1.5, 17.6, -8.8, 5.3, -1.2, 29.1, 22.6, 6.7,
24.6, 22.2, 1.9, 12.8, 19.6, 20.5, 15, 2.9, 27.2, 16.5, -1.4,
17.1, 8.2, 16, 4.2, 6.6, 19.8, -4.8, 21.7, 27.7, 4.3, 0.4, 25.4,
27.2, 28.7, 17.9, 22.7, 8.9, 22.1, 16.3, 5.4, 15.3, 9.9, 30.2,
14.7, 14.2), V73 = c(NA, NA, NA, -0.09275986, NA, NA, 0.52943606,
NA, NA, NA, 0.39573934, NA, NA, 0.06665112, NA, NA, NA, NA, 0.09889552,
NA, NA, 0.52411667, NA, NA, 0.0786277, 0.39117113, NA, 0.30804176,
NA, 0.4984171, NA, NA, 0.69054695, 0.61838979, NA, 0.49298138,
NA, NA, NA, NA, NA, 0.44718356, NA, 0.24114516, 0.00855375, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0.31341432, NA, NA, NA, NA, NA,
NA, 0.38816502, NA, 0.69810769, NA, NA, NA, 0.46607416, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.39012246, NA, NA, NA, NA,
0.42507386, NA, NA, -0.26830461, NA, NA, 0.29439447, NA, NA,
NA, 0.18582551, -0.00246774, 0.33244636, 0.26097549, NA, NA,
0.56932173, NA, 0.33573443, NA, NA, NA, NA, NA, NA, 0.74612433,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 0.02980432, NA, NA, NA, NA, NA, NA, 0.60470877,
NA, NA, 0.29230953, NA, -0.11296095, 0.09783287, NA, NA, 0.32181372,
NA, NA, NA, NA, NA, NA, 0.3255947, 0.4099077, NA, NA, NA, NA,
NA, NA, 0.42345733, 0.29293533, NA, 0.52832981, NA, NA, NA, NA,
NA, NA, NA, 0.55373453, NA, NA, NA, NA, NA, NA, NA, 0.4070331,
NA, 0.30780722, 0.59547858, NA, NA, 0.66333634, NA, 0.38209532,
NA, NA, NA, NA, NA, NA, NA, NA, 0.35778449, NA, NA), V77 = c(NA,
NA, 0.45406227, NA, 0.87348132, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 0.78536916, NA, -0.01870051, NA, NA, NA, NA,
NA, NA, -0.00150528, NA, NA, NA, NA, -0.49992833, NA, NA, NA,
NA, NA, NA, NA, -0.12002325, -0.16249647, NA, 0.51132754, NA,
NA, NA, -0.20643247, 0.59529347, NA, 0.32442411, NA, NA, NA,
NA, NA, NA, NA, 0.80611793, NA, NA, NA, NA, NA, NA, NA, 0.75247001,
0.65079036, NA, NA, 0.29773326, -0.2164507, NA, NA, 0.36336748,
NA, NA, NA, NA, 0.49664945, NA, NA, NA, 0.35610758, NA, NA, NA,
0.3734933, NA, 0.58752714, NA, NA, NA, -0.38266847, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.28871445, NA, 0.05455121,
NA, NA, NA, NA, NA, NA, 0.0408944, NA, NA, NA, NA, NA, NA, 0.87592639,
NA, NA, NA, NA, NA, NA, NA, 0.28923257, NA, NA, NA, -0.16730842,
NA, -0.122933, 0.25704385, NA, NA, NA, NA, NA, NA, NA, 0.92475694,
NA, NA, NA, 0.15886697, 0.51925536, NA, NA, 0.25372613, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.89195925,
NA, NA, NA, -0.60877514, NA, 0.33866615, NA, NA, 0.60955791,
NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.05461735, NA, NA, 0.33697054,
NA, -0.12079077, -0.14805299, -0.24541818, NA, 0.36340054, NA
), V81 = c(NA, NA, -0.08490089, NA, 0.0555794, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, -0.22856711, NA, -0.57790508,
NA, NA, NA, NA, NA, NA, 0.04856018, NA, NA, NA, NA, -0.38039271,
NA, NA, NA, NA, NA, NA, NA, -0.63132241, -0.35266074, NA, 0.01961822,
NA, NA, NA, -0.34551275, -0.39085104, NA, -0.27725445, NA, NA,
NA, NA, NA, NA, NA, -0.21599455, NA, NA, NA, NA, NA, NA, NA,
-0.19924471, -0.18365343, NA, NA, -0.53484587, -0.32543563, NA,
NA, -0.19992419, NA, NA, NA, NA, -0.18500223, NA, NA, NA, -0.12990151,
NA, NA, NA, -0.39083879, NA, -0.59264661, NA, NA, NA, 0.13154274,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.23261324,
NA, -0.03944042, NA, NA, NA, NA, NA, NA, -0.22193873, NA, NA,
NA, NA, NA, NA, -0.20022085, NA, NA, NA, NA, NA, NA, NA, 0.08615186,
NA, NA, NA, -0.74607469, NA, 0.23032189, 0.0449706, NA, NA, NA,
NA, NA, NA, NA, -0.04848046, NA, NA, NA, -0.6370161, -0.02900035,
NA, NA, -0.23145663, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 0.14884929, NA, NA, NA, 0.22450133,
NA, 0.24769837, NA, NA, -0.29667428, NA, NA, NA, NA, NA, NA,
NA, NA, NA, -0.03071992, NA, NA, 0.07786378, NA, 0.23027039,
-0.20214392, -0.3032353, NA, -0.47432158, NA), V89 = c(0.0834995,
0.00066815, NA, NA, NA, NA, NA, NA, 0.02511399, NA, NA, NA, 0.052432,
NA, NA, NA, -0.14814967, NA, NA, NA, NA, NA, -0.33114922, 0.34514567,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.19468406, NA, NA, NA,
-0.38972029, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.23425484,
NA, -0.11003854, NA, -0.26367322, NA, NA, 0.29238575, 0.07886438,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, -0.15248164, NA, -0.15641155, NA, NA, -0.08752716, NA,
NA, NA, NA, 0.34809891, NA, NA, NA, NA, NA, NA, NA, -0.27401719,
NA, NA, NA, NA, NA, -0.32273288, NA, 0.02669399, NA, 0.0727079,
0.08290143, NA, -0.16476099, NA, NA, NA, NA, -0.1029079, -0.11614262,
NA, NA, -0.14913232, NA, -0.29380582, -0.537503, 0.11869562,
NA, NA, NA, -0.17315201, NA, 0.10272535, 0.0932595, 0.0793467,
-0.0845297, NA, NA, NA, -0.02889606, NA, NA, NA, NA, NA, 0.15552849,
0.04599214, NA, 0.19864881, NA, NA, NA, NA, NA, -0.11474285,
NA, NA, NA, 0.10901186, NA, NA, NA, 0.13339891, NA, 0.07056403,
NA, NA, NA, NA, NA, NA, NA, -0.25760406, 0.2062942, -0.00981489,
0.3282743, 0.06509166, NA, NA, NA, -0.26049214, NA, -0.13281234,
NA, 0.32791015, -0.13518787, NA, NA, NA, NA, NA, NA, NA, NA,
0.05660112, NA, NA, 0.12368526, -0.15672689, NA, -0.42175072,
NA, NA, NA, NA, NA, -0.22635573), V90 = c(-0.04245051, 0.3507695,
NA, NA, NA, NA, NA, NA, 0.32893767, NA, NA, NA, -0.35288827,
NA, NA, NA, -0.02734148, NA, NA, NA, NA, NA, -0.01271804, -0.26617777,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.37528838, NA, NA,
NA, 0.14921273, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.46296948,
NA, -0.20223671, NA, 0.12754582, NA, NA, 0.05006781, 0.22653775,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, -0.26092513, NA, 0.54215354, NA, NA, -0.23136087, NA,
NA, NA, NA, -0.04596987, NA, NA, NA, NA, NA, NA, NA, 0.14239809,
NA, NA, NA, NA, NA, 0.11650203, NA, 0.17058915, NA, -0.18403288,
0.10295627, NA, -0.15530088, NA, NA, NA, NA, -0.45405281, -0.10929859,
NA, NA, 0.14782657, NA, -0.15852471, -0.05266618, -0.18175069,
NA, NA, NA, -0.11917474, NA, 0.16136416, -0.14499177, -0.17504283,
0.13272865, NA, NA, NA, -0.17429991, NA, NA, NA, NA, NA, -0.22030747,
0.29022488, NA, 0.05889091, NA, NA, NA, NA, NA, 0.30446594, NA,
NA, NA, 0.23796595, NA, NA, NA, 0.14051101, NA, -0.05704354,
NA, NA, NA, NA, NA, NA, NA, 0.25256272, -0.14193822, 0.06924969,
0.00445279, 0.29815696, NA, NA, NA, 0.25643083, NA, 0.35649173,
NA, -0.25180143, -0.05787895, NA, NA, NA, NA, NA, NA, NA, NA,
0.03069952, NA, NA, -0.18662018, -0.15144552, NA, 0.06595208,
NA, NA, NA, NA, NA, 0.32091592)), .Names = c("V1", "V2", "V3",
"V6", "V40", "V29", "V56", "V62", "V73", "V77", "V81", "V89",
"V90"), class = "data.frame", row.names = c(NA, -200L))
This error happens because cor.test() needs at least 3 complete (non-NA) observations in each pair of columns.
To solve this, you may want to set p-value = NA when you find an error like this. You can use this variation of the function:
# Matrix of correlation-test p-values for every pair of columns.
#
# Arguments:
#   mat  A numeric matrix, or an object as.matrix() can convert (for example
#        a data frame); columns are the variables to correlate.
#   ...  Further arguments forwarded to stats::cor.test(), e.g.
#        method = "spearman". With no extra arguments the behaviour is the
#        same as before.
#
# Value:
#   An ncol(mat) x ncol(mat) symmetric numeric matrix. Entry [i, j] is the
#   p-value of cor.test() for columns i and j, or NA when the test cannot be
#   computed for that pair (typically too few complete observations). The
#   diagonal is 0 and row/column names are taken from colnames(mat).
cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat <- matrix(NA_real_, n, n)
  diag(p.mat) <- 0
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)

  # seq_len() (not 1:(n - 1)) so that inputs with fewer than two columns
  # simply skip the loop instead of indexing non-existent columns.
  for (i in seq_len(max(n - 1L, 0L))) {
    for (j in (i + 1L):n) {
      # A pair that cannot be tested yields NA rather than aborting the
      # whole matrix; every other pair is still evaluated.
      p.mat[i, j] <- p.mat[j, i] <- tryCatch(
        cor.test(mat[, i], mat[, j], ...)$p.value,
        error = function(e) NA_real_
      )
    }
  }

  p.mat
}

Different results in Stata and R with the "same" anova code

I have some Stata code and I want to replicate the results in R. However, even with the same dataset and, I think, the same code, I get different results in R from those in Stata. I think it could be because Stata makes the order of the regression different than keyed in.
Do I need exactly the same order as in Stata to get the same results and how can I do this?
I changed all the variables to factors and tried again but the problem is still there.
I noticed that when I change the order of the explanatory variables I get different results, but I can't find "the right order" to replicate the Stata results.
Stata code:
. anova testm2 c.testm1 i.hptreat c.cortm1 c.cortm2 i.female if inelig == 0 & anyoutv1 == 0
Number of obs =39 R-squared =0.7048
Root MSE= 16.0144 Adj R-squared =0.6601
Source | Partial SS df MS F Prob>F
---------------------------------------------------------------
Model | 20209.281 5 4041.8563 15.76 0.0000
testm1 | 3516.6527 1 3516.6527 13.71 0.0008
hptreat| 1183.5007 1 1183.5007 4.61 0.0391
cortm1 | 8.5753841 1 8.5753841 0.03 0.8560
cortm2 | 2810.9353 1 2810.9353 10.96 0.0023
female | 2557.3444 1 2557.3444 9.97 0.0034
Residual| 8463.2532 33 256.46222
----------------------------------------------------------------
Total | 28672.535 38 754.54038
R code:
# Fit testm2 on the five predictors, in the order written, and print the
# ANOVA table.
# NOTE(review): in the output below only the last term (female) and the
# residual line agree with Stata's "Partial SS" column, and the asker reports
# that reordering the terms changes the table. That suggests summary() is
# showing order-dependent (sequential) sums of squares -- confirm, e.g. by
# comparing against drop1(FosseTest, test = "F").
FosseTest<-aov(testm2~testm1+hptreat+cortm1+cortm2+female,data=X2data)
summary(FosseTest)
Df Sum Sq Mean Sq F value Pr(>F)
testm1 1 15121 15121 58.962 7.68e-09 ***
hptreat 1 524 524 2.043 0.16228
cortm1 1 23 23 0.089 0.76715
cortm2 1 1984 1984 7.735 0.00888 **
female 1 2557 2557 9.972 0.00339 **
Residuals 33 8463 256
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
You can see that I get totally different values in the replication.
In the X2data set I have already subset the observations to those with inelig == 0 & anyoutv1 == 0.
for the reconstruction of the data:
dput(X2data)
structure(list(id = c(29L, 30L, 31L, 32L, 34L, 35L, 36L, 37L,
39L, 41L, 42L, 43L, 44L, 46L, 47L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 57L, 58L, 59L, 60L, 61L, 62L, 64L, 65L, 66L, 67L, 68L, 69L,
70L, 71L, 72L, 73L, 74L), inelig = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Analytic sample (keep)", "Ineligible (drop)"
), class = "factor"), ccydrop = c(0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), cortm1v2 = c(0.003, 0.086, 0.047, 0.106, NA, 0.153, 0.086,
0.005, 0.133, 0.036, 0.03, 0.015, 0.014, 0.111, 0.389, 0.298,
0.4, 0.215, 0.062, 0.021, 0.075, 0.073, 0.033, 0.243, 0.126,
0.147, 0.019, 0.048, 0.28, 0.052, 0.039, 0.105, 0.111, 0.133,
0.065, 0.051, 0.143, 0.127, 0.095), cortm2v2 = c(0.025, 0.167,
0.059, 0.112, 0.171, 0.183, 0.102, 0.018, 0.08, 0.015, 0.027,
0.05, 0.025, 0.046, 0.085, 0.144, 0.155, 0.09, 0.057, 0.023,
0.038, 0.205, 0.035, 0.198, 0.112, 0.211, 0.042, 0.142, 0.328,
0.076, 0.067, 0.094, 0.245, 0.153, 0.115, 0.127, 0.257, 0.125,
0.096), cdiffv2 = c(0.022, 0.081, 0.012, 0.006, NA, 0.03, 0.016,
0.013, -0.053, -0.021, -0.003, 0.035, 0.011, -0.065, -0.304,
-0.154, -0.245, -0.125, -0.005, 0.002, -0.037, 0.132, 0.002,
-0.045, -0.014, 0.064, 0.023, 0.094, 0.048, 0.024, 0.028, -0.011,
0.134, 0.02, 0.05, 0.076, 0.114, -0.002, 0.001), testm1v2 = c(38.72,
32.77, 32.32, 17.99, 73.58, 80.69, 48.56, 21.92, 27.24, 40.93,
31.73, 60.05, 38.04, 30.17, 59.07, 26.92, 25.41, 47.81, 63.02,
34.49, 104.38, 38.08, 30.99, 35.23, 104.81, 49.33, 50.03, 11.65,
143.57, 48.31, 90.37, 48.56, 41.67, 75.23, 60.56, 39.03, 18.16,
37.9, 84.5), testm2v2 = c(62.37, 29.23, 27.51, 28.66, 44.67,
105.48, 42.67, 15.01, 21.33, 10.87, 2.14, 44.53, 35.8, 10.43,
47.54, 48.5, 38.98, 91.32, 52.94, 22.43, 58.68, 81.63, 34.79,
38.57, 94.86, 50.83, 55.75, 45.33, 111.62, 65.15, 81.08, 50.08,
44.86, 58.63, 85.85, 58.69, 16.35, 35.97, 99.08), tdiffv2 = c(23.65,
-3.54, -4.81, 10.67, -28.91, 24.79, -5.89, -6.91, -5.91, -30.06,
-29.59, -15.52, -2.24, -19.74, -11.53, 21.58, 13.57, 43.51, -10.08,
-12.06, -45.7, 43.55, 3.8, 3.34, -9.95, 1.5, 5.72, 33.68, -31.95,
16.84, -9.29000000000001, 1.52, 3.19, -16.6, 25.29, 19.66, -1.81,
-1.93, 14.58), testoutv1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Not selected", "Selected"), class = "factor"),
cortoutv1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Not selected", "Selected"), class = "factor"),
anyoutv1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Not selected", "Selected"), class = "factor"),
testoutv2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Not selected", "Selected"), class = "factor"),
cortoutv2 = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Not selected", "Selected"), class = "factor"),
anyoutv2 = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Not selected", "Selected"), class = "factor"),
pose1rate = c(6L, 7L, 6L, 6L, 7L, 7L, 6L, 7L, 5L, 6L, 7L,
4L, 7L, 7L, 7L, 6L, 7L, 7L, 7L, 7L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), pose2rate = c(6L,
6L, 5L, 7L, 7L, 7L, 7L, 7L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 6L,
6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 6L, 7L, 6L, 7L, 7L, 7L,
6L, 7L, 7L, 7L, 7L, 7L, 6L, 6L), poseratem = c(6, 6.5, 5.5,
6.5, 7, 7, 6.5, 7, 5.5, 6.5, 7, 5.5, 7, 7, 7, 6, 6.5, 7,
7, 7, 6.5, 7, 7, 7, 7, 6.5, 7, 6.5, 7, 7, 7, 6.5, 7, 7, 7,
7, 7, 6.5, 6.5), saldiff = c(24.30555556, 20.83333333, 29.16666667,
18.75, 23.61111111, 34.02777778, 18.05555556, 19.44444444,
21.52777778, 15.97222222, 22.91666667, 13.88888889, 22.22222222,
25, 22.22222222, 22.22222222, 18.05555556, 17.36111111, 22.22222222,
27.08333333, 20.83333333, 24.30555556, 22.22222222, 28.47222222,
24.30555556, 25, 27.77777778, 22.22222222, 15.97222222, 24.30555556,
21.52777778, 19.44444444, 15.97222222, 15.27777778, 15.97222222,
24.30555556, 19.44444444, 24.30555556, 15.27777778), sal2manip = c(19.80555556,
16.33333333, 24.66666667, 14.25, 19.11111111, 29.52777778,
13.55555556, 14.94444444, 17.02777778, 11.47222222, 18.41666667,
9.38888889, 17.72222222, 20.5, 17.72222222, 17.72222222,
13.55555556, 12.86111111, 17.72222222, 22.58333333, 16.33333333,
19.80555556, 17.72222222, 23.97222222, 19.80555556, 20.5,
23.27777778, 17.72222222, 11.47222222, 19.80555556, 17.02777778,
14.94444444, 11.47222222, 10.77777778, 11.47222222, 19.80555556,
14.94444444, 19.80555556, 10.77777778), hptreat = structure(c(2L,
1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
female = structure(c(1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L
), .Label = c("0", "1"), class = "factor"), age = c(19L,
20L, 20L, 18L, 21L, 20L, 18L, 21L, 35L, 20L, 18L, 20L, 20L,
18L, 20L, 25L, 18L, 23L, 21L, 19L, 20L, 20L, 30L, 19L, 22L,
18L, 19L, 22L, 19L, 20L, 28L, 28L, 19L, 19L, 20L, 25L, 20L,
25L, 23L), cort1a1 = c(0.004, 0.085, 0.049, 0.107, 0.486,
0.159, 0.088, 0.004, 0.138, 0.035, 0.03, 0.018, 0.017, 0.111,
0.39, 0.292, 0.396, 0.213, 0.065, 0.022, 0.074, 0.077, 0.035,
0.241, 0.126, 0.154, 0.021, 0.05, 0.296, 0.054, 0.04, 0.109,
0.114, 0.133, 0.063, 0.055, 0.149, 0.134, 0.098), cort1a2 = c(0.001,
0.086, 0.045, 0.105, 0.482, 0.147, 0.085, 0.005, 0.127, 0.037,
0.031, 0.013, 0.011, 0.111, 0.389, 0.304, 0.405, 0.218, 0.059,
0.02, 0.076, 0.069, 0.032, 0.246, 0.126, 0.141, 0.017, 0.046,
0.264, 0.051, 0.038, 0.101, 0.109, 0.133, 0.068, 0.048, 0.137,
0.12, 0.092), cort2a1 = c(0.027, 0.174, 0.056, 0.111, 0.175,
0.179, 0.103, 0.021, 0.079, 0.014, 0.028, 0.051, 0.024, 0.051,
0.083, 0.148, 0.156, 0.086, 0.062, 0.024, 0.038, 0.209, 0.036,
0.199, 0.114, 0.207, 0.041, 0.141, 0.333, 0.078, 0.065, 0.088,
0.238, 0.157, 0.119, 0.132, 0.268, 0.132, 0.099), cort2a2 = c(0.023,
0.161, 0.062, 0.113, 0.166, 0.188, 0.101, 0.016, 0.081, 0.015,
0.026, 0.049, 0.026, 0.041, 0.086, 0.139, 0.154, 0.093, 0.052,
0.022, 0.038, 0.202, 0.034, 0.198, 0.111, 0.215, 0.042, 0.142,
0.324, 0.075, 0.068, 0.101, 0.252, 0.149, 0.111, 0.123, 0.247,
0.118, 0.093), cortm1 = c(0.0024999999, 0.085500002, 0.046999998,
0.106, 0.484, 0.153, 0.086499996, 0.0044999998, 0.13249999,
0.035999998, 0.0305, 0.0155, 0.014, 0.111, 0.38949999, 0.29800001,
0.4005, 0.2155, 0.061999999, 0.021, 0.075000003, 0.072999999,
0.033500001, 0.24349999, 0.126, 0.14749999, 0.018999999,
0.048, 0.28, 0.052499998, 0.039000001, 0.105, 0.1115, 0.133,
0.065499999, 0.0515, 0.14300001, 0.127, 0.094999999), cortm2 = c(0.025,
0.1675, 0.059, 0.112, 0.1705, 0.18350001, 0.102, 0.0185,
0.079999998, 0.0145, 0.027000001, 0.050000001, 0.025, 0.046,
0.0845, 0.1435, 0.155, 0.089500003, 0.057, 0.023, 0.037999999,
0.20550001, 0.035, 0.19850001, 0.1125, 0.211, 0.041499998,
0.1415, 0.3285, 0.076499999, 0.066500001, 0.094499998, 0.245,
0.153, 0.115, 0.1275, 0.25749999, 0.125, 0.096000001), cdiff = c(0.022500001,
0.082000002, 0.012000002, 0.0060000047, -0.31349999, 0.03050001,
0.015500002, 0.014, -0.052499995, -0.021499999, -0.0034999996,
0.034500003, 0.011, -0.064999998, -0.30500001, -0.15450001,
-0.2455, -0.12599999, -0.004999999, 0.0020000003, -0.037000004,
0.13250001, 0.0014999993, -0.044999987, -0.013500005, 0.063500002,
0.022499999, 0.093499996, 0.048500001, 0.024, 0.0275, -0.010499999,
0.13350001, 0.019999996, 0.049500003, 0.075999998, 0.11449999,
-0.0020000041, 0.001000002), test1a1 = c(39.87, 33.22, 32.52,
19.74, 78.85, 83.51, 48.37, 22.31, 28.17, 41.44, 32.92, 61.4,
40.31, 30.36, 59.44, 27.52, 26.14, 46.75, 63.73, 34.03, 98.47,
36.62, 30.26, 37.15, 105.64, 47.99, 50.15, 11.33, 149.12,
48.57, 92.04, 51.22, 42.25, 77.07, 62.75, 38.8, 17.91, 40.28,
88.47), test1a2 = c(37.58, 32.32, 32.12, 16.25, 68.31, 77.88,
48.75, 21.53, 26.32, 40.42, 30.55, 58.7, 35.78, 29.97, 58.7,
26.32, 24.69, 48.87, 62.32, 34.95, 110.29, 39.53, 31.72,
33.32, 103.99, 50.67, 49.9, 11.97, 138.02, 48.05, 88.7, 45.89,
41.08, 73.39, 58.38, 39.25, 18.41, 35.53, 80.54), test2a1 = c(64.22,
29.43, 27.98, 28.17, 46.14, 105.92, 43.68, 16.41, 21.42,
11.35, 1.66, 44.17, 38.58, 11.11, 48.57, 48.31, 39.71, 92.04,
52.73, 22.3, 58.23, 82.01, 35.76, 39.59, 94.06, 50.52, 55.82,
45.91, 115.13, 67.59, 82.97, 49.89, 45.09, 57.86, 86.76,
58.83, 16.53, 36.7, 100.4), test2a2 = c(60.53, 29.04, 27.04,
29.14, 43.2, 105.05, 41.66, 13.62, 21.25, 10.39, 2.63, 44.9,
33.02, 9.75, 46.52, 48.7, 38.25, 90.59, 53.15, 22.57, 59.14,
81.24, 33.81, 37.55, 95.66, 51.14, 55.69, 44.74, 108.1, 62.71,
79.18, 50.27, 44.63, 59.39, 84.94, 58.55, 16.16, 35.24, 97.75
), testm1 = c(38.724998, 32.77, 32.32, 17.995001, 73.580002,
80.695, 48.560001, 21.92, 27.245001, 40.93, 31.735001, 60.049999,
38.044998, 30.165001, 59.07, 26.92, 25.415001, 47.810001,
63.025002, 34.490002, 104.38, 38.075001, 30.99, 35.235001,
104.815, 49.330002, 50.025002, 11.65, 143.57001, 48.310001,
90.370003, 48.555, 41.665001, 75.230003, 60.564999, 39.025002,
18.16, 37.904999, 84.504997), testm2 = c(62.375, 29.235001,
27.51, 28.655001, 44.669998, 105.485, 42.669998, 15.015,
21.334999, 10.87, 2.145, 44.535, 35.799999, 10.43, 47.544998,
48.505001, 38.98, 91.315002, 52.939999, 22.434999, 58.685001,
81.625, 34.785, 38.57, 94.860001, 50.830002, 55.755001, 45.325001,
111.615, 65.150002, 81.074997, 50.080002, 44.860001, 58.625,
85.849998, 58.689999, 16.344999, 35.970001, 99.074997), tdiff = c(23.650002,
-3.5349998, -4.8099995, 10.66, -28.910004, 24.790001, -5.8900032,
-6.9049997, -5.9100018, -30.060001, -29.59, -15.514999, -2.2449989,
-19.735001, -11.525002, 21.585001, 13.564999, 43.505001,
-10.085003, -12.055002, -45.694996, 43.549999, 3.7950001,
3.3349991, -9.9550018, 1.5, 5.7299995, 33.675003, -31.955009,
16.84, -9.2950058, 1.5250015, 3.1949997, -16.605003, 25.285,
19.664997, -1.8150005, -1.9349976, 14.57), feelpower = structure(c(2L,
3L, 1L, 2L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 1L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 4L, 3L,
4L, 3L, 1L, 3L, 4L, 2L, 2L, 3L), .Label = c("2", "3", "Not at all",
"Very much"), class = "factor"), incharge = structure(c(1L,
1L, 3L, 4L, 1L, 2L, 3L, 3L, 1L, 1L, 3L, 4L, 3L, 2L, 2L, 1L,
3L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 3L, 1L, 1L, 4L, 3L, 1L, 1L), .Label = c("2", "3", "Not at all",
"Very much"), class = "factor"), powm = structure(c(3L, 1L,
1L, 5L, 2L, 4L, 6L, 6L, 1L, 1L, 6L, 7L, 6L, 3L, 4L, 2L, 1L,
4L, 4L, 3L, 2L, 4L, 2L, 2L, 3L, 3L, 3L, 4L, 1L, 5L, 1L, 4L,
6L, 2L, 1L, 7L, 2L, 3L, 1L), .Label = c("1.5", "2", "2.5",
"3", "3.5", "Not at all", "Very much"), class = "factor"),
diceroll = structure(c(2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L
), .Label = c("No", "Yes"), class = "factor")), row.names = c(2L,
3L, 4L, 5L, 7L, 8L, 9L, 10L, 12L, 14L, 15L, 16L, 17L, 19L, 20L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 30L, 31L, 32L, 33L, 34L, 35L,
37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L), class = "data.frame")
You can get the same results in R using drop1(FosseTest, test = "F"). This will test the effect of leaving one of the variables off the aov.
# F-tests for deleting each single term, in turn, from the fitted model.
drop1(object = FosseTest, test = "F")
# Output:
#
# Single term deletions
#
# Model:
# testm2 ~ testm1 + hptreat + cortm1 + cortm2 + female
#         Df Sum of Sq     RSS    AIC F value    Pr(>F)
# <none>                8463.3 221.82
# testm1   1    3516.7 11979.9 233.37 13.7122 0.0007751 ***
# hptreat  1    1183.5  9646.8 224.92  4.6147 0.0391333 *
# cortm1   1       8.6  8471.8 219.86  0.0334 0.8560279
# cortm2   1    2810.9 11274.2 231.00 10.9604 0.0022605 **
# female   1    2557.3 11020.6 230.11  9.9716 0.0033895 **
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(FosseTest) displays the sequential effect of adding the variables one after another.
There is also a different way to obtain this, but at the moment I can't remember it...

Difficulties in combining two boxplots per x-value in ggplot2 / R

Please find My Data below.
I wish to produce a combined boxplot as this one:
I have four different patient categories in w$WHO==1,2,3,4 each corresponding to WHO-I, WHO-II, WHO-III and Unknown tumors.
I wish to produce two boxplots per w$WHO
The first boxplot should show time to death as in w$Follow.up.death[w$Death==1]. I would like the color/outline + fill to be blue, but the fill should have an alpha=0.2.
The second boxplot should show time to progression as in w$resp.time[w$response==1]. I would like the color/outline + fill to be red, but the fill should have an alpha=0.2.
w$Death and w$response do not have the same length.
The corresponding legend should read "Overall survival" and "Progression-free survival".
I tried several manuals but I can't figure out how to do this specifically - I have only managed to produce single plots.
Any ideas how to solve this??
A sample of My data
# Sample data (dput output): a data.frame with 106 rows and five columns.
#   WHO             - patient category coded 1-4
#                     (WHO-I, WHO-II, WHO-III and Unknown tumors)
#   Death           - 0/1 indicator; rows with Death == 1 are the ones whose
#                     Follow.up.death is used as time to death
#   Follow.up.death - follow-up time
#   response        - 0/1 indicator with NA entries; rows with response == 1
#                     are the ones whose resp.time is used as time to
#                     progression
#   resp.time       - time to progression, NA in the same rows as response
w <- structure(list(WHO = c(1L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 1L, 2L,
3L, 3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 4L, 4L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 4L, 4L, 4L,
4L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 2L, 1L, 2L, 2L, 4L, 4L,
4L, 2L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 3L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L
), Death = c(0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), Follow.up.death = c(18,
2, 14, 17, 31, 4, 20, 15, 12, 19, 10, 17, 27, 22, 3, 43, 24,
14, 13, 5, 12, 137, 22, 87, 48, 24, 72, 32, 14, 83, 68, 56, 57,
18, 16, 70, 1.9, 69.2, 126.3, 41.6, 17.9, 1.3, 87.4, 4.4, 137.4,
17.5, 95.8, 65.2, 14.8, 98.5, 16.6, 74.9, 10.3, 43.4, 32.5, 4.8,
7.3, 107.8, 6.8, 18.3, 33, 25.2, 49.2, 15.9, 1.2, 42.7, 1, 9,
1.8, 15.6, 8.9, 15, 16.4, 7.7, 75.5, 12.2, 54.8, 22.2, 9.7, 14.3,
5.2, 64.5, 21.8, 0.2, 7.3, 18.7, 5.1, 17.3, 27.4, 16, 24.2, 9.7,
8.2, 5.7, 41.8, 10.6, 22.8, 4.8, 6, 4, 50, 21, 30, 5, 11, 12),
response = c(0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L,
1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L), resp.time = c(18, 2, 13, 17, 22, 2,
6, 5, 12, 8, 3, 2, 1, 21, 2, 43, 4, 2, 4, 5, 0.1, 137, 4,
87, 17, 24, 72, 19, 14, 83, 68, 56, 57, 18, 14, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 15.6, 8.9, 15, 16.4, 7.7, 75.5, 3, 54.8, 22.2, 12, 14.3,
6, 12, 21.8, 6, 3, 12, 3, 6, 3, 3, 12, 9.7, 3, 3, 12, 3,
6, 3, 6, 4, 50, 21, 30, 5, 11, 12)), .Names = c("WHO", "Death",
"Follow.up.death", "response", "resp.time"), class = "data.frame", row.names = c(NA,
-106L))
Does this come close?
# Wrangle the data into a digestible long format: one row per observed event.
# `which()` keeps only the rows where the condition is TRUE. Indexing a
# data.frame with a logical vector that contains NA (as `w$response == 1`
# does) would otherwise return an all-NA row for every NA entry.
v <- w[which(w$Death == 1), ]
vv <- w[which(w$response == 1), ]
df <- data.frame(
  # WHO category of each event, as a factor for the discrete x axis.
  x = as.factor(c(v$WHO, vv$WHO)),
  # Time to death for the `v` rows, time to progression for the `vv` rows.
  y = c(v$Follow.up.death, vv$resp.time),
  # Event type; the order matches the order in which x and y were stacked.
  f = rep(c("Death", "Response"), c(nrow(v), nrow(vv)))
)
# Safety net: drop any event whose WHO category is missing.
df <- df[!is.na(df$x), ]
And then making the plot:
# Grouped boxplots: for every WHO category on the x axis, one box per event
# type (f == "Death" -> overall survival, f == "Response" -> progression-free
# survival).
ggplot(df) +
  geom_boxplot(aes(x, y, fill = f, colour = f)) +
  scale_x_discrete(name = "WHO") +
  scale_y_continuous(name = "Time") +
  # The alpha is encoded in the hex colour so that it applies to the fill
  # only, not to the outline: 0x33 = 51 and 51 / 255 = 0.2, i.e. alpha = 0.2.
  # Values are named and breaks are explicit, so the colour/label assignment
  # does not depend on the ordering of the levels of `f`:
  # overall survival is blue, progression-free survival is red.
  scale_fill_manual(
    values = c(Death = "#0000FF33", Response = "#FF000033"),
    breaks = c("Death", "Response"),
    labels = c("Overall survival", "Progression-free survival"),
    name = NULL
  ) +
  # Same breaks, labels and (absent) title as the fill scale so that both
  # aesthetics are merged into a single legend.
  scale_colour_manual(
    values = c(Death = "blue", Response = "red"),
    breaks = c("Death", "Response"),
    labels = c("Overall survival", "Progression-free survival"),
    name = NULL
  )
Similar to @tuenbrand's approach, but using dplyr/tidyr to wrangle the data:
library(tidyverse)
# Reshape to long format: one row per patient and survival type, then keep
# only the rows whose corresponding event (death for "Overall", progression
# for "Progression-free") was actually observed.
df <- w %>%
  rename(Overall = "Follow.up.death", `Progression-free` = "resp.time") %>%
  # pivot_longer() is the current replacement for the superseded gather().
  pivot_longer(
    cols = c(Overall, `Progression-free`),
    names_to = "Survival",
    values_to = "Time"
  ) %>%
  # filter() drops rows where the condition is NA, so patients with a missing
  # `response` contribute no "Progression-free" row.
  filter(
    (Death == 1 & Survival == "Overall") |
      (response == 1 & Survival == "Progression-free")
  ) %>%
  # Readable facet labels, e.g. "WHO: 1".
  mutate(WHO = paste("WHO:", WHO))
And then facet on WHO to space the boxplots:
# One facet per WHO category, laid out in a single row. The fill legend at the
# bottom identifies the survival type, so the x-axis text, ticks and title are
# blanked out.
ggplot(data = df, mapping = aes(x = Survival, y = Time, fill = Survival)) +
  geom_boxplot() +
  facet_wrap(facets = ~WHO, nrow = 1) +
  theme(
    legend.position = "bottom",
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )

How to create score variable for predicted value from Logistic regression

I looked for previous posts and found this answer to a similar question from Aug '15. The code is:
# Fit a logistic regression of y on x1, x2 and x3.
logit <- glm(y ~ x1 + x2 + x3, family = "binomial")
# Predictions on the link (log-odds) scale for the data used in the fit.
predict(logit)
# Linear predictor b0 + b1*x1 + b2*x2 + b3*x3 for each row of `data`.
pred <- predict(logit, newdata = data)
# Inverse logit: probability that y = 1 for each observation. plogis() is
# numerically stable, whereas exp(pred) / (1 + exp(pred)) evaluates to NaN
# as soon as exp(pred) overflows to Inf.
probs <- plogis(pred)
What I don't understand is how I get the newdata=data part of the code.
How do I specify the data in the newdata part of the code?
How do I assign these predicted values to a variable?
Sample data
structure(list(CustomerID = 1:400, binary_depvar = c(1L, NA,
1L, NA, 1L, NA, NA, NA, 0L, NA, 0L, NA, 0L, NA, 1L, 1L, 1L, NA,
NA, NA, NA, 1L, NA, NA, 1L, NA, NA, NA, NA, 1L, 1L, NA, 0L, 1L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L, 1L, NA, 0L, NA,
NA, 1L, NA, NA, 1L, NA, 1L, 1L, 0L, 1L, 0L, 0L, 0L, NA, NA, NA,
1L, NA, 0L, NA, NA, NA, 0L, 1L, NA, 0L, 0L, NA, 1L, NA, 1L, NA,
NA, 1L, 1L, 1L, NA, NA, NA, 1L, 0L, NA, NA, 0L, NA, NA, NA, NA,
0L, 1L, NA, NA, NA, NA, 0L, 0L, NA, NA, NA, 0L, 1L, NA, 0L, NA,
NA, 1L, NA, 0L, NA, 1L, NA, NA, 1L, NA, NA, 1L, 1L, 0L, NA, NA,
NA, 1L, 1L, NA, NA, NA, 1L, NA, 1L, NA, NA, NA, NA, 1L, NA, NA,
NA, 1L, NA, NA, 0L, 1L, 1L, 1L, NA, 0L, NA, NA, NA, NA, 1L, NA,
0L, 0L, NA, 0L, 0L, NA, NA, 0L, 1L, 1L, 0L, 1L, 1L, NA, NA, NA,
NA, NA, NA, NA, NA, 1L, 1L, 0L, 1L, NA, NA, 0L, NA, NA, NA, 1L,
NA, NA, NA, NA, NA, NA, 0L, 1L, 0L, 0L, 0L, 1L, 1L, NA, 0L, NA,
NA, 1L, 1L, 0L, 0L, 0L, NA, 0L, 0L, 1L, NA, 0L, NA, 0L, 1L, NA,
0L, 1L, 1L, 1L, 1L, NA, 0L, NA, NA, NA, NA, NA, NA, NA, NA, NA,
0L, 1L, NA, 0L, 0L, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 1L, 1L, 0L, NA, 1L, 0L, NA, 1L, NA, NA, 1L, 1L, 1L,
NA, 0L, 1L, 1L, 1L, 1L, NA, 0L, 0L, 1L, NA, NA, 1L, 1L, 0L, 1L,
NA, NA, NA, 0L, 1L, 1L, 1L, 0L, 0L, NA, 1L, 1L, NA, NA, NA, NA,
NA, NA, 0L, NA, 1L, 0L, 1L, NA, 1L, 0L, 0L, 1L, NA, NA, 1L, 1L,
1L, NA, 0L, 1L, 1L, NA, NA, 1L, NA, 0L, NA, NA, 1L, NA, NA, NA,
NA, 1L, NA, 0L, NA, 0L, NA, 0L, 1L, 1L, NA, 0L, 1L, NA, NA, 1L,
1L, 1L, NA, 1L, 0L, NA, NA, 0L, 0L, NA, NA, NA, 1L, 1L, NA, NA,
0L, 1L, NA, NA, 1L, 0L, 1L, NA, NA, NA, NA, NA, 1L, 1L, 0L, 0L,
NA, NA, NA, NA, 1L, NA, 1L, 0L, 1L, NA, NA, NA, 0L, 0L), binary_A = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L),
binary_B = c(1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L,
1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L,
0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L,
0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L),
binary_C = c(1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L,
0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L,
1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L),
categ_A = c(4L, 4L, 1L, 1L, 37L, 4L, 17L, 55L, 7L, 4L, 62L,
11L, 56L, 38L, 39L, 13L, 62L, 10L, 13L, 6L, 4L, 7L, 57L,
1L, 9L, 69L, 22L, 17L, 13L, 6L, 7L, 7L, 13L, 7L, 27L, 12L,
4L, 7L, 13L, 62L, 25L, 17L, 17L, 19L, 27L, 7L, 7L, 13L, 17L,
7L, 27L, 4L, 38L, 37L, 13L, 1L, 37L, 33L, 13L, 44L, 22L,
53L, 17L, 17L, 38L, 2L, 1L, 19L, 19L, 11L, 31L, 4L, 57L,
37L, 13L, 30L, 17L, 13L, 17L, 27L, 11L, 53L, 7L, 25L, 20L,
6L, 6L, 7L, 7L, 41L, 7L, 7L, 62L, 12L, 4L, 53L, 13L, 53L,
37L, 5L, 4L, 1L, 57L, 1L, 2L, 37L, 17L, 39L, 53L, 17L, 38L,
22L, 62L, 12L, 5L, 1L, 6L, 1L, 1L, 4L, 1L, 53L, 37L, 5L,
4L, 4L, 4L, 27L, 17L, 22L, 4L, 7L, 6L, 52L, 2L, 46L, 20L,
11L, 48L, 53L, 19L, 13L, 19L, 57L, 27L, 1L, 33L, 17L, 7L,
53L, 37L, 37L, 36L, 1L, 37L, 17L, 47L, 55L, 33L, 11L, 34L,
13L, 1L, 57L, 17L, 53L, 27L, 48L, 41L, 7L, 11L, 7L, 62L,
17L, 4L, 1L, 19L, 27L, 27L, 37L, 13L, 5L, 41L, 62L, 27L,
38L, 48L, 11L, 27L, 46L, 13L, 37L, 17L, 3L, 7L, 4L, 1L, 10L,
1L, 2L, 5L, 37L, 34L, 6L, 2L, 4L, 33L, 2L, 47L, 7L, 3L, 4L,
1L, 6L, 13L, 13L, 31L, 13L, 24L, 1L, 7L, 7L, 4L, 55L, 11L,
4L, 19L, 4L, 1L, 37L, 27L, 17L, 13L, 4L, 13L, 19L, 26L, 62L,
5L, 24L, 38L, 27L, 2L, 8L, 19L, 4L, 38L, 1L, 13L, 4L, 4L,
17L, 54L, 4L, 17L, 17L, 2L, 11L, 13L, 17L, 4L, 6L, 8L, 9L,
38L, 40L, 17L, 70L, 11L, 50L, 14L, 7L, 8L, 7L, 17L, 17L,
62L, 1L, 4L, 17L, 4L, 4L, 6L, 38L, 17L, 4L, 53L, 59L, 13L,
7L, 17L, 4L, 7L, 13L, 7L, 38L, 24L, 20L, 17L, 4L, 4L, 13L,
7L, 7L, 4L, 19L, 7L, 7L, 38L, 62L, 4L, 17L, 17L, 19L, 36L,
17L, 47L, 13L, 13L, 2L, 36L, 26L, 25L, 1L, 2L, 4L, 4L, 27L,
27L, 19L, 41L, 53L, 11L, 62L, 37L, 47L, 37L, 13L, 1L, 27L,
17L, 24L, 11L, 17L, 17L, 27L, 62L, 38L, 38L, 7L, 17L, 53L,
37L, 7L, 17L, 7L, 7L, 4L, 11L, 26L, 13L, 4L, 7L, 38L, 24L,
37L, 12L, 1L, 17L, 25L, 26L, 19L, 25L, 33L, 27L, 53L, 5L,
27L, 7L, 62L, 4L, 1L, 1L, 25L, 5L, 62L, 47L, 4L, 7L, 48L,
12L, 17L, 18L, 7L, 9L, 37L, 63L, 37L, 46L, 1L), categ_B = c(0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 3L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 3L, 1L,
0L, 0L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L,
1L, 3L, 0L, 2L, 1L, 0L, 1L, 0L, 2L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L,
1L, 0L, 2L, 0L, 0L, 0L, 2L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 2L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L,
1L, 0L, 3L, 0L, 3L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 0L,
0L, 0L, 3L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L,
0L, 3L, 0L, 0L, 2L, 0L, 1L, 1L, 1L, 2L, 3L, 1L, 0L, 3L, 0L,
0L, 0L, 1L, 3L, 0L, 3L, 0L, 0L, 0L, 1L, 3L, 0L, 0L, 0L, 0L,
0L, 0L, 3L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 2L, 3L, 1L, 3L, 0L,
0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L,
1L, 0L, 1L, 2L, 3L, 0L, 1L, 1L, 0L, 3L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 1L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L,
0L, 1L, 1L, 0L, 3L, 1L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 1L,
3L, 0L, 3L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 2L, 1L, 0L, 1L,
0L, 3L, 0L, 0L, 3L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 0L,
2L, 0L, 0L, 0L, 1L, 0L, 3L, 0L, 3L, 0L, 1L, 0L, 3L, 0L, 3L,
0L, 1L, 1L, 0L, 0L, 0L, 0L, 3L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 3L, 0L, 2L, 0L, 3L, 0L, 3L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 3L, 3L, 0L, 0L), binary_D = c(1L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L,
0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L), categ_C = c(2L, 1L, 1L,
1L, 6L, 3L, 9L, 8L, 3L, 2L, 0L, 2L, 0L, 3L, 0L, 0L, 10L,
3L, 5L, 8L, 4L, 6L, 0L, 2L, 2L, 0L, 6L, 6L, 0L, 2L, 1L, 8L,
7L, 3L, 3L, 5L, 2L, 2L, 9L, 0L, 6L, 4L, 6L, 7L, 3L, 0L, 0L,
4L, 0L, 4L, 0L, 1L, 7L, 8L, 6L, 1L, 6L, 6L, 7L, 10L, 6L,
7L, 4L, 3L, 3L, 2L, 2L, 3L, 5L, 1L, 5L, 5L, 8L, 7L, 2L, 6L,
5L, 8L, 6L, 4L, 5L, 5L, 7L, 8L, 3L, 4L, 6L, 8L, 2L, 0L, 3L,
4L, 0L, 4L, 3L, 7L, 4L, 8L, 10L, 1L, 5L, 1L, 0L, 1L, 1L,
3L, 0L, 7L, 6L, 2L, 4L, 7L, 0L, 4L, 5L, 3L, 5L, 1L, 2L, 1L,
1L, 6L, 6L, 2L, 1L, 3L, 3L, 1L, 4L, 4L, 2L, 3L, 2L, 0L, 2L,
7L, 5L, 4L, 10L, 9L, 5L, 5L, 9L, 0L, 0L, 1L, 0L, 7L, 10L,
7L, 3L, 0L, 1L, 1L, 9L, 0L, 2L, 5L, 4L, 3L, 2L, 0L, 1L, 0L,
4L, 7L, 6L, 0L, 3L, 0L, 0L, 0L, 0L, 2L, 1L, 2L, 0L, 9L, 3L,
0L, 6L, 9L, 0L, 8L, 0L, 6L, 8L, 9L, 0L, 8L, 1L, 3L, 4L, 1L,
0L, 4L, 2L, 2L, 1L, 0L, 0L, 3L, 5L, 3L, 2L, 2L, 0L, 3L, 8L,
4L, 2L, 2L, 2L, 9L, 0L, 0L, 3L, 0L, 0L, 1L, 6L, 7L, 1L, 7L,
2L, 3L, 0L, 1L, 2L, 3L, 0L, 0L, 7L, 5L, 0L, 1L, 0L, 0L, 5L,
4L, 5L, 6L, 1L, 2L, 1L, 5L, 2L, 2L, 5L, 4L, 2L, 4L, 4L, 2L,
5L, 6L, 3L, 0L, 8L, 8L, 3L, 1L, 7L, 2L, 6L, 10L, 2L, 0L,
5L, 5L, 9L, 5L, 7L, 5L, 2L, 0L, 0L, 1L, 2L, 6L, 4L, 4L, 2L,
1L, 3L, 3L, 0L, 0L, 4L, 4L, 9L, 1L, 0L, 4L, 6L, 8L, 1L, 3L,
1L, 1L, 1L, 8L, 5L, 0L, 2L, 0L, 8L, 5L, 9L, 4L, 1L, 2L, 0L,
3L, 0L, 0L, 0L, 0L, 0L, 2L, 5L, 0L, 4L, 2L, 1L, 4L, 3L, 6L,
1L, 0L, 0L, 4L, 0L, 7L, 9L, 0L, 9L, 8L, 1L, 5L, 0L, 3L, 3L,
9L, 0L, 10L, 0L, 0L, 2L, 0L, 7L, 8L, 7L, 2L, 0L, 6L, 7L,
4L, 4L, 0L, 6L, 2L, 4L, 5L, 0L, 7L, 3L, 1L, 10L, 6L, 5L,
2L, 10L, 0L, 2L, 0L, 1L, 5L, 5L, 4L, 3L, 3L, 3L, 8L, 1L,
0L, 3L, 3L, 5L, 6L, 1L, 6L, 6L, 0L, 1L, 0L, 0L, 6L, 10L,
2L), categ_D = c(1L, 2L, 4L, 8L, 5L, 2L, 4L, 5L, 4L, 3L,
4L, 3L, 6L, 2L, 3L, 3L, 7L, 2L, 4L, 7L, 8L, 3L, 8L, 4L, 10L,
2L, 5L, 2L, 1L, 8L, 3L, 2L, 3L, 2L, 2L, 4L, 2L, 6L, 3L, 1L,
9L, 5L, 4L, 3L, 5L, 8L, 2L, 4L, 5L, 2L, 5L, 2L, 4L, 6L, 7L,
1L, 6L, 3L, 3L, 9L, 5L, 2L, 8L, 3L, 6L, 3L, 8L, 3L, 5L, 3L,
4L, 4L, 5L, 2L, 1L, 7L, 5L, 5L, 6L, 5L, 1L, 1L, 1L, 7L, 4L,
5L, 7L, 9L, 3L, 3L, 2L, 2L, 1L, 4L, 1L, 9L, 2L, 8L, 3L, 4L,
1L, 6L, 2L, 2L, 2L, 2L, 1L, 8L, 5L, 1L, 3L, 3L, 1L, 3L, 4L,
3L, 2L, 2L, 2L, 2L, 2L, 7L, 5L, 2L, 2L, 3L, 4L, 5L, 7L, 4L,
1L, 3L, 5L, 3L, 5L, 4L, 3L, 1L, 7L, 9L, 6L, 6L, 6L, 3L, 3L,
3L, 2L, 5L, 3L, 3L, 2L, 4L, 9L, 5L, 4L, 3L, 6L, 5L, 6L, 3L,
8L, 6L, 2L, 2L, 9L, 1L, 2L, 9L, 3L, 2L, 1L, 1L, 3L, 4L, 8L,
3L, 4L, 6L, 2L, 3L, 3L, 10L, 6L, 2L, 3L, 3L, 7L, 2L, 6L,
9L, 5L, 3L, 2L, 2L, 2L, 3L, 4L, 4L, 3L, 3L, 1L, 4L, 5L, 1L,
4L, 3L, 1L, 1L, 5L, 3L, 5L, 3L, 1L, 7L, 1L, 6L, 2L, 2L, 1L,
4L, 4L, 2L, 2L, 7L, 3L, 7L, 4L, 2L, 2L, 3L, 2L, 7L, 5L, 2L,
5L, 6L, 3L, 1L, 9L, 7L, 4L, 3L, 1L, 5L, 1L, 1L, 3L, 1L, 3L,
10L, 3L, 3L, 8L, 3L, 5L, 3L, 5L, 3L, 4L, 5L, 3L, 9L, 4L,
2L, 3L, 8L, 5L, 8L, 2L, 9L, 5L, 4L, 4L, 6L, 6L, 2L, 1L, 6L,
7L, 6L, 7L, 2L, 8L, 7L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 5L,
2L, 2L, 6L, 4L, 3L, 3L, 4L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 2L,
5L, 1L, 1L, 4L, 1L, 6L, 9L, 4L, 2L, 1L, 2L, 2L, 9L, 1L, 9L,
2L, 2L, 1L, 5L, 4L, 4L, 4L, 9L, 8L, 5L, 1L, 5L, 10L, 6L,
8L, 2L, 3L, 5L, 7L, 1L, 4L, 3L, 4L, 3L, 3L, 7L, 1L, 6L, 5L,
3L, 3L, 1L, 7L, 2L, 2L, 5L, 1L, 1L, 5L, 3L, 6L, 6L, 4L, 3L,
2L, 4L, 7L, 4L, 3L, 8L, 2L, 1L, 4L, 2L, 5L, 3L, 1L, 2L, 2L,
4L, 9L, 1L, 4L, 4L, 6L, 5L, 5L, 5L, 4L, 8L, 3L, 6L, 1L, 6L,
6L, 7L, 3L), categ_E = c(4L, 4L, 1L, 1L, 9L, 3L, 1L, 10L,
1L, 8L, 8L, 1L, 1L, 1L, 9L, 1L, 10L, 1L, 5L, 2L, 5L, 6L,
2L, 1L, 7L, 9L, 6L, 5L, 1L, 3L, 1L, 4L, 6L, 6L, 7L, 7L, 1L,
6L, 8L, 1L, 9L, 6L, 9L, 1L, 4L, 5L, 4L, 6L, 8L, 4L, 5L, 5L,
8L, 8L, 1L, 3L, 9L, 7L, 7L, 7L, 6L, 6L, 7L, 5L, 8L, 1L, 1L,
7L, 1L, 4L, 9L, 6L, 9L, 1L, 6L, 2L, 6L, 6L, 8L, 1L, 1L, 7L,
5L, 8L, 5L, 4L, 3L, 8L, 5L, 8L, 3L, 7L, 9L, 4L, 5L, 3L, 7L,
7L, 8L, 1L, 1L, 1L, 2L, 1L, 4L, 6L, 6L, 3L, 1L, 5L, 6L, 7L,
8L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 10L, 9L, 1L, 5L, 5L,
7L, 5L, 1L, 6L, 7L, 1L, 6L, 2L, 1L, 7L, 7L, 1L, 8L, 10L,
1L, 6L, 6L, 4L, 7L, 5L, 2L, 7L, 1L, 9L, 8L, 7L, 9L, 6L, 7L,
7L, 9L, 8L, 2L, 7L, 9L, 7L, 1L, 4L, 7L, 5L, 8L, 8L, 2L, 7L,
1L, 4L, 9L, 1L, 7L, 5L, 5L, 8L, 5L, 9L, 8L, 5L, 2L, 1L, 6L,
4L, 1L, 1L, 8L, 10L, 1L, 6L, 5L, 5L, 5L, 6L, 5L, 7L, 1L,
1L, 3L, 6L, 9L, 1L, 5L, 3L, 9L, 4L, 9L, 6L, 1L, 1L, 1L, 3L,
8L, 8L, 8L, 3L, 1L, 3L, 7L, 2L, 1L, 9L, 5L, 5L, 7L, 5L, 1L,
8L, 6L, 6L, 6L, 1L, 7L, 6L, 3L, 5L, 2L, 3L, 9L, 1L, 1L, 7L,
7L, 7L, 7L, 6L, 4L, 5L, 4L, 1L, 4L, 4L, 7L, 6L, 1L, 5L, 9L,
7L, 3L, 3L, 6L, 4L, 10L, 5L, 7L, 4L, 5L, 8L, 6L, 6L, 6L,
1L, 6L, 7L, 5L, 1L, 2L, 4L, 8L, 4L, 3L, 8L, 1L, 5L, 10L,
2L, 3L, 1L, 9L, 1L, 5L, 6L, 4L, 9L, 3L, 6L, 4L, 2L, 4L, 4L,
5L, 5L, 3L, 8L, 7L, 7L, 8L, 8L, 4L, 7L, 7L, 7L, 6L, 5L, 8L,
6L, 7L, 3L, 9L, 5L, 1L, 3L, 1L, 7L, 1L, 1L, 7L, 8L, 8L, 9L,
2L, 6L, 10L, 5L, 9L, 6L, 5L, 7L, 7L, 2L, 4L, 6L, 4L, 5L,
8L, 1L, 10L, 3L, 7L, 8L, 9L, 3L, 6L, 2L, 7L, 3L, 5L, 1L,
5L, 1L, 7L, 8L, 2L, 1L, 5L, 2L, 4L, 8L, 5L, 8L, 2L, 7L, 6L,
6L, 4L, 1L, 6L, 6L, 1L, 2L, 1L, 6L, 1L, 3L, 4L, 7L, 8L, 10L,
7L, 3L, 7L, 6L, 1L, 7L, 2L, 1L, 10L, 5L), binary_E = c(1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L,
1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L), percentA = c(5.2, 5.28,
7.71, 3.78, 0, 4.16, 0, 6.42, 2.56, 2.69, 2.15, 3.95, 0,
2.06, 0, 5.45, 7.2, 1.51, 3.57, 12.74, 3.51, 5.06, 8.44,
8.43, 5.59, 6.85, 7.37, 7.37, 1.51, 5.23, 4.69, 1.91, 7.34,
5.32, 7.62, 5.06, 6.48, 7.38, 28.16, 3.66, 8.37, 4.01, 10.6,
6.46, 8.63, 3.76, 6.09, 8.03, 3.78, 7.44, 3.67, 10.24, 7.4,
3, 0, 8.88, 15.17, 6.92, 0, 4.24, 4.2, 10.17, 14.73, 9.22,
6.27, 4.11, 8.43, 8.56, 4.05, 0, 0, 3.5, 4.39, 13.19, 13.01,
6.9, 4.79, 7.23, 14.28, 15.67, 8.76, 3.86, 4.58, 6.51, 18.31,
3.92, 7.79, 6.94, 6.94, 8.76, 4.83, 10.54, 0, 0, 5.62, 0,
1.06, 4.13, 6.52, 4.17, 6.89, 4.25, 3.51, 6.4, 4.83, 5.01,
4.69, 3.54, 4.67, 9.5, 1.75, 0, 16.63, 2.6, 1.43, 3.59, 10.26,
6.15, 0, 5.81, 0, 7.28, 0, 8.91, 7.33, 4.89, 2.7, 13.14,
92.59, 11.03, 7.75, 5.49, 3.31, 7.82, 5.57, 0, 10.03, 7.35,
6.36, 8.94, 7.35, 0, 6.85, 3.85, 4.19, 4.48, 7.43, 10.47,
5.11, 9.42, 3.42, 4.74, 0.89, 6.12, 6.46, 11.31, 0, 4.19,
4.76, 5.86, 8.23, 17.76, 7.69, 18.39, 10.72, 12.73, 7.08,
9.44, 4.14, 26.5, 3.72, 9.47, 12.66, 29.22, 7.64, 3.9, 13.01,
3.53, 10.75, 8.26, 4.8, 14.54, 3.7, 14.15, 8.47, 7.33, 3.78,
5.78, 8.88, 0, 19.19, 16.91, 6.57, 6.36, 7.6, 6.52, 5.55,
9.05, 7.3, 2.81, 5.09, 5.25, 8.22, 7.31, 8.73, 5.27, 4.85,
2.31, 15.63, 0, 8.22, 9.19, 7.05, 6.47, 5.53, 0, 5.03, 0,
0, 6.77, 12.47, 5.71, 0, 6.55, 11.59, 25.13, 1.8, 7.35, 4.66,
7.56, 4.08, 2.98, 6.7, 8.91, 3.3, 5.85, 5.74, 8.73, 13.9,
9.97, 4.27, 10.56, 35.68, 6.56, 5.85, 35.95, 6.24, 1.95,
6.63, 4.23, 4.1, 4.03, 24.37, 4.68, 12.24, 2.41, 0, 3.47,
11.93, 4.15, 6.6, 8.17, 4.65, 4.09, 0, 1.44, 10.95, 0, 7.94,
4.38, 4.18, 1.93, 5.67, 9.42, 0, 13.1, 2.72, 7.25, 2, 5.27,
3.35, 2.12, 9.26, 7.11, 35.44, 2.06, 4.77, 4.24, 12.55, 8.71,
4.38, 5.42, 3.14, 2.7, 10.94, 0, 6.8, 2.47, 14.33, 4.6, 14.06,
5.82, 0, 14.25, 10.57, 9.29, 10.65, 5.58, 4.85, 2.33, 4.03,
6.41, 24.69, 7.61, 2.82, 6.78, 0, 0, 1.51, 0, 6.9, 3.97,
7.7, 0, 3.17, 8.43, 5.52, 5.68, 10.05, 15.86, 1.73, 4.1,
1.38, 5.35, 6.07, 5.68, 10.33, 10.42, 9.44, 0, 0, 7.21, 4.78,
0, 6.26, 7.39, 18.2, 3.44, 5.22, 3.41, 4.73, 10.21, 16.28,
8.51, 12.14, 4.94, 14.65, 3.23, 0, 4.2, 3.13, 4, 10.47, 2.81,
8.22, 0, 3.44, 0, 13.32, 6.93, 0, 3.3, 9.18, 6.71, 8.35,
0, 8.32, 5.43, 12.38, 5.15, 5.74, 0, 2.75, 10.21, 1.58, 8.86,
13.95, 7.1, 11.47, 11.72, 7.09, 0, 2.92, 7.55, 6.58, 14.32,
21.71, 4.01, 1.81, 2.39), percentB = c(43.97, 59.38, 43.37,
46.67, 13.04, 65.31, 43.82, 28.73, 32.44, 25.18, 34.82, 38.13,
19.15, 43.89, 100, 29.47, 43.16, 29.19, 37.42, 29.36, 47.94,
42.58, 18.11, 49.25, 40.12, 36.99, 39.65, 37.06, 48.05, 33.33,
43.79, 41.32, 41.1, 33.91, 25.45, 41.82, 45.68, 40.54, 37.92,
41.08, 34.3, 32.72, 46.55, 48.28, 39.66, 45.79, 42.67, 35.38,
48, 35.3, 45.96, 44.21, 36.27, 43.53, 45, 39.82, 22.19, 29.79,
70, 51.2, 53.52, 36.62, 22.75, 16.53, 46.78, 40.96, 53.79,
19.44, 28.05, 61.04, 42.86, 57.29, 32.62, 42.19, 34.15, 36.31,
41.66, 46.28, 33.92, 31.95, 40, 20.63, 45.59, 29.08, 24.84,
45.41, 39.32, 47.63, 33.85, 24.15, 43.13, 21.05, 12.5, 12.5,
42.67, 0, 63.06, 30.23, 46.15, 39.64, 41.34, 38.05, 38.82,
43.42, 52.62, 34.43, 54.7, 26.82, 36.86, 52.41, 43.85, 60.71,
33.33, 48.48, 47.56, 44.35, 39.47, 50, 57.32, 35.78, 41.67,
36.9, 5.26, 36.52, 44.6, 35.29, 47.31, 38.29, 0, 45.89, 45.83,
40.8, 47.42, 46.03, 31.22, 14.29, 36.08, 31.9, 23.04, 24.71,
42.52, 50, 29.71, 34.34, 43.46, 46.86, 51.31, 45.45, 42.34,
40, 27.54, 38.1, 42.5, 36.51, 35.26, 30.52, 54.05, 56, 28.3,
44.44, 33.03, 42.6, 36.01, 41.35, 34.07, 23.91, 41.32, 43.68,
44.44, 36.47, 46.33, 26.25, 23.33, 43.81, 52.12, 53.94, 8.45,
34.56, 43.2, 29.19, 38.1, 36.3, 53.57, 21.32, 17.11, 38.03,
42.95, 27.22, 41.67, 28.57, 47.37, 30.35, 42.65, 40, 45.25,
41.83, 38.55, 34.99, 30.34, 62.98, 32.14, 42.11, 45.65, 45.14,
40.41, 46.61, 15.47, 48.48, 37.01, 82.86, 42.96, 28.23, 34.36,
44.44, 33.22, 77.78, 41.79, 22.86, 42.59, 27.18, 46.81, 50.94,
38.75, 33.11, 25.73, 0, 52.11, 36.36, 34.89, 35.81, 37.84,
35.93, 43.75, 38.28, 20.68, 33.33, 34.38, 30.27, 8.33, 34.44,
15.16, 28.36, 25, 41.12, 39.76, 33.33, 31.65, 35.32, 39.81,
37.53, 52.73, 29.24, 26.12, 43.21, 29.29, 51.08, 32.14, 28.02,
29.41, 53.47, 27.27, 47.06, 20, 54.42, 12.5, 49.18, 36.85,
23.23, 38.55, 52.93, 39.24, 34.21, 26.73, 40.76, 26.09, 28.57,
50.67, 34.54, 39.07, 30.13, 39.77, 48.85, 46.15, 33.47, 56.52,
37.93, 32.35, 36.04, 45.29, 51.4, 40.51, 37.56, 42.61, 32.9,
41.07, 22.86, 37.86, 35.21, 37.35, 53.69, 31.5, 49.79, 29.55,
47.69, 29.09, 48.27, 41.15, 32.54, 31.95, 37.7, 53.6, 33.33,
20.41, 16.44, 45.65, 41.06, 33.33, 35.98, 36.77, 64.52, 39.26,
32.84, 40.96, 36.23, 44.71, 27.13, 39.65, 30.51, 43.96, 36.36,
35.71, 45.12, 40.27, 36.09, 40.41, 32.83, 28.19, 30.13, 21.51,
15, 28.57, 52.14, 22.66, 40.28, 35.24, 49.43, 25.54, 36.11,
41.62, 38.57, 32.06, 42.7, 29.61, 35.95, 32.78, 23.79, 31.71,
41.92, 2.94, 52.45, 47.82, 44.73, 45.32, 40.85, 51.83, 30.77,
31.24, 22.22, 11.74, 43.11, 42.86, 29.84, 47.89, 27.11, 34.91,
23.4, 38.36, 38.81, 36.31, 45.63, 41.51, 45.83, 36.5, 41.28,
37.28, 28.09, 51.72, 28.96, 34.5, 25.92, 41.56, 48.15, 50.1,
63.28, 49.39, 25, 55.1, 43.3, 8.08, 18.52), percentC = c(0.4,
4.69, 9.18, 0, 0, 6.12, 1.12, 0.67, 0.73, 3.08, 13.77, 0.43,
0, 2.26, 0, 0.97, 0.71, 0.62, 0.33, 0, 8.67, 3.57, 0, 3.14,
2.69, 0, 0.39, 0.94, 2.34, 10.42, 11.85, 3.4, 0, 2.3, 1.82,
2, 1.13, 0, 20.84, 1.29, 0.56, 0.89, 0.24, 3.45, 20.11, 3.18,
0.52, 35.35, 3.45, 1.53, 0.51, 9.5, 0.19, 2.52, 0, 0.3, 0.3,
0, 0, 10.47, 7.75, 0.33, 0, 0.45, 0.11, 1.2, 1.23, 8.33,
0, 23.38, 0, 0.75, 1.55, 1.56, 0.41, 0.83, 1.11, 2.66, 0,
0, 4.8, 0, 7.7, 0.49, 0.62, 4.03, 0.2, 0.38, 1.56, 0.52,
3.57, 0, 0, 0, 0, 0, 0.9, 0, 2.2, 2.16, 0.62, 1.33, 0.36,
1.84, 4.33, 0.55, 16.87, 0, 2.4, 9.65, 0, 0, 0.69, 6.06,
0.81, 1.66, 0, 2, 2.44, 0.83, 12.5, 1.47, 0, 0.34, 1.88,
0.59, 2.34, 1.46, 0, 1.43, 2.08, 1.35, 4.57, 3.17, 0.37,
0, 0.63, 0.31, 0.81, 0, 0.36, 0, 2.96, 0.54, 0.64, 4.02,
1.39, 2.02, 4.5, 0, 1.45, 1.07, 1.25, 0.74, 0.25, 2.28, 2.7,
4, 0.94, 0, 1.59, 0.59, 15.46, 6.02, 6.31, 0, 0, 0.99, 0.65,
2.35, 10.45, 1.67, 0, 0, 6.26, 1.52, 0.7, 0, 2.34, 1.16,
3.17, 2.42, 9.52, 0, 3.95, 1.43, 11.74, 0, 0, 0, 5.26, 13.33,
2.05, 2.29, 0.23, 0.58, 7.49, 5.64, 0.29, 0.88, 0.29, 0,
3.26, 25.4, 15.17, 1.73, 0.55, 0, 2.01, 0, 0.31, 1.88, 0.61,
2.96, 0.68, 0, 0.71, 0, 0, 4.01, 0, 4, 2.5, 0, 3.4, 12.5,
1.1, 4.24, 3.88, 2.43, 10.81, 0.85, 0, 4.31, 2.92, 0, 3.91,
0.43, 0, 0.75, 0.26, 0.84, 0, 0, 8.12, 0, 0.4, 1.37, 3.74,
0.87, 2.42, 0.26, 0, 1.54, 0.4, 4.98, 0, 0.6, 0, 1.81, 0,
0, 0, 1.13, 0, 0, 0.65, 2.02, 6.75, 9.14, 2.53, 25.54, 0.04,
7.82, 0, 7.14, 5.33, 3.65, 5.3, 1.95, 0.8, 0.71, 0, 20.15,
0, 0, 1.24, 3.9, 9.34, 4.2, 0, 9.58, 1.32, 0.92, 0.77, 0,
0.73, 1.41, 8.43, 2.29, 5.61, 6.83, 2.27, 5.09, 0, 5.31,
0.52, 1.83, 0.54, 2.19, 5.63, 0, 0, 35.62, 0, 4.35, 0, 0.53,
0.32, 0, 1.7, 1.49, 0.1, 0, 0.33, 0.33, 3.05, 5.08, 0, 0,
2.38, 1.83, 10.74, 1.38, 0.38, 0.33, 0.34, 3.33, 5.4, 11.43,
57.14, 3.45, 0, 16.67, 23.5, 0.76, 1.45, 0, 1.95, 0, 2.52,
4.55, 0, 2.87, 0.43, 1.17, 1.69, 0.8, 0, 1.4, 0.27, 0.27,
6.24, 6.81, 2.51, 7.69, 4.35, 0, 0, 0, 0, 0.14, 3.31, 0,
3.37, 0, 0.8, 0, 0.71, 4.51, 0.17, 0, 1.83, 4.18, 1.78, 1.87,
6.9, 0.26, 11.37, 2, 1.93, 0, 3.45, 3.39, 0.56, 0, 5.51,
2.3, 0, 0), percentD = c(0.93, 0, 0.86, 0, 0, 0.27, 0, 0,
0, 0, 0, 0, 0.33, 0, 0, 0, 0.75, 0, 1, 0, 0, 0, 0, 0.29,
1, 0, 0, 1, 0, 1, 0.75, 1, 0, 0, 0, 1, 1, 0.43, 0, 0.78,
0, 0.71, 0, 0, 0, 0, 0.29, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0.81, 0, 0.56, 0, 1, 0, 1, 0, 0, 0, 0.8, 0, 0.43, 0,
0, 0, 1, 1, 0, 0, 0.8, 0.67, 1, 0, 0, 0, 0.82, 0, 0, 0, 0.07,
1, 0, 0, 0, 1, 0, 0, 0.25, 0, 0, 0, 0.25, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0.75, 0, 0.98, 0, 0.9, 0, 0.67, 0,
0, 0.95, 0.67, 1, 0, 0, 0.33, 0, 0, 0.78, 0, 0.96, 0, 0,
0, 0, 0, 0, 0.78, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0.25,
0, 0, 0, 1, 1, 0, 0, 0.07, 0, 0.3, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0.08, 0, 0, 0, 0,
0, 0, 0, 0.88, 0, 0.58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0.99, 0, 0, 0.75, 0, 0, 0.22, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0.86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 0, 0.79, 1, 0, 1, 0, 0.95, 0, 0, 0, 1, 0, 1, 0,
0, 0, 0.13, 0, 0.93, 0, 0, 0, 0, 0.05, 1, 0, 0.93, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0.78,
0.25, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.67, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0.99, 0, 0, 0, 0, 1, 0, 0, 0.8, 0,
0, 0, 0, 0, 0.8, 0, 0, 0, 0, 0, 0.24, 0, 0, 1, 0, 0, 0.45,
0, 0, 1, 1, 0, 0.44, 0, 0, 0, 0, 0, 0, 0.4, 0, 0, 0, 1, 0,
0.57, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0.49, 0, 0, 1, 0, 0, 0,
0.92, 0, 0.73, 0, 0, 0, 0, 0.9, 0, 0, 0, 1, 0, 1, 0.96, 0,
0, 0, 0, 0)), .Names = c("CustomerID", "binary_depvar", "binary_A",
"binary_B", "binary_C", "categ_A", "categ_B", "binary_D", "categ_C",
"categ_D", "categ_E", "binary_E", "percentA", "percentB", "percentC",
"percentD"), class = "data.frame", row.names = c(NA, -400L))

Select only rows if its value in a particular column is 'NA' in R

I'm trying to create a subset of data that contains only the rows with missing data in one of my columns.
The data:
data<-structure(list(ID = c(1, 2, 3, 4, 7, 9, 10, 12, 13, 14, 15, 16,
17, 18, 20, 21, 22, 23, 24, 25, 27, 28, 29, 31, 34, 37, 38, 39,
40, 41), QnSinV1 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), QnSinV2 = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), QnSinV3 = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), QnSize = c(0.032140423, 0.017620319,
NA, -0.093448167, -0.051090375, 0.001188913, NA, -0.144868599,
-0.000260992, 0.008502255, -0.00346349, 0.017208373, 0.004301855,
0.004420431, -0.007564124, NA, 0.174388101, -0.142412328, 0.064935852,
-0.052174354, NA, 0.005180317, 0.05728222, 0.041215822, -0.002449455,
-0.040942923, -0.082284946, -0.173656321, 0.022723036, -0.061326436
), QnWt = c(15.8, 16.5, 11.9, 13.7, 15, 15.3, 13.7, 15.8, 16.3,
15.9, 15.1, 14.5, 14.4, 15.7, 14.4, 13.3, 14.8, 15.1, 15.1, 14.7,
15.8, 17.8, 16.4, 13.4, 15.1, 14.8, 14.2, 12.7, 17.9, 16.2),
QnWtLsCL = c(NA, 0.503030303, 0.596638655, NA, 0.446666667,
0.509803922, 0.408759124, 0.462025316, 0.552147239, 0.509433962,
0.456953642, 0.455172414, 0.506944444, NA, 0.486111111, 0.473684211,
0.513513514, 0.516556291, 0.582781457, 0.537414966, 0.474683544,
0.43258427, 0.432926829, NA, 0.569536424, 0.445945946, 0.485915493,
0.543307087, NA, 0.543209877), ClaustPer = c(NA, 1L, 2L,
NA, 3L, 0L, 2L, 0L, 1L, 0L, 0L, 0L, 1L, NA, 0L, 7L, 1L, 0L,
1L, 0L, 1L, 2L, 2L, NA, 2L, 3L, 2L, 2L, NA, 0L), QnSurvCL = c(0L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L),
ColWtCL = c(NA, 11.7, 7.3, NA, 9.1, 11.1, 9.6, 11.2, 9, 11.2,
12, 11, 10.9, NA, 9.9, 8.6, 10.8, 10.9, 8.7, 10.8, 11.6,
13.7, 10.8, NA, 9.3, 9.6, 9.8, 8.7, NA, 11.1), ColWtCL_6 = c(NA,
57.1, 45, NA, 73.6, NA, NA, NA, 43.8, NA, NA, 71.1, NA, NA,
53.7, NA, 84.4, NA, NA, NA, 56, 56.1, NA, NA, 59.4, NA, 45.7,
NA, NA, NA), ColGrowthCL_6 = c(NA, 4.88034188, 6.164383562,
NA, 8.087912088, NA, NA, NA, 4.866666667, NA, NA, 6.463636364,
NA, NA, 5.424242424, NA, 7.814814815, NA, NA, NA, 4.827586207,
4.094890511, NA, NA, 6.387096774, NA, 4.663265306, NA, NA,
NA), QnSurvCL_6 = c(NA, 1L, NA, NA, 1L, NA, NA, NA, 1L, NA,
NA, 1L, NA, NA, 1L, 0L, 1L, NA, NA, NA, 1L, 1L, NA, NA, 1L,
NA, 1L, NA, NA, NA), IR = c(-0.1919695, 0.0214441, NA, 0.0886954,
0.4221713, 0.0869788, 0.2716466, 0.0289674, -0.0291414, -0.1739616,
-0.0215773, -0.1473209, 0.0370336, 0.254584, 0.0332632, -0.0203844,
0.1524175, -0.051451, -0.0612144, 0.1617955, 0.0354173, 0.0904954,
0.3344705, 0.0990583, 0.1985931, 0.0419539, -0.0159598, 0.1159526,
-0.0057495, -0.1811458), SH = c(1.2064, 1.1093, NA, 0.922,
0.643, 0.9284, 0.7225, 0.9866, 1.0804, 1.2226, 1.0315, 1.1953,
1.007, 0.6991, 1.0264, 1.0265, 0.8865, 1.1184, 1.094, 0.829,
1.0142, 0.9824, 0.6793, 0.9188, 0.7853, 1.0352, 1.0648, 0.9654,
1.0366, 1.2044), HL = c(0.3774, 0.4349, NA, 0.5091, 0.6187,
0.5168, 0.6405, 0.4691, 0.4555, 0.3444, 0.4908, 0.3819, 0.4846,
0.6256, 0.4638, 0.4778, 0.5219, 0.433, 0.447, 0.564, 0.4899,
0.4612, 0.6542, 0.5162, 0.5549, 0.4928, 0.4471, 0.4959, 0.4523,
0.3511), MLH = c(0.534090909090909, 0.5, NA, 0.40506329113924,
0.298507462686567, 0.410958904109589, 0.293103448275862,
0.442105263157895, 0.48, 0.554347826086957, 0.453488372093023,
0.535353535353535, 0.443298969072165, 0.304878048780488,
0.457446808510638, 0.455555555555556, 0.397849462365591,
0.494252873563218, 0.48314606741573, 0.377777777777778, 0.457446808510638,
0.445652173913043, 0.3, 0.412371134020619, 0.354838709677419,
0.464646464646465, 0.474226804123711, 0.43010752688172, 0.46078431372549,
0.541666666666667)), .Names = c("ID", "QnSinV1", "QnSinV2",
"QnSinV3", "QnSize", "QnWt", "QnWtLsCL", "ClaustPer", "QnSurvCL",
"ColWtCL", "ColWtCL_6", "ColGrowthCL_6", "QnSurvCL_6", "IR",
"SH", "HL", "MLH"), row.names = c(1L, 2L, 3L, 4L, 7L, 9L, 10L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 20L, 21L, 22L, 23L, 24L, 25L,
27L, 28L, 29L, 31L, 34L, 37L, 38L, 39L, 40L, 41L), class = "data.frame")
My guess (which doesn't work):
# Attempt that does not work: `== 'NA'` compares against the literal string
# "NA". For a missing value the comparison itself evaluates to NA, and
# subset() treats NA in its condition as FALSE, so none of the rows with a
# missing ColWtCL_6 are selected.
test<-subset(data, data$ColWtCL_6=='NA')
# Print the result of the attempt.
test
You can also do it without subset(). To select NA values, use the function is.na():
# Logical row index: TRUE exactly where ColWtCL_6 is missing; keep all columns.
data[is.na(data[["ColWtCL_6"]]), ]
Or with subset()
# subset() evaluates the condition inside `data`, so the column is named bare.
subset(data, subset = is.na(ColWtCL_6))
A tidyverse approach (package dplyr):
# Attach dplyr first: it provides both the `%>%` pipe and the data-frame
# `filter()` verb used below (without it, `%>%` is not defined).
library(dplyr)

# Keep only the rows of `data` whose ColWtCL_6 value is missing.
test <- data %>%
  filter(is.na(ColWtCL_6))
If you want to filter based on NAs in multiple columns, consider using the function filter_at() in combination with a selection helper that chooses the columns to test and a predicate that states the filtering condition itself.
Example 1: select rows of data with NA in all columns starting with Col:
# Rows of `data` in which every column whose name starts with "Col" is NA.
# `data` is passed directly as the first argument instead of being piped in.
test <- filter_at(
  data,
  vars(starts_with("Col")),
  all_vars(is.na(.))
)
Example 2: select rows of data with NA in at least one of the columns starting with Col:
# Rows of `data` in which at least one column whose name starts with "Col"
# is NA. `data` is passed directly as the first argument instead of piped in.
test <- filter_at(
  data,
  vars(starts_with("Col")),
  any_vars(is.na(.))
)
This link from tidyverse documentation is very inspiring: https://dplyr.tidyverse.org/reference/filter_all.html
Here's another solution to find NAs across all columns in a data frame using dplyr:
library(dplyr)

# NOTE: `df` is a placeholder for your own data frame; it is not defined in
# this snippet.
# Collect the names of all of its columns.
colnms <- colnames(df)

# Keep the rows that have an NA in at least one of those columns.
# `if_any()` inside `filter()` replaces the superseded `filter_at()` +
# `any_vars()` combination (requires dplyr >= 1.0.4).
df %>%
  filter(if_any(all_of(colnms), is.na))

Resources