Why does geom_smooth() automatically exclude some values? - r

I make a ggplot2 graphic like this:
ggplot(dat, aes(x=timepoint, y=y, size=Status, shape=Status)) +
geom_point(fill="red") +
geom_smooth(method=lm, se=FALSE, size=1, linetype="twodash") +
facet_grid(Test ~ Batch, scales="free_y")
And it gives:
There are other options in my code to control legend appearance, etc., but nowhere have I asked geom_smooth() to exclude some values, and as you can see it automatically excludes the points having Status=="FAIL"!
You can generate such a plot without the other options (but showing the same problem) with the following dataframe:
dat <- structure(list(Test = structure(c(2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("PH",
"ANTIGENIC ACTIVITY BY ELISA", "WATER CONTENT BY µKARL FISCHER"
), class = "factor"), Batch = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("HB07",
"HB08"), class = "factor"), timepoint = c(0, 1, 2, 3, 0, 1, 2,
3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3), Status = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("PASS", "FAIL"), class = "factor"), y = c(11.7,
12.7, 12.8, 17.6, 8.6, 9.6, 16.5, 15.4, 10.1, 9, 11.7, 12.5,
7.9, 9.3, 15.5, 14.7, 12.9, 10.8, 14.5, 16.5)), .Names = c("Test",
"Batch", "timepoint", "Status", "y"), row.names = c(NA, -20L), class = "data.frame")

By providing the shape aes you group the data. If the group consists only of one member, geom_smooth cannot print anything for that group.
Possibly you want this:
# Override the per-Status grouping inherited from shape = Status so that a
# single regression line is fitted through all points of each panel.
geom_smooth(aes(group = 1), method = lm, se = FALSE, size = 1, linetype = "twodash")

Related

"Error in Contrasts" message in glm model after confirming that all Factor variables are longer than 2 unique values [R]:

I am not sure why I am still receiving this message when running a base model with all variables in my dataset:
My data, with anonymized variables:
set.seed(1234)
#dput(df)
structure(list(outcome_1= structure(c(2L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 1L, NA, 2L, 1L), .Label = c("0", "1"), class = "factor"),
outcome_2= structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, NA, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, NA, 2L, 1L), .Label = c("0", "1"), class = "factor"),
outcome_3= structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, NA, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, NA, 1L, 1L), .Label = c("0", "1"), class = "factor"),
bl_ep = c(16, 92, 10, 40, 19, 1, 16, 10, 22, 28, 8, 11, 6,
47, 12, 1, 9, 20, 2, 14, 72, 28, 5, 16, 61, 12, 24, 22, 44,
44, 16, 36, 62, 10, 16, 10, 89, 22, 5, 38, 8, 11), bl_days = c(12,
28, 10, 25, 19, 1, 10, 9, 13, 28, 4, 11, 6, 20, 12, 1, 8,
16, 2, 12, 27, 28, 5, 13, 24, 10, 18, 18, 16, 16, 10, 28,
22, 5, 15, 8, 28, 15, 5, 22, 7, 11), score_1 = c(11,
19, 17, 17, 12, 14, 8, 12, 14, 15, 14, 13, 12, 14, 15, 5,
11, 14, 14, 13, 16, 11, 11, 14, 20, 14, 12, 11, 17, 15, 14,
18, 15, 14, 12, 10, 17, 16, 11, 13, 18, 17), score_2 = c(1.1,
1.6, 1.6, 2.8, 1.9, 3.3, 4, 3.8, 1.8, 1.4, 2, 3.55, 1.6,
1.8, 2.4, 3.7, 1.4, 2.9, 3.55, 2.5, 1.6, 3.2, 3.5, 2.4, 3.1,
2.3, 3.8, 3.9, 1.1, 1.7, 2.3, 1.5, 1.9, 3.3, 3, 2.9, 1.6,
3.1, 3.7, 2.8, 1.2, 1.9), score_3 = c(1,
1.22222222222222, 1.11111111111111, 1.88888888888889, 1.44444444444444,
1.44444444444444, 3.22222222222222, 2.77777777777778, 1.11111111111111,
1, 1, 2.83333333333333, 1.22222222222222, 1.875, 1.55555555555556,
2.66666666666667, 1, 2.25, 1.72222222222222, 2.05555555555556,
1.22222222222222, 2, 2, 1.77777777777778, 1.33333333333333,
1.11111111111111, 2.5, 2.55555555555556, 1, 1.22222222222222,
1.77777777777778, 1.22222222222222, 2.44444444444444, 1.55555555555556,
1.77777777777778, 1.66666666666667, 1.11111111111111, 2.33333333333333,
2.88888888888889, 1.55555555555556, 1, 1.25), score_4 = c(1.31428571428571,
1.37142857142857, 1.08571428571429, 1.83809523809524, 1.37142857142857,
1.8952380952381, 4, 3.88571428571429, 3.02857142857143, 2.12222222222222,
1.43333333333333, 3.39047619047619, 1.74285714285714, 1.67619047619048,
2.02857142857143, 3.48571428571429, 1.24761904761905, 3.73333333333333,
3.08571428571429, 2.56666666666667, 1.74285714285714, 2.6952380952381,
3.45714285714286, 2.27619047619048, 1.9047619047619, 2.62857142857143,
3.74285714285714, 3.74285714285714, 1.24761904761905, 1.39047619047619,
1.83809523809524, 2.74285714285714, 4, 1.77142857142857,
3.42857142857143, 3.2, 1.65714285714286, 2.55238095238095,
2.38095238095238, 2.40952380952381, 2.07619047619048, 2.56666666666667
), score_5 = c(1, 1, 1, 1, 1.33333333333333,
1, 3.33333333333333, 3.66666666666667, 1.66666666666667,
1.66666666666667, 2, 2.5, 1.66666666666667, 1, 1.33333333333333,
3, 1, 1.66666666666667, 2.16666666666667, 2.16666666666667,
1.33333333333333, 2.66666666666667, 3, 2.66666666666667,
1.33333333333333, 2.66666666666667, 3, 1.33333333333333,
1, 1, 1, 1, 1, 1.33333333333333, 3, 3.66666666666667, 1.66666666666667,
1.33333333333333, 2.33333333333333, 1.66666666666667, 2,
2), sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("F", "M"), class = "factor"), age = c(64,
66, 51, 69, 60, 65, 65, 69, 50, 78, 75, 78, 35, 77, 69, 48,
65, 72, 60, 64, 78, 71, 58, 55, 55, 57, 81, 76, 56, 71, 56,
73, 69, 51, 43, 77, 31, 64, 69, 63, 38, 71), childbirth = structure(c(2L,
2L, 2L, 1L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, NA, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L), .Label = c("N",
"Y"), class = "factor"), x1= c(3, 2, 2, NA,
3, 2, 3, NA, 3, 3, 2, 2, NA, 2, 5, 2, 2, 2, 4, 3, 2, 2, 3,
NA, 2, 3, NA, NA, 2, 2, 2, 2, 2, 2, 3, 2, 1, NA, 2, 2, 1,
3), x2= c(0, 0, 0, NA, 1, 0, 0, NA, 0, 0,
0, 0, NA, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, NA, NA,
0, 0, 0, 0, 0, 0, 0, 0, 1, NA, 0, 0, 0, 0), x3= structure(c(4L,
1L, 1L, 2L, 1L, 1L, 1L, NA, 4L, 1L, 1L, 4L, NA, 4L, 1L, 4L,
4L, 4L, 4L, 3L, 1L, 1L, 1L, 2L, 4L, 1L, NA, 2L, 1L, 4L, 1L,
1L, 4L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 4L, 1L), .Label = c("N",
"NA", "UNK", "Y"), class = "factor"), x4= structure(c(4L,
1L, 1L, 2L, 1L, 1L, 1L, NA, 1L, 1L, 4L, 1L, NA, 1L, 1L, 4L,
3L, 1L, 4L, 4L, 1L, 4L, 4L, 2L, 1L, 4L, NA, 2L, 4L, 1L, 4L,
1L, 1L, 4L, 4L, 1L, 4L, 2L, 4L, 1L, 4L, 4L), .Label = c("N",
"NA", "UNK", "Y"), class = "factor"), x5= structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, NA, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L), .Label = c("N",
"Y"), class = "factor"), x6= structure(c(2L, 2L, 2L, 1L,
1L, 2L, 2L, NA, 1L, 1L, 1L, 2L, NA, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L), .Label = c("N", "Y"), class = "factor"),
x7= structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, NA, 1L, 1L, 1L, 1L, NA, 1L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 3L, 1L, NA, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 1L, 1L, 1L, 1L, 2L, 3L), .Label = c("N", "NA", "Y"), class = "factor"),
x8= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 1L,
2L, 2L, 2L, NA, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, NA, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
2L, 2L, 2L), .Label = c("N", "Y"), class = "factor"), x9= structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("N",
"Y"), class = "factor"), x10= structure(c(1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x11= structure(c(1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x12= structure(c(1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x13= structure(c(2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x14= c(41, 7, 8, 9, 7, 2, 1, 5, 9, 6, 6, 8,
14, 2, 4, NA, 11, 9, 31, 13, 8, 2, 11, 20, 8, 7, 6, 8, 2,
12, 32, 1, 2, 38, 10, 17, 5, 28, 31, 10, 3, 6), x15= structure(c(3L,
4L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 5L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 1L, 2L, 2L, 3L, 3L, 3L,
2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L), .Label = c("IATRO",
"IDIO", "OBST", "OBST/IDIO", "TRAUM"), class = "factor"),
x16= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x17= structure(c(2L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
x18= c(31.8, 20, 30.9, 23.3, 22.5, 23.1, 23.6, 25.9, 22.8,
25.2, 30.2, 23.4, 22.2, 29, 24.8, 32.7, 20.8, 28.5, 24.6,
23, 23.4, 21.1, 24.9, 18, 21.7, 27.6, 27, 29, 32.9, 26, 29.3,
27.1, 22.7, 19.7, 25, 22.3, 21.3, 17.5, 20.9, 20.1, 25.1,
22.1), x19= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
x20 = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 2L), .Label = c("NO", "YES"), class = "factor"),
x21= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 1L), .Label = c("NO", "YES"), class = "factor")), row.names = c(NA,
-42L), class = c("tbl_df", "tbl", "data.frame"))
logit1 <-glm(outcome_1~., data = df, family = "binomial")
Which yielded the classic error message for a logit model:
#Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
# contrasts can be applied only to factors with 2 or more levels
Ok, so I went to double check that all factor variables indeed have more than 1 unique value, and can verify:
sapply(lapply(df, unique), length)
returned all variables showing 2 or more unique values. Still same error message when I ran the model again.
I even attempted to run one solution I found online:
values_count <- sapply(lapply(df, unique), length)
logit1 <-
lm(outcome_1~ ., df[ , values_count > 1])
What's going on? Am I blind in seeing some variable that is secretly saying it has more than one unique value and does not?
Thank you!
The regression works on the supplied data for simple models, such as
logit1 <-glm(outcome_1~ sex + age, data = df, family = "binomial")
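It's a small data set with lots of variables, so the computer is not going to be able to pull out the meaningful relationships even if they are there. The error itself comes from missing values: glm() drops every row that has an NA in any model variable, and in this data all rows with sex == "M" also have an NA in childbirth, so with outcome_1 ~ . the remaining complete cases leave sex with a single level (counting unique values on the raw columns does not reveal this, and it also counts NA as a value). Start with some exploratory data plots, and think about the (biological) relationship between your outcomes and the other variables in order to come up with hypotheses you can test with your data. Realistically, which measurements do you think actually affect patient outcomes?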

create correct legend for geom_point with different colors and shapes; data comes from two variables with different levels

I already googled a lot on this problem, but none of the suggested solutions worked:
The plot shows actual and expected values for each subject on four sessions ('sitz' 1 to 4). The actual value and the expected value come from two different variables: the actual value has three levels ('cont', 'treat' and 'other'), the expected value has two levels ('cont' and 'treat').
The actual value is depicted by a filled black circle and the expected one by an unfilled blue circle.
The problem: I don't find any method so that my legend is printed correctly. The best I could get was a black and filled and a blue and filled button, but the blue one shouldn't be filled.
Here's the data and code:
df <- structure(list(vp = c(2, 5, 15, 28, 32, 2, 18, 7, 28, 16, 24,
6, 16, 22, 25, 26, 28, 14, 4, 8, 15, 38, 21, 29, 26, 21, 21,
12, 11, 23), sitz = structure(c(3L, 2L, 4L, 1L, 4L, 3L, 4L, 2L,
2L, 1L, 4L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 2L, 3L, 1L, 3L, 3L, 4L,
1L, 4L, 2L, 2L, 4L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
GROUP = structure(c(2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L), .Label = c("cont", "treat"), class = "factor"),
img_50group = structure(c(3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 3L, 3L), .Label = c("cont", "other",
"treat"), class = "factor")), row.names = c(NA, -30L), class = "data.frame")
# Asker's attempt: actual values (img_50group) drawn as small filled circles
# (shape 16), expected values (GROUP) as larger open circles (shape 1), with
# one panel per vp. The legend produced by this code is the problem being
# asked about, so the plotting logic is left as posted.
ggplot(df, aes(x = sitz, y = img_50group)) +
  geom_point(aes(x = sitz, y = img_50group, color = "black", shape = 16), size = 2) +
  geom_point(aes(x = sitz, y = GROUP, color = "darkblue", shape = 1), size = 5) +
  facet_wrap(~vp) +
  scale_colour_manual(name="Strategies", labels = c("Actual", "Expected"), values=c("black", "darkblue")) +
  scale_shape_identity(name="Strategies", labels = c("Actual", "Expected"), breaks =c("black", "darkblue"), guide = "legend")
You should melt your data and then use color, shape, and size as aesthetics in the aes() call in geom_point().
library(tidyverse)
df <- structure(list(vp = c(2, 5, 15, 28, 32, 2, 18, 7, 28, 16, 24,
6, 16, 22, 25, 26, 28, 14, 4, 8, 15, 38, 21, 29, 26, 21, 21,
12, 11, 23), sitz = structure(c(3L, 2L, 4L, 1L, 4L, 3L, 4L, 2L,
2L, 1L, 4L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 2L, 3L, 1L, 3L, 3L, 4L,
1L, 4L, 2L, 2L, 4L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
GROUP = structure(c(2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L), .Label = c("cont", "treat"), class = "factor"),
img_50group = structure(c(3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 3L, 3L), .Label = c("cont", "other",
"treat"), class = "factor")), row.names = c(NA, -30L), class = "data.frame")
# Reshape to long format (img_50group and GROUP stacked into name/value) and
# fix the factor orders so the plot keeps the order of the original graph.
df <- df %>%
  pivot_longer(cols = c(-vp, -sitz)) %>%
  mutate(
    value = fct_relevel(value, "cont", "other", "treat"),
    name = factor(name, levels = c("img_50group", "GROUP"))
  )

# x and y are inherited from ggplot(); colour, shape and size are all mapped
# to `name` and given the same title and labels so they can share one legend.
ggplot(df, aes(x = sitz, y = value)) +
  geom_point(aes(color = name, shape = name, size = name)) +
  facet_wrap(~vp) +
  scale_colour_manual(
    name = "Strategies",
    labels = c("Actual", "Expected"),
    values = c("black", "darkblue")
  ) +
  scale_shape_manual(
    name = "Strategies",
    labels = c("Actual", "Expected"),
    values = c(16, 1)
  ) +
  scale_size_manual(
    name = "Strategies",
    labels = c("Actual", "Expected"),
    values = c(2, 5)
  )
Created on 2020-04-09 by the reprex package (v0.3.0)

Removing specific strips in a double-strip plot

I'm trying to remove the redundant "pro/retro" labels on the second row of panels on my plot. However, I still want to keep the top row of panel labels intact. I've tried for the past hour to selectively remove the 1st strip on the 2nd panel row and I was wondering if anyone here knows how to do this. See below for technical details.
I have the following plot:
It was generated from the following data:
absBtwnDat <- structure(list(setSize = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L,
6L, 7L), .Label = c("2", "3", "4", "5", "6", "7", "8"), class = "factor"),
Measure = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Actual", "Predicted"), class = "factor"),
Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("fix", "forced"), class = "factor"),
JudgementType = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("pro", "retro"), class = "factor"),
Accuracy = c(1.91388888888889, 2.95555555555556, 3.74861111111111,
4.37777777777778, 4.21527777777778, 3.0875, 2.85277777777778,
2, 2.99444444444444, 4, 4.77222222222222, 5.24444444444444,
5.18472222222222, 5.20277777777778, 1.98888888888889, 3,
3.97222222222222, 4.85972222222222, 5.70555555555556, 6.56944444444444,
7.27222222222222, 2, 3, 3.99444444444444, 4.99444444444444,
5.86944444444444, 6.75555555555556, 7.57777777777778, 1.96111111111111,
2.97777777777778, 3.78333333333333, 3.97222222222222, 4.22361111111111,
3.64722222222222, 3.68888888888889, 2, 3, 3.97222222222222,
4.67777777777778, 5.26944444444444, 5.4625, 5.8, 2, 3, 3.98333333333333,
4.87777777777778, 5.73055555555556, 6.48333333333333, 7.62916666666667,
2, 3, 3.98333333333333, 4.96666666666667, 5.96944444444444,
6.94444444444444, 7.93333333333333), LL = c(1.85, 2.87777777777778,
3.59861111111111, 4.15555555555556, 3.78888888888889, 2.73055555555556,
2.55555555555556, 2, 2.96111111111111, 4, 4.64444444444444,
5.01666666666667, 4.88333333333333, 4.88611111111111, 1.91111111111111,
3, 3.89444444444444, 4.73611111111111, 5.47777777777778,
6.20277777777778, 6.71666666666667, 2, 3, 3.96666666666667,
4.95555555555556, 5.65096686319131, 6.48333333333333, 7.17222222222222,
1.86637442123568, 2.92222222222222, 3.65, 3.61666666666667,
3.88333333333333, 3.17092476055122, 3.18888888888889, 2,
3, 3.92222222222222, 4.49444444444444, 5.0375, 5.09444444444444,
5.40555555555556, 2, 3, 3.92777777777778, 4.72222222222222,
5.52777777777778, 6.24444444444444, 7.37361111111111, 2,
3, 3.95, 4.88888888888889, 5.93333333333333, 6.88333333333333,
7.73065763697428), UL = c(1.95555555555556, 2.98333333333333,
3.84444444444444, 4.56666666666667, 4.6, 3.43611111111111,
3.17916666666667, 2, 3, 4, 4.86111111111111, 5.42777777777778,
5.48656054159421, 5.58611111111111, 2, 3, 4, 4.93888888888889,
5.83888888888889, 6.76944444444444, 7.6, 2, 3, 4, 5, 5.94166666666667,
6.88888888888889, 7.78888888888889, 1.98888888888889, 2.99444444444444,
3.87777777777778, 4.22777777777778, 4.53611111111111, 4.19722222222222,
4.20555555555556, 2, 3, 3.98888888888889, 4.78333333333333,
5.45555555555556, 5.79583333333333, 6.16666666666667, 2,
3, 3.99444444444444, 4.95, 5.85972222222222, 6.67222222222222,
7.80138888888889, 2, 3, 3.99444444444444, 4.98888888888889,
5.9875, 6.97222222222222, 7.98333333333333)), .Names = c("setSize",
"Measure", "Location", "JudgementType", "Accuracy", "LL", "UL"
), row.names = c(NA, -56L), class = "data.frame")
I visualized it using the following code:
library(ggplot2)
p1 <- ggplot(data = absBtwnDat, aes(x = as.numeric(as.character(setSize)),
y = Accuracy, group = Measure,
colour = Measure))+
geom_point()+
geom_line(aes(linetype = Measure))+
scale_x_continuous("Trial Set Size", breaks = 2:8)+
scale_y_continuous("Accuracy (# Correct)", breaks = 0:8, limits = c(0, 8))+
geom_errorbar(aes(ymin = LL, ymax = UL), width = .1, size = .75)+
scale_colour_grey(start = .8, end = .4)+
facet_wrap(~JudgementType+Location, dir = "v")+
theme(legend.position = "top")
Just to be certain, I've highlighted the unwanted strip in the following image:
With this you'll only have one row of labels per panel, but they still include both words.
p1 <- ggplot(data = absBtwnDat,
aes(x = as.numeric(as.character(setSize)), y = Accuracy,
group = Measure,
colour = Measure))+
geom_point()+
geom_line(aes(linetype = Measure))+
scale_x_continuous("Trial Set Size", breaks = 2:8)+
scale_y_continuous("Accuracy (# Correct)",
breaks = 0:8, limits = c(0, 8))+
geom_errorbar(aes(ymin = LL, ymax = UL),
width = .1, size = .75)+
scale_colour_grey(start = .8, end = .4)+
facet_wrap(~JudgementType + Location,
dir = "v",
labeller = label_wrap_gen(multi_line=FALSE)) +
theme(legend.position = "top")
p1
Here is a possible solution:
library(grid)

# Convert the plot to a grob table so individual strip grobs can be edited.
g1 <- ggplotGrob(p1)

# For each target strip, blank the label text and clear the fill of the first
# child grob (presumably the strip background -- the indices depend on the
# grob layout generated by the installed ggplot2 version).
for (strip_name in c("strip-t-1-2", "strip-t-2-2")) {
  k <- which(g1$layout$name == strip_name)
  strip <- g1$grobs[[k]]$grobs[[1]]
  strip$children[[2]]$children[[1]]$label <- ""
  strip$children[[1]]$gp$fill <- NA
  g1$grobs[[k]]$grobs[[1]] <- strip
}

grid.draw(g1)

geom_path incompatible lengths when creating GGPLOT geom_bar

I have the following data frame and plotting code:
d <- structure(list(a = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("-20", "20-", "40-",
"50-"), class = "factor"), tci = structure(c(1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("A",
"B"), class = "factor"), Score = c(1, 2, 3, 4, 5,
6, 7, 8, 40.7, 51.9, 14.8, 3.7, 15, 75, 35, 20)), .Names = c("Foo",
"bar", "Score"), row.names = c(NA, -16L), class = "data.frame")
library(ggplot2)
p <-ggplot(d,aes(x=Foo,y=Score,fill=bar))+geom_bar(position="dodge",stat="identity")
What I want to do is to add the p-value bracket on the columns, which looks like this:
But why does this code fail?
> p + geom_path(x=c(1,1,2,2),y=c(42,45,45,42))
Error: Incompatible lengths for set aesthetics: x, y
One solution is to put x and y in the dataframe
p + geom_path(data=data.frame(x=c(0.75,0.75,1.25,1.25),y=c(42,45,45,42)),
aes(x,y),inherit.aes=FALSE)
Another solution is to use annotate() instead of geom_path().
# Draw the bracket as a one-off annotation; the vectors are passed directly,
# so no extra data frame is needed.
p + annotate("path", x = c(0.75, 0.75, 1.25, 1.25), y = c(42, 45, 45, 42))

Gantt chart simulation using ggplot

Is there a way to make the thinner lines in the plot (those without a y-axis tick label) appear closer to the lines above (those with a label), so as to better simulate pairs of baseline/actual bars of the same activity in a Gantt chart?
See gantt chart examples here and here.
mdfr <- structure(list(name = structure(c(8L, 8L, 8L, 8L, 6L, 6L, 6L,
6L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 7L, 7L, 7L, 7L, 5L, 5L, 5L,
5L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 8L, 8L, 8L, 8L, 6L, 6L, 6L,
6L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 7L, 7L, 7L, 7L, 5L, 5L, 5L,
5L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L), .Label = c("100 A", "100 B",
"101 A", "101 B", "102 A", "102 B", "103 A", "103 B"), class = "factor"),
stadio = c(2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7,
1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 2, 4, 5,
7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 1, 3, 6, 8, 1, 3,
6, 8, 1, 3, 6, 8, 1, 3, 6, 8), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("start_date", "end_date"), class = "factor"),
value = c("05/10/2012", "17/12/2012", "12/03/2012", "30/05/2013",
"10/01/2013", "14/10/2013", "24/10/2013", "10/01/2014", "30/09/2013",
"29/01/2014", "30/01/2014", "06/05/2014", "30/09/2013", "29/01/2014",
"30/01/2014", "06/05/2014", "05/10/2012", "17/12/2012", "12/03/2012",
"30/05/2013", "10/01/2013", "14/10/2013", "24/10/2013", "10/01/2014",
"30/09/2013", "29/01/2014", "30/01/2014", "05/06/2014", "30/09/2013",
"29/01/2014", "30/01/2014", "05/06/2014", "17/12/2012", "12/03/2012",
"30/05/2013", "30/05/2014", "14/10/2013", "24/10/2013", "10/01/2014",
"11/07/2014", "29/01/2014", "30/01/2014", "06/05/2014", "23/12/2014",
"29/01/2014", "30/01/2014", "06/05/2014", "23/12/2014", "17/12/2012",
"12/03/2012", "30/05/2013", "30/05/2014", "14/10/2013", "24/10/2013",
"10/01/2014", "11/07/2014", "29/01/2014", "30/01/2014", "05/06/2014",
"28/12/2014", "29/01/2014", "30/01/2014", "05/06/2014", "29/12/2014"
), rating = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("3",
"5"), class = "factor")), row.names = c(NA, -64L), .Names = c("name",
"stage", "variable", "value", "rating"), class = "data.frame")
names <- as.character(unique(mdfr$name))
names1 <- names[gsub("[^ B]","",names) == " B"]
names1 <- paste("No.",gsub("[ B]","",names1),sep="")
names2 <- rep("",length(names1))
new.names <- c(names1,names2)
ggplot(mdfr, aes(as.POSIXct(as.Date(value, "%d/%m/%Y")), name, colour = factor(stage))) +
geom_line(aes(size=rating)) +
labs(colour="(Baseline/Actual):", x = "", y = "") +
scale_colour_brewer(palette="RdYlGn",breaks = c("1", "3", "6","8"), guide = "none") +
scale_size_manual(breaks = levels(mdfr$rating), values = as.integer(levels(mdfr$rating)), guide = "none") +
scale_y_discrete(breaks=names, labels=new.names)
I would use facets to do this. Below you find a possible solution. This may not be the most elegant solution, but it lets you change the distance between thinner and thicker lines by changing the expand argument in scale_x_discrete.
# dcast() is not part of ggplot2; reshape2 provides it for plain data frames
library(reshape2)

# numbers to facet by (levels used for order of the facets)
nr_labels <- paste0("No.", as.numeric(gsub("A|B", "", mdfr$name)))
mdfr$nr <- factor(nr_labels, levels = unique(nr_labels))
# recast your data: the start_date / end_date entries of `variable` become
# columns filled from `value` (named explicitly instead of being guessed)
df <- dcast(mdfr, nr + stage + rating ~ variable, value.var = "value")
# plot as before, switched x and y values
ggplot(df, aes(x = factor(rating),
               ymin = as.POSIXct(as.Date(start_date, "%d/%m/%Y")),
               ymax = as.POSIXct(as.Date(end_date, "%d/%m/%Y")),
               color = factor(stage),
               size = rating)) +
  geom_linerange() + # linerange instead of line
  facet_grid(nr ~ ., scales = "free_x") + # faceting
  coord_flip() + # flip coordinates back
  scale_x_discrete(name = "", breaks = NULL, expand = c(4, 1)) + # use the expand variable to change the distances
  scale_colour_brewer(palette = "RdYlGn", breaks = c("1", "3", "6", "8"), guide = "none") +
  scale_size_manual(breaks = levels(mdfr$rating), values = as.integer(levels(mdfr$rating)), guide = "none")

Resources