I recently received great help from SO in producing this customized facet_wrap-plot shown below.
Question: how can I add the dotted linetype used in geom_quantile(linetype=2) as legend with the text "50th percentile"?
I have sought solutions in similar questions on SO, but my question has not been answered.
My current plot looks like this
Produced with the following code
# Palette used by the manual colour/fill scales below (four colours for the
# four UICC stage levels).
cols <- c("#E1B930", "#2C77BF", "#E38072", "#6DBCC3")

# One panel per UICC stage: stage-coloured points, a dashed black median
# (quantile 0.5) regression line, and the stage name printed above each panel
# in place of the (transparent) facet strip text. All legends are disabled.
ggplot(p, aes(x = n.fjernet, y = os.neck)) +
  geom_point(aes(color = uiccc), shape = 20, size = 5, alpha = 0.7) +
  geom_quantile(
    quantiles = 0.5, colour = "black", size = 1, linetype = 2,
    show.legend = FALSE
  ) +
  facet_wrap(. ~ factor(uiccc)) +
  scale_fill_manual(values = cols) +
  scale_colour_manual(values = cols) +
  scale_x_continuous(name = "Lymph nodal yield", breaks = seq(0, 50, by = 10)) +
  scale_y_continuous(name = "Time to death (months)") +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(color = "transparent"),
    axis.title.x = element_text(
      color = "grey20", size = 14, face = "bold",
      margin = ggplot2::margin(t = 10)
    ),
    axis.title.y = element_text(
      color = "grey20", size = 14, face = "bold",
      margin = ggplot2::margin(r = 10)
    ),
    legend.position = "none",
    plot.margin = unit(c(1, 3, 1, 1), "lines")
  ) +
  # clip = "off" lets the stage titles at y = 190 be drawn above the panel,
  # whose visible y range is limited to 0-175.
  coord_cartesian(clip = "off", ylim = c(0, 175)) +
  geom_text(
    data = . %>% distinct(uiccc),
    aes(label = factor(uiccc), color = uiccc),
    y = 190, x = 30, hjust = 0.5, fontface = "bold", cex = 5
  )
I would like the plot to have a legend that indicates the "50th percentile" as demonstrated by the linetype=2 in geom_quantile(), exclusively, which looks like this (manually added in photoshop):
First: I have removed theme(legend.position="none")
Second: I have added show.legend=TRUE in geom_quantile
Third: I have added show.legend=FALSE in geom_point
Unfortunately, these edits do not produce the requested legend:
How can I proceed?
My data p
p <- structure(list(n.fjernet = c(18L, 11L, 14L, 15L, 9L, 6L, 3L,
16L, 4L, 6L, 10L, 13L, 33L, 16L, 6L, 9L, 23L, 9L, 8L, 13L, 5L,
30L, 25L, 3L, 9L, 9L, 12L, 7L, 38L, 5L, 7L, 15L, 4L, 6L, 15L,
9L, 8L, 7L, 4L, 6L, 10L, 8L, 4L, 9L, 10L, 14L, 14L, 3L, 4L, 6L,
6L, 20L, 3L, 26L, 13L, 13L, 13L, 13L, 3L, 7L, 6L, 5L, 10L, 15L,
29L, 7L, 6L, 11L, 17L, 14L, 18L, 22L, 9L, 20L, 34L, 9L, 8L, 8L,
11L, 3L, 4L, 4L, 5L, 3L, 2L, 8L, 5L, 18L, 7L, 9L, 13L, 18L, 19L,
14L, 46L, 23L, 11L, 6L, 18L, 20L, 4L, 2L, 7L, 7L, 4L, 11L, 13L,
13L, 9L, 9L, 9L, 12L, 11L, 16L, 6L, 13L, 8L, 17L, 5L, 8L, 22L,
19L, 3L, 15L, 14L, 7L, 18L, 9L, 10L, 18L, 24L, 11L, 15L, 7L,
6L, 4L, 24L, 23L, 8L, 20L, 9L, 22L, 11L, 2L, 24L, 15L, 5L, 8L,
11L, 11L, 11L, 15L, 6L, 16L, 7L, 9L, 16L, 11L, 33L, 27L, 16L,
57L, 5L, 7L, 8L, 11L, 15L, 15L, 12L, 5L, 9L, 49L, 11L, 28L, 19L,
13L, 23L, 11L, 12L, 10L, 4L, 14L, 6L, 12L, 32L, 13L, 12L, 4L,
11L, 17L, 10L, 5L, 15L, 21L, 19L, 11L, 31L, 9L, 20L, 11L, 16L,
12L, 6L, 16L, 27L, 30L, 18L, 18L, 10L, 7L, 23L, 16L, 15L, 4L,
12L, 9L, 10L, 11L, 7L, 8L, 8L, 7L, 6L, 9L, 9L, 13L, 15L, 12L,
35L, 12L, 5L, 19L, 27L, 34L, 10L, 16L, 18L, 6L, 22L), os.neck = c(11.5,
74.38, 17.02, 7.89, 96.03, 40.48, 17.74, 14.65, 62.46, 12.55,
9.92, 26.05, 45.47, 17.38, 39.72, 51.45, 8.61, 76.98, 67.09,
94.79, 72.15, 93.93, 17.05, 12.48, 91.6, 15.87, 11.04, 67.22,
67.02, 8.94, 6.6, 5.09, 10.68, 17.15, 0.07, 5.19, 40.77, 0.2,
170.88, 5.55, 1.61, 38.28, 10.58, 32.99, 110.98, 103.69, 122.32,
14.78, 42.74, 4.04, 8.28, 84.96, 11.7, 49.97, 120.48, 52.6, 71.26,
16.3, 100.14, 55.03, 6.51, 89.89, 51.71, 24.97, 55.66, 21.91,
81.48, 30.92, 1.58, 7.52, 30.75, 3.45, 19.22, 5.42, 17.68, 45.54,
76.22, 125.34, 83.62, 30.82, 90.32, 1.84, 19.98, 20.53, 32.59,
54.77, 2.3, 106.84, 22.28, 45.18, 4.47, 39.66, 32.3, 16.23, 3.88,
2.23, 0.23, 18.73, 0.79, 28.75, 79.54, 14.46, 15.15, 54.97, 48.59,
34.83, 58.42, 35.29, 45.73, 57.53, 63.11, 65.05, 29.54, 77.21,
63.48, 83.35, 34.3, 64.49, 29.54, 62.69, 21.62, 49.35, 99.02,
15.8, 41.89, 12.98, 13.8, 43.6, 57.23, 31.38, 70.74, 39.46, 20.76,
67.22, 127.15, 74.12, 1.97, 7.39, 25.17, 28.22, 14, 36.53, 20.83,
19.55, 40.77, 27.76, 45.31, 34.46, 35.55, 26.94, 9.43, 10.51,
6.8, 8.18, 8.02, 14.29, 6.11, 13.8, 4.9, 4.04, 14.82, 11.66,
73.07, 92.91, 99.98, 10.64, 10.05, 95.8, 7.23, 12.81, 43.99,
13.9, 10.25, 16.36, 18.2, 18.76, 12.32, 8.64, 11.79, 112.04,
70.97, 31.28, 28.85, 21.49, 19.94, 22.14, 29.44, 67.62, 11.01,
45.24, 110.72, 20.24, 14.06, 12.88, 31.51, 8.08, 13.08, 21.45,
24.28, 21.98, 32.89, 23.26, 15.41, 15.41, 13.8, 40.12, 8.02,
15.77, 49.81, 18.17, 24.21, 47.08, 6.6, 37.16, 13.01, 8.38, 14.36,
18.27, 17.28, 73.76, 68.21, 22.83, 2.66, 69.06, 17.05, 8.61,
23.33, 13.34, 12.65, 8.77, 128.92, 16.1, 4.99, 11.73, 22.97,
40.12, 20.37, 2.04, 45.73), uiccc = structure(c(4L, 3L, 3L, 2L,
2L, 2L, 2L, 4L, 1L, 1L, 2L, 1L, 4L, 2L, 1L, 2L, 3L, 1L, 2L, 3L,
2L, 1L, 2L, 3L, 2L, 4L, 1L, 1L, 2L, 4L, 4L, 1L, 3L, 3L, 4L, 3L,
1L, 4L, 2L, 3L, 4L, 4L, 4L, 3L, 2L, 4L, 1L, 4L, 2L, 4L, 4L, 2L,
4L, 4L, 1L, 4L, 2L, 3L, 2L, 2L, 3L, 2L, 4L, 4L, 2L, 2L, 3L, 1L,
4L, 4L, 4L, 4L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 4L, 2L, 4L, 1L, 2L, 1L, 1L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 2L, 3L, 3L, 4L, 1L, 1L, 3L, 1L, 4L, 2L, 1L, 3L, 1L, 2L, 1L,
1L, 4L, 1L, 1L, 4L, 1L, 1L, 3L, 2L, 2L, 1L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 2L, 2L, 4L, 4L, 2L, 3L, 4L, 2L, 4L, 1L, 1L, 3L, 3L, 1L,
1L, 3L, 4L, 4L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 2L,
2L, 4L, 3L, 1L, 4L, 3L, 4L, 4L, 3L, 1L, 4L, 4L, 4L, 4L, 2L, 2L,
4L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 4L, 4L, 2L,
4L, 4L, 2L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 3L, 1L, 2L, 1L, 2L, 2L, 4L, 4L, 4L, 4L,
4L, 4L, 1L, 3L, 4L, 4L, 1L, 3L, 3L, 4L, 3L), .Label = c("UICC Stage I",
"UICC Stage II", "UICC Stage III", "UICC Stage IV"), class = "factor")), row.names = c(NA,
-239L), class = "data.frame")
One possible solution is to specify a "fake" color title in the aes of geom_quantile.
Then, you can remove legend for points and labels by adding show.legend = FALSE into their respective geom and manipulate breaks in scale_color_manual to show only the color attribute for your 50th percentile category. Finally, manipulating various parameters for legend in theme will get you to what you desired.
Altogether, you can have a code like this (PS: scale_fill_manual is useless in your current example):
# Faceted plot with a legend entry for the median regression line.
# The quantile line's colour is mapped to the constant string
# "50th percentile", which adds a fifth level to the colour scale next to the
# four UICC stages. The manual scale therefore needs five colours; they are
# given as a NAMED vector so that the line is black and each stage keeps its
# colour from `cols` regardless of how the levels sort.
# `breaks` restricts the legend to the "50th percentile" key only.
ggplot(p, aes(x = n.fjernet, y = os.neck)) +
  geom_point(
    aes(color = uiccc),
    shape = 20, size = 5, alpha = 0.7, show.legend = FALSE
  ) +
  geom_quantile(
    aes(colour = "50th percentile"),
    quantiles = 0.5, size = 1, linetype = 2
  ) +
  facet_wrap(. ~ factor(uiccc)) +
  # scale_fill_manual(values = cols) +   # not needed: no layer maps `fill`
  scale_colour_manual(
    values = c("50th percentile" = "black", setNames(cols, levels(p$uiccc))),
    breaks = c("50th percentile"),
    name = ""
  ) +
  scale_x_continuous(breaks = seq(0, 50, by = 10), name = "Lymph nodal yield") +
  scale_y_continuous(name = "Time to death (months)") +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(color = "transparent"),
    axis.title.x = element_text(
      color = "grey20", size = 14, face = "bold",
      margin = ggplot2::margin(t = 10)
    ),
    axis.title.y = element_text(
      color = "grey20", size = 14, face = "bold",
      margin = ggplot2::margin(r = 10)
    ),
    legend.position = "top",
    legend.text = element_text(size = rel(2)),
    legend.key.size = unit(2, "cm"),
    plot.margin = unit(c(1, 3, 1, 1), "lines")
  ) +
  # clip = "off" allows the stage titles (y = 190) to be drawn outside the
  # panel, whose visible y range is 0-175.
  coord_cartesian(clip = "off", ylim = c(0, 175)) +
  geom_text(
    data = . %>% distinct(uiccc),
    aes(label = factor(uiccc), color = uiccc),
    y = 190, x = 30, hjust = 0.5, fontface = "bold", cex = 5,
    show.legend = FALSE
  )
I am using the following code to make my graph:
# Per-facet annotation data: one single-row data frame per `Type` panel,
# carrying the sample-size text and the position at which to draw it.
label1 <- data.frame(x = 2, y = 2, Type = "FYS", label = "N=15")
label2 <- data.frame(x = 2, y = 2, Type = "SNR", label = "N=24")

# One line (with points) per student, faceted by Type in rows, plus a single
# black linear trend line across all students of a panel (group = 1).
ggplot(
  data = Q,
  mapping = aes(
    y = Rating, x = weeks,
    group = StudentFactor, colour = StudentFactor
  )
) +
  geom_point() +
  geom_line() +
  facet_grid(Type ~ .) +
  geom_smooth(
    method = 'lm', formula = y ~ poly(x),
    colour = "black", aes(group = 1), se = FALSE
  ) +
  theme(legend.position = "none") +
  labs(x = "Date", y = "Students' Average Engagement over Time") +
  # inherit.aes = FALSE: the label data frames have no Rating/weeks columns.
  geom_text(
    data = label1, aes(x = x, y = y, label = label),
    inherit.aes = FALSE
  ) +
  geom_text(
    data = label2, aes(x = x, y = y, label = label),
    inherit.aes = FALSE
  )
However, the dates at the bottom are out of order. Instead of using x = weeks, I could use x = timePeriod, which would put the points in order, but then the axis labels would be wrong.
I have tried adding the following code to order the levels of weeks,
# NOTE(review): this is the attempt that raises
# "object of type 'closure' is not subsettable". The data frame shown in this
# post is named `Q`, so `df` presumably resolves to a function (e.g. stats::df)
# rather than to a data frame; `levels=weeks` likewise refers to an object
# `weeks` that is not defined on its own - confirm against the session.
df$weeks <- factor(df$weeks, order=TRUE, levels=weeks)
but I keep getting an error saying object of type 'closure' is not subsettable.
I have attached my data below:
> dput (Q)
structure(list(StudentFactor = structure(c(1L, 3L, 4L, 8L, 11L,
13L, 14L, 15L, 18L, 19L, 21L, 22L, 24L, 30L, 31L, 32L, 36L, 38L,
27L, 34L, 35L, 1L, 3L, 4L, 8L, 11L, 13L, 14L, 18L, 19L, 21L,
22L, 24L, 2L, 5L, 6L, 7L, 9L, 10L, 12L, 16L, 17L, 20L, 23L, 25L,
26L, 28L, 29L, 30L, 31L, 32L, 33L, 36L, 37L, 38L, 40L, 41L, 34L,
39L, 1L, 3L, 4L, 8L, 11L, 13L, 14L, 15L, 18L, 19L, 21L, 24L,
2L, 5L, 6L, 7L, 9L, 10L, 12L, 16L, 17L, 20L, 23L, 25L, 28L, 30L,
31L, 33L, 36L, 37L, 38L, 40L, 41L, 34L, 35L, 39L, 1L, 3L, 4L,
8L, 11L, 14L, 15L, 18L, 21L, 22L, 24L, 2L, 6L, 7L, 9L, 10L, 12L,
16L, 17L, 20L, 23L, 31L, 33L, 36L, 37L, 40L, 27L, 34L, 1L, 3L,
4L, 8L, 11L, 13L, 14L, 15L, 18L, 19L, 21L, 22L, 2L, 5L, 6L, 7L,
9L, 10L, 12L, 16L, 17L, 20L, 23L, 28L, 30L, 31L, 32L, 33L, 36L,
38L, 41L, 27L, 34L, 35L, 1L, 3L, 4L, 11L, 14L, 15L, 18L, 19L,
21L, 22L, 24L, 2L, 5L, 6L, 9L, 10L, 12L, 16L, 20L, 23L, 29L,
30L, 31L, 32L, 33L, 36L, 38L, 41L, 27L, 34L, 35L, 1L, 3L, 11L,
13L, 14L, 15L, 18L, 19L, 21L, 22L, 24L, 2L, 6L, 7L, 9L, 10L,
12L, 16L, 17L, 20L, 23L, 28L, 29L, 30L, 31L, 36L, 37L, 38L, 40L,
41L, 27L, 34L, 35L, 39L, 1L, 3L, 4L, 11L, 13L, 14L, 15L, 18L,
19L, 21L, 22L, 24L, 2L, 7L, 10L, 12L, 16L, 17L, 20L, 28L, 29L,
30L, 31L, 32L, 33L, 36L, 37L, 38L, 40L, 41L, 27L, 34L, 35L, 1L,
11L, 13L, 14L, 18L, 19L, 21L, 22L, 24L, 2L, 6L, 7L, 10L, 12L,
16L, 28L, 30L, 31L, 33L, 36L, 34L, 1L, 4L, 14L, 15L, 18L, 19L,
21L, 22L, 24L, 2L, 7L, 9L, 10L, 12L, 16L, 17L, 20L, 23L, 29L,
30L, 31L, 32L, 33L, 36L, 37L, 40L, 41L, 27L, 34L, 39L, 1L, 3L,
4L, 11L, 13L, 14L, 15L, 18L, 22L, 24L, 2L, 6L, 7L, 9L, 10L, 12L,
16L, 17L, 20L, 23L, 30L, 31L, 36L, 37L, 38L, 41L, 27L), .Label = c("789331",
"796882", "805933", "826523", "827911", "830271", "831487", "832929",
"834598", "836364", "838607", "839802", "841903", "843618", "852125",
"855524", "873527", "876406", "879972", "885409", "885650", "888712",
"894218", "903303", "928026", "932196", "952797", "955389", "956952",
"957206", "957759", "959200", "962490", "965873", "967416", "968728",
"969005", "971179", "975424", "976863", "981621"), class = "factor"),
Type = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("FYS", "SNR"), class = "factor"),
weeks = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Apr5",
"Feb1", "Feb15", "Feb8", "Jan11", "Jan25", "Mar1", "Mar15",
"Mar22", "Mar29", "Mar8"), class = "factor"), timePeriod = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L), class = "factor", .Label = c("Rt1", "Rt2", "Rt3", "Rt4",
"Rt5", "Rt6", "Rt7", "Rt8", "Rt9", "Rt10", "Rt11")), Rating = c(3.6,
4.8, 4.4, 3.8, 5, 3.2, 4.4, 3.2, 3.6, 3.8, 4, 4.4, 3.6, 4,
3.8, 3, 3.6, 4.4, 3.6, 3.4, 4.2, 3.8, 4, 4.2, 3.8, 5, 4.2,
4.4, 4, 3.8, 4.4, 4, 3.8, 4.4, 4.2, 4.6, 4.4, 5, 4, 3.4,
5, 3.8, 4.8, 4.4, 4.6, 3.2, 5, 4.2, 4.4, 4.4, 3.4, 3.8, 3.8,
3.6, 4.8, 4.4, 4.8, 4.75, 4, 4, 4, 4.2, 3.8, 5, 4.2, 4.6,
3.8, 4.2, 3.8, 4, 4.6, 4, 3.6, 4.8, 4.2, 3.8, 4, 2, 4.6,
3.8, 4.6, 4.4, 4.8, 4.6, 4, 4.4, 4.2, 3.6, 4.6, 4.4, 5, 4.6,
5, 4.2, 3.4, 4.2, 3.6, 4.4, 4, 5, 4.4, 4, 4, 4, 4.2, 4, 4,
5, 4.6, 4, 4, 1.8, 4.6, 4.2, 4.8, 4.6, 4.4, 4.2, 3.4, 4.4,
3.8, 4, 5, 3.4, 3.2, 4.6, 3.6, 5, 3.6, 4.4, 3.8, 4, 4, 4.2,
4.4, 2.8, 3.4, 5, 4.4, 4.2, 3.6, 4.2, 4.2, 4, 4.4, 5, 4,
4, 3.8, 3.2, 4.2, 3.4, 4.4, 5, 4.4, 4, 4.2, 2.4, 3.2, 4.6,
4.4, 4.4, 3.6, 2.4, 4.2, 4, 4.4, 3.4, 3.6, 3.4, 4.4, 4, 3.2,
2.2, 4.4, 4.4, 5, 3.2, 4.4, 4, 3, 4.6, 3, 4.25, 4.2, 3.6,
3.8, 4.4, 3, 3.2, 4.2, 4, 4.4, 3.6, 2.8, 4, 4.4, 4.6, 3.8,
2.8, 4.8, 4.2, 4, 3.6, 3, 4.8, 4.2, 4.2, 5, 4.4, 4.4, 4,
3.2, 1, 4.4, 4.2, 3.6, 3.8, 4, 1.4, 4.6, 2.8, 3.2, 3.2, 4.6,
4.4, 3.4, 4.2, 4, 3.8, 4, 4.2, 3.8, 3.6, 1.4, 4.6, 3.6, 4.2,
4, 4.4, 4.4, 4.6, 4.2, 4.2, 3.2, 4, 3.6, 3, 4.6, 4.8, 3.6,
4.2, 4.2, 2.2, 5, 3.2, 3.8, 4.2, 3.6, 3, 4, 3.8, 4.2, 3.8,
2.2, 5, 4.8, 3.4, 2.8, 5, 4.4, 4, 3, 1, 3, 1.6, 3.6, 4.2,
4, 3.4, 3.2, 4, 4, 4, 3.6, 2, 4.4, 4, 3.4, 1.8, 4.2, 3.8,
3.8, 4, 4.2, 3.8, 4.2, 4.2, 3.2, 1.6, 4.6, 4, 5, 4, 3.4,
3.6, 4, 3.2, 4.2, 3.6, 4.6, 4.4, 4.6, 4.2, 4.6, 4.6, 4.2,
5, 4.6, 4.2, 4, 4, 4.6, 4.4, 3.6, 5, 4.4, 4.6, 1.6, 4.6,
5, 5, 4)), class = "data.frame", row.names = c(NA, -333L), .Names = c("StudentFactor",
"Type", "weeks", "timePeriod", "Rating"))
I just changed the format of the week column. Does it work for you?
# Parse the "Jan11"-style week labels as dates and re-format them as "MM DD"
# text; as a factor, that text sorts in calendar order.
# NOTE(review): "%b" month names are parsed according to the current locale,
# and no year is given in the input - confirm this is acceptable for the data.
newdate <- strftime(as.Date(Q[["weeks"]], "%b%d"), "%m %d")
QQ <- cbind(Q, newdate)

# Same plot as before, but with the re-formatted date on the x axis.
ggplot(
  data = QQ,
  mapping = aes(
    y = Rating, x = factor(newdate),
    group = StudentFactor, colour = StudentFactor
  )
) +
  geom_point() +
  geom_line() +
  facet_grid(Type ~ .) +
  geom_smooth(
    method = 'lm', formula = y ~ poly(x),
    colour = "black", aes(group = 1), se = FALSE
  ) +
  theme(legend.position = "none") +
  labs(x = "Date", y = "Students' Average Engagement over Time") +
  geom_text(
    data = label1, aes(x = x, y = y, label = label),
    inherit.aes = FALSE
  ) +
  geom_text(
    data = label2, aes(x = x, y = y, label = label),
    inherit.aes = FALSE
  )
Problem
I am trying to count the data factor-wise and display the counts in the labels of an axis.
My closest solution is the following:
# Label each A_REF value with "(n= <count> )"; note that length(A_REF) is the
# length of the whole column, not a per-level count.
aes(x = paste(A_REF, "(n=", length(A_REF), ")"))
n is the number of occurrences of the factor level in the data field.
Edit: How do I achieve that the first and fifth factor of V43 show up? --> forgot to library("foreign")
Current State: Solved
My Code so far
# Load libraries & packages =================================
library("ggplot2")
library("scales")
library("dplyr")
library("foreign")
# Data setup =================================
# Location of the SPSS file (machine-specific path).
spss_file_path <- "D:\\Programming\\Testing\\2017-03-15_data_import&ggplot2\\Beispieldatensatz(fiktiv).sav"

# Import as a data frame, converting labelled SPSS values to factors.
exampledata <- read.spss(
  spss_file_path,
  use.value.labels = TRUE,
  to.data.frame = TRUE,
  reencode = TRUE
)

# Column names must be character strings; the bare symbols V101, A_REF and V43
# would be evaluated as (non-existent) objects and raise "object not found".
names(exampledata) <- c("V101", "A_REF", "V43")

# Give the satisfaction item its five labelled levels.
exampledata$V43 <- factor(
  exampledata$V43,
  levels = c(1, 2, 3, 4, 5),
  labels = c("1 Sehr zufrieden", "2", "3", "4", "5 Sehr unzufrieden")
)

# Reverse the level order of each factor (affects the order in which the
# levels are laid out in the plot).
exampledata$V43 <- factor(exampledata$V43, levels = rev(levels(exampledata$V43)))
exampledata$A_REF <- factor(exampledata$A_REF, levels = rev(levels(exampledata$A_REF)))
exampledata$V101 <- factor(exampledata$V101, levels = rev(levels(exampledata$V101)))

# Axis labels: "<A_REF> (n= <count> )", counted over rows where both V101 and
# V43 are present.
labels <- exampledata %>%
  filter(!is.na(V101), !is.na(V43)) %>%
  count(A_REF) %>%
  mutate(labels = paste(A_REF, "(n=", n, ")")) %>%
  select(A_REF, labels)

# Plot data: the same complete rows, with the label text attached per A_REF.
plot_data <- exampledata %>%
  filter(!is.na(V101), !is.na(V43)) %>%
  left_join(labels, by = "A_REF")
# Plot =================================
# Horizontal 100%-stacked bars: one bar per A_REF label, filled by V43, with a
# percentage printed in the middle of every segment.
ggplot(plot_data, aes(x = labels, fill = V43)) +
  geom_bar(position = "fill") +
  scale_y_continuous(
    labels = scales::percent,
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)
  ) +
  labs(y = NULL, x = NULL, fill = NULL) +
  # Title text is taken from element 77 of the "variable.labels" attribute.
  ggtitle(paste(attr(exampledata, "variable.labels")[77])) +
  theme_classic() +
  geom_text(
    stat = "count",
    aes(label = scales::percent((..count..) / sum(..count..))),
    position = position_fill(vjust = 0.5)
  ) +
  coord_flip()
Data
structure(list(exampledata.V101 = structure(c(2L, NA, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L, NA,
NA, NA, 1L, 1L, 2L, NA, 2L, 2L, 2L, NA, 2L, 2L, NA, NA, 1L, NA,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, NA, NA, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, NA, 1L, NA, 1L, NA,
1L, 2L, NA, NA, 2L, NA, 1L, 2L, 2L, NA, 2L, NA, 2L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 1L, NA, 2L, 2L, 2L, 2L, NA, 2L, 1L, 2L, 2L
), .Label = c("Weiblich", "Männlich"), class = "factor"), exampledata.A_REF = structure(c(18L,
18L, 18L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L, 18L, 16L, 18L,
16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 16L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 18L, 18L,
16L, 18L, 16L, 18L, 18L, 16L, 16L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L,
16L, 16L, 18L, 18L, 18L, 17L, 16L, 18L), .Label = c("Zertifikat eines Aufbau- oder Ergänzungsstudiums",
"LA Berufliche Schulen", "LA Sonderschule", "LA Gymnasium", "LA Haupt- und Realschule",
"LA Grundschule", "Künstlerischer/musischer Abschluss", "Kirchlicher Abschluss",
"Staatsexamen (ohne Lehramt)", "Diplom Fachhochschule, Diplom I an Gesamthochschulen",
"Diplom Universität, Diplom II an Gesamthochschulen", "Sonstiges",
"Promotion", "Staatsexamen", "Magister", "Diplom", "Master",
"Bachelor"), class = "factor"), exampledata.V43 = structure(c(3L,
5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 4L, 3L, 3L, 2L, NA, 4L, 5L, 5L,
4L, 4L, 4L, 4L, NA, 2L, 4L, 3L, 5L, 4L, 4L, 4L, NA, 4L, 4L, NA,
NA, 3L, 5L, 2L, 4L, 5L, 4L, 4L, 5L, 5L, 4L, NA, NA, 4L, NA, 3L,
4L, 5L, 5L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 5L, 4L, 5L, NA, 4L,
NA, 4L, NA, 4L, 5L, 4L, NA, 5L, NA, 4L, 4L, 4L, NA, 4L, NA, 5L,
4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 4L, 4L, 4L, 3L, 4L, NA, 4L,
5L, 5L, 4L), .Label = c("5 Sehr unzufrieden", "4", "3", "2",
"1 Sehr zufrieden"), class = "factor")), .Names = c("exampledata.V101",
"exampledata.A_REF", "exampledata.V43"), row.names = c(NA, 100L
), class = "data.frame")
I think the easiest way is to compute the labels outside of ggplot.
Note that with your data, the 5th level of V43 doesn't show up.
library(ggplot2)
library(dplyr)
# Rename the three columns to the short variable names used below.
colnames(exampledata) <- c("V101", "A_REF", "V43")
I count A_REF and then apply your formula to compute the labels.
# Build one label per A_REF level, "<A_REF> (n= <count> )", counting only rows
# in which neither V101 nor V43 is missing.
labels <- exampledata %>%
  filter(!(is.na(V101) | is.na(V43))) %>%
  count(A_REF) %>%
  mutate(labels = paste(A_REF, "(n=", n, ")")) %>%
  select(A_REF, labels)
I then join the labels to the data
# Keep the rows with both V101 and V43 present and attach the label text that
# belongs to each row's A_REF value.
plot_data <- exampledata %>%
  filter(!(is.na(V101) | is.na(V43))) %>%
  left_join(labels, by = "A_REF")
And finally, here is the plot. Note that the title doesn't show up either.
# Horizontal 100%-stacked bars: one bar per A_REF label, filled by V43, with a
# percentage printed in the middle of every segment.
ggplot(plot_data, aes(x = labels, fill = V43)) +
  geom_bar(position = "fill") +
  scale_y_continuous(
    labels = scales::percent,
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)
  ) +
  labs(y = NULL, x = NULL, fill = NULL) +
  # Title text is taken from element 77 of the "variable.labels" attribute.
  ggtitle(paste(attr(exampledata, "variable.labels")[77])) +
  theme_classic() +
  geom_text(
    stat = "count",
    aes(label = scales::percent((..count..) / sum(..count..))),
    position = position_fill(vjust = 0.5)
  ) +
  coord_flip()
This question already has an answer here:
Position geom_text on dodged barplot
(1 answer)
Closed 8 years ago.
I have a problem plotting a simple bar plot in ggplot2. I'd like to display value labels above each bar, but I am using position=position_dodge to plot the bars alongside each other, as I have multiple factor variables along my x-axis for a number of data points which refer to individual countries.
Currently using the following code I am getting all values centred over the name of the COUNTRY along my x-axis, whereas I'd like each value centred over each bar along the x-axis:
# Dodged bars per country, filled by survey location, with the rounded
# percentage as a text label (not dodged here, so the labels of one country
# are all drawn at the same x position).
ggplot(
  maln_complete_recent_melt,
  aes(x = COUNTRY, y = percent, fill = location)
) +
  geom_bar(position = "dodge", colour = "black") +
  labs(
    title = "Percentage of malnoursihed children according to height for age",
    fill = "DHS Survey"
  ) +
  geom_text(aes(label = round(percent))) +
  scale_x_discrete(
    labels = c(paste0(
      maln_complete_recent_melt$COUNTRY,
      maln_complete_recent_melt$Year
    ))
  )
And this is how my plot currently looks:
I'm sure this is something really simple, but having trawled the forums and consulted a number of books on R, I can't find what I'm looking for.
Here's my data for replication:
structure(list(COUNTRY = structure(c(4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L, 4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L, 4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L, 4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L, 4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L, 4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L), class = "factor", .Label = c("Swaziland",
"Namibia", "Zimbabwe", "Comoros", "Kenya", "Tanzania", "Uganda",
"Lesotho", "Mozambique", "Ethiopia", "Rwanda", "Zambia", "Malawi",
"Madagascar")), Year = structure(c(5L, 23L, 20L, 21L, 20L, 14L,
12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L, 5L, 23L, 20L, 21L, 20L,
14L, 12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L, 5L, 23L, 20L, 21L,
20L, 14L, 12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L, 5L, 23L, 20L,
21L, 20L, 14L, 12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L, 5L, 23L,
20L, 21L, 20L, 14L, 12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L, 5L,
23L, 20L, 21L, 20L, 14L, 12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L
), class = "factor", .Label = c("1992", "1993", "1994", "1995",
"1996", "1997", "1998", "1999", "2000", "2000/1", "2001/2", "2003",
"2003/4", "2004", "2005", "2005/6", "2006", "2006/7", "2007",
"2008/9", "2009", "2010", "2011")), location = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L), .Label = c("Urban", "Rural", "Total", "Capital.City",
"Urban.Non.slum", "Urban.Slum"), class = "factor"), percent = c(29.9107142857143,
31.5, 26.4, 30.2, 43.4, 37.8, 29.2, 23.8, 33.1, 23.1, 25.8, 25.5,
39, 27.5, 35.0071736011478, 46.2, 37.1, 41, 50.9, 49.2, 45.7,
31.4, 47.2501049401341, 30, 40.5, 39.5, 47.9, 33.4, 33.8, 44.4,
35.3, 39.2, 50.1, 47.8, 41, 29, 45.3, 28.9, 37.7, 38.1, 45.4,
32, NA, 22, 28.5, 31.7, 46.8, NA, 20.6, NA, 29.2, NA, 16.9, NA,
37.2, 29, 21.6666666666667, 10.7799374501425, 16.0100871430598,
21.2511075992408, 23.7772452013661, 23.8620542603877, 8.15543422154615,
12.5321762341041, 13.5923403336176, 13.0494984481826, 16.4644101423357,
21.5426476162423, 27.4144189089535, 16.2492480072194, 32.9268292682927,
27.8790407698325, 25.0985529949154, 26.2950824206824, 37.9693371617388,
40.5197947988247, 29.3593820697417, 28.5099311066166, 36.0242620139593,
20.7853672439948, 28.5877166151249, 22.8001055045257, 35.5434627654004,
28.0633523545878)), .Names = c("COUNTRY", "Year", "location",
"percent"), row.names = c(NA, -84L), class = "data.frame")
The answer was in the following post: Position geom_text on dodged barplot
Just needed to add the position=position_dodge call into geom_text() as follows:
# Axis label lookup: "<COUNTRY><Year>" keyed by country name. Labels are looked
# up per axis break, so each bar group gets the year of its own country
# instead of relying on the row order of the data matching the factor-level
# order of the axis.
# NOTE(review): assumes one Year per COUNTRY, as in the posted data.
country_year <- unique(maln_complete_recent_melt[c("COUNTRY", "Year")])
country_year_labels <- setNames(
  paste0(country_year$COUNTRY, country_year$Year),
  as.character(country_year$COUNTRY)
)

# Dodged bars per country with the rounded percentage centred above each bar.
ggplot(
  maln_complete_recent_melt,
  aes(x = COUNTRY, y = percent, fill = location)
) +
  # stat = "identity": bar heights come from `percent`. The default count stat
  # is rejected by current ggplot2 when a y aesthetic is mapped.
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  labs(
    title = "Percentage of malnoursihed children according to height for age",
    fill = "DHS Survey"
  ) +
  # Dodge the labels by the same width as the bars (0.9) so that each value
  # sits over its own bar rather than over the centre of the country group.
  geom_text(
    aes(label = round(percent)),
    position = position_dodge(width = 0.9),
    vjust = -0.5
  ) +
  scale_x_discrete(
    labels = function(countries) unname(country_year_labels[countries])
  )
Which produced this:
@marty_c answered your question. If you're not required to use this format, though, you might consider facets (using df = maln_complete_recent_melt):
# Axis label lookup: "<COUNTRY><Year>" keyed by country name, so that every
# axis break is labelled with the year of its own country (the row order of
# the data does not have to match the factor-level order of the axis).
# NOTE(review): assumes one Year per COUNTRY, as in the posted data.
country_year <- unique(df[c("COUNTRY", "Year")])
country_year_labels <- setNames(
  paste0(country_year$COUNTRY, country_year$Year),
  as.character(country_year$COUNTRY)
)

# Faceted alternative: one row of bars per survey location, with the rounded
# percentage printed just above the baseline of each bar.
ggplot(df, aes(x = COUNTRY, y = percent, fill = location)) +
  # Bar heights are taken directly from `percent`.
  geom_bar(stat = "identity") +
  labs(
    title = "Percentage of malnoursihed children according to height for age",
    fill = "DHS Survey"
  ) +
  geom_text(aes(label = round(percent), y = 0), vjust = -0.5, size = 4) +
  scale_x_discrete(
    labels = function(countries) unname(country_year_labels[countries])
  ) +
  theme(
    axis.text.x = element_text(
      angle = -90, color = "black", hjust = 0, vjust = 0.2
    )
  ) +
  facet_grid(location ~ .)