Count mean subgroup occurrence within subgroup - r

I have the following dataframe:
date hour_of_day distance weather_of_the_day
2017-06-13 6 10.32 1
2017-06-13 8 2.32 1
2017-06-14 10 4.21 2
2017-06-15 7 4.56 4
2017-06-15 7 8.92 4
2017-06-16 22 2.11 3
structure(list(startdat = structure(c(17272, 17272, 17272, 17272,17272, 17272, 17272, 17272, 17272, 17272, 17272, 17272, 17272,17272, 17272, 17272, 17273, 17273, 17273, 17273), class = "Date"), hOfDay = c(22L, 16L, 12L, 13L, 18L, 19L, 19L, 16L, 22L, 10L,
10L, 16L, 11L, 20L, 9L, 15L, 18L, 12L, 16L, 18L), tripDKM = c(0.2,
6.4, 3.4, 0.8, 2.4, 2.2, 2.2, 7.3, 2.6, 3.8, 7.5, 5.8, 3.7,
2.1, 2.6, 5.2, 2.9, 1.7, 3.2, 3.1), totDMIN = c(1.85, 27.4,
8.2, 4.21666666666667, 15.65, 8.91666666666667, 11.5666666666667,
29.5166666666667, 7.01666666666667, 12.2166666666667, 15.8833333333333,
19.5666666666667, 21.7166666666667, 8.66666666666667, 11.2333333333333,
13.4, 7.58333333333333, 10.6166666666667, 6.76666666666667,
17.7), weather_day = structure(c(3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("1",
"2", "3", "4"), class = "factor")), row.names = c(1L, 2L,3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L, 16L, 17L, 19L, 20L, 21L, 22L), class = "data.frame")
My final goal is to have a line ggplot, where the x-axis shows the hour_of_day, the y-axis stands for the mean number of occurrences. Eventually the lines should represent the 4 weather conditions. So one line ought to represent weather_of_the_day=1, and the y axis shows how often, on average weather_day=1 has an occurrence with hour_of_day=6 (as an example) and so on for 7, 8, etc.. What I want, are not only the number of occurrences, but the average number of occurrences.
I've been struggling with this for 2 days. I've tried different approaches, with for loops and subgrouping, but none of them produced a usable solution. Thank you very much in advance for your help!

Your posted data set is a little small but this is what I would suggest. It only makes sense with more data points though. df is the set you posted.
library(dplyr)
library(ggplot2)
# Count the rows observed for every hour / weather combination.
# `df` is expected to carry the columns `hour_of_day` and `weather_of_the_day`.
# summarize() only peels off the last grouping variable, so the result would
# stay grouped by hour_of_day; ungroup() hands a plain data frame to ggplot.
df_plot <- df %>%
  mutate(weather_of_the_day = factor(weather_of_the_day)) %>%
  group_by(hour_of_day, weather_of_the_day) %>%
  summarize(occurances = n()) %>%
  ungroup()

# One line (plus markers) per weather condition: hour on x, count on y.
ggplot(
  data = df_plot,
  aes(
    x = hour_of_day,
    y = occurances,
    group = weather_of_the_day,
    color = weather_of_the_day
  )
) +
  geom_line() +
  geom_point()

I'm not completely sure if this matches your desired output, but I gave it a try:
#Importing packages
library(dplyr)
library(ggplot2)
d <- structure(list(startdat = structure(c(17272, 17272, 17272, 17272,17272, 17272, 17272, 17272, 17272, 17272, 17272, 17272, 17272,17272, 17272, 17272, 17273, 17273, 17273, 17273),
class = "Date"),
hOfDay = c(22L, 16L, 12L, 13L, 18L, 19L, 19L, 16L, 22L, 10L, 10L, 16L, 11L, 20L, 9L, 15L, 18L, 12L, 16L, 18L),
tripDKM = c(0.2, 6.4, 3.4, 0.8, 2.4, 2.2, 2.2, 7.3, 2.6, 3.8, 7.5, 5.8, 3.7, 2.1, 2.6, 5.2, 2.9, 1.7, 3.2, 3.1),
totDMIN = c(1.85, 27.4, 8.2, 4.21666666666667, 15.65, 8.91666666666667, 11.5666666666667, 29.5166666666667, 7.01666666666667, 12.2166666666667, 15.8833333333333, 19.5666666666667, 21.7166666666667, 8.66666666666667, 11.2333333333333, 13.4, 7.58333333333333, 10.6166666666667, 6.76666666666667, 17.7),
weather_day = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L),
.Label = c("1", "2", "3", "4"),
class = "factor")),
row.names = c(1L, 2L,3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L, 16L, 17L, 19L, 20L, 21L, 22L),
class = "data.frame")
# Count how often every weather_day occurs during every hOfDay.
# ungroup() drops the hOfDay grouping that summarize() leaves behind, so
# later steps working on plot_data do not silently operate per group.
plot_data <- d %>%
  group_by(hOfDay, weather_day) %>%
  summarize(n_occurences = n()) %>%
  ungroup()

# Create plot: one coloured line per weather_day level.
ggplot(plot_data, aes(x = hOfDay, y = n_occurences)) +
  geom_line(aes(col = weather_day))

Related

How can I add a geom_quantile legend to this customized facet_wrap?

I recently received great help from SO in producing this customized facet_wrap-plot shown below.
Question: how can I add the dotted linetype used in geom_quantile(linetype=2) as legend with the text "50th percentile"?
I have sought solutions in similar questions on SO, but my question has not been answered.
My current plot looks like this
Produced with the following code
# Palette for the colour/fill scales (four values, one per UICC stage).
cols <- c("#E1B930", "#2C77BF", "#E38072", "#6DBCC3")

ggplot(p, aes(x = n.fjernet, y = os.neck)) +
  # Raw observations, coloured by UICC stage.
  geom_point(aes(color = uiccc), shape = 20, size = 5, alpha = 0.7) +
  # Dashed median (0.5 quantile) regression line; kept out of the legend.
  # FALSE is spelled out because `F` is an ordinary, reassignable variable.
  geom_quantile(
    quantiles = 0.5, col = "black", size = 1, linetype = 2,
    show.legend = FALSE
  ) +
  facet_wrap(. ~ factor(uiccc)) +
  scale_fill_manual(values = cols) +
  scale_colour_manual(values = cols) +
  scale_x_continuous(breaks = seq(0, 50, by = 10), name = "Lymph nodal yield") +
  scale_y_continuous(name = "Time to death (months)") +
  theme(
    # Hide the facet strips; the stage names are drawn by geom_text() below.
    strip.background = element_blank(),
    strip.text = element_text(color = "transparent"),
    axis.title.x = element_text(color = "grey20", size = 14, face = "bold", margin = ggplot2::margin(t = 10)),
    axis.title.y = element_text(color = "grey20", size = 14, face = "bold", margin = ggplot2::margin(r = 10)),
    legend.position = "none",
    plot.margin = unit(c(1, 3, 1, 1), "lines")
  ) +
  # clip = "off" lets the y = 190 text be drawn above the 0-175 panel range.
  coord_cartesian(clip = "off", ylim = c(0, 175)) +
  geom_text(
    data = . %>% distinct(uiccc),
    aes(label = factor(uiccc), color = uiccc),
    y = 190, x = 30, hjust = 0.5, fontface = "bold", cex = 5
  )
I would like the plot to have a legend that indicates the "50th percentile" as demonstrated by the linetype=2 in geom_quantile(), exclusively, which looks like this (manually added in photoshop):
First: I have removed theme(legend.position="none")
Second: I have added show.legend=TRUE in geom_quantile
Third: I have added show.legend=FALSE in geom_point
Unfortunately, these edits do not produce the requested legend:
How can I proceed?
My data p
p <- structure(list(n.fjernet = c(18L, 11L, 14L, 15L, 9L, 6L, 3L,
16L, 4L, 6L, 10L, 13L, 33L, 16L, 6L, 9L, 23L, 9L, 8L, 13L, 5L,
30L, 25L, 3L, 9L, 9L, 12L, 7L, 38L, 5L, 7L, 15L, 4L, 6L, 15L,
9L, 8L, 7L, 4L, 6L, 10L, 8L, 4L, 9L, 10L, 14L, 14L, 3L, 4L, 6L,
6L, 20L, 3L, 26L, 13L, 13L, 13L, 13L, 3L, 7L, 6L, 5L, 10L, 15L,
29L, 7L, 6L, 11L, 17L, 14L, 18L, 22L, 9L, 20L, 34L, 9L, 8L, 8L,
11L, 3L, 4L, 4L, 5L, 3L, 2L, 8L, 5L, 18L, 7L, 9L, 13L, 18L, 19L,
14L, 46L, 23L, 11L, 6L, 18L, 20L, 4L, 2L, 7L, 7L, 4L, 11L, 13L,
13L, 9L, 9L, 9L, 12L, 11L, 16L, 6L, 13L, 8L, 17L, 5L, 8L, 22L,
19L, 3L, 15L, 14L, 7L, 18L, 9L, 10L, 18L, 24L, 11L, 15L, 7L,
6L, 4L, 24L, 23L, 8L, 20L, 9L, 22L, 11L, 2L, 24L, 15L, 5L, 8L,
11L, 11L, 11L, 15L, 6L, 16L, 7L, 9L, 16L, 11L, 33L, 27L, 16L,
57L, 5L, 7L, 8L, 11L, 15L, 15L, 12L, 5L, 9L, 49L, 11L, 28L, 19L,
13L, 23L, 11L, 12L, 10L, 4L, 14L, 6L, 12L, 32L, 13L, 12L, 4L,
11L, 17L, 10L, 5L, 15L, 21L, 19L, 11L, 31L, 9L, 20L, 11L, 16L,
12L, 6L, 16L, 27L, 30L, 18L, 18L, 10L, 7L, 23L, 16L, 15L, 4L,
12L, 9L, 10L, 11L, 7L, 8L, 8L, 7L, 6L, 9L, 9L, 13L, 15L, 12L,
35L, 12L, 5L, 19L, 27L, 34L, 10L, 16L, 18L, 6L, 22L), os.neck = c(11.5,
74.38, 17.02, 7.89, 96.03, 40.48, 17.74, 14.65, 62.46, 12.55,
9.92, 26.05, 45.47, 17.38, 39.72, 51.45, 8.61, 76.98, 67.09,
94.79, 72.15, 93.93, 17.05, 12.48, 91.6, 15.87, 11.04, 67.22,
67.02, 8.94, 6.6, 5.09, 10.68, 17.15, 0.07, 5.19, 40.77, 0.2,
170.88, 5.55, 1.61, 38.28, 10.58, 32.99, 110.98, 103.69, 122.32,
14.78, 42.74, 4.04, 8.28, 84.96, 11.7, 49.97, 120.48, 52.6, 71.26,
16.3, 100.14, 55.03, 6.51, 89.89, 51.71, 24.97, 55.66, 21.91,
81.48, 30.92, 1.58, 7.52, 30.75, 3.45, 19.22, 5.42, 17.68, 45.54,
76.22, 125.34, 83.62, 30.82, 90.32, 1.84, 19.98, 20.53, 32.59,
54.77, 2.3, 106.84, 22.28, 45.18, 4.47, 39.66, 32.3, 16.23, 3.88,
2.23, 0.23, 18.73, 0.79, 28.75, 79.54, 14.46, 15.15, 54.97, 48.59,
34.83, 58.42, 35.29, 45.73, 57.53, 63.11, 65.05, 29.54, 77.21,
63.48, 83.35, 34.3, 64.49, 29.54, 62.69, 21.62, 49.35, 99.02,
15.8, 41.89, 12.98, 13.8, 43.6, 57.23, 31.38, 70.74, 39.46, 20.76,
67.22, 127.15, 74.12, 1.97, 7.39, 25.17, 28.22, 14, 36.53, 20.83,
19.55, 40.77, 27.76, 45.31, 34.46, 35.55, 26.94, 9.43, 10.51,
6.8, 8.18, 8.02, 14.29, 6.11, 13.8, 4.9, 4.04, 14.82, 11.66,
73.07, 92.91, 99.98, 10.64, 10.05, 95.8, 7.23, 12.81, 43.99,
13.9, 10.25, 16.36, 18.2, 18.76, 12.32, 8.64, 11.79, 112.04,
70.97, 31.28, 28.85, 21.49, 19.94, 22.14, 29.44, 67.62, 11.01,
45.24, 110.72, 20.24, 14.06, 12.88, 31.51, 8.08, 13.08, 21.45,
24.28, 21.98, 32.89, 23.26, 15.41, 15.41, 13.8, 40.12, 8.02,
15.77, 49.81, 18.17, 24.21, 47.08, 6.6, 37.16, 13.01, 8.38, 14.36,
18.27, 17.28, 73.76, 68.21, 22.83, 2.66, 69.06, 17.05, 8.61,
23.33, 13.34, 12.65, 8.77, 128.92, 16.1, 4.99, 11.73, 22.97,
40.12, 20.37, 2.04, 45.73), uiccc = structure(c(4L, 3L, 3L, 2L,
2L, 2L, 2L, 4L, 1L, 1L, 2L, 1L, 4L, 2L, 1L, 2L, 3L, 1L, 2L, 3L,
2L, 1L, 2L, 3L, 2L, 4L, 1L, 1L, 2L, 4L, 4L, 1L, 3L, 3L, 4L, 3L,
1L, 4L, 2L, 3L, 4L, 4L, 4L, 3L, 2L, 4L, 1L, 4L, 2L, 4L, 4L, 2L,
4L, 4L, 1L, 4L, 2L, 3L, 2L, 2L, 3L, 2L, 4L, 4L, 2L, 2L, 3L, 1L,
4L, 4L, 4L, 4L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 4L, 2L, 4L, 1L, 2L, 1L, 1L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 2L, 3L, 3L, 4L, 1L, 1L, 3L, 1L, 4L, 2L, 1L, 3L, 1L, 2L, 1L,
1L, 4L, 1L, 1L, 4L, 1L, 1L, 3L, 2L, 2L, 1L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 2L, 2L, 4L, 4L, 2L, 3L, 4L, 2L, 4L, 1L, 1L, 3L, 3L, 1L,
1L, 3L, 4L, 4L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 2L,
2L, 4L, 3L, 1L, 4L, 3L, 4L, 4L, 3L, 1L, 4L, 4L, 4L, 4L, 2L, 2L,
4L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 4L, 4L, 2L,
4L, 4L, 2L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 3L, 1L, 2L, 1L, 2L, 2L, 4L, 4L, 4L, 4L,
4L, 4L, 1L, 3L, 4L, 4L, 1L, 3L, 3L, 4L, 3L), .Label = c("UICC Stage I",
"UICC Stage II", "UICC Stage III", "UICC Stage IV"), class = "factor")), row.names = c(NA,
-239L), class = "data.frame")
One possible solution is to specify a "fake" color title in the aes of geom_quantile.
Then, you can remove legend for points and labels by adding show.legend = FALSE into their respective geom and manipulate breaks in scale_color_manual to show only the color attribute for your 50th percentile category. Finally, manipulating various parameters for legend in theme will get you to what you desired.
Altogether, you can have a code like this (PS: scale_fill_manual is useless in your current example):
# The colour scale is trained on five values: the constant "50th percentile"
# mapped in geom_quantile() plus the four UICC stages. The four unnamed `cols`
# are therefore not enough ("Insufficient values in manual scale"), and none
# of them is black. A named vector pins every level to its colour explicitly.
legend_cols <- c(
  "50th percentile" = "black",
  setNames(cols, levels(p$uiccc))
)

ggplot(p, aes(x = n.fjernet, y = os.neck)) +
  # Points keep their stage colours but stay out of the legend.
  geom_point(aes(color = uiccc), shape = 20, size = 5, alpha = 0.7, show.legend = FALSE) +
  # Mapping a constant string to colour is what creates the legend entry.
  geom_quantile(aes(colour = "50th percentile"), quantiles = 0.5, size = 1, linetype = 2) +
  facet_wrap(. ~ factor(uiccc)) +
  # scale_fill_manual(values = cols) +
  # `breaks` restricts the legend to the quantile line only.
  scale_colour_manual(values = legend_cols, breaks = c("50th percentile"), name = "") +
  scale_x_continuous(breaks = seq(0, 50, by = 10), name = "Lymph nodal yield") +
  scale_y_continuous(name = "Time to death (months)") +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(color = "transparent"),
    axis.title.x = element_text(color = "grey20", size = 14, face = "bold", margin = ggplot2::margin(t = 10)),
    axis.title.y = element_text(color = "grey20", size = 14, face = "bold", margin = ggplot2::margin(r = 10)),
    legend.position = "top",
    legend.text = element_text(size = rel(2)),
    legend.key.size = unit(2, "cm"),
    plot.margin = unit(c(1, 3, 1, 1), "lines")
  ) +
  coord_cartesian(clip = "off", ylim = c(0, 175)) +
  # Stage names drawn above each panel, also excluded from the legend.
  geom_text(
    data = . %>% distinct(uiccc),
    aes(label = factor(uiccc), color = uiccc),
    y = 190, x = 30, hjust = 0.5, fontface = "bold", cex = 5,
    show.legend = FALSE
  )

Changing the order of discrete variables on the X axis --R

I am using the following code to make my graph: .
# Sample-size annotations: one single-row data frame per facet (Type).
label1 <- data.frame(x = 2, y = 2, Type = "FYS", label = "N=15")
label2 <- data.frame(x = 2, y = 2, Type = "SNR", label = "N=24")

# Rating per week, one coloured line per student, one facet row per Type.
ggplot(
  Q,
  aes(x = weeks, y = Rating, group = StudentFactor, colour = StudentFactor)
) +
  geom_point() +
  geom_line() +
  facet_grid(Type ~ .) +
  # Overall linear trend per facet (group = 1 pools all students).
  geom_smooth(
    aes(group = 1),
    method = "lm", formula = y ~ poly(x), se = FALSE, colour = "black"
  ) +
  theme(legend.position = "none") +
  labs(x = "Date", y = "Students' Average Engagement over Time") +
  # inherit.aes = FALSE: the label frames lack the columns of the main mapping.
  geom_text(data = label1, aes(x = x, y = y, label = label), inherit.aes = FALSE) +
  geom_text(data = label2, aes(x = x, y = y, label = label), inherit.aes = FALSE)
However, the dates at the bottom are out of order. Instead of using x= weeks, I could use x=timePeriod which would make the points be in order, but the labels to be wrong.
I have tried adding the following code to order the levels of weeks,
df$weeks <- factor(df$weeks, order=TRUE, levels=weeks)
but I keep getting an error saying object of type 'closure' is not subsettable.
I have attached my data below:
> dput (Q)
structure(list(StudentFactor = structure(c(1L, 3L, 4L, 8L, 11L,
13L, 14L, 15L, 18L, 19L, 21L, 22L, 24L, 30L, 31L, 32L, 36L, 38L,
27L, 34L, 35L, 1L, 3L, 4L, 8L, 11L, 13L, 14L, 18L, 19L, 21L,
22L, 24L, 2L, 5L, 6L, 7L, 9L, 10L, 12L, 16L, 17L, 20L, 23L, 25L,
26L, 28L, 29L, 30L, 31L, 32L, 33L, 36L, 37L, 38L, 40L, 41L, 34L,
39L, 1L, 3L, 4L, 8L, 11L, 13L, 14L, 15L, 18L, 19L, 21L, 24L,
2L, 5L, 6L, 7L, 9L, 10L, 12L, 16L, 17L, 20L, 23L, 25L, 28L, 30L,
31L, 33L, 36L, 37L, 38L, 40L, 41L, 34L, 35L, 39L, 1L, 3L, 4L,
8L, 11L, 14L, 15L, 18L, 21L, 22L, 24L, 2L, 6L, 7L, 9L, 10L, 12L,
16L, 17L, 20L, 23L, 31L, 33L, 36L, 37L, 40L, 27L, 34L, 1L, 3L,
4L, 8L, 11L, 13L, 14L, 15L, 18L, 19L, 21L, 22L, 2L, 5L, 6L, 7L,
9L, 10L, 12L, 16L, 17L, 20L, 23L, 28L, 30L, 31L, 32L, 33L, 36L,
38L, 41L, 27L, 34L, 35L, 1L, 3L, 4L, 11L, 14L, 15L, 18L, 19L,
21L, 22L, 24L, 2L, 5L, 6L, 9L, 10L, 12L, 16L, 20L, 23L, 29L,
30L, 31L, 32L, 33L, 36L, 38L, 41L, 27L, 34L, 35L, 1L, 3L, 11L,
13L, 14L, 15L, 18L, 19L, 21L, 22L, 24L, 2L, 6L, 7L, 9L, 10L,
12L, 16L, 17L, 20L, 23L, 28L, 29L, 30L, 31L, 36L, 37L, 38L, 40L,
41L, 27L, 34L, 35L, 39L, 1L, 3L, 4L, 11L, 13L, 14L, 15L, 18L,
19L, 21L, 22L, 24L, 2L, 7L, 10L, 12L, 16L, 17L, 20L, 28L, 29L,
30L, 31L, 32L, 33L, 36L, 37L, 38L, 40L, 41L, 27L, 34L, 35L, 1L,
11L, 13L, 14L, 18L, 19L, 21L, 22L, 24L, 2L, 6L, 7L, 10L, 12L,
16L, 28L, 30L, 31L, 33L, 36L, 34L, 1L, 4L, 14L, 15L, 18L, 19L,
21L, 22L, 24L, 2L, 7L, 9L, 10L, 12L, 16L, 17L, 20L, 23L, 29L,
30L, 31L, 32L, 33L, 36L, 37L, 40L, 41L, 27L, 34L, 39L, 1L, 3L,
4L, 11L, 13L, 14L, 15L, 18L, 22L, 24L, 2L, 6L, 7L, 9L, 10L, 12L,
16L, 17L, 20L, 23L, 30L, 31L, 36L, 37L, 38L, 41L, 27L), .Label = c("789331",
"796882", "805933", "826523", "827911", "830271", "831487", "832929",
"834598", "836364", "838607", "839802", "841903", "843618", "852125",
"855524", "873527", "876406", "879972", "885409", "885650", "888712",
"894218", "903303", "928026", "932196", "952797", "955389", "956952",
"957206", "957759", "959200", "962490", "965873", "967416", "968728",
"969005", "971179", "975424", "976863", "981621"), class = "factor"),
Type = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("FYS", "SNR"), class = "factor"),
weeks = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Apr5",
"Feb1", "Feb15", "Feb8", "Jan11", "Jan25", "Mar1", "Mar15",
"Mar22", "Mar29", "Mar8"), class = "factor"), timePeriod = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L), class = "factor", .Label = c("Rt1", "Rt2", "Rt3", "Rt4",
"Rt5", "Rt6", "Rt7", "Rt8", "Rt9", "Rt10", "Rt11")), Rating = c(3.6,
4.8, 4.4, 3.8, 5, 3.2, 4.4, 3.2, 3.6, 3.8, 4, 4.4, 3.6, 4,
3.8, 3, 3.6, 4.4, 3.6, 3.4, 4.2, 3.8, 4, 4.2, 3.8, 5, 4.2,
4.4, 4, 3.8, 4.4, 4, 3.8, 4.4, 4.2, 4.6, 4.4, 5, 4, 3.4,
5, 3.8, 4.8, 4.4, 4.6, 3.2, 5, 4.2, 4.4, 4.4, 3.4, 3.8, 3.8,
3.6, 4.8, 4.4, 4.8, 4.75, 4, 4, 4, 4.2, 3.8, 5, 4.2, 4.6,
3.8, 4.2, 3.8, 4, 4.6, 4, 3.6, 4.8, 4.2, 3.8, 4, 2, 4.6,
3.8, 4.6, 4.4, 4.8, 4.6, 4, 4.4, 4.2, 3.6, 4.6, 4.4, 5, 4.6,
5, 4.2, 3.4, 4.2, 3.6, 4.4, 4, 5, 4.4, 4, 4, 4, 4.2, 4, 4,
5, 4.6, 4, 4, 1.8, 4.6, 4.2, 4.8, 4.6, 4.4, 4.2, 3.4, 4.4,
3.8, 4, 5, 3.4, 3.2, 4.6, 3.6, 5, 3.6, 4.4, 3.8, 4, 4, 4.2,
4.4, 2.8, 3.4, 5, 4.4, 4.2, 3.6, 4.2, 4.2, 4, 4.4, 5, 4,
4, 3.8, 3.2, 4.2, 3.4, 4.4, 5, 4.4, 4, 4.2, 2.4, 3.2, 4.6,
4.4, 4.4, 3.6, 2.4, 4.2, 4, 4.4, 3.4, 3.6, 3.4, 4.4, 4, 3.2,
2.2, 4.4, 4.4, 5, 3.2, 4.4, 4, 3, 4.6, 3, 4.25, 4.2, 3.6,
3.8, 4.4, 3, 3.2, 4.2, 4, 4.4, 3.6, 2.8, 4, 4.4, 4.6, 3.8,
2.8, 4.8, 4.2, 4, 3.6, 3, 4.8, 4.2, 4.2, 5, 4.4, 4.4, 4,
3.2, 1, 4.4, 4.2, 3.6, 3.8, 4, 1.4, 4.6, 2.8, 3.2, 3.2, 4.6,
4.4, 3.4, 4.2, 4, 3.8, 4, 4.2, 3.8, 3.6, 1.4, 4.6, 3.6, 4.2,
4, 4.4, 4.4, 4.6, 4.2, 4.2, 3.2, 4, 3.6, 3, 4.6, 4.8, 3.6,
4.2, 4.2, 2.2, 5, 3.2, 3.8, 4.2, 3.6, 3, 4, 3.8, 4.2, 3.8,
2.2, 5, 4.8, 3.4, 2.8, 5, 4.4, 4, 3, 1, 3, 1.6, 3.6, 4.2,
4, 3.4, 3.2, 4, 4, 4, 3.6, 2, 4.4, 4, 3.4, 1.8, 4.2, 3.8,
3.8, 4, 4.2, 3.8, 4.2, 4.2, 3.2, 1.6, 4.6, 4, 5, 4, 3.4,
3.6, 4, 3.2, 4.2, 3.6, 4.6, 4.4, 4.6, 4.2, 4.6, 4.6, 4.2,
5, 4.6, 4.2, 4, 4, 4.6, 4.4, 3.6, 5, 4.4, 4.6, 1.6, 4.6,
5, 5, 4)), class = "data.frame", row.names = c(NA, -333L), .Names = c("StudentFactor",
"Type", "weeks", "timePeriod", "Rating"))
I just changed the format of the week column. Does it work for you?
# Parse the third column of Q (labels such as "Jan11") as dates, then
# re-format them as "MM DD" so that alphabetical order equals date order.
newdate <- format(as.Date(Q[[3]], format = "%b%d"), "%m %d")
QQ <- cbind(Q, newdate)

# Same plot as in the question, with the sortable date string on the x axis.
ggplot(
  QQ,
  aes(x = factor(newdate), y = Rating, group = StudentFactor, colour = StudentFactor)
) +
  geom_point() +
  geom_line() +
  facet_grid(Type ~ .) +
  geom_smooth(
    aes(group = 1),
    method = "lm", formula = y ~ poly(x), se = FALSE, colour = "black"
  ) +
  theme(legend.position = "none") +
  labs(x = "Date", y = "Students' Average Engagement over Time") +
  geom_text(data = label1, aes(x = x, y = y, label = label), inherit.aes = FALSE) +
  geom_text(data = label2, aes(x = x, y = y, label = label), inherit.aes = FALSE)

How to count and display factors on the x/y scale?

Problem
I am trying to count the data factor-wise and display the counts on the scale of an axis.
My closest solution is the following:
aes(x=(paste(A_REF,"(n=", length(A_REF), ")"))
n is the number of occurrences of the factor level in the data field.
Edit: How do I achieve that the first and fifth factor of V43 show up? --> forgot to library("foreign")
Current State: Solved
My Code so far
# Load libraries & packages =================================
library("ggplot2")
library("scales")
library("dplyr")
library("foreign")
# Data setup =================================
spss_file_path <- "D:\\Programming\\Testing\\2017-03-15_data_import&ggplot2\\Beispieldatensatz(fiktiv).sav"
exampledata <- read.spss(
  spss_file_path,
  use.value.labels = TRUE,
  to.data.frame = TRUE,
  reencode = TRUE
)

# Column names must be character strings. Bare symbols would be looked up as
# variables and fail with "object 'V101' not found".
names(exampledata) <- c("V101", "A_REF", "V43")

# Attach readable labels to the five satisfaction codes of V43.
exampledata$V43 <- factor(
  exampledata$V43,
  levels = c(1, 2, 3, 4, 5),
  labels = c("1 Sehr zufrieden", "2", "3", "4", "5 Sehr unzufrieden")
)

# Reverse the level order of each factor (relevant for the flipped bar chart).
exampledata$V43 <- factor(exampledata$V43, levels = rev(unique(levels(exampledata$V43))))
exampledata$A_REF <- factor(exampledata$A_REF, levels = rev(unique(levels(exampledata$A_REF))))
exampledata$V101 <- factor(exampledata$V101, levels = rev(unique(levels(exampledata$V101))))
# Axis labels of the form "<A_REF> (n= <count> )", counted over the rows
# where both V101 and V43 are present.
labels <- exampledata %>%
  filter(!is.na(V101), !is.na(V43)) %>%
  count(A_REF) %>%
  transmute(A_REF, labels = paste(A_REF, "(n=", n, ")"))

# Same complete rows, with the matching label attached to each one.
plot_data <- exampledata %>%
  filter(!is.na(V101), !is.na(V43)) %>%
  left_join(labels, by = "A_REF")
# Plot =================================
# Horizontal 100% stacked bars: one bar per label, segments filled by V43.
ggplot(plot_data, aes(x = labels, fill = V43)) +
  geom_bar(position = "fill") +
  # Share of each segment in the overall count, centred inside the segment.
  geom_text(
    aes(label = scales::percent((..count..) / sum(..count..))),
    stat = "count",
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(
    labels = scales::percent,
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)
  ) +
  labs(y = NULL, x = NULL, fill = NULL) +
  # Title is read from element 77 of the "variable.labels" attribute.
  ggtitle(paste(attr(exampledata, "variable.labels")[77])) +
  theme_classic() +
  coord_flip()
Data
structure(list(exampledata.V101 = structure(c(2L, NA, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L, NA,
NA, NA, 1L, 1L, 2L, NA, 2L, 2L, 2L, NA, 2L, 2L, NA, NA, 1L, NA,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, NA, NA, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, NA, 1L, NA, 1L, NA,
1L, 2L, NA, NA, 2L, NA, 1L, 2L, 2L, NA, 2L, NA, 2L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 1L, NA, 2L, 2L, 2L, 2L, NA, 2L, 1L, 2L, 2L
), .Label = c("Weiblich", "Männlich"), class = "factor"), exampledata.A_REF = structure(c(18L,
18L, 18L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L, 18L, 16L, 18L,
16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 16L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 18L, 18L,
16L, 18L, 16L, 18L, 18L, 16L, 16L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L,
16L, 16L, 18L, 18L, 18L, 17L, 16L, 18L), .Label = c("Zertifikat eines Aufbau- oder Ergänzungsstudiums",
"LA Berufliche Schulen", "LA Sonderschule", "LA Gymnasium", "LA Haupt- und Realschule",
"LA Grundschule", "Künstlerischer/musischer Abschluss", "Kirchlicher Abschluss",
"Staatsexamen (ohne Lehramt)", "Diplom Fachhochschule, Diplom I an Gesamthochschulen",
"Diplom Universität, Diplom II an Gesamthochschulen", "Sonstiges",
"Promotion", "Staatsexamen", "Magister", "Diplom", "Master",
"Bachelor"), class = "factor"), exampledata.V43 = structure(c(3L,
5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 4L, 3L, 3L, 2L, NA, 4L, 5L, 5L,
4L, 4L, 4L, 4L, NA, 2L, 4L, 3L, 5L, 4L, 4L, 4L, NA, 4L, 4L, NA,
NA, 3L, 5L, 2L, 4L, 5L, 4L, 4L, 5L, 5L, 4L, NA, NA, 4L, NA, 3L,
4L, 5L, 5L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 5L, 4L, 5L, NA, 4L,
NA, 4L, NA, 4L, 5L, 4L, NA, 5L, NA, 4L, 4L, 4L, NA, 4L, NA, 5L,
4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 4L, 4L, 4L, 3L, 4L, NA, 4L,
5L, 5L, 4L), .Label = c("5 Sehr unzufrieden", "4", "3", "2",
"1 Sehr zufrieden"), class = "factor")), .Names = c("exampledata.V101",
"exampledata.A_REF", "exampledata.V43"), row.names = c(NA, 100L
), class = "data.frame")
I think the easiest way is to compute the labels outside of ggplot.
Note that with your data, the 5th level of V43 doesn't show up.
library(ggplot2)
library(dplyr)
# Give the three columns short names.
exampledata <- setNames(exampledata, c("V101", "A_REF", "V43"))
I count A_REF and then apply your formula to compute the labels.
# Per-level label "<A_REF> (n= <count> )", computed on rows where V101 and
# V43 are both non-missing.
labels <- exampledata %>%
  filter(!is.na(V101), !is.na(V43)) %>%
  count(A_REF) %>%
  transmute(A_REF, labels = paste(A_REF, "(n=", n, ")"))
I then join the labels to the data
# Keep rows with both V101 and V43 present, then look up each row's label.
plot_data <- left_join(
  filter(exampledata, !is.na(V101), !is.na(V43)),
  labels,
  by = "A_REF"
)
And finally, here is the plot. Note that the title doesn't show up as well.
# Horizontal 100% stacked bars: one bar per label, segments filled by V43.
ggplot(plot_data, aes(x = labels, fill = V43)) +
  geom_bar(position = "fill") +
  # Share of each segment in the overall count, centred inside the segment.
  geom_text(
    aes(label = scales::percent((..count..) / sum(..count..))),
    stat = "count",
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(
    labels = scales::percent,
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)
  ) +
  labs(y = NULL, x = NULL, fill = NULL) +
  # Title is read from element 77 of the "variable.labels" attribute.
  ggtitle(paste(attr(exampledata, "variable.labels")[77])) +
  theme_classic() +
  coord_flip()

How to subtract rows where column factor matches using dplyr?

I'm moving to dplyr for much of my data wrangling, but I can't figure out how to do a row-diff based on a factor with it.
I can use ddply from plyr as follows:
# Per the_factor group: successive differences of the_number.
ddply(
  dat_frame,
  .(the_factor),
  summarise,
  diff = diff(the_number)
)
the_factor diff
1 169 0.000
2 169 0.000
3 372 22.557
4 372 0.000
5 372 -19.491
6 372 2.940
7 372 -2.767
8 372 -5.310
9 508 0.000
Source data:
structure(list(the_factor = structure(c(3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 8L, 7L, 10L, 5L, 9L, 2L, 2L, 2L, 1L, 11L, 6L, 6L), .Label = c("166",
"169", "276", "372", "409", "508", "523", "714", "846", "876",
"969"), class = "factor"), the_date = structure(c(4L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 1L, 8L, 7L, 2L, 11L, 2L, 3L, 5L, 10L,
8L, 6L, 9L), .Label = c("2012-05-19 21:27:00", "2012-08-02 03:49:00",
"2012-08-02 03:50:00", "2012-08-02 03:52:00", "2012-08-02 08:36:00",
"2013-03-15 03:38:00", "2013-03-15 03:40:00", "2013-03-15 03:41:00",
"2013-03-15 09:14:00", "2013-04-24 13:45:00", "2013-09-04 09:17:00",
"2014-03-12 14:21:00", "2014-03-12 19:45:00", "2014-03-13 04:51:00",
"2014-03-13 21:04:00", "2014-03-14 01:18:00", "2014-03-14 04:49:00",
"2014-03-14 12:09:00"), class = "factor"), the_number = c(0.02,
17.443, 40, 40, 20.509, 23.449, 20.682, 15.372, 0.02, 0.02, 0.02,
0.02, 1.74, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02)), .Names = c("the_factor",
"the_date", "the_number"), row.names = c(NA, -20L), class = "data.frame")

Bar plot values that are dodged [duplicate]

This question already has an answer here:
Position geom_text on dodged barplot
(1 answer)
Closed 8 years ago.
I have a problem plotting a simple bar plot in ggplot2. I'd like to display value labels above each bar, but I am using position=position_dodge to plot the bars alongside each other, as I have multiple factor variables along my x-axis for a number of data points which refer to individual countries.
Currently using the following code I am getting all values centred over the name of the COUNTRY along my x-axis, whereas I'd like each value centred over each bar along the x-axis:
# One axis label per country ("<COUNTRY><Year>"), named by country so that
# ggplot matches labels to axis breaks by name. Pasting all rows would give
# 84 labels in row order for 14 breaks, so the labels would not line up.
# NOTE(review): assumes each COUNTRY has a single Year, as in the posted data.
country_year <- unique(maln_complete_recent_melt[c("COUNTRY", "Year")])
axis_labels <- setNames(
  paste0(country_year$COUNTRY, country_year$Year),
  as.character(country_year$COUNTRY)
)

ggplot(maln_complete_recent_melt, aes(x = COUNTRY, y = percent, fill = location)) +
  # stat = "identity": bar heights come from `percent`, not from row counts.
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  labs(title = "Percentage of malnoursihed children according to height for age", fill = "DHS Survey") +
  # No position given: labels are centred on the country, not on each bar.
  geom_text(aes(label = round(percent))) +
  scale_x_discrete(labels = axis_labels)
And this is how my plot currently looks:
I'm sure this is something really simple, but having trawled the forums and consulted a number of books on R, I can't find what I'm looking for.
Here's my data for replication:
structure(list(COUNTRY = structure(c(4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L, 4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L, 4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L, 4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L, 4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L, 4L, 10L, 5L, 8L, 14L, 13L,
9L, 2L, 11L, 1L, 6L, 7L, 12L, 3L), class = "factor", .Label = c("Swaziland",
"Namibia", "Zimbabwe", "Comoros", "Kenya", "Tanzania", "Uganda",
"Lesotho", "Mozambique", "Ethiopia", "Rwanda", "Zambia", "Malawi",
"Madagascar")), Year = structure(c(5L, 23L, 20L, 21L, 20L, 14L,
12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L, 5L, 23L, 20L, 21L, 20L,
14L, 12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L, 5L, 23L, 20L, 21L,
20L, 14L, 12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L, 5L, 23L, 20L,
21L, 20L, 14L, 12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L, 5L, 23L,
20L, 21L, 20L, 14L, 12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L, 5L,
23L, 20L, 21L, 20L, 14L, 12L, 18L, 15L, 18L, 14L, 17L, 19L, 22L
), class = "factor", .Label = c("1992", "1993", "1994", "1995",
"1996", "1997", "1998", "1999", "2000", "2000/1", "2001/2", "2003",
"2003/4", "2004", "2005", "2005/6", "2006", "2006/7", "2007",
"2008/9", "2009", "2010", "2011")), location = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L), .Label = c("Urban", "Rural", "Total", "Capital.City",
"Urban.Non.slum", "Urban.Slum"), class = "factor"), percent = c(29.9107142857143,
31.5, 26.4, 30.2, 43.4, 37.8, 29.2, 23.8, 33.1, 23.1, 25.8, 25.5,
39, 27.5, 35.0071736011478, 46.2, 37.1, 41, 50.9, 49.2, 45.7,
31.4, 47.2501049401341, 30, 40.5, 39.5, 47.9, 33.4, 33.8, 44.4,
35.3, 39.2, 50.1, 47.8, 41, 29, 45.3, 28.9, 37.7, 38.1, 45.4,
32, NA, 22, 28.5, 31.7, 46.8, NA, 20.6, NA, 29.2, NA, 16.9, NA,
37.2, 29, 21.6666666666667, 10.7799374501425, 16.0100871430598,
21.2511075992408, 23.7772452013661, 23.8620542603877, 8.15543422154615,
12.5321762341041, 13.5923403336176, 13.0494984481826, 16.4644101423357,
21.5426476162423, 27.4144189089535, 16.2492480072194, 32.9268292682927,
27.8790407698325, 25.0985529949154, 26.2950824206824, 37.9693371617388,
40.5197947988247, 29.3593820697417, 28.5099311066166, 36.0242620139593,
20.7853672439948, 28.5877166151249, 22.8001055045257, 35.5434627654004,
28.0633523545878)), .Names = c("COUNTRY", "Year", "location",
"percent"), row.names = c(NA, -84L), class = "data.frame")
The answer was in the following post: Position geom_text on dodged barplot
Just needed to add the position=position_dodge call into geom_text() as follows:
# One axis label per country ("<COUNTRY><Year>"), named by country so that
# ggplot matches labels to axis breaks by name. Pasting all rows would give
# 84 labels in row order for 14 breaks, so the labels would not line up.
# NOTE(review): assumes each COUNTRY has a single Year, as in the posted data.
country_year <- unique(maln_complete_recent_melt[c("COUNTRY", "Year")])
axis_labels <- setNames(
  paste0(country_year$COUNTRY, country_year$Year),
  as.character(country_year$COUNTRY)
)

ggplot(maln_complete_recent_melt, aes(x = COUNTRY, y = percent, fill = location)) +
  # stat = "identity": bar heights come from `percent`, not from row counts.
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  labs(title = "Percentage of malnoursihed children according to height for age", fill = "DHS Survey") +
  # Dodge the text by the same width as the bars so each value sits over its
  # own bar; vjust lifts it just above the bar top.
  geom_text(
    aes(label = round(percent)),
    position = position_dodge(width = 0.9),
    vjust = -0.5
  ) +
  scale_x_discrete(labels = axis_labels)
Which produced this:
@marty_c answered your question. If you're not required to use this format, though, you might consider facets (using df = maln_complete_recent_melt):
# One axis label per country ("<COUNTRY><Year>"), named by country so that
# ggplot matches labels to axis breaks by name instead of by row position.
# NOTE(review): assumes each COUNTRY has a single Year, as in the posted data.
country_year <- unique(df[c("COUNTRY", "Year")])
axis_labels <- setNames(
  paste0(country_year$COUNTRY, country_year$Year),
  as.character(country_year$COUNTRY)
)

# One facet row per location instead of dodged bars.
ggplot(df, aes(x = COUNTRY, y = percent, fill = location)) +
  # Bars of pre-computed values: geom_bar(stat = "identity") rather than a
  # histogram geom whose binning parameters would be ignored.
  geom_bar(stat = "identity") +
  labs(title = "Percentage of malnoursihed children according to height for age", fill = "DHS Survey") +
  # Value labels anchored at the baseline of each bar.
  geom_text(aes(label = round(percent), y = 0), vjust = -0.5, size = 4) +
  scale_x_discrete(labels = axis_labels) +
  theme(axis.text.x = element_text(angle = -90, color = "black", hjust = 0, vjust = 0.2)) +
  facet_grid(location ~ .)

Resources