Forgive me if this has already been asked and answered, but so far I have not found a solution that meets my use case. If you know of one, please point me in the right direction.
Here is a small working sample of my code:
library(tidyverse)
# Sample data: each vector below becomes one column of the data frame `test`.
# `qty` is the value plotted on the y axis; the other columns are used as the
# x variable, fill colour and facets in the plot that follows.
source <- c("D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P")
subject <- c("M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R")
grade <- c(1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2)
domain <- c("Alg", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc")
placement <- c("A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A")
qty <- c(425, 389, 96, 460, 293, 163, 518, 291, 101, 366, 349, 201, 889, 661, 150, 680, 617, 465, 445, 293, 112, 381, 292, 208, 223, 232, 131, 270, 72, 27, 45, 9, 99, 40, 79, 194, 72, 126, 133, 123, 456, 98, 234, 432, 65)
# Combine the vectors into one data frame, one column per vector.
test <- data.frame(source, subject, grade, domain, placement, qty)
# Build a stacked bar chart of qty per grade, filled by placement and
# faceted by subject and domain, keep it in plot4, then print it.
plot4 <- test %>%
ggplot(aes(x = grade, y = qty, fill = placement)) +
# position = "fill" rescales every stack to the same total height
geom_bar(stat = "identity", position = "fill") +
# Text layer intended to print a percentage and a count inside each segment.
# NOTE(review): the question text reports that these numbers are wrong.
stat_count(aes(label = paste(sprintf("%1.2f", ..count../sum(..count..)*100), "%\n", ..count..), y = 0.5*..count..),
geom = "text",
colour = "black",
size = 2.5,
position = position_fill(vjust = 0.5)) +
# grade is numeric but is placed on a discrete x scale with custom labels
scale_x_discrete("Grade", limits = c(1, 2),
labels = c("1st", "2nd"),
guide = guide_axis(angle = 90)) +
scale_y_continuous("Prop place") +
# colours and legend labels are both given in reversed order
scale_fill_manual("Placement",
values = rev(c("#de7e7e", "#ebeb4d", "#70e65e")),
labels = rev(c("C",
"B",
"A"))
) +
# hide the y-axis title and tick labels
theme(axis.title.y = element_blank(),
axis.text.y = element_blank()) +
# one panel per subject/domain combination, each with its own x axis
facet_wrap(vars(subject, domain), scales = "free_x")
plot4
which produces the following plot:
What I need for each grade level (in this case, 1st and 2nd) is to have two stacked bars with the data centered in each region of the stack. There should be one bar from each source ("D" and "P") associated with each grade level.
I do not know how to add a second variable to the x-axis so that there is a bar from each source FOR EACH grade. I can use either grade or source, but not both.
*BONUS POINTS for helping me figure out the stat_count line. I need the n() and % of each stack relative to the total for each source/grade/placement value. These numbers aren't even close, but this code has worked for me on less complicated plots.
This may be what you are after
# Label data for the text layer: for every bar (grade x source within a
# subject/domain facet) compute each row's count `n` and its share `p` of
# the bar total, plus the printable label `lab`.
#
# mutate() is used rather than summarise() so that the result keeps exactly
# one row per input row, in the original order, and keeps the `placement`
# column. The plot maps `fill = placement` globally, and the label layer
# inherits that mapping, so `placement` must be present in this data frame
# for the labels to be stacked in the same order as the bar segments.
my_labels <- test %>%
  group_by(grade, source, domain, subject) %>%
  mutate(
    n = qty,
    p = qty / sum(qty)
  ) %>%
  # drop the grouping so the row-wise columns below (and later code) are
  # evaluated on the whole table
  ungroup() %>%
  mutate(
    lab = paste(n, "\n", sprintf("%1.2f", p * 100), "%"),
    x = interaction(grade, source)
  )
The interaction() function helps to get combinations of grade and source for plotting
# Proportion-stacked bars with one bar per grade/source combination.
# interaction(grade, source) supplies the combined x variable; the labels
# come from the pre-computed `my_labels` table.
test %>%
  ggplot(
    aes(
      x = interaction(grade, source),
      y = qty,
      fill = placement
    )
  ) +
  geom_bar(stat = "identity", position = "fill") +
  facet_wrap(vars(subject, domain), scales = "free_x") +
  scale_y_continuous("Prop place") +
  scale_fill_manual(
    "Placement",
    values = rev(c("#de7e7e", "#ebeb4d", "#70e65e")),
    labels = rev(c("C", "B", "A"))
  ) +
  scale_x_discrete(
    "Grade",
    # add new X labels
    labels = c("1st D", "2nd D", "1st P", "2nd P"),
    guide = guide_axis(angle = 90)
  ) +
  # adjust position to get labels where you want.
  geom_text(
    data = my_labels,
    aes(label = lab, y = p),
    position = position_stack(vjust = 0.2)
  )
Some of the comments on this related question may also help: "Adding percentage labels to ggplot when using stat_count".
Thank you @e.matt. With your answer (and a little tweaking) I was able to get this exactly how I wanted it. I used mutate() to add a pcnt column, which makes the geom_text() layer a bit easier to work with. The interaction() function worked perfectly to give me two stacked and dodged bar charts per category (Grade in this case). Below is the modified solution code that I got working:
# Input vectors for the data frame built below (one element per row).
source <- c("D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P")
subject <- c("M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R")
grade <- c(1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2)
domain <- c("Alg", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc")
placement <- c("A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A")
qty <- c(425, 389, 96, 460, 293, 163, 518, 291, 101, 366, 349, 201, 889, 661, 150, 680, 617, 465, 445, 293, 112, 381, 292, 208, 223, 232, 131, 270, 72, 27, 45, 9, 99, 40, 79, 194, 72, 126, 133, 123, 456, 98, 234, 432, 65)
# Build the data frame from the input vectors. The percentage column `pcnt`
# is derived from the data with mutate() below, so no hand-entered
# percentage vector is needed.
test <- data.frame(source, subject, grade, domain, placement, qty)

# Add `pcnt`: each row's qty as a percentage of its group total, rounded to
# two decimals, then sort by domain and descending qty.
# NOTE(review): the group is subject/grade/domain/placement, i.e. the
# denominator is summed across sources rather than within one bar; confirm
# this is the intended percentage.
test <- test %>%
  group_by(subject, grade, domain, placement) %>%
  mutate(pcnt = round(qty / sum(qty) * 100, 2)) %>%
  # release the grouping so later steps work on the whole table
  ungroup() %>%
  arrange(domain, desc(qty))

# Proportion-stacked bars, one bar per source within each grade, one panel
# per domain, with "<pcnt> % / <qty>" printed in the middle of every segment.
plot <- test %>%
  ggplot(
    aes(
      x = interaction(source, grade),
      y = qty,
      fill = placement
    )
  ) +
  geom_bar(stat = "identity", position = "fill") +
  scale_x_discrete(
    "Grade",
    labels = c("1.D", "1.P", "2.D", "2.P"),
    guide = guide_axis(angle = 90)
  ) +
  scale_y_continuous("Prop place") +
  scale_fill_manual(
    "Relative Placement",
    values = rev(c("#de7e7e", "#ebeb4d", "#70e65e")),
    labels = rev(c("C", "B", "A"))
  ) +
  facet_wrap(vars(domain), scales = "free_x") +
  # The labels reuse the inherited y = qty: position_fill() normalises each
  # stack, so rescaling y by a constant would not move them, and
  # vjust = 0.5 already centres every label in its segment.
  geom_text(
    aes(label = paste(sprintf("%1.2f", pcnt), "%\n", qty)),
    size = 2.5,
    position = position_fill(vjust = 0.5)
  )
plot
I am having trouble finding the best way to use directlabels in an x,y scatterplot.
library(ggplot2)
library(directlabels)
# Scatterplot of the sample coordinates in `sites`, coloured by Treatment,
# with a dashed, lightly filled polygon drawn from `hulls` per treatment.
# Legends are switched off with `show.legend`, which replaces the deprecated
# `show_guide` argument.
p1 <- ggplot() +
  geom_point(
    data = sites, aes(X, Y, col = Treatment),
    alpha = 1, show.legend = FALSE
  ) +
  geom_polygon(
    data = hulls, aes(X, Y, colour = Treatment, fill = Treatment),
    lty = "dashed", alpha = 0.1, show.legend = FALSE
  ) +
  theme_bw() +
  # geom_text(data = sites, aes(X, Y, label = Sample, color = Treatment),
  #           size = 2, show.legend = FALSE) +
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # panel.border = element_blank(),
    panel.background = element_blank()
  ) +
  coord_fixed() +
  annotate("text", x = -0.8, y = -0.55, label = "Stress = 0.102")

# Add the sample names with directlabels. The text size (cex) is not an
# aesthetic, so it is supplied as a named element of the positioning-method
# list instead of as an extra argument to aes(). No legend switch is passed:
# the direct labels themselves take the place of a legend.
p2 <- p1 +
  geom_dl(
    data = sites,
    aes(X, Y, label = Sample, colour = Treatment),
    method = list("smart.grid", cex = 0.6)
  )
p2
While this is much better than using vjust/hjust in the geom_text() line, it still has some problems:
For example, the lower labels in the far-left triangle are unnecessarily plotted on top of the area, and some of the labels in the greenish triangles (R53 and R52, for example) should be placed outside the area. I tried many of the options available in the directlabels package, but smart.grid was the best method so far. Is there something I can do to improve the labelling other than using Photoshop?
Here is my data:
# Point data for the scatterplot: a 36-row data frame with the columns
# Sample (label text), X and Y (coordinates) and Treatment (group).
sites <- structure(list(Sample = c("R11", "R12", "R13", "R21", "R22",
"R23", "R31", "R32", "R33", "R41", "R42", "R43", "R51", "R52",
"R53", "R61", "R62", "R63", "R71", "R72", "R73", "D21", "D22",
"D23", "D31", "D32", "D33", "D41", "D42", "D43", "D51", "D52",
"D53", "D61", "D62", "D63"), X = c(-0.0960291142274892, 0.0842575226370376,
0.407178028123943, -0.00597471992061621, 0.340822839455987, 0.430580770893079,
0.432294207388092, 0.239276903425903, 0.202428525444242, 0.219457881130952,
0.325079921807492, 0.362628649343193, 0.0434810152644517, 0.459448269977165,
0.0743637831168788, 0.0629705355701924, 0.269757227770524, 0.0428786936573877,
0.642912005685253, 0.715465545056878, 0.870415884623661, -0.951515101512284,
-0.596539639874245, -0.939843921119596, -0.522589716428025, -0.233436702923438,
-0.176869256803805, -0.340990181400083, -0.320797597759894, -0.246047602937319,
-0.23780172425706, -0.265780334876648, -0.140281405966232, -0.890481118743505,
-0.0757195299492111, -0.185000541672864), Y = c(-0.338951234980643,
-0.177800321292734, -0.324895018639169, -0.0739123902386802,
-0.345019713119787, 0.18359750205563, -0.108461977599771, -0.0275340962048548,
-0.129423067267885, 0.0143496668618822, -0.355317429073615, 0.0866462123708121,
-0.00768509589834154, -0.258685480417501, 0.288488538303651,
-0.363105213242044, -0.54704407232382, 0.0570134733389543, 0.224034690932126,
0.43051937630073, 0.780464857796767, 0.266199859599797, 0.759400919418545,
0.103161222551216, -0.178046911975698, -0.116472972897424, -0.0289716671368776,
-0.146023515436316, -0.284526289182701, 0.0764403706902978, 0.150831452033757,
0.226303952103805, -0.226670040280512, -0.15689508307977, 0.268053395023382,
0.279936100906792), Treatment = c("A", "A", "A", "B", "B", "B",
"C", "C", "C", "D", "D", "D", "E", "E", "E", "F", "F", "F", "G",
"G", "G", "H", "H", "H", "I", "I", "I", "J", "J", "J", "K", "K",
"K", "L", "L", "L")), .Names = c("Sample", "X", "Y", "Treatment"
), row.names = c(NA, -36L), class = "data.frame")
# Polygon data: a 36-row data frame with the same columns as `sites`
# (Sample, X, Y, Treatment); it is the data of the geom_polygon() layer,
# which draws one outline per Treatment.
hulls <- structure(list(Sample = c("R13", "R11", "R12", "R22", "R21",
"R23", "R31", "R33", "R32", "R42", "R41", "R43", "R52", "R51",
"R53", "R62", "R61", "R63", "R71", "R72", "R73", "D23", "D21",
"D22", "D32", "D31", "D33", "D42", "D41", "D43", "D53", "D52",
"D51", "D61", "D63", "D62"), X = c(0.407178028123943, -0.0960291142274892,
0.0842575226370376, 0.340822839455987, -0.00597471992061621,
0.430580770893079, 0.432294207388092, 0.202428525444242, 0.239276903425903,
0.325079921807492, 0.219457881130952, 0.362628649343193, 0.459448269977165,
0.0434810152644517, 0.0743637831168788, 0.269757227770524, 0.0629705355701924,
0.0428786936573877, 0.642912005685253, 0.715465545056878, 0.870415884623661,
-0.939843921119596, -0.951515101512284, -0.596539639874245, -0.233436702923438,
-0.522589716428025, -0.176869256803805, -0.320797597759894, -0.340990181400083,
-0.246047602937319, -0.140281405966232, -0.265780334876648, -0.23780172425706,
-0.890481118743505, -0.185000541672864, -0.0757195299492111),
Y = c(-0.324895018639169, -0.338951234980643, -0.177800321292734,
-0.345019713119787, -0.0739123902386802, 0.18359750205563,
-0.108461977599771, -0.129423067267885, -0.0275340962048548,
-0.355317429073615, 0.0143496668618822, 0.0866462123708121,
-0.258685480417501, -0.00768509589834154, 0.288488538303651,
-0.54704407232382, -0.363105213242044, 0.0570134733389543,
0.224034690932126, 0.43051937630073, 0.780464857796767, 0.103161222551216,
0.266199859599797, 0.759400919418545, -0.116472972897424,
-0.178046911975698, -0.0289716671368776, -0.284526289182701,
-0.146023515436316, 0.0764403706902978, -0.226670040280512,
0.226303952103805, 0.150831452033757, -0.15689508307977,
0.279936100906792, 0.268053395023382), Treatment = c("A",
"A", "A", "B", "B", "B", "C", "C", "C", "D", "D", "D", "E",
"E", "E", "F", "F", "F", "G", "G", "G", "H", "H", "H", "I",
"I", "I", "J", "J", "J", "K", "K", "K", "L", "L", "L")), .Names = c("Sample",
"X", "Y", "Treatment"), row.names = c(NA, -36L), class = "data.frame")
directlabels is not really meant for labeling individual points on scatterplots (that problem is NP-hard, see https://en.wikipedia.org/wiki/Automatic_label_placement).
That being said, you may want to try writing your own Positioning Method:
# Template for a custom directlabels Positioning Method.
# `point.df` is presumably the data frame of points to be labelled (confirm
# by inspecting it in the browser); the function returns whatever
# your_label_computation_function() produces for it.
your.method <- function(point.df, ...) {
  # Show the incoming data, then pause so it can be explored interactively.
  print(point.df)
  browser()
  # Replace this call with the real label computation.
  your_label_computation_function(point.df)
}
# Label the points using the custom Positioning Method, looked up by name.
# The text size (cex) is a named element of the method list, not an
# aesthetic, so it is passed through `method` rather than inside aes().
# The deprecated `show_guide` argument is no longer passed.
p2 <- p1 +
  geom_dl(
    data = sites,
    aes(X, Y, label = Sample, colour = Treatment),
    method = list("your.method", cex = 0.6)
  )