Both n and percent labels on geom_col, ggplot2 - r

If I plot df with the code below, I can put the n for each column over the column itself, as seen in this example plot. What I would like to do is also put the percentage for each column in the label. That is the percentage of the total that the column makes up. So, for example, the label on the first column would read 127(42.9%), instead of just 127. How could I do that?
df <- structure(list(Letter = structure(1:7,
.Label = c("A", "B", "C", "D", "E", "F", "G"),
class = "factor"), Freq = c(127L, 101L, 24L, 19L, 3L, 0L, 22L)),
.Names = c("Letter", "Freq"),
row.names = c(NA, -7L),
class = "data.frame")
ggplot(df, aes(Letter, Freq, label = Freq)) +
geom_col() +
geom_text(size = 3, position = position_dodge(width = 1), vjust = -0.25)

Just create the text you want to use as a label.
# Build the label text up front: the count followed by that column's share of
# the total. "%.1f" keeps exactly one decimal place on every bar (3 of 296 is
# shown as "1.0%", not "1%"), and "%%" emits a literal percent sign.
df$pct <- df$Freq / sum(df$Freq) * 100
df$label <- sprintf("%s (%.1f%%)", df$Freq, df$pct)

# Map the prepared column to the label aesthetic; vjust = -0.25 lifts the text
# just above the top of each column.
ggplot(df, aes(Letter, Freq, label = label)) +
  geom_col() +
  geom_text(size = 3, position = position_dodge(width = 1), vjust = -0.25)

Related

R how to prevent ggplot geom_text() from using new database data on a named plot object

I am attempting to make a series of plots using the same code with unique coral species databases.
Databases
data_1 <- structure(list(Site_long = structure(c(1L, 1L, 2L, 2L), .Label = c("Hanauma Bay",
"Waikiki"), class = "factor"), Shelter = structure(c(1L, 2L,
1L, 2L), .Label = c("Low", "High"), class = c("ordered", "factor"
)), mean = c(1.19986885018767, 2.15593884020962, 0.369605100791602,
0.31005865611133), sd = c(2.5618758944073, 3.67786619671933,
1.0285671157698, 0.674643037178562), lower = c(0.631321215232725,
1.33972360808602, 0.141339007832154, 0.160337623931733), upper = c(1.76841648514261,
2.97215407233321, 0.59787119375105, 0.459779688290928), sample_size = c(78L,
78L, 78L, 78L)), row.names = c(NA, -4L), groups = structure(list(
Site_long = structure(1:2, .Label = c("Hanauma Bay", "Waikiki"
), class = "factor"), .rows = structure(list(1:2, 3:4), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
data_2 <- structure(list(Site_long = structure(c(2L, 2L, 1L, 1L), .Label = c("Hanauma Bay",
"Waikiki"), class = "factor"), Shelter = structure(c(1L, 2L,
1L, 2L), .Label = c("Low", "High"), class = c("ordered", "factor"
)), mean = c(0.695203162997812, 0.838720069947102, 0.76957780057238,
0.771070502382599), sd = c(1.17117437618039, 1.02766824928792,
1.43499288333539, 1.28634022958585), lower = c(0.435288768568787,
0.610653459098997, 0.451115141323908, 0.485597776371556), upper = c(0.955117557426838,
1.06678668079521, 1.08804045982085, 1.05654322839364), sample_size = c(78L,
78L, 78L, 78L)), row.names = c(NA, -4L), groups = structure(list(
Site_long = structure(1:2, .Label = c("Hanauma Bay", "Waikiki"
), class = "factor"), .rows = structure(list(3:4, 1:2), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
When I run my code on the first species database (data_1), the barplots and associated error bar annotations render correctly. Notice I also made a new variable "data", which is the same object name that will be reused later for species 2. To keep this plot for a composite of several plots later, I named the plot "species_1_plot" to save it to the global environment.
Code for Species 1 Plot
data <- data_1
mult_compare_recruitment <- c("A", "A", "A", "A")
data <- data[c(3, 4, 1, 2),]
data$Shelter <- factor(data$Shelter, levels = c("Low", "High"))
# reorder summary dataframe for plotting
position <- c("Waikiki", "Hanauma Bay")
# ggplot2 barplot position with Waikiki (Low-High Shelter) and Hanauma Bay
recruitment_plot_3 <- ggplot(data = data, aes(fill=Shelter, y=mean, x=Site_long)) +
geom_bar(position = "dodge", stat="identity", width = .8) +
scale_x_discrete(limits = position) +
geom_errorbar(aes(ymin = lower, ymax = upper), position = position_dodge(.8), width = .1) +
geom_text(aes(label = mult_compare_recruitment, y = data$upper), vjust = -.5, position = position_dodge(width = 0.8), size = 4) +
scale_fill_grey(name = "Shelter", start = .8, end = .2) +
labs(x = "Site", y = expression(paste("Coral recruitment per m"^"2"))) +
theme_classic(base_size = 14.5) +
theme(text = element_text(size = 18), axis.title.x = element_blank(),
legend.position = "none", axis.text.y = element_text(angle = 90))
species_1_plot <- recruitment_plot_3
species_1_plot
In order to create my next plot, I run the same code on a different species database (data_2) while once again assigning the new database to the object "data". Once again, I saved the new plot "species_2_plot" to the global environment.
Code for Species 2 Plot
data <- data_2
mult_compare_recruitment <- c("A", "A", "B", "B")
data <- data[c(3, 4, 1, 2),]
data$Shelter <- factor(data$Shelter, levels = c("Low", "High"))
# reorder summary dataframe for plotting
position <- c("Waikiki", "Hanauma Bay")
# ggplot2 barplot position with Waikiki (Low-High Shelter) and Hanauma Bay
recruitment_plot_3 <- ggplot(data = data, aes(fill=Shelter, y=mean, x=Site_long)) +
geom_bar(position = "dodge", stat="identity", width = .8) +
scale_x_discrete(limits = position) +
geom_errorbar(aes(ymin = lower, ymax = upper), position = position_dodge(.8), width = .1) +
geom_text(aes(label = mult_compare_recruitment, y = data$upper), vjust = -.5, position = position_dodge(width = 0.8), size = 4) +
scale_fill_grey(name = "Shelter", start = .8, end = .2) +
labs(x = "Site", y = expression(paste("Coral recruitment per m"^"2"))) +
theme_classic(base_size = 14.5) +
theme(text = element_text(size = 18), axis.title.x = element_blank(),
legend.position = "none", axis.text.y = element_text(angle = 90))
species_2_plot <- recruitment_plot_3
species_2_plot
The problem is, when I plot the first species plot again (species_1_plot), the data are correct (bars), but the height of text annotations and their letter values are not correct. They are in fact the values from species_2_plot.
species_1_plot
I saved each plot to the global environment with a unique name knowing this would be an issue. Despite this, geom_text() seems to be using data from the second plot (the objects currently in the global environment), even though the actual data (bars) in the plot are correct (from species_1_plot). My understanding was that when you name a plot as an object (species_1_plot and species_2_plot), it's akin to saving the plot, and therefore prevents any changes to the plot and its annotations unless specified. Is there any way to prevent this from happening without specifically naming the databases (data_1 and data_2)? All input is appreciated. Thanks in advance!
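I would suggest wrapping the plotting code in a function. ggplot2 does not evaluate the expressions inside aes() when the plot object is created; it evaluates them when the plot is drawn. Expressions such as data$upper and mult_compare_recruitment are therefore looked up again every time you print the plot, so after you overwrite data and mult_compare_recruitment in the global environment, species_1_plot picks up the new values (the bars are unaffected because the data frame passed via data = is stored in the plot object). I have made a function with parameters for the data, the labels and the positions, and I display the outputs. Fill them in the same way you defined those variables in your code. Each function call has its own environment, so the values a plot refers to are not overwritten by later calls. Here is the code, using the data you shared: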
library(ggplot2)
#Function
# Build the dodged bar plot of mean coral recruitment by site and shelter,
# with error bars and a multiple-comparison letter above each bar.
#
# Arguments:
#   x  summary data frame with columns Site_long, Shelter, mean, lower and
#      upper; it is expected to have four rows, which are reordered as
#      c(3, 4, 1, 2) before plotting.
#   y  character vector of multiple-comparison letters, one per row of the
#      reordered data (in that row order).
#   z  character vector of Site_long values giving the order of the sites on
#      the x axis.
#
# Returns the ggplot object.
myplotfunc <- function(x, y, z) {
  # reorder summary dataframe for plotting
  data <- x[c(3, 4, 1, 2), ]
  data$Shelter <- factor(data$Shelter, levels = c("Low", "High"))

  # Keep the letters in the data frame itself. aes() expressions are evaluated
  # when the plot is drawn, so mapping plain column names (instead of
  # `data$upper` or a free-standing vector) makes the plot depend only on the
  # data stored inside it, not on variables that may change afterwards.
  data$sig_label <- y

  # ggplot2 barplot position with Waikiki (Low-High Shelter) and Hanauma Bay
  ggplot(data = data, aes(fill = Shelter, y = mean, x = Site_long)) +
    geom_bar(position = "dodge", stat = "identity", width = .8) +
    scale_x_discrete(limits = z) +
    geom_errorbar(
      aes(ymin = lower, ymax = upper),
      position = position_dodge(.8),
      width = .1
    ) +
    # Letters sit just above the upper end of each error bar.
    geom_text(
      aes(label = sig_label, y = upper),
      vjust = -.5,
      position = position_dodge(width = 0.8),
      size = 4
    ) +
    scale_fill_grey(name = "Shelter", start = .8, end = .2) +
    labs(x = "Site", y = expression(paste("Coral recruitment per m"^"2"))) +
    theme_classic(base_size = 14.5) +
    theme(
      text = element_text(size = 18),
      axis.title.x = element_blank(),
      legend.position = "none",
      axis.text.y = element_text(angle = 90)
    )
}
#Code
o1 <- myplotfunc(x=data_1,y=c("A", "A", "A", "A"),z=c("Waikiki", "Hanauma Bay"))
o2 <- myplotfunc(x=data_2,y=c("A", "A", "B", "B"),z=c("Waikiki", "Hanauma Bay"))
Outputs:

How to change linetype conditional on x-axis factor in ggplot when using geom_smooth?

Suppose I have a LOESS regression plot where the x-axis correspond to a categorical variable:
library(ggplot2)
b <- structure(list(Expression = c(16.201081535896, 16.5138880401065,
16.4244615700828, 1.62923743262849, 3.35379087562868, 6.99935683212696,
4.81932543877313, 3.85300704208448, 7.32436891427261, 4.23627699164079,
6.95731601433845, 4.33315521361287, 5.50596153247422, 13.0788494583573,
13.6909487566244, 12.9520674350314), stage = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L), .Label = c("A",
"B", "C", "D", "E"), class = "factor")), row.names = c(NA, 16L
), class = "data.frame")
ggplot(b, aes(as.numeric(stage), Expression)) +
geom_point() +
geom_smooth(span = 0.8) +
scale_x_continuous(breaks = as.numeric(b$stage), labels = b$stage, minor_breaks = NULL)
I want to use 2 different line types at different sections of a LOESS regression.
Specifically, I would like to have a dashed line between A and B, a continuous line between B and D, and a dashed line again between D and E.
So I follow the example in:
conditional plot linetype in ggplot2
But the connection in the left and right are lost, and only the central part of the loess regression remains.
line.groups <- plyr::mapvalues(b$stage,
from = c("A", "B", "C", "D", "E"),
to = c(0, 1, 1, 1, 2))
ggplot(b, aes(as.numeric(stage), Expression)) +
geom_point() +
geom_smooth(aes(group=line.groups, linetype=line.groups), span = 0.8) +
scale_linetype_manual(values=c(2,1,2)) +
guides(linetype=FALSE) +
scale_x_continuous(breaks = as.numeric(b$stage), labels = b$stage, minor_breaks = NULL)
Is there a way to change the linetype of the geom_smooth ggplot, conditional to the x-axis (where x is a factor)?
EDIT:
I tried using three separate calls to geom_smooth for each section as suggested by a comment, but the standard error bounds won't be "smooth" between each call.
ggplot(b, aes(as.numeric(stage), Expression)) +
geom_point() +
geom_smooth(data=b[b$stage %in% c("A", "B"),], linetype = "dashed", span = 0.8) +
geom_smooth(data=b[b$stage %in% c("B", "C", "D"),], linetype = "solid", span = 0.8) +
geom_smooth(data=b[b$stage %in% c("D", "E"),], linetype = "dashed",span = 0.8) +
scale_linetype_manual(values=c(2,1,2)) +
guides(linetype=FALSE) +
scale_x_continuous(breaks = as.numeric(b$stage), labels = b$stage, minor_breaks = NULL)
Link to sub-optimal solution
Thanks
For completeness, I will post here the solution offered by user OTStats in the comments above:
# Draw the fitted curve in three pieces so that each piece can have its own
# linetype: dashed for A-B, solid for B-D, dashed for D-E. The pieces are drawn
# without confidence bands (se = FALSE) because separately fitted bands do not
# join up smoothly.
ggplot(b, aes(as.numeric(stage), Expression)) +
  geom_point() +
  geom_smooth(
    data = b[b$stage %in% c("A", "B"), ],
    linetype = "dashed", span = 0.8, se = FALSE
  ) +
  geom_smooth(
    data = b[b$stage %in% c("B", "C", "D"), ],
    linetype = "solid", span = 0.8, se = FALSE
  ) +
  geom_smooth(
    data = b[b$stage %in% c("D", "E"), ],
    linetype = "dashed", span = 0.8, se = FALSE
  ) +
  # A fourth smooth over the full data supplies one continuous confidence band;
  # linetype = "blank" hides its line so only the ribbon is drawn.
  geom_smooth(linetype = "blank", span = 0.4) +
  # "none" is the supported way to switch a legend off; FALSE is deprecated.
  guides(linetype = "none") +
  scale_x_continuous(
    breaks = as.numeric(b$stage),
    labels = b$stage,
    minor_breaks = NULL
  )
Note that the level of smoothing needs to be adjusted in the fourth call of geom_smooth to produce satisfactory results but, overall, this trick solves the question.
Link to solution

adjusting position of text above an error bar in ggplot

I have the following data frame:
df <- structure(list(Gender = c("M", "M", "M", "M", "F", "F", "F",
"F"), HGGroup = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label =
c("Low: \n F: <11.5, M: <12.5",
"Medium: \n F: > 11.5 & < 13, M: >12.5 & < 14.5", "High: \n F: >= 13, M >=
14.5", "No data"), class = "factor"), MeanBlood = c(0.240740740740741,
1.20689655172414, 0.38150289017341, 0.265957446808511, 0.272727272727273,
1.07821229050279, 0.257309941520468, 0.288776796973518), SEBlood =
c(0.0694516553311722, 0.154646785911315, 0.0687932999815165,
0.0383529942166715, 0.0406072582435844, 0.0971802933392401,
0.0327856332532931, 0.0289636037703526),
N = c(108L, 116L, 173L, 376L, 319L, 179L, 342L, 793L)), row.names = c(NA,
-8L), class = c("tbl_df", "tbl", "data.frame"))
I have the following command for plotting the means and confidence intervals for each group:
ggplot(df, aes(x = Gender, y = MeanBlood, colour = Gender)) +
geom_errorbar(aes(ymin = MeanBlood - SEBlood*qnorm(0.975), ymax = MeanBlood
+ SEBlood*qnorm(0.975)), width = 0.3, stat = "identity") +
geom_point(size = 3) + facet_grid(~HGGroup) + theme(legend.position =
"none") +
geom_text(aes(label = N, x = Gender), vjust = -5)
I am trying to get the text exactly on top of the error bar, but it needs to be in a different location for each group and currently comes out weird.
I think the problem originates from the fact that the confidence interval has a different length for each group, so that a constant justification would not work - it has to be relative to the lower quartile.
Any suggestions?
This seems to work, the y of your label, as you want it, is not the y set in the aes of ggplot, but is ymax:
# Anchor each label at the top of its error bar by giving geom_text() the same
# y value as the bar's ymax; a small constant vjust then works for every group.
ggplot(df, aes(x = Gender, y = MeanBlood, colour = Gender)) +
  geom_errorbar(
    aes(
      ymin = MeanBlood - SEBlood * qnorm(0.975),
      ymax = MeanBlood + SEBlood * qnorm(0.975)
    ),
    width = 0.3,
    stat = "identity"
  ) +
  geom_point(size = 3) +
  facet_grid(~HGGroup) +
  theme(legend.position = "none") +
  geom_text(
    aes(y = MeanBlood + SEBlood * qnorm(0.975), label = N, x = Gender),
    vjust = -1
  )
If you move ymax to the ggplot call other layers will be able to access it so no need to redefine it:
# Declare ymin/ymax once in the plot-level mapping so that every layer inherits
# the confidence-interval bounds (mean +/- qnorm(0.975) * SE).
ggplot(
  df,
  aes(
    x = Gender, y = MeanBlood, colour = Gender,
    ymin = MeanBlood - SEBlood * qnorm(0.975),
    ymax = MeanBlood + SEBlood * qnorm(0.975)
  )
) +
  # width is a fixed setting, not a data mapping, so it goes outside aes().
  geom_errorbar(width = 0.3, stat = "identity") +
  geom_point(size = 3) +
  facet_grid(~HGGroup) +
  theme(legend.position = "none") +
  # after_stat(ymax) reads the inherited ymax of each row, placing the label at
  # the top of its error bar (after_stat() replaces the deprecated stat()).
  geom_text(aes(y = after_stat(ymax), label = N, x = Gender), vjust = -1)

How to colormap from melt in R

I've mapped colors in R before. But something isn't clicking.
Ideally, I'd like to map color names to the variable value "student", but I'm getting a length error. However, the number of students being mapped to colors is equal. Also, I've tried creating two separate color columns - as a string and as an id. The colors then end up getting labeled on the legend. Adding the manual scale color options doesn't do much.
Here is a sample of the data:
m3 <- structure(list(student = structure(c(7L, 11L, 9L, 2L, 8L, 4L), .Label = c("a","b", "c", "d", "e", "f", "g","h", "i", "j", "k", "l", "m", "n","o", "p"), class = "factor"), colorz = structure(4:9, .Label = c("#66CC99","#9999CC", "#CC6666", "#FF0000FF", "#FF2000FF", "#FF4000FF","#FF6000FF", "#FF8000FF", "#FF9F00FF", "#FFBF00FF", "#FFDF00FF","#FFFF00FF", "green", "red"), class = "factor"), variable = structure(c(1L,1L, 1L, 1L, 1L, 1L), .Label = c("pre", "c1", "c2","b1", "c3", "c4", "b2", "u1", "u2","u3", "u4", "total"), class = "factor"), value = c(3, 31,49, 88, 31, 40), col = c("#FF0000FF", "#FF2000FF", "#FF4000FF","#FF6000FF", "#FF8000FF", "#FF9F00FF")), .Names = c("student","colorz", "variable", "value", "col"), row.names = c(NA, 6L), class = "data.frame")
And then graphing with: ggplot(m3, aes(x=variable, y=value, group=student,linetype=student)) + geom_line(size=.75) + geom_point(size=2) + xlab("test") + ylab(paste("score")) + geom_hline(yintercept=70, linetype="dashed", size=3) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_color_identity()
The example is much smaller than the actual data.
Ideally, I'd like to be able to use something like: color.names <- setNames( c( "#FF0000FF", "#FF2000FF", "#FF4000FF", "#FF6000FF", "#FF8000FF", "#FF9F00FF","#CC6666", "#9999CC", "#66CC99", "#FFBF00FF", "#FFDF00FF", "#FFFF00FF","green","red"), c("a","b","c", "d","e","f","g","h","i","j","k","l","m","n" ))
and call the colors. I'm not sure what's messing up. It could look as if I were trying to map 12 colors to 14 values, but I've tried 14 as well.
First, map color to student with aes(color = student), and then use scale_color_manual() instead of scale_color_identity(). Since you already named your color vector, ggplot will handle the matching based on names. Note that any value whose name isn't in the palette will be dropped and not plotted:
# Colour each student's line and points by student; scale_color_manual()
# matches the names of color.names against the student values.
ggplot(
  m3,
  aes(
    x = variable, y = value,
    group = student, linetype = student, color = student
  )
) +
  geom_line(size = 0.75) +
  geom_point(size = 2) +
  labs(x = "test", y = "score") +
  geom_hline(yintercept = 70, linetype = "dashed", size = 3) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_manual(values = color.names)
Looks like this now:

Stacked Bar Graph Labels with ggplot2

I am trying to graph the following data:
to_graph <- structure(list(Teacher = c("BS", "BS", "FA"
), Level = structure(c(2L, 1L, 1L), .Label = c("BE", "AE", "ME",
"EE"), class = "factor"), Count = c(2L, 25L, 28L)), .Names = c("Teacher",
"Level", "Count"), row.names = c(NA, 3L), class = "data.frame")
and want to add labels in the middle of each piece of the bars that are the percentage for that piece. Based on this post, I came up with:
ggplot(data=to_graph, aes(x=Teacher, y=Count, fill=Level), ordered=TRUE) +
geom_bar(aes(fill = Level), position = 'fill') +
opts(axis.text.x=theme_text(angle=45)) +
scale_y_continuous("",formatter="percent") +
opts(title = "Score Distribution") +
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF")) +
geom_text(aes(label = Count), size = 3, hjust = 0.5, vjust = 3, position = "stack")
But it
Doesn't have any effect on the graph
Probably doesn't display the percentage if it did (although I'm not entirely sure of this point)
Any help is greatly appreciated. Thanks!
The y-coordinate of the text is the actual count (2, 25 or 28), whereas the y-coordinates in the plot panel range from 0 to 1, so the text is being printed off the top.
Calculate the fraction of counts using ddply (or tapply or whatever).
graph_avgs <- ddply(
to_graph,
.(Teacher),
summarise,
Count.Fraction = Count / sum(Count)
)
to_graph <- cbind(to_graph, graph_avgs$Count.Fraction)
A simplified version of your plot. I haven't bothered to play about with factor orders so the numbers match up to the bars yet.
# Stacked bars rescaled to proportions, with each piece's fraction printed at
# its fraction value on the y axis.
ggplot(to_graph, aes(Teacher)) +
  # Count already holds the bar heights, so use stat = "identity"; the default
  # stat counts rows and does not accept a y aesthetic.
  geom_bar(aes(y = Count, fill = Level), stat = "identity", position = "fill") +
  scale_fill_manual(values = c("#FF0000", "#FFFF00", "#00CC00", "#0000FF")) +
  # Take the fractions from graph_avgs via data = rather than indexing it with
  # $ inside aes(); x (Teacher) is inherited from the plot-level mapping.
  geom_text(
    data = graph_avgs,
    aes(y = Count.Fraction, label = Count.Fraction),
    size = 3
  )

Resources