Stacked Bar Graph Labels with ggplot2 - r

I am trying to graph the following data:
to_graph <- structure(list(Teacher = c("BS", "BS", "FA"
), Level = structure(c(2L, 1L, 1L), .Label = c("BE", "AE", "ME",
"EE"), class = "factor"), Count = c(2L, 25L, 28L)), .Names = c("Teacher",
"Level", "Count"), row.names = c(NA, 3L), class = "data.frame")
and want to add labels in the middle of each piece of the bars that are the percentage for that piece. Based on this post, I came up with:
ggplot(data=to_graph, aes(x=Teacher, y=Count, fill=Level), ordered=TRUE) +
geom_bar(aes(fill = Level), position = 'fill') +
opts(axis.text.x=theme_text(angle=45)) +
scale_y_continuous("",formatter="percent") +
opts(title = "Score Distribution") +
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF")) +
geom_text(aes(label = Count), size = 3, hjust = 0.5, vjust = 3, position = "stack")
But it
Doesn't have any effect on the graph
Probably doesn't display the percentage if it did (although I'm not entirely sure of this point)
Any help is greatly appreciated. Thanks!

The y-coordinate of the text is the actual count (2, 25 or 28), whereas the y-coordinates in the plot panel range from 0 to 1, so the text is being printed off the top.
Calculate the fraction of counts using ddply (or tapply or whatever).
# Compute each row's share of its teacher's total count. `transform` (rather
# than `summarise`) keeps Teacher, Level and Count next to the new column, so
# every fraction stays attached to the row it was computed from.
graph_avgs <- ddply(
  to_graph,
  .(Teacher),
  transform,
  Count.Fraction = Count / sum(Count)
)
# ddply() returns the rows arranged by Teacher group. Reusing that result keeps
# `to_graph` and `graph_avgs` in the same row order and gives the new column
# the plain name `Count.Fraction`.
to_graph <- graph_avgs
A simplified version of your plot. I haven't bothered to play about with factor orders so the numbers match up to the bars yet.
ggplot(to_graph, aes(Teacher), ordered = TRUE) +
geom_bar(aes(y = Count, fill = Level), position = 'fill') +
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF")) +
geom_text(
aes(y = graph_avgs$Count.Fraction, label = graph_avgs$Count.Fraction),
size = 3
)

Related

How do I create a frequency stacked bar chart however have percentage labels on the bars and frequencies on the y axis, in R?

I started with the code below, however it is not showing the right output. I would just like a normal frequency stacked bar chart to show percentages on the bars but frequencies on the y axis. Could anyone offer any suggestions please?
ggplot(data = df, mapping = aes(x = Family_Size, y = Freq, fill = Survived)) + geom_bar(stat = "identity") +
geom_text(aes(label = paste0(df$Percentage),y=Percentage),size = 3) +
theme(plot.title = element_text(hjust = 0.5))
<table><tbody><tr><th>Survived</th><th>Family_Size</th><th>Frequency</th><th>Percentage</th></tr><tr><td>Yes</td><td>1</td><td>20</td><td>20%</td></tr><tr><td>No</td><td>1</td><td>80</td><td>80%</td></tr><tr><td>Yes</td><td>2</td><td>40</td><td>40%</td></tr><tr><td>No</td><td>2</td><td>60</td><td>60%</td></tr></tbody></table>
Are you looking for something like this?
ggplot(df, aes(x = Family_Size, y = Frequency, fill = Survived))+
geom_col()+
scale_y_continuous(breaks = seq(0,100, by = 20))+
geom_text(aes(label = Percentage), position = position_stack(0.5))
EDIT: Formatting percentages with two decimals
# Stacked frequency bars with a two-decimal percentage label centred in each
# segment. sprintf() formats every value on its own, so labels are not padded
# to a common width (as format() does on a vector) and no space is inserted
# before the percent sign.
ggplot(df, aes(x = Family_Size, y = Frequency, fill = Survived)) +
  geom_col() +
  scale_y_continuous(breaks = seq(0, 100, by = 20)) +
  geom_text(
    aes(label = sprintf("%.2f%%", Frequency)),
    # vjust = 0.5 puts each label in the middle of its stacked segment
    position = position_stack(vjust = 0.5)
  )
Reproducible example
structure(list(Survived = c("Yes", "No", "Yes", "No"), Family_Size = c(1L,
1L, 2L, 2L), Frequency = c(20L, 80L, 40L, 60L), Percentage = c("20%",
"80%", "40%", "60%")), row.names = c(NA, -4L), class = c("data.table",
"data.frame"))

adjust width of dodge bar chart with ggplot when other series is completely missing

I have prepared below function to plot dodge chart using ggplot:
# Builds a dodged bar chart from `chart_data`: `X` on the x axis, `value` as
# the bar height and `Q` as the fill colour, with "<value>%" text labels.
# Returns the ggplot object (the last expression is `chart`).
frq_dodge2 <- function(chart_data) {
# Fill palette passed to scale_fill_manual()
sapphire<-c("#00A8C8","#006D9E","#002C77","#A6E2EF","#51d5ee","#1d5cc7")
g<-ggplot(chart_data, aes(x=X, y=value,fill=Q))
chart <- g+
# preserve = "single" asks position_dodge2() to keep the width of a single bar
geom_bar(position = position_dodge2(preserve = "single",width=0.9),stat='identity') +
scale_fill_manual(values = sapphire)+
labs(x= NULL, y= NULL, subtitle=NULL) +
ylab(NULL) +
# NOTE(review): the argument is spelled `chart_data =` here; geom_text() takes
# `data =`, so the value != 0 filter is presumably not applied -- confirm.
geom_text(chart_data = subset(chart_data,value!=0),aes(label=paste0(value,"%")),
position=position_dodge2(width=0.9), vjust=-0.25,
size=3,fontface="bold", colour="#404040") +
labs(x=NULL, y=NULL)+
# y limits: lower bound is min(0, 2 * min(value)), upper bound is
# max(0, max(value) + 10% of max(value)), leaving room for the labels
scale_y_continuous( labels = number_format(suffix = "%"),
limits = c(min(0,min(chart_data$value)+min(chart_data$value)),
max(0,max(chart_data$value) + max(chart_data$value) / 10)))+
# NOTE(review): limits come from a `Stats` column, but the sample data shown
# with this function only has X, Q and value -- confirm the intended column.
scale_x_discrete(labels = function(x) str_wrap(x, width = 10),limits=unique(chart_data$Stats))
chart
}
The issue is that when one of the series is completely missing from the data, the bars are drawn too wide and do not look good. For example, for the data below the bars are plotted too wide.
> dput(expat)
structure(list(X = structure(c(1L, 1L), .Label = c("Less than 50",
"50-100", "100-250", "250-500", "500-1000", "1000-3000", "3000-5000",
"more than 5000"), class = "factor"), Q = structure(1:2, .Label = c("2018 (Actual)",
"2019 (Forecast)"), class = "factor"), value = c(100, 100)), class = "data.frame", row.names = c(NA,
-2L))
frq_dodge2(expat) will give the graph output
whereas in other data where the other series is not completely missing plot is ok:
> dput(localplus)
structure(list(X = structure(c(6L, 1L, 6L, 2L, 1L), .Label = c("Less than 50",
"50-100", "100-250", "250-500", "500-1000", "1000-3000", "3000-5000",
"more than 5000"), class = "factor"), Q = structure(c(1L, 1L,
2L, 2L, 2L), .Label = c("2018 (Actual)", "2019 (Forecast)"), class = "factor"),
value = c(14, 86, 11, 22, 67)), class = "data.frame", row.names = c(NA,
-5L))
I had used preserve="single" to fix the bars width in case of missing data in other series but this is not helping if other series is completely missing in the data (like in expat).
Is there any way to fix this?
As mentioned in this answer, you need to have drop = FALSE set in your x scale calls.
In your function, that's the last line :
# drop = FALSE keeps unused factor levels on the x axis; spell out FALSE
# because `F` is an ordinary variable that can be reassigned.
scale_x_discrete(labels = function(x) str_wrap(x, width = 10), drop = FALSE)
For me, this yields the following :
EDIT : remove unneeded labels in x axis
Just check which levels are missing and change their label to "". The full function thus becomes :
# Draw a dodged bar chart of `value` for each level of `X`, one bar per `Q`,
# with a "<value>%" label above every non-zero bar.
#
# chart_data: data frame with columns `X` (category; ideally a factor whose
#   levels list every category that may appear), `Q` (series) and `value`
#   (numeric, displayed as a percentage).
# Returns the ggplot object.
#
# Every level of `X` stays on the axis (drop = FALSE) so the bars keep a
# sensible width even when only one category has data; the tick labels of
# levels without data are blanked.
frq_dodge2 <- function(chart_data) {
  stopifnot(
    is.data.frame(chart_data),
    all(c("X", "Q", "value") %in% names(chart_data))
  )

  sapphire <- c("#00A8C8", "#006D9E", "#002C77", "#A6E2EF", "#51d5ee", "#1d5cc7")

  # A non-factor X has no levels(); convert it so the label vector below is
  # never NULL.
  if (!is.factor(chart_data$X)) {
    chart_data$X <- factor(chart_data$X)
  }

  # Axis labels: the level name where data exists, "" for levels with no rows.
  lvl_labs <- levels(chart_data$X)
  lvl_labs[!lvl_labs %in% chart_data$X] <- ""

  # Same limits as before (twice the minimum / maximum plus 10%, always
  # including 0), but ignoring missing values so they cannot turn into NA.
  lowest <- min(chart_data$value, na.rm = TRUE)
  highest <- max(chart_data$value, na.rm = TRUE)
  y_limits <- c(min(0, lowest + lowest), max(0, highest + highest / 10))

  ggplot(chart_data, aes(x = X, y = value, fill = Q)) +
    # geom_col() is geom_bar(stat = "identity"); preserve = "single" keeps the
    # width of one bar when a series is missing within a category
    geom_col(position = position_dodge2(preserve = "single", width = 0.9)) +
    geom_text(
      data = subset(chart_data, value != 0),
      aes(label = paste0(value, "%")),
      position = position_dodge2(width = 0.9),
      vjust = -0.25, size = 3, fontface = "bold", colour = "#404040"
    ) +
    scale_fill_manual(values = sapphire) +
    scale_y_continuous(labels = number_format(suffix = "%"), limits = y_limits) +
    scale_x_discrete(labels = lvl_labs, drop = FALSE) +
    labs(x = NULL, y = NULL, subtitle = NULL)
}
and yields
As a side note, you had written chart_data = subset... in the geom_text, instead of data = subset....

Positioning labels and color coding in sunburst - R

This is what the output looks like. I have a data set which contains the unit, the weight of each unit and the compliance score for each unit in the year 2016.
I was not able to add the table but here is the screenshot for the data in csv
I have named the columns in the data as unit, weight and year(which is compliance score) .
I want to create a sunburst chart where the first ring will be the unit divided based on weight and the second ring will be the same but will have labels compliance score.
The colour for each ring will be different.
I was able to do some code with the help from an online blog and the output I have gotten is similar to what I want but I am facing difficulty in positioning of the labels and also the colour coding for each ring
#using ggplot
library(ggplot2) # Visualisation
library(dplyr) # data wrangling
library(scales) # formatting
#read file
weight.eg <- read.csv("Dummy Data.csv", header = FALSE, sep = ";",
                      encoding = "UTF-8")
#change column names
colnames(weight.eg) <- c("unit", "weight", "year")
#weight and year are not numeric after reading (presumably because the file
#has a header row and an empty row). Going through as.character() gives the
#right numbers whether read.csv() returned factors (R < 4.0) or character
#vectors (R >= 4.0); levels() would be NULL in the latter case.
weight.eg$weight <- as.numeric(as.character(weight.eg$weight))
weight.eg$year <- as.numeric(as.character(weight.eg$year))
#Non-numeric cells become NA in the conversion above (with a warning); drop
#those rows
weight.eg <- na.omit(weight.eg)
#Sum of the total weight
sum_total_weight <- sum(weight.eg$weight)
#First layer: a one-row data frame holding the grand total
firstLevel <- weight.eg %>% summarize(total_weight = sum(weight))
sunburst_0 <- ggplot(firstLevel) # Just a foundation
#this will generate a bar chart
sunburst_1 <-
  sunburst_0 +
  geom_bar(data = firstLevel, aes(x = 1, y = total_weight),
           fill = "darkgrey", stat = "identity") +
  # "\n" reproduces the two-line label without a raw line break in the string
  geom_text(aes(x = 1, y = sum_total_weight / 2,
                label = paste("Total\nWeight", comma(total_weight))),
            color = "black")
#View
sunburst_1
#coord_polar(theta = "y") maps the y axis (the total weight) onto the angle,
#turning the stacked bar into a ring
sunburst_1 + coord_polar(theta = "y")
#Second ring: one segment per unit, sized and coloured by weight.
#Columns are referenced by bare name inside aes() (they come from `data`),
#and every `+` ends its line so the text layer really is added to the plot.
sunburst_2 <-
  sunburst_1 +
  geom_bar(data = weight.eg,
           aes(x = 2, y = weight, fill = weight),
           color = "white", position = "stack", stat = "identity",
           size = 0.6) +
  geom_text(data = weight.eg,
            aes(label = paste(unit, weight), x = 2, y = weight),
            position = "stack")
sunburst_2 + coord_polar(theta = "y")
#Third ring: same segments, labelled with the compliance score (`year`)
sunburst_3 <-
  sunburst_2 +
  geom_bar(data = weight.eg,
           aes(x = 3, y = weight, fill = weight),
           color = "white", position = "stack", stat = "identity",
           size = 0.6) +
  geom_text(data = weight.eg,
            aes(label = paste(year), x = 3, y = weight),
            position = "stack")
sunburst_3 + coord_polar(theta = "y")
sunburst_3 + scale_y_continuous(labels = comma) +
  scale_fill_continuous(low = "white", high = "darkred") +
  coord_polar("y") + theme_minimal()
Output for dput(weight.eg)
structure(list(unit = structure(2:7, .Label = c("", "A", "B",
"C", "D", "E", "F", "Unit"), class = "factor"), weight = c(30,
25, 10, 17, 5, 13), year = c(70, 80, 50, 30, 60, 40)), .Names =
c("unit",
"weight", "year"), row.names = 2:7, class = "data.frame", na.action
= structure(c(1L,
8L), .Names = c("1", "8"), class = "omit"))
output for dput(firstLevel)
structure(list(total_weight = 100), .Names = "total_weight", row.names
= c(NA,
-1L), na.action = structure(c(1L, 8L), .Names = c("1", "8"), class =
"omit"), class = "data.frame")
So I think I might have some sort of solution for you. I wasn't sure what you wanted to color-code on the outer ring; from your code it seems you wanted it to be the weight again, but it was not obvious to me. For different colour scales per ring, you could use the ggnewscale package:
library(ggnewscale)
For the centering of the labels you could write a function:
# Midpoint of each stacked segment: the average of the running total before
# and after the segment, used to centre a label on it.
cs_fun <- function(x) {
  upper <- cumsum(x)
  lower <- c(0, head(upper, -1))
  (upper + lower) / 2
}
Now the plotting code could look something like this:
ggplot(weight.eg) +
# Note: geom_col is equivalent to geom_bar(stat = "identity")
geom_col(data = firstLevel,
aes(x = 1, y = total_weight)) +
geom_text(data = firstLevel,
aes(x = 1, y = total_weight / 2,
label = paste("Total Weight:", total_weight)),
colour = "black") +
geom_col(aes(x = 2,
y = weight, fill = weight),
colour = "white", size = 0.6) +
scale_fill_gradient(name = "Weight",
low = "white", high = "darkred") +
# Open up new fill scale for next ring
new_scale_fill() +
geom_text(aes(x = 2, y = cs_fun(weight),
label = paste(unit, weight))) +
geom_col(aes(x = 3, y = weight, fill = weight),
size = 0.6, colour = "white") +
scale_fill_gradient(name = "Another Weight?",
low = "forestgreen", high = "white") +
geom_text(aes(label = paste0(year), x = 3,
y = cs_fun(weight))) +
coord_polar(theta = "y")
Which looks like this:

vjust inconsistent in stacked bar plot

I have a stacked bar plot, with highly unequal heights of bars. I would like to show the percentages on top of each bar.
What I have done so far is the following
df = structure(list(Type = c("Bronchoscopy", "Bronchoscopy", "Endoscopy",
"Endoscopy"), Bacteremia = structure(c(1L, 2L, 1L, 2L), .Label = c("False",
"True"), class = "factor"), count = c(2710L, 64L, 13065L, 103L
), perc = c(97.6928622927181, 2.3071377072819, 99.2178007290401,
0.782199270959903)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -4L), groups = structure(list(
Type = c("Bronchoscopy", "Endoscopy"), .rows = list(1:2,
3:4)), row.names = c(NA, -2L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE))
ggplot(df, aes(x = Type, y = perc, fill = Bacteremia)) +
geom_bar(stat = "identity") +
ylab("percent") +
geom_text(aes(label = paste0(round(perc, 2), "%")), position =
position_stack(vjust = -0.1), color = "black", fontface = "bold")
I can't seem to get the vjust right. It seems like it's not behaving in the same way for the bottom versus the top bar.
What I would like to achieve is to place the percentages slightly higher than the top edge of each bar.
Any ideas?
Here's a possible approach:
ggplot(df, aes(x = Type, y = perc, fill = Bacteremia)) +
geom_bar(stat = "identity") +
ylab("percent") +
geom_text(aes(label = paste0("", round(perc, 2), "%\n"), y = perc),
color = "black", fontface = "bold", nudge_y = 2)
I should elaborate that ggplot2 is going to try to place the geom_text() relative to the data. If you are trying to align horizontally the text labels, you will need to either use annotate() or supply a labelling dataset with type, percent and Bacteremia and call that in geom_text() as below.
labdf <- cbind(df, ypos = c(103, 5, 103, 5))
ggplot(df, aes(x = Type, y = perc, fill = Bacteremia)) +
geom_bar(stat = "identity") +
ylab("percent") +
geom_text(data = labdf,
aes(label = paste0("", round(perc, 2), "%"), y = ypos, x = Type),
color = "black", fontface = "bold")
Here's one way to do it:
# Rebuild the example data and compute each row's percentage within its Type.
df <-
  tibble(
    Type = c("Bronchoscopy", "Bronchoscopy", "Endoscopy", "Endoscopy"),
    Bacteremia = c("False", "True", "False", "True"),
    count = c(2710L, 64L, 13065L, 103L)
  ) %>%
  group_by(Type) %>%
  mutate(Percent = round(count / sum(count) * 100, 1)) %>%
  # Drop the grouping once the per-Type percentages exist, so later verbs on
  # `df` are not silently applied per group.
  ungroup()
df %>%
ggplot(aes(x = Type, y = Percent, fill = Bacteremia)) +
geom_col() +
geom_label(
data = . %>% filter(Bacteremia == "True"),
aes(y = Percent + 5, label = str_c(Percent, "%")),
show.legend = FALSE
) +
geom_label(
data = . %>% filter(Bacteremia == "False"),
aes(y = 105, label = str_c(Percent, "%")),
show.legend = FALSE
)
The choices of 5 and 105 work on my computer, but may need to be tweaked a bit based on your specific settings and aspect ratio. The first geom_label call sets the y-axis based on the precise percentage, while the second one sets it at a constant level above the bars.
You might also want to play around with using geom_text vs. geom_label to experiment with different color and label settings. The nice thing about geom_label is that it will make it very clear which group is being labeled.

ggplot: stacked barplot in reverse order

So I have data frame
dput(df)
structure(list(Frequency = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L), .Label = c("2", "3", "4", "5"), class = "factor"), Prcentage = c(1,
33, 58, 8, 2, 40, 53, 5), label = list("Insufficient", "Average",
"Good", "Excellent", "Insufficient", "Average", "Good", "Excellent"),
name = c("implementation", "implementation", "implementation",
"implementation", "energy", "energy", "energy", "energy")), .Names = c("Frequency",
"Prcentage", "label", "name"), row.names = c(NA, 8L), class = "data.frame")
And with following code
# Get the levels for type in the required order
df$label = factor(df$label, levels = c("Unacceptable","Insufficient", "Average","Good","Excellent"))
df = arrange(df, name, desc(label))
# Format the labels and calculate their positions
df = ddply(df, .(name), transform, pos = (cumsum(Prcentage) - 0.5 * Prcentage))
df$label1 = paste0(sprintf("%.0f", df$Prcentage), "%")
# Plot
ggplot(df, aes(x = factor(name), y = Prcentage, fill = label, order=desc(label))) +
geom_bar(stat = "identity", width = 0.5) +
geom_text(aes(y = pos, label = label1), size = 4) + theme_classic() +
scale_y_continuous(position = "top",expand = c(0, 0),breaks = seq(min(0), max(0,102), by = 10),limits = c(0,102),labels = dollar_format(suffix = "%", prefix = "")) +
coord_flip() +
xlab("") + ylab("") +
theme(legend.position="bottom",legend.title = element_blank()) +
scale_fill_manual(values = c("#ff0000","#fff68f","#b2b2b2","#1baf05","#006080"),drop = FALSE)
I produce the following plot
But now I am struggling to get the bars in reverse order. So my output should be reverse stacked with the right values in the bars (e.g. 1% yellow should be positioned first on the left side of the plot, followed by 33%, then 58%, and 8% on the far right). I've already tried to do this by adding
+ geom_col(position = position_stack(reverse = TRUE)) (after geom_bar)
Which produced this
But this is not correct, as the values in the bars are not correct.
I've also looked here
How to control ordering of stacked bar chart using identity on ggplot2
Reverse fill order for histogram bars in ggplot2
Order Stacked Bar Graph in ggplot
Reverse fill order for histogram bars in ggplot2
The position of the labels is directly set by the pos value, so you need to reverse that if you reverse the stack order:
ggplot(df, aes(x = factor(name))) +
geom_col(aes(y = Prcentage, fill = label),
position = position_stack(reverse = TRUE),
width = .5) +
# Set the position to its complementary
geom_text(aes(y = 100 - pos, label = label1)) +
# Rest of theme
coord_flip() +
scale_y_continuous(position = "top",
expand = c(0, 0),
breaks = seq(min(0), max(0,102), by = 10),
limits = c(0,102),
labels = dollar_format(suffix = "%", prefix = "")) +
scale_fill_manual(values = c("#ff0000","#fff68f","#b2b2b2","#1baf05","#006080"), drop = FALSE) +
xlab("") + ylab("") +
theme_classic() +
theme(legend.position="bottom",legend.title = element_blank())

Resources