ggplot: stacked barplot in reverse order - r

So I have data frame
dput(df)
structure(list(Frequency = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L), .Label = c("2", "3", "4", "5"), class = "factor"), Prcentage = c(1,
33, 58, 8, 2, 40, 53, 5), label = list("Insufficient", "Average",
"Good", "Excellent", "Insufficient", "Average", "Good", "Excellent"),
name = c("implementation", "implementation", "implementation",
"implementation", "energy", "energy", "energy", "energy")), .Names = c("Frequency",
"Prcentage", "label", "name"), row.names = c(NA, 8L), class = "data.frame")
And with following code
# Put the rating levels in the required order
df$label <- factor(
  df$label,
  levels = c("Unacceptable", "Insufficient", "Average", "Good", "Excellent")
)
df <- arrange(df, name, desc(label))

# Per name: running midpoint of each row's percentage, used as the label
# position; the label text itself is the rounded percentage plus "%"
df <- ddply(df, .(name), transform, pos = cumsum(Prcentage) - 0.5 * Prcentage)
df$label1 <- sprintf("%.0f%%", df$Prcentage)

# Plot
ggplot(
  df,
  aes(x = factor(name), y = Prcentage, fill = label, order = desc(label))
) +
  geom_bar(stat = "identity", width = 0.5) +
  geom_text(aes(y = pos, label = label1), size = 4) +
  scale_y_continuous(
    position = "top",
    expand = c(0, 0),
    breaks = seq(0, 102, by = 10),
    limits = c(0, 102),
    labels = dollar_format(suffix = "%", prefix = "")
  ) +
  scale_fill_manual(
    values = c("#ff0000", "#fff68f", "#b2b2b2", "#1baf05", "#006080"),
    drop = FALSE
  ) +
  coord_flip() +
  labs(x = "", y = "") +
  theme_classic() +
  theme(legend.position = "bottom", legend.title = element_blank())
I produce the following plot
But now I am struggling to get the bars in reverse order. So my output should be reverse stacked with the right values in the bars (e.g. the 1% yellow segment should be positioned first on the left side of the plot, followed by 33%, then 58%, and 8% on the far right). I've already tried to do this by adding
+ geom_col(position = position_stack(reverse = TRUE)) (after geom_bar)
Which produced this
But this is not correct, as the values in the bars are not correct.
I've also looked here
How to control ordering of stacked bar chart using identity on ggplot2
Reverse fill order for histogram bars in ggplot2
Order Stacked Bar Graph in ggplot
Reverse fill order for histogram bars in ggplot2

The position of the labels is directly set by the pos value; you need to reverse that if you reverse the stack order:
ggplot(df, aes(x = factor(name))) +
  geom_col(
    aes(y = Prcentage, fill = label),
    width = 0.5,
    position = position_stack(reverse = TRUE)
  ) +
  # pos was computed for the un-reversed stack, so mirror it to follow the
  # reversed one (relies on every bar adding up to 100, as in the sample data)
  geom_text(aes(y = 100 - pos, label = label1)) +
  # Rest of the theme
  scale_y_continuous(
    position = "top",
    expand = c(0, 0),
    breaks = seq(0, 102, by = 10),
    limits = c(0, 102),
    labels = dollar_format(suffix = "%", prefix = "")
  ) +
  scale_fill_manual(
    values = c("#ff0000", "#fff68f", "#b2b2b2", "#1baf05", "#006080"),
    drop = FALSE
  ) +
  coord_flip() +
  labs(x = "", y = "") +
  theme_classic() +
  theme(legend.position = "bottom", legend.title = element_blank())

Related

adjust width of dodge bar chart with ggplot when other series is completely missing

I have prepared below function to plot dodge chart using ggplot:
# Dodged bar chart of `value` by `X`, filled by `Q`, with a "<value>%" text
# label drawn just above each bar position (vjust = -0.25).
# chart_data: data frame; the code reads its columns X, Q, value and Stats.
# Returns the ggplot object stored in `chart`.
frq_dodge2 <- function(chart_data) {
# Fill palette handed to scale_fill_manual()
sapphire<-c("#00A8C8","#006D9E","#002C77","#A6E2EF","#51d5ee","#1d5cc7")
g<-ggplot(chart_data, aes(x=X, y=value,fill=Q))
chart <- g+
geom_bar(position = position_dodge2(preserve = "single",width=0.9),stat='identity') +
scale_fill_manual(values = sapphire)+
labs(x= NULL, y= NULL, subtitle=NULL) +
ylab(NULL) +
# NOTE(review): `chart_data =` is not the `data` argument of geom_text(), so
# the subset is presumably not used as this layer's data; `data =` looks
# like what was intended.
geom_text(chart_data = subset(chart_data,value!=0),aes(label=paste0(value,"%")),
position=position_dodge2(width=0.9), vjust=-0.25,
size=3,fontface="bold", colour="#404040") +
labs(x=NULL, y=NULL)+
# y limits: from min(0, twice the smallest value) up to
# max(0, largest value plus one tenth of it)
scale_y_continuous( labels = number_format(suffix = "%"),
limits = c(min(0,min(chart_data$value)+min(chart_data$value)),
max(0,max(chart_data$value) + max(chart_data$value) / 10)))+
# NOTE(review): the x limits come from chart_data$Stats; the sample data
# frames shown with this function have no Stats column, in which case this
# is NULL. Confirm which column was meant.
scale_x_discrete(labels = function(x) str_wrap(x, width = 10),limits=unique(chart_data$Stats))
chart
}
The issue is that when one of the series is completely missing from the data, the bars are too wide and do not look good. For example, for the data below the bars are plotted too wide.
> dput(expat)
structure(list(X = structure(c(1L, 1L), .Label = c("Less than 50",
"50-100", "100-250", "250-500", "500-1000", "1000-3000", "3000-5000",
"more than 5000"), class = "factor"), Q = structure(1:2, .Label = c("2018 (Actual)",
"2019 (Forecast)"), class = "factor"), value = c(100, 100)), class = "data.frame", row.names = c(NA,
-2L))
frq_dodge2(expat) will give the graph output
whereas in other data where the other series is not completely missing plot is ok:
> dput(localplus)
structure(list(X = structure(c(6L, 1L, 6L, 2L, 1L), .Label = c("Less than 50",
"50-100", "100-250", "250-500", "500-1000", "1000-3000", "3000-5000",
"more than 5000"), class = "factor"), Q = structure(c(1L, 1L,
2L, 2L, 2L), .Label = c("2018 (Actual)", "2019 (Forecast)"), class = "factor"),
value = c(14, 86, 11, 22, 67)), class = "data.frame", row.names = c(NA,
-5L))
I had used preserve="single" to fix the bars width in case of missing data in other series but this is not helping if other series is completely missing in the data (like in expat).
Is there any way to fix this?
As mentioned in this answer, you need to have drop = FALSE set in your x scale calls.
In your function, that's the last line :
# drop = FALSE keeps unused factor levels on the x axis
# (spelled out as FALSE because `F` is an ordinary, reassignable variable)
scale_x_discrete(labels = function(x) str_wrap(x, width = 10), drop = FALSE)
For me, this yields the following :
EDIT : remove unneeded labels in x axis
Just check which levels are missing and change their label to "". The full function thus becomes :
# Dodged bar chart of `value` by `X`, filled by `Q`, with a "<value>%" label
# above every non-zero bar.
#
# chart_data: data frame with columns
#   X     - x-axis category; every factor level is kept on the axis, and
#           levels without any rows get an empty axis label
#   Q     - series used for the fill colour
#   value - numeric bar height
# Returns the ggplot object.
frq_dodge2 <- function(chart_data) {
  sapphire <- c("#00A8C8", "#006D9E", "#002C77", "#A6E2EF", "#51d5ee", "#1d5cc7")

  # levels() is NULL for a non-factor column, which would leave the axis
  # without usable labels; as.factor() is a no-op when X already is a factor.
  chart_data$X <- as.factor(chart_data$X)

  # Blank out the label of each level that has no rows, so the empty slots
  # kept by drop = FALSE are not labelled.
  lvs <- levels(chart_data$X)
  lvl_labs <- lvs
  lvl_labs[!lvs %in% unique(chart_data$X)] <- ""

  # y range: from min(0, twice the smallest value) up to
  # max(0, largest value plus one tenth of it), leaving room for the labels
  smallest <- min(chart_data$value)
  largest <- max(chart_data$value)
  y_limits <- c(min(0, smallest + smallest), max(0, largest + largest / 10))

  ggplot(chart_data, aes(x = X, y = value, fill = Q)) +
    geom_bar(
      position = position_dodge2(preserve = "single", width = 0.9),
      stat = "identity"
    ) +
    geom_text(
      data = subset(chart_data, value != 0),
      aes(label = paste0(value, "%")),
      position = position_dodge2(width = 0.9), vjust = -0.25,
      size = 3, fontface = "bold", colour = "#404040"
    ) +
    scale_fill_manual(values = sapphire) +
    scale_y_continuous(labels = number_format(suffix = "%"), limits = y_limits) +
    scale_x_discrete(labels = lvl_labs, drop = FALSE) +
    labs(x = NULL, y = NULL, subtitle = NULL)
}
and yields
As a side note, you had written chart_data = subset... in the geom_text, instead of data = subset....

Positioning labels and color coding in sunburst - R

This is the output. I have a data set which contains the unit, the weight of each unit and the compliance score for each unit in the year 2016.
I was not able to add the table but here is the screenshot for the data in csv
I have named the columns in the data as unit, weight and year(which is compliance score) .
I want to create a sunburst chart where the first ring will be the unit divided based on weight and the second ring will be the same but will have labels compliance score.
The colour for each ring will be different.
I was able to do some code with the help from an online blog and the output I have gotten is similar to what I want but I am facing difficulty in positioning of the labels and also the colour coding for each ring
#using ggplot
library(ggplot2) # Visualisation
library(dplyr) # data wrangling
library(scales) # formatting
# Read file. With header = FALSE every line of the CSV is treated as data
# (presumably including its header line, which would explain why the numeric
# columns are not read as numbers - confirm against the file).
weight.eg <- read.csv("Dummy Data.csv", header = FALSE, sep = ";",
                      encoding = "UTF-8")
# Change column names
colnames(weight.eg) <- c("unit", "weight", "year")
# weight and year are not numeric after import. Going through as.character()
# yields the printed values whether the column is a factor or a character
# vector; cells that are not numbers become NA (with a warning).
weight.eg$weight <- as.numeric(as.character(weight.eg$weight))
weight.eg$year <- as.numeric(as.character(weight.eg$year))
# NAs are introduced by the conversion; remove those rows
weight.eg <- na.omit(weight.eg)
# Sum of the total weight
sum_total_weight <- sum(weight.eg$weight)
# First layer: a one-row data frame holding the total weight
firstLevel <- weight.eg %>% summarize(total_weight = sum(weight))
sunburst_0 <- ggplot(firstLevel) # Just a foundation
# This will generate a bar chart with the total written at half its height
sunburst_1 <-
  sunburst_0 +
  geom_bar(data = firstLevel, aes(x = 1, y = total_weight),
           fill = 'darkgrey', stat = 'identity') +
  geom_text(aes(x = 1, y = sum_total_weight / 2,
                label = paste("Total Weight", comma(total_weight))),
            color = 'black')
# View
sunburst_1
# This argument is used to rotate the plot around the y-axis, which is the
# total weight
sunburst_1 + coord_polar(theta = "y")
# Second ring: one stacked segment per row of weight.eg at x = 2, each
# labelled with its unit and weight.
# Every `+` has to end its line: a line that starts with `+` is parsed as a
# separate expression, so the layer after it would never be added to the plot.
sunburst_2 <-
  sunburst_1 +
  geom_bar(data = weight.eg,
           aes(x = 2, y = weight.eg$weight, fill = weight.eg$weight),
           color = 'white', position = 'stack', stat = 'identity', size = 0.6) +
  geom_text(data = weight.eg,
            aes(label = paste(weight.eg$unit, weight.eg$weight),
                x = 2, y = weight.eg$weight),
            position = 'stack')
sunburst_2 + coord_polar(theta = "y")
# Third ring: the same stacked segments at x = 3, labelled with `year`
sunburst_3 <- sunburst_2 +
  geom_bar(
    data = weight.eg,
    aes(x = 3, y = weight.eg$weight, fill = weight.eg$weight),
    stat = 'identity', position = 'stack',
    color = 'white', size = 0.6
  ) +
  geom_text(
    data = weight.eg,
    aes(x = 3, y = weight.eg$weight, label = paste(weight.eg$year)),
    position = 'stack'
  )
sunburst_3 + coord_polar(theta = "y")
# Final chart: comma-formatted axis, white-to-dark-red fill, minimal theme
sunburst_3 +
  scale_y_continuous(labels = comma) +
  scale_fill_continuous(low = 'white', high = 'darkred') +
  coord_polar('y') +
  theme_minimal()
Output for dput(weight.eg)
structure(list(unit = structure(2:7, .Label = c("", "A", "B",
"C", "D", "E", "F", "Unit"), class = "factor"), weight = c(30,
25, 10, 17, 5, 13), year = c(70, 80, 50, 30, 60, 40)), .Names =
c("unit",
"weight", "year"), row.names = 2:7, class = "data.frame", na.action
= structure(c(1L,
8L), .Names = c("1", "8"), class = "omit"))
output for dput(firstLevel)
structure(list(total_weight = 100), .Names = "total_weight", row.names
= c(NA,
-1L), na.action = structure(c(1L, 8L), .Names = c("1", "8"), class =
"omit"), class = "data.frame")
So I think I might have some sort of solution for you. I wasn't sure what you wanted to color-code on the outer ring; from your code it seems you wanted it to be the weight again, but it was not obvious to me. For different colour scales per ring, you could use the ggnewscale package:
library(ggnewscale)
For the centering of the labels you could write a function:
# Midpoint of each element's segment when the values of x are stacked end to
# end: the average of the running total before and after the element.
cs_fun <- function(x) {
  segment_end <- cumsum(x)
  segment_start <- c(0, head(segment_end, -1))
  (segment_end + segment_start) / 2
}
Now the plotting code could look something like this:
# Sunburst built from stacked columns at x = 1, 2, 3 that are wrapped around
# the y axis by coord_polar(). Layers that give no `data` use weight.eg.
ggplot(weight.eg) +
# Note: geom_col is equivalent to geom_bar(stat = "identity")
# Centre (x = 1): a single column for the total weight ...
geom_col(data = firstLevel,
aes(x = 1, y = total_weight)) +
# ... with its label placed at half the total
geom_text(data = firstLevel,
aes(x = 1, y = total_weight / 2,
label = paste("Total Weight:", total_weight)),
colour = "black") +
# First ring (x = 2): one segment per row, filled by weight
geom_col(aes(x = 2,
y = weight, fill = weight),
colour = "white", size = 0.6) +
scale_fill_gradient(name = "Weight",
low = "white", high = "darkred") +
# Open up new fill scale for next ring
new_scale_fill() +
# Labels of the first ring, positioned with cs_fun(weight)
geom_text(aes(x = 2, y = cs_fun(weight),
label = paste(unit, weight))) +
# Second ring (x = 3): same segments, coloured by the second fill scale
geom_col(aes(x = 3, y = weight, fill = weight),
size = 0.6, colour = "white") +
scale_fill_gradient(name = "Another Weight?",
low = "forestgreen", high = "white") +
# Labels of the second ring show the `year` column
geom_text(aes(label = paste0(year), x = 3,
y = cs_fun(weight))) +
coord_polar(theta = "y")
Which looks like this:

Why are only some of the tick marks showing and how can I show all of them?

I have a chart that shows a week's worth of data.
The trouble is that it doesn't show all of the date values on the axis (code is below):
How can I fix this to show all 7 date values?
Code:
Dataset:
us_chats <- structure(list(date = structure(c(1524783600, 1524870000, 1524870000,
1524956400, 1525042800, 1525042800, 1525129200, 1525129200, 1525215600,
1525215600, 1525302000), class = c("POSIXct", "POSIXt"), tzone = ""),
type = c("Completed", "Completed", "Missed", "Completed",
"Completed", "Missed", "Completed", "Missed", "Completed",
"Missed", "Completed"), count = c(2L, 2L, 1L, 1L, 7L, 2L,
2L, 1L, 5L, 4L, 4L)), row.names = c(NA, -11L), class = "data.frame")
Chart code:
us_chats %>%
  # One Completed and one Missed column per date (0 where a type is absent)
  spread(type, count, fill = 0) %>%
  # Create the Total column
  mutate(Total = Completed + Missed) %>%
  ggplot(aes(x = date, y = Total)) +
  # Total bar (with stat = "identity")
  geom_col(aes(fill = "Total"), colour = "black") +
  # Missed bar, drawn over the total bar
  geom_col(aes(y = Missed, fill = "Missed"), colour = "black") +
  # Total label
  geom_text(
    aes(label = paste("Total chats:", Total)),
    hjust = -0.05, vjust = 1
  ) +
  # Missed label, including the missed percentage
  geom_text(
    aes(label = paste(
      "Missed chats:", Missed, "(", round(Missed/Total*100, 2), "%)"
    )),
    hjust = -0.05, vjust = -0.5, color = "red"
  ) +
  # Manual fill scale
  scale_fill_manual(
    name = "",
    values = c(Total = "forestgreen", Missed = "red")
  ) +
  # Extend the value axis so the labels stay visible
  scale_y_continuous(limits = c(0, max(us_chats$count) * 3)) +
  # Switch x and y axes
  coord_flip()
Jack Brookes suggested using scale_x_date(date_breaks = "1 day"). That doesn't quite work because it requires the data to be of class Date. Unfortunately, that is a little ambiguous, because the date corresponding to a time depends on your time zone. If you happen to be in timezone NZ, then this may give you what you want:
us_chats %>%
  # One Completed and one Missed column per date (0 where a type is absent)
  spread(type, count, fill = 0) %>%
  # Create the Total column
  mutate(Total = Completed + Missed) %>%
  # Convert the timestamps to calendar dates in the NZ time zone so that
  # scale_x_date() can be used
  ggplot(aes(x = as.Date(date, tz = "NZ"), y = Total)) +
  # Total bar (with stat = "identity")
  geom_col(aes(fill = "Total"), colour = "black") +
  # Missed bar, drawn over the total bar
  geom_col(aes(y = Missed, fill = "Missed"), colour = "black") +
  # Total label
  geom_text(
    aes(label = paste("Total chats:", Total)),
    hjust = -0.05, vjust = 1
  ) +
  # Missed label, including the missed percentage
  geom_text(
    aes(label = paste(
      "Missed chats:", Missed, "(", round(Missed/Total*100, 2), "%)"
    )),
    hjust = -0.05, vjust = -0.5, color = "red"
  ) +
  # Manual fill scale
  scale_fill_manual(
    name = "",
    values = c(Total = "forestgreen", Missed = "red")
  ) +
  # Extend the value axis so the labels stay visible
  scale_y_continuous(limits = c(0, max(us_chats$count) * 3)) +
  # One axis break per day
  scale_x_date(date_breaks = "1 day", name = "Date") +
  coord_flip()
You can use OlsonNames() to see a list of timezone names that R will recognize. Your system may accept some others, too.

Add axis text above horizontal geom_bars; justify text flush left

I would like to place axis text above the appropriate horizontal bars in a ggplot2 plot. Below is as far as I have gotten, with the plotting code afterwards. The data is at the bottom of this question.
My questions, aside from the ever-present "what code would accomplish the goal better", are (1) instead of my manually entering rectangle and text locations, how can R place them algorithmically, (2) how can R move the text in the rectangles to the far left (I tried with calculating a mid-point based on the number of characters in the text, but it doesn't work)?
For the plotting I created the sequence variable instead of struggling with as.numeric(as.character(risks)).
ggplot(plotpg19, aes(x = sequence, y = scores)) +
  geom_bar(stat = "identity", width = 0.4) +
  # Manually positioned white rectangles, one per bar
  geom_rect(
    data = plotpg19,
    aes(
      xmin = seq(1.5, 8.5, 1), xmax = seq(1.8, 8.8, 1),
      ymin = 0, ymax = 30
    ),
    fill = "white"
  ) +
  # Risk text inside the rectangles; y is half the number of characters
  geom_text(
    data = plotpg19,
    aes(
      x = seq(1.6, 8.6, 1),
      y = nchar(as.character(risks))/2,
      label = risks,
      size = 5,
      show_guide = FALSE
    )
  ) +
  coord_flip() +
  labs(x = "", y = "") +
  theme_bw() +
  theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
  guides(size = FALSE)
Below is the data.
plotpg19 <- structure(list(risks = structure(c(8L, 7L, 6L, 5L, 4L, 3L, 2L,
1L), .Label = c("Other", "Third parties/associates acting on our behalf",
"Rogue employees", "Lack of understanding by top executives",
"Lack of anti-bribery/corruption training or awareness within the business",
"Geographic locations in which the company operates", "Industries/sector(s) in which the company operates",
"Inadequate resources for anti-bribery/corruption compliance activities"
), class = "factor"), scores = c(15, 28, 71, 16, 5, 48, 55, 2
), sequence = 1:8), .Names = c("risks", "scores", "sequence"), class = "data.frame", row.names = c(NA,
-8L))
This question gave me some guidance. fitting geom_text inside geom_rect
I do not understand why you are plotting a white geom_rect. For the second question, setting y=0 in the aes of geom_text and adding hjust=0 (start the text at precisely y) works. I adjusted the x parameter so that the text is plotted halfway through bars:
library(dplyr)
library(ggplot2)

# Label position on the (flipped) x axis: offset from the bar, which is
# 0.4 wide and centred on `sequence`
plotpg19 <- mutate(plotpg19, xtext = sequence + 0.55)

ggplot(plotpg19, aes(x = sequence, y = scores)) +
  geom_bar(stat = "identity", width = 0.4) +
  # y = 0 with hjust = 0 starts every label exactly at the axis origin.
  # size is a fixed setting, so it is passed outside aes(); inside aes() the
  # constant would be mapped through a size scale (and need its legend hidden).
  geom_text(
    aes(x = xtext, y = 0, label = risks),
    size = 5, hjust = 0, vjust = 1
  ) +
  coord_flip() +
  labs(x = "", y = "") +
  theme_bw() +
  theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())

Stacked Bar Graph Labels with ggplot2

I am trying to graph the following data:
to_graph <- structure(list(Teacher = c("BS", "BS", "FA"
), Level = structure(c(2L, 1L, 1L), .Label = c("BE", "AE", "ME",
"EE"), class = "factor"), Count = c(2L, 25L, 28L)), .Names = c("Teacher",
"Level", "Count"), row.names = c(NA, 3L), class = "data.frame")
and want to add labels in the middle of each piece of the bars that are the percentage for that piece. Based on this post, I came up with:
# Bar chart of Count per Teacher, filled by Level and drawn with
# position = 'fill', plus a text layer showing Count.
# NOTE(review): opts(), theme_text() and formatter= look like an older
# ggplot2 API; confirm the ggplot2 version before running this.
ggplot(data=to_graph, aes(x=Teacher, y=Count, fill=Level), ordered=TRUE) +
geom_bar(aes(fill = Level), position = 'fill') +
opts(axis.text.x=theme_text(angle=45)) +
scale_y_continuous("",formatter="percent") +
opts(title = "Score Distribution") +
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF")) +
# The bars use position 'fill' while the labels use position "stack", so the
# two layers are not placed on the same scale
geom_text(aes(label = Count), size = 3, hjust = 0.5, vjust = 3, position = "stack")
But it
Doesn't have any effect on the graph
Probably doesn't display the percentage if it did (although I'm not entirely sure of this point)
Any help is greatly appreciated. Thanks!
The y-coordinate of the text is the actual count (2, 25 or 28), whereas the y-coordinates in the plot panel range from 0 to 1, so the text is being printed off the top.
Calculate the fraction of counts using ddply (or tapply or whatever).
# Share of each row's Count within its Teacher. transform (rather than
# summarise) keeps the original columns next to the new one, so the result
# can replace to_graph directly: the fraction gets a proper column name
# (cbind() would name it "graph_avgs$Count.Fraction") and the rows of
# to_graph and graph_avgs are identical, hence always aligned.
graph_avgs <- ddply(
  to_graph,
  .(Teacher),
  transform,
  Count.Fraction = Count / sum(Count)
)
to_graph <- graph_avgs
A simplified version of your plot. I haven't bothered to play about with factor orders so the numbers match up to the bars yet.
# Filled bar chart of Count per Teacher by Level, with each row's fraction
# printed as text at a y position equal to that fraction.
ggplot(to_graph, aes(Teacher), ordered = TRUE) +
geom_bar(aes(y = Count, fill = Level), position = 'fill') +
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF")) +
# NOTE(review): y and label are taken from graph_avgs rather than from the
# plot data, so this assumes graph_avgs has the same row order as to_graph;
# verify before reusing with other data.
geom_text(
aes(y = graph_avgs$Count.Fraction, label = graph_avgs$Count.Fraction),
size = 3
)

Resources