Add axis text above horizontal geom_bars; justify text flush left - r

I would like to place axis text above the appropriate horizontal bars in a ggplot2 plot. Below is as far as I have gotten, with the plotting code afterwards. The data is at the bottom of this question.
My questions, aside from the ever-present "what code would accomplish the goal better", are (1) instead of my manually entering rectangle and text locations, how can R place them algorithmically, (2) how can R move the text in the rectangles to the far left (I tried with calculating a mid-point based on the number of characters in the text, but it doesn't work)?
For the plotting I created the sequence variable instead of struggling with as.numeric(as.character(risks)).
ggplot(plotpg19, aes(x = sequence, y = scores)) +
geom_bar(stat = "identity", width = 0.4) +
coord_flip() +
labs(x = "", y = "") +
theme_bw() +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
geom_rect(data=plotpg19, aes(xmin= seq(1.5, 8.5, 1),
xmax= seq(1.8, 8.8, 1), ymin=0, ymax=30), fill = "white") +
geom_text(data=plotpg19, aes(x=seq(1.6, 8.6, 1), y= nchar(as.character(risks))/2, label=risks, size = 5, show_guide = FALSE)) +
guides(size = FALSE)
Below is the data.
plotpg19 <- structure(list(risks = structure(c(8L, 7L, 6L, 5L, 4L, 3L, 2L,
1L), .Label = c("Other", "Third parties/associates acting on our behalf",
"Rogue employees", "Lack of understanding by top executives",
"Lack of anti-bribery/corruption training or awareness within the business",
"Geographic locations in which the company operates", "Industries/sector(s) in which the company operates",
"Inadequate resources for anti-bribery/corruption compliance activities"
), class = "factor"), scores = c(15, 28, 71, 16, 5, 48, 55, 2
), sequence = 1:8), .Names = c("risks", "scores", "sequence"), class = "data.frame", row.names = c(NA,
-8L))
This question gave me some guidance. fitting geom_text inside geom_rect

I do not understand why you are plotting white geom_rect. For the second question, setting y=0 in the aes of geom_text and adding hjust=0 (start the text at precisely y) works. I adjusted the x parameter so that the text is plotted halfway through bars:
library(dplyr)
library(ggplot2)

# Bars are drawn at the integer `sequence` positions with width 0.4, so a
# label anchored at sequence + 0.55 (top-aligned via vjust = 1) falls in the
# gap next to its bar on the flipped axis.
plotpg19 <- mutate(plotpg19, xtext = sequence + 0.55)

ggplot(plotpg19, aes(x = sequence, y = scores)) +
  geom_col(width = 0.4) +
  coord_flip() +
  labs(x = "", y = "") +
  theme_bw() +
  theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
  # `size` is a constant, not a data mapping, so it is set outside aes().
  # This gives a literal text size of 5 and creates no size legend, which
  # makes a separate guides() call to hide that legend unnecessary.
  # y = 0 with hjust = 0 starts every label flush left at the bar origin.
  geom_text(
    aes(x = xtext, y = 0, label = risks),
    size = 5, hjust = 0, vjust = 1
  )

Related

make height of colour bar match the height of the main plot in R/ggplot [duplicate]

I have generated a simple plot in R that shows the correlation coefficients for a set of data. Currently, the legend colorbar on the right side of the plot is a fraction of the entire plot size.
I would like the legend colorbar to be same height as the plot. I thought that I could use the legend.key.height to do this, but I have found that is not the case. I investigated the grid package unit function and found that there were some normalized units in there but when I tried them (unit(1, "npc")), the colorbar was way too tall and went off the page.
How can I make the legend the same height as the plot itself?
A full self contained example is below:
library(ggplot2)
corrs <- structure(list(Var1 = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), levels = c("Var1", "Var2", "Var3"), class = "factor"), Var2 = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), levels = c("Var1", "Var2", "Var3"), class = "factor"), value = c(1, -0.11814395012334, -0.91732952510938, -0.969618394505233, 1, -0.00122085912153125, -0.191116513684392, -0.0373711776919663, 1)), class = "data.frame", row.names = c(NA, -9L))
ggplot(corrs, aes(x = Var1, y = Var2, fill = value)) +
geom_tile() +
theme(
panel.border = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
aspect.ratio = 1,
legend.position = "right",
legend.key.height = unit(1, "inch")
)
Created on 2022-12-29 with reprex v2.0.2
Edit Updating to ggplot v3.0.0
This is messy, but based on this answer, and delving deeper into the ggplot grob, the legend can be positioned precisely.
# Load the needed libraries
library(ggplot2)
library(gtable) #
library(grid)
library(scales)
library(reshape2)
# Generate a collection of sample data
variables = c("Var1", "Var2", "Var3")
data = matrix(runif(9, -1, 1), 3, 3)
diag(data) = 1
colnames(data) = variables
rownames(data) = variables
# Generate the plot
corrs = data
plot = ggplot(melt(corrs), aes(x = Var1, y = Var2, fill = value)) +
geom_tile() +
theme_bw() +
theme(panel.border = element_blank()) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(aspect.ratio = 1) +
# theme(legend.position = "right", legend.key.height = unit(1, "inch")) +
labs(x = "", y = "", fill = "", title = "Correlation Coefficients") +
scale_fill_gradient2(limits = c(-1, 1), breaks = c(-1, -.5, 0, .5, 1), expand = c(0,0),
low = muted("red"), mid = "black", high = muted("blue")) + # Modified line
geom_text(parse = TRUE, aes(label = sprintf("%.2f", value)), size = 3, color = "white") +
scale_x_discrete(expand = c(0,0)) + # New line
scale_y_discrete(expand = c(0,0)) # New line
plot
# Get the ggplot grob
gt = ggplotGrob(plot)
# Get the legend
leg = gtable_filter(gt, "guide-box")
# Raster height
leg[[1]][[1]][[1]][[1]][[1]][[2]]$height = unit(1, "npc")
# Positions for labels and tick marks - five breaks, therefore, five positions
pos = unit.c(unit(0.01,"npc"), unit(.25, "npc"), unit(.5, "npc"), unit(.75, "npc"), unit(.99, "npc"))
# Positions the labels
leg[[1]][[1]][[1]][[1]][[1]][[3]]$children[[1]]$y = pos
# Positions the tick marks
leg[[1]][[1]][[1]][[1]][[1]][[5]]$y0 = pos
leg[[1]][[1]][[1]][[1]][[1]][[5]]$y1 = pos
# Legend key height ?
leg[[1]][[1]][[1]][[1]]$heights = unit.c(rep(unit(0, "mm"), 3),
unit(1, "npc"),
unit(0, "mm"))
# Legend height
leg[[1]][[1]]$heights[[3]] = sum(rep(unit(0, "mm"), 3),
unit(1, "npc"),
unit(0, "mm"))
# grid.draw(leg) # Check on heights and y values
# gtable_show_layout(gt) # Manually locate position of legend in layout
gt.new = gtable_add_grob(gt, leg, t = 7, l = 9)
# Draw it
grid.newpage()
grid.draw(gt.new)
It seems quite tricky, the closest I got was this,
## panel height is 1null, so we work it out by subtracting the other heights from 1npc
## and 1line for the default plot margins
panel_height <- unit(1,"npc") - sum(ggplotGrob(plot)[["heights"]][-3]) - unit(1,"line")
plot + guides(fill= guide_colorbar(barheight=panel_height))
unfortunately the vertical justification is a bit off.
The following option is a function that can be added to a ggplot to take the plot and make the colorbar the same height as the panel.
Essentially it uses the same technique as Baptiste, but is a bit more robust to changes in the ggplot implementation, and moves the legend title to a more natural position, allowing neater alignment. It also allows more recognisable ggplot-style syntax.
# Build the marker object that is added to a plot with `+`.
# It is an empty string carrying the S3 class "fullsizebar"; all the work is
# done by the method dispatched on that class.
make_fullsize <- function() {
  marker <- ""
  class(marker) <- "fullsizebar"
  marker
}
# ggplot_add method for objects of class "fullsizebar".
#
# obj  - the marker object being added (not used beyond dispatch).
# g    - the ggplot the marker is added to.
# name - name of the added object (unused).
#
# Returns `g` with a fill colorbar whose bar height is the full viewport
# height (1npc) minus every layout row that is not a "null"-unit row, plus a
# rotated legend title placed to the right of the bar.
ggplot_add.fullsizebar <- function(obj, g, name = "fullsizebar") {
  row_heights <- ggplotGrob(g)$heights
  # Rows measured in "null" units are the flexible ones (the panel).
  panel_rows <- which(grid::unitType(row_heights) == "null")
  fixed_rows_total <- sum(row_heights[-panel_rows])
  bar_height <- unit(1, "npc") - fixed_rows_total

  full_height_bar <- guide_colorbar(
    barheight = bar_height,
    title.position = "right"
  )
  rotated_title <- theme(
    legend.title = element_text(angle = -90, hjust = 0.5)
  )

  g + guides(fill = full_height_bar) + rotated_title
}
You can then do:
ggplot(corrs, aes(x = Var1, y = Var2, fill = value)) +
geom_tile() +
coord_cartesian(expand = FALSE) +
make_fullsize()
The drawback is that the plot needs to be re-drawn if the plotting window is resized after the plot is drawn. This is a bit of a pain, but it's a fairly quick-and-simple fix. It will still work well with ggsave.
Note that the color bar is the same height as the panel, which is why it looks a bit neater to turn the expansion off in coord_cartesian, so it matches the actual tiles of the heatmap.
Another example for one of the linked duplicates:
library(reshape2)
dat <- iris[, 1:4]
# Spell the `use` option out in full rather than relying on partial matching
# of "p", and give the long-format result its own name instead of reusing
# `cor`, the name of the function that produced it.
cor_long <- melt(cor(dat, use = "pairwise.complete.obs"))
ggplot(data = cor_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  labs(x = "", y = "") +
  scale_fill_gradient2(limits = c(-1, 1)) +
  # No axis expansion, so the panel edges coincide with the outer tiles.
  coord_cartesian(expand = FALSE) +
  make_fullsize()
The problem is that the plot panel does not have defined dimensions until drawing ("NULL unit"), but your legend guide has. See also npc coordinates of geom_point in ggplot2 or figuring out panel size given dimensions for the final plot in ggsave (to show count for geom_dotplot). I think it will be extremely tricky to draw the legend guide in the exact same size as your panel.
However, you can make use of a trick when dealing with complex legend formatting: Create a fake legend. The challenge here is to adjust the fill scale to perfectly match the range of your plot (which is not usually exactly the range of your data values). The rest is just a bit of R semantics. Some important comments in the code.
library(ggplot2)
corrs <- structure(list(Var1 = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), levels = c("Var1", "Var2", "Var3"), class = "factor"), Var2 = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), levels = c("Var1", "Var2", "Var3"), class = "factor"), value = c(1, -0.11814395012334, -0.91732952510938, -0.969618394505233, 1, -0.00122085912153125, -0.191116513684392, -0.0373711776919663, 1)), class = "data.frame", row.names = c(NA, -9L))
## to set the scale properly, you will need to set limits and breaks,
## I am doing this semi-automatically
range_fill <- range(corrs$value)
lim_fill <- c(floor(range_fill[1]), ceiling(range_fill[2]))
## the number of breaks and rounding to 2 digits are choices, depending on
## the scale breaks that you want; the break count is defined once here and
## reused below so the separators always match the breaks
n_breaks <- 5
breaks_fill <- round(seq(lim_fill[1], lim_fill[2], length.out = n_breaks), 2)
## need to rescale the fill to the range of your y values,
## so that your fill scale correctly corresponds to the range of your y
## however, the actual range of your plot depends if you're in a discrete or continuous range.
## here in a discrete range
lim_y <- range(as.integer(corrs$Var2))
lim_x <- range(as.integer(corrs$Var1))
## tiles extend half a unit beyond the outermost discrete positions
lim_vals <- lim_y + c(-.5, .5)
## actual rescaling happens here
new_y <- scales::rescale(breaks_fill, lim_vals)
## the position and width of the color bar are defined with
scl_x <- lim_x[2] + .7 # the constant is hard coded
scl_xend <- scl_x + .2 # also hard coded
## make a data frame for the segments to be created
## using approx so that we have many little segments
approx_fill <- approx(new_y, breaks_fill, n = 1000)
df_seg <- data.frame(y = approx_fill$x, color = approx_fill$y)
## data frame for labels, xend position being hard coded
df_lab <- data.frame(y = new_y, x = scl_xend + .1, label = breaks_fill)
## data frame for separators: one short tick at the left edge and one at the
## right edge of the bar for every break
sep_len <- .05
df_sep <- data.frame(
  y = new_y, yend = new_y,
  x = rep(c(scl_x, scl_xend - sep_len), each = length(new_y)),
  xend = rep(c(scl_x + sep_len, scl_xend), each = length(new_y))
)
ggplot(corrs) +
geom_tile(aes(x = Var1, y = Var2, fill = value)) +
geom_segment(
data = df_seg,
aes(x = scl_x, xend = scl_xend, y = y, yend = y, color = color)
) +
## now the labels, the size being hard coded
geom_text(data = df_lab, aes(x, y, label = label), size = 9 * 5 / 14) +
## now make the white little separators
geom_segment(
data = df_sep, aes(x = x, xend = xend, y = y, yend = yend),
color = "white"
) +
## set both color and fill scales exactly
scale_fill_continuous(limits = lim_fill, breaks = breaks_fill) +
scale_color_continuous(limits = lim_fill, breaks = breaks_fill) +
## turn off coordinate clipping and limit panel to data area)
coord_cartesian(xlim = lim_x, ylim = lim_y, clip = "off") +
## last but not least remove the other legends and add a bit of margin
theme(
legend.position = "none",
plot.margin = margin(r = 1, unit = "in")
)
Created on 2022-12-29 with reprex v2.0.2

How to plot two variables side by side in the same ggplot using geom_col?

I have the following data
structure(list(id = 1:7, date = c(2019L, 2019L, 2019L, 2019L,
2019L, 2019L, 2019L), station = structure(1:7, .Label = c("41B004",
"41B011", "41MEU1", "41N043", "41R001", "41R012", "41WOL1"), class = "factor"),
days = c(6L, 21L, 5L, 9L, 13L, 14L, 3L), mean3y = c(8.33,
21.3, NA, 10, 11.3, 16.3, 3.67), environ = structure(c(3L,
4L, 2L, 1L, 3L, 4L, 3L), .Label = c("Industriel avec influence modérée du trafic",
"Urbain avec faible influence du trafic", "Urbain avec influence modérée du trafic",
"Urbain avec très faible influence du trafic"), class = "factor")), class = "data.frame", row.names = c(NA,
-7L))
which is plotted with the following ggplot code
ggplot(data, aes(x = reorder(station, -days),
y = days, fill = environ)) +
geom_col(width = 0.5, colour = "black", size = 0.5) +
guides(fill = guide_legend(ncol = 2)) +
geom_text(aes(label = days),
vjust=-0.3, color="black", size = 3.5) +
geom_hline(aes(yintercept = 25),
linetype = 'dashed', colour = 'red', size = 1) +
labs(x = '', y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")) +
theme_minimal() +
theme(legend.position="bottom", legend.title = element_blank(),
legend.margin=margin(l = -2, unit='line'),
legend.text = element_text(size = 11),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 11),
axis.text.x = element_text(size = 11),
panel.grid.major.x = element_blank()) +
geom_hline(yintercept = 0)
generating this figure.
I would like to also add in this figure the variable mean3y besides days for each x value using another geom_col, such as
p <- ggplot(data, aes(x = reorder(station, -days),
y = days, fill = environ)) +
geom_col(width = 0.5, colour = "black", size = 0.5) +
guides(fill = guide_legend(ncol = 2)) +
geom_text(aes(label = days),
vjust=-0.3, color="black", size = 3.5) +
geom_col(aes(x = reorder(station, -days),
y = mean3y, fill = environ),
inherit.aes = FALSE,
width = 0.5, colour = "black", size = 0.5) +
geom_hline(aes(yintercept = 25),
linetype = 'dashed', colour = 'red', size = 1) +
labs(x = '', y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")) +
theme_minimal() +
theme(legend.position="bottom",
legend.title = element_blank(),
legend.margin=margin(l = -2, unit='line'),
legend.text = element_text(size = 11),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 11),
axis.text.x = element_text(size = 11),
panel.grid.major.x = element_blank()) +
geom_hline(yintercept = 0)
However, I was not able to achieve the desired result, despite the use of position = "dodge", as illustrated by this figure where both variables are overlapping.
Is there a way to achieve this, please ?
Many thanks.
Position dodges only work in a single layer and not between multiple layers. You could either solve the problem by manually nudging them or by formatting the data in such a way that it can be dodged. Examples of both in code below.
Your data was hard to copy into my R session and your code was more elaborate than necessary to demonstrate the problem, so I've kept both to a minimum.
library(ggplot2)
df <- data.frame(
x = c("A", "B"),
y = c(10, 15),
z = c(12, 9)
)
# Example of nudging
# Choose width and nudge values manually to fit your data
ggplot(df, aes(x, y)) +
geom_col(aes(fill = "first col"),
width = 0.45,
position = position_nudge(x = -0.225)) +
geom_col(aes(y = z, fill = "second_col"),
width = 0.45,
position = position_nudge(x = 0.225))
library(dplyr)
#> Warning: package 'dplyr' was built under R version 3.6.3
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example of dodging + data formatting
ggplot(mapping = aes(x, y)) +
geom_col(data = rbind(mutate(df, a = "first_col"),
mutate(df, y = z, a = "second_col")),
aes(fill = a),
position = "dodge")
Created on 2020-04-16 by the reprex package (v0.3.0)
Consider this possible solution for your dataset - although you may want to play around with the aesthetics. I attempted to keep the aesthetics as similar as possible and set the bars to be the same color (based on df$environ), but make the difference between "days" and "mean3y" clear with text labels.
Data Preparation
First, we need to take the information from two columns and combine them: "days" and "mean3y". In your original data frame, these two columns can (and should) be combined to show type of value and the value itself. What we want to do is convert this type of data:
day.type.1 day.type.2
1 4 1
2 5 3
3 6 4
4 7 5
To this type of data:
day.type day.value
1 day.type.1 4
2 day.type.1 5
3 day.type.1 6
4 day.type.1 7
5 day.type.2 1
6 day.type.2 3
7 day.type.2 4
8 day.type.2 5
In the above example, you can use the gather() function from tidyr:
t %>% gather('day.type', 'day.value')
If we apply that to your data frame, we have to specify to do that to the data frame, but ignore the other columns:
# Gather only the two measurement columns. Naming them explicitly leaves every
# other column (including `id`) untouched as an identifier; excluding just
# date/station/environ would also gather `id` into "variable"/"value".
df1 <- df %>% gather("variable", "value", days, mean3y)
This converts your "days" and "mean3y" columns into two new columns called "variable" (which is either "days" or "mean3y") and "value" (which is the actual number)
I also had to convert the new column "value" into numeric... but that could have been due to how I had to import your data, which was... difficult. Please note, it is recommended that you include your dataset in future questions via the output of dput(your.data.frame)... believe me it makes all the difference. ;)
Plotting the new Dataset
Here the idea is to keep your same x axis, but we are now setting "value" as the y aesthetic. In addition, you want to make sure to include a group= aesthetic of "variable" so that dodging works appropriately for text and columns. If you are not familiar, "dodging" is the term for when a geom is kind of "split" across an axis aesthetic: like "subsetting" of discrete axis values.
The geom_col call is set for position='dodge'... not much else changes there. You need this because the default position is set to "stacked" (which is why your attempt resulted in columns "stacked" on top of one another).
The geom_text call has a few things going on:
The dodge is set here with position=position_dodge(), which allows you to specify how far apart the "dodge" will be. It allowed me to "push apart" the labels to be a bit wider so that the text looks okay and doesn't run into the adjacent column. A larger width= argument in position_dodge() results in "pushing" the labels further apart. A value of 0 would be putting the labels in the center of the x axis aesthetic... 0.5 is default.
The label aesthetic is actually using both "variable" and "value" columns as a way to differentiate your columns from one another. I used paste0 and stuck a '\n' in-between so that you had two lines and could fit them. Had to adjust the size a bit too.
By default, the labels would be positioned right at y (value), which would mean they would overlap with your columns. You need to "nudge" them up, but cannot use nudge_y to push them up because you cannot combine nudge_y with position. What to do? Well, we can just overwrite the default y aesthetic by setting it equal to y + "a number" to nudge them up. Much better to do it this way.
Here's the final code:
ggplot(df1, aes(x = reorder(station, -value),
y = value, fill = environ,
group=variable)) +
geom_col(width = 0.5, colour = "black", size = 0.5, position='dodge') +
guides(fill = guide_legend(ncol = 2)) +
geom_text(aes(label = paste0(variable,'\n', value), y=value+1.5),
color="black", size = 3,
position=position_dodge(0.7)) +
geom_hline(aes(yintercept = 25),
linetype = 'dashed', colour = 'red', size = 1) +
labs(x = '', y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")) +
theme_minimal() +
theme(legend.position="bottom", legend.title = element_blank(),
legend.margin=margin(l = -2, unit='line'),
legend.text = element_text(size = 11),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 11),
axis.text.x = element_text(size = 11),
panel.grid.major.x = element_blank()) +
geom_hline(yintercept = 0)
One way to achieve this is to convert the data to long format via e.g. tidyr::pivot_longer, so that the variables we want to plot are categories of one variable. To get the order of the stations right I reorder station according to days before converting to long. To get the bars side-by-side I use position_dodge2 both in geom_col and geom_text. To show which bar corresponds to which var I put the names of the vars in the labels above the bars.
library(ggplot2)
library(dplyr)
library(tidyr)
data1 <- data %>%
mutate(station = forcats::fct_reorder(station,-days)) %>%
pivot_longer(c(days, mean3y), names_to = "var", values_to = "value")
# Remove the literal tokens "days." and "mean3y." from axis labels.
#
# x: character vector of labels.
# Returns a character vector of the same length with those tokens deleted.
# The dots are escaped so that only a real "." is matched; an unescaped "."
# would match any character (e.g. it would also strip "daysX").
my_labels <- function(x) {
  gsub("(days\\.|mean3y\\.)", "", x)
}
p <- ggplot(data1, aes(x = station, y = value, fill = environ)) +
geom_col(position = position_dodge2(preserve = "single"), colour = "black") +
guides(fill = guide_legend(ncol = 2)) +
geom_text(aes(label = paste(var, "\n", value)), position = position_dodge2(width = .9, preserve = "single"), vjust=-0.3, color="black", size = 3.5) +
scale_x_discrete(labels = my_labels) +
geom_hline(aes(yintercept = 25), linetype = 'dashed', colour = 'red', size = 1) +
labs(x = '', y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")) +
theme_minimal() + theme(legend.position="bottom", legend.title = element_blank(), legend.margin=margin(l = -2, unit='line'),
legend.text = element_text(size = 11),
axis.text.y = element_text(size = 12), axis.title.y = element_text(size = 11),
axis.text.x = element_text(size = 11),
panel.grid.major.x = element_blank()) + geom_hline(yintercept = 0)

ggplot: stacked barplot in reverse order

So I have data frame
dput(df)
structure(list(Frequency = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L), .Label = c("2", "3", "4", "5"), class = "factor"), Prcentage = c(1,
33, 58, 8, 2, 40, 53, 5), label = list("Insufficient", "Average",
"Good", "Excellent", "Insufficient", "Average", "Good", "Excellent"),
name = c("implementation", "implementation", "implementation",
"implementation", "energy", "energy", "energy", "energy")), .Names = c("Frequency",
"Prcentage", "label", "name"), row.names = c(NA, 8L), class = "data.frame")
And with following code
# Get the levels for type in the required order
df$label = factor(df$label, levels = c("Unacceptable","Insufficient", "Average","Good","Excellent"))
df = arrange(df, name, desc(label))
# Format the labels and calculate their positions
df = ddply(df, .(name), transform, pos = (cumsum(Prcentage) - 0.5 * Prcentage))
df$label1 = paste0(sprintf("%.0f", df$Prcentage), "%")
# Plot
ggplot(df, aes(x = factor(name), y = Prcentage, fill = label, order=desc(label))) +
geom_bar(stat = "identity", width = 0.5) +
geom_text(aes(y = pos, label = label1), size = 4) + theme_classic() +
scale_y_continuous(position = "top",expand = c(0, 0),breaks = seq(min(0), max(0,102), by = 10),limits = c(0,102),labels = dollar_format(suffix = "%", prefix = "")) +
coord_flip() +
xlab("") + ylab("") +
theme(legend.position="bottom",legend.title = element_blank()) +
scale_fill_manual(values = c("#ff0000","#fff68f","#b2b2b2","#1baf05","#006080"),drop = FALSE)
I produce the following plot
But now I am struggling to get the bars in reverse order. So my output should be reverse stacked with the right values in bars (e.g. 1% yellow should be positioned first in the left side of the plot, then follows 33%, then 58% and far right 8%). I've already tried to do this with adding
+ geom_col(position = position_stack(reverse = TRUE)) (after geom_bar)
Which produced this
But this is not correct as values in bars are not correct.
I've also looked here
How to control ordering of stacked bar chart using identity on ggplot2
Reverse fill order for histogram bars in ggplot2
Order Stacked Bar Graph in ggplot
Reverse fill order for histogram bars in ggplot2
The position of the labels is directly set by the pos value, you need to reverse that if you reverse the stack order:
ggplot(df, aes(x = factor(name))) +
geom_col(aes(y = Prcentage, fill = label),
position = position_stack(reverse = TRUE),
width = .5) +
# Set the position to its complementary
geom_text(aes(y = 100 - pos, label = label1)) +
# Rest of theme
coord_flip() +
scale_y_continuous(position = "top",
expand = c(0, 0),
breaks = seq(min(0), max(0,102), by = 10),
limits = c(0,102),
labels = dollar_format(suffix = "%", prefix = "")) +
scale_fill_manual(values = c("#ff0000","#fff68f","#b2b2b2","#1baf05","#006080"), drop = FALSE) +
xlab("") + ylab("") +
theme_classic() +
theme(legend.position="bottom",legend.title = element_blank())

How to avoid overlapping plots in ggplot2

I want to plot estimates for three age groups (agecat) by two exposures (expo). The code below produced overlapped plots with alphabetically rearranged age groups. How could I avoid overlap of the plots and maintain the existing order of the age groups?
I used this code:
ggplot(mydf, aes(x = agecat, y = est,ymin = lcl, ymax = ucl, group=agecat,color=agecat,shape=agecat)) +
geom_point(position="dodge",size = 4) +
geom_linerange(position="dodge",size =0.7) +
geom_hline(aes(yintercept = 0)) +
labs(colour="Age Group", shape="Age Group") + theme(axis.title=element_text(face="bold",size="12"),axis.text=element_text(size=12,face="bold"))
Sample data:
> dput(mydf)
structure(list(expo = c(0, 1, 0, 1, 0, 1), est = c(0.290780632898979,
0.208093573361601, 0.140524761247529, 0.156713614649751, 0.444402395010579,
0.711469870845916), lcl = c(0.0679784035303221, -0.00413163014975071,
-0.208866152400888, -0.175393089838871, -0.227660022186016, 0.0755871550441212
), ucl = c(0.514078933380535, 0.420769190852455, 0.491138970050864,
0.489925205664665, 1.12099179726843, 1.35139300089608), agecat = c("young",
"young", "middle", "middle", "old", "old")), .Names = c("expo",
"est", "lcl", "ucl", "agecat"), row.names = c(2L, 4L, 6L, 8L,
10L, 12L), class = "data.frame")
I would do this by using expo as a variable in the plot. This would let ggplot know that you have overlap and so you need dodging at each level of your x variable. Once you do this, you can use position = position_dodge() directly in the two geoms and set the width argument to whatever you'd like. See the help page for position_dodge for examples of when you need to set width explicitly.
Here I'll replace group = agecat with group = expo. Using group instead of an aesthetic like shape means that there is no indication which point represents which expo level on the graphic.
# Fix the display order of the age groups; as a plain character column they
# would be arranged alphabetically.
mydf$agecat <- factor(mydf$agecat, levels = c("young", "middle", "old"))
ggplot(mydf, aes(x = agecat, y = est, ymin = lcl, ymax = ucl,
                 group = expo, color = agecat, shape = agecat)) +
  # Grouping by expo lets the two exposures within each age group be dodged;
  # the same width is used for points and ranges so they stay aligned.
  geom_point(position = position_dodge(width = .5), size = 4) +
  geom_linerange(position = position_dodge(width = .5), size = 0.7) +
  geom_hline(aes(yintercept = 0)) +
  labs(colour = "Age Group", shape = "Age Group") +
  # Font sizes are numeric (points), not character strings.
  theme(axis.title = element_text(face = "bold", size = 12),
        axis.text = element_text(size = 12, face = "bold"))
You can convert the column agecat to factor with the levels in the desired order. Then, as Heroka pointed out in the comments, we can achieve a similar effect using facet_wrap:
# Fix the display order of the age groups before plotting.
mydf$agecat <- factor(mydf$agecat, levels = c("young", "middle", "old"))
ggplot(mydf, aes(x = agecat, y = est, ymin = lcl, ymax = ucl,
                 group = agecat, color = agecat, shape = agecat)) +
  geom_linerange(size = 0.7) +
  geom_hline(aes(yintercept = 0)) +
  labs(colour = "Age Group", shape = "Age Group") +
  # One panel per agecat/est combination, so no two estimates share a panel;
  # the strip labels are hidden in the theme below.
  facet_wrap(agecat ~ est, scales = "free_x", ncol = 6) +
  geom_point(size = 4) +
  # Font sizes are numeric (points), not character strings.
  theme(
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 12, face = "bold"),
    strip.text.x = element_blank()
  )

Stacked Bar Graph Labels with ggplot2

I am trying to graph the following data:
to_graph <- structure(list(Teacher = c("BS", "BS", "FA"
), Level = structure(c(2L, 1L, 1L), .Label = c("BE", "AE", "ME",
"EE"), class = "factor"), Count = c(2L, 25L, 28L)), .Names = c("Teacher",
"Level", "Count"), row.names = c(NA, 3L), class = "data.frame")
and want to add labels in the middle of each piece of the bars that are the percentage for that piece. Based on this post, I came up with:
ggplot(data=to_graph, aes(x=Teacher, y=Count, fill=Level), ordered=TRUE) +
geom_bar(aes(fill = Level), position = 'fill') +
opts(axis.text.x=theme_text(angle=45)) +
scale_y_continuous("",formatter="percent") +
opts(title = "Score Distribution") +
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF")) +
geom_text(aes(label = Count), size = 3, hjust = 0.5, vjust = 3, position = "stack")
But it
Doesn't have any effect on the graph
Probably doesn't display the percentage if it did (although I'm not entirely sure of this point)
Any help is greatly appreciated. Thanks!
The y-coordinate of the text is the actual count (2, 25 or 28), whereas the y-coordinates in the plot panel range from 0 to 1, so the text is being printed off the top.
Calculate the fraction of counts using ddply (or tapply or whatever).
# Share of each row's Count within its Teacher's total.
graph_avgs <- ddply(
  to_graph,
  .(Teacher),
  summarise,
  Count.Fraction = Count / sum(Count)
)
# Name the new column explicitly: an unnamed cbind() argument would be labelled
# with the deparsed expression "graph_avgs$Count.Fraction".
# NOTE(review): this positional bind assumes graph_avgs comes back in the same
# row order as to_graph (i.e. to_graph is already sorted by Teacher) - confirm.
to_graph <- cbind(to_graph, Count.Fraction = graph_avgs$Count.Fraction)
A simplified version of your plot. I haven't bothered to play about with factor orders so the numbers match up to the bars yet.
ggplot(to_graph, aes(Teacher), ordered = TRUE) +
geom_bar(aes(y = Count, fill = Level), position = 'fill') +
scale_fill_manual(values = c("#FF0000", "#FFFF00","#00CC00", "#0000FF")) +
geom_text(
aes(y = graph_avgs$Count.Fraction, label = graph_avgs$Count.Fraction),
size = 3
)

Resources