Align error bars with bar chart factor levels - r

I am trying to plot a bar chart with multiple error bars that align with the relevant values for each factor level on the x axis.
I have followed instructions as per - Grouped barplot in R with error bars - but I cannot figure out why my script isn't cooperating.
Here is the result I get, where obviously I want the errorbars to align with the relevant data.
Here is my data:
structure(list(key = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("2030", "2050", "2100"
), class = "factor"), value = c(0.4, 0.4, 0.1, 0.1, 0.1, 3.4,
4.5, 6.8, 3.6, 3, 5.7, 12.4, 14.9, 9.5, 10.8), scenario = structure(c(4L,
2L, 1L, 6L, 5L, 4L, 2L, 1L, 6L, 5L, 4L, 2L, 1L, 6L, 5L), .Label = c("1.5°C-high-OS",
"1.5°C-limited-OS", "1.5°C-low-OS", "Below-1.5°C", "Higher-2°C",
"Lower-2°C"), class = "factor"), Lower.Quartile = c(0, 0, 0,
0, 0, 0, 3.4, 3.7, 1.8, 1.6, 0, 6.4, 12.1, 6.9, 8.2), Upper.Quartile = c(1.1,
1, 0.4, 0.3, 0.2, 8.3, 6.3, 9.5, 4.6, 4.9, 13.4, 15, 16.3, 12.1,
15.3)), row.names = c(1L, 3L, 4L, 5L, 6L, 7L, 9L, 10L, 11L, 12L,
13L, 15L, 16L, 17L, 18L), class = "data.frame")
Here is my code:
library(ggplot2)
library(tidyr)
ggplot(Global.BECCS.mean, aes(x = key)) + geom_bar(aes(fill = scenario, y = value), position = position_dodge(), stat='identity', col = "black") + theme_classic() +
xlab('Year') + ylab(expression(paste(GtC,' sequestered by BECCS per year'))) + geom_hline(yintercept = 5.5, linetype = 'dashed') + geom_hline(yintercept = 16, linetype = 'dotted') +
geom_errorbar(aes(ymin = Global.BECCS.mean$Lower.Quartile, ymax = Global.BECCS.mean$Upper.Quartile), width = 0.1, position = position_dodge())

A couple of things:
define the y and fill aesthetics in ggplot to allow all layers to inherit them as well
specify the width in position_dodge to get the bars in the wanted position
and finally there's no need, I think, to use Global.BECCS.mean$, the name of the variable should be sufficient.
.
ggplot(Global.BECCS.mean, aes(x = key, fill = scenario, y = value)) +
geom_bar(position = position_dodge(), stat='identity', col = "black") +
theme_classic() +
xlab('Year') +
ylab(expression(paste(GtC,' sequestered by BECCS per year'))) +
geom_hline(yintercept = 5.5, linetype = 'dashed') +
geom_hline(yintercept = 16, linetype = 'dotted') +
geom_errorbar(aes(ymin = Lower.Quartile,
ymax = Upper.Quartile),
width = 0.3,
position = position_dodge(0.9))

Related

How to plot columns above and below data points in ggplot2

Using ggplot2, I am plotting percentage values for 15 species across three sites (each species occurs in each site). The data points associated with site 'C' are my reference points.
Now, instead of plotting sites 'A' and 'B' as points, I would like to display them using vertical lines or column-like structures. As such, these data points should be extended as vertical lines to the top or bottom side of the site 'C' points (green colour), i.e. to the top where values are larger than the reference value and bottom for smaller values.
Specifically, I would hope for a red line from a red point to the green point and a blue line from the blue point to the green point. The red line should ideally have the same width as the red point (and same for blue). The line should also be offset as are the red and blue points (relative to the green point), so that lines do not overlap. Finally, the line should not go to the center but the edge of a point.
For this purpose I have offset points for 'A' and 'B' and also reduced their size to half of the reference point size.
library(ggplot2)
MyData$species <- as.character(MyData$species)
MyData$species <- factor(MyData$species, levels=unique(MyData$species))
pos <- position_dodge(width=0.21)
cols <- c("C" = "darkgreen", "B" = "blue", "A" = "red")
tiff(file = "MyData.tiff", height=10, width=10, units="in", res=300, compression="lzw")
ggplot(data = MyData, aes(x=species, y=value, group=site, colour=site)) +
geom_point(data=subset(MyData, site=="C"), size = 4, shape=15, alpha=1, position=pos) +
geom_line(data=subset(MyData, site=="C"), size = 2, lwd=2, alpha=0.4, show_guide=FALSE) +
geom_point(data=subset(MyData, site!="C"), size = 1.8, shape=15, alpha=1, position = pos) +
scale_colour_manual(values = cols) +
xlab("Species") +
ylab("Value (%)") +
scale_y_continuous(expand=c(0.01,0.01),
limits=c(0.0,100),
breaks=c(0,20,40,60,80,100),
labels=c("0","20","40","60","80","100")) +
theme_bw() +
theme(legend.position="none") +
theme(axis.title.x = element_text(vjust=0.1,face="bold", size=16),
axis.text.x = element_text(vjust=0.4, size=14, angle=90, hjust=1.0)) +
theme(axis.title.y = element_text(vjust=0.1,face="bold", size=16),
axis.text.y = element_text(face="bold", size=14, angle=0)) +
theme(panel.grid.minor=element_blank(), panel.grid.major=element_blank()) +
theme(panel.border = element_rect(size=1, color = "black")) +
theme(plot.margin = unit(c(0.3,0.4,0.3,0.3),"lines"))
dev.off()
This is my current plot. So basically, I would like to replace the red and blue points with lines that extend to the green points (without overlapping them).
Many thanks in advance for any advice on an elegant solution for this.
This is a dput() of my dataset.
structure(list(site = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("A", "B", "C"), class = "factor"),
species = structure(c(13L, 11L, 2L, 14L, 1L, 9L, 12L, 10L,
6L, 8L, 15L, 7L, 3L, 4L, 5L, 13L, 11L, 2L, 14L, 1L, 9L, 12L,
10L, 6L, 8L, 15L, 7L, 3L, 4L, 5L, 13L, 11L, 2L, 14L, 1L,
9L, 12L, 10L, 6L, 8L, 15L, 7L, 3L, 4L, 5L), .Label = c("Species 1",
"Species 10", "Species 11", "Species 12", "Species 13", "Species 14",
"Species 15", "Species 2", "Species 3", "Species 4", "Species 5",
"Species 6", "Species 7", "Species 8", "Species 9"), class = "factor"),
value = c(2, 3.25, 3.53, 4.31, 4.59, 5.26, 6.02, 6.42, 6.6,
7.26, 8.89, 12.45, 35.62, 72.42, 73.55, 1.36, 2.36, 2.17,
10.34, 6.84, 1.88, 5.09, 7.35, 3.87, 10.55, 6.6, 14.64, 39.57,
88.06, 64.54, 5.03, 12.34, 5.42, 3.63, 5.16, 6.04, 3, 8.94,
3.28, 7.64, 6.25, 21.96, 39.35, 78.55, 47.35)), .Names = c("site",
"species", "value"), class = "data.frame", row.names = c(NA,
-45L))
You can try geom_linerange() for the lines from points A/B to point C.
Define the ymin/ymax values for each site/species, & reorder site such that A / B lines drop down to each side of point C:
library(dplyr)
MyData <- MyData %>%
group_by(species) %>%
mutate(value.C = value[site == "C"]) %>%
rowwise() %>%
mutate(value.min = min(value, value.C),
value.max = max(value, value.C)) %>%
ungroup() %>%
mutate(site = factor(site, levels = c("A", "C", "B")))
Plot:
# vary dodge width such that the lines drop to the edge of point C
# for your chosen dimensions (for mine 0.5 was about right)
pos <- position_dodge(width = 0.5)
ggplot(data = MyData,
aes(x = species, y = value,
ymin = value.min, ymax = value.max,
group = site, colour = site, size = site)) +
geom_linerange(size = 1.8, alpha = 0.4, position = pos) +
geom_line(data = subset(MyData, site == "C"),
size = 2, lwd = 2, alpha = 0.4) +
geom_point(data = subset(MyData, site == "C"),
size = 4, shape = 15, position = pos) +
scale_color_manual(values = cols) +
theme_classic() +
theme(legend.position = "none")
# + other theme-related settings...
You can add geom_line to draw the vertical lines
library(ggplot2)
MyData$species <- as.character(MyData$species)
MyData$species <- factor(MyData$species, levels=unique(MyData$species))
pos <- position_dodge(width=0.21)
cols <- c("C" = "darkgreen", "B" = "blue", "A" = "red")
windows()
ggplot(data = MyData, aes(x=species, y=value, group=site, colour=site)) +
geom_point(data=subset(MyData, site=="C"), size = 4, shape=15, alpha=1, position=pos) +
geom_line(data=subset(MyData, site=="C"), size = 2, lwd=2, alpha=0.4, show_guide=FALSE) +
geom_point(data=subset(MyData, site!="C"), size = 1.8, shape=15, alpha=1, position = pos) +
geom_line(aes(group = species)) + #New code Added
scale_colour_manual(values = cols) +
xlab("Species") +
ylab("Value (%)") +
scale_y_continuous(expand=c(0.01,0.01),
limits=c(0.0,100),
breaks=c(0,20,40,60,80,100),
labels=c("0","20","40","60","80","100")) +
theme_bw() +
theme(legend.position="none") +
theme(axis.title.x = element_text(vjust=0.1,face="bold", size=16),
axis.text.x = element_text(vjust=0.4, size=14, angle=90, hjust=1.0)) +
theme(axis.title.y = element_text(vjust=0.1,face="bold", size=16),
axis.text.y = element_text(face="bold", size=14, angle=0)) +
theme(panel.grid.minor=element_blank(), panel.grid.major=element_blank()) +
theme(panel.border = element_rect(size=1, color = "black")) +
theme(plot.margin = unit(c(0.3,0.4,0.3,0.3),"lines"))

Add geom_hline legend to existing geom bar legend

I want to add a legend under the existing legend that represents the dashed line, such that the dashed line could be labeled "avg tx effect" and be placed under study 3.
library(ggplot2)
library(ggthemes)
#dput(df)
df=structure(list(study = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("study1", "study2",
"study3"), class = "factor"), d = c(-0.205, 0.1075, 0.3525, -0.37,
0.3, 0.42, -0.28, 0.09, 0.59, 0.11, -0.05, 0.25, 0, 0.25, 0.49
), Outcome = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L,
5L, 5L, 5L), Outcome2 = structure(c(1L, 1L, 1L, 4L, 4L, 4L, 7L,
7L, 7L, 10L, 10L, 10L, 13L, 13L, 13L), .Label = c("1", "1", "1",
"2", "2", "2", "3", "3", "3", "4", "4", "4", "5", "5", "5"), class = "factor")), .Names = c("study",
"d", "Outcome", "Outcome2"), row.names = c(NA, -15L), class = "data.frame")
ggplot(df, aes(x=Outcome2, y=d, fill=study)) +
geom_bar(position=position_dodge(), aes(x=Outcome2),stat="identity",
colour="black", # Use black outlines,
size=.3) + # Thinner lines
xlab("Outcome") +
ylab("Cohen's D Effect Size") +
scale_fill_grey(name="Study",
labels=c("study1","study2", "study3"))+
theme_bw()+
geom_hline(yintercept=.15,linetype=2)
A general feature of ggplot is that to generate a legend, you need to map your aesthetics (e.g. linetype) to a variable in the data, instead of setting it to constant. In the case of geom_hline, this may be achieved by putting the intercept in a separate data frame. Also note show_guide = TRUE.
Then customize the legend using scale_linetype_manual. The black lines in the fill legend are removed using override.aes.
Here's a stripped down version of your code to show only the most necessary steps:
df2 <- data.frame(yi = 0.15)
ggplot(data = df, aes(x = Outcome2, y = d, fill = study)) +
geom_bar(position = "dodge", stat = "identity") +
geom_hline(data = df2, aes(yintercept = yi, linetype = factor(yi)), show_guide = TRUE) +
scale_linetype_manual(name = "avg tx effect", values = "dashed", labels = "") +
guides(fill = guide_legend(override.aes = list(linetype = "blank")))
As #Gregor suggested, you could use a direct label for this line by adding annotate() as shown below:
ggplot(df, aes(x=Outcome2, y=d, fill=study)) +
geom_bar(position=position_dodge(), aes(x=Outcome2),stat="identity",
colour="black", # Use black outlines,
size=.3) + # Thinner lines
xlab("Outcome") +
ylab("Cohen's D Effect Size") +
scale_fill_grey(name="Study",
labels=c("study1","study2", "study3"))+
theme_bw()+
geom_hline(yintercept=.15,linetype=2) +annotate("text",x=.7,y=.17,size=3,label=c('avg tx ef'))
If space is an issue you can use the wrapper described here to wrap the text. Just run wrapper <- function(x, ...) paste(strwrap(x, ...), collapse = "\n") then add +annotate("text",x=.7,y=.18,size=3,label=wrapper('avg tx effect',10)). Which produces:

Controlling bar width with ggplot2 [duplicate]

This question is motivated by further exploring this question. The problem with the accepted solution becomes more obvious when there is a greater disparity in the number of bars per facet. Take a look at this data and the resultant plot using that solution:
# create slightly contrived data to better highlight width problems
data <- data.frame(ID=factor(c(rep(1,9), rep(2,6), rep(3,6), rep(4,3), rep(5,3))),
TYPE=factor(rep(1:3,length(ID)/3)),
TIME=factor(c(1,1,1,2,2,2,3,3,3,1,1,1,2,2,2,1,1,1,2,2,2,1,1,1,1,1,1)),
VAL=runif(27))
# implement previously suggested solution
base.width <- 0.9
data$w <- base.width
# facet two has 3 bars compared to facet one's 5 bars
data$w[data$TIME==2] <- base.width * 3/5
# facet 3 has 1 bar compared to facet one's 5 bars
data$w[data$TIME==3] <- base.width * 1/5
ggplot(data, aes(x=ID, y=VAL, fill=TYPE)) +
facet_wrap(~TIME, ncol=1, scale="free") +
geom_bar(position="stack", aes(width = w),stat = "identity") +
coord_flip()
You'll notice the widths look exactly right, but the whitespace in facet 3 is quite glaring. There is no easy way to fix this in ggplot2 that I have seen yet (facet_wrap does not have a space option).
Next step is to try to solve this using gridExtra:
# create each of the three plots, don't worry about legend for now
p1 <- ggplot(data[data$TIME==1,], aes(x=ID, y=VAL, fill=TYPE)) +
facet_wrap(~ TIME, ncol=1) +
geom_bar(position="stack", show_guide=FALSE) +
coord_flip()
p2 <- ggplot(data[data$TIME==2,], aes(x=ID, y=VAL, fill=TYPE)) +
facet_wrap(~ TIME, ncol=1) +
geom_bar(position="stack", show_guide=FALSE) +
coord_flip()
p3 <- ggplot(data[data$TIME==3,], aes(x=ID, y=VAL, fill=TYPE)) +
facet_wrap(~ TIME, ncol=1) +
geom_bar(position="stack", show_guide=FALSE) +
coord_flip()
# use similar arithmetic to try and get layout correct
require(gridExtra)
heights <- c(5, 3, 1) / sum(5, 3, 1)
print(arrangeGrob(p1 ,p2, p3, ncol=1,
heights=heights))
You'll notice I used the same arithmetic previously suggested based off the number of bars per facet, but in this case it ends up horribly wrong. This seems to be because there are extra "constant height" elements that I need to take into consideration in the math.
Another complication (I believe) is that the final output (and whether or not the widths match) will also depend on the width and height of where I'm outputting the final grob to, whether its in a R/RStudio environment, or to a PNG file.
How can I accomplish this?
Something like this appear to work, but it doesn't - not completely. It has the appearance of working because the levels of the ID factor are sequential. Anything else, and scale = "free" fails. But it might be possible to develop further. The method uses facet_grid, and thus space = "free" can be used. The method uses geom_rect to layer differently coloured rectangles on top of each other. It needs cumulative sums to be calculated so that the right-hand edge of each rectangle can be positioned.
data <- data.frame(ID=factor(c(rep(1,9), rep(2,6), rep(3,6), rep(4,3), rep(5,3))),
TYPE=factor(rep(1:3,3)),
TIME=factor(c(1,1,1,2,2,2,3,3,3,1,1,1,2,2,2,1,1,1,2,2,2,1,1,1,1,1,1)),
VAL=runif(27))
library(ggplot2)
library(plyr)
# Get the cumulative sums
data = ddply(data, .(ID, TIME), mutate, CUMSUMVAL = cumsum(VAL))
ggplot(data, aes(x=VAL, y = as.numeric(ID), fill=TYPE)) +
geom_rect(data = subset(data, TYPE == 3), aes(xmin = 0, xmax = CUMSUMVAL, ymin = as.numeric(ID)-.2, ymax = as.numeric(ID)+.2)) +
geom_rect(data = subset(data, TYPE == 2), aes(xmin = 0, xmax = CUMSUMVAL, ymin = as.numeric(ID)-.2, ymax = as.numeric(ID)+.2)) +
geom_rect(data = subset(data, TYPE == 1), aes(xmin = 0, xmax = CUMSUMVAL, ymin = as.numeric(ID)-.2, ymax = as.numeric(ID)+.2)) +
facet_grid(TIME~., space = "free", scale="free") +
scale_y_continuous(breaks = c(1:5), expand = c(0, 0.2))
EDIT: OR really thick lines work a little better (I think)
ggplot(data, aes(x=VAL, y = ID, colour=TYPE)) +
geom_segment(data = subset(data, TYPE == 3), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 2), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 1), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
facet_grid(TIME~., space = "free", scale="free")
Additional Edit Taking the data from your earleir post, and modifying it a little.
Updated opts is deprecated; using theme instead.
df <- structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L), .Label = c("a",
"b", "c", "d", "e", "f", "g"), class = "factor"), TYPE = structure(c(1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L,
1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L,
5L, 6L, 1L, 2L, 3L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8"), class = "factor"), TIME = structure(c(2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L), .Label = c("One", "Five", "Fifteen"), class = "factor"), VAL = c(0.937377670081332,
0.522220720537007, 0.278690102742985, 0.967633064137772, 0.116124767344445,
0.0544306698720902, 0.470229141646996, 0.62017166428268, 0.195459847105667,
0.732876230962574, 0.996336271753535, 0.983087373664603, 0.666449476964772,
0.291554537601769, 0.167933790013194, 0.860138458199799, 0.172361251665279,
0.833266809117049, 0.620465772924945, 0.786503327777609, 0.761877260869369,
0.425386636285111, 0.612077651312575, 0.178726130630821, 0.528709076810628,
0.492527724476531, 0.472576208412647, 0.0702785139437765, 0.696220921119675,
0.230852259788662, 0.359884874196723, 0.518227979075164, 0.259466265095398,
0.149970305617899, 0.00682218233123422, 0.463400925742462, 0.924704828299582,
0.229068386601284)), .Names = c("ID", "TYPE", "TIME", "VAL"), row.names = c(NA,
-38L), class = "data.frame")
library(ggplot2)
library(plyr)
data = ddply(df, .(ID, TIME), mutate, CUMSUMVAL = cumsum(VAL))
ggplot(data, aes(x=VAL, y = ID, colour=TYPE)) +
geom_segment(data = subset(data, TYPE == 6), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 5), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 4), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 3), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 2), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 1), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
facet_grid(TIME~., space = "free", scale="free") +
theme(strip.text.y = element_text(angle = 0))
Changing the gtable doesn't help, unfortunately, as the bar width is in relative units,
g = ggplot_gtable(ggplot_build(p))
panels = which(sapply(g$heights, attr, "unit") == "null")
g$heights[[panels[1]]] <- unit(5, "null")
g$heights[[panels[2]]] <- unit(3, "null")
g$heights[[panels[3]]] <- unit(1, "null")
grid.draw(g)

How do I change the colour of an outline in a ggplot bubble plot?

I am putting together a bubble chart in ggplot2, and want to change the outline of my bubbles (to comply with work's formatting guidelines). My problem is that I'm using colors to group variables, and I'm using a custom palette (again, for work). Ideally, I'd like to put black borders on the bubbles.
Here's my data-frame:
mmur <- structure(list(Medgrowth = c(-1.02232983588915, 3.01155115511551,-0.220617729642996, 1.96506550218342, 0.943970767356888, 0.810810810810807,0.0166694449074782, 0.21064457239153, 0.0876731544801004, 0.132216835610393,0.370644922164558,0.23378141437756, 1.27810650887574, 0.42301184433164,0.394880174291941, 0.54216172568924, 1.32690882134916, 0.499722376457527,-0.108885017421599), Medunemp = c(4.430550475, 2.5060469975,4.1239796475, 2.0585977455, 3.846659243, 3.1792594425, 4.0033450105,6.0882984255, 3.091889808,3.7462810695, 2.4038147815, 3.0065393475,2.3331894185, 4.9482480125, 2.0955470885, 1.616694725, 1.873037069,3.060170157, 3.0131425595), Empsize = c(324.2,270.6, 962.1,149, 962.4, 421.1, 1197.8, 777.8, 552.8, 234.8, 421.1, 203.2,915.7, 396.1, 685.9, 904.5, 1366.9, 215.4, 440.5), Eduratio = c(0.1,0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.1, 0.1, 0.3, 0.3, 0.2, 0.5, 0.2,0.3, 0.6, 0.4, 0.2, 0.1), Names = structure(c(3L, 12L, 11L, 7L,5L, 19L, 17L, 1L, 18L, 10L, 8L, 16L, 14L, 2L, 15L, 6L, 9L, 4L,13L), .Label = c("Accom", "Admin","Agric", "Arts.", "Const","Educa", "Elect", "Finan", "Healt","Infor","Manuf","Minin","Other", "Profe", "Publi", "Renta", "Retai", "Trans", "Whole"), class = "factor"), colour1 = structure(c(6L, 5L, 6L, 5L, 6L,5L, 6L, 6L, 6L, 4L, 4L, 5L, 2L, 5L, 4L, 1L, 3L, 5L, 6L), .Label = c("#8C2D04","#CC4C02", "#EC7014", "#FE9929","#FEC44F", "#FEE391"), class = "factor")), .Names = c("Medgrowth","Medunemp", "Empsize", "Eduratio", "Names", "colour1"), row.names = c("Agric","Minin", "Manuf", "Elect", "Const", "Whole", "Retai", "Accom","Trans", "Infor", "Finan", "Renta", "Profe", "Admin", "Publi","Educa", "Healt", "Arts.", "Other"), class = "data.frame")
And here's my plot code:
bbubc1 <- ggplot(mmur, aes(x = Medgrowth, y = Medunemp, size = Empsize, label = Names, colour = colour1)) +
geom_point() +
scale_size(range = c(5, sqrt(max(mmur$Empsize)/min(mmur$Empsize)*5^2)), name = "Employment in\n2012 (thousands)") +
geom_text(size = 4, colour = "black", vjust = -1) +
scale_colour_manual(values = levels(mmur$colour1), name = "Per cent with\ntertiary degree", label = c(60, 50, 40, 30, 20, 10)) +
xlab("Median employment growth rate 2001 - 2012") +
ylab("Median unemployment rate 2001 - 2012") +
opts(axis.text.x=theme_text(angle=0, hjust=0, size = 16)) +
opts(axis.text.y=theme_text(angle=0, hjust=0, size = 16)) +
opts(axis.title.x=theme_text(size = 16)) +
opts(legend.title = theme_text(size = 16)) +
opts(axis.title.y=theme_text(size = 16, angle = 90)) +
geom_vline(colour = I("grey")) +
geom_hline(colour = I("grey")) +
ylim(c(0,7))
The plot is here:
Some of R's plotting characters allow both an internal fill and an edge colour, so using one of these shapes to draw your points, and replacing your colour aesthetics with fill, should do the job:
ggplot(mmur, aes(x = Medgrowth, y = Medunemp, size = Empsize, label = Names, fill = colour1)) +
geom_point(shape=21, colour='black') +
scale_size(range = c(5, sqrt(max(mmur$Empsize)/min(mmur$Empsize)*5^2)), name = "Employment in\n2012 (thousands)") +
geom_text(size = 4, colour = "black", vjust = -1) +
scale_fill_manual(values = levels(mmur$colour1), name = "Per cent with\ntertiary degree", label = c(60, 50, 40, 30, 20, 10)) +
xlab("Median employment growth rate 2001 - 2012") +
ylab("Median unemployment rate 2001 - 2012") +
opts(axis.text.x=theme_text(angle=0, hjust=0, size = 16)) +
opts(axis.text.y=theme_text(angle=0, hjust=0, size = 16)) +
opts(axis.title.x=theme_text(size = 16)) +
opts(legend.title = theme_text(size = 16)) +
opts(axis.title.y=theme_text(size = 16, angle = 90)) +
geom_vline(colour = I("grey")) +
geom_hline(colour = I("grey")) +
ylim(c(0,7))
Run example(points) and go to the third plot to see which shapes can be filled like this, i.e. shapes 21:25.

ggplot2 + gridExtra: how to ensure geom_bar in different size plot grobs result in exact same bar width

This question is motivated by further exploring this question. The problem with the accepted solution becomes more obvious when there is a greater disparity in the number of bars per facet. Take a look at this data and the resultant plot using that solution:
# create slightly contrived data to better highlight width problems
data <- data.frame(ID=factor(c(rep(1,9), rep(2,6), rep(3,6), rep(4,3), rep(5,3))),
TYPE=factor(rep(1:3,length(ID)/3)),
TIME=factor(c(1,1,1,2,2,2,3,3,3,1,1,1,2,2,2,1,1,1,2,2,2,1,1,1,1,1,1)),
VAL=runif(27))
# implement previously suggested solution
base.width <- 0.9
data$w <- base.width
# facet two has 3 bars compared to facet one's 5 bars
data$w[data$TIME==2] <- base.width * 3/5
# facet 3 has 1 bar compared to facet one's 5 bars
data$w[data$TIME==3] <- base.width * 1/5
ggplot(data, aes(x=ID, y=VAL, fill=TYPE)) +
facet_wrap(~TIME, ncol=1, scale="free") +
geom_bar(position="stack", aes(width = w),stat = "identity") +
coord_flip()
You'll notice the widths look exactly right, but the whitespace in facet 3 is quite glaring. There is no easy way to fix this in ggplot2 that I have seen yet (facet_wrap does not have a space option).
Next step is to try to solve this using gridExtra:
# create each of the three plots, don't worry about legend for now
p1 <- ggplot(data[data$TIME==1,], aes(x=ID, y=VAL, fill=TYPE)) +
facet_wrap(~ TIME, ncol=1) +
geom_bar(position="stack", show_guide=FALSE) +
coord_flip()
p2 <- ggplot(data[data$TIME==2,], aes(x=ID, y=VAL, fill=TYPE)) +
facet_wrap(~ TIME, ncol=1) +
geom_bar(position="stack", show_guide=FALSE) +
coord_flip()
p3 <- ggplot(data[data$TIME==3,], aes(x=ID, y=VAL, fill=TYPE)) +
facet_wrap(~ TIME, ncol=1) +
geom_bar(position="stack", show_guide=FALSE) +
coord_flip()
# use similar arithmetic to try and get layout correct
require(gridExtra)
heights <- c(5, 3, 1) / sum(5, 3, 1)
print(arrangeGrob(p1 ,p2, p3, ncol=1,
heights=heights))
You'll notice I used the same arithmetic previously suggested based off the number of bars per facet, but in this case it ends up horribly wrong. This seems to be because there are extra "constant height" elements that I need to take into consideration in the math.
Another complication (I believe) is that the final output (and whether or not the widths match) will also depend on the width and height of where I'm outputting the final grob to, whether its in a R/RStudio environment, or to a PNG file.
How can I accomplish this?
Something like this appear to work, but it doesn't - not completely. It has the appearance of working because the levels of the ID factor are sequential. Anything else, and scale = "free" fails. But it might be possible to develop further. The method uses facet_grid, and thus space = "free" can be used. The method uses geom_rect to layer differently coloured rectangles on top of each other. It needs cumulative sums to be calculated so that the right-hand edge of each rectangle can be positioned.
data <- data.frame(ID=factor(c(rep(1,9), rep(2,6), rep(3,6), rep(4,3), rep(5,3))),
TYPE=factor(rep(1:3,3)),
TIME=factor(c(1,1,1,2,2,2,3,3,3,1,1,1,2,2,2,1,1,1,2,2,2,1,1,1,1,1,1)),
VAL=runif(27))
library(ggplot2)
library(plyr)
# Get the cumulative sums
data = ddply(data, .(ID, TIME), mutate, CUMSUMVAL = cumsum(VAL))
ggplot(data, aes(x=VAL, y = as.numeric(ID), fill=TYPE)) +
geom_rect(data = subset(data, TYPE == 3), aes(xmin = 0, xmax = CUMSUMVAL, ymin = as.numeric(ID)-.2, ymax = as.numeric(ID)+.2)) +
geom_rect(data = subset(data, TYPE == 2), aes(xmin = 0, xmax = CUMSUMVAL, ymin = as.numeric(ID)-.2, ymax = as.numeric(ID)+.2)) +
geom_rect(data = subset(data, TYPE == 1), aes(xmin = 0, xmax = CUMSUMVAL, ymin = as.numeric(ID)-.2, ymax = as.numeric(ID)+.2)) +
facet_grid(TIME~., space = "free", scale="free") +
scale_y_continuous(breaks = c(1:5), expand = c(0, 0.2))
EDIT: OR really thick lines work a little better (I think)
ggplot(data, aes(x=VAL, y = ID, colour=TYPE)) +
geom_segment(data = subset(data, TYPE == 3), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 2), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 1), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
facet_grid(TIME~., space = "free", scale="free")
Additional Edit Taking the data from your earleir post, and modifying it a little.
Updated opts is deprecated; using theme instead.
df <- structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L), .Label = c("a",
"b", "c", "d", "e", "f", "g"), class = "factor"), TYPE = structure(c(1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L,
1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L,
5L, 6L, 1L, 2L, 3L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8"), class = "factor"), TIME = structure(c(2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L), .Label = c("One", "Five", "Fifteen"), class = "factor"), VAL = c(0.937377670081332,
0.522220720537007, 0.278690102742985, 0.967633064137772, 0.116124767344445,
0.0544306698720902, 0.470229141646996, 0.62017166428268, 0.195459847105667,
0.732876230962574, 0.996336271753535, 0.983087373664603, 0.666449476964772,
0.291554537601769, 0.167933790013194, 0.860138458199799, 0.172361251665279,
0.833266809117049, 0.620465772924945, 0.786503327777609, 0.761877260869369,
0.425386636285111, 0.612077651312575, 0.178726130630821, 0.528709076810628,
0.492527724476531, 0.472576208412647, 0.0702785139437765, 0.696220921119675,
0.230852259788662, 0.359884874196723, 0.518227979075164, 0.259466265095398,
0.149970305617899, 0.00682218233123422, 0.463400925742462, 0.924704828299582,
0.229068386601284)), .Names = c("ID", "TYPE", "TIME", "VAL"), row.names = c(NA,
-38L), class = "data.frame")
library(ggplot2)
library(plyr)
data = ddply(df, .(ID, TIME), mutate, CUMSUMVAL = cumsum(VAL))
ggplot(data, aes(x=VAL, y = ID, colour=TYPE)) +
geom_segment(data = subset(data, TYPE == 6), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 5), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 4), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 3), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 2), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
geom_segment(data = subset(data, TYPE == 1), aes(x = 0, xend = CUMSUMVAL, y = ID, yend = ID), size = 10) +
facet_grid(TIME~., space = "free", scale="free") +
theme(strip.text.y = element_text(angle = 0))
Changing the gtable doesn't help, unfortunately, as the bar width is in relative units,
g = ggplot_gtable(ggplot_build(p))
panels = which(sapply(g$heights, attr, "unit") == "null")
g$heights[[panels[1]]] <- unit(5, "null")
g$heights[[panels[2]]] <- unit(3, "null")
g$heights[[panels[3]]] <- unit(1, "null")
grid.draw(g)

Resources