Custom legend for multi layer geom_bar plot - r

R version 3.1.1 (2014-07-10) Platform: i386-w64-mingw32/i386 (32-bit)
I am working on a barplot with ggplot2. The aim is to have a combination of a stacked and dodged barplot for the data. My problem is to include a legend, which includes both layers or shows them separately.
Data:
df <- structure(list(year = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("2008", "2009", "2010",
"2011", "2012", "2013", "2014"), class = "factor"), product = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("a",
"b"), class = "factor"), total = c(1663L, 1344L, 1844L, 444L,
1336L, 897L, 655L, 3433L, 3244L, 2044L, 3344L, 1771L, 1410L,
726L), partial = c(1663L, 1344L, 1844L, 444L, 949L, 302L, 5L,
3433L, 3244L, 2044L, 3344L, 1476L, 1158L, 457L)), .Names = c("year",
"product", "total", "partial"), row.names = c(NA, -14L), class = "data.frame")
The plan was, to plot two geom_bar layers to combine dodge and stacked. The first layer is the total amount, the second layer is the partial amount. The alpha value for the first layer is reduced to see the difference between the two layers. So far it worked.
Example:
# Question's starting point: two dodged bar layers that both map fill to `product`.
# The first layer draws `total` faintly (alpha = 0.3); the second draws `partial`
# fully opaque (alpha = 1) on top of it.
ggplot(df, aes(x = year))+
geom_bar(aes(y = total, fill = product), alpha= 0.3, stat = "identity", position = "dodge", width = 0.3)+
geom_bar(aes(y = partial, fill = product), alpha= 1, stat = "identity", position = "dodge", width = 0.3)
Now the legend is not sufficient. It shows the colour of fill = product and is not sensitive to the alpha value of the first layer.
My approach was to use scale_fill_manual and manually add a new label with a new colour, which did not work.
My idea:
# Question's attempt: same two layers as before, plus a manual fill scale.
# Three colours/labels are supplied, but both layers map fill to `product`,
# whose only levels are "a" and "b".
ggplot(df, aes(x = year))+
geom_bar(aes(y = total, fill = product), alpha= 0.3, stat = "identity", position = "dodge", width = 0.3)+
geom_bar(aes(y = partial, fill = product), alpha= 1, stat = "identity", position = "dodge", width = 0.3)+
scale_fill_manual(name = "",
values=c("red", "black","blue"),
labels=c("a","b","test"))
Thank you for any help on my problem!

Try to use different fill values for total and partial data.
Quick and dirty solution:
library(ggplot2)

# Give each layer its own, self-describing fill keys ("Total a", "Partial a", ...)
# so the legend gets one entry per layer/product combination.
# Colours are attached to keys by name, and `breaks` pins the legend order, so
# nothing depends on how the keys happen to sort.
fill_colours <- c(
  "Total a"   = "red",
  "Total b"   = "black",
  "Partial a" = "blue",
  "Partial b" = "green"
)

ggplot(df, aes(x = year)) +
  # Layer 1: totals, drawn faintly underneath.
  geom_bar(
    aes(y = total, fill = paste("Total", product)),
    alpha = 0.3, stat = "identity", position = "dodge", width = 0.3
  ) +
  # Layer 2: partial amounts, drawn opaque on top of the totals.
  geom_bar(
    aes(y = partial, fill = paste("Partial", product)),
    alpha = 1, stat = "identity", position = "dodge", width = 0.3
  ) +
  scale_fill_manual(
    name = "",
    values = fill_colours,
    breaks = names(fill_colours),
    # Positional with `breaks`: one label per key, in the same order.
    labels = c("A", "B", "Partial A", "Partial B")
  )
Tested on
R x64 3.2.2

Related

Width of bars in ggplot2 with many groups

I tried to reproduce the answer given by Roman in this post: The same width of the bars in geom_bar(position = "dodge")
but I could not fix my problem. When the bars have the same width, the distance between the groups is too big. I get the same problem when I use facet_grid.
My df:
df <- structure(list(discipline = structure(c(2L, 3L, 3L, 2L, 2L, 2L, 4L, 6L, 7L, 3L, 4L, 6L, 8L, 8L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("", "Biogeochemistry", "Ecology", "Geochemistry", "Geography", "Management", "Microbiology", "Oceanography"), class = "factor"), focus = structure(c(34L, 55L, 40L, 47L, 54L, 57L, 47L, 19L, 31L, 25L, 23L, 25L, 47L, 52L,13L, 20L, 23L, 16L, 26L, 27L), .Label = c("", "Abiotic measures", "Acidification", "Biogeochemichal budgets", "Biogeochemistry", "Biogeochemistry, discharge", "Blue Carbon", "Chromophoric Dissolved organic matter, river plume", "Coastal anthromes", "Connectivity", "Coral reefs", "Ecology", "Ecosystem Function", "Ecosystem Services", "Embryo plants", "Fisheries", "Food webs", "Global change", "Governance", "Groundwater", "Hidrology", "Integrative Magamenet", "Isotopes", "Land-sea interactions","Land-sea interface", "Land use", "Life history", "Life traits", "Livelihoods", "Management", "Microbial community", "Modelling water quality", "Nitrogen fluxes", "Nutrients", "Parasites", "ph, CO2", "Planning", "Pollutants", "Pollution", "Primary production", "Remote Sensing", "Resilience", "resilience, self-organization", "Restoration",
"Salinization", "Sea level rise", "Sediment flux", "Sediments", "socio land-sea interactions", "Species interaction", "Submarine ground water", "Submarine groundwater", "Subsidies", "Trace metals", "Trophic interactions", "Water quality", "Water resources"), class = "factor"), n = c(39L, 17L, 11L, 9L, 6L, 5L, 5L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L)), row.names = c(NA, -20L), class = c("tbl_df","tbl", "data.frame"))
First I tried with position = position_dodge2(preserve = "single")
# Question's first attempt: bars and text labels both use
# position_dodge2(width = 0.9, preserve = "single"); fill is `focus` reordered by `n`,
# and the legend is hidden.
ggplot(df, aes(x = (discipline), y = n, fill = reorder(focus, n))) +
geom_bar(position = position_dodge2(width = 0.9, preserve = "single"), stat = "identity") + ylab("N") + theme_classic() + geom_text(aes(label=focus), position = position_dodge2(width = 0.9, preserve = "single"), angle = 90, hjust = -0.1) + theme(legend.position = "none")
Then I used facet_grid
# Question's second attempt: the bars use position = "dodge" while the text labels
# still use position_dodge2(..., preserve = "single").
# NOTE(review): facet_grid() is called here without any faceting variables.
ggplot(df, aes(x = (discipline), y = n, fill = reorder(focus, n))) +
geom_bar(position = "dodge", stat = "identity") + ylab("N") + theme_classic() + geom_text(aes(label=focus), position = position_dodge2(width = 0.9, preserve = "single"), angle = 90, hjust = -0.1) + theme(legend.position = "none") + facet_grid(scales = "free_x", space = "free_x", switch = "x")
Even when the widths of the bars are equal, the distance between groups is too big.
What can I do to solve this problem?
Maybe try this. It looks like the issue is with position. If you define position_dodge2() for the bars you can avoid the big bars you got. Here is the code:
library(ggplot2)

# One dodge specification shared by the bars and their labels, so both layers
# are offset in the same way.
single_dodge <- position_dodge2(width = 0.9, preserve = "single")

p <- ggplot(df, aes(x = (discipline), y = n, fill = reorder(focus, n)))
p <- p + geom_bar(stat = "identity", position = single_dodge)
p <- p + ylab("N")
p <- p + theme_classic()
# Vertical labels, nudged just above each bar.
p <- p + geom_text(aes(label = focus), position = single_dodge,
                   angle = 90, hjust = -0.1)
p <- p + theme(legend.position = "none")
p <- p + facet_grid(scales = "free_x", space = "free_x", switch = "x")
p
Output:
Whereas, the original code produces this (using position = "dodge"):

Transform y axis in bar plot using scale_y_log10()

Using the data.frame below, I want to have a bar plot with y axis log transformed.
I got this plot
using this code
# Question's original plot on a linear y axis: dodged bars of `ymean` per `id`,
# filled/grouped by `var`, with error bars from `ymin` to `ymax`.
# The error bars are dodged by .7, matching the bar width of 0.7.
ggplot(df, aes(x=id, y=ymean , fill=var, group=var)) +
geom_bar(position="dodge", stat="identity",
width = 0.7,
size=.9)+
geom_errorbar(aes(ymin=ymin,ymax=ymax),
size=.25,
width=.07,
position=position_dodge(.7))+
theme_bw()
to log transform y axis to show the "low" level in B and D which is close to zero, I used
+scale_y_log10()
which resulted in
Any suggestions how to transform y axis of the first plot?
By the way, some values in my data are close to zero but none of them is zero.
UPDATE
Trying this suggested answer by @computermacgyver
# Question's update: the same dodged bar plot, now with a log10 y scale whose
# breaks are powers of ten and whose labels are formatted as 10^x.
# NOTE(review): trans_breaks(), trans_format() and math_format() are not defined
# in this snippet; presumably the scales package is attached — confirm.
ggplot(df, aes(x=id, y=ymean , fill=var, group=var)) +
geom_bar(position="dodge", stat="identity",
width = 0.7,
size=.9)+
scale_y_log10("y",
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x)))+
geom_errorbar(aes(ymin=ymin,ymax=ymax),
size=.25,
width=.07,
position=position_dodge(.7))+
theme_bw()
I got
DATA
dput(df)
structure(list(id = structure(c(7L, 7L, 7L, 1L, 1L, 1L, 2L, 2L,
2L, 6L, 6L, 6L, 5L, 5L, 5L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("A",
"B", "C", "D", "E", "F", "G"), class = "factor"), var = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L), .Label = c("high", "medium", "low"), class = "factor"),
ymin = c(0.189863418, 0.19131948, 0.117720496, 0.255852069,
0.139624146, 0.048182771, 0.056593774, 0.037262727, 0.001156667,
0.024461299, 0.026203592, 0.031913077, 0.040168571, 0.035235902,
0.019156667, 0.04172913, 0.03591233, 0.026405094, 0.019256055,
0.011310755, 0.000412414), ymax = c(0.268973856, 0.219709677,
0.158936508, 0.343307692, 0.205225352, 0.068857143, 0.06059596,
0.047296296, 0.002559633, 0.032446541, 0.029476821, 0.0394,
0.048959184, 0.046833333, 0.047666667, 0.044269231, 0.051,
0.029181818, 0.03052381, 0.026892857, 0.001511628), ymean = c(0.231733739333333,
0.204891473333333, 0.140787890333333, 0.295301559666667,
0.173604191666667, 0.057967681, 0.058076578, 0.043017856,
0.00141152033333333, 0.0274970166666667, 0.0273799226666667,
0.0357511486666667, 0.0442377366666667, 0.0409452846666667,
0.0298284603333333, 0.042549019, 0.0407020586666667, 0.0272998796666667,
0.023900407, 0.016336106, 0.000488014)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -21L), .Names = c("id",
"var", "ymin", "ymax", "ymean"))
As @Miff has written, bars are generally not useful on a log scale. With barplots, we compare the height of the bars to one another. To do this, we need a fixed point from which to compare, usually 0, but log(0) is negative infinity.
So, I would strongly suggest that you consider using geom_point() instead of geom_bar(). I.e.,
library(ggplot2)

# Points instead of bars: a point needs no baseline, so it stays meaningful on a
# log10 axis. Points and error bars use the same dodge width so they line up.
# The break/label helpers live in the scales package; they are namespace-qualified
# so the snippet runs without attaching scales first.
ggplot(df, aes(x = id, y = ymean, color = var)) +
  geom_point(position = position_dodge(0.7)) +
  scale_y_log10(
    "y",
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  geom_errorbar(
    aes(ymin = ymin, ymax = ymax),
    size = 0.25,
    width = 0.07,
    position = position_dodge(0.7)
  ) +
  theme_bw()
If you really, really want bars, then you should use geom_rect instead of geom_bar and set your own baseline. That is, the baseline for geom_bar is zero but you will have to invent a new baseline in a log scale. Your Plot 1 seems to use 10^-7.
This can be accomplished with the following, but again, I consider this a really bad idea.
library(ggplot2)

# Bars on a log scale need an invented baseline, because log10(0) is -Inf.
# 1e-7 is 10^-7 (note that 10E-7 would be 10^-6).
log_baseline <- 1e-7

# Rectangles run from the artificial baseline up to `ymean`; each `id` spans
# +/- 0.4 around its integer position before dodging by `var`.
# The scale helpers are namespace-qualified so scales need not be attached.
ggplot(
  df,
  aes(
    xmin = as.numeric(id) - 0.4,
    xmax = as.numeric(id) + 0.4,
    x = id,
    ymin = log_baseline,
    ymax = ymean,
    fill = var
  )
) +
  geom_rect(position = position_dodge(0.8)) +
  scale_y_log10(
    "y",
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # The error bars override ymin/ymax with the measured bounds from the data.
  geom_errorbar(
    aes(ymin = ymin, ymax = ymax),
    size = 0.25,
    width = 0.07,
    position = position_dodge(0.8)
  ) +
  theme_bw()
If you need bars flipped, maybe calculate your own log10(y), see example:
library(ggplot2)
library(dplyr)

# Make your own log10 scale by plotting -log10(value) on a linear axis.
# -log10() is a decreasing function, so the original lower bound becomes the
# upper bound after the transformation (and vice versa). The bounds are swapped
# here so that ymin <= ymax still holds in dfPlot.
# mutate() evaluates its arguments in order, hence the temporary column that
# captures the original ymin before it is overwritten.
dfPlot <- df %>%
  mutate(
    new_upper = -log10(ymin),
    ymin = -log10(ymax),
    ymax = new_upper,
    ymean = -log10(ymean)
  ) %>%
  select(-new_upper)

# Then plot: dodged bars with matching dodged error bars.
ggplot(dfPlot, aes(x = id, y = ymean, fill = var, group = var)) +
  geom_bar(
    position = "dodge", stat = "identity",
    width = 0.7,
    size = 0.9
  ) +
  geom_errorbar(
    aes(ymin = ymin, ymax = ymax),
    size = 0.25,
    width = 0.07,
    position = position_dodge(0.7)
  ) +
  scale_y_continuous(name = expression(-log[10](italic(ymean)))) +
  theme_bw()
Firstly, don't do it! The help file from ?geom_bar says:
A bar chart uses height to represent a value, and so the base of the
bar must always be shown to produce a valid visual comparison. Naomi
Robbins has a nice article on this topic. This is why it doesn't make
sense to use a log-scaled y axis with a bar chart.
To give a concrete example, the following is a way of producing the graph you want, but a larger k will also be correct but produce a different plot visually.
# Arbitrary multiplier applied before the log transform; the axis labels divide
# it back out, so the labels show the original values.
k <- 10000
undo_scaling <- function(x) x / k

bars <- geom_bar(position = "dodge", stat = "identity", width = 0.7, size = 0.9)
whiskers <- geom_errorbar(
  aes(ymin = ymin * k, ymax = ymax * k),
  size = 0.25,
  width = 0.07,
  position = position_dodge(0.7)
)

ggplot(df, aes(x = id, y = ymean * k, fill = var, group = var)) +
  bars +
  whiskers +
  theme_bw() +
  scale_y_log10(labels = undo_scaling)
k=1e4
k=1e6

Is it possible to put space between stacks in ggplot2 stacked bar?

I took this example from here:
# Example data: one row per Rank with three value columns F1..F3.
DF <- read.table(text="Rank F1 F2 F3
1 500 250 50
2 400 100 30
3 300 155 100
4 200 90 10", header=TRUE)
library(reshape2)
# Reshape to long format, keeping Rank as the id column (melt()'s default output
# columns are `variable` and `value`, which the plot below uses).
DF1 <- melt(DF, id.var="Rank")
library(ggplot2)
# Stacked bars per Rank, one fill colour per original column.
ggplot(DF1, aes(x = Rank, y = value, fill = variable)) +
geom_bar(stat = "identity")
Is it possible to create a stacked bar such as the following graph using ggplot2? I do not want to differentiate stacks by different colors.
EDIT: Based on Pascal's comments,
# Attempt from the comments: no fill mapping, so all segments share one fill;
# a white outline of line width 2 separates the stacked segments.
ggplot(DF1, aes(x = Rank, y = value)) +
geom_bar(stat = "identity",lwd=2, color="white")
I still have the white borders for the bars.
This is the closest I could get to your example figure. It is not much of an improvement beyond what you've already sorted but puts less of an emphasis on the white bar borders on the grey background.
library(ggplot2)

# Black stacked segments separated by a white outline, on the classic theme
# with the x-axis tick labels rotated by 90 degrees.
segments <- geom_bar(
  stat = "identity",
  position = "stack",
  lwd = 1.5,
  width = 0.5,
  colour = "white",
  fill = "black"
)
rotated_x_labels <- theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

p <- ggplot(DF1, aes(x = Rank, y = value, group = variable)) +
  segments +
  theme_classic() +
  rotated_x_labels
p
That produces:
If you want to keep the grey background you can find out exactly what shade of grey it is and use that colour for the line while removing the background grids (this is not the right shade).
# Keep the default theme, outline the black segments in grey, and remove the
# panel grid lines.
p <- ggplot(DF1, aes(x = Rank, y = value)) +
  geom_bar(
    stat = "identity",
    position = "stack",
    lwd = 1.5,
    width = 0.5,
    colour = "grey",
    fill = "black"
  ) +
  theme(panel.grid = element_blank())
p
An issue with this solution is that very small groups will not be seen (e.g., when Rank = 4 variable F3 = 10; this small value is completely covered by the white bar outline).
Your sample data:
DF1 <- structure(list(Rank = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L), variable = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L), .Label = c("F1", "F2", "F3"), class = "factor"),
value = c(500L, 400L, 300L, 200L, 250L, 100L, 155L, 90L,
50L, 30L, 100L, 10L)), row.names = c(NA, -12L), .Names = c("Rank",
"variable", "value"), class = "data.frame")

How to organize percentage values on top of a stacked bar chart ggplot2

I have a very skewed bar chart in ggplot2.
Here's the dput text output:
structure(list(Name = structure(c(1L, 3L, 4L, 5L, 6L, 2L, 1L,
3L, 4L, 5L, 6L, 2L), .Label = c("A", "Average", "B", "C", "D",
"E"), class = "factor"), variable = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Undiscounted", "Discounted"
), class = "factor"), value = c(18453601.4400001, 11941354.11,
10748756.04, 6488253.74000001, 6078914.73000002, 2509377.50173653,
1157538776.56, 833907589.89, 674006380.96, 574466340.26, 534854603.27,
13191411.5509581)), row.names = c(NA, -12L), .Names = c("Name",
"variable", "value"), class = "data.frame")
Here's the code I use to plot it:
library(ggplot2)
# Font sizes reused by the theme() calls below.
text_size= 18
label_bottom_size=18
# Stacked bars of `value` per `Name`, filled by `variable`; the plot is stored in
# `plot1` rather than printed.
# NOTE(review): `df1` is not assigned in this snippet; presumably it is the dput
# output shown above — confirm.
plot1<- ggplot(df1, aes(x = Name, y = value, fill = variable)) +
geom_bar(stat = "identity")+
ggtitle(sprintf("Bar chart of Stuff" ))+
theme(axis.title=element_text(size=text_size))+
theme(plot.title=element_text(size=text_size+20))+
theme(axis.text.x= element_text(size=label_bottom_size))+
theme(axis.text.y= element_text(size=text_size))+
theme(legend.text = element_text(size=text_size))+
theme(legend.title = element_text(size=text_size))
As some of the bar charts are so small and text doesn't fit, what I want to do is just have a (X%/Y%) above each bar that shows the percentage breakout. The values shown are in dollars.
Thank you!
This is some hack using data.table for aggregating the data and then displaying it with geom_text (there are probably better ways though)
library(data.table)
library(ggplot2)

# Percentage share of every row within its Name.
temp <- data.table(df1)[, per := (value / sum(value)) * 100, by = Name]

# Collapse to exactly ONE row per Name: the stack height (where the label goes)
# and the combined "x% / y%" label. Keeping `variable` in this table would
# produce one row per variable and therefore draw every label twice.
temp <- temp[, list(
  value = sum(value),
  per = paste(sprintf("%.02f%%", per), collapse = " / ")
), by = Name]

text_size <- 18
label_bottom_size <- 18

ggplot(df1, aes(x = Name, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  ggtitle(sprintf("Bar chart of Stuff" )) +
  theme(
    axis.title = element_text(size = text_size),
    plot.title = element_text(size = text_size + 20),
    axis.text.x = element_text(size = label_bottom_size),
    axis.text.y = element_text(size = text_size),
    legend.text = element_text(size = text_size),
    legend.title = element_text(size = text_size)
  ) +
  # inherit.aes = FALSE: the label table has no `variable` column, so the
  # plot-level fill mapping must not be inherited by this layer.
  geom_text(
    data = temp,
    aes(x = Name, y = value, label = per),
    vjust = -0.3,
    inherit.aes = FALSE
  )

Alignment of numbers on the individual bars

I have the need to place labels above bars on ggplot. I used to use the method found (HERE) but this does not appear to work anymore since my ggplot2 update as I now get the error message:
Error in continuous_scale(c("y", "ymin", "ymax", "yend", "yintercept", :
unused argument(s) (formatter = "percent")
How can I again plot numeric values above the bars when using the example:
df <- structure(list(A = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L,
3L), .Label = c("0-50,000", "50,001-250,000", "250,001-Over"), class = "factor"),
B = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("0-50,000",
"50,001-250,000", "250,001-Over"), class = "factor"), Freq = c(0.507713884992987,
0.258064516129032, 0.23422159887798, 0.168539325842697, 0.525280898876405,
0.306179775280899, 0.160958904109589, 0.243150684931507,
0.595890410958904)), .Names = c("A", "B", "Freq"), class = "data.frame", row.names = c(NA,
-9L))
library(ggplot2)
# Question's code as posted: dodged bars filled by `B`, with percentage labels
# placed 0.015 above each bar.
# The scale_y_continuous(formatter = "percent") call is the one named in the
# "unused argument(s) (formatter = "percent")" error quoted above.
ggplot(data=df, aes(x=A, y=Freq))+
geom_bar(aes(fill=B), position = position_dodge()) +
geom_text(aes(label = paste(sprintf("%.1f", Freq*100), "%", sep=""),
y = Freq+0.015, x=A),
size = 3, position = position_dodge(width=0.9)) +
scale_y_continuous(formatter = "percent") +
theme_bw()
Running R 2.15 ggplot2 0.9 on a win 7 machine
The error is from the scale_y_continuous call. Formatting of labels is now handled by the labels argument. See the ggplot2 0.9.0 transition guide for more details.
There was another problem with the labels not lining up correctly; I fixed that by adding a group=B to the aesthetics for the geom_text; I'm not quite sure why this is necessary, though. I also took out x=A from the geom_text aesthetics because it was not needed (it would be inherited from the ggplot call).
library("ggplot2")
library("scales")

# Bars and labels use the same dodge width so each label sits over its own bar.
bar_dodge <- position_dodge(width = 0.9)

ggplot(data = df, aes(x = A, y = Freq)) +
  # `Freq` already holds the bar heights, so the values must be plotted as-is
  # (stat = "identity") instead of being counted.
  geom_bar(aes(fill = B), stat = "identity", position = bar_dodge) +
  # group = B makes the text layer dodge by the same variable as the bars.
  geom_text(
    aes(
      label = paste0(sprintf("%.1f", Freq * 100), "%"),
      y = Freq + 0.015,
      group = B
    ),
    size = 3,
    position = bar_dodge
  ) +
  # Label formatting is passed via `labels`; `formatter` is no longer accepted.
  scale_y_continuous(labels = scales::percent) +
  theme_bw()

Resources