Reposition geom_errorbar on a faceted bargraph - r

I have found a lot of questions that deal with repositioning error bars in ggplot2, but none that have my particular problem. I hope this isn't a duplicate!
I have a faceted barplot, where I am trying to add in error bars for confidence intervals that have already been calculated. The arguments stat and position don't seem to be having any effect, whether they are in the aes() argument or just with geom_errorbar(). Here is what I am working with:
> dput(anthro.bp)
structure(list(Source = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("PED", "RES"), class = "factor"),
Response = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("A", "B", "C", "D"
), class = "factor"), Value = c(0.5043315, 0.03813694, 0.20757498,
0.249956615, 0.9232598, 0.0142572, 0.0537258, 0.008757155,
0.897265, 0.03153401, 0.06610772, 0.005093254, 0.8360081,
0.03893782, 0.0370325, 0.088021559), Distance = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("Near", "Far"), class = "factor"), UCI = c(0.5853133,
0.07247573, 0.27357566, 0.32335335, 0.9744858, 0.03844421,
0.08841988, 0.04262752, 0.9422062, 0.0540748, 0.09600908,
0.03348959, 1.2445932, 0.11196198, 0.10133358, 0.52272511
), LCI = c(0.4233497, 0.003798153, 0.1415743, 0.17655988,
0.8720338, -0.009929805, 0.01903172, -0.02511321, 0.8523238,
0.008993231, 0.03620636, -0.02330308, 0.427423, -0.034086335,
-0.02726858, -0.34668199)), .Names = c("Source", "Response",
"Value", "Distance", "UCI", "LCI"), row.names = c(NA, -16L), class = "data.frame")
anthro.bp[,4]<-factor(anthro.bp[,4], levels=c("Near","Far"))
bp <- ggplot(anthro.bp, aes(Value, fill=Response))
bp + geom_bar(aes(x=Source,y=Value), stat="identity", position="dodge") +
geom_errorbar(aes(ymin=LCI,ymax=UCI), stat="identity", position="dodge",width=0.25) +
facet_wrap(~Distance) +
labs(x="Disturbance Source", y="Mean Probability")
I have also tried to use position=position_dodge(width=1), again both within the aes() argument and outside of that in the geom_errorbar() command. My graph is as follows in the link (I don't have a high enough reputation to embed images yet, apologies!).
I'm also getting two warning messages:
Warning messages:
1: In loop_apply(n, do.ply) :
position_dodge requires non-overlapping x intervals
2: In loop_apply(n, do.ply) :
position_dodge requires non-overlapping x intervals
This is the first time that I have used ggplot2 outside of a classroom environment, so constructive criticism is highly encouraged.

For some reason that is not clear to me, ggplot2 is dodging your bars and error bars by different widths. I got around this by manually specifying the dodging width. Also, you had set the x and y aesthetics only in geom_bar; note where they are placed now. Lastly, stat='identity' is not needed for the geom_errorbar call.
# x, y and fill are mapped once at the top level so that every layer
# (bars and error bars alike) inherits the same positions and grouping.
bp <- ggplot(anthro.bp, aes(x = Source, y = Value, fill = Response))

# Bars and error bars get the same explicit dodge width, so each error bar
# is shifted by exactly the same amount as the bar it belongs to.
bp +
  geom_col(position = position_dodge(width = 0.90)) +
  geom_errorbar(
    aes(ymin = LCI, ymax = UCI),
    position = position_dodge(width = 0.90),
    width = 0.25
  ) +
  facet_wrap(~Distance) +
  labs(x = "Disturbance Source", y = "Mean Probability")

according to How to make dodge in geom_bar agree with dodge in geom_errorbar, geom_point
is it what you wanted?
# Store the interval as offsets from the bar height; the absolute limits are
# rebuilt inside aes(), so ymin/ymax reproduce LCI/UCI (up to floating-point
# rounding).
anthro.bp$dmin <- with(anthro.bp, Value - LCI)
anthro.bp$dmax <- with(anthro.bp, UCI - Value)

ggplot(
  data = anthro.bp,
  aes(x = Source, ymin = Value - dmin, ymax = Value + dmax, fill = Response)
) +
  geom_bar(aes(y = Value), stat = "identity", position = position_dodge()) +
  geom_errorbar(position = position_dodge(width = 0.9), colour = "black") +
  facet_wrap(~Distance) +
  labs(x = "Disturbance Source", y = "Mean Probability")

I believe your main problem was the undefined x value (Source) in the aes of bp.
# Supplying x = Source (and y = Value) in the top-level aes() lets both
# layers share the same x positions and fill grouping.
bp <- ggplot(anthro.bp, aes(x = Source, y = Value, fill = Response))

bp +
  geom_col(position = position_dodge()) +
  # "identity" is already the default stat of geom_errorbar()
  geom_errorbar(
    aes(ymin = LCI, ymax = UCI),
    position = position_dodge(0.9),
    width = 0.25
  ) +
  facet_wrap(~Distance) +
  labs(x = "Disturbance Source", y = "Mean Probability")

Related

Having Two or More Line Breaks and Italicized Words in Axis Labels in a 'ggplot()' Plot in R

I have the following data:
df <- structure(list(Site = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Permafrost", "Palsa",
"Palsa Hollow", "Rich Sphagnum Lawn", "Tall Graminoid Fen"), class = "factor"),
Depth = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L), .Label = c("Upper Depth", "Lower Depth"
), class = "factor"), ug.Al.m2 = c(0.093375394, 0.15684537,
0.025747986, 0.031130205, 0.074247144, 0.054740061, 0.006671475,
0.002208133, 0.003427595, 0.001447068, 0.013960114, 0.008988422,
0.047630561, 0.005434406, 0.041627689, 0.004127627, 0.013713378,
0.00501951, 0.512382579, 0.628336756, 0.293063584, 0.460299194,
0.188002926, 0.385744659, 0.220549738, 0.003135834, 0.006755556,
0.012846966, 0.008662843, 0.0064347, 0.004951768)), row.names = c(NA,
-31L), class = "data.frame")
I am using it to make a barplot:
library (cowplot)
library (ggplot2)
library (RColorBrewer)
X_Axis_Labels <- c("Permafrost", "Palsa", expression(atop("Palsa", "Hollows")), expression(atop("Rich", italic("Sphagnum"), "Lawn")), expression(atop("Tall", "Graminoid", "Fen")))
Legend_Labels <- c("Permafrost", "Palsa", "Palsa Hollows", expression(paste("Rich ", italic("Sphagnum"), " Lawn")), "Tall Graminoid Fen")
Palette1 <- c(brewer.pal(11, "RdBu")[c(11,10,9,8,7)])
ggplot(df, aes(x = Site, y = ug.Al.m2, fill = Site)) +
stat_summary(geom = "bar", width = 0.6, fun = mean, colour = "black") +
stat_summary(geom = "errorbar", width = 0.2, fun.data = mean_se) +
ggtitle("Total Aluminum Concentrations in Permafrost Peatland Communities") +
scale_x_discrete(labels = X_Axis_Labels) +
scale_fill_manual(values = Palette1, labels = Legend_Labels) +
ylab(expression(paste("Aluminum Concentration, ", mu, "g m" ^ "-2"))) +
xlab("Site") +
theme_cowplot(13)
Here's what the graph looks like:
I'm having a lot of trouble getting all three lines of the x axis labels to appear on my graph. The word 'Lawn', which should appear under 'Sphagnum', is lost. Since the word 'Sphagnum' needs to be italicized, I can't simply use the standard line break (\n). I've also tried playing with the plot margins to no avail.
Is there a solution to this problem?
Thank you!
Try this approach with ggtext and element_markdown(). Wrap a word in single asterisks (*word*) for italics and use <br> for a line break. You can customize at any level you wish. Here is the code:
library (cowplot)
library (ggplot2)
library (RColorBrewer)
library(ggtext)
# Axis labels written as markdown/HTML for element_markdown():
# <br> starts a new line and *text* is rendered in italics.
# Only the genus name "Sphagnum" is italicised, as in the question's labels.
X_Axis_Labels <- c(
  "Permafrost",
  "Palsa",
  "Palsa<br>Hollows",
  "Rich<br>*Sphagnum*<br>Lawn",
  "Tall<br>Graminoid<br>Fen"
)
Legend_Labels <- c("Permafrost", "Palsa", "Palsa Hollows", expression(paste("Rich ", italic("Sphagnum"), " Lawn")), "Tall Graminoid Fen")
Palette1 <- c(brewer.pal(11, "RdBu")[c(11,10,9,8,7)])
ggplot(df, aes(x = Site, y = ug.Al.m2, fill = Site)) +
stat_summary(geom = "bar", width = 0.6, fun = mean, colour = "black") +
stat_summary(geom = "errorbar", width = 0.2, fun.data = mean_se) +
ggtitle("Total Aluminum Concentrations in Permafrost Peatland Communities") +
scale_x_discrete(labels = X_Axis_Labels) +
scale_fill_manual(values = Palette1, labels = Legend_Labels) +
ylab(expression(paste("Aluminum Concentration, ", mu, "g m" ^ "-2"))) +
xlab("Site") +
theme_cowplot(13)+
theme(axis.text.x = element_markdown())
Output:

How can I set median crossbars to align within factors?

I have a data frame like so:
my_df <- structure(list(SampleID = c("sample01", "sample02", "sample03",
"sample04", "sample05", "sample06", "sample07", "sample08", "sample09",
"sample10", "sample11", "sample12", "sample13", "sample14", "sample15",
"sample16", "sample17", "sample18", "sample19", "sample20"),
y = c(1.68547922357333, 0.717650914301956, 1.18156420566867,
1.31643130248052, 1.2021341615705, 0.946937741954258, 1.75576099871947,
0.952670480793451, 2.00921185693852, 0.968642950473789, 1.65243482711174,
2.14332269635055, 0.30556964944383, 0.860605616591314, 0.933339331803171,
1.31797519903504, 0.857873539291964, -0.328227710452388,
-0.22023346428776, 1.6600566728651), week = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 3L, 1L, 2L,
3L, 1L, 2L, 3L), .Label = c("0", "3", "6"), class = "factor"),
grumpy = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), week_grumpy = structure(c(2L,
4L, 6L, 2L, 4L, 6L, 1L, 3L, 5L, 2L, 4L, 6L, 1L, 5L, 2L, 4L,
6L, 1L, 3L, 5L), .Label = c("0 No", "0 Yes", "3 No", "3 Yes",
"6 No", "6 Yes"), class = "factor")), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L))
#packages needed if you don't have
install.packages("ggbeeswarm")
install.packages("ggplot2")
This is typically how I graph:
library(ggplot2)
library(ggbeeswarm)
ggplot(data = my_df, aes(x=week, y=y, color=grumpy)) +
geom_quasirandom(dodge.width = 0.75)
Which is nice because it separates the colors rather nicely. Nowadays, I like to add a median crossbars to further show the differences between groups. Like so:
ggplot(data = my_df, aes(x=week, y=y, color=grumpy)) +
geom_quasirandom(dodge.width = 0.75) +
stat_summary(aes(group = grumpy), fun = median, fun.min = median, fun.max = median, geom = "crossbar", color = "black", width = 0.7, lwd = 0.2)
Now, what I would love to have is the median crossbars to align with the colors within each factor on the x-axis. Is there a way to do this within R? Or am I relegated to manually editing the crossbars to line up?
Here is one thing I have tried:
ggplot(data = my_df, aes(x=week_grumpy, y=y, color=grumpy)) +
geom_jitter(width = 0.1) +
stat_summary(aes(group = grumpy), fun = median, fun.min = median, fun.max = median, geom = "crossbar", color = "black", width = 0.7, lwd = 0.2)
But now the x-axis is not the way I want it (However, it would be easier to manually edit in something like Inkscape than the previous example).
I've found some hints here and here but have yet to arrive at a satisfactory solution.
What you are looking for is to dodge the crossbar geom. For example:
# The points are dodged by 0.75 in geom_quasirandom(); giving the crossbar
# layer a position_dodge() of the same width shifts each median bar by the
# same amount as the points of its group.
dodge_width <- 0.75

ggplot(data = my_df, aes(x = week, y = y, color = grumpy)) +
  geom_quasirandom(dodge.width = dodge_width) +
  stat_summary(
    aes(group = grumpy),
    geom = "crossbar",
    fun = median, fun.min = median, fun.max = median,
    color = "black", width = 0.7, lwd = 0.2,
    # this argument is the addition to the original stat_summary() call
    position = position_dodge(width = dodge_width)
  )
It seems that geom_quasirandom() is acting here very similarly to geom_point(position=position_jitterdodge(dodge.width=0.75)). In this case, since dodge.width is specified in geom_quasirandom(), you use the same width for position_dodge in the crossbar geom.
Note: you may want to play around with aesthetic formatting to be able to make the distinction a bit more clear what the crossbars are telling you, but this should answer your question.

ggplot2 boxplots - How to group factors levels on the x-axis (and add reference lines for each group mean)

I have 30 plant species for which I have displayed the distributions of midday leaf water potential (lwp_md) using boxplots and the package ggplot2. But how do I group these species along the x-axis according to their leaf habits (e.g. Deciduous, Evergreen) as well as display a reference line indicating the mean lwp_md value for each leaf habit level?
I have attempted with the package forcats but really have no idea how to proceed with this one. I can't find anything after an extensive search online. The best I seem able to do is order species by some other function e.g. the median.
Below is an example of my code so far. Note I have used the packages ggplot2 and ggthemes:
library(ggplot2)
library(ggthemes) # theme_few()
library(forcats)  # fct_reorder()

# Boxplots of lwp_md per species, with species ordered on the x axis by
# their median lwp_md in descending order and boxes filled by leaf habit.
ggplot(zz, aes(x = fct_reorder(species, lwp_md, .fun = median, .desc = TRUE),
               y = lwp_md)) +
  geom_boxplot(aes(fill = leaf_habit)) +
  theme_few(base_size = 14) +
  theme(legend.position = "top",
        axis.text.x = element_text(size = 8, angle = 45, vjust = 1, hjust = 1)) +
  xlab("Species") +
  ylab("Maximum leaf water potential (MPa)") +
  scale_y_reverse() +
  scale_fill_discrete(name = "Leaf habit",
                      breaks = c("DEC", "EG"),
                      labels = c("Deciduous", "Evergreen"))
Here's a subset of my data including 4 of my species (2 deciduous, 2 evergreen):
> dput(zz)
structure(list(id = 1:20, species = structure(c(1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L
), .Label = c("AMYELE", "BURSIM", "CASXYL", "COLARB"), class = "factor"),
leaf_habit = structure(c(2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("DEC",
"EG"), class = "factor"), lwp_md = c(-2.1, -2.5, -2.35, -2.6,
-2.45, -1.7, -1.55, -1.4, -1.55, -0.6, -2.6, -3.6, -2.9,
-3.1, -3.3, -2, -1.8, -2, -4.9, -5.35)), class = "data.frame", row.names = c(NA,
-20L))
An example of how I'm looking to display my data, cut and edited - I would like species on x-axis, lwp_md on y-axis:
ggplot2 places the levels of a factor on the axis in the order in which they are defined, which by default is alphabetical. To change this, redefine the factor with its levels in the order you want. This can be done by arranging the data.frame and then redeclaring the factor. To generate the mean value, we can use group_by and mutate to add a new mean column to the data frame, which can later be plotted.
Here is the complete code:
library(ggplot2)
library(ggthemes)
library(dplyr)

# Sort the rows by leaf habit and attach each habit's mean lwp_md to every
# row; ungroup() so the grouping does not leak into later operations.
zz2 <- zz %>%
  arrange(leaf_habit) %>%
  group_by(leaf_habit) %>%
  mutate(mean = mean(lwp_md)) %>%
  ungroup()

# Re-level species in order of first appearance in the sorted data, so that
# species sharing a leaf habit sit next to each other on the x axis.
zz2$species <- factor(zz2$species, levels = unique(zz2$species))

ggplot(zz2, aes(x = species, y = lwp_md)) +
  geom_boxplot(aes(fill = leaf_habit)) +
  theme_few(base_size = 14) +
  theme(legend.position = "top",
        axis.text.x = element_text(size = 8, angle = 45, vjust = 1, hjust = 1)) +
  xlab("Species") +
  ylab("Maximum leaf water potential (MPa)") +
  scale_y_reverse() +
  scale_fill_discrete(name = "Leaf habit",
                      breaks = c("DEC", "EG"),
                      labels = c("Deciduous", "Evergreen")) +
  # Error bars with ymin == ymax have zero height, so they act as horizontal
  # reference lines at the leaf-habit mean over each species.
  geom_errorbar(aes(species, ymax = mean, ymin = mean),
                size = 0.5, linetype = "longdash", inherit.aes = FALSE,
                width = 1)

Convert two ggplots into one

I am facing some problem to have one plot instead of two from separate data frames. I explained the situation a bit below. The data frames look like:
df1 <- structure(list(value = c(9921L, 21583L, 11822L, 1054L, 13832L,
16238L, 13838L, 20801L, 20204L, 13881L, 19935L, 13829L, 14012L,
20654L, 13862L, 21191L, 3777L, 15552L, 13817L, 20428L, 16850L,
21003L, 11072L, 22477L, 12321L, 12856L, 16295L, 11431L, 13469L,
14680L, 10552L, 15272L, 9132L, 9374L, 15123L, 22754L, 10363L,
12160L, 13729L, 11151L, 11451L, 11272L, 14900L, 14688L, 17133L,
7315L, 7268L, 6262L, 72769L, 7650L, 16389L, 13027L, 7134L, 6465L,
6490L, 15183L, 7201L, 14070L, 11210L, 10146L), limit = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1Mbit",
"5Mbit", "10Mbit"), class = "factor")), class = "data.frame", row.names = c(NA,
-60L))
df2 <- structure(list(value = c(37262L, 39881L, 30914L, 32976L, 28657L,
39364L, 39915L, 30115L, 29326L, 36199L, 37976L, 36694L, 33718L,
36945L, 33182L, 35866L, 34188L, 33426L, 32804L, 34986L, 29355L,
30470L, 37420L, 26465L, 28975L, 29144L, 27491L, 30507L, 27146L,
26257L, 31231L, 30521L, 30370L, 31683L, 33774L, 35654L, 34172L,
38554L, 38030L, 33439L, 34817L, 31278L, 33579L, 31175L, 31001L,
29908L, 31658L, 33381L, 28709L, 34794L, 34154L, 30157L, 33362L,
30363L, 31097L, 29116L, 27703L, 31229L, 30196L, 30077L), limit = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("180ms",
"190ms", "200ms"), class = "factor")), class = "data.frame", row.names = c(NA,
-60L))
from the data frames above, I have these plots:
limit_bw <- factor(df1$limit, levels = c("1Mbit", "5Mbit", "10Mbit"))
limit_lt <- factor(df2$limit, levels = c("200ms", "190ms", "180ms"))
(to use them sequentially)
bw_line <- ggplot(df1, aes(x = limit_bw, y = value, group=1)) + geom_quantile(method = "loess")
lt_line <- ggplot(df2, aes(x = limit_lt, y = value, group=1)) + geom_quantile(method = "loess")
(I actually have many data so I used geom_quantile())
And also two plots in a grid using rbind/cbind (which is not what I want now):
grid.draw(rbind(ggplotGrob(ggplot(df1, aes(limit_bw,value,group=1)) + geom_quantile(method = "loess") + labs(title = "value vs bw",x="bandwidth",y="value")),
ggplotGrob(ggplot(df2, aes(limit_lt, value, group = 1)) + geom_quantile(method="loess") + labs(title="value vs latency", x="latency", y="value")), size = "last"))
I am seeking your help to merge them together into one plot (putting bw_line and lt_line together in the same graph) showing two x-axes either at the top and bottom or two axes in the bottom mentioning their title. Please note, the value has different range for each of the data set. However I need to show two y-axes for separate ranges for each data frame or may be one y-axis showing all the values (min to max) from the both data frame.
I have actually seen one very close solution here from @RichieCotton, but could not adapt it to my data since I have factors instead of integer values.
I really appreciate your help. Thank you.
I think it's probably easiest to approach this by combining the data into one data frame first. Here I make combined x-values and map your data to those. Then we map as usual, with the addition of a secondary y axis.
library(tidyverse)
library(forcats)

# Create shared x axis and combine data frames
limit_combo <- data.frame(
  level_num = 1:3,
  level = as_factor(c("1Mbit\n200ms", "5Mbit\n190ms", "10Mbit\n180ms"))
)

# The join key is named explicitly so the join cannot silently pick up
# another shared column.
df1b <- df1 %>%
  mutate(level_num = as.numeric(limit)) %>%
  left_join(limit_combo, by = "level_num")

# df2's levels are defined as 180ms, 190ms, 200ms, i.e. in the opposite
# direction to the shared axis, so the level codes are reversed (1 <-> 3).
df2b <- df2 %>%
  mutate(level_num = 4 - as.numeric(limit)) %>%
  left_join(limit_combo, by = "level_num")

df3 <- bind_rows(df1b, df2b, .id = "plot") %>%
  mutate(plot = if_else(plot == "1", "bw", "lt"))

# Factor applied to the "lt" values so that both series share the left
# y axis. The secondary axis divides by the same factor, so its labels show
# the original "lt" units; naming it keeps the two uses in sync.
lt_scale <- 0.44

# plot with adjusted y values and second axis for reference
ggplot(df3, aes(x = level,
                y = value * if_else(plot == "lt", lt_scale, 1),
                group = plot, color = plot)) +
  geom_quantile(method = "loess") +
  scale_y_continuous("value", sec.axis = sec_axis(~ . / lt_scale)) +
  theme(axis.text.y.left = element_text(color = "#F8766D"),
        axis.text.y.right = element_text(color = "#00BFC4"))
Here is a different approach to create a single plot from the two datasets which avoids combining both datasets into one and dealing with the factors of limit. df1, df2, limit_bw, and limit_lt are used as given by the OP.
The plot is refined in three steps.
1. Common x axis, common y scale
library(ggplot2)
ggplot() + aes(y = value) +
geom_quantile(aes(x = as.integer(limit_bw), colour = "bw"), df1, method = "loess") +
geom_quantile(aes(x = as.integer(limit_lt), colour = "lt"), df2, method = "loess") +
scale_x_continuous("limit",
breaks = 1:nlevels(limit_bw),
labels = paste(levels(limit_bw), levels(limit_lt), sep = "\n")) +
scale_colour_discrete(NULL)
2. Separate x axes, common y scale
library(ggplot2)
ggplot() + aes(y = value) +
geom_quantile(aes(x = as.integer(limit_bw), colour = "bw"), df1, method = "loess") +
geom_quantile(aes(x = as.integer(limit_lt), colour = "lt"), df2, method = "loess") +
scale_x_continuous("limit",
breaks = 1:nlevels(limit_bw),
labels = levels(limit_bw),
sec.axis = dup_axis(labels = levels(limit_lt))) +
scale_colour_manual(NULL, values = c(bw = "blue", lt = "red")) +
theme(axis.text.x.bottom = element_text(color = "blue"),
axis.text.x.top = element_text(color = "red"))
3. Separate x axes, separate y axes
Here, the y-values of the second dataset are scaled such that the min and max values of the two datasets will coincide.
# compute scaling factor and offset
library(magrittr) # used to improve readability
bw_rng <- loess(df1$value ~ as.integer(limit_bw)) %>% fitted() %>% range()
lt_rng <- loess(df2$value ~ as.integer(limit_lt)) %>% fitted() %>% range()
scl <- diff(bw_rng) / diff(lt_rng)
ofs <- bw_rng[1] - scl * lt_rng[1]
library(ggplot2)
ggplot() +
geom_quantile(aes(x = as.integer(limit_bw), y = value, colour = "bw"),
df1, method = "loess") +
geom_quantile(aes(x = as.integer(limit_lt), y = scl * value + ofs, colour = "lt"),
df2, method = "loess") +
scale_x_continuous("limit",
breaks = 1:nlevels(limit_bw),
labels = levels(limit_bw),
sec.axis = dup_axis(labels = levels(limit_lt))) +
scale_y_continuous(sec.axis = sec_axis(~ (. - ofs) / scl)) +
scale_colour_manual(NULL, values = c(bw = "blue", lt = "red")) +
theme(axis.text.x.bottom = element_text(color = "blue"),
axis.text.x.top = element_text(color = "red"),
axis.text.y.left = element_text(color = "blue"),
axis.text.y.right = element_text(color = "red"))

How to plot errorbars on this plot and change the overlay?

Hi, I have this dataset:
tdat=structure(list(Condition = structure(c(1L, 3L, 2L, 1L, 3L, 2L,
1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L,
3L, 2L, 1L, 3L, 2L), .Label = c("AS", "Dup", "MCH"), class = "factor"),
variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("Bot", "Top", "All"), class = "factor"),
value = c(1.782726022, 1, 2.267946449, 1.095240234, 1, 1.103630141,
1.392545278, 1, 0.854984833, 4.5163067, 1, 4.649271897, 0.769428018,
1, 0.483117123, 0.363854608, 1, 0.195799358, 0.673186975,
1, 1.661568993, 1.174998373, 1, 1.095026419, 1.278455823,
1, 0.634152231)), .Names = c("Condition", "variable", "value"
), row.names = c(NA, -27L), class = "data.frame")
> head(tdat)
Condition variable value
1 AS Bot 1.782726
2 MCH Bot 1.000000
3 Dup Bot 2.267946
4 AS Bot 1.095240
5 MCH Bot 1.000000
6 Dup Bot 1.103630
I can plot it like that using this code :
ggplot(tdat, aes(x=interaction(Condition,variable,drop=TRUE,sep='-'), y=value,
fill=Condition)) +
geom_point() +
scale_color_discrete(name='interaction levels')+
stat_summary(fun.y='mean', geom='bar',
aes(label=signif(..y..,4),x=as.integer(interaction(Condition,variable))))
I have 2 questions :
How to change the overlay so the black points are not hidden by the
bar chart (3points should be visible per column)
How to add vertical errorbar on top of the bars using the standard
deviation from the black points ?
I'm not much in favor of mixing error bars with a bar plot.
In ggplot2 geoms are drawn in the order you add them to the plot. So, in order to have the points not hidden, add them after the bars.
# Layers are drawn in the order they are added, so the points come last to
# stay visible on top of the bars.
ggplot(tdat, aes(x = interaction(Condition, variable, drop = TRUE, sep = "-"),
                 y = value, fill = Condition)) +
  # Mean +/- 1 standard deviation. Extra arguments for the summary function
  # must go through fun.args; a bare `mult = 1` is not forwarded to it.
  # NOTE(review): mean_sdl() relies on the Hmisc package being installed.
  stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1),
               geom = "errorbar") +
  stat_summary(fun = mean, geom = "bar") +
  geom_point(show.legend = FALSE) +
  scale_fill_discrete(name = "interaction levels")
Like this:
library(plyr) # provides ddply() and .()

# Build the x variable once, outside of the ggplot() call.
tdat$x <- with(tdat, interaction(Condition, variable, drop = TRUE, sep = "-"))

# Mean -/+ one standard deviation of value for every level of x.
tdat_err <- ddply(tdat, .(x), summarise,
                  ymin = mean(value) - sd(value),
                  ymax = mean(value) + sd(value))

ggplot(tdat, aes(x = x, y = value)) +
  # Bars first, points afterwards, so the points are drawn on top.
  stat_summary(fun = mean, geom = "bar", aes(fill = Condition)) +
  geom_point() +
  # y is un-mapped here because tdat_err has no `value` column.
  geom_errorbar(data = tdat_err,
                aes(x = x, ymin = ymin, ymax = ymax, y = NULL),
                width = 0.5) +
  labs(fill = 'Interaction Levels')
I've cleaned up your code somewhat. You will run into fewer problems if you move any extraneous computations outside of your ggplot() call. Better to create the new x variable first. Everything is more readable that way too.
The overlaying issue just requires re-ordering the layers.
Note that you were using scale_colour_* when you had mapped fill not colour (this is a very common error).
The only other "trick" was the un-mapping of y. Normally, when things get tricky I omit aes from the top level ggplot call entirely to make sure that each layer gets only the aesthetics that it needs.
The error bars again I tend to create the data frame outside of ggplot first. I find that cleaner and easier to read.

Resources