Adding x-axis lables on a faceted plot-ggplot2 - r

I have a facetted geom_col() plot with ggplot2 and I would like to have the x-axis labels on each level of the plots. So after each row, I would have the labels. My graph currently looks like this:
dput(res2)
structure(list(X = structure(c(8L, 1L, 7L, 9L, 6L, 4L, 5L, 3L,
2L, 1L, 7L, 9L, 6L, 4L, 5L, 3L, 2L, 8L, 7L, 9L, 6L, 4L, 5L, 3L,
2L, 8L, 1L, 9L, 6L, 4L, 5L, 3L, 2L, 8L, 1L, 7L, 6L, 4L, 5L, 3L,
2L, 8L, 1L, 7L, 9L, 4L, 5L, 3L, 2L, 8L, 1L, 7L, 9L, 6L, 5L, 3L,
2L, 8L, 1L, 7L, 9L, 6L, 4L, 3L, 2L, 8L, 1L, 7L, 9L, 6L, 4L, 5L
), .Label = c("Blue", "Green", "Magenta", "Maroon", "Orange",
"Pink", "Purple", "Red", "Yellow"), class = "factor"), Phenotype = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Blue", "Green", "Magenta",
"Maroon", "Orange", "Pink", "Purple", "Red", "Yellow"), class = "factor"),
heritability = c(0.12, 0.14, 0.34, 0.21, 0.33, 0.35, 0.25,
0.49, 0.19, 0.42, -0.12, 0.4, 0.13, 0.42, 0.47, 0.2, 0.17,
0.14, -0.1, 0.14, 0.45, 0.24, 0.47, -0.28, 0.34, 0.18, 0.15,
0.37, -0.47, 0.12, 0.17, -0.11, 0.53, 0.41, -0.2, 0.14, 0.26,
0.45, 0.41, 0.48, 0.15, -0.35, 0.22, 0.32, 0.29, 0.47, 0.17,
-0.25, 0.27, 0.38, 0.52, -0.11, 0.5, 0.28, 0.34, 0.31, 0.52,
0.14, -0.23, 0.21, 0.11, -0.42, 0.39, 0.32, 0.51, 0.39, 0.15,
0.46, 0.5, 0.42, 0.46, 0.18), pvalue = c(0.05, 0.09, 0.05,
0.05, 0.09, 0.02, 0.01, 0.1, 0.05, 0.04, 0.08, 0.01, 0.08,
0.05, 0.07, 0.06, 0.01, 0.04, 0.04, 0.01, 0.06, 0.1, 0.07,
0.01, 0.05, 0.02, 0.08, 0.1, 0.03, 0.06, 0.02, 0.08, 0.09,
0.01, 0.06, 0.04, 0.07, 0.03, 0.03, 0.07, 0.01, 0.01, 0.06,
0.05, 0.04, 0.06, 0.04, 0.03, 0.04, 0.04, 0.09, 0.1, 0.07,
0.01, 0.08, 0.06, 0.01, 0.07, 0.06, 0.08, 0.09, 0.1, 0.09,
0.01, 0.07, 0.05, 0.07, 0.06, 0.1, 0.1, 0.08, 0.09)), class = "data.frame",
row.names = c(NA, -72L))
And here is my plot code:
A <- ggplot(res2, aes(Phenotype, heritability))
# uses a bar chart, geom_col represents hereditity values as the hights of the bars.
A + geom_col(position = 'stack', fill = "#0000ff") +
# Facets the data according to the Phenotypes in the X column of the data
facet_wrap(.~ X) +
# Theme info: tilts the x-axis labels 90 degrees and pushes labels to be centered below the bars
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .4), plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5))+
labs(title ="Heritability of Phenotype Permutations", subtitle = "P-values indicated")+
# adds the pvalues above the bars, sets their position to be above or below the bar.
geom_text(aes(y = heritability + .06 * sign(heritability), label = pvalue), position = position_dodge(width = 0.9), size = 3.3)

In facet_wrap you can specify scales = 'free_x', which will allow each subplot to have its own x-axis including its own axis labels.
(Similar for free_y or both)
In your example using facet_wrap(.~X, scales= 'free_x') will produce the following plot:

Related

ggplot2 - customize two-factor legend

I am using ggplot2 to plot monthly vertical profiles of soil moisture in two sites, for both observed and modeled data.
I am using interaction to add colours to both factors (month and type). I am also creating two different manual color palettes with the colors I need. This is how to to reproduce the plot:
library(ggplot2)
df1<- structure(list(site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L), .Label = c("IL_Shabbona_5_NNE", "ME_Limestone_4_NNW",
"ME_Old_Town_2_W", "MI_Chatham_1_SE", "MI_Gaylord_9_SSW", "MN_Goodridge_12_NNW",
"MN_Sandstone_6_W", "NY_Ithaca_13_E", "NY_Millbrook_3_W", "WI_Necedah_5_WNW"
), class = "factor"), month = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L), depth = c(5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 50, 50, 50,
50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
50, 50, 50, 50, 50, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100), value = c(0.38,
0.4, 0.37, 0.32, 0.29, 0.3, 0.24, 0.28, 0.24, 0.26, 0.32, 0.39,
0.13, NaN, 0.13, 0.12, 0.1, 0.1, 0.06, 0.07, 0.09, 0.1, 0.12,
0.13, 0.39, 0.39, 0.37, 0.35, 0.33, 0.31, 0.27, 0.29, 0.27, 0.28,
0.34, 0.38, 0.1, NaN, 0.12, 0.11, 0.09, 0.09, 0.05, 0.06, 0.09,
0.09, 0.11, 0.11, 0.39, 0.41, 0.38, 0.35, 0.34, 0.32, 0.29, 0.33,
0.31, 0.3, 0.34, 0.36, 0.1, NaN, 0.1, 0.1, 0.09, 0.08, 0.05,
0.05, 0.08, 0.08, 0.1, 0.1, 0.32, 0.31, 0.33, 0.34, 0.36, 0.34,
0.29, 0.33, 0.32, 0.31, 0.32, 0.33, 0.06, 0.06, 0.07, 0.06, 0.06,
0.05, 0.03, 0.03, 0.04, 0.05, 0.06, 0.06, 0.4, 0.4, 0.41, 0.41,
0.45, 0.47, 0.43, 0.4, 0.39, 0.38, 0.38, 0.4, 0.05, 0.05, 0.05,
0.06, 0.05, 0.05, 0.04, 0.04, 0.05, 0.05, 0.06, 0.05, 0.35, 0.35,
0.36, 0.33, 0.29, 0.28, 0.27, 0.26, 0.26, 0.28, 0.3, 0.36, 0.35,
0.35, 0.36, 0.33, 0.29, 0.28, 0.27, 0.27, 0.27, 0.28, 0.3, 0.35,
0.34, 0.35, 0.35, 0.34, 0.3, 0.29, 0.28, 0.28, 0.28, 0.29, 0.3,
0.34, 0.28, 0.29, 0.3, 0.32, 0.31, 0.3, 0.29, 0.29, 0.29, 0.3,
0.3, 0.29, 0.26, 0.27, 0.27, 0.29, 0.29, 0.29, 0.28, 0.28, 0.28,
0.29, 0.29, 0.28, 0.38, 0.38, 0.39, 0.38, 0.31, 0.3, 0.29, 0.29,
0.3, 0.31, 0.35, 0.39, 0.36, 0.36, 0.37, 0.37, 0.31, 0.31, 0.29,
0.3, 0.3, 0.31, 0.33, 0.37, 0.37, 0.37, 0.37, 0.38, 0.32, 0.32,
0.31, 0.31, 0.31, 0.32, 0.33, 0.37, 0.31, 0.32, 0.32, 0.34, 0.33,
0.32, 0.31, 0.31, 0.32, 0.32, 0.31, 0.3, 0.27, 0.28, 0.28, 0.29,
0.31, 0.3, 0.3, 0.29, 0.3, 0.3, 0.3, 0.28), type = rep(c("observed","modeled"), each=120)), class = "data.frame", row.names = c(NA,
-240L))
# Create blue and red palettes
mypal.blue <- colorRampPalette(RColorBrewer::brewer.pal(6,"PuBu"))
mypal.red <- colorRampPalette(RColorBrewer::brewer.pal(6,"YlOrRd"))
# Plot
ggplot(df1, aes(x=value, y=-depth, colour=interaction(as.factor(month),type))) +
geom_path(size=1) + geom_point(size=0.7) +
facet_wrap(~ site, nrow=3) +
theme_bw(base_size=20) +
scale_colour_manual(values=c(mypal.blue(12),mypal.red(12))) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.title=element_blank()) + theme(legend.position = c(0.75, 0.13))
However, the legend is a complete mess.
I would like to create two separate legends, loosely based on this example.
one showing orange for observed and blue for modeled
the other one showing the actual color gradients and the months (ideally with the first letter instead of numbers)
How to create such legends?
Updated Answer
It just hit me that there is a relatively straightforward way to hack the legend to get pretty close to what you want. We relabel the legend labels and add a title. The hacky part is that you have to fiddle with the legend title spacing, legend key width, and text size to get the titles lined up over the legend keys.
With all those lines and colors and the complicated legend, the plot seems very busy and difficult to interpret beyond showing that the model doesn't fit the data very well, so maybe it would still be better to consider one of the other options in my or #neilfws's answer. In addition, because the legend title is manually hardcoded, it's not linked to the aesthetic mapping and you therefore have to be careful that "Modeled" and "Observed" are in the right order above the legend keys.
ggplot(df1, aes(x=value, y=-depth, colour=interaction(as.factor(month),type))) +
geom_path(size=1) + geom_point(size=0.7) +
facet_wrap(~ site, nrow=3) +
theme_bw(base_size=20) +
scale_colour_manual(values=c(mypal.blue(12),mypal.red(12)),
labels=rep(month.abb, 2)) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title=element_text(size=rel(0.6)),
legend.text=element_text(size=rel(0.5)),
legend.key.width=unit(1.1,"cm")) +
labs(colour="Modeled Observed")
Original Answer
AFAIK, there's no way to generate two separate legends for a single aesthetic within the normal ggplot workflow. In this case, that means you can have only a single color legend. Probably you could hack two different color legends by manipulating the underlying ggplot grob structure.
Another option would be to use two different aesthetics. The example below uses linetype to distinguish modeled and observed, but it doesn't provide as much constrast as the two different color sets.
library(tidyverse)
ggplot(df1 %>%
mutate(month=factor(month.abb[month], levels=month.abb)),
aes(x=value, y=-depth, linetype=type, colour=month)) +
geom_path(size=1) + geom_point(size=0.7) +
facet_wrap(~ site, nrow=3) +
theme_bw(base_size=20) +
scale_colour_manual(values=mypal.red(12)) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.title=element_blank())
For reference, here's what your original code produces (minus the change in legend position):
Another option would be to facet by month in addition to type. This takes up more space, but makes it easier to see both the month trend and the difference between modeled and observed.
ggplot(df1 %>%
mutate(month=factor(month.abb[month], levels=month.abb)),
aes(x=value, y=-depth, colour=type)) +
geom_path(size=1) + geom_point(size=0.7) +
facet_grid(month ~ site) +
theme_classic() +
theme(panel.background=element_rect(colour="grey50", fill=NA))
Looking at your data, it seems to me that what you want to visualize can be expressed something like this:
"How do observed values compare to modelled values at different depths, for each site, through time?"
So I would approach the chart differently: plot value versus month, color by type and use facets for site and depth.
library(tidyverse)
df1 %>%
mutate(Month = factor(month.abb[month],
levels = month.abb)) %>%
ggplot(aes(Month, value)) +
geom_point(aes(color = type)) +
facet_grid(depth~site) +
theme_bw()
It's now immediately apparent that the modeled values for site IL_Shabbona_5_NNE are closer to the observed, and more so at shallower depth.

Rotate a faceted, grouped bar plot

**UPDATED BELOW
I have created a plot, I literally need it horizontal, but the coord_flip() leaves the facets on the bottom instead of having nested groups on the left.
The data:
srvc_data <- structure(list(dept = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Distribution Centre Services",
"IT", "Marketing", "Merchandise & Inventory", "Operations and Communication"
), class = "factor"), label = c("test5", "test7", "test3", "test10",
"test4", "test6", "test2", "test1", "test11", "test12", "test9",
"test8", "test18", "test19", "test15", "test17", "test13", "test16",
"test20", "test14", "test22", "test21", "test25", "test23", "test24",
"test27", "test26", "test28", "test29", "test31", "test33", "test30",
"test32", "test38", "test36", "test37", "test43", "test34", "test35",
"test40", "test39", "test42", "test41", "test5", "test7", "test3",
"test10", "test4", "test6", "test2", "test1", "test11", "test12",
"test9", "test8", "test18", "test19", "test15", "test17", "test13",
"test16", "test20", "test14", "test22", "test21", "test25", "test23",
"test24", "test27", "test26", "test28", "test29", "test31", "test33",
"test30", "test32", "test38", "test36", "test37", "test43", "test34",
"test35", "test40", "test39", "test42", "test41"), Gap = c(-0.07,
-0.13, -0.15, -0.16, -0.16, -0.21, -0.22, -0.24, -0.24, -0.25,
-0.3, -0.3, -0.18, -0.19, -0.24, -0.29, -0.3, -0.34, -0.36, -0.41,
-0.46, -0.63, -0.16, -0.18, -0.21, -0.22, -0.27, -0.29, -0.31,
-0.31, -0.35, -0.39, -0.42, -0.15, -0.15, -0.2, -0.21, -0.22,
-0.27, -0.29, -0.29, -0.31, -0.36, -0.07, -0.13, -0.15, -0.16,
-0.16, -0.21, -0.22, -0.24, -0.24, -0.25, -0.3, -0.3, -0.18,
-0.19, -0.24, -0.29, -0.3, -0.34, -0.36, -0.41, -0.46, -0.63,
-0.16, -0.18, -0.21, -0.22, -0.27, -0.29, -0.31, -0.31, -0.35,
-0.39, -0.42, -0.15, -0.15, -0.2, -0.21, -0.22, -0.27, -0.29,
-0.29, -0.31, -0.36), impeff = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Importance", "Effectiveness"), class = "factor"),
score = c(0.87, 0.79, 0.78, 0.82, 0.81, 0.81, 0.92, 0.92,
0.78, 0.81, 0.86, 0.91, 0.79, 0.79, 0.87, 0.93, 0.9, 0.9,
0.82, 0.95, 0.91, 0.95, 0.77, 0.79, 0.82, 0.8, 0.83, 0.9,
0.91, 0.94, 0.89, 0.94, 0.91, 0.82, 0.74, 0.78, 0.81, 0.83,
0.85, 0.82, 0.81, 0.8, 0.83, 0.8, 0.66, 0.63, 0.66, 0.65,
0.6, 0.7, 0.68, 0.54, 0.56, 0.56, 0.61, 0.61, 0.6, 0.63,
0.64, 0.6, 0.56, 0.46, 0.54, 0.45, 0.32, 0.61, 0.61, 0.61,
0.58, 0.56, 0.61, 0.6, 0.63, 0.54, 0.55, 0.49, 0.67, 0.59,
0.58, 0.6, 0.61, 0.58, 0.53, 0.52, 0.49, 0.47)), row.names = c(NA,
-86L), .Names = c("dept", "label", "Gap", "impeff", "score"), class = "data.frame")
And the code:
ggplot(data = srvc_data, aes(x = label, y = score)) +
geom_bar( aes(fill = impeff),stat = "identity", position = "dodge",width = 1) +
facet_grid(~dept, switch = "x", scales = "free", space = "free") +
#coord_flip()+
The plot (without the flip) looks like the below, I need it horizontal, with the facet categories on the far left. How does the coord_flip() work? Why wouldn't it also flip/move the facet strips? Please ignore the crammed formatting!
**UPDATE
So thanks to #neilfws I have fixed the plot, by switching the order of the data.
ggplot(data = srvc_data, aes(x = label, y = score)) +
geom_bar( aes(fill = impeff),stat = "identity", position = "dodge",width = 1) +
facet_grid(dept~., switch = "y", scales = "free_y", space = "free") +
coord_flip()
Now I have the correctly oriented plot, but there is lots of unused space for all the labels that are unused in each facet. Within the facet_grid call, setting scales = "free" doesn't work, nor does drop = T. Any ideas? Plot below for reference.
If you coord_flip, you also need to reverse the faceting relationship (~), to place it on the side, and the switch, to place it on the y-axis. Does this get you close to what you want?
ggplot(srvc_data, aes(label, score)) +
geom_bar( aes(fill = impeff), stat = "identity", position = "dodge", width = 1) +
facet_grid(dept ~ ., switch = "y", scales = "free", space = "free") + coord_flip()

R plot color legend by factor

Using R 3.3.1 in Windows 10. I'm making an x-y plot from 95 rows of data. The data are in 6 different groupings (a factor called "group"). The plot itself is easy enough, but I can't get the legend to properly account for the factor and color correctly.
Here's the data in a variable v1:
v1 <- structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("F9", "T26", "W37",
"W40", "W41", "W42"), class = "factor"), point = c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L), x = c(-7.064, -5.1681,
-6.4866, -2.7522, -4.6305, -4.2957, -3.7552, -4.9482, -5.6452,
-6.0302, -5.3244, -3.9819, -3.8123, -5.3085, -5.6096, -6.4557,
-5.2549, -3.4893, -3.5909, -2.5546, -3.7247, -5.1733, -3.3451,
-2.8993, -2.6835, -3.9495, -4.9649, -2.8438, -4.6926, -3.4768,
-3.1221, -4.8175, -4.5641, -3.549, -3.08, -2.4153, -2.9882, -3.4045,
-4.6394, -3.3404, -2.6728, -3.3517, -2.6098, -3.7733, -4.051,
-2.9385, -4.5024, -4.59, -4.5617, -4.0658, -2.4986, -3.7559,
-4.245, -4.8045, -4.6615, -4.0696, -4.6638, -4.6505, -3.7978,
-4.5649, -5.7669, -4.519, -3.8561, -3.779, -3.0549, -3.1241,
-2.1423, -3.2759, -4.224, -4.028, -3.3412, -2.8832, -3.3866,
-0.1852, -3.3763, -4.317, -5.3607, -3.3398, -1.9087, -4.431,
-3.7535, -3.2545, -0.806, -3.1419, -3.7269, -3.4853, -4.3129,
-2.8891, -3.0572, -5.3309, -2.5837, -4.1128, -4.6631, -3.4695,
-4.1045), y = c(7.76, 0.72, 4.1, 1.36, 0.13, -0.02, 0.13, 0.42,
1.49, 2.64, 1.01, 0.08, 0.22, 1.01, 1.53, 4.39, 0.99, 0.56, 0.43,
2.31, 0.31, 0.59, 0.62, 1.65, 2.12, 0.1, 0.24, 1.68, 0.09, 0.59,
1.23, 0.4, 0.36, 0.49, 1.41, 3.29, 1.22, 0.56, 0.1, 0.67, 2.38,
0.43, 1.56, 0.07, 0.08, 1.53, -0.01, 0.12, 0.1, 0.04, 3.42, 0.23,
0, 0.34, 0.15, 0.03, 0.19, 0.17, 0.2, 0.09, 2.3, 0.07, 0.15,
0.18, 1.07, 1.21, 3.4, 0.8, -0.04, 0.02, 0.74, 1.59, 0.71, 10.64,
0.64, -0.01, 1.06, 0.81, 4.58, 0.01, 0.14, 0.59, 7.35, 0.63,
0.17, 0.38, -0.08, 1.1, 0.89, 0.94, 1.52, 0.01, 0.1, 0.38, 0.02
)), .Names = c("group", "point", "x", "y"), class = "data.frame", row.names = c(NA,
-95L))
Here's the plot my attempts to overlay a legend:
> attach(v1)
> plot(x,y, pch=16, col=group) #simple plot, automatic colors
> #first legend
> legend("topleft", legend=group, pch=16, col=group)
> # colors matched, but it's breaking out every point
> legend("topright", legend=levels(group), pch=16, col=group)
> # Corrected the number of levels in legend, but no colors
>
You can see that the first legend appears correct color-wise, but it shows an entry for every point and runs out of space. The second legend shows group as factor levels, which is what I want, but it doesn't change the colors.
I realize that I could color as a vector (e.g. col(c("black","red", etc.), but since the original plot command automatically assigned colors, I'm looking to do it "automatically" in my legend and avoid the risk of putting the wrong colors in my vector.
Thanks!
base R solution:
attach(v1)
plot(x,y, pch=16, col=group)
legend("topleft", legend=levels(group), pch=16, col=unique(group))
ggplot2 solution
ggplot(v1)+
geom_point(aes(x=x,y=y,colour=group))+
theme_bw()
Again, I would strongly suggest the use of ggplot2 over base R unless you're only exploring the data. There are plenty of questions/answers on the matter on SO.
Try creating a new column in v1 that is a number based on the value of group (as a factor). Pass this column as the col when plotting the points. Then create a vector of numbers for legend in the same way and pass that as the col for legend.
v1$cols = as.numeric(as.factor(v1$group))
legend.cols = as.numeric(as.factor(levels(v1$group)))
plot(v1$x , v1$y, pch=16, col=v1$cols)
legend("topright", legend=levels(group), pch=16, col=legend.cols)

JAGS code for estimating group means with Beta distribution

I'd like to estimate the means and sd's of percent canopy cover for 13 sites (9 are birds and 4 are potential habitats) using JAGS. I'm using a beta distribution to account for the fact that the data are bound by 0 and 1.
I have code for the model statement that works perfectly for other distributions (Poisson and log-normal) and I was attempting to adapt that code but I failed miserably.
Below are the R code, the model statement, and the data. I'm using R 3.1.1 in Windows Vista. If you could look at the model statement and straighten me out I would be very thankful.
Thanks,
Jeff
######## MODEL ##############
model{
for (i in 1:227) {
log(mean[i]) <- a[site[i]]
cover20p[i] ~ dbeta(1, 0.5)
}
for (i in 1:13){
a[i] ~ dnorm(0, tau)
median[i] <- exp(a[i])
}
sd ~ dunif(0, 10)
tau <- 1 / (sd*sd) # precision
}
######### R code ##########
frag <- read.csv("f:\\brazil\\TIandFRAG.csv", header=T)
library(R2jags)
library(rjags)
setwd("f://brazil")
site <- frag$site
cover20p <- frag$cover20p/100
N <- length(frag$site)
jags.data <- list("site", "cover20p")
jags.params <- c("median", "test100MF","test100MT","test100fc","test100fa",
"test100gv","test100hm","test100mc", "test100ca","test100ct", "test10MF",
"test10MT", "test10fc","test10fa", "test10gv", "test10hm", "test10mc", "test10ca", "test10ct",
"test1MF", "test1MT", "test1fc", "test1fa", "test1gv", "test1hm",
"test1mc", "test1ca", "test1ct", "t1est1_con","t2est10_con","t3est100_con",
"t4est1_100","t5est1_10","t6est10_100")
#inits1 <- list(a=0, sd=0)
#inits2 <- list(a=100, sd=50)
#jags.inits <- list(inits1, inits2)
jags.inits <- function() {
list(a=c(0,0,0,0,0,0,0,0,0,0,0,0,0), sd=1)}
jagsfit <- jags(data=jags.data, inits=jags.inits, jags.params,
n.iter=1000000, n.burnin=20000, model.file="fragmodelbeta.txt")
my.coda <- as.mcmc(jagsfit)
summary(my.coda, quantiles=c(0.05, 0.25,0.5,0.75, 0.95))
print(jagsfit, digits=3)
##### DATA ###################
structure(list(site = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L
), canopy = c(0.95, 0.8, 0.85, 0.9, 0.35, 0.999, 0.999, 0.999,
0.95, 0.55, 0.9, 0.85, 0.7, 0.65, 0.05, 0.6, 0.999, 0.999, 0.85,
0.9, 1e-04, 0.45, 0.999, 0.7, 0.95, 0.5, 0.95, 0.6, 0.65, 0.7,
0.4, 0.85, 0.6, 0.95, 0.75, 0.9, 0.85, 0.75, 0.7, 0.85, 0.3,
0.7, 0.8, 0.7, 0.75, 0.8, 0.75, 0.95, 0.9, 0.05, 0.85, 0.6, 0.65,
0.5, 0.85, 0.95, 0.85, 0.25, 0.75, 0.999, 0.65, 0.95, 0.8, 0.9,
0.6, 0.8, 0.999, 0.2, 0.8, 0.4, 0.999, 0.95, 0.4, 0.999, 0.999,
0.95, 0.45, 0.2, 0.7, 0.95, 0.7, 0.8, 0.5, 0.85, 0.55, 1e-04,
0.25, 0.45, 0.999, 0.95, 0.999, 0.9, 0.6, 0.35, 0.95, 0.3, 0.999,
0.999, 0.5, 0.4, 0.9, 0.999, 0.7, 0.999, 0.9, 0.999, 0.4, 0.55,
0.8, 0.7, 0.999, 1e-04, 0.8, 1e-04, 0.7, 0.5, 0.8, 0.75, 1e-04,
0.45, 0.1, 1e-04, 0.4, 0.55, 0.4, 0.999, 0.9, 0.9, 0.15, 0.55,
0.35, 0.9, 0.65, 0.25, 0.999, 0.85, 0.999, 0.95, 0.7, 0.5, 0.7,
0.2, 0.95, 0.999, 0.999, 0.25, 0.85, 0.5, 0.8, 0.75, 0.85, 0.7,
0.95, 0.05, 0.65, 0.65, 0.999, 0.999, 0.999, 0.65, 0.4, 0.6,
0.9, 0.85, 0.75, 0.5, 0.65, 0.999, 0.65, 0.55, 0.75, 0.4, 0.9,
0.35, 0.999, 0.999, 0.4, 0.5, 0.8, 0.95, 0.95, 0.55, 0.7, 0.85,
0.8, 0.8, 0.65, 0.999, 0.6, 0.5, 0.999, 0.8, 0.999, 0.45, 0.999,
0.999, 0.8, 0.85, 0.999, 0.999, 0.999, 0.999, 0.5, 0.6, 0.15,
0.75, 0.6, 0.1, 0.05, 1e-04, 0.999, 0.6, 0.1, 0.35, 0.9, 0.9,
0.95, 0.95, 0.9, 0.55, 0.65, 0.9, 0.4, 0.999, 0.65, 0.5, 0.8)), .Names = c("site",
"canopy"), class = "data.frame", row.names = c(NA, -227L))
In your model, you have cover20p as one of the variables, but have the data for canopy in the frag data.frame. I suspect you want canopy[i] ~ dbeta(1,0.5) in your model specification, and canopy <- frag$canopy and jags.params = "median" in your r code.
I think you could use a logit model for your probabilities. Maybe something like the following.
First, I convert your canopy observations back to the format that I suspect they began in, i.e. the number of canopy hits out of 20 samples at each site. I set 0.0001 to 0 and 0.999 to 1, and multiply the other canopy values by 20.
d$hits <- ifelse(d$canopy < 0.05, 0, ifelse(d$canopy > 0.95, 20, d$canopy * 20))
M <- function() {
for (i in 1:n) {
hits[i] ~ dbin(p[site[i]], 20)
}
for (j in 1:nsites) {
logit.p[j] ~ dnorm(mu, sigma^-2)
logit(p[j]) <- logit.p[j]
}
mu ~ dnorm(0, 0.0001) # uninformative prior for grand mean of logit(p)
sigma ~ dunif(0, 10) # uninformative prior for sd of logit(p)
}
j <- jags(list(site=d$site, hits=d$hits, n=nrow(d), nsites=length(unique(d$site))),
NULL, 'p', M)
plot(j$BUGSoutput$summary[-1, '50%'], pch=20, xlab='site', xaxt='n', las=1,
ylim=c(0, 1), ylab=expression("p (median" %+-% "95% credible interval)"))
segments(1:13, j$BUGSoutput$summary[-1, '2.5%'],
y1=j$BUGSoutput$summary[-1, '97.5%'])
axis(1, 1:13, 1:13)

how to use ggplot conditional on data

I asked this question and it seams ggplot2 currently has a bug with empty data.frames.
Therefore I am trying to check if the dataframe is empty, before I make the plot. But what ever I come up with, it gets really ugly, and doesn't work. So I am asking for your help.
example data:
SOdata <- structure(list(id = 10:55, one = c(7L, 8L, 7L, NA, 7L, 8L, 5L,
7L, 7L, 8L, NA, 10L, 8L, NA, NA, NA, NA, 6L, 5L, 6L, 8L, 4L,
7L, 6L, 9L, 7L, 5L, 6L, 7L, 6L, 5L, 8L, 8L, 7L, 7L, 6L, 6L, 8L,
6L, 8L, 8L, 7L, 7L, 5L, 5L, 8L), two = c(7L, NA, 8L, NA, 10L,
10L, 8L, 9L, 4L, 10L, NA, 10L, 9L, NA, NA, NA, NA, 7L, 8L, 9L,
10L, 9L, 8L, 8L, 8L, 8L, 8L, 9L, 10L, 8L, 8L, 8L, 10L, 9L, 10L,
8L, 9L, 10L, 8L, 8L, 7L, 10L, 8L, 9L, 7L, 9L), three = c(7L,
10L, 7L, NA, 10L, 10L, NA, 10L, NA, NA, NA, NA, 10L, NA, NA,
4L, NA, 7L, 7L, 4L, 10L, 10L, 7L, 4L, 7L, NA, 10L, 4L, 7L, 7L,
7L, 10L, 10L, 7L, 10L, 4L, 10L, 10L, 10L, 4L, 10L, 10L, 10L,
10L, 7L, 10L), four = c(7L, 10L, 4L, NA, 10L, 7L, NA, 7L, NA,
NA, NA, NA, 10L, NA, NA, 4L, NA, 10L, 10L, 7L, 10L, 10L, 7L,
7L, 7L, NA, 10L, 7L, 4L, 10L, 4L, 7L, 10L, 2L, 10L, 4L, 12L,
4L, 7L, 10L, 10L, 12L, 12L, 4L, 7L, 10L), five = c(7L, NA, 6L,
NA, 8L, 8L, 7L, NA, 9L, NA, NA, NA, 9L, NA, NA, NA, NA, 7L, 8L,
NA, NA, 7L, 7L, 4L, NA, NA, NA, NA, 5L, 6L, 5L, 7L, 7L, 6L, 9L,
NA, 10L, 7L, 8L, 5L, 7L, 10L, 7L, 4L, 5L, 10L), six = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("2010-05-25",
"2010-05-27", "2010-06-07"), class = "factor"), seven = c(0.777777777777778,
0.833333333333333, 0.333333333333333, 0.888888888888889, 0.5,
0.888888888888889, 0.777777777777778, 0.722222222222222, 0.277777777777778,
0.611111111111111, 0.722222222222222, 1, 0.888888888888889, 0.722222222222222,
0.555555555555556, NA, 0, 0.666666666666667, 0.666666666666667,
0.833333333333333, 0.833333333333333, 0.833333333333333, 0.833333333333333,
0.722222222222222, 0.833333333333333, 0.888888888888889, 0.666666666666667,
1, 0.777777777777778, 0.722222222222222, 0.5, 0.833333333333333,
0.722222222222222, 0.388888888888889, 0.722222222222222, 1, 0.611111111111111,
0.777777777777778, 0.722222222222222, 0.944444444444444, 0.555555555555556,
0.666666666666667, 0.722222222222222, 0.444444444444444, 0.333333333333333,
0.777777777777778), eight = c(0.666666666666667, 0.333333333333333,
0.833333333333333, 0.666666666666667, 1, 1, 0.833333333333333,
0.166666666666667, 0.833333333333333, 0.833333333333333, 1, 1,
0.666666666666667, 0.666666666666667, 0.333333333333333, 0.5,
0, 0.666666666666667, 0.5, 1, 0.666666666666667, 0.5, 0.666666666666667,
0.666666666666667, 0.666666666666667, 0.333333333333333, 0.333333333333333,
1, 0.666666666666667, 0.833333333333333, 0.666666666666667, 0.666666666666667,
0.5, 0, 0.833333333333333, 1, 0.666666666666667, 0.5, 0.666666666666667,
0.666666666666667, 0.5, 1, 0.833333333333333, 0.666666666666667,
0.833333333333333, 0.666666666666667), nine = c(0.307692307692308,
NA, 0.461538461538462, 0.538461538461538, 1, 0.769230769230769,
0.538461538461538, 0.692307692307692, 0, 0.153846153846154, 0.769230769230769,
NA, 0.461538461538462, NA, NA, NA, NA, 0, 0.615384615384615,
0.615384615384615, 0.769230769230769, 0.384615384615385, 0.846153846153846,
0.923076923076923, 0.615384615384615, 0.692307692307692, 0.0769230769230769,
0.846153846153846, 0.384615384615385, 0.384615384615385, 0.461538461538462,
0.384615384615385, 0.461538461538462, NA, 0.923076923076923,
0.692307692307692, 0.615384615384615, 0.615384615384615, 0.769230769230769,
0.0769230769230769, 0.230769230769231, 0.692307692307692, 0.769230769230769,
0.230769230769231, 0.769230769230769, 0.615384615384615), ten = c(0.875,
0.625, 0.375, 0.75, 0.75, 0.75, 0.625, 0.875, 1, 0.125, 1, NA,
0.625, 0.75, 0.75, 0.375, NA, 0.625, 0.5, 0.75, 0.875, 0.625,
0.875, 0.75, 0.625, 0.875, 0.5, 0.75, 0, 0.5, 0.875, 1, 0.75,
0.125, 0.5, 0.5, 0.5, 0.625, 0.375, 0.625, 0.625, 0.75, 0.875,
0.375, 0, 0.875), elleven = c(1, 0.8, 0.7, 0.9, 0, 1, 0.9, 0.5,
0, 0.8, 0.8, NA, 0.8, NA, NA, 0.8, NA, 0.4, 0.8, 0.5, 1, 0.4,
0.5, 0.9, 0.8, 1, 0.8, 0.5, 0.3, 0.9, 0.2, 1, 0.8, 0.1, 1, 0.8,
0.5, 0.2, 0.7, 0.8, 1, 0.9, 0.6, 0.8, 0.2, 1), twelve = c(0.666666666666667,
NA, 0.133333333333333, 1, 1, 0.8, 0.4, 0.733333333333333, NA,
0.933333333333333, NA, NA, 0.6, 0.533333333333333, NA, 0.533333333333333,
NA, 0, 0.6, 0.533333333333333, 0.733333333333333, 0.6, 0.733333333333333,
0.666666666666667, 0.533333333333333, 0.733333333333333, 0.466666666666667,
0.733333333333333, 1, 0.733333333333333, 0.666666666666667, 0.533333333333333,
NA, 0.533333333333333, 0.6, 0.866666666666667, 0.466666666666667,
0.533333333333333, 0.333333333333333, 0.6, 0.6, 0.866666666666667,
0.666666666666667, 0.6, 0.6, 0.533333333333333)), .Names = c("id",
"one", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "elleven", "twelve"), class = "data.frame", row.names = c(NA,
-46L))
And the plot
iqr <- function(x, ...) {
qs <- quantile(as.numeric(x), c(0.25, 0.5, 0.75), na.rm = T)
names(qs) <- c("ymin", "y", "ymax")
qs
}
magic <- function(y, ...) {
high <- median(SOdata[[y]], na.rm=T)+1.5*sd(SOdata[[y]],na.rm=T)
low <- median(SOdata[[y]], na.rm=T)-1.5*sd(SOdata[[y]],na.rm=T)
ggplot(SOdata, aes_string(x="six", y=y))+
stat_summary(fun.data="iqr", geom="crossbar", fill="grey", alpha=0.3)+
geom_point(data = SOdata[SOdata[[y]] > high,], position=position_jitter(w=0.1, h=0),col="green", alpha=0.5)+
geom_point(data = SOdata[SOdata[[y]] < low,], position=position_jitter(w=0.1, h=0),col="red", alpha=0.5)+
stat_summary(fun.y=median, geom="point",shape=18 ,size=4, col="orange")
}
for (i in names(SOdata)[-c(1,7)]) {
p<- magic(i)
ggsave(paste("magig_plot_",i,".png",sep=""), plot=p, height=3.5, width=5.5)
}
The problem is that sometimes in the call to geom_point the subset returns an empty dataframe, which sometimes (!) causes ggplot2 to plot all the data instead of none of the data.
geom_point(data = SOdata[SOdata[[y]] > high,], position=position_jitter(w=0.1, h=0),col="green", alpha=0.5)+
This is kindda of important to me, and I am really stuck trying to find a solution. Any help that will get me started is much appreciated.
Thanks in advance.
I guess you could replace this
magic <- function(y, ...) {
high <- median(SOdata[[y]], na.rm=T)+1.5*sd(SOdata[[y]],na.rm=T)
low <- median(SOdata[[y]], na.rm=T)-1.5*sd(SOdata[[y]],na.rm=T)
ggplot(SOdata, aes_string(x="six", y=y))+
stat_summary(fun.data="iqr", geom="crossbar", fill="grey", alpha=0.3)+
geom_point(data = SOdata[SOdata[[y]] > high,], position=position_jitter(w=0.1, h=0),col="green", alpha=0.5)+
geom_point(data = SOdata[SOdata[[y]] < low,], position=position_jitter(w=0.1, h=0),col="red", alpha=0.5)+
stat_summary(fun.y=median, geom="point",shape=18 ,size=4, col="orange")
}
with something like
magic <- function(y, ...) {
high <- median(SOdata[[y]], na.rm=T)+1.5*sd(SOdata[[y]],na.rm=T)
low <- median(SOdata[[y]], na.rm=T)-1.5*sd(SOdata[[y]],na.rm=T)
k <- SOdata[[y]] > high
z <- SOdata[[y]] < low
k[is.na(k)]<- FALSE
z[is.na(z)]<- FALSE
p <- ggplot(SOdata, aes_string(x="six", y=y))+
stat_summary(fun.data="iqr", geom="crossbar", fill="grey", alpha=0.3)
if (sum(k) > 0) {p <- p + geom_point(data = SOdata[k,], position=position_jitter(w=0.1, h=0),col="green", alpha=0.5)}
if (sum(z) > 0) {p <- p + geom_point(data = SOdata[z,], position=position_jitter(w=0.1, h=0),col="red", alpha=0.5)}
p + stat_summary(fun.y=median, geom="point",shape=18 ,size=4, col="orange")
}

Resources