R plot color legend by factor - r

Using R 3.3.1 in Windows 10. I'm making an x-y plot from 95 rows of data. The data are in 6 different groupings (a factor called "group"). The plot itself is easy enough, but I can't get the legend to properly account for the factor and color correctly.
Here's the data in a variable v1:
v1 <- structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("F9", "T26", "W37",
"W40", "W41", "W42"), class = "factor"), point = c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L), x = c(-7.064, -5.1681,
-6.4866, -2.7522, -4.6305, -4.2957, -3.7552, -4.9482, -5.6452,
-6.0302, -5.3244, -3.9819, -3.8123, -5.3085, -5.6096, -6.4557,
-5.2549, -3.4893, -3.5909, -2.5546, -3.7247, -5.1733, -3.3451,
-2.8993, -2.6835, -3.9495, -4.9649, -2.8438, -4.6926, -3.4768,
-3.1221, -4.8175, -4.5641, -3.549, -3.08, -2.4153, -2.9882, -3.4045,
-4.6394, -3.3404, -2.6728, -3.3517, -2.6098, -3.7733, -4.051,
-2.9385, -4.5024, -4.59, -4.5617, -4.0658, -2.4986, -3.7559,
-4.245, -4.8045, -4.6615, -4.0696, -4.6638, -4.6505, -3.7978,
-4.5649, -5.7669, -4.519, -3.8561, -3.779, -3.0549, -3.1241,
-2.1423, -3.2759, -4.224, -4.028, -3.3412, -2.8832, -3.3866,
-0.1852, -3.3763, -4.317, -5.3607, -3.3398, -1.9087, -4.431,
-3.7535, -3.2545, -0.806, -3.1419, -3.7269, -3.4853, -4.3129,
-2.8891, -3.0572, -5.3309, -2.5837, -4.1128, -4.6631, -3.4695,
-4.1045), y = c(7.76, 0.72, 4.1, 1.36, 0.13, -0.02, 0.13, 0.42,
1.49, 2.64, 1.01, 0.08, 0.22, 1.01, 1.53, 4.39, 0.99, 0.56, 0.43,
2.31, 0.31, 0.59, 0.62, 1.65, 2.12, 0.1, 0.24, 1.68, 0.09, 0.59,
1.23, 0.4, 0.36, 0.49, 1.41, 3.29, 1.22, 0.56, 0.1, 0.67, 2.38,
0.43, 1.56, 0.07, 0.08, 1.53, -0.01, 0.12, 0.1, 0.04, 3.42, 0.23,
0, 0.34, 0.15, 0.03, 0.19, 0.17, 0.2, 0.09, 2.3, 0.07, 0.15,
0.18, 1.07, 1.21, 3.4, 0.8, -0.04, 0.02, 0.74, 1.59, 0.71, 10.64,
0.64, -0.01, 1.06, 0.81, 4.58, 0.01, 0.14, 0.59, 7.35, 0.63,
0.17, 0.38, -0.08, 1.1, 0.89, 0.94, 1.52, 0.01, 0.1, 0.38, 0.02
)), .Names = c("group", "point", "x", "y"), class = "data.frame", row.names = c(NA,
-95L))
Here's the plot and my attempts to overlay a legend:
> attach(v1)
> plot(x,y, pch=16, col=group) #simple plot, automatic colors
> #first legend
> legend("topleft", legend=group, pch=16, col=group)
> # colors matched, but it's breaking out every point
> legend("topright", legend=levels(group), pch=16, col=group)
> # Corrected the number of levels in legend, but no colors
>
You can see that the first legend appears correct color-wise, but it shows an entry for every point and runs out of space. The second legend shows group as factor levels, which is what I want, but it doesn't change the colors.
I realize that I could specify the colors as a vector (e.g. col=c("black","red", etc.)), but since the original plot command automatically assigned colors, I'm looking to do it "automatically" in my legend and avoid the risk of putting the wrong colors in my vector.
Thanks!

base R solution:
# Scatter plot coloured by group, plus a legend with one entry per factor level.
# col = group colours each point by the factor's integer level code, so level i
# is drawn in colour i of the current palette(). The legend uses
# seq_along(levels(group)) so that its i-th entry gets that same colour i;
# unique(group) follows order of first appearance and only matches when the
# rows happen to be sorted by level.
# with() is used instead of attach() so no columns are left on the search path.
with(v1, {
  plot(x, y, pch = 16, col = group)
  legend("topleft", legend = levels(group), pch = 16,
         col = seq_along(levels(group)))
})
ggplot2 solution
# Map x, y and the group colour once at the plot level; geom_point() inherits
# the mapping and ggplot2 builds the colour legend from it.
ggplot(v1, aes(x = x, y = y, colour = group)) +
  geom_point() +
  theme_bw()
Again, I would strongly suggest the use of ggplot2 over base R unless you're only exploring the data. There are plenty of questions/answers on the matter on SO.

Try creating a new column in v1 that is a number based on the value of group (as a factor). Pass this column as the col when plotting the points. Then create a vector of numbers for legend in the same way and pass that as the col for legend.
# Work from a single factor so the point colours and the legend share one
# level order.
grp <- as.factor(v1$group)
# Colour index per row: the integer level code of the row's group.
v1$cols <- as.numeric(grp)
# One colour index per level, in level order. Re-factoring the level names
# (as.factor(levels(...))) would sort them alphabetically and could pair the
# legend entries with the wrong colours when the levels are not already in
# alphabetical order.
legend.cols <- seq_along(levels(grp))
plot(v1$x, v1$y, pch = 16, col = v1$cols)
# Levels are taken from grp rather than a bare `group`, which only exists
# after attach(v1).
legend("topright", legend = levels(grp), pch = 16, col = legend.cols)

Related

ggplot2 - customize two-factor legend

I am using ggplot2 to plot monthly vertical profiles of soil moisture in two sites, for both observed and modeled data.
I am using interaction to add colours to both factors (month and type). I am also creating two different manual color palettes with the colors I need. This is how to reproduce the plot:
library(ggplot2)
df1<- structure(list(site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L), .Label = c("IL_Shabbona_5_NNE", "ME_Limestone_4_NNW",
"ME_Old_Town_2_W", "MI_Chatham_1_SE", "MI_Gaylord_9_SSW", "MN_Goodridge_12_NNW",
"MN_Sandstone_6_W", "NY_Ithaca_13_E", "NY_Millbrook_3_W", "WI_Necedah_5_WNW"
), class = "factor"), month = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L), depth = c(5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 50, 50, 50,
50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
50, 50, 50, 50, 50, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100), value = c(0.38,
0.4, 0.37, 0.32, 0.29, 0.3, 0.24, 0.28, 0.24, 0.26, 0.32, 0.39,
0.13, NaN, 0.13, 0.12, 0.1, 0.1, 0.06, 0.07, 0.09, 0.1, 0.12,
0.13, 0.39, 0.39, 0.37, 0.35, 0.33, 0.31, 0.27, 0.29, 0.27, 0.28,
0.34, 0.38, 0.1, NaN, 0.12, 0.11, 0.09, 0.09, 0.05, 0.06, 0.09,
0.09, 0.11, 0.11, 0.39, 0.41, 0.38, 0.35, 0.34, 0.32, 0.29, 0.33,
0.31, 0.3, 0.34, 0.36, 0.1, NaN, 0.1, 0.1, 0.09, 0.08, 0.05,
0.05, 0.08, 0.08, 0.1, 0.1, 0.32, 0.31, 0.33, 0.34, 0.36, 0.34,
0.29, 0.33, 0.32, 0.31, 0.32, 0.33, 0.06, 0.06, 0.07, 0.06, 0.06,
0.05, 0.03, 0.03, 0.04, 0.05, 0.06, 0.06, 0.4, 0.4, 0.41, 0.41,
0.45, 0.47, 0.43, 0.4, 0.39, 0.38, 0.38, 0.4, 0.05, 0.05, 0.05,
0.06, 0.05, 0.05, 0.04, 0.04, 0.05, 0.05, 0.06, 0.05, 0.35, 0.35,
0.36, 0.33, 0.29, 0.28, 0.27, 0.26, 0.26, 0.28, 0.3, 0.36, 0.35,
0.35, 0.36, 0.33, 0.29, 0.28, 0.27, 0.27, 0.27, 0.28, 0.3, 0.35,
0.34, 0.35, 0.35, 0.34, 0.3, 0.29, 0.28, 0.28, 0.28, 0.29, 0.3,
0.34, 0.28, 0.29, 0.3, 0.32, 0.31, 0.3, 0.29, 0.29, 0.29, 0.3,
0.3, 0.29, 0.26, 0.27, 0.27, 0.29, 0.29, 0.29, 0.28, 0.28, 0.28,
0.29, 0.29, 0.28, 0.38, 0.38, 0.39, 0.38, 0.31, 0.3, 0.29, 0.29,
0.3, 0.31, 0.35, 0.39, 0.36, 0.36, 0.37, 0.37, 0.31, 0.31, 0.29,
0.3, 0.3, 0.31, 0.33, 0.37, 0.37, 0.37, 0.37, 0.38, 0.32, 0.32,
0.31, 0.31, 0.31, 0.32, 0.33, 0.37, 0.31, 0.32, 0.32, 0.34, 0.33,
0.32, 0.31, 0.31, 0.32, 0.32, 0.31, 0.3, 0.27, 0.28, 0.28, 0.29,
0.31, 0.3, 0.3, 0.29, 0.3, 0.3, 0.3, 0.28), type = rep(c("observed","modeled"), each=120)), class = "data.frame", row.names = c(NA,
-240L))
# Create blue and red palettes
mypal.blue <- colorRampPalette(RColorBrewer::brewer.pal(6,"PuBu"))
mypal.red <- colorRampPalette(RColorBrewer::brewer.pal(6,"YlOrRd"))
# Plot
ggplot(df1, aes(x=value, y=-depth, colour=interaction(as.factor(month),type))) +
geom_path(size=1) + geom_point(size=0.7) +
facet_wrap(~ site, nrow=3) +
theme_bw(base_size=20) +
scale_colour_manual(values=c(mypal.blue(12),mypal.red(12))) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.title=element_blank()) + theme(legend.position = c(0.75, 0.13))
However, the legend is a complete mess.
I would like to create two separate legends, loosely based on this example.
one showing orange for observed and blue for modeled
the other one showing the actual color gradients and the months (ideally with the first letter instead of numbers)
How to create such legends?
Updated Answer
It just hit me that there is a relatively straightforward way to hack the legend to get pretty close to what you want. We relabel the legend labels and add a title. The hacky part is that you have to fiddle with the legend title spacing, legend key width, and text size to get the titles lined up over the legend keys.
With all those lines and colors and the complicated legend, the plot seems very busy and difficult to interpret beyond showing that the model doesn't fit the data very well, so maybe it would still be better to consider one of the other options in my or @neilfws's answer. In addition, because the legend title is manually hardcoded, it's not linked to the aesthetic mapping and you therefore have to be careful that "Modeled" and "Observed" are in the right order above the legend keys.
# One colour legend for the month-by-type interaction: the blue ramp is
# followed by the red ramp (12 colours each), the keys are relabelled with the
# abbreviated month names repeated twice, and the hard-coded legend title
# stands in for the two column headings.
ggplot(
  df1,
  aes(x = value, y = -depth, colour = interaction(as.factor(month), type))
) +
  geom_path(size = 1) +
  geom_point(size = 0.7) +
  scale_colour_manual(
    values = c(mypal.blue(12), mypal.red(12)),
    labels = rep(month.abb, 2)
  ) +
  labs(colour = "Modeled Observed") +
  facet_wrap(~ site, nrow = 3) +
  theme_bw(base_size = 20) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.title = element_text(size = rel(0.6)),
    legend.text = element_text(size = rel(0.5)),
    legend.key.width = unit(1.1, "cm")
  )
Original Answer
AFAIK, there's no way to generate two separate legends for a single aesthetic within the normal ggplot workflow. In this case, that means you can have only a single color legend. Probably you could hack two different color legends by manipulating the underlying ggplot grob structure.
Another option would be to use two different aesthetics. The example below uses linetype to distinguish modeled and observed, but it doesn't provide as much contrast as the two different color sets.
library(tidyverse)
# Month (as an ordered-by-calendar factor of abbreviations) is mapped to
# colour and type to linetype, so each aesthetic gets its own legend.
df1 %>%
  mutate(month = factor(month.abb[month], levels = month.abb)) %>%
  ggplot(aes(x = value, y = -depth, linetype = type, colour = month)) +
  geom_path(size = 1) +
  geom_point(size = 0.7) +
  facet_wrap(~ site, nrow = 3) +
  scale_colour_manual(values = mypal.red(12)) +
  theme_bw(base_size = 20) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.title = element_blank()
  )
For reference, here's what your original code produces (minus the change in legend position):
Another option would be to facet by month in addition to type. This takes up more space, but makes it easier to see both the month trend and the difference between modeled and observed.
# One row of panels per month and one column per site; colour separates
# modeled from observed profiles.
df1 %>%
  mutate(month = factor(month.abb[month], levels = month.abb)) %>%
  ggplot(aes(x = value, y = -depth, colour = type)) +
  geom_path(size = 1) +
  geom_point(size = 0.7) +
  facet_grid(month ~ site) +
  theme_classic() +
  theme(panel.background = element_rect(colour = "grey50", fill = NA))
Looking at your data, it seems to me that what you want to visualize can be expressed something like this:
"How do observed values compare to modelled values at different depths, for each site, through time?"
So I would approach the chart differently: plot value versus month, color by type and use facets for site and depth.
library(tidyverse)
# Value against calendar month, coloured by type, with one panel per
# depth (rows) and site (columns).
ggplot(
  mutate(df1, Month = factor(month.abb[month], levels = month.abb)),
  aes(x = Month, y = value)
) +
  geom_point(aes(colour = type)) +
  facet_grid(depth ~ site) +
  theme_bw()
It's now immediately apparent that the modeled values for site IL_Shabbona_5_NNE are closer to the observed, and more so at shallower depth.

How to differentiate Bars in geom_bar without color: ggplot

Note: A similar question is present at link, but I posted this as a separate question because: 1) only a hack was provided in answer to the previous question, which I thought would make the code unnecessarily complex; 2) I thought a fix might have been suggested since 2013.
I am using following code to draw bars/stacks
ggplot(finaldataframe,aes(day,score))+
geom_bar(aes(fill=identify),stat="identity",position = "dodge",width = .7, show.legend = TRUE)+
labs(x= "Day of the Month", y="Anomaly Score") +
scale_fill_discrete(name="Method", labels=c("Mean","Maximum","Cumulative \n sum"))+
theme(axis.text= element_text(color="Black"))+ scale_x_continuous(breaks=seq(1,31,5))
A portion of output is as
The problem with this figure is that once I print it on a black-and-white printer, it gets hard to differentiate between the different stacks. Is there any way to make the stacks distinguishable in a black-and-white print? I am looking for something like this:
For reproduction, Here is the dput of dataframe:
> dput(finaldataframe)
finaldataframe = structure(list(day = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L), score = c(0, 0.02, 0.01, 0, 0.02, 0.01, 0.01,
0.02, 0.02, 0.28, 0.24, 0.01, 0.94, 0.22, 0.25, 0.01, 0.31, 0.22,
0.24, 0.83, 0.4, 0.44, 0.06, 0.02, 0.37, 0.07, 0.12, 0.06, 0.1,
0.06, 0.1, 0, 0.05, 0.04, 0.02, 0.05, 0.01, 0.02, 0.03, 0.04,
0.37, 0.36, 0.04, 1, 0.28, 0.34, 0.03, 0.55, 0.35, 0.32, 1, 0.71,
1, 0.13, 0.04, 0.47, 0.12, 0.17, 0.1, 0.18, 0.1, 0.14, 0, 0.02,
0.01, 0, 0.02, 0.01, 0.01, 0.02, 0.02, 0.3, 0.25, 0.01, 1, 0.23,
0.27, 0, 0.33, 0.24, 0.26, 0.89, 0.42, 0.47, 0.06, 0.02, 0.4,
0.07, 0.13, 0.06, 0.11, 0.06, 0.1), identify = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Mean",
"Maximum", "Cummulative Sum"), class = "factor")), .Names = c("day",
"score", "identify"), row.names = c(NA, 93L), class = "data.frame")

Boxplot with multiple x variables

I'm new to R and having a few issues with using ggplot2.
This is an example of my data (subset of larger data set) :
df <-
structure(list(logpvalue = c(22.36, 6.93, 16.78, 1.78, 17.75,
20.99, 21.03, 9.19, 15.01, 22.25, 13.4, 6.47, 1.34, 13.4, 3.21,
0.37, 0.5, 0.12, 1.8, 0.71, 1.15, 6.73, 0.12, 6.97, 0.64, 9.85,
1.45, 1.67, 2.6, 1.8, 1.35, 4.69, 0.37, 1.91, 0.31, 0, 2.45,
1.68, 2.31, 1.35, 6.48, 4.68), SNP = structure(c(1L, 7L, 6L,
5L, 11L, 1L, 9L, 5L, 8L, 11L, 7L, 5L, 8L, 11L, 1L, 7L, 1L, 4L,
2L, 3L, 10L, 7L, 1L, 4L, 2L, 3L, 10L, 4L, 2L, 3L, 10L, 4L, 2L,
3L, 10L, 4L, 2L, 3L, 7L, 9L, 5L, 1L), .Label = c("rs10244", "rs10891244",
"rs10891245", "rs11213821", "rs12296076", "rs138567267", "rs45615536",
"rs6589218", "rs7103178", "rs7127721", "rs7944895"), class = "factor"),
X173 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("het", "hom"), class = "factor")), .Names = c("logpvalue",
"SNP", "X173"), class = "data.frame", row.names = c(NA, -42L))
I want to plot a boxplot of logpvalue on y axis, with SNP on the x-axis but with each SNP also categorized by whether the patient is het or hom for X173. So from this data I'd imagine 4 boxes on my boxplot.
If possible I'd also like to incorporate the individual data points (dotplot-boxplot overlay) with jitter.
This is the usual code I'd use for a boxplot of logpvalue vs SNP:
# Each line of a ggplot2 chain has to END with `+`. R treats a line that
# starts with `+` as a separate statement, so the jitter layer and the theme
# would never be added to the plot built on the previous line.
qplot(logpvalue, SNP, data = mydata, geom = "boxplot") +
  geom_jitter(position = position_jitter(width = 0.1, height = 0.1)) +
  theme_bw()
How do I add the extra x variable into this code?
Try this:
# Base graphics: paste SNP and X173 into a single grouping label so that
# boxplot() draws one box per SNP/X173 combination found in the data.
boxplot(df$logpvalue~paste(df$SNP,df$X173))
Or using ggplot2:
library(ggplot2)
# Boxes per SNP with the raw points jittered on top; X173 (het/hom) splits the
# plot into side-by-side panels.
ggplot(df, aes(x = SNP, y = logpvalue, colour = SNP)) +
  facet_grid(. ~ X173) +
  geom_boxplot() +
  geom_jitter()

Two legends for polar ggplot (with one customized)

Here is my data:
data <- structure(list(Indicator = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L, 1L, 2L,
2L, 2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L, 1L, 1L,
1L, 2L, 2L, 2L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 4L, 4L, 5L, 5L), .Label = c("Causality",
"Climatechangeriskperceptions", "Currentadaptationoptions", "Fishingasalivelihoodactivity",
"Governance", "Roleofshadowstateactors"), class = "factor"),
Village = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L), .Label = c("Andra", "lahapau", "Pelipowai", "Ponam",
"Tulu"), class = "factor"), Variables = structure(c(13L,
3L, 10L, 11L, 12L, 16L, 5L, 8L, 1L, 2L, 15L, 17L, 6L, 14L,
9L, 4L, 7L, 13L, 3L, 10L, 11L, 12L, 16L, 5L, 8L, 1L, 2L,
15L, 17L, 6L, 14L, 9L, 4L, 7L, 13L, 3L, 10L, 11L, 12L, 16L,
5L, 8L, 1L, 2L, 15L, 17L, 6L, 14L, 9L, 4L, 7L, 13L, 3L, 10L,
11L, 12L, 16L, 5L, 8L, 1L, 2L, 15L, 17L, 6L, 14L, 9L, 4L,
7L, 13L, 3L, 10L, 11L, 12L, 16L, 5L, 8L, 1L, 2L, 15L, 17L,
6L, 14L, 9L, 4L, 7L), .Label = c("alternativelivelihood",
"anyactorsinvolvedinsustainability", "Attributionfactors",
"discusswithelectedleaders", "Effortsdirectedtoreducerisks",
"fishercommunityinfluence", "Infrastructureeffectiveness",
"multiplicityofactors", "Occupationforchildren", "Reversibility",
"Riskasamajorconsideration", "Riskbeingaddressed", "Statusoffisheries",
"Timefishing", "Whatwasdone", "Whoisatrisk", "whowasinvolved?"
), class = "factor"), legend.var = structure(c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L), .Label = c("a", "b", "c", "d", "e", "f", "g",
"h", "i", "j", "k", "l", "m", "n", "o", "p", "q"), class = "factor"),
score = c(1, 0.97, 1, 0.76, 0.794, 1, 0.71, 0.9, 0.5, 1,
1, 1, 1, 0.49, 0.72, 1, 0.7, 1, 1, 0, 0.67, 0.5, 1, 0.2,
1, 1, 0.7, 0.4, 0.5, 0.3, 0.67, 0.5, 0.7, 0.8, 1, 0, 0.46,
0.56, 0.375, 1, 0.13, 0.3, 0.5, 0.3, 0.3, 0.4, 0.6, 1, 1,
0.7, 0.8, 1, 0.86, 0.69, 0.51, 0.429, 1, 0.44, 0.3, 0.5,
0.6, 0.6, 0.7, 0.8, 0.4, 0.79, 0.8, 1, 1, 0.82, 0.85, 0.25,
0.226, 1, 0.18, 0.1, 1, 0.7, 0.3, 0.6, 0.3, 0.48, 0.16, 0.4,
0.8)), .Names = c("Indicator", "Village", "Variables", "legend.var",
"score"), class = "data.frame", row.names = c(NA, -85L))
I have made polar plots as follows:
library(ggplot2)
ggplot(data, aes(x = legend.var, y = score, fill = Indicator), color='black') +
geom_bar(width = 1, alpha=0.5, stat="identity") +
scale_y_continuous() +
coord_polar() +
theme( axis.ticks = element_blank()) +
facet_wrap(~Village, nrow=2, ncol=3) +
guides(colour = guide_legend(title.hjust = 0.5))
As you can see in the data, column legend.var is grouped by the column indicator. I would like to insert a table or a second legend which associates each of the indicator with the legend.var and variables column. Ideally if this is inserted as a second legend, the unique items comprised in legend.var column would have the same fill color as the corresponding indicator. The fill legend is based on the column indicator. The inserted table/extra legend would comprise columns legend.var with a unique alphabet and column variables with the meaning of the respective alphabet. These can then have the same fill color as the corresponding indicator. I hope this is clear.
Here is a solution using a gtable:
library(ggplot2)
library(grid)       # unit(), grid.draw()
library(gridExtra)  # tableGrob(), ttheme_default()
library(gtable)     # gtable(), gtable_add_grob()

# Polar bar chart, one panel per village, with the fill legend positioned
# inside the plot area at relative coordinates (0.85, 0.25).
# NOTE(review): color = 'black' is an argument to ggplot() here, not to a geom,
# and guides(colour = ...) targets a colour legend while the plot maps fill -
# confirm whether either was meant to have a visible effect.
p <- ggplot(data, aes(x = legend.var, y = score, fill = Indicator), color = 'black') +
  geom_bar(width = 1, alpha = 0.5, stat = "identity") +
  scale_y_continuous() +
  coord_polar() +
  theme(axis.ticks = element_blank()) +
  facet_wrap(~Village, nrow = 2, ncol = 3) +
  guides(colour = guide_legend(title.hjust = 0.5)) +
  theme(legend.position = c(0.85, 0.25))

# create table: one row per legend.var letter with its Variables description.
# gridExtra >= 2.0 no longer accepts show.rownames / gpar.* arguments; row
# names are dropped with rows = NULL and text/background styling is passed
# through a ttheme_default() theme.
tab <- tableGrob(
  unique(data[, c("legend.var", "Variables")]),
  rows = NULL,
  theme = ttheme_default(
    core = list(
      fg_params = list(fontsize = 10),
      bg_params = list(fill = "grey90", col = "white")
    ),
    colhead = list(
      fg_params = list(fontsize = 10, fontface = "bold"),
      bg_params = list(fill = "grey80", col = "white")
    )
  )
)

# arrange grobs: a 1 x 2 layout, 70% of the width for the plot and 30% for
# the table
a <- gtable(widths = unit(c(0.7, 0.3), "npc"), heights = unit(1, "npc"))
a <- gtable_add_grob(a, ggplotGrob(p), t = 1, l = 1)
a <- gtable_add_grob(a, tab, t = 1, l = 2)

# plot
grid.draw(a)
For a start, you may try something like this. You need to adjust arrangement and layout according to your own preferences.
library(ggplot2)
library(gridExtra)

# Polar bar chart, one panel per village.
# NOTE(review): color = 'black' is an argument to ggplot() here, not to a geom -
# confirm whether a bar outline was intended.
gg <- ggplot(data, aes(x = legend.var, y = score, fill = Indicator), color = 'black') +
  geom_bar(width = 1, alpha = 0.5, stat = "identity") +
  coord_polar() +
  theme(axis.ticks = element_blank()) +
  facet_wrap(~Village, nrow = 2, ncol = 3)

# create a table that translates legend.var to Variables
tbl <- unique(data[, c("legend.var", "Variables")])

# create a table grob
# gridExtra >= 2.0 no longer accepts col.just / gpar.* arguments; the same
# text settings are passed through a ttheme_default() theme instead.
# hjust = 0 with x = 0.1 left-aligns the column headers, presumably what
# col.just = "left" did in the old interface - TODO confirm the look.
tt <- tableGrob(
  d = tbl,
  theme = ttheme_default(
    core = list(fg_params = list(col = "black", cex = 0.5)),
    colhead = list(
      fg_params = list(col = "black", cex = 0.5, fontface = "bold",
                       hjust = 0, x = 0.1)
    ),
    rowhead = list(
      fg_params = list(col = "black", cex = 0.5, fontface = "italic")
    )
  )
)

# arrange plot and table grob side by side
grid.arrange(gg, tt, ncol = 2)
update with a quick and dirty ggplot-only alternative
# create labels: "<letter> <description>", one per legend.var level and in
# level order, so that the i-th label belongs to the i-th legend key
labs <- with(tbl[order(tbl$legend.var), ], paste(legend.var, Variables))
# Colour is mapped to legend.var rather than Variables. The scale applies
# `labels` to the factor levels in order, and the levels of Variables are
# sorted alphabetically instead of following the a..q letters, which would
# pair each letter with the wrong description. The legend title is kept as
# "Variables".
gg <- ggplot(data, aes(x = legend.var, y = score, fill = Indicator, col = legend.var)) +
  geom_bar(width = 1, alpha = 0.5, stat = "identity") +
  coord_polar() +
  theme(axis.ticks = element_blank()) +
  facet_wrap(~ Village, nrow = 2, ncol = 3) +
  scale_color_grey(name = "Variables", labels = labs, start = 0.8, end = 0.9)
gg

Changing the order of plotting levels in Lattice

I am trying to get a boxplot with a specific order of the levels that are being plotted.
Using the following data and code I generate the boxplot, but the order in which I need this is 6,12,15,18.
I have tried a number of things using the with() function but can't make it work.
library(lattice)
rate<-structure(list(Temp = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L), Rep = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), Ind = structure(c(1L, 1L, 1L, 1L, 5L, 5L,
5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 6L, 6L,
6L, 6L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L,
3L, 3L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 5L, 5L,
5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 6L, 6L,
6L, 6L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L,
3L, 3L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L), .Label = c("B", "MBCT",
"MBT", "MSCT", "MST", "S"), class = "factor"), Week = c(1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L), Weight = c(1.756,
1.756, 1.756, 1.756, 0.92, 0.92, 0.92, 0.92, 1.201, 1.201, 1.201,
1.201, 2.601, 2.601, 2.601, 2.601, 2.057, 2.057, 2.057, 2.057,
0.784, 0.784, 0.784, 0.784, 0.663, 0.663, 0.663, 0.663, 1.272,
1.272, 1.272, 1.272, 3.389, 3.389, 3.389, 3.389, 1.433, 1.433,
1.433, 1.433, 3.822, 3.822, 3.822, 3.822, 1.55, 1.55, 1.55, 1.55,
1.198, 1.198, 1.198, 1.198, 1.029, 1.029, 1.029, 1.029, 1.113,
1.113, 1.113, 1.113, 0.261, 0.261, 0.261, 0.261, 0.639, 0.639,
0.639, 0.639, 0.749, 0.749, 0.749, 0.749, 1.083, 1.083, 1.083,
1.083, 1.429, 1.429, 1.429, 1.429, 3.083, 3.083, 3.083, 3.083,
1.061, 1.061, 1.061, 1.061, 1.154, 1.154, 1.154, 1.154, 1.691,
1.691, 1.691, 1.691, 1.185, 1.185, 1.185, 1.185, 0.552, 0.552,
0.552, 0.552, 1.507, 1.507, 1.507, 1.507, 1.175, 1.175, 1.175,
1.175, 1.773, 1.773, 1.773, 1.773, 1.712, 1.712, 1.712, 1.712,
3.784, 3.784, 3.784, 3.784, 0.715, 0.715, 0.715, 0.715, 1.271,
1.271, 1.271, 1.271, 0.788, 0.788, 0.788, 0.788, 1.72, 1.72,
1.72, 1.72, 0.571, 0.571, 0.571, 0.571, 1, 1, 1, 1, 1.037, 1.037,
1.037, 1.037, 1.656, 1.656, 1.656, 1.656, 2.083, 2.083, 2.083,
2.083), Rate = c(0.387, 0.116, -0.141, 0.184, 0.785, 0.151, -0.69,
0.16, 0.477, 0.368, -0.544, 0.49, 0.152, 0.183, -0.137, 0.259,
0.239, 0.292, 0.018, 0.411, 0.322, 0.073, -0.148, 0.287, 0.214,
0.21, -0.579, 0.419, 0.23, 0.271, 0.685, 0.426, 0.248, 0.125,
0.053, 0.176, 0.465, 0.107, 0.02, 0.339, 0.261, 0.327, 0.279,
0.424, 0.308, 0.223, 0.287, 0.383, 0.306, 0.24, 0.258, 0.253,
0.437, 0.315, 0.275, 0.481, 0.372, 0.306, 0.267, 0.449, 0.727,
0.441, 0.624, 1.262, 0.334, 0.447, 0.548, 0.654, 0.846, 0.661,
0.66, 0.734, 0.191, 0.316, 0.551, 0.581, 0.332, 0.403, 0.509,
0.603, 0.411, 0.683, 0.427, 0.516, 0.498, 0.674, 0.371, 0.326,
0.288, 0.435, 0.297, 0.435, 0.165, 0.387, 0.212, 0.345, 0.334,
0.664, 0.526, 0.338, 0.094, 0.066, 0.39, 0.525, 0.215, 0.431,
0.151, 0.361, 0.153, 0.297, 0.127, 0.339, 0.292, 0.434, 0.411,
0.442, 0.25, 0.607, 0.369, 0.567, 0.189, 0.39, 0.372, 0.333,
0.339, 0.327, 0.449, 0.224, 0.086, 0.242, 0.465, 0.374, -0.063,
-0.006, 0.364, 0.308, 0.069, 0.223, 0.397, 0.264, 0.478, 0.345,
0.582, 0.36, 0.426, 0.403, 0.583, 0.544, 0.57, 0.567, 0.388,
0.531, 0.111, 0.125, 0.366, 0.266, 0.26, 0.315, 0.387, 0.549)), .Names = c("Temp",
"Rep", "Ind", "Week", "Weight", "Rate"), class = "data.frame", row.names = c(NA,
-160L))
rate$Temp <- as.character(rate$Temp)
rate$Week <- as.character(rate$Week)
rate$Rep <- as.character(rate$Rep)
rate$Weight<- as.character(rate$Weight)
bwplot(Rate~Temp, rate,
main="Boxplot for data over all weeks by temperature"
)
This can be tackled in the same manner as your question from a month ago. You need to set the order of levels of a factor. I would generally advise you work with factors, unless you have a really good reason to use characters.
# Convert Temp to a factor whose levels are in numeric order. The order has to
# be given to factor() when the factor is created: as.factor() on the
# character column sorts the levels as text ("12", "15", "18", "6"), and
# assigning to levels() afterwards RENAMES those levels position by position
# (the "12" rows would become "6", the "6" rows "18", ...) instead of
# reordering them.
rate$Temp <- factor(rate$Temp, levels = c("6", "12", "15", "18"))

Resources