Creating a facet grid with two different set labels - r

I am hoping to create a facet grid that has two different set labels.
Below is the graph I have created:
I would like to change the y axis scales to go from 1 to 5 for the top grid and -3 to +3 for the bottom grid.
Here is my current code:
# make graph
ggplot(data = S2, mapping = aes(y = Rating, x = week, group= Question, colour= Question))+
geom_point()+
geom_line()+
scale_colour_manual(values=c(mycols))+
facet_grid(measureType ~., scales= "free")+
geom_smooth(method = 'loess', colour= "black", aes(group=1), se= FALSE)+
theme(legend.position="bottom",
axis.text = element_text(family= "sans", face= "bold", colour = "black"),
axis.title.x = element_text(family= "sans", face= "bold", colour = "black"),
axis.title.y = element_text(family= "sans", face= "bold", colour = "black"),
strip.text.x = element_text(family= "sans", face= "bold", colour = "black"),
strip.text.y = element_text(family= "sans", face= "bold", colour = "black"),
strip.background = element_rect(colour="#9C964A", fill="#FAD77B"),
panel.border = element_rect(color = "#9C964A", fill = NA, size = 1),
axis.line = element_line(colour = "#9C964A", size=1)) +
labs (x= "Date", y="Ratings by Question: 838607")
Thank you in advance!!
***** EDIT******
This is not a duplicate of the other question, because I don't want my scales to be "free". I want them to be explicitly set as 1 to 5 and +3 to -3. What I am having trouble with is how to make them different but set.
Please find my attached data.
> dput (S2)
structure(list(StudentFactor = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "838607", class = "factor"),
Question = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L), .Label = c("Q", "Q10", "Q11_1", "Q11_2", "Q11_3", "Q11_4",
"Q11_5", "Q12", "Q2", "Q8"), class = "factor"), Type = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("FYS", "SNR"), class = "factor"),
measureType = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Rating",
"Delta"), class = "factor"), week = structure(c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 11L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 11L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), .Label = c("Jan11", "Jan25",
"Feb1", "Feb8", "Feb15", "Mar1", "Mar8", "Mar15", "Mar22",
"Mar29", "Apr5"), class = "factor"), Rating = c(5, 5, 5,
5, 5, 5, 5, 5, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, -1, 5, 5, 5,
5, 5, 4, 4, 5, 4, 3, 0, 0, 0, 0, 0, -1, 0, 1, -1, 5, 5, 5,
5, 5, 5, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, -1, 0, 0, 5, 5, 5,
5, 5, 4, 4, 4, 3, 4, 0, 0, 0, 0, 0, -1, 0, 0, -1, 5, 5, 5,
5, 5, 4, 4, 4, 4, 3, 0, 0, 0, 0, 0, -1, 0, 0, 0)), class = "data.frame", row.names = c(NA,
-95L), .Names = c("StudentFactor", "Question", "Type", "measureType",
"week", "Rating"))

Related

Summarise proportions of treatment response by two subgroups

I generated the following random data which has the same structure as my real data:
data <- structure(list(gender = structure(c(1L, 2L, 1L, 2L, 2L, 1L, 1L,
1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L), .Label = c("Female",
"Male"), class = "factor"), treatment_response = structure(c(2L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L,
1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 2L, 1L), .Label = c("Non-responder", "Responder"), class = "factor"),
country = structure(c(1L, 3L, 9L, 7L, 2L, 2L, 6L, 6L, 4L,
9L, 5L, 7L, 3L, 2L, 8L, 8L, 10L, 3L, 1L, 1L, 9L, 2L, 5L,
5L, 2L, 7L, 5L, 1L, 6L, 10L, 7L, 10L, 2L, 10L, 1L, 4L, 6L,
8L, 9L, 4L, 3L, 6L, 1L, 8L, 7L, 3L, 2L, 10L, 7L, 6L, 1L,
9L, 8L, 4L, 8L, 7L, 3L, 5L, 3L, 4L, 7L, 4L, 8L, 4L, 5L, 6L,
8L, 1L, 7L, 5L, 8L, 1L, 7L, 10L, 8L, 1L, 9L, 8L, 6L, 6L,
10L, 7L, 3L, 6L, 5L, 10L, 2L, 1L, 9L, 5L, 5L, 10L, 2L, 6L,
10L, 4L, 8L, 7L, 9L, 8L), .Label = c("Switzerland", "Czech Republic",
"Denmark", "Iceland", "Netherlands", "Norway", "Portugal",
"Romania", "Sweden", "Finland"), class = "factor")), class = "data.frame", row.names = c(NA,
-100L))
This is how it looks like
'data.frame': 100 obs. of 3 variables:
$ gender : Factor w/ 2 levels "Female","Male": 1 2 1 2 2 1 1 1 2 1 ...
$ treatment_response: Factor w/ 2 levels "Non-responder",..: 2 2 1 2 1 1 2 2 2 1 ...
$ country : Factor w/ 10 levels "Switzerland",..: 1 3 9 7 2 2 6 6 4 9 ...
I wish to create a data.frame where the treatment_response is summarised by gender and country in percentages or proportions. For example, 60% (or 0.6) of males in Switzerland are responders and 40% are non-responder, similarly for females, etc.
I am familiar with the existence of dplyr and have managed to find a method to do this for mean but not for proportion or percentages.

How to make create two y-axis labels with a grid of facets with a single x-axis label

I have been struggling with ggplot to display these plots how I would like. My data have 2 factors, quarter and species. Station will be on the x-axis, value on the y-axis, and the constituent will be used with the facet_wrap. I want quarter differentiated with shapes, and species with colors.
The issue is I'm trying to replicate a figure done in SigmaPlot. It is 4x4 grid of plots, with the first two rows of the first column are empty, to allow for the placement of the legend. My original plan was to have two separate facets made using facet-wrap, and combine those, however, this doesn't maintain the 4x4 arrangement, it transforms it into a 1x2, which ruins alignment of plots and shrinks the larger faceted grid.
My next thought was to create each plot individually, then arrange them in a grid using cowplot. This presents the plots how I'd like them arranged, but I can't figure out how to have two y-axis labels, due to different units. One label would be centered on the two leftmost plots, and one centered on the left of the next column of 4 plots.
I'm trying to use this code (just copy the example data below, and run):
library(ggplot)
library(gridExtra)
test.data1 <- test.data[1:95, ]
test.data2 <- test.data[96:111, ]
testplot1 <- ggplot(test.data1, aes(Station, value)) +
geom_point(aes(shape = factor(quarter), fill = Species)) +
scale_shape_manual(values = c(21, 22)) +
labs(x = "Station", y = "Unit a", shape = "Sampling Quarter", fill = "Species") +
theme(legend.position = "none", legend.title = element_blank()) +
guides(fill = guide_legend(override.aes = list(shape = 21), nrow = 2, byrow = TRUE), shape = guide_legend(nrow = 2, byrow = TRUE)) +
facet_wrap( ~ constituent, ncol = 3, scales = "free_y")
testplot2 <- ggplot(test.data2, aes(Station, value)) +
geom_point(aes(shape = factor(quarter), fill = Species))
scale_shape_manual(values = c(21, 22)) +
labs(x = "Station", y = "Unit b", shape = "Sampling Quarter", fill = "Species") +
theme(legend.position = "top", legend.title = element_blank()) +
guides(fill = guide_legend(override.aes = list(shape = 21), nrow = 2, byrow = TRUE), shape = guide_legend(nrow = 2, byrow = TRUE)) +
facet_wrap( ~ constituent, ncol = 1, scales = "free_y")
grid.arrange(testplot2, testplot1, ncol = 2)
Which generates this:
But I want it to be arranged like this, where the XX and YY plots from above are normalized in size with the other plots (this was done using individual plots, and using plot_grid):
Example data from a larger set:
test.data <- structure(list(Station = structure(c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("StA", "StB"), class = "factor"),
CollectionDate = structure(c(3L, 2L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L,
3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L,
1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L,
3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 1L, 3L, 2L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L,
3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L), .Label = c("10/1/2017",
"10/16/2017", "4/1/2017"), class = "factor"), Species = structure(c(1L,
2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L,
1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L,
3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L,
2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L,
2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L,
1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L,
3L, 1L, 2L, 2L, 3L), .Label = c("SpA", "SpB", "SpC"), class = "factor"),
quarter = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("2017 Q2",
"2017 Q4"), class = "factor"), constituent = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L
), .Label = c("A", "B", "C", "D", "E", "F", "G", "H", "I",
"J", "K", "L", "XX", "YY"), class = "factor"), value = c(16,
35, 46, 23, 40, 19, 9, 50, 0.2, 1, 0.5698, 0.322, 1, 0.45,
0.322, 0.5, 16, 9, 6, 19, 14, 13, 16, 9, 0, 0.004, 0, 0.004,
1, 0.32, 1, 0.678, 0, 0.39, 0.23, 0, 0, 1.1, 0.5, 0.5, 9,
4.9, 7, 4.768, 9, 8.65, 4.768, 6.54, 195, 195, 46, 46, 124,
124, 218, 218, 2, 1, 1, 1, 1, 2, 1, 1, 0.1, 0.4, 0.22, 0.4,
0.22, 0.4, 0.22, 0.1, 0.99, 0.99, 1.2, 0.45, 0.765, 0.99,
0.99, 0.99, 0.99, 1.2, 4.3, 0.98, 0.99, 1.2, 1.2, 34, 34,
65, 98, 150, 34, 65, 65, 2, 0, 4, 1.3, 5, 3.3, 1.56, 1, 9,
0.36, 4, 4, 11, 2, 2.22, 11)), class = "data.frame", row.names = c(NA,
-111L))

boxplot with multiple factor labels using base R functions

How can one possibly reproduce the ggplot-based boxplot shown in this answer but using base R boxplot function?
Sample date from the above link:
d<-data.frame(x=rnorm(1500),f1=rep(seq(1:20),75),f2=rep(letters[1:3],500))
# first factor has 20+ levels
d$f1<-factor(d$f1)
# second factor a,b,c
d$f2<-factor(d$f2)
boxplot(x~f2*f1,data=d,col=c("red","blue","green"),frame.plot=TRUE,axes=FALSE)
It would be great if the groups on the x-axis are spaced from each other.
I have limited knowledge about ggplot2.
EDIT
While waiting for more suggestions using base R functions, I am making some progress with ggplot2.
Using this sample data how can I produce a plot with well aligned x-axis as the one in the link above?
The following does not give me the correct alignment (I want the numbers 1:8 aligned at the center of each group):
library(ggplot2)
ggplot(dat3, aes(x = ID, y = value, group=interaction(obs, ID), fill=obs)) +
geom_boxplot() +
scale_fill_manual(values = c("yellow", "orange"))
dat3=structure(list(values = c(0, 0, 0, 0, 0, 0, 0, 0, -0.0169491525423729,
0, 0, 0, 0, 1, 1, 0.64367816091954, 0.64367816091954, 0, 0, -0.0163934426229508,
-0.021978021978022, 0.109195402298851, 0, 0, 0, 0, 0.207650273224044,
0.4375, 0, 0, 0, 0, 0.302325581395349, 0.303370786516854, 0.270588235294118,
-0.0188679245283019, 0.156462585034014, 0.092436974789916, 0.69,
-0.021978021978022, 0.64367816091954, 0.614906832298137, 0.612903225806452,
0.274853801169591, 0, 0.303370786516854, 0, 0, -0.03125, 0.229813664596273,
0.557142857142857, 0, 0.109195402298851, 0.0746268656716418,
0.180616740088106, 0.210526315789474, 0.310344827586207, 1, 1,
0.0825688073394495, 0.294117647058824, 0, 0.4375, 0, 0.230769230769231,
0.347826086956522, -0.0163934426229508, 0.156462585034014, 0,
0, 0, 1, 0, 0, 0, 0.483333333333333, 0.483333333333333, 0, 0,
0, 0, 0, -0.0169491525423729, 0, 0.310344827586207, 0, 0.296875,
0.302325581395349, 0, 0, 0, 0, 0, 0, 0.482758620689655, 0, 0,
0, 0, 0, 0, 0, 0, 0.150684931506849, 0.150684931506849, 0, 0,
-0.021978021978022, -0.021978021978022, 0.270588235294118, 0,
0, 0.482758620689655, 0.482758620689655, 0.272727272727273, 0.272727272727273,
0, 1, 0, 0, 0.642857142857143, 0.211864406779661, 0.156462585034014,
-0.0449438202247191, -0.0449438202247191, 0.389763779527559,
0.389763779527559, -0.021978021978022, 0.211864406779661, 0.213197969543147,
0.213197969543147, 0.358620689655172, -0.0163934426229508, 0.483333333333333,
0, 0, 0.362139917695473, 0.362139917695473, 0.261904761904762,
0.483333333333333, 1, 1, 0.236453201970443, 0.302325581395349,
0.310344827586207, 1, 1, 0.358974358974359, 0.358974358974359,
-0.0606060606060606, 0.0721649484536082, 0.615384615384615, 0.615384615384615,
0.347826086956522, 1, 0, 0, 0, -0.0273972602739726, -0.0273972602739726,
-0.0169491525423729, -0.0256410256410256, 0.107142857142857,
0.107142857142857, 0.302325581395349, -0.0163934426229508, -0.0264900662251656,
0.311111111111111, 0.311111111111111, 0.156462585034014, 0.156462585034014,
-0.0483091787439614, 0.311111111111111, -0.0333333333333333,
-0.0333333333333333, 0.311111111111111), ind = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), .Label = c("ETS",
"ETS.1", "ETS.2", "ETS.3", "ETS.4", "ETS.5", "ETS.6", "ETS.7"
), class = "factor"), ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), .Label = c("4", "5",
"6", "7", "8", "9", "10", "11"), class = "factor"), obs = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("obs",
"capa"), class = "factor")), .Names = c("values", "ind", "ID",
"obs"), row.names = c(NA, 176L), class = "data.frame")
You can specify the location of the boxes using at option.
set.seed(1)
d<-data.frame(x=rnorm(1500),f1=rep(seq(1:20),75),f2=rep(letters[1:3],500))
# first factor has 20+ levels
d$f1<-factor(d$f1)
# second factor a,b,c
d$f2<-factor(d$f2)
boxplot(x~f2*f1,data=d, at = (1:80)[-4*(1:20)], col=c("red","blue","green"),frame.plot=TRUE,axes=FALSE)
axis(1,at=seq(2,80,4),labels=1:20,cex.axis=0.7)

drawing line segments connecting sets of points

I am trying to connect sets of (two) points at each level of x, in each facet. Here is a reproducible example:
datum <- structure(list(frequency = c(8L, 7L, 6L, 18L, 5L, 11L, 16L, 15L,
9L, 8L, 8L, 10L, 2L, 20L, 14L, 3L, 6L, 2L, 2L, 11L, 10L, 6L,
15L, 19L, 18L, 18L, 8L, 2L, 10L, 15L, 12L, 17L, 1L, 18L, 7L,
8L, 16L, 4L, 9L, 2L, 7L, 3L, 16L, 7L, 18L, 20L, 9L, 10L, 13L,
2L, 15L, 7L, 3L, 20L, 4L, 15L, 5L, 7L, 9L, 16L, 5L, 8L, 10L,
10L, 7L, 10L, 10L, 17L, 7L, 8L, 13L, 13L, 16L, 5L, 20L, 18L,
13L, 19L, 3L, 8L, 14L, 12L, 20L, 2L, 9L, 13L, 7L, 2L, 5L, 5L,
13L, 9L, 13L, 7L, 9L, 4L, 4L, 20L, 1L, 4L), band = structure(c(2L,
4L, 2L, 3L, 2L, 1L, 4L, 1L, 2L, 1L, 3L, 4L, 2L, 4L, 3L, 4L, 3L,
2L, 3L, 2L, 2L, 4L, 2L, 1L, 1L, 2L, 1L, 4L, 4L, 1L, 4L, 4L, 2L,
1L, 4L, 4L, 3L, 4L, 1L, 1L, 3L, 4L, 1L, 3L, 4L, 1L, 2L, 1L, 1L,
2L, 2L, 1L, 3L, 4L, 2L, 1L, 2L, 4L, 2L, 2L, 4L, 4L, 2L, 4L, 4L,
1L, 1L, 4L, 2L, 3L, 4L, 1L, 2L, 4L, 1L, 2L, 4L, 1L, 1L, 3L, 4L,
4L, 2L, 2L, 2L, 1L, 3L, 2L, 2L, 2L, 3L, 3L, 1L, 3L, 4L, 3L, 3L,
1L, 3L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
test = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L
), .Label = c("1", "2"), class = "factor"), knowledge = structure(c(2L,
3L, 1L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 2L, 2L, 1L, 1L,
1L, 1L, 3L, 3L, 1L, 2L, 3L, 1L, 1L, 2L, 2L, 1L, 1L, 3L, 2L,
3L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 3L, 1L, 1L, 2L, 3L,
3L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 1L, 2L,
1L, 1L, 2L, 3L, 1L, 1L, 1L, 1L, 3L, 2L, 2L, 1L, 2L, 3L, 2L,
1L, 2L, 3L, 3L, 2L, 1L, 3L, 1L, 3L, 2L, 1L, 3L, 2L, 2L, 3L,
1L, 1L, 2L, 1L, 2L, 3L, 1L, 3L, 1L), .Label = c("1", "2",
"3"), class = "factor")), .Names = c("frequency", "band",
"test", "knowledge"), row.names = c(NA, -100L), class = "data.frame")
Here is the code I have so far:
ggplot(datum, aes(knowledge, frequency, color=test)) +
stat_summary(fun.y='mean', geom='point', position=position_dodge(width=.9), size=3) +
facet_grid(~band) +
labs(y='number of words (max = 20)', x='self-report knowledge') +
scale_x_discrete(labels=c('none', 'form', 'meaning'))
Looking at the left-most facet ('1') in the graph, I would like a line to connect the pretest to posttest in the none column, another line connecting pretest to posttest in the form column, and a line connecting the pretest to the posttest in the meaning column. I would like this done in each facet.
I hope that makes sense, and thanks!
I find relying on ggplot too much for data manipulation/summarizing can hurt more than it helps. I have no idea how to connect the position-dodged points with a line. Instead, I'd do something like this:
library(dplyr)
datsum = datum %>%
group_by(band, knowledge, test) %>%
summarize(mean = mean(frequency)) %>%
ungroup %>%
mutate(knowledge_fac = factor(knowledge, labels = c('none', 'form', 'meaning')))
ggplot(datsum, aes(x = test, y = mean)) +
geom_path(aes(group = band:knowledge)) +
geom_point(aes(color = factor(test))) +
facet_grid(band ~ knowledge_fac) +
labs(y='number of words (max = 20)', x='self-report knowledge')
Borrowing from Gregor's work in munging the data, I think this does what was requested. The mutate() chunk creates Test to be a numeric offset of -0.1 for test 1 and 0.1 for test 2. This is then added to the numeric value of knowledge. The result is the numeric x passed to ggplot2. Gregor correctly defined the groups, so the rest is straightforward.
library(dplyr)
datsum <- datum %>%
group_by(band, knowledge, test) %>%
summarize(mean = mean(frequency)) %>%
mutate(Test = 0.1 * (2 * (test == 2) - 1),
Knowledge = as.numeric(knowledge) + Test) %>%
ungroup
ggplot(datsum, aes(x = Knowledge, y = mean, color = test)) +
geom_path(aes(group = band:knowledge), color = "black") +
geom_point(size = 3) +
facet_wrap(~ band, nrow = 1) +
labs(y='number of words (max = 20)', x='self-report knowledge') +
scale_color_manual(values = c("orange", "blue")) +
scale_x_continuous(limits = c(0.5, 3.5), breaks = 1:3,
labels = c("none", "form", "meaning"))

Overlay ggplot grouped tiles with polygon border depending on extra factor

I have a data frame with x and y positions and two factor columns blocknr and cat:
dput(testData)
structure(list(xpos = c(2L, 8L, 5L, 8L, 1L, 4L, 5L, 1L, 8L, 4L,
3L, 2L, 6L, 5L, 1L, 7L, 3L, 4L, 3L, 7L, 1L, 6L, 7L, 7L, 2L, 5L,
3L, 4L, 6L, 7L, 1L, 5L, 1L, 6L, 4L, 5L, 3L, 6L, 4L, 8L, 1L, 3L,
4L, 6L, 7L, 3L, 2L, 6L, 4L, 2L, 1L, 7L, 4L, 8L, 2L, 3L, 2L, 5L,
8L, 2L, 8L, 3L, 3L, 5L, 6L, 7L, 1L, 5L, 6L, 4L, 2L, 6L, 7L, 1L,
5L, 7L, 2L), ypos = c(1L, 2L, 8L, 1L, 6L, 7L, 1L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 7L, 8L, 10L, 2L, 6L, 9L, 1L, 2L, 10L, 4L, 5L,
6L, 3L, 5L, 9L, 3L, 9L, 10L, 3L, 7L, 8L, 2L, 5L, 6L, 3L, 4L,
10L, 1L, 4L, 10L, 2L, 8L, 9L, 3L, 6L, 8L, 5L, 7L, 10L, 3L, 4L,
7L, 2L, 4L, 5L, 6L, 7L, 9L, 4L, 7L, 8L, 1L, 2L, 9L, 5L, 9L, 10L,
1L, 6L, 8L, 3L, 5L, 7L), blocknr = c(1L, 3L, 2L, 3L, 1L, 2L,
2L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L, 3L, 2L, 2L, 1L, 3L, 1L, 2L,
3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 2L, 2L, 1L, 3L,
2L, 3L, 1L, 1L, 2L, 3L, 3L, 2L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 3L,
1L, 2L, 1L, 2L, 3L, 1L, 3L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 3L, 2L,
1L, 2L, 3L, 1L, 2L, 3L, 1L), cat = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("A", "B", "C"
), class = "factor")), .Names = c("xpos", "ypos", "blocknr",
"cat"), row.names = c(NA, -77L), class = "data.frame")
I've made the following ggplot code to make 2D overview:
ggplot(data=testData, aes(x=xpos,y=ypos))+
geom_tile(aes(fill=cat), colour = "white")+
scale_fill_manual(values = c('A' = '#F8766D','C' = '#8ABF54','B' = '#C1DDA5'))+
geom_text(aes(x=xpos,y=ypos,label=blocknr),size=3)+
coord_cartesian(ylim = c(0.5, ymax + 0.5)) +
coord_cartesian(xlim = c(0.5, xmax + 0.5)) +
scale_x_continuous(breaks=seq(1,xmax,1))+
scale_y_continuous(breaks=seq(1,ymax,1))+
#geom_polygon(aes(group=blocknr))+
theme(axis.line = element_line(colour = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
which produces the following result:
Now I would like to highlight each group of blocknrs by drawing a border around them as shown below:
I've played around with geom_polygon, geom_path, but I can't quite find a way to do this. Is there a general way to achieve this in ggplot without constructing an algorithm to compute where each line should be and add those lines as a geom_segment?
As far as I know, there is no way to do this with standard ggplot2 tile options. But it's not to much trouble to constuct them if you do it as segments. For example
ymax <- max(testData$ypos)
xmax <- max(testData$xpos)
m <- matrix(0, nrow=ymax, ncol=xmax)
m[as.matrix(testData[,2:1])] <- testData[,3]
Here we are basically taking all the row/col assignment data and creating a matrix that essentially looks like the plot but we will with the block numbers. Now, we will scan for the locations we need to add "wall" by looking for changes in the block numbers as we go across each row and column separately.
has.breaks<-function(x) ncol(x)==2 & nrow(x)>0
hw<-do.call(rbind.data.frame, Filter(has.breaks, Map(function(i,x)
cbind(y=i,x=which(diff(c(0,x,0))!=0)), 1:nrow(m), split(m, 1:nrow(m)))))
vw<-do.call(rbind.data.frame, Filter(has.breaks, Map(function(i,x)
cbind(x=i,y=which(diff(c(0,x,0))!=0)), 1:ncol(m), as.data.frame(m))))
And you can add calls to geom_segments to add the horizontal and vertical walls to the plot.
ggplot(data=testData, aes(x=xpos,y=ypos))+
geom_tile(aes(fill=cat), colour = "white")+
scale_fill_manual(values = c('A' = '#F8766D','C' = '#8ABF54','B' = '#C1DDA5'))+
geom_text(aes(x=xpos,y=ypos,label=blocknr),size=3)+
geom_segment(data=hw, aes(x=x-.5, xend=x-.5, y=y-.5, yend=y+.5))+
geom_segment(data=vw, aes(x=x-.5, xend=x+.5, y=y-.5, yend=y-.5))+
coord_cartesian(ylim = c(0.4, ymax + 0.6)) +
coord_cartesian(xlim = c(0.4, xmax + 0.6)) +
scale_x_continuous(breaks=seq(1,xmax,1))+
scale_y_continuous(breaks=seq(1,ymax,1))+
theme(axis.line = element_line(colour = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
which gives
The desplot package will do this for you (using lattice):
library(desplot)
desplot(cat ~ xpos*ypos, testData, out1=blocknr, text=blocknr, main="testData")

Resources