How to adjust error box to violin plot using geom_violin? - r

***editing:
errbar_lims <- group_by(dt, together) %>%
dplyr::summarize(mean=mean(score), se=sd(score)/sqrt(n()),
upper=mean+(2*se), lower=mean-(2*se))
> dput(dt)
structure(list(ï..count = c(50L, 7L, 21L, 22L, 94L, 58L, 147L,
4L, 30L, 67L, 91L, 75L, 143L, 15L, 64L, 141L, 39L, 18L, 27L,
70L, 142L, 95L, 26L, 78L, 8L, 146L, 46L, 138L, 36L, 63L, 66L,
97L, 56L, 25L, 19L, 59L, 99L, 5L, 33L, 17L, 55L, 98L, 31L, 42L,
76L, 23L, 44L, 32L, 52L, 60L, 20L, 37L, 140L, 93L, 65L, 87L,
13L, 68L, 51L, 16L, 152L, 81L, 54L, 35L, 149L, 77L, 90L, 38L,
48L, 153L, 2L, 14L, 12L, 10L, 3L, 28L, 61L, 71L, 6L, 45L, 69L,
43L, 53L, 47L, 34L, 92L, 9L, 57L, 145L, 11L, 62L, 49L, 148L,
144L, 1L, 40L, 24L, 88L, 13L, 96L), condition = c(2L, 3L, 1L,
2L, 2L, 2L, 3L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 1L, 3L, 2L, 3L,
2L, 2L, 3L, 3L, 2L, 4L, 2L, 3L, 2L, 4L, 3L, 2L, 4L, 4L, 1L, 3L,
3L, 3L, 1L, 1L, 1L, 3L, 2L, 4L, 2L, 4L, 3L, 4L, 1L, 4L, 4L, 4L,
4L, 4L, 4L, 1L, 2L, 2L, 1L, 3L, 3L, 4L, 1L, 2L, 3L, 1L, 1L, 2L,
2L, 4L, 2L, 1L, 2L, 4L, 2L, 3L, 4L, 1L, 3L, 2L, 2L, 1L, 4L, 1L,
3L, 1L, 4L, 3L, 1L, 1L, 3L, 2L, 1L, 4L, 4L, 1L, 4L, 2L, 3L, 1L,
1L), together = structure(c(2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L), .Label = c("Shared Negative",
"Shared Positive"), class = "factor", label = "together"), second = structure(c(1L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L,
2L, 1L, 1L), .Label = c("Negative Second", "Positive Second"), class = "factor"),
experimenter = c(1L, 1L, 4L, 4L, 4L, 1L, 1L, 2L, 4L, 2L,
2L, 2L, 1L, 2L, 1L, 4L, 3L, 4L, 2L, 4L, 2L, 4L, 2L, 4L, 1L,
2L, 4L, 4L, 3L, 3L, 2L, 2L, 4L, 3L, 4L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 3L, 2L, 1L, 2L, 3L, 1L, 2L, 4L, 2L, 1L, 4L, 2L,
2L, 4L, 4L, 1L, 4L, 4L, 3L, 2L, 3L, 2L, 4L, 2L, 4L, 2L, 4L,
2L, 2L, 1L, 3L, 3L, 2L, 2L, 4L, 2L, 2L, 3L, 4L, 1L, 2L, 2L,
4L, 2L, 4L, 1L, 3L, 3L, 1L, 2L, 1L, 3L, 3L, 4L, 3L, 1L, 4L
), age = structure(c(23L, 24L, 25L, 23L, 24L, 35L, 25L, 23L,
23L, 24L, 23L, 24L, 31L, 23L, 25L, 23L, 20L, 23L, 23L, 22L,
27L, 22L, 25L, 25L, 23L, 31L, 23L, 24L, 25L, 23L, 26L, 24L,
24L, 26L, 22L, 24L, 23L, 24L, 21L, 22L, 22L, 22L, 27L, 26L,
63L, 23L, 22L, 32L, 24L, 22L, 23L, 31L, 40L, 24L, 24L, 22L,
23L, 38L, 22L, 27L, 29L, 24L, 22L, 25L, 32L, 24L, 24L, 23L,
23L, 23L, 56L, 48L, 27L, 25L, 23L, 24L, 21L, 25L, 23L, 27L,
31L, 26L, 26L, 24L, 30L, 23L, 25L, 25L, 26L, 26L, 25L, 35L,
28L, 30L, 21L, 25L, 23L, 37L, 21L, 44L), label = "Age"),
sex = structure(c(2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L
), .Label = c("Female", "Male"), class = "factor", label = "Sex"),
q1 = structure(c(0L, 11L, 18L, 0L, 18L, 19L, 9L, 13L, 36L,
41L, 29L, 34L, 33L, 53L, 27L, 35L, NA, 40L, 49L, 34L, 38L,
48L, 35L, 29L, 23L, 47L, 35L, 69L, 50L, 45L, 60L, 49L, 34L,
NA, 43L, 44L, 51L, 20L, 37L, 69L, 36L, 41L, 45L, 60L, 47L,
62L, 58L, 47L, 18L, 30L, 52L, 48L, 60L, 51L, 53L, 54L, 42L,
52L, 47L, 51L, 56L, NA, 54L, 51L, 47L, 57L, 47L, 66L, 53L,
54L, NA, 54L, 57L, 49L, 67L, 80L, 49L, 36L, 58L, 57L, 50L,
87L, 51L, 55L, 59L, 70L, 65L, 59L, 61L, 67L, 50L, 63L, 60L,
73L, 70L, 88L, 88L, 83L, 100L, 100L), label = "Question 1"),
q2 = c(0L, 13L, 12L, 22L, 23L, 19L, 31L, 11L, 71L, 13L, 36L,
12L, 33L, 24L, 37L, 37L, 37L, 41L, 50L, 35L, 58L, 39L, 42L,
37L, 64L, 48L, 44L, NA, 40L, 57L, 46L, 49L, 37L, 42L, 67L,
53L, 51L, 35L, 49L, 65L, 49L, 58L, 51L, 49L, 46L, 59L, 40L,
47L, NA, 51L, 53L, 47L, 60L, 51L, 53L, NA, 63L, 52L, 41L,
49L, 50L, NA, 53L, 52L, 64L, 54L, 57L, 50L, 57L, 54L, 55L,
64L, 62L, 27L, 65L, 70L, 53L, 68L, 65L, 54L, 59L, 53L, 61L,
56L, 58L, 69L, 74L, 62L, 56L, 67L, 62L, 58L, 60L, 72L, 78L,
100L, 84L, 96L, 100L, 99L), q3 = c(0L, 14L, 18L, NA, 19L,
16L, 53L, 66L, 81L, 35L, 36L, 65L, 32L, 73L, 75L, 30L, 64L,
44L, 13L, 51L, 62L, 50L, 42L, 51L, 47L, 12L, 60L, 62L, 66L,
77L, 55L, 43L, 69L, 70L, 85L, 68L, 50L, 56L, 46L, 97L, 67L,
80L, 59L, 63L, 46L, 60L, 35L, 47L, 35L, 52L, 48L, 58L, 62L,
51L, 65L, 67L, 65L, 61L, 71L, 58L, 56L, 57L, 54L, 73L, 60L,
62L, 57L, 63L, 53L, 54L, 79L, 87L, 59L, 61L, 78L, 16L, 43L,
81L, 69L, 69L, 58L, 47L, 49L, 56L, 58L, 51L, 77L, 65L, 61L,
67L, 83L, 90L, 81L, 83L, 78L, 66L, 98L, 9L, 71L, 100L), q4 = c(0L,
10L, 23L, 33L, 20L, 17L, 7L, 20L, 3L, 41L, 29L, 17L, 32L,
0L, 39L, 50L, 22L, 42L, 52L, 43L, 18L, 24L, 46L, 53L, 31L,
14L, 31L, 43L, 24L, 41L, 19L, 42L, 38L, 42L, 37L, 69L, 33L,
57L, 51L, 7L, 49L, 10L, 44L, 29L, 50L, 24L, 59L, 48L, 63L,
46L, 9L, 49L, 44L, 51L, 44L, 43L, 38L, 45L, 12L, 52L, 49L,
NA, 53L, 42L, 40L, 46L, 68L, 46L, 53L, 54L, 33L, 41L, 39L,
42L, 32L, 41L, 66L, 36L, 21L, 55L, 44L, 61L, 47L, 56L, 61L,
57L, 68L, 41L, 39L, 67L, 23L, 47L, 68L, 34L, 61L, 25L, 68L,
92L, 70L, 100L), q5 = c(5L, 7L, 15L, 0L, 19L, 17L, 31L, 19L,
23L, 19L, 26L, 34L, 35L, 52L, 46L, 44L, NA, 42L, 34L, 18L,
34L, 47L, 51L, 34L, 47L, 39L, 57L, 46L, 48L, 43L, 49L, 41L,
41L, 42L, 48L, 43L, 50L, 49L, 68L, 43L, 38L, 46L, 46L, 48L,
61L, 57L, 50L, 49L, 47L, 51L, NA, 48L, 34L, 51L, 52L, 52L,
46L, 54L, 48L, 55L, 57L, NA, 53L, 51L, 58L, 48L, 50L, 64L,
57L, 54L, 52L, 53L, 61L, 79L, 58L, 78L, 51L, 64L, 68L, 58L,
55L, 59L, 64L, 62L, 60L, 58L, 67L, 63L, 66L, 68L, 72L, 65L,
72L, 69L, 75L, 27L, 70L, 95L, 100L, 100L), q6 = c(3L, 13L,
14L, 43L, 17L, 23L, 0L, 20L, 11L, 33L, 38L, 3L, 44L, 0L,
6L, 50L, 0L, 46L, 34L, 53L, 34L, 19L, 50L, 43L, 47L, 63L,
46L, 27L, 18L, 42L, 42L, 41L, 51L, 32L, 43L, 24L, 50L, 52L,
11L, 4L, 49L, 31L, 59L, 28L, 61L, 46L, 56L, 50L, 40L, 51L,
30L, 47L, 57L, 46L, 46L, 44L, 43L, 42L, 53L, 50L, 50L, 53L,
53L, 66L, 30L, 40L, 52L, 39L, 52L, 54L, 51L, 32L, 32L, 64L,
43L, 7L, 58L, 31L, 39L, 51L, 57L, 44L, 59L, 62L, 61L, 57L,
32L, 39L, 68L, 58L, 83L, 62L, 43L, 32L, 61L, 65L, 60L, 94L,
86L, 100L), q7 = c(40L, 20L, 31L, 67L, 18L, 27L, 50L, 49L,
29L, 38L, 45L, 53L, 53L, 53L, 53L, 56L, 70L, 45L, 43L, 53L,
42L, 69L, 53L, 47L, 47L, 48L, 53L, 48L, 80L, 66L, 46L, 48L,
61L, 62L, 37L, 69L, 49L, 61L, 69L, 86L, 50L, 68L, 49L, 50L,
35L, 44L, 43L, 50L, 62L, 51L, 53L, 50L, 46L, 51L, 53L, 51L,
71L, 53L, 87L, 57L, 56L, 54L, 53L, 42L, 69L, 61L, 45L, 47L,
53L, 54L, 68L, 73L, 66L, 65L, 56L, 72L, 69L, 71L, 71L, 60L,
57L, 63L, 75L, 58L, 61L, 52L, 24L, 69L, 71L, 53L, 94L, 81L,
43L, 91L, 61L, 57L, 46L, 95L, 84L, 100L), q8 = c(3L, 25L,
13L, 0L, 18L, 27L, 15L, 17L, 13L, 38L, 31L, 29L, 26L, 53L,
11L, 36L, 23L, 30L, 46L, 43L, 57L, 39L, 25L, 42L, 63L, 69L,
30L, 64L, 47L, 41L, 54L, 42L, 37L, 38L, 39L, 21L, 50L, 47L,
50L, 24L, 49L, 45L, 45L, 55L, 47L, 43L, 46L, 49L, 62L, 51L,
43L, 47L, 63L, 51L, 48L, 49L, 40L, 54L, 46L, 49L, 58L, 49L,
53L, 52L, 41L, 50L, 45L, 47L, 53L, 54L, 50L, 56L, 64L, 39L,
57L, 38L, 49L, 43L, 48L, 52L, 58L, 55L, 68L, 62L, 59L, 58L,
64L, 68L, 46L, 56L, 31L, 63L, 67L, 71L, 62L, 99L, 82L, 98L,
100L, 100L), q9 = c(0L, 13L, 5L, 0L, 18L, 25L, 0L, 19L, 0L,
15L, 22L, 64L, 26L, 0L, 51L, 37L, 60L, 43L, 43L, 50L, 17L,
38L, 51L, 49L, 28L, 32L, 40L, 13L, 16L, 19L, 36L, 51L, 55L,
46L, 35L, 26L, 41L, 48L, 31L, 21L, 43L, 61L, 39L, 40L, 46L,
49L, 50L, 50L, 52L, 53L, 87L, 55L, 36L, 51L, 48L, 52L, 42L,
53L, 59L, 50L, 41L, 53L, 53L, 52L, 66L, 60L, 56L, 54L, 53L,
54L, 25L, 34L, 37L, 50L, 38L, 73L, 46L, 72L, 56L, 52L, 59L,
53L, 40L, 62L, 60L, 58L, 64L, 63L, 67L, 59L, 79L, 63L, 95L,
39L, 66L, 75L, 72L, 91L, 79L, 100L), q10 = c(0L, 2L, 3L,
0L, 18L, 25L, 21L, 17L, 22L, 31L, 24L, 16L, 26L, 53L, 37L,
16L, 53L, 40L, 50L, 40L, 61L, 49L, 34L, 44L, 34L, 69L, 47L,
37L, 66L, 24L, 49L, 50L, 35L, 42L, 37L, 56L, 50L, 52L, 66L,
65L, 51L, 41L, 45L, 62L, 46L, 42L, 49L, 50L, 61L, 53L, 69L,
48L, 38L, 51L, 48L, 50L, 69L, 53L, 65L, 58L, 58L, 53L, 53L,
51L, 58L, 57L, 59L, 60L, 53L, 54L, 74L, 48L, 68L, 69L, 52L,
74L, 65L, 47L, 59L, 56L, 67L, 57L, 70L, 61L, 59L, 67L, 63L,
70L, 66L, 53L, 75L, 65L, 72L, 100L, 67L, 100L, 78L, 98L,
100L, 100L), score = structure(c(5.1, 12.8, 15.2, 18.33,
18.8, 21.5, 21.7, 25.1, 28.9, 30.4, 31.6, 32.7, 34, 36.1,
38.2, 39.1, 41.13, 41.3, 41.4, 42, 42.1, 42.2, 42.9, 42.9,
43.1, 44.1, 44.3, 45.44, 45.5, 45.5, 45.6, 45.6, 45.8, 46.22,
47.1, 47.3, 47.5, 47.7, 47.8, 48.1, 48.1, 48.1, 48.2, 48.4,
48.5, 48.6, 48.6, 48.7, 48.89, 48.9, 49.33, 49.7, 50, 50.5,
51, 51.33, 51.9, 51.9, 52.9, 52.9, 53.1, 53.17, 53.2, 53.2,
53.3, 53.5, 53.6, 53.6, 53.7, 54, 54.11, 54.2, 54.5, 54.5,
54.6, 54.9, 54.9, 54.9, 55.4, 56.4, 56.4, 57.9, 58.4, 59,
59.6, 59.7, 59.8, 59.9, 60.1, 61.5, 65.2, 65.7, 66.1, 66.4,
67.9, 70.2, 74.6, 85.1, 89, 99.9), label = "Evaluation Score")), row.names = c(NA,
-100L), class = "data.frame")
I created a basic violin plot with a boxplot, and I want to add an error box. Here is some code:
dplyr::summarize(mean=mean(score), se=sd(score)/sqrt(n()),
upper=mean+(2*se), lower=mean-(2*se))
p <- ggplot() +
geom_violin(data=dt, aes(x=together, y=score, fill=second, color=second)) +
geom_point(data=errbar_lims, aes(x=together, y=mean), size=3) +
geom_errorbar(aes(x=errbar_lims$together, ymax=errbar_lims$upper,
ymin=errbar_lims$lower), stat='identity', width=.25) +
theme_minimal()
print(p)
This is what I get:
The errors are only shown for the two groups.
How can I add define the errors and display an error box to all four violins? I there a way to overlay the error box on the violins? like those:
Any help would be appreciated!

You have a total of four violins on your plot, because you have placed the factor level together along the x axis, and used the column second for your fill aesthetic. Each value of together therefore has two violins: one for "Negative Second" and one for "Positive Second".
The problem is that when you made the data frame errbar_lims, you only grouped by together, so if we examine it we will see it does not contain any information about second. It only has two rows, so can only produce two error bars:
errbar_lims
#> # A tibble: 2 x 5
#> together mean se upper lower
#> <fct> <dbl> <dbl> <dbl> <dbl>
#> 1 Shared Negative 53.3 1.92 57.1 49.4
#> 2 Shared Positive 45.3 2.00 49.3 41.3
What you are looking for is a data frame with four rows to cover all 4 possible combinations of together and second. You could achieve that by doing this:
errbar_lims <- group_by(dt, together, second) %>%
summarize(mean = mean(score),
se = sd(score) / sqrt(n()),
upper = mean + (2 * se),
lower = mean - (2 * se))
If we examine this, we will see it has the four rows that we need for our four error bars:
errbar_lims
#> # A tibble: 4 x 6
#> # Groups: together [2]
#> together second mean se upper lower
#> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
#> 1 Shared Negative Negative Second 55.7 3.35 62.4 49.0
#> 2 Shared Negative Positive Second 50.9 1.91 54.8 47.1
#> 3 Shared Positive Negative Second 45.0 3.02 51.1 39.0
#> 4 Shared Positive Positive Second 45.5 2.69 50.9 40.2
Now we can plot, but we need to remember to tell geom_point and geom_errorbar that they should be grouped according to the second column. We also need to tell them we want the geoms to be dodged so that they are not all plotted between the violins:
ggplot(dt, aes(x = together, fill = second)) +
geom_violin(aes(y = score, color = second)) +
geom_point(data = errbar_lims, aes(x = together, y = mean, group = second),
size = 3,
position = position_dodge(width = 0.9)) +
geom_errorbar(data = errbar_lims,
aes(ymax = upper, ymin = lower, group = second),
stat = 'identity',
position = position_dodge(width = 0.9),
width = 0.25) +
theme_minimal()

Related

Is there a way to produce multiple x-y scatterplots at once based on grouping value, ordered by a third variable?

I have multi-level data. The group level is individual persons, which are designated by id. The variable index indicates different time points. Is there a way to make a separate scatterplot (x vs. y) for each individual, all displayed in the same output, and ordered based on a third variable (z)? If so, can color then be added to indicate degree of third variable (z)? Data below, Thanks.
> dput(dat1.1)
structure(list(id = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L), index = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L), x = c(7.443917, 7.520429, 7.446833,
8.07893, 8.534033, 8.263931, 7.598647, 6.902987, 7.672617, 7.739256,
7.591341, 8.101125, 7.811751, 6.596834, 6.637652, 8.467165, 7.835399,
6.500149, 7.083198, 7.531798, 6.110208, 6.368534, 5.26318, 6.735778,
5.580152, 5.460161, 5.844303, 6.258181, 7.191627, 5.105033, 6.760193,
5.857215, 5.866264, 6.769086, 6.547294, 5.623804, 4.675815, 6.153901,
6.040519, 6.236045, 8.216397, 6.097841, 5.491311, 5.831432, 6.297337,
6.655688, 5.553445, 6.37449, 6.271961, 6.959645, 7.080341, 6.46092,
6.476955, 7.221111, 6.219023, NA, NA, NA, NA, NA, 8.21752, 7.589581,
8.363739, 8.849697, 7.78645, 7.494006, 7.827766, 9.11352, 7.80884,
6.701855, 6.259061, 5.523358, 6.186617, 6.548538, 6.6937, 7.213297,
5.243428, 7.510827, 7.054297, 7.603241), y = c(106L, 114L, 50L,
50L, 56L, 46L, 50L, 52L, 114L, 50L, 56L, 26L, 48L, 52L, 48L,
54L, 54L, 56L, 52L, 50L, 84L, 86L, 88L, 86L, 82L, 84L, 88L, 84L,
86L, 84L, 86L, 86L, 84L, 84L, 88L, 88L, 88L, 84L, 86L, 120L,
106L, 168L, 116L, 56L, 108L, 68L, 68L, 70L, 74L, 76L, 76L, 76L,
72L, 70L, 118L, NA, NA, NA, NA, NA, 60L, 62L, 52L, 90L, 50L,
50L, 54L, 56L, 52L, 30L, 78L, 30L, 52L, 54L, 52L, 80L, 86L, 46L,
54L, 84L), z = c(33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L,
33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 54L, 54L,
54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L,
54L, 54L, 54L, 54L, 54L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L,
56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 50L,
50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L,
50L, 50L, 50L, 50L, 50L, 50L)), class = "data.frame", row.names = c(NA,
-80L))
Does this come close to giving you what you want?
library(tidyverse)
d %>%
group_by(id) %>%
mutate(z=as.factor(z)) %>%
group_map(
function(.x, .y) {
.x %>%
ggplot() +
geom_point(aes(x=x, y=y, colour=z)) +
facet_wrap(vars(z)) +
scale_colour_manual(drop=FALSE, values=d %>% distinct(z) %>% pull(z)) +
labs(title=.x$id[1])
},
.keep=TRUE
)
Points to note:
group_map applies a function to each group of a grouped data frame. .x refers to the data in the current group, .y is a one row tibble defining the group. .keep requests that the grouping variables are kept in .x.
drop=FALSE in the call to scale_colour_manual() ensures that unused factor levels are retained in the legend (and hence different levels of z are distinguishable between plots).

Creating scatter plot class or group wise

Im using ggstatsplot's ggscatterstats function to calculate correlation between various clinical parameters and then plotting them. For example
here my variables are age and WBC. This is taking all the data points irrespective of the class they belong. I would like to do the same with each FAB classification that is present in my data.
dat <- merge_clinical_class_TMB %>% select(FAB,AGE,Wbc,Platelet,HB,PB_Blasts,BM_Blasts,TMB_NONSYNONYMOUS)
df2 <- dat
library(ggstatsplot)
ggscatterstats(
df2,
x = AGE,
y = Wbc,
type = "np" # try the "robust" correlation too! It might be even better here
#, marginal.type = "boxplot"
)
My dataframe looks like this
head(df2)
FAB AGE Wbc Platelet HB PB_Blasts BM_Blasts TMB_NONSYNONYMOUS
1 M4 50 17 231 10 88 52 0.3000000
2 M3 61 1 90 10 44 0 0.4333333
3 M3 30 6 114 11 82 6 0.2333333
4 M0 77 92 105 9 67 56 0.4000000
5 M1 46 29 90 9 90 81 0.5666667
6 M1 68 3 63 8 91 55 0.9000000
My data
dput(df2)
structure(list(FAB = structure(c(5L, 4L, 4L, 1L, 2L, 2L, 3L,
3L, 3L, 5L, 3L, 5L, 1L, 5L, 5L, 3L, 3L, 3L, 1L, 2L, 1L, 4L, 6L,
6L, 5L, 3L, 5L, 7L, 5L, 1L, 6L, 5L, 5L, 6L, 5L, 6L, 3L, 3L, 4L,
4L, 5L, 7L, 3L, 3L, 5L, 2L, 5L, 1L, 3L, 6L, 2L, 5L, 2L, 5L, 7L,
3L, 3L, 8L, 6L, 4L, 2L, 2L, 2L, 2L, 3L, 8L, 3L, 2L, 2L, 4L, 6L,
3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 6L, 2L, 1L, 3L, 2L, 5L, 5L,
1L, 2L, 5L, 6L, 6L, 2L, 6L, 4L, 2L, 5L, 2L, 2L, 2L, 1L, 4L, 4L,
1L, 3L, 9L, 6L, 5L, 5L, 1L, 3L, 3L, 5L, 1L, 2L, 2L, 3L, 5L, 1L,
5L, 5L, 6L, 2L, 2L, 2L, 1L, 3L, 3L, 6L, 5L, 2L, 5L, 1L, 2L, 8L,
2L, 3L, 9L, 5L, 2L, 1L, 5L, 3L, 5L, 5L, 1L, 3L, 2L, 5L, 3L, 6L,
5L, 1L, 2L, 2L, 5L, 3L, 5L, 5L, 6L, 5L, 5L, 3L, 5L, 6L, 3L, 2L,
3L, 3L, 2L, 4L, 6L, 4L, 1L, 2L, 6L, 3L, 6L, 2L, 3L, 2L, 4L, 2L,
2L, 4L, 3L, 3L, 4L, 4L, 4L, 3L, 4L, 3L, 6L, 2L, 4L, 2L, 5L, 2L,
4L), .Label = c("M0", "M1", "M2", "M3", "M4", "M5", "M6", "M7",
"nc"), class = "factor"), AGE = c(50L, 61L, 30L, 77L, 46L, 68L,
23L, 64L, 76L, 81L, 25L, 78L, 39L, 49L, 57L, 63L, 62L, 52L, 76L,
64L, 65L, 61L, 44L, 31L, 64L, 33L, 55L, 50L, 64L, 59L, 59L, 77L,
33L, 48L, 35L, 66L, 67L, 51L, 74L, 51L, 64L, 77L, 63L, 37L, 57L,
53L, 62L, 39L, 72L, 66L, 51L, 51L, 18L, 63L, 54L, 75L, 40L, 60L,
76L, 33L, 63L, 53L, 75L, 67L, 66L, 77L, 64L, 76L, 51L, 42L, 51L,
59L, 43L, 45L, 60L, 47L, 68L, 24L, 48L, 73L, 60L, 44L, 71L, 25L,
60L, 57L, 55L, 69L, 42L, 42L, 45L, 50L, 41L, 21L, 50L, 69L, 76L,
70L, 27L, 76L, 65L, 48L, 59L, 69L, 81L, 22L, 61L, 51L, 63L, 61L,
22L, 73L, 49L, 41L, 47L, 54L, 44L, 55L, 83L, 78L, 59L, 57L, 57L,
88L, 43L, 71L, 62L, 75L, 62L, 58L, 65L, 66L, 60L, 35L, 76L, 72L,
35L, 73L, 67L, 70L, 48L, 65L, 41L, 52L, 67L, 58L, 34L, 60L, 55L,
56L, 61L, 31L, 71L, 56L, 57L, 60L, 57L, 58L, 79L, 55L, 34L, 76L,
82L, 67L, 67L, 54L, 53L, 71L, 61L, 30L, 50L, 35L, 29L, 45L, 38L,
81L, 31L, 75L, 67L, 29L, 51L, 40L, 32L, 57L, 25L, 63L, 75L, 25L,
68L, 62L, 25L, 31L, 68L, 45L, 61L, 35L, 22L, 23L, 21L, 53L),
Wbc = c(17L, 1L, 6L, 92L, 29L, 3L, 32L, 117L, 62L, 91L, 34L,
10L, 2L, 57L, 88L, 77L, 75L, 4L, 15L, 1L, 3L, 86L, 9L, 137L,
132L, 3L, 22L, 6L, 3L, 1L, 12L, 40L, 26L, 116L, 53L, 112L,
2L, 42L, 32L, 4L, 2L, 3L, 17L, 19L, 14L, 3L, 119L, 5L, 3L,
79L, 104L, 3L, 35L, 77L, 2L, 8L, 8L, 1L, 4L, 1L, 46L, 2L,
6L, 31L, 3L, 2L, 3L, 34L, 2L, 2L, 15L, 12L, 4L, 29L, 12L,
12L, 60L, 224L, 33L, 2L, 7L, 14L, 5L, 11L, 47L, 5L, 31L,
6L, 11L, 38L, 5L, 7L, 134L, 93L, 3L, 10L, 3L, 48L, 90L, 297L,
1L, 1L, 1L, 2L, 2L, 115L, 35L, 50L, 18L, 62L, 52L, 15L, 12L,
48L, 81L, 13L, 35L, 28L, 78L, 17L, 30L, 99L, 20L, 3L, 172L,
6L, 28L, 98L, 59L, 101L, 68L, 2L, 2L, 43L, 4L, 38L, 34L,
59L, 37L, 1L, 111L, 49L, 43L, 298L, 26L, 47L, 14L, 16L, 114L,
203L, 8L, 133L, 1L, 31L, 3L, 68L, 3L, 20L, 19L, 73L, 20L,
5L, 1L, 15L, 45L, 68L, 88L, 36L, 10L, 23L, 1L, 72L, 1L, 2L,
40L, 12L, 13L, 7L, 46L, 2L, 64L, NA, 5L, 103L, 8L, 1L, 3L,
16L, 29L, 1L, 99L, 2L, 6L, 2L, 3L, 2L, 115L, 27L, 8L, 1L),
Platelet = c(231L, 90L, 114L, 105L, 90L, 63L, 38L, 100L,
32L, 32L, 23L, 98L, 215L, 14L, 56L, 19L, 110L, 22L, 85L,
42L, 16L, 22L, 50L, 42L, 15L, 61L, 65L, 50L, 134L, 102L,
57L, 29L, 111L, 50L, 44L, 34L, 28L, 232L, 42L, 58L, 27L,
86L, 23L, 38L, 76L, 108L, 52L, 175L, 52L, 132L, 23L, 143L,
30L, 41L, 9L, 21L, 95L, 59L, 79L, 38L, 11L, 68L, 22L, 141L,
168L, 70L, 41L, 21L, 25L, 35L, 14L, 20L, 67L, 116L, 45L,
57L, 8L, 34L, 32L, 60L, 93L, 145L, 48L, 33L, 50L, 129L, 9L,
61L, 176L, 12L, 53L, 136L, 40L, 73L, 27L, 12L, 166L, 30L,
87L, 40L, 94L, 52L, 23L, 127L, 39L, 57L, 35L, 21L, 148L,
25L, 149L, 64L, 351L, 71L, 53L, 22L, 35L, 31L, 46L, 85L,
18L, 80L, 62L, 156L, 32L, 50L, 69L, 31L, 20L, 57L, 142L,
37L, 79L, 66L, 21L, 31L, 88L, 11L, 15L, 82L, 53L, 76L, 51L,
68L, 64L, 55L, 40L, 90L, 37L, 45L, 36L, 52L, 86L, 88L, 35L,
174L, 28L, 121L, 131L, 17L, 152L, 52L, 30L, 79L, 79L, 87L,
30L, 44L, 140L, 59L, 58L, 19L, 29L, 156L, 19L, 61L, 36L,
11L, 71L, 13L, 45L, 34L, 39L, 82L, 18L, 43L, 118L, 32L, 73L,
15L, 60L, 208L, 96L, 257L, 61L, 12L, 32L, 23L, 52L, 46L),
HB = c(10L, 10L, 11L, 9L, 9L, 8L, 7L, 10L, 10L, 11L, 11L,
10L, 10L, 8L, 10L, 13L, 11L, 9L, 9L, 8L, 9L, 12L, 8L, 6L,
10L, 7L, 8L, 9L, 11L, 12L, 11L, 10L, 10L, 9L, 8L, 10L, 9L,
13L, 9L, 8L, 12L, 9L, 12L, 9L, 9L, 9L, 11L, 10L, 11L, 12L,
12L, 11L, 9L, 10L, 9L, 9L, 10L, 9L, 10L, 9L, 8L, 9L, 9L,
10L, 12L, 10L, 10L, 8L, 10L, 9L, 11L, 11L, 11L, 8L, 9L, 9L,
9L, 6L, 10L, 10L, 9L, 9L, 8L, 9L, 9L, 7L, 9L, 11L, 12L, 10L,
9L, 10L, 12L, NA, 10L, 7L, 11L, 10L, 9L, 11L, 10L, 9L, 8L,
8L, 10L, 9L, 12L, 11L, 8L, 13L, 11L, 9L, 9L, 12L, 10L, 9L,
10L, 8L, 9L, 9L, 9L, 10L, 9L, 10L, 10L, 9L, 10L, 8L, 7L,
9L, 9L, 8L, 9L, 9L, 8L, 10L, 8L, 9L, 9L, 8L, 9L, 9L, 9L,
9L, 9L, 10L, 9L, 8L, 9L, 10L, 7L, 11L, 11L, 10L, 6L, 8L,
9L, 9L, 10L, 8L, 11L, 10L, 11L, 8L, 9L, 8L, 9L, 8L, 10L,
10L, 10L, 9L, 9L, 12L, 9L, 9L, 11L, 9L, 13L, 9L, 10L, 8L,
9L, 10L, 10L, 11L, 9L, 9L, 10L, 9L, 9L, 11L, 7L, 13L, 14L,
12L, 8L, 12L, 8L, 9L), PB_Blasts = c(88L, 44L, 82L, 67L,
90L, 91L, 59L, 60L, 48L, 98L, 53L, 40L, 75L, 81L, 90L, 57L,
46L, 67L, 74L, 61L, 99L, 73L, 74L, 83L, 72L, 33L, 35L, 70L,
85L, 61L, 95L, 80L, 71L, 83L, 90L, 90L, 50L, 64L, 51L, 93L,
95L, 75L, 80L, 52L, 61L, 72L, 65L, 83L, 45L, 32L, 85L, 73L,
86L, 82L, 30L, 48L, 47L, 58L, 78L, 100L, 81L, 82L, 40L, 89L,
70L, 47L, 80L, 73L, 62L, 88L, 57L, 70L, 40L, 56L, 86L, 37L,
90L, 77L, 75L, 37L, 94L, 86L, 97L, 72L, 87L, 40L, 52L, 60L,
68L, 40L, 95L, 81L, 92L, 90L, 90L, 42L, 37L, 84L, 77L, 99L,
83L, 65L, 79L, 82L, 46L, 94L, 71L, 39L, 62L, 95L, 55L, 11L,
51L, 42L, 77L, 72L, 39L, 69L, 75L, 70L, 75L, 52L, 91L, 33L,
87L, 55L, 72L, 76L, 85L, 79L, 79L, 81L, 50L, 81L, 33L, 88L,
34L, 90L, 69L, 32L, 92L, 90L, 47L, 75L, 30L, 59L, 57L, 62L,
54L, 60L, 89L, 82L, 90L, 90L, 64L, 89L, 43L, 58L, 58L, 97L,
71L, 91L, 53L, 75L, 85L, 67L, 86L, 70L, 43L, 86L, 74L, 87L,
0L, 0L, 86L, 53L, 63L, 41L, 76L, 45L, 85L, 0L, 94L, 6L, 91L,
0L, 2L, 93L, 85L, 82L, 56L, 40L, 48L, 0L, 14L, 90L, 71L,
51L, 91L, 42L), BM_Blasts = c(52L, 0L, 6L, 56L, 81L, 55L,
0L, 0L, 88L, 37L, 87L, 6L, 4L, 48L, 84L, 70L, 53L, 18L, 82L,
5L, 34L, 68L, 5L, 6L, 90L, 0L, 67L, 0L, 22L, 12L, 0L, 2L,
14L, 3L, 18L, 7L, 17L, 79L, 0L, 40L, 0L, 8L, 71L, 33L, 17L,
41L, 65L, 53L, 0L, 11L, 85L, 2L, 90L, 39L, 0L, 54L, 23L,
0L, 0L, 0L, 97L, 42L, 48L, 61L, 6L, 0L, 46L, 55L, 10L, 2L,
0L, 48L, 39L, 37L, 43L, 0L, 91L, 76L, 41L, 16L, 30L, 17L,
54L, 50L, 65L, 0L, 59L, 22L, 51L, 16L, 6L, 10L, 90L, 72L,
0L, 32L, 0L, 49L, 88L, 98L, 0L, 0L, 15L, 0L, 0L, 94L, 55L,
39L, 9L, 86L, 70L, 11L, 5L, 74L, 79L, 90L, 83L, 57L, 74L,
28L, 17L, 4L, 91L, 0L, 91L, 50L, 49L, 80L, 22L, 64L, 84L,
12L, 14L, 86L, 6L, 18L, 40L, 0L, 61L, 6L, 87L, 0L, 62L, 51L,
6L, 72L, 59L, 29L, 24L, 96L, 0L, 53L, 13L, 45L, 61L, 56L,
35L, 10L, 0L, 8L, 58L, 16L, 25L, 10L, 3L, 71L, 52L, 67L,
32L, 88L, 10L, 8L, 0L, 0L, 97L, 7L, 45L, 0L, 49L, 9L, 85L,
0L, 70L, 91L, 7L, 0L, 2L, 0L, 32L, 11L, 71L, 0L, 48L, 0L,
14L, 7L, 90L, 63L, 83L, 29L), TMB_NONSYNONYMOUS = c(0.3,
0.433333333333, 0.233333333333, 0.4, 0.566666666667, 0.9,
0.3, 0.133333333333, 0.4, 0.3, 0.233333333333, 0.5, 0.266666666667,
0, 0.2, 0.4, 0.266666666667, 0.333333333333, 0.4, 0.4, 0.566666666667,
0.0333333333333, 0.166666666667, 0.1, 0.166666666667, 0.266666666667,
0.3, 0.3, 0.466666666667, 0.0666666666667, 0.266666666667,
0.266666666667, 0.0333333333333, 0.1, 0.133333333333, 0.0333333333333,
0.5, 0.6, 0.0333333333333, 0.1, 0.0333333333333, 0.333333333333,
0.433333333333, 0.2, 0.466666666667, 0.2, 0.0333333333333,
0.733333333333, 0.2, 0.233333333333, 0.233333333333, 0.3,
0.133333333333, 0, 0.3, 0.333333333333, 0.333333333333, 0.266666666667,
0.533333333333, 0.2, 0.533333333333, 0.466666666667, 0.533333333333,
0.0333333333333, 0.3, 0.5, 0.333333333333, 0.266666666667,
0.5, 0.333333333333, 0.0666666666667, 0.466666666667, 0.333333333333,
0.266666666667, 0.7, 0.433333333333, 0.166666666667, 0.0666666666667,
0.233333333333, 0.5, 0.0333333333333, 0.2, 0.433333333333,
0.433333333333, 0.4, 0.233333333333, 0.0666666666667, 0.233333333333,
0.466666666667, 0.0666666666667, 0, 0.1, 0.4, 0.1, 0.2, 0.4,
0.433333333333, 0.566666666667, 0.2, 0.0333333333333, 0.533333333333,
0.566666666667, 0.3, 0.466666666667, 0.566666666667, 0.0333333333333,
0.4, 0.0666666666667, 0.633333333333, 0.4, 0.466666666667,
0.466666666667, 0.3, 0.5, 0.0333333333333, 0.333333333333,
0.333333333333, 0.266666666667, 0.366666666667, 0.666666666667,
0.333333333333, 0.533333333333, 0.466666666667, 0.6, 0.333333333333,
0.4, 0.266666666667, 0.366666666667, 0.2, 0.0333333333333,
0.266666666667, 0.3, 0.166666666667, 0.4, 0.566666666667,
0.4, 0.1, 0.1, 0.0666666666667, 0.366666666667, 0, 0.4, 0.0333333333333,
0.1, 0.0666666666667, 0.5, 0.3, 0.466666666667, 0.0333333333333,
0.4, 0.1, 0.0666666666667, 0.766666666667, 0.5, 0.466666666667,
0.333333333333, 0.4, 0.333333333333, 0.4, 0.266666666667,
0.2, 0.3, 0.7, 0.166666666667, 0.2, 0, 0.5, 0.166666666667,
0.533333333333, 0.233333333333, 0.166666666667, 0.133333333333,
0.0666666666667, 0.4, 0.333333333333, 0.133333333333, 0.4,
0.233333333333, 0.466666666667, 0.366666666667, 0.266666666667,
0.266666666667, 0.266666666667, 0.4, 0.2, 0.166666666667,
0.4, 0.333333333333, 0.166666666667, 0.266666666667, 0.1,
0.333333333333, 0.733333333333, 0.466666666667, 0.466666666667,
0.2, 0.1, 1.13333333333, 0.2, 0.3)), class = "data.frame", row.names = c(NA,
-200L))
Objective I would like to do the same with various FABI have FAB label from M0 to M7 I would like to ignore nc
So for each FAB label I would like to see the correlation for example if I have to take the M0 class then I would like to see their Age vs Wbc correlation and similarly for other FAB class as well. Is it possible to do these in ggstataplot as I don't see for correlation any such functionality there .
Simple way is I can subset them and do the same like M0 ,M1, M2 etc etc but that is a long process can I split the FAB column and pass it to the library?
I would like to know other ways to do the above and plot the same
Any help or suggestion would be appreciated
Update: We could also use the built in function see comments:
Many thanks to #Indrajeet Patil: https://indrajeetpatil.github.io/ggstatsplot/articles/web_only/ggscatterstats.html#grouped-analysis-with-grouped_ggscatterstats
To subset FAB we use filter:
## for reproducibility
set.seed(123)
## plot
grouped_ggscatterstats(
## arguments relevant for ggscatterstats
data = df2 %>% filter(as.integer(FAB)<5),
x = AGE,
y = Wbc,
grouping.var = FAB,
type = "r",
# ggtheme = ggthemes::theme_tufte(),
## arguments relevant for combine_plots
annotation.args = list(
title = "Relationship between Wbc and Age",
caption = "Source: stackoverflow"
),
plotgrid.args = list(nrow = 2, ncol = 2)
)
First answer:
We could do something like this:
write a function and pass the data frame + the column FAB value:
library(ggstatsplot)
my_function <- function(df, x){
ggscatterstats(
df %>% filter(FAB == x),
x = AGE,
y = Wbc,
type = "np" # try the "robust" correlation too! It might be even better here
#, marginal.type = "boxplot"
)
}
M0 <- my_function(df2, "M0")
M1 <- my_function(df2, "M1")
M2 <- my_function(df2, "M2")
M3 <- my_function(df2, "M3")
.
.
.
library(patchwork)
(M0 / M1 | M2 / M3)

Subtract Values based on Multiple Grouping Factors

I have a dataset with phosphorus concentrations for 17 separate days (concentrations are cumulative, so increase from Day1 to Day102 in all cases). There are 22 different treatments (column = Trmt). Each Trmt has 3 Levels (Level = X, Y, Z). 2 measurements per Level for a total of 6 per Trmt.
My goal is to plot a 3-line graph of Days (x-axis; numeric) by Concentration (y-axis) using ggplot2. Data should be grouped by Trmt, Level and day for a total of 51 measurements (3 lines x 17 days).
My data looks as follows:
structure(list(Trmt = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 8L, 8L, 8L, 8L, 8L, 8L, 7L, 7L, 7L, 7L, 7L, 7L, 10L, 10L, 10L, 10L, 10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L, 12L, 12L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 11L, 11L, 14L, 14L, 14L, 14L, 14L, 14L, 13L, 13L, 13L, 13L, 13L, 13L, 16L, 16L, 16L, 16L, 16L, 16L, 15L, 15L, 15L, 15L, 15L, 15L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 17L, 17L, 17L, 17L, 17L, 20L, 20L, 20L, 20L, 20L, 20L, 19L, 19L, 19L, 19L, 19L, 19L, 22L, 22L, 22L, 22L, 22L, 22L, 21L, 21L, 21L, 21L, 21L, 21L), .Label = c("A01nF", "A01yT", "A02nF", "A02yT", "A03nF", "A03yT", "A04nF", "A04yT", "A05nF", "A05yT", "A06nF", "A06yT", "A07nF", "A07yT", "A08nF", "A08yT", "A10nF", "A10yT", "A11nF", "A11yT", "A13nF", "A13yT"), class = "factor"), Level = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("X", "Y", "Z"), class = "factor"), Day1 = c(3L, 1L, 4L, 2L, 4L, 2L, 5L, 4L, 1L, 2L, 5L, 1L, 5L, 2L, 5L, 5L, 3L, 5L, 3L, 3L, 1L, 4L, 1L, 1L, 5L, 4L, 1L, 5L, 4L, 5L, 3L, 5L, 3L, 5L, 3L, 4L, 2L, 4L, 2L, 4L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 5L, 2L, 4L, 4L, 3L, 1L, 4L, 4L, 1L, 4L, 1L, 2L, 5L, 1L, 5L, 1L, 2L, 4L, 4L, 4L, 4L, 2L, 4L, 5L, 5L, 4L, 1L, 3L, 2L, 3L, 5L, 4L, 3L, 2L, 3L, 5L, 4L, 1L, 3L, 4L, 3L, 3L, 5L, 3L, 1L, 1L, 4L, 4L, 5L, 1L, 4L, 4L, 4L, 1L, 4L, 5L, 5L, 1L, 5L, 3L, 1L, 4L, 1L, 4L, 5L, 5L, 3L, 3L, 2L, 4L, 5L, 3L, 2L, 1L, 5L, 5L, 2L, 2L, 3L, 4L, 3L, 4L, 2L, 2L, 4L), Day2 = c(10L, 9L, 7L, 7L, 6L, 7L, 10L, 9L, 10L, 6L, 10L, 7L, 8L, 9L, 8L, 9L, 7L, 10L, 7L, 10L, 6L, 8L, 6L, 8L, 8L, 8L, 10L, 6L, 8L, 8L, 6L, 10L, 7L, 10L, 7L, 10L, 6L, 6L, 7L, 9L, 8L, 10L, 8L, 7L, 9L, 8L, 6L, 9L, 7L, 9L, 8L, 6L, 6L, 8L, 10L, 7L, 8L, 6L, 8L, 8L, 6L, 9L, 10L, 6L, 8L, 7L, 9L, 7L, 8L, 10L, 10L, 6L, 7L, 10L, 9L, 9L, 8L, 9L, 6L, 8L, 6L, 8L, 6L, 9L, 10L, 7L, 7L, 7L, 8L, 7L, 8L, 10L, 7L, 8L, 9L, 6L, 8L, 9L, 8L, 9L, 6L, 7L, 10L, 9L, 10L, 7L, 6L, 9L, 9L, 9L, 6L, 10L, 9L, 8L, 9L, 7L, 10L, 7L, 10L, 9L, 6L, 8L, 9L, 8L, 9L, 6L, 6L, 10L, 9L, 8L, 8L, 7L), Day4 = c(11L, 12L, 14L, 11L, 15L, 15L, 12L, 11L, 15L, 12L, 15L, 12L, 12L, 11L, 15L, 15L, 13L, 11L, 13L, 14L, 12L, 11L, 13L, 12L, 15L, 15L, 14L, 11L, 15L, 11L, 12L, 11L, 13L, 11L, 12L, 13L, 13L, 14L, 13L, 15L, 14L, 15L, 12L, 14L, 11L, 13L, 15L, 11L, 12L, 13L, 11L, 15L, 11L, 13L, 11L, 11L, 14L, 12L, 14L, 15L, 11L, 12L, 15L, 12L, 13L, 12L, 14L, 12L, 11L, 13L, 12L, 12L, 11L, 15L, 13L, 12L, 11L, 12L, 13L, 14L, 14L, 14L, 13L, 12L, 15L, 12L, 15L, 15L, 12L, 13L, 12L, 12L, 12L, 14L, 13L, 13L, 14L, 11L, 12L, 11L, 15L, 11L, 11L, 11L, 14L, 11L, 12L, 15L, 15L, 11L, 12L, 14L, 15L, 14L, 14L, 12L, 14L, 13L, 15L, 15L, 14L, 13L, 12L, 15L, 15L, 11L, 13L, 12L, 11L, 13L, 12L, 14L), Day7 = c(19L, 17L, 17L, 20L, 17L, 19L, 18L, 19L, 17L, 20L, 16L, 20L, 19L, 18L, 20L, 19L, 17L, 16L, 18L, 18L, 17L, 18L, 19L, 18L, 17L, 19L, 17L, 20L, 19L, 20L, 19L, 20L, 17L, 18L, 20L, 19L, 20L, 18L, 18L, 20L, 18L, 20L, 17L, 19L, 17L, 19L, 17L, 17L, 20L, 18L, 18L, 17L, 16L, 18L, 20L, 16L, 17L, 19L, 16L, 19L, 16L, 17L, 16L, 20L, 16L, 19L, 19L, 17L, 17L, 17L, 20L, 19L, 18L, 16L, 20L, 17L, 19L, 16L, 18L, 19L, 16L, 19L, 20L, 20L, 16L, 16L, 18L, 17L, 16L, 18L, 16L, 17L, 16L, 18L, 20L, 16L, 16L, 20L, 20L, 16L, 20L, 18L, 17L, 19L, 18L, 18L, 19L, 19L, 16L, 18L, 19L, 19L, 17L, 17L, 18L, 18L, 20L, 18L, 20L, 20L, 18L, 19L, 19L, 16L, 16L, 17L, 20L, 16L, 17L, 18L, 16L, 20L), Day10 = c(24L, 23L, 23L, 21L, 21L, 23L, 21L, 21L, 22L, 25L, 21L, 23L, 21L, 25L, 25L, 25L, 24L, 22L, 25L, 24L, 21L, 23L, 24L, 23L, 23L, 22L, 23L, 22L, 22L, 25L, 25L, 22L, 21L, 24L, 25L, 23L, 23L, 23L, 24L, 23L, 25L, 23L, 21L, 23L, 22L, 24L, 22L, 23L, 24L, 22L, 25L, 23L, 23L, 21L, 25L, 24L, 24L, 25L, 25L, 25L, 22L, 23L, 21L, 22L, 24L, 22L, 23L, 22L, 24L, 22L, 21L, 22L, 23L, 21L, 25L, 25L, 22L, 21L, 25L, 24L, 22L, 21L, 25L, 24L, 21L, 24L, 25L, 22L, 23L, 22L, 24L, 23L, 25L, 25L, 23L, 25L, 22L, 23L, 23L, 23L, 22L, 25L, 22L, 23L, 24L, 25L, 22L, 21L, 21L, 22L, 23L, 24L, 21L, 24L, 23L, 23L, 25L, 24L, 25L, 23L, 22L, 25L, 25L, 25L, 21L, 22L, 23L, 21L, 24L, 24L, 25L, 21L), Day13 = c(29L, 29L, 26L, 27L, 30L, 30L, 30L, 26L, 30L, 29L, 30L, 27L, 26L, 29L, 28L, 26L, 30L, 28L, 29L, 27L, 28L, 26L, 29L, 28L, 30L, 26L, 27L, 30L, 26L, 29L, 26L, 28L, 29L, 28L, 29L, 28L, 27L, 27L, 28L, 26L, 26L, 27L, 27L, 29L, 27L, 29L, 27L, 30L, 26L, 27L, 30L, 26L, 29L, 29L, 27L, 29L, 26L, 29L, 28L, 28L, 29L, 30L, 28L, 30L, 30L, 30L, 28L, 29L, 28L, 27L, 28L, 27L, 27L, 28L, 27L, 30L, 27L, 30L, 27L, 28L, 29L, 27L, 30L, 29L, 30L, 30L, 26L, 30L, 29L, 30L, 27L, 26L, 27L, 27L, 28L, 26L, 30L, 28L, 30L, 30L, 30L, 30L, 26L, 28L, 27L, 26L, 29L, 26L, 29L, 26L, 30L, 29L, 30L, 26L, 27L, 30L, 29L, 30L, 27L, 30L, 28L, 26L, 30L, 27L, 30L, 26L, 28L, 29L, 26L, 28L, 28L, 26L), Day18 = c(32L, 31L, 32L, 31L, 31L, 34L, 32L, 34L, 32L, 33L, 31L, 34L, 35L, 34L, 34L, 32L, 33L, 35L, 32L, 35L, 31L, 31L, 33L, 33L, 32L, 31L, 32L, 31L, 32L, 34L, 33L, 33L, 34L, 31L, 35L, 35L, 31L, 34L, 32L, 32L, 34L, 33L, 34L, 33L, 33L, 35L, 35L, 31L, 35L, 31L, 33L, 34L, 31L, 33L, 34L, 32L, 32L, 33L, 31L, 32L, 35L, 34L, 31L, 32L, 34L, 35L, 34L, 31L, 34L, 33L, 35L, 35L, 31L, 32L, 35L, 34L, 31L, 32L, 32L, 33L, 32L, 35L, 32L, 32L, 35L, 33L, 34L, 32L, 34L, 35L, 34L, 33L, 33L, 31L, 31L, 31L, 35L, 34L, 33L, 32L, 33L, 33L, 33L, 35L, 34L, 33L, 31L, 34L, 34L, 34L, 34L, 33L, 33L, 31L, 31L, 31L, 33L, 33L, 35L, 32L, 32L, 31L, 31L, 32L, 33L, 32L, 34L, 34L, 31L, 35L, 31L, 35L), Day23 = c(39L, 40L, 38L, 37L, 37L, 38L, 37L, 36L, 37L, 36L, 36L, 38L, 40L, 38L, 37L, 36L, 36L, 40L, 40L, 40L, 40L, 39L, 40L, 36L, 38L, 36L, 36L, 37L, 38L, 37L, 36L, 37L, 39L, 39L, 38L, 38L, 37L, 40L, 36L, 38L, 37L, 40L, 36L, 37L, 39L, 38L, 38L, 38L, 40L, 38L, 37L, 36L, 38L, 36L, 36L, 36L, 39L, 40L, 39L, 37L, 39L, 39L, 37L, 36L, 37L, 39L, 39L, 37L, 36L, 37L, 40L, 36L, 39L, 40L, 39L, 40L, 39L, 38L, 39L, 40L, 37L, 40L, 38L, 38L, 38L, 40L, 40L, 36L, 39L, 39L, 39L, 39L, 38L, 37L, 37L, 36L, 37L, 39L, 37L, 40L, 40L, 40L, 38L, 38L, 39L, 38L, 36L, 37L, 36L, 36L, 40L, 39L, 39L, 39L, 36L, 39L, 38L, 40L, 36L, 37L, 38L, 38L, 36L, 37L, 39L, 36L, 40L, 40L, 39L, 38L, 37L, 38L), Day28 = c(42L, 43L, 43L, 44L, 44L, 44L, 42L, 42L, 43L, 42L, 45L, 43L, 43L, 43L, 42L, 44L, 42L, 44L, 45L, 44L, 44L, 45L, 44L, 41L, 41L, 42L, 44L, 44L, 44L, 45L, 43L, 42L, 43L, 42L, 41L, 44L, 43L, 43L, 42L, 42L, 44L, 42L, 42L, 42L, 45L, 44L, 45L, 42L, 43L, 45L, 45L, 44L, 41L, 42L, 42L, 41L, 44L, 44L, 44L, 44L, 42L, 45L, 41L, 42L, 45L, 43L, 44L, 45L, 44L, 42L, 41L, 43L, 41L, 44L, 43L, 41L, 45L, 42L, 45L, 41L, 45L, 41L, 45L, 42L, 45L, 42L, 45L, 45L, 41L, 41L, 43L, 41L, 41L, 42L, 43L, 41L, 42L, 44L, 43L, 45L, 41L, 41L, 44L, 41L, 44L, 43L, 43L, 45L, 44L, 41L, 44L, 43L, 42L, 45L, 45L, 41L, 45L, 42L, 41L, 44L, 41L, 41L, 41L, 43L, 41L, 41L, 45L, 41L, 42L, 45L, 41L, 44L), Day35 = c(50L, 50L, 50L, 50L, 48L, 46L, 50L, 46L, 48L, 50L, 50L, 50L, 46L, 49L, 46L, 47L, 49L, 49L, 48L, 49L, 46L, 47L, 49L, 46L, 49L, 50L, 49L, 46L, 49L, 50L, 46L, 48L, 50L, 46L, 50L, 48L, 46L, 48L, 50L, 50L, 47L, 47L, 47L, 47L, 47L, 49L, 48L, 46L, 46L, 48L, 50L, 46L, 49L, 48L, 46L, 49L, 50L, 49L, 48L, 48L, 48L, 50L, 49L, 47L, 48L, 50L, 50L, 46L, 47L, 46L, 48L, 48L, 48L, 47L, 49L, 48L, 49L, 46L, 47L, 50L, 47L, 50L, 47L, 47L, 46L, 46L, 47L, 50L, 49L, 49L, 48L, 47L, 46L, 50L, 46L, 50L, 50L, 46L, 47L, 47L, 49L, 50L, 50L, 46L, 47L, 50L, 47L, 48L, 46L, 50L, 49L, 46L, 46L, 50L, 50L, 49L, 46L, 49L, 46L, 46L, 46L, 48L, 47L, 47L, 50L, 47L, 46L, 48L, 50L, 48L, 46L, 46L), Day42 = c(52L, 51L, 53L, 53L, 54L, 55L, 55L, 54L, 52L, 51L, 55L, 51L, 54L, 53L, 53L, 55L, 54L, 55L, 51L, 51L, 55L, 54L, 54L, 53L, 55L, 53L, 52L, 53L, 53L, 51L, 54L, 54L, 55L, 53L, 54L, 55L, 51L, 51L, 54L, 52L, 51L, 51L, 55L, 54L, 54L, 52L, 52L, 55L, 55L, 51L, 55L, 52L, 55L, 51L, 53L, 52L, 53L, 54L, 51L, 54L, 54L, 55L, 52L, 54L, 52L, 52L, 51L, 52L, 55L, 52L, 54L, 51L, 52L, 55L, 51L, 52L, 55L, 54L, 52L, 53L, 53L, 52L, 55L, 51L, 51L, 55L, 52L, 55L, 55L, 55L, 53L, 52L, 53L, 54L, 52L, 52L, 52L, 52L, 53L, 51L, 54L, 54L, 51L, 53L, 55L, 51L, 54L, 54L, 54L, 53L, 53L, 54L, 54L, 55L, 52L, 52L, 54L, 51L, 52L, 51L, 51L, 55L, 52L, 51L, 51L, 53L, 54L, 51L, 51L, 54L, 55L, 52L), Day52 = c(59L, 57L, 56L, 58L, 59L, 59L, 57L, 59L, 57L, 56L, 58L, 58L, 60L, 59L, 56L, 56L, 60L, 57L, 60L, 57L, 59L, 56L, 60L, 59L, 59L, 56L, 60L, 58L, 60L, 57L, 57L, 60L, 56L, 57L, 59L, 60L, 56L, 58L, 57L, 57L, 58L, 58L, 59L, 56L, 58L, 56L, 57L, 60L, 58L, 59L, 58L, 56L, 56L, 57L, 60L, 59L, 60L, 58L, 59L, 60L, 57L, 60L, 59L, 57L, 60L, 56L, 57L, 56L, 58L, 60L, 56L, 58L, 56L, 60L, 57L, 57L, 57L, 60L, 58L, 59L, 58L, 60L, 59L, 58L, 56L, 56L, 58L, 57L, 60L, 56L, 58L, 56L, 57L, 58L, 58L, 60L, 59L, 60L, 59L, 59L, 59L, 57L, 57L, 60L, 59L, 57L, 57L, 58L, 59L, 57L, 59L, 58L, 60L, 59L, 56L, 57L, 57L, 56L, 57L, 60L, 58L, 57L, 56L, 59L, 59L, 59L, 57L, 57L, 58L, 56L, 58L, 60L), Day62 = c(67L, 65L, 68L, 65L, 69L, 70L, 69L, 66L, 65L, 70L, 70L, 65L, 67L, 68L, 65L, 67L, 65L, 66L, 66L, 68L, 68L, 66L, 65L, 67L, 66L, 69L, 69L, 69L, 68L, 67L, 66L, 69L, 65L, 65L, 69L, 66L, 69L, 68L, 69L, 67L, 65L, 69L, 69L, 69L, 70L, 67L, 65L, 65L, 65L, 66L, 66L, 69L, 68L, 66L, 67L, 66L, 70L, 70L, 70L, 69L, 70L, 70L, 67L, 66L, 65L, 69L, 67L, 66L, 70L, 70L, 70L, 65L, 66L, 67L, 66L, 66L, 67L, 68L, 70L, 67L, 69L, 66L, 67L, 65L, 70L, 65L, 70L, 66L, 66L, 69L, 68L, 65L, 65L, 67L, 68L, 67L, 69L, 68L, 69L, 66L, 68L, 70L, 69L, 68L, 70L, 66L, 69L, 66L, 66L, 67L, 65L, 69L, 69L, 67L, 70L, 65L, 70L, 69L, 66L, 68L, 67L, 68L, 66L, 65L, 67L, 70L, 66L, 67L, 66L, 67L, 67L, 70L), Day72 = c(74L, 74L, 71L, 75L, 74L, 71L, 75L, 71L, 75L, 71L, 72L, 72L, 75L, 73L, 75L, 74L, 74L, 74L, 71L, 74L, 72L, 71L, 71L, 74L, 74L, 73L, 72L, 73L, 71L, 71L, 75L, 72L, 73L, 74L, 75L, 73L, 71L, 71L, 74L, 71L, 73L, 75L, 75L, 74L, 71L, 75L, 74L, 72L, 72L, 71L, 72L, 75L, 73L, 74L, 71L, 75L, 75L, 73L, 72L, 73L, 73L, 72L, 75L, 72L, 71L, 72L, 73L, 72L, 72L, 74L, 72L, 72L, 73L, 75L, 74L, 75L, 73L, 74L, 75L, 72L, 75L, 73L, 71L, 71L, 72L, 74L, 72L, 75L, 71L, 71L, 71L, 73L, 72L, 71L, 75L, 75L, 74L, 73L, 71L, 71L, 72L, 71L, 71L, 74L, 72L, 73L, 71L, 75L, 74L, 75L, 74L, 73L, 73L, 73L, 72L, 75L, 73L, 71L, 71L, 72L, 72L, 71L, 71L, 71L, 72L, 73L, 75L, 75L, 72L, 73L, 75L, 75L), Day82 = c(76L, 78L, 78L, 78L, 79L, 77L, 78L, 77L, 80L, 79L, 80L, 76L, 76L, 80L, 80L, 80L, 78L, 78L, 78L, 78L, 80L, 78L, 76L, 79L, 76L, 77L, 76L, 79L, 78L, 76L, 76L, 79L, 79L, 77L, 77L, 77L, 78L, 78L, 80L, 77L, 77L, 76L, 77L, 79L, 78L, 78L, 78L, 80L, 79L, 76L, 79L, 77L, 76L, 80L, 78L, 77L, 79L, 80L, 77L, 80L, 78L, 79L, 78L, 76L, 76L, 79L, 77L, 77L, 78L, 78L, 79L, 78L, 78L, 78L, 80L, 79L, 78L, 77L, 78L, 78L, 78L, 79L, 80L, 77L, 77L, 80L, 77L, 80L, 77L, 76L, 77L, 76L, 77L, 77L, 80L, 79L, 77L, 78L, 80L, 80L, 79L, 80L, 79L, 79L, 78L, 76L, 76L, 79L, 79L, 80L, 79L, 78L, 76L, 79L, 77L, 77L, 76L, 76L, 78L, 78L, 79L, 78L, 76L, 78L, 79L, 76L, 77L, 78L, 76L, 79L, 78L, 77L), Day92 = c(85L, 84L, 85L, 85L, 83L, 82L, 83L, 82L, 85L, 85L, 82L, 85L, 85L, 85L, 81L, 81L, 84L, 81L, 85L, 82L, 85L, 84L, 81L, 82L, 83L, 82L, 84L, 84L, 81L, 85L, 83L, 85L, 82L, 81L, 83L, 83L, 85L, 83L, 81L, 83L, 82L, 84L, 83L, 83L, 82L, 85L, 85L, 82L, 82L, 82L, 85L, 81L, 81L, 82L, 82L, 84L, 81L, 85L, 81L, 82L, 81L, 81L, 85L, 83L, 81L, 83L, 83L, 84L, 83L, 85L, 85L, 83L, 81L, 85L, 81L, 84L, 83L, 83L, 85L, 83L, 82L, 82L, 82L, 83L, 82L, 83L, 81L, 84L, 83L, 84L, 82L, 83L, 81L, 83L, 81L, 82L, 82L, 82L, 85L, 85L, 84L, 81L, 81L, 81L, 84L, 81L, 84L, 81L, 81L, 84L, 84L, 83L, 83L, 82L, 82L, 81L, 85L, 85L, 82L, 83L, 81L, 83L, 82L, 84L, 83L, 82L, 84L, 81L, 83L, 82L, 84L, 85L), Day102 = c(89L, 88L, 88L, 90L, 88L, 90L, 87L, 88L, 89L, 87L, 90L, 86L, 86L, 89L, 86L, 89L, 90L, 88L, 87L, 88L, 88L, 87L, 90L, 86L, 90L, 87L, 88L, 89L, 88L, 90L, 88L, 87L, 89L, 90L, 88L, 87L, 89L, 88L, 87L, 86L, 90L, 86L, 89L, 89L, 90L, 88L, 90L, 86L, 88L, 88L, 90L, 89L, 88L, 88L, 90L, 87L, 88L, 88L, 87L, 90L, 89L, 87L, 90L, 90L, 86L, 87L, 86L, 90L, 88L, 87L, 86L, 88L, 90L, 86L, 89L, 90L, 87L, 87L, 88L, 86L, 86L, 89L, 89L, 86L, 87L, 86L, 86L, 88L, 88L, 88L, 89L, 90L, 88L, 86L, 88L, 88L, 87L, 88L, 90L, 89L, 89L, 86L, 90L, 89L, 89L, 88L, 90L, 88L, 86L, 90L, 90L, 87L, 89L, 90L, 90L, 88L, 88L, 89L, 90L, 88L, 90L, 90L, 87L, 89L, 90L, 90L, 90L, 89L, 86L, 88L, 89L, 88L)), class = "data.frame", row.names = c(NA, -132L))
Required libraries:
tidyr, plyr, ggplot2
The steps that I have taken so far are to:
Convert the data to long format (df = name of dataset):
Fig1 <- gather(df, day, phosphorus, Day1:Day102, factor_key=TRUE)
Change the factor day to numeric
df$day2 <-revalue(df$day, c("Day1"="1", "Day2"="2", "Day4"="4", "Day7"="7", "Day10"="10", "Day13"="13", "Day18"="18", "Day23" = "23","Day28" = "28", "Day35" = "35", "Day42" = "42", "Day52" = "52", "Day62" = "62", "Day72" = "72", "Day82" = "82", Day92" = "92", "Day102" = "102"))
and
df$day3 <- as.numeric(as.character(df$day2))
Group by Trmt, Level and day3
GroupedDF <- df %>% group_by(Trmt, Level, day3)
GroupedCO2M <- GroupedDF %>% summarise(disp = mean(phosphorus))
I would now like to subtract values by accounting for Trmt and Level, thus reducing the number of rows from 102 to 51. I would like to subtract 'yT' Trmt cases from respective 'nF' cases, uniquely for each Level (X, Y and Z). For example, subtract A01yT_X from A01nf_X, A01yT_Y from A01nf_Y, A01yT_Z from A01nf_Z etc. This should give a total of 51 points, 17 for each Level.
Here is a figure of what I have in mind:
Many thanks for any advice.
thanks for sharing the data. The data you have posted is a bit long, hence might not be able to totally copy and paste
Your data is in the wide format, and you need to find the average for each measurement between similar groups (defined by Day, Level, Treatment). So we can work on this in the wide format:
tmp <- Data %>% group_by(Trmt,Level) %>% summarise_all(mean)
> head(tmp)
# A tibble: 6 x 19
# Groups: Trmt [2]
Trmt Level Day1 Day2 Day4 Day7 Day10 Day13 Day18 Day23 Day28 Day35 Day42
<fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A01nF X 3.5 8 12 19 23 29.5 32.5 36.5 42 50 53
2 A01nF Y 4.5 9.5 13 17.5 21 28 32.5 36 43.5 48 54.5
3 A01nF Z 1 8.5 13.5 18.5 22.5 28.5 33 37.5 43 49 51.5
4 A01yT X 2.5 8.5 11 19.5 22.5 28 31.5 38 43 50 52.5
5 A01yT Y 2.5 7.5 13.5 17 22 29.5 31 38.5 43.5 49 52.5
6 A01yT Z 3 7 14.5 18 23 28 33 38 43.5 48 54
This gives you the average for each Trmt,Level, and each column (Day) is average separately. Next step is to define the 2 subgroups under Trmt (nF and yT for A01,A02..), and for this we can introduce a subgroup called "site", which is Trmt without the nF,yT. Once you group your data.frame with this "site" and level, the first row will always be nF, and 2nd row yT, so taking the diff for all your Day columns within this grouping, will give you the difference. So we do it like this:
# need to ungroup Trmt to remove it later
tmp <- tmp%>% ungroup(Trmt) %>%
mutate(site = sub("[yn][TF]","",Trmt)) %>%
select(-Trmt) %>%
group_by(site,Level) %>%
summarize_all(diff)
Now you have the nF - yT values for each treatment, each level and each day
> head(tmp)
# A tibble: 6 x 19
# Groups: site [2]
site Level Day1 Day2 Day4 Day7 Day10 Day13 Day18 Day23 Day28 Day35 Day42
<chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A01 X -1 0.5 -1 0.5 -0.5 -1.5 -1 1.5 1 0 -0.5
2 A01 Y -2 -2 0.5 -0.5 1 1.5 -1.5 2.5 0 1 -2
3 A01 Z 2 -1.5 1 -0.5 0.5 -0.5 0 0.5 0.5 -1 2.5
4 A02 X 1.5 1 1.5 1 -1 -1.5 2 -1.5 -1.5 -1 2
5 A02 Y 0.5 0 -1.5 -1 0.5 1.5 -0.5 -3 -1.5 0 1
6 A02 Z 4 2 1 0.5 1.5 0 2.5 0.5 0.5 1.5 0
Come the last part, which is to plot. We convert it to long and also make "Day", a numeric form of day.
plotdf <- gather(tmp, day, Diff, Day1:Day102, factor_key=TRUE) %>%
mutate(Day=as.numeric(sub("Day","",day)))
# and plot
ggplot(plotdf,aes(x=Day,y=Diff,col=Level,shape=Level)) + geom_line() + geom_point() + facet_wrap(~site) + scale_color_manual(values=c("grey10","grey40","grey80"))
Plot above shows the difference for each site. For diff that is the average across all sites:
meandf <- plotdf %>% group_by(Level,Day) %>% summarize(Diff=mean(Diff))
ggplot(meandf,aes(x=Day,y=Diff,col=Level,shape=Level)) + geom_line() + geom_point() + scale_color_manual(values=c("grey10","grey40","grey80"))
example dataset, subsetted for Day1, Day2 and Day4
Data <- structure(list(Trmt = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L,
3L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 8L, 8L, 8L,
8L, 8L, 8L, 7L, 7L, 7L, 7L, 7L, 7L, 10L, 10L, 10L, 10L, 10L,
10L, 9L, 9L, 9L, 9L, 9L, 9L, 12L, 12L, 12L, 12L, 12L, 12L, 11L,
11L, 11L, 11L, 11L, 11L, 14L, 14L, 14L, 14L, 14L, 14L, 13L, 13L,
13L, 13L, 13L, 13L, 16L, 16L, 16L, 16L, 16L, 16L, 15L, 15L, 15L,
15L, 15L, 15L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 17L, 17L, 17L,
17L, 17L, 20L, 20L, 20L, 20L, 20L, 20L, 19L, 19L, 19L, 19L, 19L,
19L, 22L, 22L, 22L, 22L, 22L, 22L, 21L, 21L, 21L, 21L, 21L, 21L
), .Label = c("A01nF", "A01yT", "A02nF", "A02yT", "A03nF", "A03yT",
"A04nF", "A04yT", "A05nF", "A05yT", "A06nF", "A06yT", "A07nF",
"A07yT", "A08nF", "A08yT", "A10nF", "A10yT", "A11nF", "A11yT",
"A13nF", "A13yT"), class = "factor"), Level = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L), .Label = c("X", "Y", "Z"), class = "factor"), Day1 = c(3L,
1L, 4L, 2L, 4L, 2L, 5L, 4L, 1L, 2L, 5L, 1L, 5L, 2L, 5L, 5L, 3L,
5L, 3L, 3L, 1L, 4L, 1L, 1L, 5L, 4L, 1L, 5L, 4L, 5L, 3L, 5L, 3L,
5L, 3L, 4L, 2L, 4L, 2L, 4L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 5L, 2L,
4L, 4L, 3L, 1L, 4L, 4L, 1L, 4L, 1L, 2L, 5L, 1L, 5L, 1L, 2L, 4L,
4L, 4L, 4L, 2L, 4L, 5L, 5L, 4L, 1L, 3L, 2L, 3L, 5L, 4L, 3L, 2L,
3L, 5L, 4L, 1L, 3L, 4L, 3L, 3L, 5L, 3L, 1L, 1L, 4L, 4L, 5L, 1L,
4L, 4L, 4L, 1L, 4L, 5L, 5L, 1L, 5L, 3L, 1L, 4L, 1L, 4L, 5L, 5L,
3L, 3L, 2L, 4L, 5L, 3L, 2L, 1L, 5L, 5L, 2L, 2L, 3L, 4L, 3L, 4L,
2L, 2L, 4L), Day2 = c(10L, 9L, 7L, 7L, 6L, 7L, 10L, 9L, 10L,
6L, 10L, 7L, 8L, 9L, 8L, 9L, 7L, 10L, 7L, 10L, 6L, 8L, 6L, 8L,
8L, 8L, 10L, 6L, 8L, 8L, 6L, 10L, 7L, 10L, 7L, 10L, 6L, 6L, 7L,
9L, 8L, 10L, 8L, 7L, 9L, 8L, 6L, 9L, 7L, 9L, 8L, 6L, 6L, 8L,
10L, 7L, 8L, 6L, 8L, 8L, 6L, 9L, 10L, 6L, 8L, 7L, 9L, 7L, 8L,
10L, 10L, 6L, 7L, 10L, 9L, 9L, 8L, 9L, 6L, 8L, 6L, 8L, 6L, 9L,
10L, 7L, 7L, 7L, 8L, 7L, 8L, 10L, 7L, 8L, 9L, 6L, 8L, 9L, 8L,
9L, 6L, 7L, 10L, 9L, 10L, 7L, 6L, 9L, 9L, 9L, 6L, 10L, 9L, 8L,
9L, 7L, 10L, 7L, 10L, 9L, 6L, 8L, 9L, 8L, 9L, 6L, 6L, 10L, 9L,
8L, 8L, 7L), Day4 = c(11L, 12L, 14L, 11L, 15L, 15L, 12L, 11L,
15L, 12L, 15L, 12L, 12L, 11L, 15L, 15L, 13L, 11L, 13L, 14L, 12L,
11L, 13L, 12L, 15L, 15L, 14L, 11L, 15L, 11L, 12L, 11L, 13L, 11L,
12L, 13L, 13L, 14L, 13L, 15L, 14L, 15L, 12L, 14L, 11L, 13L, 15L,
11L, 12L, 13L, 11L, 15L, 11L, 13L, 11L, 11L, 14L, 12L, 14L, 15L,
11L, 12L, 15L, 12L, 13L, 12L, 14L, 12L, 11L, 13L, 12L, 12L, 11L,
15L, 13L, 12L, 11L, 12L, 13L, 14L, 14L, 14L, 13L, 12L, 15L, 12L,
15L, 15L, 12L, 13L, 12L, 12L, 12L, 14L, 13L, 13L, 14L, 11L, 12L,
11L, 15L, 11L, 11L, 11L, 14L, 11L, 12L, 15L, 15L, 11L, 12L, 14L,
15L, 14L, 14L, 12L, 14L, 13L, 15L, 15L, 14L, 13L, 12L, 15L, 15L,
11L, 13L, 12L, 11L, 13L, 12L, 14L)), class = "data.frame", row.names = c(NA,
-132L))

Converting table to matrix for clustering analysis

So I have a table that that tells the freq (N) of two variables (V1 and V2) appearing together. Here is a sample:
> dput(ans)
structure(list(V1 = c(2L, 7L, 7L, 7L, 7L, 7L, 9L, 9L, 9L, 10L,
10L, 11L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L,
14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 20L,
20L, 21L, 25L, 29L, 29L, 29L, 33L, 35L, 38L, 42L, 46L, 46L, 46L,
46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 47L, 47L, 48L, 52L,
52L, 52L, 52L, 52L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 57L, 57L,
57L, 57L, 57L, 57L, 58L, 58L, 58L, 58L, 58L, 59L, 59L, 59L, 59L,
60L, 60L, 60L, 61L, 61L, 62L, 65L, 65L, 65L, 65L, 67L, 67L, 67L,
68L, 70L, 70L, 71L, 73L, 73L, 74L), V2 = c(3L, 8L, 20L, 21L,
22L, 78L, 10L, 11L, 12L, 11L, 12L, 12L, 38L, 39L, 14L, 15L, 16L,
17L, 18L, 29L, 64L, 15L, 16L, 17L, 18L, 16L, 17L, 18L, 17L, 18L,
29L, 30L, 18L, 29L, 30L, 21L, 22L, 22L, 26L, 30L, 47L, 64L, 34L,
36L, 39L, 43L, 47L, 48L, 49L, 52L, 65L, 67L, 70L, 71L, 72L, 73L,
74L, 75L, 48L, 49L, 49L, 65L, 67L, 73L, 74L, 75L, 57L, 58L, 59L,
60L, 61L, 62L, 63L, 58L, 59L, 60L, 61L, 62L, 63L, 59L, 60L, 61L,
62L, 63L, 60L, 61L, 62L, 63L, 61L, 62L, 63L, 62L, 63L, 63L, 67L,
73L, 74L, 75L, 73L, 74L, 75L, 69L, 71L, 72L, 72L, 74L, 75L, 75L
), N = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)),
row.names = c(NA, -108L), class = c("data.table", "data.frame"))
I want to convert it to a 696x696 matrix where I have V1 and V2 as the rows and columns (from 1-696 in both rows and columns), and N as the values. V1 and V2 represents materials in my dataset. If a V1 and V2 combination does not exist in the table, the value should be 0. This is because I want to cluster the materials based on their freq of appearing together, using the hclust with centroid function.
EDIT: Only way I can give an example of the expected output is a picture from an article i'm following:
To duplicate the picture you have added to the original question, I'd do something like this:
# convert your contingency table to the appropriate matrix
M <- sparseMatrix(df$V1, df$V2, x = df$N, dims = c(696, 696))
M <- as.matrix(M)
rownames(M) <- 1:696
colnames(M) <- 1:696
There are many formatting options for displaying the matrix to image, but to start, try:
View(M)
That's a common task for rasters... using the raster package and converting it back to matrix may not be the fastest solution, but it works well on your test data (here named as df)...
library(raster)
r <- raster(nrow=696, ncol=696, crs = NA,
xmn = 0, xmx = 696, ymn = 0, ymx = 696)
# some indexing corrections
new_xy <- cbind(df[, 2] - 1, 697 - df[, 1])
cells <- cellFromXY(r, new_xy)
r[] <- 0
r[cells] <- unlist(df[, 3])
r <- as.matrix(r)
Then we can check with str(r) it is a 696x696 numeric, and max(r) is a value of 3, as expected. Also, r[2, 3] = 1

Internal ordering of facets ggplot2

I'm trying to plot a facets in ggplot2 but I struggle to get the internal ordering of the different facets right. The data looks like this:
head(THAT_EXT)
ID FILE GENRE NODE
1 CKC_1823_01 CKC Novels better
2 CKC_1824_01 CKC Novels better
3 EW9_192_03 EW9 Popular Science better
4 H0B_265_01 H0B Popular Science sad
5 CS2_231_03 CS2 Academic Prose desirable
6 FED_8_05 FED Academic Prose certain
str(THAT_EXT)
'data.frame': 851 obs. of 4 variables:
$ ID : Factor w/ 851 levels "A05_122_01","A05_277_07",..: 345 346 439 608 402 484 319 395 228 5 ...
$ FILE : Factor w/ 241 levels "A05","A06","A0K",..: 110 110 127 169 120 135 105 119 79 2 ...
$ GENRE: Factor w/ 5 levels "Academic Prose",..: 4 4 5 5 1 1 1 5 1 5 ...
$ NODE : Factor w/ 115 levels "absurd","accepted",..: 14 14 14 89 23 16 59 59 18 66 ...
Part of the problem is that can't get the sorting right. Here is the code for the sorting of NODE that I use:
THAT_EXT <- within(THAT_EXT,
NODE <- factor(NODE,
levels=names(sort(table(NODE),
decreasing=TRUE))))
When I plot this with the code below I get a graphs in which the NODE is not correctly sorted in the individual GENREs since different NODEs are more frequent in different GENREs:
p1 <-
ggplot(THAT_EXT, aes(x=NODE)) +
geom_bar() +
scale_x_discrete("THAT_EXT", breaks=NULL) + # supress tick marks on x axis
facet_wrap(~GENRE)
What I want is for every facet to have NODE sorted in decreasing order for that particular GENRE. Can anyone help with this?
structure(list(ID = structure(c(1L, 2L, 3L, 4L, 10L, 133L, 137L,
138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L,
149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L,
160L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L,
171L, 172L, 173L, 174L, 175L, 176L, 177L, 178L, 179L, 180L, 181L,
182L, 183L, 184L, 185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L,
193L, 194L, 195L, 196L, 197L, 198L, 199L, 200L, 201L, 202L, 203L,
204L, 205L, 206L, 207L, 208L, 212L, 213L, 214L, 215L, 216L, 217L,
218L, 219L, 220L, 221L, 222L, 223L, 224L, 225L, 226L, 227L, 228L,
229L, 230L, 231L, 232L, 233L, 234L, 235L, 236L, 237L, 238L, 239L,
240L, 241L, 267L, 268L, 269L, 270L, 271L, 272L, 273L, 274L, 275L,
276L, 277L, 278L, 279L, 280L, 281L, 282L, 283L, 284L, 290L, 291L,
298L, 299L, 300L, 303L, 304L, 305L, 306L, 307L, 308L, 309L, 310L,
313L, 314L, 315L, 316L, 317L, 318L, 319L, 327L, 328L, 329L, 330L,
331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L, 339L, 340L, 341L,
342L, 343L, 344L, 345L, 346L, 347L, 348L, 352L, 353L, 354L, 355L,
356L, 357L, 358L, 359L, 360L, 349L, 350L, 351L, 361L, 362L, 363L,
364L, 365L, 366L, 367L, 368L, 369L, 370L, 371L, 372L, 373L, 374L,
375L, 376L, 377L, 378L, 379L, 380L, 381L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 41L, 42L, 43L, 44L, 45L,
46L, 50L, 54L, 72L, 73L, 74L, 75L, 76L, 90L, 91L, 92L, 97L, 98L,
102L, 115L, 125L, 126L, 127L, 128L, 129L, 130L, 131L, 132L, 209L,
210L, 211L, 242L, 243L, 244L, 245L, 246L, 289L, 292L, 293L, 294L,
295L, 296L, 297L, 301L, 302L, 311L, 312L, 320L, 321L, 322L, 323L,
324L, 325L, 326L, 382L, 383L, 384L, 385L, 386L, 387L, 388L, 5L,
6L, 7L, 8L, 9L, 11L, 37L, 38L, 39L, 40L, 47L, 48L, 49L, 51L,
52L, 53L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L,
66L, 67L, 68L, 69L, 70L, 71L, 77L, 78L, 79L, 80L, 81L, 82L, 83L,
84L, 85L, 86L, 87L, 88L, 89L, 93L, 94L, 95L, 96L, 99L, 100L,
101L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L,
113L, 114L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L,
134L, 135L, 136L, 247L, 248L, 249L, 250L, 251L, 252L, 253L, 254L,
255L, 256L, 257L, 258L, 259L, 260L, 261L, 262L, 263L, 264L, 265L,
266L, 285L, 286L, 287L, 288L), .Label = c("A05_122_01", "A05_277_07",
"A05_400_01", "A05_99_01", "A06_1283_02", "A06_1389_01", "A06_1390_01",
"A06_1441_02", "A06_884_03", "A0K_1190_03", "A77_1684_01", "A8K_525_03",
"A8K_582_01", "A8K_645_01", "A8K_799_01", "A90_341_02", "A90_496_01",
"A94_217_01", "A94_472_01", "A94_477_03", "A9M_164_01", "A9M_259_03",
"A9N_199_01", "A9N_489_01", "A9N_591_01", "A9R_173_01", "A9R_425_02",
"A9W_536_02", "AA5_121_01", "AAE_203_01", "AAE_243_01", "AAE_412_01",
"AAW_14_03", "AAW_244_02", "AAW_297_04", "AAW_365_04", "ADG_1398_01",
"ADG_1500_01", "ADG_1507_01", "ADG_1516_01", "AHB_336_01", "AHB_421_01",
"AHJ_1090_02", "AHJ_619_01", "AR3_340_01", "AR3_91_03", "ARF_879_01",
"ARF_985_01", "ARF_991_02", "ARK_1891_01", "ASL_33_04", "ASL_43_01",
"ASL_9_01", "AT7_1031_01", "B09_1162_01", "B09_1475_01", "B09_1493_01",
"B09_1539_01", "B0G_197_01", "B0G_320_01", "B0N_1037_01", "B0N_624_01",
"B0N_645_02", "B0N_683_01", "B3G_313_04", "B3G_320_03", "B3G_398_02",
"B7M_1630_01", "B7M_1913_01", "BNN_746_02", "BNN_895_01", "BP7_2426_01",
"BP7_2777_01", "BP7_2898_01", "BP9_410_01", "BP9_599_01", "BPK_829_01",
"C93_1407_02", "C9A_181_01", "C9A_196_01", "C9A_365_01", "C9A_82_02",
"C9A_9_01", "CB9_306_02", "CB9_63_04", "CB9_86_01", "CBJ_439_01",
"CBJ_702_02", "CBJ_705_01", "CCM_320_01", "CCM_665_01", "CCM_669_02",
"CCN_1036_02", "CCN_1078_01", "CCN_1119_01", "CCN_784_01", "CCW_2284_02",
"CCW_2349_03", "CE7_242_02", "CE7_284_01", "CE7_39_01", "CEB_1675_01",
"CER_145_03", "CER_23_01", "CER_235_02", "CER_378_10", "CET_1056_02",
"CET_680_01", "CET_705_01", "CET_797_01", "CET_838_01", "CET_879_05",
"CET_946_03", "CET_986_01", "CEY_2977_01", "CJ3_107_02", "CJ3_114_03",
"CJ3_20_01", "CJ3_81_01", "CK2_112_01", "CK2_22_01", "CK2_392_01",
"CK2_42_01", "CK2_75_01", "CKC_1776_01", "CKC_1777_01", "CKC_1823_01",
"CKC_1824_01", "CKC_1860_01", "CKC_1883_01", "CKC_1883_02", "CKC_2127_01",
"CMN_1439_02", "CRM_5767_01", "CRM_5770_03", "CRM_5789_01", "CS2_110_01",
"CS2_131_01", "CS2_139_01", "CS2_187_01", "CS2_187_03", "CS2_231_03",
"CS2_249_02", "CS2_301_01", "CS2_35_01", "CS2_58_02", "EV6_16_01",
"EV6_206_02", "EV6_240_01", "EV6_244_02", "EV6_28_01", "EV6_30_01",
"EV6_32_01", "EV6_450_01", "EV6_69_01", "EV6_80_01", "EV6_91_01",
"FAC_1019_01", "FAC_1026_01", "FAC_1027_01", "FAC_1235_01", "FAC_1269_05",
"FAC_1270_05", "FAC_1393_01", "FAC_1406_03", "FAC_933_01", "FAC_950_01",
"FAC_960_01", "FED_105_01", "FED_120_02", "FED_21_02", "FED_281_02",
"FED_302_02", "FED_53_01", "FED_8_05", "FEF_498_03", "FEF_674_03",
"FR2_410_01", "FR2_557_02", "FR2_593_01", "FR2_691_01", "FR4_232_01",
"FR4_331_01", "FR4_346_01", "FS7_818_01", "FS7_919_01", "FU0_368_02",
"FYT_1138_01", "FYT_1183_01", "FYT_901_05", "G08_1336_01", "G1E_385_01",
"G1N_824_01", "G1N_860_01", "G1N_868_01", "G1N_975_01", "GU5_854_01",
"GUJ_423_01", "GUJ_501_01", "GUJ_611_01", "GUJ_629_03", "GUJ_700_01",
"GV0_10_01", "GV0_104_01", "GV0_111_01", "GV0_122_01", "GV0_160_01",
"GV0_232_02", "GV2_1465_01", "GV2_1899_01", "GV6_2683_01", "GW6_297_01",
"GW6_306_05", "GW6_307_01", "GW6_322_01", "GW6_330_02", "GW6_335_01",
"GW6_338_01", "GW6_367_02", "GW6_373_01", "GW6_407_01", "GW6_411_01",
"GW6_413_01", "GW6_421_01", "GW6_423_01", "GW6_424_01", "GW6_428_01",
"GW6_447_01", "GWM_480_01", "GWM_533_02", "GWM_554_02", "GWM_554_03",
"GWM_609_01", "GWM_609_04", "GWM_610_01", "GWM_730_01", "GWM_731_01",
"GWM_738_01", "GWM_804_06", "GWM_815_01", "GWM_832_03", "GVP_179_01",
"GVP_211_01", "GVP_393_02", "GVP_443_02", "GVP_710_01", "H0B_171_04",
"H0B_216_01", "H0B_265_01", "H0B_32_01", "H0B_361_03", "H0B_365_01",
"H0B_369_01", "H0B_74_01", "H0B_93_01", "H10_1002_01", "H10_1032_04",
"H10_653_01", "H10_803_01", "H10_824_01", "H10_825_03", "H10_881_01",
"H10_986_01", "H78_851_04", "H78_891_01", "H78_946_04", "H79_1959_19",
"H7S_110_05", "H7S_130_06", "H7S_131_03", "H7S_131_04", "H7S_146_01",
"H7S_148_01", "H7S_164_01", "H7S_179_01", "H7S_54_01", "H7S_56_05",
"H7S_62_03", "H7S_79_01", "H7S_8_01", "H7S_81_01", "H7S_83_01",
"H7S_87_01", "H7S_92_03", "H7X_1028_02", "H7X_1091_01", "H7X_691_01",
"H7X_695_01", "H8H_2917_01", "H8K_153_01", "H8K_55_01", "H8M_1897_01",
"H8M_2104_02", "H8T_3316_03", "H98_3204_01", "H98_3410_01", "H98_3490_02",
"H9R_130_02", "H9R_39_01", "H9S_1297_01", "HA2_3107_02", "HA2_3284_01",
"HPY_754_04", "HPY_785_09", "HPY_799_03", "HPY_807_04", "HPY_830_04",
"HPY_838_02", "HPY_843_01", "HPY_869_11", "HR7_190_01", "HR7_440_01",
"HTP_540_01", "HTP_585_01", "HTP_588_05", "HTP_593_01", "HTP_601_01",
"HTP_613_01", "HTP_648_02", "HTW_197_01", "HTW_494_01", "HTW_750_01",
"HWL_2770_01", "HWL_2919_01", "HWM_45_01", "HWM_45_02", "HXY_1047_03",
"HXY_701_01", "HXY_781_01", "HXY_783_01", "HXY_784_01", "HXY_836_01",
"HXY_931_01", "HXY_963_01", "HXY_972_01", "HXY_985_03", "HY6_1024_01",
"HY6_1025_01", "HY6_1164_01", "HY6_1223_01", "HY6_988_03", "HY6_989_01",
"HY8_160_01", "HY8_164_01", "HY8_292_03", "HY8_316_01", "HY9_778_03",
"HY9_845_02", "HYX_235_08", "HYX_245_01", "HYX_88_01", "J12_1474_02",
"J12_1492_01", "J12_1571_01", "J12_1845_01", "J14_341_01", "J18_597_04",
"J18_698_02", "J18_759_01", "J18_828_01", "J3R_197_01", "J3R_219_02",
"J3R_277_04", "J3T_267_01", "J3T_269_02", "J3T_57_02", "J41_41_02",
"J41_58_03", "J9B_133_03", "J9B_341_02", "J9B_341_03", "J9D_147_05",
"J9D_218_01", "J9D_411_01", "J9D_616_01", "J9D_616_02", "JNB_563_02",
"JT7_118_01", "JT7_129_02", "JT7_218_02", "JT7_344_02", "JXS_3663_01",
"JXU_407_01", "JXU_468_02", "JXU_559_01", "JXV_1439_04", "JXV_1592_01",
"JY1_100_01"), class = "factor"), GENRE = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L), .Label = c("Academic Prose", "Conversation", "News",
"Novels", "Popular Science"), class = "factor"), NODE = structure(c(9L,
10L, 10L, 10L, 4L, 10L, 71L, 35L, 49L, 6L, 5L, 15L, 28L, 44L,
64L, 64L, 28L, 28L, 18L, 18L, 32L, 18L, 58L, 10L, 72L, 28L, 18L,
10L, 64L, 10L, 35L, 64L, 64L, 69L, 8L, 10L, 50L, 69L, 49L, 49L,
15L, 69L, 10L, 49L, 8L, 64L, 49L, 10L, 69L, 18L, 61L, 67L, 67L,
61L, 57L, 69L, 11L, 10L, 64L, 10L, 59L, 61L, 49L, 10L, 59L, 1L,
61L, 35L, 54L, 54L, 39L, 44L, 61L, 64L, 69L, 1L, 23L, 49L, 49L,
8L, 69L, 49L, 69L, 49L, 49L, 69L, 35L, 49L, 49L, 49L, 35L, 10L,
49L, 48L, 10L, 49L, 11L, 44L, 50L, 11L, 50L, 69L, 49L, 10L, 59L,
68L, 47L, 69L, 49L, 35L, 29L, 8L, 49L, 50L, 35L, 10L, 35L, 8L,
35L, 8L, 10L, 35L, 10L, 10L, 10L, 35L, 44L, 61L, 35L, 44L, 28L,
47L, 39L, 39L, 49L, 61L, 43L, 60L, 19L, 10L, 10L, 10L, 44L, 44L,
62L, 44L, 10L, 59L, 10L, 61L, 1L, 53L, 33L, 10L, 8L, 8L, 64L,
64L, 10L, 57L, 61L, 64L, 66L, 19L, 61L, 64L, 10L, 10L, 8L, 19L,
35L, 28L, 10L, 61L, 35L, 42L, 35L, 28L, 32L, 64L, 10L, 18L, 28L,
25L, 35L, 35L, 10L, 18L, 10L, 22L, 55L, 28L, 10L, 1L, 55L, 51L,
1L, 38L, 28L, 28L, 33L, 10L, 44L, 29L, 16L, 8L, 28L, 69L, 32L,
10L, 61L, 20L, 35L, 10L, 28L, 10L, 32L, 10L, 46L, 59L, 64L, 35L,
66L, 2L, 35L, 28L, 30L, 18L, 69L, 32L, 10L, 28L, 17L, 36L, 64L,
61L, 10L, 64L, 33L, 3L, 37L, 26L, 28L, 64L, 44L, 28L, 64L, 64L,
6L, 6L, 64L, 50L, 32L, 8L, 64L, 50L, 28L, 24L, 18L, 47L, 35L,
40L, 24L, 55L, 44L, 22L, 1L, 49L, 44L, 18L, 45L, 63L, 64L, 35L,
12L, 35L, 10L, 35L, 10L, 10L, 10L, 44L, 44L, 44L, 65L, 44L, 55L,
32L, 49L, 64L, 39L, 69L, 1L, 60L, 7L, 14L, 44L, 33L, 10L, 19L,
10L, 70L, 53L, 8L, 61L, 61L, 44L, 61L, 65L, 28L, 68L, 69L, 27L,
61L, 28L, 72L, 34L, 61L, 32L, 10L, 49L, 35L, 49L, 10L, 10L, 69L,
39L, 40L, 19L, 59L, 53L, 49L, 49L, 44L, 49L, 35L, 49L, 61L, 61L,
1L, 10L, 28L, 49L, 35L, 49L, 61L, 50L, 69L, 35L, 61L, 35L, 50L,
10L, 28L, 69L, 61L, 21L, 69L, 29L, 35L, 35L, 35L, 11L, 69L, 8L,
41L, 56L, 35L, 61L, 69L, 49L, 49L, 49L, 1L, 13L, 64L, 64L, 52L,
44L, 64L, 64L, 50L, 49L, 69L, 11L, 59L, 49L, 31L), .Label = c("apparent",
"appropriate", "awful", "axiomatic", "best", "better", "breathtaking",
"certain", "characteristic", "clear", "conceivable", "convenient",
"crucial", "cruel", "desirable", "disappointing", "emphatic",
"essential", "evident", "expected", "extraordinary", "fair",
"fortunate", "Funny", "good", "great", "imperative", "important",
"impossible", "incredible", "inescapable", "inevitable", "interesting",
"ironic", "likely", "Likely", "lucky", "ludicrous", "natural",
"necessary", "needful", "notable", "noteworthy", "obvious", "odd",
"paradoxical", "plain", "plausible", "possible", "probable",
"proper", "relevant", "remarkable", "revealing", "right", "Sad",
"self-evident", "sensible", "significant", "striking", "surprising",
"symptomatic", "terrible", "true", "typical", "understandable",
"unexpected", "unfortunate", "unlikely", "unreasonable", "untrue",
"vital"), class = "factor")), .Names = c("ID", "GENRE", "NODE"
), class = "data.frame", row.names = c(NA, -388L))
As I mentioned already: facet_wrap is not intended for having individual scales. At least I didn't find a solution. Hence, setting the labels in scale_x_discrete did not bring the desired result.
But this my workaround:
library(plyr)
library(ggplot2)
nodeCount <- ddply( df, c("GENRE", "NODE"), nrow )
nodeCount$factors <- paste( nodeCount$GENRE, nodeCount$NODE, sep ="." )
nodeCount <- nodeCount[ order( nodeCount$GENRE, nodeCount$V1, decreasing=TRUE ), ]
nodeCount$factors <- factor( nodeCount$factors, levels=nodeCount$factors )
head(nodeCount)
GENRE NODE V1 factors
121 Popular Science possible 14 Popular Science.possible
128 Popular Science surprising 11 Popular Science.surprising
116 Popular Science likely 9 Popular Science.likely
132 Popular Science unlikely 9 Popular Science.unlikely
103 Popular Science clear 7 Popular Science.clear
129 Popular Science true 5 Popular Science.true
g <- ggplot( nodeCount, aes( y=V1, x = factors ) ) +
geom_bar() +
scale_x_discrete( breaks=NULL ) + # supress tick marks on x axis
facet_wrap( ~GENRE, scale="free_x" ) +
geom_text( aes( label = NODE, y = V1+2 ), angle = 45, vjust = 0, hjust=0, size=3 )
Which gives:

Resources