How to show the id of outliers on a boxplot - r

How can you view the id of the outliers in a boxplot?
structure(list(pot = c(1L, 2L, 3L, 4L, 21L, 22L, 23L, 24L, 5L,
6L, 7L, 8L, 25L, 26L, 27L, 28L, 9L, 10L, 11L, 12L, 29L, 30L,
31L, 32L, 13L, 14L, 15L, 16L, 33L, 34L, 35L, 36L, 17L, 18L, 19L,
20L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 61L, 62L, 63L, 64L,
45L, 46L, 47L, 48L, 65L, 66L, 67L, 68L, 49L, 50L, 51L, 52L, 69L,
70L, 71L, 72L, 53L, 54L, 55L, 56L, 73L, 74L, 75L, 76L, 57L, 58L,
59L, 60L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 101L, 102L,
103L, 104L, 85L, 86L, 87L, 88L, 105L, 106L, 107L, 108L, 89L,
90L, 91L, 92L, 109L, 110L, 111L, 112L, 93L, 94L, 95L, 96L, 113L,
114L, 115L, 116L, 97L, 98L, 99L, 100L, 117L, 118L, 119L, 120L,
121L, 122L, 123L, 124L, 141L, 142L, 143L, 144L, 125L, 126L, 127L,
128L, 145L, 146L, 147L, 148L, 129L, 130L, 131L, 132L, 149L, 150L,
151L, 152L, 133L, 134L, 135L, 136L, 153L, 154L, 155L, 156L, 137L,
138L, 139L, 140L, 157L, 158L, 159L, 160L), rep = c(1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), cultivar = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Dinninup",
"Riverina", "Seaton Park", "Yarloop"), class = "factor"), Waterlogging = structure(c(2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("Non-waterlogged",
"Waterlogged"), class = "factor"), P = c(12.1, 12.1, 12.1, 12.1,
12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17,
15.17, 15.17, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24,
18.24, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39,
48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 12.1,
12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17,
15.17, 15.17, 15.17, 15.17, 15.17, 18.24, 18.24, 18.24, 18.24,
18.24, 18.24, 18.24, 18.24, 24.39, 24.39, 24.39, 24.39, 24.39,
24.39, 24.39, 24.39, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35,
48.35, 48.35, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1,
15.17, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17, 18.24,
18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 24.39, 24.39,
24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 48.35, 48.35, 48.35,
48.35, 48.35, 48.35, 48.35, 48.35, 12.1, 12.1, 12.1, 12.1, 12.1,
12.1, 12.1, 12.1, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17,
15.17, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24,
24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 48.35,
48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35), total = c(3.66,
2.02, 1.59, 1.67, 2.12, 2.46, 1.79, 2.09, 2.03, 2.13, 1.83, 2.34,
2.66, 2.2, 1.79, 1.97, 2.17, 2.44, 1.49, 2.19, 2.92, 2.43, 1.58,
2.07, 2.48, 2.49, 1.69, 2.1, 2.38, 2.52, 2.41, 2.46, 2.22, 2.07,
1.97, 2.3, 2.48, 3.16, 1.76, 2.38, 2.81, 2.64, 2.59, 3.28, 3.18,
2.57, 2.9, 3, 2.38, 2.72, 2.58, 2.73, 3.06, 3.01, 3.01, 2.77,
2.95, 2.36, 2.91, 2.38, 3.33, 3.19, 3.17, 3.16, 3.16, 3.2, 2.58,
3.71, 3.11, 2.7, 2.92, 1.93, 2.95, 2.57, 2.68, 2.48, 3.34, 2.75,
2.52, 1.88, 1.19, 0.57, 0.64, 0.66, 1.13, 1.28, 0.85, 0.96, 1.34,
2.14, 0.63, 1.27, 1.13, 0.64, 1.21, 1.95, 1.11, 0.91, 0.75, 0.63,
1.06, 1.07, 1.05, 0.8, 1.41, 1.13, 0.75, 0.89, 1.98, 1.27, 1.01,
1, 1.16, 0.64, 0.64, 1.02, 1.03, 1.13, 0.79, 0.6, 3.88, 2.79,
2.73, 2.77, 3.54, 2.05, 1.51, 1.88, 3.86, 3.13, 1.97, 3.46, 3.98,
3.6, 2.12, 2.86, 2.95, 1.65, 1.94, 2.53, 2.21, 1.94, 2.05, 2.22,
3, 3.28, 1.55, 3.85, 2.4, 2.1, 1.98, 1.81, 2.48, 1.66, 2.06,
1.23, 3.75, 1.99, 1.67, 1.93)), class = "data.frame", row.names = c(NA,
-160L))
boxplot(total~cultivar*as.factor(P),data=x)
This is what I am after....
I have tried following example but does not work....
boxplot(total~cultivar*as.factor(P),data=x,id=list(n=Inf))
Identifying the outliers on the plot will make it easier to remove them from analysis. For some reason its not as straightforward as I thought. The post is asking me to add more details but I think there is sufficient.

You can use the car package:
library(car)
Boxplot(total ~ cultivar*as.factor(P), id.method="y", data = x)
Update:
Is it possible to flip the coordinates in car::Boxplot?
For the sake of the challenge, I tried some hacky methods. After all, I was able to rotate the plot, but it's not as conventional as it is for ggplot2::coord_flip. Here, I am just rotating the plot. So, the labels are still in their previous alignment. We can go further, remove the labels and rewrite them, but that would defeat the whole purpose of this solution which is simplicity.
library(car)
library(gridGraphics)
p <- Boxplot(total ~ cultivar*as.factor(P), id.method="y", data = x)
grab_grob <- function(){
grid.echo()
grid.grab()
}
g <- grab_grob()
grid.newpage()
pushViewport(viewport(width=0.5,angle=90))
grid.draw(g)

Unfortunately, though boxplot does return a list structure that provides the values of the outliers (e.g., boxplot(..., plot=FALSE)$out), this doesn't help here since there are equal values in other groups that are not outliers there. (In fact, I find using $out always a bit risky unless it is just one group.)
But you can use $stats to get the whisker parameters and find everything yourself. Unfortunately, this is not a one-liner.
First, though, since I don't know what you mean by "id", I'll add something to the data:
x$id <- seq_len(nrow(x))
base R
bp <- boxplot(total ~ cultivar * as.factor(P), data = x)
lims <- data.frame(nm = bp$names, t(bp$stats[c(1,5),]))
tmpx <- merge(transform(x, nm = paste(cultivar, as.factor(P), sep = ".")), lims, by = "nm", all.x = TRUE)
tmpx <- subset(tmpx, total < X1 | total > X2)
tmpx$xval <- match(tmpx$nm, bp$names)
text(total ~ xval, id, data = tmpx, adj = c(-0.5, 0.5))
Overlaying text over boxplots might be a problem for you; you can play with various shifting and/or flipping the coordinates to control this. Clipping (not shown here, but when a text label disappears out of the plot region) can also be a problem, so you might need to manually control the limits of the plot region.
dplyr
In case you like the tidyverse-way of looking at data-munging, here's an alternative that produces the same plot.
library(dplyr)
bp <- boxplot(total ~ cultivar * as.factor(P), data = x)
x %>%
mutate( nm = paste(cultivar, as.factor(P), sep = ".") ) %>%
left_join(data.frame(nm = bp$names, t(bp$stats[c(1,5),]), stringsAsFactors = FALSE),
by = "nm") %>%
filter(total < X1 | total > X2) %>%
mutate(xval = match(nm, bp$names)) %>%
text(data = ., total ~ xval, as.character(id), adj = c(-0.5, 0.5))
(Same plot.)
dplyr and ggplot2
library(dplyr)
library(ggplot2)
bp <- boxplot(total ~ cultivar * as.factor(P), data = x, plot = FALSE)
x %>%
mutate( nm = paste(cultivar, as.factor(P), sep = ".") ) %>%
left_join(data.frame(nm = bp$names, t(bp$stats[c(1,5),]), stringsAsFactors = FALSE),
by = "nm") %>%
mutate(outlier = total < X1 | total > X2) %>%
ggplot(aes(interaction(cultivar, P), total)) +
geom_boxplot() +
geom_text(aes(label = id), hjust = -0.5, data = ~ filter(., outlier)) +
coord_flip()
I chose to flip the coordinates so that the labels would be all included and shown, but it's not required for the method. One trick I used is that the data= argument to the ggplot2 functions can take an expression (I think of it as a tilde-function), which allows subsetting of the main dataset in-place. Here I use dplyr::filter, but in this case it is just as easy to use subset (base R) in case you are not otherwise using dplyr.

Related

ggstattsplot : Why ggwithinstats not working with categorical data

I'm trying to run the ggwithinstats function from ggstatplot but I get the following error.
This is I'm running
ggwithinstats( # independent samples
data = dat,
x = FAB,
y = BM_percentage,
plot.type = "box", # for boxplot
type = "nonparametric", # for wilcoxon
centrality.plotting = FALSE # remove median
)
########################
ggwithinstats(
df2,
x = FAB,
y = BM_percentage
)
I get the error for the first code
Error in validObject(.Object) : invalid class “dsparseModelMatrix”
object: superclass "Mnumeric" not defined in the environment of the
object's class Error in validObject(.Object) : invalid class
“dsparseModelMatrix” object: superclass "Mnumeric" not defined in the
environment of the object's class Error in complete.cases(x, y) : not
all arguments have the same length
And for the second code I get this
Error in p.adjust(pp[lower.tri(pp, TRUE)], p.adjust.method) :
(list) object cannot be coerced to type 'double'
My data which I'm trying to run
b <- dput(dat)
structure(list(FAB = structure(c(5L, 1L, 5L, 3L, 2L, 4L, 6L,
2L, 1L, 6L, 5L, 1L, 5L, 1L, 5L, 6L, 3L, 5L, 2L, 5L, 3L, 3L, 3L,
1L, 3L, 1L, 1L, 1L, 6L, 4L, 2L, 5L, 4L, 3L, 2L, 3L, 2L, 3L, 3L,
1L, 5L, 2L, 2L, 3L, 2L, 5L, 4L, 6L, 5L, 3L, 1L, 3L, 5L, 5L, 3L,
5L, 2L, 2L, 1L, 5L, 2L, 2L, 3L, 2L, 5L, 4L, 4L, 6L, 3L, 3L, 5L,
2L, 2L, 2L, 3L, 5L, 2L, 3L, 6L, 1L, 2L, 3L, 2L, 3L, 3L, 6L, 2L,
2L, 5L, 3L, 3L, 3L, 2L, 5L, 2L, 4L, 1L, 6L, 6L, 3L, 3L, 5L, 6L,
2L, 1L, 5L, 2L, 4L, 5L, 2L, 6L, 6L, 3L, 4L, 5L, 3L, 2L, 4L, 6L,
2L, 5L, 4L, 2L, 4L, 5L, 3L, 3L, 3L, 3L, 5L, 2L, 2L, 3L, 1L, 3L,
3L, 2L, 2L, 6L, 4L, 2L, 4L, 4L, 3L, 2L, 5L, 5L), .Label = c("M0",
"M1", "M2", "M3", "M4", "M5"), class = "factor"), WBC = c(76.7,
5, 5, 27.7, 10.7, 2.1, 78.5, 8.2, 47.2, 72.1, 67.5, 2.9, 22.2,
1, 15.2, 7.3, 12.6, 27.6, 46.4, 27.1, 34.2, 33.5, 2.5, 2.3, 8.3,
61.6, 47.6, 5.6, 137.2, 3.4, 48, 13.6, 0.9, 5.5, 1.5, 4.1, 90.4,
34.8, 3.1, 2.3, 37, 34, 2.7, 17, 29.4, 50.3, 0.5, 4, 12.4, 12,
111, 77.3, 2.9, 37.9, 8.3, 57.1, 11.5, 37.5, 6.1, 90.6, 80.5,
10.1, 1.5, 30.8, 2.3, 0.4, 86.4, 34.6, 15.1, 35.9, 17.9, 0.7,
33.2, 45.6, 98.8, 26.1, 134.4, 98.2, 8.4, 2.1, 67.9, 51.8, 5.1,
2.6, 43.1, 6.7, 30.5, 171.9, 29.7, 75.2, 45, 11.5, 22.9, 131.5,
63.7, 1.6, 5.4, 116.2, 14.9, 202.7, 18.7, 52.9, 99.2, 13.5, 14.5,
2.7, 1.2, 8.2, 30.9, 103.6, 93, 5.1, 3.4, 0.9, 4.9, 42.1, 6.4,
1.5, 59.3, 88.1, 25.9, 31.5, 223.8, 29, 9.9, 1.7, 0.6, 14.3,
61.6, 2.2, 1.2, 16, 11, 92, 29.4, 32.4, 42.8, 2.9, 6.7, 1.2,
13.1, 1, 3.6, 4.3, 39.8, 19.6, 101.3), TMB = c(0, 0.733333333333,
0.3, 0.266666666667, 0.466666666667, 0.333333333333, 0.233333333333,
0.2, 0.5, 0.133333333333, 0.333333333333, 0.566666666667, 0.3,
0.766666666667, 0.166666666667, 0.233333333333, 0.4, 0.266666666667,
0.533333333333, 1.13333333333, 0.233333333333, 0.1, 0.4, 0.4,
0.333333333333, 0.4, 0.5, 0.4, 0.1, 0.2, 0.566666666667, 0.466666666667,
0.2, 0.733333333333, 0.5, 0.333333333333, 0.2, 0.333333333333,
0.4, 0.266666666667, 0.0666666666667, 0.266666666667, 0.2, 0.433333333333,
0.566666666667, 0.0666666666667, 0.166666666667, 0.533333333333,
0.3, 0.433333333333, 0, 0.4, 0.466666666667, 0.0666666666667,
0.333333333333, 0, 0.7, 0.4, 0.233333333333, 0.3, 0.0333333333333,
0.4, 0.566666666667, 0.0333333333333, 0.0333333333333, 0.266666666667,
0.0333333333333, 0.4, 0.466666666667, 0.166666666667, 0.633333333333,
0.366666666667, 0.233333333333, 0.466666666667, 0.1, 0.0666666666667,
0.4, 0.366666666667, 0.1, 0.166666666667, 0.266666666667, 0.466666666667,
0.266666666667, 0.333333333333, 0.0333333333333, 0.1, 0.5, 0.333333333333,
0.333333333333, 0.266666666667, 0, 0.466666666667, 0.233333333333,
0.166666666667, 0.266666666667, 0.333333333333, 0.433333333333,
0.1, 0.0666666666667, 0.4, 0.2, 0.133333333333, 0.533333333333,
0.2, 0.4, 0.433333333333, 0.1, 0.2, 0.0666666666667, 0.233333333333,
0.1, 0, 0.3, 0.266666666667, 0.233333333333, 0.6, 0.533333333333,
0.2, 0.2, 0.5, 0.0333333333333, 0.0333333333333, 0.0666666666667,
0.166666666667, 0.5, 0.5, 0.166666666667, 0.3, 0.4, 0.3, 0.4,
0.466666666667, 0.433333333333, 0.4, 0.266666666667, 0.3, 0.4,
0.6, 0.0333333333333, 0.0666666666667, 0.333333333333, 0.3, 0.1,
0.333333333333, 0.333333333333, 0.2, 0.0333333333333), BM_percentage = c(82L,
83L, 91L, 72L, 68L, 88L, 32L, 91L, 59L, 87L, 89L, 99L, 35L, 90L,
75L, 41L, 63L, 69L, 81L, 51L, 53L, 34L, 63L, 75L, 47L, 95L, 42L,
55L, 83L, 90L, 84L, 61L, 100L, 64L, 62L, 67L, 77L, 39L, 43L,
75L, 69L, 73L, 72L, 80L, 90L, 39L, 74L, 78L, 51L, 37L, 92L, 57L,
85L, 40L, 48L, 81L, 86L, 88L, 60L, 98L, 77L, 42L, 46L, 89L, 95L,
90L, 73L, 71L, 32L, 70L, 62L, 32L, 75L, 76L, 56L, 30L, 92L, 76L,
89L, 50L, 79L, 55L, 94L, 80L, 47L, 81L, 90L, 87L, 75L, 46L, 67L,
70L, 86L, 72L, 85L, 40L, 97L, 83L, 57L, 60L, 52L, 90L, 52L, 86L,
74L, 37L, 71L, 91L, 52L, 85L, 90L, 95L, 70L, 82L, 40L, 64L, 40L,
90L, 85L, 86L, 71L, 51L, 77L, 85L, 40L, 37L, 35L, 57L, 48L, 81L,
60L, 62L, 72L, 67L, 56L, 59L, 81L, 33L, 94L, 85L, 72L, 42L, 93L,
40L, 86L, 71L, 79L), Risk_Cyto = structure(c(2L, 4L, 2L, 2L,
4L, 1L, 2L, 2L, 4L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
4L, 4L, 2L, 4L, 1L, 1L, 4L, 4L, 1L, 4L, 1L, 4L, 2L, 4L, 4L, 4L,
2L, 1L, 1L, 1L, 2L, 4L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 1L, 1L, 4L,
4L, 2L, 2L, 4L, 1L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
3L, 4L, 2L, 4L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 1L, 4L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 4L, 2L, 2L, 1L, 1L, 2L, 2L, 4L, 2L, 1L, 1L, 2L,
2L, 1L, 2L, 2L, 4L, 1L, 2L, 1L, 4L, 4L, 2L, 1L, 3L, 4L, 2L, 4L,
1L, 2L, 4L, 2L, 1L, 2L, 4L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("Good",
"Intermediate", "N.D.", "Poor"), class = "factor"), Risk_Molecular = structure(c(4L,
4L, 2L, 4L, 4L, 1L, 2L, 2L, 4L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 2L, 1L, 2L, 1L, 1L,
2L, 2L, 1L, 4L, 4L, 2L, 4L, 1L, 1L, 4L, 4L, 1L, 4L, 1L, 4L, 2L,
4L, 4L, 4L, 2L, 1L, 1L, 1L, 2L, 4L, 2L, 2L, 4L, 2L, 4L, 2L, 4L,
1L, 1L, 4L, 4L, 2L, 2L, 4L, 1L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 3L, 2L, 2L, 4L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 1L, 4L,
2L, 2L, 2L, 2L, 4L, 2L, 2L, 4L, 2L, 2L, 1L, 1L, 2L, 2L, 4L, 2L,
1L, 1L, 2L, 2L, 1L, 2L, 2L, 4L, 1L, 2L, 1L, 4L, 4L, 2L, 1L, 3L,
4L, 2L, 4L, 1L, 2L, 4L, 2L, 1L, 2L, 4L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L), .Label = c("Good", "Intermediate", "N.D.", "Poor"), class = "factor")), class = "data.frame", row.names = c("TCGA-AB-2856",
"TCGA-AB-2849", "TCGA-AB-2971", "TCGA-AB-2930", "TCGA-AB-2891",
"TCGA-AB-2872", "TCGA-AB-2851", "TCGA-AB-3011", "TCGA-AB-2949",
"TCGA-AB-2981", "TCGA-AB-2965", "TCGA-AB-2822", "TCGA-AB-2828",
"TCGA-AB-2959", "TCGA-AB-2973", "TCGA-AB-2987", "TCGA-AB-2986",
"TCGA-AB-2921", "TCGA-AB-2863", "TCGA-AB-3009", "TCGA-AB-2812",
"TCGA-AB-2940", "TCGA-AB-2996", "TCGA-AB-2983", "TCGA-AB-2859",
"TCGA-AB-2913", "TCGA-AB-2917", "TCGA-AB-2929", "TCGA-AB-2825",
"TCGA-AB-2897", "TCGA-AB-2900", "TCGA-AB-2846", "TCGA-AB-2862",
"TCGA-AB-3002", "TCGA-AB-2871", "TCGA-AB-2819", "TCGA-AB-2901",
"TCGA-AB-2920", "TCGA-AB-2966", "TCGA-AB-2814", "TCGA-AB-2942",
"TCGA-AB-2870", "TCGA-AB-2847", "TCGA-AB-2844", "TCGA-AB-2806",
"TCGA-AB-2911", "TCGA-AB-2980", "TCGA-AB-2861", "TCGA-AB-2916",
"TCGA-AB-2878", "TCGA-AB-2944", "TCGA-AB-2817", "TCGA-AB-2830",
"TCGA-AB-2892", "TCGA-AB-2858", "TCGA-AB-2815", "TCGA-AB-2877",
"TCGA-AB-2939", "TCGA-AB-2890", "TCGA-AB-2811", "TCGA-AB-2918",
"TCGA-AB-2898", "TCGA-AB-2908", "TCGA-AB-2866", "TCGA-AB-2842",
"TCGA-AB-2991", "TCGA-AB-2823", "TCGA-AB-2910", "TCGA-AB-2915",
"TCGA-AB-2977", "TCGA-AB-2912", "TCGA-AB-2943", "TCGA-AB-2881",
"TCGA-AB-2988", "TCGA-AB-3000", "TCGA-AB-2948", "TCGA-AB-2895",
"TCGA-AB-2931", "TCGA-AB-2956", "TCGA-AB-2936", "TCGA-AB-2934",
"TCGA-AB-2914", "TCGA-AB-2992", "TCGA-AB-2869", "TCGA-AB-2946",
"TCGA-AB-2894", "TCGA-AB-2963", "TCGA-AB-2928", "TCGA-AB-2924",
"TCGA-AB-2818", "TCGA-AB-2975", "TCGA-AB-2874", "TCGA-AB-2979",
"TCGA-AB-2826", "TCGA-AB-2990", "TCGA-AB-3001", "TCGA-AB-2885",
"TCGA-AB-2835", "TCGA-AB-2873", "TCGA-AB-2955", "TCGA-AB-2845",
"TCGA-AB-2836", "TCGA-AB-2925", "TCGA-AB-2884", "TCGA-AB-2820",
"TCGA-AB-2899", "TCGA-AB-3008", "TCGA-AB-2994", "TCGA-AB-2889",
"TCGA-AB-2853", "TCGA-AB-2896", "TCGA-AB-2893", "TCGA-AB-2867",
"TCGA-AB-2999", "TCGA-AB-2888", "TCGA-AB-2839", "TCGA-AB-2865",
"TCGA-AB-3007", "TCGA-AB-2932", "TCGA-AB-2976", "TCGA-AB-2834",
"TCGA-AB-2840", "TCGA-AB-2880", "TCGA-AB-2998", "TCGA-AB-2813",
"TCGA-AB-2882", "TCGA-AB-2995", "TCGA-AB-2950", "TCGA-AB-2810",
"TCGA-AB-2935", "TCGA-AB-2821", "TCGA-AB-2952", "TCGA-AB-2886",
"TCGA-AB-2805", "TCGA-AB-2876", "TCGA-AB-2808", "TCGA-AB-2937",
"TCGA-AB-2927", "TCGA-AB-2883", "TCGA-AB-2982", "TCGA-AB-2919",
"TCGA-AB-3012", "TCGA-AB-2841", "TCGA-AB-2875", "TCGA-AB-2984",
"TCGA-AB-2970", "TCGA-AB-2933"))
I'm not sure what is going wrong here given that in my X there are groups and in y there is the dependent variable.
Any suggestion or help would be really appreciated.
ggwithinstats is for plotting repeated measures in individual subjects, but the fact that you have different sized groups for FAB and no ID column suggests that this is not what you have. If the groups are different sizes, you will always get an error because you cannot have a repeated measures structure.
I suspect you are looking for ggbetweenstats rather than ggwithinstats, since all you appear to be doing is comparing different groups.
ggbetweenstats( # independent samples
data = dat,
x = FAB,
y = BM_percentage,
type = "nonparametric", # for wilcoxon
plot.type = "box", # for boxplot
centrality.plotting = FALSE # remove median
)

R: Compute multiple correlations by group (and save output to csv file)

Is there a way to make a file with the correlation statistic between the raw number of fish observed ("num") and each environmental data column ("temp", "do", etc.) by species ("group")?
*As well as correlations between the means and medians of num vs. env. factors?
I'd also like to be able to choose which correlation method to use (Pearson correlation, Kendall rank correlation, Spearman correlation, etc.)
My data:
zeros <- structure(list(year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("2019", "2020"), class = "factor"), season = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("dry", "wet"), class = "factor"),
site = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L
), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
group = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L
), .Label = c("Hardhead silverside", "Sailfin molly"), class = "factor"),
num = c(0, 8, 0, 9, 0, 13, 0, 9, 0, 10, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 7, 0, 2,
0, 3, 0, 13, 0), temp = c(23L, 36L, 35L, 34L, 30L, 28L, 18L,
19L, 33L, 33L, 25L, 20L, 33L, 23L, 36L, 32L, 28L, 17L, 34L,
31L, 26L, 34L, 26L, 35L, 15L, 25L, 26L, 20L, 18L, 14L, 23L,
17L, 26L, 17L, 17L, 19L, 29L, 31L, 18L, 15L), sal = c(12.5,
25.5, 8.5, 15.5, 17.5, 27.5, 9.5, 31.5, 1.5, 34.5, 25.5,
21.5, 10.5, 8.5, 32.5, 19.5, 6.5, 5.5, 15.5, 28.5, 6.5, 3.5,
29.5, 13.5, 7.5, 16.5, 3.5, 28.5, 22.5, 5.5, 9.5, 12.5, 29.5,
24.5, 8.5, 32.5, 37.5, 3.5, 12.5, 19.5), do = c(9.66, 7.66,
1.66, 14.66, 15.66, 1.66, 14.66, 15.66, 0.66, 5.66, 10.66,
11.66, 4.66, 0.66, 13.66, 1.66, 13.66, 6.66, 6.66, 10.66,
9.66, 15.66, 9.66, 15.66, 4.66, 13.66, 1.66, 11.66, 6.66,
8.66, 12.66, 0.66, 6.66, 0.66, 9.66, 16.66, 1.66, 10.66,
15.66, 10.66), depth = c(120L, 161L, 52L, 52L, 43L, 105L,
165L, 23L, 79L, 136L, 41L, 59L, 65L, 118L, 122L, 69L, 137L,
88L, 152L, 105L, 108L, 79L, 96L, 80L, 22L, 110L, 157L, 118L,
126L, 93L, 156L, 64L, 74L, 24L, 111L, 113L, 157L, 78L, 121L,
130L)), class = "data.frame", row.names = c(NA, -40L))
The first part of your question is straightforward:
zeros.spl <- split(zeros, zeros$group)
zeros.cors <- sapply(zeros.spl, function(x) cor(x[, "num"], x[, 6:9]))
dimnames(zeros.cors)[[1]] <- colnames(zeros)[6:9]
zeros.cors
# Hardhead silverside Sailfin molly
# temp -0.3080334 0.36174046
# sal 0.1393580 0.47095129
# do 0.2544695 -0.06646818
# depth 0.1296208 0.08777425
t(zeros.cors)
# temp sal do depth
# Hardhead silverside -0.3080334 0.1393580 0.25446948 0.12962078
# Sailfin molly 0.3617405 0.4709513 -0.06646818 0.08777425
Use write.csv(zeros.cors, file="results.csv") or write.csv(t(zeros.cors), file="results.csv") depending on what you want the rows/cols to be.
The second question is not clear. The means/medians of a group will be a single value so you cannot correlate it with the environmental variables. You could compute the means by group with aggregate:
aggregate(zeros[, 5:9], by=list(zeros$group), "mean")
# Group.1 num temp sal do depth
# 1 Hardhead silverside 1.45 25.95 15.35 8.51 105.20
# 2 Sailfin molly 2.45 25.00 18.90 9.06 90.25
aggregate(zeros[, 5:9], by=list(zeros$group), "median")
# Group.1 num temp sal do depth
# 1 Hardhead silverside 0 26 11.5 9.66 115.5
# 2 Sailfin molly 0 24 19.5 10.66 90.5

showing three factors in line graph

I am trying to make a line graph with three factors. In addition, I would like to have my legend labels have two symbols.
This is the published example I am trying to emulate....
as you can see, each label has two symbols, one for each level of one factor. Furthermore, the same factor is shown twice on the same panel for each level, i.e. cultivar with two different levels of a factor.
I am trying to make one panel at a time, I will join them later and put the legend at the top like in the above example.
Here is the start of my attempt....
I have got my points on the graph, though im baffled as to how to make a line join through each cultivar, show error bars, and make the legend similar to the above example. I want the legend to have one label for each cultivar, but with two symbols, one symbol for Waterlogging and one for Non-waterlogging
structure(list(pot = c(41L, 42L, 43L, 44L, 61L, 62L, 63L, 64L,
45L, 46L, 47L, 48L, 65L, 66L, 67L, 68L, 49L, 50L, 51L, 52L, 69L,
70L, 71L, 72L, 53L, 54L, 55L, 56L, 73L, 74L, 75L, 76L, 57L, 58L,
59L, 60L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 101L, 102L,
103L, 104L, 85L, 86L, 87L, 88L, 105L, 106L, 107L, 108L, 89L,
90L, 91L, 92L, 109L, 110L, 111L, 112L, 93L, 94L, 95L, 96L, 113L,
114L, 115L, 116L, 97L, 98L, 99L, 100L, 117L, 118L, 119L, 120L
), rep = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L), cultivar = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Dinninup", "Riverina",
"Seaton Park", "Yarloop"), class = "factor"), Waterlogging = structure(c(2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("Non-waterlogged",
"Waterlogged"), class = "factor"), P = c(12.1, 12.1, 12.1, 12.1,
12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17,
15.17, 15.17, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24,
18.24, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39,
48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 12.1,
12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17,
15.17, 15.17, 15.17, 15.17, 15.17, 18.24, 18.24, 18.24, 18.24,
18.24, 18.24, 18.24, 18.24, 24.39, 24.39, 24.39, 24.39, 24.39,
24.39, 24.39, 24.39, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35,
48.35, 48.35), form = c(2.81, 2.64, 2.59, 3.28, 3.18, 2.57, 2.9,
3, 2.38, 2.72, 2.58, 2.73, 3.06, 3.01, 3.01, 2.77, 2.95, 2.36,
2.91, 2.38, 3.33, 3.19, 3.17, 3.16, 3.16, 3.2, 2.58, 3.71, 3.11,
2.7, 2.92, 1.93, 2.95, 2.57, 2.68, 2.48, 3.34, 2.75, 2.52, 1.88,
1.19, 0.57, 0.64, 0.66, 1.13, 1.28, 0.85, 0.96, 1.34, 2.14, 0.63,
1.27, 1.13, 0.64, 1.21, 1.95, 1.11, 0.91, 0.75, 0.63, 1.06, 1.07,
1.05, 0.8, 1.41, 1.13, 0.75, 0.89, 1.98, 1.27, 1.01, 1, 1.16,
0.64, 0.64, 1.02, 1.03, 1.13, 0.79, 0.6)), row.names = 41:120, class = "data.frame")
library(Rmisc)
library(ggplot2)
tglf3 <- summarySE(yar, measurevar="form", groupvars=c("P","cultivar","Waterlogging"),na.rm=TRUE)
ggplot(tglf3, aes(x=P, y=form)) +
geom_point(aes(colour = factor(Waterlogging),
shape=factor(cultivar)),size=3.5,position=position_dodge(1))+
geom_errorbar(aes(ymin=form-se, ymax=form+se),colour="black", width=.1,position=position_dodge(1))
Is this approaching what you're looking for?
library(tidyverse)
tglf3 <- summarySE(yar, measurevar="form", groupvars=c("P","cultivar","Waterlogging"),na.rm=TRUE)
Mostly, making use of dplyr::group_by can solve the point-line pairing:
tglf3 %>%
group_by(cultivar) %>%
ggplot(aes(x=P, y=form, colour=Waterlogging, shape=cultivar)) +
geom_errorbar(aes(ymin=form-se, ymax=form+se), colour="black", width=.6) +
geom_point(size=3.5) +
geom_line() +
theme_classic() +
theme(legend.position = 'top', legend.direction="vertical")
More formatting using scale_x_manual:
tglf3 %>%
unite(new, cultivar, Waterlogging, sep = ', ') %>%
group_by(new) %>%
ggplot(aes(x=P, y=form, colour=new, shape=new, linetype=new)) +
geom_errorbar(aes(ymin=form-se, ymax=form+se), colour="black", width=.6) +
geom_line(color="black") +
geom_point(size=3.5) +
scale_colour_manual(name = "Cultivar, Waterlogging",
labels = c("Riverina, Non-waterlogged", "Riverina, Waterlogged", "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"),
values = c("blue", "red", "blue", "red")) +
scale_shape_manual(name = "Cultivar, Waterlogging",
labels = c("Riverina, Non-waterlogged", "Riverina, Waterlogged", "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"),
values = c(19, 19, 17, 17)) +
scale_linetype_manual(name = "Cultivar, Waterlogging", values=c("longdash", "solid", "longdash", "solid")) +
theme_classic() +
theme(legend.position = 'top', legend.direction="vertical") +
guides(color=guide_legend(ncol=2))
Without %>%:
tglf4 <- unite(data = tglf3, new, cultivar, Waterlogging, sep = ', ')
tglf5 <- group_by(.data = tglf4, new)
ggplot(tglf5, aes(x=P, y=form, colour=new, shape=new, linetype=new)) +
geom_errorbar(aes(ymin=form-se, ymax=form+se), colour="black", width=.6) +
geom_line(color="black") +
geom_point(size=3.5) +
scale_colour_manual(name = "Cultivar, Waterlogging",
labels = c("Riverina, Non-waterlogged", "Riverina, Waterlogged", "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"),
values = c("blue", "red", "blue", "red")) +
scale_shape_manual(name = "Cultivar, Waterlogging",
labels = c("Riverina, Non-waterlogged", "Riverina, Waterlogged", "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"),
values = c(19, 19, 17, 17)) +
scale_linetype_manual(name = "Cultivar, Waterlogging", values=c("longdash", "solid", "longdash", "solid")) +
theme_classic() +
theme(legend.position = 'top', legend.direction="vertical") +
guides(color=guide_legend(ncol=2))
Using facet_grid (example using fake data):
tglf3 %>%
mutate(
form = form * 1.5
) %>%
bind_rows(tglf3, .id = 'species') %>% # Add fake data to the real data as an example
mutate(
species = case_when(species == 1 ~ 'O. sativus',
T ~ 'O. compressus')
) %>%
unite(new, cultivar, Waterlogging, sep = ', ') %>%
group_by(new) %>%
ggplot(aes(x=P, y=form, colour=new, shape=new, linetype=new)) +
geom_errorbar(aes(ymin=form-se, ymax=form+se), colour="black", width=.6) +
geom_line(color="black") +
geom_point(size=3.5) +
scale_colour_manual(name = "Cultivar, Waterlogging",
labels = c("Riverina, Non-waterlogged", "Riverina, Waterlogged", "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"),
values = c("blue", "red", "blue", "red")) +
scale_shape_manual(name = "Cultivar, Waterlogging",
labels = c("Riverina, Non-waterlogged", "Riverina, Waterlogged", "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"),
values = c(19, 19, 17, 17)) +
scale_linetype_manual(name = "Cultivar, Waterlogging", values=c("longdash", "solid", "longdash", "solid")) +
theme_classic() +
theme(legend.position = 'top', legend.direction="vertical") +
guides(color=guide_legend(ncol=2)) +
facet_grid(.~species)

Changing the style of symbols in the legend so they are identical to those shown on the plot

I am trying change the legend so it is more representative of what is shown on the plot and is up to a publishing standard.
Here is an example I am trying to follow....
As you can see, the symbols in the legend are identical to those shown in the plot.
Here is my graph....
I am not happy with the legend that ggplot produces and I cant find a way to alter it so it matches the published example above.
If I understood your question correctly, you could just modify the the answer to your previous question by #dc37 Moving error bars in a line graph with three factors. I only added two lines for linetype.
library(Rmisc)
library(ggplot2)
tglf3 <- summarySE(df, measurevar="form",
groupvars=c("P","cultivar","Waterlogging"),na.rm=TRUE)
pd <- position_dodge(0.5)
ggplot(tglf3,aes(x=P, y=form,
shape = interaction(cultivar, Waterlogging),
color = interaction(cultivar, Waterlogging),
linetype = interaction(cultivar, Waterlogging)))+
geom_errorbar(aes(ymin=form-se, ymax=form+se), width=.6,position=pd) +
geom_point(size=3.5,position=pd) +
geom_line(position=pd) +
theme_bw() +
theme(legend.position = 'top') +
guides(color=guide_legend(ncol=2, title = "Legend"),
shape = guide_legend(ncol =2, title = "Legend"),
linetype = guide_legend(ncol =2, title = "Legend"))
Edit
Initially, I thought it cannot be done in ggplot2, but I have managed to trick ggplot2 to get what you want(?). I have learned some new tricks through this.
p <-ggplot(tglf3, aes(x=P, y=form,
color = interaction(cultivar, Waterlogging),
shape = interaction(cultivar, Waterlogging),
linetype = interaction(cultivar, Waterlogging)))+
geom_errorbar(aes(ymin=form-se, ymax=form+se), width=.6,position=pd) +
geom_point(size=3.5,position=pd) +
geom_line(position=pd) +
theme_bw() +
theme(legend.position = 'top') +
guides(linetype = guide_legend(ncol=2, title = "Riverina \nYarloop"),
shape = guide_legend(ncol =2, title = "Riverina \nYarloop"),
color = guide_legend(ncol =2, title = "Riverina \nYarloop"))
df <- droplevels(df)
brks <- levels(interaction(df$cultivar, df$Waterlogging))
lbs <- c("Non-waterlogged", "Non-waterlogged", "Waterlogged", "Waterlogged")
p + scale_shape_discrete(breaks=brks, labels=lbs) +
scale_color_discrete(breaks=brks, labels=lbs) +
scale_linetype_discrete(breaks=brks, labels=lbs)
Data
> dput(df)
structure(list(pot = c(41L, 42L, 43L, 44L, 61L, 62L, 63L, 64L,
45L, 46L, 47L, 48L, 65L, 66L, 67L, 68L, 49L, 50L, 51L, 52L, 69L,
70L, 71L, 72L, 53L, 54L, 55L, 56L, 73L, 74L, 75L, 76L, 57L, 58L,
59L, 60L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 101L, 102L,
103L, 104L, 85L, 86L, 87L, 88L, 105L, 106L, 107L, 108L, 89L,
90L, 91L, 92L, 109L, 110L, 111L, 112L, 93L, 94L, 95L, 96L, 113L,
114L, 115L, 116L, 97L, 98L, 99L, 100L, 117L, 118L, 119L, 120L
), rep = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L), cultivar = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Dinninup", "Riverina",
"Seaton Park", "Yarloop"), class = "factor"), Waterlogging = structure(c(2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("Non-waterlogged",
"Waterlogged"), class = "factor"), P = c(12.1, 12.1, 12.1, 12.1,
12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17,
15.17, 15.17, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24,
18.24, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39,
48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 12.1,
12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17,
15.17, 15.17, 15.17, 15.17, 15.17, 18.24, 18.24, 18.24, 18.24,
18.24, 18.24, 18.24, 18.24, 24.39, 24.39, 24.39, 24.39, 24.39,
24.39, 24.39, 24.39, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35,
48.35, 48.35), form = c(2.81, 2.64, 2.59, 3.28, 3.18, 2.57, 2.9,
3, 2.38, 2.72, 2.58, 2.73, 3.06, 3.01, 3.01, 2.77, 2.95, 2.36,
2.91, 2.38, 3.33, 3.19, 3.17, 3.16, 3.16, 3.2, 2.58, 3.71, 3.11,
2.7, 2.92, 1.93, 2.95, 2.57, 2.68, 2.48, 3.34, 2.75, 2.52, 1.88,
1.19, 0.57, 0.64, 0.66, 1.13, 1.28, 0.85, 0.96, 1.34, 2.14, 0.63,
1.27, 1.13, 0.64, 1.21, 1.95, 1.11, 0.91, 0.75, 0.63, 1.06, 1.07,
1.05, 0.8, 1.41, 1.13, 0.75, 0.89, 1.98, 1.27, 1.01, 1, 1.16,
0.64, 0.64, 1.02, 1.03, 1.13, 0.79, 0.6)), row.names = 41:120, class = "data.frame")

Trying to label only one facet in ggplot

I am trying to label only one of my facets, but i am receiving an error message when I try to apply a solution.
structure(list(pot = c(1L, 2L, 3L, 4L, 21L, 22L, 23L, 24L, 5L,
6L, 7L, 8L, 25L, 26L, 27L, 28L, 9L, 10L, 11L, 12L, 29L, 30L,
31L, 32L, 13L, 14L, 15L, 16L, 33L, 34L, 35L, 36L, 17L, 18L, 19L,
20L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 61L, 62L, 63L, 64L,
45L, 46L, 47L, 48L, 65L, 66L, 67L, 68L, 49L, 50L, 51L, 52L, 69L,
70L, 71L, 72L, 53L, 54L, 55L, 56L, 73L, 74L, 75L, 76L, 57L, 58L,
59L, 60L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 101L, 102L,
103L, 104L, 85L, 86L, 87L, 88L, 105L, 106L, 107L, 108L, 89L,
90L, 91L, 92L, 109L, 110L, 111L, 112L, 93L, 94L, 95L, 96L, 113L,
114L, 115L, 116L, 97L, 98L, 99L, 100L, 117L, 118L, 119L, 120L,
121L, 122L, 123L, 124L, 141L, 142L, 143L, 144L, 125L, 126L, 127L,
128L, 145L, 146L, 147L, 148L, 129L, 130L, 131L, 132L, 149L, 150L,
151L, 152L, 133L, 134L, 135L, 136L, 153L, 154L, 155L, 156L, 137L,
138L, 139L, 140L, 157L, 158L, 159L, 160L), rep = c(1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), cultivar = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Dinninup",
"Riverina", "Seaton Park", "Yarloop"), class = "factor"), Waterlogging = structure(c(2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("Non-waterlogged",
"Waterlogged"), class = "factor"), P = c(12.1, 12.1, 12.1, 12.1,
12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17,
15.17, 15.17, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24,
18.24, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39,
48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 12.1,
12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17,
15.17, 15.17, 15.17, 15.17, 15.17, 18.24, 18.24, 18.24, 18.24,
18.24, 18.24, 18.24, 18.24, 24.39, 24.39, 24.39, 24.39, 24.39,
24.39, 24.39, 24.39, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35,
48.35, 48.35, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1,
15.17, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17, 18.24,
18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 24.39, 24.39,
24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 48.35, 48.35, 48.35,
48.35, 48.35, 48.35, 48.35, 48.35, 12.1, 12.1, 12.1, 12.1, 12.1,
12.1, 12.1, 12.1, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17,
15.17, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24,
24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 48.35,
48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35), form = c(3.66,
2.02, 1.59, 1.67, 2.12, 2.46, 1.79, 2.09, 2.03, 2.13, 1.83, 2.34,
2.66, 2.2, 1.79, 1.97, 2.17, 2.44, 1.49, 2.19, 2.92, 2.43, 1.58,
2.07, 2.48, 2.49, 1.69, 2.1, 2.38, 2.52, 2.41, 2.46, 2.22, 2.07,
1.97, 2.3, 2.48, 3.16, 1.76, 2.38, 2.81, 2.64, 2.59, 3.28, 3.18,
2.57, 2.9, 3, 2.38, 2.72, 2.58, 2.73, 3.06, 3.01, 3.01, 2.77,
2.95, 2.36, 2.91, 2.38, 3.33, 3.19, 3.17, 3.16, 3.16, 3.2, 2.58,
3.71, 3.11, 2.7, 2.92, 1.93, 2.95, 2.57, 2.68, 2.48, 3.34, 2.75,
2.52, 1.88, 1.19, 0.57, 0.64, 0.66, 1.13, 1.28, 0.85, 0.96, 1.34,
2.14, 0.63, 1.27, 1.13, 0.64, 1.21, 1.95, 1.11, 0.91, 0.75, 0.63,
1.06, 1.07, 1.05, 0.8, 1.41, 1.13, 0.75, 0.89, 1.98, 1.27, 1.01,
1, 1.16, 0.64, 0.64, 1.02, 1.03, 1.13, 0.79, 0.6, 3.88, 2.79,
2.73, 2.77, 3.54, 2.05, 1.51, 1.88, 3.86, 3.13, 1.97, 3.46, 3.98,
3.6, 2.12, 2.86, 2.95, 1.65, 1.94, 2.53, 2.21, 1.94, 2.05, 2.22,
3, 3.28, 1.55, 3.85, 2.4, 2.1, 1.98, 1.81, 2.48, 1.66, 2.06,
1.23, 3.75, 1.99, 1.67, 1.93)), class = "data.frame", row.names = c(NA,
-160L))
library(Rmisc)
library(ggplot2)
tglf2 <- summarySE(iso, measurevar="form", groupvars=c("P","cultivar","Waterlogging"),na.rm=TRUE)
pd <- position_dodge(0.5)
p=ggplot(tglf2, aes(x=P, y=form,colour=cultivar,group=cultivar)) +
geom_errorbar(aes(ymin=form-se, ymax=form+se),colour="black",
width=.2,position=pd) +
geom_line(position=pd) +
geom_point(aes(shape=cultivar),size=3.5,position=pd)+
scale_shape_manual(values=c(0, 16, 17,1))+
scale_color_manual(values=c("#009E73", "#F0E442", "#0072B2", "#D55E00"))+
facet_grid(~Waterlogging)
ann_text <- data.frame(P = 30,shoot = 3,lab = "Text",
Waterlogging = factor(Waterlogged,levels = c("Non-waterlogged","Waterlogged")))
p + geom_text(data = ann_text,label = "Text")
WHen I try to run the ann_text line I get an error message that it cant find the object waterlogging. I am trying to find the solution found here Annotating text on individual facet in ggplot2
Here is a workaround adding some variables to the original data. I just added three columns filled with NA, except for the position where you want your text and the text itself (making sure it is located in the facet level where you want your label, i.e., waterlogged).
#Adding the three variables to the data
tglf2 <- data.frame(tglf2,
P2 = c(rep(NA,(nrow(tglf2)-1)),30),
text = c(rep(NA,(nrow(tglf2)-1)),"MyText"),
form2 = c(rep(NA,(nrow(tglf2)-1)),3))
As the text information is already contained in the data, you just need to add the following code to your plot.
geom_text(aes(x = P2, y = form2, label = text),
col = "black")
Here is the final result

Resources