Getting Wider Violin Plots - r

I am trying to make some violin plots with ggplot using this dataframe
df = structure(list(nid.weight = c(2.46, 0.319, 1.169, 1.631, 2.03,
0.148, 0.252, 5.614, 2.557, 6.062, 2.939, 6.04, 18.858, 28.727,
18.3, 9.831, 16.298, 17.176, 13.391, 15.044, 35.42, 5.421, 10.073,
15.499, 12.712, 16.046, 23.003, 11.656, 19.79, 20.593, 19.264,
26.35, 13.752, 31.795, 18.604, 18.871, 22.848, 34.46, 14.176,
20.73, 31.97, 18.7, 17.837, 15.875, 14.44, 38.78, 14.595, 21.522,
13.041, 18.051, 20.748, 17.91, 14.831, 9.523, 9.865, 38.2, 19.531,
25.724, 16.208, 18.059, 14.98, 11.9, 14.9, 13, 16.3, 15.555,
0.031, 1.99, 8.924, 21.081, 30.12, 8.658, 0.078, 0.111, 0.373,
0.217, 0.276, 20.993, 12.936, 0.142, 0.188, 0.154, 0.182, 0.14,
0.172, 0.123, 0.187, 0.104, 17.903, 0.18, 21.026, 0.124, 0.108,
21.394, 0.14, 0.189, 0.173, 0.271, 0.124, 0.122, 0.097, 0.16,
8.087, 0.107, 0.149, 0.072, 16.732, 12.663, 0.268, 0.268, 0.315,
0.277, 0.154, 0.233, 0.323, 14.043, 0.424, 0.296, 0.531, 0.287,
0.45, 0.248, 0.475, 0.726, 0.379, 0.623, 0.257, 0.558, 0.34,
13.687, 0.722, 14.936, 0.243, 0.731, 0.216, 0.4, 0.476, 0.114,
1.46, 0.861, 0.861, 1.64, 0.655, 1.096, 0.649, 0.789, 1.083,
1.072, 1.748, 1.404, 0.721, 1.026, 0.305, 0.59, 0.929, 0.937,
1.984, 1.078, 1.632, 3.373, 2.183, 0.546, 2.745, 2.598, 0.789,
0.925, 0.636, 1.184, 1.171, 1, 1.229, 1.503, 1.172, 1.89, 0.946,
0.641, 0.701, 0.228, 0.169, 0.389, 0.894, 3.299, 1.491, 3.022,
1.395, 1.472, 0.7, 1.195, 0.865, 2.414, 0.442, 1.282, 1.228,
1.403, 0.655, 1.34, 2.014, 1.612, 1.08, 0.326, 1.131, 1.133,
1.362, 2.424, 0.565, 0.67, 1.04, 0.997, 1.022, 0.48, 0.837, 0.746,
0.483, 0.696, 0.934, 1.105, 0.86, 0.75, 0.82, 0.48, 2.437, 0.372,
0.234, 0.099, 0.051, 2.716, 0.621, 0.611, 0.384, 0.82, 0.646,
0.68, 0.768, 0.378, 0.305, 2.462, 2.185, 0.598, 1.529, 2.175,
5.242, 7.084, 0.105, 1.29, 1.154, 2.961, 6.741, 1.742, 1.632,
3.47, 1.232, 2.359, 0.111, 1.638, 2.38, 1.162, 5.291, 1.114,
0.487, 0.874, 0.564, 1.318, 5.55, 7.685, 2.543, 0.401, 6.578,
7.53, 7.89, 3.312, 2.555, 0.233, 7.749, 1.289, 0.94, 0.839, 3.408,
6.603, 10.832, 7.353, 8.789, 5.352, 8.341, 3.897, 21.308, 16.963,
14.393, 3.852, 26.156, 21.705, 8.573, 9.504, 8.813, 2.458, 2.22,
32.4, 10.468, 7.66, 18.072, 2.135, 20.67, 4.79, 15.467, 8.484,
4.28, 13.36, 3.515, 7.835, 9.168, 2.443, 4.076, 9.953, 3.515,
5.206, 11.493, 3.059, 5.311, 7.07, 0.045, 5.309, 0.52, 9.56,
19.989, 36.894, 30.305, 21.25, 20.387, 10.685, 26.185, 0.404,
25.427, 5.755, 16.112, 14.832, 16.072, 14.835, 7.67, 8.717, 17.025,
19.564, 30.922, 0.049, 0.632, 0.415, 6.621, 13.701, 21.269, 17.527,
18.9, 16.574, 22.877, 28.866, 27.756, 7.535, 13.557, 19.082,
8.287, 18.617, 17.219, 14.733, 14.484, 12.481, 6.201, 35.361,
19.888, 24.468, 19.198, 29.679, 22.218, 29.408, 36.102, 23.984,
13.494, 30.313, 18.847, 0.731, 6.166, 28.418, 17.481, 20.235,
31.187, 26.49, 32.56, 14.459, 15.121, 2.385, 31.06, 14.626, 18.43,
9.808, 10.926, 10.1, 18.711, 26.396, 17.722, 12.006, 8.995, 17.874,
15.124, 10.318, 15.23, 22.661, 11.005, 6.016, 22.408, 7.561,
13.97, 8.252, 14.08, 10.254, 15.43, 25.756, 14.52, 9.588, 8.775,
29.909, 24.27, 10.459, 18.974, 11.11, 20.189, 16.73, 14.201,
28.025, 19.849, 20.307, 24.715, 10.688, 13.465, 15.817, 21.798,
19.616, 18.622, 12.703, 15.037, 24.377, 21.071, 10.81, 16.02,
15.576, 36.77, 21.363, 17.874, 19.724, 14.749, 9.152, 16.923,
0.065, 37.676, 25.147, 19.729, 18.345, 14.74, 14.938, 16.49,
20.211, 11.397, 15.34, 11.787, 12.373, 11.504, 10.563, 13.459,
12.091, 14.487, 7.769, 10.006, 9.041, 8.031, 9.05, 1.856, 3.405,
0.036, 12.772, 12.104, 8.282, 10.581, 4.867, 11.029, 10.558,
11.115, 16.303, 11.409, 12.732, 11.417, 11.352, 16.167, 23.197,
15.232, 17.714, 14.234, 23.325, 13.902, 13.66, 17.23, 15.176,
20.037, 15.751, 25.133, 15.217, 29.949, 24.001, 26.291, 39.325,
0.101, 0.148, 0.095, 0.194, 0.112, 0.07, 0.13, 0.096, 0.151,
15.518, 11.961, 19.033, 10.798, 0.114, 17.396), Fmaturity = structure(c(3L,
1L, 2L, 2L, 3L, 1L, 2L, 3L, 3L, 4L, 3L, 4L, 4L, 5L, 4L, 4L, 4L,
5L, 4L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 5L,
5L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 1L, 2L, 5L, 4L, 5L, 4L, 1L, 1L, 2L, 2L, 2L, 5L, 5L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 5L, 2L, 5L, 2L, 2L, 5L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 4L, 2L, 2L, 2L, 4L, 5L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 2L,
4L, 3L, 4L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 1L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 3L,
3L, 2L, 2L, 2L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 5L, 3L, 3L, 3L, 4L,
4L, 3L, 4L, 4L, 2L, 2L, 4L, 5L, 3L, 5L, 2L, 4L, 2L, 4L, 3L, 3L,
4L, 3L, 3L, 3L, 3L, 2L, 4L, 2L, 3L, 4L, 3L, 3L, 3L, 1L, 3L, 2L,
5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 2L, 5L, 3L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 4L, 4L, 1L, 2L, 2L, 3L, 4L, 5L, 5L, 4L, 5L, 4L, 5L, 4L,
4L, 5L, 5L, 3L, 4L, 4L, 5L, 4L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L,
4L, 4L, 5L, 4L, 5L, 4L, 2L, 3L, 5L, 5L, 5L, 5L, 4L, 5L, 4L, 5L,
2L, 5L, 4L, 4L, 4L, 5L, 4L, 5L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 5L,
5L, 5L, 4L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 5L, 5L,
5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
4L, 4L, 5L, 5L, 5L, 5L, 4L, 5L, 5L, 4L, 4L, 5L, 5L, 5L, 1L, 4L,
4L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 5L, 5L,
4L, 4L, 4L, 5L, 4L, 2L, 3L, 1L, 4L, 4L, 5L, 5L, 3L, 5L, 5L, 5L,
5L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 4L, 5L, 4L, 4L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
5L, 5L, 5L, 4L, 2L, 4L), levels = c("1", "2", "3", "4", "5"), class = "factor")), class = "data.frame", row.names = c(NA,
-519L))
Starting out I tried using the simplest code
ggplot(df, aes(x=Fmaturity, y=nid.weight)) +
geom_violin()
But my violin graphs are really thin, and they don't seem to help me visualize the data. So I tried using
# Same plot as before, but with every violin scaled to the same maximum width.
# Uses `df`, the data frame defined above, so the example is reproducible.
ggplot(df, aes(x = Fmaturity, y = nid.weight)) +
  geom_violin(scale = "width")
which made the plots wider, except for the first violin plot. And what I'm wondering is:
When I use scale = "width", is that changing the data that the graph is using? I don't want to manipulate the data; I just want to graph it so that I can see the violin plots.
Is there a better way to show these violin plots so we can visualize the data at all stages? Can I make them wider without manipulating data?

With scale = "width" the violins are all the same width:
ggplot(df, aes(x=Fmaturity, y=nid.weight)) +
geom_violin(scale = "width")
but the first violin is squashed vertically because...that's just the range of the data. If you want to be able to see the details of each distribution then you need a log scale on the y axis:
ggplot(df, aes(x=Fmaturity, y=nid.weight)) +
geom_violin() +
scale_y_log10()

Related

How to perform multiple t.tests with R package srvyr?

As a follow-up to a recent SO question (see here), I am wondering how to perform multiple t-tests in R with weighted data (package srvyr). I can't make it run and would be happy if anyone could help me here. I added a random sample in the code below.
Many thanks!
# Create example data: 1000 respondents answering four questions (q1-q4).
# q1 is drawn from 1:5; q2-q4 are drawn from 1:6, where 6 is later treated
# as "missing".
surveydata <- data.frame(q1 = sample(1:5, 1000, replace = TRUE))
surveydata$q2 <- sample(6, size = nrow(surveydata), replace = TRUE)
surveydata$q3 <- sample(6, size = nrow(surveydata), replace = TRUE)
surveydata$q4 <- sample(6, size = nrow(surveydata), replace = TRUE)

# Alternate respondents between group 1 and group 2 (explicit instead of
# relying on silent recycling of a length-2 vector).
surveydata$group <- rep(c(1, 2), length.out = nrow(surveydata))

# Replace every value "6" with NA.
surveydata[surveydata == 6] <- NA

# Make q1 entirely NA for group 1. q1 only holds the values 1:5, so blanking
# each of those values for group 1 is the same as blanking the whole group.
surveydata$q1[surveydata$group == 1] <- NA

# Add survey weights, uniformly drawn from [0.2, 1.5] and rounded to 3 digits.
surveydata$weights <- round(runif(nrow(surveydata), min = 0.2, max = 1.5), 3)

# Names of the question columns to analyse (q1-q4).
rquest <- names(surveydata)[1:4]
# create survey design
library(srvyr)
surveydesign <- surveydata %>%
as_survey_design(strata = group, weights = weights, variables = c("group", all_of(rquest)))
# Perform multiple t-tests (doesn't work yet).
#
# For every variable name in surveydesign$variables except the first one
# (presumably `group`, since it is listed first when the design is built),
# try to run a t-test and collect one data-frame row per question; the rows
# are then stacked with rbind().
#
# NOTE(review): the formula is built as "<question> ~ survey", but no column
# called `survey` is created above (the columns are q1-q4, group and weights);
# the grouping variable is presumably meant to be `group` -- confirm.
# NOTE(review): t.test() is handed the survey design object as `data`; it is
# not visible here that t.test() understands such objects or honours the
# weights -- presumably a design-aware test is needed instead.
outcome <- do.call(rbind, lapply(names(surveydesign$variables)[-1], function(i) {
# Any error raised by the test is caught below and replaced by a row of NAs,
# so failures do not surface as errors.
tryCatch({
test <- t.test(as.formula(paste(i, "~ survey")), data = surveydesign)
# row.names = 1 makes the first column (`question`) the row names.
data.frame(question = i,
group1 = test$estimate[1],
group2 = test$estimate[2],
difference = diff(test$estimate),
p_value = test$p.value, row.names = 1)
}, error = function(e) {
# Fallback row with the same columns when the test could not be computed.
data.frame(question = i,
group1 = NA,
group2 = NA,
difference = NA,
p_value = NA, row.names = 1)
})
}))
As I understand it, you have a series of question columns (q1 to q4 in the example). You've generated a weights column and used srvyr to build a survey design from it. It is possible in your data that, for a particular question, one entire group may be all NA, and you'd like to generate results into a df even when that is true. You want a weighted Student's t-test that makes use of the weights column, not a simple t-test. The only function I found that provides that is weights::wtd.t.test, which doesn't offer a formula interface but wants to be fed vectors.
In order of steps taken:
Load requisite libraries
library(srvyr)
library(dplyr)
library(rlang)
library(weights)
Build a custom function that removes the NAs by variable, pulls the vectors for x, y, weightx, weighty, runs the test, and extracts the info you want into a df row.
# Run a weighted two-sample t-test for one question column and return the
# result as a one-row data frame.
#
# Argument:
#   i  The question column to test. It is converted to a symbol with ensym(),
#      so it is presumably meant to be supplied as a column name such as
#      "q1" -- confirm against the rlang documentation.
#
# Returns:
#   A data.frame with the columns question, group1, group2, difference and
#   p.value.
#
# NOTE: `surveydata` is not an argument; it is looked up in the calling
# environment and must contain `group` and `weights` columns.
multiple_wt_ttest <- function(i) {
i <- ensym(i)
# Drop the rows where this question is NA, then split what is left by group.
xxx <- surveydata %>%
filter(!is.na(!!i)) %>%
split(.$group)
# Answers of the first and second group (in the order returned by split()).
newx <- pull(xxx[[1]], i)
newy <- pull(xxx[[2]], i)
# Matching weights for the first and second group.
wtx <- pull(xxx[[1]], weights)
wty <- pull(xxx[[2]], weights)
# samedata = FALSE: x and y are passed as two separate vectors, each with its
# own weight vector.
test <- wtd.t.test(x = newx,
y = newy,
weight = wtx,
weighty = wty,
samedata = FALSE)
# Elements are picked by position: additional[[1]] is reported as the
# difference, [[2]] and [[3]] as the two group values, and coefficients[[3]]
# as the p-value. NOTE(review): presumably these are Difference, Mean.x,
# Mean.y and p.value in the wtd.t.test() result -- confirm against the
# weights package documentation.
data.frame(question = rlang::as_name(i),
group1 = test$additional[[2]],
group2 = test$additional[[3]],
difference = test$additional[[1]],
p.value = test$coefficients[[3]])
}
Once we have the function, we can use lapply to apply it column by column (notice it handles the case in q1 where group == 1 is all NA).
lapply(names(surveydata)[1:4], multiple_wt_ttest)
#> [[1]]
#> question group1 group2 difference p.value
#> 1 q1 NaN 3.010457 NaN NA
#>
#> [[2]]
#> question group1 group2 difference p.value
#> 1 q2 3.009003 3.071842 -0.06283922 0.515789
#>
#> [[3]]
#> question group1 group2 difference p.value
#> 1 q3 2.985096 2.968867 0.0162288 0.8734034
#>
#> [[4]]
#> question group1 group2 difference p.value
#> 1 q4 2.856255 3.047787 -0.1915322 0.04290471
Finally, wrap it in a do.call and rbind to make the df you desire
do.call(rbind, lapply(names(surveydata)[1:4], multiple_wt_ttest))
#> question group1 group2 difference p.value
#> 1 q1 NaN 3.010457 NaN NA
#> 2 q2 3.009003 3.071842 -0.06283922 0.51578905
#> 3 q3 2.985096 2.968867 0.01622880 0.87340343
#> 4 q4 2.856255 3.047787 -0.19153218 0.04290471
Your data (without showing all the gyrations to create it, and keeping only the first 200 rows)
surveydata <-
structure(list(q1 = c(NA, 1L, NA, 4L, NA, 5L, NA, 3L, NA, 5L,
NA, 5L, NA, 1L, NA, 5L, NA, 3L, NA, 4L, NA, 5L, NA, 4L, NA, 2L,
NA, 5L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 5L,
NA, 4L, NA, 4L, NA, 3L, NA, 4L, NA, 2L, NA, 4L, NA, 3L, NA, 1L,
NA, 1L, NA, 3L, NA, 5L, NA, 3L, NA, 5L, NA, 5L, NA, 4L, NA, 2L,
NA, 5L, NA, 1L, NA, 3L, NA, 2L, NA, 5L, NA, 4L, NA, 1L, NA, 5L,
NA, 2L, NA, 2L, NA, 4L, NA, 1L, NA, 3L, NA, 4L, NA, 5L, NA, 3L,
NA, 5L, NA, 1L, NA, 1L, NA, 3L, NA, 2L, NA, 4L, NA, 4L, NA, 1L,
NA, 4L, NA, 3L, NA, 2L, NA, 3L, NA, 5L, NA, 2L, NA, 5L, NA, 2L,
NA, 1L, NA, 5L, NA, 2L, NA, 1L, NA, 2L, NA, 3L, NA, 2L, NA, 3L,
NA, 4L, NA, 4L, NA, 3L, NA, 1L, NA, 3L, NA, 1L, NA, 5L, NA, 3L,
NA, 5L, NA, 4L, NA, 1L, NA, 4L, NA, 1L, NA, 3L, NA, 1L, NA, 4L,
NA, 5L, NA, 4L, NA, 4L, NA, 3L, NA, 3L, NA, 2L, NA, 1L), q2 = c(NA,
2L, 2L, 1L, 5L, 4L, 3L, 2L, 4L, 4L, 5L, 1L, 4L, 5L, 1L, 4L, NA,
2L, 2L, 5L, 5L, 4L, 5L, 4L, NA, 1L, 3L, 4L, 5L, 2L, NA, 5L, 2L,
NA, 4L, 4L, 5L, 4L, 1L, NA, 5L, 1L, 4L, 2L, 1L, NA, 5L, 1L, 3L,
2L, 4L, NA, 2L, NA, 1L, 4L, NA, 2L, 3L, NA, 3L, 1L, 1L, 1L, 1L,
1L, 4L, 5L, 1L, 4L, 5L, 4L, NA, 2L, 3L, 2L, 2L, 2L, 4L, 2L, 3L,
5L, NA, 2L, NA, NA, 5L, 2L, 3L, 2L, 1L, 5L, 3L, 2L, 1L, 2L, NA,
1L, 3L, 5L, 5L, 1L, 1L, NA, 3L, 3L, 1L, 2L, 3L, 3L, 2L, 4L, 2L,
5L, 4L, 3L, 1L, NA, 4L, 3L, 1L, 5L, 5L, 5L, 2L, 2L, 4L, 5L, 4L,
1L, 3L, NA, 1L, 3L, 5L, 2L, 1L, 3L, 3L, NA, NA, 5L, NA, 5L, 2L,
5L, 2L, NA, NA, NA, 1L, 4L, 3L, 2L, 3L, 1L, 3L, 5L, 1L, 2L, 3L,
5L, 4L, 4L, NA, NA, 5L, 2L, 3L, 3L, 2L, 2L, 1L, 3L, 1L, 4L, 5L,
2L, 5L, 3L, 1L, 5L, NA, 4L, 3L, 5L, 1L, 1L, 3L, 4L, 4L, 1L, 4L,
3L, 3L, NA, 2L, 3L, 5L, 5L), q3 = c(4L, 4L, 1L, NA, 4L, 5L, 1L,
3L, 4L, 4L, 1L, 3L, 2L, 1L, 2L, 4L, 2L, 3L, 4L, 4L, 1L, 3L, 4L,
5L, 5L, 1L, 3L, 5L, 1L, 2L, 1L, 5L, 5L, 3L, 1L, 3L, 1L, 5L, 1L,
3L, NA, NA, 3L, 5L, NA, 2L, 2L, 1L, 1L, 3L, 5L, 5L, 2L, NA, 5L,
2L, 3L, NA, NA, 3L, 2L, 5L, 2L, 1L, NA, NA, 4L, 2L, NA, 1L, NA,
NA, 5L, 3L, 5L, 4L, 2L, 4L, NA, 2L, 4L, 5L, NA, 2L, 1L, 3L, NA,
5L, 5L, 4L, 5L, 1L, 5L, 4L, 5L, 3L, 2L, 2L, 2L, 1L, 2L, 1L, NA,
NA, 5L, 1L, 2L, 5L, 5L, 5L, 3L, 3L, 3L, 2L, 4L, NA, 3L, NA, 3L,
4L, 2L, 2L, 5L, 1L, NA, 1L, NA, 2L, 2L, 3L, 2L, 5L, 1L, 4L, 4L,
3L, 5L, 5L, NA, NA, 4L, NA, 5L, 1L, 1L, 2L, 5L, 4L, 5L, 4L, 1L,
1L, NA, 4L, 4L, 4L, 5L, 1L, NA, 2L, 3L, NA, 1L, NA, NA, NA, 4L,
2L, 4L, 2L, 1L, 1L, 2L, 1L, 5L, 1L, 3L, 3L, 4L, NA, 1L, 1L, 1L,
3L, 5L, 1L, NA, 3L, 5L, 5L, 4L, NA, 1L, 4L, 5L, 3L, 5L, NA, 1L,
4L), q4 = c(NA, 3L, 1L, 1L, 2L, NA, 1L, 5L, 1L, 3L, 3L, 1L, 3L,
5L, 1L, 3L, 2L, 1L, 1L, 3L, 5L, 5L, NA, 5L, 5L, 5L, 4L, 4L, 4L,
3L, 3L, 2L, 1L, 3L, 5L, 3L, 1L, 5L, NA, 3L, 2L, 5L, 4L, 4L, 4L,
2L, 1L, 1L, 2L, 5L, 2L, 1L, 3L, 4L, 3L, 1L, 1L, 4L, 4L, 1L, 2L,
3L, 3L, 4L, NA, 3L, 3L, 2L, 2L, NA, 3L, 5L, 4L, 4L, 3L, 3L, 4L,
NA, NA, 3L, NA, 1L, NA, 3L, 3L, 3L, 2L, 3L, 3L, 4L, 1L, 1L, 2L,
5L, 1L, 1L, 5L, 2L, 2L, 2L, 3L, 4L, 5L, 3L, NA, NA, 2L, 2L, 3L,
2L, 3L, 2L, 3L, 1L, 3L, 3L, 4L, 5L, NA, 4L, 4L, 3L, 1L, 4L, 5L,
4L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 5L, 1L, 5L, 2L, NA, 4L, 2L,
1L, 3L, 3L, 4L, 3L, 2L, 4L, 5L, 4L, 2L, 3L, 5L, 1L, NA, 3L, 2L,
5L, NA, 1L, 2L, 4L, 5L, 2L, NA, 1L, 3L, NA, 3L, 3L, 3L, 5L, 4L,
5L, 3L, 3L, NA, 4L, 2L, 3L, 2L, 5L, 4L, 4L, 5L, 5L, 3L, 2L, NA,
4L, 1L, 5L, 2L, 4L, 3L, 4L, NA, 3L, 1L, 3L), group = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("1", "2"), class = "factor"),
weights = c(1.445, 0.408, 0.621, 0.961, 1.492, 0.625, 1.131,
0.246, 0.612, 0.621, 1.311, 0.649, 1.282, 0.898, 1.268, 0.641,
0.764, 0.759, 0.306, 0.707, 0.899, 0.785, 1.279, 0.458, 0.882,
0.384, 1.492, 0.468, 0.785, 0.707, 0.489, 1.113, 0.692, 0.293,
0.642, 1.327, 0.362, 1.405, 1.173, 0.732, 0.661, 0.522, 1.001,
0.374, 1.181, 0.819, 1.389, 0.43, 0.477, 0.879, 0.634, 0.417,
0.359, 1.007, 0.866, 0.203, 1.469, 0.294, 1.326, 1.391, 0.871,
1.036, 1.251, 0.417, 1.074, 1.268, 0.963, 0.469, 0.215, 1.074,
0.644, 1.054, 0.787, 0.714, 0.568, 0.397, 1.421, 0.692, 0.262,
0.644, 0.793, 0.808, 0.25, 0.842, 1.039, 0.359, 0.987, 1.257,
0.301, 0.203, 0.823, 1.328, 1.192, 0.256, 1.099, 0.668, 1.129,
0.413, 0.266, 1.121, 0.893, 1.484, 0.568, 1.255, 0.531, 0.461,
0.773, 0.298, 0.233, 0.676, 0.478, 0.806, 0.556, 0.201, 0.801,
0.348, 1.396, 0.552, 0.384, 0.615, 0.499, 0.819, 0.954, 0.943,
0.956, 0.323, 0.706, 0.699, 0.9, 1.156, 1.436, 1.115, 0.762,
0.258, 1.421, 0.644, 1.349, 0.251, 0.735, 0.479, 1.055, 1.395,
1.062, 1.155, 0.869, 0.436, 0.415, 0.745, 1.247, 0.21, 0.879,
0.776, 0.747, 0.835, 0.609, 0.733, 0.563, 1.067, 1.436, 0.679,
1.497, 1.385, 1.087, 1.286, 0.503, 0.738, 0.504, 0.665, 1.421,
1.288, 0.691, 0.972, 0.467, 0.425, 0.406, 0.862, 0.749, 0.935,
0.291, 0.444, 1.118, 1.048, 0.886, 0.982, 0.578, 1.402, 0.778,
1.139, 0.804, 0.618, 1.147, 0.594, 0.984, 0.986, 0.941, 0.794,
0.323, 1.41, 0.902, 0.417)), row.names = c(NA, 200L), class = "data.frame")

error bars should not be very long in barplots in r

I am plotting grouped barplots with error bars, but my error bars are very long as in this image
[![https://i.stack.imgur.com/VUByO.png][1]][1].
I would like shorter error bars as in this image
[![https://i.stack.imgur.com/JhaUJ.png][2]][2]
The code used
per$Leaf_Location <- factor(per$Leaf_Location, levels = unique(per$Leaf_Location))
per$Time <- factor(per$Time, levels = unique(per$Time))
ggplot(per, aes(x=Leaf_Location, y=Damage, fill=as.factor(Time))) +
stat_summary(fun.y=mean,
geom="bar",position=position_dodge(),colour="black",width=.7,size=.7) +
stat_summary(fun.ymin=min,fun.ymax=max,geom="errorbar",
color="black",position=position_dodge(.7), width=.2) +
stat_summary(geom = 'text', fun.y = max, position = position_dodge(.7),
label = c("a","b","c","d","d","a","b","c","d","d","a","b","c","d","d"), vjust = -0.5) +
scale_fill_manual("Legend", values = c("grey36","grey46","grey56","grey76","grey86","grey96")) +
xlab("Leaf Location") +
ylab("Damage ") +
theme_bw()
data:
per =
structure(list(Site = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Defathers",
"Kariithi", "Kimbimbi"), class = "factor"), Field = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L
), .Label = c("F1", "F2", "F3", "F4", "F5"), class = "factor"),
Leaf_Location = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("Lower", "Intermediate",
"Upper"), class = "factor"), Time = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L), .Label = c("20_days",
"40_days", "60_days", "80_days", "100_days"), class = "factor"),
Damage = c(25.25, 26.07, 24.43, 20.73, 17.8, 6.9, 45.05,
33.47, 24.43, 51.67, 41.72, 34.17, 81.67, 73.33, 55.83, 34.28,
26.08, 13.28, 26.27, 14.1, 6.93, 37.55, 29.33, 23.62, 49.17,
38.45, 31.38, 70.83, 60.83, 44.2, 31.03, 25.2, 14.97, 14.38,
6.5, 4.33, 52.2, 39.17, 30.97, 75, 62.5, 38.33, 87.5, 62.5,
57.5, 45.02, 31.02, 26.07, 46.72, 34.32, 21.5, 50.83, 34.23,
25.25, 45.83, 33.47, 27.7, 67.67, 57.5, 52.67, 30.98, 23.62,
9.1, 18.17, 18.57, 10.15, 46.67, 34.27, 23.62, 54.17, 40.05,
29.37, 70.83, 59.17, 47.53, 8.67, 5.63, 0.87, 9.87, 3.03,
0, 17.75, 6.88, 0, 62.5, 37.5, 27.7, 70.83, 57.5, 50.83,
6.5, 2.17, 1.3, 6.93, 3.03, 0.53, 14.82, 5.2, 0, 37.5, 28.52,
13, 75, 37.5, 37.5, 15.3, 9.53, 5.63, 9.43, 3.03, 0.43, 16.4,
6.07, 0, 57.5, 34.23, 21.98, 78.33, 62.5, 37.5, 12.08, 6.5,
1.3, 10.73, 3.03, 0, 15.2, 3.9, 0.43, 62.5, 37.5, 21.98,
64.17, 55.83, 41.73, 8.73, 3.57, 0, 8.57, 2.17, 0, 16.5,
7.7, 0.43, 42.58, 36.68, 13, 65.83, 47.5, 37.5, 8.03, 5.07,
0.43, 10.68, 7.27, 3.5, 48.38, 38.42, 24.83, 45.03, 38.4,
30.8, 73.33, 63.33, 50.83, 3.37, 2.17, 0.9, 9, 6.02, 5.2,
21.07, 12.37, 6.02, 45.02, 32.65, 21.67, 68.78, 56.68, 50,
0, 0, 0, 7.8, 4.33, 4.33, 25.17, 20.65, 13.15, 48.37, 39.23,
27.17, 75.83, 62.5, 49, 11.78, 12.72, 3.8, 20.18, 14.87,
8.95, 46.7, 39.32, 33.03, 49.18, 40.05, 24.43, 69.17, 60,
48.33, 0, 0, 0, 15.25, 9.82, 7.75, 45.9, 38.47, 35.52, 50.88,
37.61, 33.47, 79.17, 71.67, 58.33)), .Names = c("Site", "Field",
"Leaf_Location", "Time", "Damage"), row.names = c(NA, -225L), class = "data.frame")
Here's a simplified reproducible example to explain
first, some dummy data:
per = data.frame(x=rep(c('a','b'), each=100), y=c(2+rnorm(100), 3+rnorm(100,0,2)))
Now you are plotting the error bars, using fun.ymin=min, fun.ymax=max, which will cause them to extend the full range of the data, as in the following graph:
ggplot(per, aes(x, y)) +
stat_summary(fun.y = mean, geom="bar") +
geom_point(position = position_jitter(0.1)) +
stat_summary(fun.ymin=min, fun.ymax=max, geom="errorbar", width=0.4) +
theme_bw()
Whereas, it is more conventional to use error bars that extend either +/- one standard deviation, as in the following:
ggplot(per, aes(x, y)) +
stat_summary(fun.y = mean, geom="bar") +
stat_summary(
fun.ymin=function(y) {mean(y) - sd(y)},
fun.ymax=function(y) {mean(y) + sd(y)},
geom="errorbar", width=0.2) +
theme_bw()
Or one standard error, like this:
ggplot(per, aes(x, y)) +
stat_summary(fun.y = mean, geom="bar") +
stat_summary(
fun.ymin=function(y) {mean(y) - sqrt(var(y)/length(y))},
fun.ymax=function(y) {mean(y) + sqrt(var(y)/length(y))},
geom="errorbar", width=0.2) +
theme_bw()
EDIT - example data were added to question, after this answer was originally posted
We can apply exactly the same approach as above to your example data:
ggplot(per, aes(x=Leaf_Location, y=Damage, fill=as.factor(Time))) +
stat_summary(fun.y=mean, geom="bar",position=position_dodge(),colour="black",width=.7,size=.7) +
stat_summary(
fun.ymin=function(y) {mean(y) - sqrt(var(y)/length(y))},
fun.ymax=function(y) {mean(y) + sqrt(var(y)/length(y))},
geom="errorbar",
position=position_dodge(.7), width=.2)

Specify shape for points in ggplot2

I have a CSV with a Detect column where the result is Y or N. I've got my script to change shape dependent on that column, but I need to specify that Y is a filled in circle, while N is hollow circle.
library("ggplot2")
Report213 <- read.csv("FILE_NAME")
ggplot(data = Report213, aes(x = factor(Station_ID, level = c("NEB","NWB","LBC","WB","HR","FDP","FS","NR","PB")), y = Result, Group = Detect, colour = Station_ID,shape = Detect
)) + geom_point(aes(shape=Detect,size = 2)) +
facet_grid( . ~ Chemical ) +facet_wrap( ~ Chemical, scales= "free_y",ncol = 1) + theme(
panel.background = element_rect(fill = "white",
colour = "white",
size = 0.5, linetype = "solid"),
panel.grid.major = element_line(size = 0.5, linetype = 'solid',
colour = "gray"),
panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
colour = "white"),
strip.background =element_rect(fill="#454545"),
strip.text = element_text(colour = 'white')
)
Appreciate any pointers.
dput output of Report213:
structure(list(Station_ID = structure(c(4L, 4L, 4L, 4L, 4L, 9L,
3L, 9L, 3L, 3L, 9L, 3L, 3L, 5L, 7L, 2L, 6L, 7L, 5L, 7L, 8L, 1L,
5L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L, 1L,
2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 7L,
7L, 7L, 8L, 9L, 9L, 9L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L), .Label = c("FDP",
"FS", "HR", "LBC", "NEB", "NR", "NWB", "PB", "WB"), class = "factor"),
Chemical = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("4,4'-DDT", "CHLORDANE", "Total Aroclors",
"Total PAHs", "Total PCB Congeners"), class = "factor"),
Result = c(78.4176, 66.8307, 59.7295, 50.4102, 40.9341, 36.6868,
34.6394, 26.7728, 23.192, 18.091, 15.47568, 14.539, 13.8006,
4.489, 2.0159, 1.99509, 1.71768, 1.69251, 1.5165, 1.39725,
1.27822, 1.22813, 0.89586, 507.7, 135, 684, 8911, 4946, 780,
4920, 137.9, 559.5, 239.51, 902, 376, 655.4, 8299, 6500,
889, 502.8, 361.1, 17440, 555.8, 953, 5691, 1790, 0.3, 1,
14, 12, 20, 20, 21, 10, 14, 7.6, 7.3, 23, 7.7, 11, 1.5, 0.28,
8.1, 5.4, 11, 0.31, 0.62, 20, 22, 4.2, 6.8, 3.9, 6.7, 4.6,
6.4, 13, 51, 4.2, 50.8, 43.1, 41.9, 4.1, 4.4, 3.9, 4, 4.2,
4.5, 2.3, 4.3, 13, 6.8, 35, 1.1, 0.62, 0.053, 1, 7.4, 23,
3.7, 0.056, 2, 0.055, 0.054, 0.12, 0.053, 0.057, 0.13, 0.088,
0.11, 0.058, 1.1, 21, 1.5, 4.7, 1.6), Detect = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("N", "Y"), class = "factor")), class = "data.frame", row.names = c(NA,
-115L))
You can specify the shape by using scale_shape_manual
P.S: Use either facet_grid or facet_wrap not both at the same time
Edit: since ggplot2 v3.0.0 (released in July 2018), you can also specify shapes by name as strings, e.g. scale_shape_manual(values = c(Y = "circle", N = "circle open")). See more here
library(tidyverse)

# Put the stations in the desired left-to-right plotting order.
Report213 <- Report213 %>%
  mutate(
    Station_ID = factor(
      Station_ID,
      levels = c("NEB", "NWB", "LBC", "WB", "HR", "FDP", "FS", "NR", "PB")
    )
  )

# One panel per chemical, each with its own y-axis; colour by station and
# point shape by detection status.
ggplot(data = Report213, aes(x = Station_ID, y = Result)) +
  geom_point(aes(color = Station_ID, shape = Detect), size = 2) +
  # Name the values so the mapping does not depend on the order of the factor
  # levels ("N" sorts before "Y"): detects ("Y") are filled circles (19) and
  # non-detects ("N") are open circles (1).
  scale_shape_manual(values = c(Y = 19, N = 1)) +
  facet_wrap(~ Chemical, scales = "free_y", ncol = 1) +
  theme(
    panel.background = element_rect(
      fill = "white",
      colour = "white",
      size = 0.5,
      linetype = "solid"
    ),
    panel.grid.major = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "gray"
    ),
    panel.grid.minor = element_line(
      size = 0.25,
      linetype = "solid",
      colour = "white"
    ),
    strip.background = element_rect(fill = "#454545"),
    strip.text = element_text(colour = "white")
  )
Edit 2: Add string ~ integer shape table for future references
# Lookup table from shape name to numeric plotting-symbol code.
# The names are listed in ascending code order; code 14 appears twice because
# "square triangle" and "triangle square" both refer to it.
pch_table <- setNames(
  as.numeric(c(0:14, 14:25)),
  c(
    "square open",          #  0
    "circle open",          #  1
    "triangle open",        #  2
    "plus",                 #  3
    "cross",                #  4
    "diamond open",         #  5
    "triangle down open",   #  6
    "square cross",         #  7
    "asterisk",             #  8
    "diamond plus",         #  9
    "circle plus",          # 10
    "star",                 # 11
    "square plus",          # 12
    "circle cross",         # 13
    "square triangle",      # 14
    "triangle square",      # 14 (alias)
    "square",               # 15
    "circle small",         # 16
    "triangle",             # 17
    "diamond",              # 18
    "circle",               # 19
    "bullet",               # 20
    "circle filled",        # 21
    "square filled",        # 22
    "diamond filled",       # 23
    "triangle filled",      # 24
    "triangle down filled"  # 25
  )
)
Data used
Report213 <- structure(list(Station_ID = structure(c(4L, 4L, 4L, 4L, 4L, 9L,
3L, 9L, 3L, 3L, 9L, 3L, 3L, 5L, 7L, 2L, 6L, 7L, 5L, 7L, 8L, 1L,
5L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L, 1L,
2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 7L,
7L, 7L, 8L, 9L, 9L, 9L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L), .Label = c("FDP",
"FS", "HR", "LBC", "NEB", "NR", "NWB", "PB", "WB"), class = "factor"),
Chemical = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("4,4'-DDT", "CHLORDANE", "Total Aroclors",
"Total PAHs", "Total PCB Congeners"), class = "factor"),
Result = c(78.4176, 66.8307, 59.7295, 50.4102, 40.9341, 36.6868,
34.6394, 26.7728, 23.192, 18.091, 15.47568, 14.539, 13.8006,
4.489, 2.0159, 1.99509, 1.71768, 1.69251, 1.5165, 1.39725,
1.27822, 1.22813, 0.89586, 507.7, 135, 684, 8911, 4946, 780,
4920, 137.9, 559.5, 239.51, 902, 376, 655.4, 8299, 6500,
889, 502.8, 361.1, 17440, 555.8, 953, 5691, 1790, 0.3, 1,
14, 12, 20, 20, 21, 10, 14, 7.6, 7.3, 23, 7.7, 11, 1.5, 0.28,
8.1, 5.4, 11, 0.31, 0.62, 20, 22, 4.2, 6.8, 3.9, 6.7, 4.6,
6.4, 13, 51, 4.2, 50.8, 43.1, 41.9, 4.1, 4.4, 3.9, 4, 4.2,
4.5, 2.3, 4.3, 13, 6.8, 35, 1.1, 0.62, 0.053, 1, 7.4, 23,
3.7, 0.056, 2, 0.055, 0.054, 0.12, 0.053, 0.057, 0.13, 0.088,
0.11, 0.058, 1.1, 21, 1.5, 4.7, 1.6), Detect = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L),
.Label = c("N", "Y"), class = "factor")),
class = "data.frame", row.names = c(NA,
-115L))
Created on 2018-06-09 by the reprex package (v0.2.0).

How to draw polygon/ convex hull around Partitioned Around Medoids (PAM) clusters in R?

Good Day
Is it possible to produce a plot based on the output of a PAM dissimilarity clustering analysis with polygons drawn around the outer point of the clusters?
I have currently achieved something similar using the function clusplot however am more interested in seeing the clusters demarcated using straight lines.
# Load the packages used below (they must already be installed)
library(cluster)
library(fpc)
library(ggplot2)
library(ggfortify)

# Import the Koeberg matrix into R
# (semicolon-separated file; the first column supplies the row names)
KoebergAllCSV <- read.csv(
  "C:/R/Koeberg Cluster/KoebergAllCSV.csv",
  row.names = 1,
  sep = ";"
)

# Check that the data is in the expected format: mode and class of each column
sapply(KoebergAllCSV, mode)
sapply(KoebergAllCSV, class)

# Create the Gower dissimilarity object with daisy():
#  - "Mass" and "EF" are declared as ratio-scaled variables to be
#    log-transformed (type = list(logratio = ...))
#  - every column gets weight 1; the weight vector is sized from the data
#    instead of a hard-coded 5 so it stays valid if columns are added
Koeberg.Diss <- daisy(
  KoebergAllCSV,
  metric = "gower",
  type = list(logratio = c("Mass", "EF")),
  weights = rep.int(1, ncol(KoebergAllCSV))
)
attributes(Koeberg.Diss)

# Determine k: pamk() is asked to try k = 2..50 using the
# average silhouette width ("asw") criterion
pamk(
  Koeberg.Diss,
  krange = 2:50,
  criterion = "asw",
  usepam = TRUE,
  scaling = FALSE,
  diss = TRUE,
  critout = FALSE
)

# Run the cluster analysis using PAM (partitioning around medoids)
pam_fit <- pam(Koeberg.Diss, diss = TRUE, k = 28)

# Export the cluster assignments.
# paste0() is required here: paste() inserts its default " " separator and
# would name the file "KoebergClusInfo .txt" (with an embedded space).
KoebergClusInfo <- paste0("KoebergClusInfo", ".txt")
write.table(pam_fit$clustering, file = KoebergClusInfo, sep = ",")

# Draw the cluster plot directly from the dissimilarities
clusplot(
  Koeberg.Diss, pam_fit$clustering,
  diss = TRUE,
  stand = FALSE,
  lines = 0, labels = 4, xlim = c(-1, 1), plotchar = TRUE, span = TRUE,
  shade = TRUE, color = TRUE, col.p = "black",
  main = "Koeberg gower/pam Clusterplot",
  verbose = getOption("verbose")
)
I am aware that the autoplot() generic from ggplot2 (with methods supplied by the ggfortify package) accepts objects of class pam; however, when I try to use it on my data, replacing the clusplot call above with
# Attempt 1: pam_fit is already the result of pam() (see the script above), so
# passing it to pam() again is what raises the first error quoted below.
autoplot(pam(pam_fit), frame = TRUE)
or
# Attempt 2: refit PAM on the dissimilarity object and plot the fit directly.
# This produces the second error quoted below.
# NOTE(review): presumably the fit carries no original data matrix when built
# with diss = TRUE, leaving autoplot nothing to project -- TODO confirm.
autoplot(pam(Koeberg.Diss, diss = TRUE, k = 28), frame = TRUE)
I get the following errors...
Error in pam(pam_fit) : x is not a numeric dataframe or matrix.
and
Error in as.data.frame.default(x[[i]], optional = TRUE,
stringsAsFactors = stringsAsFactors) : cannot coerce class ‘"waiver"’
to a data.frame
respectively.
I am relatively new to R and posting questions in these forums, so any help would be massively appreciated.
Edit: Got it to work using the fviz_cluster() in the factoextra package
# Load the clustering and plotting packages
library(cluster)
library(fpc)
library(factoextra)

# Read the Koeberg matrix (semicolon-separated; first column = row names)
KoebergAllCSV <- read.csv(
  file = "C:/R/Koeberg Cluster/KoebergAllCSV.csv",
  row.names = 1,
  sep = ";"
)

# Gower dissimilarities from daisy(), expanded to a full square matrix;
# "Mass" and "EF" are declared as log-ratio variables
Koeberg.Gower <- as.matrix(
  daisy(
    KoebergAllCSV,
    metric = "gower",
    type = list(logratio = c("Mass", "EF"))
  )
)
attributes(Koeberg.Gower)

# Search k = 2..50 by average silhouette width
pamk(
  Koeberg.Gower,
  krange = 2:50,
  criterion = "asw",
  usepam = TRUE,
  scaling = FALSE,
  diss = TRUE,
  critout = FALSE
)

# Fit PAM with 28 clusters on the dissimilarities, keeping them in the result
Koeberg.Pam <- pam(Koeberg.Gower, k = 28, diss = TRUE, keep.diss = TRUE)

# Plot the clusters with convex hulls drawn around each one
fviz_cluster(
  object = list(
    data = Koeberg.Gower,
    cluster = Koeberg.Pam$clustering
  ),
  geom = c("point", "text"),
  ellipse.type = "convex",
  stand = FALSE
)

# Silhouette plot for the PAM fit
fviz_silhouette(silhouette(Koeberg.Pam))
# Load the clustering and plotting packages
library(cluster)
library(fpc)
library(factoextra)

# Read the Koeberg matrix (semicolon-separated; first column = row names)
KoebergAllCSV <- read.csv(
  file = "C:/R/Koeberg Cluster/KoebergAllCSV.csv",
  row.names = 1,
  sep = ";"
)

# Gower dissimilarities from daisy(), expanded to a full square matrix;
# "Mass" and "EF" are declared as log-ratio variables
Koeberg.Gower <- as.matrix(
  daisy(
    KoebergAllCSV,
    metric = "gower",
    type = list(logratio = c("Mass", "EF"))
  )
)
attributes(Koeberg.Gower)

# Search k = 2..50 by average silhouette width
pamk(
  Koeberg.Gower,
  krange = 2:50,
  criterion = "asw",
  usepam = TRUE,
  scaling = FALSE,
  diss = TRUE,
  critout = FALSE
)

# Fit PAM with 28 clusters on the dissimilarities, keeping them in the result
Koeberg.Pam <- pam(Koeberg.Gower, k = 28, diss = TRUE, keep.diss = TRUE)

# Plot the clusters with convex hulls drawn around each one
fviz_cluster(
  object = list(
    data = Koeberg.Gower,
    cluster = Koeberg.Pam$clustering
  ),
  geom = c("point", "text"),
  ellipse.type = "convex",
  stand = FALSE
)

# Silhouette plot for the PAM fit
fviz_silhouette(silhouette(Koeberg.Pam))
Data used:
"KoebergAllCSV"
structure(list(Mass = c(157000, 775, 197, 15000, 3250, 628, 1815,
2070, 2000, 1218, 614, 536, 379, 235, 800, 672, 1960, 768, 1540,
1790, 3500, 7450, 4030, 2200, 830, 1180, 1310, 955, 590, 1168,
820, 790, 5000, 883, 824, 280, 184, 941, 293, 1250, 3900, 1700,
925, 220, 1040, 510, 690, 600, 539, 1018, 122, 1086, 118, 737,
370, 1236, 5820, 229, 226, 220, 305.5, 94.5, 390, 198, 445, 623,
1100, 377, 340, 418, 326, 202, 139, 47, 35.1, 46.1, 580, 1150,
66, 44, 50, 30, 34.2, 30, 91, 71, 59, 78.9, 110, 405, 19.5, 73,
64, 39, 54, 39, 37, 48, 21.2, 26.3, 24.2, 29, 15.2, 35, 16.1,
16.8, 29.7, 12.5, 55, 612, 630, 865, 22.4, 8.6, 47.3, 32.5, 28.8,
17.3, 38, 23.5, 22, 15.5, 18.1, 34, 23, 13.1, 13, 14.7, 19.1,
14, 18.6, 15.5, 37, 14.5, 24.6, 25, 28.5, 50.8, 52, 68.8, 76.1,
100, 85, 158, 113, 88, 25.6, 13, 10.2, 30.5, 38, 55, 45.5, 30,
52, 11, 17.8, 29, 13, 23.2, 38, 21, 25, 27.3, 427, 1572, 78.9,
15, 61, 212.9, 700, 11.1, 44, 29.6, 124, 3200, 5800, 5300, 950,
62.4, 205, 270, 93, 40.2, 102, 240, 90, 33, 16.6, 39.2, 47, 60.8,
13, 20.8, 8, 11, 165000, 180000, 63600, 11400, 21200, 41000,
11300, 840000, 240000, 320000, 900, 4090, 1250, 19000, 19000,
6400, 2610, 47, 4500, 1258, 238, 55, 113, 9990, 5360, 17800,
110.1973216, 238.1629085, 89.33169378, 245.0708356, 83.49190575,
7.323754897, 17.91558243, 2.259871723, 1.992123644, 78.63046291,
235.6804221, 413.5582987, 486.5966599, 7.418054089, 8.4510848,
8.4510848, 42.83324573, 8.4510848, 3.14445177, 2000, 496.2334891,
119.4158615, 805.4349144, 8.212468482, 25.0905618), Diet = structure(c(4L,
2L, 2L, 6L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 5L, 4L, 5L, 5L, 5L, 5L, 2L, 5L, 5L, 5L, 5L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
5L, 5L, 5L, 2L, 2L, 2L, 5L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 4L, 5L, 5L, 5L, 6L, 5L, 3L, 1L, 1L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 5L, 5L, 5L, 2L, 2L, 2L, 3L, 3L, 5L, 3L, 3L, 5L,
5L, 5L, 5L, 3L, 3L, 5L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 3L,
3L, 5L, 5L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 5L, 5L, 3L, 3L, 3L, 3L,
3L, 5L, 5L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 2L, 1L, 2L, 5L, 2L, 5L,
3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 2L, 2L, 3L, 5L, 1L, 1L, 1L, 5L, 5L, 2L, 2L, 1L,
1L, 1L, 5L, 2L, 3L, 2L, 2L, 2L, 5L, 2L, 5L, 3L, 5L, 5L, 3L, 3L,
5L, 3L, 3L, 4L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L,
4L, 1L, 1L, 1L, 3L, 4L, 4L, 4L, 5L, 6L, 1L, 1L, 1L, 1L, 6L, 1L,
1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 1L, 1L, 1L, 3L, 3L), .Label = c("A", "B", "C", "D", "E",
"F"), class = "factor"), Time = structure(c(3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 2L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 2L, 1L, 3L, 3L, 4L, 2L, 3L, 1L, 3L, 2L, 3L, 2L, 3L, 3L, 1L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
3L, 1L, 3L, 3L, 3L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 4L, 3L, 3L, 1L,
3L, 3L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 1L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 4L, 1L, 4L, 4L,
1L, 4L, 1L, 3L, 1L, 4L, 1L, 1L, 1L, 4L, 4L, 1L, 4L, 4L, 3L, 3L,
4L, 4L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 3L), .Label = c("Cat", "Cr", "Di", "No"), class = "factor"),
Space = structure(c(5L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 2L,
2L, 2L, 5L, 2L, 2L, 2L, 5L, 2L, 5L, 2L, 2L, 5L, 5L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 1L, 1L, 1L, 5L, 1L, 1L, 1L,
3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 5L, 5L, 5L, 5L, 2L, 2L, 2L,
2L, 5L, 5L, 4L, 2L, 2L, 2L, 2L, 2L, 5L, 5L, 2L, 2L, 2L, 5L,
5L, 5L, 5L, 5L, 3L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 3L, 3L, 3L, 3L, 5L, 5L, 5L,
1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 1L, 3L, 3L, 3L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 5L, 5L, 5L, 1L,
3L, 5L, 5L, 3L, 3L, 5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 2L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 2L,
2L, 1L, 1L, 5L, 5L, 2L, 2L, 5L, 2L, 1L, 5L, 3L, 5L, 1L, 3L,
5L, 5L, 5L, 1L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L,
5L, 4L, 4L, 5L, 5L, 5L, 5L, 3L, 5L, 5L, 4L, 5L), .Label = c("Ae",
"Aq", "Ar", "Fo", "Te"), class = "factor"), EF = c(36274.12643,
974.5491757, 383.4606456, 15194.1663, 4179.125464, 1043.599331,
1739.739739, 1902.677158, 1858.620513, 1325.913225, 831.6334703,
758.1419376, 598.7459669, 432.4008244, 995.8492032, 1104.982804,
1833.224631, 968.5460848, 1555.574839, 2526.177199, 2720.81891,
4551.218864, 2995.035921, 1983.25768, 1021.131045, 1297.600326,
1393.320578, 1123.496167, 809.3558665, 1288.599252, 1012.736663,
987.3550419, 3468.868783, 1065.095472, 1016.098313, 487.1962293,
366.0414243, 1112.253632, 502.488525, 1349.53769, 2928.89833,
1663.891544, 1099.339465, 413.4082553, 1190.663398, 732.8997761,
900.420823, 818.6726853, 354.4240516, 652.168291, 85.2606497,
693.8895747, 270.483605, 941.7478954, 589.026282, 1339.226017,
3846.819879, 533.5361702, 528.1460593, 517.3161179, 666.1149499,
269.8814521, 803.9152978, 477.0047921, 889.8724349, 1153.045225,
1786.335023, 783.201274, 723.3180648, 640.0447608, 540.3690726,
390.0619447, 302.4004117, 144.5066856, 118.4522857, 142.6164524,
799.9885345, 1275.042111, 182.0952927, 543.7188737, 634.8358004,
341.8068213, 400.635478, 341.8068213, 1311.800923, 191.3798609,
168.7094925, 205.6358063, 257.8562828, 626.4208001, 79.37849663,
195.0348258, 178.3190991, 127.263627, 158.8361858, 127.263627,
122.7819915, 144.6363257, 82.97394225, 96.07341942, 90.78786186,
102.6748266, 66.17393783, 116.6808568, 68.813704, 70.83430724,
104.3536694, 57.93381766, 158.6645733, 816.6280747, 832.8847551,
1033.247332, 86.13942963, 44.9254345, 143.1986471, 110.9466069,
102.1927841, 72.26112014, 123.3917674, 88.99374555, 85.0904386,
67.05928121, 74.51689843, 114.4034188, 87.7017517, 59.81055334,
59.49970587, 64.68582581, 77.29226408, 62.57493448, 75.91055058,
67.05928121, 121.1742978, 64.08606129, 91.80560821, 92.81807245,
101.4677065, 150.3213487, 152.7269268, 184.7538415, 197.8677001,
238.2502359, 213.3232585, 325.1772534, 258.896799, 218.4145451,
94.32710718, 59.49970587, 50.45235653, 106.2569217, 123.3917674,
158.6645733, 139.4700925, 105.0692888, 152.7269268, 53.11049696,
73.67479581, 102.6748266, 59.49970587, 88.21961721, 123.3917674,
82.44084978, 92.81807245, 98.54258242, 649.397604, 1577.514936,
202.7896107, 65.58060147, 170.2384385, 404.275036, 909.2871722,
53.43834074, 136.3267745, 104.1146163, 265.4017335, 2559.743414,
3837.812585, 3609.284914, 1119.48705, 196.0571475, 393.9976926,
605.6763891, 266.5768403, 139.7476799, 286.2283703, 438.6449711,
224.9201933, 112.1044413, 70.25978867, 126.0282381, 142.5804177,
169.8586911, 59.49970587, 81.90613014, 42.76953519, 53.11049696,
44893.11086, 48012.29543, 21505.57155, 5704.435068, 9209.019243,
15323.26221, 5665.766265, 157697.8254, 59952.20689, 74861.38869,
616.5285774, 2297.756619, 820.2217331, 23289.68486, 8728.776034,
3390.680499, 1555.167143, 82.25108625, 2783.313695, 2329.752262,
567.6985933, 163.9110073, 301.8499294, 4992.739194, 2906.435392,
8247.673366, 12.81581191, 25.42711978, 10.63408241, 26.08172622,
10.0137771, 1.178076499, 2.549050089, 0.353356528, 0.31350088,
9.49371787, 25.19136319, 41.52955076, 47.98985328, 1.091673606,
1.235456699, 1.235456699, 5.76431571, 1.235456699, 0.483456886,
112.0018952, 48.83385255, 13.76461928, 75.11335195, 1.274157763,
3.438909954)), .Names = c("Mass", "Diet", "Time", "Space",
"EF"), class = "data.frame", row.names = c("CommonOstrich", "GreatCrestedGrebe",
"LittleGrebe", "GreatWhitePelican", "White-breastedCormorant",
"ReedCormorant", "AfricanDarter", "GreyHeron", "Black-headedHeron",
"PurpleHeron", "LittleEgret", "Yellow-billedEgret", "CattleEgret",
"Green-backedHeron", "Black-crownedNight-Heron", "HamerkopHamerkop",
"AfricanSacredIbis", "GlossyIbis", "HadedaIbis", "AfricanSpoonbill",
"GreaterFlamingo", "Spur-wingedGoose", "EgyptianGoose", "SouthAfricanShelduck",
"CapeShoveler", "AfricanBlackDuck", "Yellow-billedDuck", "Red-billedTeal",
"CapeTeal", "SouthernPochard", "MaccoaDuck", "White-backedDuck",
"Secretarybird", "PeregrineFalcon", "LannerFalcon", "RockKestrel",
"LesserKestrel", "Yellow-billedKite", "Black-shoulderedKite",
"BootedEagle", "AfricanFish-eagle", "JackalBuzzard", "SteppeBuzzard",
"Rufous-chestedSparrowhawk", "BlackSparrowhawk", "AfricanGoshawk",
"AfricanMarsh-harrier", "BlackHarrier", "Grey-wingedFrancolin",
"CapeSpurfowl", "CommonQuail", "HelmetedGuineafowl", "BlackCrake",
"AfricanPurpleSwamphen", "CommonMoorhen", "Red-knobbedCoot",
"BlueCrane", "CrownedLapwing", "BlacksmithLapwing", "RuffRuff",
"CommonGreenshank", "WoodSandpiper", "PiedAvocet", "Black-wingedStilt",
"WaterThick-knee", "SpottedThick-knee", "KelpGull", "Grey-headedGull",
"Hartlaub'sGull", "SpeckledPigeon", "Red-eyedDove", "CapeTurtle-dove",
"LaughingDove", "NamaquaDove", "Klaas'sCuckoo", "DiderickCuckoo",
"BarnOwl", "SpottedEagle-owl", "Fiery-neckedNightjar", "CommonSwift",
"AfricanBlackSwift", "White-rumpedSwift", "HorusSwift", "LittleSwift",
"AlpineSwift", "SpeckledMousebird", "White-backedMousebird",
"Red-facedMousebird", "PiedKingfisher", "GiantKingfisher", "MalachiteKingfisher",
"EuropeanBee-eater", "AfricanHoopoe", "AcaciaPiedBarbet", "GreaterHoneyguide",
"LesserHoneyguide", "CardinalWoodpecker", "Large-billedLark",
"Grey-backedSparrowlark", "Red-cappedLark", "BarnSwallow", "White-throatedSwallow",
"Pearl-breastedSwallow", "GreaterStripedSwallow", "RockMartin",
"Brown-throatedMartin", "BandedMartin", "Black(Southernrace)Saw-wing",
"Fork-tailedDrongo", "PiedCrow", "CapeCrow", "White-neckedRaven",
"GreyTit", "CapePenduline-tit", "CapeBulbul", "CappedWheatear",
"FamiliarChat", "AfricanStonechat", "CapeRobin-chat", "KarooScrub-robin",
"LesserSwamp-warbler", "AfricanReed-warbler", "LittleRush-warbler",
"CapeGrassbird", "Long-billedCrombec", "Bar-throatedApalis",
"CloudCisticola", "Grey-backedCisticola", "Levaillant'sCisticola",
"AfricanDuskyFlycatcher", "Chestnut-ventedTit-babbler", "Layard'sTit-babbler",
"FiscalFlycatcher", "CapeBatis", "AfricanParadise-flycatcher",
"CapeWagtail", "AfricanPipit", "CapeLongclaw", "Common(Southern)Fiscal",
"SouthernBoubou", "Bokmakierie", "CommonStarling", "WattledStarling",
"Red-wingedStarling", "PiedStarling", "CapeSugarbird", "MalachiteSunbird",
"Orange-breastedSunbird", "SouthernDouble-collaredSunbird", "HouseSparrow",
"CapeSparrow", "CapeWeaver", "SouthernMasked-weaver", "SouthernRedBishop",
"YellowBishop", "CommonWaxbill", "Pin-tailedWhydah", "CapeCanary",
"Black-headedCanary", "BrimstoneCanary", "White-throatedCanary",
"YellowCanary", "Streaky-headedSeedeater", "CapeBunting", "RockDove",
"MallardDuck", "OliveThrush", "CapeWhite-eye", "CapeLong-billedLark",
"Burchell'sCoucal", "SouthernBlackKorhaan", "KarooPrinia", "CapeClapperLark",
"SouthernGrey-headedSparrow", "LittleBittern", "BlackStork",
"Verreaux'sEagle", "MartialEagle", "AfricanHarrier-Hawk", "BlackrumpedButtonquail",
"AfricanRail", "AfricanJacana", "CommonSandpiper", "LittleStint",
"White-wingedTern", "NamaquaSandgrouse", "Red-chestedCuckoo",
"KarooLark", "SandMartin", "SombreGreenbul", "MountainChat",
"Ant-eatingChat", "ZittingCisticola", "SpottedFlycatcher", "FairyFlycatcher",
"DuskySunbird", "RedHartebeest", "BlueWildebeest", "Bontebok",
"CapeGrysbok", "CommonDuiker", "Springbok", "Steenbok", "CommonEland",
"Gemsbok", "PlainsZebra", "YellowMongoose", "LargeGreyMongoose",
"SmallGreyMongoose", "CapePorcupine", "Caracal", "AfricanWildCat",
"Small-spottedGenet", "CapeGoldenMole", "ScrubHare", "CapeDuneMole-Rat",
"VleiRat", "FourStripedGrassMouse", "CapeGerbil", "Black-BackedJackal",
"Bat-earedFox", "AfricanClawlessOtter", "HeraldSnake", "RhombicEgg-eater",
"SpottedHarlequinSnake", "OliveHouseSnake", "SpottedHouseSnake",
"Knox'sDesertLizard", "NamaquaDwarfChameleon", "Austen'sThick-toedGecko",
"OcelatedThick-toedGecko", "CrossedWhipSnake", "CapeWhipSnake",
"Spotted/RhombicSkaapsteker", "MoleSnake", "Short-leggedseps",
"SilveryDwarfBurrowingSkink", "BloubergDwarfBurrowingSkink",
"CapeSkink", "Red-SidedSkink", "VariegatedSkink", "AngulateTortoise",
"Boomslang", "KarooWhipSnake", "CapeCobra", "Delalande'sBeakedBlindSnake",
"CapeGirdledLizard"))

Changing the order of plotting levels in Lattice

I am trying to get a boxplot with a specific order of the levels that are being plotted.
Using the following data and code I generate the boxplot, but the order in which I need this is 6,12,15,18.
I have tried a number of things using the with() function but can't make it work.
library(lattice)
rate<-structure(list(Temp = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L), Rep = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), Ind = structure(c(1L, 1L, 1L, 1L, 5L, 5L,
5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 6L, 6L,
6L, 6L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L,
3L, 3L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 5L, 5L,
5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 6L, 6L,
6L, 6L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L,
3L, 3L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L), .Label = c("B", "MBCT",
"MBT", "MSCT", "MST", "S"), class = "factor"), Week = c(1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L,
6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L, 1L, 2L, 6L, 8L), Weight = c(1.756,
1.756, 1.756, 1.756, 0.92, 0.92, 0.92, 0.92, 1.201, 1.201, 1.201,
1.201, 2.601, 2.601, 2.601, 2.601, 2.057, 2.057, 2.057, 2.057,
0.784, 0.784, 0.784, 0.784, 0.663, 0.663, 0.663, 0.663, 1.272,
1.272, 1.272, 1.272, 3.389, 3.389, 3.389, 3.389, 1.433, 1.433,
1.433, 1.433, 3.822, 3.822, 3.822, 3.822, 1.55, 1.55, 1.55, 1.55,
1.198, 1.198, 1.198, 1.198, 1.029, 1.029, 1.029, 1.029, 1.113,
1.113, 1.113, 1.113, 0.261, 0.261, 0.261, 0.261, 0.639, 0.639,
0.639, 0.639, 0.749, 0.749, 0.749, 0.749, 1.083, 1.083, 1.083,
1.083, 1.429, 1.429, 1.429, 1.429, 3.083, 3.083, 3.083, 3.083,
1.061, 1.061, 1.061, 1.061, 1.154, 1.154, 1.154, 1.154, 1.691,
1.691, 1.691, 1.691, 1.185, 1.185, 1.185, 1.185, 0.552, 0.552,
0.552, 0.552, 1.507, 1.507, 1.507, 1.507, 1.175, 1.175, 1.175,
1.175, 1.773, 1.773, 1.773, 1.773, 1.712, 1.712, 1.712, 1.712,
3.784, 3.784, 3.784, 3.784, 0.715, 0.715, 0.715, 0.715, 1.271,
1.271, 1.271, 1.271, 0.788, 0.788, 0.788, 0.788, 1.72, 1.72,
1.72, 1.72, 0.571, 0.571, 0.571, 0.571, 1, 1, 1, 1, 1.037, 1.037,
1.037, 1.037, 1.656, 1.656, 1.656, 1.656, 2.083, 2.083, 2.083,
2.083), Rate = c(0.387, 0.116, -0.141, 0.184, 0.785, 0.151, -0.69,
0.16, 0.477, 0.368, -0.544, 0.49, 0.152, 0.183, -0.137, 0.259,
0.239, 0.292, 0.018, 0.411, 0.322, 0.073, -0.148, 0.287, 0.214,
0.21, -0.579, 0.419, 0.23, 0.271, 0.685, 0.426, 0.248, 0.125,
0.053, 0.176, 0.465, 0.107, 0.02, 0.339, 0.261, 0.327, 0.279,
0.424, 0.308, 0.223, 0.287, 0.383, 0.306, 0.24, 0.258, 0.253,
0.437, 0.315, 0.275, 0.481, 0.372, 0.306, 0.267, 0.449, 0.727,
0.441, 0.624, 1.262, 0.334, 0.447, 0.548, 0.654, 0.846, 0.661,
0.66, 0.734, 0.191, 0.316, 0.551, 0.581, 0.332, 0.403, 0.509,
0.603, 0.411, 0.683, 0.427, 0.516, 0.498, 0.674, 0.371, 0.326,
0.288, 0.435, 0.297, 0.435, 0.165, 0.387, 0.212, 0.345, 0.334,
0.664, 0.526, 0.338, 0.094, 0.066, 0.39, 0.525, 0.215, 0.431,
0.151, 0.361, 0.153, 0.297, 0.127, 0.339, 0.292, 0.434, 0.411,
0.442, 0.25, 0.607, 0.369, 0.567, 0.189, 0.39, 0.372, 0.333,
0.339, 0.327, 0.449, 0.224, 0.086, 0.242, 0.465, 0.374, -0.063,
-0.006, 0.364, 0.308, 0.069, 0.223, 0.397, 0.264, 0.478, 0.345,
0.582, 0.36, 0.426, 0.403, 0.583, 0.544, 0.57, 0.567, 0.388,
0.531, 0.111, 0.125, 0.366, 0.266, 0.26, 0.315, 0.387, 0.549)), .Names = c("Temp",
"Rep", "Ind", "Week", "Weight", "Rate"), class = "data.frame", row.names = c(NA,
-160L))
# Convert the Temp, Week, Rep and Weight columns to character vectors in one go
rate[c("Temp", "Week", "Rep", "Weight")] <- lapply(
  rate[c("Temp", "Week", "Rep", "Weight")],
  as.character
)

# Box-and-whisker plot of Rate for each Temp value, pooled over all weeks
bwplot(
  Rate ~ Temp,
  data = rate,
  main = "Boxplot for data over all weeks by temperature"
)
This can be tackled in the same manner as your question from a month ago. You need to set the order of levels of a factor. I would generally advise you work with factors, unless you have a really good reason to use characters.
# Build the factor with its levels listed explicitly in the desired plotting
# order. Do NOT use as.factor() followed by `levels<-` for this: on the
# character Temp column as.factor() sorts the levels as "12", "15", "18", "6",
# and assigning to levels() renames them position by position instead of
# reordering, so every "12" would be relabelled "6", every "6" -> "18", etc.
rate$Temp <- factor(rate$Temp, levels = c("6", "12", "15", "18"))

Resources