How to model the residual variance using varFunc from nlme? - r

To see whether a linear trend exists between age and quartiles of some variable, I fitted a linear model using lm. Plots of the residuals against fitted values as well as residuals against the quartiles indicate heterogeneity of variance.
This image was created through:
# Fit age on quartile with lm(); DF is the data frame.
m1 <- lm(formula = age ~ quartile, data = DF)

# Draw three diagnostic panels in one row, remembering the previous par().
op <- par(mfrow = c(1, 3))
# Homogeneity of variances (graphical): residuals against fitted values.
plot(resid(m1) ~ fitted(m1))
# Residuals against the quartile column of DF.
plot(resid(m1) ~ DF$quartile)
# Normal Q-Q plot of the residuals, followed by its reference line.
qqnorm(resid(m1))
qqline(resid(m1))
# Put the saved graphical parameters back.
par(op)
Within the GLS framework, I would like the residual variance to depend on the quartiles, using one of the varFunc classes from the nlme package. I tried several of these variance functions, but without success.
The sample data below roughly yield the same pattern:
reconstruct <- structure(list(quartile = structure(c(2L, 1L, 4L, 3L, 1L, 1L,
3L, 4L, 3L, 2L, 2L, 3L, 3L, 1L, 2L, 4L, 2L, 2L, 2L, 1L, 1L, 3L,
1L, 1L, 1L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 2L, 4L, 1L, 1L, 3L, 1L,
3L, 2L, 2L, 4L, 3L, 4L, 1L, 4L, 1L, 4L, 3L, 1L, 1L, 2L, 4L, 2L,
2L, 2L, 1L, 1L, 4L, 1L, 4L, 4L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 1L,
4L, 3L, 4L, 2L, 3L, 3L, 3L, 1L, 1L, 4L, 1L, 2L, 1L, 2L, 1L, 1L,
2L, 4L, 1L, 3L, 4L, 2L, 4L, 1L, 4L, 4L, 1L, 3L, 4L, 2L, 2L, 1L,
1L, 4L, 2L, 4L, 3L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 4L, 2L,
4L, 1L, 4L, 3L, 4L, 1L, 2L, 1L, 4L, 2L, 1L, 3L, 1L, 4L, 1L, 4L,
4L, 4L, 1L, 1L, 4L, 2L, 4L, 3L, 2L, 2L, 1L, 3L, 1L, 4L, 2L, 3L,
4L, 3L, 4L, 1L, 1L, 2L, 2L, 4L, 1L, 2L, 4L, 2L, 1L, 2L, 1L, 1L,
4L, 3L, 2L, 3L, 2L, 4L, 3L, 4L, 1L, 4L, 1L, 3L, 4L, 4L, 4L, 1L,
4L, 3L, 2L, 4L, 3L, 3L, 2L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 2L, 4L,
2L, 3L), .Label = c("1", "2", "3", "4"), class = c("ordered",
"factor")), age = c(40.45, 33.49, 41.02, 53.06, 63.46, 47.17,
39.45, 60.71, 67.13, 53.12, 62.78, 70.39, 56.14, 50.55, 35.64,
38.5, 68.53, 53.69, 50.84, 38.66, 35.31, 57.03, 37.84, 35.82,
50.68, 56.44, 65.36, 58.64, 55.98, 56.13, 42.09, 54.91, 35.16,
63.68, 44.5, 51.79, 69.56, 59.11, 55.39, 43.87, 58.12, 65.59,
52.58, 60.17, 48.57, 52.09, 40.04, 35.61, 77.14, 43.82, 48.98,
36.26, 44.63, 62.13, 69.59, 41.22, 47.85, 53.5, 42.08, 49.08,
75.49, 52.39, 41.21, 58.25, 74.37, 64.28, 34.01, 42.99, 34.05,
60.99, 68.82, 41.3, 71.07, 55.21, 52.01, 37.76, 64.54, 57.43,
45.78, 62.9, 67.73, 49.25, 69.68, 51.85, 37.32, 47.37, 53.41,
68.55, 35.31, 63.59, 69.04, 48.03, 50.74, 42.93, 79.23, 72.22,
35.42, 43.26, 45.81, 37.92, 39.26, 60.97, 47.36, 50.19, 43.52,
41.82, 40.42, 54.87, 55.32, 75.74, 69.54, 56.44, 59.85, 50.02,
49.23, 48.38, 34.07, 38.57, 46.57, 35.29, 42.04, 63.35, 34.68,
50.34, 72.5, 40.27, 58.41, 37.79, 34.62, 75.47, 38.91, 46.21,
49.72, 40.55, 66.98, 59.07, 55.8, 38.86, 47.76, 59.16, 74.79,
57.87, 54.82, 43.58, 66.15, 34.55, 50.12, 67.68, 61.1, 40.29,
54.1, 69.8, 60.68, 36.7, 38.31, 46.15, 34.68, 41.92, 38.97, 50.67,
68.53, 40.06, 46.5, 44.38, 47.6, 37.95, 78.39, 54.73, 79.07,
40.05, 48.67, 58.71, 73.07, 75.65, 43.07, 48.25, 44.03, 51.37,
62.16, 54.78, 66.27, 50.25, 60.56, 32.77, 68.41, 37.74, 38.46,
46.33, 41.59, 64.52, 53.66, 71.04, 64.55, 53.25, 40.58, 52.33,
39.64, 52.76, 43.52, 48.45)), row.names = c(1:200), class = "data.frame")
To obtain the image:
# Same model as above, fitted to the sample data in `reconstruct`.
m2 <- lm(formula = age ~ quartile, data = reconstruct)

# Three diagnostic panels in one row; the previous par() settings are saved.
op <- par(mfrow = c(1, 3))
# Residuals against fitted values.
plot(resid(m2) ~ fitted(m2))
# Residuals against the quartile column of the sample data.
plot(resid(m2) ~ reconstruct$quartile)
# Normal Q-Q plot of the residuals, followed by its reference line.
qqnorm(resid(m2))
qqline(resid(m2))
# Put the saved graphical parameters back.
par(op)
Any suggestions?

Related

error bars should not be very long in barplots in r

I am plotting grouped barplots with error bars, but my error bars are very long, as in this image:
![Grouped bar plot with very long error bars](https://i.stack.imgur.com/VUByO.png)
I would like shorter error bars, as in this image:
![Grouped bar plot with shorter error bars](https://i.stack.imgur.com/JhaUJ.png)
The code used
# Keep the factor levels in their order of first appearance in the data.
per$Leaf_Location <- factor(per$Leaf_Location, levels = unique(per$Leaf_Location))
per$Time <- factor(per$Time, levels = unique(per$Time))

# Grouped bars of mean Damage per Leaf_Location and Time, with error bars and
# one letter per bar. `fun`, `fun.min` and `fun.max` are the current names of
# the stat_summary() arguments formerly called `fun.y`, `fun.ymin` and
# `fun.ymax` (deprecated since ggplot2 3.3.0).
ggplot(per, aes(x = Leaf_Location, y = Damage, fill = as.factor(Time))) +
  stat_summary(
    fun = mean, geom = "bar", position = position_dodge(),
    colour = "black", width = .7, size = .7
  ) +
  # The error bars run from the minimum to the maximum of each group.
  stat_summary(
    fun.min = min, fun.max = max, geom = "errorbar",
    color = "black", position = position_dodge(.7), width = .2
  ) +
  # One label per bar, placed just above the group maximum.
  stat_summary(
    geom = "text", fun = max, position = position_dodge(.7),
    label = c("a", "b", "c", "d", "d", "a", "b", "c", "d", "d", "a", "b", "c", "d", "d"),
    vjust = -0.5
  ) +
  scale_fill_manual(
    "Legend",
    values = c("grey36", "grey46", "grey56", "grey76", "grey86", "grey96")
  ) +
  xlab("Leaf Location") +
  ylab("Damage ") +
  theme_bw()
data:
per =
structure(list(Site = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Defathers",
"Kariithi", "Kimbimbi"), class = "factor"), Field = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L
), .Label = c("F1", "F2", "F3", "F4", "F5"), class = "factor"),
Leaf_Location = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("Lower", "Intermediate",
"Upper"), class = "factor"), Time = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L), .Label = c("20_days",
"40_days", "60_days", "80_days", "100_days"), class = "factor"),
Damage = c(25.25, 26.07, 24.43, 20.73, 17.8, 6.9, 45.05,
33.47, 24.43, 51.67, 41.72, 34.17, 81.67, 73.33, 55.83, 34.28,
26.08, 13.28, 26.27, 14.1, 6.93, 37.55, 29.33, 23.62, 49.17,
38.45, 31.38, 70.83, 60.83, 44.2, 31.03, 25.2, 14.97, 14.38,
6.5, 4.33, 52.2, 39.17, 30.97, 75, 62.5, 38.33, 87.5, 62.5,
57.5, 45.02, 31.02, 26.07, 46.72, 34.32, 21.5, 50.83, 34.23,
25.25, 45.83, 33.47, 27.7, 67.67, 57.5, 52.67, 30.98, 23.62,
9.1, 18.17, 18.57, 10.15, 46.67, 34.27, 23.62, 54.17, 40.05,
29.37, 70.83, 59.17, 47.53, 8.67, 5.63, 0.87, 9.87, 3.03,
0, 17.75, 6.88, 0, 62.5, 37.5, 27.7, 70.83, 57.5, 50.83,
6.5, 2.17, 1.3, 6.93, 3.03, 0.53, 14.82, 5.2, 0, 37.5, 28.52,
13, 75, 37.5, 37.5, 15.3, 9.53, 5.63, 9.43, 3.03, 0.43, 16.4,
6.07, 0, 57.5, 34.23, 21.98, 78.33, 62.5, 37.5, 12.08, 6.5,
1.3, 10.73, 3.03, 0, 15.2, 3.9, 0.43, 62.5, 37.5, 21.98,
64.17, 55.83, 41.73, 8.73, 3.57, 0, 8.57, 2.17, 0, 16.5,
7.7, 0.43, 42.58, 36.68, 13, 65.83, 47.5, 37.5, 8.03, 5.07,
0.43, 10.68, 7.27, 3.5, 48.38, 38.42, 24.83, 45.03, 38.4,
30.8, 73.33, 63.33, 50.83, 3.37, 2.17, 0.9, 9, 6.02, 5.2,
21.07, 12.37, 6.02, 45.02, 32.65, 21.67, 68.78, 56.68, 50,
0, 0, 0, 7.8, 4.33, 4.33, 25.17, 20.65, 13.15, 48.37, 39.23,
27.17, 75.83, 62.5, 49, 11.78, 12.72, 3.8, 20.18, 14.87,
8.95, 46.7, 39.32, 33.03, 49.18, 40.05, 24.43, 69.17, 60,
48.33, 0, 0, 0, 15.25, 9.82, 7.75, 45.9, 38.47, 35.52, 50.88,
37.61, 33.47, 79.17, 71.67, 58.33)), .Names = c("Site", "Field",
"Leaf_Location", "Time", "Damage"), row.names = c(NA, -225L), class = "data.frame")
Here's a simplified reproducible example to explain.
First, some dummy data:
# Dummy data: two groups of 100 normal draws each; group "b" is generated with
# a larger mean (3 vs 2) and a larger standard deviation (2 vs the default 1).
# The seed is fixed so the example gives the same numbers on every run.
set.seed(1)
per <- data.frame(
  x = rep(c("a", "b"), each = 100),
  y = c(2 + rnorm(100), 3 + rnorm(100, 0, 2))
)
Now you are plotting the error bars using fun.ymin=min and fun.ymax=max (these arguments are named fun.min and fun.max since ggplot2 3.3.0), which makes them span the full range of the data, as in the following graph:
# Bars show the group mean, the raw points are jittered on top, and the error
# bars run from the group minimum to the group maximum. `fun`, `fun.min` and
# `fun.max` are the current names of the stat_summary() arguments `fun.y`,
# `fun.ymin` and `fun.ymax` (deprecated since ggplot2 3.3.0).
ggplot(per, aes(x, y)) +
  stat_summary(fun = mean, geom = "bar") +
  geom_point(position = position_jitter(0.1)) +
  stat_summary(fun.min = min, fun.max = max, geom = "errorbar", width = 0.4) +
  theme_bw()
It is more conventional, however, to use error bars that extend +/- one standard deviation from the mean, as in the following:
# Bars show the group mean; error bars extend one standard deviation either
# side of it. `fun`, `fun.min` and `fun.max` are the current names of the
# stat_summary() arguments `fun.y`, `fun.ymin` and `fun.ymax` (deprecated
# since ggplot2 3.3.0).
ggplot(per, aes(x, y)) +
  stat_summary(fun = mean, geom = "bar") +
  stat_summary(
    fun.min = function(y) mean(y) - sd(y),
    fun.max = function(y) mean(y) + sd(y),
    geom = "errorbar", width = 0.2
  ) +
  theme_bw()
Or one standard error, like this:
# Bars show the group mean; error bars extend one standard error,
# sqrt(var(y) / length(y)), either side of it. `fun`, `fun.min` and `fun.max`
# are the current names of the stat_summary() arguments `fun.y`, `fun.ymin`
# and `fun.ymax` (deprecated since ggplot2 3.3.0).
ggplot(per, aes(x, y)) +
  stat_summary(fun = mean, geom = "bar") +
  stat_summary(
    fun.min = function(y) mean(y) - sqrt(var(y) / length(y)),
    fun.max = function(y) mean(y) + sqrt(var(y) / length(y)),
    geom = "errorbar", width = 0.2
  ) +
  theme_bw()
EDIT - example data were added to question, after this answer was originally posted
We can apply exactly the same approach as above to your example data:
# Grouped bars of mean Damage per Leaf_Location and Time, with error bars of
# one standard error, sqrt(var(y) / length(y)), either side of the mean.
# `fun`, `fun.min` and `fun.max` are the current names of the stat_summary()
# arguments `fun.y`, `fun.ymin` and `fun.ymax` (deprecated since ggplot2 3.3.0).
ggplot(per, aes(x = Leaf_Location, y = Damage, fill = as.factor(Time))) +
  stat_summary(
    fun = mean, geom = "bar", position = position_dodge(),
    colour = "black", width = .7, size = .7
  ) +
  stat_summary(
    fun.min = function(y) mean(y) - sqrt(var(y) / length(y)),
    fun.max = function(y) mean(y) + sqrt(var(y) / length(y)),
    geom = "errorbar",
    position = position_dodge(.7), width = .2
  )

Agglomerative hierarchical clustering using R

Plotting a dendrogram from an agglomerative hierarchical clustering does not yield the expected results. I have attached an example of the expected output in the image here. The y axis shows the treatment groups.
My MWE is
library(cluster)
# Gower dissimilarities between the rows of the `cluster` data frame.
# NOTE(review): the result is stored under the name `dist`, which is also the
# name of a base R function; kept as-is here.
dist <- daisy(x = cluster, metric = "gower")
# Hierarchical clustering of those dissimilarities with the "ward.D2" method.
kaari <- hclust(d = dist, method = "ward.D2")
# Draw the dendrogram with smaller text (cex = 0.6) and hang = -1.
plot(x = kaari, cex = 0.6, hang = -1)
Here is the data frame:
structure(list(Variety = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Cal J",
"Pesa F1", "Rambo F1", "Riograde"), class = "factor"), Sample.Part = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("% fruit damage",
"Intermediate", "Lower", "Upper"), class = "factor"), overall = c(8.33,
15.83, 18.33, 18.33, 16.67, 15.83, 17.5, 15, 14.17, 16.67, 15,
18.33, 6.67, 14.17, 6.67, 15.83, 10, 12.5, 10, 15, 35, 55, 50,
25, 12.5, 11.67, 12.5, 13.33, 15.83, 13.33, 14.17, 10, 11.67,
15.83, 8.33, 10.83, 7.5, 7.5, 10.83, 9.17, 5.83, 5.83, 10, 17.5,
20, 12.5, 20, 5, 18.33, 15, 15, 12.5, 10, 15.83, 20.83, 15.83,
18.33, 10, 11.67, 18.33, 10.83, 6.67, 7.5, 14.17, 6.67, 10.83,
37.5, 17.5, 25, 15, 30, 20, 24.17, 22.5, 16.67, 19.17, 14.17,
24.17, 26.67, 20.83, 16.67, 17.5, 14.17, 20, 12.5, 20.83, 11.67,
6.67, 12.5, 11.67, 55, 55, 55, 60, 55, 57.5, 24.17, 28.33, 19.17,
21.67, 20, 18.33, 24.17, 20.83, 17.5, 15, 16.67, 15, 15, 10.83,
11.67, 16.67, 14.17, 10, 30, 45, 55, 42.5, 55, 37.5, 33.33, 20.83,
20, 17.5, 18.33, 20, 28.33, 13.33, 17.5, 13.33, 20.83, 11.67,
11.67, 10.83, 13.33, 8.33, 8.33, 13.33, 55, 40, 55, 52.5, 45,
45, 12.5, 17.5, 15, 21.67, 17.5, 17.5, 14.17, 14.17, 16.67, 14.17,
19.17, 15, 10.83, 13.33, 6.67, 9.17, 8.33, 13.33, 45, 50, 40,
35, 55, 45, 10.83, 9.17, 23.33, 22.5, 15.83, 11.67, 26.67, 8.33,
20, 12.5, 10.83, 18.33, 9.17, 7.5, 9.17, 7.5, 5.83, 13.33, 37.5,
35, 45, 22.5, 30, 25, 15, 13.33, 20, 13.33, 20, 20, 9.17, 21.67,
12.5, 10, 14.17, 24.17, 10.83, 10, 13.33, 9.17, 11.67, 10.83,
45, 45, 42.5, 30, 55, 40, 11.67, 21.67, 18.33, 16.67, 16.67,
16.67, 14.17, 15, 15.83, 20.83, 12.5, 16.67, 10, 12.5, 9.17,
10, 7.5, 6.67, 27.5, 30, 32.5, 45, 17.5, 25, 15.83, 15.83, 17.5,
13.33, 12.5, 13.33, 13.33, 10.83, 19.17, 12.5, 13.33, 12.5, 7.5,
8.33, 9.17, 5.83, 10.83, 10.83, 47.5, 15, 20, 20, 30, 30, 10,
18.33, 12.5, 11.67, 10.83, 13.33, 13.33, 12.5, 10, 10, 13.33,
15, 6.67, 14.17, 7.5, 7.5, 10.83, 7.5, 22.5, 15, 22.5, 20, 25,
15)), .Names = c("Variety", "Sample.Part", "overall"), class = "data.frame", row.names = c(NA,
-288L))
The first and second columns in my data set are categorical, while the third is numeric. I have attached the data here.
Variety Sample.Part overall
Cal J Lower 8.33
Cal J Lower 15.83
Cal J Lower 18.33
Cal J Lower 18.33
Cal J Lower 16.67
Cal J Lower 15.83
Cal J Intermediate 17.50
Cal J Intermediate 15.00
Cal J Intermediate 14.17
Cal J Intermediate 16.67
Cal J Intermediate 15.00
Cal J Intermediate 18.33
Cal J Upper 6.67
Cal J Upper 14.17
Cal J Upper 6.67
Cal J Upper 15.83
Cal J Upper 10.00
Cal J Upper 12.50
Cal J % fruit damage 10.00
Cal J % fruit damage 15.00
Cal J % fruit damage 35.00
Cal J % fruit damage 55.00
Cal J % fruit damage 50.00
I would like to have the factor levels in the first column appear as leaf nodes in the y axis. Any help?

Polygon draws pointwise instead of area ggplot2 and ggmap in r

I would like to draw a map similar to this
My aim is to remove the different point shapes and keep the rest as it is. However, I end up with the following plot.
Except for the polygon shape, everything is correct. How can I change the way the polygon is drawn?
library(data.table)
# Read the plotting data; the columns x, y and cluster are used below.
plot.data <- as.data.frame(fread("plotdata.csv", header = TRUE, sep = ","))
library(ggmap)
library(ggplot2)
map <- get_map(
  location = "california", maptype = "roadmap", zoom = 6,
  source = "google", scale = 1, color = "color"
)
plot.size <- 11
point.size <- 0.85
line.size <- 0.3
# Columns are mapped with aes() instead of the deprecated aes_string(); each
# layer looks up x, y and cluster in its own `data`.
p2 <- ggmap(map) +
  geom_point(
    data = plot.data, size = 1,
    aes(x = x, y = y, color = as.factor(cluster)), alpha = 0.7
  ) +
  scale_shape(solid = TRUE) +
  # This polygon layer is given every row of plot.data, not only an outline of
  # each cluster.
  geom_polygon(
    data = plot.data,
    mapping = aes(
      x = x,
      y = y,
      colour = as.factor(cluster),
      fill = as.factor(cluster)
    ),
    alpha = 0.3
  ) +
  geom_text(aes(label = "", x = -114, y = 41), size = plot.size - 3) +
  xlab(expression(paste("Longitude [", degree, "]"))) +
  ylab(expression(paste("Latitude [", degree, "]"))) +
  theme_bw() +
  theme(
    plot.margin = unit(c(1, 1, 0, 0), "lines"),
    text = element_text(family = "Times"),
    axis.title = element_text(size = plot.size),
    axis.text = element_text(size = plot.size),
    legend.position = "none"
  )
p2
The data is in the following link.
https://mega.nz/#!J9JiwD4R!3cEmCvoE1oDvGoYZtl7Pumw95uQtHqtPvrfR3y5mUc4
get_map sometimes gives an error; please keep trying. Eventually, it downloads the map.
> dput(plot.data)
structure(list(x = c(-114.62, -115.1, -116.26, -116.7, -115.66,
-116.02, -116.86, -115.98, -116.22, -115.22, -115.58, -116.42,
-117.3, -117.46, -116.62, -117.46, -114.74, -114.94, -116.26,
-117.46, -115.62, -116.26, -116.78, -116.58, -117.22, -117.82,
-115.42, -118.1, -114.54, -114.7, -115.42, -115.46, -114.82,
-117.06, -117.1, -117.94, -114.94, -115.86, -117.74, -117.18,
-115.38, -117.3, -115.42, -115.66, -116.62, -118.94, -114.34,
-116.38, -116.98, -117.94, -119.14, -118.02, -118.7, -118.94,
-114.58, -115.34, -116.26, -114.78, -115.5, -114.7, -115.06,
-115.7, -117.74, -116.06, -116.26, -120.58, -118.02, -119.22,
-114.54, -116.1, -116.98, -117.46, -115.94, -119.02, -119.34,
-119.54, -120.5, -115.1, -115.42, -116.46, -116.98, -117.46,
-115.42, -116.02, -116.54, -117.62, -114.7, -115.38, -116.34,
-117.54, -117.86, -116.66, -120.58, -114.9, -117.14, -117.9,
-119.38, -119.9, -117.3, -120.18, -117.5, -120.1, -119.26, -120.38,
-120.46, -115.38, -115.86, -117.34, -116.7, -119.9, -119.14,
-115.82, -116.22, -116.94, -119.3, -116.62, -116.74, -118.74,
-119.1, -119.26, -119.94, -115.78, -115.86, -119.26, -118.3,
-120.46, -118.38, -119.66, -116.62, -118.22, -118.3, -121.02,
-116.5, -117.06, -117.62, -117.74, -118.98, -119.62, -119.98,
-120.74, -120.9, -115.98, -116.66, -118.06, -118.22, -119.1,
-117.82, -119.34, -119.54, -117.26, -117.38, -120.34, -120.94,
-119.66, -120.62, -116.46, -121.42, -116.58, -117.62, -118.7,
-118.94, -120.46, -119.34, -120.86, -116.54, -117.94, -119.86,
-117.1, -119.26, -121.14, -117.86, -119.5, -120.62, -120.66,
-118.46, -119.5, -120.7, -120.74, -119.26, -120.74, -121.58,
-117.26, -120.9, -121.38, -116.82, -117.74, -118.66, -118.98,
-120.22, -117.26, -118.1, -117.14, -117.58, -119.26, -120.66,
-121.18, -118.1, -119.18, -119.26, -117.58, -119.22, -121.06,
-117.38, -118.54, -120.86, -122.02, -117.7, -119.34, -120.7,
-120.14, -120.66, -118.1, -119.06, -117.5, -118.02, -119.14,
-120.74, -121.54, -117.54, -120.02, -121.78, -118.58, -119.66,
-119.7, -117.82, -118.94, -119.38, -119.7, -118.1, -121.7, -121.06,
-117.9, -118.1, -118.9, -118.58, -119.46, -120.7, -120.82, -121.22,
-119.9, -121.5, -122.46, -118.34, -119.42, -119.62, -118.98,
-119.62, -119.74, -120.62, -121.18, -122.18, -122.54, -122.14,
-121.94, -120.22, -120.98, -121.14, -122.98, -122.58, -119.38,
-119.5, -121.06, -121.46, -122.74, -119.1, -121.74, -119.1, -121.46,
-121.9, -121.22, -119.82, -120.74, -120.98, -121.22, -119.34,
-123.02, -122.78, -122.1, -122.62, -120.06, -120.98, -120.9,
-123.34, -120.38, -120.66, -120.46, -123.66, -122.46, -120.14,
-120.1, -120.62, -122.14, -120.82, -121.38, -120.06, -123.18,
-121.42, -120.58, -120.94, -121.18, -121.62, -121.02, -121.82,
-122.3, -120.1, -121.34, -121.46, -122.02, -121.74, -121.9, -120.06,
-120.94, -122.18, -122.66, -123.06, -123.78, -121.5, -120.62,
-121.3, -120.22, -121.02, -120.3, -120.1, -120.98, -120.98, -121.66,
-121.82, -121.54, -121.1, -121.06, -122.98, -123.14, -123.9,
-120.14, -122.3, -120.42, -122.86, -123.46, -122.74, -123.46,
-121.46, -121.54, -122.42, -122.94, -123.94, -120.46, -121.54,
-121.94, -120.86, -123.22, -120.54, -120.98, -123.7, -121.42,
-122.58, -120.82, -121.38, -121.62, -122.06, -120.38, -120.54,
-120.62, -121.58, -123.1, -123.9, -120.94, -121.74, -122.38,
-121.78, -123.54, -121.74, -122.46, -122.78, -122.34, -123.26,
-123.06, -122.94, -120.42, -121.06, -121.1, -120.82, -122.26,
-123.58, -120.58, -121.82, -123.82, -120.58, -120.62, -122.22,
-123.74, -120.5, -123.7, -120.74, -122.42, -122.46, -120.78,
-122.46, -120.42, -123.5, -123.74), y = c(32.81, 32.81, 32.89,
32.97, 33.09, 33.09, 33.17, 33.21, 33.21, 33.29, 33.37, 33.37,
33.41, 33.41, 33.45, 33.53, 33.57, 33.57, 33.61, 33.65, 33.69,
33.73, 33.73, 33.81, 33.81, 33.81, 33.85, 33.85, 33.89, 33.89,
33.93, 33.97, 34.05, 34.09, 34.09, 34.09, 34.13, 34.17, 34.17,
34.21, 34.25, 34.25, 34.29, 34.29, 34.29, 34.29, 34.33, 34.33,
34.37, 34.37, 34.37, 34.41, 34.41, 34.41, 34.45, 34.45, 34.45,
34.49, 34.49, 34.53, 34.53, 34.57, 34.57, 34.61, 34.61, 34.61,
34.65, 34.65, 34.69, 34.69, 34.69, 34.69, 34.73, 34.73, 34.73,
34.73, 34.73, 34.77, 34.77, 34.77, 34.77, 34.81, 34.85, 34.85,
34.93, 34.93, 34.97, 35.01, 35.01, 35.01, 35.01, 35.05, 35.05,
35.09, 35.09, 35.09, 35.09, 35.09, 35.13, 35.13, 35.17, 35.21,
35.29, 35.29, 35.29, 35.33, 35.37, 35.37, 35.45, 35.49, 35.53,
35.57, 35.57, 35.57, 35.57, 35.69, 35.69, 35.69, 35.69, 35.69,
35.69, 35.73, 35.73, 35.73, 35.77, 35.77, 35.81, 35.81, 35.85,
35.85, 35.85, 35.85, 35.89, 35.89, 35.89, 35.89, 35.89, 35.89,
35.89, 35.89, 35.93, 35.97, 35.97, 35.97, 35.97, 35.97, 36.09,
36.09, 36.09, 36.13, 36.13, 36.13, 36.13, 36.17, 36.17, 36.21,
36.25, 36.29, 36.29, 36.29, 36.33, 36.33, 36.37, 36.37, 36.41,
36.41, 36.41, 36.45, 36.45, 36.45, 36.49, 36.49, 36.49, 36.49,
36.53, 36.53, 36.53, 36.57, 36.61, 36.61, 36.61, 36.65, 36.65,
36.65, 36.69, 36.73, 36.73, 36.73, 36.77, 36.81, 36.81, 36.85,
36.85, 36.85, 36.85, 36.85, 36.89, 36.89, 36.89, 36.93, 36.93,
36.93, 36.97, 36.97, 36.97, 36.97, 37.01, 37.01, 37.01, 37.05,
37.05, 37.09, 37.09, 37.17, 37.17, 37.17, 37.17, 37.21, 37.25,
37.25, 37.29, 37.33, 37.33, 37.33, 37.37, 37.37, 37.37, 37.37,
37.41, 37.41, 37.45, 37.49, 37.53, 37.53, 37.57, 37.61, 37.61,
37.61, 37.61, 37.69, 37.69, 37.69, 37.77, 37.77, 37.77, 37.81,
37.81, 37.81, 37.81, 37.81, 37.81, 37.89, 37.97, 38.01, 38.05,
38.05, 38.05, 38.05, 38.09, 38.17, 38.17, 38.17, 38.17, 38.21,
38.25, 38.29, 38.33, 38.33, 38.33, 38.37, 38.41, 38.41, 38.41,
38.41, 38.53, 38.53, 38.57, 38.61, 38.61, 38.65, 38.65, 38.69,
38.73, 38.81, 38.89, 38.93, 38.93, 38.97, 39.01, 39.09, 39.13,
39.13, 39.17, 39.17, 39.21, 39.21, 39.33, 39.41, 39.41, 39.41,
39.41, 39.45, 39.45, 39.53, 39.57, 39.57, 39.57, 39.61, 39.65,
39.69, 39.73, 39.73, 39.73, 39.77, 39.77, 39.77, 39.81, 39.85,
39.85, 39.93, 39.93, 39.97, 40.01, 40.01, 40.05, 40.05, 40.05,
40.13, 40.17, 40.21, 40.21, 40.21, 40.21, 40.25, 40.25, 40.33,
40.33, 40.33, 40.37, 40.37, 40.41, 40.41, 40.41, 40.41, 40.45,
40.49, 40.49, 40.49, 40.61, 40.61, 40.65, 40.65, 40.65, 40.69,
40.69, 40.73, 40.77, 40.81, 40.81, 40.85, 40.85, 40.85, 40.89,
40.89, 40.89, 40.93, 40.97, 41.01, 41.09, 41.09, 41.17, 41.17,
41.17, 41.21, 41.21, 41.25, 41.33, 41.37, 41.37, 41.37, 41.41,
41.41, 41.49, 41.57, 41.57, 41.57, 41.61, 41.61, 41.61, 41.65,
41.69, 41.73, 41.77, 41.77, 41.77, 41.85, 41.85, 41.89, 41.93,
41.97), cluster = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 3L, 3L, 5L, 3L, 3L, 3L, 3L,
3L, 3L, 5L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 5L, 3L, 3L, 2L, 5L, 2L, 3L, 3L, 3L, 5L, 3L, 5L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 5L, 3L, 3L, 3L, 5L, 3L, 3L, 3L, 5L, 5L,
3L, 2L, 3L, 5L, 5L, 2L, 2L, 5L, 2L, 5L, 2L, 2L, 2L, 2L, 3L, 3L,
5L, 5L, 2L, 2L, 3L, 3L, 5L, 2L, 5L, 5L, 5L, 2L, 2L, 2L, 3L, 3L,
2L, 5L, 2L, 5L, 2L, 5L, 5L, 5L, 2L, 5L, 5L, 5L, 5L, 2L, 2L, 2L,
2L, 2L, 5L, 5L, 5L, 5L, 2L, 5L, 2L, 2L, 5L, 5L, 2L, 2L, 2L, 2L,
5L, 2L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 5L, 5L, 2L, 5L, 2L, 2L, 5L,
2L, 2L, 2L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 2L, 2L, 5L, 5L, 2L,
2L, 2L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 5L, 2L, 2L, 5L, 2L, 2L, 5L,
5L, 2L, 1L, 5L, 2L, 2L, 2L, 2L, 5L, 2L, 5L, 5L, 2L, 2L, 1L, 5L,
2L, 1L, 2L, 2L, 2L, 5L, 2L, 2L, 2L, 5L, 1L, 1L, 5L, 5L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 5L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L)), .Names = c("x", "y", "cluster"), row.names = c(NA,
-400L), class = "data.frame")
You can use chull() (Compute Convex Hull of a Set of Points) to reduce each cluster to its outline before drawing the polygon.
# Return the rows of `df` that chull() selects from its x and y columns,
# in the order chull() reports them.
find_hull <- function(df) {
  hull_idx <- chull(df$x, df$y)
  df[hull_idx, ]
}
# Apply find_hull to the rows of each cluster and combine the results into
# one data frame.
hulls <- plyr::ddply(
  .data = plot.data,
  .variables = "cluster",
  .fun = find_hull
)
# Same map as in the question, but the polygon layer is drawn from `hulls`
# rather than from every row of plot.data. Columns are mapped with aes()
# instead of the deprecated aes_string(); each layer looks up x, y and
# cluster in its own `data`.
p2 <- ggmap(map) +
  geom_point(
    data = plot.data, size = 1,
    aes(x = x, y = y, color = as.factor(cluster)), alpha = 0.7
  ) +
  scale_shape(solid = TRUE) +
  geom_polygon(
    data = hulls,
    mapping = aes(
      x = x,
      y = y,
      colour = as.factor(cluster),
      fill = as.factor(cluster)
    ),
    alpha = 0.3
  ) +
  geom_text(aes(label = "", x = -114, y = 41), size = plot.size - 3) +
  xlab(expression(paste("Longitude [", degree, "]"))) +
  ylab(expression(paste("Latitude [", degree, "]"))) +
  theme_bw() +
  theme(
    plot.margin = unit(c(1, 1, 0, 0), "lines"),
    text = element_text(family = "Times"),
    axis.title = element_text(size = plot.size),
    axis.text = element_text(size = plot.size),
    legend.position = "none"
  )
p2

Changing labels R ggplot in two variable facet wrapped plot

I'm trying to create a facet wrapped ggplot boxplot with dataframe dataw and I'm trying to modify the labels of each subplot.
dataw <- structure(list(base = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("A", "C", "G", "T"), class = "factor"), pos = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L), values = c(13, 22, 16, 21, 52, 1,
1.709, 2.121, 2.061, 2.233, 3.388, 1, 5, 6, 6, 2, 1, 0.856, 1.116,
1.207, 1.175, 0.95, 76, 45, 5, 1, 1, 15, 8.558, 5.44, 1.147,
0.857, 0.831, 10, 7, 40, 4, 10, 5, 1.547, 1.174, 4.777, 1.071,
1.356, 7, 0, 1, 6, 1, 8, 1.322, 0.728, 0.83, 1.178, 0.831, 4,
2, 0, 1, 3, 0, 1.098, 0.96, 0.63, 0.888, 1.013, 13, 22, 16, 21,
52, 1, 1.709, 2.121, 2.061, 2.233, 3.388, 3, 6, 7, 2, 9, 11,
0.952, 1.474, 1.45, 0.967, 1.306, 13, 22, 16, 21, 52, 1, 1.709,
2.121, 2.061, 2.233, 3.388, 3, 8, 15, 0, 5, 2, 1.014, 1.583,
2.289, 0.773, 1.135, 10, 3, 8, 1, 4, 2, 1.504, 1.03, 1.244, 0.884,
1.047, 4, 1, 0, 2, 5, 1, 1.066, 0.862, 0.689, 0.963, 1.125, 2,
0, 0, 2, 0, 1, 0.919, 0.723, 0.479, 0.922, 0.721, 7, 8, 0, 8,
7, 0, 1.299, 1.236, 0.779, 1.298, 1.224, 13, 22, 16, 21, 52,
1, 1.709, 2.121, 2.061, 2.233, 3.388, 45, 38, 41, 13, 34, 1,
2.817, 2.264, 2.398, 1.374, 3.848, 3, 0, 1, 1, 2, 14, 0.973,
0.641, 0.846, 0.866, 0.909, 13, 22, 16, 21, 52, 1, 1.709, 2.121,
2.061, 2.233, 3.388, 7, 0, 0, 1, 2, 1, 1.37, 0.436, 0.706, 0.685,
0.902, 0, 5, 5, 0, 7, 1, 0.597, 1.113, 1.079, 0.71, 1.222, 3,
1, 4, 0, 23, 8, 0.992, 0.84, 1.07, 0.762, 2.399, 17, 7, 18, 6,
10, 1, 2.4, 1.315, 1.948, 1.135, 1.306, 21, 8, 50, 4, 6, 12,
2.412, 1.254, 3.857, 1.075, 1.168, 13, 22, 16, 21, 52, 1, 1.709,
2.121, 2.061, 2.233, 3.388), type = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L), .Label = c("ipdRatio", "score"), class = "factor"),
labels = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L
), .Label = c("D<U+2192>", "G<U+2192>", "A<U+2192>", "K<U+2192>",
"C<U+2192>", "T<U+2192>"), class = "factor")), .Names = c("base",
"pos", "values", "type", "labels"), row.names = c("1", "2", "3",
"4", "5", "3942", "3943", "3944", "3945", "3946", "3947", "11",
"21", "31", "41", "51", "63", "64", "65", "66", "67", "68", "12",
"22", "32", "42", "52", "2953", "2954", "2955", "2956", "2957",
"2958", "13", "23", "33", "43", "53", "2461", "2462", "2463",
"2464", "2465", "2466", "14", "24", "34", "44", "54", "7493",
"7494", "7495", "7496", "7497", "7498", "111", "214", "311",
"411", "511", "4874", "4875", "4876", "4877", "4878", "4879",
"121", "221", "321", "421", "521", "9356", "9357", "9358", "9359",
"9360", "9361", "131", "231", "331", "431", "531", "9221", "9222",
"9223", "9224", "9225", "9226", "15", "25", "35", "45", "55",
"93561", "93571", "93581", "93591", "93601", "93611", "112",
"215", "312", "412", "512", "1579", "1580", "1581", "1582", "1583",
"1584", "122", "222", "322", "422", "522", "1782", "1783", "1784",
"1785", "1786", "1787", "132", "232", "332", "432", "532", "3398",
"3399", "3400", "3401", "3402", "3403", "16", "26", "36", "46",
"56", "2257", "2258", "2259", "2260", "2261", "2262", "113",
"216", "313", "413", "513", "1027", "1028", "1029", "1030", "1031",
"1032", "123", "223", "323", "423", "523", "8654", "8655", "8656",
"8657", "8658", "8659", "133", "233", "333", "433", "539", "702",
"703", "704", "705", "706", "707", "17", "27", "37", "47", "57",
"8123", "8124", "8125", "8126", "8127", "8128", "114", "217",
"314", "414", "514", "93562", "93572", "93582", "93592", "93602",
"93612", "124", "224", "324", "424", "524", "3700", "3701", "3702",
"3703", "3704", "3705", "134", "234", "334", "434", "5310", "8233",
"8234", "8235", "8236", "8237", "8238", "18", "28", "38", "48",
"58", "1542", "1543", "1544", "1545", "1546", "1547", "115",
"218", "315", "415", "515", "533", "534", "535", "536", "537",
"538", "125", "225", "325", "425", "525", "208", "209", "210",
"211", "212", "213", "135", "235", "335", "435", "5311", "93563",
"93573", "93583", "93593", "93603", "93613"), class = "data.frame")
These are the first few rows of dataw:
# Preview the first six rows of dataw (six is head()'s default row count).
head(dataw, n = 6L)
base pos values type labels
1 A 1 13 score D<U+2192>
2 A 1 22 score D<U+2192>
3 A 1 16 score D<U+2192>
4 A 1 21 score D<U+2192>
5 A 1 52 score D<U+2192>
3942 A 1 1 score D<U+2192>
I'm plotting it like so.
# Theme overrides layered on top of a base theme: unfilled panel with a gray
# border, no horizontal grid lines, thin black vertical grid lines (dotted for
# the major ones), and the legend placed below the plot.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# line elements -- confirm against the installed version before changing.
prettify <- theme(
  panel.background = element_rect(fill = NA, color = "gray"),
  panel.grid.major.y = element_blank(),
  panel.grid.major.x = element_line(
    size = 0.1,
    color = "black",
    linetype = "dotted"
  ),
  panel.grid.minor.y = element_blank(),
  panel.grid.minor.x = element_line(size = 0.1, color = "black"),
  legend.position = "bottom"
)
# Boxplots of `values` per `base`, coloured by `type`, with one panel for each
# type/pos combination arranged in two rows; only the y axis varies per panel.
ggplot(
  data = dataw,
  mapping = aes(x = base, y = values, color = type, group = base)
) +
  geom_boxplot() +
  facet_wrap(type ~ pos, scales = "free_y", nrow = 2) +
  theme_gray() %+replace% prettify
Currently, each subplot label is the type value followed by a comma and the pos value. However, I would like to drop the type value and label each subplot in the format: "Position [pos value], [labels value]"
What would be the best way to go about this? Thank you.
Try replacing the entire ggplot statement with
# Add a `plt_labels` column holding the strip text "Position <pos>, <labels>",
# then facet on it: rows are split by `type`, columns by the new label.
ggplot(
  data = transform(
    dataw,
    plt_labels = paste0("Position ", pos, ", ", labels)
  ),
  mapping = aes(x = base, y = values, color = type, group = base)
) +
  geom_boxplot() +
  facet_grid(type ~ plt_labels, scales = "free_y") +
  theme_gray() %+replace% prettify
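which should give panels whose column labels read "Position [pos value], [labels value]" (the original answer showed the resulting plot here).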

R ggplot2 Facet wrapping with four boxplots in each plot

I have a dataframe called dataw that I'm trying to plot into dual facet wrapped boxplots.
dataw <- structure(list(base = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("A", "C", "G", "T"), class = "factor"), pos = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L), values = c(13, 22, 16, 1, 1.709,
2.121, 2.061, 1, 5, 6, 1, 0.856, 1.116, 1.207, 76, 45, 5, 15,
8.558, 5.44, 1.147, 10, 7, 40, 5, 1.547, 1.174, 4.777, 7, 0,
1, 8, 1.322, 0.728, 0.83, 4, 2, 0, 0, 1.098, 0.96, 0.63, 13,
22, 16, 1, 1.709, 2.121, 2.061, 3, 6, 7, 11, 0.952, 1.474, 1.45,
13, 22, 16, 1, 1.709, 2.121, 2.061, 3, 8, 15, 2, 1.014, 1.583,
2.289, 10, 3, 8, 2, 1.504, 1.03, 1.244, 4, 1, 0, 1, 1.066, 0.862,
0.689, 2, 0, 0, 1, 0.919, 0.723, 0.479, 7, 8, 0, 0, 1.299, 1.236,
0.779, 13, 22, 16, 1, 1.709, 2.121, 2.061, 45, 38, 41, 1, 2.817,
2.264, 2.398, 3, 0, 1, 14, 0.973, 0.641, 0.846, 13, 22, 16, 1,
1.709, 2.121, 2.061, 7, 0, 0, 1, 1.37, 0.436, 0.706, 0, 5, 5,
1, 0.597, 1.113, 1.079, 3, 1, 4, 8, 0.992, 0.84, 1.07, 17, 7,
18, 1, 2.4, 1.315, 1.948, 21, 8, 50, 12, 2.412, 1.254, 3.857,
13, 22, 16, 1, 1.709, 2.121, 2.061), type = structure(c(2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L), .Label = c("ipdRatio", "score"), class = "factor")), .Names = c("base",
"pos", "values", "type"), row.names = c("1", "2", "3", "3942",
"3943", "3944", "3945", "11", "21", "31", "63", "64", "65", "66",
"12", "22", "32", "2953", "2954", "2955", "2956", "13", "23",
"33", "2461", "2462", "2463", "2464", "14", "24", "34", "7493",
"7494", "7495", "7496", "111", "212", "311", "4874", "4875",
"4876", "4877", "121", "221", "321", "9356", "9357", "9358",
"9359", "131", "231", "331", "9221", "9222", "9223", "9224",
"15", "25", "35", "93561", "93571", "93581", "93591", "112",
"213", "312", "1579", "1580", "1581", "1582", "122", "222", "322",
"1782", "1783", "1784", "1785", "132", "232", "332", "3398",
"3399", "3400", "3401", "16", "26", "36", "2257", "2258", "2259",
"2260", "113", "214", "313", "1027", "1028", "1029", "1030",
"123", "223", "323", "8654", "8655", "8656", "8657", "133", "233",
"333", "702", "703", "704", "705", "17", "27", "37", "8123",
"8124", "8125", "8126", "114", "215", "314", "93562", "93572",
"93582", "93592", "124", "224", "324", "3700", "3701", "3702",
"3703", "134", "234", "334", "8233", "8234", "8235", "8236",
"18", "28", "38", "1542", "1543", "1544", "1545", "115", "216",
"315", "533", "534", "535", "536", "125", "225", "325", "208",
"209", "210", "211", "135", "235", "335", "93563", "93573", "93583",
"93593"), class = "data.frame")
I'm plotting it like this:
# Theme tweaks applied over the base theme: transparent panel with a gray
# outline, horizontal grid lines removed, thin black vertical grid lines
# (major ones dotted), legend at the bottom.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# line elements -- confirm against the installed version before changing.
prettify <- theme(
  panel.background = element_rect(fill = NA, color = "gray"),
  panel.grid.major.y = element_blank(),
  panel.grid.major.x = element_line(
    size = 0.1,
    color = "black",
    linetype = "dotted"
  ),
  panel.grid.minor.y = element_blank(),
  panel.grid.minor.x = element_line(size = 0.1, color = "black"),
  legend.position = "bottom"
)
# Boxplots of `values` against `base`, coloured by `type`, with one panel per
# type/pos combination in two rows and both axes free per panel.
# NOTE(review): the explicit `group = type` presumably replaces the default
# per-`base` grouping, which would explain a single box per panel -- confirm.
ggplot(
  data = dataw,
  mapping = aes(x = base, y = values, color = type, group = type)
) +
  geom_boxplot() +
  facet_wrap(type ~ pos, scales = "free", nrow = 2) +
  theme_gray() %+replace% prettify
But I keep getting only one boxplot in each panel, when in fact I want four boxplots (one per base) in each panel.
Does anyone see what I am doing wrong here? Thanks!

Resources