Related
I am running a logistic regression model on complex survey data using the survey package in R. After fitting the model, I performed regression diagnostics using the car package. I noticed outlying and influential observations that I would like to remove, and then refit the model to check their effect on the regression coefficients, but my current approach is not giving me what I expect.
My dataset has about 10,000 observations. Here is sample data and code I have tried using:
library(car); library(survey)
dat <- structure(list(id = c(1009918, 1012826, 1029625, 1000926, 1027525,
1000115, 1000201, 1000202, 1000214, 1000219, 1000313, 1000324,
1000510, 1000521, 1000624, 1000708, 1000811, 1000817, 1000818,
1000906, 1000922, 1001002, 1001005, 1001401, 1001411, 1001413,
1001420, 1001424, 1001501, 1001510, 1001518, 1001526, 1001621,
1001807, 1001922, 1001926, 1002106, 1002217, 1002406, 1002416,
1002618, 1002709, 1003004, 1003017, 1003103, 1003108, 1003304,
1003319, 1003723, 1003804, 1003811, 1003819, 1004014, 1008902,
1008913, 1009011, 1009022, 1009123, 1009212, 1009215), strata = c(1,
2, 6, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), cluster = c(785,
938, 2337, 28, 2122, 3, 6, 6, 6, 6, 10, 10, 16, 16, 19, 22, 24,
24, 24, 28, 28, 33, 33, 45, 45, 45, 45, 45, 50, 50, 50, 50, 53,
60, 63, 63, 69, 74, 96, 96, 100, 102, 111, 111, 115, 115, 122,
122, 178, 193, 193, 193, 210, 755, 755, 759, 759, 762, 765, 765
), weights = c(621.921704979739, 5440.9107594311, 8450.49341643626,
2457.37241774248, 7174.79930450487, 930.492019594546, 443.253676607562,
443.253676607562, 886.507353215123, 443.253676607562, 1552.30979801343,
517.436599337811, 403.146111343943, 806.292222687886, 439.775494378883,
839.561001668328, 1210.77101540146, 403.590338467152, 403.590338467152,
457.23211170669, 914.464223413381, 584.557580338056, 584.557580338056,
233.135312658304, 233.135312658304, 233.135312658304, 466.270625316608,
233.135312658304, 287.94933168791, 287.94933168791, 287.94933168791,
287.94933168791, 2354.32022397843, 213.628591090648, 300.596873749779,
300.596873749779, 1121.27419052962, 528.482361549292, 1936.60489456861,
1291.06992971241, 282.360930726457, 3526.73915258957, 337.531162185852,
337.531162185852, 2183.63202546241, 2729.54003182802, 1035.32340123929,
1552.98510185893, 1400.62601417017, 717.92144006312, 358.96072003156,
1435.84288012624, 275.058410167952, 557.874242565598, 278.937121282799,
1687.48015279064, 1012.48809167438, 424.663883556537, 227.805527040477,
227.805527040477), age = c(20, 19, 93, 24, 18, 23, 22, 23, 24,
19, 18, 24, 20, 19, 18, 17, 19, 23, 19, 19, 21, 22, 21, 20, 23,
24, 24, 19, 21, 22, 20, 23, 21, 23, 20, 22, 23, 15, 20, 23, 24,
18, 24, 24, 15, 21, 24, 16, 22, 20, 20, 18, 21, 20, 21, 21, 24,
22, 24, 18), gender = structure(c(1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), levels = c("Male", "Female"), class = "factor"),
educ = structure(c(4L, 2L, 1L, 3L, 2L, 3L, 2L, 2L, 2L, 2L,
3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 4L, 2L, 3L, 2L,
4L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L,
4L, 4L, 4L, 3L, 3L), levels = c("No formal education", "Primary",
"Secondary", "Tertiary"), class = "factor"), employ = structure(c(4L,
3L, 4L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 4L, 3L, 2L, 2L, 2L, 4L,
3L, 3L, 3L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L,
4L, 4L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 4L, 3L, 2L, 4L, 4L,
2L, 3L, 1L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 4L, 3L, 4L), levels = c("Unemployed",
"Employed", "Self-employed", "Other"), class = "factor"),
know = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), levels = c("No/Don't know", "Yes"), class = "factor"),
status = structure(c(2L, 4L, 2L, 1L, 5L, 3L, 2L, 2L, 2L,
4L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 5L, 4L, 2L, 5L, 5L, 4L, 3L,
2L, 3L, 2L, 2L, 2L, 2L, 4L, 3L, 3L, 4L, 3L, 2L, 2L, 3L, 1L,
2L, 1L, 1L, 2L, 2L, 2L), levels = c("1", "2", "3", "4", "5"
), class = "factor"), smoker = structure(c(2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("no",
"yes"), class = "factor")), row.names = c(81L, 4174L, 6722L,
1255L, 2712L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L), class = "data.frame")
## Create survey design object: PSUs are `cluster`, nested within `strata`
## (nest = TRUE), with sampling weights taken from `weights`.
## `strata` is spelled out in full rather than relying on partial argument
## matching of `strat`.
dat_svy <- survey::svydesign(
  ids = ~cluster,
  strata = ~strata,
  weights = ~weights,
  data = dat,
  nest = TRUE
)
## Several strata in the sample data contain a single PSU (e.g. strata 2, 5, 6)
options(survey.lonely.psu = "adjust")
## Fit logistic regression model (each predictor listed once; `educ` was
## previously repeated in the formula)
mod <- survey::svyglm(
  formula = smoker ~ age + educ + gender + employ + know + status,
  design = dat_svy,
  family = "quasibinomial"
)
I have tried the following:
# Two attempts at refitting `mod` without observations 2, 5, 9, 13 and 21.
# NOTE(review): c(2, 5, 9, 13, 21) reads like positional row indices, but the
# row names of `dat` are not 1..n (they begin 81, 4174, 6722, 1255, 2712, 1, ...),
# so matching on rownames() and negative positional indexing would refer to
# different rows -- confirm which observations are meant.
update(mod, subset = !(rownames(dat_svy) %in% c(2, 5, 9, 13, 21))) # returns an error
update(mod, subset = -c(2, 5, 9, 13, 21)) # only removes one (first specified) observation
I have the following dataset:
data <- structure(list(Year = structure(c(1L, 2L, 1L, 2L, 2L, 1L, 3L, 1L, 3L, 1L, 2L, 2L, 3L, 2L, 3L,
1L, 3L, 2L, 2L, 2L, 3L, 1L, 2L, 3L, 3L, 3L, 2L, 1L, 3L, 1L,
1L, 2L, 1L, 2L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 3L,
3L, 2L, 3L, 2L, 1L, 1L, 2L, 2L, 1L),
.Label = c("2013", "2014", "2015"),
class = "factor"),
Place = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L),
.Label = c("Inside", "Outside"),
class = "factor"),
Variable = structure(c(6L, 4L, 8L, 13L, 16L, 11L, 12L, 13L, 4L, 10L, 10L, 11L,
1L, 3L, 13L, 7L, 11L, 7L, 6L, 2L, 6L, 1L, 1L, 7L, 5L,
3L, 14L, 3L, 14L, 2L, 9L, 6L, 6L, 9L, 2L, 5L, 9L, 5L,
9L, 9L, 15L, 1L, 13L, 3L, 6L, 3L, 3L, 9L, 15L, 1L, 13L,
1L, 13L, 15L),
.Label = c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8",
"Y1", "Y2", "Y3", "Y4", "Y5", "Y6", "Y7", "Y8"),
class = "factor"),
Group = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L),
.Label = c("Var1", "Var2"),
class = "factor"),
Percent = c(0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.7, 0.7, 1.3, 1.6, 1.9, 1.9, 2.3, 2.7,
2.9, 3.4, 3.7, 4.5, 4.7, 5.3, 5.7, 6.2, 7.6, 7.9, 10.6, 10.7, 12.5,
13.3, 14.4, 15.4, 15.8, 16.9, 17.7, 19.6, 20.5, 24.8, 25.3, 30.4, 31,
36.8, 41.6, 43.9, 43.9, 44.2, 45.4, 51.8, 52.8, 56.1, 57.4, 68.9, 68.9,
80.4, 80.4, 81.5)),
class = "data.frame", row.names = c(NA, -54L))
I would really like to display the data in a multilevel plot like this:
I tried it by doing:
library(ggplot2)
# Proportion-filled stacked bars (one per Group, coloured by Variable), drawn
# in polar coordinates with theta mapped to y; panels are Year (rows) by
# Place (columns). Each segment is labelled "<Percent> % <Variable>".
ggplot(data = data, mapping = aes(x = Group, y = Percent, fill = Variable)) +
  geom_col(position = "fill") +
  geom_text(
    mapping = aes(label = paste(Percent, "%", Variable)),
    position = position_fill(vjust = 0.9),
    size = 3
  ) +
  facet_grid(Year ~ Place) +
  coord_polar(theta = "y")
But because some percentages are very low, the labels overlap. I would like to place the labels outside, like in the example, if that's possible.
I have looked at the other forum topics, but because my data is structured in a different way I wasn't able to translate their solutions to my case. The other problem is that this is just an example, and my actual data input depends on the Shiny input. So fixing specific angles for this example also doesn't work.
I would be very grateful if anyone could help me.
I have been struggling with ggplot to display these plots how I would like. My data have 2 factors, quarter and species. Station will be on the x-axis, value on the y-axis, and the constituent will be used with the facet_wrap. I want quarter differentiated with shapes, and species with colors.
The issue is I'm trying to replicate a figure done in SigmaPlot. It is a 4x4 grid of plots, with the first two rows of the first column left empty to allow for the placement of the legend. My original plan was to have two separate facets made using facet_wrap, and combine those; however, this doesn't maintain the 4x4 arrangement. It transforms it into a 1x2, which ruins the alignment of the plots and shrinks the larger faceted grid.
My next thought was to create each plot individually, then arrange them in a grid using cowplot. This presents the plots how I'd like them arranged, but I can't figure out how to have two y-axis labels, due to different units. One label would be centered on the two leftmost plots, and one centered on the left of the next column of 4 plots.
I'm trying to use this code (just copy the example data below, and run):
library(ggplot2) # the package is called ggplot2; library(ggplot) fails to load
library(gridExtra)

# Rows 1-95 hold constituents A-L (3-column facet); rows 96-111 hold XX and YY
# (single-column facet with its own y-axis unit).
test.data1 <- test.data[1:95, ]
test.data2 <- test.data[96:111, ]

# Constituents A-L: legend suppressed here, it is shown on testplot2 instead.
testplot1 <- ggplot(test.data1, aes(Station, value)) +
  geom_point(aes(shape = factor(quarter), fill = Species)) +
  scale_shape_manual(values = c(21, 22)) +
  labs(x = "Station", y = "Unit a", shape = "Sampling Quarter", fill = "Species") +
  theme(legend.position = "none", legend.title = element_blank()) +
  guides(
    # shapes 21/22 are fillable, so draw the fill legend keys with shape 21
    fill = guide_legend(override.aes = list(shape = 21), nrow = 2, byrow = TRUE),
    shape = guide_legend(nrow = 2, byrow = TRUE)
  ) +
  facet_wrap(~ constituent, ncol = 3, scales = "free_y")

# Constituents XX/YY: carries the legend on top.
# The `+` after geom_point() is required; without it the assignment ends there
# and the scale/labs/theme/guides/facet layers are never added to testplot2.
testplot2 <- ggplot(test.data2, aes(Station, value)) +
  geom_point(aes(shape = factor(quarter), fill = Species)) +
  scale_shape_manual(values = c(21, 22)) +
  labs(x = "Station", y = "Unit b", shape = "Sampling Quarter", fill = "Species") +
  theme(legend.position = "top", legend.title = element_blank()) +
  guides(
    fill = guide_legend(override.aes = list(shape = 21), nrow = 2, byrow = TRUE),
    shape = guide_legend(nrow = 2, byrow = TRUE)
  ) +
  facet_wrap(~ constituent, ncol = 1, scales = "free_y")

# Place the XX/YY column to the left of the A-L grid.
grid.arrange(testplot2, testplot1, ncol = 2)
Which generates this:
But I want it to be arranged like this, where the XX and YY plots from above are normalized in size with the other plots (this was done using individual plots, and using plot_grid):
Example data from a larger set:
test.data <- structure(list(Station = structure(c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("StA", "StB"), class = "factor"),
CollectionDate = structure(c(3L, 2L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L,
3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L,
1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L,
3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 1L, 3L, 2L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L,
3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L), .Label = c("10/1/2017",
"10/16/2017", "4/1/2017"), class = "factor"), Species = structure(c(1L,
2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L,
1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L,
3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L,
2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L,
2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L,
1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L,
3L, 1L, 2L, 2L, 3L), .Label = c("SpA", "SpB", "SpC"), class = "factor"),
quarter = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("2017 Q2",
"2017 Q4"), class = "factor"), constituent = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L
), .Label = c("A", "B", "C", "D", "E", "F", "G", "H", "I",
"J", "K", "L", "XX", "YY"), class = "factor"), value = c(16,
35, 46, 23, 40, 19, 9, 50, 0.2, 1, 0.5698, 0.322, 1, 0.45,
0.322, 0.5, 16, 9, 6, 19, 14, 13, 16, 9, 0, 0.004, 0, 0.004,
1, 0.32, 1, 0.678, 0, 0.39, 0.23, 0, 0, 1.1, 0.5, 0.5, 9,
4.9, 7, 4.768, 9, 8.65, 4.768, 6.54, 195, 195, 46, 46, 124,
124, 218, 218, 2, 1, 1, 1, 1, 2, 1, 1, 0.1, 0.4, 0.22, 0.4,
0.22, 0.4, 0.22, 0.1, 0.99, 0.99, 1.2, 0.45, 0.765, 0.99,
0.99, 0.99, 0.99, 1.2, 4.3, 0.98, 0.99, 1.2, 1.2, 34, 34,
65, 98, 150, 34, 65, 65, 2, 0, 4, 1.3, 5, 3.3, 1.56, 1, 9,
0.36, 4, 4, 11, 2, 2.22, 11)), class = "data.frame", row.names = c(NA,
-111L))
I'm trying to create a facet wrapped ggplot boxplot with dataframe dataw and I'm trying to modify the labels of each subplot.
dataw <- structure(list(base = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("A", "C", "G", "T"), class = "factor"), pos = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L), values = c(13, 22, 16, 21, 52, 1,
1.709, 2.121, 2.061, 2.233, 3.388, 1, 5, 6, 6, 2, 1, 0.856, 1.116,
1.207, 1.175, 0.95, 76, 45, 5, 1, 1, 15, 8.558, 5.44, 1.147,
0.857, 0.831, 10, 7, 40, 4, 10, 5, 1.547, 1.174, 4.777, 1.071,
1.356, 7, 0, 1, 6, 1, 8, 1.322, 0.728, 0.83, 1.178, 0.831, 4,
2, 0, 1, 3, 0, 1.098, 0.96, 0.63, 0.888, 1.013, 13, 22, 16, 21,
52, 1, 1.709, 2.121, 2.061, 2.233, 3.388, 3, 6, 7, 2, 9, 11,
0.952, 1.474, 1.45, 0.967, 1.306, 13, 22, 16, 21, 52, 1, 1.709,
2.121, 2.061, 2.233, 3.388, 3, 8, 15, 0, 5, 2, 1.014, 1.583,
2.289, 0.773, 1.135, 10, 3, 8, 1, 4, 2, 1.504, 1.03, 1.244, 0.884,
1.047, 4, 1, 0, 2, 5, 1, 1.066, 0.862, 0.689, 0.963, 1.125, 2,
0, 0, 2, 0, 1, 0.919, 0.723, 0.479, 0.922, 0.721, 7, 8, 0, 8,
7, 0, 1.299, 1.236, 0.779, 1.298, 1.224, 13, 22, 16, 21, 52,
1, 1.709, 2.121, 2.061, 2.233, 3.388, 45, 38, 41, 13, 34, 1,
2.817, 2.264, 2.398, 1.374, 3.848, 3, 0, 1, 1, 2, 14, 0.973,
0.641, 0.846, 0.866, 0.909, 13, 22, 16, 21, 52, 1, 1.709, 2.121,
2.061, 2.233, 3.388, 7, 0, 0, 1, 2, 1, 1.37, 0.436, 0.706, 0.685,
0.902, 0, 5, 5, 0, 7, 1, 0.597, 1.113, 1.079, 0.71, 1.222, 3,
1, 4, 0, 23, 8, 0.992, 0.84, 1.07, 0.762, 2.399, 17, 7, 18, 6,
10, 1, 2.4, 1.315, 1.948, 1.135, 1.306, 21, 8, 50, 4, 6, 12,
2.412, 1.254, 3.857, 1.075, 1.168, 13, 22, 16, 21, 52, 1, 1.709,
2.121, 2.061, 2.233, 3.388), type = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L), .Label = c("ipdRatio", "score"), class = "factor"),
labels = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L
), .Label = c("D<U+2192>", "G<U+2192>", "A<U+2192>", "K<U+2192>",
"C<U+2192>", "T<U+2192>"), class = "factor")), .Names = c("base",
"pos", "values", "type", "labels"), row.names = c("1", "2", "3",
"4", "5", "3942", "3943", "3944", "3945", "3946", "3947", "11",
"21", "31", "41", "51", "63", "64", "65", "66", "67", "68", "12",
"22", "32", "42", "52", "2953", "2954", "2955", "2956", "2957",
"2958", "13", "23", "33", "43", "53", "2461", "2462", "2463",
"2464", "2465", "2466", "14", "24", "34", "44", "54", "7493",
"7494", "7495", "7496", "7497", "7498", "111", "214", "311",
"411", "511", "4874", "4875", "4876", "4877", "4878", "4879",
"121", "221", "321", "421", "521", "9356", "9357", "9358", "9359",
"9360", "9361", "131", "231", "331", "431", "531", "9221", "9222",
"9223", "9224", "9225", "9226", "15", "25", "35", "45", "55",
"93561", "93571", "93581", "93591", "93601", "93611", "112",
"215", "312", "412", "512", "1579", "1580", "1581", "1582", "1583",
"1584", "122", "222", "322", "422", "522", "1782", "1783", "1784",
"1785", "1786", "1787", "132", "232", "332", "432", "532", "3398",
"3399", "3400", "3401", "3402", "3403", "16", "26", "36", "46",
"56", "2257", "2258", "2259", "2260", "2261", "2262", "113",
"216", "313", "413", "513", "1027", "1028", "1029", "1030", "1031",
"1032", "123", "223", "323", "423", "523", "8654", "8655", "8656",
"8657", "8658", "8659", "133", "233", "333", "433", "539", "702",
"703", "704", "705", "706", "707", "17", "27", "37", "47", "57",
"8123", "8124", "8125", "8126", "8127", "8128", "114", "217",
"314", "414", "514", "93562", "93572", "93582", "93592", "93602",
"93612", "124", "224", "324", "424", "524", "3700", "3701", "3702",
"3703", "3704", "3705", "134", "234", "334", "434", "5310", "8233",
"8234", "8235", "8236", "8237", "8238", "18", "28", "38", "48",
"58", "1542", "1543", "1544", "1545", "1546", "1547", "115",
"218", "315", "415", "515", "533", "534", "535", "536", "537",
"538", "125", "225", "325", "425", "525", "208", "209", "210",
"211", "212", "213", "135", "235", "335", "435", "5311", "93563",
"93573", "93583", "93593", "93603", "93613"), class = "data.frame")
These are the first few rows of dataw
head(dataw)
base pos values type labels
1 A 1 13 score D<U+2192>
2 A 1 22 score D<U+2192>
3 A 1 16 score D<U+2192>
4 A 1 21 score D<U+2192>
5 A 1 52 score D<U+2192>
3942 A 1 1 score D<U+2192>
I'm plotting it like so.
# Theme overrides applied on top of theme_gray(): no fill behind the panel
# (gray outline), vertical grid lines only, legend underneath the plot.
prettify <- theme(
  legend.position = "bottom",
  panel.background = element_rect(fill = NA, color = "gray"),
  panel.grid.major.x = element_line(size = .1, color = "black", linetype = "dotted"),
  panel.grid.minor.x = element_line(size = .1, color = "black"),
  panel.grid.major.y = element_blank(),
  panel.grid.minor.y = element_blank()
)

# One box per base, coloured by type; one panel per type/pos combination,
# laid out in two rows with independent y scales.
ggplot(data = dataw, mapping = aes(x = base, y = values, color = type, group = base)) +
  geom_boxplot() +
  facet_wrap(type ~ pos, nrow = 2, scales = "free_y") +
  theme_gray() %+replace% prettify
Currently the sublabels are the type value followed by a comma and the pos value. However I would like to get rid of the type value, and label it so that the labels of each subplot are in the format: "Position [pos value], [labels value]"
What would be the best way to go about this? Thank you.
Try replacing the entire ggplot statement with
# Add a column holding the strip text "Position <pos>, <labels>" and facet on
# it (columns), keeping `type` as the facet rows.
ggplot(
  data = transform(dataw, plt_labels = paste0("Position ", pos, ", ", labels)),
  mapping = aes(x = base, y = values, color = type, group = base)
) +
  geom_boxplot() +
  facet_grid(type ~ plt_labels, scales = "free_y") +
  theme_gray() %+replace% prettify
which should give
I have a dataframe called dataw that I'm trying to plot into dual facet wrapped boxplots.
dataw <- structure(list(base = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("A", "C", "G", "T"), class = "factor"), pos = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L), values = c(13, 22, 16, 1, 1.709,
2.121, 2.061, 1, 5, 6, 1, 0.856, 1.116, 1.207, 76, 45, 5, 15,
8.558, 5.44, 1.147, 10, 7, 40, 5, 1.547, 1.174, 4.777, 7, 0,
1, 8, 1.322, 0.728, 0.83, 4, 2, 0, 0, 1.098, 0.96, 0.63, 13,
22, 16, 1, 1.709, 2.121, 2.061, 3, 6, 7, 11, 0.952, 1.474, 1.45,
13, 22, 16, 1, 1.709, 2.121, 2.061, 3, 8, 15, 2, 1.014, 1.583,
2.289, 10, 3, 8, 2, 1.504, 1.03, 1.244, 4, 1, 0, 1, 1.066, 0.862,
0.689, 2, 0, 0, 1, 0.919, 0.723, 0.479, 7, 8, 0, 0, 1.299, 1.236,
0.779, 13, 22, 16, 1, 1.709, 2.121, 2.061, 45, 38, 41, 1, 2.817,
2.264, 2.398, 3, 0, 1, 14, 0.973, 0.641, 0.846, 13, 22, 16, 1,
1.709, 2.121, 2.061, 7, 0, 0, 1, 1.37, 0.436, 0.706, 0, 5, 5,
1, 0.597, 1.113, 1.079, 3, 1, 4, 8, 0.992, 0.84, 1.07, 17, 7,
18, 1, 2.4, 1.315, 1.948, 21, 8, 50, 12, 2.412, 1.254, 3.857,
13, 22, 16, 1, 1.709, 2.121, 2.061), type = structure(c(2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L), .Label = c("ipdRatio", "score"), class = "factor")), .Names = c("base",
"pos", "values", "type"), row.names = c("1", "2", "3", "3942",
"3943", "3944", "3945", "11", "21", "31", "63", "64", "65", "66",
"12", "22", "32", "2953", "2954", "2955", "2956", "13", "23",
"33", "2461", "2462", "2463", "2464", "14", "24", "34", "7493",
"7494", "7495", "7496", "111", "212", "311", "4874", "4875",
"4876", "4877", "121", "221", "321", "9356", "9357", "9358",
"9359", "131", "231", "331", "9221", "9222", "9223", "9224",
"15", "25", "35", "93561", "93571", "93581", "93591", "112",
"213", "312", "1579", "1580", "1581", "1582", "122", "222", "322",
"1782", "1783", "1784", "1785", "132", "232", "332", "3398",
"3399", "3400", "3401", "16", "26", "36", "2257", "2258", "2259",
"2260", "113", "214", "313", "1027", "1028", "1029", "1030",
"123", "223", "323", "8654", "8655", "8656", "8657", "133", "233",
"333", "702", "703", "704", "705", "17", "27", "37", "8123",
"8124", "8125", "8126", "114", "215", "314", "93562", "93572",
"93582", "93592", "124", "224", "324", "3700", "3701", "3702",
"3703", "134", "234", "334", "8233", "8234", "8235", "8236",
"18", "28", "38", "1542", "1543", "1544", "1545", "115", "216",
"315", "533", "534", "535", "536", "125", "225", "325", "208",
"209", "210", "211", "135", "235", "335", "93563", "93573", "93583",
"93593"), class = "data.frame")
I'm plotting it like this:
# Theme overrides applied on top of theme_gray(): no fill behind the panel
# (gray outline), vertical grid lines only, legend underneath the plot.
prettify <- theme(
  panel.background = element_rect(fill = NA, color = "gray"),
  panel.grid.major.y = element_blank(),
  panel.grid.major.x = element_line(size = .1, color = "black", linetype = "dotted"),
  panel.grid.minor.y = element_blank(),
  panel.grid.minor.x = element_line(size = .1, color = "black"),
  legend.position = "bottom"
)

# Group by `base`, not `type`: every panel of facet_wrap(type ~ pos) already
# contains a single `type`, so `group = type` pools all four bases into one
# box per panel. Grouping by `base` draws one box for each of A, C, G and T.
ggplot(dataw, aes(x = base, y = values, color = type, group = base)) +
  geom_boxplot() +
  facet_wrap(type ~ pos, scales = "free", nrow = 2) +
  theme_gray() %+replace% prettify
But I keep getting only one boxplot in each plot square like so, when in fact I want 4 boxplots for each square:
Does anyone see what I am doing wrong here? Thanks!