How to include Pairwise wilcox test in tbl_summary()? - r

I really cannot find anything useful online for this problem. I've got a dataset where one variable has been measured on 4 different occasions (CRP1, CRP4, CRP7, CRP10), and I've run a pairwise Wilcoxon test to compare the CRP drop trend between 2 groups. I've also made a ggplot to show the significant p values.
Now I want to present my p values through the tbl_summary() function, but it seems that I cannot accomplish that. The Wilcoxon test that I ran to get my p values was performed on the long format of my dataset.
structure(list(LEK = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), levels = c("Lek +", "Lek -"), class = "factor", label = "Terapija"),
CRP1 = c(103.9, 155.6, 102.2, 89.2, 32.3, 258.8, 58.5, 196.7,
89.3, 175, 170.9, 204.3, 82.2, 196.9, 220.4, 92, 37.1, 34,
223.6, 261.5, 82, 37.4, 112, 81.8, 47.5, 70.1, 137.2, 84.7,
151.9, 159.8, 149.7, 140, 37.9, 143, 130.6, 110.7, 112.9,
48.1, 115.5, 43, 102.1, 35.9, 120.3, 40.9, 169.9, 105.6,
90, 139.6, 246.4, 146.9, 13.9, 60.9, 123.1, 187.3, 23.6,
112.9, 17.5, 9.9, 120.4, 103.7, 12.4, 96.7, 144.9, 54.1,
186.6, 143.6, 30.6, 41.8, 146.4, 94.9, 144.2, 98.5, 63.3,
137.1, 81.1, 14.1, 117.3, 55.4, 92.7, 40.3, 189.7, 77.2,
36.7, 73.7, 19.8, 39.1, 119, 60.6, 110.6, 63.2, 135.1, 131.6,
206.9, 117.1, 92.6, 123.3, 297, 153.3, 210.4, 116.1, 59.3,
177.3, 37.1, 101.2, 87.8, 138.6, 88.9, 95.6, 71.6, 81.1,
394.3, 4.8, 36.4, 229.3, 108.4, 404.1, 259.5, 292.9, 134.1,
127.5, 33.2, 29.2, 346.3, 116, 302), CRP4 = c(21, 74.7, 26.3,
48.1, 23.7, 86, 15.1, 33.7, 29.3, 16.9, 115, 79.5, 48, 58.5,
332.2, 153, 28.2, 11.6, 94.4, 50.2, 85.8, 48.7, 25, 14.3,
150.2, 145.5, 64.2, 28, 143.9, 57.6, 11, 132.9, 96.9, 44.4,
200.2, 45.2, 7.4, 95, 38.1, 12.5, 29.2, 6.8, 104.5, 15.3,
32.9, 26.5, 25, 49.7, 142.5, 37.5, 6.4, 32.1, 44.3, 70.9,
22.2, 72.2, 40.2, 3.5, 9.3, 72.5, 36, 17.9, 161.8, 18.5,
48.2, 198.7, 52.8, 77.6, 93.7, 162.7, 45.6, 206.4, 269.9,
21.1, 14.1, 22, 97.3, 52.2, 61, 34.8, 45.9, 43.6, 16.4, 203.6,
35.5, 28.1, 87.7, 23.2, 35.8, 44.2, 104.4, 83.7, 49.2, 23.5,
21.7, 118.6, 78.8, 101.6, 162.5, 23.9, 21.6, 109.3, 62.8,
146.1, 84.6, 57.6, 225.3, 143.4, 104.1, 29.7, 319.5, 104.5,
110.2, 120.3, 99.7, 172.1, 293.3, 262.9, 190.2, 82.4, 129.1,
5.5, 75.2, 36.8, 69.2), CRP7 = c(2.8, 110.8, 63.4, 51.3,
20.8, 27.8, 2.2, 194.6, 24.2, 8.3, 70.7, 93.3, 6.4, 38.3,
188.3, 75.1, 49.4, 5, 107.2, 37.8, 246.3, 26.4, 4.2, 4.3,
28.2, 22.4, 9.1, 195.9, 150.7, 67.4, 8.6, 283.6, 63.1, 100.9,
82.7, 9.9, 7.6, 207, 6.7, 9.2, 245.8, 42.5, 179.8, 12.3,
4.2, 6, 8.8, 5.9, 28.8, 27, 3.5, 24.8, 14.4, 55.5, 3.9, 106.7,
49.8, 11.1, 3.77, 68, 52.4, 32.7, 223.6, 12.3, 117.7, 66.1,
184.5, 29.3, 174.7, 119.3, 80.2, 87.9, 135.6, 22.8, 12.2,
82.7, 9.1, 32.3, 21.3, 82, 12.1, 37.8, 48.2, 56.6, 6.5, 37,
112.9, 11, 142.8, 18.4, 71.5, 91.1, 8.9, 7, 166.7, 55.4,
123.8, 46.8, 64.5, 5, 10.5, 201.7, 188.5, 198.7, 271.8, 276,
181.8, 190.2, 164.6, 65.1, 322.8, 61.9, 195.7, 225.5, 66.6,
119.4, 268.5, 350.3, 223.2, 161, 34.3, 22.4, 243.8, 62, 39.8
), CRP10 = c(NA, NA, NA, 184.3, 4.4, 7.7, NA, NA, 1.2, 1,
1, 12.3, 2.5, 62.2, 43.4, 57.7, 100.3, 15.6, 4.2, 11.5, NA,
8.3, 1, 1.3, 11.9, 63, NA, 71.4, 60.3, 54.6, 6.7, 313.8,
37, NA, 123.7, 2.5, 2.2, NA, 252.4, 9.7, NA, 82.2, 230.8,
5.8, 1, NA, 3.9, 1, 6.9, 34.7, 2.6, NA, 15.2, 6.4, 6.1, NA,
214.7, NA, 22.5, 86.5, 13.9, 41, 246.5, 9, 26.5, 270, 270.8,
7.6, 65.7, 90.5, 202.3, 288.6, 464, 92.1, 19.7, 307.5, 10.6,
71.2, 80.6, 159.8, 4.1, 103.7, 80.6, 324, 9.5, 6.3, 9.6,
4.1, 151.5, 20.3, 63.6, 311, 2.8, 52.8, 62.3, 13.5, 248,
72.2, 83.5, 13.8, 37.8, 179, 72.4, 206.4, 76.4, 210.6, 69.5,
87.9, 303.3, 59.1, 174.5, 211, 211.2, 240.8, 38.6, 109.6,
251.7, 328.9, 87.1, 113.5, 48.9, 16.4, 277, 25.7, 122)), row.names = c(NA,
-125L), class = c("tbl_df", "tbl", "data.frame"))
# Reshape the wide CRP measurements into long format: one output row per
# original row and CRP column, keeping only rows where LEK and all four CRP
# values are present.
LongCRP <- Statistika %>%
  select(LEK, CRP1, CRP4, CRP7, CRP10) %>%
  filter(complete.cases(.)) %>%
  # pivot_longer() replaces the superseded gather(). It emits the rows
  # subject by subject rather than column by column; the relative order of
  # rows within each measurement occasion is unchanged.
  pivot_longer(
    cols = c(CRP1, CRP4, CRP7, CRP10),
    names_to = "Vreme",
    values_to = "CRP"
  ) %>%
  # Explicit levels keep the occasions in measurement order (CRP10 last)
  # instead of the alphabetical order factor() would use by default.
  mutate(Vreme = factor(Vreme, levels = c("CRP1", "CRP4", "CRP7", "CRP10")))
# Pairwise Wilcoxon tests between the measurement occasions, computed
# separately within each LEK group, followed by the bracket coordinates
# needed to draw the results on a plot that has LEK on the x axis.
pairwise.wilcox.CRP <- LongCRP %>%
  group_by(LEK) %>%
  pairwise_wilcox_test(CRP ~ Vreme) %>%
  add_xy_position(x = "LEK")
# Boxplots of CRP per LEK group, coloured by measurement occasion, with the
# significance brackets taken from the pairwise test table.
ggplot(data = LongCRP, mapping = aes(x = LEK, y = CRP, color = Vreme)) +
  geom_boxplot() +
  stat_pvalue_manual(
    pairwise.wilcox.CRP,
    label = "p.adj.signif",
    step.increase = 0.03,
    bracket.nudge.y = 20
  ) +
  theme_minimal() +
  labs(
    title = "Trend opadanja vrednosti CRP-a kod lečenih i nelečenih bolesnika",
    x = "Terapija"
  ) +
  scale_color_discrete(name = "Vreme\nmerenja CRP-a")
GGplot
Now I know that I can extract p values from my ggplot graph, but I would really want to use tbl_summary() for this.
Thank you for the help!

Related

Compute multiple Wilcoxon Signed Rank tests by group in R

I'm working with (un-paired/independent) environmental data collected over 2 consecutive months that I'd like to compare for each calendar year (CYR). I have many years and several months of data so running each test one by one is too tedious. I found a useful piece of code for running multiple Kruskal-Wallis tests, but given that the Wilcoxon only compares 2 groups at once and my groups (Month or Month2) change slightly per year (depending on when data were collected) this code won't work - that I know of. Thanks in advance!
# Kruskal-Wallis code (hoping for something like this using wilcoxon test instead):
by(dry_season, dry_season$CYR, function(z) kruskal.test(temp ~ Month2, data = z))
# With these settings (March and April are just examples from my data):
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables, and `alternative` is written in full instead of relying on
# partial argument matching of `alt`.
wilcox.test(
  March, April,
  mu = 0, alternative = "two.sided", paired = FALSE,
  conf.int = TRUE, conf.level = 0.8, exact = FALSE, correct = FALSE
)
# Data:
> dput(dry_season)
structure(list(use_for_analysis = structure(c(3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L,
3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 1L,
1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
3L, 3L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L,
1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 1L,
3L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), levels = c("Pre_SAV", "Pre_storm", "Standard"
), class = "factor"), CYR = structure(c(9L, 9L, 9L, 9L, 12L,
12L, 9L, 9L, 9L, 9L, 12L, 7L, 6L, 12L, 6L, 6L, 12L, 12L, 2L,
9L, 9L, 9L, 2L, 9L, 7L, 5L, 6L, 6L, 7L, 9L, 6L, 12L, 12L, 12L,
12L, 2L, 9L, 2L, 9L, 9L, 9L, 12L, 5L, 7L, 2L, 9L, 12L, 6L, 5L,
6L, 6L, 7L, 6L, 5L, 12L, 12L, 2L, 9L, 12L, 7L, 9L, 9L, 7L, 2L,
5L, 5L, 12L, 2L, 2L, 9L, 12L, 2L, 5L, 7L, 6L, 9L, 6L, 7L, 12L,
5L, 7L, 6L, 6L, 6L, 12L, 9L, 12L, 6L, 2L, 2L, 5L, 9L, 2L, 9L,
5L, 12L, 6L, 9L, 12L, 2L, 12L, 7L, 2L, 5L, 7L, 2L, 6L, 9L, 7L,
6L, 6L, 5L, 6L, 2L, 9L, 6L, 2L, 9L, 12L, 2L, 6L, 7L, 9L, 12L,
7L, 12L, 9L, 12L, 5L, 5L, 12L, 6L, 2L, 2L, 7L, 7L, 6L, 2L, 9L,
7L, 5L, 6L, 2L, 6L, 5L, 6L, 12L, 12L, 9L, 5L, 9L, 2L, 7L, 2L,
5L, 7L, 9L, 6L, 2L, 7L, 2L, 5L, 12L, 6L, 7L, 7L, 6L, 7L, 2L,
6L, 6L, 5L, 5L, 12L, 12L, 6L, 7L, 9L, 5L, 9L, 12L, 2L, 9L, 6L,
2L, 7L, 12L, 2L, 7L, 6L, 9L, 6L, 7L, 5L, 5L, 5L, 2L, 7L, 6L,
5L, 7L, 7L, 2L, 9L, 7L, 12L, 12L, 2L, 12L, 6L, 9L, 12L, 6L, 5L,
6L, 9L, 5L, 9L, 2L, 5L, 7L, 7L, 9L, 7L, 7L, 5L, 7L, 5L, 2L, 6L,
12L, 2L, 2L, 6L, 12L, 7L, 5L, 5L, 9L, 9L, 12L, 5L, 7L, 6L, 5L,
5L, 6L, 5L, 7L, 2L, 2L, 7L, 12L, 12L, 2L, 12L, 5L, 5L, 6L, 2L,
5L, 7L, 7L, 2L, 5L, 6L, 2L, 5L, 2L, 7L, 7L, 12L, 5L, 5L, 2L,
5L, 12L, 5L, 7L, 5L, 7L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("2005", "2006", "2007",
"2008", "2014", "2015", "2016", "2017", "2018", "2019", "2021",
"2022"), class = "factor"), Season = c("DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY", "DRY",
"DRY"), Month = c(3, 2, 3, 2, 3, 2, 2, 3, 2, 3, 3, 4, 4, 3, 4,
3, 3, 2, 3, 2, 3, 2, 3, 3, 4, 3, 4, 4, 4, 3, 3, 3, 2, 3, 3, 2,
2, 3, 2, 3, 3, 3, 3, 4, 3, 3, 3, 4, 3, 4, 3, 4, 3, 3, 3, 2, 3,
2, 3, 3, 2, 3, 3, 2, 4, 3, 3, 2, 3, 2, 3, 2, 3, 4, 4, 3, 3, 4,
3, 3, 4, 3, 3, 4, 3, 2, 2, 3, 3, 2, 3, 3, 2, 3, 3, 3, 4, 2, 3,
3, 3, 4, 2, 3, 4, 3, 3, 2, 3, 3, 4, 4, 3, 3, 3, 3, 2, 2, 3, 2,
4, 4, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 4, 3, 4, 3, 2, 4, 4,
3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 4, 2, 3, 3, 3, 3, 2, 3, 3, 3,
3, 4, 4, 3, 4, 3, 3, 3, 3, 3, 3, 3, 2, 3, 4, 3, 3, 3, 3, 3, 3,
4, 2, 4, 3, 2, 3, 4, 2, 3, 3, 4, 3, 3, 2, 4, 3, 3, 3, 3, 3, 3,
4, 3, 3, 3, 3, 3, 3, 2, 3, 4, 4, 3, 3, 2, 3, 3, 3, 3, 3, 4, 4,
3, 3, 4, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 4,
3, 3, 4, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 4, 3, 4, 2, 3, 3, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
Site = c(17, 46, 27, 37, 18, 40, 45, 16, 47, 26, 29, 23,
17, 1, 9, 47, 19, 41, 16, 44, 15, 36, 17, 25, 6, 47, 8, 16,
22, 8, 40, 30, 42, 2, 20, 31, 35, 18, 43, 14, 24, 11, 16,
21, 15, 7, 31, 15, 46, 6, 31, 13, 41, 39, 21, 43, 14, 42,
3, 41, 34, 23, 47, 47, 8, 45, 10, 30, 19, 40, 32, 39, 15,
20, 14, 6, 21, 5, 22, 38, 12, 39, 46, 7, 4, 33, 44, 30, 13,
29, 44, 13, 38, 22, 14, 9, 13, 41, 33, 20, 23, 4, 46, 17,
19, 8, 20, 39, 46, 45, 5, 7, 38, 12, 12, 29, 37, 32, 5, 28,
12, 3, 5, 24, 40, 45, 21, 8, 37, 43, 34, 19, 21, 45, 18,
45, 4, 7, 38, 11, 6, 28, 11, 37, 13, 44, 25, 46, 31, 36,
4, 27, 2, 36, 42, 27, 20, 18, 44, 39, 22, 18, 35, 3, 10,
34, 11, 44, 10, 27, 36, 12, 35, 6, 47, 43, 17, 3, 41, 11,
26, 6, 19, 10, 26, 1, 36, 35, 38, 2, 30, 26, 26, 5, 19, 34,
43, 9, 35, 40, 33, 43, 23, 10, 16, 7, 27, 5, 37, 25, 2, 39,
42, 4, 1, 18, 33, 29, 9, 20, 37, 42, 9, 15, 8, 11, 25, 3,
25, 24, 28, 34, 42, 34, 14, 32, 32, 21, 1, 28, 12, 10, 24,
23, 22, 2, 33, 31, 14, 33, 41, 31, 38, 15, 3, 13, 9, 23,
22, 24, 1, 36, 7, 40, 30, 32, 32, 24, 2, 30, 35, 16, 25,
29, 1, 28, 17, 26, 29, 27, 28, 4, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47), temp = c(24.7, 24.7,
24.3, 24.8, 23.5, 26.3, 24.2, 24.6, 24.1, 24.6, 22.5, 25.8,
23.2, 25.4, 23.7, 25.8, 23.9, 25.6, 18.66, 25.7, 24.8, 24.6,
21.36, 24, 24.7, 20.9, 24, 23.3, 25.7, 22.5, 24.8, 23.5,
25.3, 26.3, 23.9, 25.03, 24.9, 21.58, 25.6, 24.7, 24.5, 25.4,
22.4, 25.9, 19.24, 23.4, 23.4, 23.2, 20.5, 25.3, 26.3, 22.5,
25, 22.2, 24, 26, 19.32, 24.5, 25.8, 23.2, 25.7, 24.8, 26,
23.6, 21.7, 19.9, 25.4, 25.57, 21.95, 27.1, 23.9, 24.9, 21.9,
24.6, 23.8, 24.2, 26.1, 24.7, 24, 21.6, 22.9, 27.4, 26.3,
25.2, 25.6, 25.4, 25.3, 26.4, 19.48, 25.82, 21.4, 25, 25.15,
25.2, 22.3, 26.1, 24.1, 25.8, 24.1, 23.04, 23.6, 24.6, 24.18,
22.9, 26, 22.85, 26, 27.3, 26.9, 26.6, 25, 22.4, 28.4, 19.79,
25.3, 26.3, 25.72, 24.8, 26.6, 25.29, 24.1, 25.1, 25.1, 23.5,
23.1, 24.9, 25.7, 26.4, 21.5, 20.8, 24.3, 26.2, 23.82, 23.9,
26.1, 27, 25.8, 23.37, 28.5, 23.9, 23.2, 26.2, 20.55, 26.6,
22.5, 26.2, 23.6, 25.1, 25.4, 22.4, 24.8, 26.04, 25.3, 25.88,
21.8, 28.6, 25.5, 26.8, 24.51, 23.7, 24.02, 22.9, 24.4, 25.9,
23.3, 28.2, 25, 26.3, 21, 26.7, 28.6, 22.5, 22.3, 26.5, 26.5,
28, 25.9, 25.5, 21.5, 25.8, 23.6, 23.79, 26.1, 24.7, 27.16,
25.5, 24.3, 26.97, 23.7, 26.2, 25.8, 27.2, 29.9, 23.7, 23,
21.5, 24.93, 24.5, 28.6, 22.1, 28.3, 27.4, 24.17, 25.8, 26.1,
26.8, 24.1, 23.66, 24.3, 26.6, 24.5, 27.3, 28.1, 24.2, 26.6,
25.8, 22.4, 26.2, 22.13, 24.5, 24, 27.2, 26.9, 25.3, 24.8,
22.6, 29.5, 24.7, 28.06, 27.1, 24.3, 27.37, 25.89, 26, 27.5,
28.7, 22.3, 24.2, 26, 26.7, 26.8, 22, 29.2, 27.7, 24, 24.4,
27.9, 22.7, 27.2, 28.09, 26.83, 28.4, 25.3, 27, 25.52, 27.9,
23.4, 24.6, 27.4, 28.3, 24.9, 24.4, 26.1, 26.58, 23.6, 28.3,
28.94, 24.4, 26.3, 29.5, 24.6, 28.1, 25.9, 24.6, 26.48, 24.8,
28.5, 25.3, 29.9, 24.6, 29.3, 24.46, 20, 20, 19, 20, 20,
19, 23, 21, 22, 21, 21, 20, 19, 19, 19, 19, 20, 19, 20, 17,
18, 19, 19, 20, 20, 19, 18, 17.5, 19, 19, 19, 19, 18, 18,
19, 19, 19, 19, 20, 20, 19, 20, 20, 20, 20, 21, 21), sal = c(21.29,
33.36, 15.14, 21.77, 25.37, 22.98, 32.4, 22.6, 32.12, 15.49,
20.52, 11.92, 27.33, 28.37, 30.53, 34.62, 24.45, 22.04, 32.48,
33.58, 25.2, 20.77, 27.89, 11.36, 23.64, 28.55, 31.21, 27.49,
13.21, 29.39, 31.54, 21.53, 23.25, 27.55, 22.52, 23.99, 20.4,
25.94, 32.65, 26.36, 11.76, 25.08, 24.33, 13.2, 32.46, 29.36,
22.7, 27.51, 30.08, 31.35, 27.92, 20.49, 32.29, 19.09, 20.72,
25.37, 32.41, 29.26, 28.22, 20.01, 20.07, 11.69, 26.48, 25.8,
30.29, 30.64, 25.47, 25.88, 24.12, 32.13, 22.37, 29.3, 24.44,
12.71, 28.69, 29.94, 25.05, 25.01, 20.79, 13.21, 21.48, 31.62,
33.74, 31.89, 28.01, 20.16, 23.74, 27.41, 32.55, 26.18, 27.49,
27.94, 27.29, 12.98, 26.13, 25.97, 29.49, 25.37, 22.47, 24.47,
20.04, 25.29, 26.56, 23.94, 15.42, 31.41, 24.39, 28.7, 26.42,
33.79, 30.42, 29.19, 31.53, 31.66, 28.33, 25.14, 26.8, 17.55,
27.37, 26.61, 29.8, 25.43, 30.31, 20.04, 17.71, 21.32, 13.05,
26.14, 17.23, 28.6, 22.52, 23.33, 19.29, 26.6, 13.54, 28.12,
31.57, 29.08, 27.46, 22.86, 22.71, 24.7, 32.59, 29.62, 28.31,
33.71, 19.66, 21.39, 16.24, 17.31, 30.67, 24.28, 25.54, 26.56,
26.9, 15.19, 16.56, 22.54, 26.2, 8.76, 19.63, 21.29, 22.82,
31.26, 22.2, 17.99, 30.07, 26.71, 29.02, 25.31, 29.7, 28.69,
17.48, 27.75, 27.64, 33.26, 18.74, 30.66, 28.05, 28.95, 19.8,
33.7, 13.48, 30.12, 24.23, 25.18, 22.57, 25.72, 7.88, 30.94,
15.33, 25.33, 15.89, 26.62, 15.4, 18.21, 27.07, 22.95, 29.72,
27.77, 18.55, 28, 19, 29.13, 18.57, 28.48, 20.25, 34, 21.65,
23.11, 29.77, 20.19, 32.93, 29.61, 32.25, 15.67, 18.5, 15.12,
30.52, 12.57, 9.62, 28.82, 29.05, 16.39, 23.45, 29.5, 10.56,
29.33, 23.72, 23.66, 20.33, 25.49, 25.69, 27.77, 25.3, 17.2,
20.69, 12.68, 30.88, 14.86, 24.92, 29.62, 8.06, 22.97, 13.57,
27.39, 27.45, 21.81, 16.97, 24.86, 26.03, 17.07, 15.57, 25.08,
33.34, 25.08, 29.94, 14.42, 23.65, 24.78, 30.59, 10.25, 24.55,
26.69, 23.37, 26.26, 25.24, 16.62, 31.83, 17.7, 10.51, 24.08,
17.45, 22.16, 32.63, 21.56, 23.51, 21.5, 14.04, 21.57, 13.7,
32.12, 37, 40, 38, 37, 38, 37, 28, 35, 32, 35, 36, 39, 36,
37, 35, 38, 36, 37, 38, 36, 31, 30, 28, 28, 28, 35, 31, 32,
31, 34, 34, 34, 25, 30, 25, 35, 35, 35, 34, 34, 32, 33, 32,
34, 33, 34, 34), DO = c(5.2, 2.7, 5.3, 4, 4.98, 5.04, 4,
5.4, 5, 6.1, 4.29, 4.68, 4.2, 6.51, 3.17, 4.91, 5.02, 4.24,
5.99, 4.5, 4.9, 5, NA, 5.9, 3.56, 5.7, 3.22, 5.2, 5.25, 5.9,
2.4, 4.45, 5.61, 5.42, 6.03, 4.47, 5.6, 9.91, 5.2, 5.9, 6.7,
2.05, 3.74, 6.4, NA, 5.5, 4.77, 7.07, 6.57, 5.17, 2.16, 4.4,
3.85, 5.05, 5.68, 4.74, NA, 6.8, 5.66, 5.57, 5.5, 6.9, 5.05,
7.89, 4.29, 6.78, 3.02, 4.48, 5.73, 5.3, 5.16, 5.96, 5.23,
7.16, 3.92, 4.9, 4.94, 6.7, 5.73, 7.05, 4.46, 3.53, 5.45,
5.05, 7.64, 6.2, 6.19, 4.09, NA, 4.61, 6.69, 5.1, 5.76, 7.2,
4.85, 4.09, 4.69, 10.2, 4.55, 9.87, 5.94, 6.96, 7.25, 6.65,
5.8, NA, 5.64, 5.5, 7.26, 6.83, 3.35, 5.48, 4.15, NA, 5.4,
3.59, 6.69, 5.3, 5.45, 6.22, 4.4, 7.98, 6.1, 6.07, 8.14,
6.45, 7.6, 5.72, 6.94, 7.13, 4.6, 5.03, 6.32, 7.21, 6.88,
8.69, 10.57, NA, 6.6, 7.05, 5.63, 5.41, NA, 3.61, 5.48, 6.42,
5.97, 6.94, 6.1, 8.26, 7.5, 6.06, 8.04, 6.07, 7.49, 4.94,
8.1, 5.52, 8.33, 8.82, 9.2, 7.63, 5.73, 4.69, 5.14, 7.18,
4.6, 7.32, NA, 5.33, 5.9, 5.83, 7.49, 5.21, 6.17, 7.99, 10.5,
7.2, 7.62, 5.3, 6.01, NA, 8.4, 3.92, 8.61, 7.85, 5.16, 7.28,
8.68, 3.79, 7.2, 6.19, 7.29, 5.72, 9.48, 7.15, 8.29, 7.8,
7.33, 7.66, 12.55, 9.88, 10.38, 5.3, 11.45, 4.45, 5.54, NA,
5.41, 4.52, 5.5, 6.73, 9.1, 8.15, 7.59, 9.4, 9.98, 7.7, NA,
9.3, 8.94, 9.74, 7.8, 8.95, 9.32, 7.25, 7.12, 8.11, 6.76,
5.75, 5.34, 7, 9.45, 6.19, 5.56, 7.84, 7.03, 9.26, 7.7, 8.6,
4.59, 6.01, 6.47, 7.6, 8.97, 5.17, 6.42, 7.32, 12.07, 8.38,
8.58, 7.2, 5.88, 4.77, NA, 8.23, 8.19, 12.67, 8.45, 8.76,
6.38, 9.51, 11.91, 8.1, 7.77, 5.58, 10.13, 10.21, NA, 11.72,
9.22, 7.87, 14.43, 9.22, NA, 9.88, 7.36, 10.71, 7.92, 7.42,
8.09, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), water_depth = c(70, 45, 64, 76, 68,
95, 75, 91, 65, 84, 80, 80, 55, 98, 51, 97, 85, 92, 62, 65,
98, 98, 58, 83, 68, NA, 60, 80, 92, 68, 95, 85, 143, 108,
112, 72, 101, 63, 80, 106, 103, 75, 51, 85, 49, 85, 101,
72, NA, 70, 90, 117, 95, 81, 103, 98, 58, 53, 107, 72, 106,
102, 85, 74, 63, NA, 73, 70, 62, 81, 113, 79, 68, 96, 79,
90, 86, 95, 118, 86, 128, 101, 42, 70, 143, 95, 68, 100,
52, 60, NA, 90, 52, 102, 69, 84, 90, 43, 110, 64, 109, 96,
62, 99, 80, 110, 105, 90, 52, 83, 70, 80, 91, 40, 110, 105,
59, 96, 97, 56, 85, 102, 105, 113, 87, 98, 91, 75, 86, NA,
118, 103, 63, 84, 63, 62, 52, 115, 55, 83, 88, 104, 33, 78,
74, 43, 94, 59, 80, 80, 100, 50, 120, 72, NA, 30, 103, 98,
74, 95, 62, 79, 119, 62, 89, 57, 35, 53, 55, 85, 76, 88,
79, 75, 95, 45, 75, 79, NA, 74, 95, 65, 76, 50, 50, 95, 104,
35, 100, 62, 76, 78, 83, 88, 72, 75, 60, 60, 49, NA, 76,
50, 64, 73, 64, 83, 73, 80, 92, 64, 90, 78, 55, 64, 60, 57,
75, 71, 60, 48, 90, 67, 53, 67, 49, 65, 61, 77, 52, 60, 88,
68, 68, 70, 85, 75, 79, 64, 71, 57, 86, 52, 63, 70, 66, 82,
63, 60, 60, 70, 39, 77, 88, 84, 52, 98, 39, 50, 75, 62, 80,
75, 38, 72, 45, 66, 67, 50, 62, 80, 80, 70, 48, 59, 47, 70,
68, 65, 81, 46, 85, 49, 31, 29, 46, 41, 67, 42, 82, 80, 70,
68, 78, 52, 38, 30, 90, 90, 80, 83, 87, 75, 69, 28, 91, 108,
109, 80, 59, 68, 90, 90, 85, 80, 90, 90, 85, 95, 80, 80,
91, 89, 42, 78, 85, 72, 87, 90, 87), sed_depth = c(51, 4,
52, 47, 2, 45, 36, 39, 25, 54, 17, 18, 10, 45, 25, 78, 7,
69, NA, 105, 60, 35, NA, 58, 27, NA, 0, 15, 33, 6, 60, 29,
39, 22, 14, NA, 40, NA, 80, 34, 50, 19, 93, 33, NA, 39, 32,
15, NA, 50, 40, 4, 80, 92, 25, 72, NA, 27, 8, 73, 40, 66,
45, NA, 0, NA, 22, NA, NA, 46, 9, NA, 34, 27, 50, 47, 34,
21, 23, 54, 7, 49, 7, 60, 7, 28, 72, 36, NA, NA, NA, 30,
NA, 15, 87, 10, 10, 73, 59, NA, 23, 5, NA, 24, 25, NA, 15,
55, 4, 81, 25, 41, 61, NA, 35, 25, NA, 7, 5, NA, 15, 63,
25, 34, 73, 63, 32, 0, 45, NA, 25, 27, NA, NA, 0, 3, 5, NA,
61, 52, 32, 70, NA, 48, 53, 100, 30, 4, 37, 61, 9, NA, 10,
NA, NA, 75, 18, 18, NA, 75, NA, 1, 24, 33, 40, 35, 30, 100,
NA, 65, 50, 34, 58, 17, 45, 90, 19, 61, NA, 61, 33, NA, 13,
35, NA, 94, 42, NA, 57, 50, 26, 75, 27, 13, 40, 57, NA, 24,
61, NA, 9, 68, NA, 29, 43, 10.17, 21, NA, 30, 30, 38, 22,
90, 3, 60, 2, 14, 21, NA, 78, 42, 55, 30, 48, 0, 67, 69,
73, NA, 50, 23, NA, NA, 35, 29, 13, 53, 30, 74, 33, 1, 58,
43, 35, 30, 44, 26, 52, 35, NA, NA, 56, 45, 42, NA, 10, 21,
30, 30, NA, 73, 45, 57, NA, 63, 29, NA, 45, NA, 35, 38, 20,
35, 42, NA, 65, 24, 50, 5, 63, 15, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Month2 = structure(c(3L,
2L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 4L, 4L, 3L, 4L, 3L,
3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 4L, 3L, 4L, 4L, 4L, 3L, 3L,
3L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 4L, 3L, 3L,
3L, 4L, 3L, 4L, 3L, 4L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 2L,
3L, 3L, 2L, 4L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 4L, 4L, 3L,
3L, 4L, 3L, 3L, 4L, 3L, 3L, 4L, 3L, 2L, 2L, 3L, 3L, 2L, 3L,
3L, 2L, 3L, 3L, 3L, 4L, 2L, 3L, 3L, 3L, 4L, 2L, 3L, 4L, 3L,
3L, 2L, 3L, 3L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 4L,
4L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 4L, 3L,
4L, 3L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L,
2L, 4L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 3L,
4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 4L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 2L, 4L, 3L, 2L, 3L, 4L, 2L, 3L, 3L, 4L, 3L, 3L,
2L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
2L, 3L, 4L, 4L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 3L,
3L, 4L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L,
3L, 3L, 3L, 3L, 4L, 3L, 3L, 4L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 2L, 4L, 3L, 4L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Jan",
"Feb", "Mar", "Apr"), class = "factor")), row.names = c(NA,
-329L), class = c("tbl_df", "tbl", "data.frame"))
This will run the analysis for the temp data and should give you what you need to get the other variables you want. First we need to get rid of the empty factor levels in CYR:
# droplevels() on a data frame removes the unused levels from its factor
# columns, so years with no rows no longer appear as levels of CYR.
dry_season <- droplevels(dry_season)
Now split the data and get rid of the empty factor levels in Month2:
# One data frame per calendar year, each with its own unused factor levels
# (e.g. months not sampled that year) removed.
dry_season.splt <- lapply(split(dry_season, dry_season$CYR), droplevels)
Now run the analysis for temp
# Run the Wilcoxon rank sum test of temp by Month2 once per calendar year;
# the result is a list of test objects named by year.
results.temp <- lapply(
  dry_season.splt,
  function(yr) {
    wilcox.test(
      temp ~ Month2,
      data = yr,
      conf.int = TRUE,
      conf.level = 0.8,
      exact = FALSE,
      correct = FALSE
    )
  }
)
# List the years that were tested, then inspect a single year's result.
names(results.temp)
results.temp[["2005"]] # or results.temp[[1]]
#
# Wilcoxon rank sum test
#
# data: temp by Month2
# W = 87.5, p-value = 0.5245
# alternative hypothesis: true location shift is not equal to 0
# 80 percent confidence interval:
# -9.999840e-01 1.470944e-05
# sample estimates:
# difference in location
# -1.393135e-05
Just change temp to the other variables to get their results.

Adding sample size to ggplot boxplot

I'm interested to see how age is related to a continuous outcome, for which I have the following data:
library(dplyr)
library(tidyverse)
library(magrittr)
library(ggplot2)
mydata <-
structure(list(ID = c(104, 157, 52, 152, 114, 221, 320, 125,
75, 171, 80, 76, 258, 82, 142, 203, 37, 92, 202, 58, 194, 38,
4, 137, 25, 87, 40, 117, 21, 255, 277, 315, 96, 134, 185, 94,
3, 153, 172, 65, 279, 209, 60, 13, 154, 160, 24, 29, 159, 213,
127, 74, 48, 126, 184, 132, 61, 141, 27, 49, 8, 39, 164, 162,
34, 205, 179, 119, 77, 135, 138, 165, 103, 253, 14, 20, 310,
84, 30, 273, 22, 105, 262, 116, 86, 83, 145, 31, 95, 51, 81,
271, 36, 50, 189, 2, 115, 7, 197, 54), age = c(67.1, 70.7, 53,
61.7, 66.1, 57.7, 54.1, 67.2, 60.9, 55.8, 40.7, 57.6, 64.1, 70.7,
47.5, 46.3, 66.7, 55, 63.3, 68.2, 61.2, 60.5, 52, 65.3, 48.9,
56.9, 62.7, 75.2, 61.4, 57.9, 53.6, 58.1, 51, 67.3, 63.9, 57,
43.2, 64.7, 62.8, 56.3, 51.7, 39.4, 45.2, 57.8, 55.7, 69.6, 61.5,
50.1, 73.7, 55.5, 65.2, 54.6, 49, 35.2, 52.9, 46.3, 55, 52.5,
54.2, 61, 57.4, 56.5, 53.6, 47.7, 64.2, 53.4, 60.9, 58.2, 60.7,
50.3, 48.3, 74.7, 52.1, 59.9, 52.4, 70.8, 61.2, 66.5, 55.4, 57.5,
59.2, 60.1, 52.3, 60.2, 54.8, 36.3, 61.5, 48.6, 56, 62, 64.8,
40.4, 68.3, 60, 69.1, 56.6, 45.3, 58.5, 52.3, 52), continuous_outcome = c(3636.6,
1128.2, 2007.5, 802.9, 332.3, 2636.1, 169.5, 67.9, 3261.8, 1920.3,
155.2, 1677.2, 198.2, 11189.7, 560.9, 633.1, 196.1, 13.9, 100.7,
7594.5, 1039.8, 83.9, 2646.8, 284.6, 306, 1135.6, 1883.1, 5681.4,
1706.2, 2241.1, 97.7, 1106.8, 1107.1, 290.8, 2123.4, 267, 115.3,
138.5, 152.7, 1338.9, 6709.8, 561.7, 1931.7, 3112.4, 1876.3,
3795.9, 5706.7, 7.4, 1324.9, 4095.4, 205.4, 1886, 177.3, 304.4,
1319.1, 415.9, 537.2, 3141.1, 740, 1976.7, 624.8, 983.1, 1163.5,
1432.6, 3730.4, 2023.4, 498.2, 652.5, 982.7, 1345.3, 138.4, 1505.1,
3528.1, 11.9, 884.5, 10661.6, 1911.4, 2800.8, 81.5, 396.4, 409.1,
417.3, 186, 1892.4, 1689.7, 0, 210.1, 210.5, 3484.5, 3196.8,
57.2, 20.2, 947, 540, 1603.1, 1571.8, 9.1, 149.2, 122, 63.2),
age_decades = structure(c(3L, 4L, 2L, 3L, 3L, 2L, 2L, 3L,
3L, 2L, 1L, 2L, 3L, 4L, 1L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 2L,
3L, 1L, 2L, 3L, 4L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 1L, 3L,
3L, 2L, 2L, 1L, 1L, 2L, 2L, 3L, 3L, 2L, 4L, 2L, 3L, 2L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 1L, 3L, 2L, 3L, 2L,
3L, 2L, 1L, 4L, 2L, 2L, 2L, 4L, 3L, 3L, 2L, 2L, 2L, 3L, 2L,
3L, 2L, 1L, 3L, 1L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 2L, 1L, 2L,
2L, 2L), .Label = c("1", "2", "3", "4"), class = "factor")), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
To make a boxplot of age decades on the x axis and my continuous outcome I'm using ggplot2.
I want to make several, and automatically plot the sample size on the x-axis ticks. To do so I've computed labels in the dataset as follows:
# Attach a per-row axis label of the form "<age band>\n n = <group size>",
# where the group size is the number of rows sharing the same age_decades.
mydata <-
  mydata %>%
  group_by(age_decades) %>%
  # n() is the size of the current age_decades group. Base as.character() is
  # used because as_character() is not exported by the packages attached
  # above (dplyr, tidyverse, magrittr, ggplot2).
  mutate(n_decades = as.character(n())) %>%
  mutate(
    label_decades = case_when(
      age_decades == 1 ~ "Below 50",
      age_decades == 2 ~ "Between 50 and 60",
      age_decades == 3 ~ "Between 60 and 70",
      age_decades == 4 ~ "Above 70"
    )
  ) %>%
  mutate(label_decades = paste0(label_decades, '\n n = ', n_decades)) %>%
  ungroup() %>%
  # Place the grouping variable and its label right after age, then drop the
  # helper column that was only needed to build the label.
  relocate(age_decades, label_decades, .after = age) %>%
  select(-n_decades) %>%
  arrange(ID)
Then I've tried to plot the boxplot using the newly created variable label_decades to label. The first thing I tried was:
# First attempt: hand the whole per-row label column to the x scale.
mydata %>%
  ggplot(mapping = aes(x = age_decades, y = continuous_outcome)) +
  geom_boxplot() +
  scale_x_discrete(labels = mydata$label_decades)
But that just plots the first few labels as they occur in the dataset (so they don't correspond to the actual boxplots):
Then I tried:
# Second attempt: draw the labels as a text layer, one text item per data row.
mydata %>%
  ggplot(mapping = aes(x = age_decades, y = continuous_outcome)) +
  geom_boxplot() +
  geom_text(
    mapping = aes(x = age_decades, y = Inf, label = label_decades),
    data = mydata,
    vjust = 15,
    size = 4
  )
Which works better but the font is really weird and also the original x axis labels/ticks are still showing.
Anyone know how to solve this issue? Thanks!
The font looks weird because there are many labels with the same text plotted on top of each other. You can use distinct to get only one label per x tick and use the theme function to get rid of the x tick labels:
# Draw each label once by feeding the text layer one row per age band, and
# hide the default x tick labels.
ggplot(mydata, aes(x = age_decades, y = continuous_outcome)) +
  geom_boxplot() +
  geom_text(
    aes(label = label_decades),
    data = distinct(mydata, age_decades, label_decades),
    y = 9000
  ) +
  theme(axis.text.x = element_blank())
One way would be to turn the labels to factor as well.
library(dplyr)
library(ggplot2)
# Build the "<age band>\n n = <group size>" label for every row and store it
# as a factor.
# NOTE(review): the factor is created inside the grouped mutate(); the plot
# that uses levels(mydata$label_decades) presumably relies on the resulting
# level order matching the order of age_decades - confirm.
mydata <- mydata %>%
  group_by(age_decades) %>%
  mutate(
    n_decades = as.character(n()),
    label_decades = case_when(
      age_decades == 1 ~ "Below 50",
      age_decades == 2 ~ "Between 50 and 60",
      age_decades == 3 ~ "Between 60 and 70",
      age_decades == 4 ~ "Above 70"
    ),
    label_decades = factor(paste0(label_decades, '\n n = ', n_decades))
  ) %>%
  ungroup() %>%
  select(-n_decades) %>%
  relocate(age_decades, label_decades, .after = age) %>%
  arrange(ID)
You can then use its levels in scale_x_discrete.
# Label the x ticks with the levels of the label factor instead of the raw
# age_decades codes.
mydata %>%
  ggplot(mapping = aes(x = age_decades, y = continuous_outcome)) +
  geom_boxplot() +
  scale_x_discrete(labels = levels(mydata$label_decades))

How to plot one variable against each of the three time points

I have a dataset containing one DV called Soma (somatotype) and three IVs called WT2 (weight at age 2), WT9 (weight at age 9) and WT18 (weight at age 18), and I want to plot Soma against weight at each of the three time points. But since it is not exactly a time series dataset, I am totally stuck with this.
I was thinking of using ggplot, but I am not familiar with it and my attempts have failed.
The dataset:
structure(list(X = 67:136, Sex = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
WT2 = c(13.6, 11.3, 17, 13.2, 13.3, 11.3, 11.6, 11.6, 12.4,
17, 12.2, 15, 14.5, 10.2, 12.2, 12.8, 13.6, 10.9, 13.1, 13.4,
11.8, 12.7, 11.8, 14.1, 10.9, 11.8, 13.6, 12.7, 12.3, 11.5,
12.6, 14.1, 11.5, 12, 10.9, 12.7, 11.3, 11.8, 15.4, 10.9,
13.2, 14.3, 11.1, 13.6, 12.9, 13.5, 16.3, 13.6, 10.2, 12.6,
12.9, 13.3, 13.4, 12.7, 12.2, 15.4, 12.7, 13.2, 12.4, 10.9,
13.4, 10.6, 11.8, 14.2, 12.7, 13.2, 11.8, 13.3, 13.2, 15.9
), HT2 = c(87.7, 90, 89.6, 90.3, 89.4, 85.5, 90.2, 82.2,
85.6, 97.3, 87.1, 88.9, 87.6, 82.6, 87.1, 84, 83.6, 81.4,
89.7, 88.4, 86.4, 83.8, 87.6, 94, 82, 86.4, 88.9, 86.7, 86.4,
86.4, 83.8, 88.9, 85.9, 86.2, 85.1, 88.6, 83, 88.9, 89.7,
81.3, 88.7, 88.4, 85.1, 91.4, 87.6, 86.1, 94, 85.9, 82.2,
88.2, 87.5, 88.6, 86.9, 86.4, 80.9, 90, 94, 89.7, 86.4, 82.6,
86.4, 81.8, 86.2, 86, 91.4, 88.9, 88.6, 86.4, 94, 89.2),
WT9 = c(32.5, 27.8, 44.4, 40.5, 29.9, 22.8, 30, 24.3, 29.9,
44.5, 31.8, 32.1, 39.2, 23.7, 26, 36.3, 29.9, 22.2, 34.4,
35.5, 33, 25.7, 29.2, 31.7, 23.7, 35.3, 39, 30.8, 29.3, 28,
33, 47.4, 27.6, 34.2, 28.1, 27.5, 23.9, 32.2, 29.4, 22, 28.8,
38.8, 36, 31.3, 26.9, 33.3, 36.2, 29.5, 23.4, 33.8, 34.5,
34.4, 38.2, 31.7, 26.6, 34.2, 27.7, 28.5, 30.5, 26.6, 39,
25, 25.6, 34.2, 29.8, 27.9, 27, 41.4, 41.6, 42.4), HT9 = c(133.4,
134.8, 141.5, 137.1, 136.1, 130.6, 136, 128, 132.4, 152.5,
138.4, 135.2, 142.3, 129.1, 133.2, 136.3, 133.1, 123.2, 135.8,
139.5, 139.4, 124.2, 135.6, 144.1, 123.8, 134.6, 137.2, 139.8,
128.8, 134.2, 136.5, 140.8, 132.1, 137, 129, 139.4, 125.6,
137.1, 133.6, 121.4, 133.6, 134.1, 139.4, 138.1, 133.2, 138.4,
139.5, 132.8, 129.8, 144.8, 138.9, 140.3, 143.8, 133.6, 123.5,
139.9, 136.1, 135.8, 131.9, 133.1, 130.9, 126.3, 135.9, 135,
135.5, 136.5, 134, 138.2, 142, 140.8), LG9 = c(28.4, 26.9,
31.9, 31.8, 27.7, 23.4, 27.2, 25.1, 27.5, 32.7, 28.3, 26.9,
31.6, 25.9, 26.7, 28.4, 26.2, 24.9, 32.3, 30, 26.9, 26.2,
26.3, 27.2, 25.5, 30.4, 32.4, 26, 28.3, 25, 29, 32.3, 26.3,
27.3, 27.4, 25.7, 24.5, 28.2, 26.6, 24.4, 26.5, 31.1, 28.2,
27.6, 26.3, 29.4, 28, 27.6, 22.6, 28.3, 30.5, 31.2, 29.8,
27.5, 27.2, 29.1, 26.7, 25.5, 28.6, 25.4, 29.3, 25, 23.7,
27.6, 27, 26.5, 26.5, 32.5, 31, 32.6), ST9 = c(74L, 65L,
104L, 79L, 83L, 60L, 67L, 44L, 76L, 81L, 59L, 67L, 72L, 40L,
40L, 54L, 67L, 58L, 57L, 61L, 64L, 48L, 61L, 74L, 50L, 58L,
80L, 57L, 44L, 46L, 57L, 69L, 51L, 44L, 48L, 68L, 22L, 59L,
58L, 44L, 58L, 57L, 64L, 64L, 58L, 73L, 52L, 52L, 60L, 107L,
62L, 88L, 78L, 52L, 40L, 71L, 30L, 76L, 59L, 75L, 38L, 50L,
45L, 62L, 57L, 66L, 54L, 44L, 56L, 74L), WT18 = c(56.9, 49.9,
55.3, 65.9, 62.3, 47.4, 57.3, 50, 58.8, 80.2, 59.9, 56.3,
67.9, 52.9, 58.5, 73.2, 54.7, 44.1, 70.5, 60.6, 73.2, 57.2,
56.4, 56.6, 46.3, 63.3, 65.4, 60.1, 55, 55.7, 71.2, 65.5,
57.2, 58.2, 56, 64.5, 53, 52.4, 56.8, 49.2, 55.6, 77.8, 69.6,
56.2, 52.5, 64.9, 59.3, 54.2, 49.8, 62.6, 66.6, 65.3, 65.9,
59, 47.4, 60.4, 56.3, 61.7, 52.4, 52.1, 58.4, 52.8, 60.4,
61, 67.4, 54.3, 56.3, 97.7, 68.1, 63.1), HT18 = c(158.9,
166, 162.2, 167.8, 170.9, 164.9, 168.1, 164, 163.3, 183.2,
167, 163.8, 174, 163, 167.1, 168.1, 163, 154.6, 170.3, 170.6,
175.1, 156.5, 160.3, 170.8, 156.5, 165.2, 169.8, 171.2, 160.4,
163.8, 169.6, 172.7, 162.4, 166.8, 157.1, 181.1, 158.4, 165.6,
166.7, 156.5, 168.1, 165.3, 163.7, 173.7, 163.9, 169.2, 170.1,
166, 164.2, 176, 170.9, 169.2, 172, 163, 154.5, 172.5, 175.6,
167.2, 164, 162.1, 161.6, 153.6, 177.5, 169.8, 173.5, 166.8,
166.2, 162.8, 168.6, 169.2), LG18 = c(34.6, 33.8, 35.1, 39.3,
36.3, 31.8, 35, 31.2, 36.2, 42.9, 36.5, 32.6, 37.5, 37.7,
34.5, 37.2, 33.2, 32.4, 40.1, 38.2, 35.1, 35.6, 34.6, 32.6,
32.9, 38.5, 38.6, 33, 36.3, 33.2, 38.8, 36.2, 36.5, 34.3,
37.8, 34.2, 32.4, 33.8, 32.7, 33.5, 34.1, 39.8, 38.6, 34.2,
34.6, 36.7, 32.8, 34.9, 30.3, 35.8, 38.8, 39, 35.7, 32.7,
32.2, 35.7, 34, 35.5, 34.8, 34.1, 33, 33.4, 34.3, 34.5, 34.5,
33.6, 36.2, 42.5, 38.4, 37.9), ST18 = c(143L, 117L, 143L,
148L, 152L, 126L, 134L, 77L, 118L, 135L, 118L, 96L, 131L,
108L, 99L, 105L, 122L, 146L, 126L, 124L, 100L, 118L, 123L,
131L, 101L, 121L, 182L, 116L, 127L, 130L, 107L, 134L, 120L,
130L, 101L, 149L, 112L, 136L, 118L, 110L, 104L, 138L, 108L,
134L, 108L, 141L, 122L, 125L, 128L, 168L, 126L, 142L, 132L,
116L, 112L, 137L, 114L, 122L, 121L, 148L, 107L, 140L, 125L,
124L, 123L, 89L, 135L, 125L, 142L, 142L), Soma = c(5, 4,
5.5, 5.5, 4.5, 3, 5, 4, 5, 5.5, 5, 5, 5.5, 4, 5, 6.5, 4.5,
3.5, 5.5, 4.5, 6, 5, 4.5, 4, 4, 5, 4.5, 4.5, 5, 5, 6, 4.5,
5, 5, 5, 4, 5, 4, 4.5, 4, 4.5, 6.5, 5.5, 3.5, 4, 5, 4.5,
4, 4, 5, 5, 5, 5.5, 5.5, 4, 4, 3, 4.5, 5, 4, 6.5, 5, 3.5,
5.5, 5, 4, 4.5, 7, 5.5, 5.5)), .Names = c("X", "Sex", "WT2",
"HT2", "WT9", "HT9", "LG9", "ST9", "WT18", "HT18", "LG18", "ST18",
"Soma"), row.names = 67:136, class = "data.frame")
my command:
library(tidyr)
library(ggplot2)
# Reshape the wide data to long form and draw one scatter panel per
# gathered variable (Soma on x, the gathered value on y).
newdata.girls %>%
# put the gathered column names in the weight column
# and their values in the value column
# NOTE(review): `-Soma` gathers EVERY column except Soma. If newdata.girls
# holds the full dataset shown above, X, Sex, HT*, LG* and ST* are gathered
# too, not only WT2/WT9/WT18 -- confirm.
gather(weight, value, -Soma) %>%
# make weight a factor ordered WT2, WT9, WT18 so that the panels plot in
# that order; any name not listed in `levels` becomes NA, which is the
# likely source of the NA panel
# NOTE(review): mutate() comes from dplyr, which is not attached by the
# library() calls shown -- confirm it is loaded elsewhere.
mutate(weight = factor(weight, levels = c("WT2", "WT9", "WT18"))) %>%
# plot Soma versus value, one facet column per level of weight
ggplot(aes(Soma, value)) + geom_point() + facet_grid(. ~ weight)
It gives out a column of NA.
result
It's not entirely clear how you would like the output to look, or whether Soma is continuous or categorical. But taking your sentence "Soma against weight at each of the three time points" as a starting point, an initial attempt could look like this. Assume your data frame is named df1:
library(tidyr)
library(dplyr)
library(ggplot2)
# Scatter plot of Soma against weight, one panel per measurement (WT2, WT9,
# WT18). Only the three weight columns are reshaped: gathering every column
# except Soma would also pull in X, Sex, HT*, LG* and ST*, whose names are
# not among the factor levels below and would therefore appear as an NA panel.
df1 %>%
  # keep Soma and the three weight measurements only
  select(Soma, WT2, WT9, WT18) %>%
  # put the names WT2, WT9, WT18 in the weight column
  # and the weights in the value column
  pivot_longer(
    c(WT2, WT9, WT18),
    names_to = "weight",
    values_to = "value"
  ) %>%
  # make weight a factor and fix the level order so that
  # the panels plot in the correct (chronological) order
  mutate(weight = factor(weight, levels = c("WT2", "WT9", "WT18"))) %>%
  # plot Soma versus value, one facet column per time point
  ggplot(aes(Soma, value)) +
  geom_point() +
  facet_grid(. ~ weight) +
  theme_light()
Result:

Stacked barplot in ggplot of numeric values

I am trying to plot monthly average precipitation broken down into snow and rain in a stacked barplot. From searching around on this site I found some code that does what I want, however, since I am not fully understanding the code I am not able to change the aesthetics of it.
Below is the code that creates the plot that I want, but it looks... well... a little "ugly". Usually when working with ggplot I save the plot to a variable and then keep adding and changing things. Since the plot function is embedded in this code, I don't know how to save the plot output to a variable.
# Month labels in calendar order; passed to scale_x_discrete() below to
# set the order of the x axis.
correct_order <- c("Jan","Feb","Mar","Apr","May","Jun",
"Jul","Aug","Sep","Oct","Nov","Dec")
# One pipeline from raw data to plot: average rain_mm and snow_cm per month
# (NA values dropped), reshape Rain/Snow into Legend/Precipitation pairs,
# then pipe the result straight into ggplot() via the `.` placeholder.
cn %>% group_by(Months) %>%
summarise(Rain = mean(rain_mm,na.rm = TRUE),Snow = mean(snow_cm,na.rm = TRUE)) %>%
# gather everything except Months: Legend holds "Rain"/"Snow",
# Precipitation holds the corresponding monthly mean
gather(Legend, Precipitation, -Months) %>%
# NOTE(review): `color` controls the bar outline in ggplot2; `fill` is the
# aesthetic that colours the bar interior.
ggplot(.,aes(x = Months, y = Precipitation,
group = Legend, color = Legend)) +
# force calendar order on the x axis
scale_x_discrete(limits=correct_order) +
# stat = "identity": bar heights are the Precipitation values themselves
geom_bar(stat="identity")
Below is a dput of my dataset.
structure(list(Months = structure(c(5L, 4L, 8L, 1L, 9L, 7L, 6L,
2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L,
10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L,
4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L,
9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L,
2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L,
10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L,
4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L,
9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L,
2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L,
10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L,
4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L), .Label = c("Apr",
"Aug", "Dec", "Feb", "Jan", "Jul", "Jun", "Mar", "May", "Nov",
"Oct", "Sep"), class = "factor"), station = structure(c(7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L), .Label = c("albernirob", "blackcreeek",
"campbellrivairp", "campbellrivsurf", "capemudge", "comoxairp",
"courtney", "mudbay", "oysterriver", "powriv", "powrivairp",
"qualicumhatch", "qualicumriverres", "stillwater"), class = "factor"),
temp_davg_c = c(3, 3.6, 5.7, 9.1, 12.5, 15.5, 17.9, 17.6,
14.2, 9, 5.1, 3.1, 2.8, 3.4, 5.4, 8.5, 11.7, 14.8, 17.1,
16.9, 13.6, 8.6, 5, 2.8, 2.4, 3.2, 5.2, 8, 11.6, 14.7, 17.3,
17.2, 13.7, 8.6, 4.4, 2.1, 2.6, 3.8, 5.9, 7.4, 11.5, 14.3,
16.2, 17.2, 12.7, 8.1, 4.1, NA, 4.1, 4.6, 6.3, 8.8, 12.1,
14.9, 17.2, 17.1, 14.2, 9.6, 5.8, 3.8, 3.9, 4.3, 6.1, 8.8,
12.4, 15.5, 18, 17.9, 14.5, 9.5, 5.7, 3.5, 3.5, 4, 5.9, 8.6,
12.1, 15.1, 17.5, 17.4, 14.1, 9.3, 5.3, 3.1, 3.3, 3.8, 5.6,
8.3, 12, 15.1, 17.3, 17.2, 13.6, 8.9, 5.2, 3.2, 3.9, 4.2,
5.9, 8.6, 12, 14.9, 17.1, 16.7, 13.6, 9.2, 5.6, 3.5, 2.8,
3.7, 5.8, 8.5, 11.9, 14.9, 17.3, 17.4, 14.1, 9.2, 4.9, 2.6,
2, 3, 5.7, 8.5, 12.3, 15.5, 18.3, 18.5, 15.3, 9.8, 4.6, 1.8,
4.6, 5.1, 7, 9.6, 13, 15.8, 18.4, 18.6, 15.6, 10.8, 6.8,
4.3, 3.6, 3.9, 5.9, 8.6, 11.9, 14.9, 17.2, 17.2, 14.1, 9.4,
5.3, 3.1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
temp_dmax_c = c(5.6, 7.1, 10, 14.3, 18.1, 21, 23.8, 23.7,
20.1, 13, 8, 5.4, 5.8, 7.4, 10, 13.9, 17.3, 20.2, 22.9, 22.8,
19.6, 13, 8.4, 5.4, 5.5, 7.2, 9.7, 13.2, 17, 20.1, 23, 23.3,
19.8, 13.1, 7.7, 4.9, 5.6, 7.5, 10.6, 12.2, 16.7, 19.5, 21.6,
23.2, 18, 12.3, 7.5, NA, 6.6, 7.6, 9.8, 12.9, 16.5, 19.5,
22.1, 22, 18.6, 12.8, 8.5, 6.2, 6.4, 7.4, 9.6, 12.9, 16.6,
19.8, 22.8, 22.7, 19, 12.9, 8.5, 5.9, 6.2, 7.5, 10.1, 13.5,
17.2, 20.3, 23.1, 23.1, 19.5, 13.4, 8.3, 5.6, 6.2, 7.4, 9.8,
13.2, 17.1, 20.2, 22.6, 22.5, 18.9, 12.8, 8.3, 5.8, 6.5,
7.5, 9.9, 12.9, 16.7, 19.6, 22.3, 22.1, 18.7, 13, 8.5, 5.9,
5.5, 7.4, 10.1, 13.5, 17.2, 20.3, 23.1, 23.5, 20, 13.3, 7.8,
5, 4.3, 6.6, 10.5, 14.2, 18.6, 21.9, 25.6, 26.1, 22.4, 14.4,
7.3, 3.8, 6.8, 7.8, 10.4, 13.5, 17.1, 19.8, 22.7, 22.9, 19.5,
13.6, 9, 6.4, 5.8, 6.9, 9.4, 12.8, 16.5, 19.4, 22.1, 22.3,
18.7, 12.6, 7.7, 5.3, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), temp_dmin_c = c(0.3, 0, 1.3, 3.9, 6.8, 9.9,
11.9, 11.5, 8.2, 5, 2.1, 0.7, -0.3, -0.6, 0.9, 3.1, 6.1,
9.3, 11.3, 10.9, 7.5, 4.2, 1.6, 0.2, -0.8, -0.7, 0.7, 2.8,
6.2, 9.3, 11.5, 11.1, 7.6, 4, 1, -0.8, -0.5, 0, 1.3, 2.6,
6.2, 9, 10.8, 11.1, 7.4, 3.8, 0.6, NA, 1.6, 1.5, 2.8, 4.7,
7.7, 10.3, 12.2, 12.2, 9.7, 6.4, 3.1, 1.4, 1.4, 1.2, 2.5,
4.6, 8, 11.1, 13.3, 13, 9.9, 6, 2.9, 0.9, 0.7, 0.5, 1.7,
3.7, 6.9, 9.8, 11.8, 11.7, 8.6, 5.3, 2.3, 0.5, 0.3, 0.1,
1.5, 3.4, 6.9, 9.8, 11.7, 11.7, 8.2, 5, 2, 0.5, 1.2, 0.8,
2, 4.1, 7.3, 10.1, 11.8, 11.3, 8.4, 5.3, 2.7, 0.9, 0.1, 0.1,
1.4, 3.5, 6.6, 9.4, 11.5, 11.2, 8.2, 5, 1.9, 0.2, -0.3, -0.6,
0.7, 2.7, 6, 9, 10.9, 10.9, 8, 5, 1.8, -0.3, 2.3, 2.4, 3.6,
5.6, 8.8, 11.8, 14, 14.3, 11.6, 8, 4.6, 2.2, 1.2, 0.9, 2.3,
4.3, 7.3, 10.4, 12.3, 12.1, 9.4, 6.1, 2.8, 0.9, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), rain_mm = c(216, 134.8,
127, 90.7, 53, 53, 29.9, 35.4, 45.7, 146.9, 232.3, 236.4,
216, 166.8, 149.3, 105, 72.8, 63.2, 42.3, 43.1, 54, 171.8,
256.2, 247.3, 194.6, 135.5, 128.4, 91.6, 68.4, 62.9, 39.4,
44.6, 55.2, 161, 222.1, 204.2, 186, 140.2, 120.3, 87.2, 58.2,
51.3, 35.1, 39, 52.9, 154.8, 228.4, 218.5, 215.2, 135.1,
130.8, 93.6, 70.2, 61.1, 39.5, 45.6, 58.7, 168.6, 241, 220.8,
159.1, 107.8, 95.7, 64.4, 45.6, 42.8, 26.7, 29.2, 41.8, 122.7,
191.9, 168.9, 256.9, 174.1, 151.6, 98, 56.6, 45.2, 26, 37.6,
53.6, 189.7, 285.2, 256.7, 182, 144.2, 139.3, 87.2, 64.6,
54.7, 36.4, 39, 48.9, 152.9, 228.4, 215.9, 200.6, 131.1,
116.3, 79.4, 51.3, 45.3, 26, 34.6, 46.3, 146.8, 214, 180.7,
219.3, 150.4, 141, 101.1, 72.1, 62.8, 41.9, 49.5, 59.3, 180.8,
249.9, 234.2, 317, 222.7, 215.6, 143.6, 87.8, 62.2, 31, 46.4,
61.4, 218.3, 345.2, 323.2, 132, 88.4, 92.4, 70.8, 70.9, 57.4,
36.5, 42.3, 51.4, 117.5, 154.9, 134.5, 145.7, 101.9, 104.2,
83.2, 76.6, 67.6, 37.5, 45.3, 54.7, 125.5, 171.6, 146.5,
185.2, 125.5, 127.8, 99.6, 92.4, 73.7, 46, 50.7, 64.6, 152.1,
212.6, 178.5), snow_cm = c(15.9, 9.3, 11.3, 0.1, 0, 0, 0,
0, 0, 0.2, 6, 12.1, 17.3, 10, 6.7, 0.2, 0, 0, 0, 0, 0, 1.1,
6.4, 16, 23.3, 14.4, 11.7, 0.5, 0, 0, 0, 0, 0, 1.2, 10.5,
22.6, 13.2, 8.4, 7.6, 0, 0, 0, 0, 0, 0, 0.8, 7.3, 14.3, 13.8,
6.4, 6.3, 0.2, 0, 0, 0, 0, 0, 0.6, 6, 14.7, 11.9, 6, 9.9,
0.2, 0, 0, 0, 0, 0, 0.1, 8.2, 18.7, 12.9, 13.3, 8.2, 0, 0,
0, 0, 0, 0, 1.1, 4.8, 15.2, 14.9, 7.8, 4.6, 0, 0, 0, 0, 0,
0, 0.9, 4.1, 8.6, 10.4, 8.8, 4.3, 0, 0, 0, 0, 0, 0, 0.4,
4.2, 9.2, 14.8, 10.1, 7.1, 0.1, 0, 0, 0, 0, 0, 0.5, 7.2,
16.5, 22.6, 16.9, 8.2, 0.6, 0, 0, 0, 0, 0, 1.6, 8, 21.4,
6.1, 4.6, 3.8, 0, 0, 0, 0, 0, 0, 0.2, 3.4, 4.2, 13.6, 7.8,
6.8, 0.1, 0, 0, 0, 0, 0, 0.3, 6.5, 11.5, 8.1, 4.8, 2.7, 0,
0, 0, 0, 0, 0, 0.2, 4.4, 9), precip_mm = c(231.8, 144.1,
138.3, 90.7, 53, 53, 29.9, 35.4, 45.7, 147.1, 238.3, 248.5,
233.3, 176.8, 155.9, 105.2, 72.8, 63.2, 42.3, 43.1, 54, 172.9,
262.6, 263.3, 217.5, 149.5, 140, 92.1, 68.4, 62.9, 39.4,
44.6, 55.2, 162.2, 231.9, 225.7, 198.9, 148.6, 127.9, 87.2,
58.2, 51.3, 35.1, 39, 52.9, 155.6, 235.7, 232.8, 229.1, 141.4,
137.1, 93.8, 70.2, 61.1, 39.5, 45.6, 58.7, 169.2, 246.9,
235.5, 171.9, 114.3, 105.7, 64.6, 45.6, 42.8, 26.7, 29.2,
41.8, 122.8, 200.5, 187.9, 269.9, 187.4, 159.8, 98, 56.6,
45.2, 26, 37.6, 53.6, 190.8, 290, 272, 196.9, 151.9, 143.9,
87.2, 64.6, 54.7, 36.4, 39, 48.9, 153.8, 232.6, 224.5, 211,
139.9, 120.6, 79.4, 51.3, 45.3, 26, 34.6, 46.3, 147.2, 218.1,
189.8, 234.1, 160.4, 148, 101.2, 72.1, 62.8, 41.9, 49.5,
59.3, 181.3, 257.1, 250.7, 339.5, 239.6, 223.8, 144.2, 87.8,
62.2, 31, 46.4, 61.4, 219.8, 353.2, 344.6, 138.1, 93.1, 96.1,
70.8, 70.9, 57.4, 36.5, 42.3, 51.4, 117.7, 158.3, 138.7,
158.9, 109.4, 110.7, 83.3, 76.6, 67.6, 37.5, 45.3, 54.7,
125.8, 178, 157.8, 193.3, 130.3, 130.6, 99.6, 92.4, 73.7,
46, 50.7, 64.6, 152.3, 216.9, 187.5), date = structure(c(14610,
14641, 14669, 14700, 14730, 14761, 14791, 14822, 14853, 14883,
14914, 14944, 14610, 14641, 14669, 14700, 14730, 14761, 14791,
14822, 14853, 14883, 14914, 14944, 14610, 14641, 14669, 14700,
14730, 14761, 14791, 14822, 14853, 14883, 14914, 14944, 14610,
14641, 14669, 14700, 14730, 14761, 14791, 14822, 14853, 14883,
14914, 14944, 14610, 14641, 14669, 14700, 14730, 14761, 14791,
14822, 14853, 14883, 14914, 14944, 14610, 14641, 14669, 14700,
14730, 14761, 14791, 14822, 14853, 14883, 14914, 14944, 14610,
14641, 14669, 14700, 14730, 14761, 14791, 14822, 14853, 14883,
14914, 14944, 14610, 14641, 14669, 14700, 14730, 14761, 14791,
14822, 14853, 14883, 14914, 14944, 14610, 14641, 14669, 14700,
14730, 14761, 14791, 14822, 14853, 14883, 14914, 14944, 14610,
14641, 14669, 14700, 14730, 14761, 14791, 14822, 14853, 14883,
14914, 14944, 14610, 14641, 14669, 14700, 14730, 14761, 14791,
14822, 14853, 14883, 14914, 14944, 14610, 14641, 14669, 14700,
14730, 14761, 14791, 14822, 14853, 14883, 14914, 14944, 14610,
14641, 14669, 14700, 14730, 14761, 14791, 14822, 14853, 14883,
14914, 14944, 14610, 14641, 14669, 14700, 14730, 14761, 14791,
14822, 14853, 14883, 14914, 14944), class = "Date")), .Names = c("Months",
"station", "temp_davg_c", "temp_dmax_c", "temp_dmin_c", "rain_mm",
"snow_cm", "precip_mm", "date"), row.names = c(NA, -168L), class = "data.frame")
You can separate the dplyr part of your code from the gg-plotting part:
# Month labels in calendar order. Needed because the levels of the Months
# factor are stored alphabetically ("Apr", "Aug", "Dec", ...).
correct_order <- c(
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
)

# Data step: monthly means of rain and snow across all rows of `cn` (the data
# frame from the question), reshaped to long form with one row per
# month/type pair.
# NOTE(review): going by the column names, rain is in mm and snow in cm, so
# the stacked bar heights mix units -- confirm this is intended.
weather_data <- cn %>%
  group_by(Months) %>%
  summarise(
    Rain = mean(rain_mm, na.rm = TRUE),
    Snow = mean(snow_cm, na.rm = TRUE)
  ) %>%
  pivot_longer(
    c(Rain, Snow),
    names_to = "Legend",
    values_to = "Precipitation"
  )

# Plot step: kept separate from the data step and stored in a variable so
# that further layers, scales or themes can be added later with `+`.
precip_plot <- ggplot(
  weather_data,
  aes(x = Months, y = Precipitation, fill = Legend)
) +
  geom_col() +
  scale_x_discrete(limits = correct_order)
precip_plot
As it has been mentioned before, fill= instead of color= is probably what you are looking for:

Superimposing two plots in R with same axis and limits

I have two plots from two different data frames
The DPUT from data frame 1 is as follows
ppv_npv2 <- structure(list(pred.prob = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50), variable = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("ppv_2.5", "ppv_50", "ppv_97.5"), class = "factor"),
value = c(4.8, 9.3, 13.4, 17.2, 20.8, 24.2, 27.3, 30.3, 33.1,
35.7, 38.2, 40.5, 42.8, 44.9, 46.9, 48.8, 50.6, 52.3, 54,
55.6, 57.1, 58.5, 59.9, 61.2, 62.5, 63.7, 64.9, 66, 67.1,
68.2, 69.2, 70.2, 71.1, 72, 72.9, 73.8, 74.6, 75.4, 76.2,
76.9, 77.7, 78.4, 79, 79.7, 80.4, 81, 81.6, 82.2, 82.8, 83.3,
7.2, 13.6, 19.3, 24.4, 28.9, 33, 36.8, 40.2, 43.3, 46.2,
48.9, 51.3, 53.6, 55.7, 57.7, 59.6, 61.3, 62.9, 64.5, 65.9,
67.3, 68.6, 69.8, 70.9, 72, 73.1, 74.1, 75, 75.9, 76.8, 77.6,
78.4, 79.2, 79.9, 80.6, 81.3, 82, 82.6, 83.2, 83.8, 84.3,
84.8, 85.4, 85.9, 86.3, 86.8, 87.3, 87.7, 88.1, 88.5, 11.7,
21.1, 28.8, 35.3, 40.8, 45.5, 49.7, 53.3, 56.4, 59.3, 61.8,
64.1, 66.2, 68.1, 69.8, 71.4, 72.9, 74.2, 75.5, 76.6, 77.7,
78.7, 79.7, 80.5, 81.4, 82.2, 82.9, 83.6, 84.3, 84.9, 85.5,
86, 86.6, 87.1, 87.6, 88.1, 88.5, 88.9, 89.3, 89.7, 90.1,
90.5, 90.8, 91.1, 91.5, 91.8, 92.1, 92.4, 92.6, 92.9)),
.Names =c("pred.prob","variable", "value"), row.names = c(NA, -150L),
class = "data.frame")
The plot that I have created comes from the following code:
# Predicted PPV against prevalence, drawn as three separate line layers:
# the ppv_50 curve in red (linetype 2) and the ppv_2.5 / ppv_97.5 curves in
# blue (linetype 4). Both axes have fixed limits and breaks.
p1 <- ggplot(ppv_npv2, aes(x = pred.prob, y = value)) +
  geom_line(
    data = subset(ppv_npv2, variable == "ppv_50"),
    colour = "red", linetype = 2
  ) +
  geom_line(
    data = subset(ppv_npv2, variable == "ppv_2.5"),
    colour = "blue", linetype = 4
  ) +
  geom_line(
    data = subset(ppv_npv2, variable == "ppv_97.5"),
    colour = "blue", linetype = 4
  ) +
  scale_x_continuous(limits = c(0, 50), breaks = seq(0, 50, 2)) +
  scale_y_continuous(
    limits = c(0, 100), breaks = seq(0, 100, 10), expand = c(0, 0)
  ) +
  labs(
    x = "\n Prevalence (%)",
    y = "Predicted positive predictive value (%) \n"
  ) +
  # theme_classic() must come before theme(): a complete theme would
  # otherwise overwrite the individual element settings
  theme_classic() +
  theme(
    axis.text.x = element_text(
      size = 12, hjust = 0.5, vjust = 0.8, face = "plain"
    ),
    axis.text.y = element_text(
      size = 12, hjust = 0.5, vjust = 0.8, face = "plain"
    ),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold")
  )
p1
The dput for the second data frame is
dat <- structure(list(PPV = c(57, 89, 19, 52, 52, 62, 63, 46, 31, 52,
54, 13, 17, 47, 48, 52, 96, 88, 64, 33, 62, 77, 75, 72), Prevalence = c(19,
35, 12, 16, 24, 6, 28, 13, 8, 19, 30, 6, 8, 20, 11, 25, 29, 55,
46, 13, 16, 22, 23, 20), total = c(939L, 323L, 306L, 703L, 137L,
833L, 360L, 317L, 440L, 2072L, 209L, 386L, 142L, 358L, 167L,
503L, 180L, 233L, 342L, 478L, 4870L, 1104L, 1813L, 1567L),
Author = structure(c(1L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 9L, 10L, 11L, 12L,
15L,18L, 19L, 8L, 14L, 16L, 17L, 21L, 20L, 20L, 13L, 10L),
.Label = c("Aldous",
"Bahrmann", "Body", "Christ ", "Collinson", "Eggers", "Freund",
"Giannitis", "Hammerer-Lercher", "Hoeller", "Inoue", "Invernizi",
"Keller", "Khan", "Lotze", "Melki ", "Normann", "Santalol", "Sebbane",
"Shah", "Thelin "), class = "factor"), Study.assay = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("TnI", "TnT"), class = "factor")),
.Names = c("PPV", "Prevalence", "total", "Author", "Study.assay"),
class ="data.frame", row.names = c(NA, -24L))
And the plot from dataframe 2 is as follows
# Bubble plot of observed PPV against prevalence, one bubble per row of
# `dat`: bubble area scales with `total`, and each bubble is labelled with
# its `Author`.
#
# Changes from the earlier version, none of which alter the rendered plot:
#   * aes() refers to columns by bare name instead of `dat$...`, so the
#     mapping follows whatever data the layer is given.
#   * the `guide` argument to ggplot() is dropped; ggplot() ignores it.
#   * each axis has a single scale; the first pair of scale_*_continuous()
#     calls was replaced by the second pair anyway.
# NOTE(review): x is limited to 0-50, so the row with Prevalence = 55 is
# dropped from the plot -- confirm this is intended.
p2 <- ggplot(
  dat,
  aes(x = Prevalence, y = PPV, size = total, label = Author)
) +
  # white outline, red interior (shape 21 is a fillable circle)
  geom_point(colour = "white", fill = "red", shape = 21) +
  # the constant size overrides the size = total mapping for the labels
  geom_text(size = 2.5) +
  scale_size_area(max_size = 10) +
  scale_x_continuous(limits = c(0, 50), breaks = seq(0, 50, 2)) +
  scale_y_continuous(
    limits = c(0, 100), breaks = seq(0, 100, 10), expand = c(0, 0)
  ) +
  labs(
    x = "\n Prevalence (%)",
    y = "Predicted positive predictive value (%) \n"
  ) +
  # theme_classic() must come before theme(): a complete theme would
  # otherwise overwrite the individual element settings
  theme_classic() +
  theme(
    axis.text.x = element_text(
      size = 12, hjust = 0.5, vjust = 0.8, face = "plain"
    ),
    axis.text.y = element_text(
      size = 12, hjust = 0.5, vjust = 0.8, face = "plain"
    ),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    legend.position = "none"
  )
p2
As you can see both plots have the same axis and limits. I have two questions:
a) Can I overlay plot 2 onto plot 1?
b) Can I make the bubbles on plot 2 more transparent and choose colours by the factor dat$Study.assay (green and purple)?
Many thanks in advance - have spent a day researching this but no solution yet.
Here's a start using your data,
# Overlay of the two data sets in a single plot: the ppv_* curves from
# ppv_npv2 as lines, and the studies in `dat` as semi-transparent bubbles
# with author labels. Each layer supplies its own data, so ggplot() itself
# is called with no arguments. The outer parentheses make R print the plot
# as well as assign it.
#
# Both the line and point layers map `colour`, so they share one colour
# scale whose legend lists the ppv_* variables alongside the Study.assay
# levels.
(plot2 <- ggplot() +
  geom_line(
    data = ppv_npv2,
    aes(pred.prob, value, group = variable, colour = variable)
  ) +
  # `label` is not an aesthetic of geom_point (it only triggered an
  # "unknown aesthetics" warning), so it is mapped in geom_text alone
  geom_point(
    data = dat,
    aes(Prevalence, PPV, size = total, colour = Study.assay),
    alpha = 0.4
  ) +
  # fixed label size; a size = total mapping here would be overridden by
  # the constant anyway
  geom_text(
    data = dat,
    aes(Prevalence, PPV, label = Author),
    size = 3, hjust = -1, vjust = 0
  )
)
It's not the orthodox ggplot2 way, but it's a start.

Resources