Custom legend with collapsed factor values in ggplot2 - r
This might have been asked before but I cannot find it after searching for a while.
I have the following data.frame.
structure(list(genotype = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4), treatment = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2), group_val = c(1.57837321136062, 1.76334487045417,
1.73586017158848, 2.04109599956349, 1.80010448171344, 2.07090618591467,
1.07574792716769, 1.18397923178828, 1.21889101529495, 1.20248500773822,
1.3808338457315, 1.42210495550068, 1.64573799027085, 1.55264650622629,
1.70883543195709, 1.50659245289343, 0.90200663935181, 0.881584819347461,
0.954018876774318, 0.930280832877143, 1.85156683945601, 1.84753564786241,
1.96298425756247, 1.97329138022375, 1.89502726316024, 1.88250460242058,
1.12763625255165, 0.849376374224505, 1.04073813233643, 1.00903241221572,
1.58053330474755, 1.60670456352336, 2.02389070564365, 1.88873097588837,
2.05477131909231, 1.9945072156688, 1.25082256791521, 1.19811638234775,
1.06975634816231, 1.20976663827858, 2.10380372095596, 2.14921911265538,
2.18892848376085, 2.15381486434453, 1.82607480270083, 1.98677173426624,
0.954242509439325, 1.26717172840301, 1.02118929906994, 0.8750612633917,
0.602059991327962, 0.751757501701102, 1.62038696281561, 1.20836885846782,
1.32651612490137, 1.13698195289592, 1.6421025338509, 1.41206291695827,
1.6101194399672, 1.6712113404111, 2.11429641123473, 1.84505371972817,
2.27595666174897, 2.2231986751043, 2.24564757180665, 2.24707729700922,
1.47310327692139, 1.1447387331723, 1.24550565752405, 1.07766801873253,
1.85452622982568, 1.87613186339641, 2.09397999968991, 1.96262712830201,
2.2095435542086, 2.10814923581137, 1.00067107824743, 0.983971241990881,
1.24468845794328, 1.15181012595794)), row.names = c(NA, -80L), groups = structure(list(
genotype = c(1, 1, 2, 2, 3, 3, 4, 4), treatment = c(1, 2,
1, 2, 1, 2, 1, 2), .rows = structure(list(1:10, 11:20, 21:30,
31:40, 41:50, 51:60, 61:70, 71:80), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 8L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
My aim is to have the following plot
But with the levels of interaction(genotype, treatment) collapsed in the legend. The only relevant information here is that "light" colors mean treatment level 1 and "dark" colors mean treatment level 2.
I would like the legend to reflect that, meaning it shows only two points (they could be "#CFCFCF" and "gray50") indicating that the values of treatment are 1 and 2, respectively.
Here's the code to make the plot as shown in the image
library(tidyverse)

# Plot group_val per genotype, coloured by genotype x treatment combination.
# `df` is assumed to be the data frame dumped with dput() above.

# One colour per genotype/treatment combination, listed as light/dark pairs:
# within each pair the light shade is treatment 1, the dark shade treatment 2.
target_colors <- c(
  "#FF9BB4", "#FA234C",
  "#A2D3FF", "#2987FA",
  "#47C947", "darkgreen",
  "#CFCFCF", "gray50"
)

# interaction() names its levels "<genotype>.<treatment>"; sorting them gives
# "1.1", "1.2", "2.1", ... which is the order the colour pairs are listed in.
color_order <- sort(levels(interaction(df$genotype, df$treatment)))

df %>%
  ggplot(aes(genotype, group_val,
             color = interaction(genotype, treatment))) +
  # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
  # reassigned, which would silently change the legend behaviour.
  ggbeeswarm::geom_quasirandom(dodge.width = 1, show.legend = TRUE) +
  # if flipping, the levels of the factor must be modified
  # coord_flip() +
  geom_boxplot(
    position = position_dodge(1),
    width = 0.1, fill = "black", show.legend = FALSE
  ) +
  # Named vector so each colour is tied to a specific interaction level.
  scale_color_manual(values = setNames(target_colors, color_order))
One option would be to use only four colors, map genotype on color and treatment on alpha:
library(tidyverse)

# Alternative encoding: colour identifies the genotype, transparency identifies
# the treatment, so the alpha legend has only two entries (treatment 1 and 2).
# `df` is assumed to be the data frame dumped with dput() above.

# One colour per genotype (four genotypes in the data).
target_colors <- c("#FA234C", "#2987FA", "darkgreen", "gray50")

df %>%
  ggplot(aes(genotype, group_val,
             color = factor(genotype),
             alpha = factor(treatment),
             # explicit grouping by genotype x treatment combination, used by
             # the dodged swarms and boxplots
             group = interaction(genotype, treatment))) +
  # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
  # reassigned, which would silently change the legend behaviour.
  ggbeeswarm::geom_quasirandom(dodge.width = 1, show.legend = TRUE) +
  geom_boxplot(
    position = position_dodge(1),
    width = 0.1, fill = "black", show.legend = FALSE
  ) +
  # Unnamed values are matched to the factor levels in order.
  scale_color_manual(values = target_colors) +
  # treatment 1 -> 0.6 (lighter), treatment 2 -> 1 (fully opaque)
  scale_alpha_manual(values = c(.6, 1))
Related
Error using aggregate to find length with missing values
I am trying to use the aggregate function in R to summarise a data using the length function. My data has some NA's and I have tried using 'na.rm = T' or 'na.omit' however none sees to work. I keep getting this error 'Error in FUN(X[[i]], ...) : 2 arguments passed to 'length' which requires 1' data10 <- structure(list(Group = c(1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1), SUBJECT = c(1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 10, 11, 12, 14, 14, 15, 16, 16, 17, 18, 19, 19, 20, 21, 21, 22, 23, 23, 24, 25), test = c(1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1 ), trial = c(1, 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1, 3), Condition = c(1, 2, 3, 1, 3, 1, 2, 3, 2, 3, 1, 2, 1, 2, 3, 1, 3, 1, 2, 3, 2, 3, 1, 2, 1, 2, 3, 1, 3, 1, 2, 3, 2, 3), Sac2 = c(1, 1, 1, NA, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1), Sac = c(1, 1, 1, NA, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 3, 3, 1, 1), Saccade...8 = c(1, 1, 1, NA, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1), T_APPEAR = c(9.236, 17.85, 28.942, 63.724, 9.463, 22.963, 52.068, 57.021, 15.344, 19.783, 37.825, 46.17, 4.339, 21.241, 29.179, 31.823, 12.164, 22.84, 23.954, 73.663, 27.269, 22.131, 30.361, 62.674, 6.928, 16.413, 47.555, 48.893, 7.291, 15.796, 31.788, 54.946, 10.117, 28.83)), row.names = c(NA, -34L), class = c("tbl_df", "tbl", "data.frame")) data14 = aggregate(data10, by = list(data10$SUBJECT,data10$Condition, data10$Group, data10$test), FUN = length(), na.rm=TRUE)
converting NULL to numeric and taking the sum of lists
I have a BTO dataset, which I converted from long to wide format to prepare it for diversity measurements using the diversity function from the vegan package. To achieve this I used this code: diversity <- pivot_wider(bird_case, names_from = ENGLISH_NAME, values_from = HOW_MANY) The results comes up with list elements as I converted the months into seasons with a previous code. I wish to take the sums of all the lists, so only a single sum remains in each cell. As for the NULL values I want these converted to 0. I have tried to replace the NULL values to zero using this diversity[diversity == "NULL"] <- 0 it won't work. As for converting the list elements and taking the sum, I have tried aggregate to no avail. Heres a reproducible code: structure(list(year = c(2018, 2019, 2017, 2015, 2014, 2015, 2017, 2017, 2016, 2019, 2018, 2016, 2016, 2016, 2019, 2019, 2018, 2017, 2015, 2018, 2015, 2017, 2015, 2016, 2016, 2016, 2018, 2018, 2017, 2014, 2015, 2017, 2014, 2014, 2017, 2019, 2010, 2011, 2011, 2012, 2019, 2012, 2013, 2019, 2017, 2011, 2017, 2016, 2016, 2010), Season = c("Winter", "Winter", "Summer", "Winter", "Winter", "Autumn", "Autumn", "Winter", "Spring", "Autumn", "Spring", "Winter", "Summer", "Autumn", "Summer", "Spring", "Summer", "Spring", "Spring", "Autumn", "Summer", "Summer", "Autumn", "Summer", "Autumn", "Winter", "Spring", "Winter", "Winter", "Summer", "Winter", "Autumn", "Autumn", "Winter", "Spring", "Winter", "Summer", "Spring", "Summer", "Autumn", "Winter", "Winter", "Winter", "Spring", "Summer", "Winter", "Autumn", "Winter", "Spring", "Winter"), POSTCODE = c("NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR29 5QA", "NR15 1TS", "NR15 1TS", "NR15 1TS", "NR15 1TS", "NR15 1TS", "NR15 1TS", "NR15 1TS", "NR15 1TS", "NR15 1TS", "NR15 1TS", "NR15 1TS", "NR15 1TS", "NR15 
1TS", "NR15 1TS", "NR15 1TS", "PE32 1TL", "PE32 1TL", "PE32 1TL", "PE32 1TL", "PE32 1TL", "PE32 1TL", "PE32 1TL", "PE32 1TL", "PE32 1TL", "PE32 1TL", "PE32 1TL", "PE32 1TL", "PE32 1TL", "PE32 1TL"), LOC_ID = c("LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC568364", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC1163128", "LOC569508", "LOC569508", "LOC569508", "LOC569508", "LOC569508", "LOC569508", "LOC569508", "LOC569508", "LOC569508", "LOC569508", "LOC569508", "LOC569508", "LOC569508", "LOC569508"), Wren = list( c(1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1), 1, c(1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 3, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 2, 1, 1), c(1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 2, 1), c(2, 1, 2, 3, 1, 1, 1), c(1, 1), c(1, 1, 1), c(1, 1, 1, 1, 1), c(1, 1), c(1, 1), c(1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1), NULL, 1, c(2, 1, 1, 2, 1), NULL, NULL, c(1, 1, 1), NULL, 1, NULL, NULL, c(1, 1), c(1, 1, 1, 1, 1, 1), c(1, 1, 1, 1), c(1, 1), NULL, NULL, c(1, 1), 1, c(1, 1, 1), NULL, c(1, 1, 1, 1)), Dunnock = list(c(2, 2, 1, 2, 1, 1, 1, 2, 2, 2), c(2, 1, 2, 2, 2, 2, 1), c(1, 1, 2, 1, 3, 1, 2), c(1, 2, 2, 2, 2, 2, 2, 1, 1), 2, c(1, 1, 1, 2, 1, 1, 2), c(1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1), c(2, 2, 2, 2, 2, 1, 2, 2, 1, 
2, 2), c(2, 5, 2, 1, 3, 2, 2, 3, 2, 3, 1), c(2, 1, 1, 2, 2), c(3, 2, 3, 2, 2, 3, 3, 2, 2), c(1, 1, 1, 1, 1, 1), c(1, 2, 1, 2, 2, 2, 2), c(1, 1, 2, 1, 2, 1, 2, 2, 1, 2), c(3, 4, 2, 5, 3, 5, 4, 2), c(2, 2, 2, 2, 1, 2, 3, 2, 2), c(3, 3, 3, 3, 3, 2, 1, 2, 3, 1), c(2, 3, 2, 2, 2, 2, 2, 2, 2, 5, 4, 2), c(2, 2, 2, 1, 2, 1, 2, 2, 2, 2), c(1, 2, 1, 1, 2, 2, 1), c(1, 1, 1, 2, 1, 1), c(3, 4, 6, 3, 3, 3), c(1, 1, 2, 1), c(2, 1), c(2, 2, 1, 2, 1), c(2, 1, 2, 1, 2, 2), c(2, 2), c(1, 1, 1, 2), c(2, 3, 2, 2, 2, 3, 3, 2, 2, 2), 2, c(2, 2, 3, 2, 2), c(2, 1, 2, 2, 2, 2), 1, NULL, c(3, 2), c(1, 1), c(1, 2, 1, 1, 1, 1, 1, 2), c(2, 2, 1, 1, 1, 1), c(3, 3, 2, 1, 2, 2, 2, 1, 1, 1), c(2, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 2, 2, 2), c(1, 1, 2, 1, 1, 2, 1, 1), c(1, 1, 2, 1, 1), c(3, 2, 1, 5, 2, 1, 2, 2, 2), c(3, 3, 1, 1, 1, 3, 2, 1), c(1, 1, 2, 1), c(1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1), c(1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1), c(1, 1, 2, 1, 4, 4, 1, 2, 2, 2), c(2, 1, 1, 4, 1, 1, 1, 2, 1, 1)), `Blue Tit` = list(c(1, 1, 2, 3), c(2, 2, 3, 2, 2), 4, c(4, 2, 3, 4), 2, c(2, 2), c(1, 2, 2), c(2, 2), c(2, 2, 1, 2, 2), NULL, c(2, 2, 2, 5, 2, 2, 2, 2, 2), c(2, 1, 2, 2, 3, 2), 2, NULL, 7, c(2, 2, 2, 2, 2, 2, 2, 2, 2), NULL, c(1, 1, 2, 2, 2, 2, 2), c(4, 2, 4, 3, 7, 3, 2), 1, c(2, 2, 3), c(8, 10, 10, 12, 10, 8, 5, 12 ), c(6, 4, 4, 6, 4), c(12, 6, 6, 6, 6), c(4, 4, 5, 5, 8), c(10, 6, 6, 4, 6, 6, 4), 4, c(10, 4, 4, 8, 6), c(4, 6, 4, 10, 6, 6, 8, 7, 6), c(12, 12, 6), c(12, 8, 12, 12, 12, 10, 10), c(10, 5, 10, 5, 10), c(12, 12, 6), c(6, 6), c(4, 2, 2, 2), c(2, 6), c(3, 2, 2, 1, 2, 1, 2), c(2, 2, 2, 1, 2, 1), c(2, 4, 1, 2, 1, 2, 2, 1, 2), c(4, 3, 1, 2, 2, 2, 2, 3, 5, 4), c(2, 4, 3, 3, 1, 2, 2), c(2, 4, 2, 2, 1, 2, 1, 1, 3), c(3, 3, 2, 2, 3, 2, 3, 2), c(1, 2, 1, 2, 2, 2, 2, 1, 2), c(5, 3, 9, 4, 4, 3, 9, 5), c(1, 2, 1, 2, 3, 2, 1, 2, 3, 3, 2), c(4, 3, 5, 2, 3, 4, 3, 3, 4, 5, 2), c(3, 3, 3, 3, 4, 2, 3, 4, 3, 5, 3), c(2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 4), c(2, 2, 2, 3, 2, 2, 2, 1)), `Pied/White Wagtail` = list(c(1, 2, 2, 
2, 1, 1, 1, 2, 2, 2), c(2, 1, 1, 1, 2, 1, 1, 2, 2), c(1, 1, 1, 1), NULL, NULL, NULL, 2, c(2, 2, 2, 2), c(1, 1, 1, 1, 1), c(2, 2, 2, 1), c(2, 2, 2, 2, 2, 2, 2, 2, 2), NULL, c(2, 2), NULL, c(2, 2, 1, 2, 2, 2), c(2, 2, 2, 2, 2, 2, 1, 2, 2, 2), c(2, 2, 2, 2, 1, 2, 3), c(1, 2, 2, 2, 2, 2), NULL, c(1, 1), 1, 1, NULL, NULL, 1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1, NULL, 1, c(1, 1, 1, 1, 1), c(2, 1, 2, 1), c(1, 1), c(1, 1), 1, 1, 1, 1, c(1, 1), c(1, 1, 1, 1)), `Collared Dove` = list( c(2, 2, 2, 2, 2, 2, 2, 2, 2), c(2, 2, 3, 2, 2, 2, 2, 3, 2, 2), c(2, 3, 2, 2, 2, 2, 2, 3, 3), c(1, 1, 2, 2), NULL, c(2, 2, 2, 2, 2), c(2, 2, 2, 2, 2, 2, 2, 2, 2, 3), c(2, 2, 2, 1, 2, 2, 2, 1, 1), c(2, 2, 2, 2, 2, 2, 2), c(2, 2, 4, 4, 2, 2, 22, 2), c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), c(2, 2, 2, 2, 2, 2, 2, 2, 2), c(2, 2, 2, 2, 2, 2, 2, 2), c(2, 3, 3, 4, 2, 2, 2, 2, 2), c(2, 3, 3, 3, 2, 3, 3, 3, 3, 3, 2), c(2, 1, 2, 2, 2, 3, 2, 2, 2, 2, 2, 3), c(2, 2, 2, 2, 2, 2), c(1, 2, 2, 1, 2), c(2, 2, 2, 2, 2, 2, 2, 2, 2), c(1, 2, 2, 1, 2), c(2, 2, 2, 2, 2, 2, 1), c(1, 1), c(1, 1), c(1, 2, 2, 2, 1), c(2, 2, 1, 2), 2, c(2, 1), c(3, 1, 1, 1, 1, 2, 2), NULL, c(2, 1, 1), c(2, 2), 1, 1, c(2, 2), NULL, c(9, 9, 17, 8, 19), c(6, 3, 2, 3, 3, 5, 3), c(16, 9, 12, 3, 7, 5), c(4, 4, 3, 3, 5, 3, 2), c(2, 2, 3, 3, 2, 3, 4, 2), c(2, 2, 2, 3, 4, 4, 2, 12, 3, 5, 4), c(2, 2, 3, 3, 2, 3, 2, 3, 3), c(3, 3, 3, 3, 2, 5, 3, 1, 3), c(4, 2, 3, 2, 7, 2, 3), c(3, 1, 12, 3, 4, 4, 2, 5, 5, 12), c(3, 2, 1, 5, 3, 2, 2, 1, 2, 3, 2), c(3, 2, 2, 5, 3, 3, 2, 2, 10), c(2, 2, 1, 1, 3, 2, 1, 1, 2), c(6, 2, 6, 2, 5, 3, 2, 2, 4, 11, 3, 2)), `Great Tit` = list(c(1, 2, 1, 1, 1, 1, 1, 1), c(1, 2, 1, 2, 1, 2, 1, 1), NULL, c(1, 3, 2, 5, 3, 3, 4, 1), NULL, c(1, 2, 1, 1), c(1, 1), NULL, c(1, 1, 1, 2, 1, 1), 1, c(1, 1), c(1, 1, 1, 1), 1, NULL, c(2, 2, 1, 1), c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2), 1, 3, c(2, 2, 2, 1), c(4, 2, 1, 2), c(2, 1), c(8, 8, 12, 6, 8), c(2, 2, 3, 2), c(8, 3, 6, 
4, 6), c(2, 2, 4, 2), c(1, 1, 2), c(2, 2, 2), c(1, 2, 1, 2, 1, 2), c(2, 2, 2, 2, 2, 2, 2), c(4, 4, 6), c(2, 4, 2, 2, 4, 2, 2), c(3, 4, 2, 2, 3), 6, c(2, 2), c(1, 2), 2, c(2, 1, 1, 1, 1, 2), c(1, 1, 2, 2, 1, 2, 1), c(1, 1, 1, 2, 1, 2, 1, 2), c(2, 2, 3, 1, 2, 4, 1, 3), c(3, 1, 1, 2), c(1, 2, 2, 1, 1, 2, 2, 2, 1, 2), c(2, 1, 2, 1, 1, 1), c(2, 1, 2, 1), c(2, 3, 2, 3, 2, 1), c(1, 1, 1, 2, 2, 2, 1), c(1, 2, 2, 1, 1, 2, 3, 1, 2, 1, 3, 3), c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), c(2, 2, 2, 8, 1, 2, 2, 2, 1, 2, 2, 2), c(2, 1, 1, 1, 1, 1, 1, 2)), Robin = list(c(1, 1, 3, 1, 3, 1, 1, 3, 3, 2), c(2, 2, 1, 2, 2), c(1, 2, 1, 1, 1, 1, 1), c(1, 1, 2, 1, 1, 1, 1, 1, 1, 2), 1, c(1, 1, 1, 1, 2, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1), c(2, 2, 1, 1, 3, 1, 1, 2, 2, 2), c(2, 2, 1, 2, 2, 2, 2), c(1, 2, 2, 3, 1, 2, 2, 3), c(1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1), c(2, 1, 1, 1, 2, 1), c(1, 1, 1, 1, 1, 1, 1, 2, 1, 1), c(1, 5, 1, 1, 2, 2, 2, 1), c(2, 2, 2, 1, 2, 2, 2, 2, 2), c(2, 2, 2, 1, 3, 3, 2), c(2, 2, 6, 1, 1, 2), c(1, 2, 1, 1, 2, 2, 2, 2, 2), c(1, 1, 1, 1, 2, 2, 2, 2, 1), c(1, 1, 1, 1, 2, 1, 1), c(1, 3, 3, 3), c(3, 1, 1, 2), c(1, 1, 1, 1, 1), c(2, 2, 2, 2, 2, 3, 2), c(2, 1, 2, 1, 3), c(2, 2), c(3, 1, 3, 5, 2, 2, 2, 2, 2), c(3, 4, 4, 2, 3, 2, 2, 4, 2), 1, c(4, 2, 4, 2, 4), c(1, 1, 3), c(2, 2, 2), 2, c(3, 2, 2), c(1, 2), c(1, 1, 1, 1, 1), c(3, 2, 2, 2, 4, 2, 2, 1), c(2, 1, 2, 1, 1), c(1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 2, 1, 1), c(1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1), c(2, 4, 3, 2, 1, 6, 2, 3, 1, 2), c(1, 1, 2, 1, 1, 1, 2, 1, 2, 1), c(1, 1, 1, 1, 1, 2, 1, 1, 1), c(1, 1, 1, 2, 1, 1, 1, 1, 1), c(2, 1, 2, 1, 2, 1, 2, 1), c(1, 1, 1, 1, 2, 1, 1, 1, 2)), Greenfinch = list(2, c(2, 2, 2, 2, 2, 2, 2 ), 1, c(1, 1, 2), NULL, NULL, NULL, c(2, 2, 2), c(3, 1, 2, 3, 3), 2, c(2, 5, 2, 2, 2, 2, 2, 5, 2, 2), NULL, c(2, 1, 3, 2), NULL, c(1, 2, 1, 2, 1, 1), c(2, 2), 2, 1, c(2, 2, 2, 1, 2, 2, 1, 2, 2, 2), NULL, c(3, 1, 3), c(4, 2, 4), 1, c(2, 2, 4, 3, 2, 
2), c(2, 2, 1, 2, 4, 2, 2, 2), c(2, 2, 3, 2, 3), c(3, 1), c(2, 2, 2, 2, 3), c(2, 6, 4, 2, 2, 2), 4, c(5, 5, 5, 5), c(2, 2, 1, 4, 2, 4, 4), 4, c(2, 2), c(4, 1, 4), 2, c(7, 2, 3, 2, 2, 3, 4, 4, 3), c(4, 3, 2, 1, 2, 2, 2), c(6, 1, 3, 2, 1, 2, 2), c(3, 1, 2, 3), 1, c(1, 1, 3, 3, 1, 5, 2, 1, 1, 3, 1), c(1, 2, 2, 2, 3, 1, 3), c(1, 1, 3, 1, 1, 3, 1), c(1, 4, 1, 3, 4), c(2, 2, 1, 1, 1), c(2, 2, 5, 2, 1, 2, 1, 1), c(7, 2, 6, 1, 2), c(2, 1, 2, 1, 1), c(4, 2, 1, 1, 2, 1)), `House Sparrow` = list( NULL, c(2, 2, 2, 2, 2, 2), NULL, c(2, 2, 4, 6, 3, 4, 3, 3), 3, c(3, 2, 2, 2), NULL, NULL, NULL, c(2, 2, 2), c(1, 2, 2), c(2, 2), NULL, NULL, c(3, 5), c(2, 2, 2, 2, 2, 2, 2, 2), NULL, NULL, c(3, 3, 3, 3, 2, 3, 5, 3, 3, 3), NULL, c(2, 2, 1, 1, 1, 2), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, c(20, 14, 12, 10, 8, 14, 21), c(6, 5, 9, 9, 9, 6), c(13, 12, 5, 21, 11, 12, 16, 10, 15), c(3, 2, 7, 3, 1), c(10, 11, 15, 8, 12, 15, 5, 16), c(1, 5, 5, 5, 5, 4, 5, 2, 6, 4, 4), c(2, 4, 1, 4, 3, 3, 7, 7, 3, 5, 3), c(9, 10, 10, 7, 8, 10, 10, 6, 8, 6, 12, 9), c(10, 5, 13, 14, 4, 5, 9, 9, 10, 8, 9), c(10, 9, 10, 7, 9, 10, 8, 7, 9, 14), c(3, 7, 5, 10, 2, 6, 14, 6, 3, 7, 3), c(7, 9, 11, 5, 5, 7, 7, 6, 6, 10, 5, 7, 16), c(5, 7, 5, 5, 6, 8, 7, 4, 5), c(15, 10, 12, 9, 3, 9, 10, 11 )), `Coal Tit` = list(1, c(1, 1, 1), NULL, c(2, 3, 2, 2, 4, 2, 2), NULL, 2, NULL, NULL, c(1, 1, 1), NULL, c(2, 1), 1, 1, NULL, 2, NULL, 1, NULL, 2, 1, 2, c(1, 1, 1), c(2, 2, 2, 2, 2, 1), c(1, 1, 1, 1, 1), c(1, 1, 1, 1), c(1, 1, 2), 1, c(2, 1, 1, 1, 1, 1), c(2, 2, 1, 2, 1, 2, 2), c(2, 2), c(2, 2, 2), c(2, 2, 2, 2, 2, 2, 2, 2), c(2, 2, 2), 2, c(1, 1), NULL, 1, NULL, c(1, 1, 1), c(2, 1, 1, 2, 2, 1, 1, 3, 1), c(1, 1, 1, 1), c(2, 1, 1, 1, 1, 2), c(1, 1, 1, 2, 1, 1), NULL, c(2, 1, 1, 1), c(1, 1), c(1, 2, 1, 1, 2, 1, 2, 1, 2), c(1, 2, 1, 2, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1), 1), Woodpigeon = list(c(2, 3, 3, 3, 3, 3, 3, 5, 3), c(3, 4, 3, 3, 3, 5, 3, 3, 2, 4, 3), c(2, 1, 3, 3, 3, 3, 1, 
3), c(3, 3, 3, 4, 1, 1, 5, 5, 5), 2, c(3, 4, 1, 3, 3, 3, 1, 5, 3), c(2, 6, 5, 3, 7, 5, 2, 1, 3, 2, 2), c(3, 3, 3, 3, 2, 2, 3, 4, 3, 3, 5, 5, 1), c(5, 5, 5, 3, 5, 4, 4, 5, 7), c(5, 4, 3, 4, 5, 4), c(3, 3, 3, 3, 3, 3, 5), c(3, 2, 3, 3, 3, 5, 6, 3, 3, 5, 5), c(5, 3, 2, 5, 3, 5, 3, 3, 3), c(2, 3, 3, 3, 4, 5, 3, 5, 5), c(3, 3, 3, 3, 5, 3, 3, 2, 3), c(3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 2), c(3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3), c(3, 4, 3, 3, 5, 3, 3, 3), c(3, 3, 4, 5, 5, 3, 3, 3), c(3, 3, 3, 3, 2, 3, 3, 3), c(2, 2, 3, 3, 3, 2, 3, 3), c(4, 4, 4, 4), c(10, 8, 10 ), c(5, 5, 5), c(6, 4, 6), c(6, 6, 6, 10, 6), c(6, 10 ), c(20, 10, 10, 10, 4, 10, 8), c(6, 6, 4, 4, 6, 4, 4, 6, 6), NULL, c(8, 8, 8), c(3, 4, 4, 6, 3, 6, 3), NULL, NULL, c(6, 4, 6, 4), 1, c(3, 3, 2, 3, 1, 3, 2, 1), c(5, 3, 4, 4, 2, 3, 3), c(1, 2, 5, 1, 4, 4, 3, 4, 2, 5, 2), c(3, 2, 2, 1, 3, 2, 2, 1), c(5, 6, 2, 6, 2), c(1, 6, 2, 6, 2, 3, 3, 3), c(5, 3, 5, 4, 4, 2, 2, 5), c(5, 5, 3, 4, 2, 3, 5, 4), c(3, 2, 2, 2, 3, 2, 5), c(2, 2, 5, 3, 3, 5, 3, 4), c(2, 2, 1, 1, 5, 6, 2, 7, 5, 2, 3), c(5, 2, 3, 5, 2, 1, 5, 6, 4, 2), c(2, 3, 4, 3, 3, 4, 3, 3, 3, 3), c(7, 5, 3, 2, 5, 9, 2, 3, 3, 4, 3)), Blackbird = list( c(3, 3, 1, 3, 3, 3, 3, 5, 5), c(3, 3, 3, 3, 3, 3, 5, 3, 3, 3), c(2, 1, 3, 3, 3, 3, 3), c(5, 5, 11, 7, 3, 11, 15, 10, 5, 3), NULL, c(7, 2, 9, 3, 6, 3, 2, 3, 5), c(5, 2, 3, 1, 3, 5, 2, 1), c(3, 3, 4, 1, 2, 3, 3, 2, 3, 4, 2), c(4, 3, 3, 5, 4, 5, 5, 4, 3, 3, 5, 3), c(11, 7, 5, 4, 11, 11, 5), c(2, 4, 2, 3, 5, 6, 3, 3), c(3, 3, 3, 3, 4, 4, 3, 3), c(3, 3, 2, 2, 2, 3, 4, 3, 2), c(5, 13, 3, 5, 7, 4, 3, 7), c(5, 8, 6, 5, 5, 6, 3, 5, 10), c(4, 3, 8, 4, 3, 6, 3), c(5, 5, 5, 2, 5, 3, 3, 3), c(3, 3, 3, 3, 5, 5, 4, 4, 5, 4, 3), c(3, 3, 4, 5, 4, 5, 5, 2), c(5, 5, 1, 3, 3, 5, 5, 1), c(5, 1, 3, 5, 2, 3, 3), c(2, 3, 3, 3, 2, 2, 3), c(1, 2, 2), c(2, 2, 2), c(3, 6, 4, 2, 4), c(3, 3), c(2, 3), c(1, 4, 4, 2, 3, 5), c(6, 6, 6, 6, 6, 4, 6), 2, c(4, 2, 4, 4, 2, 2), c(1, 3, 3, 1, 2, 1, 3, 3, 2), 2, 3, c(4, 4, 6, 4), 2, c(2, 2, 5, 6, 4, 8), c(4, 3, 5, 
5, 5), c(4, 4, 1, 4, 3, 6, 4, 5, 7), c(6, 2, 5, 3, 1, 3, 1), c(3, 4, 3, 4, 2, 5, 3, 3, 5), c(6, 7, 8, 7, 3, 8, 5, 10, 4, 5), c(6, 13, 3, 6, 8, 6, 14, 4, 5, 2, 4, 2), c(8, 8, 6, 6, 2, 2, 3, 5, 5), c(7, 4, 7, 4, 4, 6, 4, 4, 4), c(6, 7, 5, 7, 6, 8, 4, 7, 6, 11), c(2, 3, 3, 4, 2, 5, 3, 2, 3, 2), c(4, 3, 2, 3, 3, 3, 4, 3, 2, 4), c(3, 8, 7, 7, 4, 6, 4, 7, 3, 3), c(4, 9, 7, 6, 3, 2, 6, 3, 5)), `Song Thrush` = list(c(1, 1, 1, 1, 1, 1, 1, 2, 1, 1), c(1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 2), c(1, 1, 1, 1, 1, 1), 1, c(1, 1, 11), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1), 1, c(1, 1, 1, 1, 1, 1, 1), c(1, 2, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 2, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1), c(2, 1, 1, 1, 2, 2, 2, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 1, 1), c(2, 1, 1, 1, 1, 1), c(2, 2, 1, 1), NULL, 1, 1, c(1, 1, 1), 1, c(1, 1, 2, 1), c(1, 2, 1, 2, 3, 1), NULL, c(1, 1), NULL, NULL, NULL, c(1, 2, 2), NULL, 1, c(1, 1), NULL, NULL, NULL, NULL, NULL, NULL, 1, c(1, 1), 1, 1, c(1, 1, 2, 1, 2, 1, 1, 1, 1), NULL), Chaffinch = list(c(2, 1, 3, 3), c(2, 2, 2, 1, 2, 1), c(1, 2), c(1, 1, 3, 2, 2, 2, 2, 2, 1, 2), NULL, c(3, 3, 3, 2, 2, 2, 1), c(3, 1, 3, 2), NULL, c(5, 2, 2, 2, 4), c(2, 2), c(2, 2, 2, 2, 2, 3, 2, 2, 2, 2), c(5, 1, 3, 2, 3), c(2, 1, 1, 3), c(3, 2, 2, 2, 2, 3, 1), c(2, 2), c(2, 2, 2, 2, 2, 2, 2, 2, 2), c(2, 2, 3), c(2, 1, 2, 2, 2), c(4, 3, 2, 5, 2, 2), c(1, 3), c(3, 3, 1, 5, 1), c(2, 4, 5, 2, 2), c(2, 4, 2, 4, 1), c(6, 4, 4), c(5, 2, 4, 4, 5, 4, 4), c(6, 3, 4, 5, 4, 4, 3), c(4, 4, 4), c(4, 2, 6, 2), c(7, 6, 8, 8, 8, 4, 6, 4, 4, 4), c(10, 6), c(2, 6, 6, 4), c(4, 4, 5, 4, 4, 4, 5), c(10, 10, 10), NULL, c(2, 2, 2, 4, 4, 4), NULL, c(6, 6, 5, 7, 3, 2), c(4, 4, 2, 3, 10), c(1, 5, 3, 5, 4, 5, 3, 2, 4, 2), c(5, 4, 4, 2, 7, 6, 10, 2, 7, 2), c(2, 4, 2, 3, 4, 1, 4, 3, 1, 1), c(13, 7, 3, 6, 13, 9, 5, 7, 7, 11), c(10, 7, 9, 7, 9, 17, 11, 8, 4), c(1, 3, 3), c(1, 3, 
4, 1, 1, 1, 2, 6, 4), c(5, 8, 6, 9, 9, 3, 11, 2, 5), c(2, 3, 3, 3, 2, 3), c(4, 3, 3, 5, 3, 4, 4, 4, 6, 3, 3, 3), c(3, 2, 3, 2, 3, 2, 4, 3, 2, 1, 2, 5, 3), c(12, 5, 12, 8, 18, 6, 3, 4, 9, 15, 7, 10)), Starling = list(c(1, 3), 1, 3, c(5, 5, 5, 5, 7, 7, 5), NULL, NULL, NULL, NULL, c(5, 9, 7, 5, 7, 7), NULL, c(1, 1, 2, 2, 1, 2), 3, NULL, NULL, 1, c(1, 3), c(1, 1), c(2, 2, 2, 2), c(5, 2, 1, 3, 7, 13, 1, 2, 2, 3), c(1, 2), NULL, NULL, NULL, NULL, NULL, NULL, c(1, 1), NULL, 4, NULL, NULL, NULL, NULL, NULL, c(4, 12), NULL, c(2, 28, 9, 2, 3, 9), c(3, 7, 8, 2, 3, 12, 3), c(2, 1, 6, 9, 18), c(11, 1, 5, 30, 10), c(25, 9, 8, 39, 20, 18, 30), c(15, 10, 9, 27, 14, 15, 30, 30, 19, 12), c(3, 8, 14, 2, 21, 19, 35), c(13, 8, 9, 21, 9, 28, 1, 5, 16), c(1, 2, 2, 1, 1, 8, 1), c(6, 27, 6, 25, 16, 10, 3, 40, 5, 30), c(2, 1, 3, 2, 3, 2, 1), c(6, 4, 24, 6, 8, 7, 9, 10), c(17, 3, 1, 11, 5, 5, 2, 6, 6, 5, 2, 3), c(2, 4, 1, 5, 3, 3, 14, 7, 5, 2, 6)), Goldfinch = list( c(1, 3, 5, 1, 1), NULL, 2, NULL, NULL, NULL, c(2, 2, 2), c(3, 3, 3), c(2, 1), NULL, c(2, 3, 3, 2, 2, 3, 2), NULL, NULL, NULL, NULL, NULL, NULL, NULL, c(2, 2, 2), c(5, 3), 2, c(6, 10, 6, 6, 6, 8, 4), c(2, 6, 3), c(6, 4, 2), c(10, 10, 8, 8, 10, 10), c(1, 6, 6, 6, 1, 2, 6 ), c(2, 2), c(2, 2, 4, 6), c(1, 4, 4, 4, 4, 8, 4, 6), c(10, 8, 8), c(6, 6, 6, 2, 1), c(7, 8, 5, 8, 4), 10, 4, c(4, 3, 6, 4), 3, c(3, 5, 4, 2, 2, 6, 3), c(2, 4, 7, 6, 6, 6), c(10, 4, 6, 4, 5, 5, 5, 6), c(11, 15, 12, 9, 15, 8, 25), c(2, 1, 1, 1, 1, 1, 2, 1), c(23, 24, 12, 14, 20, 17, 13, 6, 18), c(18, 13, 19, 42, 10, 12, 21, 27, 7, 7), c(2, 2, 2, 1, 4), c(1, 5, 1, 7, 3, 3), c(6, 6, 18, 8, 6, 14, 16, 3, 7, 5, 4), c(8, 3, 1, 2, 2, 1, 1, 3, 1), c(1, 1, 1, 2, 6, 2), c(1, 1, 2, 2, 2, 1, 1), c(12, 3, 6, 9, 9, 4)), Brambling = list(c(2, 2), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, c(2, 2, 2, 2), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL, NULL, c(1, 2, 2, 1), NULL, NULL, 1, NULL, NULL, NULL, 1), Blackcap = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1, NULL, c(1, 2, 2), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1, NULL, c(1, 1, 1), NULL, NULL, 1, 1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1, NULL, NULL, NULL, NULL, NULL, NULL, 1, NULL, NULL, NULL, NULL, NULL, 1, NULL), Jackdaw = list( 2, c(1, 2), NULL, NULL, NULL, NULL, NULL, NULL, c(1, 1, 1, 2), NULL, c(1, 4, 1, 1, 4, 1, 1), 1, NULL, NULL, c(6, 5, 5, 5, 5, 5), c(4, 2, 4, 1, 5, 1, 5, 1), c(7, 2, 5), c(1, 1, 1, 1), 1, NULL, NULL, NULL, NULL, c(4, 4), NULL, 2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, c(2, 1), NULL, c(3, 5, 1, 2, 2), c(3, 2, 1, 1, 2, 4, 3, 3), c(3, 3, 6, 2, 9, 4, 10, 3, 2), c(6, 1, 1, 3, 4, 2, 3, 1), c(5, 3, 5, 4, 5, 4, 4), c(3, 2, 6, 5, 2, 3, 1, 3, 3, 4), c(6, 3, 2, 6, 2, 2, 3, 3, 3, 5, 5, 3), c(6, 5, 6, 5, 5, 8, 5, 4, 7, 6), c(5, 2, 3, 5, 4, 3, 3, 5, 2), c(3, 1, 2, 4, 2, 3, 1, 2), c(3, 5, 9, 4, 3, 5, 5, 5, 6, 5, 4, 5, 5), c(5, 1, 8, 6, 5, 6, 3, 3, 8, 6, 4), c(7, 6, 6, 6, 6, 5, 4, 3), c(3, 4, 2, 4, 2, 2, 2, 7, 11, 3, 6)), Siskin = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, c(2, 2, 2), NULL, NULL, NULL, NULL, NULL, NULL, NULL, c(1, 1), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, c(2, 3), c(4, 2, 1, 2, 1), NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1, c(2, 2, 2), 1, NULL, c(3, 3), 2, c(4, 1, 2, 3), c(1, 4, 2), c(1, 1, 1, 1, 2), c(3, 3), NULL, c(2, 2, 1), c(5, 1, 2, 2, 2, 2, 2, 2), NULL), `Spotted Flycatcher` = list( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 2, NULL, 1, NULL, NULL, NULL, NULL, NULL, c(1, 1), NULL, c(1, 1, 1), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)), row.names = c(NA, -50L ), class = c("tbl_df", "tbl", "data.frame"))
Is this what you need? library(dplyr) library(purrr) # map_dbl group_by(zz, year, Season, POSTCODE, LOC_ID) %>% summarize_all(~ map_dbl(., sum, na.rm = TRUE)) %>% ungroup() # # A tibble: 50 x 25 # year Season POSTCODE LOC_ID Wren Dunnock `Blue Tit` `Pied/White Wag~ `Collared Dove` `Great Tit` Robin Greenfinch # <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> # 1 2010 Summer PE32 1TL LOC56~ 1 10 13 0 62 8 5 30 # 2 2010 Winter PE32 1TL LOC56~ 4 15 16 4 48 10 11 11 # 3 2011 Spring PE32 1TL LOC56~ 0 8 10 1 25 10 18 16 # 4 2011 Summer PE32 1TL LOC56~ 0 18 17 0 52 11 7 17 # 5 2011 Winter PE32 1TL LOC56~ 2 5 22 1 51 10 13 7 # 6 2012 Autumn PE32 1TL LOC56~ 2 6 28 1 24 18 4 9 # 7 2012 Winter PE32 1TL LOC56~ 4 10 18 6 43 16 6 22 # 8 2013 Winter PE32 1TL LOC56~ 2 6 20 2 23 8 5 14 # 9 2014 Autumn NR15 1TS LOC11~ 0 1 30 0 1 6 6 4 # 10 2014 Summer NR15 1TS LOC11~ 0 2 30 0 0 14 1 4 # # ... with 40 more rows, and 13 more variables: `House Sparrow` <dbl>, `Coal Tit` <dbl>, Woodpigeon <dbl>, # # Blackbird <dbl>, `Song Thrush` <dbl>, Chaffinch <dbl>, Starling <dbl>, Goldfinch <dbl>, Brambling <dbl>, # # Blackcap <dbl>, Jackdaw <dbl>, Siskin <dbl>, `Spotted Flycatcher` <dbl> (You can do it without purrr::map_dbl, just use sapply in its place.)
We can use summarise with across library(dplyr) library(purrr) zz %>% group_by(year, Season, POSTCODE, LOC_ID) %>% summarise(across(everything(), ~ map_dbl(., sum, na.rm = TRUE)))
How to plot a rating scale in R
What is the best way to represent the following trait rating scale? I'd like to label the traits (8 traits) and degrees or each emotion (1 being low feelings, 5 being strong feelings), across the democratic and republican parties? Do I need to aggregate the items? I'm new to R and not sure how to tackle this. Survey question and scale: "Below is a list of feelings or moods that could be caused by an object. Please use the list below to describe how the U.S. FEDERAL parties (and its elected officials) make you feel. If the word definitely describes how a party makes you feel, then choose the number 5. If you decide that the word does not at all describe how the party makes you feel, then choose the number 1. Use the intermediate numbers between 1 and 5 to indicate responses between these two extremes." Survey sample: dput(df[Book3(1:nrow(df), 30),]) structure(list(TRAITDEM1 = c(3, 4, 3, 3, 3, 3, 3, 1, 2, 2, 2, 3, 3, 2, 2, 1, 1, 3, 1, 5, 1, 1, 3, 1, 4, 4, 3, 1, 2, 4), TRAITDEM2 = c(3, 1, 1, 2, 2, 2, 3, 5, 4, 2, 2, 2, 3, 3, 3, 4, 1, 2, 3, 1, 4, 5, 2, 3, 1, 1, 1, 4, 1, 2), TRAITDEM3 = c(3, 4, 4, 2, 3, 3, 3, 1, 1, 2, 2, 3, 3, 2, 2, 1, 1, 3, 1, 5, 1, 1, 3, 1, 4, 5, 4, 1, 3, 5), TRAITDEM4 = c(3, 2, 1, 2, 2, 2, 4, 5, 4, 5, 2, 3, 2, 3, 3, 4, 3, 4, 3, 1, 5, 4, 1, 4, 3, 4, 2, 4, 2, 1), TRAITDEM5 = c(3, 4, 3, 4, 4, 3, 2, 1, 1, 2, 2, 3, 4, 2, 2, 1, 1, 3, 1, 5, 1, 1, 2, 1, 4, 4, 4, 1, 3, 4), TRAITDEM6 = c(3, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 4, 3, 1, 1, 1, 4, 5, 1, 3, 1, 1, 1, 1, 1, 1), TRAITDEM7 = c(3, 1, 3, 3, 2, 2, 1, 1, 1, 2, 3, 4, 3, 2, 2, 1, 1, 2, 2, 5, 1, 1, 1, 3, 3, 4, 2, 1, 5, 5), TRAITDEM8 = c(3, 1, 1, 1, 2, 1, 3, 5, 2, 4, 1, 1, 2, 2, 3, 1, 3, 1, 2, 1, 5, 5, 2, 2, 1, 2, 1, 2, 1, 1), TRAITREP1 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1), TRAITREP2 = c(1, 5, 5, 5, 5, 5, 5, 2, 5, 2, 5, 5, 5, 5, 4, 5, 1, 5, 5, 5, 5, 1, 5, 4, 5, 5, 5, 3, 5, 5), TRAITREP3 = c(1, 1, 1, 1, 2, 1, 1, 2, 1, 4, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 
1, 3, 1, 1, 1, 1, 1, 1, 1, 2), TRAITREP4 = c(1, 5, 5, 1, 5, 5, 5, 3, 5, 2, 5, 4, 5, 5, 5, 5, 3, 5, 5, 5, 5, 1, 5, 3, 5, 5, 5, 4, 5, 1), TRAITREP5 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1), TRAITREP6 = c(1, 5, 5, 5, 3, 3, 3, 1, 1, 1, 3, 3, 5, 3, 4, 5, 3, 4, 5, 4, 5, 1, 5, 3, 4, 4, 5, 1, 1, 3), TRAITREP7 = c(1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 2), TRAITREP8 = c(1, 5, 5, 5, 4, 5, 5, 2, 5, 2, 5, 4, 5, 5, 4, 1, 3, 5, 5, 5, 5, 3, 4, 4, 5, 5, 5, 3, 5, 5), PARTYID_Strength = c(5, 1, 2, 1, 2, 1, 8, 7, 6, 3, 1, 6, 6, 1, 7, 8, 7, 1, 1, 1, 2, 4, 1, 6, 1, 1, 1, 7, 6, 8)), row.names = c(NA, -30L), class = c("tbl_df", "tbl", "data.frame")) "PartyID_Strength" represents 8 measures of political parties: 1 - Strong Democrat 2 - Not very strong Democrat 3 - Strong Republican 4 - Not very strong Republican 5 - Independent 6 - Independent - Democrat 7 - Independent - Republican 8 - Other I tried it this way (graph below) but it's still not plotting the remaining four traits:
Cleaning the data In order to solve your problem, we have to transform your data to convert it into a tidy format. Observation There are a few particular problems with your original dataset: Data are in a wide format, i.e. most of the columns of your data frame can be represented by 3 variables; Names of the variables are not self-explanatory. Names are in upper case, which by itself does not hold any useful information; they are not readable and not good for typing/writing. There is additional information we can extract from the variable names: Party and Feelings toward the Party. The first one is an abbreviation ('dem' or 'rep'); the second one is the numerically encoded feeling towards the political party. However, the order of the numbers encoding the feeling does not reflect the natural order of emotions from disgust up to joy; Variable PARTYID_Strength is a numerically encoded Political Party [self-]Identification; it also does not reflect the natural order from strongest Democrats through independents towards strongest Republicans; Plan Convert data from wide into long format using all variables starting with TRAIT, and leaving the PARTYID_Strength variable unchanged; Extract useful information from the TRAIT... variables (Political Party, Feelings Toward the Party); Convert all numerically encoded variables into factors with reasonably ordered levels; Give all variables meaningful names; Summarize the data; Transformations We need to create several lookup tables, which will simplify the workflow. 
Affiliation lookup table: aff_lookup <- c( 'Strong Democrat', 'Not very strong Democrat', 'Strong Republican', 'Not very strong Republican', 'Independent', 'Independent-Democrat', 'Independent-Republican', 'Other' ) We can further order aff_lookup by this vector: aff_order = c(1, 2, 6, 5, 7, 4, 3, 8) Emotions/Feelings lookup table: emo_lookup <- c( 'Delighted', 'Angry', 'Happy', 'Annoyed', 'Joy', 'Hateful', 'Relaxed', 'Disgusted' ) And we can order emo_lookup by this vector: emo_order <- emo_order <- c(8, 6, 2, 4, 7, 3, 1, 5) Political party lookup table: party_lookup <- c( dem = 'National Democratic Party', rep = 'National Republican Party' ) Finally, with all helper variables, we can transform our data into desirable form. library(tidyverse) dat %<>% rename_all(tolower) %>% pivot_longer( cols = starts_with('trait'), names_to = c('party', 'emotion'), names_pattern = 'trait(dem|rep)(\\d)', values_to = 'score' ) %>% mutate( party = factor(party_lookup[party]), affiliation = factor( aff_lookup[partyid_strength], levels = aff_lookup[aff_order] ), emotion = factor( emo_lookup[as.numeric(emotion)], levels = emo_lookup[emo_order] ) ) %>% group_by(party, emotion, affiliation) %>% summarise(score = median(score)) %>% ungroup() head(dat) ## A tibble: 6 x 4 # party emotion affiliation score # <fct> <fct> <fct> <dbl> #1 National Democratic Party Disgusted Strong Democrat 1 #2 National Democratic Party Disgusted Not very strong Democrat 2 #3 National Democratic Party Disgusted Independent-Democrat 2 #4 National Democratic Party Disgusted Independent 3 #5 National Democratic Party Disgusted Independent-Republican 3 #6 National Democratic Party Disgusted Not very strong Republican 5 Plot the data Plan Now we can plot the data, as two separate plots for Democrats and Republicans with Affiliation (Political Party Identification) on X-axis and Emotions (Feelings) on Y-axis. 
Each Emotion/Affiliation point is going to be represented as a bar with the height of the bar representing the Score. We can also add color encoding to our plot. From my point of view, encoding Emotions/Feelings with a color gradient from red (Disgust) to green (Joy) could help us to grasp the internal structure of our data. Plot dat %>% ggplot( aes( x = affiliation, y = as.numeric(emotion) + (score / max(score) * .95) / 2, height = (score / max(score) * .95), width = .95, fill = emotion, label = score ) ) + geom_tile(show.legend = FALSE) + geom_text(size = 3.5, color = 'gray25', alpha = .75) + facet_wrap(~ party, scales = 'free') + scale_fill_brewer(palette = 'RdYlGn') + scale_y_continuous(breaks = sort(emo_order), labels = emo_lookup[emo_order]) + labs(x = 'Affiliations', y = 'Emotions') + ggthemes::theme_tufte() + theme( axis.text.x = element_text(angle = 45, hjust = 1), axis.ticks.x = element_blank(), axis.text.y = element_text(hjust = 0, vjust = -0.025), axis.ticks.y = element_blank() ) Which gives us the following figure: Explanation There is a trick with this plot: it looks like a series of barplots, but in fact it is not a real barplot (at least not functionally). What I do: The core of this solution is the use of geom_tile() for each data point. It is just a rectangle (square by default) with its geometrical center determined by the given coordinates (Affiliation, Emotion). Both Affiliation and Emotion are factors, not numerics. That is OK for Affiliation, because we only want to position our tile according to the Affiliation it represents. It is more complicated with Emotion, because we want to position each tile according to the Emotion it represents, but we also want to encode Score by the height of the tile. To define the height of the tile we use the height parameter within aes(). We want our tile height to be less than or equal to one (with a 0.05 offset) so that the tiles of neighbouring emotions, let's say Angry and Annoyed, do not overlap. 
That's why we use (score / max(score) * .95) for the height parameter. We also need to give different y-coordinates for each tile, so the center of the tile is placed not on the imaginary line representing each emotion, but half-height up. So when the tile is drawn, its center (on the y-axis) is placed half-height up from the "base line" and the tile extends half-height up and down, creating a fake barplot. That's what the following line of code does: as.numeric(emotion) + (score / max(score) * .95) / 2. We also give each tile a fixed width of .95 by width = .95, fill the tile with a Red-Yellow-Green gradient and label each tile with the relevant Score. The rest are just decorations. However, note how we relabel the Y-axis. Because of the way it is defined in aes(), it is a continuous scale, but we want to make it look like a discrete axis, so we use this row: scale_y_continuous(breaks = sort(emo_order), labels = emo_lookup[emo_order]) Here we just use our emo_order to say that we want breaks at the integers from 1 to 8, and after that we label these breaks with the feelings from the ordered emo_lookup table.
dplyr: Add variable as function of all variables in each row
I am trying to add a new variable in a dataframe using dplyr but I find it difficult. The new variable should be the number of runs with length 2 (of all the variable values in each line). Using apply I would do this: tmp$rle = apply(tmp,1,function(x) sum(rle(x)$lengths==2)) How can I perform this action using dplyr and mutate (without defining all variable names) ? tmp <- structure(list(X1 = c(3, 1, 1, 4, 4, 1, 3, 2, 2, 2, 1, 3, 3, 2, 3, 1, 4, 2, 3, 2), X2 = c(2, 4, 2, 2, 3, 2, 1, 1, 3, 1, 3, 1, 4, 4, 4, 1, 3, 1, 2, 1), X3 = c(2, 4, 3, 3, 3, 2, 4, 3, 4, 4, 2, 3, 3, 3, 1, 3, 1, 4, 4, 2), X4 = c(1, 3, 3, 1, 1, 3, 2, 4, 4, 1, 4, 4, 1, 1, 1, 3, 1, 3, 1, 1), X5 = c(4, 2, 4, 2, 1, 4, 1, 2, 2, 4, 3, 4, 1, 1, 4, 4, 2, 4, 4, 3), X6 = c(3, 1, 4, 3, 4, 4, 4, 1, 1, 3, 4, 2, 2, 2, 3, 2, 3, 2, 2, 3), X7 = c(4, 2, 1, 1, 2, 1, 3, 3, 3, 3, 2, 2, 4, 4, 2, 4, 4, 3, 3, 4), X8 = c(1, 3, 2, 4, 2, 3, 2, 4, 1, 2, 1, 1, 2, 3, 2, 2, 2, 1, 1, 4)), .Names = c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8"), row.names = c(NA, 20L), class = "data.frame")
Rather than dplyr, you might consider using the purrr package which RStudio has fairly recently introduced as a complement to dplyr to, among other things, better handle vectors and lists. In your case, tmp is a numeric data frame where you want to treat each row as a vector. The code could look like: library(purrr) tmp <- tmp %>% by_row(..f=function(x) sum(rle(x)$lengths==2), .to = "rle", .collate = "cols")
In dplyr: tmp <- mutate(tmp, rle = apply(tmp, 1, function(x) sum(rle(x)$lengths==2))) I am having a difficult time QA'ing this as I am unfamiliar with what results I should expect out of the rle function. I tried comparing results with your apply version of the code, and it seems that set.seed() is perhaps important for replicability? Am I understanding this correctly? Here is the QA attempt I made: (original tmp should be exactly the same: I just wrapped the lines at the list() and structure() arguments.) set.seed(1) tmp <- structure(list(X1 = c(3, 1, 1, 4, 4, 1, 3, 2, 2, 2, 1, 3, 3, 2, 3, 1, 4, 2, 3, 2), X2 = c(2, 4, 2, 2, 3, 2, 1, 1, 3, 1, 3, 1, 4, 4, 4, 1, 3, 1, 2, 1), X3 = c(2, 4, 3, 3, 3, 2, 4, 3, 4, 4, 2, 3, 3, 3, 1, 3, 1, 4, 4, 2), X4 = c(1, 3, 3, 1, 1, 3, 2, 4, 4, 1, 4, 4, 1, 1, 1, 3, 1, 3, 1, 1), X5 = c(4, 2, 4, 2, 1, 4, 1, 2, 2, 4, 3, 4, 1, 1, 4, 4, 2, 4, 4, 3), X6 = c(3, 1, 4, 3, 4, 4, 4, 1, 1, 3, 4, 2, 2, 2, 3, 2, 3, 2, 2, 3), X7 = c(4, 2, 1, 1, 2, 1, 3, 3, 3, 3, 2, 2, 4, 4, 2, 4, 4, 3, 3, 4), X8 = c(1, 3, 2, 4, 2, 3, 2, 4, 1, 2, 1, 1, 2, 3, 2, 2, 2, 1, 1, 4)), .Names = c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8"), row.names = c(NA, 20L), class = "data.frame") tmpApply <- tmp tmpApply$rle = apply(tmp, 1, function(x) sum(rle(x)$lengths==2)) tmpDplyr <- tmp %>% mutate(rle = apply(tmp, 1, function(x) sum(rle(x)$lengths==2))) tmpApply tmpDplyr
Creating a barplot from matrix
So, I have a matrix like this: > dput(tbl_sum_peaks[1:40]) structure(c(2, 8, 3, 4, 1, 2, 1, 3, 1, 3, 1, 4, 4, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 2, 1, 5, 4, 2, 1, 1, 2, 1, 4, 2), .Names = c("AT1G01050", "AT1G01080", "AT1G01090", "AT1G01320", "AT1G01470", "AT1G01800", "AT1G01910", "AT1G01960", "AT1G01980", "AT1G02150", "AT1G02470", "AT1G02500", "AT1G02560", "AT1G02780", "AT1G02816", "AT1G02880", "AT1G02920", "AT1G02930", "AT1G03030", "AT1G03090", "AT1G03110", "AT1G03210", "AT1G03220", "AT1G03230", "AT1G03330", "AT1G03475", "AT1G03630", "AT1G03680", "AT1G03740", "AT1G03870", "AT1G04080", "AT1G04170", "AT1G04270", "AT1G04410", "AT1G04420", "AT1G04530", "AT1G04640", "AT1G04650", "AT1G04690", "AT1G04750")) I would like to make a barplot which will have on the y-axis the number of rows with a specific number. As we can see in the given example data, most of the rows have the number 1, so the bar for number 1 will be the tallest. That's basic, but I can't turn on my brain... so help from someone will be rewarded!
Try barplot(table(tbl_sum_peaks))