Problems with geom_text and pausing the animation - r

I'm trying to replicate the following code (thomasp85/gganimate). I have two problems:
I don't want to show the labels (variable: partidos) of the first geom_point;
I would like that once the animation ends, for a few seconds the labels (variable: partidos) of the last geom_point are shown.
dput(polls_)
structure(list(week = structure(c(1551571200, 1551571200, 1551571200,
1551571200, 1551571200, 1550966400, 1550966400, 1550966400, 1550966400,
1550966400, 1550361600, 1550361600, 1550361600, 1550361600, 1550361600,
1549756800, 1549756800, 1549756800, 1549756800, 1549756800, 1549152000,
1549152000, 1549152000, 1549152000, 1549152000, 1548547200, 1548547200,
1548547200, 1548547200, 1548547200, 1547942400, 1547942400, 1547942400,
1547942400, 1547942400, 1547337600, 1547337600, 1547337600, 1547337600,
1547337600, 1546732800, 1546732800, 1546732800, 1546732800, 1546732800,
1546128000, 1546128000, 1546128000, 1546128000, 1546128000, 1545523200,
1545523200, 1545523200, 1545523200, 1545523200, 1544918400, 1544918400,
1544918400, 1544918400, 1544918400, 1544313600, 1544313600, 1544313600,
1544313600, 1544313600, 1543708800, 1543708800, 1543708800, 1543708800,
1543708800, 1541894400, 1541894400, 1541894400, 1541894400, 1541894400,
1541289600, 1541289600, 1541289600, 1541289600, 1541289600, 1540684800,
1540684800, 1540684800, 1540684800, 1540684800, 1540080000, 1540080000,
1540080000, 1540080000, 1540080000, 1539475200, 1539475200, 1539475200,
1539475200, 1539475200, 1538870400, 1538870400, 1538870400, 1538870400,
1538870400), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
partidos = c("PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX", "PPopular", "PSOE", "ahorapodemos", "CiudadanosCs",
"VOX"), resultados = c(16.7, 33.3, 14.5, 15.3, 5.9, 21, 27.3,
14.2, 16, 11.3, 21.75, 25.85, 14.4, 17.9, 10.9, 20.5, 25.4,
13.9, 17, 11.7, 21.3, 23.9, 13.5, 20.9, 11.2, 22.25, 23.65,
14.85, 19.15, 10.05, 21.5, 23.2, 14.4, 23, 8.9, 19.2, 23.75,
16.7, 18, 8.4, 18.3, 24.1, 16.1, 18.5, 11.5, 20.6, 22.6,
15.5, 19.65, 10.75, 21.8, 23.5, 15.2, 22.7, 7.8, 21.4, 24.15,
16.15, 20.1, 8.6, 22.8, 24.4, 17.2, 19.8, 5.9, 19.7, 23.2,
17.65, 18.2, 10.7, 23.35, 26.4, 17.2, 20.15, 1.8, 22.3, 26.6,
16.6, 21.9, 3.4, 22.65, 24.95, 17.15, 21.55, 2.55, 22.6,
25.2, 17.7, 19.2, 5.1, 26.7, 26.8, 16.8, 19.5, 1.9, 23, 26.2,
18, 20.7, 0)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -100L), vars = "week", indices = list(95:99,
90:94, 85:89, 80:84, 75:79, 70:74, 65:69, 60:64, 55:59, 50:54,
45:49, 40:44, 35:39, 30:34, 25:29, 20:24, 15:19, 10:14, 5:9,
0:4), drop = TRUE, group_sizes = c(5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), biggest_group_size = 5L, labels = structure(list(
week = structure(c(1538870400, 1539475200, 1540080000, 1540684800,
1541289600, 1541894400, 1543708800, 1544313600, 1544918400,
1545523200, 1546128000, 1546732800, 1547337600, 1547942400,
1548547200, 1549152000, 1549756800, 1550361600, 1550966400,
1551571200), class = c("POSIXct", "POSIXt"), tzone = "UTC")), class = "data.frame", row.names = c(NA,
-20L), vars = "week", indices = list(315:319, 310:314, 305:309,
275:304, 270:274, 240:269, 230:239, 195:229, 155:194, 150:154,
140:149, 125:139, 95:124, 90:94, 70:89, 65:69, 50:64, 30:49,
5:29, 0:4), drop = TRUE, group_sizes = c(5L, 5L, 5L, 30L,
5L, 30L, 10L, 35L, 40L, 5L, 10L, 15L, 30L, 5L, 20L, 5L, 15L,
20L, 25L, 5L), biggest_group_size = 40L, labels = structure(list(
week = structure(c(1538870400, 1539475200, 1540080000, 1540684800,
1541289600, 1541894400, 1543708800, 1544313600, 1544918400,
1545523200, 1546128000, 1546732800, 1547337600, 1547942400,
1548547200, 1549152000, 1549756800, 1550361600, 1550966400,
1551571200), tzone = "UTC", class = c("POSIXct", "POSIXt"
))), class = "data.frame", row.names = c(NA, -20L), vars = "week", indices = list(
c(63L, 127L, 191L, 255L, 319L), c(62L, 126L, 190L, 254L,
318L), c(61L, 125L, 189L, 253L, 317L), c(55L, 56L, 57L, 58L,
59L, 60L, 119L, 120L, 121L, 122L, 123L, 124L, 183L, 184L,
185L, 186L, 187L, 188L, 247L, 248L, 249L, 250L, 251L, 252L,
311L, 312L, 313L, 314L, 315L, 316L), c(54L, 118L, 182L, 246L,
310L), c(48L, 49L, 50L, 51L, 52L, 53L, 112L, 113L, 114L,
115L, 116L, 117L, 176L, 177L, 178L, 179L, 180L, 181L, 240L,
241L, 242L, 243L, 244L, 245L, 304L, 305L, 306L, 307L, 308L,
309L), c(46L, 47L, 110L, 111L, 174L, 175L, 238L, 239L, 302L,
303L), c(39L, 40L, 41L, 42L, 43L, 44L, 45L, 103L, 104L, 105L,
106L, 107L, 108L, 109L, 167L, 168L, 169L, 170L, 171L, 172L,
173L, 231L, 232L, 233L, 234L, 235L, 236L, 237L, 295L, 296L,
297L, 298L, 299L, 300L, 301L), c(31L, 32L, 33L, 34L, 35L,
36L, 37L, 38L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L,
159L, 160L, 161L, 162L, 163L, 164L, 165L, 166L, 223L, 224L,
225L, 226L, 227L, 228L, 229L, 230L, 287L, 288L, 289L, 290L,
291L, 292L, 293L, 294L), c(30L, 94L, 158L, 222L, 286L), c(28L,
29L, 92L, 93L, 156L, 157L, 220L, 221L, 284L, 285L), c(25L,
26L, 27L, 89L, 90L, 91L, 153L, 154L, 155L, 217L, 218L, 219L,
281L, 282L, 283L), c(19L, 20L, 21L, 22L, 23L, 24L, 83L, 84L,
85L, 86L, 87L, 88L, 147L, 148L, 149L, 150L, 151L, 152L, 211L,
212L, 213L, 214L, 215L, 216L, 275L, 276L, 277L, 278L, 279L,
280L), c(18L, 82L, 146L, 210L, 274L), c(14L, 15L, 16L, 17L,
78L, 79L, 80L, 81L, 142L, 143L, 144L, 145L, 206L, 207L, 208L,
209L, 270L, 271L, 272L, 273L), c(13L, 77L, 141L, 205L, 269L
), c(10L, 11L, 12L, 74L, 75L, 76L, 138L, 139L, 140L, 202L,
203L, 204L, 266L, 267L, 268L), c(6L, 7L, 8L, 9L, 70L, 71L,
72L, 73L, 134L, 135L, 136L, 137L, 198L, 199L, 200L, 201L,
262L, 263L, 264L, 265L), c(1L, 2L, 3L, 4L, 5L, 65L, 66L,
67L, 68L, 69L, 129L, 130L, 131L, 132L, 133L, 193L, 194L,
195L, 196L, 197L, 257L, 258L, 259L, 260L, 261L), c(0L, 64L,
128L, 192L, 256L)), drop = TRUE, group_sizes = c(5L, 5L,
5L, 30L, 5L, 30L, 10L, 35L, 40L, 5L, 10L, 15L, 30L, 5L, 20L,
5L, 15L, 20L, 25L, 5L), biggest_group_size = 40L)))
# Asker's original attempt, kept as posted.
# NOTE(review): the dput() sample above names the time column `week`, while
# this code refers to `semana`; the two do not match as posted.
anim <- ggplot(polls_, aes(semana, resultados, group = partidos)) +
geom_line() +
# Dashed grey guide running from each point to a fixed date on the right.
geom_segment(aes(xend = as.POSIXct("2019-03-08 00:00:00", tz="UTC"), yend = resultados),
linetype = 2, colour = 'grey') +
geom_point(size = 2) +
# Party labels drawn at a fixed x position to the right of the lines.
geom_text(aes(x = as.POSIXct("2019-03-15 00:00:00", tz="UTC"), label = partidos),
hjust = 0) +
transition_reveal(semana) +
# clip = 'off' lets the labels be drawn outside the panel area.
coord_cartesian(clip = 'off') +
labs(title = 'Opinion polling for the 2019 Spanish general election',
y = 'Estimated results', x = 'week') +
theme_minimal() +
theme(plot.margin = margin(5.5, 40, 5.5, 5.5))
animate(anim, width = 900, height = 600, fps = 10, rewind = FALSE, duration = 15)
Here I include the animation:

Here are two steps to address your two problems:
Sort your data frame before passing it to ggplot():
polls_ <- arrange(polls_, week)
Include end_pause = <some positive integer> in animate(anim, ...).
Note: the column name in your sample data frame is week, while your code used semana. I'm going with the former here.
Explanation:
Your data frame is arranged with the latest week values on top, and the earliest below. This does not work well with the default parameters for transition_reveal.
From ?transition_reveal:
transition_reveal(along, range = NULL, keep_last = TRUE, id)
where keep_last is a TRUE / FALSE value for whether the last row of the data should be kept for subsequent frames.
When the earliest week rows are at the bottom of the data frame, they are scheduled to appear first due to their week values, and are kept visible till the end due to keep_last = TRUE.
When we sort rows by week, on the other hand, the latest week values get sorted to the bottom rows instead. Now keep_last = TRUE works in our favour, because we want these values to be kept for all subsequent frames -- most importantly, the last frame, which is where end_pause becomes useful.
Demonstration:
library(dplyr)
library(ggplot2)
library(gganimate)

# Sort the rows chronologically before plotting so that the rows kept by
# transition_reveal()'s keep_last = TRUE are the latest week, not the earliest.
anim <- polls_ %>%
  arrange(week) %>%
  ggplot(aes(week, resultados, group = partidos)) +
  geom_line() +
  # Dashed grey guide from each point to a fixed date right of the data.
  geom_segment(
    aes(
      xend = as.POSIXct("2019-03-08 00:00:00", tz = "UTC"),
      yend = resultados
    ),
    linetype = 2, colour = "grey"
  ) +
  geom_point(size = 2) +
  # Party labels at a fixed x position to the right of the guides.
  geom_text(
    aes(x = as.POSIXct("2019-03-15 00:00:00", tz = "UTC"), label = partidos),
    hjust = 0
  ) +
  transition_reveal(week) +
  # clip = "off" lets the labels be drawn in the widened right margin.
  coord_cartesian(clip = "off") +
  labs(
    title = "Opinion polling for the 2019 Spanish general election",
    y = "Estimated results", x = "week"
  ) +
  theme_minimal() +
  theme(plot.margin = margin(5.5, 40, 5.5, 5.5))

# end_pause repeats the last frame that many times; at fps = 10 a value of 10
# holds the final picture for about one second. Increase it for a longer pause.
animate(
  anim,
  width = 900, height = 600,
  end_pause = 10,
  fps = 10, rewind = FALSE, duration = 15
)

Related

ggplot2: legend symbols matching plot symbols

I made a ggplot where I make use of the viridis color package. I adjusted the geom_point in the graph to different symbols. The symbols are not displaying in my legend, although the colors are correctly programmed. How can I match my legend (including symbols and colors) with my ggplot?
Attempt:
# Asker's original attempt, kept as posted.
# NOTE(review): the answer below corrects this package name to ggplot2.
library(ggplot)
library(viridis)
ggplot(df, aes(`Lengte_(cm)`, verschil_lengte))+
# Shape and colour are both mapped to Lengteklasse.
geom_point(aes(shape = Lengteklasse, colour = Lengteklasse), size = 3)+
# Dashed red reference lines at y = 1 and y = 2.
geom_hline(yintercept = 1.0, linetype="dashed", color = "red")+
geom_hline(yintercept = 2.0, linetype="dashed", color = "red")+
# guide = "none" asks for no shape legend, which is why the symbols are
# missing from the legend in the asker's plot.
scale_shape_manual(values = c(16, 17, 15, 3, 8), guide = "none")+
scale_color_viridis(discrete = T, option = "D")+
scale_x_continuous(breaks = seq(7,12, by = 0.5))+
scale_y_continuous(breaks = seq(0,3, by = 0.5))+
labs(x = "Lengte (cm)", y = "Verschaling (mm)")+
# Sets the title of the colour legend only.
guides(col = guide_legend("Lengteklasse (cm)"))+
theme_classic()
current outcome:
df =
structure(list(`Lengte_(cm)` = c(9, 10.7, 10.7, 7.7, 9.1, 11.2,
9.7, 10.2, 8.6, 8.9, 11.2, 11.4, 10.5, 10.5, 11.1, 8.9, 11.5,
10.4, 9.1, 9.2, 10.1, 7.8, 9.8, 8.2, 10.1, 10.5, 10.2, 7.9, 9.3,
8, 8.7, 8.9, 8.8, 9.3, 8.5, 7.7, 11.2, 9.4, 9.7, 11.2, 11, 10.7,
9), Lengteklasse = structure(c(4L, 5L, 5L, 2L, 4L, 6L, 4L, 5L,
3L, 3L, 6L, 6L, 5L, 5L, 6L, 3L, 6L, 5L, 4L, 4L, 5L, 2L, 4L, 3L,
5L, 5L, 5L, 2L, 4L, 3L, 3L, 3L, 3L, 4L, 3L, 2L, 6L, 4L, 4L, 6L,
6L, 5L, 4L), .Label = c("6", "7", "8", "9", "10", "11", "12",
"13"), class = "factor"), verschil_lengte = c(0, 1.4, 1.8, 1.8,
1.4, 0.800000000000001, 0.600000000000001, 0.600000000000001,
1.4, 1.9, 1.3, 1.5, 0.300000000000001, 0.5, 0.9, 2.2, 1, 1, 1.4,
2.1, 1.3, 2.2, 0.899999999999999, 2.3, 1.1, 0.699999999999999,
2.1, 0.4, 0.5, 0.9, 2.1, 1.6, 1.7, 0.799999999999999, 2, 2.1,
0.5, 0.799999999999999, 1.3, 0.4, 0.300000000000001, 1.6, 0.199999999999999
)), row.names = c(NA, -43L), class = c("tbl_df", "tbl", "data.frame"
), na.action = structure(c(`1` = 1L, `2` = 2L, `3` = 3L, `4` = 4L,
`5` = 5L, `6` = 6L, `7` = 7L, `8` = 8L, `9` = 9L, `10` = 10L,
`11` = 11L, `12` = 12L, `13` = 13L, `14` = 14L, `15` = 15L, `16` = 16L,
`17` = 17L, `18` = 18L, `19` = 19L, `20` = 20L, `21` = 21L, `22` = 22L,
`23` = 23L, `24` = 24L, `25` = 25L, `26` = 26L, `27` = 27L, `28` = 28L,
`29` = 29L, `30` = 30L, `31` = 31L, `32` = 32L, `33` = 33L, `34` = 34L,
`35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L, `39` = 39L, `40` = 40L,
`41` = 41L, `42` = 42L, `43` = 43L, `44` = 44L, `45` = 45L, `46` = 46L,
`47` = 47L, `48` = 48L, `49` = 49L, `50` = 50L, `51` = 51L, `52` = 52L,
`53` = 53L, `54` = 54L, `55` = 55L, `56` = 56L, `57` = 57L, `58` = 58L,
`59` = 59L, `60` = 60L, `61` = 61L, `62` = 62L, `63` = 63L, `64` = 64L,
`65` = 65L, `66` = 66L, `67` = 67L, `68` = 68L, `69` = 69L, `70` = 70L,
`71` = 71L, `72` = 72L, `73` = 73L, `74` = 74L, `75` = 75L, `76` = 76L,
`77` = 77L, `78` = 78L, `79` = 79L, `80` = 80L, `81` = 81L, `82` = 82L,
`83` = 83L, `84` = 84L, `85` = 85L, `86` = 86L, `87` = 87L, `88` = 88L,
`89` = 89L, `90` = 90L, `91` = 91L, `92` = 92L, `93` = 93L, `94` = 94L,
`95` = 95L, `96` = 96L, `97` = 97L, `98` = 98L, `99` = 99L, `100` = 100L,
`101` = 101L, `102` = 102L, `103` = 103L, `104` = 104L, `105` = 105L,
`106` = 106L, `107` = 107L, `108` = 108L, `109` = 109L, `110` = 110L,
`111` = 111L, `112` = 112L, `113` = 113L, `114` = 114L, `115` = 115L,
`116` = 116L, `117` = 117L, `118` = 118L, `119` = 119L, `120` = 120L,
`121` = 121L, `122` = 122L, `123` = 123L, `124` = 124L, `125` = 125L,
`126` = 126L, `127` = 127L, `128` = 128L, `129` = 129L, `130` = 130L,
`131` = 131L, `132` = 132L, `133` = 133L, `134` = 134L, `135` = 135L,
`136` = 136L, `137` = 137L, `138` = 138L, `139` = 139L, `140` = 140L,
`141` = 141L, `142` = 142L, `143` = 143L, `144` = 144L, `145` = 145L,
`146` = 146L, `147` = 147L, `148` = 148L, `149` = 149L, `150` = 150L,
`151` = 151L, `152` = 152L, `153` = 153L, `154` = 154L, `155` = 155L,
`156` = 156L, `157` = 157L, `158` = 158L, `159` = 159L, `160` = 160L,
`161` = 161L, `162` = 162L, `163` = 163L, `164` = 164L, `165` = 165L,
`166` = 166L, `167` = 167L, `168` = 168L, `169` = 169L, `170` = 170L,
`171` = 171L, `172` = 172L, `173` = 173L, `174` = 174L, `175` = 175L,
`176` = 176L, `177` = 177L, `178` = 178L, `179` = 179L, `180` = 180L,
`181` = 181L, `182` = 182L, `183` = 183L, `184` = 184L, `185` = 185L,
`186` = 186L, `187` = 187L, `188` = 188L, `189` = 189L, `190` = 190L,
`191` = 191L, `192` = 192L, `193` = 193L, `194` = 194L, `195` = 195L,
`196` = 196L, `197` = 197L, `198` = 198L, `199` = 199L, `200` = 200L,
`201` = 201L, `202` = 202L, `203` = 203L, `204` = 204L, `205` = 205L,
`206` = 206L, `207` = 207L, `208` = 208L, `209` = 209L, `210` = 210L,
`211` = 211L, `212` = 212L, `213` = 213L, `214` = 214L, `215` = 215L,
`216` = 216L, `217` = 217L, `218` = 218L, `219` = 219L, `220` = 220L,
`221` = 221L, `222` = 222L, `223` = 223L, `224` = 224L, `225` = 225L,
`226` = 226L, `227` = 227L, `228` = 228L, `229` = 229L, `230` = 230L,
`231` = 231L, `232` = 232L, `233` = 233L, `234` = 234L, `235` = 235L,
`236` = 236L, `237` = 237L, `238` = 238L, `239` = 239L, `240` = 240L,
`241` = 241L, `242` = 242L, `243` = 243L, `244` = 244L, `245` = 245L,
`246` = 246L, `247` = 247L, `248` = 248L, `249` = 249L, `250` = 250L,
`251` = 251L, `252` = 252L, `253` = 253L, `254` = 254L, `255` = 255L,
`256` = 256L, `257` = 257L, `258` = 258L, `259` = 259L, `260` = 260L,
`261` = 261L, `262` = 262L, `263` = 263L, `264` = 264L, `265` = 265L,
`266` = 266L, `267` = 267L, `268` = 268L, `269` = 269L, `270` = 270L,
`271` = 271L, `272` = 272L, `273` = 273L, `274` = 274L, `275` = 275L,
`277` = 277L, `278` = 278L, `279` = 279L, `280` = 280L, `281` = 281L,
`282` = 282L, `284` = 284L, `285` = 285L, `286` = 286L, `288` = 288L,
`289` = 289L, `290` = 290L, `291` = 291L, `292` = 292L, `293` = 293L,
`294` = 294L, `295` = 295L, `296` = 296L, `297` = 297L, `298` = 298L,
`300` = 300L, `301` = 301L, `302` = 302L, `303` = 303L, `304` = 304L,
`305` = 305L, `306` = 306L, `308` = 308L, `309` = 309L, `310` = 310L,
`311` = 311L, `312` = 312L, `313` = 313L, `314` = 314L, `315` = 315L,
`316` = 316L, `317` = 317L, `318` = 318L, `319` = 319L, `321` = 321L,
`322` = 322L, `323` = 323L, `324` = 324L, `325` = 325L, `326` = 326L,
`327` = 327L, `328` = 328L, `329` = 329L, `330` = 330L, `331` = 331L,
`333` = 333L, `334` = 334L, `335` = 335L, `336` = 336L, `337` = 337L,
`338` = 338L, `339` = 339L, `340` = 340L, `341` = 341L, `342` = 342L,
`343` = 343L, `344` = 344L, `345` = 345L, `346` = 346L, `347` = 347L,
`348` = 348L, `349` = 349L, `351` = 351L, `352` = 352L, `354` = 354L,
`356` = 356L, `357` = 357L, `358` = 358L, `359` = 359L, `360` = 360L,
`361` = 361L, `362` = 362L, `363` = 363L, `364` = 364L, `366` = 366L,
`368` = 368L, `369` = 369L, `370` = 370L, `371` = 371L, `372` = 372L,
`373` = 373L, `374` = 374L, `375` = 375L, `376` = 376L, `377` = 377L,
`378` = 378L, `379` = 379L, `380` = 380L, `382` = 382L, `383` = 383L,
`384` = 384L, `387` = 387L, `388` = 388L, `390` = 390L, `391` = 391L,
`392` = 392L, `393` = 393L, `394` = 394L, `395` = 395L, `396` = 396L,
`397` = 397L, `399` = 399L, `400` = 400L, `401` = 401L, `402` = 402L,
`404` = 404L, `405` = 405L, `406` = 406L, `407` = 407L, `408` = 408L,
`409` = 409L, `410` = 410L, `411` = 411L, `412` = 412L, `413` = 413L,
`414` = 414L, `415` = 415L, `416` = 416L, `417` = 417L, `419` = 419L,
`420` = 420L, `423` = 423L, `424` = 424L, `425` = 425L, `426` = 426L,
`427` = 427L, `429` = 429L, `430` = 430L, `431` = 431L, `432` = 432L,
`433` = 433L, `434` = 434L, `435` = 435L, `436` = 436L, `437` = 437L,
`438` = 438L, `439` = 439L, `440` = 440L, `441` = 441L, `442` = 442L,
`443` = 443L, `444` = 444L, `446` = 446L, `447` = 447L, `448` = 448L,
`450` = 450L, `451` = 451L, `452` = 452L, `453` = 453L, `454` = 454L,
`455` = 455L, `456` = 456L, `457` = 457L, `459` = 459L, `460` = 460L,
`462` = 462L, `463` = 463L, `464` = 464L, `465` = 465L, `466` = 466L,
`467` = 467L, `468` = 468L, `469` = 469L, `470` = 470L, `471` = 471L,
`472` = 472L, `473` = 473L, `474` = 474L, `475` = 475L, `476` = 476L,
`478` = 478L, `479` = 479L, `480` = 480L, `481` = 481L, `482` = 482L,
`483` = 483L, `484` = 484L, `485` = 485L, `486` = 486L, `487` = 487L,
`488` = 488L, `489` = 489L, `490` = 490L, `491` = 491L, `493` = 493L,
`495` = 495L, `496` = 496L, `497` = 497L, `498` = 498L, `499` = 499L,
`500` = 500L, `501` = 501L, `502` = 502L, `503` = 503L, `504` = 504L,
`505` = 505L, `506` = 506L, `507` = 507L, `508` = 508L, `509` = 509L,
`510` = 510L, `511` = 511L, `512` = 512L, `513` = 513L, `514` = 514L,
`515` = 515L, `516` = 516L, `517` = 517L, `518` = 518L, `519` = 519L,
`520` = 520L, `521` = 521L, `522` = 522L, `523` = 523L, `524` = 524L,
`525` = 525L, `526` = 526L, `527` = 527L, `528` = 528L, `529` = 529L,
`530` = 530L, `531` = 531L, `532` = 532L, `533` = 533L, `535` = 535L,
`536` = 536L, `537` = 537L, `538` = 538L, `539` = 539L, `540` = 540L,
`542` = 542L, `543` = 543L, `544` = 544L, `545` = 545L, `546` = 546L,
`547` = 547L, `548` = 548L, `549` = 549L, `550` = 550L, `551` = 551L,
`553` = 553L, `554` = 554L, `555` = 555L, `556` = 556L, `557` = 557L,
`558` = 558L, `559` = 559L, `560` = 560L, `561` = 561L, `562` = 562L,
`563` = 563L, `564` = 564L, `565` = 565L, `566` = 566L, `567` = 567L,
`568` = 568L, `569` = 569L, `570` = 570L, `571` = 571L, `572` = 572L,
`573` = 573L, `574` = 574L, `575` = 575L, `576` = 576L, `577` = 577L,
`578` = 578L, `579` = 579L, `580` = 580L, `581` = 581L, `582` = 582L,
`583` = 583L, `584` = 584L, `585` = 585L, `586` = 586L, `587` = 587L,
`588` = 588L, `589` = 589L, `590` = 590L, `591` = 591L, `593` = 593L,
`595` = 595L, `596` = 596L, `597` = 597L, `598` = 598L, `599` = 599L,
`601` = 601L, `602` = 602L, `603` = 603L, `604` = 604L, `605` = 605L,
`606` = 606L, `608` = 608L, `609` = 609L, `610` = 610L, `611` = 611L,
`612` = 612L, `614` = 614L, `615` = 615L, `616` = 616L, `617` = 617L,
`618` = 618L, `619` = 619L, `620` = 620L, `621` = 621L, `622` = 622L,
`623` = 623L, `624` = 624L, `625` = 625L, `626` = 626L, `627` = 627L,
`628` = 628L, `629` = 629L, `631` = 631L, `632` = 632L, `633` = 633L,
`634` = 634L, `635` = 635L, `636` = 636L, `637` = 637L, `638` = 638L,
`639` = 639L, `640` = 640L, `641` = 641L, `642` = 642L, `643` = 643L,
`645` = 645L, `646` = 646L, `647` = 647L, `648` = 648L, `649` = 649L,
`650` = 650L, `651` = 651L, `652` = 652L, `653` = 653L, `654` = 654L,
`655` = 655L, `657` = 657L, `658` = 658L, `659` = 659L, `661` = 661L,
`662` = 662L, `663` = 663L, `664` = 664L, `666` = 666L, `667` = 667L,
`668` = 668L, `669` = 669L, `670` = 670L, `671` = 671L, `672` = 672L,
`673` = 673L, `675` = 675L, `677` = 677L, `678` = 678L, `679` = 679L,
`680` = 680L, `681` = 681L, `682` = 682L, `683` = 683L, `684` = 684L,
`685` = 685L, `686` = 686L, `687` = 687L, `688` = 688L, `689` = 689L,
`690` = 690L, `691` = 691L, `692` = 692L, `693` = 693L, `696` = 696L,
`697` = 697L, `698` = 698L), class = "omit"))
Your guide="none" implies you don't want a legend for shape. That's why the shapes don't appear. To combine two legends, give them the same name. Thus:
library(ggplot2) # Note typo correction
library(viridis)

# Shape and colour are mapped to the same variable; giving both scales the
# same name lets ggplot2 merge them into a single legend.
ggplot(df, aes(`Lengte_(cm)`, verschil_lengte)) +
  geom_point(aes(shape = Lengteklasse, colour = Lengteklasse), size = 3) +
  # Dashed red reference lines at y = 1 and y = 2.
  geom_hline(yintercept = 1.0, linetype = "dashed", color = "red") +
  geom_hline(yintercept = 2.0, linetype = "dashed", color = "red") +
  scale_shape_manual(values = c(16, 17, 15, 3, 8), name = "Lengteklasse (cm)") +
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  scale_color_viridis(
    discrete = TRUE, option = "D", name = "Lengteklasse (cm)"
  ) +
  scale_x_continuous(breaks = seq(7, 12, by = 0.5)) +
  scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
  labs(x = "Lengte (cm)", y = "Verschaling (mm)") +
  guides(col = guide_legend("Lengteklasse (cm)")) +
  theme_classic()
produces
[You can also do away with guides(col = guide_legend("Lengteklasse (cm)"))+.]
Here is another approach. @Limey's answer was my first thought too, but it has already been posted. Anyway, the clue is:
If you want colour and shape to share a single legend, both aesthetics must be given the same legend name.
To do that, set the same name argument in both scale_color_viridis and scale_shape_manual.
You can then remove guides(), as Limey already pointed out:
library(ggplot2)
library(viridis)

# Shape and colour are mapped once, in the top-level aes(); both scales then
# get the same name so that they are merged into a single legend.
ggplot(
  df,
  aes(`Lengte_(cm)`, verschil_lengte, shape = Lengteklasse, colour = Lengteklasse)
) +
  geom_point(size = 3) +
  # Dashed red reference lines at y = 1 and y = 2.
  geom_hline(yintercept = 1.0, linetype = "dashed", color = "red") +
  geom_hline(yintercept = 2.0, linetype = "dashed", color = "red") +
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  scale_color_viridis(
    name = "Lengteklasse (cm)",
    discrete = TRUE, option = "D"
  ) +
  scale_shape_manual(
    name = "Lengteklasse (cm)",
    values = c(16, 17, 15, 3, 8)
  ) +
  scale_x_continuous(breaks = seq(7, 12, by = 0.5)) +
  scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
  labs(x = "Lengte (cm)", y = "Verschaling (mm)") +
  theme_classic()

How to only select rows that are duplicated in a column in a dataframe

I have joined two dataframes together and I am trying to select only the 'Branch Codes' that are duplicated.
I want to join the datasets 'BranchData' and 'BranchCalls' so that any branch codes that are common to both datasets are included, as well as those that are not common to both datasets.
However, the last line of the code below does not seem to work!
# Convert the key column to numeric in both tables before they are joined.
# The first statement must be an assignment (`<-`): a bare `<` followed by a
# line break compares the two values and leaves the column unchanged.
BranchData$`Branch Code` <- as.numeric(BranchData$`Branch Code`)
BranchCalls$`Branch Code` <- as.numeric(BranchCalls$`Branch Code`)
BranchData <- na.omit(BranchData)
merged <- full_join(BranchData,BranchCalls)
merged <- merged %>% group_by(merged$`Branch Code`) %>% filter(n() >= 2)
Also, when I try to put the duplicates into groups so that all the duplicates are together, the following code doesn't seem to work either:
merged <- group_by(merged,merged$'Branch Code')
Minimal Reproducible Example:
structure(list(`Branch Code` = c(401801, 436801, 403801, 164801,
198801), `Location Type` = c("Urban", "Urban", "Urban Deprived",
"Rural", "Urban"), Type = c("MAIN", "MAIN", "MAIN", "MAIN", "LM"
), Status = c("Open", "Open", "Open", "Open", "Open"), Segment = c("Agency",
"Agency", "Agency", "Agency", "Agency"), `Multiple (partner that owns multiple branches)` = c("Multiple 11",
"Multiple 11", "Multiple 12", "Multiple 13", "Multiple 13"),
RetailType = c("Books_Stationery", "Books_Stationery", "Convenience",
"Convenience", "Convenience"), `Volume of transactions` = c(2238,
1514, 1346, 1338, 625), `Open hours` = c(47.75, 50.2500000000001,
46.5, 48.25, 114.25), `X Pos` = c(394169, 393488, 394434,
392153, 393094), `Y Pos` = c(806326, 805877, 804347, 796902,
802789), Urbanity = c("Major Centre", "Major Centre", "High Density",
"Low Density", "Low Density"), `Case Reference Number` = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), `Created On` = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), tzone = "UTC", class = c("POSIXct",
"POSIXt")), `Branch Type` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), L1 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), L2 = c(NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), L3 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), L4 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), `Case Type` = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
), na.action = structure(c(`3` = 3L, `4` = 4L, `5` = 5L, `6` = 6L,
`7` = 7L, `8` = 8L, `9` = 9L, `11` = 11L, `13` = 13L, `16` = 16L,
`17` = 17L, `18` = 18L, `20` = 20L, `21` = 21L, `22` = 22L, `23` = 23L,
`26` = 26L, `27` = 27L, `28` = 28L, `29` = 29L, `31` = 31L, `32` = 32L,
`33` = 33L, `34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `39` = 39L,
`40` = 40L, `41` = 41L, `42` = 42L, `43` = 43L, `44` = 44L, `45` = 45L,
`46` = 46L, `47` = 47L, `48` = 48L, `49` = 49L, `51` = 51L, `52` = 52L,
`54` = 54L, `55` = 55L, `57` = 57L, `58` = 58L, `59` = 59L, `60` = 60L,
`61` = 61L, `62` = 62L, `63` = 63L, `65` = 65L, `67` = 67L, `68` = 68L,
`69` = 69L, `70` = 70L, `71` = 71L, `72` = 72L, `74` = 74L, `75` = 75L,
`76` = 76L, `77` = 77L, `78` = 78L, `80` = 80L, `81` = 81L, `82` = 82L,
`83` = 83L, `84` = 84L, `86` = 86L, `87` = 87L, `88` = 88L, `89` = 89L,
`91` = 91L, `92` = 92L, `93` = 93L, `96` = 96L, `97` = 97L, `98` = 98L,
`99` = 99L, `100` = 100L, `101` = 101L, `103` = 103L, `106` = 106L,
`107` = 107L, `108` = 108L, `109` = 109L, `110` = 110L, `111` = 111L,
`112` = 112L, `113` = 113L, `114` = 114L, `115` = 115L, `116` = 116L,
`117` = 117L, `118` = 118L, `119` = 119L, `120` = 120L, `121` = 121L,
`122` = 122L, `123` = 123L, `124` = 124L, `126` = 126L, `127` = 127L,
`129` = 129L, `130` = 130L, `131` = 131L, `132` = 132L, `133` = 133L,
`134` = 134L, `135` = 135L, `136` = 136L, `137` = 137L, `139` = 139L,
`140` = 140L, `141` = 141L, `142` = 142L, `143` = 143L, `144` = 144L,
`145` = 145L, `146` = 146L, `147` = 147L, `148` = 148L, `149` = 149L,
`150` = 150L, `151` = 151L, `152` = 152L, `153` = 153L, `155` = 155L,
`156` = 156L, `157` = 157L, `160` = 160L, `161` = 161L, `162` = 162L,
`163` = 163L, `165` = 165L, `166` = 166L, `167` = 167L, `168` = 168L,
`169` = 169L, `174` = 174L, `175` = 175L, `176` = 176L, `177` = 177L,
`178` = 178L, `179` = 179L, `180` = 180L, `182` = 182L, `183` = 183L,
`185` = 185L, `186` = 186L, `188` = 188L, `189` = 189L, `190` = 190L,
`191` = 191L, `192` = 192L, `193` = 193L, `194` = 194L, `195` = 195L,
`196` = 196L, `197` = 197L, `198` = 198L, `199` = 199L, `200` = 200L,
`201` = 201L, `203` = 203L, `204` = 204L, `205` = 205L, `206` = 206L,
`207` = 207L, `209` = 209L, `210` = 210L, `211` = 211L, `212` = 212L,
`213` = 213L, `214` = 214L, `215` = 215L, `216` = 216L, `217` = 217L,
`218` = 218L, `219` = 219L, `220` = 220L, `221` = 221L, `222` = 222L,
`223` = 223L, `224` = 224L, `226` = 226L, `227` = 227L, `228` = 228L,
`229` = 229L, `230` = 230L, `231` = 231L, `232` = 232L, `233` = 233L,
`234` = 234L, `236` = 236L, `237` = 237L, `238` = 238L, `239` = 239L,
`240` = 240L, `241` = 241L, `242` = 242L, `243` = 243L, `244` = 244L,
`245` = 245L, `247` = 247L, `248` = 248L, `249` = 249L, `250` = 250L,
`251` = 251L, `252` = 252L, `253` = 253L, `254` = 254L, `255` = 255L,
`256` = 256L, `257` = 257L, `258` = 258L, `259` = 259L, `260` = 260L,
`261` = 261L, `262` = 262L, `263` = 263L, `264` = 264L, `265` = 265L,
`266` = 266L, `267` = 267L, `268` = 268L, `269` = 269L, `270` = 270L,
`271` = 271L, `272` = 272L, `273` = 273L, `274` = 274L, `276` = 276L,
`278` = 278L, `280` = 280L, `281` = 281L, `282` = 282L, `283` = 283L,
`284` = 284L, `285` = 285L, `286` = 286L, `288` = 288L, `289` = 289L,
`291` = 291L, `292` = 292L, `293` = 293L, `294` = 294L, `296` = 296L,
`297` = 297L, `298` = 298L, `299` = 299L, `300` = 300L, `301` = 301L,
`304` = 304L, `305` = 305L, `306` = 306L, `307` = 307L, `308` = 308L,
`311` = 311L, `312` = 312L, `313` = 313L, `316` = 316L, `319` = 319L,
`321` = 321L, `322` = 322L, `323` = 323L, `324` = 324L, `325` = 325L,
`326` = 326L, `327` = 327L, `328` = 328L, `329` = 329L, `330` = 330L,
`331` = 331L, `332` = 332L, `333` = 333L, `335` = 335L, `337` = 337L,
`338` = 338L, `339` = 339L, `340` = 340L, `341` = 341L, `342` = 342L,
`343` = 343L, `344` = 344L, `345` = 345L, `346` = 346L, `347` = 347L,
`348` = 348L, `349` = 349L, `350` = 350L, `351` = 351L, `352` = 352L,
`353` = 353L, `354` = 354L, `355` = 355L, `356` = 356L, `357` = 357L,
`359` = 359L, `360` = 360L, `361` = 361L, `362` = 362L, `363` = 363L,
`365` = 365L, `366` = 366L, `367` = 367L, `368` = 368L, `370` = 370L,
`371` = 371L, `372` = 372L, `373` = 373L, `375` = 375L, `376` = 376L,
`378` = 378L, `379` = 379L, `380` = 380L, `381` = 381L, `382` = 382L,
`384` = 384L, `385` = 385L, `387` = 387L, `388` = 388L, `389` = 389L,
`390` = 390L, `391` = 391L, `392` = 392L, `393` = 393L, `395` = 395L,
`396` = 396L, `397` = 397L, `398` = 398L, `399` = 399L, `400` = 400L,
`401` = 401L, `403` = 403L, `404` = 404L, `405` = 405L, `409` = 409L,
`412` = 412L, `413` = 413L, `414` = 414L, `415` = 415L, `416` = 416L,
`418` = 418L, `419` = 419L, `420` = 420L, `421` = 421L, `422` = 422L,
`423` = 423L, `426` = 426L, `427` = 427L, `428` = 428L, `429` = 429L,
`432` = 432L, `433` = 433L, `435` = 435L, `436` = 436L, `437` = 437L,
`438` = 438L, `440` = 440L, `441` = 441L, `442` = 442L, `443` = 443L,
I would be so grateful if anybody could give me a helping hand!
Thank you so much!
You can do it using table:
# Keep the rows whose `Branch Code` occurs more than once.
# The counts are looked up by name: the code is converted to character first,
# because a numeric code would be used as a position in the table rather than
# as a label. as.vector() drops the table attributes so that filter()
# receives a plain logical vector.
merged %>%
  filter(as.vector(table(`Branch Code`)[as.character(`Branch Code`)]) > 1)
or using add_count:
merged %>% add_count(`Branch Code`) %>% filter(n > 1)
I created a small sample data:
merged <- data.frame(branch_code = c("401801", "436801", "401801"),
location_type = c("Urban", "Urban", "Rural"))
branch_code location_type
1 401801 Urban
2 436801 Urban
3 401801 Rural
You can use this code:
# Keep only the rows whose branch_code appears more than once. n() is the
# size of the current group, so no helper count column is needed. The result
# is still grouped by branch_code.
merged %>%
  group_by(branch_code) %>%
  filter(n() > 1)
Output:
# A tibble: 2 × 2
# Groups: branch_code [1]
branch_code location_type
<chr> <chr>
1 401801 Urban
2 401801 Rural

How can i draw a barplot with 3 variables?

i'm having some trouble making a barplot.
I want to make a barplot with 3 ordinal variables (scale: yes, no, i don't know (for each))
I need the x-axis to show the bars side by side (yes1, yes2, yes3, no1, no2... and so on). They y-axis should show the frequency or the percentage.
Each variable belongs to a different wave in a panel and i want to show the changes through a barplot.
I've come so far, to draw a plot for each variable (see code)
What I need is to combine the 3 plots; I just don't know yet how to do it. I've tried facet_wrap/facet_grid, but I haven't been able to solve my problem with that approach. I also get the error:
"Don't know how to automatically pick scale for object of type haven_labelled. Defaulting to continuous." so the labels on the x-axis can't be shown.
Can someone please help me?
Thanks,
Ingrid.
Here is my the data:
dput(veraenderung[1:4, ])
structure(list(vor = structure(c(2, 3, 3, 1), label = "Erwartung, dass sich durch die Teilnahme an der FoBi Veränderungen im Berufsallt", labels = c(ja = 1,
nein = 2, `weiß nicht` = 3), class = "haven_labelled"), nach = structure(c(2,
3, 1, 1), label = "Erwarten Sie, dass Ihre Teilnahme an dieser FoBi zu Veränderungen in Ihrem Beruf", labels = c(ja = 1,
nein = 2, `weiß nicht` = 3), class = "haven_labelled"), sechs_monate_spaeter = structure(c(2,
2, 1, 3), label = "Hat sich durch Ihre Teilnahme an der Fortbildung zur interkulturellen Kompetenz", labels = c(ja = 1,
nein = 2, `weiß nicht` = 9), class = "haven_labelled"), Welle123 = c(1,
1, 1, 1)), na.action = structure(c(`4` = 4L, `7` = 7L, `8` = 8L,
`9` = 9L, `10` = 10L, `11` = 11L, `12` = 12L, `13` = 13L, `14` = 14L,
`15` = 15L, `16` = 16L, `17` = 17L, `19` = 19L, `20` = 20L, `24` = 24L,
`26` = 26L, `27` = 27L, `29` = 29L, `30` = 30L, `31` = 31L, `33` = 33L,
`34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L, `39` = 39L,
`41` = 41L, `43` = 43L, `44` = 44L, `46` = 46L, `47` = 47L, `48` = 48L,
`49` = 49L, `50` = 50L, `52` = 52L, `54` = 54L, `55` = 55L, `58` = 58L,
`59` = 59L, `60` = 60L, `63` = 63L, `64` = 64L, `66` = 66L, `68` = 68L,
`71` = 71L, `72` = 72L, `73` = 73L, `74` = 74L, `75` = 75L, `78` = 78L,
`80` = 80L, `81` = 81L, `82` = 82L, `83` = 83L, `84` = 84L, `86` = 86L,
`87` = 87L, `91` = 91L, `92` = 92L, `94` = 94L, `97` = 97L, `99` = 99L,
`101` = 101L, `102` = 102L, `105` = 105L, `106` = 106L, `107` = 107L,
`108` = 108L, `109` = 109L, `112` = 112L, `113` = 113L, `114` = 114L,
`116` = 116L, `117` = 117L, `119` = 119L, `121` = 121L, `122` = 122L,
`123` = 123L, `124` = 124L, `127` = 127L, `128` = 128L, `130` = 130L,
`132` = 132L, `134` = 134L, `135` = 135L, `136` = 136L, `138` = 138L,
`139` = 139L, `140` = 140L, `141` = 141L, `142` = 142L, `144` = 144L,
`146` = 146L, `147` = 147L, `148` = 148L, `149` = 149L, `151` = 151L,
`152` = 152L, `153` = 153L, `156` = 156L, `157` = 157L, `159` = 159L,
`164` = 164L, `165` = 165L, `166` = 166L, `168` = 168L, `169` = 169L,
`170` = 170L, `172` = 172L, `173` = 173L, `174` = 174L, `176` = 176L,
`177` = 177L, `178` = 178L, `179` = 179L, `180` = 180L, `181` = 181L,
`183` = 183L, `184` = 184L, `185` = 185L, `190` = 190L, `191` = 191L,
`192` = 192L, `194` = 194L, `195` = 195L, `196` = 196L, `197` = 197L,
`202` = 202L, `205` = 205L, `206` = 206L, `208` = 208L, `209` = 209L,
`210` = 210L, `211` = 211L, `212` = 212L, `213` = 213L, `215` = 215L,
`216` = 216L, `217` = 217L, `218` = 218L, `221` = 221L, `223` = 223L,
`225` = 225L, `226` = 226L, `227` = 227L, `228` = 228L, `229` = 229L,
`230` = 230L, `231` = 231L, `232` = 232L, `233` = 233L, `234` = 234L,
`235` = 235L, `236` = 236L, `237` = 237L, `238` = 238L, `239` = 239L,
`240` = 240L, `241` = 241L, `242` = 242L, `243` = 243L, `244` = 244L,
`245` = 245L, `246` = 246L, `247` = 247L, `248` = 248L, `249` = 249L
), class = "omit"), row.names = c(NA, 4L), class = "data.frame")
Here is the code:
library(tidyverse)

# Keep the three survey items and the Welle123 column, give the items
# readable names, and drop every row that has a missing value.
veraenderung <- ikoe %>%
  select(v13, wn06, xn2, Welle123) %>%
  rename(
    vor = v13,
    nach = wn06,
    sechs_monate_spaeter = xn2
  ) %>%
  # The pipe already supplies the data as the first argument; naming
  # `veraenderung` here again would only pass a spurious extra argument.
  na.omit() %>%
  as.data.frame()

# One bar chart per item.
ggplot(veraenderung, aes(x = vor)) +
  geom_bar()

ggplot(veraenderung, aes(x = nach)) +
  geom_bar()

ggplot(veraenderung, aes(x = sechs_monate_spaeter)) +
  geom_bar()
Your haven object is a bit of a challenge for tidyverse manipulations. See below what I suggest to make this object a bit "cleaner" (remove labels, change your values to character class), then pivot it to long format and plot.
library(tidyverse)

# Give the four columns short, plain names.
names(veraenderung) <- c("vor", "nach", "sechs", "welle")

# Convert every column to character so the values are treated as discrete
# categories. across() replaces the superseded transmute_all().
veraenderung <- as_tibble(veraenderung) %>%
  mutate(across(everything(), as.character))

# Long format: one row per (column name, value) pair.
veraenderung <- veraenderung %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value")

# One group of bars per original column, with the answers side by side.
ggplot(veraenderung, aes(key)) +
  geom_bar(aes(fill = value), position = position_dodge(preserve = "single"))
# try without preserve or position_dodge and see what happens
Created on 2020-02-06 by the reprex package (v0.3.0)

Have ticks at edges of bins (instead of center) with ggplot2 in R?

I have the following R data frame nPhotosClassified:
> glimpse(nPhotosClassified)
Observations: 236
Variables: 2
$ person_id <int> 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 194, 195, 199...
$ nPhotosClassified <int> 113, 164, 2126, 637, 75, 16, 161, 29, 15, 6338, 596, 18, 14, 63, 36777, 19117, 5625...
With it I tried to make a geom_histogram of the nPhotosClassified variable with ggplot2:
ggplot(nPhotosClassified, aes(x = nPhotosClassified)) +
  geom_histogram(bins = 10) +
  # Red reference line at x = 100.
  geom_vline(xintercept = 100, colour = "red") +
  scale_x_log10(
    name = "Number of photos classified",
    breaks = c(1, 10, 100, 1000, 10000)
  ) +
  ylab("Number of users") +
  theme_bw() +
  theme(
    # Removing the panel border gets rid of the whole frame around the plot,
    # but the axes disappear with it ...
    panel.border = element_blank(),
    # ... so the axis lines are added back manually.
    axis.line = element_line()
  )
Which gives me this result:
For this question, I've added a red vertical line to indicate that the major tick marks fall on the center of these bins.
Question: How do I adjust the bins (or the tick marks???) so that all the tick marks fall on the edge of bins rather than in the middle of them?
For example, how do I end up with two bins between 1 and 10, two bins between 10 and 100, and so on? Please note that I want my x-axis to be on the log10 scale.
Thank you!
EDIT: Here is the full dataset:
> dput(nPhotosClassified)
structure(list(person_id = c(179L, 180L, 181L, 182L, 183L, 184L,
185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L, 194L, 195L, 199L,
201L, 204L, 205L, 207L, 208L, 209L, 210L, 211L, 213L, 214L, 215L,
216L, 217L, 219L, 220L, 221L, 222L, 223L, 224L, 225L, 226L, 227L,
228L, 229L, 230L, 234L, 235L, 237L, 238L, 241L, 242L, 243L, 246L,
249L, 250L, 251L, 252L, 253L, 255L, 256L, 259L, 261L, 264L, 265L,
266L, 267L, 268L, 271L, 272L, 274L, 275L, 276L, 277L, 278L, 281L,
282L, 283L, 285L, 294L, 296L, 298L, 299L, 302L, 304L, 305L, 307L,
309L, 310L, 311L, 312L, 317L, 318L, 319L, 320L, 323L, 325L, 326L,
327L, 330L, 331L, 332L, 335L, 341L, 344L, 347L, 348L, 363L, 367L,
375L, 376L, 377L, 378L, 386L, 388L, 389L, 390L, 396L, 397L, 398L,
399L, 401L, 402L, 404L, 406L, 407L, 409L, 412L, 413L, 414L, 415L,
419L, 421L, 425L, 426L, 428L, 429L, 432L, 433L, 440L, 441L, 445L,
448L, 452L, 456L, 461L, 462L, 464L, 468L, 471L, 473L, 474L, 475L,
478L, 483L, 486L, 491L, 492L, 493L, 494L, 495L, 497L, 498L, 501L,
502L, 505L, 509L, 512L, 518L, 520L, 532L, 533L, 535L, 537L, 539L,
540L, 543L, 544L, 550L, 551L, 552L, 554L, 562L, 564L, 581L, 582L,
590L, 592L, 593L, 597L, 599L, 601L, 602L, 612L, 618L, 622L, 632L,
634L, 635L, 637L, 650L, 651L, 658L, 659L, 660L, 661L, 665L, 666L,
668L, 671L, 672L, 675L, 684L, 686L, 693L, 697L, 705L, 708L, 719L,
725L, 726L, 730L, 733L, 734L, 752L, 756L, 777L, 785L, 789L, 791L,
796L, 797L, 799L, 800L, 802L, 807L, 808L, 810L, 813L, 814L),
nPhotosClassified = c(113L, 164L, 2126L, 637L, 75L, 16L,
161L, 29L, 15L, 6338L, 596L, 18L, 14L, 63L, 36777L, 19117L,
5625L, 584L, 3477L, 541L, 6L, 6L, 112L, 8L, 5L, 290L, 120L,
12L, 9L, 2675L, 9L, 4L, 657L, 149L, 151L, 8L, 4104L, 285L,
192L, 734L, 5L, 129L, 155L, 11L, 516L, 410L, 55L, 1L, 581L,
293L, 28L, 17810L, 2690L, 5L, 587L, 359L, 9L, 493L, 404L,
21L, 3L, 2L, 91L, 23L, 3L, 728L, 29L, 1540L, 10556L, 1L,
54L, 905L, 25L, 22L, 1L, 14L, 16L, 13L, 10L, 21L, 121L, 7870L,
53L, 1777L, 11L, 850L, 35L, 635L, 7L, 5728L, 1972L, 3613L,
16L, 51L, 131L, 77L, 267L, 718L, 11L, 18L, 5088L, 113L, 48L,
302L, 33L, 44L, 20L, 22L, 7L, 30L, 8L, 69L, 4L, 11L, 2428L,
3131L, 2459L, 12L, 150L, 21L, 702L, 10L, 23L, 38L, 1L, 1L,
24L, 10L, 6L, 1443L, 221L, 4363L, 27L, 46L, 9L, 8L, 10633L,
56L, 38L, 20L, 171L, 36L, 5L, 3L, 108L, 10L, 559L, 83L, 60L,
3L, 9L, 697L, 100L, 27L, 114L, 186L, 8127L, 10L, 58L, 76L,
472L, 6L, 72L, 3748L, 130L, 9L, 2459L, 80L, 468L, 198L, 4L,
108L, 35L, 10L, 310L, 207L, 499L, 20L, 32L, 1178L, 730L,
999L, 13L, 1L, 5L, 2L, 1L, 178L, 4L, 31L, 16L, 1592L, 385L,
73L, 698L, 4L, 42L, 90L, 772L, 509L, 1L, 17L, 17L, 36L, 987L,
395L, 15L, 23194L, 16L, 956L, 15L, 5614L, 3L, 1700L, 74L,
65L, 18L, 389L, 35L, 8L, 3L, 9L, 1271L, 12L, 80L, 117L, 356L,
3L, 59L, 85L, 382L, 8L, 6L, 33L, 5L, 119L)), class = c("tbl_df",
"tbl", "data.frame"), .Names = c("person_id", "nPhotosClassified"
), row.names = c(NA, -236L))
In the end, I found the breaks argument of geom_histogram() to be the most straightforward way to approach this, mostly because of the complication introduced by the x scale transformation.
The histogram bin breaks need to ultimately be set on the transformed scale. This translates to setting the histogram breaks on the scale of log10(nPhotosClassified).
The breaks depend on the range of log10(nPhotosClassified).
range(log10(nPhotosClassified$nPhotosClassified))
[1] 0.000000 4.565576
So the breaks need to go from 0 to 5. You wanted these evenly spaced between integers (i.e., 2 bins per 10^integer), so we want a break every 0.5 units.
ggplot(nPhotosClassified, aes(x = nPhotosClassified)) +
  # Bin edges run from 0 to 5 in steps of 0.5.
  geom_histogram(breaks = seq(from = 0, to = 5, by = 0.5)) +
  scale_x_log10(
    name = "Number of photos classified",
    breaks = c(1, 10, 100, 1000, 10000)
  )
There may be a less manual way to do this, but the other arguments to control the histogram bins, like boundary, didn't seem to translate well with scale transformation.

Calling function inside with-statement gives error variable not found in function scope

I am preparing a bootstrapped estimation of a mean prediction error on a multiple imputed dataset. My function seems to be unable to find the dependent variable in scope. Is there some way to circumvent that?
Multiple imputation runs smoothly, but the specific problem seems to be that the line
mod.nb.train <- with(data = data.mi.train, exp = glm.nb(f))
cannot find the variable CG.tot:
Error in eval(expr, envir, enclos) : object 'CG.tot' not found
However, if I write the formula out literally in the call:
glm.nb(formula=CG.tot~Fibrinogen)
it works...
Minimal running example:
library(mice)
library(MASS)
#compute the mean prediction error on a dataframe with missing data
# Compute the mean prediction error on a data frame with missing data.
#
# f:       model formula; its left-hand side names the response column.
# data:    data frame holding the variables used in `f`.
# indices: not used in the body.
#
# Returns the mean, over all rows of the stacked imputed test sets, of
# exp(linear predictor) minus the observed response. Stops with an error
# when `f` is not a formula or `data` is not a data frame.
predicterr <- function(f, data, indices) {
  # inherits() also accepts objects that carry additional classes, where
  # class(x) == "..." would hand a length > 1 condition to if().
  if (!inherits(f, "formula")) {
    stop("'f' must be of the 'formula' type")
  }
  if (!inherits(data, "data.frame")) {
    stop("'data' must be of the 'data.frame' type")
  }

  # recompute random sampling & multiple imputation
  data.test <- data[sample(nrow(data), 15), ]
  data.train <- data[setdiff(rownames(data), rownames(data.test)), ]
  data.mi.train <- mice(data.train)
  data.mi.test <- mice(data.test)

  # recompute model
  mod.nb.train <- with(data = data.mi.train, exp = glm.nb(f))
  coeffs <- summary(pool(mod.nb.train))[, "est"]

  # compute prediction error on each dataset row; the selected columns are
  # the predictors (in coefficient order) followed by the response
  test.rows <- complete(data.mi.test, include = FALSE, action = "long")[
    , c(names(coeffs)[-1], as.character(f)[2])
  ]
  errvec <- apply(test.rows, 1, function(x) {
    n <- length(x)
    # x[-n] are the predictor values, x[n] is the observed response
    exp(sum(x[-n] * coeffs[-1], coeffs[1])) - x[n]
  })
  mean(errvec)
}
# NOTE(review): `d.mi` is not defined in the visible part of this example;
# the data set below is assigned to `d` -- confirm which object is meant.
predicterr(CG.tot~Fibrinogen, d.mi)
Dataset (a little long, but that's for the imputation...):
d <- structure(list(Hb = c(7.5, 12.9, 12.9, 10.2, 10.5, 11.2, 12.7,
9.3, 11.7, 13.4, 151, 10.9, 5.9, 12.8, 10.2, 15.3, 13.8, 9.6,
7.6, 12.2, 11.1, 13.6, 8.9, 7.2, 7.8, 8.7, 10.3, 14, 8.8, 7.5
), Hct = c(23, 39.8, 39.4, 31.6, 32.5, 34.4, 39, 28, 35.9, 41.2,
43.8, 33.7, 18.6, 37.7, 31.7, 44, 87.3, 29.4, 23.6, 37.7, 34.3,
39.8, 27.4, 22.6, 24.2, 29.1, 31.8, 43.1, 27.3, 23.3), EXTEM.CT = c(51L,
60L, 45L, 115L, 55L, 48L, 49L, 106L, 56L, 68L, 61L, 53L, 69L,
44L, 58L, 126L, 47L, 68L, 49L, 68L, 51L, 84L, 63L, 66L, 51L,
108L, 63L, 51L, 53L, 63L), EXTEM.CFT = c(133L, 162L, 175L, 216L,
101L, 60L, 140L, 248L, 137L, 203L, 113L, 199L, 316L, 90L, 224L,
235L, 133L, 46L, 308L, 300L, 119L, 420L, 44L, 207L, 91L, 69L,
96L, 130L, 153L, 99L), EXTEM.MCF = c(59L, 55L, 50L, 46L, 64L,
72L, 52L, 46L, 50L, 50L, 60L, 40L, 40L, 56L, 46L, 47L, 52L, 67L,
40L, 35L, 83L, 30L, 82L, 47L, 61L, 76L, 63L, 51L, 58L, 58L),
INTEM.CT = c(NA, 158L, 154L, 240L, 141L, 141L, 143L, 122L,
104L, 193L, 183L, 186L, 182L, 172L, 192L, 149L, 133L, 162L,
238L, 158L, 144L, 144L, 162L, 213L, 139L, 157L, 104L, 376L,
140L, 192L), INTEM.CFT = c(NA, 91L, 119L, 165L, 97L, 51L,
118L, 190L, 84L, 90L, 82L, 114L, 226L, 90L, 89L, 209L, NA,
64L, 203L, 222L, 64L, 104L, 43L, 170L, 66L, 50L, 61L, 332L,
70L, 66L), INTEM.MCF = c(NA, 57L, 48L, 48L, 74L, 70L, 49L,
50L, 50L, 55L, 58L, 49L, 40L, 57L, 48L, 46L, 64L, 68L, 44L,
39L, 64L, 54L, 80L, 51L, 64L, 78L, 68L, 54L, 62L, 61L), FIBTEM.CT = c(50L,
62L, 101L, 123L, 58L, 49L, 49L, 74L, 77L, 117L, 61L, 54L,
79L, 41L, 69L, 189L, 49L, 67L, 55L, 56L, 57L, 59L, 56L, 62L,
57L, 65L, 51L, 58L, 68L, 67L), FIBTEM.CFT = c(NA, NA, NA,
NA, NA, 94L, NA, NA, NA, NA, NA, 615L, NA, 56L, NA, NA, NA,
79L, NA, NA, 625L, NA, 75L, NA, 892L, NA, NA, NA, NA, 1206L
), FIBTEM.MCF = c(9L, 9L, NA, 5L, 10L, 21L, 11L, 4L, 6L,
3L, 16L, 7L, 6L, 31L, NA, 4L, NA, 35L, 11L, 10L, 42L, NA,
28L, 13L, 22L, 28L, 8L, 7L, 9L, 21L), INR = c(1.14, 1, 1,
1.33, 1.01, 1.07, 1.06, 1.43, 1.22, 1.12, 1.18, 1.54, NA,
1.3, 1.13, 1.05, 1.09, 1.11, 1.49, 1.22, 1.33, 1.04, NA,
1.87, 1.67, 1, 1, 1.07, 1.12, 1.88), PTT = c(30, 28.4, 22.1,
37.8, 25.6, 28.9, 27.2, 32.7, 27.2, 28.9, 27.3, 69.9, 132,
31.9, 26.5, NA, 28.9, 44.3, 50.8, 36.6, NA, 23.5, 30, 70.6,
41.2, 30.1, 25.7, 26.7, 26, 41.9), Platelets = c(150, 193,
343, 138, 284, 216, 141, 291, 142, 230, 254, 126, NA, 249,
153, 308, 253, 66, 30, 41, 293, 208, 545, 141, 136, 256,
249, 305, 327, 112), Fibrinogen = c(1.3, NA, NA, 0.9, 2.1,
3.4, 2.3, 1.1, 1.5, 1.1, 1.8, 0.8, NA, 2.3, 2.4, NA, 2.2,
7.4, 1.8, 1.7, NA, 2.6, 7.1, 0.6, 1.2, NA, 1.1, 2.5, 1.7,
2), CG.tot = c(3L, 2L, 3L, 11L, 12L, 0L, 1L, 10L, 4L, 4L,
5L, 0L, 12L, 11L, 3L, 9L, 5L, 0L, 4L, 0L, 0L, 3L, 0L, 21L,
2L, 1L, 1L, 1L, 2L, 3L)), .Names = c("Hb", "Hct", "EXTEM.CT",
"EXTEM.CFT", "EXTEM.MCF", "INTEM.CT", "INTEM.CFT", "INTEM.MCF",
"FIBTEM.CT", "FIBTEM.CFT", "FIBTEM.MCF", "INR", "PTT", "Platelets",
"Fibrinogen", "CG.tot"), row.names = c(50L, 38L, 54L, 82L, 86L,
4L, 24L, 78L, 59L, 58L, 72L, 16L, 85L, 81L, 45L, 77L, 70L, 6L,
63L, 7L, 11L, 53L, 13L, 93L, 36L, 30L, 18L, 19L, 40L, 43L), class = "data.frame")
You're missing one parameter in glm.nb:
# Pass the current evaluation environment as glm.nb's data argument so the
# variables named in f are looked up there.
mod.nb.train <- with(
  data = data.mi.train,
  expr = glm.nb(f, data = environment())
)
and it works.

Resources