How can i draw a barplot with 3 variables? - r

i'm having some trouble making a barplot.
I want to make a barplot with 3 ordinal variables (scale: yes, no, i don't know (for each))
I need the x-axis to show the bars side by side (yes1, yes2, yes3, no1, no2... and so on). The y-axis should show the frequency or the percentage.
Each variable belongs to a different wave in a panel and i want to show the changes through a barplot.
I've come so far, to draw a plot for each variable (see code)
What I need is to combine the 3 plots, but I don't know yet how to do it. I've tried facet_wrap/facet_grid, but I haven't been able to solve my problem with that approach. I also get the error:
"Don't know how to automatically pick scale for object of type haven_labelled. Defaulting to continuous."so the labels on the x-axis can't be shown.
Can someone please help me?
Thanks,
Ingrid.
Here is my data:
dput(veraenderung[1:4, ])
structure(list(vor = structure(c(2, 3, 3, 1), label = "Erwartung, dass sich durch die Teilnahme an der FoBi Veränderungen im Berufsallt", labels = c(ja = 1,
nein = 2, `weiß nicht` = 3), class = "haven_labelled"), nach = structure(c(2,
3, 1, 1), label = "Erwarten Sie, dass Ihre Teilnahme an dieser FoBi zu Veränderungen in Ihrem Beruf", labels = c(ja = 1,
nein = 2, `weiß nicht` = 3), class = "haven_labelled"), sechs_monate_spaeter = structure(c(2,
2, 1, 3), label = "Hat sich durch Ihre Teilnahme an der Fortbildung zur interkulturellen Kompetenz", labels = c(ja = 1,
nein = 2, `weiß nicht` = 9), class = "haven_labelled"), Welle123 = c(1,
1, 1, 1)), na.action = structure(c(`4` = 4L, `7` = 7L, `8` = 8L,
`9` = 9L, `10` = 10L, `11` = 11L, `12` = 12L, `13` = 13L, `14` = 14L,
`15` = 15L, `16` = 16L, `17` = 17L, `19` = 19L, `20` = 20L, `24` = 24L,
`26` = 26L, `27` = 27L, `29` = 29L, `30` = 30L, `31` = 31L, `33` = 33L,
`34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L, `39` = 39L,
`41` = 41L, `43` = 43L, `44` = 44L, `46` = 46L, `47` = 47L, `48` = 48L,
`49` = 49L, `50` = 50L, `52` = 52L, `54` = 54L, `55` = 55L, `58` = 58L,
`59` = 59L, `60` = 60L, `63` = 63L, `64` = 64L, `66` = 66L, `68` = 68L,
`71` = 71L, `72` = 72L, `73` = 73L, `74` = 74L, `75` = 75L, `78` = 78L,
`80` = 80L, `81` = 81L, `82` = 82L, `83` = 83L, `84` = 84L, `86` = 86L,
`87` = 87L, `91` = 91L, `92` = 92L, `94` = 94L, `97` = 97L, `99` = 99L,
`101` = 101L, `102` = 102L, `105` = 105L, `106` = 106L, `107` = 107L,
`108` = 108L, `109` = 109L, `112` = 112L, `113` = 113L, `114` = 114L,
`116` = 116L, `117` = 117L, `119` = 119L, `121` = 121L, `122` = 122L,
`123` = 123L, `124` = 124L, `127` = 127L, `128` = 128L, `130` = 130L,
`132` = 132L, `134` = 134L, `135` = 135L, `136` = 136L, `138` = 138L,
`139` = 139L, `140` = 140L, `141` = 141L, `142` = 142L, `144` = 144L,
`146` = 146L, `147` = 147L, `148` = 148L, `149` = 149L, `151` = 151L,
`152` = 152L, `153` = 153L, `156` = 156L, `157` = 157L, `159` = 159L,
`164` = 164L, `165` = 165L, `166` = 166L, `168` = 168L, `169` = 169L,
`170` = 170L, `172` = 172L, `173` = 173L, `174` = 174L, `176` = 176L,
`177` = 177L, `178` = 178L, `179` = 179L, `180` = 180L, `181` = 181L,
`183` = 183L, `184` = 184L, `185` = 185L, `190` = 190L, `191` = 191L,
`192` = 192L, `194` = 194L, `195` = 195L, `196` = 196L, `197` = 197L,
`202` = 202L, `205` = 205L, `206` = 206L, `208` = 208L, `209` = 209L,
`210` = 210L, `211` = 211L, `212` = 212L, `213` = 213L, `215` = 215L,
`216` = 216L, `217` = 217L, `218` = 218L, `221` = 221L, `223` = 223L,
`225` = 225L, `226` = 226L, `227` = 227L, `228` = 228L, `229` = 229L,
`230` = 230L, `231` = 231L, `232` = 232L, `233` = 233L, `234` = 234L,
`235` = 235L, `236` = 236L, `237` = 237L, `238` = 238L, `239` = 239L,
`240` = 240L, `241` = 241L, `242` = 242L, `243` = 243L, `244` = 244L,
`245` = 245L, `246` = 246L, `247` = 247L, `248` = 248L, `249` = 249L
), class = "omit"), row.names = c(NA, 4L), class = "data.frame")
Here is the code:
library(tidyverse)
veraenderung <- ikoe %>%
select(v13, wn06, xn2, Welle123) %>%
rename(vor = v13,
nach = wn06,
sechs_monate_spaeter = xn2) %>%
na.omit(veraenderung) %>%
as.data.frame()
ggplot(veraenderung, aes(x = vor)) +
geom_bar()
ggplot(veraenderung, aes(x = nach)) +
geom_bar()
ggplot(veraenderung, aes(x = sechs_monate_spaeter)) +
geom_bar()

Your haven object is a bit of a challenge for tidyverse manipulations. See below what I suggest to make this object a bit "cleaner" (remove the labels and change the values to character class), then reshape it to long format and plot it.
library(tidyverse)

# Rename the columns to short names (this also gets rid of the long labels).
names(veraenderung) <- c("vor", "nach", "sechs", "welle")

# ggplot2 cannot pick a scale for haven_labelled columns, so store the
# response codes as plain character strings, which are treated as discrete.
veraenderung <- as_tibble(veraenderung) %>%
  mutate(across(everything(), as.character))

# Reshape only the three response columns to long format. `welle` (originally
# Welle123) is not one of the three response variables; pivoting it as well
# would add a spurious fourth group of bars to the plot.
veraenderung <- veraenderung %>%
  pivot_longer(
    cols = c(vor, nach, sechs),
    names_to = "key",
    values_to = "value"
  ) %>%
  # Keep the variables in the order listed above instead of alphabetical order.
  mutate(key = factor(key, levels = c("vor", "nach", "sechs")))

ggplot(veraenderung, aes(key)) +
  geom_bar(
    aes(fill = value),
    position = position_dodge(preserve = "single")
  )
# try without preserve or position_dodge and see what happens
Created on 2020-02-06 by the reprex package (v0.3.0)

Related

ggplot2: legend symbols matching plot symbols

I made a ggplot where I make use of the viridis color package. I adjusted the geom_point in the graph to different symbols. The symbols are not displaying in my legend, although the colors are correctly programmed. How can I match my legend (including symbols and colors) with my ggplot?
Attempt:
library(ggplot)
library(viridis)
ggplot(df, aes(`Lengte_(cm)`, verschil_lengte))+
geom_point(aes(shape = Lengteklasse, colour = Lengteklasse), size = 3)+
geom_hline(yintercept = 1.0, linetype="dashed", color = "red")+
geom_hline(yintercept = 2.0, linetype="dashed", color = "red")+
scale_shape_manual(values = c(16, 17, 15, 3, 8), guide = "none")+
scale_color_viridis(discrete = T, option = "D")+
scale_x_continuous(breaks = seq(7,12, by = 0.5))+
scale_y_continuous(breaks = seq(0,3, by = 0.5))+
labs(x = "Lengte (cm)", y = "Verschaling (mm)")+
guides(col = guide_legend("Lengteklasse (cm)"))+
theme_classic()
current outcome:
df =
structure(list(`Lengte_(cm)` = c(9, 10.7, 10.7, 7.7, 9.1, 11.2,
9.7, 10.2, 8.6, 8.9, 11.2, 11.4, 10.5, 10.5, 11.1, 8.9, 11.5,
10.4, 9.1, 9.2, 10.1, 7.8, 9.8, 8.2, 10.1, 10.5, 10.2, 7.9, 9.3,
8, 8.7, 8.9, 8.8, 9.3, 8.5, 7.7, 11.2, 9.4, 9.7, 11.2, 11, 10.7,
9), Lengteklasse = structure(c(4L, 5L, 5L, 2L, 4L, 6L, 4L, 5L,
3L, 3L, 6L, 6L, 5L, 5L, 6L, 3L, 6L, 5L, 4L, 4L, 5L, 2L, 4L, 3L,
5L, 5L, 5L, 2L, 4L, 3L, 3L, 3L, 3L, 4L, 3L, 2L, 6L, 4L, 4L, 6L,
6L, 5L, 4L), .Label = c("6", "7", "8", "9", "10", "11", "12",
"13"), class = "factor"), verschil_lengte = c(0, 1.4, 1.8, 1.8,
1.4, 0.800000000000001, 0.600000000000001, 0.600000000000001,
1.4, 1.9, 1.3, 1.5, 0.300000000000001, 0.5, 0.9, 2.2, 1, 1, 1.4,
2.1, 1.3, 2.2, 0.899999999999999, 2.3, 1.1, 0.699999999999999,
2.1, 0.4, 0.5, 0.9, 2.1, 1.6, 1.7, 0.799999999999999, 2, 2.1,
0.5, 0.799999999999999, 1.3, 0.4, 0.300000000000001, 1.6, 0.199999999999999
)), row.names = c(NA, -43L), class = c("tbl_df", "tbl", "data.frame"
), na.action = structure(c(`1` = 1L, `2` = 2L, `3` = 3L, `4` = 4L,
`5` = 5L, `6` = 6L, `7` = 7L, `8` = 8L, `9` = 9L, `10` = 10L,
`11` = 11L, `12` = 12L, `13` = 13L, `14` = 14L, `15` = 15L, `16` = 16L,
`17` = 17L, `18` = 18L, `19` = 19L, `20` = 20L, `21` = 21L, `22` = 22L,
`23` = 23L, `24` = 24L, `25` = 25L, `26` = 26L, `27` = 27L, `28` = 28L,
`29` = 29L, `30` = 30L, `31` = 31L, `32` = 32L, `33` = 33L, `34` = 34L,
`35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L, `39` = 39L, `40` = 40L,
`41` = 41L, `42` = 42L, `43` = 43L, `44` = 44L, `45` = 45L, `46` = 46L,
`47` = 47L, `48` = 48L, `49` = 49L, `50` = 50L, `51` = 51L, `52` = 52L,
`53` = 53L, `54` = 54L, `55` = 55L, `56` = 56L, `57` = 57L, `58` = 58L,
`59` = 59L, `60` = 60L, `61` = 61L, `62` = 62L, `63` = 63L, `64` = 64L,
`65` = 65L, `66` = 66L, `67` = 67L, `68` = 68L, `69` = 69L, `70` = 70L,
`71` = 71L, `72` = 72L, `73` = 73L, `74` = 74L, `75` = 75L, `76` = 76L,
`77` = 77L, `78` = 78L, `79` = 79L, `80` = 80L, `81` = 81L, `82` = 82L,
`83` = 83L, `84` = 84L, `85` = 85L, `86` = 86L, `87` = 87L, `88` = 88L,
`89` = 89L, `90` = 90L, `91` = 91L, `92` = 92L, `93` = 93L, `94` = 94L,
`95` = 95L, `96` = 96L, `97` = 97L, `98` = 98L, `99` = 99L, `100` = 100L,
`101` = 101L, `102` = 102L, `103` = 103L, `104` = 104L, `105` = 105L,
`106` = 106L, `107` = 107L, `108` = 108L, `109` = 109L, `110` = 110L,
`111` = 111L, `112` = 112L, `113` = 113L, `114` = 114L, `115` = 115L,
`116` = 116L, `117` = 117L, `118` = 118L, `119` = 119L, `120` = 120L,
`121` = 121L, `122` = 122L, `123` = 123L, `124` = 124L, `125` = 125L,
`126` = 126L, `127` = 127L, `128` = 128L, `129` = 129L, `130` = 130L,
`131` = 131L, `132` = 132L, `133` = 133L, `134` = 134L, `135` = 135L,
`136` = 136L, `137` = 137L, `138` = 138L, `139` = 139L, `140` = 140L,
`141` = 141L, `142` = 142L, `143` = 143L, `144` = 144L, `145` = 145L,
`146` = 146L, `147` = 147L, `148` = 148L, `149` = 149L, `150` = 150L,
`151` = 151L, `152` = 152L, `153` = 153L, `154` = 154L, `155` = 155L,
`156` = 156L, `157` = 157L, `158` = 158L, `159` = 159L, `160` = 160L,
`161` = 161L, `162` = 162L, `163` = 163L, `164` = 164L, `165` = 165L,
`166` = 166L, `167` = 167L, `168` = 168L, `169` = 169L, `170` = 170L,
`171` = 171L, `172` = 172L, `173` = 173L, `174` = 174L, `175` = 175L,
`176` = 176L, `177` = 177L, `178` = 178L, `179` = 179L, `180` = 180L,
`181` = 181L, `182` = 182L, `183` = 183L, `184` = 184L, `185` = 185L,
`186` = 186L, `187` = 187L, `188` = 188L, `189` = 189L, `190` = 190L,
`191` = 191L, `192` = 192L, `193` = 193L, `194` = 194L, `195` = 195L,
`196` = 196L, `197` = 197L, `198` = 198L, `199` = 199L, `200` = 200L,
`201` = 201L, `202` = 202L, `203` = 203L, `204` = 204L, `205` = 205L,
`206` = 206L, `207` = 207L, `208` = 208L, `209` = 209L, `210` = 210L,
`211` = 211L, `212` = 212L, `213` = 213L, `214` = 214L, `215` = 215L,
`216` = 216L, `217` = 217L, `218` = 218L, `219` = 219L, `220` = 220L,
`221` = 221L, `222` = 222L, `223` = 223L, `224` = 224L, `225` = 225L,
`226` = 226L, `227` = 227L, `228` = 228L, `229` = 229L, `230` = 230L,
`231` = 231L, `232` = 232L, `233` = 233L, `234` = 234L, `235` = 235L,
`236` = 236L, `237` = 237L, `238` = 238L, `239` = 239L, `240` = 240L,
`241` = 241L, `242` = 242L, `243` = 243L, `244` = 244L, `245` = 245L,
`246` = 246L, `247` = 247L, `248` = 248L, `249` = 249L, `250` = 250L,
`251` = 251L, `252` = 252L, `253` = 253L, `254` = 254L, `255` = 255L,
`256` = 256L, `257` = 257L, `258` = 258L, `259` = 259L, `260` = 260L,
`261` = 261L, `262` = 262L, `263` = 263L, `264` = 264L, `265` = 265L,
`266` = 266L, `267` = 267L, `268` = 268L, `269` = 269L, `270` = 270L,
`271` = 271L, `272` = 272L, `273` = 273L, `274` = 274L, `275` = 275L,
`277` = 277L, `278` = 278L, `279` = 279L, `280` = 280L, `281` = 281L,
`282` = 282L, `284` = 284L, `285` = 285L, `286` = 286L, `288` = 288L,
`289` = 289L, `290` = 290L, `291` = 291L, `292` = 292L, `293` = 293L,
`294` = 294L, `295` = 295L, `296` = 296L, `297` = 297L, `298` = 298L,
`300` = 300L, `301` = 301L, `302` = 302L, `303` = 303L, `304` = 304L,
`305` = 305L, `306` = 306L, `308` = 308L, `309` = 309L, `310` = 310L,
`311` = 311L, `312` = 312L, `313` = 313L, `314` = 314L, `315` = 315L,
`316` = 316L, `317` = 317L, `318` = 318L, `319` = 319L, `321` = 321L,
`322` = 322L, `323` = 323L, `324` = 324L, `325` = 325L, `326` = 326L,
`327` = 327L, `328` = 328L, `329` = 329L, `330` = 330L, `331` = 331L,
`333` = 333L, `334` = 334L, `335` = 335L, `336` = 336L, `337` = 337L,
`338` = 338L, `339` = 339L, `340` = 340L, `341` = 341L, `342` = 342L,
`343` = 343L, `344` = 344L, `345` = 345L, `346` = 346L, `347` = 347L,
`348` = 348L, `349` = 349L, `351` = 351L, `352` = 352L, `354` = 354L,
`356` = 356L, `357` = 357L, `358` = 358L, `359` = 359L, `360` = 360L,
`361` = 361L, `362` = 362L, `363` = 363L, `364` = 364L, `366` = 366L,
`368` = 368L, `369` = 369L, `370` = 370L, `371` = 371L, `372` = 372L,
`373` = 373L, `374` = 374L, `375` = 375L, `376` = 376L, `377` = 377L,
`378` = 378L, `379` = 379L, `380` = 380L, `382` = 382L, `383` = 383L,
`384` = 384L, `387` = 387L, `388` = 388L, `390` = 390L, `391` = 391L,
`392` = 392L, `393` = 393L, `394` = 394L, `395` = 395L, `396` = 396L,
`397` = 397L, `399` = 399L, `400` = 400L, `401` = 401L, `402` = 402L,
`404` = 404L, `405` = 405L, `406` = 406L, `407` = 407L, `408` = 408L,
`409` = 409L, `410` = 410L, `411` = 411L, `412` = 412L, `413` = 413L,
`414` = 414L, `415` = 415L, `416` = 416L, `417` = 417L, `419` = 419L,
`420` = 420L, `423` = 423L, `424` = 424L, `425` = 425L, `426` = 426L,
`427` = 427L, `429` = 429L, `430` = 430L, `431` = 431L, `432` = 432L,
`433` = 433L, `434` = 434L, `435` = 435L, `436` = 436L, `437` = 437L,
`438` = 438L, `439` = 439L, `440` = 440L, `441` = 441L, `442` = 442L,
`443` = 443L, `444` = 444L, `446` = 446L, `447` = 447L, `448` = 448L,
`450` = 450L, `451` = 451L, `452` = 452L, `453` = 453L, `454` = 454L,
`455` = 455L, `456` = 456L, `457` = 457L, `459` = 459L, `460` = 460L,
`462` = 462L, `463` = 463L, `464` = 464L, `465` = 465L, `466` = 466L,
`467` = 467L, `468` = 468L, `469` = 469L, `470` = 470L, `471` = 471L,
`472` = 472L, `473` = 473L, `474` = 474L, `475` = 475L, `476` = 476L,
`478` = 478L, `479` = 479L, `480` = 480L, `481` = 481L, `482` = 482L,
`483` = 483L, `484` = 484L, `485` = 485L, `486` = 486L, `487` = 487L,
`488` = 488L, `489` = 489L, `490` = 490L, `491` = 491L, `493` = 493L,
`495` = 495L, `496` = 496L, `497` = 497L, `498` = 498L, `499` = 499L,
`500` = 500L, `501` = 501L, `502` = 502L, `503` = 503L, `504` = 504L,
`505` = 505L, `506` = 506L, `507` = 507L, `508` = 508L, `509` = 509L,
`510` = 510L, `511` = 511L, `512` = 512L, `513` = 513L, `514` = 514L,
`515` = 515L, `516` = 516L, `517` = 517L, `518` = 518L, `519` = 519L,
`520` = 520L, `521` = 521L, `522` = 522L, `523` = 523L, `524` = 524L,
`525` = 525L, `526` = 526L, `527` = 527L, `528` = 528L, `529` = 529L,
`530` = 530L, `531` = 531L, `532` = 532L, `533` = 533L, `535` = 535L,
`536` = 536L, `537` = 537L, `538` = 538L, `539` = 539L, `540` = 540L,
`542` = 542L, `543` = 543L, `544` = 544L, `545` = 545L, `546` = 546L,
`547` = 547L, `548` = 548L, `549` = 549L, `550` = 550L, `551` = 551L,
`553` = 553L, `554` = 554L, `555` = 555L, `556` = 556L, `557` = 557L,
`558` = 558L, `559` = 559L, `560` = 560L, `561` = 561L, `562` = 562L,
`563` = 563L, `564` = 564L, `565` = 565L, `566` = 566L, `567` = 567L,
`568` = 568L, `569` = 569L, `570` = 570L, `571` = 571L, `572` = 572L,
`573` = 573L, `574` = 574L, `575` = 575L, `576` = 576L, `577` = 577L,
`578` = 578L, `579` = 579L, `580` = 580L, `581` = 581L, `582` = 582L,
`583` = 583L, `584` = 584L, `585` = 585L, `586` = 586L, `587` = 587L,
`588` = 588L, `589` = 589L, `590` = 590L, `591` = 591L, `593` = 593L,
`595` = 595L, `596` = 596L, `597` = 597L, `598` = 598L, `599` = 599L,
`601` = 601L, `602` = 602L, `603` = 603L, `604` = 604L, `605` = 605L,
`606` = 606L, `608` = 608L, `609` = 609L, `610` = 610L, `611` = 611L,
`612` = 612L, `614` = 614L, `615` = 615L, `616` = 616L, `617` = 617L,
`618` = 618L, `619` = 619L, `620` = 620L, `621` = 621L, `622` = 622L,
`623` = 623L, `624` = 624L, `625` = 625L, `626` = 626L, `627` = 627L,
`628` = 628L, `629` = 629L, `631` = 631L, `632` = 632L, `633` = 633L,
`634` = 634L, `635` = 635L, `636` = 636L, `637` = 637L, `638` = 638L,
`639` = 639L, `640` = 640L, `641` = 641L, `642` = 642L, `643` = 643L,
`645` = 645L, `646` = 646L, `647` = 647L, `648` = 648L, `649` = 649L,
`650` = 650L, `651` = 651L, `652` = 652L, `653` = 653L, `654` = 654L,
`655` = 655L, `657` = 657L, `658` = 658L, `659` = 659L, `661` = 661L,
`662` = 662L, `663` = 663L, `664` = 664L, `666` = 666L, `667` = 667L,
`668` = 668L, `669` = 669L, `670` = 670L, `671` = 671L, `672` = 672L,
`673` = 673L, `675` = 675L, `677` = 677L, `678` = 678L, `679` = 679L,
`680` = 680L, `681` = 681L, `682` = 682L, `683` = 683L, `684` = 684L,
`685` = 685L, `686` = 686L, `687` = 687L, `688` = 688L, `689` = 689L,
`690` = 690L, `691` = 691L, `692` = 692L, `693` = 693L, `696` = 696L,
`697` = 697L, `698` = 698L), class = "omit"))
Your guide="none" implies you don't want a legend for shape. That's why the shapes don't appear. To combine two legends, give them the same name. Thus:
library(ggplot2) # Note typo correction
library(viridis)

# Shape and colour are both mapped to Lengteklasse. Giving the shape scale and
# the colour scale the same name lets ggplot2 merge them into a single legend.
ggplot(df, aes(`Lengte_(cm)`, verschil_lengte)) +
  geom_point(aes(shape = Lengteklasse, colour = Lengteklasse), size = 3) +
  geom_hline(yintercept = 1.0, linetype = "dashed", color = "red") +
  geom_hline(yintercept = 2.0, linetype = "dashed", color = "red") +
  scale_shape_manual(
    values = c(16, 17, 15, 3, 8),
    name = "Lengteklasse (cm)"
  ) +
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  scale_color_viridis(
    discrete = TRUE,
    option = "D",
    name = "Lengteklasse (cm)"
  ) +
  scale_x_continuous(breaks = seq(7, 12, by = 0.5)) +
  scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
  labs(x = "Lengte (cm)", y = "Verschaling (mm)") +
  guides(col = guide_legend("Lengteklasse (cm)")) +
  theme_classic()
produces
[You can also do away with guides(col = guide_legend("Lengteklasse (cm)"))+.]
Here is another approach. That of @Limey was my first thought, but it was already posted. Anyway, the clue is:
If you want to have the same color and shape in one legend, then you have to give them the same name in the aesthetics!
Then, to give them the same title in a single legend, we have to set the name argument in both scale_color_viridis and scale_shape_manual.
Then you can remove guides(), as already stated by Limey:
# The package is called ggplot2; library(ggplot) fails because no package
# named "ggplot" exists.
library(ggplot2)
library(viridis)

# Shape and colour are mapped to the same variable in the top-level aes(), and
# both scales get the same name, so ggplot2 draws one combined legend.
ggplot(
  df,
  aes(
    `Lengte_(cm)`,
    verschil_lengte,
    shape = Lengteklasse,
    colour = Lengteklasse
  )
) +
  geom_point(size = 3) +
  geom_hline(yintercept = 1.0, linetype = "dashed", color = "red") +
  geom_hline(yintercept = 2.0, linetype = "dashed", color = "red") +
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  scale_color_viridis(
    name = "Lengteklasse (cm)",
    discrete = TRUE,
    option = "D"
  ) +
  scale_shape_manual(
    name = "Lengteklasse (cm)",
    values = c(16, 17, 15, 3, 8)
  ) +
  scale_x_continuous(breaks = seq(7, 12, by = 0.5)) +
  scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
  labs(x = "Lengte (cm)", y = "Verschaling (mm)") +
  theme_classic()

How to only select rows that are duplicated in a column in a dataframe

I have joined two dataframes together and I am trying to select only the 'Branch Codes' that are duplicated.
I want to join the datasets 'BranchData' and 'BranchCode' so that any branch codes that are common to both datasets are included, as well as those that are not common to both datasets.
However, the last line of the code below does not seem to work!
# Coerce the join key to numeric in both tables so the key columns have the
# same type.
BranchData$`Branch Code` <- as.numeric(BranchData$`Branch Code`)
BranchCalls$`Branch Code` <- as.numeric(BranchCalls$`Branch Code`)
# Drop BranchData rows that contain any NA before joining.
BranchData <- na.omit(BranchData)
# Keep all rows from both tables; with no `by`, full_join() matches on every
# column name the two tables share.
merged <- full_join(BranchData, BranchCalls)
merged <- merged %>% group_by(merged$`Branch Code`) %>% filter(n() >= 2)
Also, I am trying to put the duplicates into groups so that all the duplicates are together, but the following code doesn't seem to work either:
merged <- group_by(merged,merged$'Branch Code')
Minimal Reproducible Example:
structure(list(`Branch Code` = c(401801, 436801, 403801, 164801,
198801), `Location Type` = c("Urban", "Urban", "Urban Deprived",
"Rural", "Urban"), Type = c("MAIN", "MAIN", "MAIN", "MAIN", "LM"
), Status = c("Open", "Open", "Open", "Open", "Open"), Segment = c("Agency",
"Agency", "Agency", "Agency", "Agency"), `Multiple (partner that owns multiple branches)` = c("Multiple 11",
"Multiple 11", "Multiple 12", "Multiple 13", "Multiple 13"),
RetailType = c("Books_Stationery", "Books_Stationery", "Convenience",
"Convenience", "Convenience"), `Volume of transactions` = c(2238,
1514, 1346, 1338, 625), `Open hours` = c(47.75, 50.2500000000001,
46.5, 48.25, 114.25), `X Pos` = c(394169, 393488, 394434,
392153, 393094), `Y Pos` = c(806326, 805877, 804347, 796902,
802789), Urbanity = c("Major Centre", "Major Centre", "High Density",
"Low Density", "Low Density"), `Case Reference Number` = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), `Created On` = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), tzone = "UTC", class = c("POSIXct",
"POSIXt")), `Branch Type` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), L1 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), L2 = c(NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), L3 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), L4 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), `Case Type` = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
), na.action = structure(c(`3` = 3L, `4` = 4L, `5` = 5L, `6` = 6L,
`7` = 7L, `8` = 8L, `9` = 9L, `11` = 11L, `13` = 13L, `16` = 16L,
`17` = 17L, `18` = 18L, `20` = 20L, `21` = 21L, `22` = 22L, `23` = 23L,
`26` = 26L, `27` = 27L, `28` = 28L, `29` = 29L, `31` = 31L, `32` = 32L,
`33` = 33L, `34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `39` = 39L,
`40` = 40L, `41` = 41L, `42` = 42L, `43` = 43L, `44` = 44L, `45` = 45L,
`46` = 46L, `47` = 47L, `48` = 48L, `49` = 49L, `51` = 51L, `52` = 52L,
`54` = 54L, `55` = 55L, `57` = 57L, `58` = 58L, `59` = 59L, `60` = 60L,
`61` = 61L, `62` = 62L, `63` = 63L, `65` = 65L, `67` = 67L, `68` = 68L,
`69` = 69L, `70` = 70L, `71` = 71L, `72` = 72L, `74` = 74L, `75` = 75L,
`76` = 76L, `77` = 77L, `78` = 78L, `80` = 80L, `81` = 81L, `82` = 82L,
`83` = 83L, `84` = 84L, `86` = 86L, `87` = 87L, `88` = 88L, `89` = 89L,
`91` = 91L, `92` = 92L, `93` = 93L, `96` = 96L, `97` = 97L, `98` = 98L,
`99` = 99L, `100` = 100L, `101` = 101L, `103` = 103L, `106` = 106L,
`107` = 107L, `108` = 108L, `109` = 109L, `110` = 110L, `111` = 111L,
`112` = 112L, `113` = 113L, `114` = 114L, `115` = 115L, `116` = 116L,
`117` = 117L, `118` = 118L, `119` = 119L, `120` = 120L, `121` = 121L,
`122` = 122L, `123` = 123L, `124` = 124L, `126` = 126L, `127` = 127L,
`129` = 129L, `130` = 130L, `131` = 131L, `132` = 132L, `133` = 133L,
`134` = 134L, `135` = 135L, `136` = 136L, `137` = 137L, `139` = 139L,
`140` = 140L, `141` = 141L, `142` = 142L, `143` = 143L, `144` = 144L,
`145` = 145L, `146` = 146L, `147` = 147L, `148` = 148L, `149` = 149L,
`150` = 150L, `151` = 151L, `152` = 152L, `153` = 153L, `155` = 155L,
`156` = 156L, `157` = 157L, `160` = 160L, `161` = 161L, `162` = 162L,
`163` = 163L, `165` = 165L, `166` = 166L, `167` = 167L, `168` = 168L,
`169` = 169L, `174` = 174L, `175` = 175L, `176` = 176L, `177` = 177L,
`178` = 178L, `179` = 179L, `180` = 180L, `182` = 182L, `183` = 183L,
`185` = 185L, `186` = 186L, `188` = 188L, `189` = 189L, `190` = 190L,
`191` = 191L, `192` = 192L, `193` = 193L, `194` = 194L, `195` = 195L,
`196` = 196L, `197` = 197L, `198` = 198L, `199` = 199L, `200` = 200L,
`201` = 201L, `203` = 203L, `204` = 204L, `205` = 205L, `206` = 206L,
`207` = 207L, `209` = 209L, `210` = 210L, `211` = 211L, `212` = 212L,
`213` = 213L, `214` = 214L, `215` = 215L, `216` = 216L, `217` = 217L,
`218` = 218L, `219` = 219L, `220` = 220L, `221` = 221L, `222` = 222L,
`223` = 223L, `224` = 224L, `226` = 226L, `227` = 227L, `228` = 228L,
`229` = 229L, `230` = 230L, `231` = 231L, `232` = 232L, `233` = 233L,
`234` = 234L, `236` = 236L, `237` = 237L, `238` = 238L, `239` = 239L,
`240` = 240L, `241` = 241L, `242` = 242L, `243` = 243L, `244` = 244L,
`245` = 245L, `247` = 247L, `248` = 248L, `249` = 249L, `250` = 250L,
`251` = 251L, `252` = 252L, `253` = 253L, `254` = 254L, `255` = 255L,
`256` = 256L, `257` = 257L, `258` = 258L, `259` = 259L, `260` = 260L,
`261` = 261L, `262` = 262L, `263` = 263L, `264` = 264L, `265` = 265L,
`266` = 266L, `267` = 267L, `268` = 268L, `269` = 269L, `270` = 270L,
`271` = 271L, `272` = 272L, `273` = 273L, `274` = 274L, `276` = 276L,
`278` = 278L, `280` = 280L, `281` = 281L, `282` = 282L, `283` = 283L,
`284` = 284L, `285` = 285L, `286` = 286L, `288` = 288L, `289` = 289L,
`291` = 291L, `292` = 292L, `293` = 293L, `294` = 294L, `296` = 296L,
`297` = 297L, `298` = 298L, `299` = 299L, `300` = 300L, `301` = 301L,
`304` = 304L, `305` = 305L, `306` = 306L, `307` = 307L, `308` = 308L,
`311` = 311L, `312` = 312L, `313` = 313L, `316` = 316L, `319` = 319L,
`321` = 321L, `322` = 322L, `323` = 323L, `324` = 324L, `325` = 325L,
`326` = 326L, `327` = 327L, `328` = 328L, `329` = 329L, `330` = 330L,
`331` = 331L, `332` = 332L, `333` = 333L, `335` = 335L, `337` = 337L,
`338` = 338L, `339` = 339L, `340` = 340L, `341` = 341L, `342` = 342L,
`343` = 343L, `344` = 344L, `345` = 345L, `346` = 346L, `347` = 347L,
`348` = 348L, `349` = 349L, `350` = 350L, `351` = 351L, `352` = 352L,
`353` = 353L, `354` = 354L, `355` = 355L, `356` = 356L, `357` = 357L,
`359` = 359L, `360` = 360L, `361` = 361L, `362` = 362L, `363` = 363L,
`365` = 365L, `366` = 366L, `367` = 367L, `368` = 368L, `370` = 370L,
`371` = 371L, `372` = 372L, `373` = 373L, `375` = 375L, `376` = 376L,
`378` = 378L, `379` = 379L, `380` = 380L, `381` = 381L, `382` = 382L,
`384` = 384L, `385` = 385L, `387` = 387L, `388` = 388L, `389` = 389L,
`390` = 390L, `391` = 391L, `392` = 392L, `393` = 393L, `395` = 395L,
`396` = 396L, `397` = 397L, `398` = 398L, `399` = 399L, `400` = 400L,
`401` = 401L, `403` = 403L, `404` = 404L, `405` = 405L, `409` = 409L,
`412` = 412L, `413` = 413L, `414` = 414L, `415` = 415L, `416` = 416L,
`418` = 418L, `419` = 419L, `420` = 420L, `421` = 421L, `422` = 422L,
`423` = 423L, `426` = 426L, `427` = 427L, `428` = 428L, `429` = 429L,
`432` = 432L, `433` = 433L, `435` = 435L, `436` = 436L, `437` = 437L,
`438` = 438L, `440` = 440L, `441` = 441L, `442` = 442L, `443` = 443L,
I would be so grateful if anybody could give me a helping hand!
Thank you so much!
You can do it using table:
# table() names its counts by the character form of each code, so look the
# counts up by name. `Branch Code` is numeric, and indexing the table with the
# numbers themselves would be positional and give NA for every row.
# as.vector() drops the table's dim/names so filter() gets a plain vector.
merged %>%
  filter(as.vector(table(`Branch Code`)[as.character(`Branch Code`)]) > 1)
or using add_count:
# Append a per-code row count `n`, then keep codes occurring more than once.
merged %>%
  add_count(`Branch Code`) %>%
  filter(n > 1)
I created a small sample data:
merged <- data.frame(branch_code = c("401801", "436801", "401801"),
location_type = c("Urban", "Urban", "Rural"))
branch_code location_type
1 401801 Urban
2 436801 Urban
3 401801 Rural
You can use this code:
# Keep only the rows whose branch_code occurs more than once. The group size
# is tested directly with n(), so no helper count column is needed. The result
# is still grouped by branch_code.
merged %>%
  group_by(branch_code) %>%
  filter(n() > 1)
Output:
# A tibble: 2 × 2
# Groups: branch_code [1]
branch_code location_type
<chr> <chr>
1 401801 Urban
2 401801 Rural

R nls() Initial Parameter Problem, nonlinear Regression

I get an error message:
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
when using the nls() function like
form_Q10_parabolic_SM <- as.formula(Lin_Flux..mymol.m.2.s.1. ~ (rRef<- 5.5354)*a*exp(b*Mean_Soil_Temp_V2..C.)*((-c*Soil_Moist_V3**2)+(d*Soil_Moist_V3)+e))
Q10_parabolic_SM <- nls(form_Q10_parabolic_SM, data = conB1_2015, start = list(a = 1, b = 0.11, c = 0.0001, d = 0.01, e = 0.1))
I got my initial parameters by using the preview() function of the nlstools library like this (same definition of the formula as above)
preview(form_Q10_parabolic_SM, data = conB1_2015, start = c(a = 1, b = 0.11, c = 0.0001, d = 0.01, e = 0.1), variable = 1)
Which gives me this output with the parameters a-e above:
This looks quite good by my eyes and I really don't know what to do at this point since the preview() works just fine.
Is my model too complex or overparameterized? Or did I just do something wrong with the nls function?
Any tips would be really appreciated!
> dput(head(conB1_2015, 30))
structure(list(X = c(13L, 68L, 69L, 70L, 71L, 72L, 73L, 74L,
75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L,
88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L), IV_Date = c("2015-01-14",
"2015-03-11", "2015-03-12", "2015-03-13", "2015-03-14", "2015-03-15",
"2015-03-16", "2015-03-17", "2015-03-18", "2015-03-19", "2015-03-20",
"2015-03-21", "2015-03-22", "2015-03-23", "2015-03-24", "2015-03-25",
"2015-03-26", "2015-03-27", "2015-03-28", "2015-03-29", "2015-03-30",
"2015-03-31", "2015-04-01", "2015-04-02", "2015-04-03", "2015-04-04",
"2015-04-05", "2015-04-06", "2015-04-07", "2015-04-08"), SMmean010.... = c(24.5341666666667,
23.4754166666667, 23.0585416666667, 22.830625, 22.7447916666667,
22.7729166666666, 22.7929166666667, 22.7354166666667, 22.6579166666667,
22.5935416666667, 22.5233333333333, 22.7641666666667, 23.6010416666667,
23.445625, 23.404375, 23.2845833333333, 23.0672916666667, 22.9347916666667,
22.8272916666667, 23.0316666666667, 23.988125, 25.5647916666667,
27.055, 27.7995833333333, 26.23125, 25.4658333333333, 25.0845833333333,
24.8175, 24.605, 24.4216666666667), Lin_Flux..mymol.m.2.s.1. = c(1.13,
2.146, 1.98708333333333, 1.88416666666667, 1.57083333333333,
1.93041666666667, 2.69875, 2.8075, 3.23272727272727, 2.35818181818182,
2.23833333333333, 1.84958333333333, 2.18695652173913, 2.16958333333333,
2.69791666666667, 3.025, 1.985, 1.88083333333333, 2.30416666666667,
2.775, 1.44458333333333, 1.78791666666667, 1.04863636363636,
1.03458333333333, 1.4725, 1.86833333333333, 1.71125, 1.79, 1.53166666666667,
1.97666666666667), Mean_Soil_Temp_V2..C. = c(4.739, 5.1864, 4.08408333333333,
3.61625, 3.68508333333333, 4.09925, 4.87079166666667, 5.64720833333333,
6.58433333333333, 5.05075, 4.93708333333333, 4.109, 3.2295, 3.537,
5.1395, 5.65270833333333, 5.931875, 5.61775, 5.88695833333333,
6.86308333333333, 5.61833333333333, 4.24566666666667, 3.05952173913043,
2.45716666666667, 3.6365, 3.68820833333333, 3.83766666666667,
4.3435, 4.8745, 6.29133333333333), Soil_Moist_V3 = c(25.603137,
21.98744709, 21.8053864833333, 21.6770563291667, 20.1319423708333,
19.9826592666667, 19.8279438958333, 20.1589541791667, 21.5796382,
21.5971315083333, 21.3742824541667, 21.8992939333333, 23.9737254583333,
23.4506886041667, 23.0956395708333, 22.574581225, 22.3561680833333,
21.3806269916667, 21.4045219791667, 21.5611478916667, 25.5090813166667,
28.6440265, 31.4434210347826, 31.9276734541667, 27.5706909333333,
25.1139413583333, 24.2945348333333, 24.0232171416667, 23.705631425,
22.8323341625), precip50..mm. = c(0.6, 0, 0, 0, 0.9, 1.3, 0,
0, 0, 0, 0, 6.6, 0, 0, 0, 0, 0.1, 0.2, 0.1, 6.1, 5, 17.6, 10.4,
6.6, 0, 0, 0, 0, 0, 0), RWI = c(0.6, 0.4, 0.2, 0.133333333333333,
0.9, 1.3, 1.3, 0.65, 0.433333333333333, 0.325, 0.26, 6.6, 6.6,
3.3, 2.2, 1.65, 0.1, 0.2, 0.1, 6.1, 5, 17.6, 10.4, 6.6, 6.6,
3.3, 2.2, 1.65, 1.32, 1.1)), na.action = structure(c(`1` = 1L,
`2` = 2L, `3` = 3L, `4` = 4L, `5` = 5L, `6` = 6L, `7` = 7L, `8` = 8L,
`9` = 9L, `10` = 10L, `11` = 11L, `12` = 12L, `13` = 13L, `15` = 15L,
`16` = 16L, `17` = 17L, `18` = 18L, `19` = 19L, `20` = 20L, `21` = 21L,
`22` = 22L, `23` = 23L, `24` = 24L, `25` = 25L, `26` = 26L, `27` = 27L,
`28` = 28L, `29` = 29L, `30` = 30L, `31` = 31L, `32` = 32L, `33` = 33L,
`34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L, `39` = 39L,
`40` = 40L, `41` = 41L, `42` = 42L, `43` = 43L, `44` = 44L, `45` = 45L,
`46` = 46L, `47` = 47L, `48` = 48L, `49` = 49L, `50` = 50L, `51` = 51L,
`52` = 52L, `53` = 53L, `54` = 54L, `55` = 55L, `56` = 56L, `57` = 57L,
`58` = 58L, `59` = 59L, `60` = 60L, `61` = 61L, `62` = 62L, `63` = 63L,
`64` = 64L, `65` = 65L, `66` = 66L, `67` = 67L, `68` = 68L, `199` = 199L,
`218` = 218L, `219` = 219L, `220` = 220L, `221` = 221L, `222` = 222L,
`223` = 223L, `224` = 224L, `225` = 225L, `226` = 226L, `227` = 227L,
`228` = 228L, `229` = 229L, `230` = 230L, `231` = 231L, `232` = 232L,
`264` = 264L, `265` = 265L, `266` = 266L, `267` = 267L, `352` = 352L,
`353` = 353L, `354` = 354L, `355` = 355L, `356` = 356L, `357` = 357L,
`358` = 358L, `359` = 359L, `360` = 360L, `361` = 361L, `362` = 362L,
`363` = 363L, `364` = 364L, `365` = 365L, `366` = 366L), class = "omit"), row.names = c(14L,
69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L,
82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L,
95L, 96L, 97L), class = "data.frame")
The main problem is that the parameters are not uniquely identifiable. We can multiply a by an arbitrary number and divide c, d and e by that same number and we get the same model. Omit a.
Although it won't hurt, the use of as.formula is redundant since it is already a formula.
Having an assignment within an nls formula is highly unusual. nls will think that rRef is a parameter and fail on that account. Remove the assignment.
If we make these changes then it does give an answer with the data in the updated version of the question.
# Model: an exponential in soil temperature multiplied by a quadratic in soil
# moisture. Free parameters are b (exponent) and c, d, e (quadratic terms).
form_Q10_parabolic_SM <- Lin_Flux..mymol.m.2.s.1. ~
  exp(b * Mean_Soil_Temp_V2..C.) *
    ((-c * Soil_Moist_V3^2) + (d * Soil_Moist_V3) + e)

# Fit with the default nls algorithm; every parameter needs a starting value.
Q10_parabolic_SM <- nls(
  formula = form_Q10_parabolic_SM,
  data = conB1_2015,
  start = list(b = 0.11, c = 0.0001, d = 0.01, e = 0.1)
)
giving:
> Q10_parabolic_SM
Nonlinear regression model
model: Lin_Flux..mymol.m.2.s.1. ~ exp(b * Mean_Soil_Temp_V2..C.) * ((-c * Soil_Moist_V3^2) + (d * Soil_Moist_V3) + e)
data: conB1_2015
b c d e
0.103062 -0.001564 -0.135531 3.528621
residual sum-of-squares: 3.979
Number of iterations to convergence: 6
Achieved convergence tolerance: 4.401e-06
plinear
Note that nls also has the plinear algorithm which has the advantage that only nonlinear parameters (in this case only b) need starting values. In that case the formula's RHS should be a matrix with the columns that multiply each linear parameter. It gives the same answer as above except the linear parameters are given names starting with .lin . Note that the plinear version converges in fewer iterations than the version using the default algorithm above. (Also it seems that the plinear version is not very sensitive to the starting value and even if we use b=1 as the starting value it converges.)
# Partially linear formulation: the right-hand side is a matrix whose columns
# each multiply one linear coefficient, so only the nonlinear parameter b
# needs a starting value.
fo <- Lin_Flux..mymol.m.2.s.1. ~
  cbind(-Soil_Moist_V3^2, Soil_Moist_V3, 1) * exp(b * Mean_Soil_Temp_V2..C.)

fm <- nls(
  formula = fo,
  data = conB1_2015,
  start = list(b = 0.11),
  algorithm = "plinear"
)
giving:
> fm
Nonlinear regression model
model: Lin_Flux..mymol.m.2.s.1. ~ cbind(-Soil_Moist_V3^2, Soil_Moist_V3, 1) * exp(b * Mean_Soil_Temp_V2..C.)
data: conB1_2015
b .lin1 .lin.Soil_Moist_V3 .lin3
0.103062 -0.001564 -0.135528 3.528593
residual sum-of-squares: 3.979
Number of iterations to convergence: 3
Achieved convergence tolerance: 2.189e-06

Removing underscore from species names in phylo4d class when constructing dotplot

I have created a dotplot.phylo4d using the phylosignal package. When using species names it requires that an underscore separates the genus and species, like this: Genus_species. On the tree itself, however, the tip labels should appear without this underscore, but they do not. I have tried adjusting the underscore parameter, but no luck.
Here is the code that makes the tree:
library("phylosignal")
# Dot plot of the two traits stored in the phylo4d object `local`.
# `points.col` and `points.pch` are defined elsewhere (not shown here).
# The plotting symbol must be passed as `dot.pch = points.pch`; the garbled
# `dot.pchFpoints.pch` would be read as an undefined positional argument.
dotplot.phylo4d(
  local,
  dot.col = points.col,
  dot.pch = points.pch,
  underscore = TRUE,
  trait.labels = c("Water Repellency", "Barb Stiffness"),
  trait.bg.col = "white"
)
I have tried manually adding tip.labels, but it does not place the species in the correct locations. When I try to extract the label names from the local phylo4d class I get an error: Error in local$lable : $ operator not defined for this S4 class
Does anyone have any ideas?
Here is an example of dput(local)
new("phylo4d", data = structure(list(water_repelency_factor = c(0.406853948726056,
0.607154878704302, 0.650989064481201, 0.124886215381352, 0.0723507857767838,
-0.0723167215080719, 0.013459653778258, -1.78914935357281, -0.564983339285733,
0.606337089022796, 0.659663703834298, 0.0176228716122535, 0.0127574040830885,
-0.196395841638203, -0.0459106519882355, -0.00530956412638191,
0.639508124725596, 0.777990116847955, 0.451610618568295, 0.355696905949063,
0.263869048235165, -0.221564237669859, 0.147157051369543, 0.117382140996346,
0.0780014518176963, -0.0432113445007319, 1.63762850650646, 3.18758974791682,
3.1915764230193, 2.59818280850751, 0.13012860558643, 0.438800747071309,
0.220659982250533, 0.0406142785151005, -0.0966974465314274, 0.123091856737923,
-0.00273950242722704, -0.0440442594202772, -0.018999718854047,
-0.0913244261412157, 0.425022676610531, 0.632243165802067, 0.368361182363585,
-0.00812059600733897, -0.147599248089371, 0.0148070440392808,
-0.16383227579288, 0.276110714945516, 0.232486022888682, 0.120444921178624,
0.0247910093458199, 0.0199371653665217, 0.0137880171357138, -0.00359380232761909,
-0.135584983429275, 0.824091682655614, 0.589492230611259, -0.386326654254213,
0.656926986174162, 0.647598093829388, 1.10465298748181, 1.08603679621794,
0.185419991568835, 0.536392662698255), stiffness_parameter = c(-0.451152645392232,
-0.0675275753134292, -0.365241405962641, 0.0905545163858439,
0.288842041901916, 0.266165143212233, 0.136623204129795, 0.0897899100887545,
0.165206538251347, 0.115331641755025, 0.0705395185451837, 0.00977078724881693,
0.25245123194264, 0.600773298772429, 0.589060413404627, -0.403924106606226,
0.0347747504905809, 0.0413013238266368, 0.0719688786031885, 0.0546088786069706,
-0.468432315476859, -0.145643606900637, 0.366171679501629, 0.312574773140502,
0.0838000268770967, 0.0810893539547085, 1.08795895425903, 2.06681624860963,
2.02014470904867, 1.67845452279315, -0.00619064668359723, 0.064525330741118,
0.0321501505674897, -0.148400036867211, -0.374596029376242, 0.000137596835294316,
0.00173620676450802, -0.252397879000816, 0.141994990326184, 0.147348381636395,
-0.505048807104088, -0.461332473091121, -0.121426983017065, -0.136696496034141,
-0.146453001600327, -0.393612849002826, -0.401688658534651, -0.131022632470828,
-0.0516916448217204, 0.0447178990330358, -0.00917528749341404,
-0.0645128131411127, 0.0230480464064479, -0.0807760835928284,
-0.211124585904092, 0.0742996834809848, 0.393936005786656, -0.178168184216292,
0.496357516284941, 0.218262874943631, 1.67041563314074, 1.67041563314074,
-0.0264417662148244, 0.170746918626684)), row.names = c(NA, 64L
), class = "data.frame"), metadata = list(), edge = structure(c(0L,
65L, 66L, 67L, 67L, 68L, 68L, 66L, 69L, 69L, 65L, 70L, 71L, 71L,
70L, 72L, 73L, 74L, 75L, 76L, 77L, 77L, 78L, 79L, 79L, 78L, 80L,
80L, 76L, 81L, 81L, 82L, 82L, 83L, 83L, 75L, 84L, 84L, 85L, 85L,
86L, 87L, 87L, 86L, 88L, 88L, 74L, 89L, 90L, 91L, 91L, 92L, 92L,
93L, 93L, 90L, 94L, 94L, 95L, 95L, 96L, 96L, 89L, 97L, 97L, 98L,
98L, 73L, 99L, 100L, 101L, 102L, 102L, 101L, 103L, 103L, 100L,
104L, 104L, 105L, 105L, 99L, 106L, 107L, 108L, 108L, 107L, 109L,
109L, 110L, 110L, 106L, 111L, 112L, 113L, 113L, 112L, 114L, 114L,
111L, 115L, 115L, 116L, 117L, 117L, 118L, 118L, 116L, 119L, 119L,
72L, 120L, 120L, 121L, 122L, 122L, 123L, 123L, 124L, 124L, 121L,
125L, 125L, 126L, 126L, 127L, 127L, 65L, 66L, 67L, 1L, 68L, 2L,
3L, 69L, 4L, 5L, 70L, 71L, 6L, 7L, 72L, 73L, 74L, 75L, 76L, 77L,
12L, 78L, 79L, 8L, 9L, 80L, 10L, 11L, 81L, 16L, 82L, 13L, 83L,
14L, 15L, 84L, 22L, 85L, 21L, 86L, 87L, 17L, 18L, 88L, 19L, 20L,
89L, 90L, 91L, 26L, 92L, 25L, 93L, 23L, 24L, 94L, 27L, 95L, 30L,
96L, 28L, 29L, 97L, 31L, 98L, 32L, 33L, 99L, 100L, 101L, 102L,
34L, 35L, 103L, 36L, 37L, 104L, 38L, 105L, 39L, 40L, 106L, 107L,
108L, 41L, 42L, 109L, 43L, 110L, 44L, 45L, 111L, 112L, 113L,
46L, 47L, 114L, 48L, 49L, 115L, 50L, 116L, 117L, 53L, 118L, 51L,
52L, 119L, 54L, 55L, 120L, 64L, 121L, 122L, 56L, 123L, 59L, 124L,
57L, 58L, 125L, 63L, 126L, 60L, 127L, 61L, 62L), .Dim = c(127L,
2L), .Dimnames = list(NULL, c("ancestor", "descendant"))), edge.length = c(`0-65` = NA,
`65-66` = 0.936507936507937, `66-67` = 0.0317460317460317, `67-1` = 0.0317460317460317,
`67-68` = 0.0158730158730159, `68-2` = 0.0158730158730159, `68-3` = 0.0158730158730159,
`66-69` = 0.0476190476190476, `69-4` = 0.0158730158730159, `69-5` = 0.0158730158730159,
`65-70` = 0.0793650793650794, `70-71` = 0.904761904761905, `71-6` = 0.0158730158730159,
`71-7` = 0.0158730158730159, `70-72` = 0.0317460317460317, `72-73` = 0.142857142857143,
`73-74` = 0.349206349206349, `74-75` = 0.174603174603175, `75-76` = 0.0952380952380952,
`76-77` = 0.0634920634920635, `77-12` = 0.0634920634920635, `77-78` = 0.0158730158730159,
`78-79` = 0.0317460317460317, `79-8` = 0.0158730158730159, `79-9` = 0.0158730158730159,
`78-80` = 0.0317460317460317, `80-10` = 0.0158730158730159, `80-11` = 0.0158730158730159,
`76-81` = 0.0793650793650794, `81-16` = 0.0476190476190476, `81-82` = 0.0158730158730159,
`82-13` = 0.0317460317460317, `82-83` = 0.0158730158730159, `83-14` = 0.0158730158730159,
`83-15` = 0.0158730158730159, `75-84` = 0.142857142857143, `84-22` = 0.0793650793650794,
`84-85` = 0.0158730158730159, `85-21` = 0.0634920634920635, `85-86` = 0.0158730158730159,
`86-87` = 0.0317460317460317, `87-17` = 0.0158730158730159, `87-18` = 0.0158730158730159,
`86-88` = 0.0317460317460317, `88-19` = 0.0158730158730159, `88-20` = 0.0158730158730159,
`74-89` = 0.238095238095238, `89-90` = 0.0476190476190476, `90-91` = 0.0634920634920635,
`91-26` = 0.0476190476190476, `91-92` = 0.0158730158730159, `92-25` = 0.0317460317460317,
`92-93` = 0.0158730158730159, `93-23` = 0.0158730158730159, `93-24` = 0.0158730158730159,
`90-94` = 0.0634920634920635, `94-27` = 0.0476190476190476, `94-95` = 0.0158730158730159,
`95-30` = 0.0317460317460317, `95-96` = 0.0158730158730159, `96-28` = 0.0158730158730159,
`96-29` = 0.0158730158730159, `89-97` = 0.126984126984127, `97-31` = 0.0317460317460317,
`97-98` = 0.0158730158730159, `98-32` = 0.0158730158730159, `98-33` = 0.0158730158730159,
`73-99` = 0.412698412698413, `99-100` = 0.238095238095238, `100-101` = 0.0476190476190476,
`101-102` = 0.0317460317460317, `102-34` = 0.0158730158730159,
`102-35` = 0.0158730158730159, `101-103` = 0.0317460317460317,
`103-36` = 0.0158730158730159, `103-37` = 0.0158730158730159,
`100-104` = 0.0634920634920635, `104-38` = 0.0317460317460317,
`104-105` = 0.0158730158730159, `105-39` = 0.0158730158730159,
`105-40` = 0.0158730158730159, `99-106` = 0.111111111111111,
`106-107` = 0.158730158730159, `107-108` = 0.0476190476190476,
`108-41` = 0.0158730158730159, `108-42` = 0.0158730158730159,
`107-109` = 0.0317460317460317, `109-43` = 0.0317460317460317,
`109-110` = 0.0158730158730159, `110-44` = 0.0158730158730159,
`110-45` = 0.0158730158730159, `106-111` = 0.0793650793650794,
`111-112` = 0.0952380952380952, `112-113` = 0.0317460317460317,
`113-46` = 0.0158730158730159, `113-47` = 0.0158730158730159,
`112-114` = 0.0317460317460317, `114-48` = 0.0158730158730159,
`114-49` = 0.0158730158730159, `111-115` = 0.0634920634920635,
`115-50` = 0.0793650793650794, `115-116` = 0.0158730158730159,
`116-117` = 0.0317460317460317, `117-53` = 0.0317460317460317,
`117-118` = 0.0158730158730159, `118-51` = 0.0158730158730159,
`118-52` = 0.0158730158730159, `116-119` = 0.0476190476190476,
`119-54` = 0.0158730158730159, `119-55` = 0.0158730158730159,
`72-120` = 0.761904761904762, `120-64` = 0.126984126984127, `120-121` = 0.0158730158730159,
`121-122` = 0.0634920634920635, `122-56` = 0.0476190476190476,
`122-123` = 0.0158730158730159, `123-59` = 0.0317460317460317,
`123-124` = 0.0158730158730159, `124-57` = 0.0158730158730159,
`124-58` = 0.0158730158730159, `121-125` = 0.0634920634920635,
`125-63` = 0.0476190476190476, `125-126` = 0.0158730158730159,
`126-60` = 0.0317460317460317, `126-127` = 0.0158730158730159,
`127-61` = 0.0158730158730159, `127-62` = 0.0158730158730159),
label = c(`1` = "Anhima_cornuta", `2` = "Alopochen_aegyptiaca",
`3` = "Anas_undulata", `4` = "Francolinus_coqui", `5` = "Meleagris_gallopavo",
`6` = "Pterocles_namaqua", `7` = "Streptopelia_lugens", `8` = "Anhinga_melanogaster",
`9` = "Phalacrocorax_capensis", `10` = "Morus_bassanus",
`11` = "Morus_capensis", `12` = "Fregata_minor", `13` = "Threskiornis_aethiopicus",
`14` = "Ephippiorhynchus_senegalensis", `15` = "Mycteria_ibis",
`16` = "Gavia_immer", `17` = "Pterodroma_macroptera", `18` = "Procellaria_cinerea",
`19` = "Halobaena_caerulea", `20` = "Pelecanoides_urinatrix",
`21` = "Thalassarche_chlororhynchos", `22` = "Hydrobates_pelagicus",
`23` = "Pelecanus_occidentalis", `24` = "Pelecanus_rufescens",
`25` = "Pelecanus_onocrotalus", `26` = "Scopus_umbretta",
`27` = "Pygoscelis_papua", `28` = "Spheniscus_demersus",
`29` = "Spheniscus_magellanicus", `30` = "Eudyptes_chrysocome",
`31` = "Egretta_garzetta", `32` = "Ardea_melanocephala",
`33` = "Ardea_cinerea", `34` = "Fulica_cristata", `35` = "Podica_senegalensis",
`36` = "Aramus_guarauna", `37` = "Grus_paradisea", `38` = "Phoenicopterus_ruber",
`39` = "Podiceps_nigricollis", `40` = "Tachybaptus_ruficollis",
`41` = "Chionis_albus", `42` = "Burhinus_capensis", `43` = "Charadrius_marginatus",
`44` = "Recurvirostra_avosetta", `45` = "Haematopus_moquini",
`46` = "Numenius_arquata", `47` = "Phalaropus_fulicarius",
`48` = "Actophilornis_africanus", `49` = "Rostratula_benghalensis",
`50` = "Dromas_ardeola", `51` = "Larus_fuscus", `52` = "Rynchops_flavirostris",
`53` = "Sterna_fuscata", `54` = "Stercorarius_pomarinus",
`55` = "Uria_aalge", `56` = "Apus_caffer", `57` = "Apaloderma_narina",
`58` = "Alcedo_semitorquata", `59` = "Caprimulgus_rufigena",
`60` = "Sturnus_vulgaris", `61` = "Cinclus_leucocephalus",
`62` = "Cinclus_schulzi", `63` = "Poicephalus_robustus",
`64` = "Centropus_senegalensis"), edge.label = structure(character(0), .Names = character(0)),
order = "preorder", annote = list())
To access slots of an S4 object you need to use @, not $:
instead of local$label use local@label
Get a straightforward intro into S4 classes here:
http://adv-r.had.co.nz/S4.html

merge two data.frame, keeping all matching rows, in R

I'm struggling to merge two data.frames with NA values occurring in one or the other df.
# One-row example record for experiment "A1MRJ", replicate 1.
# GB28 / GB28.1 are missing here.
sampleA <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 1L,
    GB05 = 102L, GB05.1 = 102L,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = NA_integer_, GB28.1 = NA_integer_,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 32L,
  class = "data.frame"
)

# One-row example record for the same experiment, replicate 2.
# GB05 / GB05.1 are missing here, while GB28 / GB28.1 are present.
sampleB <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 2L,
    GB05 = NA, GB05.1 = NA,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = 390L, GB28.1 = 390L,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 33L,
  class = "data.frame"
)
Output needed, as a data.frame: only one line for every matching "Nom_xp", so that an NA gets replaced by the value from either A or B, if the value exists in one or the other DF.
Nom_xp GB05 GB05 GB18 GB18 GB06 GB06 GB27 GB27 GB24 GB24 GB28 GB28 GB15 GB15 GB02 GB02 GB10 GB10 GB14 GB14
A1MRJ 102 102 177 177 240 240 169 169 240 242 390 390 142 144 197 197 126 134 181 193
I would've thought that :
# Full outer join on Nom_xp: all = TRUE is shorthand for all.x = TRUE plus
# all.y = TRUE. Spell out TRUE, because T is an ordinary variable that can be
# reassigned.
output <- merge(A, B, by = "Nom_xp", all = TRUE)
or
# NOTE(review): join() is not a base R function; presumably plyr::join, whose
# signature has a match = "all" argument. Confirm which package is loaded.
output <- join(A,B,by="Nom_xp",match="all")
would give me what I need, but no luck so far... What am I missing ? Actual data.frame has more than one row.
Do you have just one row? Then, wouldn't this be sufficient? You can get the result in sampleB as:
# Single-row case: use the NA pattern of sampleB as a column selector and
# overwrite those columns of sampleB with the same columns of sampleA.
# NOTE(review): is.na(sampleB) is a logical matrix, not a plain vector; verify
# that it behaves as a column index here before using this on real data.
sampleB[, is.na(sampleB)] <- sampleA[, is.na(sampleB)]
No, apply, join and merge are not necessary here, I think. Not tested, but this would work.
# Fill every NA cell of sampleB with the value at the same position in
# sampleA. Both data frames must have the same number of rows and sampleA must
# contain every column of sampleB.
# The fill is done column by column so that each column keeps its own type:
# indexing a data frame with a logical matrix (sampleA[is.na(sampleB)]) goes
# through as.matrix(), which is a character matrix here because of Nom_xp, so
# the filled-in numbers would end up as strings.
sampleB[] <- Map(
  function(b_col, a_col) {
    missing_b <- is.na(b_col)
    b_col[missing_b] <- a_col[missing_b]
    b_col
  },
  sampleB,
  sampleA[names(sampleB)]
)
Not entirely sure what your whole data set looks like, but I assume you could have several samples with the same "Nom_xp" and not only 2? And that you probably have all your data in one big data frame or similar?
If so, maybe this code could be a good start (maybe someone can help out and rewrite this much more efficiently?). Anyhow:
# Example records. Several rows can share one Nom_xp id, each with NAs in
# different columns. The list elements are already named, so no separate
# .Names attribute is needed.
sampleA <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 1L,
    GB05 = 102L, GB05.1 = 102L,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = NA_integer_, GB28.1 = NA_integer_,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 32L,
  class = "data.frame"
)
sampleB <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 2L,
    GB05 = NA, GB05.1 = NA,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = 390L, GB28.1 = 390L,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 33L,
  class = "data.frame"
)
sampleC <- structure(
  list(
    Nom_xp = "ASDF", Rep = 2L,
    GB05 = NA, GB05.1 = NA,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 12349L, GB27.1 = 3,
    GB24 = 234112, GB24.1 = 242L,
    GB28 = 234, GB28.1 = 390L,
    GB15 = NA, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = NA, GB14.1 = 193L
  ),
  row.names = 34L,
  class = "data.frame"
)
sampleD <- structure(
  list(
    Nom_xp = "ASDF", Rep = 2L,
    GB05 = 214, GB05.1 = 34,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 3,
    GB24 = NA, GB24.1 = 242L,
    GB28 = 234, GB28.1 = 390L,
    GB15 = 56, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 15466L, GB10.1 = 134L,
    GB14 = 34, GB14.1 = 193L
  ),
  row.names = 35L,
  class = "data.frame"
)

cdat <- rbind(sampleA, sampleB, sampleC, sampleD) # simulating your data set (?)
dcols <- ncol(cdat)

# Collapse all rows that share a Nom_xp id into a single row.
#
# cdat: data frame whose first column is the id `Nom_xp`, whose second column
#       is the replicate, and whose remaining columns are numeric measurements.
#
# Returns a matrix with one row per distinct id and the columns of `cdat`:
#   column 1  - the id,
#   column 2  - the positions of the contributing rows of `cdat`, separated by
#               spaces (row positions, not the values of `Rep`),
#   columns 3+ - the per-column mean over the contributing rows, ignoring NA.
#               A column that is NA in every contributing row stays NA.
# The matrix is of type character because it mixes ids and numbers; convert
# the measurement columns with as.numeric() before computing on them.
# Where replicates disagree, the mean of the differing values is reported.
collapse_by_nom_xp <- function(cdat) {
  n_cols <- ncol(cdat)
  value_cols <- setdiff(seq_len(n_cols), 1:2)
  ids <- unique(cdat$Nom_xp) # hoisted: does not change inside the loop
  out <- matrix(
    nrow = length(ids), ncol = n_cols,
    dimnames = list(NULL, colnames(cdat))
  )
  for (j in seq_along(ids)) {
    # Exact comparison: a regex search such as grep() would also pick up ids
    # that merely contain this id (e.g. "A1" inside "A1MRJ").
    rows <- which(cdat$Nom_xp %in% ids[j])
    out[j, 1] <- as.character(ids[j])
    out[j, 2] <- paste(rows, collapse = " ")
    # Column-wise over the data frame, so no coercion of the block to a matrix.
    means <- vapply(
      cdat[rows, value_cols, drop = FALSE],
      mean, numeric(1),
      na.rm = TRUE
    )
    means[is.nan(means)] <- NA # mean() of nothing is NaN; report it as missing
    out[j, value_cols] <- means
  }
  out
}

mat <- collapse_by_nom_xp(cdat)

Resources