ggplot2: legend symbols matching plot symbols - r

I made a ggplot where I make use of the viridis color package. I adjusted the geom_point in the graph to different symbols. The symbols are not displaying in my legend, although the colors are correctly programmed. How can I match my legend (including symbols and colors) with my ggplot?
Attempt:
library(ggplot)
library(viridis)
ggplot(df, aes(`Lengte_(cm)`, verschil_lengte))+
geom_point(aes(shape = Lengteklasse, colour = Lengteklasse), size = 3)+
geom_hline(yintercept = 1.0, linetype="dashed", color = "red")+
geom_hline(yintercept = 2.0, linetype="dashed", color = "red")+
scale_shape_manual(values = c(16, 17, 15, 3, 8), guide = "none")+
scale_color_viridis(discrete = T, option = "D")+
scale_x_continuous(breaks = seq(7,12, by = 0.5))+
scale_y_continuous(breaks = seq(0,3, by = 0.5))+
labs(x = "Lengte (cm)", y = "Verschaling (mm)")+
guides(col = guide_legend("Lengteklasse (cm)"))+
theme_classic()
current outcome:
df =
structure(list(`Lengte_(cm)` = c(9, 10.7, 10.7, 7.7, 9.1, 11.2,
9.7, 10.2, 8.6, 8.9, 11.2, 11.4, 10.5, 10.5, 11.1, 8.9, 11.5,
10.4, 9.1, 9.2, 10.1, 7.8, 9.8, 8.2, 10.1, 10.5, 10.2, 7.9, 9.3,
8, 8.7, 8.9, 8.8, 9.3, 8.5, 7.7, 11.2, 9.4, 9.7, 11.2, 11, 10.7,
9), Lengteklasse = structure(c(4L, 5L, 5L, 2L, 4L, 6L, 4L, 5L,
3L, 3L, 6L, 6L, 5L, 5L, 6L, 3L, 6L, 5L, 4L, 4L, 5L, 2L, 4L, 3L,
5L, 5L, 5L, 2L, 4L, 3L, 3L, 3L, 3L, 4L, 3L, 2L, 6L, 4L, 4L, 6L,
6L, 5L, 4L), .Label = c("6", "7", "8", "9", "10", "11", "12",
"13"), class = "factor"), verschil_lengte = c(0, 1.4, 1.8, 1.8,
1.4, 0.800000000000001, 0.600000000000001, 0.600000000000001,
1.4, 1.9, 1.3, 1.5, 0.300000000000001, 0.5, 0.9, 2.2, 1, 1, 1.4,
2.1, 1.3, 2.2, 0.899999999999999, 2.3, 1.1, 0.699999999999999,
2.1, 0.4, 0.5, 0.9, 2.1, 1.6, 1.7, 0.799999999999999, 2, 2.1,
0.5, 0.799999999999999, 1.3, 0.4, 0.300000000000001, 1.6, 0.199999999999999
)), row.names = c(NA, -43L), class = c("tbl_df", "tbl", "data.frame"
), na.action = structure(c(`1` = 1L, `2` = 2L, `3` = 3L, `4` = 4L,
`5` = 5L, `6` = 6L, `7` = 7L, `8` = 8L, `9` = 9L, `10` = 10L,
`11` = 11L, `12` = 12L, `13` = 13L, `14` = 14L, `15` = 15L, `16` = 16L,
`17` = 17L, `18` = 18L, `19` = 19L, `20` = 20L, `21` = 21L, `22` = 22L,
`23` = 23L, `24` = 24L, `25` = 25L, `26` = 26L, `27` = 27L, `28` = 28L,
`29` = 29L, `30` = 30L, `31` = 31L, `32` = 32L, `33` = 33L, `34` = 34L,
`35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L, `39` = 39L, `40` = 40L,
`41` = 41L, `42` = 42L, `43` = 43L, `44` = 44L, `45` = 45L, `46` = 46L,
`47` = 47L, `48` = 48L, `49` = 49L, `50` = 50L, `51` = 51L, `52` = 52L,
`53` = 53L, `54` = 54L, `55` = 55L, `56` = 56L, `57` = 57L, `58` = 58L,
`59` = 59L, `60` = 60L, `61` = 61L, `62` = 62L, `63` = 63L, `64` = 64L,
`65` = 65L, `66` = 66L, `67` = 67L, `68` = 68L, `69` = 69L, `70` = 70L,
`71` = 71L, `72` = 72L, `73` = 73L, `74` = 74L, `75` = 75L, `76` = 76L,
`77` = 77L, `78` = 78L, `79` = 79L, `80` = 80L, `81` = 81L, `82` = 82L,
`83` = 83L, `84` = 84L, `85` = 85L, `86` = 86L, `87` = 87L, `88` = 88L,
`89` = 89L, `90` = 90L, `91` = 91L, `92` = 92L, `93` = 93L, `94` = 94L,
`95` = 95L, `96` = 96L, `97` = 97L, `98` = 98L, `99` = 99L, `100` = 100L,
`101` = 101L, `102` = 102L, `103` = 103L, `104` = 104L, `105` = 105L,
`106` = 106L, `107` = 107L, `108` = 108L, `109` = 109L, `110` = 110L,
`111` = 111L, `112` = 112L, `113` = 113L, `114` = 114L, `115` = 115L,
`116` = 116L, `117` = 117L, `118` = 118L, `119` = 119L, `120` = 120L,
`121` = 121L, `122` = 122L, `123` = 123L, `124` = 124L, `125` = 125L,
`126` = 126L, `127` = 127L, `128` = 128L, `129` = 129L, `130` = 130L,
`131` = 131L, `132` = 132L, `133` = 133L, `134` = 134L, `135` = 135L,
`136` = 136L, `137` = 137L, `138` = 138L, `139` = 139L, `140` = 140L,
`141` = 141L, `142` = 142L, `143` = 143L, `144` = 144L, `145` = 145L,
`146` = 146L, `147` = 147L, `148` = 148L, `149` = 149L, `150` = 150L,
`151` = 151L, `152` = 152L, `153` = 153L, `154` = 154L, `155` = 155L,
`156` = 156L, `157` = 157L, `158` = 158L, `159` = 159L, `160` = 160L,
`161` = 161L, `162` = 162L, `163` = 163L, `164` = 164L, `165` = 165L,
`166` = 166L, `167` = 167L, `168` = 168L, `169` = 169L, `170` = 170L,
`171` = 171L, `172` = 172L, `173` = 173L, `174` = 174L, `175` = 175L,
`176` = 176L, `177` = 177L, `178` = 178L, `179` = 179L, `180` = 180L,
`181` = 181L, `182` = 182L, `183` = 183L, `184` = 184L, `185` = 185L,
`186` = 186L, `187` = 187L, `188` = 188L, `189` = 189L, `190` = 190L,
`191` = 191L, `192` = 192L, `193` = 193L, `194` = 194L, `195` = 195L,
`196` = 196L, `197` = 197L, `198` = 198L, `199` = 199L, `200` = 200L,
`201` = 201L, `202` = 202L, `203` = 203L, `204` = 204L, `205` = 205L,
`206` = 206L, `207` = 207L, `208` = 208L, `209` = 209L, `210` = 210L,
`211` = 211L, `212` = 212L, `213` = 213L, `214` = 214L, `215` = 215L,
`216` = 216L, `217` = 217L, `218` = 218L, `219` = 219L, `220` = 220L,
`221` = 221L, `222` = 222L, `223` = 223L, `224` = 224L, `225` = 225L,
`226` = 226L, `227` = 227L, `228` = 228L, `229` = 229L, `230` = 230L,
`231` = 231L, `232` = 232L, `233` = 233L, `234` = 234L, `235` = 235L,
`236` = 236L, `237` = 237L, `238` = 238L, `239` = 239L, `240` = 240L,
`241` = 241L, `242` = 242L, `243` = 243L, `244` = 244L, `245` = 245L,
`246` = 246L, `247` = 247L, `248` = 248L, `249` = 249L, `250` = 250L,
`251` = 251L, `252` = 252L, `253` = 253L, `254` = 254L, `255` = 255L,
`256` = 256L, `257` = 257L, `258` = 258L, `259` = 259L, `260` = 260L,
`261` = 261L, `262` = 262L, `263` = 263L, `264` = 264L, `265` = 265L,
`266` = 266L, `267` = 267L, `268` = 268L, `269` = 269L, `270` = 270L,
`271` = 271L, `272` = 272L, `273` = 273L, `274` = 274L, `275` = 275L,
`277` = 277L, `278` = 278L, `279` = 279L, `280` = 280L, `281` = 281L,
`282` = 282L, `284` = 284L, `285` = 285L, `286` = 286L, `288` = 288L,
`289` = 289L, `290` = 290L, `291` = 291L, `292` = 292L, `293` = 293L,
`294` = 294L, `295` = 295L, `296` = 296L, `297` = 297L, `298` = 298L,
`300` = 300L, `301` = 301L, `302` = 302L, `303` = 303L, `304` = 304L,
`305` = 305L, `306` = 306L, `308` = 308L, `309` = 309L, `310` = 310L,
`311` = 311L, `312` = 312L, `313` = 313L, `314` = 314L, `315` = 315L,
`316` = 316L, `317` = 317L, `318` = 318L, `319` = 319L, `321` = 321L,
`322` = 322L, `323` = 323L, `324` = 324L, `325` = 325L, `326` = 326L,
`327` = 327L, `328` = 328L, `329` = 329L, `330` = 330L, `331` = 331L,
`333` = 333L, `334` = 334L, `335` = 335L, `336` = 336L, `337` = 337L,
`338` = 338L, `339` = 339L, `340` = 340L, `341` = 341L, `342` = 342L,
`343` = 343L, `344` = 344L, `345` = 345L, `346` = 346L, `347` = 347L,
`348` = 348L, `349` = 349L, `351` = 351L, `352` = 352L, `354` = 354L,
`356` = 356L, `357` = 357L, `358` = 358L, `359` = 359L, `360` = 360L,
`361` = 361L, `362` = 362L, `363` = 363L, `364` = 364L, `366` = 366L,
`368` = 368L, `369` = 369L, `370` = 370L, `371` = 371L, `372` = 372L,
`373` = 373L, `374` = 374L, `375` = 375L, `376` = 376L, `377` = 377L,
`378` = 378L, `379` = 379L, `380` = 380L, `382` = 382L, `383` = 383L,
`384` = 384L, `387` = 387L, `388` = 388L, `390` = 390L, `391` = 391L,
`392` = 392L, `393` = 393L, `394` = 394L, `395` = 395L, `396` = 396L,
`397` = 397L, `399` = 399L, `400` = 400L, `401` = 401L, `402` = 402L,
`404` = 404L, `405` = 405L, `406` = 406L, `407` = 407L, `408` = 408L,
`409` = 409L, `410` = 410L, `411` = 411L, `412` = 412L, `413` = 413L,
`414` = 414L, `415` = 415L, `416` = 416L, `417` = 417L, `419` = 419L,
`420` = 420L, `423` = 423L, `424` = 424L, `425` = 425L, `426` = 426L,
`427` = 427L, `429` = 429L, `430` = 430L, `431` = 431L, `432` = 432L,
`433` = 433L, `434` = 434L, `435` = 435L, `436` = 436L, `437` = 437L,
`438` = 438L, `439` = 439L, `440` = 440L, `441` = 441L, `442` = 442L,
`443` = 443L, `444` = 444L, `446` = 446L, `447` = 447L, `448` = 448L,
`450` = 450L, `451` = 451L, `452` = 452L, `453` = 453L, `454` = 454L,
`455` = 455L, `456` = 456L, `457` = 457L, `459` = 459L, `460` = 460L,
`462` = 462L, `463` = 463L, `464` = 464L, `465` = 465L, `466` = 466L,
`467` = 467L, `468` = 468L, `469` = 469L, `470` = 470L, `471` = 471L,
`472` = 472L, `473` = 473L, `474` = 474L, `475` = 475L, `476` = 476L,
`478` = 478L, `479` = 479L, `480` = 480L, `481` = 481L, `482` = 482L,
`483` = 483L, `484` = 484L, `485` = 485L, `486` = 486L, `487` = 487L,
`488` = 488L, `489` = 489L, `490` = 490L, `491` = 491L, `493` = 493L,
`495` = 495L, `496` = 496L, `497` = 497L, `498` = 498L, `499` = 499L,
`500` = 500L, `501` = 501L, `502` = 502L, `503` = 503L, `504` = 504L,
`505` = 505L, `506` = 506L, `507` = 507L, `508` = 508L, `509` = 509L,
`510` = 510L, `511` = 511L, `512` = 512L, `513` = 513L, `514` = 514L,
`515` = 515L, `516` = 516L, `517` = 517L, `518` = 518L, `519` = 519L,
`520` = 520L, `521` = 521L, `522` = 522L, `523` = 523L, `524` = 524L,
`525` = 525L, `526` = 526L, `527` = 527L, `528` = 528L, `529` = 529L,
`530` = 530L, `531` = 531L, `532` = 532L, `533` = 533L, `535` = 535L,
`536` = 536L, `537` = 537L, `538` = 538L, `539` = 539L, `540` = 540L,
`542` = 542L, `543` = 543L, `544` = 544L, `545` = 545L, `546` = 546L,
`547` = 547L, `548` = 548L, `549` = 549L, `550` = 550L, `551` = 551L,
`553` = 553L, `554` = 554L, `555` = 555L, `556` = 556L, `557` = 557L,
`558` = 558L, `559` = 559L, `560` = 560L, `561` = 561L, `562` = 562L,
`563` = 563L, `564` = 564L, `565` = 565L, `566` = 566L, `567` = 567L,
`568` = 568L, `569` = 569L, `570` = 570L, `571` = 571L, `572` = 572L,
`573` = 573L, `574` = 574L, `575` = 575L, `576` = 576L, `577` = 577L,
`578` = 578L, `579` = 579L, `580` = 580L, `581` = 581L, `582` = 582L,
`583` = 583L, `584` = 584L, `585` = 585L, `586` = 586L, `587` = 587L,
`588` = 588L, `589` = 589L, `590` = 590L, `591` = 591L, `593` = 593L,
`595` = 595L, `596` = 596L, `597` = 597L, `598` = 598L, `599` = 599L,
`601` = 601L, `602` = 602L, `603` = 603L, `604` = 604L, `605` = 605L,
`606` = 606L, `608` = 608L, `609` = 609L, `610` = 610L, `611` = 611L,
`612` = 612L, `614` = 614L, `615` = 615L, `616` = 616L, `617` = 617L,
`618` = 618L, `619` = 619L, `620` = 620L, `621` = 621L, `622` = 622L,
`623` = 623L, `624` = 624L, `625` = 625L, `626` = 626L, `627` = 627L,
`628` = 628L, `629` = 629L, `631` = 631L, `632` = 632L, `633` = 633L,
`634` = 634L, `635` = 635L, `636` = 636L, `637` = 637L, `638` = 638L,
`639` = 639L, `640` = 640L, `641` = 641L, `642` = 642L, `643` = 643L,
`645` = 645L, `646` = 646L, `647` = 647L, `648` = 648L, `649` = 649L,
`650` = 650L, `651` = 651L, `652` = 652L, `653` = 653L, `654` = 654L,
`655` = 655L, `657` = 657L, `658` = 658L, `659` = 659L, `661` = 661L,
`662` = 662L, `663` = 663L, `664` = 664L, `666` = 666L, `667` = 667L,
`668` = 668L, `669` = 669L, `670` = 670L, `671` = 671L, `672` = 672L,
`673` = 673L, `675` = 675L, `677` = 677L, `678` = 678L, `679` = 679L,
`680` = 680L, `681` = 681L, `682` = 682L, `683` = 683L, `684` = 684L,
`685` = 685L, `686` = 686L, `687` = 687L, `688` = 688L, `689` = 689L,
`690` = 690L, `691` = 691L, `692` = 692L, `693` = 693L, `696` = 696L,
`697` = 697L, `698` = 698L), class = "omit"))

Your guide="none" implies you don't want a legend for shape. That's why the shapes don't appear. To combine two legends, give them the same name. Thus:
library(ggplot2) # Note typo correction
library(viridis)

# Shape and colour are both mapped to Lengteklasse. Giving the shape scale and
# the colour scale the same name lets ggplot2 draw them as a single legend.
ggplot(df, aes(`Lengte_(cm)`, verschil_lengte)) +
  geom_point(aes(shape = Lengteklasse, colour = Lengteklasse), size = 3) +
  # Dashed red reference lines at y = 1 and y = 2.
  geom_hline(yintercept = 1.0, linetype = "dashed", color = "red") +
  geom_hline(yintercept = 2.0, linetype = "dashed", color = "red") +
  scale_shape_manual(values = c(16, 17, 15, 3, 8), name = "Lengteklasse (cm)") +
  # TRUE is spelled out: T is an ordinary variable that can be reassigned.
  scale_color_viridis(discrete = TRUE, option = "D", name = "Lengteklasse (cm)") +
  scale_x_continuous(breaks = seq(7, 12, by = 0.5)) +
  scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
  labs(x = "Lengte (cm)", y = "Verschaling (mm)") +
  guides(col = guide_legend("Lengteklasse (cm)")) +
  theme_classic()
produces
[You can also do away with guides(col = guide_legend("Lengteklasse (cm)"))+.]

Here is another approach. My first thought was the same as @Limey's answer, which has already been posted, but the key point is:
If you want colour and shape to appear together in one legend, you have to give both scales the same name.
To do that, set the name argument in both scale_color_viridis and scale_shape_manual.
You can then remove guides(), as Limey already pointed out:
library(ggplot2) # the package is called ggplot2; library(ggplot) fails
library(viridis)

# Shape and colour are mapped to the same variable at the plot level, and both
# scales carry the same name, so ggplot2 draws one combined legend.
ggplot(
  df,
  aes(`Lengte_(cm)`, verschil_lengte, shape = Lengteklasse, colour = Lengteklasse)
) +
  geom_point(size = 3) +
  # Dashed red reference lines at y = 1 and y = 2.
  geom_hline(yintercept = 1.0, linetype = "dashed", color = "red") +
  geom_hline(yintercept = 2.0, linetype = "dashed", color = "red") +
  # TRUE is spelled out: T is an ordinary variable that can be reassigned.
  scale_color_viridis(
    name = "Lengteklasse (cm)",
    discrete = TRUE, option = "D"
  ) +
  scale_shape_manual(
    name = "Lengteklasse (cm)",
    values = c(16, 17, 15, 3, 8)
  ) +
  scale_x_continuous(breaks = seq(7, 12, by = 0.5)) +
  scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
  labs(x = "Lengte (cm)", y = "Verschaling (mm)") +
  theme_classic()

Related

How to only select rows that are duplicated in a column in a dataframe

I have joined two dataframes together and I am trying to select only the 'Branch Codes' that are duplicated.
I want to join the datasets 'BranchData' and 'BranchCode' so that any branch codes that are common to both datasets are included, as well as those that are not common to both datasets.
However, the last line of the code below does not seem to work!
# Convert the 'Branch Code' column of both tables to numeric before joining.
BranchData$'Branch Code' <- as.numeric(BranchData$'Branch Code')
BranchCalls$'Branch Code' <- as.numeric(BranchCalls$'Branch Code')
# Drop every row of BranchData that contains an NA.
BranchData <- na.omit(BranchData)
merged <- full_join(BranchData,BranchCalls)
merged <- merged %>% group_by(merged$`Branch Code`) %>% filter(n() >= 2)
Also, I tried to put the duplicates into groups so that all the duplicates are together, but the following code doesn't seem to work either:
merged <- group_by(merged,merged$'Branch Code')
Minimal Reproducible Example:
structure(list(`Branch Code` = c(401801, 436801, 403801, 164801,
198801), `Location Type` = c("Urban", "Urban", "Urban Deprived",
"Rural", "Urban"), Type = c("MAIN", "MAIN", "MAIN", "MAIN", "LM"
), Status = c("Open", "Open", "Open", "Open", "Open"), Segment = c("Agency",
"Agency", "Agency", "Agency", "Agency"), `Multiple (partner that owns multiple branches)` = c("Multiple 11",
"Multiple 11", "Multiple 12", "Multiple 13", "Multiple 13"),
RetailType = c("Books_Stationery", "Books_Stationery", "Convenience",
"Convenience", "Convenience"), `Volume of transactions` = c(2238,
1514, 1346, 1338, 625), `Open hours` = c(47.75, 50.2500000000001,
46.5, 48.25, 114.25), `X Pos` = c(394169, 393488, 394434,
392153, 393094), `Y Pos` = c(806326, 805877, 804347, 796902,
802789), Urbanity = c("Major Centre", "Major Centre", "High Density",
"Low Density", "Low Density"), `Case Reference Number` = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), `Created On` = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), tzone = "UTC", class = c("POSIXct",
"POSIXt")), `Branch Type` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), L1 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), L2 = c(NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), L3 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), L4 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), `Case Type` = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
), na.action = structure(c(`3` = 3L, `4` = 4L, `5` = 5L, `6` = 6L,
`7` = 7L, `8` = 8L, `9` = 9L, `11` = 11L, `13` = 13L, `16` = 16L,
`17` = 17L, `18` = 18L, `20` = 20L, `21` = 21L, `22` = 22L, `23` = 23L,
`26` = 26L, `27` = 27L, `28` = 28L, `29` = 29L, `31` = 31L, `32` = 32L,
`33` = 33L, `34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `39` = 39L,
`40` = 40L, `41` = 41L, `42` = 42L, `43` = 43L, `44` = 44L, `45` = 45L,
`46` = 46L, `47` = 47L, `48` = 48L, `49` = 49L, `51` = 51L, `52` = 52L,
`54` = 54L, `55` = 55L, `57` = 57L, `58` = 58L, `59` = 59L, `60` = 60L,
`61` = 61L, `62` = 62L, `63` = 63L, `65` = 65L, `67` = 67L, `68` = 68L,
`69` = 69L, `70` = 70L, `71` = 71L, `72` = 72L, `74` = 74L, `75` = 75L,
`76` = 76L, `77` = 77L, `78` = 78L, `80` = 80L, `81` = 81L, `82` = 82L,
`83` = 83L, `84` = 84L, `86` = 86L, `87` = 87L, `88` = 88L, `89` = 89L,
`91` = 91L, `92` = 92L, `93` = 93L, `96` = 96L, `97` = 97L, `98` = 98L,
`99` = 99L, `100` = 100L, `101` = 101L, `103` = 103L, `106` = 106L,
`107` = 107L, `108` = 108L, `109` = 109L, `110` = 110L, `111` = 111L,
`112` = 112L, `113` = 113L, `114` = 114L, `115` = 115L, `116` = 116L,
`117` = 117L, `118` = 118L, `119` = 119L, `120` = 120L, `121` = 121L,
`122` = 122L, `123` = 123L, `124` = 124L, `126` = 126L, `127` = 127L,
`129` = 129L, `130` = 130L, `131` = 131L, `132` = 132L, `133` = 133L,
`134` = 134L, `135` = 135L, `136` = 136L, `137` = 137L, `139` = 139L,
`140` = 140L, `141` = 141L, `142` = 142L, `143` = 143L, `144` = 144L,
`145` = 145L, `146` = 146L, `147` = 147L, `148` = 148L, `149` = 149L,
`150` = 150L, `151` = 151L, `152` = 152L, `153` = 153L, `155` = 155L,
`156` = 156L, `157` = 157L, `160` = 160L, `161` = 161L, `162` = 162L,
`163` = 163L, `165` = 165L, `166` = 166L, `167` = 167L, `168` = 168L,
`169` = 169L, `174` = 174L, `175` = 175L, `176` = 176L, `177` = 177L,
`178` = 178L, `179` = 179L, `180` = 180L, `182` = 182L, `183` = 183L,
`185` = 185L, `186` = 186L, `188` = 188L, `189` = 189L, `190` = 190L,
`191` = 191L, `192` = 192L, `193` = 193L, `194` = 194L, `195` = 195L,
`196` = 196L, `197` = 197L, `198` = 198L, `199` = 199L, `200` = 200L,
`201` = 201L, `203` = 203L, `204` = 204L, `205` = 205L, `206` = 206L,
`207` = 207L, `209` = 209L, `210` = 210L, `211` = 211L, `212` = 212L,
`213` = 213L, `214` = 214L, `215` = 215L, `216` = 216L, `217` = 217L,
`218` = 218L, `219` = 219L, `220` = 220L, `221` = 221L, `222` = 222L,
`223` = 223L, `224` = 224L, `226` = 226L, `227` = 227L, `228` = 228L,
`229` = 229L, `230` = 230L, `231` = 231L, `232` = 232L, `233` = 233L,
`234` = 234L, `236` = 236L, `237` = 237L, `238` = 238L, `239` = 239L,
`240` = 240L, `241` = 241L, `242` = 242L, `243` = 243L, `244` = 244L,
`245` = 245L, `247` = 247L, `248` = 248L, `249` = 249L, `250` = 250L,
`251` = 251L, `252` = 252L, `253` = 253L, `254` = 254L, `255` = 255L,
`256` = 256L, `257` = 257L, `258` = 258L, `259` = 259L, `260` = 260L,
`261` = 261L, `262` = 262L, `263` = 263L, `264` = 264L, `265` = 265L,
`266` = 266L, `267` = 267L, `268` = 268L, `269` = 269L, `270` = 270L,
`271` = 271L, `272` = 272L, `273` = 273L, `274` = 274L, `276` = 276L,
`278` = 278L, `280` = 280L, `281` = 281L, `282` = 282L, `283` = 283L,
`284` = 284L, `285` = 285L, `286` = 286L, `288` = 288L, `289` = 289L,
`291` = 291L, `292` = 292L, `293` = 293L, `294` = 294L, `296` = 296L,
`297` = 297L, `298` = 298L, `299` = 299L, `300` = 300L, `301` = 301L,
`304` = 304L, `305` = 305L, `306` = 306L, `307` = 307L, `308` = 308L,
`311` = 311L, `312` = 312L, `313` = 313L, `316` = 316L, `319` = 319L,
`321` = 321L, `322` = 322L, `323` = 323L, `324` = 324L, `325` = 325L,
`326` = 326L, `327` = 327L, `328` = 328L, `329` = 329L, `330` = 330L,
`331` = 331L, `332` = 332L, `333` = 333L, `335` = 335L, `337` = 337L,
`338` = 338L, `339` = 339L, `340` = 340L, `341` = 341L, `342` = 342L,
`343` = 343L, `344` = 344L, `345` = 345L, `346` = 346L, `347` = 347L,
`348` = 348L, `349` = 349L, `350` = 350L, `351` = 351L, `352` = 352L,
`353` = 353L, `354` = 354L, `355` = 355L, `356` = 356L, `357` = 357L,
`359` = 359L, `360` = 360L, `361` = 361L, `362` = 362L, `363` = 363L,
`365` = 365L, `366` = 366L, `367` = 367L, `368` = 368L, `370` = 370L,
`371` = 371L, `372` = 372L, `373` = 373L, `375` = 375L, `376` = 376L,
`378` = 378L, `379` = 379L, `380` = 380L, `381` = 381L, `382` = 382L,
`384` = 384L, `385` = 385L, `387` = 387L, `388` = 388L, `389` = 389L,
`390` = 390L, `391` = 391L, `392` = 392L, `393` = 393L, `395` = 395L,
`396` = 396L, `397` = 397L, `398` = 398L, `399` = 399L, `400` = 400L,
`401` = 401L, `403` = 403L, `404` = 404L, `405` = 405L, `409` = 409L,
`412` = 412L, `413` = 413L, `414` = 414L, `415` = 415L, `416` = 416L,
`418` = 418L, `419` = 419L, `420` = 420L, `421` = 421L, `422` = 422L,
`423` = 423L, `426` = 426L, `427` = 427L, `428` = 428L, `429` = 429L,
`432` = 432L, `433` = 433L, `435` = 435L, `436` = 436L, `437` = 437L,
`438` = 438L, `440` = 440L, `441` = 441L, `442` = 442L, `443` = 443L,
I would be so grateful if anybody could give me a helping hand!
Thank you so much!
You can do it using table:
# Look the counts up by name: a numeric `Branch Code` used directly as an index
# would select table entries by position and return NA. as.vector() drops the
# table attributes so filter() receives a plain vector.
merged %>%
  filter(as.vector(table(`Branch Code`)[as.character(`Branch Code`)]) > 1)
or using add_count:
merged %>% add_count(`Branch Code`) %>% filter(n > 1)
I created a small sample data:
merged <- data.frame(branch_code = c("401801", "436801", "401801"),
location_type = c("Urban", "Urban", "Rural"))
branch_code location_type
1 401801 Urban
2 436801 Urban
3 401801 Rural
You can use this code:
# Keep only the rows whose branch_code occurs more than once. n() is the size
# of the current group, so no temporary count column is needed. The result
# stays grouped by branch_code.
merged %>%
  group_by(branch_code) %>%
  filter(n() > 1)
Output:
# A tibble: 2 × 2
# Groups: branch_code [1]
branch_code location_type
<chr> <chr>
1 401801 Urban
2 401801 Rural

How can i draw a barplot with 3 variables?

I'm having some trouble making a barplot.
I want to make a barplot with 3 ordinal variables (scale: yes, no, I don't know (for each))
I need the x-axis to show the bars side by side (yes1, yes2, yes3, no1, no2... and so on). The y-axis should show the frequency or the percentage.
Each variable belongs to a different wave in a panel and I want to show the changes through a barplot.
So far I've managed to draw a plot for each variable (see code)
What I need is to combine the 3 plots, but I don't know yet how to do it. I've tried facet_wrap/facet_grid, but I haven't been able to solve my problem with that approach. I also get the message:
"Don't know how to automatically pick scale for object of type haven_labelled. Defaulting to continuous." so the labels on the x-axis aren't shown.
Can someone please help me?
Thanks,
Ingrid.
Here is my data:
dput(veraenderung[1:4, ])
structure(list(vor = structure(c(2, 3, 3, 1), label = "Erwartung, dass sich durch die Teilnahme an der FoBi Veränderungen im Berufsallt", labels = c(ja = 1,
nein = 2, `weiß nicht` = 3), class = "haven_labelled"), nach = structure(c(2,
3, 1, 1), label = "Erwarten Sie, dass Ihre Teilnahme an dieser FoBi zu Veränderungen in Ihrem Beruf", labels = c(ja = 1,
nein = 2, `weiß nicht` = 3), class = "haven_labelled"), sechs_monate_spaeter = structure(c(2,
2, 1, 3), label = "Hat sich durch Ihre Teilnahme an der Fortbildung zur interkulturellen Kompetenz", labels = c(ja = 1,
nein = 2, `weiß nicht` = 9), class = "haven_labelled"), Welle123 = c(1,
1, 1, 1)), na.action = structure(c(`4` = 4L, `7` = 7L, `8` = 8L,
`9` = 9L, `10` = 10L, `11` = 11L, `12` = 12L, `13` = 13L, `14` = 14L,
`15` = 15L, `16` = 16L, `17` = 17L, `19` = 19L, `20` = 20L, `24` = 24L,
`26` = 26L, `27` = 27L, `29` = 29L, `30` = 30L, `31` = 31L, `33` = 33L,
`34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L, `39` = 39L,
`41` = 41L, `43` = 43L, `44` = 44L, `46` = 46L, `47` = 47L, `48` = 48L,
`49` = 49L, `50` = 50L, `52` = 52L, `54` = 54L, `55` = 55L, `58` = 58L,
`59` = 59L, `60` = 60L, `63` = 63L, `64` = 64L, `66` = 66L, `68` = 68L,
`71` = 71L, `72` = 72L, `73` = 73L, `74` = 74L, `75` = 75L, `78` = 78L,
`80` = 80L, `81` = 81L, `82` = 82L, `83` = 83L, `84` = 84L, `86` = 86L,
`87` = 87L, `91` = 91L, `92` = 92L, `94` = 94L, `97` = 97L, `99` = 99L,
`101` = 101L, `102` = 102L, `105` = 105L, `106` = 106L, `107` = 107L,
`108` = 108L, `109` = 109L, `112` = 112L, `113` = 113L, `114` = 114L,
`116` = 116L, `117` = 117L, `119` = 119L, `121` = 121L, `122` = 122L,
`123` = 123L, `124` = 124L, `127` = 127L, `128` = 128L, `130` = 130L,
`132` = 132L, `134` = 134L, `135` = 135L, `136` = 136L, `138` = 138L,
`139` = 139L, `140` = 140L, `141` = 141L, `142` = 142L, `144` = 144L,
`146` = 146L, `147` = 147L, `148` = 148L, `149` = 149L, `151` = 151L,
`152` = 152L, `153` = 153L, `156` = 156L, `157` = 157L, `159` = 159L,
`164` = 164L, `165` = 165L, `166` = 166L, `168` = 168L, `169` = 169L,
`170` = 170L, `172` = 172L, `173` = 173L, `174` = 174L, `176` = 176L,
`177` = 177L, `178` = 178L, `179` = 179L, `180` = 180L, `181` = 181L,
`183` = 183L, `184` = 184L, `185` = 185L, `190` = 190L, `191` = 191L,
`192` = 192L, `194` = 194L, `195` = 195L, `196` = 196L, `197` = 197L,
`202` = 202L, `205` = 205L, `206` = 206L, `208` = 208L, `209` = 209L,
`210` = 210L, `211` = 211L, `212` = 212L, `213` = 213L, `215` = 215L,
`216` = 216L, `217` = 217L, `218` = 218L, `221` = 221L, `223` = 223L,
`225` = 225L, `226` = 226L, `227` = 227L, `228` = 228L, `229` = 229L,
`230` = 230L, `231` = 231L, `232` = 232L, `233` = 233L, `234` = 234L,
`235` = 235L, `236` = 236L, `237` = 237L, `238` = 238L, `239` = 239L,
`240` = 240L, `241` = 241L, `242` = 242L, `243` = 243L, `244` = 244L,
`245` = 245L, `246` = 246L, `247` = 247L, `248` = 248L, `249` = 249L
), class = "omit"), row.names = c(NA, 4L), class = "data.frame")
Here is the code:
library(tidyverse)
# Pick the three survey items plus the wave indicator from ikoe, give the items
# readable names, drop incomplete rows and return a plain data.frame.
veraenderung <- ikoe %>%
  select(v13, wn06, xn2, Welle123) %>%
  rename(
    vor = v13,
    nach = wn06,
    sechs_monate_spaeter = xn2
  ) %>%
  # The piped data is already na.omit()'s first argument; the result object
  # must not be passed again as an extra argument.
  na.omit() %>%
  as.data.frame()
ggplot(veraenderung, aes(x = vor)) +
geom_bar()
ggplot(veraenderung, aes(x = nach)) +
geom_bar()
ggplot(veraenderung, aes(x = sechs_monate_spaeter)) +
geom_bar()
Your haven object is a bit of a challenge for tidyverse manipulations. See below what I suggest to make this object a bit "cleaner" (remove labels, change your values to character class), before reshaping it to long format and plotting.
library(tidyverse)
# Give the four columns short, plain names.
names(veraenderung) <- c('vor','nach','sechs','welle')
# Convert every column to character, then stack all columns into long format:
# 'key' holds the former column name and 'value' the answer code.
veraenderung <- veraenderung %>%
  as_tibble() %>%
  transmute_all(as.character) %>%
  pivot_longer(cols = everything(), names_to = 'key', values_to = 'value')
# One cluster of bars per former column, one dodged bar per answer value.
ggplot(veraenderung, aes(key)) +
  geom_bar(
    aes(fill = value),
    position = position_dodge(preserve = 'single')
  )
#try without preserve or position_dodge and see what happens
Created on 2020-02-06 by the reprex package (v0.3.0)

In R, how can I count the number of variable after argmax?

this below is sample of raw data which I have:
sentiment
pos neu neg
1 0 0
0.2 0.3 0.5
0.3 0.3 0.4
0 0 1
0.2 0.7 0.1
In this raw data of sentiment, "pos" refers to probability of positivity in comment, "neu" refers to probability of neutrality and "neg" refers to probability of negativity. I want to choose the highest probability among pos, neu and neg. and know the number of each sentiment. For example, pos : 0.6, neu : 0.2, neg : 0.2 is positive comment.
My desired output is like below:
pos_count neu_count neg_count
1 1 3
Could you help me make this?
Below is also dput :
structure(list(likes_count = c(0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 3L, 1L, 2L, 2L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
2L, 5L, 2L, 0L, 1L, 0L), neg = c(0, 0.41, 0, 0, 0,
0.19, 0, 1, 0, 0.52, 0, 0.11, 0.02, 0.05, 0.02, 0, 0, 0.01, 0.01,
0, 0, 0.97, 0, 0.01, 0.24, 0.34, 0.94, 0.44, 0.15, 0.01), neu = c(0,
0.1, 0, 0, 0, 0, 0.13, 0, 0.98, 0.32, 0, 0.08, 0.02, 0.04, 0.07,
0, 0, 0.98, 0.07, 0, 0, 0.03, 0.02, 0.21, 0.48, 0.62, 0.01, 0.2,
0.85, 0.67), pos = c(1, 0.48, 1, 1, 1, 0.81, 0.86,
0, 0.02, 0.16, 1, 0.81, 0.96, 0.91, 0.91, 1, 1, 0.01, 0.92, 1,
1, 0, 0.98, 0.78, 0.28, 0.04, 0.05, 0.36, 0, 0.32)), na.action = structure(c(`7` = 7L,
`11` = 11L, `38` = 38L, `53` = 53L, `88` = 88L, `101` = 101L,
`106` = 106L, `138` = 138L, `139` = 139L, `155` = 155L, `165` = 165L,
`176` = 176L, `178` = 178L, `179` = 179L, `199` = 199L, `200` = 200L,
`201` = 201L, `208` = 208L, `209` = 209L, `250` = 250L, `281` = 281L,
`293` = 293L, `299` = 299L, `316` = 316L, `321` = 321L, `322` = 322L,
`328` = 328L, `332` = 332L, `333` = 333L, `334` = 334L, `335` = 335L,
`336` = 336L, `342` = 342L, `347` = 347L, `352` = 352L, `354` = 354L,
`355` = 355L, `395` = 395L, `398` = 398L, `400` = 400L, `411` = 411L,
`420` = 420L, `449` = 449L, `454` = 454L, `456` = 456L, `457` = 457L,
`464` = 464L, `471` = 471L, `491` = 491L, `495` = 495L, `502` = 502L,
`503` = 503L, `504` = 504L, `506` = 506L, `526` = 526L, `536` = 536L,
`541` = 541L, `542` = 542L, `546` = 546L, `556` = 556L, `558` = 558L,
`563` = 563L, `579` = 579L, `581` = 581L, `582` = 582L, `584` = 584L,
`602` = 602L, `603` = 603L, `604` = 604L, `606` = 606L, `614` = 614L,
`617` = 617L, `619` = 619L, `620` = 620L, `621` = 621L, `622` = 622L,
`623` = 623L, `625` = 625L, `626` = 626L, `629` = 629L, `630` = 630L,
`631` = 631L, `632` = 632L, `633` = 633L, `636` = 636L, `637` = 637L,
`638` = 638L, `639` = 639L, `640` = 640L, `643` = 643L, `645` = 645L,
`646` = 646L, `647` = 647L, `648` = 648L, `650` = 650L, `652` = 652L,
`653` = 653L, `655` = 655L, `656` = 656L, `658` = 658L, `661` = 661L,
`665` = 665L, `666` = 666L, `667` = 667L, `669` = 669L, `671` = 671L,
`673` = 673L, `674` = 674L, `679` = 679L, `680` = 680L, `682` = 682L,
`683` = 683L, `684` = 684L, `685` = 685L, `686` = 686L, `687` = 687L,
`689` = 689L, `692` = 692L, `694` = 694L, `696` = 696L, `697` = 697L,
`699` = 699L, `700` = 700L, `701` = 701L, `702` = 702L, `703` = 703L,
`704` = 704L, `705` = 705L, `707` = 707L, `708` = 708L, `712` = 712L,
`713` = 713L, `714` = 714L, `717` = 717L, `718` = 718L, `719` = 719L,
`720` = 720L, `721` = 721L, `722` = 722L, `723` = 723L, `724` = 724L,
`725` = 725L, `726` = 726L, `727` = 727L, `728` = 728L, `730` = 730L,
`738` = 738L, `750` = 750L, `753` = 753L, `754` = 754L, `761` = 761L,
`766` = 766L, `767` = 767L, `769` = 769L, `771` = 771L, `775` = 775L,
`786` = 786L, `808` = 808L, `810` = 810L, `812` = 812L, `814` = 814L,
`817` = 817L, `820` = 820L, `841` = 841L, `862` = 862L, `864` = 864L,
`865` = 865L, `866` = 866L, `867` = 867L, `874` = 874L, `877` = 877L,
`878` = 878L, `881` = 881L, `882` = 882L, `890` = 890L, `891` = 891L,
`913` = 913L, `934` = 934L, `938` = 938L, `951` = 951L, `961` = 961L,
`962` = 962L, `967` = 967L, `971` = 971L, `972` = 972L, `981` = 981L,
`983` = 983L, `986` = 986L, `988` = 988L, `1000` = 1000L, `1014` = 1014L
), class = "omit"), row.names = c(NA, -30L), class = "data.frame")
Here is a base R solution.
It gets the index to the (first) maximum value on each row and then tables those values, assigning this table the same names as the data set's names.
# Every column except the first (likes_count in the posted data) is treated as
# a probability column.
prob_cols <- names(sentiment)[-1]
# Position of the (first) largest value in each row.
s <- apply(sentiment[-1], 1, which.max)
# Tabulate against the full set of columns, so a sentiment that never wins is
# reported as 0 instead of shifting or breaking the labels.
res <- table(factor(s, levels = seq_along(prob_cols), labels = prob_cols))
res
#neg neu pos
#  5   6  19
If there are ties, for instance, if two values have the same probability 0.5, use the following.
sent2 <- sentiment # work with a copy
sent2[8, c(2, 4)] <- 0.5 # assign 0.5 to 8th row, cols 2, 4
prob_cols2 <- names(sent2)[-1]
# All positions that attain the row maximum, so each tied sentiment is counted.
s2 <- apply(sent2[-1], 1, function(x) which(x == max(x)))
# Tabulate against the full set of columns, so a sentiment that never wins is
# reported as 0 instead of shifting or breaking the labels.
res2 <- table(
  factor(unlist(s2), levels = seq_along(prob_cols2), labels = prob_cols2)
)
res2
#neg neu pos
#  5   6  20

Have ticks at edges of bins (instead of center) with ggplot2 in R?

I have the following R data frame nPhotosClassified:
> glimpse(nPhotosClassified)
Observations: 236
Variables: 2
$ person_id <int> 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 194, 195, 199...
$ nPhotosClassified <int> 113, 164, 2126, 637, 75, 16, 161, 29, 15, 6338, 596, 18, 14, 63, 36777, 19117, 5625...
With it I tried to make a geom_histogram of the nPhotosClassified variable with ggplot2:
# Histogram (10 bins) of photos classified per user on a log10 x-axis, with a
# red vertical reference line at x = 100.
ggplot(nPhotosClassified, aes(x = nPhotosClassified)) +
  geom_histogram(bins = 10) +
  geom_vline(xintercept = 100, colour = "red") +
  scale_x_log10(
    breaks = c(1, 10, 100, 1000, 10000),
    name = "Number of photos classified"
  ) +
  labs(y = "Number of users") +
  theme_bw() +
  theme(
    # Removing the panel border also makes the axis lines disappear ...
    panel.border = element_blank(),
    # ... so the axis lines are added back explicitly.
    axis.line = element_line()
  )
Which gives me this result:
For this question, I've added a red vertical line to indicate that the major tick marks fall on the center of these bins.
Question: How do I adjust the bins (or the tick marks???) so that all the tick marks fall on the edge of bins rather than in the middle of them?
For example, how do I end up with two bins between 1 and 10, two bins between 10 and 100, and so on? Please note that I want my x-axis to be on the log10 scale.
Thank you!
EDIT: Here is the full dataset:
> dput(nPhotosClassified)
structure(list(person_id = c(179L, 180L, 181L, 182L, 183L, 184L,
185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L, 194L, 195L, 199L,
201L, 204L, 205L, 207L, 208L, 209L, 210L, 211L, 213L, 214L, 215L,
216L, 217L, 219L, 220L, 221L, 222L, 223L, 224L, 225L, 226L, 227L,
228L, 229L, 230L, 234L, 235L, 237L, 238L, 241L, 242L, 243L, 246L,
249L, 250L, 251L, 252L, 253L, 255L, 256L, 259L, 261L, 264L, 265L,
266L, 267L, 268L, 271L, 272L, 274L, 275L, 276L, 277L, 278L, 281L,
282L, 283L, 285L, 294L, 296L, 298L, 299L, 302L, 304L, 305L, 307L,
309L, 310L, 311L, 312L, 317L, 318L, 319L, 320L, 323L, 325L, 326L,
327L, 330L, 331L, 332L, 335L, 341L, 344L, 347L, 348L, 363L, 367L,
375L, 376L, 377L, 378L, 386L, 388L, 389L, 390L, 396L, 397L, 398L,
399L, 401L, 402L, 404L, 406L, 407L, 409L, 412L, 413L, 414L, 415L,
419L, 421L, 425L, 426L, 428L, 429L, 432L, 433L, 440L, 441L, 445L,
448L, 452L, 456L, 461L, 462L, 464L, 468L, 471L, 473L, 474L, 475L,
478L, 483L, 486L, 491L, 492L, 493L, 494L, 495L, 497L, 498L, 501L,
502L, 505L, 509L, 512L, 518L, 520L, 532L, 533L, 535L, 537L, 539L,
540L, 543L, 544L, 550L, 551L, 552L, 554L, 562L, 564L, 581L, 582L,
590L, 592L, 593L, 597L, 599L, 601L, 602L, 612L, 618L, 622L, 632L,
634L, 635L, 637L, 650L, 651L, 658L, 659L, 660L, 661L, 665L, 666L,
668L, 671L, 672L, 675L, 684L, 686L, 693L, 697L, 705L, 708L, 719L,
725L, 726L, 730L, 733L, 734L, 752L, 756L, 777L, 785L, 789L, 791L,
796L, 797L, 799L, 800L, 802L, 807L, 808L, 810L, 813L, 814L),
nPhotosClassified = c(113L, 164L, 2126L, 637L, 75L, 16L,
161L, 29L, 15L, 6338L, 596L, 18L, 14L, 63L, 36777L, 19117L,
5625L, 584L, 3477L, 541L, 6L, 6L, 112L, 8L, 5L, 290L, 120L,
12L, 9L, 2675L, 9L, 4L, 657L, 149L, 151L, 8L, 4104L, 285L,
192L, 734L, 5L, 129L, 155L, 11L, 516L, 410L, 55L, 1L, 581L,
293L, 28L, 17810L, 2690L, 5L, 587L, 359L, 9L, 493L, 404L,
21L, 3L, 2L, 91L, 23L, 3L, 728L, 29L, 1540L, 10556L, 1L,
54L, 905L, 25L, 22L, 1L, 14L, 16L, 13L, 10L, 21L, 121L, 7870L,
53L, 1777L, 11L, 850L, 35L, 635L, 7L, 5728L, 1972L, 3613L,
16L, 51L, 131L, 77L, 267L, 718L, 11L, 18L, 5088L, 113L, 48L,
302L, 33L, 44L, 20L, 22L, 7L, 30L, 8L, 69L, 4L, 11L, 2428L,
3131L, 2459L, 12L, 150L, 21L, 702L, 10L, 23L, 38L, 1L, 1L,
24L, 10L, 6L, 1443L, 221L, 4363L, 27L, 46L, 9L, 8L, 10633L,
56L, 38L, 20L, 171L, 36L, 5L, 3L, 108L, 10L, 559L, 83L, 60L,
3L, 9L, 697L, 100L, 27L, 114L, 186L, 8127L, 10L, 58L, 76L,
472L, 6L, 72L, 3748L, 130L, 9L, 2459L, 80L, 468L, 198L, 4L,
108L, 35L, 10L, 310L, 207L, 499L, 20L, 32L, 1178L, 730L,
999L, 13L, 1L, 5L, 2L, 1L, 178L, 4L, 31L, 16L, 1592L, 385L,
73L, 698L, 4L, 42L, 90L, 772L, 509L, 1L, 17L, 17L, 36L, 987L,
395L, 15L, 23194L, 16L, 956L, 15L, 5614L, 3L, 1700L, 74L,
65L, 18L, 389L, 35L, 8L, 3L, 9L, 1271L, 12L, 80L, 117L, 356L,
3L, 59L, 85L, 382L, 8L, 6L, 33L, 5L, 119L)), class = c("tbl_df",
"tbl", "data.frame"), .Names = c("person_id", "nPhotosClassified"
), row.names = c(NA, -236L))
In the end, I thought using the breaks argument to be the most straightforward way to think about this, mostly due to the complication of an x scale transformation.
The histogram bin breaks need to ultimately be set on the transformed scale. This translates to setting the histogram breaks on the scale of log10(nPhotosClassified).
The breaks depend on the range of log10(nPhotosClassified).
range(log10(nPhotosClassified[["nPhotosClassified"]]))
[1] 0.000000 4.565576
So the breaks need to go from 0 to 5. You wanted these evenly spaced between integers (i.e., 2 bins per 10^integer), so we want a break every 0.5 units.
# Histogram with explicit bin edges every 0.5 units from 0 to 5, drawn on a
# log10 x-axis with ticks at powers of ten.
ggplot(nPhotosClassified, aes(x = nPhotosClassified)) +
  geom_histogram(breaks = seq(from = 0, to = 5, by = 0.5)) +
  scale_x_log10(
    breaks = c(1, 10, 100, 1000, 10000),
    name = "Number of photos classified"
  )
There may be a less manual way to do this, but the other arguments to control the histogram bins, like boundary, didn't seem to translate well with scale transformation.

merge two data.frame, keeping all matching rows, in R

I'm struggling to merge two data.frames with NA values occurring in one or the other data frame.
# Two single-row example data frames sharing the same Nom_xp ("A1MRJ").
# sampleA has NA in GB28/GB28.1; sampleB has NA in GB05/GB05.1.
sampleA <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 1L,
    GB05 = 102L, GB05.1 = 102L,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = NA_integer_, GB28.1 = NA_integer_,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 32L,
  class = "data.frame"
)
sampleB <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 2L,
    GB05 = NA, GB05.1 = NA,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = 390L, GB28.1 = 390L,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 33L,
  class = "data.frame"
)
Output needed, as a data.frame: only one line for each matching "Nom_xp", so that each NA gets replaced by the value from either A or B, if the value exists in one or the other data frame.
Nom_xp GB05 GB05 GB18 GB18 GB06 GB06 GB27 GB27 GB24 GB24 GB28 GB28 GB15 GB15 GB02 GB02 GB10 GB10 GB14 GB14
A1MRJ 102 102 177 177 240 240 169 169 240 242 390 390 142 144 197 197 126 134 181 193
I would've thought that :
# Full outer join of A and B on Nom_xp; `all = TRUE` sets both all.x and all.y.
output <- merge(A, B, by = "Nom_xp", all = TRUE)
or
# NOTE(review): `join()` with a `match` argument is presumably plyr::join --
# confirm which package provides it before running this line.
output <- join(A,B,by="Nom_xp",match="all")
would give me what I need, but no luck so far... What am I missing ? Actual data.frame has more than one row.
Do you have just one row? Then, wouldn't this be sufficient? You can get the result in sampleB as:
# is.na(sampleB) is used here as the column selector: the selected columns of
# sampleB are overwritten with the same columns taken from sampleA.
# NOTE(review): presumably only meaningful when both data frames have exactly
# one row and the same column order -- confirm before reusing on larger data.
sampleB[, is.na(sampleB)] <- sampleA[, is.na(sampleB)]
No, apply, join and merge are not necessary here, I think. Not tested, but this would work.
# Fill every NA cell of sampleB with the value at the same position in
# sampleA, one column at a time. Working column-wise keeps each column's own
# type: indexing a data frame with a logical matrix goes through as.matrix(),
# which yields character values as soon as one column (here Nom_xp) is text.
# Assumes both data frames have the same rows and columns in the same order.
sampleB[] <- Map(
  function(b_col, a_col) {
    gaps <- is.na(b_col)
    b_col[gaps] <- a_col[gaps]
    b_col
  },
  sampleB, sampleA
)
Not entirely sure what your whole data set looks like, but I assume you could have several samples with the same "Nom_xp", not only 2? And that you probably have all your data in one big data frame or similar?
If so, maybe this code could be a good start (maybe someone can help out and rewrite it much more efficiently?). Anyhow:
# Example data: four single-row data frames, two per Nom_xp value, each with
# NA in different marker columns.
sampleA <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 1L,
    GB05 = 102L, GB05.1 = 102L,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = NA_integer_, GB28.1 = NA_integer_,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 32L,
  class = "data.frame"
)
sampleB <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 2L,
    GB05 = NA, GB05.1 = NA,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = 390L, GB28.1 = 390L,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 33L,
  class = "data.frame"
)
sampleC <- structure(
  list(
    Nom_xp = "ASDF", Rep = 2L,
    GB05 = NA, GB05.1 = NA,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 12349L, GB27.1 = 3,
    GB24 = 234112, GB24.1 = 242L,
    GB28 = 234, GB28.1 = 390L,
    GB15 = NA, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = NA, GB14.1 = 193L
  ),
  row.names = 34L,
  class = "data.frame"
)
sampleD <- structure(
  list(
    Nom_xp = "ASDF", Rep = 2L,
    GB05 = 214, GB05.1 = 34,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 3,
    GB24 = NA, GB24.1 = 242L,
    GB28 = 234, GB28.1 = 390L,
    GB15 = 56, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 15466L, GB10.1 = 134L,
    GB14 = 34, GB14.1 = 193L
  ),
  row.names = 35L,
  class = "data.frame"
)

cdat <- rbind(sampleA, sampleB, sampleC, sampleD) # simulating your data set (?)
dcols <- ncol(cdat)
# Distinct Nom_xp values, computed once instead of on every loop iteration.
keys <- unique(as.character(cdat$Nom_xp))

# One output row per distinct Nom_xp. Column 1 holds text, so the whole
# matrix is character; convert the marker columns back if numbers are needed.
mat <- matrix(nrow = length(keys), ncol = dcols)
colnames(mat) <- colnames(cdat)

# seq_along() keeps the loop from running when there are no keys at all.
for (j in seq_along(keys)) {
  # Exact comparison: grep() would treat the key as a regular expression and
  # also pick up rows whose Nom_xp merely contains it.
  g <- which(cdat$Nom_xp %in% keys[j])
  mat[j, 1] <- keys[j]                  # the Nom_xp value
  mat[j, 2] <- paste(g, collapse = " ") # row positions combined (not Rep values)
  # Per-column mean over the matching rows, ignoring NA.
  mat[j, 3:dcols] <- colMeans(cdat[g, 3:dcols, drop = FALSE], na.rm = TRUE)
}

Resources