justifying labels and label transparency in ggplot - r

I have the following data:
new_pairs
x y Freq start.latittude start.longitude start.station end.latitude
1 359 519 929 40.75188 -73.97770 Pershing\nSquare N 40.75510
2 477 465 5032 40.75514 -73.98658 Broadway &\nW 41 St 40.75641
3 484 519 1246 40.75188 -73.97770 Pershing\nSquare N 40.75500
4 484 318 2654 40.75320 -73.97799 E 43 St &\nVanderbilt\nAve 40.75500
5 492 267 1828 40.75098 -73.98765 Broadway &\nW 36 St 40.75020
6 492 498 957 40.74855 -73.98808 Broadway &\nW 32 St 40.75020
7 492 362 1405 40.75173 -73.98754 Broadway &\nW 37 St 40.75020
8 493 477 1582 40.75641 -73.99003 W 41 St &\n8 Ave 40.75680
9 493 529 728 40.75757 -73.99099 W 42 St &\n8 Ave 40.75680
10 529 2021 1748 40.75929 -73.98860 W 45 St &\n8 Ave 40.75757
end.longitude end.station interaction
1 -73.97499 E 47 St &\nPark Av E 47 St &Park Av > PershingSquare N
2 -73.99003 W 41 St &\n8 Ave W 41 St &8 Ave > Broadway &W 41 St
3 -73.98014 W 44 St &\n5 Ave W 44 St &5 Ave > PershingSquare N
4 -73.98014 W 44 St &\n5 Ave W 44 St &5 Ave > E 43 St &VanderbiltAve
5 -73.99093 W 33 St &\n7 Ave W 33 St &7 Ave > Broadway &W 36 St
6 -73.99093 W 33 St &\n7 Ave W 33 St &7 Ave > Broadway &W 32 St
7 -73.99093 W 33 St &\n7 Ave W 33 St &7 Ave > Broadway &W 37 St
8 -73.98291 W 45 St &\n6 Ave W 45 St &6 Ave > W 41 St &8 Ave
9 -73.98291 W 45 St &\n6 Ave W 45 St &6 Ave > W 42 St &8 Ave
10 -73.99099 W 42 St &\n8 Ave W 42 St &8 Ave > W 45 St &8 Ave
I would like to change the plot so that the labels are all center-justified, and so that their transparency depends on Freq: labels with a lower Freq should be more transparent and labels with a higher Freq less transparent.
# Flipped (horizontal) bar chart of Freq per station pair, bars ordered by
# Freq, with each pair's name drawn as a bold text label on its bar.
ggplot(data = new_pairs, aes(x = reorder(interaction, -Freq), y = Freq)) +
  geom_bar(stat = "identity", aes(fill = Freq, alpha = .7)) +
  geom_text(
    aes(x = interaction, label = interaction),
    vjust = "inward",
    hjust = "inward",
    size = 4,
    nudge_y = 1,
    fontface = "bold"
  ) +
  labs(
    title = "Bikes received viarebalancing",
    x = "Station",
    y = "Bikes received"
  ) +
  coord_flip() +
  # Hide the station axis text (labels are drawn on the bars) and the legend.
  theme(axis.text.y = element_blank(), legend.position = "none")
dput(new_pairs)
structure(list(x = structure(c(146L, 253L, 260L, 260L, 268L,
268L, 268L, 269L, 269L, 304L), .Label = c("72", "79", "82", "83",
"116", "119", "120", "127", "128", "137", "143", "144", "146",
"147", "150", "151", "152", "153", "157", "160", "161", "164",
"167", "168", "173", "174", "195", "212", "216", "217", "218",
"223", "224", "225", "228", "229", "232", "233", "236", "237",
"238", "239", "241", "242", "243", "244", "245", "247", "248",
"249", "250", "251", "252", "253", "254", "257", "258", "259",
"260", "261", "262", "263", "264", "265", "266", "267", "268",
"270", "271", "274", "275", "276", "278", "279", "280", "281",
"282", "284", "285", "289", "290", "291", "293", "294", "295",
"296", "297", "298", "300", "301", "302", "303", "304", "305",
"306", "307", "308", "309", "310", "311", "312", "313", "314",
"315", "316", "317", "318", "319", "320", "321", "322", "323",
"324", "325", "326", "327", "328", "329", "330", "331", "332",
"334", "335", "336", "337", "339", "340", "341", "342", "343",
"344", "345", "346", "347", "348", "349", "350", "351", "352",
"353", "354", "355", "356", "357", "358", "359", "360", "361",
"362", "363", "364", "365", "366", "367", "368", "369", "372",
"373", "375", "376", "377", "379", "380", "382", "383", "384",
"385", "386", "387", "388", "389", "390", "391", "392", "393",
"394", "395", "396", "397", "398", "399", "400", "401", "402",
"403", "404", "405", "406", "407", "408", "409", "410", "411",
"412", "414", "415", "416", "417", "418", "419", "420", "421",
"422", "423", "426", "427", "428", "430", "431", "432", "433",
"434", "435", "436", "437", "438", "439", "440", "441", "442",
"443", "444", "445", "446", "447", "448", "449", "450", "453",
"454", "455", "456", "457", "458", "459", "460", "461", "462",
"463", "464", "465", "466", "467", "468", "469", "470", "471",
"472", "473", "474", "475", "476", "477", "478", "479", "480",
"481", "482", "483", "484", "485", "486", "487", "488", "489",
"490", "491", "492", "493", "494", "495", "496", "497", "498",
"499", "500", "501", "502", "503", "504", "505", "507", "508",
"509", "510", "511", "512", "513", "514", "515", "516", "517",
"518", "519", "520", "521", "522", "523", "524", "525", "526",
"527", "528", "529", "530", "531", "532", "533", "534", "536",
"537", "538", "539", "540", "545", "546", "2000", "2002", "2003",
"2004", "2005", "2006", "2008", "2009", "2010", "2012", "2017",
"2021", "2022", "2023", "3002"), class = "factor"), y = structure(c(294L,
241L, 294L, 107L, 66L, 274L, 149L, 253L, 304L, 327L), .Label = c("72",
"79", "82", "83", "116", "119", "120", "127", "128", "137", "143",
"144", "146", "147", "150", "151", "152", "153", "157", "160",
"161", "164", "167", "168", "173", "174", "195", "212", "216",
"217", "218", "223", "224", "225", "228", "229", "232", "233",
"236", "237", "238", "239", "241", "242", "243", "244", "245",
"247", "248", "249", "250", "251", "252", "253", "254", "257",
"258", "259", "260", "261", "262", "263", "264", "265", "266",
"267", "268", "270", "271", "274", "275", "276", "278", "279",
"280", "281", "282", "284", "285", "289", "290", "291", "293",
"294", "295", "296", "297", "298", "300", "301", "302", "303",
"304", "305", "306", "307", "308", "309", "310", "311", "312",
"313", "314", "315", "316", "317", "318", "319", "320", "321",
"322", "323", "324", "325", "326", "327", "328", "329", "330",
"331", "332", "334", "335", "336", "337", "339", "340", "341",
"342", "343", "344", "345", "346", "347", "348", "349", "350",
"351", "352", "353", "354", "355", "356", "357", "358", "359",
"360", "361", "362", "363", "364", "365", "366", "367", "368",
"369", "372", "373", "375", "376", "377", "379", "380", "382",
"383", "384", "385", "386", "387", "388", "389", "390", "391",
"392", "393", "394", "395", "396", "397", "398", "399", "400",
"401", "402", "403", "404", "405", "406", "407", "408", "409",
"410", "411", "412", "414", "415", "416", "417", "418", "419",
"420", "421", "422", "423", "426", "427", "428", "430", "431",
"432", "433", "434", "435", "436", "437", "438", "439", "440",
"441", "442", "443", "444", "445", "446", "447", "448", "449",
"450", "453", "454", "455", "456", "457", "458", "459", "460",
"461", "462", "463", "464", "465", "466", "467", "468", "469",
"470", "471", "472", "473", "474", "475", "476", "477", "478",
"479", "480", "481", "482", "483", "484", "485", "486", "487",
"488", "489", "490", "491", "492", "493", "494", "495", "496",
"497", "498", "499", "500", "501", "502", "503", "504", "505",
"507", "508", "509", "510", "511", "512", "513", "514", "515",
"516", "517", "518", "519", "520", "521", "522", "523", "524",
"525", "526", "527", "528", "529", "530", "531", "532", "533",
"534", "536", "537", "538", "539", "540", "545", "546", "2000",
"2002", "2003", "2004", "2006", "2008", "2009", "2010", "2012",
"2017", "2021", "2022", "2023", "3002"), class = "factor"), Freq = c(929L,
5032L, 1246L, 2654L, 1828L, 957L, 1405L, 1582L, 728L, 1748L),
start.latittude = c(40.75188406, 40.75513557, 40.75188406,
40.75320159, 40.75097711, 40.74854862, 40.75172632, 40.75640548,
40.7575699, 40.75929124), start.longitude = c(-73.97770164,
-73.98658032, -73.97770164, -73.9779874, -73.98765428, -73.98808416,
-73.98753523, -73.9900262, -73.99098507, -73.98859651), start.station = c("Pershing\nSquare N",
"Broadway &\nW 41 St", "Pershing\nSquare N", "E 43 St &\nVanderbilt\nAve",
"Broadway &\nW 36 St", "Broadway &\nW 32 St", "Broadway &\nW 37 St",
"W 41 St &\n8 Ave", "W 42 St &\n8 Ave", "W 45 St &\n8 Ave"
), end.latitude = c(40.75510267, 40.75640548, 40.75500254,
40.75500254, 40.75019995, 40.75019995, 40.75019995, 40.7568001,
40.7568001, 40.7575699), end.longitude = c(-73.97498696,
-73.9900262, -73.98014437, -73.98014437, -73.99093085, -73.99093085,
-73.99093085, -73.98291153, -73.98291153, -73.99098507),
end.station = c("E 47 St &\nPark Av", "W 41 St &\n8 Ave",
"W 44 St &\n5 Ave", "W 44 St &\n5 Ave", "W 33 St &\n7 Ave",
"W 33 St &\n7 Ave", "W 33 St &\n7 Ave", "W 45 St &\n6 Ave",
"W 45 St &\n6 Ave", "W 42 St &\n8 Ave"), interaction = c("E 47 St &Park Av > PershingSquare N",
"W 41 St &8 Ave > Broadway &W 41 St", "W 44 St &5 Ave > PershingSquare N",
"W 44 St &5 Ave > E 43 St &VanderbiltAve", "W 33 St &7 Ave > Broadway &W 36 St",
"W 33 St &7 Ave > Broadway &W 32 St", "W 33 St &7 Ave > Broadway &W 37 St",
"W 45 St &6 Ave > W 41 St &8 Ave", "W 45 St &6 Ave > W 42 St &8 Ave",
"W 42 St &8 Ave > W 45 St &8 Ave")), .Names = c("x", "y",
"Freq", "start.latittude", "start.longitude", "start.station",
"end.latitude", "end.longitude", "end.station", "interaction"
), row.names = c(NA, -10L), class = "data.frame")

Here's an option:
# Upper limit of the Freq axis. Its midpoint is where every label is placed,
# so the value is computed once and reused for both ylim() and the label y.
freq_axis_max <- max(new_pairs$Freq) + 50

ggplot(data = new_pairs, aes(x = reorder(interaction, -Freq), y = Freq)) +
  # fill and alpha are both mapped to Freq, so bar transparency varies with Freq.
  geom_col(aes(fill = Freq, alpha = Freq)) +
  ylab("Bikes received") +
  xlab("Station") +
  ylim(0, freq_axis_max) +
  # coord_flip() is applied below, so y controls the horizontal text position.
  geom_text(
    aes(label = interaction, y = freq_axis_max / 2, alpha = Freq),
    vjust = "center",
    hjust = "center",
    size = 4,
    nudge_y = 1,
    fontface = "bold"
  ) +
  theme(axis.text.y = element_blank()) +
  ggtitle("Bikes received via rebalancing") +
  coord_flip() +
  theme(legend.position = "none")
You can set a y value in your geom_text aes to put the labels where you want them to be (you use coord_flip so changing y controls the horizontal placement of the text).
I set ylim manually to c(0, max(new_pairs$Freq)+50) so that the midpoint of the axis, (max(new_pairs$Freq)+50)/2, is known and can be used to center the text labels.

If you want to center the text to each bar, here's a solution (based on the thread I linked above):
# Center each label inside its own bar.
# position_stack(vjust = 0.5) puts the text at the midpoint of each bar, so no
# helper position column has to be computed and added to new_pairs beforehand.
ggplot(data = new_pairs, aes(x = reorder(interaction, -Freq), y = Freq)) +
  geom_col(aes(fill = Freq, alpha = Freq)) +
  ylab("Bikes received") +
  xlab("Station") +
  # nudge_y is omitted: it cannot be combined with an explicit position in
  # many ggplot2 versions, and a 1-unit nudge is invisible at this Freq scale.
  geom_text(
    aes(label = interaction, alpha = Freq),
    position = position_stack(vjust = 0.5),
    vjust = "center",
    hjust = "center",
    size = 4,
    fontface = "bold"
  ) +
  theme(axis.text.y = element_blank()) +
  ggtitle("Bikes received via rebalancing") +
  coord_flip() +
  theme(legend.position = "none")

Related

emmeans "consec" contrast method sorting as characters instead of numeric (R)

I have a dataset with multiple timepoints, and I would like to contrast time2-time1, time3-time2, etc.
Unfortunately, the time data is being sorted as characters instead of numeric, resulting in 10 being next to 100 instead of 20 and 30.
I tried the following function:
# Fit measure as a function of time, then keep only the "contrasts" element
# of the emmeans result for the consec ~ time specification.
lm_Time <- lm(measure ~ time, data = mydata)
TimeContrasts <- lm_Time %>%
  emmeans(consec ~ time, adjust = "dunnettx") %>%
  pluck("contrasts")
The function works, except it sorts the time column as characters instead of numeric (see below).
contrast
estimate
SE
df
t.ratio
p.value
time10 - time0
-0.06926
0.433
186
-0.160
1.0000
time100 - time10
0.51698
0.433
186
1.193
0.9424
time110 - time100
-0.23915
0.433
186
-0.552
0.9995
time120 - time110
0.06666
0.433
186
0.154
1.0000
time130 - time120
-0.13622
0.433
186
-0.314
1.0000
time140 - time130
0.09445
0.433
186
0.218
1.0000
time150 - time140
-0.53962
0.433
186
-1.245
0.9280
time160 - time150
0.17560
0.433
186
0.405
0.9999
time170 - time160
-0.45169
0.433
186
-1.042
0.9726
time180 - time170
0.22070
0.433
186
0.509
0.9997
time190 - time180
0.13796
0.433
186
0.318
1.0000
time20 - time190
0.19190
0.433
186
0.443
0.9999
time200 - time20
0.12803
0.433
186
0.295
1.0000
I have tried applying as.numeric() to time inside the emmeans() call, and I have also tried several sort commands inside the emmeans() call. Neither of those strategies worked.
Any help is greatly appreciated :)
The dput output is below:
structure(list(ID = c("NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7", "NSFA1", "NSFA2", "NSFA3", "NSFA4", "NSFA5",
"NSFA6", "NSFA7"), time = c("0", "0", "0", "0", "0", "0", "0",
"10", "10", "10", "10", "10", "10", "10", "20", "20", "20", "20",
"20", "20", "20", "30", "30", "30", "30", "30", "30", "30", "40",
"40", "40", "40", "40", "40", "40", "50", "50", "50", "50", "50",
"50", "50", "60", "60", "60", "60", "60", "60", "60", "70", "70",
"70", "70", "70", "70", "70", "80", "80", "80", "80", "80", "80",
"80", "90", "90", "90", "90", "90", "90", "90", "100", "100",
"100", "100", "100", "100", "100", "110", "110", "110", "110",
"110", "110", "110", "120", "120", "120", "120", "120", "120",
"120", "130", "130", "130", "130", "130", "130", "130", "140",
"140", "140", "140", "140", "140", "140", "150", "150", "150",
"150", "150", "150", "150", "160", "160", "160", "160", "160",
"160", "160", "170", "170", "170", "170", "170", "170", "170",
"180", "180", "180", "180", "180", "180", "180", "190", "190",
"190", "190", "190", "190", "190", "200", "200", "200", "200",
"200", "200", "200", "210", "210", "210", "210", "210", "210",
"210", "220", "220", "220", "220", "220", "220", "220", "230",
"230", "230", "230", "230", "230", "230", "240", "240", "240",
"240", "240", "240", "240", "250", "250", "250", "250", "250",
"250", "250", "260", "260", "260", "260", "260", "260", "260",
"270", "270", "270", "270", "270", "270", "270", "280", "280",
"280", "280", "280", "280", "280", "290", "290", "290", "290",
"290", "290", "290", "300", "300", "300", "300", "300", "300",
"300"), measure = c(1.63351915308149, 1.77874773175867, 3.50015705563717,
2.32439624137633, 2.3867235968419, 1.52099769112638, 2.12727500927918,
1.61164867026043, 1.61792828366444, 3.24946382696031, 2.69591940586543,
1.48047889731353, 2.6039168403024, 1.52764952726582, 1.3997227977924,
1.51431737410292, 2.56147122248565, 3.43364864277362, 1.7900436237908,
2.91196193028045, 1.43887322470176, 1.54086882673671, 0.674879288412828,
1.67032386923659, 1.32586875843305, 1.55071050734488, 2.50880307626376,
2.20798510204848, 1.3997227977924, 1.10183991260897, 2.67587406424878,
1.27976883573895, 1.84265098995821, 3.23205426515144, 1.31065772450765,
3.12284967113441, 1.78722703100539, 2.34069511686221, 1.11497149593301,
2.92374654058626, 1.10183991260897, 1.34840317315465, 2.0328425539765,
2.26187465285822, 2.01575691287562, 1.1366002145544, 2.24481304292639,
1.12798681094492, 2.05215784403175, 1.64588236163568, 0.788848102703265,
2.26822947635246, 1.41407494649988, 2.79612264008161, 3.2971566029204,
1.01114444121153, 2.34272179121274, 0.490670231210412, 3.35547534159059,
1.56697720910629, 3.76349049652496, 2.27245306481092, 3.34607147916713,
2.20579393251666, 0.510938075889187, 2.14099529429958, 2.47343216356879,
1.4632789901381, 1.16214289336498, 0.524281050785507, 2.47343216356879,
2.65390574185909, 2.74993445860681, 3.06105343797528, 2.01575691287562,
3.12944787442723, 2.32234810380723, 2.39460639823322, 1.14516360440023,
3.61354673134165, 3.13339592533708, 1.87769173477054, 2.99718218410231,
1.57021041547202, 2.80710002416611, 2.80710002416611, 2.32234810380723,
3.00704177887718, 1.81794375160058, 1.65815005889353, 2.77873927388457,
2.39263901736138, 1.43887322470176, 2.88066886611877, 3.33425302894405,
2.88517157922089, 1.9353384954053, 1.37793940174855, 4.06678904685626,
0.904333783554537, 1.95071681173142, 2.59164010266151, 2.43157405759104,
3.51860519251912, 1.44238347008031, 2.26399552889217, 0.391759719208298,
2.64367602470992, 2.34474611154949, 2.24052109566008, 1.02511451256556,
2.2188991165524, 1.40691564748298, 0.490670231210412, 1.79846414624875,
2.28715485377053, 2.2913325148839, 3.13339592533708, 2.94999390433702,
1.59901406500575, 0.46315712473212, 1.63041323278104, 2.18372649783188,
1.56373733250782, 2.66745615413171, 1.08858972736106, 2.3038053769111,
0.968404988487708, 2.52348814369453, 1.31447482577578, 2.12037300468822,
2.62131114755134, 0.889130408216507, 2.45832546944028, 2.43734073503924,
1.19552867313708, 2.64879810692067, 1.31065772450765, 2.32644196921343,
1.32964778894198, 3.1646915191577, 2.80710002416611, 2.33460090868772,
2.30794313445257, 1.44938009456903, 2.48280884261364, 1.3997227977924,
2.21454184581367, 0.44214095562798, 1.67939356690503, 1.07521853316776,
1.60849948302688, 3.11490171734115, 2.08072041821122, 2.28924494468403,
1.36324278630157, 2.26187465285822, 2.0255415025483, 1.80684117645744,
1.88832438338773, 1.63661899987595, 3.78350083526141, 0.626512719015478,
2.89712590600148, 2.90900413686774, 2.74185221146685, 1.99601399120726,
1.65202804664059, 1.70629759508941, 0.420784878794904, 2.82266964479624,
2.64709235591804, 1.27586287328904, 3.10692062902339, 0.563531228974086,
1.70333051302107, 0.490670231210412, 3.43477552606197, 1.61479159460257,
1.67032386923659, 1.7210511443671, 2.09013457393598, 3.57400449218521,
0.674879288412828, 2.94854531315548, 2.50880307626376, 1.48389609617481,
2.38078814285196, 1.11497149593301, 1.82346673568208, 2.0924799231334,
1.2160204292426, 2.65560509195653, 1.96342384608139, 1.58947126851799,
1.81517521048448, 1.63971279949374, 0.202489271623402, 1.81517521048448,
2.49585349398409, 2.06411794217993, 1.79285532806874, 0.963576913267833,
2.80553581934717, 0.141552843569217, 1.90151910036246, 2.5051128068377,
3.20564340068545, 2.61437354960207, 1.56697720910629, 2.45452855632671,
1.95581132475685, 1.51431737410292, 1.08414622910813, 1.83719277707321,
0.821294818608212, 3.13995801105766)), class = "data.frame", row.names = c(NA,
-217L))
Hello Thomas and welcome to SO!
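lm() coerces the character time variable to a factor, and by default the factor levels are sorted alphabetically (so "100" comes right after "10"). To prevent that, create the factor yourself, with the levels in the order you want, before fitting the model: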
> mydata$time <- factor(mydata$time, levels = unique(mydata$time))
> lm_Time <- lm(measure ~ time, data = mydata)
> lm_Time_emmeans <- emmeans(lm_Time, consec ~ time, adjust = "dunnettx")
>
> lm_Time_emmeans$contrasts
contrast estimate SE df t.ratio p.value
time10 - time0 -0.0693 0.433 186 -0.160 1.0000
time20 - time10 0.0376 0.433 186 0.087 1.0000
time30 - time20 -0.5101 0.433 186 -1.177 0.9463
time40 - time30 0.1947 0.433 186 0.449 0.9999
time50 - time40 0.1282 0.433 186 0.296 1.0000
time60 - time50 -0.1240 0.433 186 -0.286 1.0000
time70 - time60 0.0499 0.433 186 0.115 1.0000
time80 - time70 0.5595 0.433 186 1.291 0.9136
time90 - time80 -0.9510 0.433 186 -2.195 0.3813
time100 - time90 1.1321 0.433 186 2.613 0.1688
time110 - time100 -0.2392 0.433 186 -0.552 0.9995
time120 - time110 0.0667 0.433 186 0.154 1.0000
time130 - time120 -0.1362 0.433 186 -0.314 1.0000
time140 - time130 0.0945 0.433 186 0.218 1.0000
time150 - time140 -0.5396 0.433 186 -1.245 0.9280
time160 - time150 0.1756 0.433 186 0.405 0.9999
time170 - time160 -0.4517 0.433 186 -1.042 0.9726
time180 - time170 0.2207 0.433 186 0.509 0.9997
time190 - time180 0.1380 0.433 186 0.318 1.0000
time200 - time190 0.3199 0.433 186 0.738 0.9967
time210 - time200 -0.5330 0.433 186 -1.230 0.9324
time220 - time210 0.1509 0.433 186 0.348 1.0000
time230 - time220 0.4763 0.433 186 1.099 0.9630
time240 - time230 -0.5804 0.433 186 -1.340 0.8965
time250 - time240 0.0260 0.433 186 0.060 1.0000
time260 - time250 0.2801 0.433 186 0.646 0.9986
time270 - time260 -0.2186 0.433 186 -0.505 0.9997
time280 - time270 -0.3117 0.433 186 -0.719 0.9972
time290 - time280 0.5381 0.433 186 1.242 0.9290
time300 - time290 -0.2762 0.433 186 -0.637 0.9987
P value adjustment: dunnettx method for 30 tests
Hope it helps.
Perhaps change the type from character to numeric first:
mydata$time <- as.numeric(mydata$time)
before running your model?
Instead of manually coding the factor levels, convert time to numeric, then treat it as a factor when you model it.
> mydata = transform(mydata, time = as.numeric(time))
> lm_Time <- lm(measure ~ factor(time), data = mydata)
> EMM = emmeans(lm_Time, "time")
> contrast(EMM, "consec")
contrast estimate SE df t.ratio p.value
time10 - time0 -0.0693 0.433 186 -0.160 1.0000
time20 - time10 0.0376 0.433 186 0.087 1.0000
time30 - time20 -0.5101 0.433 186 -1.177 0.9984
time40 - time30 0.1947 0.433 186 0.449 1.0000
time50 - time40 0.1282 0.433 186 0.296 1.0000
time60 - time50 -0.1240 0.433 186 -0.286 1.0000
time70 - time60 0.0499 0.433 186 0.115 1.0000
time80 - time70 0.5595 0.433 186 1.291 0.9944
time90 - time80 -0.9510 0.433 186 -2.195 0.5339
time100 - time90 1.1321 0.433 186 2.613 0.2307
time110 - time100 -0.2392 0.433 186 -0.552 1.0000
time120 - time110 0.0667 0.433 186 0.154 1.0000
time130 - time120 -0.1362 0.433 186 -0.314 1.0000
time140 - time130 0.0945 0.433 186 0.218 1.0000
time150 - time140 -0.5396 0.433 186 -1.245 0.9965
time160 - time150 0.1756 0.433 186 0.405 1.0000
time170 - time160 -0.4517 0.433 186 -1.042 0.9997
time180 - time170 0.2207 0.433 186 0.509 1.0000
time190 - time180 0.1380 0.433 186 0.318 1.0000
time200 - time190 0.3199 0.433 186 0.738 1.0000
time210 - time200 -0.5330 0.433 186 -1.230 0.9971
time220 - time210 0.1509 0.433 186 0.348 1.0000
time230 - time220 0.4763 0.433 186 1.099 0.9994
time240 - time230 -0.5804 0.433 186 -1.340 0.9913
time250 - time240 0.0260 0.433 186 0.060 1.0000
time260 - time250 0.2801 0.433 186 0.646 1.0000
time270 - time260 -0.2186 0.433 186 -0.505 1.0000
time280 - time270 -0.3117 0.433 186 -0.719 1.0000
time290 - time280 0.5381 0.433 186 1.242 0.9967
time300 - time290 -0.2762 0.433 186 -0.637 1.0000
P value adjustment: mvt method for 30 tests

map over list and insert variables into a function

I have a list that I want to loop over and insert the variables into a function. However, the function I am using does not like the outputs I am getting from applying the map() function from the {purrr} package.
Here is my list:
$AAPL
# A tibble: 10 x 2
ticker string
<chr> <date>
1 AAPL 2020-01-28
2 AAPL 2020-04-30
3 AAPL 2020-07-30
4 AAPL 2020-10-29
5 AAPL 2021-01-27
6 AAPL 2020-01-29
7 AAPL 2020-05-01
8 AAPL 2020-07-31
9 AAPL 2020-10-30
10 AAPL 2021-01-28
$ABEV
# A tibble: 8 x 2
ticker string
<chr> <date>
1 ABEV 2020-02-26
2 ABEV 2020-05-06
3 ABEV 2020-07-29
4 ABEV 2020-10-28
5 ABEV 2020-02-27
6 ABEV 2020-05-07
7 ABEV 2020-07-30
8 ABEV 2020-10-29
my.list = list(AAPL = structure(list(ticker = c("AAPL", "AAPL", "AAPL",
"AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL"), string = structure(c(18289,
18382, 18473, 18564, 18654, 18290, 18383, 18474, 18565, 18655
), class = "Date")), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"), na.action = structure(305:380, .Names = c("305",
"306", "307", "308", "309", "310", "311", "312", "313", "314",
"315", "316", "317", "318", "319", "320", "321", "322", "323",
"324", "325", "326", "327", "328", "329", "330", "331", "332",
"333", "334", "335", "336", "337", "338", "339", "340", "341",
"342", "343", "344", "345", "346", "347", "348", "349", "350",
"351", "352", "353", "354", "355", "356", "357", "358", "359",
"360", "361", "362", "363", "364", "365", "366", "367", "368",
"369", "370", "371", "372", "373", "374", "375", "376", "377",
"378", "379", "380"), class = "omit")), ABEV = structure(list(
ticker = c("ABEV", "ABEV", "ABEV", "ABEV", "ABEV", "ABEV",
"ABEV", "ABEV"), string = structure(c(18318, 18388, 18472,
18563, 18319, 18389, 18473, 18564), class = "Date")), row.names = c(NA,
-8L), class = c("tbl_df", "tbl", "data.frame"), na.action = structure(305:380, .Names = c("305",
"306", "307", "308", "309", "310", "311", "312", "313", "314",
"315", "316", "317", "318", "319", "320", "321", "322", "323",
"324", "325", "326", "327", "328", "329", "330", "331", "332",
"333", "334", "335", "336", "337", "338", "339", "340", "341",
"342", "343", "344", "345", "346", "347", "348", "349", "350",
"351", "352", "353", "354", "355", "356", "357", "358", "359",
"360", "361", "362", "363", "364", "365", "366", "367", "368",
"369", "370", "371", "372", "373", "374", "375", "376", "377",
"378", "379", "380"), class = "omit")))
I want to loop over this list and pass the variables to a Quandl function. The Quandl function works with the following inputs:
Quandl.datatable('ORATS/VOL', tradedate=c('2021-02-19', "2020-01-20"), ticker='AAPL')
So what I am trying to do is loop over the list and insert the dates (string) and the ticker (ticker) into this function.
Here is what I have:
library(tidyverse)
# For every element of my.list, call Quandl.datatable with that element's
# `string` column as tradedate and its first `ticker` value as ticker.
map(
  my.list,
  function(tbl) {
    Quandl.datatable('ORATS/VOL', tradedate = tbl$string, ticker = tbl$ticker[1])
  }
)
This gives an error because it looks like the format is not in a vector when being input into the function. What am I missing here? Thank you for your help.
If we look at the tradedate values in the manual call, they are of class character, while the 'string' column is of class Date. Maybe we can convert it to character with as.character:
library(purrr)
# Same call per list element, but the Date column is converted to character
# before being passed as tradedate.
out <- map(
  my.list,
  function(tbl) {
    Quandl.datatable(
      'ORATS/VOL',
      tradedate = as.character(tbl$string),
      ticker = tbl$ticker[1]
    )
  }
)
The reason could be that in the API call, it is converting to character anyway, but if we use a Date class, the coercion to integer storage values may prevent it from executing

Joining two datasets from separate seasons/years, sports related (e.g., NHL, 1991 & 1992 seasons)

I have data sets for NHL teams, over a certain number of years. I want to know the best way to join these data sets. For example, I have Chicago Blackhawks stats from 1991 and 1992, with Games Played (GP), Wins (W), Losses (L), etc.
How would I join these sets together, without creating two separate columns, GP.x and GP.y?
I've used dput() to get the first ten teams and their respective statistics:
# 1991 team stats - first ten teams
structure(list(Team = c("Chicago Blackhawks*", "St. Louis Blues*",
"Los Angeles Kings*", "Boston Bruins*", "Calgary Flames*",
"Montreal Canadiens*", "Pittsburgh Penguins*", "New York Rangers*",
"Washington Capitals*", "Buffalo Sabres*"),
GP = c("80", "80", "80", "80", "80", "80", "80", "80", "80", "80"),
W = c("49", "47", "46", "44", "46", "39", "41", "36", "37", "31"),
L = c("23", "22", "24", "24", "26", "30", "33", "31", "36", "30"),
T = c("8", "11", "10", "12", "8", "11", "6", "13", "7", "19"),
Pts = c("106", "105", "102", "100", "100", "89", "88", "85", "81","81"),
`Pts %` = c(".663", ".656", ".638", ".625", ".625", ".556", ".550",
".531", ".506", ".506"),
GF = c("284", "310", "340", "299", "344", "273", "342", "297", "258",
"292"),
GA = c("211", "250", "254", "264", "263", "249", "305", "265", "258",
"278"),
SRS = c("0.85", "0.70", "1.04",
"0.32", "0.98", "0.20", "0.42", "0.36", "0.00", "0.08"),
SOS = c("-0.06", "-0.05", "-0.04", "-0.12", "-0.03", "-0.10", "-0.04",
"-0.04", "0.00", "-0.09"),
`TG/G` = c("6.19", "7.00", "7.43", "7.04", "7.59", "6.53", "8.09",
"7.03", "6.45", "7.13"),
EVGF = c("177", "230", "252", "214", "236", "201", "241", "197", "181",
"204"),
EVGA = c("132", "177", "173", "192", "178", "185", "220", "182", "199",
"208"),
PP = c("87", "70", "80", "74", "91", "66", "89", "91", "64", "73"),
PPO = c("393", "348", "391", "351", "384", "357", "388", "389", "340",
"400"),
`PP%` = c("22.14", "20.11", "20.46", "21.08", "23.70", "18.49", "22.94",
"23.39", "18.82", "18.25"),
PPA = c("68", "55", "63", "64", "77", "54", "73", "73", "44", "62"),
PPOA = c("425", "339", "370", "368", "420", "282", "351", "362", "314",
"368"),
`PK%` = c("84.00", "83.78", "82.97", "82.61", "81.67", "80.85", "79.20",
"79.83", "85.99", "83.15"),
SH = c("20", "10", "8", "11", "17", "6", "12", "9", "13", "15"),
SHA = c("10", "18", "18", "8", "8", "10", "12", "10", "15", "8"),
`PIM/G` = c("29.9", "24.6", "27.6", "20.8", "27.1", "17.6", "20.4",
"23.4", "22.8", "21.3"),
`oPIM/G` = c("28.2", "25.3", "30.5", "23.3", "25.9", "19.5", "21.3",
"24.1", "25.3", "22.1"),
S = c("2564", "2550", "2410", "2512", "2604", "2385", "2416", "2444",
"2370", "2410"),
`S%` = c("11.1", "12.2", "14.1", "11.9", "13.2", "11.4", "14.2", "12.2",
"10.9", "12.1"),
SA = c("2214", "2345", "2412", "2240", "2200", "2316", "2723", "2550",
"2112", "2432"),
`SV%` = c(".905", ".893", ".895", ".882", ".880", ".892",
".888", ".896", ".878", ".886"),
PDO = c("", "", "", "", "", "", "", "", "", "")),
.Names = c("Team", "GP", "W", "L", "T", "Pts", "Pts %", "GF", "GA",
"SRS", "SOS", "TG/G", "EVGF", "EVGA", "PP", "PPO", "PP%", "PPA",
"PPOA", "PK%", "SH", "SHA", "PIM/G", "oPIM/G", "S", "S%", "SA",
"SV%", "PDO"),
row.names = 2:11, class = "data.frame")
# 1992 team stats - first ten teams
structure(list(Team = c("New York Rangers*", "Washington Capitals*",
"Detroit Red Wings*", "Vancouver Canucks*", "Montreal Canadiens*",
"Pittsburgh Penguins*", "Chicago Blackhawks*", "New Jersey Devils*",
"Boston Bruins*", "Los Angeles Kings*"),
GP = c("80", "80", "80",
"80", "80", "80", "80", "80", "80", "80"),
W = c("50", "45", "43", "42", "41", "39", "36", "38", "36", "35"),
L = c("25", "27", "25", "26", "28", "32", "29", "31", "32", "31"),
T = c("5", "8", "12", "12", "11", "9", "15", "11", "12", "14"),
Pts = c("105", "98", "98", "96", "93", "87", "87", "87", "84", "84"),
`Pts %` = c(".656", ".613", ".613", ".600", ".581", ".544", ".544",
".544", ".525", ".525"),
GF = c("321", "330", "320", "285", "267", "343", "257", "289", "270",
"287"),
GA = c("246", "275", "256", "250",
"207", "308", "236", "259", "275", "296"),
SRS = c("1.02", "0.78", "0.74", "0.31", "0.64", "0.52", "0.22", "0.48",
"-0.09", "-0.19"),
SOS = c("0.08", "0.09", "-0.06", "-0.13", "-0.12", "0.08", "-0.04",
"0.10", "-0.03", "-0.08"),
`TG/G` = c("7.09", "7.56", "7.20", "6.69", "5.93", "8.14", "6.16",
"6.85", "6.81", "7.29"),
EVGF = c("226", "224", "230", "188", "189", "235", "165", "215", "186",
"197"),
EVGA = c("174", "200", "171", "167", "142", "217", "150", "181", "189",
"208"),
PP = c("81", "92", "72", "85", "74", "92", "81", "59", "77", "79"),
PPO = c("387", "412", "386", "439", "379", "423", "467", "338", "406",
"411"),
`PP%` = c("20.93", "22.33", "18.65", "19.36", "19.53", "21.75", "17.34",
"17.46", "18.97", "19.22"),
PPA = c("60", "60", "78", "76", "60", "77", "76", "68", "72", "76"),
PPOA = c("395", "368", "419", "382", "320", "383", "482", "374", "363",
"417"),
`PK%` = c("84.81", "83.70", "81.38", "80.10", "81.25", "79.90",
"84.23", "81.82", "80.17", "81.77"),
SH = c("14", "14", "18",
"12", "4", "16", "11", "15", "7", "11"),
SHA = c("12", "15",
"7", "7", "5", "14", "10", "10", "14", "12"),
`PIM/G` = c("22.4", "21.8", "25.6", "25.7", "19.3", "23.7", "33.0",
"20.0", "21.8", "26.9"),
`oPIM/G` = c("24.1", "24.2", "23.9", "28.4", "22.0",
"23.9", "31.8", "20.4", "23.7", "25.6"),
S = c("2632", "2481", "2478", "2669", "2443", "2542", "2646", "2495",
"2664", "2419"),
`S%` = c("12.2", "13.3", "12.9", "10.7", "10.9", "13.5", "9.7",
"11.6", "10.1", "11.9"),
SA = c("2543", "2270", "2238", "2299", "2227", "2518", "2028", "2290",
"2339", "2663"),
`SV%` = c(".903", ".879", ".886", ".891", ".907", ".878", ".884",
".887", ".882", ".889"),
PDO = c("", "", "", "", "", "", "", "", "", "")),
.Names = c("Team", "GP", "W", "L", "T", "Pts", "Pts %", "GF", "GA",
"SRS", "SOS", "TG/G", "EVGF", "EVGA", "PP", "PPO", "PP%", "PPA",
"PPOA", "PK%", "SH", "SHA", "PIM/G", "oPIM/G", "S", "S%", "SA",
"SV%", "PDO"),
row.names = 2:11, class = "data.frame")
I understand joining these sets may be... tough, but any advice/thoughts would be great! Thanks!
The issue is that trying to merge, join, or cbind two data frames with the same column names will either give you column names like GP.x, which you said you don't want, or will throw errors when you try to create multiple columns with the same name. You can verify that the two data frames have identical column names like so:
# `==` compares the two name vectors position by position, giving one
# TRUE/FALSE per column.
names(df91) == names(df92)
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#> [15] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#> [29] TRUE
It also is just more logical to have, for example, all your GP observations under a single column GP where observations are demarcated with the year in which they occurred, rather than two GP columns and no clear way of knowing which is for which year. This also wouldn't scale well—if you're scraping score data, I'm guessing at some point you'll want more than just these two years.
You instead want to be binding rows. You can give each data frame a column for the year, and use rbind in base R to bind them into one data frame.
# Tag every row with its season before stacking, so the combined frame still
# records which year each observation belongs to.
df91[["year"]] <- 1991
df92[["year"]] <- 1992
# Stack the two seasons row-wise into a single data frame.
df_base <- rbind(df91, df92)
head(df_base)
#> Team GP W L T Pts Pts % GF GA SRS SOS TG/G EVGF
#> 2 Chicago Blackhawks* 80 49 23 8 106 .663 284 211 0.85 -0.06 6.19 177
#> 3 St. Louis Blues* 80 47 22 11 105 .656 310 250 0.70 -0.05 7.00 230
#> 4 Los Angeles Kings* 80 46 24 10 102 .638 340 254 1.04 -0.04 7.43 252
#> 5 Boston Bruins* 80 44 24 12 100 .625 299 264 0.32 -0.12 7.04 214
#> 6 Calgary Flames* 80 46 26 8 100 .625 344 263 0.98 -0.03 7.59 236
#> 7 Montreal Canadiens* 80 39 30 11 89 .556 273 249 0.20 -0.10 6.53 201
#> EVGA PP PPO PP% PPA PPOA PK% SH SHA PIM/G oPIM/G S S% SA SV%
#> 2 132 87 393 22.14 68 425 84.00 20 10 29.9 28.2 2564 11.1 2214 .905
#> 3 177 70 348 20.11 55 339 83.78 10 18 24.6 25.3 2550 12.2 2345 .893
#> 4 173 80 391 20.46 63 370 82.97 8 18 27.6 30.5 2410 14.1 2412 .895
#> 5 192 74 351 21.08 64 368 82.61 11 8 20.8 23.3 2512 11.9 2240 .882
#> 6 178 91 384 23.70 77 420 81.67 17 8 27.1 25.9 2604 13.2 2200 .880
#> 7 185 66 357 18.49 54 282 80.85 6 10 17.6 19.5 2385 11.4 2316 .892
#> PDO year
#> 2 1991
#> 3 1991
#> 4 1991
#> 5 1991
#> 6 1991
#> 7 1991
Or you can do it in one step with dplyr's bind_rows, using mutate to create the year columns. bind_rows accepts any number of data frames (or a list of them), so you can scale this if you have more than just these two years' worth of data.
# Add the season to each frame and stack them in a single call. The frames
# are passed as one list, so further seasons can be added by extending it.
df_dplyr <- dplyr::bind_rows(list(
  dplyr::mutate(df91, year = 1991),
  dplyr::mutate(df92, year = 1992)
))
head(df_dplyr)
#> Team GP W L T Pts Pts % GF GA SRS SOS TG/G EVGF
#> 1 Chicago Blackhawks* 80 49 23 8 106 .663 284 211 0.85 -0.06 6.19 177
#> 2 St. Louis Blues* 80 47 22 11 105 .656 310 250 0.70 -0.05 7.00 230
#> 3 Los Angeles Kings* 80 46 24 10 102 .638 340 254 1.04 -0.04 7.43 252
#> 4 Boston Bruins* 80 44 24 12 100 .625 299 264 0.32 -0.12 7.04 214
#> 5 Calgary Flames* 80 46 26 8 100 .625 344 263 0.98 -0.03 7.59 236
#> 6 Montreal Canadiens* 80 39 30 11 89 .556 273 249 0.20 -0.10 6.53 201
#> EVGA PP PPO PP% PPA PPOA PK% SH SHA PIM/G oPIM/G S S% SA SV%
#> 1 132 87 393 22.14 68 425 84.00 20 10 29.9 28.2 2564 11.1 2214 .905
#> 2 177 70 348 20.11 55 339 83.78 10 18 24.6 25.3 2550 12.2 2345 .893
#> 3 173 80 391 20.46 63 370 82.97 8 18 27.6 30.5 2410 14.1 2412 .895
#> 4 192 74 351 21.08 64 368 82.61 11 8 20.8 23.3 2512 11.9 2240 .882
#> 5 178 91 384 23.70 77 420 81.67 17 8 27.1 25.9 2604 13.2 2200 .880
#> 6 185 66 357 18.49 54 282 80.85 6 10 17.6 19.5 2385 11.4 2316 .892
#> PDO year
#> 1 1991
#> 2 1991
#> 3 1991
#> 4 1991
#> 5 1991
#> 6 1991
Created on 2018-06-18 by the reprex package (v0.2.0).

converting nested tbl_df into tibble or data frame

I'm running a function in the 'rnoaa' package that finds the 5 nearest weather stations to a data frame that contains nest box locations. This produces a nested tbl_df, a tibble for each nest box ID. I'd like to convert the tbl_df into a tibble or data frame that retains the corresponding nest box ID, but I'm not sure how to do it. Here's my code and an example of the data.
Import the data:
# Five nest boxes with their coordinates. `id` stays a factor with the same
# integer codes (1:5) and level order as before, but now carries only the
# five levels that actually occur. The original dput() output also listed
# several hundred unused levels inherited from the full data set, which
# added nothing to the example.
nests <- structure(
  list(
    id = factor(
      c("29", "36", "39", "41", "42"),
      levels = c("29", "36", "39", "41", "42")
    ),
    latitude = c(43.29515222, 44.02074565, 44.44193, 44.146666, 43.98897),
    longitude = c(
      -89.29077182, -92.04753707, -121.40635, -121.347223, -121.18639
    )
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)
This grabs the 5 nearest weather stations and produces a tbl_df:
# Look up the nearby stations for every row of `nests`. All arguments are
# passed by name, one per line.
nearest_station <- meteo_nearby_stations(
  lat_lon_df = nests,
  station_data = station_data,
  limit = 5,
  var = "TAVG",
  year_min = 2011,
  year_max = 2016
)
nearest_station
Finally, I used do.call to produce a single data frame:
# Coerce each per-nest-box result to a plain data frame, then stack them.
# rbind() on the named list prefixes every row name with the element's name
# (e.g. "29.1"), but it does not add a nest-box column.
ns <- do.call(
  rbind,
  lapply(
    nearest_station,
    function(stations) data.frame(stations, stringsAsFactors = FALSE)
  )
)
head(ns)
While the resulting data table shows the nest box ID next to the weather station ID (under id), the first column really only contains the weather station ID:
id name latitude longitude distance
29.1 USW00014837 MADISON DANE RGNL AP 43.1406 -89.3453 17.74438
29.2 USR0000WDDG DODGEVILLE WISCONSIN 43.1000 -90.0000 61.44939
29.3 USW00014839 MILWAUKEE MITCHELL AP 42.9550 -87.9044 118.69939
29.4 USW00094822 ROCKFORD GTR ROCKFORD AP 42.1928 -89.0931 123.63416
29.5 USW00094908 DUBUQUE RGNL AP 42.3978 -90.7036 152.38709
36.1 USW00014925 ROCHESTER INTL AP 43.9042 -92.4917 37.83807
ns[,1]
USW00014837
Is there a way to keep the nest box information in the weather station data frame?
(sorry for late reply)
If you use something like dplyr::bind_rows, you can do:
# `.id` adds a "nest_box_id" column filled from the names of the elements of
# `nearest_station`, so each station row keeps its nest box.
dplyr::bind_rows(nearest_station, .id = "nest_box_id")
to get
#> # A tibble: 25 x 6
#> nest_box_id id name latitude longitude distance
#> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 29 USW00014837 MADISON DANE RGNL AP 43.1 -89.3 17.7
#> 29 USR0000WDDG DODGEVILLE WISCONSIN 43.1 -90.0 61.4
#> 29 USW00014839 MILWAUKEE MITCHELL AP 43.0 -87.9 119.
#> 29 USW00094822 ROCKFORD GTR ROCKFORD AP 42.2 -89.1 124.
#> 29 USW00094908 DUBUQUE RGNL AP 42.4 -90.7 152.
#> 36 USW00014925 ROCHESTER INTL AP 43.9 -92.5 37.8
#> 36 USW00014920 LA CROSSE MUNI AP 43.9 -91.3 65.5
#> 36 USR0000WBRF BLACK RIVER FALLS WISCONSIN 44.3 -90.8 102.
#> 36 USR0000WAUG AUGUSTA WISCONSIN 44.7 -91.1 105.
#> 36 USW00014922 MINNEAPOLIS/ST PAUL AP 44.9 -93.2 134.
#> # ... with 15 more rows

R: how to expand a row containing a "list" to several rows...one for each list member?

I am sure there is a simple solution to this, but i am going nuts trying to find it. Any help is very much appreciated.
I have a data frame with 2 columns; "pro" and "pep".
pro is formatted as a factor and contains entries in the form 220;300;4: sometimes more numbers (separated by ";") and sometimes just a single number (and no ";").
The pep column is formatted as integers and contains single numbers, e.g. 20.
What i would like to do is to "expand" e.g. the row pro: 220;300;4 and pep: 20
to three rows one with pro: 220 and pep: 20, one with pro: 300 and pep: 20 and one with pro: 4 and pep: 20.
I want to do this for the whole data frame and thus end up with a data frame with two character-formatted columns, where all the rows originally containing multiple ";"-separated numbers have been expanded.
I would prefer to avoid loops since the data frame is fairly large (>100000 rows)
I am sorry that I haven't been able to post this in a more case-representative way... I am new here and got lost in the code format.
On a much appreciated request from simon:
> dput( head( dat , 10 ) )
structure(list(Protein.Group.IDs = structure(c(1095L, 60L, 299L,
242L, 1091L, 147L, 161L, 884L, 783L, 1040L), .Label = c("0",
"1", "10", "100", "101", "102", "103", "104", "105", "106", "107",
"108", "109", "11", "110", "111", "112", "113", "114", "114;680",
"115", "116", "117", "118", "119", "12", "120", "121", "121;920;530",
"121;920;530;589", "121;920;530;589;934", "121;920;589", "121;920;934",
"122;351", "122;351;950", "122;351;950;224;904", "122;351;950;687",
"122;901;224;904", "122;901;351", "122;901;351;950", "122;901;351;950;224",
"122;901;351;950;224;890;904", "122;901;351;950;224;890;904;687",
"122;901;351;950;890;687", "122;901;950", "122;901;950;904;687",
"122;950", "123", "124", "125", "126", "127", "127;952", "128",
"129", "13", "130", "131", "131;204", "132", "133", "134", "135",
"136", "137", "138", "139", "14", "140", "140;259;436", "141",
"142", "143", "144", "145", "146", "147", "148", "149", "15",
"150", "151", "152", "153", "154", "155", "156", "157", "158",
"159", "16", "16;331", "16;331;329", "16;331;329;62", "16;331;329;910",
"16;331;329;910;62", "16;331;62", "16;331;910", "160", "161",
"162", "163", "164", "165", "166", "166;743", "167", "167;595",
"168", "169", "17", "170", "170;48", "171", "172", "173", "174",
"175", "176", "177", "178", "179", "18", "180", "181", "182",
"183", "184", "185", "186", "187", "188", "188;813", "188;813;852",
"189", "19", "19;14", "19;6;9;14;11", "19;884;6;9;14;20;26;11;1",
"19;9", "19;9;14", "190", "190;260", "191", "192", "193", "194",
"195", "196", "197", "198", "199", "2", "20", "20;26", "200",
"201", "202", "203", "204", "205", "206", "207", "208", "209",
"21", "21;4", "210", "211", "212", "213", "214", "215", "216",
"217", "218", "219", "22", "220", "221", "222", "223", "224",
"224;890", "224;890;904", "225", "225;221", "225;221;308", "225;295",
"226", "227", "228", "228;396", "228;396;73", "228;73", "229",
"23", "23;137", "23;17;137", "230", "231", "232", "233", "234",
"235", "236", "237", "238", "239", "24", "240", "241", "242",
"242;171", "243", "244", "245", "246", "247", "248", "249", "25",
"250", "251", "252", "253", "254", "255", "256", "257", "258",
"259", "26", "260", "261", "262", "263", "264", "265", "266",
"267", "268", "269", "27", "270", "271", "272", "273", "273;541;905",
"273;905", "274", "275", "276", "277", "278", "279", "28", "280",
"281", "281;192", "282", "283", "284", "285", "286", "287", "288",
"289", "29", "290", "291", "292", "293", "294", "295", "296",
"297", "298", "299", "3", "30", "300", "301", "302", "303", "304",
"304;770", "305", "306", "307", "308", "309", "31", "310", "311",
"312", "313;293", "314", "314;658", "315", "316", "317", "318",
"319", "32", "320", "321", "322", "323", "324", "324;34;564;637;282;229;565",
"324;564;282", "324;637;229;565", "325", "326", "327", "328",
"328;586", "329", "33", "330", "331", "332", "333", "334", "335",
"336", "337", "338", "339", "34", "340", "341", "342", "343",
"344", "345", "346", "346;523", "347", "348", "349", "35", "350",
"351", "351;890", "352", "353", "353;277", "354", "355", "356",
"357", "358", "359", "36", "360", "361", "362", "363", "364",
"365", "366", "367", "368", "369", "37", "370", "371", "372",
"373", "374", "375", "376", "377", "377;938", "378", "379", "38",
"380", "381", "382", "382;147", "383", "384", "385", "386", "387",
"388", "389", "39", "39;417", "390", "391", "392", "393", "394",
"395", "396", "397", "398", "399", "399;955", "4", "40", "400",
"401", "402", "403", "404", "405", "406", "407", "408", "409",
"41", "410", "411", "412", "413", "414", "415", "416", "417",
"418", "419", "42", "420", "421", "422", "423", "424", "424;640",
"425", "426", "427", "427;930", "428", "429", "43", "430", "431",
"432", "433", "434", "435", "436", "437", "438", "438;178", "439",
"44", "440", "441", "442", "443", "444", "445", "446", "447",
"448", "449", "45", "450", "451", "452", "453", "454", "455",
"456", "457", "458", "459", "46", "460", "461", "462", "463",
"464", "465", "466", "467", "468", "469", "47", "470", "471",
"472", "473", "474", "475", "476", "477", "478", "479", "48",
"480", "481", "482", "483", "484", "485", "486", "487", "488",
"488;648", "489", "49", "490", "491", "492", "493", "494", "495",
"496", "497", "498", "499", "5", "50", "500", "501", "502", "503",
"504", "505", "506", "507", "508", "509", "51", "510", "511",
"512", "513", "514", "515", "516", "516;603;845", "516;603;845;837",
"517", "518", "519", "52", "520", "521", "522", "523", "524",
"525", "526", "527", "527;509", "528", "529", "53", "530", "531",
"532", "533", "534", "535", "536", "537", "538", "539", "54",
"540", "540;67", "541", "542", "543", "544", "545", "546", "547",
"548", "549", "55", "550", "550;549", "551", "552", "553", "554",
"555", "556", "557", "558", "559", "56", "560", "561", "562",
"563", "564", "564;282", "564;637", "565", "566", "567", "568",
"568;569", "568;569;286", "568;569;574", "568;569;574;286", "568;574",
"569", "57", "570", "571", "572", "573", "574", "575", "576",
"577", "578", "579", "579;577;578", "579;577;580", "579;577;580;578",
"58", "580", "581", "582", "583", "584", "585", "585;609", "586",
"587", "587;167", "587;167;595", "587;167;595;557", "588", "589",
"59", "590", "591", "592", "593", "594", "595", "596", "597",
"598", "599", "6", "60", "600", "601", "601;10", "602", "603",
"604", "605", "606", "607", "608", "609", "61", "610", "611",
"612", "613", "614", "615", "615;269", "615;926;269", "616",
"617", "618", "619", "62", "620", "621", "622", "623", "624",
"625", "626", "627", "628", "629", "63", "63;397", "630", "631",
"632", "633", "634", "635", "636", "637", "638", "639", "64",
"64;72", "640", "641", "642", "643", "643;529", "644", "645",
"646", "647", "648", "649", "65", "650", "651", "652", "653",
"654", "655", "656", "657", "658", "659", "66", "660", "661",
"662", "663", "663;819", "664", "665", "666", "667", "668", "669",
"67", "670", "671", "672", "673", "674", "675", "676", "677",
"678", "679", "68", "680", "681", "681;97", "682", "683", "684",
"685", "686", "687", "688", "689", "69", "690", "691", "692",
"693", "694", "695", "696", "697", "698", "699", "7", "7;25;5",
"7;752", "7;752;24", "7;752;25;24;8", "70", "700", "701", "702",
"703", "704", "705", "706", "707", "708", "709", "71", "710",
"711", "712", "713", "714", "715", "716", "717", "718", "719",
"72", "72;746;944", "72;746;944;772", "72;772", "72;927", "720",
"721", "722", "723", "724", "725", "726", "727", "728", "729",
"73", "730", "731", "732", "733", "734", "735", "735;522", "735;665",
"735;665;522", "735;665;876", "735;876", "735;876;522", "736",
"737", "738", "739", "74", "740", "741", "742", "743", "744",
"745", "746", "746;944", "746;944;772", "747", "748", "749",
"75", "750", "751", "752", "752;24", "753", "754", "755", "756",
"757", "758", "759", "76", "76;313", "76;313;293", "760", "761",
"762", "763", "764", "765", "766", "767", "768", "769", "77",
"770", "771", "772", "773", "774", "775", "776", "777", "778",
"779", "78", "780", "781", "782", "783", "784", "785", "786",
"787", "788", "789", "79", "790", "790;552", "791", "792", "793",
"793;863", "794", "795", "796", "797", "798", "799", "8", "80",
"800", "801", "802", "803", "804", "805", "806", "807", "808",
"808;21", "809", "81", "810", "811", "812", "813", "814", "815",
"815;413", "815;777", "815;777;339", "815;777;838", "815;838",
"816", "817", "818", "818;7;752", "818;7;752;23;25;17;8", "819",
"82", "820", "821", "822", "823", "824", "824;957", "825", "826",
"827", "828", "829", "83", "830", "831", "832", "833", "834",
"835", "836", "837", "838", "839", "84", "840", "841", "842",
"843", "844", "845", "846", "847", "847;560;590", "848", "849",
"85", "850", "850;817", "851", "852", "853", "853;420", "854",
"855", "856", "857", "858", "858;638", "858;638;409", "859",
"86", "860", "861", "861;593", "862", "863", "864", "865", "866",
"867", "868", "869", "869;614", "87", "870", "871", "872", "873",
"874", "875", "876", "877", "878", "879", "88", "880", "881",
"882", "883", "884", "884;6", "884;6;9", "885", "886", "887",
"888", "888;189", "889", "89", "890", "890;904", "891", "891;953",
"892", "892;941", "893", "894", "895", "896", "897", "898", "899",
"9", "90", "900", "901", "901;224", "902", "903", "904", "905",
"906", "907", "908", "909", "91", "910", "911", "912", "913",
"914", "915", "916", "917", "918", "918;947", "919", "92", "920;530;589",
"920;530;589;934", "921", "922", "923", "924", "924;576", "925",
"926", "927", "928", "929", "93", "930", "931", "932", "933",
"934", "935", "936", "937", "938", "939", "94", "940", "941",
"942", "943", "944", "945", "946", "947", "948", "949", "95",
"950", "951", "952", "953", "954", "955", "956", "957", "958",
"959", "96", "960", "961", "962", "963", "964", "965", "966",
"967", "97", "98", "99", "99;392"), class = "factor"), Mod..Peptide.ID = c(23L,
24L, 25L, 26L, 27L, 29L, 30L, 31L, 32L, 33L)), .Names = c("Protein.Group.IDs",
"Mod..Peptide.ID"), row.names = c(318L, 344L, 380L, 406L, 409L,
417L, 436L, 462L, 494L, 505L), class = "data.frame")
Kind Regards
Mads
I've grown to really love data.table for this kind of task. It is so very simple. But first, let's make some sample data (which you should ideally provide!)
# Sample data: three rows, where `pep` is three random integers pasted
# together with ";" and `pro` is a random permutation of 1:3.
# NOTE(review): the values printed further down look like they come from the
# sample() algorithm used before R 3.6; newer R versions will presumably draw
# different numbers for the same seed - confirm.
set.seed(1)
df <- data.frame(
  pep = replicate(3, paste(sample(999, 3), collapse = ";")),
  pro = sample(3),
  stringsAsFactors = FALSE
)
Now we use the data.table package to do the reshaping in a couple of lines...
# Load data.table package. library() stops with an error if the package is
# not installed, whereas require() only returns FALSE and lets the script
# fail later at the first data.table call.
library(data.table)
# Turn data.frame into data.table, which looks like..
dt <- data.table(df)
# pep pro
#1: 266;372;572 1
#2: 908;202;896 3
#3: 944;660;628 2
# Transform it in one line like this: split each `pep` string on ";" and
# return one row per piece, grouped by `pro`.
dt[, list(pep = unlist(strsplit(pep, ";"))), by = pro]
# pro pep
#1: 1 266
#2: 1 372
#3: 1 572
#4: 3 908
#5: 3 202
#6: 3 896
#7: 2 944
#8: 2 660
#9: 2 628
I think tidyr's unnest() is what you're looking for.
# Toy tibble whose `y` column is a list of character vectors of unequal
# length.
df <- tibble::tibble(
  x = 1:2,
  y = list(c("a", "b", "c"), c("alpha", "beta"))
)
df
#> # A tibble: 2 x 2
#> x y
#> <int> <list>
#> 1 1 <chr [3]>
#> 2 2 <chr [2]>
# unnest() gives every element of `y` its own row and repeats `x` alongside.
tidyr::unnest(df, cols = y)
#> # A tibble: 5 x 2
#> x y
#> <int> <chr>
#> 1 1 a
#> 2 1 b
#> 3 1 c
#> 4 2 alpha
#> 5 2 beta
Created on 2019-08-10 by the reprex package (v0.3.0)
You have already obtained a nice answer, but it may be useful to dig around in the R toolbox. Here's an example using a function from the splitstackshape package, concat.split.multiple. As the name suggests it "allows the user to split multiple columns at once". Although there is only one concatenated column to split in the current example, the function is convenient because it allows us to reshape the data to a long format in the same call. Using the minimal data set provided by @SimonO101:
library(splitstackshape)
# Split the ";"-separated `pep` column and return the pieces in long format.
# NOTE(review): newer splitstackshape releases reportedly deprecate
# concat.split.multiple() in favour of cSplit() - confirm before reusing.
df2 <- concat.split.multiple(
  data = df,
  split.cols = "pep",
  seps = ";",
  direction = "long"
)
df2
# pro time pep
# 1 1 1 236
# 2 3 1 465
# 3 2 1 641
# 4 1 2 16
# 5 3 2 721
# 6 2 2 323
# 7 1 3 912
# 8 3 3 459
# 9 2 3 283
An id variable ('time') is added to differentiate the multiple items ('pep') that are generated for each group ('pro'). If you wish to remove it, just run subset(df2, select = -time).
If the row lists are concatenated strings initially, tidyr::separate_rows is another very convenient method:
# Each element of `y` is one comma-separated string; separate_rows() splits
# it on "," and emits one row per piece. Every call is namespace-qualified,
# so the snippet runs without attaching tidyr or a pipe-providing package
# first.
tidyr::separate_rows(
  tibble::tibble(x = 1:2, y = list("a,b,c", "alpha,beta")),
  y,
  sep = ","
)
# A tibble: 5 x 2
x y
<int> <chr>
1 1 a
2 1 b
3 1 c
4 2 alpha
5 2 beta
>

Resources