Error: Column must be length 1 (a summary value), not 3 - r

I have been running into this problem and I cannot see what the error is. I have seen in other posts that others have run into similar problems, but none of the answers helps me understand what the problem is in my df.
I have the following df.
structure(list(Position = c(2049L, 165L, 1949L, 1491L, 1550L,
118L, 164L, 2049L, 165L, 1654L, 1766L, 1949L, 891L, 1491L, 1550L,
118L, 26L, 1766L, 1949L, 1491L, 1550L, 775L, 775L, 2049L, 165L,
1949L, 100L, 891L, 1491L, 1550L, 118L, 2049L, 165L, 634L, 1654L,
1949L, 100L, 891L, 1491L, 1550L, 118L, 26L, 742L, 100L, 1491L,
26L, 934L, 2049L, 165L, 634L, 1654L, 1949L, 891L, 7L, 1491L,
1550L, 118L, 26L, 742L, 164L, 934L, 2049L, 165L, 634L, 1654L,
1949L, 891L, 1949L, 7L, 1491L, 1550L, 118L, 26L, 742L, 164L,
934L, 2049L, 165L, 634L, 1654L, 1949L, 891L, 1949L, 7L, 1491L,
1550L, 118L, 26L, 742L, 934L, 2049L, 634L, 1949L, 100L, 891L,
1491L, 1550L, 118L, 26L, 742L, 934L, 2049L, 165L, 634L, 1949L,
100L, 891L, 1491L, 1550L, 118L, 26L, 742L, 934L, 2049L, 165L,
634L, 1654L, 1949L, 100L, 891L, 1491L, 1550L, 118L, 26L), Freq = c(0.067775,
0.033818, 0.03713, 0.048681, 0.099359, 0.023134, 0.025509, 0.188382,
0.067254, 0.045069, 0.023901, 0.092243, 0.046262, 0.075173, 0.221062,
0.0453, 0.022977, 0.027029, 0.028103, 0.052525, 0.112694, 0.048416,
0.048416, 0.112287, 0.029838, 0.044125, 0.023682, 0.02216, 0.051012,
0.155826, 0.039267, 0.078809, 0.029748, 0.022649, 0.021723, 0.057707,
0.024649, 0.023452, 0.06311, 0.105783, 0.032374, 0.023256, 0.020603,
0.053108, 0.047462, 0.020855, 0.039699, 0.149017, 0.059824, 0.055523,
0.030769, 0.091152, 0.029758, 0.028419, 0.127958, 0.213058, 0.062456,
0.024057, 0.021788, 0.029876, 0.085926, 0.232437, 0.055515, 0.071291,
0.026907, 0.085498, 0.084755, 0.020671, 0.026855, 0.207147, 0.133883,
0.038205, 0.05364, 0.0545, 0.028277, 0.047527, 0.277206, 0.061392,
0.043723, 0.027954, 0.130286, 0.05974, 0.020242, 0.042113, 0.139535,
0.161506, 0.046344, 0.04523, 0.043121, 0.02829, 0.038206, 0.030329,
0.030099, 0.02749, 0.023106, 0.094997, 0.054054, 0.037677, 0.038858,
0.032011, 0.039477, 0.042833, 0.021013, 0.041847, 0.049717, 0.043711,
0.029877, 0.080454, 0.068994, 0.042294, 0.029737, 0.028315, 0.024932,
0.056885, 0.039822, 0.020568, 0.025144, 0.070069, 0.065646, 0.025337,
0.081133, 0.200188, 0.037447, 0.020874)), row.names = c(NA, -124L
), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
Position = c(7L, 26L, 100L, 118L, 164L, 165L, 634L, 742L,
775L, 891L, 934L, 1491L, 1550L, 1654L, 1766L, 1949L, 2049L
), .rows = list(c(54L, 69L, 84L), c(17L, 42L, 46L, 58L, 73L,
88L, 99L, 111L, 124L), c(27L, 37L, 44L, 94L, 106L, 119L),
c(6L, 16L, 31L, 41L, 57L, 72L, 87L, 98L, 110L, 123L),
c(7L, 60L, 75L), c(2L, 9L, 25L, 33L, 49L, 63L, 78L, 103L,
115L), c(34L, 50L, 64L, 79L, 92L, 104L, 116L), c(43L,
59L, 74L, 89L, 100L, 112L), 22:23, c(13L, 28L, 38L, 53L,
67L, 82L, 95L, 107L, 120L), c(47L, 61L, 76L, 90L, 101L,
113L), c(4L, 14L, 20L, 29L, 39L, 45L, 55L, 70L, 85L,
96L, 108L, 121L), c(5L, 15L, 21L, 30L, 40L, 56L, 71L,
86L, 97L, 109L, 122L), c(10L, 35L, 51L, 65L, 80L, 117L
), c(11L, 18L), c(3L, 12L, 19L, 26L, 36L, 52L, 66L, 68L,
81L, 83L, 93L, 105L, 118L), c(1L, 8L, 24L, 32L, 48L,
62L, 77L, 91L, 102L, 114L))), row.names = c(NA, -17L), class = c("tbl_df",
"tbl", "data.frame")))
I run the following code:
X.3 %>%
group_by(Position) %>%
summarize(Freq, Sum = Sum(Freq))
and I get the message: Error: Column Freq must be length 1 (a summary value), not 3
Why does it report a length of 3? Sorry, this may be very basic stuff, but I cannot solve it.
many thanks in advance.

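The 3 in the message is the number of rows in the first group (Position == 7): older versions of dplyr require every expression inside summarise to return a single value per group, and the bare Freq returns all three values of that group. With dplyr 1.0.x the code below no longer raises that error, because summarise can return more than one row per group (from dplyr 1.1 onwards this is deprecated in favour of reframe()).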
library(dplyr)
df %>%
group_by(Position) %>%
summarize(Freq, Sum = sum(Freq), .groups = 'drop')
-output
# A tibble: 124 x 3
Position Freq Sum
<int> <dbl> <dbl>
1 7 0.0284 0.0974
2 7 0.0269 0.0974
3 7 0.0421 0.0974
4 26 0.0230 0.279
5 26 0.0233 0.279
6 26 0.0209 0.279
7 26 0.0241 0.279
8 26 0.0536 0.279
9 26 0.0452 0.279
10 26 0.0389 0.279
# … with 114 more rows
Or another option is to create a list column and then unnest
library(tidyr)
df %>%
group_by(Position) %>%
summarize(Sum = sum(Freq), Freq = list(Freq)) %>%
unnest(c(Freq))

Related

Use the result of enrichKEGG() to make the dotplot

entrezid_downgene=structure(list(SYMBOL = c("ARHGEF16", "ILDR1", "TMPRSS4", "MAP7", "SERINC2", "C9orf152", "TSPAN1", "RHEX", "TMC4", "CRB3", "UGT8", "CD24", "MAPK13", "AGR2", "GJB1", "ERBB3", "CNDP2", "LOC105378644", "GCNT3", "CEACAM1", "GPR160", "PRSS8", "HOOK1", "ABHD17C", "MOCOS", "CWH43", "EHF", "ACSL5", "SLC44A4", "RAP1GAP", "MUC13", "PPM1H", "ATP2C2", "RAB25", "H2BC5", "H4C12", "TJP3", "RXFP1", "GSTO2", "OVOL2", "TMEM125", "LIMS1", "DLX5", "ST6GALNAC1", "HNF1B", "STX19", "F2RL1", "MT1G", "PLPP2", "TMEM238", "SLC30A2", "GABRP", "EPCAM", "CLDN10", "HOXB5", "PRAME", "MAL2", "PLA2G10", "TSPAN12", "FAM174B", "TMC5", "ASRGL1", "SCNN1A", "FOXL2", "ALDH3B2", "ELF3", "SLC7A1", "MT1F", "CLDN3", "SPINT2", "SFN", "VWC2", "C9orf116", "SLC39A6", "TCN1", "IL20RA", "ACSM3", "FOXL2NB", "HGD", "PAX8", "IDO1", "C4BPA", "RHPN2", "HMGCR", "UGT2B11", "PIGR", "MUC20", "SLC3A1", "PLLP", "PSAT1", "SCGB2A1", "WNT5A", "DEFB1", "FGL1", "SLC2A8", "HOXB8", "CYP2J2", "WWC1", "MUC1", "PRKX", "RASEF", "BAIAP2L2", "PAPSS1", "MME", "HOMER2", "STRA6", "ARG2", "MOGAT1", "CDS1", "SCGB2A2", "MPZL2", "PHYHIPL", "INAVA", "IDO2", "GALNT4", "TMEM101", "HSD17B2", "AOC1", "CDCA7", "CAPS", "TFCP2L1", "PAEP", "PLAC9P1", "GAL", "RORB", "CCNO", "XDH", "C15orf48", "SLC1A1", "GPT2", "VNN1", "NWD1", "HABP2", "UGT2B7", "CYP26A1", "MSX1", "ENPP3", "KIR2DL3", "ADAMTS9", "KIR2DL4", "BRINP1", "PROM1", "APCDD1", "AGR3", "EYA2", "SLC2A1", "GNLY", "COL7A1", "FOXJ1", "MS4A8", "C20orf85", "RSPH1", "SCGB1D2", "SPP1", "RASD1", "CST1", "SCGB1D4", "LEFTY1", "LAMC3", "TEKT1", "LCN2", "VTCN1", "IRX3", "ROPN1L", "FAM183A", "NDP", "TUBB3", "DIO2", "IL2RB", "ADAMTS8", "SERPINA5", "NKG7", "ABCC8", "STC1", "LRRC26"),
ENTREZID = c("27237", "286676", "56649", "9053", "347735", "401546", "10103", "440712", "147798", "92359", "7368", "100133941", "5603", "10551", "2705", "2065", "55748", "105378644", "9245", "634", "26996", "5652", "51361", "58489", "55034", "80157", "26298", "51703", "80736", "5909", "56667", "57460", "9914", "57111", "3017", "8362", "27134", "59350", "119391", "58495", "128218", "3987", "1749", "55808", "6928", "415117", "2150", "4495", "8612", "388564", "7780", "2568", "4072", "9071", "3215", "23532", "114569", "8399", "23554", "400451", "79838", "80150", "6337", "668", "222", "1999", "6541", "4494", "1365", "10653", "2810", "375567", "138162", "25800", "6947", "53832", "6296", "401089", "3081", "7849", "3620", "722", "85415", "3156", "10720", "5284", "200958", "6519", "51090", "29968", "4246", "7474", "1672", "2267", "29988", "3218", "1573", "23286", "4582", "5613", "158158", "80115", "9061", "4311", "9455", "64220", "384", "116255", "1040", "4250", "10205", "84457", "55765", "169355", "8693", "84336", "3294", "26", "83879", "828", "29842", "5047", "389033", "51083", "6096", "10309", "7498", "84419", "6505", "84706", "8876", "284434", "3026", "7364", "1592", "4487", "5169", "3804", "56999", "3805", "1620", "8842", "147495", "155465", "2139", "6513", "10578", "1294", "2302", "83661", "128602", "89765", "10647", "6696", "51655", "1469", "404552", "10637", "10319", "83659", "3934", "79679", "79191", "83853", "440585", "4693", "10381", "1734", "3560", "11095", "5104", "4818", "6833", "6781", "389816")),
row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L, 123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L, 149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L, 174L, 175L, 176L, 177L), class = "data.frame")
down_ekk <- enrichKEGG(gene= c(entrezid_downgene$ENTREZID),
organism = 'hsa',
pvalueCutoff = 0.05,
minGSSize = 50,
maxGSSize = 500,
)
dot <- dotplot(down_ekk,font.size=6,title='down_kegg')
dot
Error in ans[ypos] <- rep(yes, length.out = len)[ypos] : Change the
parameter length to zero Warning message: In rep(yes, length.out =
len) : 'x' is NULL so the result will be NULL
please How to solve the error?
This is expected: the dotplot cannot be drawn because the enrichment analysis found no significantly enriched terms.
You can check with down_ekk :
# Print the enrichKEGG result to see how many enriched terms were found
down_ekk
#
# over-representation test
#
#...#organism hsa
#...#ontology KEGG
#...#keytype kegg
#...#gene chr [1:175] "27237" "286676" "56649" "9053" "347735" "401546" "10103" "440712" "147798" "92359" "7368" "100133941" "5603" "10551" "2705" "2065" "55748" "105378644" "9245" "634" "26996" "5652" ...
#...pvalues adjusted by 'BH' with cutoff <0.05
#...0 enriched terms found
#...Citation
Guangchuang Yu, Li-Gen Wang, Yanyan Han and Qing-Yu He.
clusterProfiler: an R package for comparing biological themes among
gene clusters. OMICS: A Journal of Integrative Biology
2012, 16(5):284-287
"0 enriched terms found" so this is why you get the error as no dotplot can be plotted

Split dataframe into separate and apply formula to calculate transitions from segments in R

I have data frame with 4 columns.
I want to split the data frame into separate data frames by the column age_group and calculate the transitions from segment_2018 to segment_2020. So the result should be several tables (one per age_group value), each resulting from table(df$segment_2018, df$segment_2020) applied to that group. Any ideas?
Data sample:
structure(list(cust_id = c(5689748L, 1256987L, 8596263L, 4152659L,
4589521L, 0125698L, 2896359L, 2045975L, 3759826L, 4625831L, 1875964L,
6132852L, 8365472L, 1287465L, 9765287L, 9357452L, 8725691L, 4051697L,
5783105L, 6040870L), segment_2018 = c("256", "258", "259", "2061",
"2061", "2061", "7", "256", "259", "1029", "256", "258", "256",
"67", "12", "258", "4115", "4115", "13", "1029"), age_group = c("58_59",
"70_71", "62_63", "56_57", "62_63", "0", "46_47", "52_53", "52_53",
"52_53", "56_57", "50_51", "0", "52_53", "50_51", "62_63", "62_63",
"70_71", "44_45", "50_51"), segment_2020 = c("256", "258", "256",
"2061", "17", "0", "7", "17", "133", "528", "256", "258", "0",
"67", "12", "258", "133", "4114", "12", "1029")), row.names = c(NA,
20L), class = "data.frame")
cust_id segment_2018 age_group segment_2020
1 5689748 256 58_59 256
2 1256987 258 70_71 258
3 8596263 259 62_63 256
4 4152659 2061 56_57 2061
5 4589521 2061 62_63 17
6 125698 2061 0 0
7 2896359 7 46_47 7
8 2045975 256 52_53 17
9 3759826 259 52_53 133
10 4625831 1029 52_53 528
11 1875964 256 56_57 256
12 6132852 258 50_51 258
13 8365472 256 0 0
14 1287465 67 52_53 67
15 9765287 12 50_51 12
16 9357452 258 62_63 258
17 8725691 4115 62_63 133
18 4051697 4115 70_71 4114
19 5783105 13 44_45 12
20 6040870 1029 50_51 1029
Expected output:
structure(c(1859L, 3661L, 214L, 106L, 107L, 209L, 341L, 1770L,
16343L, 106881L, 5078L, 317L, 593L, 8237L, 1106L, 271L, 402L,
285L, 422L, 428L, 115L, 365L, 40507L, 11700L, 132L, 50L, 815L,
375L, 189L, 998L, 14207L, 3171L, 882L, 307L, 948L, 7774L, 1985L,
1414L, 2025L, 750L, 929L, 947L, 21L, 810L, 905L, 14358L, 4L,
0L, 97L, 115L, 21L, 547L, 12926L, 2285L, 154L, 24L, 1120L, 1851L,
346L, 215L, 122L, 79L, 98L, 310L, 1L, 72L, 502L, 251L, 10264L,
1837L, 85L, 33L, 14L, 17L, 240L, 185L, 74L, 21L, 48L, 401L, 225L,
111L, 115L, 23L, 57L, 77L, 94L, 187L, 313L, 150L, 206L, 5228L,
78L, 35L, 13L, 2L, 143L, 120L, 66L, 18L, 23L, 269L, 136L, 64L,
106L, 19L, 48L, 66L, 1057L, 121L, 1531L, 563L, 51L, 33L, 2922L,
266L, 86L, 24L, 305L, 74L, 513L, 311L, 85L, 875L, 1068L, 291L,
315L, 48L, 1116L, 902L, 15L, 197L, 497L, 418L, 66L, 28L, 439L,
1517L, 35L, 26L, 491L, 233L, 170L, 92L, 238L, 597L, 325L, 122L,
339L, 117L, 120L, 1209L, 32L, 91L, 236L, 739L, 4L, 0L, 43L, 26L,
5345L, 1443L, 182L, 171L, 432L, 190L, 69L, 823L, 202L, 7L, 138L,
72L, 23L, 72L, 0L, 15L, 44L, 274L, 3L, 1L, 3L, 4L, 68L, 4170L,
141L, 575L, 185L, 31L, 30L, 122L, 1L, 5L, 4L, 2L, 4L, 8L, 0L,
11L, 1891L, 6236L, 75L, 31L, 126L, 192L, 12L, 429L, 44940L, 11113L,
544L, 93L, 704L, 4536L, 414L, 529L, 175L, 88L, 266L, 385L, 26L,
476L, 1882L, 2654L, 84L, 48L, 78L, 186L, 171L, 1112L, 15439L,
64342L, 1394L, 174L, 531L, 5187L, 608L, 178L, 313L, 193L, 256L,
383L, 22L, 211L, 182L, 83L, 44L, 18L, 215L, 78L, 51L, 70L, 139L,
117L, 16367L, 912L, 85L, 182L, 71L, 104L, 327L, 99L, 214L, 233L,
15L, 142L, 136L, 49L, 16L, 10L, 194L, 63L, 65L, 49L, 63L, 35L,
2214L, 3989L, 35L, 124L, 38L, 6L, 166L, 39L, 43L, 128L, 13L,
49L, 159L, 2751L, 1L, 2L, 27L, 63L, 1L, 37L, 1371L, 444L, 85L,
13L, 1098L, 308L, 123L, 52L, 84L, 60L, 27L, 270L, 0L, 17L, 3610L,
10976L, 80L, 32L, 417L, 383L, 915L, 2046L, 29728L, 7587L, 1804L,
468L, 818L, 72508L, 7699L, 729L, 1357L, 735L, 669L, 960L, 17L,
448L, 1746L, 9166L, 38L, 13L, 526L, 232L, 250L, 212L, 4648L,
1099L, 433L, 129L, 859L, 16061L, 9197L, 471L, 1658L, 594L, 431L,
722L, 10L, 241L, 1062L, 864L, 87L, 4L, 177L, 61L, 2L, 7L, 473L,
177L, 105L, 2L, 129L, 810L, 487L, 3680L, 253L, 92L, 338L, 183L,
6L, 417L, 4791L, 3960L, 44L, 28L, 240L, 279L, 304L, 99L, 1559L,
545L, 947L, 332L, 1396L, 4115L, 4226L, 533L, 3921L, 624L, 222L,
1234L, 14L, 235L, 763L, 1480L, 5L, 1L, 46L, 84L, 123L, 41L, 628L,
165L, 124L, 46L, 601L, 1012L, 813L, 102L, 253L, 561L, 51L, 320L,
1L, 44L, 591L, 227L, 17L, 5L, 584L, 74L, 15L, 7L, 241L, 84L,
163L, 47L, 18L, 497L, 288L, 305L, 44L, 15L, 3920L, 146L, 5L,
109L, 1613L, 1577L, 61L, 32L, 1657L, 883L, 108L, 44L, 1195L,
465L, 493L, 219L, 951L, 1555L, 1275L, 296L, 1704L, 460L, 368L,
2584L, 25L, 199L, 254L, 67L, 232L, 276L, 176L, 82L, 6L, 5L, 95L,
110L, 73L, 32L, 30L, 170L, 126L, 54L, 98L, 13L, 53L, 144L, 4957L,
147L, 354L, 198L, 2424L, 98L, 53L, 26L, 6L, 14L, 168L, 133L,
53L, 6L, 37L, 323L, 127L, 427L, 81L, 25L, 44L, 51L, 37L, 12899L
), .Dim = 22:23, .Dimnames = structure(list(c("1029", "1031",
"12", "13", "133", "17", "2056", "2060", "2061", "256", "258",
"259", "265", "4114", "4115", "5", "528", "529", "65", "67",
"7", "9"), c("0", "1029", "1031", "12", "13", "133", "17", "2056",
"2060", "2061", "256", "258", "259", "265", "4114", "4115", "5",
"528", "529", "65", "67", "7", "9")), .Names = c("", "")), class = "table")
We can split the data and use table on each group. This could be done with split and lapply.
temp <- lapply(split(df, df$age_group), function(x)
table(x$segment_2018, x$segment_2020))
Or using by :
temp <- by(df, df$age_group, function(x) table(x$segment_2018, x$segment_2020))
This returns a list of tables. Usually, it is better to keep them as a list as it is easier to manage and doesn't clutter global environment but if you want them as separate objects we can use list2env.
# The names of temp (the age_group values) start with digits, so prefix them
# with "table_" to get syntactically valid object names.
names(temp) <- paste0('table_', names(temp))
# Create one object per list element in the global environment.
list2env(temp, .GlobalEnv)
We can also do
temp <- lapply(split(df, df$age_group), function(x)
table(x[c('segment_2018', 'segment_2020')]))

calculate sum of values in dataframe based on values in other columns

I have a dataframe in R in which values correspond to value estimates and their margin of error (MoE).
Column names consist of a pattern, an indicator character (e = estimate, m = margin of error) and an ID that matches estimate and margin of error.
So, the column names look like "XXXe1, XXXm1, XXXe2, XXXm2, ...".
Goal
I am trying to create a function to (for each row)
Calculate the sum of the estimates. (That is pretty straightforward.)
Calculate the aggregated margin of error. This is the square root of the sum of the squares of each MoE.
Condition: the MoE of estimates marked as 0 should only be added once.
Examples:
In row 20, the aggregated MoE should only be sqrt(123^2).
In row 13, B01001e4 and B01001e5 are 0, so their MoE is only counted once.
So far, I have done the following to build a function that does this:
# Add the row-wise sum of the estimate columns to a data frame.
#
# DF_to_write_on:  data frame that receives the new column; expected to have
#                  as many rows as source_df.
# New_column_name: prefix of the new column; the sum is stored as
#                  "<New_column_name>_e".
# source_df:       data frame holding the "<pattern>e<ID>" estimate columns.
# pattern:         regular expression (passed to grepl) identifying the
#                  variable, e.g. "B01001".
#
# Returns DF_to_write_on with the additional "<New_column_name>_e" column.
estimate_aggregator <- function(DF_to_write_on, New_column_name, source_df, pattern) {
  # Keep only the estimate columns; drop = FALSE keeps a data frame even when
  # a single column matches, which rowSums() requires.
  estimate_cols <- grepl(paste0(pattern, "e"), names(source_df))
  subset_df_e <- source_df[, estimate_cols, drop = FALSE]
  DF_to_write_on[paste0(New_column_name, "_e")] <- rowSums(subset_df_e)
  # Return the modified data frame (the original returned the undefined `DF`).
  DF_to_write_on
}
What I am missing: a way to write in the new dataframe the result of selecting the XXXmYY values of those columns that have no 0 value in their corresponding estimate. If there is one or more 0 in the estimates, then I should include the MoE 123 in the calculation only once.
What would be the cleanest way to achieve this? I see that my struggle is on dealing with several columns at once and the fact that the values on the XXXeYY columns determine the selection of the XXXmYY ones.
Expected output
row 15: DF_to_write_on[paste0(New_column_name, "_m")] <- sqrt(176^2 + 117^2 + 22^2 + 123^2)
row 20: DF_to_write_on[paste0(New_column_name, "_m")] <- sqrt(123^2)
B01001e1 B01001m1 B01001e2 B01001m2 B01001e3 B01001m3 B01001e4 B01001m4 B01001e5 B01001m5
15 566 176 371 117 14 22 0 123 0 123
20 0 123 0 123 0 123 0 123 0 123
Data
structure(list(B01001e1 = c(1691L, 2103L, 975L, 2404L, 866L,
2140L, 965L, 727L, 1602L, 1741L, 948L, 1771L, 1195L, 1072L, 566L,
1521L, 2950L, 770L, 1624L, 0L), B01001m1 = c(337L, 530L, 299L,
333L, 264L, 574L, 227L, 266L, 528L, 498L, 320L, 414L, 350L, 385L,
176L, 418L, 672L, 226L, 319L, 123L), B01001e2 = c(721L, 1191L,
487L, 1015L, 461L, 1059L, 485L, 346L, 777L, 857L, 390L, 809L,
599L, 601L, 371L, 783L, 1215L, 372L, 871L, 0L), B01001m2 = c(173L,
312L, 181L, 167L, 170L, 286L, 127L, 149L, 279L, 281L, 152L, 179L,
193L, 250L, 117L, 234L, 263L, 155L, 211L, 123L), B01001e3 = c(21L,
96L, 70L, 28L, 33L, 90L, 12L, 0L, 168L, 97L, 72L, 10L, 59L, 66L,
14L, 0L, 35L, 47L, 14L, 0L), B01001m3 = c(25L, 71L, 73L, 26L,
33L, 79L, 18L, 123L, 114L, 79L, 59L, 15L, 68L, 99L, 22L, 123L,
31L, 37L, 20L, 123L), B01001e4 = c(30L, 174L, 25L, 91L, 4L, 27L,
30L, 43L, 102L, 66L, 54L, 85L, 0L, 16L, 0L, 26L, 34L, 27L, 18L,
0L), B01001m4 = c(26L, 148L, 30L, 62L, 9L, 27L, 25L, 44L, 82L,
52L, 46L, 48L, 123L, 21L, 123L, 40L, 33L, 32L, 27L, 123L), B01001e5 = c(45L,
44L, 7L, 46L, 72L, 124L, 45L, 34L, 86L, 97L, 0L, 83L, 0L, 30L,
0L, 66L, 0L, 23L, 33L, 0L), B01001m5 = c(38L, 35L, 12L, 37L,
57L, 78L, 36L, 37L, 62L, 97L, 123L, 50L, 123L, 42L, 123L, 59L,
123L, 31L, 49L, 123L)), .Names = c("B01001e1", "B01001m1", "B01001e2",
"B01001m2", "B01001e3", "B01001m3", "B01001e4", "B01001m4", "B01001e5",
"B01001m5"), row.names = c(NA, 20L), class = "data.frame")
From your description it sounds like your desired output should have 2 columns, the row sum of the estimate, and the function of the row margins of errors using the logic you describe. Here is one (somewhat roundabout) solution to that problem.
I saved your data as df.
# Split the columns into estimates ("...e<ID>") and margins of error
# ("...m<ID>"). Anchoring the pattern on the trailing numeric ID keeps an "e"
# or "m" elsewhere in a column name from matching by accident.
# NOTE: assumes the estimate and MoE columns appear in the same ID order.
df_e <- df[, grepl("e[0-9]+$", names(df))]
df_m <- df[, grepl("m[0-9]+$", names(df))]
# Flag the zero estimates and number them within each row (1st zero, 2nd, ...)
is_zero <- df_e == 0
zero_rank <- t(apply(is_zero, 1, cumsum))
# Blank out the MoE of every zero estimate after the first one in a row, so
# the zero-estimate MoE is counted only once. The `is_zero &` part matters:
# without it, the MoE of a non-zero estimate that follows two zeros would be
# dropped as well.
df_m[is_zero & zero_rank > 1] <- NA
# Combine the row sum of the estimates with the aggregated MoE
# (square root of the sum of squared MoEs, ignoring the blanked-out values)
output_df <- data.frame(
  e = rowSums(df_e),
  m = apply(df_m, 1, function(x) sqrt(sum(x^2, na.rm = TRUE)))
)
head(output_df)
e m
1 2508 382.4173
2 3608 637.5061
3 1564 358.5178
4 3584 380.3512
5 1436 320.9595
6 3440 651.4031

Fill in matrix values from data frame in a vectorized manner

I have a dataframe with 3 columns, two of which represent the i,j indices in a matrix. For each row of the dataframe, I would like to set the corresponding i,j entry of the matrix to 1.
Sharing the data and matrix below, which I think will make it easier to describe the problem:
data = structure(list(sale_id = c(0L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 11L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 14L, 15L, 16L, 16L,
17L, 17L, 17L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 22L,
22L, 23L, 23L, 23L, 24L, 24L, 25L, 25L, 26L, 26L, 27L, 27L, 28L,
28L, 29L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 31L, 31L, 32L, 32L,
33L, 33L, 33L, 33L, 33L, 33L, 34L, 34L), user_id = c(3219L, 144L,
2884L, 2884L, 2155L, 2155L, 2155L, 2155L, 2817L, 2817L, 2817L,
2817L, 2817L, 2817L, 144L, 144L, 2850L, 2850L, 2850L, 2850L,
2850L, 2850L, 2850L, 2850L, 2850L, 144L, 144L, 144L, 144L, 144L,
144L, 144L, 144L, 2817L, 2817L, 2075L, 2075L, 2546L, 2546L, 2546L,
2687L, 2687L, 2687L, 2687L, 2687L, 2687L, 2687L, 2687L, 170L,
2546L, 1963L, 144L, 144L, 1825L, 1825L, 1825L, 1825L, 1825L,
144L, 144L, 2155L, 2155L, 2546L, 2546L, 144L, 2155L, 2155L, 144L,
144L, 144L, 3182L, 3182L, 3343L, 3343L, 170L, 170L, 2155L, 2155L,
2793L, 2793L, 1564L, 2250L, 2250L, 2250L, 2250L, 2250L, 2250L,
2250L, 3083L, 3083L, 2075L, 2075L, 144L, 144L, 144L, 144L, 144L,
144L, 829L, 829L), item_id = c(174L, 10L, 179L, 162L, 171L, 182L,
179L, 185L, 199L, 179L, 195L, 174L, 162L, 198L, 144L, 69L, 57L,
47L, 83L, 80L, 10L, 117L, 14L, 90L, 88L, 186L, 167L, 192L, 142L,
162L, 173L, 151L, 134L, 191L, 166L, 118L, 128L, 98L, 95L, 119L,
130L, 154L, 155L, 181L, 120L, 118L, 77L, 120L, 101L, 31L, 139L,
10L, 30L, 182L, 179L, 139L, 173L, 171L, 80L, 39L, 26L, 69L, 163L,
151L, 175L, 150L, 148L, 121L, 147L, 88L, 183L, 177L, 132L, 167L,
176L, 172L, 57L, 78L, 98L, 99L, 118L, 102L, 141L, 97L, 99L, 79L,
32L, 17L, 16L, 30L, 66L, 54L, 57L, 91L, 81L, 39L, 92L, 123L,
87L, 62L)), .Names = c("sale_id", "user_id", "item_id"), row.names = c(NA,
100L), class = "data.frame")
M = matrix(0, nrow = max(data$user_id), ncol = max(data$item_id))
head(data, n = 6)
sale_id user_id item_id
1 0 3219 174
2 1 144 10
3 2 2884 179
4 2 2884 162
5 3 2155 171
6 3 2155 182
The i-column is user_id and the j-column is item_id. So for the first row, I would like for M[3219, 174] = 1, then I would like M[144, 10] = 1, etc. I would like to do this without a for-loop, which is too slow given the size of my matrix.
For reference, what I'm currently doing is:
# Set M[user_id, item_id] to 1 for every row of data.
# seq_len() is used instead of 1:nrow(data) so the loop body is skipped when
# data has no rows (1:0 would iterate over c(1, 0)).
for (i in seq_len(nrow(data))) {
  M[data$user_id[i], data$item_id[i]] <- 1
}
However, my problem scales quite large, and this is too slow for my problem. Any help is greatly appreciated! Thanks
EDIT: i tried something along the lines of:
apply(data, 1, FUN = function(x) M[x[2],x[3]] = 1)
but it didn't work as well as i would hope (takes even longer than the for-loop).
Try this:
M[cbind(data$user_id,data$item_id)] <- 1

filtering out lowest numbers in a column by group [duplicate]

This question already has answers here:
finding the index of a max value in R
(4 answers)
Closed 7 years ago.
I have the following dataset:
head(tot_docks, n = 10)
id tot
1 72 39
2771 79 33
5541 82 27
8310 83 62
8900 83 3
11079 116 39
13848 119 19
14584 119 8
14662 119 0
15922 119 2
I need to keep, for each id, only the row with the highest value. There are sometimes several duplicates in id, and I only want to keep the one that has the highest value in tot. For example, for id 119 I would like to keep only the row where tot is 19, as that is the highest value. Is there a quick way to do this in R?
dput(tot_docks)
structure(list(id = c(72L, 79L, 82L, 83L, 83L, 116L, 119L, 119L,
119L, 119L, 119L, 119L, 119L, 120L, 127L, 128L, 128L, 137L, 143L,
144L, 146L, 147L, 150L, 151L, 152L, 153L, 153L, 153L, 153L, 157L,
157L, 160L, 161L, 164L, 167L, 168L, 173L, 173L, 173L, 174L, 195L,
195L, 212L, 216L, 217L, 217L, 217L, 217L, 218L, 223L, 224L, 225L,
228L, 229L, 232L, 233L, 236L, 236L, 236L, 236L, 236L, 237L, 237L,
238L, 239L, 241L, 242L, 243L, 243L, 243L, 243L, 243L, 243L, 243L,
243L, 243L, 243L, 243L, 244L, 245L, 247L, 248L, 249L, 250L, 251L,
252L, 253L, 253L, 253L, 253L, 253L, 253L, 253L, 253L, 253L, 253L,
253L, 253L, 253L, 253L, 253L, 253L, 253L, 253L, 253L, 253L, 253L,
253L, 253L, 253L, 253L, 254L, 254L, 254L, 257L, 258L, 259L, 259L,
260L, 260L, 261L, 262L, 263L, 264L, 265L, 266L, 266L, 267L, 268L,
270L, 274L, 275L, 276L, 278L, 279L, 279L, 279L, 279L, 279L, 279L,
279L, 279L, 279L, 279L, 279L, 279L, 279L, 279L, 279L, 279L, 280L,
281L, 282L, 284L, 285L, 289L, 290L, 291L, 293L, 294L, 295L, 296L,
297L, 298L, 300L, 301L, 302L, 303L, 304L, 305L, 305L, 306L, 307L,
308L, 309L, 310L, 311L, 312L, 313L, 314L, 315L, 315L, 316L, 317L,
318L, 318L, 318L, 319L, 320L, 321L, 321L, 322L, 323L, 324L, 325L,
325L, 325L, 325L, 325L, 325L, 326L, 327L, 328L, 329L, 330L, 330L,
331L, 332L, 334L, 335L, 336L, 337L, 339L, 340L, 341L, 342L, 343L,
344L, 345L, 346L, 347L, 347L, 347L, 347L, 348L, 348L, 349L, 350L,
351L, 351L, 351L, 351L, 351L, 352L, 353L, 354L, 355L, 356L, 357L,
357L, 357L, 357L, 357L, 358L, 359L, 360L, 361L, 362L, 363L, 364L,
365L, 366L, 367L, 368L, 369L, 372L, 373L, 375L, 376L, 376L, 376L,
376L, 376L, 376L, 376L, 376L, 376L, 376L, 376L, 376L, 376L, 376L,
376L, 376L, 376L, 376L, 376L, 377L, 379L, 380L, 382L, 383L, 384L,
384L, 385L, 386L, 387L, 388L, 389L, 390L, 391L, 392L, 392L, 392L,
392L, 392L, 392L, 393L, 394L, 394L, 395L, 396L, 397L, 398L, 399L,
400L, 401L, 402L, 403L, 403L, 405L, 405L, 406L, 407L, 407L, 407L,
408L, 409L, 410L, 411L, 412L, 414L, 415L, 416L, 417L, 417L, 418L,
419L, 419L, 420L, 421L, 422L, 422L, 423L, 426L, 427L, 428L, 430L,
432L, 432L, 433L, 433L, 434L, 435L, 436L, 437L, 438L, 439L, 440L,
440L, 441L, 442L, 443L, 444L, 444L, 445L, 446L, 447L, 448L, 449L,
450L, 453L, 454L, 455L, 455L, 455L, 455L, 455L, 456L, 457L, 458L,
459L, 459L, 460L, 461L, 462L, 463L, 465L, 466L, 467L, 468L, 469L,
470L, 471L, 472L, 473L, 473L, 473L, 473L, 473L, 473L, 473L, 473L,
473L, 473L, 473L, 473L, 473L, 473L, 473L, 473L, 473L, 474L, 474L,
475L, 476L, 477L, 478L, 479L, 480L, 481L, 482L, 483L, 484L, 485L,
486L, 487L, 488L, 489L, 490L, 490L, 490L, 490L, 490L, 491L, 492L,
492L, 493L, 494L, 495L, 496L, 497L, 497L, 497L, 498L, 499L, 500L,
501L, 502L, 502L, 503L, 504L, 504L, 505L, 507L, 507L, 507L, 508L,
509L, 510L, 510L, 511L, 512L, 513L, 514L, 515L, 516L, 516L, 517L,
517L, 517L, 517L, 518L, 519L, 519L, 519L, 519L, 520L, 521L, 522L,
523L, 524L, 525L, 526L, 527L, 528L, 529L, 530L, 531L, 532L, 533L,
534L, 534L, 534L, 534L, 536L, 537L, 537L, 538L, 538L, 539L, 540L,
545L, 546L, 546L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L,
2006L, 2008L, 2009L, 2010L, 2012L, 2017L, 2021L, 2022L, 2023L,
3002L, 3041L, 3042L, 3043L, 3044L, 3046L, 3046L, 3047L, 3048L,
3049L, 3050L, 3051L, 3052L, 3053L, 3054L, 3055L, 3056L, 3057L,
3058L, 3059L, 3060L, 3061L, 3061L, 3061L, 3062L, 3063L, 3063L,
3064L, 3065L, 3066L, 3067L, 3068L, 3069L, 3069L, 3069L, 3070L,
3071L, 3072L, 3073L, 3074L, 3075L, 3076L, 3077L, 3077L, 3078L,
3079L, 3080L, 3081L, 3082L, 3083L, 3084L, 3085L, 3086L, 3086L,
3086L, 3087L, 3087L, 3088L, 3089L, 3090L, 3090L, 3091L, 3092L,
3093L), tot = c(39L, 33L, 27L, 62L, 3L, 39L, 19L, 8L, 0L, 2L,
1L, 3L, 4L, 19L, 31L, 30L, 29L, 46L, 24L, 19L, 39L, 33L, 31L,
33L, 29L, 43L, 35L, 31L, 3L, 23L, 0L, 26L, 35L, 47L, 45L, 47L,
51L, 47L, 31L, 30L, 25L, 45L, 28L, 23L, 39L, 0L, 2L, 3L, 39L,
33L, 31L, 37L, 55L, 23L, 23L, 38L, 39L, 0L, 12L, 6L, 5L, 39L,
0L, 31L, 31L, 23L, 23L, 31L, 0L, 1L, 2L, 4L, 5L, 7L, 9L, 10L,
11L, 13L, 31L, 23L, 20L, 23L, 27L, 40L, 27L, 33L, 55L, 1L, 7L,
8L, 10L, 11L, 13L, 20L, 24L, 27L, 28L, 29L, 30L, 32L, 33L, 34L,
35L, 37L, 38L, 39L, 41L, 43L, 44L, 45L, 46L, 31L, 7L, 3L, 39L,
23L, 39L, 31L, 35L, 27L, 27L, 24L, 31L, 27L, 35L, 24L, 0L, 57L,
27L, 23L, 31L, 19L, 25L, 19L, 36L, 0L, 9L, 12L, 13L, 15L, 16L,
17L, 18L, 19L, 20L, 23L, 24L, 25L, 26L, 27L, 31L, 59L, 27L, 43L,
47L, 19L, 29L, 20L, 55L, 3L, 24L, 35L, 27L, 35L, 55L, 37L, 23L,
31L, 36L, 32L, 33L, 37L, 29L, 27L, 41L, 36L, 31L, 31L, 23L, 39L,
29L, 17L, 43L, 27L, 31L, 2L, 0L, 33L, 39L, 27L, 26L, 31L, 39L,
51L, 35L, 31L, 27L, 25L, 5L, 36L, 27L, 39L, 23L, 31L, 39L, 0L,
27L, 24L, 31L, 27L, 36L, 37L, 24L, 27L, 19L, 29L, 23L, 23L, 35L,
27L, 35L, 2L, 1L, 0L, 42L, 41L, 3L, 28L, 39L, 35L, 34L, 21L,
19L, 36L, 27L, 23L, 43L, 23L, 27L, 11L, 10L, 6L, 3L, 36L, 55L,
39L, 43L, 57L, 49L, 27L, 31L, 33L, 34L, 39L, 35L, 27L, 19L, 30L,
43L, 4L, 6L, 7L, 9L, 12L, 16L, 18L, 19L, 20L, 23L, 24L, 26L,
27L, 28L, 29L, 31L, 32L, 33L, 47L, 42L, 39L, 36L, 39L, 31L, 0L,
29L, 43L, 39L, 35L, 27L, 31L, 31L, 32L, 31L, 13L, 26L, 0L, 1L,
31L, 32L, 4L, 30L, 25L, 27L, 31L, 27L, 15L, 42L, 42L, 31L, 0L,
40L, 39L, 34L, 3L, 13L, 37L, 23L, 19L, 35L, 23L, 29L, 24L, 42L,
31L, 23L, 22L, 23L, 23L, 16L, 23L, 19L, 55L, 54L, 39L, 31L, 47L,
31L, 27L, 31L, 27L, 39L, 38L, 27L, 47L, 27L, 24L, 27L, 39L, 32L,
28L, 35L, 51L, 23L, 52L, 51L, 42L, 39L, 31L, 31L, 31L, 59L, 39L,
35L, 3L, 23L, 32L, 36L, 39L, 35L, 39L, 31L, 49L, 50L, 23L, 39L,
47L, 25L, 39L, 35L, 34L, 59L, 57L, 37L, 31L, 41L, 42L, 0L, 2L,
4L, 8L, 9L, 10L, 12L, 13L, 15L, 16L, 27L, 28L, 29L, 30L, 31L,
32L, 47L, 3L, 37L, 47L, 59L, 31L, 31L, 27L, 25L, 39L, 35L, 44L,
39L, 39L, 36L, 41L, 37L, 59L, 0L, 20L, 58L, 55L, 53L, 49L, 48L,
34L, 35L, 25L, 47L, 59L, 57L, 0L, 30L, 36L, 52L, 43L, 30L, 0L,
29L, 45L, 1L, 36L, 47L, 0L, 29L, 24L, 36L, 51L, 1L, 33L, 27L,
27L, 53L, 35L, 19L, 31L, 59L, 55L, 53L, 0L, 39L, 61L, 62L, 25L,
47L, 39L, 39L, 51L, 51L, 57L, 39L, 39L, 59L, 39L, 41L, 36L, 39L,
43L, 50L, 31L, 3L, 0L, 1L, 29L, 39L, 40L, 3L, 39L, 31L, 30L,
27L, 38L, 1L, 30L, 15L, 27L, 39L, 36L, 12L, 42L, 49L, 24L, 35L,
39L, 36L, 39L, 43L, 33L, 36L, 25L, 24L, 19L, 19L, 27L, 19L, 13L,
21L, 19L, 18L, 21L, 0L, 23L, 23L, 19L, 23L, 23L, 18L, 24L, 19L,
19L, 21L, 2L, 0L, 23L, 20L, 0L, 23L, 27L, 19L, 27L, 24L, 25L,
4L, 24L, 23L, 19L, 24L, 27L, 31L, 33L, 24L, 23L, 33L, 0L, 24L,
21L, 23L, 25L, 30L, 19L, 25L, 25L, 0L, 23L, 36L, 0L, 22L, 0L,
31L, 0L, 31L, 27L, 28L)), .Names = c("id", "tot"), row.names = c(1L,
2771L, 5541L, 8310L, 8900L, 11079L, 13848L, 14584L, 14662L, 15922L,
15927L, 15930L, 15932L, 16617L, 19386L, 22155L, 23595L, 24924L,
27693L, 30462L, 33231L, 36000L, 38769L, 41538L, 44307L, 47076L,
49118L, 49137L, 49240L, 49845L, 51090L, 52614L, 55383L, 58152L,
60921L, 63690L, 66459L, 67442L, 67993L, 69228L, 71997L, 72489L,
74766L, 77535L, 80304L, 82888L, 82892L, 82944L, 83073L, 85842L,
88611L, 91380L, 94149L, 96918L, 99687L, 102456L, 105225L, 105469L,
105494L, 105518L, 106008L, 107994L, 108449L, 110763L, 113532L,
116301L, 119070L, 121839L, 123709L, 123712L, 123713L, 123714L,
123716L, 123718L, 123719L, 123720L, 123721L, 123723L, 124608L,
127377L, 130146L, 132915L, 135684L, 138453L, 141222L, 143991L,
146760L, 148737L, 148738L, 148739L, 148740L, 148741L, 148742L,
148743L, 148744L, 148745L, 148746L, 148747L, 148748L, 148750L,
148751L, 148755L, 148756L, 148758L, 148764L, 148766L, 148767L,
148769L, 148770L, 148771L, 148777L, 149529L, 150081L, 150523L,
152298L, 155067L, 157836L, 158393L, 160605L, 160870L, 163374L,
166143L, 168912L, 171681L, 174450L, 177219L, 178808L, 179988L,
182757L, 185526L, 188295L, 191064L, 193833L, 196602L, 199371L,
201344L, 201345L, 201346L, 201347L, 201348L, 201350L, 201351L,
201352L, 201353L, 201354L, 201355L, 201357L, 201362L, 201363L,
201364L, 202140L, 204909L, 207678L, 210447L, 213216L, 215985L,
218754L, 221523L, 224292L, 227061L, 229830L, 232599L, 235368L,
238137L, 240906L, 243675L, 246444L, 249213L, 251982L, 254751L,
254889L, 257520L, 260289L, 263058L, 265827L, 268596L, 271365L,
274134L, 276903L, 279672L, 282441L, 282723L, 285210L, 287979L,
290748L, 292142L, 292162L, 293517L, 296286L, 299055L, 299778L,
301824L, 304593L, 307362L, 310131L, 310666L, 310683L, 311114L,
311125L, 312724L, 312900L, 315669L, 318438L, 321207L, 323976L,
324571L, 326745L, 329514L, 332283L, 335052L, 337821L, 340590L,
343359L, 346128L, 348897L, 351666L, 354435L, 357204L, 359973L,
362742L, 365511L, 366294L, 366840L, 368091L, 368280L, 368329L,
371049L, 373818L, 376587L, 376882L, 376953L, 376954L, 376955L,
379356L, 382125L, 384894L, 387663L, 390432L, 393201L, 393736L,
393758L, 393759L, 393760L, 395970L, 398739L, 401508L, 404277L,
407046L, 409815L, 412584L, 415353L, 418122L, 420891L, 423660L,
426429L, 429198L, 431967L, 434736L, 437505L, 437967L, 437968L,
437969L, 437971L, 437974L, 437975L, 437976L, 437977L, 437978L,
437979L, 437981L, 437982L, 437983L, 437986L, 437987L, 437989L,
437990L, 437995L, 440274L, 443043L, 445812L, 448581L, 451350L,
454119L, 456492L, 456888L, 459657L, 462426L, 465195L, 467964L,
470733L, 473502L, 476271L, 476569L, 478690L, 478752L, 478754L,
478761L, 479040L, 481809L, 484402L, 484578L, 487347L, 490116L,
492885L, 495654L, 498423L, 501192L, 503961L, 506730L, 507049L,
509499L, 511457L, 512268L, 515037L, 516020L, 516104L, 517806L,
520575L, 523344L, 526113L, 528882L, 531651L, 534420L, 537189L,
539958L, 542151L, 542727L, 545496L, 546211L, 548265L, 551034L,
553803L, 555911L, 556572L, 559340L, 562108L, 564876L, 567644L,
570412L, 572885L, 573180L, 573244L, 575948L, 578716L, 581484L,
584252L, 587020L, 589788L, 592556L, 593090L, 595324L, 598092L,
600860L, 603628L, 605895L, 606396L, 609164L, 611932L, 614700L,
617468L, 620236L, 623004L, 625772L, 628540L, 629075L, 629097L,
629098L, 629099L, 631308L, 634076L, 636844L, 639612L, 641803L,
642380L, 645148L, 647916L, 650684L, 653452L, 656220L, 658988L,
661756L, 664524L, 667292L, 670060L, 672828L, 675596L, 677254L,
677356L, 677357L, 677359L, 677360L, 677361L, 677363L, 677366L,
677368L, 677370L, 677377L, 677379L, 677380L, 677382L, 677384L,
677388L, 678364L, 679071L, 681132L, 683900L, 686668L, 689436L,
692204L, 694972L, 697740L, 700508L, 703276L, 706044L, 708812L,
711580L, 714348L, 717116L, 719884L, 722652L, 723762L, 723763L,
723764L, 723765L, 725420L, 728188L, 730486L, 730956L, 733724L,
736492L, 739260L, 742028L, 743160L, 743168L, 744796L, 747564L,
750332L, 753100L, 755868L, 757169L, 758636L, 761404L, 763351L,
764172L, 766940L, 767412L, 767413L, 769708L, 772476L, 775244L,
776780L, 778012L, 780780L, 783548L, 786316L, 789084L, 791852L,
792404L, 794620L, 796475L, 796567L, 796568L, 797388L, 800156L,
801270L, 802011L, 802197L, 802924L, 805692L, 808460L, 811228L,
813996L, 816764L, 819532L, 822300L, 825068L, 827836L, 830604L,
833372L, 836140L, 838908L, 841676L, 841944L, 841945L, 841951L,
844444L, 847212L, 849183L, 849980L, 850708L, 850726L, 853494L,
856262L, 859030L, 859697L, 861798L, 864566L, 867334L, 870102L,
872870L, 875638L, 878406L, 878460L, 881174L, 883942L, 886710L,
889478L, 892246L, 895014L, 897782L, 900550L, 903318L, 906086L,
908854L, 911622L, 914390L, 917158L, 919570L, 919926L, 922694L,
925462L, 928230L, 930998L, 933766L, 936534L, 939302L, 942069L,
944837L, 947605L, 950373L, 953141L, 955909L, 958677L, 961260L,
961261L, 961444L, 964212L, 966218L, 966980L, 969748L, 972516L,
975284L, 978052L, 980820L, 983560L, 983561L, 983588L, 986356L,
989124L, 991892L, 994660L, 997428L, 1000196L, 1002964L, 1002994L,
1005732L, 1008500L, 1011268L, 1014036L, 1016804L, 1019572L, 1022340L,
1025108L, 1027876L, 1028021L, 1028286L, 1030644L, 1032321L, 1033412L,
1036180L, 1038948L, 1041591L, 1041716L, 1044484L, 1047252L), class = "data.frame")
We can use data.table. Convert the 'data.frame' to 'data.table' (setDT(tot_docks)), grouped by 'id', get the index of the max value of 'tot' and subset the dataset (.SD)
library(data.table)
setDT(tot_docks)[, .SD[which.max(tot)], id]
Or using top_n from dplyr
library(dplyr)
# Keep, for each id, the single row with the largest tot.
# The ordering column is named explicitly (top_n(1) silently used the last
# column), and with_ties = FALSE guarantees one row per id even when the
# maximum is tied. ungroup() returns a plain, ungrouped result.
tot_docks %>%
  group_by(id) %>%
  slice_max(tot, n = 1, with_ties = FALSE) %>%
  ungroup()
# id tot
# (int) (int)
#1 72 39
#2 79 33
#3 82 27
#4 83 62
#5 116 39
#6 119 19
#7 120 19
#8 127 31
#9 128 30
#10 137 46
#.. ... ...
You can use dplyr arrange to do this:
library(dplyr)
df %>% arrange(id, desc(tot)) %>% group_by(id) %>% slice(1)
For your data, you will get output as follows:
Source: local data frame [380 x 2]
Groups: id [380]
id tot
(int) (int)
1 72 39
2 79 33
3 82 27
4 83 62
5 116 39
6 119 19
7 120 19
8 127 31
9 128 30
10 137 46

Resources