R keep rows based on list - r

I'm trying to figure out how to keep rows of my data frame based on a list I created from another data frame. I've looked at several other questions that tackled this issue and tried the code they suggested ("R - keep dataframe rows only if the row name is in a list" and "Select rows in a dataframe in r based on values in one row"), but it isn't working for me. I'm sure it's a simple error on my part, but I can't figure out what it is.
One code I tried:
A <- AllSiteBA[AllSiteBA$Cofecha %in% keeps,]
Output (it has no matches at all, although there SHOULD be 76 matches when I run this on the entire AllSiteBA data.frame):
> A
[1] Cofecha DBHinBark RBHinBark BAtotal MeanBAww PercentBAww
[7] Plot
<0 rows> (or 0-length row.names)
Another code I tried:
A <- subset(AllSiteBA, Cofecha %in% keeps)
This gave exactly the same output as the first code I tried.
Any help on how to fix the code would be greatly appreciated. Below are reproducible sections of my data.frame and list.
List of names to "keep":
keeps <- list(structure(c(69L, 166L, 50L, 232L, 252L, 234L, 148L, 307L,
194L, 240L, 245L, 297L, 248L, 221L, 257L, 218L, 265L, 45L, 208L,
216L, 223L, 258L, 205L, 269L, 270L, 142L, 4L, 58L, 207L, 220L,
231L, 256L, 206L, 268L, 203L, 267L, 111L, 144L, 290L, 74L, 36L,
266L, 169L, 78L, 143L, 186L, 272L, 185L, 312L, 73L, 150L, 129L,
225L, 131L, 123L, 12L, 300L, 302L, 264L, 284L, 128L, 130L, 1L,
3L, 10L, 124L, 303L, 56L, 51L, 55L, 108L, 2L, 11L, 298L, 310L,
121L), .Label = c("LB1A002", "LB1A003", "LB1A101", "LB1A102",
"LB1A103", "LB1A212", "LB1A228", "LB1A231", "LB1A233", "LB1B001",
"LB1B002", "LB1B003", "LB1B210", "LB1B216", "LB2A001", "LB2A002",
"LB2A003", "LB2A004", "LB2A008", "LB2A009", "LB2A011", "LB2B001",
"LB2B005", "LB2B008", "LB2B101", "LB2B102", "LB2B103", "LB2C003",
"LB2C004", "LB2C008", "LB2C009", "LB2C010", "LB2C001", "LB2D005",
"LB2D006", "LB2D007", "LB2D008", "LB2D009", "LB2D010", "LB2D101",
"SM1A005", "SM1A101", "SM1A301", "SM1A302", "SM1B003", "SM1C005",
"SM1C302", "SM1D006", "SM2A004", "SM2A005", "SM2A007", "SM2A210",
"SM2A301", "SM2B001", "SM2B005", "SM2B006", "SM2B101", "SM2C005",
"SM2C101", "SM2C301", "SM2D006", "SM2D101", "SM2D221", "IR1A004",
"IR1A009", "IR1A206", "IR1B001", "IR1B004", "IR1B005", "IR1B301",
"IR1B302", "IR1C005", "IR1C006", "IR1C007", "IR1C008", "IR1C204",
"IR1C205", "IR1D002", "IR1D101", "IR2A003", "IR2A101", "IR2A211",
"IR2A234", "IR2B002", "IR2B005", "IR2B101", "IR2B201", "IR2B210",
"IR2B229", "IR2C230", "IR2C256", "IR2C301", "IR2C302", "IR2C002",
"IR2C009", "IR2C101", "IR2C204", "IR2C215", "IR2D227", "IR2D228",
"IR2D237", "IR2D254", "IR2D301", "IR2D302", "IR2D003", "IR2D006",
"IR2D009", "IR2D011", "IR2D207", "IR2D216", "JA1A101", "JA1A224",
"JA1A301", "JA1B004", "JA1B101", "JA1B102", "JA1B219", "JA1B233",
"JA1C002", "JA1C232", "JA1D001", "JA1D101", "JA2A101", "JA2A102",
"JA2A206", "JA2A209", "JA2A210", "JA2A004", "JA2A005", "JA2A006",
"JA2A007", "JA2A008", "JA2B005", "JA2B206", "JA2C001", "JA2C002",
"JA2C007", "JA2C101", "JA2C202", "JA3N007", "JA3N008", "JA3N009",
"JA3N010", "JA3N011", "JA3N012", "JA3N001", "JA3N002", "JA3N003",
"JA3N004", "JA3N005", "JA3N006", "SF5A007", "SF5B223", "SF5B227",
"SF5B228", "SF5B301", "SF5B302", "SF5C201", "SF5C214", "SF5C216",
"SF5C301", "SF5C303", "SF5D004", "SF5D101", "SF5D207", "AP1A001",
"AP1A004", "AP1A005", "AP1A006", "AP1A008", "AP1A009", "AP1A010",
"AP1A101", "AP1B005", "AP1B007", "AP1B011", "AP1B101", "AP1B102",
"AP1C006", "AP1C007", "AP1C010", "AP1C011", "AP1C001", "AP1C002",
"AP1D001", "AP1D005", "AP1D007", "AP1D008", "AP1D009", "AP1D010",
"AP1D011", "AP1D012", "AP1D013", "AP1D101", "AP1D102", "AP1D103",
"AP1D104", "AP1C004", "AP1C005", "AP2A001", "AP2A002", "AP2A003",
"AP2B001", "AP2B003", "AP2B004", "AP2B101", "AP2B102", "AP2C001",
"AP2C002", "AP2C003", "AP2C004", "AP2C005", "AP2C007", "AP2C008",
"AP2C102", "AP2C103", "AP2C104", "AP2D001", "AP2D002", "AP2D005",
"AP2D006", "AP2D009", "AP2D101", "AP2D102", "AP2D103", "AP3A003",
"AP3A005", "AP3A008", "AP3A014", "AP3A015", "AP3A101", "AP3A102",
"AP3B101", "AP3B102", "AP3B103", "AP3B104", "AP3B003", "AP3B007",
"AP3B010", "AP3B012", "AP3C003", "AP3C004", "AP3C006", "AP3C007",
"AP3C009", "AP3C011", "AP3C101", "AP3C102", "AP3C103", "AP3C104",
"AP3C105", "AP3D006", "AP3D011", "AP3D101", "AP3D102", "BF1A101",
"BF1A102", "BF1A103", "BF1A104", "BF1B003", "BF1B005", "BF1B006",
"BF1B007", "BF1B101", "BF1C007", "BF1C101", "BF1C102", "BF1D003",
"BF1D007", "BF1D010", "BF1D101", "BF1D102", "BF1D103", "BF1D210",
"BF2A001", "BF2A002", "BF2B001", "BF2B214", "BF2B219", "BF2C001",
"BF2C004", "BF2C008", "BF2C101", "BF2C102", "BF2C201", "BF2C205",
"BF2C213", "BF2C219", "BF2C301", "BF2D004", "BF2D013", "BF2D014",
"BF2D015", "BF3A001", "BF3A002", "BF3A004", "BF3A005", "BF3A007",
"BF3A008", "BF3A009", "BF3A101", "BF3B003", "BF3B101", "BF3C002",
"BF3C003", "BF3C007", "BF3C009", "BF3C010", "BF3D002", "BF3D003",
"BF3D004", "BF3D009", "BF3D010"), class = "factor"))
Small portion of data.frame I'm trying to select rows from:
AllSiteBA <- structure(list(Cofecha = structure(30:45, .Label = c("LB1A002",
"LB1A003", "LB1A101", "LB1A102", "LB1A103", "LB1A212", "LB1A228",
"LB1A231", "LB1A233", "LB1B001", "LB1B002", "LB1B003", "LB1B210",
"LB1B216", "LB2A001", "LB2A002", "LB2A003", "LB2A004", "LB2A008",
"LB2A009", "LB2A011", "LB2B001", "LB2B005", "LB2B008", "LB2B101",
"LB2B102", "LB2B103", "LB2C001", "LB2C003", "LB2C004", "LB2C008",
"LB2C009", "LB2C010", "LB2D005", "LB2D006", "LB2D007", "LB2D008",
"LB2D009", "LB2D010", "LB2D101", "SM1A005", "SM1A101", "SM1A301",
"SM1A302", "SM1B003", "SM1C005", "SM1C302", "SM1D006", "SM2A004",
"SM2A005", "SM2A007", "SM2A210", "SM2A301", "SM2B001", "SM2B005",
"SM2B006", "SM2B101", "SM2C005", "SM2C101", "SM2C301", "SM2D006",
"SM2D101", "SM2D221", "IR1A004", "IR1A009", "IR1A206", "IR1B001",
"IR1B004", "IR1B005", "IR1B301", "IR1B302", "IR1C005", "IR1C006",
"IR1C007", "IR1C008", "IR1C204", "IR1C205", "IR1D002", "IR1D101",
"IR2A003", "IR2A101", "IR2A211", "IR2A234", "IR2B002", "IR2B005",
"IR2B101", "IR2B201", "IR2B210", "IR2B229", "IR2C002", "IR2C009",
"IR2C101", "IR2C204", "IR2C215", "IR2C230", "IR2C256", "IR2C301",
"IR2C302", "IR2D003", "IR2D006", "IR2D009", "IR2D011", "IR2D207",
"IR2D216", "IR2D227", "IR2D228", "IR2D237", "IR2D254", "IR2D301",
"IR2D302", "JA1A101", "JA1A224", "JA1A301", "JA1B004", "JA1B101",
"JA1B102", "JA1B219", "JA1B233", "JA1C002", "JA1C232", "JA1D001",
"JA1D101", "JA2A004", "JA2A005", "JA2A006", "JA2A007", "JA2A008",
"JA2A101", "JA2A102", "JA2A206", "JA2A209", "JA2A210", "JA2B005",
"JA2B206", "JA2C001", "JA2C002", "JA2C007", "JA2C101", "JA2C202",
"JA3N001", "JA3N002", "JA3N003", "JA3N004", "JA3N005", "JA3N006",
"JA3N007", "JA3N008", "JA3N009", "JA3N010", "JA3N011", "JA3N012",
"SF5A007", "SF5B223", "SF5B227", "SF5B228", "SF5B301", "SF5B302",
"SF5C201", "SF5C214", "SF5C216", "SF5C301", "SF5C303", "SF5D004",
"SF5D101", "SF5D207", "AP1A001", "AP1A004", "AP1A005", "AP1A006",
"AP1A008", "AP1A009", "AP1A010", "AP1A101", "AP1B005", "AP1B007",
"AP1B011", "AP1B101", "AP1B102", "AP1C001", "AP1C002", "AP1C004",
"AP1C005", "AP1C006", "AP1C007", "AP1C010", "AP1C011", "AP1D001",
"AP1D005", "AP1D007", "AP1D008", "AP1D009", "AP1D010", "AP1D011",
"AP1D012", "AP1D013", "AP1D101", "AP1D102", "AP1D103", "AP1D104",
"AP2A001", "AP2A002", "AP2A003", "AP2B001", "AP2B003", "AP2B004",
"AP2B101", "AP2B102", "AP2C001", "AP2C002", "AP2C003", "AP2C004",
"AP2C005", "AP2C007", "AP2C008", "AP2C102", "AP2C103", "AP2C104",
"AP2D001", "AP2D002", "AP2D005", "AP2D006", "AP2D009", "AP2D101",
"AP2D102", "AP2D103", "AP3A003", "AP3A005", "AP3A008", "AP3A014",
"AP3A015", "AP3A101", "AP3A102", "AP3B003", "AP3B007", "AP3B010",
"AP3B012", "AP3B101", "AP3B102", "AP3B103", "AP3B104", "AP3C003",
"AP3C004", "AP3C006", "AP3C007", "AP3C009", "AP3C011", "AP3C101",
"AP3C102", "AP3C103", "AP3C104", "AP3C105", "AP3D006", "AP3D011",
"AP3D101", "AP3D102", "BF1A101", "BF1A102", "BF1A103", "BF1A104",
"BF1B003", "BF1B005", "BF1B006", "BF1B007", "BF1B101", "BF1C007",
"BF1C101", "BF1C102", "BF1D003", "BF1D007", "BF1D010", "BF1D101",
"BF1D102", "BF1D103", "BF1D210", "BF2A001", "BF2A002", "BF2B001",
"BF2B214", "BF2B219", "BF2C001", "BF2C004", "BF2C008", "BF2C101",
"BF2C102", "BF2C201", "BF2C205", "BF2C213", "BF2C219", "BF2C301",
"BF2D004", "BF2D013", "BF2D014", "BF2D015", "BF3A001", "BF3A002",
"BF3A004", "BF3A005", "BF3A007", "BF3A008", "BF3A009", "BF3A101",
"BF3B003", "BF3B101", "BF3C002", "BF3C003", "BF3C007", "BF3C009",
"BF3C010", "BF3D002", "BF3D003", "BF3D004", "BF3D009", "BF3D010"
), class = "factor"), DBHinBark = c(144, 147.6, 135.9, 144, 163.8,
119.7, 234.9, 180.9, 144.9, 202.5, 152.1, 180, 184.5, 68.4, 88.2,
231.3), RBHinBark = c(72, 73.8, 67.95, 72, 81.9, 59.85, 117.45,
90.45, 72.45, 101.25, 76.05, 90, 92.25, 34.2, 44.1, 115.65),
BAtotal = c(16286.0163162095, 17110.4958922176, 14505.3694541364,
16286.0163162095, 21072.5782991454, 11253.2555709933, 43336.7077139261,
25702.0056715304, 16490.2276926745, 32206.2334378166, 18169.7231252836,
25446.9004940773, 26735.14983159, 3674.53243134477, 6109.80080862797,
42018.5582683328), MeanBAww = c(7287.19846816407, 3511.25221054135,
5836.77552643544, 3226.29613334421, 6580.83174422834, 1240.15336040198,
15513.5106521598, 4648.51222574233, 4555.16310970877, 4909.94773909597,
1791.58819676346, 6095.75422479859, 2391.72518367973, 3568.38437887589,
3398.70860742085, 7723.82631584503), PercentBAww = c(44.7451256751543,
20.5210429473197, 40.2387236318961, 19.8102228973765, 31.2293619262301,
11.0203962984599, 35.7976216249915, 18.0861847326233, 27.6234094192182,
15.2453336357263, 9.86029442721902, 23.9548004135802, 8.9459950617284,
97.1112500854964, 55.62715895126, 18.3819403476917), Plot = c("LB2",
"LB2", "LB2", "LB2", "LB2", "LB2", "LB2", "LB2", "LB2", "LB2",
"LB2", "SM1", "SM1", "SM1", "SM1", "SM1")), .Names = c("Cofecha",
"DBHinBark", "RBHinBark", "BAtotal", "MeanBAww", "PercentBAww",
"Plot"), row.names = 30:45, class = "data.frame")

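Your keeps object is a list whose single element is the factor of names, so %in% is being handed the list itself rather than the names stored inside it. Extracting the first element with keeps[[1]] fixes that; with the sample data above I get two rows: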
> AllSiteBA[AllSiteBA$Cofecha %in% keeps[[1]],]
Cofecha DBHinBark RBHinBark BAtotal MeanBAww PercentBAww Plot
36 LB2D007 234.9 117.45 43336.71 15513.511 35.79762 LB2
45 SM1B003 231.3 115.65 42018.56 7723.826 18.38194 SM1

Related

data frame sorting using column and row characters to match taxa annotations to different samples

I am looking to sort my taxonomic annotations (16S dataset) based on the columns 'sample', 'Family', and 'taxa'. Briefly, there are two sample types, m3ofair and m3NC, and I wish to know the unique ASV identifiers in column 'taxa' whenever an annotation in 'Family' is identical across both sample types.
For example, the annotation 'f__Beijerinckiaceae' in column 'Family' is present in both samples, m3ofair and m3NC. Since this is the case, I would like to know the ASV taxa identifier for all three annotations of 'f__Beijerinckiaceae', which are 2ca928ad9749bb9726c35d6528fefec1, bcc8b318ede81a8b211e7bdd1531baec, and e31f3d32519dc9021ff790f87d76114c.
Thank you in advance!
> dput(example)
structure(list(taxa = structure(c(203L, 150L, 150L, 202L, 175L
), .Label = c("8c54f0af4445cdb1a6a36304a80b5a6d", "e6ff53b2675c61cf25ad3d79917b318e",
"38bbf752453c1c64f2e61966016d45c5", "903bcbbcb2ac130bb32d847ca7d191ed",
"7fc0ccbd86b09190f44369a934b3456c", "03c095b4239af5a7f63d39522b34186b",
"513c1bcb314fbf047f366f84a646c4a2", "0d3dca27868f7cfabbdbe1eb90657340",
"4b36fd18a35f063166412a83379bf797", "a6a6601f3a675697e92bc24d32042929",
"c46477febff8e68314f5baa81f4c082f", "f8753b9f13abb306b57aedce2cf6e7d0",
"230363069727610f8f1546cb314f2d1a", "100f4d61f5eaf8fb7369d9583b32ba46",
"69a70c343722f6851fc2191d4b3c3a1b", "0f222d9f6f748209e5d39cbbbb704c3e",
"06b8e39b562a2b31643b4f2c00d5dec7", "51141f001e298a5e44d99f7d7bdb78e2",
"56c98126dde3abb2d263088db55e12c8", "d8adf2a20249cefe6c627f9c17abb202",
"4e3acf7a1c4dddbc3f68097207cacb66", "fadd6b74d7cc847716a19679be001724",
"9815d07f1b564c9be5aab03db01608da", "bdd1f036611254ef872b334f7da681e6",
"028ecb2ed02be346c5f0347a9a3fba2a", "0310f41e594c49368dde5c5993a7a5d0",
"f510252f2eef402050f8169436cb4501", "3cb9f6500275fabe7c87a599a8c31749",
"f148144695ed0a63b976ea05243aba8c", "9ed070c666e12720a6bb6d3df1b09e45",
"e54bcf1c8ec24811e7c355b8d82fa59e", "c6608772f41c7d2202f2504190f2af8a",
"78732fda4b5a16d2bdddcd553953752c", "46271ec11fc9f127649526834a9fd805",
"dd0eaf58cb0b5378f465b4f59adf79d2", "585ab9ce79a8c90d675a66b9678b2137",
"4ef5dabd754e48c26c66c55b3615e69f", "bc3043f80a5fe9e5faa189fa7b050f61",
"f570f293e6b304776e5dce910a0b96e8", "432f8ae2390df378d7bab37a0026ff03",
"0d959da770b454f90e53757f8ab0893f", "0bc8e7f6bf04c1e745749edeb6d6628d",
"c78a82f200050febc553de67fffef578", "da36821195d462457b39c41aeb611ba1",
"2de266b1ffa8e9ec6383c98096caf8f0", "3845a467d4d557e0f035d045d0d86f70",
"630afd8b62db24d65410329610383a4f", "bfbf3b84b315e741c4345ebb43e2b9cc",
"c85df4c87e8960b573d8ae4bf7f65e15", "6c46511a89ec2a070fef341371ad02b7",
"6307431a0a1a9d90045dc5dccbfc5fe9", "0d1bf49752cd0c9211d28bc8577a9145",
"11630fb56d652f032964d7567e083d40", "04caec30dc35c55d62d69e6125cec2c1",
"854aeab59d32bfddbea09ed0571ca007", "f6436e7085d15342388e67b55d692fb6",
"ecbf086d6ccbe5e8c2a69d0afb144662", "da247df6fe91b74bf38d3ecc1c05b4b0",
"d4c4725af861d3daa18bddefb558cf50", "c06b07ff323a7b0bccf5d0e604b0afab",
"ec8bf881537838f56323ada7de070b19", "1d6f8731081df8944cd50ae875fb9b28",
"8101da5193e402ec2741419cd0515110", "381f3c0f833ce81483700d6dc0b940ef",
"d0484307102691e2482cb79f8784827c", "d0c9e10ee52d3c93502663df3328e2c6",
"2f09559bc1ffcc3ea06ae2766194920b", "3bb215c036a5b86e071708ce88665689",
"d8a6777829870b41d1544fb173608685", "10a4a0714d1b0063c37636f2ec22bbae",
"5740f0ff513cc2099d6beacec9336f45", "763f0806ad9068d1f4092259a970c925",
"50e96bbd1a8267119529843a1acb43a5", "500d6b485cef4c52aad211d6d6e6dbc7",
"4ea5683e2c2a183c11c45dc8fbd0c67c", "7e598ad34909a3fb8ff1623f1ede11a6",
"8659c05e82a2d1399cbc3fd26ab938e4", "9c886b1c9e3e2ec63e0dbbd085996334",
"8da02c7114ba0d9ee40312926e037a8f", "5cb28bd0b2eea6847a6328712217c141",
"a049763053c277b16c2a318f41eb23b4", "0ce84d495f51f4e5997076bddebe65a5",
"576fd9e4a384474af1d78a589118b213", "e74212c56f84c309729e86ad3b126716",
"095249b545f554a6a66a4e378eed9c23", "12f69cdd5a43e9c04fa76fe778493dbc",
"6eaaffd8bd37b95220a3bebdf503be32", "78d6bb358b0ec0c94a8caa7c7a28171c",
"57799c0748039c006705cb9c46ace954", "b335608a8c2309ce02373311b7737b55",
"ff4bc2548279113788b4bf164d2db0d0", "254ba9ed4527552e061da0653ae95392",
"2480590c44af81ddbc2f34d381864702", "e4b865c957d0d23766a0e8e87f6d153d",
"f98cc8f203873fd4078c589a6332e72e", "4eb88a4d445e0987c56d0197301bd4e7",
"c5f7ff7e9d319f880b2c41c4ce6d52de", "15e687c6e5701e1b3b10fc0223d96c59",
"5c3138b23334b1d6b4bf3943db5eae74", "e22be263cc3a006762b66711df3d6a23",
"e209a5a9add3e7a68f273a2f17942555", "b19efe6ae9dabd64b44e12f4f1b88643",
"ea0c2938cf670cfcec8bd497e1767ae8", "ea794ad075da6b9b427ecf13244a6e97",
"414cb56f046e21beb8cf72b9285ab2bd", "6f318147a66f5851564f04aba9092057",
"32b53bb775ee918b4b75ee4c87ecdb55", "f467d11f3c1a16ea55e2ab7ab88a85c2",
"dfb69af34af0dcc91f9596d47ea6882b", "db4c51d23607a40db2843c49a38ee271",
"02047d2e2da6d0e73846b63774619e77", "cb2fe0146e2fbcb101050edb996a0ee2",
"4b88b52dd068a7d432309dec77813d29", "d2da0518d0f3ac5dd8d7df770389fee5",
"5f86daf7f135fee1745dba9e874fe013", "9a2740e3bc27419a895dfb6e0b986854",
"3a2e70f61b311dc0cd2261732a9cb627", "313402c6104b2605890eda8d9fa89fba",
"e33cca4021036f12a70132aed9f9c52d", "d29fe3c70564fc0f69f2c03e0d1e5561",
"eb5f185c6ab0e5f7953654a2a5d12ce7", "89fa7b79c6b2c5019ca0d1bc2f509f0b",
"fcd4f95c05b868060121ff709085bf21", "7c14b01c89686b9e327fc7d3dee3af30",
"0161e22acf37955da1e4b8bafe9f54e6", "ac70e87927f15bf812e8cad1cd9003cb",
"bfbed36e63b69fec4627424163d20118", "3f0ecac3d878543cf67dae94e6309453",
"0b607a251255e7aa0ef0c4e242b3e87c", "3d28edbfcb341fbf3d2fd87056633569",
"fa2102f32f8f05f458f0833df8c78500", "3b588d7e9f41b7e5ea9539d52ccda834",
"646eeb6a205282fe7afb5d73d1af30be", "a3e8652354170f4ddc4b96f65ff42aaf",
"09b2620539f4861b9b2e6d271d4bb319", "a301810850607af459b6de0b8a6dfcfb",
"ee2babc92e666c2593d83df7ad086362", "1054403160f296f2f44fbdf81f31b32c",
"b62d8ace7310a137d1ff25156b20b881", "4043cc4801dcf236434002ac21298d76",
"feb46b2df4fc3958ccd80e768b79502b", "c2dfc10913176eda134cd88ed68e0a72",
"668584d4ca79707c62229c2b9a520634", "b48eec9298fe8ed0fdf2e968ee6cc4af",
"a75e4ea5f1add4aa39f660dd13084fd0", "2a8bd6428138528c12a10c06b52954db",
"e6922be77906565e6bae2a89da15d175", "e21e4030f895965d1846404a94ebcb11",
"d03ede724dcc3efd39db449fd5bced6e", "6639f71cd3265109c47115f4a5913958",
"8c08537b3e07c84c47eb29e4de593a02", "8c1456c47986f85320cdfe1c515ffb4c",
"3e00fb174071bfc76c017d666f43af65", "1f70bd0eeef30f2afcd750b70f7e7066",
"c1efd1322ab8cef2a84ace9e85e37467", "b18b389447d758e7cb173b4b4f2ad960",
"9af604229897c70de75a7740fe13e80d", "c9a7a8206f0d8eda7576e0bb44b058a7",
"ae65f0c53471069e8a19b8d38ffcd3e7", "b19b302de47cc21f8f4cf1464a10b2b3",
"8b9a8bca0a4a02cde893f0bf550e4b68", "2492e30becb2716ccd9557e27900a56d",
"997d9c1623cbaab34cdb0668950d5a08", "1c77f5f31494131fcbb21fd9f52c7618",
"bfbd7ffe99a84ab5d92dc545d1a5a3c7", "e91fee71ae5261d22530ee9afa22dae5",
"59988344137e8b4b1c56e5f886bc294d", "3a4d3ac6f2aeecdba4936b18e28d04e5",
"e48dc10d2f3df1f6795902f9d53625ee", "9259758b55aa71398e2cafd0db077fcd",
"61f0d618a9ccef240bf7f6b65ac3cd64", "955d8fcbb35e8bc04c8df0c6aae34d34",
"35a7084609b22c35a5162738624ef6d0", "bc2942a64ee2fa191613f41f7e0134df",
"e31f3d32519dc9021ff790f87d76114c", "594a57ff90b7936ce2cd8036ad681cef",
"a64788b07ff4d444a278dbdd45b9cda4", "ef1e892b6e84a8b51d80eb7d6037167f",
"ce036ee8825f6a2f95a01b9488144638", "5a4979556bf389cd5ba711b146417234",
"ac809fd715cced98911f73f1dfb1ffb9", "e206fe2391a57543027d38d4ab4355e5",
"780d57e7f37aba0e43ea58166978f258", "9995306be33bb6a067fbd14fd12ab247",
"d867858df4524fe62a46ed814274aaa4", "12c0941446564e8d3cb6c3fb07bebed8",
"d174fd25211450a7f7b6dbae6db61e2c", "491e6acf93836613376f546052749de2",
"be94715fa59eebb39dae561eee3e2f3a", "21bc9a5af6203656e6d1066a13121fb0",
"0b925631436d3775231b60f9d0011cae", "47c2b089e44e1baab72de03674f2526e",
"c4d8bbf6d24dd9c56321087cc3936763", "147f2c2c24c7e1b1c371d42a36816214",
"7c999fc012772119e945f6e87a941e92", "3423114aff9da85e4989c2c268769033",
"82eab43c427e6afb5bf1fd10be00cc4b", "f637087afddcd520f0691150409f4d26",
"54edafdb5df25d839fa4696a3d8a08d9", "bfa4fe32a85423441482e307c7ed5e65",
"34d0e4410278fca5bbc2eac3de76feac", "bcc8b318ede81a8b211e7bdd1531baec",
"2ca928ad9749bb9726c35d6528fefec1", "959a178855242ad3833af85d38ccff45",
"3830e37a7ac6cf83a20dc9b8f225bb69", "9205f49b7a5898bf9dec93aa53ff96ec",
"bf2b830db0f89f72b4bf68394b70fbf5", "cba5161b7eb1f07a14ca874577e69cf3",
"179e215ab39a0a69ca15100615efae28", "beb5157c4f4ffcb17a5aa73fb6efd009",
"38401e17915bca5aab62c763930d1cfb", "1fc7138b0d9568c4588eb34dabc430d3",
"d2c5f354da4c7a6e78626761b1801770", "68089dfb5a592ef9b73c307c746a940e",
"f8e8cca8fe936d9be35ed4fcf790d35b", "e68f3ead96583e125709c8a5ecf20a63",
"06b48070c8d8e4ad997399e958871cea", "aff65597908099c9ee322aff6bec0a68",
"c802baf89ae3088ec5027d645223e3d1", "59afa99ca520b0db55db192cf8be4edb",
"e131d6b9e4f997788460a450a69c3a99", "1b448e00c078f81e49a61933072345d2",
"0f48376fe94d49cef9a1ee1b46af7a4a", "79e9e337b10e2d298bb1b3bde946782d",
"1868b3eae7d694c939f6ee777c98ee82", "909755d7142ea53c7f61c97c3586f26c",
"c1bee78e8c05c7d1abd7122bff925e54", "3cdd97ba69a504117de24626ba790e96",
"bfd74cfd3e6c2b27cffc14334ee55878", "e2603d057e6cc94b2b3c774e5b24c09c",
"a7b2fd6bf0b1c028de90c11becdc1b39", "8bf361e85f0f9dbff5b9d6951fdae8c6",
"0c7b675ddf75b3a6ad648b004352b8c1", "f3ed29d9b4cd4e1ea7861fbe19314125",
"4aafbcb8ea0c9be6130d630345116249", "1d926e22515426e147f62e2d27fb25d5",
"6357497de4b956c00e393b9cbebaf228", "c8c6649cf27d3428a978248cb233b4ea",
"6013a4618dd16c1f100b084d722a817d", "fd188dccbbfa7e8d7aa3b6abe7fe59c4",
"c04b4c2c12d6408b4e741b3b91e91354", "4a43d5d77936e5dc6536931101c8d814",
"55982fe768843d5432c485e19f3ca7ae", "498128a80a796620b238d8ad5cd1d2f9",
"8c2d1b95d59fa2a627dd63d7a36c7483", "2ea78aa082eeffffc40ad0ac7e84dbb8",
"b8b381bb5c8bfaf50f1916a102161d01", "fe6a40c9234f9a57e1db10d0d87e3fa0",
"71727d41e7a3452a196977be7f013cb0"), class = "factor"), sample = c("m3ofair",
"m3NC", "m3ofair", "m3NC", "m3NC"), value = c(0.00110121133246571,
0, 0.00200220242266493, 0.00943847890021202, 0), Kingdom = structure(c(2L,
2L, 2L, 2L, 2L), .Label = c("d__Archaea", "d__Bacteria"), class = "factor"),
Phylum = structure(c(15L, 15L, 15L, 15L, 15L), .Label = c("p__Acidobacteriota",
"p__Actinobacteriota", "p__Armatimonadota", "p__Bacteroidota",
"p__Chloroflexi", "p__Cyanobacteria", "p__Deinococcota",
"p__Desulfobacterota", "p__Firmicutes", "p__Fusobacteriota",
"p__Gemmatimonadota", "p__Myxococcota", "p__Patescibacteria",
"p__Planctomycetota", "p__Proteobacteria", "p__SAR324_clade(Marine_group_B)",
"p__Synergistota", "p__Thermoplasmatota", "p__Verrucomicrobiota",
"p__WPS-2"), class = "factor"), Class = structure(c(4L, 4L,
4L, 4L, 4L), .Label = c("c__Acidimicrobiia", "c__Acidobacteriae",
"c__Actinobacteria", "c__Alphaproteobacteria", "c__Bacilli",
"c__Bacteroidia", "c__Chloroflexia", "c__Clostridia", "c__Coriobacteriia",
"c__CPR2", "c__Cyanobacteriia", "c__Deinococci", "c__Desulfitobacteriia",
"c__Desulfotomaculia", "c__Fimbriimonadia", "c__Fusobacteriia",
"c__Gammaproteobacteria", "c__Gemmatimonadetes", "c__KD4-96",
"c__Ktedonobacteria", "c__Myxococcia", "c__Phycisphaerae",
"c__Planctomycetes", "c__Polyangia", "c__Rhodothermia", "c__SAR324_clade(Marine_group_B)",
"c__Sericytochromatia", "c__Synergistia", "c__Syntrophia",
"c__Thermoanaerobacteria", "c__Thermoanaerobaculia", "c__Thermoleophilia",
"c__Thermoplasmata", "c__TK10", "c__Verrucomicrobiae", "c__Vicinamibacteria",
"c__WPS-2"), class = "factor"), Order = structure(c(72L,
7L, 7L, 72L, 72L), .Label = c("o__Acetobacterales", "o__Acidobacteriales",
"o__Actinomarinales", "o__Actinomycetales", "o__Alteromonadales",
"o__Arctic97B-4_marine_group", "o__Azospirillales", "o__Bacillales",
"o__Bacteroidales", "o__Balneolales", "o__Bifidobacteriales",
"o__Burkholderiales", "o__C0119", "o__Caulobacterales", "o__Chitinophagales",
"o__Chthoniobacterales", "o__Clostridia", "o__Clostridiales",
"o__Corynebacteriales", "o__CPR2", "o__Cyanobacteriales",
"o__Cytophagales", "o__Deinococcales", "o__Desulfitobacteriales",
"o__Desulfotomaculales", "o__Elsterales", "o__Enterobacterales",
"o__Eubacteriales", "o__Fimbriimonadales", "o__Flavobacteriales",
"o__Frankiales", "o__Fusobacteriales", "o__Gaiellales", "o__Gemmatales",
"o__Gemmatimonadales", "o__Haliangiales", "o__Halothiobacillales",
"o__IMCC26256", "o__Isosphaerales", "o__KD4-96", "o__Kiloniellales",
"o__Kineosporiales", "o__Ktedonobacterales", "o__Lachnospirales",
"o__Lactobacillales", "o__Legionellales", "o__Marine_Group_II",
"o__Methylococcales", "o__Micrococcales", "o__Micromonosporales",
"o__Micropepsales", "o__Microtrichales", "o__Myxococcales",
"o__Nitriliruptorales", "o__Nitrococcales", "o__Nitrosococcales",
"o__Oceanospirillales", "o__OPB41", "o__Oscillospirales",
"o__Paenibacillales", "o__Parvibaculales", "o__PeM15", "o__Peptostreptococcales-Tissierellales",
"o__Phycisphaerales", "o__Pirellulales", "o__Polyangiales",
"o__Propionibacteriales", "o__Proteinivoracales", "o__Pseudomonadales",
"o__Pseudonocardiales", "o__Puniceispirillales", "o__Rhizobiales",
"o__Rhodobacterales", "o__Rhodospirillales", "o__Rhodothermales",
"o__Rickettsiales", "o__Salinisphaerales", "o__SAR324_clade(Marine_group_B)",
"o__SAR86_clade", "o__Sericytochromatia", "o__Solibacterales",
"o__Solirubrobacterales", "o__Sphingobacteriales", "o__Sphingomonadales",
"o__Staphylococcales", "o__Streptomycetales", "o__Streptosporangiales",
"o__Synechococcales", "o__Synergistales", "o__Syntrophales",
"o__Thalassobaculales", "o__Thermales", "o__Thermoanaerobacterales",
"o__Thermoanaerobaculales", "o__Thermomicrobiales", "o__Tistrellales",
"o__TK10", "o__Vicinamibacterales", "o__WPS-2", "o__Xanthomonadales"
), class = "factor"), Family = structure(c(17L, 13L, 13L,
17L, 17L), .Label = c("f__67-14", "f__Acetobacteraceae",
"f__Acidobacteriaceae_(Subgroup_1)", "f__Actinomarinaceae",
"f__Actinomycetaceae", "f__Aerococcaceae", "f__Alcaligenaceae",
"f__Alcanivoracaceae1", "f__Algiphilaceae", "f__Alkalibacteraceae",
"f__Anaeromyxobacteraceae", "f__Arctic97B-4_marine_group",
"f__Azospirillaceae", "f__Bacillaceae", "f__Bacteroidetes_vadinHA17",
"f__Balneolaceae", "f__Beijerinckiaceae", "f__Beutenbergiaceae",
"f__Bifidobacteriaceae", "f__Bogoriellaceae", "f__Brevibacteriaceae",
"f__C0119", "f__Carnobacteriaceae", "f__Caulobacteraceae",
"f__Cellulomonadaceae", "f__Chitinophagaceae", "f__Chroococcidiopsaceae",
"f__Chthoniobacteraceae", "f__Clostridiaceae", "f__Comamonadaceae",
"f__Corynebacteriaceae", "f__CPR2", "f__Crocinitomicaceae",
"f__Cyanobiaceae", "f__Cyclobacteriaceae", "f__Deinococcaceae",
"f__Demequinaceae", "f__Dermabacteraceae", "f__Dermacoccaceae",
"f__Desulfitobacteriaceae", "f__Desulfotomaculales", "f__Devosiaceae",
"f__Dietziaceae", "f__Dysgonomonadaceae", "f__Endozoicomonadaceae",
"f__Enterobacteriaceae", "f__Eubacteriaceae", "f__Family_III",
"f__Fimbriimonadaceae", "f__Flavobacteriaceae", "f__Fodinicurvataceae",
"f__Frankiaceae", "f__Fusobacteriaceae", "f__Geminicoccaceae",
"f__Gemmataceae", "f__Gemmatimonadaceae", "f__Geodermatophilaceae",
"f__Haliangiaceae", "f__Halomonadaceae", "f__Halorhodospiraceae",
"f__Hungateiclostridiaceae", "f__Hymenobacteraceae", "f__Hyphomicrobiaceae",
"f__Hyphomonadaceae", "f__Idiomarinaceae", "f__IMCC26256",
"f__Intrasporangiaceae", "f__Isosphaeraceae", "f__JG30-KF-CM45",
"f__Jonesiaceae", "f__Kangiellaceae", "f__KD4-96", "f__Kineosporiaceae",
"f__Ktedonobacteraceae", "f__Labraceae", "f__Lachnospiraceae",
"f__Lactobacillaceae", "f__Legionellaceae", "f__Marine_Group_II",
"f__Marinilabiliaceae", "f__Marinobacteraceae", "f__Marinococcaceae",
"f__Methylomonadaceae", "f__Methylophagaceae", "f__Methylophilaceae",
"f__Methylopilaceae", "f__Microbacteriaceae", "f__Micrococcaceae",
"f__Micromonosporaceae", "f__Micropepsaceae", "f__Microscillaceae",
"f__Moraxellaceae", "f__Morganellaceae", "f__Mycobacteriaceae",
"f__Myxococcaceae", "f__Nakamurellaceae", "f__Nitriliruptoraceae",
"f__Nitrosomonadaceae", "f__Nocardiaceae", "f__Nocardioidaceae",
"f__Nocardiopsaceae", "f__Nostocaceae", "f__Oceanibaculaceae",
"f__OPB41", "f__Oscillospiraceae", "f__Oxalobacteraceae",
"f__Paenibacillaceae", "f__Parvibaculaceae", "f__PeM15",
"f__Peptostreptococcales-Tissierellales", "f__Phaselicystidaceae",
"f__Phycisphaeraceae", "f__Pirellulaceae", "f__Planococcaceae",
"f__Prevotellaceae", "f__Prolixibacteraceae", "f__Promicromonosporaceae",
"f__Proteinivoracales", "f__Pseudomonadaceae", "f__Pseudonocardiaceae",
"f__Rhizobiaceae", "f__Rhizobiales_Incertae_Sedis", "f__Rhodobacteraceae",
"f__Rhodobiaceae", "f__Rhodothermaceae", "f__Rickettsiaceae",
"f__Ruminococcaceae", "f__S25-593", "f__Salisediminibacteriaceae",
"f__SAR116_clade", "f__SAR324_clade(Marine_group_B)", "f__SAR86_clade",
"f__SC-I-84", "f__Sedimentibacteraceae", "f__Sericytochromatia",
"f__Solibacteraceae", "f__Solirubrobacteraceae", "f__Sphingobacteriaceae",
"f__Sphingomonadaceae", "f__Spirosomaceae", "f__Staphylococcaceae",
"f__Stappiaceae", "f__Streptococcaceae", "f__Streptomycetaceae",
"f__Streptosporangiaceae", "f__Synergistaceae", "f__Syntrophaceae",
"f__Syntrophobotulaceae", "f__Thalassospiraceae", "f__Thermaceae",
"f__Thermoanaerobaculaceae", "f__Thermotaleaceae", "f__Thioalkalibacteraceae",
"f__Tistrellaceae", "f__TK10", "f__TRA3-20", "f__uncultured",
"f__Vicinamibacteraceae", "f__Weeksellaceae", "f__WPS-2",
"f__Xanthobacteraceae", "f__Xanthomonadaceae"), class = "factor"),
Genus = structure(c(125L, 28L, 28L, 125L, NA), .Label = c("g__67-14",
"g__Acetobacterium", "g__Acidiphilium", "g__Acinetobacter",
"g__Actinomyces", "g__Actinotalea", "g__Advenella", "g__Aerococcus",
"g__Alcanivorax", "g__Algiphilus", "g__Algoriphagus", "g__Aliidiomarina",
"g__Aliihoeflea", "g__Aliterella", "g__Alkalibacter", "g__Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium",
"g__Altererythrobacter", "g__Amorphus", "g__Amycolatopsis",
"g__Anaerobacillus", "g__Anaerobranca", "g__Anaeromyxobacter",
"g__Aquibacillus", "g__Arctic97B-4_marine_group", "g__Arenimonas",
"g__Arthrobacter", "g__Aureimonas", "g__Azospirillum", "g__Bacillus",
"g__Bacteroidetes_vadinHA17", "g__Bauldia", "g__Bifidobacterium",
"g__Blastococcus", "g__Blastopirellula", "g__Bosea", "g__Brachybacterium",
"g__Bradyrhizobium", "g__Brevibacterium", "g__Brevundimonas",
"g__C0119", "g__Candidatus_Actinomarina", "g__Candidatus_Alysiosphaera",
"g__Candidatus_Solibacter", "g__Candidatus_Udaeobacter",
"g__Cecembia", "g__Chromohalobacter", "g__Chryseobacterium",
"g__Chthoniobacter", "g__Citricoccus", "g__Cloacibacterium",
"g__Clostridium_sensu_stricto_9", "g__Conexibacter", "g__Corynebacterium",
"g__CPR2", "g__Craurococcus-Caldovatus", "g__Curtobacterium",
"g__Dehalobacter", "g__Deinococcus", "g__Demequina", "g__Dermacoccus",
"g__Desulfohalotomaculum", "g__Desulfosporosinus", "g__Devosia",
"g__Dietzia", "g__DSSD61", "g__Dyadobacter", "g__Egicoccus",
"g__Ellin6067", "g__Endozoicomonas", "g__Enhydrobacter",
"g__Ercella", "g__Escherichia-Shigella", "g__Faecalibacterium",
"g__Fermentimonas", "g__Fimbriiglobus", "g__Fimbriimonadaceae",
"g__Finegoldia", "g__Flaviflexus", "g__Flavobacterium", "g__Fluviicola",
"g__Fusicatenibacter", "g__Fusobacterium", "g__Gardnerella",
"g__Gemmatimonas", "g__Geobacillus", "g__Georgenia", "g__Glutamicibacter",
"g__Gracilibacillus", "g__Gracilimonas", "g__Guyparkeria",
"g__Haematobacter", "g__Haliangium", "g__Halolactibacillus",
"g__Halomonas", "g__Henriciella", "g__HIMB11", "g__HSB_OF53-F07",
"g__Hymenobacter", "g__Hyphomicrobium", "g__IMCC26256", "g__Janibacter",
"g__Jatrophihabitans", "g__JCM_18997", "g__Jeotgalibacillus",
"g__JG30-KF-CM45", "g__JG30a-KF-32", "g__JGI-0000079-D21",
"g__Jiella", "g__Jonesia", "g__KD4-96", "g__Kineococcus",
"g__Labrys", "g__Lactobacillus", "g__Lawsonella", "g__Leeuwenhoekiella",
"g__Legionella", "g__Marine_Group_II", "g__Marinilabiliaceae",
"g__Marinobacter", "g__Marinococcus", "g__Martelella", "g__Massilia",
"g__Meiothermus", "g__Methylobacillus", "g__Methylobacterium-Methylorubrum",
"g__Methylophaga", "g__Methylotenera", "g__Micrococcus",
"g__Microvirga", "g__Modestobacter", "g__Mucilaginibacter",
"g__Muricauda", "g__Mycobacterium", "g__Nakamurella", "g__Nesterenkonia",
"g__Nitratireductor", "g__Nitriliruptor", "g__Nitriliruptoraceae",
"g__Nocardia", "g__Nocardioides", "g__Nocardiopsis", "g__Novosphingobium",
"g__NS5_marine_group", "g__Oceanibaculum", "g__Oceanobacillus",
"g__OPB41", "g__Oricola", "g__Ornithinimicrobium", "g__Oryzihumus",
"g__P3OB-42", "g__Paenibacillus", "g__Paludisphaera", "g__Paracoccus",
"g__Paraliobacillus", "g__Parvibaculum", "g__Patulibacter",
"g__Pediococcus", "g__Pedomicrobium", "g__Pelagibacterium",
"g__Pelotomaculum", "g__PeM15", "g__Phaselicystis", "g__Phenylobacterium",
"g__Pir4_lineage", "g__Polaromonas", "g__Prevotella", "g__Prochlorococcus_MIT9313",
"g__Promicromonospora", "g__Proteiniclasticum", "g__Pseudolabrys",
"g__Pseudomonas", "g__Pseudonocardia", "g__Psychrobacter",
"g__Psychroglaciecola", "g__Pusillimonas", "g__Quadrisphaera",
"g__Rhodococcus", "g__Rhodopirellula", "g__Roseisolibacter",
"g__Roseovarius", "g__Rubrivirga", "g__Rummeliibacillus",
"g__S25-593", "g__Saccharomonospora", "g__Saccharopolyspora",
"g__Salegentibacter", "g__Salimesophilobacter", "g__Salinicola",
"g__Salipaludibacillus", "g__SAR116_clade", "g__SAR324_clade(Marine_group_B)",
"g__SAR86_clade", "g__SC-I-84", "g__Scytonema_UTEX_2349",
"g__Sedimentibacter", "g__Sediminibacterium", "g__Sericytochromatia",
"g__SM1A02", "g__Solirubrobacter", "g__Sphingobium", "g__Sphingomonas",
"g__Sporobacter", "g__Staphylococcus", "g__Stappia", "g__Streptococcus",
"g__Streptomyces", "g__Subgroup_10", "g__Synechococcus_CC9902",
"g__Syntrophus", "g__Terrabacter", "g__Thalassospira", "g__Thermoanaerobacterium",
"g__Thermopolyspora", "g__Thermus", "g__Tistrella", "g__TK10",
"g__TRA3-20", "g__uncultured", "g__Vicinamibacteraceae",
"g__Virgibacillus", "g__WCHB1-32", "g__WPS-2", "g__Xanthobacter"
), class = "factor"), Species = structure(c(NA, 55L, 55L,
NA, NA), .Label = c("s__Acetobacteraceae_bacterium", "s__Acinetobacter_venetianus",
"s__Actinomycetales_bacterium", "s__Alcanivorax_pacificus",
"s__Amorphus_suaedae", "s__Aquibacillus_sp.", "s__Bacillus_alcalophilus",
"s__bacterium_Ellin6515", "s__bacterium_enrichment", "s__bacterium_QTYC46b",
"s__Bifidobacterium_bifidum", "s__Blastopirellula_cremea",
"s__Brevibacterium_samyangense", "s__Cellulomonas_sp.", "s__Corynebacterium_glaucum",
"s__Deinococcus_geothermalis", "s__Desulfohalotomaculum_halophilum",
"s__Desulfosporosinus_youngiae", "s__Endozoicomonas_acroporae",
"s__Ercella_succinigenes", "s__Flavobacterium_qiangtangense",
"s__Fluviicola_sp.", "s__Gardnerella_vaginalis", "s__iron-reducing_bacterium",
"s__Jonesia_denitrificans", "s__Lactobacillus_iners", "s__Leeuwenhoekiella_sp.",
"s__marine_sediment", "s__Mesorhizobium_sp.", "s__metagenome",
"s__Nitriliruptor_alkaliphilus", "s__Oryzihumus_terrae",
"s__Pedomicrobium_ferrugineum", "s__Phyllobacteriaceae_bacterium",
"s__planctomycete_str.", "s__Prevotella_histicola", "s__Prevotella_pallens",
"s__Psychrobacter_pulmonis", "s__Rhodobacteraceae_bacterium",
"s__Rhodococcus_sp.", "s__rock_porewater", "s__Saccharopolyspora_rectivirgula",
"s__Sedimentibacter_acidaminivorans", "s__Sphingomonas_metalli",
"s__Streptomyces_specialis", "s__Streptosporangiaceae_str.",
"s__Tistrella_bauzanensis", "s__Triticum_aestivum", "s__uncultured_Acidobacteriaceae",
"s__uncultured_actinobacterium", "s__uncultured_Actinomycetales",
"s__uncultured_Alcaligenes", "s__uncultured_Anaerobacillus",
"s__uncultured_Anaerolineaceae", "s__uncultured_Azospirillum",
"s__uncultured_bacterium", "s__uncultured_Bacteroidetes",
"s__uncultured_Chloroflexi", "s__uncultured_Conexibacter",
"s__uncultured_cyanobacterium", "s__uncultured_Ferrimicrobium",
"s__uncultured_Fimbriimonas", "s__uncultured_Ktedobacteria",
"s__uncultured_Methylocystaceae", "s__uncultured_Nitriliruptorales",
"s__uncultured_planctomycete", "s__uncultured_Porphyromonadaceae",
"s__uncultured_prokaryote", "s__uncultured_Rhodospirillaceae",
"s__uncultured_soil"), class = "factor")), row.names = c(86L,
209L, 210L, 333L, 431L), class = "data.frame")
Assuming you are working with a phyloseq object called physeq you can do the following:
Agglomerate the OTU table to Family level:
p <- tax_glom(physeq, "Family")
Then you can select families that are present in both samples:
families <- tax_table(p)[otu_table(p)[,"m3ofair"] > 0 & otu_table(p)[,"m3NC"] > 0, "Family"]
Now you can select the ASVs with the families you want:
pout <- prune_taxa(tax_table(physeq)[, "Family"] %in% families, physeq)

KNN Error in `[.default`(xj, i): invalid subscript type 'list'

I'm dealing with a dataset containing the stock prices of three different companies and a column of 1s and 0s that indicates whether the stock price of the first company goes up or down. I am trying to use KNN to predict the direction of the price movement, but when I run it I get Error in `[.default`(xj, i): invalid subscript type 'list'. Can someone please help me?
dput(head(df,10))
structure(list(ENEL = structure(c(38L, 2L, 7L, 6L, 27L, 4L, 3L,
14L, 5L, 21L), .Label = c("3,398", "3,442", "3,446", "3,46",
"3,476", "3,486", "3,492", "3,494", "3,498", "3,526", "3,536",
"3,538", "3,54", "3,544", "3,546", "3,586", "3,6", "3,604", "3,608",
"3,616", "3,618", "3,62", "3,628", "3,634", "3,638", "3,64",
"3,644", "3,646", "3,648", "3,65", "3,652", "3,658", "3,66",
"3,662", "3,67", "3,672", "3,676", "3,68", "3,684", "3,702",
"3,704", "3,708", "3,714", "3,716", "3,72", "3,728", "3,73",
"3,734", "3,736", "3,738", "3,74", "3,742", "3,744", "3,746",
"3,75", "3,754", "3,756", "3,758", "3,76", "3,762", "3,764",
"3,768", "3,772", "3,776", "3,778", "3,78", "3,786", "3,788",
"3,79", "3,794", "3,802", "3,806", "3,808", "3,814", "3,818",
"3,82", "3,822", "3,824", "3,826", "3,828", "3,83", "3,834",
"3,836", "3,838", "3,84", "3,842", "3,844", "3,846", "3,85",
"3,852", "3,856", "3,858", "3,864", "3,866", "3,868", "3,87",
"3,876", "3,878", "3,88", "3,882", "3,884", "3,888", "3,89",
"3,892", "3,896", "3,898", "3,9", "3,902", "3,904", "3,906",
"3,908", "3,91", "3,912", "3,914", "3,916", "3,918", "3,92",
"3,922", "3,924", "3,926", "3,928", "3,93", "3,934", "3,936",
"3,938", "3,94", "3,942", "3,944", "3,946", "3,95", "3,952",
"3,954", "3,956", "3,958", "3,96", "3,962", "3,966", "3,968",
"3,97", "3,972", "3,974", "3,976", "3,978", "3,98", "3,982",
"3,984", "3,986", "3,988", "3,99", "3,992", "3,994", "3,996",
"4", "4,002", "4,004", "4,006", "4,008", "4,012", "4,014", "4,016",
"4,018", "4,02", "4,022", "4,024", "4,026", "4,028", "4,03",
"4,034", "4,036", "4,038", "4,04", "4,042", "4,044", "4,046",
"4,048", "4,05", "4,052", "4,054", "4,056", "4,058", "4,06",
"4,062", "4,064", "4,066", "4,068", "4,072", "4,074", "4,076",
"4,078", "4,08", "4,082", "4,084", "4,086", "4,088", "4,09",
"4,094", "4,098", "4,1", "4,102", "4,104", "4,106", "4,108",
"4,11", "4,112", "4,114", "4,116", "4,118", "4,12", "4,122",
"4,124", "4,126", "4,128", "4,13", "4,134", "4,136", "4,14",
"4,142", "4,144", "4,148", "4,15", "4,154", "4,156", "4,158",
"4,16", "4,162", "4,166", "4,172", "4,174", "4,178", "4,188",
"4,19", "4,194", "4,196", "4,202", "4,204", "4,206", "4,21",
"4,212", "4,214", "4,216", "4,222", "4,224", "4,226", "4,23",
"4,234", "4,236", "4,24", "4,248", "4,252", "4,256", "4,26",
"4,264", "4,266", "4,268", "4,27", "4,276", "4,282", "4,284",
"4,286", "4,292", "4,296", "4,298", "4,306", "4,31", "4,324",
"4,33", "4,334", "4,338", "4,34", "4,342", "4,348", "4,35", "4,352",
"4,354", "4,358", "4,36", "4,362", "4,364", "4,372", "4,376",
"4,378", "4,386", "4,39", "4,396", "4,4", "4,404", "4,406", "4,41",
"4,414", "4,418", "4,438", "4,446", "4,456", "4,464"), class = "factor"),
A2a = structure(c(15L, 5L, 2L, 1L, 13L, 6L, 4L, 7L, 3L, 8L
), .Label = c("0,791", "0,796", "0,801", "0,806", "0,808",
"0,81", "0,812", "0,814", "0,821", "0,822", "0,825", "0,826",
"0,827", "0,833", "0,836", "0,839", "0,84", "0,841", "0,842",
"0,843", "0,844", "0,847", "0,849", "0,851", "0,853", "0,856",
"0,857", "0,858", "0,893", "0,894", "0,897", "0,904", "0,905",
"0,908", "0,915", "0,918", "0,932", "0,94", "0,948", "0,953",
"0,955", "0,956", "0,96", "0,963", "0,965", "0,967", "0,968",
"0,969", "0,972", "0,977", "0,979", "0,982", "0,985", "0,988",
"0,989", "0,999", "1,003", "1,013", "1,014", "1,017", "1,02",
"1,021", "1,025", "1,029", "1,031", "1,032", "1,033", "1,034",
"1,036", "1,038", "1,042", "1,043", "1,045", "1,047", "1,05",
"1,051", "1,053", "1,055", "1,057", "1,058", "1,06", "1,062",
"1,063", "1,065", "1,066", "1,067", "1,068", "1,069", "1,07",
"1,071", "1,072", "1,073", "1,075", "1,076", "1,077", "1,078",
"1,079", "1,08", "1,081", "1,083", "1,085", "1,086", "1,089",
"1,09", "1,091", "1,092", "1,093", "1,094", "1,096", "1,097",
"1,098", "1,099", "1,1", "1,101", "1,102", "1,103", "1,104",
"1,105", "1,106", "1,107", "1,108", "1,109", "1,11", "1,112",
"1,113", "1,114", "1,115", "1,116", "1,118", "1,12", "1,121",
"1,123", "1,124", "1,125", "1,126", "1,127", "1,128", "1,129",
"1,13", "1,133", "1,134", "1,136", "1,138", "1,14", "1,141",
"1,142", "1,143", "1,145", "1,146", "1,147", "1,15", "1,151",
"1,152", "1,153", "1,155", "1,156", "1,157", "1,158", "1,159",
"1,16", "1,162", "1,164", "1,165", "1,166", "1,167", "1,169",
"1,17", "1,171", "1,173", "1,174", "1,175", "1,176", "1,177",
"1,178", "1,18", "1,181", "1,182", "1,183", "1,184", "1,185",
"1,188", "1,189", "1,19", "1,191", "1,192", "1,194", "1,195",
"1,197", "1,198", "1,199", "1,2", "1,202", "1,203", "1,204",
"1,205", "1,206", "1,207", "1,208", "1,209", "1,21", "1,211",
"1,212", "1,213", "1,214", "1,215", "1,216", "1,217", "1,219",
"1,22", "1,221", "1,222", "1,224", "1,226", "1,227", "1,228",
"1,229", "1,23", "1,231", "1,232", "1,233", "1,234", "1,235",
"1,236", "1,237", "1,238", "1,239", "1,24", "1,241", "1,242",
"1,243", "1,244", "1,246", "1,247", "1,249", "1,25", "1,251",
"1,252", "1,253", "1,254", "1,255", "1,256", "1,257", "1,26",
"1,262", "1,263", "1,266", "1,267", "1,269", "1,27", "1,271",
"1,272", "1,274", "1,277", "1,278", "1,279", "1,281", "1,282",
"1,287", "1,291", "1,293", "1,3", "1,308", "1,31", "1,316",
"1,319", "1,321", "1,326", "1,329", "1,33", "1,336", "1,338",
"1,352"), class = "factor"), EDNn = structure(c(275L, 261L,
261L, 252L, 261L, 245L, 247L, 256L, 245L, 251L), .Label = c("0,6045",
"0,606", "0,611", "0,6145", "0,615", "0,6155", "0,6175",
"0,6195", "0,62", "0,6215", "0,622", "0,623", "0,6235", "0,624",
"0,6245", "0,625", "0,626", "0,6265", "0,627", "0,6275",
"0,628", "0,6295", "0,63", "0,631", "0,6315", "0,632", "0,633",
"0,634", "0,635", "0,6355", "0,636", "0,6365", "0,637", "0,638",
"0,64", "0,641", "0,6415", "0,6435", "0,644", "0,6445", "0,645",
"0,646", "0,6475", "0,6485", "0,649", "0,6495", "0,65", "0,651",
"0,6535", "0,655", "0,6555", "0,656", "0,6565", "0,658",
"0,6585", "0,659", "0,66", "0,6605", "0,661", "0,663", "0,6635",
"0,664", "0,6645", "0,665", "0,6655", "0,667", "0,6675",
"0,6685", "0,6695", "0,67", "0,671", "0,672", "0,6725", "0,673",
"0,6735", "0,674", "0,6745", "0,675", "0,6755", "0,676",
"0,677", "0,678", "0,6785", "0,679", "0,6795", "0,68", "0,681",
"0,682", "0,6835", "0,684", "0,685", "0,686", "0,6865", "0,687",
"0,6875", "0,688", "0,689", "0,6895", "0,69", "0,6905", "0,691",
"0,6925", "0,6935", "0,694", "0,695", "0,696", "0,6965",
"0,697", "0,6985", "0,699", "0,7", "0,7005", "0,701", "0,702",
"0,703", "0,704", "0,7045", "0,705", "0,7055", "0,706", "0,7065",
"0,707", "0,708", "0,7085", "0,71", "0,711", "0,712", "0,7125",
"0,713", "0,714", "0,7145", "0,715", "0,716", "0,7165", "0,717",
"0,7175", "0,719", "0,7195", "0,72", "0,7205", "0,7215",
"0,722", "0,7225", "0,723", "0,7235", "0,724", "0,7245",
"0,725", "0,7255", "0,726", "0,7265", "0,727", "0,728", "0,7285",
"0,7295", "0,73", "0,7305", "0,731", "0,7315", "0,732", "0,7325",
"0,733", "0,7335", "0,734", "0,735", "0,736", "0,737", "0,739",
"0,7395", "0,74", "0,7405", "0,741", "0,7415", "0,742", "0,7425",
"0,743", "0,744", "0,7445", "0,745", "0,7455", "0,746", "0,7465",
"0,747", "0,748", "0,7495", "0,75", "0,7505", "0,751", "0,752",
"0,7525", "0,7535", "0,754", "0,7545", "0,755", "0,756",
"0,7575", "0,758", "0,76", "0,7605", "0,761", "0,7625", "0,7635",
"0,764", "0,7645", "0,765", "0,7675", "0,7685", "0,769",
"0,77", "0,7705", "0,771", "0,772", "0,774", "0,7755", "0,776",
"0,7775", "0,7785", "0,78", "0,782", "0,7825", "0,784", "0,7855",
"0,7895", "0,79", "0,792", "0,7955", "0,798", "0,8", "0,802",
"0,808", "0,809", "0,81", "0,818", "0,8195", "0,821", "0,825",
"0,831", "0,8315", "0,833", "0,835", "0,8365", "0,837", "0,839",
"0,8395", "0,84", "0,8415", "0,842", "0,843", "0,8435", "0,844",
"0,8445", "0,845", "0,8455", "0,846", "0,8465", "0,847",
"0,848", "0,8485", "0,849", "0,8495", "0,85", "0,8505", "0,851",
"0,852", "0,853", "0,8545", "0,855", "0,856", "0,8565", "0,857",
"0,8575", "0,858", "0,859", "0,8595", "0,86", "0,862", "0,8705"
), class = "factor"), Aumento = c(0L, 0L, 1L, 0L, 1L, 0L,
0L, 1L, 0L, 1L)), row.names = c("02/01/2015", "05/01/2015",
"06/01/2015", "07/01/2015", "08/01/2015", "09/01/2015", "12/01/2015",
"13/01/2015", "14/01/2015", "15/01/2015"), class = "data.frame")
df <- structure(list(ENEL = structure(c(38L, 2L), .Label = c("3,398", "3,442", "3,446", "3,46", "3,476"), class = "factor"), A2a = structure(c(15L, 5L), .Label = c("0,791", "0,796", "0,801", "0,806", "0,808"), class = "factor"), EDNn = structure(c(275L, 261L), .Label = c("0,6045", "0,606", "0,611", "0,6145", "0,615"), class = "factor"), Aumento = c(0L, 0L)), row.names = c("02/01/2015", "05/01/2015"), class = "data.frame")
train<-df[1:(0.75*nrow(df)),]
test<-df[(0.75*nrow(df)+1):(nrow(df)+1),]
predictors<-cbind.data.frame(Enel = lag(df$ENEL),A2a = lag(df$A2a),Edn =lag(df$EDNn))
#I use edit to put a value inside the first cell which otherwise will be NA
predictors<-edit(predictors)
prediction <- knn(predictors[train,],predictors[test,], df$increase[train],k = 1)

Is it possible to group by the next row in R with ggplot2?

I am looking for a way to group this kind of data:
6160407 1853162 cin1 csa1
6203165 1904030 cin1 csa1
8453347 6050699 cin1 csa1
8507125 5996661 cin1 csa1
I want to plot this with geom_point(), but also add a line that links the point in the first row to the point in the next row: from '6160407' to '6203165' on the x-axis and, correspondingly, from '1853162' to '1904030' on the y-axis. Could this be done with group?
The basic plot is:
ggplot(data, aes(V1, V2)) + geom_point(aes(colour=V4))
I know that a geom_line() is missing to group the points this way. Do you have any ideas?
Thank you in advance.
Some data.frame info:
structure(list(V1 = c(2918848L, 3020406L, 3012485L, 3059964L,
6118180L, 6144325L, 2672731L, 2726260L, 6169400L, 6239623L, 3225659L,
3243523L, 2921684L, 2952518L, 6077605L, 6094096L, 2871288L, 2895608L,
2920133L, 2929167L, 2829837L, 2837542L, 5593511L, 5597323L, 5594916L,
5597195L, 5594925L, 5601716L, 6158310L, 6165279L), V2 = c(2842270L,
2739302L, 2746640L, 2683527L, 1018440L, 1070436L, 3159605L, 3082481L,
937756L, 992470L, 106256L, 89087L, 620180L, 651582L, 73636L,
42430L, 3124288L, 3099294L, 2854976L, 2838157L, 3126301L, 3112938L,
2929012L, 2932231L, 554507L, 552825L, 567380L, 563320L, 989274L,
1011135L), V3 = structure(c(190L, 190L, 190L, 190L, 190L, 190L,
190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L,
190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L,
190L, 190L), .Label = c("cin1", "cin10", "cin102", "cin1049",
"cin107", "cin108", "cin109", "cin11", "cin111", "cin113", "cin116",
"cin117", "cin118", "cin119", "cin12", "cin120", "cin121", "cin122",
"cin123", "cin126", "cin128", "cin129", "cin13", "cin131", "cin133",
"cin135", "cin136", "cin137", "cin138", "cin14", "cin142", "cin143",
"cin148", "cin149", "cin15", "cin150", "cin152", "cin153", "cin154",
"cin155", "cin16", "cin160", "cin161", "cin165", "cin169", "cin17",
"cin170", "cin171", "cin172", "cin173", "cin174", "cin175", "cin176",
"cin177", "cin179", "cin18", "cin187", "cin188", "cin189", "cin19",
"cin190", "cin192", "cin193", "cin195", "cin197", "cin198", "cin2",
"cin20", "cin203", "cin204", "cin209", "cin21", "cin212", "cin216",
"cin218", "cin22", "cin223", "cin226", "cin23", "cin232", "cin233",
"cin234", "cin237", "cin24", "cin244", "cin246", "cin248", "cin25",
"cin254", "cin26", "cin263", "cin267", "cin27", "cin271", "cin272",
"cin279", "cin28", "cin280", "cin283", "cin285", "cin29", "cin294",
"cin298", "cin299", "cin3", "cin30", "cin304", "cin305", "cin307",
"cin31", "cin310", "cin32", "cin320", "cin33", "cin331", "cin34",
"cin35", "cin358", "cin36", "cin361", "cin364", "cin37", "cin370",
"cin38", "cin381", "cin387", "cin389", "cin39", "cin396", "cin4",
"cin40", "cin403", "cin408", "cin41", "cin42", "cin421", "cin429",
"cin43", "cin44", "cin441", "cin444", "cin45", "cin453", "cin46",
"cin467", "cin47", "cin475", "cin48", "cin49", "cin5", "cin50",
"cin51", "cin513", "cin52", "cin527", "cin529", "cin53", "cin537",
"cin54", "cin547", "cin55", "cin56", "cin57", "cin58", "cin581",
"cin586", "cin59", "cin6", "cin60", "cin619", "cin62", "cin63",
"cin64", "cin65", "cin66", "cin67", "cin68", "cin682", "cin69",
"cin7", "cin70", "cin71", "cin73", "cin737", "cin75", "cin76",
"cin77", "cin78", "cin79", "cin8", "cin82", "cin83", "cin832",
"cin833", "cin85", "cin86", "cin87", "cin88", "cin881", "cin9",
"cin90", "cin92", "cin93", "cin94", "cin95", "cin96", "cin99"
), class = "factor"), V4 = structure(c(163L, 163L, 163L, 163L,
163L, 163L, 163L, 163L, 163L, 163L, 163L, 163L, 163L, 163L, 163L,
163L, 163L, 163L, 163L, 163L, 163L, 163L, 163L, 163L, 163L, 163L,
163L, 163L, 163L, 163L), .Label = c("csa1", "csa10", "csa100",
"csa101", "csa102", "csa103", "csa104", "csa105", "csa106", "csa107",
"csa108", "csa109", "csa11", "csa110", "csa112", "csa113", "csa114",
"csa115", "csa116", "csa117", "csa118", "csa119", "csa12", "csa120",
"csa121", "csa122", "csa123", "csa125", "csa126", "csa127", "csa128",
"csa129", "csa13", "csa130", "csa131", "csa132", "csa133", "csa134",
"csa135", "csa136", "csa137", "csa138", "csa139", "csa14", "csa140",
"csa141", "csa143", "csa144", "csa145", "csa146", "csa147", "csa148",
"csa149", "csa15", "csa150", "csa151", "csa152", "csa153", "csa154",
"csa156", "csa158", "csa16", "csa160", "csa163", "csa164", "csa166",
"csa168", "csa17", "csa170", "csa172", "csa173", "csa176", "csa179",
"csa18", "csa185", "csa186", "csa189", "csa19", "csa192", "csa194",
"csa195", "csa197", "csa199", "csa2", "csa20", "csa200", "csa203",
"csa205", "csa206", "csa21", "csa210", "csa211", "csa214", "csa22",
"csa220", "csa229", "csa23", "csa230", "csa24", "csa25", "csa253",
"csa258", "csa26", "csa261", "csa267", "csa27", "csa270", "csa28",
"csa288", "csa29", "csa294", "csa3", "csa30", "csa301", "csa304",
"csa307", "csa308", "csa31", "csa32", "csa325", "csa329", "csa33",
"csa337", "csa34", "csa35", "csa36", "csa37", "csa38", "csa39",
"csa4", "csa40", "csa41", "csa42", "csa43", "csa44", "csa45",
"csa46", "csa47", "csa48", "csa49", "csa5", "csa50", "csa51",
"csa52", "csa53", "csa54", "csa55", "csa56", "csa57", "csa58",
"csa59", "csa6", "csa60", "csa61", "csa62", "csa63", "csa64",
"csa65", "csa66", "csa67", "csa68", "csa69", "csa7", "csa70",
"csa71", "csa72", "csa73", "csa74", "csa75", "csa76", "csa77",
"csa78", "csa79", "csa8", "csa80", "csa81", "csa82", "csa83",
"csa84", "csa85", "csa86", "csa87", "csa88", "csa89", "csa9",
"csa90", "csa91", "csa92", "csa93", "csa94", "csa95", "csa96",
"csa97", "csa98", "csa99"), class = "factor")), .Names = c("V1",
"V2", "V3", "V4"), row.names = 199181:199210, class = "data.frame")
You can do it 'manually' with geom_segment, is this what you're looking for?
ggplot(data, aes(V1, V2)) + geom_point(aes(colour=V4)) +
geom_segment(aes(x=6160407, xend=6203165, y=1853162, yend=1904030, colour=V4)) +
geom_segment(aes(x=8453347, xend=8507125, y=6050699, yend=5996661, colour=V4))
Doing it in a more generic way may require more of your data.frame and/or specifying your problem in more detail. Also, defining column names on data will make it easier to handle and will produce nicely labelled plots.

R Find time lapsed in minutes from pairs of "Enter Store" to following "Leave Store" for every Group (Cart.Serial)

Here is my first post! Following some requirements I am adding my data:
> dput(head(ctms3))
structure(list(Date = structure(c(1444136735, 1444136703, 1444136698,
1444136670, 1444136645, 1444136644), class = c("POSIXct", "POSIXt"
), tzone = "CST"), Cart.Serial = structure(c(114L, 118L, 8L,
4L, 35L, 76L), .Label = c("00817AF4", "008191A9", "008191BE",
"008191C4", "0081927D", "008192C8", "008192D1", "008192ED", "008193A5",
"008193BB", "008193D4", "008193D7", "008193D9", "008193DA", "008193DC",
"008193F2", "008193FB", "008193FE", "0081946C", "008194A6", "008194DA",
"0081954B", "0081955D", "008195A1", "008195B5", "008195D7", "008195F5",
"0081961B", "0081963E", "0081966C", "0081972B", "0081974E", "0081975A",
"0081976F", "0081977A", "008197A1", "008197A4", "008197A9", "008197BC",
"008197D1", "008197D3", "008197F2", "008197F3", "008197F4", "008197F8",
"008197FA", "0081985A", "00836B89", "00836CC4", "0083702B", "0083747E",
"008374FF", "0083752C", "0083754A", "008375BB", "008375C7", "008375F9",
"0083761D", "0083761F", "0083769B", "0083771D", "0083778A", "0083A4EB",
"0083A56B", "0083A570", "0083A5A6", "0083A5C7", "0083A887", "0083B3FE",
"0083D5EA", "0083D5FE", "0083D600", "008403C4", "008403DB", "0084049A",
"008404A5", "008404A8", "008405EE", "0084077E", "00840CFD", "00840EAD",
"00840F0C", "00840F24", "00840F31", "00840F3A", "0084108D", "008410ED",
"0084110C", "0084114B", "008413D1", "008413DD", "008413FE", "0084156A",
"0084187C", "008446AD", "008446C7", "008447A8", "008447B5", "0084497F",
"0084498D", "0084499F", "008449A7", "008449B4", "008449BF", "008449C8",
"008449DE", "00844C04", "00844C33", "00844CBF", "00844CEB", "00844D10",
"00844D19", "00846BDD", "00846C7C", "00846CDE", "00846CFB", "00846D1A",
"00846D20", "00846D24", "00846D2F", "00846D38", "00846D3D", "00846D88",
"00846E4F", "00846EA1", "819161", "819187", "819200", "819302",
"819313", "819332", "819346", "819353", "819371", "819458", "819606",
"819617", "819643", "819731", "819736", "819744", "819764", "819769",
"819789", "819798", "819854", "819863", "819875", "819878", "819879",
"819889", "819924", "819927", "819954", "8.19E+05", "8.19E+08",
"8.20E+09", "8.20E+10", "837059", "837344", "837347", "837387",
"837460", "837487", "837513", "837609", "837613", "837624", "837628",
"837649", "837652", "837757", "837772", "840242", "840476", "840482",
"8.40E+12", "841255", "841257", "841449", "841724", "841775",
"841785", "844834", "844835", "844902", "844981", "844994", "8.45E+06",
"8.45E+08", "8.45E+11"), class = "factor"), Message = structure(c(3L,
3L, 2L, 2L, 3L, 2L), .Label = c("Checkout", "Enter Store", "Leave Store",
"Must Checkout", "Ping", "Post Cashier Must Checkout", "Push Out",
"Unlock"), class = "factor")), .Names = c("Date", "Cart.Serial",
"Message"), row.names = c(1L, 5L, 6L, 10L, 13L, 14L), class = "data.frame")
Now, I have three columns: "Date", "Cart.Serial" and "Message". I need to group by Cart.Serial and then find the time elapsed between every pair of Enter Store and Leave Store from the Message variable. The resulting data should then be put in another data frame so I can work with it as a sample and extract means and other descriptive statistics.
Thanks.

Summarise data into monthly counts by year

I'm not used to working with time series data in R, and I'm a bit stuck with this. I have a data frame of event references and the date each event was recorded. The data runs over a period of 7 years, and I want to summarise it into the number of events per month over the 7-year period and plot that with ggplot2.
I can't seem to get the date conversions to work together so I end up with a count and a date I can feed to ggplot2's scale_x_date() function
Here's an example of the data:
df <- structure(list(Ref = structure(c(127L, 33L, 232L, 392L, 490L,
242L, 437L, 346L, 443L, 560L, 598L, 568L, 103L, 262L, 463L, 17L,
114L, 276L, 361L, 422L), .Label = c("01090013", "0109005", "0109006",
"0109007", "0109009", "0109010", "0109011", "0109012", "0109014",
"0109016", "0109022", "0110001", "0110004", "0110007", "0110009",
"0110011", "0111001", "0111002", "0111012", "0111016", "0111017",
"0112001", "0112003", "0112008", "0112010", "015004", "015006",
"015008", "015010", "015013", "016002", "016003", "016004", "016005",
"016006", "016008", "016009", "016010", "016011", "016013", "016014",
"016016", "017001", "018001", "018004", "018005", "018007", "018008",
"018009", "020626", "0209024", "0209025", "0209026", "0209027",
"0209029", "0209031", "0209035", "0209037", "02100020", "0210017",
"0210018", "0210023", "0210026", "0210030", "0211018", "0211019",
"0211020", "0211022", "0211024", "0211025", "0211026", "0212018",
"0212021", "0212025", "0212027", "025018", "025021", "025022",
"025023", "025024", "025025", "025026", "025030", "026019", "026020",
"026021", "026023", "026025", "026027", "026030", "026032", "0270010",
"027010", "027012", "027013", "027014", "027016", "027017", "0309038",
"0309039", "0309041", "0309046", "0309050", "0309052", "0309053",
"0310035", "0310037", "0310041", "0310043", "0310044", "0311028",
"0311032", "0311035", "0311038", "0312031", "0312036", "0312037",
"0312043", "0312045", "0312047", "0312056", "0312058", "0312059",
"0312062", "035033", "035034", "035036", "035037", "035038",
"035040", "035041", "035042", "035043", "035045", "035049", "036036",
"036038", "036039", "036041", "036042", "036044", "036045", "036046",
"036047", "036048", "036050", "036051", "037021", "037026", "037029",
"038026", "038032", "038034", "038035", "038036", "0409056",
"0409057", "0409062", "0410046", "0410049", "0410050", "0410051",
"0410054", "0410055", "0410056", "0410057", "0410058", "0410060",
"0410062", "0410064", "0411047", "0411051", "0411052", "0411055",
"0412070", "0412074", "0412075", "0412076", "045054", "045056",
"045058", "045063", "045064", "045065", "045072", "046054", "046055",
"046058", "046060", "047035", "047036", "047037", "047038", "047041",
"047042", "047044", "047045", "047046", "048040", "048043", "048044",
"048045", "048048", "048050", "048051", "0509073", "0509080",
"0510066", "0510067", "0510082", "0511062", "0511065", "0511068",
"0511069", "0511072", "0512084", "0512088", "0512089", "0512091",
"055073", "055075", "055080", "055086", "055089", "055091", "055093",
"055094", "055095", "056064", "056066", "056067", "056068", "056070",
"056071", "056073", "056074", "057049", "057052", "057053", "057054",
"057058", "057059", "057060", "057061", "057063", "057065", "057066",
"057067", "057068", "057069", "058053", "058055", "058056", "058059",
"058062", "058064", "0609082", "0609086", "0609088", "0609089",
"0609090", "0609093", "0609095", "0609096", "0609097", "0609098",
"0609103", "0610086", "0610089", "0610095", "0610096", "0610098",
"0611073", "0611074", "0611080", "0611081", "0612109", "0612115",
"065096", "065099", "065103", "065105", "065106", "065109", "065114",
"066075", "066076", "066077", "066078", "066081", "066083", "067080",
"067081", "067084", "068065", "068070", "068074", "0709106",
"0709108", "0709113", "0709115", "0709116", "0709117", "0709120",
"0710104", "0710105", "0710107", "0710108", "0710110", "0710115",
"0710116", "0710117", "0710123", "0711083", "0711084", "0711085",
"0711086", "0711087", "0711088", "0711092", "0712122", "0712126",
"0712127", "0712128", "0712129", "075118", "075119", "075123",
"075124", "075125", "075126", "075127", "075130", "075132", "075133",
"076084", "076087", "076088", "076090", "076092", "076093", "076094",
"077103", "077105", "078079", "078080", "078081", "078082", "078085",
"078086", "0809126", "0809134", "0809137", "0809141", "0809143",
"0810125", "0810137", "0811099", "0811101", "0811106", "0811108",
"0811112", "0811113", "0811114", "0812142", "0812145", "0812150",
"0812152", "0814143", "085139", "085143", "085145", "085148",
"085149", "085150", "085154", "085156", "085160", "085163", "086098",
"086099", "086100", "086101", "086102", "086104", "086107", "086108",
"086109", "086110", "086111", "086112", "086114", "086115", "087106",
"087107", "087109", "087112", "088094", "088096", "088097", "088098",
"0909145", "0909155", "0909158", "0910145", "0910146", "0910147",
"0910149", "0910150", "0910153", "0910154", "0911116", "0911117",
"0911120", "0911121", "0911122", "0911123", "0911124", "0911130",
"0911131", "0912161", "0912163", "0912168", "0912171", "0912172",
"095166", "095167", "095170", "095171", "095172", "095178", "095180",
"096116", "096117", "096121", "097120", "097124", "097125", "097126",
"097132", "097133", "097136", "098110", "098115", "098116", "098119",
"100006825", "100006830", "1009160", "1009161", "1009162", "1009164",
"1009165", "1009166", "1009169", "1009170", "1009172", "1009173",
"1009174", "1010160", "1010162", "1010163", "1010164", "1010166",
"1010168", "1011133-A", "1011134", "1011140", "1011142", "1012179",
"1012184", "1012185", "1012194", "105185", "105186", "105187",
"105188", "105189", "105191", "105192", "105196", "105197", "105198",
"105199", "105201", "105202", "105207", "105208", "105211", "106127",
"106130", "106131", "107138", "107140", "107143", "107147", "107148",
"107149", "107153", "107155", "107156", "108122", "108123", "108127",
"108129", "108130", "108131", "108132", "108134", "108135", "108136",
"1109175", "1109176", "1109180", "1109182", "1110173", "1110176",
"1110177", "1110178", "1110185", "1110186", "1111145", "1111150",
"1111151", "1112196", "1112197", "1112201", "1112202", "1112206",
"1112208", "1112209", "1112212", "1112218", "1112220", "1112223",
"1112225", "1112226", "1112227", "115215", "115216", "115217",
"115218", "115219", "115223", "115225", "115226", "116139", "116143",
"116144", "116145", "117161", "117162", "117164", "117165", "117168",
"117175", "117180", "118139", "118140", "118143", "118147", "118148",
"118150", "118152", "118154", "118157", "118160", "118161", "1209188",
"1209189", "1209191", "1209193", "1209199", "1210191", "1210193",
"1211157", "1211158", "1211168", "1211169", "1211170", "1211171",
"1211173", "1212233", "1212235", "1212240", "125231", "125238",
"125241", "126147", "126149", "127182", "127183", "127186", "127187",
"127192", "127194", "128165", "128168", "128169", "128171", "128172",
"128175", "128176", "128177", "128182", "128183", "128184", "128186",
"128189", "128193"), class = "factor"), Date = structure(c(12846,
13154, 13284, 13391, 13434, 13655, 13766, 14067, 14119, 14183,
14209, 14211, 14322, 14412, 14897, 14960, 15049, 15155, 15201,
15597), class = "Date")), .Names = c("Ref", "Date"), row.names = c(NA,
-20L), class = "data.frame")
This is driving me crazy!
Thanks
H
I believe you are looking for this:
# split each event date into a zero-padded month ("%m") and a four-digit year ("%Y") column
df <- transform(df, month = format(Date,"%m"), year = format(Date, "%Y"))
# count the rows (events) in every month/year combination; ddply() and .() come from the plyr package
counts <- ddply(df,.(month,year),nrow)
Then to plot the date:
# make a new monthly date: the first day of each year-month, so every count
# can be placed on a continuous date axis
counts <- transform(counts, new_date = as.Date(paste(year,month,'01',sep="-")))
# now plot; V1 is the count column produced by the ddply(..., nrow) call
ggplot(counts,aes(x=new_date,y=V1)) + geom_point() + scale_x_date()
xts package is very handy for time series manipulations.
First I create the xts object :
library(xts)
# Index the series directly by the Date column. Converting it with
# as.POSIXct() first turns every date into midnight UTC, and the monthly
# grouping is then done in the session time zone; west of UTC an event on the
# first day of a month would be counted in the previous month.
dat.xts <- xts(df$Ref, order.by = df$Date)
Then I use apply.monthly to get the count by month, and plot it as an xts object:
count.month <- apply.monthly(dat.xts,FUN=length)
plot(count.month, type='b')
If you want to use ggplot2, you can transform the result to a data.frame.
as.data.frame(count.month)
Another option:
# Label each event with its year-month. The Date column is formatted directly:
# a round trip through as.POSIXct() yields midnight UTC, which format() then
# renders in the local time zone, so west of UTC the first day of a month
# would be labelled with the previous month.
# NOTE(review): assumes data$Date is a Date (or POSIXct) column, as in the
# question's data frame -- confirm.
data$Month <- format(data$Date, "%Y-%m")
# Tabulate the events per month; the result has the columns Month and Freq.
by.month.count <- data.frame(with(data, table(Month)))

Resources