I am using the grouped Median function (= Median of grouped data) as given in the following link:
how to calculate the median on grouped dataset?
(solution by A5C1D2H2I1M1N2O1R2T1)
For simplicity I will stick to the example of a salary range and counts of people who make that amount of money. I have following conundrum:
Imagine I am an accountant and I have different categories of employees, so I have the same salary range but 60 columns for salary counts. And I have 6 different companies. So if I were to use this function plainly I would have to repeat the steps 360 times... manually. That is a lot of copy-pasting.
I have tried (my salary range are the row names)
GroupedMedian(1:ncol(mydf), mydf$salary, sep="-")
resulting in the following error:
Error in intervals[1, Midrow] : subscript out of bounds
Does anybody have an idea how to calculate the grouped median on every column and perhaps add it to the table as a row below?
UPDATE As requested dput for my data frame
structure(list(Heu1_C = c(0L, 1L, 13L, 9L, 3L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Hi1_C = c(0L, 9L, 18L, 10L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Hi2_C = c(0L, 8L, 10L, 7L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), Hi3_R = c(0L, 0L, 2L, 4L, 5L, 2L, 0L, 0L, 0L, 0L, 0L,
0L), Hi4_I = c(0L, 15L, 9L, 10L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Hi5_I = c(0L, 4L, 11L, 18L, 2L, 3L, 0L, 0L, 0L, 0L, 0L,
0L), Ke1_C = c(0L, 8L, 15L, 13L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L), Ke2_C = c(0L, 12L, 10L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Ke3_I = c(0L, 4L, 12L, 8L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), Ke4_I = c(0L, 5L, 12L, 7L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
Ke5_I = c(0L, 0L, 3L, 4L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
Ke6_R = c(0L, 0L, 2L, 7L, 4L, 2L, 0L, 0L, 0L, 0L, 0L, 0L),
Ke7_I = c(0L, 9L, 13L, 13L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), Ke8_I = c(0L, 8L, 6L, 13L, 3L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Ke9_I = c(0L, 6L, 12L, 9L, 2L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Ke10_S = c(0L, 2L, 5L, 3L, 5L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Ke11_S = c(0L, 3L, 4L, 5L, 6L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Ku1_A = c(0L, 1L, 4L, 8L, 8L, 1L, 0L, 0L, 0L, 0L, 0L,
0L), Ku2_C = c(0L, 9L, 12L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Ku3_I = c(0L, 2L, 8L, 17L, 4L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Ku4_I = c(1L, 6L, 15L, 12L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), Ku5_I = c(0L, 6L, 14L, 10L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L), Ku6_I = c(0L, 10L, 10L, 8L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), Ku7_R = c(0L, 4L, 5L, 13L, 3L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), Ku8_R = c(0L, 9L, 9L, 10L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), Ku9_R = c(0L, 0L, 0L, 3L, 3L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), Ku10_I = c(0L, 4L, 10L, 14L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Ru1_I = c(0L, 13L, 11L, 11L, 7L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), Ru2_I = c(1L, 8L, 11L, 12L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L), Ru3_C = c(0L, 11L, 13L, 7L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Sch1_C = c(0L, 6L, 7L, 5L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Sch2_I = c(0L, 5L, 8L, 11L,
4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Sch3_S = c(0L, 6L, 11L,
10L, 8L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), Sch4_S = c(0L, 2L,
1L, 2L, 8L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se1_C = c(0L, 6L,
15L, 14L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se2_C = c(1L,
9L, 10L, 12L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se3_C = c(0L,
8L, 9L, 8L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se4_S = c(1L,
1L, 2L, 12L, 11L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se5_S = c(0L,
1L, 3L, 6L, 14L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se6_S = c(0L,
0L, 1L, 6L, 15L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StL1_I = c(0L,
0L, 5L, 10L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StL2_C = c(0L,
5L, 8L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StL3_S = c(0L,
0L, 0L, 2L, 9L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StL4_S = c(0L,
0L, 0L, 2L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StN1_C = c(0L,
2L, 12L, 3L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StN2_C = c(0L,
5L, 16L, 10L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StN3_R = c(0L,
1L, 2L, 10L, 9L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), StN4_R = c(0L,
0L, 3L, 9L, 11L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), StN5_R = c(1L,
0L, 0L, 4L, 6L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), StN6_R = c(0L,
0L, 0L, 5L, 13L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), StN7_R = c(0L,
0L, 1L, 4L, 7L, 4L, 0L, 0L, 0L, 0L, 0L, 0L), StN8_S = c(0L,
0L, 1L, 3L, 8L, 2L, 0L, 0L, 0L, 0L, 0L, 0L), StN9_S = c(0L,
2L, 4L, 4L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW1_C = c(0L,
8L, 12L, 8L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW2_C = c(0L,
12L, 16L, 8L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW3_I = c(0L,
15L, 16L, 10L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW4_I = c(0L,
6L, 13L, 5L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW5_C = c(0L,
8L, 12L, 12L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW6_S = c(0L,
5L, 8L, 8L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW7_S = c(0L,
0L, 1L, 5L, 10L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("Heu1_C",
"Hi1_C", "Hi2_C", "Hi3_R", "Hi4_I", "Hi5_I", "Ke1_C", "Ke2_C",
"Ke3_I", "Ke4_I", "Ke5_I", "Ke6_R", "Ke7_I", "Ke8_I", "Ke9_I",
"Ke10_S", "Ke11_S", "Ku1_A", "Ku2_C", "Ku3_I", "Ku4_I", "Ku5_I",
"Ku6_I", "Ku7_R", "Ku8_R", "Ku9_R", "Ku10_I", "Ru1_I", "Ru2_I",
"Ru3_C", "Sch1_C", "Sch2_I", "Sch3_S", "Sch4_S", "Se1_C", "Se2_C",
"Se3_C", "Se4_S", "Se5_S", "Se6_S", "StL1_I", "StL2_C", "StL3_S",
"StL4_S", "StN1_C", "StN2_C", "StN3_R", "StN4_R", "StN5_R", "StN6_R",
"StN7_R", "StN8_S", "StN9_S", "StW1_C", "StW2_C", "StW3_I", "StW4_I",
"StW5_C", "StW6_S", "StW7_S"), class = "data.frame", row.names = c("0 - 1",
"1 - 2", "2 - 3", "3 - 4", "4 - 5", "5 - 6", "6 - 7", "7 - 8",
"8 - 9", "9 - 10", "10 - 11", "11 - 12"))
Related
I have this data: (Design contains several tissues and the ones I'll need to consider are pancreas and lung)
head(Design)
Individual sex age RNA.quality..max10. organ tissue
GTEX-Y5V6-0526-SM-4VBRV GTEX-Y5V6 1 60-69 7.1 Thyroid Thyroid
GTEX-1KXAM-1726-SM-D3LAE GTEX-1KXAM 1 60-69 8.1 Thyroid Thyroid
GTEX-18A67-0826-SM-7KFTI GTEX-18A67 1 50-59 7.2 Thyroid Thyroid
GTEX-14BMU-0226-SM-5S2QA GTEX-14BMU 2 20-29 7.2 Thyroid Thyroid
GTEX-13PVR-0626-SM-5S2RC GTEX-13PVR 2 60-69 7.3 Thyroid Thyroid
GTEX-1211K-0726-SM-5FQUW GTEX-1211K 2 60-69 7.0 Thyroid Thyroid
dput(counts[1:10,])
structure(list(`GTEX-Y5V6-0526-SM-4VBRV` = c(0L, 1L, 2L, 1L,
0L, 0L, 0L, 0L, 0L, 214L), `GTEX-1KXAM-1726-SM-D3LAE` = c(0L,
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 205L), `GTEX-18A67-0826-SM-7KFTI` = c(0L,
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 164L), `GTEX-14BMU-0226-SM-5S2QA` = c(0L,
0L, 0L, 12L, 0L, 0L, 0L, 0L, 0L, 108L), `GTEX-13PVR-0626-SM-5S2RC` = c(0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 100L), `GTEX-1211K-0726-SM-5FQUW` = c(0L,
0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 174L), `GTEX-1KXAM-0926-SM-CXZKA` = c(2L,
1L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 99L), `GTEX-18A67-2626-SM-718AD` = c(7L,
3L, 7L, 2L, 0L, 1L, 5L, 0L, 0L, 116L), `GTEX-14BMU-1126-SM-5RQJ8` = c(0L,
0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 44L), `GTEX-1211K-1426-SM-5FQTF` = c(4L,
0L, 5L, 2L, 0L, 0L, 0L, 0L, 0L, 143L), `GTEX-11TT1-0726-SM-5GU5A` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 57L), `GTEX-1HCUA-1626-SM-A9SMG` = c(0L,
0L, 0L, 22L, 0L, 0L, 0L, 0L, 0L, 53L), `GTEX-1KXAM-0226-SM-EV7AP` = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 75L), `GTEX-18A67-1726-SM-7KFT9` = c(0L,
0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 73L), `GTEX-14BMU-0726-SM-73KXS` = c(0L,
0L, 0L, 40L, 0L, 0L, 0L, 0L, 0L, 74L), `GTEX-13PVR-0726-SM-5S2PX` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 54L), `GTEX-1211K-1126-SM-5EGGB` = c(0L,
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 25L), `GTEX-11TT1-0326-SM-5LUAY` = c(0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 54L), `GTEX-1KXAM-2426-SM-DIPFC` = c(1L,
0L, 2L, 1L, 0L, 0L, 2L, 0L, 0L, 29L), `GTEX-18A67-0326-SM-7LG5X` = c(0L,
0L, 5L, 4L, 0L, 0L, 2L, 0L, 1L, 91L), `GTEX-14BMU-2026-SM-5S2W6` = c(0L,
0L, 2L, 5L, 0L, 0L, 0L, 0L, 0L, 30L), `GTEX-13PVR-2526-SM-5RQIT` = c(0L,
0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 14L), `GTEX-1211K-2126-SM-59HJZ` = c(1L,
0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 51L), `GTEX-Y3I4-2326-SM-4TT81` = c(0L,
0L, 3L, 0L, 0L, 0L, 1L, 0L, 0L, 38L), `GTEX-1KXAM-0426-SM-DHXKG` = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 105L), `GTEX-18A67-1126-SM-7KFSB` = c(1L,
0L, 0L, 4L, 0L, 0L, 1L, 0L, 0L, 76L), `GTEX-14BMU-0526-SM-73KW4` = c(0L,
0L, 0L, 11L, 0L, 0L, 0L, 0L, 0L, 53L), `GTEX-1211K-0826-SM-5FQUP` = c(1L,
0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 104L), `GTEX-11TT1-1626-SM-5EQL7` = c(0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 113L), `GTEX-ZYFG-0226-SM-5GIDT` = c(1L,
0L, 2L, 2L, 0L, 0L, 2L, 0L, 0L, 54L), `GTEX-1KXAM-0826-SM-CXZK9` = c(0L,
0L, 0L, 5L, 0L, 0L, 2L, 0L, 0L, 97L), `GTEX-18A67-2426-SM-7LT95` = c(1L,
0L, 2L, 0L, 0L, 1L, 3L, 0L, 0L, 69L), `GTEX-14BMU-0926-SM-5S2QB` = c(0L,
0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 29L), `GTEX-13PVR-1826-SM-5Q5CC` = c(1L,
0L, 0L, 3L, 0L, 1L, 2L, 0L, 0L, 32L), `GTEX-1211K-0926-SM-5FQTL` = c(0L,
0L, 0L, 3L, 0L, 0L, 1L, 0L, 0L, 99L), `GTEX-11TT1-0526-SM-5P9JO` = c(0L,
1L, 2L, 4L, 0L, 0L, 2L, 0L, 0L, 52L), `GTEX-1KXAM-0726-SM-E9U5I` = c(0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 45L), `GTEX-18A67-2526-SM-7LG5Z` = c(1L,
0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 91L), `GTEX-14BMU-1026-SM-5RQJ5` = c(1L,
0L, 1L, 8L, 0L, 0L, 0L, 0L, 0L, 47L), `GTEX-13PVR-2026-SM-73KXT` = c(0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 27L), `GTEX-1211K-1326-SM-5FQV2` = c(0L,
0L, 3L, 0L, 0L, 0L, 1L, 1L, 0L, 57L), `GTEX-11TT1-0626-SM-5GU4X` = c(1L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 90L), `GTEX-ZYFG-1826-SM-5GZWX` = c(0L,
0L, 3L, 2L, 0L, 0L, 2L, 0L, 0L, 91L), `GTEX-1KXAM-1926-SM-D3LAG` = c(0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 103L), `GTEX-18A67-2226-SM-7LT9Z` = c(0L,
0L, 2L, 2L, 0L, 0L, 1L, 0L, 1L, 157L), `GTEX-13PVR-1726-SM-5Q5EC` = c(1L,
0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 34L), `GTEX-1211K-1826-SM-5EGJ2` = c(0L,
0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 49L), `GTEX-11TT1-0926-SM-5GU5M` = c(0L,
2L, 0L, 3L, 1L, 0L, 0L, 0L, 1L, 49L), `GTEX-1KXAM-1026-SM-CY8IA` = c(0L,
0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 93L), `GTEX-14BMU-1626-SM-5TDE7` = c(0L,
1L, 3L, 13L, 0L, 0L, 1L, 0L, 0L, 84L), `GTEX-13PVR-2226-SM-7DHKP` = c(0L,
0L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 75L), `GTEX-1211K-1926-SM-5EQLB` = c(0L,
1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 114L), `GTEX-11TT1-2126-SM-5GU5Y` = c(2L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 49L), `GTEX-ZT9W-2026-SM-51MRA` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 70L), `GTEX-1KXAM-2326-SM-CYPTD` = c(0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 20L), `GTEX-18A67-0226-SM-7LG67` = c(0L,
0L, 5L, 2L, 0L, 0L, 1L, 0L, 0L, 94L), `GTEX-14BMU-2126-SM-5S2TS` = c(0L,
0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 50L), `GTEX-13PVR-2426-SM-5RQHN` = c(0L,
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 59L), `GTEX-1211K-2226-SM-5FQU6` = c(0L,
0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 81L), `GTEX-11TT1-2426-SM-5EQMK` = c(0L,
1L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 60L)), row.names = c("ENSG00000243485",
"ENSG00000237613", "ENSG00000186092", "ENSG00000238009", "ENSG00000222623",
"ENSG00000241599", "ENSG00000236601", "ENSG00000235146", "ENSG00000223181",
"ENSG00000237491"), class = "data.frame")
I need to create a DGEList with only some of the genes: Pancreas and lung genes (if I am right), in order to do the tasks in the image below: Tasks
I need to do a PCA to check if there's separation among male and female genes, and after I need to do a differential expression analysis with the function exactTest(), and since I need a DGEList for exactTest to compare Pancreas sex1 genes with pancreas sex 2 genes, lungsex1-lungsex2 I suppose that I can do both after creating the DGEList.
In the end my problem is that I don't know how to set up the data.
If you need anything else I'll be here, thank you in advance.
# Build a normalised DGEList for a 12-sample subset (presumably the pancreas
# and lung samples -- confirm against Design$tissue) and draw an MDS plot.

# Rows 13-30 of Design, then drop the 7th-12th of those rows.
PancreasLungDesign <- Design[13:30, 1:6]
PancreasLungDesign <- PancreasLungDesign[-(7:12), ]

# Apply the same selection to the columns of counts so samples stay aligned.
Counts2 <- counts[, 13:30]
Counts2 <- Counts2[, -(7:12)]

# Stop if the sample order of the design and the counts disagrees; a silent
# mismatch would attach the wrong tissue label to every sample.
stopifnot(all(rownames(PancreasLungDesign) == colnames(Counts2)))

# Keep genes that reach at least 10 reads in at least one sample.
# (The original repeated these filter statements twice; once is enough.)
Expressedgenes2 <- Counts2 >= 10
NumExpressedgenes2 <- rowSums(Expressedgenes2)
FilteredCounts2 <- Counts2[NumExpressedgenes2 > 0, ]

y2 <- DGEList(counts = FilteredCounts2, group = PancreasLungDesign$tissue)
y2 <- calcNormFactors(y2)

# Per-sample total of the normalised counts-per-million values.
colSums(cpm(y2, normalized.lib.sizes = TRUE))

# The original passed table(PancreasLungDesign$sex) as the second positional
# argument, which plotMDS() interprets as `top` (number of genes used for the
# distances). It is removed so the default `top` is used.
plotMDS(
  y2,
  labels = PancreasLungDesign$tissue,
  col = c(
    "green", "green", "blue", "blue", "blue", "green",
    "yellow", "yellow", "red", "red", "yellow", "red"
  ),
  cex = 0.5,
  main = "Principal component analysis sex specific expression"
)
I have to identify genes showing sex specific expression in 2 tissues: "pancreas" and "lung".
To do it first of all i need to do a PCA to ascertain whether there is separation between tissues of different sexes (in particular there are 3 individuals of sex 1 and 3 of sex2 for each tissue)
I suppose that i should classify the genes in counts for sex by using the sex column in the Design list and after I should perform a PCA where different colors are assigned to sex 1 and sex 2 genes.
The problem is that even if I know what I should do to perform the PCA (if what I thought is right), I don't know how to write the code required to do it: how can I create a new data frame made up of only the genes in counts that correspond to the lung and pancreas rows in Design?
I thought of doing it this way in order to color the genes with different colors depending on sex (information shown in Design); if there's a simpler way, any suggestion is welcome.
dput(Design[1:10,]):
Design = structure(list(Individual = c("GTEX-Y5V6", "GTEX-1KXAM", "GTEX-18A67",
"GTEX-14BMU", "GTEX-13PVR", "GTEX-1211K", "GTEX-1KXAM", "GTEX-18A67",
"GTEX-14BMU", "GTEX-1211K"), sex = c(1L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 2L, 2L), age = c("60-69", "60-69", "50-59", "20-29",
"60-69", "60-69", "60-69", "50-59", "20-29", "60-69"), RNA.quality..max10. = c(7.1,
8.1, 7.2, 7.2, 7.3, 7, 7.2, 7.3, 7.4, 8.2), organ = c("Thyroid",
"Thyroid", "Thyroid", "Thyroid", "Thyroid", "Thyroid", "Stomach",
"Stomach", "Stomach", "Stomach"), tissue = c("Thyroid", "Thyroid",
"Thyroid", "Thyroid", "Thyroid", "Thyroid", "Stomach", "Stomach",
"Stomach", "Stomach")), row.names = c("GTEX-Y5V6-0526-SM-4VBRV",
"GTEX-1KXAM-1726-SM-D3LAE", "GTEX-18A67-0826-SM-7KFTI", "GTEX-14BMU-0226-SM-5S2QA",
"GTEX-13PVR-0626-SM-5S2RC", "GTEX-1211K-0726-SM-5FQUW", "GTEX-1KXAM-0926-SM-CXZKA",
"GTEX-18A67-2626-SM-718AD", "GTEX-14BMU-1126-SM-5RQJ8", "GTEX-1211K-1426-SM-5FQTF"
), class = "data.frame")
dput(counts[1:10,]):
structure(list(`GTEX-Y5V6-0526-SM-4VBRV` = c(0L, 1L, 2L, 1L,
0L, 0L, 0L, 0L, 0L, 214L), `GTEX-1KXAM-1726-SM-D3LAE` = c(0L,
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 205L), `GTEX-18A67-0826-SM-7KFTI` = c(0L,
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 164L), `GTEX-14BMU-0226-SM-5S2QA` = c(0L,
0L, 0L, 12L, 0L, 0L, 0L, 0L, 0L, 108L), `GTEX-13PVR-0626-SM-5S2RC` = c(0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 100L), `GTEX-1211K-0726-SM-5FQUW` = c(0L,
0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 174L), `GTEX-1KXAM-0926-SM-CXZKA` = c(2L,
1L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 99L), `GTEX-18A67-2626-SM-718AD` = c(7L,
3L, 7L, 2L, 0L, 1L, 5L, 0L, 0L, 116L), `GTEX-14BMU-1126-SM-5RQJ8` = c(0L,
0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 44L), `GTEX-1211K-1426-SM-5FQTF` = c(4L,
0L, 5L, 2L, 0L, 0L, 0L, 0L, 0L, 143L), `GTEX-11TT1-0726-SM-5GU5A` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 57L), `GTEX-1HCUA-1626-SM-A9SMG` = c(0L,
0L, 0L, 22L, 0L, 0L, 0L, 0L, 0L, 53L), `GTEX-1KXAM-0226-SM-EV7AP` = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 75L), `GTEX-18A67-1726-SM-7KFT9` = c(0L,
0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 73L), `GTEX-14BMU-0726-SM-73KXS` = c(0L,
0L, 0L, 40L, 0L, 0L, 0L, 0L, 0L, 74L), `GTEX-13PVR-0726-SM-5S2PX` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 54L), `GTEX-1211K-1126-SM-5EGGB` = c(0L,
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 25L), `GTEX-11TT1-0326-SM-5LUAY` = c(0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 54L), `GTEX-1KXAM-2426-SM-DIPFC` = c(1L,
0L, 2L, 1L, 0L, 0L, 2L, 0L, 0L, 29L), `GTEX-18A67-0326-SM-7LG5X` = c(0L,
0L, 5L, 4L, 0L, 0L, 2L, 0L, 1L, 91L), `GTEX-14BMU-2026-SM-5S2W6` = c(0L,
0L, 2L, 5L, 0L, 0L, 0L, 0L, 0L, 30L), `GTEX-13PVR-2526-SM-5RQIT` = c(0L,
0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 14L), `GTEX-1211K-2126-SM-59HJZ` = c(1L,
0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 51L), `GTEX-Y3I4-2326-SM-4TT81` = c(0L,
0L, 3L, 0L, 0L, 0L, 1L, 0L, 0L, 38L), `GTEX-1KXAM-0426-SM-DHXKG` = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 105L), `GTEX-18A67-1126-SM-7KFSB` = c(1L,
0L, 0L, 4L, 0L, 0L, 1L, 0L, 0L, 76L), `GTEX-14BMU-0526-SM-73KW4` = c(0L,
0L, 0L, 11L, 0L, 0L, 0L, 0L, 0L, 53L), `GTEX-1211K-0826-SM-5FQUP` = c(1L,
0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 104L), `GTEX-11TT1-1626-SM-5EQL7` = c(0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 113L), `GTEX-ZYFG-0226-SM-5GIDT` = c(1L,
0L, 2L, 2L, 0L, 0L, 2L, 0L, 0L, 54L), `GTEX-1KXAM-0826-SM-CXZK9` = c(0L,
0L, 0L, 5L, 0L, 0L, 2L, 0L, 0L, 97L), `GTEX-18A67-2426-SM-7LT95` = c(1L,
0L, 2L, 0L, 0L, 1L, 3L, 0L, 0L, 69L), `GTEX-14BMU-0926-SM-5S2QB` = c(0L,
0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 29L), `GTEX-13PVR-1826-SM-5Q5CC` = c(1L,
0L, 0L, 3L, 0L, 1L, 2L, 0L, 0L, 32L), `GTEX-1211K-0926-SM-5FQTL` = c(0L,
0L, 0L, 3L, 0L, 0L, 1L, 0L, 0L, 99L), `GTEX-11TT1-0526-SM-5P9JO` = c(0L,
1L, 2L, 4L, 0L, 0L, 2L, 0L, 0L, 52L), `GTEX-1KXAM-0726-SM-E9U5I` = c(0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 45L), `GTEX-18A67-2526-SM-7LG5Z` = c(1L,
0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 91L), `GTEX-14BMU-1026-SM-5RQJ5` = c(1L,
0L, 1L, 8L, 0L, 0L, 0L, 0L, 0L, 47L), `GTEX-13PVR-2026-SM-73KXT` = c(0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 27L), `GTEX-1211K-1326-SM-5FQV2` = c(0L,
0L, 3L, 0L, 0L, 0L, 1L, 1L, 0L, 57L), `GTEX-11TT1-0626-SM-5GU4X` = c(1L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 90L), `GTEX-ZYFG-1826-SM-5GZWX` = c(0L,
0L, 3L, 2L, 0L, 0L, 2L, 0L, 0L, 91L), `GTEX-1KXAM-1926-SM-D3LAG` = c(0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 103L), `GTEX-18A67-2226-SM-7LT9Z` = c(0L,
0L, 2L, 2L, 0L, 0L, 1L, 0L, 1L, 157L), `GTEX-13PVR-1726-SM-5Q5EC` = c(1L,
0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 34L), `GTEX-1211K-1826-SM-5EGJ2` = c(0L,
0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 49L), `GTEX-11TT1-0926-SM-5GU5M` = c(0L,
2L, 0L, 3L, 1L, 0L, 0L, 0L, 1L, 49L), `GTEX-1KXAM-1026-SM-CY8IA` = c(0L,
0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 93L), `GTEX-14BMU-1626-SM-5TDE7` = c(0L,
1L, 3L, 13L, 0L, 0L, 1L, 0L, 0L, 84L), `GTEX-13PVR-2226-SM-7DHKP` = c(0L,
0L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 75L), `GTEX-1211K-1926-SM-5EQLB` = c(0L,
1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 114L), `GTEX-11TT1-2126-SM-5GU5Y` = c(2L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 49L), `GTEX-ZT9W-2026-SM-51MRA` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 70L), `GTEX-1KXAM-2326-SM-CYPTD` = c(0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 20L), `GTEX-18A67-0226-SM-7LG67` = c(0L,
0L, 5L, 2L, 0L, 0L, 1L, 0L, 0L, 94L), `GTEX-14BMU-2126-SM-5S2TS` = c(0L,
0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 50L), `GTEX-13PVR-2426-SM-5RQHN` = c(0L,
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 59L), `GTEX-1211K-2226-SM-5FQU6` = c(0L,
0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 81L), `GTEX-11TT1-2426-SM-5EQMK` = c(0L,
1L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 60L)), row.names = c("ENSG00000243485",
"ENSG00000237613", "ENSG00000186092", "ENSG00000238009", "ENSG00000222623",
"ENSG00000241599", "ENSG00000236601", "ENSG00000235146", "ENSG00000223181",
"ENSG00000237491"), class = "data.frame")
I'm trying to compute the ranked abundances of a community data (site*species matrix) by using rankabundance(df) in the BiodiversityR package. But the following error keeps popping up whenever I try to run it.
Error in `[.data.frame`(pi, i) : undefined columns selected
Can someone please help with what this code means?
I've already specified the column names when sub-setting the data. And the data is also in the right format; I've tried running BCI (from vegan) for the same function and it runs perfectly fine. My data is the same format as BCI.
library(BiodiversityR)
# alad2 is a tibble (class "tbl_df"); passing it directly fails with
# "undefined columns selected", so convert it to a plain data.frame first.
rankabundance(as.data.frame(alad2), digits = 1)
This is the code that I'm running, and the data-frame is arranged in a site*species matrix, where sites are rows and species are columns.
Here is the dataframe, alad2:
structure(list(`Alysicarpous sp.1` = c(0L, 0L, 1L, 0L, 0L, 4L,
0L, 0L, 0L, 0L, 0L, 4L), `Alysicarpous sp.2` = c(0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `Bothriochloa pertusa` = c(0L,
0L, 4L, 0L, 12L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `Butea monosperma ` = c(0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `Chromolaena odorata` = c(0L,
0L, 0L, 1L, 3L, 0L, 0L, 5L, 17L, 4L, 0L, 0L), `Chrysopogon sp.*` = c(62L,
64L, 57L, 68L, 72L, 74L, 72L, 62L, 56L, 67L, 54L, 61L), `Desmodium triflorum` = c(0L,
2L, 7L, 12L, 6L, 12L, 0L, 10L, 13L, 0L, 14L, 8L), `Eragrostis tenuifolia` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L), `Fimbristylis dichotoma` = c(32L,
38L, 41L, 26L, 38L, 38L, 41L, 20L, 28L, 41L, 31L, 32L), H80 = c(2L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L), `Hemigraphis sp.*` = c(0L,
0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 3L, 0L, 0L), `Ischaemum sp.*` = c(18L,
0L, 18L, 18L, 0L, 18L, 33L, 26L, 12L, 16L, 24L, 23L), `Lantana camara` = c(0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L), `Leucas aspera` = c(0L,
0L, 0L, 0L, 2L, 2L, 0L, 0L, 1L, 0L, 0L, 0L), `Oldenlandia umbellata` = c(3L,
6L, 9L, 8L, 3L, 0L, 0L, 3L, 6L, 7L, 3L, 0L), `Phyllanthus virgatus` = c(0L, 2L, 9L, 13L, 6L, 7L, 9L, 0L, 0L, 6L, 11L, 8L), `Rungia pectinata` = c(0L,
0L, 0L, 2L, 3L, 3L, 0L, 0L, 0L, 0L, 0L, 0L), `Senagalia pennata` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L), `Senna spectabilis ` = c(0L,
0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `Setaria flavida` = c(0L,
0L, 0L, 0L, 11L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `Setaria pumila` = c(4L,
0L, 13L, 0L, 0L, 0L, 5L, 4L, 7L, 5L, 4L, 7L), `Themeda triandra` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L)), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"))
You do not have a data frame, but a tibble. Use alad2 <- as.data.frame(alad2) and your code will work.
I've tried to complete an ANOSIM with data from a study I have carried out, but I get multiple errors and I'm not sure how to fix them. Most of the errors are "dissimilarities have 24 observations, but grouping has 23". I'm trying to see the similarity in community structure between multiple samples.
my code so far is
# Run an ANOSIM on the invertebrate counts read from "Invertebrates.csv".
#
# The original bare `setwd()` call was removed: setwd() has no default for its
# `dir` argument, so calling it without one stops the script with an error.
# The file is therefore read relative to the current working directory.

# load invertebrate data
Invertebrates <- read.csv(file = "Invertebrates.csv", header = TRUE, sep = ",")

# install vegan only when it is not available yet, then load it
if (!requireNamespace("vegan", quietly = TRUE)) {
  install.packages("vegan")
}
library(vegan)

# make community matrix: every column except the first
com <- Invertebrates[, -1, drop = FALSE]
m_com <- as.matrix(com)

# group by site: the first column is used as the grouping variable
group <- Invertebrates[, 1]

# ANOSIM
# NOTE(review): anosim() stops with "there should be replicates within groups"
# when no value of `group` occurs more than once -- check table(group) first.
invert.ano <- anosim(m_com, group)
Then I get
Error in anosim(m_com, group) : there should be replicates within groups
Thanks for any help
Invertebrates <- structure(list(Site = structure(c(10L, 14L, 6L, 3L, 24L, 12L, 7L, 18L, 1L, 8L, 15L, 5L, 16L, 23L, 4L, 11L, 21L, 19L, 9L, 13L
), .Label = c("Anax parthenope", "Anisus vortex", "Asellus aquaticus",
"Bathyomphalus contortus", "Bithynia leachii", "Bithynia tentaculata",
"Coenagrion pulchellum", "Corixa punctata", "Dytiscus marginalis",
"Gammarus pulex", "Gyraulus albus", "Haliplus fluviatilis", "Haplotaxis gordioides",
"Ilyocoris cimicoides", "Lymnaea stagnalis", "Lymnaea truncatula",
"Oxygastra curtisii", "Physa fontilnalis", "Piscicola geometra",
"Planorbis cornatus", "Planorbis planorbis", "Radix ovata", "Radix palustris",
"Sialis lutaria"), class = "factor"), Finglesham.Brook.A = c(112L,
1L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Betteshanger.Pond.A = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Betteshanger.Pond.B = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Great.Mongeham.A = c(7L, 0L, 0L, 2L, 2L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Site.7.SS.A = c(6L,
0L, 0L, 0L, 0L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Great.Mongeham.B = c(32L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Broad.dike.A = c(0L,
0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Broad.dike.B = c(0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), S3.Broad.dike.SS.B = c(14L,
0L, 7L, 6L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Site.6.NS.B = c(65L, 0L, 0L, 2L, 2L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Fowlmead.Lake.A = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Site.7.SS.B = c(0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Fowlmead.lake.B = c(0L,
0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Adelaide.NS.A = c(5L, 0L, 3L, 6L, 2L, 0L, 0L, 0L,
0L, 0L, 2L, 6L, 4L, 1L, 1L, 6L, 4L, 0L, 0L, 0L), Little.Downs.Bridge.B = c(48L,
8L, 0L, 23L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 18L, 0L, 2L, 0L, 1L,
0L, 1L, 0L, 0L), Finglesham.Brook.B = c(78L, 0L, 3L, 15L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 2L),
Adelaide.SS.A = c(8L, 0L, 0L, 33L, 9L, 0L, 0L, 0L, 0L, 0L,
0L, 12L, 0L, 4L, 19L, 7L, 4L, 0L, 2L, 0L), Adelaide.SS.B = c(4L,
0L, 20L, 9L, 2L, 0L, 0L, 0L, 0L, 0L, 7L, 0L, 0L, 0L, 14L,
0L, 1L, 0L, 0L, 0L), Ham.Fen.SS = c(1L, 0L, 0L, 6L, 3L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
Adelaide.NS.B = c(3L, 0L, 0L, 8L, 0L, 6L, 1L, 0L, 0L, 2L,
0L, 0L, 0L, 0L, 0L, 0L, 12L, 0L, 1L, 0L), Site.6.NS.A = c(58L,
0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), S3.Broad.dike.SS.A = c(24L, 0L, 0L, 50L,
0L, 0L, 3L, 13L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), Little.Downs.Bridge.A = c(10L, 16L, 23L, 46L, 0L,
0L, 2L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 4L, 0L, 5L, 0L, 0L, 0L
)), row.names = c(NA, 20L), class = "data.frame")
If you run
table(Invertebrates$Site)
you will see that your grouping variable is not actually grouping anything. That is, there is at most one observation per group. But ANOSIM requires the data to be grouped.
If I just make up a random grouping variable, like this:
Invertebrates$Group <- sample(c(1,2), nrow(Invertebrates), replace = TRUE)
and rerun your analysis:
# Assign every row to one of two arbitrary groups, then rerun the ANOSIM with
# that made-up grouping.
Invertebrates[["Group"]] <- sample(
  c(1, 2),
  size = nrow(Invertebrates),
  replace = TRUE
)
group <- Invertebrates$Group
invert.ano <- anosim(m_com, grouping = group)
It works!
I have a data frame (sub), where I want to multiply the numeric values in column 2 with a factor which differs depending on the "value" of column 'domain'.
data:
sub <- structure(list(domain = c("Bacteria", "Bacteria", "Bacteria",
"Eukaryota", "Eukaryota", "Eukaryota", "Bacteria", "Bacteria",
"Eukaryota", "Bacteria"), `60781` = c(12471263.2580165, 0, 24942526.516033,
9845734.15106566, 0, 19691468.3021313, 122788742566383, 0, 0,
245577485132767), `60782` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), `60783` = c(2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
`60784` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `60785` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `60786` = c(5L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L), `60787` = c(2L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 5L), `60759` = c(3L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 8L), `60773` = c(1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), `60774` = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 4L), `60775` = c(2L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 6L), `60776` = c(2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 2L), `60777` = c(4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
4L), `60778` = c(1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 5L),
`60779` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 4L), `60780` = c(1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L)), .Names = c("domain",
"60781", "60782", "60783", "60784", "60785", "60786", "60787",
"60759", "60773", "60774", "60775", "60776", "60777", "60778",
"60779", "60780"), row.names = c(4549L, 9581L, 14048L, 17710L,
19822L, 17650L, 15353L, 13170L, 20622L, 157L), class = "data.frame")
Q16S <- structure(list(s = structure(c(10L, 11L, 12L, 13L, 14L, 15L,
16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L), .Label = c("60759",
"60773", "60774", "60775", "60776", "60777", "60778", "60779",
"60780", "60781", "60782", "60783", "60784", "60785", "60786",
"60787"), class = "factor"), q = c(12471263.2580165, 9779600.35102098,
4233335.65669403, 4233335.65669403, 5861610.84202048, 3608701.24759829,
1911945.62045948, 5286624.33414104, 23126648.4362759, 4358019.31046983,
8226827.34243214, 4359062.63714278, 2351302.71868581, 5938544.50162295,
2772726.13977936, 7168230.19241166)), .Names = c("s", "q"), row.names = c(10L,
11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L), class = "data.frame")
Q18S <- structure(list(s = structure(c(10L, 11L, 12L, 13L, 14L, 15L,
16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L), .Label = c("60759",
"60773", "60774", "60775", "60776", "60777", "60778", "60779",
"60780", "60781", "60782", "60783", "60784", "60785", "60786",
"60787"), class = "factor"), q = c(9845734.15106566, 7720737.11922709,
3342107.09739003, 3342107.09739003, 4627587.50685827, 2848974.66915655,
1509430.75299433, 4173650.79011135, 18257880.3444283, 3440541.56089723,
6494863.6913938, 3441365.23984957, 1856291.62001511, 4688324.60654444,
2188994.32087844, 5659129.09927236)), .Names = c("s", "q"), row.names = c(10L,
11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L), class = "data.frame")
Code:
# Scale column 2 of `sub` by a domain-specific factor.
# The right-hand side must be subset with the same row mask as the left-hand
# side; multiplying the whole column puts values from unrelated rows into the
# selected rows and triggers the "number of items to replace" warning.
# %in% is used instead of == so an NA in `domain` yields FALSE, not NA.
is_bacteria <- sub$domain %in% "Bacteria"
is_eukaryota <- sub$domain %in% "Eukaryota"
sub[[2]][is_bacteria] <- sub[[2]][is_bacteria] * Q16S$q[1]
sub[[2]][is_eukaryota] <- sub[[2]][is_eukaryota] * Q18S$q[1]
Eventually I want to do a for loop, looping over columns 11:16 and multiplying by different factors as specified in Q16S or Q18S depending on 'domain'.
# Scale columns 11..26 of `qdata` by a per-column, per-domain factor:
# column 10 + i is multiplied by Q16S$q[i] for "Bacteria" rows and by
# Q18S$q[i] for "Eukaryota" rows.
# The row masks do not depend on i, so they are computed once. %in% yields
# FALSE (not NA) for missing domains.
is_bacteria <- qdata$domain %in% "Bacteria"
is_eukaryota <- qdata$domain %in% "Eukaryota"
for (i in seq_len(16)) {
  col <- 10 + i
  # Subset the right-hand side with the same mask as the left-hand side so
  # each row is multiplied by its own value instead of a recycled one.
  qdata[[col]][is_bacteria] <- qdata[[col]][is_bacteria] * Q16S$q[i]
  qdata[[col]][is_eukaryota] <- qdata[[col]][is_eukaryota] * Q18S$q[i]
}
However, in the output there are now numbers where there were '0' before, and where there was, for example, '2', the value has now been multiplied by millions....
I do get a warning message, but I can't interpret it:
Warning message:
In sub[[11]][sub$domain == "Eukaryota"] <- sub[[11]] * Q18S$q[1] :
number of items to replace is not a multiple of replacement length
Any suggestions as to what I am doing wrong?