R: aggregate based of multiple columns - r

How can I sum DBH and Basal area by Tree.Species Compartment, Stand, Transect and Plot?
Tree.Species DBH Basal Area Compartment Stand Transect Plot
Sugar Maple 16.4 211.1336 107 20 2 3
Sugar Maple 25.1 494.55785 107 20 2 3
Hemlock 15.1 178.98785 209 30 1 2
I was trying:
aggregate(.~ Compartment + Stand + Transect + Plot + Tree.Speices, data = Trees, FUN = sum)
but I keep getting this error:
Error in model.frame.default(formula = cbind(DBH, Basal.Area, Transect.., :
variable lengths differ (found for 'Transect')
My variables unique values and there are not all possible combinations in the data.
Compartment: 107 209 310 231
Stand: 20 110 30 240 80 300
Transect: 1 2 3
Plot: 1 2 3 4 5 6
dput:
structure(list(Tree.Speices = structure(c(53L, 49L, 49L, 49L,
49L, 11L, 49L, 12L, 49L, 4L, 49L, 49L, 49L, 53L, 49L, 49L, 4L,
4L, 33L, 4L, 11L, 53L, 11L, 53L, 53L, 21L, 21L, 53L, 49L, 53L,
49L, 49L, 53L, 21L, 4L, 4L, 49L, 12L, 21L, 49L, 49L, 49L, 49L,
9L, 49L, 49L, 11L, 11L, 53L, 47L, 33L, 11L, 5L, 49L, 11L, 11L,
38L, 11L, 49L, 11L, 11L, 11L, 11L, 49L, 53L, 53L, 53L, 47L, 49L,
49L, 49L, 47L, 49L, 33L, 4L, 4L, 47L, 4L, 11L, 49L, 53L, 49L,
11L, 11L, 11L, 49L, 11L, 11L, 11L, 11L, 11L, 49L, 11L, 49L, 47L,
49L, 11L, 11L, 11L, 11L, 21L), .Label = c("American Elm", "Aspen",
"Balsam", "Basswood", "Beech", "Big-Toothed Aspen", "Black Cherry",
"Cedar", "Cottonwood", "Elm", "Hemlock", "Hop Hornbeam", "paper birch",
"Paper Birch", "Poplar", "Quaking Aspen", "Red Maple", "Red Oak",
"Red Spruce", "snag", "Snag", "Snag (Aspen)", "Snag (Basswood)",
"Snag (Beech)", "Snag (Big-Toothed Aspen)", "SNAG (Big-Toothed Aspen)",
"snag (conifer)", "Snag (Conifer)", "Snag (Cottonwood)", "Snag (Elm)",
"Snag (hardwood)", "snag (Hemlock)", "Snag (Hemlock)", "SNAG (Hemlock)",
"Snag (maple)", "Snag (Maple)", "Snag (Oak)", "Snag (Paper Birch)",
"Snag (Poplar)", "Snag (Red Maple)", "snag (Sugar Maple)", "Snag (Sugar Maple)",
"Snag (Sugar)", "Snag (White Ash)", "Snag (White Pine)", "snag (Yellow Birch)",
"Snag (Yellow Birch)", "SNAG (Yellow Birch)", "Sugar Maple",
"White Ash", "White Birch", "White Pine", "Yellow Birch"), class = "factor"),
DBH = c(55.7, 21.3, 14, 38, 6.5, 20.3, 33.2, 6.3, 30.5, 22.3,
32.7, 8.9, 41.8, 30, 24.6, 13.8, 56.6, 49.5, 49.9, 63.2,
28, 39, 21, 25.7, 29.9, 38, 17.5, 22.4, 6.2, 20.3, 18.3,
21, 24.7, 49.5, 6.4, 30.3, 19.4, 6, 37.8, 24.6, 24.4, 9.5,
17.4, 49.2, 26, 31, 23.6, 19.8, 37.9, 25.8, 31.5, 18.1, 34.4,
59.7, 28.2, 21, 16.4, 23.7, 34.4, 24.7, 15.4, 12, 24.2, 34.2,
19.4, 15.1, 34.9, 34.8, 6.6, 61.2, 25.4, 38.8, 28.9, 32.3,
43.9, 33.8, 27.1, 37, 21.2, 26.4, 27.4, 10.6, 55.1, 69.4,
24, 25.4, 51, 20.2, 14.3, 31.8, 48.8, 38.3, 19.6, 26.3, 34.5,
6.3, 41.3, 32.6, 14.6, 9.1, 57.8), Basal.Area = c(2435.45465,
356.14665, 153.86, 1133.54, 33.16625, 323.49065, 865.2584,
31.15665, 730.24625, 390.37265, 839.39265, 62.17985, 1371.5834,
706.5, 475.0506, 149.4954, 2514.7946, 1923.44625, 1954.65785,
3135.4784, 615.44, 1193.985, 346.185, 518.48465, 701.79785,
1133.54, 240.40625, 393.8816, 30.1754, 323.49065, 262.88865,
346.185, 478.92065, 1923.44625, 32.1536, 720.70065, 295.4426,
28.26, 1121.6394, 475.0506, 467.3576, 70.84625, 237.6666,
1900.2024, 530.66, 754.385, 437.2136, 307.7514, 1127.58185,
522.5274, 778.91625, 257.17385, 928.9376, 2797.81065, 624.2634,
346.185, 211.1336, 440.92665, 928.9376, 478.92065, 186.1706,
113.04, 459.7274, 918.1674, 295.4426, 178.98785, 956.13785,
950.6664, 34.1946, 2940.1704, 506.4506, 1181.7704, 655.63985,
818.98265, 1512.85985, 896.8154, 576.51185, 1074.665, 352.8104,
547.1136, 589.3466, 88.2026, 2383.26785, 3780.8426, 452.16,
506.4506, 2041.785, 320.3114, 160.52465, 793.8234, 1869.4304,
1151.50865, 301.5656, 542.97665, 934.34625, 31.15665, 1338.96665,
834.2666, 167.3306, 65.00585, 2622.5594), Compartment = c(107L,
107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L,
107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L,
107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L,
107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L,
107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L,
107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L,
107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L,
107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L,
107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L,
107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L, 107L
), Stand = c(20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L), Transect.. = c(2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Plot.. = c(1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("Tree.Speices",
"DBH", "Basal.Area", "Compartment", "Stand", "Transect..", "Plot.."
), row.names = 67:167, class = "data.frame")

The last two column names have two dots at the end and Species is incorrectly spelled:
> names(Trees)
[1] "Tree.Speices" "DBH" "Basal.Area" "Compartment" "Stand"
[6] "Transect.." "Plot.."
Try:
aggregate(.~ Compartment + Stand + Transect.. + Plot.. + Tree.Speices,
data = Trees, FUN = sum)
or remove the dots at the end of all names and correct the spelling:
names(Trees) <- sub("\\.+$", "", names(Trees))
names(Trees) <- sub("Speices", "Species", names(Trees))
aggregate(.~ Compartment + Stand + Transect + Plot + Tree.Species,
data = Trees, FUN = sum)

Related

Adding points to persp 3D plot - hide or obscure points when behind surface

Background:
I'm attempting to add a 3D plot to a Shiny application. I've added a button to rotate the plot ~ 90 degrees. I'd also like to include radio buttons to plot points on the surface.
Problem:
When points are plotted they simply appear on top of the image, even when they should be behind the surface.
Question:
Is there a way to plot the surface so that it's transparent and points appear either behind or in front? Or hide the points if they land out of eyesight?
Data:
d <- list(x = c(0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6,
6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10), y = c(0, 0.5, 1, 1.5, 2, 2.5,
3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10),
z = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0.000147818839413345, 0.00112553487724733,
0.00210325091508131, 0.00308096695291529, 0.00405868299074927,
0.00503639902858325, 0.00601411506641723, 0.00699183110425121,
0.00796954714208519, 0.00894726317991917, 0.00992497921775315,
0.0109026952555871, 0.0118804112934211, 0.0128581273312551,
0.0138358433690891, 0.0148135594069231, 0.015791275444757,
0.016768991482591, 0.017746707520425, 0.018724423558259,
0.019702139596093, 0.00332663525507192, 0.0253299512993333,
0.0473332673435947, 0.0693365833878561, 0.0913398994321175,
0.113343215476379, 0.13534653152064, 0.157349847564902, 0.179353163609163,
0.201356479653424, 0.223359795697686, 0.245363111741947,
0.267366427786209, 0.28936974383047, 0.311373059874731, 0.333376375918993,
0.355379691963254, 0.377383008007516, 0.399386324051777,
0.421389640096038, 0.4433929561403, 0.0185048854236584, 0.140901484725856,
0.263298084028054, 0.385694683330252, 0.50809128263245, 0.630487881934648,
0.752884481236846, 0.875281080539044, 0.997677679841242,
1.12007427914344, 1.24247087844564, 1.36486747774784, 1.48726407705003,
1.60966067635223, 1.73205727565443, 1.85445387495663, 1.97685047425883,
2.09924707356102, 2.22164367286322, 2.34404027216542, 2.46643687146762,
0.0575583422570596, 0.438265663185897, 0.818972984114734,
1.19968030504357, 1.58038762597241, 1.96109494690124, 2.34180226783008,
2.72250958875892, 3.10321690968776, 3.48392423061659, 3.86463155154543,
4.24533887247427, 4.6260461934031, 5.00675351433194, 5.38746083526078,
5.76816815618962, 6.14887547711845, 6.52958279804729, 6.91029011897613,
7.29099743990496, 7.6717047608338, 0.129117933403967, 0.98314083577592,
1.83716373814787, 2.69118664051983, 3.54520954289178, 4.39923244526373,
5.25325534763568, 6.10727825000764, 6.96130115237959, 7.81532405475154,
8.6693469571235, 9.52336985949545, 10.3773927618674, 11.2314156642394,
12.0854385666113, 12.9394614689833, 13.7934843713552, 14.6475072737272,
15.5015301760991, 16.3555530784711, 17.209575980843, 0.23363441995763,
1.77895922624881, 3.32428403254, 4.86960883883118, 6.41493364512237,
7.96025845141355, 9.50558325770473, 11.0509080639959, 12.5962328702871,
14.1415576765783, 15.6868824828695, 17.2322072891607, 18.7775320954518,
20.322856901743, 21.8681817080342, 23.4135065143254, 24.9588313206166,
26.5041561269078, 28.0494809331989, 29.5948057394901, 31.1401305457813,
0.36143039040365, 2.75203425835922, 5.14263812631479, 7.53324199427035,
9.92384586222592, 12.3144497301815, 14.7050535981371, 17.0956574660926,
19.4862613340482, 21.8768652020038, 24.2674690699593, 26.6580729379149,
29.0486768058705, 31.439280673826, 33.8298845417816, 36.2204884097372,
38.6110922776927, 41.0016961456483, 43.3923000136039, 45.7829038815594,
48.173507749515, 0.494048345421132, 3.76182525870662, 7.02960217199211,
10.2973790852776, 13.5651559985631, 16.8329329118486, 20.1007098251341,
23.3684867384196, 26.636263651705, 29.9040405649905, 33.171817478276,
36.4395943915615, 39.707371304847, 42.9751482181325, 46.242925131418,
49.5107020447035, 52.778478957989, 56.0462558712744, 59.3140327845599,
62.5818096978454, 65.8495866111309, 0.608277972936286, 4.63160227964344,
8.65492658635059, 12.6782508930577, 16.7015751997649, 20.724899506472,
24.7482238131792, 28.7715481198863, 32.7948724265935, 36.8181967333006,
40.8415210400078, 44.8648453467149, 48.8881696534221, 52.9114939601292,
56.9348182668364, 60.9581425735435, 64.9814668802507, 69.0047911869578,
73.028115493665, 77.0514398003722, 81.0747641070793, 0.68169864474794,
5.19064825215217, 9.6995978595564, 14.2085474669606, 18.7174970743649,
23.2264466817691, 27.7353962891733, 32.2443458965776, 36.7532955039818,
41.262245111386, 45.7711947187903, 50.2801443261945, 54.7890939335987,
59.298043541003, 63.8069931484072, 68.3159427558114, 72.8248923632157,
77.3338419706199, 81.8427915780241, 86.3517411854284, 90.8606907928326,
0.698331143785818, 5.31729285196915, 9.93625456015249, 14.5552162683358,
19.1741779765192, 23.7931396847025, 28.4121013928858, 33.0310631010692,
37.6500248092525, 42.2689865174358, 46.8879482256192, 51.5069099338025,
56.1258716419859, 60.7448333501692, 65.3637950583525, 69.9827567665359,
74.6017184747192, 79.2206801829025, 83.8396418910859, 88.4586035992692,
93.0775653074525, 0.653010606586468, 4.9722093330084, 9.29140805943032,
13.6106067858523, 17.9298055122742, 22.2490042386961, 26.568202965118,
30.88740169154, 35.2066004179619, 39.5257991443838, 43.8449978708057,
48.1641965972277, 52.4833953236496, 56.8025940500715, 61.1217927764935,
65.4409915029154, 69.7601902293373, 74.0793889557592, 78.3985876821812,
82.7177864086031, 87.036985135025, 0.553337675961259, 4.21327116124787,
7.87320464653448, 11.5331381318211, 15.1930716171077, 18.8530051023943,
22.5129385876809, 26.1728720729675, 29.8328055582542, 33.4927390435408,
37.1526725288274, 40.812606014114, 44.4725394994006, 48.1324729846872,
51.7924064699738, 55.4523399552604, 59.112273440547, 62.7722069258337,
66.4321404111203, 70.0920738964069, 73.7520073816935, 0.418509049668882,
3.18664747819306, 5.95478590671724, 8.72292433524142, 11.4910627637656,
14.2592011922898, 17.027339620814, 19.7954780493381, 22.5636164778623,
25.3317549063865, 28.0998933349107, 30.8680317634349, 33.636170191959,
36.4043086204832, 39.1724470490074, 41.9405854775316, 44.7087239060558,
47.4768623345799, 50.2450007631041, 53.0131391916283, 55.7812776201525,
0.274945103406177, 2.09351057307846, 3.91207604275075, 5.73064151242304,
7.54920698209532, 9.36777245176761, 11.1863379214399, 13.0049033911122,
14.8234688607845, 16.6420343304568, 18.460599800129, 20.2791652698013,
22.0977307394736, 23.9162962091459, 25.7348616788182, 27.5534271484905,
29.3719926181628, 31.1905580878351, 33.0091235575073, 34.8276890271796,
36.6462544968519, 0.14939138421548, 1.1375086826693, 2.12562598112311,
3.11374327957693, 4.10186057803075, 5.08997787648456, 6.07809517493838,
7.06621247339219, 8.05432977184601, 9.04244707029983, 10.0305643687536,
11.0186816672075, 12.0067989656613, 12.9949162641151, 13.9830335625689,
14.9711508610227, 15.9592681594765, 16.9473854579304, 17.9355027563842,
18.923620054838, 19.9117373532918, 0.0610345623904979, 0.464734596487648,
0.868434630584799, 1.27213466468195, 1.6758346987791, 2.07953473287625,
2.4832347669734, 2.88693480107055, 3.2906348351677, 3.69433486926485,
4.098034903362, 4.50173493745915, 4.9054349715563, 5.30913500565345,
5.7128350397506, 6.11653507384775, 6.52023510794491, 6.92393514204206,
7.32763517613921, 7.73133521023636, 8.13503524433351, 0.0150842607904164,
0.114855871447028, 0.214627482103639, 0.31439909276025, 0.414170703416861,
0.513942314073472, 0.613713924730083, 0.713485535386694,
0.813257146043305, 0.913028756699917, 1.01280036735653, 1.11257197801314,
1.21234358866975, 1.31211519932636, 1.41188680998297, 1.51165842063958,
1.61143003129619, 1.71120164195281, 1.81097325260942, 1.91074486326603,
2.01051647392264, 0.00112075907879118, 0.00853377984279572,
0.0159468006068003, 0.0233598213708048, 0.0307728421348093,
0.0381858628988139, 0.0455988836628184, 0.0530119044268229,
0.0604249251908275, 0.067837945954832, 0.0752509667188366,
0.0826639874828411, 0.0900770082468456, 0.0974900290108502,
0.104903049774855, 0.112316070538859, 0.119729091302864,
0.127142112066868, 0.134555132830873, 0.141968153594877,
0.149381174358882, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), .Dim = c(21L, 21L)), facetcol = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 6L, 6L, 1L, 2L, 2L, 3L, 4L, 4L, 5L, 6L, 6L, 7L, 8L,
9L, 9L, 10L, 11L, 11L, 12L, 13L, 13L, 14L, 1L, 3L, 4L, 5L,
7L, 8L, 9L, 11L, 12L, 13L, 15L, 16L, 17L, 19L, 20L, 21L,
23L, 24L, 25L, 27L, 2L, 4L, 6L, 9L, 11L, 13L, 15L, 17L, 19L,
22L, 24L, 26L, 28L, 30L, 33L, 35L, 37L, 39L, 41L, 44L, 3L,
6L, 9L, 12L, 15L, 18L, 21L, 25L, 28L, 31L, 34L, 37L, 40L,
44L, 47L, 50L, 53L, 56L, 59L, 62L, 3L, 7L, 11L, 15L, 19L,
23L, 28L, 32L, 36L, 40L, 44L, 48L, 52L, 56L, 60L, 64L, 68L,
72L, 76L, 80L, 4L, 8L, 13L, 18L, 23L, 27L, 32L, 37L, 42L,
46L, 51L, 56L, 61L, 65L, 70L, 75L, 80L, 84L, 89L, 94L, 4L,
9L, 14L, 19L, 24L, 29L, 34L, 39L, 45L, 50L, 55L, 60L, 65L,
70L, 75L, 80L, 85L, 90L, 95L, 100L, 4L, 9L, 14L, 19L, 24L,
29L, 34L, 39L, 44L, 49L, 54L, 59L, 64L, 69L, 74L, 78L, 83L,
88L, 93L, 98L, 3L, 8L, 12L, 17L, 21L, 26L, 30L, 35L, 39L,
43L, 48L, 52L, 57L, 61L, 66L, 70L, 75L, 79L, 83L, 88L, 3L,
6L, 10L, 14L, 17L, 21L, 24L, 28L, 32L, 35L, 39L, 42L, 46L,
49L, 53L, 57L, 60L, 64L, 67L, 71L, 2L, 5L, 7L, 10L, 12L,
15L, 18L, 20L, 23L, 25L, 28L, 30L, 33L, 35L, 38L, 41L, 43L,
46L, 48L, 51L, 2L, 3L, 5L, 6L, 8L, 9L, 11L, 12L, 14L, 16L,
17L, 19L, 20L, 22L, 23L, 25L, 27L, 28L, 30L, 31L, 1L, 2L,
3L, 3L, 4L, 5L, 6L, 6L, 7L, 8L, 9L, 10L, 10L, 11L, 12L, 13L,
13L, 14L, 15L, 16L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("(-0.357,3.59]", "(3.59,7.18]",
"(7.18,10.8]", "(10.8,14.4]", "(14.4,17.9]", "(17.9,21.5]",
"(21.5,25.1]", "(25.1,28.7]", "(28.7,32.3]", "(32.3,35.9]",
"(35.9,39.5]", "(39.5,43.1]", "(43.1,46.6]", "(46.6,50.2]",
"(50.2,53.8]", "(53.8,57.4]", "(57.4,61]", "(61,64.6]", "(64.6,68.2]",
"(68.2,71.8]", "(71.8,75.3]", "(75.3,78.9]", "(78.9,82.5]",
"(82.5,86.1]", "(86.1,89.7]", "(89.7,93.3]", "(93.3,96.9]",
"(96.9,100]", "(100,104]", "(104,108]", "(108,111]", "(111,115]",
"(115,118]", "(118,122]", "(122,126]", "(126,129]", "(129,133]",
"(133,136]", "(136,140]", "(140,144]", "(144,147]", "(147,151]",
"(151,154]", "(154,158]", "(158,161]", "(161,165]", "(165,169]",
"(169,172]", "(172,176]", "(176,179]", "(179,183]", "(183,187]",
"(187,190]", "(190,194]", "(194,197]", "(197,201]", "(201,204]",
"(204,208]", "(208,212]", "(212,215]", "(215,219]", "(219,222]",
"(222,226]", "(226,230]", "(230,233]", "(233,237]", "(237,240]",
"(240,244]", "(244,248]", "(248,251]", "(251,255]", "(255,258]",
"(258,262]", "(262,265]", "(265,269]", "(269,273]", "(273,276]",
"(276,280]", "(280,283]", "(283,287]", "(287,291]", "(291,294]",
"(294,298]", "(298,301]", "(301,305]", "(305,309]", "(309,312]",
"(312,316]", "(316,319]", "(319,323]", "(323,326]", "(326,330]",
"(330,334]", "(334,337]", "(337,341]", "(341,344]", "(344,348]",
"(348,352]", "(352,355]", "(355,359]"), class = "factor"))
Code
flip <- 1 # 1 or 2
theta = c(-300,120)[flip]
pmat <- persp(d$x, d$y, d$z, asp = 1,col = color[d$facetcol], phi = 30, theta = theta, border = "grey10"
,d = .8,r = 2.8,expand = .6,shade = .2,axes = F,box = T,cex = .1)
xx <- c(7.76245335753423, 6.73123147037805)
yy <- c(4.88402435072353, 4.20867046100364)
zz <- c(68.727, 48.558)
mypoints <- trans3d(xx,yy,zz,pmat = pmat)
points(mypoints,pch = 16,col = 2)
The image below is correct, but when the plot is rotated (set flip to 2) the points do not jive. In other words, when the plot is rotated the points should be hidden from view, or seen through a semi-transparent surface. Help is appreciated!
In case this is helpful to anyone. I ended up using the persp3D() function from the plot3D package. All my custom axes labels and tick marks transferred seamlessly from the base persp() with the added bonus of a transparency argument (alpha =) and proper point plotting (points3D).

Change the value in a column of a dataframe depending on how many of each possible value there are

I have a dataframe looking like this:
chr <- c(1,1,1,1,1)
b1 <- c('HP', 'HP', 'CP', 'CP', 'KP')
b2 <- c('HP', 'HP', 'CP', 'CP', 'KP')
b3 <- c('CP', 'KP', 'CP', 'HP', 'CP')
b4 <- c('CP', 'KP', 'CP', 'HP', 'CP')
b5 <- c('CP', 'CP', 'KP', 'KP', 'HP')
b6 <- c('CP', 'CP', 'KP', 'KP', 'HP')
b7 <- c('CP', 'KP', 'HP', 'CP', 'CP')
b8 <- c('CP', 'KP', 'HP', 'CP', 'CP')
df <- data.frame(chr, b1,b2,b3,b4,b5,b6,b7,b8)
I want to write a function that looks at each 'b' column and asks if it contains the value 'HP'. If it does, and the other six 'b' columns contain 'CP' or 'KP', I want to change the value 'HP' into 'CP' or 'KP' depending on which is the majority. If CP is the majority, change the HP to CP. If KP is the majority, change HP to KP.
(note that the value of b1 and b2, b3 and b4 etc is always the same, so really only 4 columns need to be looked at, b1, b3, b5, and b7).
To clarify, if the columns are e.g. HP HP CP CP CP CP KP KP, I want to change the two HPs into CPs (and leave the other columns the same).
So, the example I gave would become:
chr <- c(1,1,1,1,1)
b1 <- c('CP', 'KP', 'CP', 'CP', 'KP')
b2 <- c('CP', 'KP', 'CP', 'CP', 'KP')
b3 <- c('CP', 'KP', 'CP', 'CP', 'CP')
b4 <- c('CP', 'KP', 'CP', 'CP', 'CP')
b5 <- c('CP', 'CP', 'KP', 'KP', 'CP')
b6 <- c('CP', 'CP', 'KP', 'KP', 'CP')
b7 <- c('CP', 'KP', 'CP', 'CP', 'CP')
b8 <- c('CP', 'KP', 'CP', 'CP', 'CP')
df <- data.frame(chr, b1,b2,b3,b4,b5,b6,b7,b8)
df
I have written a function (just for df$b1) with if statements, but it doesn't work.
(note the rules for whether the HP changes to KP or CP depend on how many other CPs or KPs there are):
fun <- function(df){
if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'CP' && df$b7 == 'CP') {df$b1 <- 'KP'}
if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'CP' && df$b7 == 'CP') {df$b1 <- 'CP'}
if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'KP' && df$b7 == 'CP') {df$b1 <- 'CP'}
if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'CP' && df$b7 == 'KP') {df$b1 <- 'CP'}
if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'KP' && df$b7 == 'CP') {df$b1 <- 'KP'}
if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'CP' && df$b7 == 'KP') {df$b1 <- 'KP'}
if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'KP' && df$b7 == 'KP') {df$b1 <- 'KP'}
if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'KP' && df$b7 == 'KP') {df$b1 <- 'CP'}
df$b2 <-df$b1
}
Thanks very much for any help. I'm really stuck on this one.
EDIT: This is a sample of my actual data which is more complex than the example I gave above.
structure(list(chr = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), pos_c = c(2373L, 2406L, 2418L, 2419L,
2447L, 2450L, 2468L, 2524L, 2533L, 2535L, 2536L, 2542L, 2623L,
2709L, 3942L, 11716L, 11893L, 11898L, 12190L, 12396L, 26639L,
26640L, 26643L, 26646L, 26655L, 26657L, 26661L, 26667L, 26670L,
26676L, 26679L, 26684L, 26685L, 26688L, 26694L, 26703L, 26710L,
26712L, 26713L, 26723L, 26733L, 26737L, 26738L, 26739L, 26742L,
26743L, 26748L, 26761L, 26765L, 26766L, 26778L, 26781L, 26790L,
26792L, 26796L, 26802L, 26805L, 26811L, 26814L, 26819L, 26820L,
26823L, 26829L, 26838L, 26846L, 26847L, 26848L, 26872L, 26873L,
26874L, 26877L, 26878L, 26883L, 26889L, 26901L, 26904L, 26907L,
26916L, 26923L, 26925L, 26927L, 26931L, 26937L, 26940L, 26946L,
26954L, 26958L, 26961L, 26963L, 26964L, 26970L, 26981L, 26982L,
26983L, 26991L, 26994L, 26997L, 27007L, 27008L, 27009L, 27012L,
27015L, 27018L, 27027L, 202471L, 203660L, 203668L, 203669L, 203670L,
203672L, 203678L, 203683L, 203686L, 203687L, 203690L, 203704L,
203705L, 203711L, 203714L, 203732L, 203749L, 203752L, 203754L,
203755L, 203903L, 203910L, 203911L, 203912L, 203913L, 203914L,
203915L, 203922L, 203924L, 203933L, 203937L, 203939L, 203945L,
203948L, 203951L, 203957L, 203960L, 203961L, 203963L, 203969L,
203972L, 203973L, 203974L, 203975L, 203981L, 203991L, 204220L,
204227L, 204230L, 204232L, 204242L, 204245L, 204262L, 204272L,
204278L, 204282L, 204290L), c1 = c(101L, 60L, 63L, 64L, 100L,
97L, 94L, 83L, 80L, 48L, 46L, 51L, 69L, 46L, 23L, 79L, 63L, 59L,
53L, 85L, 13L, 12L, 1L, 9L, 11L, 13L, 9L, 14L, 14L, 12L, 15L,
9L, 15L, 14L, 14L, 2L, 2L, 8L, 3L, 0L, 0L, 4L, 2L, 1L, 4L, 4L,
8L, 39L, 7L, 5L, 2L, 41L, 69L, 79L, 89L, 120L, 128L, 90L, 134L,
107L, 169L, 120L, 103L, 48L, 58L, 132L, 62L, 19L, 9L, 13L, 12L,
12L, 17L, 251L, 8L, 367L, 367L, 264L, 5L, 170L, 113L, 234L, 134L,
143L, 189L, 224L, 255L, 296L, 448L, 239L, 169L, 80L, 312L, 84L,
403L, 397L, 430L, 529L, 544L, 556L, 565L, 549L, 555L, 4L, 11L,
0L, 18L, 18L, 19L, 19L, 18L, 18L, 17L, 17L, 15L, 15L, 16L, 15L,
13L, 14L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 2L, 3L, 2L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 13L, 2L, 10L, 4L, 10L, 24L, 33L, 33L, 63L, 42L), c2 = c(101L,
60L, 63L, 64L, 100L, 97L, 94L, 83L, 80L, 48L, 46L, 51L, 69L,
46L, 23L, 79L, 63L, 59L, 53L, 85L, 13L, 12L, 1L, 9L, 11L, 13L,
9L, 14L, 14L, 12L, 15L, 9L, 15L, 14L, 14L, 2L, 2L, 8L, 3L, 0L,
0L, 4L, 2L, 1L, 4L, 4L, 8L, 39L, 7L, 5L, 2L, 41L, 69L, 79L, 89L,
120L, 128L, 90L, 134L, 107L, 169L, 120L, 103L, 48L, 58L, 132L,
62L, 19L, 9L, 13L, 12L, 12L, 17L, 251L, 8L, 367L, 367L, 264L,
5L, 170L, 113L, 234L, 134L, 143L, 189L, 224L, 255L, 296L, 448L,
239L, 169L, 80L, 312L, 84L, 403L, 397L, 430L, 529L, 544L, 556L,
565L, 549L, 555L, 4L, 11L, 0L, 18L, 18L, 19L, 19L, 18L, 18L,
17L, 17L, 15L, 15L, 16L, 15L, 13L, 14L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 2L, 3L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 13L, 2L, 10L, 4L, 10L, 24L,
33L, 33L, 63L, 42L), c3 = c(37L, 0L, 0L, 0L, 42L, 46L, 46L, 21L,
26L, 6L, 2L, 7L, 11L, 4L, 0L, 4L, 1L, 0L, 0L, 2L, 29L, 29L, 0L,
22L, 23L, 23L, 26L, 27L, 29L, 24L, 32L, 26L, 35L, 32L, 32L, 3L,
3L, 10L, 1L, 5L, 1L, 6L, 1L, 0L, 5L, 11L, 6L, 81L, 15L, 14L,
0L, 92L, 157L, 174L, 168L, 236L, 221L, 143L, 228L, 251L, 292L,
273L, 281L, 33L, 39L, 260L, 57L, 53L, 24L, 22L, 26L, 37L, 37L,
484L, 16L, 721L, 724L, 436L, 7L, 367L, 163L, 411L, 167L, 373L,
275L, 599L, 637L, 773L, 866L, 615L, 223L, 63L, 531L, 59L, 878L,
868L, 911L, 939L, 975L, 995L, 980L, 931L, 958L, 12L, 16L, 0L,
12L, 13L, 12L, 11L, 9L, 12L, 11L, 11L, 10L, 1L, 0L, 0L, 0L, 1L,
1L, 2L, 1L, 0L, 1L, 1L, 0L, 2L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 2L, 28L,
5L, 28L, 3L, 12L, 39L, 40L, 50L, 90L, 80L), c4 = c(37L, 0L, 0L,
0L, 42L, 46L, 46L, 21L, 26L, 6L, 2L, 7L, 11L, 4L, 0L, 4L, 1L,
0L, 0L, 2L, 29L, 29L, 0L, 22L, 23L, 23L, 26L, 27L, 29L, 24L,
32L, 26L, 35L, 32L, 32L, 3L, 3L, 10L, 1L, 5L, 1L, 6L, 1L, 0L,
5L, 11L, 6L, 81L, 15L, 14L, 0L, 92L, 157L, 174L, 168L, 236L,
221L, 143L, 228L, 251L, 292L, 273L, 281L, 33L, 39L, 260L, 57L,
53L, 24L, 22L, 26L, 37L, 37L, 484L, 16L, 721L, 724L, 436L, 7L,
367L, 163L, 411L, 167L, 373L, 275L, 599L, 637L, 773L, 866L, 615L,
223L, 63L, 531L, 59L, 878L, 868L, 911L, 939L, 975L, 995L, 980L,
931L, 958L, 12L, 16L, 0L, 12L, 13L, 12L, 11L, 9L, 12L, 11L, 11L,
10L, 1L, 0L, 0L, 0L, 1L, 1L, 2L, 1L, 0L, 1L, 1L, 0L, 2L, 2L,
2L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
1L, 1L, 0L, 1L, 1L, 2L, 28L, 5L, 28L, 3L, 12L, 39L, 40L, 50L,
90L, 80L), c5 = c(96L, 77L, 74L, 72L, 96L, 96L, 92L, 80L, 79L,
79L, 76L, 76L, 66L, 55L, 64L, 78L, 110L, 100L, 165L, 171L, 38L,
41L, 2L, 38L, 33L, 37L, 21L, 40L, 41L, 21L, 37L, 19L, 45L, 30L,
22L, 22L, 28L, 34L, 30L, 31L, 25L, 40L, 34L, 33L, 34L, 46L, 41L,
96L, 48L, 51L, 38L, 93L, 152L, 155L, 155L, 193L, 195L, 189L,
222L, 213L, 284L, 248L, 230L, 56L, 70L, 208L, 82L, 85L, 67L,
64L, 64L, 83L, 71L, 495L, 77L, 570L, 577L, 499L, 55L, 292L, 236L,
352L, 244L, 296L, 351L, 391L, 440L, 483L, 653L, 417L, 194L, 57L,
460L, 57L, 538L, 520L, 573L, 731L, 753L, 770L, 772L, 757L, 761L,
35L, 73L, 66L, 70L, 70L, 71L, 70L, 74L, 79L, 82L, 83L, 85L, 69L,
68L, 71L, 71L, 70L, 73L, 72L, 72L, 74L, 103L, 107L, 106L, 107L,
109L, 106L, 106L, 105L, 106L, 105L, 108L, 104L, 105L, 106L, 106L,
103L, 112L, 112L, 113L, 112L, 109L, 114L, 114L, 115L, 120L, 114L,
97L, 125L, 103L, 124L, 107L, 116L, 145L, 139L, 138L, 177L, 139L
), c6 = c(96L, 77L, 74L, 72L, 96L, 96L, 92L, 80L, 79L, 79L, 76L,
76L, 66L, 55L, 64L, 78L, 110L, 100L, 165L, 171L, 38L, 41L, 2L,
38L, 33L, 37L, 21L, 40L, 41L, 21L, 37L, 19L, 45L, 30L, 22L, 22L,
28L, 34L, 30L, 31L, 25L, 40L, 34L, 33L, 34L, 46L, 41L, 96L, 48L,
51L, 38L, 93L, 152L, 155L, 155L, 193L, 195L, 189L, 222L, 213L,
284L, 248L, 230L, 56L, 70L, 208L, 82L, 85L, 67L, 64L, 64L, 83L,
71L, 495L, 77L, 570L, 577L, 499L, 55L, 292L, 236L, 352L, 244L,
296L, 351L, 391L, 440L, 483L, 653L, 417L, 194L, 57L, 460L, 57L,
538L, 520L, 573L, 731L, 753L, 770L, 772L, 757L, 761L, 35L, 73L,
66L, 70L, 70L, 71L, 70L, 74L, 79L, 82L, 83L, 85L, 69L, 68L, 71L,
71L, 70L, 73L, 72L, 72L, 74L, 103L, 107L, 106L, 107L, 109L, 106L,
106L, 105L, 106L, 105L, 108L, 104L, 105L, 106L, 106L, 103L, 112L,
112L, 113L, 112L, 109L, 114L, 114L, 115L, 120L, 114L, 97L, 125L,
103L, 124L, 107L, 116L, 145L, 139L, 138L, 177L, 139L), c7 = c(28L,
3L, 1L, 1L, 52L, 50L, 60L, 49L, 50L, 3L, 2L, 2L, 37L, 11L, 0L,
1L, 2L, 2L, 0L, 1L, 28L, 30L, 1L, 17L, 23L, 28L, 11L, 30L, 32L,
13L, 32L, 19L, 39L, 18L, 17L, 23L, 29L, 46L, 37L, 25L, 21L, 42L,
32L, 29L, 30L, 41L, 44L, 141L, 72L, 64L, 25L, 93L, 219L, 234L,
218L, 294L, 277L, 184L, 294L, 273L, 382L, 293L, 280L, 131L, 132L,
386L, 157L, 99L, 77L, 75L, 68L, 66L, 88L, 615L, 55L, 746L, 740L,
685L, 27L, 305L, 158L, 511L, 151L, 326L, 371L, 605L, 650L, 727L,
886L, 623L, 314L, 170L, 734L, 162L, 937L, 908L, 987L, 964L, 997L,
1002L, 1007L, 960L, 980L, 28L, 75L, 61L, 96L, 98L, 97L, 96L,
93L, 101L, 99L, 100L, 98L, 91L, 90L, 90L, 89L, 87L, 76L, 75L,
75L, 76L, 88L, 92L, 87L, 86L, 88L, 87L, 85L, 87L, 87L, 83L, 86L,
87L, 86L, 86L, 89L, 83L, 83L, 84L, 84L, 86L, 83L, 86L, 88L, 87L,
88L, 84L, 81L, 118L, 90L, 120L, 90L, 101L, 127L, 134L, 140L,
172L, 160L), c8 = c(28L, 3L, 1L, 1L, 52L, 50L, 60L, 49L, 50L,
3L, 2L, 2L, 37L, 11L, 0L, 1L, 2L, 2L, 0L, 1L, 28L, 30L, 1L, 17L,
23L, 28L, 11L, 30L, 32L, 13L, 32L, 19L, 39L, 18L, 17L, 23L, 29L,
46L, 37L, 25L, 21L, 42L, 32L, 29L, 30L, 41L, 44L, 141L, 72L,
64L, 25L, 93L, 219L, 234L, 218L, 294L, 277L, 184L, 294L, 273L,
382L, 293L, 280L, 131L, 132L, 386L, 157L, 99L, 77L, 75L, 68L,
66L, 88L, 615L, 55L, 746L, 740L, 685L, 27L, 305L, 158L, 511L,
151L, 326L, 371L, 605L, 650L, 727L, 886L, 623L, 314L, 170L, 734L,
162L, 937L, 908L, 987L, 964L, 997L, 1002L, 1007L, 960L, 980L,
28L, 75L, 61L, 96L, 98L, 97L, 96L, 93L, 101L, 99L, 100L, 98L,
91L, 90L, 90L, 89L, 87L, 76L, 75L, 75L, 76L, 88L, 92L, 87L, 86L,
88L, 87L, 85L, 87L, 87L, 83L, 86L, 87L, 86L, 86L, 89L, 83L, 83L,
84L, 84L, 86L, 83L, 86L, 88L, 87L, 88L, 84L, 81L, 118L, 90L,
120L, 90L, 101L, 127L, 134L, 140L, 172L, 160L), k1 = c(39L, 64L,
68L, 69L, 38L, 38L, 41L, 51L, 54L, 84L, 83L, 84L, 57L, 50L, 43L,
58L, 72L, 71L, 29L, 35L, 0L, 0L, 10L, 1L, 1L, 0L, 3L, 0L, 0L,
1L, 0L, 3L, 0L, 0L, 0L, 14L, 14L, 9L, 15L, 18L, 24L, 20L, 20L,
27L, 28L, 10L, 28L, 27L, 59L, 64L, 73L, 43L, 19L, 7L, 27L, 5L,
23L, 30L, 29L, 65L, 10L, 46L, 27L, 160L, 168L, 95L, 175L, 255L,
265L, 271L, 270L, 76L, 269L, 77L, 14L, 12L, 11L, 118L, 382L,
204L, 220L, 181L, 290L, 290L, 114L, 209L, 89L, 159L, 7L, 144L,
95L, 9L, 180L, 411L, 105L, 125L, 97L, 19L, 3L, 3L, 2L, 12L, 1L,
540L, 1L, 32L, 14L, 14L, 13L, 13L, 15L, 14L, 12L, 11L, 12L, 11L,
12L, 13L, 13L, 9L, 18L, 17L, 8L, 18L, 6L, 2L, 1L, 2L, 1L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 0L, 2L, 1L, 21L, 28L, 49L, 50L, 54L, 45L,
44L), k2 = c(39L, 64L, 68L, 69L, 38L, 38L, 41L, 51L, 54L, 84L,
83L, 84L, 57L, 50L, 43L, 58L, 72L, 71L, 29L, 35L, 0L, 0L, 10L,
1L, 1L, 0L, 3L, 0L, 0L, 1L, 0L, 3L, 0L, 0L, 0L, 14L, 14L, 9L,
15L, 18L, 24L, 20L, 20L, 27L, 28L, 10L, 28L, 27L, 59L, 64L, 73L,
43L, 19L, 7L, 27L, 5L, 23L, 30L, 29L, 65L, 10L, 46L, 27L, 160L,
168L, 95L, 175L, 255L, 265L, 271L, 270L, 76L, 269L, 77L, 14L,
12L, 11L, 118L, 382L, 204L, 220L, 181L, 290L, 290L, 114L, 209L,
89L, 159L, 7L, 144L, 95L, 9L, 180L, 411L, 105L, 125L, 97L, 19L,
3L, 3L, 2L, 12L, 1L, 540L, 1L, 32L, 14L, 14L, 13L, 13L, 15L,
14L, 12L, 11L, 12L, 11L, 12L, 13L, 13L, 9L, 18L, 17L, 8L, 18L,
6L, 2L, 1L, 2L, 1L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 0L, 2L, 1L, 21L,
28L, 49L, 50L, 54L, 45L, 44L), k3 = c(84L, 122L, 120L, 120L,
92L, 88L, 90L, 107L, 98L, 114L, 120L, 117L, 91L, 64L, 59L, 100L,
113L, 109L, 56L, 136L, 1L, 0L, 29L, 7L, 4L, 6L, 5L, 6L, 6L, 9L,
7L, 11L, 7L, 10L, 9L, 44L, 46L, 38L, 51L, 60L, 79L, 75L, 80L,
83L, 80L, 41L, 97L, 61L, 133L, 135L, 180L, 100L, 50L, 28L, 75L,
18L, 79L, 94L, 100L, 117L, 47L, 74L, 68L, 393L, 390L, 191L, 416L,
504L, 532L, 545L, 545L, 181L, 556L, 175L, 19L, 24L, 19L, 312L,
766L, 389L, 416L, 418L, 639L, 475L, 239L, 293L, 70L, 135L, 37L,
122L, 84L, 42L, 408L, 886L, 93L, 115L, 65L, 67L, 35L, 37L, 47L,
50L, 54L, 942L, 9L, 43L, 29L, 29L, 29L, 29L, 28L, 27L, 25L, 25L,
26L, 32L, 33L, 32L, 33L, 30L, 26L, 23L, 24L, 23L, 8L, 1L, 2L,
2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 3L, 3L, 4L, 3L, 2L, 2L, 0L, 7L, 3L, 65L, 73L, 111L, 98L,
133L, 107L, 64L), k4 = c(84L, 122L, 120L, 120L, 92L, 88L, 90L,
107L, 98L, 114L, 120L, 117L, 91L, 64L, 59L, 100L, 113L, 109L,
56L, 136L, 1L, 0L, 29L, 7L, 4L, 6L, 5L, 6L, 6L, 9L, 7L, 11L,
7L, 10L, 9L, 44L, 46L, 38L, 51L, 60L, 79L, 75L, 80L, 83L, 80L,
41L, 97L, 61L, 133L, 135L, 180L, 100L, 50L, 28L, 75L, 18L, 79L,
94L, 100L, 117L, 47L, 74L, 68L, 393L, 390L, 191L, 416L, 504L,
532L, 545L, 545L, 181L, 556L, 175L, 19L, 24L, 19L, 312L, 766L,
389L, 416L, 418L, 639L, 475L, 239L, 293L, 70L, 135L, 37L, 122L,
84L, 42L, 408L, 886L, 93L, 115L, 65L, 67L, 35L, 37L, 47L, 50L,
54L, 942L, 9L, 43L, 29L, 29L, 29L, 29L, 28L, 27L, 25L, 25L, 26L,
32L, 33L, 32L, 33L, 30L, 26L, 23L, 24L, 23L, 8L, 1L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 3L, 3L, 4L, 3L, 2L, 2L, 0L, 7L, 3L, 65L, 73L, 111L, 98L,
133L, 107L, 64L), k5 = c(0L, 14L, 14L, 14L, 1L, 0L, 0L, 8L, 7L,
5L, 5L, 5L, 0L, 3L, 0L, 8L, 2L, 3L, 18L, 15L, 0L, 2L, 38L, 3L,
5L, 1L, 18L, 1L, 2L, 2L, 3L, 21L, 2L, 15L, 1L, 26L, 22L, 17L,
27L, 33L, 41L, 39L, 42L, 45L, 51L, 14L, 50L, 31L, 82L, 84L, 108L,
55L, 24L, 16L, 51L, 33L, 44L, 55L, 54L, 87L, 15L, 20L, 27L, 285L,
297L, 151L, 293L, 343L, 363L, 374L, 376L, 57L, 382L, 24L, 25L,
10L, 8L, 103L, 551L, 301L, 320L, 276L, 364L, 340L, 49L, 272L,
171L, 195L, 24L, 180L, 161L, 11L, 254L, 663L, 188L, 229L, 158L,
26L, 3L, 3L, 6L, 10L, 6L, 708L, 0L, 9L, 0L, 3L, 0L, 1L, 0L, 2L,
0L, 0L, 1L, 9L, 9L, 9L, 10L, 10L, 6L, 6L, 1L, 6L, 2L, 0L, 5L,
3L, 2L, 3L, 4L, 2L, 3L, 2L, 2L, 1L, 3L, 0L, 0L, 4L, 1L, 0L, 1L,
5L, 2L, 0L, 1L, 2L, 0L, 2L, 5L, 1L, 3L, 3L, 43L, 50L, 78L, 75L,
87L, 78L, 59L), k6 = c(0L, 14L, 14L, 14L, 1L, 0L, 0L, 8L, 7L,
5L, 5L, 5L, 0L, 3L, 0L, 8L, 2L, 3L, 18L, 15L, 0L, 2L, 38L, 3L,
5L, 1L, 18L, 1L, 2L, 2L, 3L, 21L, 2L, 15L, 1L, 26L, 22L, 17L,
27L, 33L, 41L, 39L, 42L, 45L, 51L, 14L, 50L, 31L, 82L, 84L, 108L,
55L, 24L, 16L, 51L, 33L, 44L, 55L, 54L, 87L, 15L, 20L, 27L, 285L,
297L, 151L, 293L, 343L, 363L, 374L, 376L, 57L, 382L, 24L, 25L,
10L, 8L, 103L, 551L, 301L, 320L, 276L, 364L, 340L, 49L, 272L,
171L, 195L, 24L, 180L, 161L, 11L, 254L, 663L, 188L, 229L, 158L,
26L, 3L, 3L, 6L, 10L, 6L, 708L, 0L, 9L, 0L, 3L, 0L, 1L, 0L, 2L,
0L, 0L, 1L, 9L, 9L, 9L, 10L, 10L, 6L, 6L, 1L, 6L, 2L, 0L, 5L,
3L, 2L, 3L, 4L, 2L, 3L, 2L, 2L, 1L, 3L, 0L, 0L, 4L, 1L, 0L, 1L,
5L, 2L, 0L, 1L, 2L, 0L, 2L, 5L, 1L, 3L, 3L, 43L, 50L, 78L, 75L,
87L, 78L, 59L), k7 = c(0L, 36L, 42L, 44L, 0L, 0L, 0L, 3L, 3L,
49L, 50L, 51L, 0L, 0L, 0L, 0L, 0L, 0L, 31L, 158L, 0L, 1L, 28L,
14L, 11L, 9L, 27L, 14L, 12L, 14L, 14L, 28L, 14L, 32L, 19L, 41L,
37L, 26L, 39L, 57L, 85L, 75L, 82L, 87L, 87L, 37L, 91L, 54L, 124L,
138L, 206L, 150L, 44L, 18L, 92L, 38L, 76L, 95L, 101L, 155L, 20L,
90L, 48L, 375L, 344L, 135L, 379L, 519L, 537L, 549L, 563L, 67L,
557L, 91L, 43L, 30L, 35L, 125L, 784L, 491L, 519L, 324L, 627L,
503L, 215L, 296L, 68L, 203L, 42L, 173L, 58L, 43L, 222L, 812L,
64L, 98L, 36L, 65L, 36L, 45L, 42L, 50L, 43L, 962L, 0L, 36L, 0L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 15L, 17L, 15L, 13L, 12L, 25L,
27L, 8L, 26L, 7L, 2L, 5L, 5L, 4L, 5L, 5L, 5L, 5L, 6L, 5L, 4L,
6L, 0L, 0L, 5L, 0L, 1L, 0L, 5L, 3L, 0L, 0L, 4L, 0L, 1L, 4L, 2L,
9L, 3L, 59L, 77L, 123L, 107L, 144L, 119L, 79L), k8 = c(0L, 36L,
42L, 44L, 0L, 0L, 0L, 3L, 3L, 49L, 50L, 51L, 0L, 0L, 0L, 0L,
0L, 0L, 31L, 158L, 0L, 1L, 28L, 14L, 11L, 9L, 27L, 14L, 12L,
14L, 14L, 28L, 14L, 32L, 19L, 41L, 37L, 26L, 39L, 57L, 85L, 75L,
82L, 87L, 87L, 37L, 91L, 54L, 124L, 138L, 206L, 150L, 44L, 18L,
92L, 38L, 76L, 95L, 101L, 155L, 20L, 90L, 48L, 375L, 344L, 135L,
379L, 519L, 537L, 549L, 563L, 67L, 557L, 91L, 43L, 30L, 35L,
125L, 784L, 491L, 519L, 324L, 627L, 503L, 215L, 296L, 68L, 203L,
42L, 173L, 58L, 43L, 222L, 812L, 64L, 98L, 36L, 65L, 36L, 45L,
42L, 50L, 43L, 962L, 0L, 36L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
1L, 15L, 17L, 15L, 13L, 12L, 25L, 27L, 8L, 26L, 7L, 2L, 5L, 5L,
4L, 5L, 5L, 5L, 5L, 6L, 5L, 4L, 6L, 0L, 0L, 5L, 0L, 1L, 0L, 5L,
3L, 0L, 0L, 4L, 0L, 1L, 4L, 2L, 9L, 3L, 59L, 77L, 123L, 107L,
144L, 119L, 79L), b1 = structure(c(7L, 3L, 3L, 3L, 7L, 7L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 1L, 1L, 7L,
7L, 7L, 1L, 7L, 1L, 1L, 1L, 1L, 7L, 1L, 1L, 1L, 7L, 7L, 7L, 7L,
5L, 5L, 7L, 7L, 5L, 5L, 7L, 7L, 3L, 5L, 5L, 5L, 3L, 7L, 7L, 7L,
1L, 7L, 7L, 7L, 3L, 1L, 7L, 7L, 7L, 7L, 3L, 7L, 5L, 5L, 5L, 5L,
7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L, 3L, 3L, 3L, 7L, 3L, 7L,
3L, 1L, 7L, 7L, 7L, 3L, 5L, 7L, 7L, 7L, 1L, 1L, 1L, 1L, 1L, 1L,
5L, 1L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 5L, 5L, 7L, 5L, 5L, 6L, 6L, 2L, 6L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L,
1L, 7L, 7L, 7L, 7L, 3L, 7L, 7L, 3L, 7L), .Label = c("CP", "HF",
"HP", "KF", "KP", "NF", "NP"), class = "factor"), b2 = structure(c(7L,
3L, 3L, 3L, 7L, 7L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 7L, 1L, 1L, 7L, 7L, 7L, 1L, 7L, 1L, 1L, 1L, 1L, 7L, 1L,
1L, 1L, 7L, 7L, 7L, 7L, 5L, 5L, 7L, 7L, 5L, 5L, 7L, 7L, 3L, 5L,
5L, 5L, 3L, 7L, 7L, 7L, 1L, 7L, 7L, 7L, 3L, 1L, 7L, 7L, 7L, 7L,
3L, 7L, 5L, 5L, 5L, 5L, 7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L,
3L, 3L, 3L, 7L, 3L, 7L, 3L, 1L, 7L, 7L, 7L, 3L, 5L, 7L, 7L, 7L,
1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 7L, 5L, 5L, 6L, 6L, 2L, 6L,
2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 1L, 7L, 7L, 7L, 7L, 3L, 7L, 7L, 3L, 7L
), .Label = c("CP", "HF", "HP", "KF", "KP", "NF", "NP"), class = "factor"),
b3 = structure(c(3L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 7L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 5L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 1L, 7L, 7L, 5L, 5L, 7L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 7L, 5L, 3L, 5L, 5L, 5L, 3L, 7L, 7L, 3L,
7L, 7L, 7L, 3L, 3L, 7L, 7L, 7L, 5L, 5L, 3L, 5L, 5L, 5L, 5L,
5L, 7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L, 3L, 7L, 3L, 7L,
3L, 7L, 1L, 1L, 7L, 7L, 7L, 3L, 5L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 3L, 5L, 7L, 3L, 7L, 7L, 7L, 3L, 3L, 3L, 7L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 2L, 2L, 2L,
6L, 4L, 4L, 4L, 4L, 6L, 4L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L,
6L, 6L, 4L, 6L, 2L, 7L, 1L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L,
7L), .Label = c("CP", "HF", "HP", "KF", "KP", "NF", "NP"), class = "factor"),
b4 = structure(c(3L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 7L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 5L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 1L, 7L, 7L, 5L, 5L, 7L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 7L, 5L, 3L, 5L, 5L, 5L, 3L, 7L, 7L, 3L,
7L, 7L, 7L, 3L, 3L, 7L, 7L, 7L, 5L, 5L, 3L, 5L, 5L, 5L, 5L,
5L, 7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L, 3L, 7L, 3L, 7L,
3L, 7L, 1L, 1L, 7L, 7L, 7L, 3L, 5L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 3L, 5L, 7L, 3L, 7L, 7L, 7L, 3L, 3L, 3L, 7L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 2L, 2L, 2L,
6L, 4L, 4L, 4L, 4L, 6L, 4L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L,
6L, 6L, 4L, 6L, 2L, 7L, 1L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L,
7L), .Label = c("CP", "HF", "HP", "KF", "KP", "NF", "NP"), class = "factor"),
b5 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 4L,
1L, 2L, 1L, 1L, 4L, 1L, 4L, 1L, 4L, 4L, 2L, 4L, 2L, 2L, 2L,
4L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 2L, 1L, 1L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 3L,
3L, 4L, 4L, 1L, 4L, 1L, 1L, 1L, 3L, 2L, 4L, 2L, 2L, 2L, 4L,
2L, 4L, 4L, 1L, 4L, 4L, 4L, 2L, 3L, 4L, 2L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 4L, 4L, 2L,
4L), .Label = c("CP", "HP", "KP", "NP"), class = "factor"),
b6 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 4L,
1L, 2L, 1L, 1L, 4L, 1L, 4L, 1L, 4L, 4L, 2L, 4L, 2L, 2L, 2L,
4L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 2L, 1L, 1L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 3L,
3L, 4L, 4L, 1L, 4L, 1L, 1L, 1L, 3L, 2L, 4L, 2L, 2L, 2L, 4L,
2L, 4L, 4L, 1L, 4L, 4L, 4L, 2L, 3L, 4L, 2L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 4L, 4L, 2L,
4L), .Label = c("CP", "HP", "KP", "NP"), class = "factor"),
b7 = structure(c(2L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 2L, 2L, 5L, 1L, 1L, 1L, 4L, 4L, 2L, 2L, 4L, 3L, 6L,
6L, 6L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 3L, 3L, 3L, 6L,
6L, 3L, 6L, 6L, 6L, 6L, 3L, 6L, 3L, 3L, 4L, 3L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 3L, 2L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L,
4L, 6L, 4L, 2L, 6L, 2L, 2L, 2L, 4L, 3L, 6L, 3L, 6L, 3L, 6L,
3L, 6L, 6L, 2L, 6L, 6L, 6L, 6L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 2L, 6L, 3L, 3L, 6L, 3L, 3L,
6L), .Label = c("CF", "CP", "HP", "KP", "NF", "NP"), class = "factor"),
b8 = structure(c(2L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 2L, 2L, 5L, 1L, 1L, 1L, 4L, 4L, 2L, 2L, 4L, 3L, 6L,
6L, 6L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 3L, 3L, 3L, 6L,
6L, 3L, 6L, 6L, 6L, 6L, 3L, 6L, 3L, 3L, 4L, 3L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 3L, 2L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L,
4L, 6L, 4L, 2L, 6L, 2L, 2L, 2L, 4L, 3L, 6L, 3L, 6L, 3L, 6L,
3L, 6L, 6L, 2L, 6L, 6L, 6L, 6L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 2L, 6L, 3L, 3L, 6L, 3L, 3L,
6L), .Label = c("CF", "CP", "HP", "KP", "NF", "NP"), class = "factor")), .Names = c("chr",
"pos_c", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8", "k1",
"k2", "k3", "k4", "k5", "k6", "k7", "k8", "b1", "b2", "b3", "b4",
"b5", "b6", "b7", "b8"), class = "data.frame", row.names = c(NA,
-161L))
You can try:
t(apply(df[,-1], 1, function(rg){
occ_rg <- table(rg)
rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)]
return(rg)}))
So, to have your new df:
df <- data.frame(chr=df[, 1], t(apply(df[,-1], 1, function(rg){
occ_rg <- table(rg)
rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)]
return(rg)})),
stringsAsFactors=F)
# chr b1 b2 b3 b4 b5 b6 b7 b8
#1 1 CP CP CP CP CP CP CP CP
#2 1 KP KP KP KP CP CP KP KP
#3 1 CP CP CP CP KP KP CP CP
#4 1 CP CP CP CP KP KP CP CP
#5 1 KP KP CP CP CP CP CP CP
EDIT
If you have other columns and the columns you want to change are the only ones beginning with "b", you can do :
df[, grepl("^b", colnames(df))] <- t(apply(df[, grepl("^b", colnames(df))],
1,
function(rg){
occ_rg <- table(rg)
rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)]
return(rg)}))
Example:
With this df:
# chr c1 b1 b2 b3 b4 b5 b6 b7 b8 c2
#1 1 1 HP HP CP CP CP CP CP CP 11
#2 1 2 HP HP KP KP CP CP KP KP 12
#3 1 3 CP CP CP CP KP KP HP HP 13
#4 1 4 CP CP HP HP KP KP CP CP 14
#5 1 5 KP KP CP CP HP HP CP CP 15
You get:
# chr c1 b1 b2 b3 b4 b5 b6 b7 b8 c2
#1 1 1 CP CP CP CP CP CP CP CP 11
#2 1 2 KP KP KP KP CP CP KP KP 12
#3 1 3 CP CP CP CP KP KP CP CP 13
#4 1 4 CP CP CP CP KP KP CP CP 14
#5 1 5 KP KP CP CP CP CP CP CP 15
EDIT 2
If you have other values than "HP", "CP" and "KP" and want to replace "HP" by either "CP" or "KP", depending on which occurs the most, you can do:
df[, grepl("^b", colnames(df))] <- t(apply(df[, grepl("^b", colnames(df))],
1,
function(rg){
occ_rg <- table(rg)
occ_rg <- occ_rg[grepl("KP|CP", names(occ_rg))]
rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)]
return(rg)}))
Explanation (for edit2):
df[, grepl("^b", colnames(df))] <- # only the columns beginning with b are considered (so the other ones will remain untouched)
t( # the results of apply will be transposed
apply(df[, grepl("^b", colnames(df))], # apply on df with only the columns beginning by b
1, # by row
function(rg){ # a function that takes a vector "rg" as input
occ_rg <- table(rg) # computes the table
occ_rg <- occ_rg[grepl("KP|CP", names(occ_rg))] # keep only the occurrences of either "KP" or "CP"
rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)] # replace in the vector rg the "HP" elements by "KP" or "CP" depending on which occurs the most
return(rg) # finally returns the vector rg
}))

R function keeps returning empty data frame

I'm trying to write an R function that loops through a given dataframe to filter it a bit. The data in the dataframe consists of travel information between two lines in the London subway an I'd like to cut off the top percent. Here's the output of the str() function for the input data:
'data.frame': 71748 obs. of 9 variables:
$ depart : Factor w/ 52 levels "Bank","Barkingside",..: 22 22 22 22 22 25 25 25 25 25 ...
$ arrival : Factor w/ 48 levels "Bank","Barkingside",..: 48 43 38 5 8 1 42 48 41 43 ...
$ traveltime : num 433 1102 161 584 891 ...
$ departuretime: POSIXlt, format: "2014-03-24 18:17:20" "2014-03-24 18:17:20" "2014-03-24 18:17:20" ...
$ arrivaltime : POSIXlt, format: "2014-03-24 18:24:33" "2014-03-24 18:35:42" "2014-03-24 18:20:01" ...
$ lcid : Factor w/ 28 levels "1000001","1000002",..: 1 1 1 1 1 1 1 1 1 1 ...
$ tripno : Factor w/ 25 levels "1","10","11",..: 2 2 2 2 2 2 2 2 2 2 ...
$ destination : Factor w/ 18 levels "Debden","Ealing Broadway",..: 3 3 3 3 3 3 3 3 3 3 ...
$ line : Factor w/ 1 level "C": 1 1 1 1 1 1 1 1 1 1 ...
Here's the functions I wrote:
#cut off top percent of travel times for each combination of arrival and
#departure stations to remove outliers
cutOffTopPercent <- function(data, percentage=0.99){
res <- data.frame()
#loop through all combinations of depart and arrival stations
for(i in 1:length(levels(data$depart))){
for(j in 1:length(levels(data$arrival))){
#create variables for departure/arrival station to make code easier to read
departureStation <- levels(data$depart)[i]
arrivalStation <- levels(data$arrival)[j]
#create a subset containing only the current departure and arrival station
dataSubset <- data[data$depart == departureStation & data$arrival == arrivalStation,]
#get top value that's allowed
upperBorder <- getTopPercentileBottom(dataSubset, percentage)
#remove records with values higher than than allowed
dataSubset <- dataSubset[dataSubset$traveltime < upperBorder,]
#glue the subset to the end result
res <- rbind(res,dataSubset)
}
}
return(res)
}
#returns the traveltime that marks where the given percentage of traveltimes starts
getTopPercentileBottom <- function(data, percentile){
upperBorder <- quantile(data$traveltime, probs = percentile)
return(upperBorder)
}
The cutOffTopPercent() function always returns an empty data frame however. I can't find my error. I've been trying to go to the steps manually, but when I do so, all the data subsets get appended to the res dataframe correctly.
Can anyone see what I did wrong, or suggest a better approach to what I'm trying to do?
EDIT:
a dput of the first 30 records in my input data:
structure(list(depart = structure(c(22L, 22L, 22L, 22L, 22L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
25L, 25L, 25L, 25L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L), .Label = c("Bank",
"Barkingside", "Bethnal Green", "Bond Street", "Buckhurst Hill",
"Chancery Lane", "Chigwell", "Debden", "Ealing Broadway", "East Acton",
"Epping", "Fairlop", "Gants Hill", "Grange Hill", "Greenford",
"Hainault", "Hanger Lane", "Holborn", "Holland Park", "Lancaster Gate",
"Leyton", "Leytonstone", "Liverpool Street", "Loughton", "Marble Arch",
"Mile End", "Newbury Park", "Newbury Park Loop", "North Acton",
"North Acton Junction", "Northolt", "Notting Hill Gate", "Oxford Circus",
"Perivale", "Queensway", "Redbridge", "Roding Valley", "Ruislip Gardens",
"Shepherd's Bush", "Shepherds Bush (Central Line)", "Snaresbrook",
"South Ruislip", "South Woodford", "St. Paul's", "Stratford",
"Theydon Bois", "Tottenham Court Road", "Wanstead", "West Acton",
"West Ruislip", "White City", "Woodford"), class = "factor"),
arrival = structure(c(48L, 43L, 38L, 5L, 8L, 1L, 42L, 48L,
41L, 43L, 6L, 38L, 5L, 4L, 16L, 30L, 44L, 20L, 8L, 3L, 24L,
19L, 1L, 42L, 48L, 41L, 43L, 6L, 38L, 5L), .Label = c("Bank",
"Barkingside", "Bethnal Green", "Bond Street", "Buckhurst Hill",
"Chancery Lane", "Chigwell", "Debden", "East Acton", "Fairlop",
"Gants Hill", "Grange Hill", "Greenford", "Hainault", "Hanger Lane",
"Holborn", "Holland Park", "Lancaster Gate", "Leyton", "Leytonstone",
"Liverpool Street", "Loughton", "Marble Arch", "Mile End",
"Newbury Park", "North Acton", "North Acton Junction", "Northolt",
"Notting Hill Gate", "Oxford Circus", "Perivale", "Queensway",
"Redbridge", "Roding Valley", "Ruislip Gardens", "Shepherd's Bush",
"Shepherds Bush (Central Line)", "Snaresbrook", "South Ruislip",
"South Woodford", "St. Paul's", "Stratford", "Theydon Bois",
"Tottenham Court Road", "Wanstead", "West Acton", "White City",
"Woodford"), class = "factor"), traveltime = c(433, 1102,
161, 584, 891, 829, 1473, 2273, 629, 2942, 467, 2001, 2424,
75, 351, 165, 249, 1840, 2731, 1148, 1289, 1653, 580, 1224,
2024, 380, 2693, 218, 1752, 2175), departuretime = structure(list(
sec = c(20, 20, 20, 20, 20, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 49, 49, 49, 49,
49, 49, 49, 49), min = c(17L, 17L, 17L, 17L, 17L, 46L,
46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L,
46L, 46L, 46L, 46L, 46L, 50L, 50L, 50L, 50L, 50L, 50L,
50L, 50L), hour = c(18L, 18L, 18L, 18L, 18L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L), mday = c(24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L
), mon = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), year = c(114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L), wday = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), yday = c(82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L,
82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L,
82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L
), isdst = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("sec", "min", "hour",
"mday", "mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt",
"POSIXt"), tzone = "GMT"), arrivaltime = structure(list(sec = c(33,
42, 1, 4, 11, 29, 13, 33, 9, 42, 27, 1, 4, 55, 31, 25, 49,
20, 11, 48, 9, 13, 29, 13, 33, 9, 42, 27, 1, 4), min = c(24L,
35L, 20L, 27L, 32L, 0L, 11L, 24L, 57L, 35L, 54L, 20L, 27L,
47L, 52L, 49L, 50L, 17L, 32L, 5L, 8L, 14L, 0L, 11L, 24L,
57L, 35L, 54L, 20L, 27L), hour = c(18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 17L, 18L, 17L, 18L, 18L, 17L, 17L, 17L, 17L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 18L, 17L, 18L,
18L), mday = c(24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L), mon = c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
year = c(114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L), wday = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), yday = c(82L,
82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L,
82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L,
82L, 82L, 82L, 82L, 82L, 82L, 82L), isdst = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
)), .Names = c("sec", "min", "hour", "mday", "mon", "year",
"wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt"), tzone = "GMT"),
lcid = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("1000001", "1000002", "1000003",
"1000004", "1000005", "1000006", "1000007", "1000008", "1000009",
"1000010", "1000045", "1000054", "1000070", "1000088", "1000089",
"1000090", "1000097", "1000098", "1000099", "1000100", "1000101",
"1000102", "1000103", "1000104", "1000105", "1000106", "1000107",
"1000109"), class = "factor"), tripno = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"2", "20", "21", "22", "23", "24", "3", "4", "5", "6", "7",
"8", "81", "9"), class = "factor"), destination = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Debden",
"Ealing Broadway", "Epping", "Grange Hill via Woodford",
"Hainault", "Hainault via Newbury Park", "Hainault via Woodford",
"Leytonstone", "Loughton", "Marble Arch", "Newbury Park",
"North Acton", "Northolt", "Ruislip Gardens", "West Ruislip",
"White City", "Woodford", "Woodford Via Hainault"), class = "factor"),
line = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "C", class = "factor")), .Names = c("depart",
"arrival", "traveltime", "departuretime", "arrivaltime", "lcid",
"tripno", "destination", "line"), row.names = c(NA, 30L), class = "data.frame")
Here a vectorized version of your code. Basically I used Map to avoid double loops and filling the result manullay (using rbind, very solw).
cutOffTopPercent <-
function(data,percent=0.99){
cut_off_dep_arr <-
function(dep,arr){
dataSubset <- data[data$depart == dep & data$arrival == arr,]
upperBorder <- getTopPercentileBottom(dataSubset, percent)
dataSubset[dataSubset$traveltime <= upperBorder,] ## <= not <
}
Map(cut_off_dep_arr,df$depart,df$arrival)
}
cutOffTopPercent(data=df)

Error using the step-function with glmmML

When I tried to use the step function I receive this error:
"Error in if (all(is.finite(c(n0, nnew))) && nnew != n0)
stop("number of rows in use has changed: remove missing values?") :
missing value where TRUE/FALSE needed"
Seems like it has something to do with missing values. I checked for this and there are none. I searched for more information around this error. I could only find one unanswered post from several years ago.
I've included random sample selection from my dataset, together with the R-code I used. (SD=integer. DIST,CD=numeric. Hunt,Region,DN,IDcat=categorical).
Sika.sample <- structure(list(ID = c(16L, 19L, 68L, 58L, 35L, 21L, 21L, 83L,
48L, 64L, 73L, 63L, 80L, 63L, 8L, 43L, 77L, 75L, 27L, 73L, 22L,
65L, 32L, 78L, 61L, 68L, 46L, 30L, 44L, 78L, 58L, 72L, 27L, 46L,
41L, 52L, 36L, 38L, 67L, 18L, 45L, 75L, 72L, 8L, 5L, 62L, 70L,
23L, 4L, 8L, 7L, 30L, 37L, 7L, 68L, 20L, 80L, 44L, 39L, 6L, 83L,
26L, 66L, 21L, 5L, 39L, 10L, 73L, 69L, 44L, 51L, 69L, 53L, 63L,
27L, 29L, 15L, 13L, 1L, 18L, 31L, 9L, 42L, 32L, 78L, 62L, 23L,
3L, 29L, 49L, 81L, 60L, 70L, 73L, 8L, 69L, 79L, 19L, 47L, 38L
), SD = c(8L, 3L, 4L, 6L, 2L, 1L, 8L, 0L, 4L, 2L, 8L, 2L, 0L,
8L, 0L, 0L, 2L, 2L, 0L, 3L, 0L, 2L, 25L, 0L, 18L, 28L, 0L, 10L,
1L, 0L, 0L, 1L, 0L, 10L, 1L, 0L, 0L, 7L, 0L, 0L, 18L, 0L, 0L,
0L, 0L, 28L, 1L, 0L, 10L, 1L, 0L, 2L, 0L, 0L, 3L, 7L, 0L, 0L,
8L, 0L, 5L, 1L, 3L, 33L, 1L, 3L, 0L, 1L, 0L, 0L, 19L, 0L, 3L,
3L, 0L, 1L, 0L, 3L, 5L, 2L, 0L, 0L, 0L, 2L, 0L, 10L, 0L, 0L,
0L, 0L, 2L, 0L, 2L, 0L, 8L, 1L, 0L, 0L, 0L, 0L), DIST = c(0,
0, 42.7, 800.6, 44.6, 0, 0, 19.3, 42.8, 570.7, 111.7, 348.2,
0, 348.2, 24, 0, 7.6, 3.1, 23.2, 111.7, 0, 404, 331.9, 0, 0,
42.7, 0, 97.7, 0, 0, 800.6, 295.5, 23.2, 0, 0, 0, 4.3, 29.5,
408.1, 37.7, 0, 3.1, 295.5, 24, 15.5, 0, 34.1, 0, 22.1, 24, 223.4,
97.7, 99.1, 223.4, 42.7, 75.2, 0, 0, 279.5, 28, 19.3, 58, 972.3,
0, 15.5, 279.5, 652.8, 111.7, 24.8, 0, 0, 24.8, 0, 348.2, 23.2,
278.8, 20.1, 30.6, 4.9, 37.7, 46.3, 735.7, 1.2, 331.9, 0, 0,
0, 5.8, 278.8, 817.6, 0, 190.4, 34.1, 111.7, 24, 24.8, 11.3,
0, 0, 29.5), CD = c(103.9, 25.3, 46.6, 99.4, 55, 95.2, 68, 62.5,
59, 78.8, 65.5, 46.6, 51.8, 78.2, 52.7, 15.7, 62.8, 81.3, 40.9,
82.5, 64.9, 50.1, 62, 56.1, 88.9, 77.2, 48.1, 69.2, 37.9, 101.8,
43.9, 82.4, 57, 75.1, 41.9, 42.2, 48.7, 53.3, 42, 61, 70.9, 38,
51.9, 39.3, 44.9, 69.7, 25.1, 49, 61.8, 58, 61.2, 41.1, 90.3,
45.8, 36.4, 103.1, 52.4, 84.6, 63.5, 53.5, 101.1, 64.4, 50, 80.8,
75.1, 47.5, 79.7, 44.9, 37, 29.1, 65.9, 49, 56.7, 61.4, 31.1,
102.7, 64.8, 51.4, 80.7, 61.6, 36, 50.3, 42.4, 47, 41.9, 68.4,
88.9, 56.2, 52.1, 50.1, 69.1, 55.1, 48.4, 34.1, 51, 77.9, 53.5,
36.8, 48.2, 38.7), DN = structure(c(1L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L), .Label = c("Day",
"Night"), class = "factor"), Hunt = structure(c(2L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L
), .Label = c("Hunt", "Nohunt"), class = "factor"), Region = structure(c(2L,
2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L), .Label = c("H", "S"), class = "factor"), IDcat = structure(c(16L,
19L, 68L, 58L, 35L, 21L, 21L, 83L, 48L, 64L, 73L, 63L, 80L, 63L,
8L, 43L, 77L, 75L, 27L, 73L, 22L, 65L, 32L, 78L, 61L, 68L, 46L,
30L, 44L, 78L, 58L, 72L, 27L, 46L, 41L, 52L, 36L, 38L, 67L, 18L,
45L, 75L, 72L, 8L, 5L, 62L, 70L, 23L, 4L, 8L, 7L, 30L, 37L, 7L,
68L, 20L, 80L, 44L, 39L, 6L, 83L, 26L, 66L, 21L, 5L, 39L, 10L,
73L, 69L, 44L, 51L, 69L, 53L, 63L, 27L, 29L, 15L, 13L, 1L, 18L,
31L, 9L, 42L, 32L, 78L, 62L, 23L, 3L, 29L, 49L, 81L, 60L, 70L,
73L, 8L, 69L, 79L, 19L, 47L, 38L), .Label = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37",
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48",
"49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59",
"60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "70",
"71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81",
"82", "83"), class = "factor")), .Names = c("ID", "SD", "DIST",
"CD", "DN", "Hunt", "Region", "IDcat"), row.names = c(16L, 172L,
328L, 222L, 86L, 21L, 174L, 332L, 308L, 228L, 96L, 291L, 233L,
259L, 161L, 271L, 202L, 98L, 180L, 45L, 22L, 293L, 185L, 203L,
257L, 264L, 274L, 81L, 304L, 50L, 286L, 95L, 27L, 242L, 269L,
280L, 138L, 191L, 295L, 171L, 241L, 149L, 146L, 110L, 107L, 258L,
195L, 125L, 55L, 8L, 160L, 183L, 37L, 109L, 296L, 20L, 297L,
208L, 192L, 6L, 236L, 179L, 294L, 72L, 5L, 141L, 10L, 198L, 143L,
272L, 311L, 194L, 249L, 323L, 129L, 29L, 66L, 166L, 52L, 69L,
133L, 162L, 270L, 134L, 152L, 322L, 23L, 156L, 182L, 277L, 330L,
288L, 42L, 147L, 59L, 41L, 204L, 19L, 275L, 140L), class = "data.frame")
Glmm_full <- glmmML(SD~DIST*as.factor(Hunt)*as.factor(Region)*as.factor(DN),
offset=log(CD),data=Sika.sample,family="poisson",cluster=IDcat)
finalModel <-step(Glmm_full) #ERROR-MESSAGE

R tells me " object 'train' not found "

In my customized function I met a strange problem.
I'm writing a function to do cross-validation with logistic and clogit(in survival) regression.Thus I need to generate a training set and testing set.I've marked the part to do it.
I need to compare the classic logistic regression and the conditional logistic regression.So I use an 'if' statement to distinguish those two functions.
Here's the problem.It seems that the glm function can find the train vector and doing well,but clogit can't find it!Even if the train vector is output correctly.
When I test each line out of my function gcv,clogit works again.
Can somebody tell me why is clogit not working with train?
I called this function as:
gcv(as.numeric(FNDX)~HIGD+DEG+CHK+AGP1+AGMN+NLV+LIV+WT+AGLP+MST+strata(STR),bbdm,method="clogit")
and the error message is
Error in `[.data.frame`(bbdm, train, ) : object 'train' not found
Do you need traceback() information?
and the data set is bbdm13 in http://www.umass.edu/statdata/statdata/stat-logistic.html.
There are NA in the original data,or use the sample after the code :)
Related codes are as following:
gcv<-function(formula,data=NULL,method="rpart",cross=5,times=10,k=7,layer=5,seed=0)
{
set=data;
n=nrow(set);
set.seed(as.vector(Sys.time()));
bb1=1:n;
bb2=rep(1:cross,ceiling(n/cross))[1:n];
bb2=sample(bb2,n);
samp=sample(c(1:n),size=n);
m=ceiling(n/cross);
smp<-mat.or.vec(cross,m);
j=rep(0,cross)
for (i in 1:n)
{
smp[bb2[i],j[bb2[i]]]=i
j[bb2[i]]=j[bb2[i]]+1
}
# Here we separate the original set into 5(variable cross)sets,
# each time we take one out and treat it as the testing set
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula","data"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
response<-model.response(mf)
#code copied from function.lm
reslvl<-length(levels(response))
tra<-mat.or.vec(reslvl,reslvl);
tes<-mat.or.vec(reslvl,reslvl);
for (i in 1:cross)
{
test<-smp[i,];
train<-setdiff(1:200,test);
show(train); #THe 'train' set can be shown here.
#some "if" and "else"statements are hidden
if (method=="logistic")#logistic is running well
{
bb.log<-step(glm(formula,set,family=binomial),trace=FALSE)
tra<-tra+as.vector(t(table(response[train],
bin(predict.glm(bb.log,set[train,],type="response")))))
tes<-tes+as.vector(t(table(response[test],
bin(predict.glm(bb.log,set[test,],type="response")))))
}
else if (method=="clogit")#clogit is meeting a problem.
{
library("survival")
bb.clog<-step(clogit(formula,bbdm[train,]),trace=FALSE)
tra<-tra+as.vector(t(table( response[train],
bin(predict(bb.clog,set[train,])))))
tes<-tes+as.vector(t(table( response[test],
bin(predict(bb.clog,set[test,])))))
}
}
tra<-tra/cross;
tes<-tes/cross;
trainrate=1-sum(diag(tra))/sum(tra)
testrate=1-sum(diag(tes))/sum(tes)
result<-list(Train=tra,TrainRate=trainrate,Test=tes,TestRate=testrate)
result
}
Sample Data:
STR OBS AGMT FNDX HIGD DEG CHK AGP1 AGMN NLV LIV WT AGLP MST
1 1 1 39 1 9 0 1 23 13 0 5 118 39 1
2 1 2 39 0 10 0 2 16 11 1 3 175 39 3
3 1 3 39 0 11 0 2 20 12 1 3 135 39 2
4 1 4 39 0 12 1 1 21 11 0 3 125 40 1
5 2 1 38 1 14 2 1 24 14 1 3 118 39 1
6 2 2 38 0 12 1 2 20 15 0 2 183 38 1
7 2 3 38 0 9 0 2 19 11 0 5 218 38 1
8 2 4 38 0 13 1 1 23 13 0 2 192 37 1
9 3 1 38 1 9 0 1 22 15 2 2 125 38 1
10 3 2 38 0 10 0 2 20 14 0 2 123 38 1
11 3 3 38 0 15 1 1 19 13 3 2 140 37 1
12 3 4 38 0 12 1 1 18 13 0 2 160 38 1
13 4 1 38 1 15 1 1 24 14 2 3 150 38 5
14 4 2 38 0 15 2 1 26 13 1 1 130 38 2
15 4 3 38 0 12 1 2 23 14 0 4 140 38 1
16 4 4 38 0 12 1 1 25 16 0 2 130 38 1
17 5 1 38 1 12 1 1 21 17 0 2 150 38 2
18 5 2 38 0 12 1 2 20 12 1 2 148 38 1
19 5 3 38 0 14 2 1 22 13 0 2 134 39 1
20 5 4 38 0 13 1 1 16 14 0 6 138 38 4
21 6 1 38 1 13 1 1 24 12 1 3 116 39 1
22 6 2 38 0 12 1 2 19 12 0 2 145 35 2
23 6 3 38 0 14 2 2 21 10 4 3 195 35 1
24 6 4 38 0 14 4 1 25 8 0 1 180 38 2
25 7 1 37 1 17 4 1 26 13 1 4 137 37 5
26 7 2 37 0 15 2 1 20 11 2 2 135 37 2
27 7 3 37 0 9 0 1 18 10 2 3 155 37 1
28 7 4 37 0 12 1 2 22 13 2 2 120 38 1
29 8 1 36 1 12 1 1 23 14 0 2 126 36 2
30 8 2 36 0 10 0 1 20 12 1 2 191 36 1
31 8 3 36 0 10 0 2 17 10 1 3 185 37 1
32 8 4 36 0 12 1 2 23 12 0 2 119 37 1
33 9 1 35 1 12 1 1 23 14 0 3 129 36 1
34 9 2 35 0 14 1 2 21 11 0 3 170 34 2
35 9 3 36 0 12 1 1 22 14 0 4 110 36 1
36 9 4 35 0 14 2 2 24 11 0 2 155 35 1
37 10 1 35 1 12 1 2 21 12 0 2 105 29 1
38 10 2 36 0 17 3 1 26 13 1 2 115 36 1
39 10 3 36 0 12 1 2 22 12 2 3 120 36 1
40 10 4 36 0 12 1 1 33 16 0 1 150 36 1
Structure:
structure(list(STR = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 20L,
20L, 20L, 20L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 23L, 23L,
23L, 23L, 24L, 24L, 24L, 24L, 25L, 25L, 25L, 25L, 26L, 26L, 26L,
26L, 27L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L,
30L, 30L, 30L, 30L, 31L, 31L, 31L, 31L, 32L, 32L, 32L, 32L, 33L,
33L, 33L, 33L, 34L, 34L, 34L, 34L, 35L, 35L, 35L, 35L, 36L, 36L,
36L, 36L, 37L, 37L, 37L, 37L, 38L, 38L, 38L, 38L, 39L, 39L, 39L,
39L, 40L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 42L, 42L, 42L, 42L,
43L, 43L, 43L, 43L, 44L, 44L, 44L, 44L, 45L, 45L, 45L, 45L, 46L,
46L, 46L, 46L, 47L, 47L, 47L, 47L, 48L, 48L, 48L, 48L, 49L, 49L,
49L, 49L, 50L, 50L, 50L, 50L), .Label = c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37",
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48",
"49", "50"), class = "factor"), OBS = structure(c(1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
AGMT = c(39L, 39L, 39L, 39L, 38L, 38L, 38L, 38L, 38L, 38L,
38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L,
38L, 38L, 37L, 37L, 37L, 37L, 36L, 36L, 36L, 36L, 35L, 35L,
36L, 35L, 35L, 36L, 36L, 36L, 35L, 35L, 35L, 35L, 34L, 35L,
34L, 34L, 33L, 33L, 32L, 33L, 33L, 33L, 33L, 33L, 32L, 32L,
32L, 32L, 31L, 30L, 31L, 31L, 68L, 68L, 68L, 68L, 64L, 64L,
64L, 64L, 63L, 63L, 63L, 63L, 62L, 62L, 62L, 62L, 61L, 61L,
61L, 61L, 61L, 62L, 62L, 61L, 61L, 62L, 61L, 61L, 61L, 61L,
61L, 61L, 60L, 60L, 60L, 60L, 58L, 58L, 58L, 58L, 55L, 55L,
55L, 55L, 55L, 55L, 55L, 55L, 52L, 52L, 52L, 52L, 52L, 52L,
52L, 52L, 51L, 51L, 51L, 51L, 49L, 49L, 49L, 49L, 48L, 48L,
48L, 48L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 46L, 46L,
46L, 46L, 46L, 46L, 46L, 46L, 45L, 45L, 45L, 45L, 45L, 45L,
45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 44L, 44L,
44L, 44L, 44L, 44L, 44L, 44L, 43L, 43L, 43L, 43L, 28L, 27L,
28L, 28L, 53L, 53L, 53L, 53L, 56L, 56L, 56L, 56L, 41L, 41L,
41L, 41L, 41L, 41L, 40L, 41L, 41L, 42L, 41L, 41L), FNDX = structure(c(2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
HIGD = c(9L, 10L, 11L, 12L, 14L, 12L, 9L, 13L, 9L, 10L, 15L,
12L, 15L, 15L, 12L, 12L, 12L, 12L, 14L, 13L, 13L, 12L, 14L,
14L, 17L, 15L, 9L, 12L, 12L, 10L, 10L, 12L, 12L, 14L, 12L,
14L, 12L, 17L, 12L, 12L, 20L, 10L, 12L, 14L, 12L, 18L, 12L,
12L, 20L, 15L, 12L, 14L, 18L, 12L, 13L, 18L, 12L, 12L, 15L,
12L, 17L, 10L, 13L, 13L, 14L, 8L, 16L, 12L, 12L, 20L, 13L,
12L, 10L, 12L, 5L, 12L, 12L, 12L, 16L, 10L, 8L, 13L, 8L,
16L, 11L, 9L, 15L, 14L, 12L, 18L, 6L, 12L, 10L, 8L, 12L,
8L, 13L, 12L, 11L, 13L, 12L, 12L, 13L, 12L, 14L, 12L, 12L,
11L, 12L, 12L, 12L, 10L, 12L, 14L, 8L, 12L, 12L, 14L, 9L,
12L, 7L, 16L, 15L, 15L, 20L, 12L, 12L, 14L, 17L, 12L, 12L,
12L, 17L, 15L, 12L, 10L, 12L, 10L, 11L, 17L, 10L, 12L, 14L,
8L, 12L, 12L, 12L, 11L, 12L, 12L, 8L, 13L, 12L, 12L, 12L,
19L, 12L, 12L, 13L, 12L, 17L, 12L, 16L, 14L, 16L, 18L, 12L,
12L, 12L, 12L, 12L, 12L, 16L, 16L, 12L, 12L, 16L, 11L, 12L,
12L, 16L, 12L, 12L, 11L, 12L, 12L, 16L, 12L, 12L, 12L, 12L,
16L, 10L, 11L, 15L, 12L, 14L, 10L, 15L, 13L), DEG = structure(c(1L,
1L, 1L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 3L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 2L, 3L, 5L, 5L, 3L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 2L, 3L, 2L, 4L, 2L, 2L, 5L, 1L, 2L, 2L, 2L, 5L,
2L, 2L, 5L, 2L, 2L, 3L, 5L, 2L, 2L, 5L, 2L, 2L, 2L, 2L, 4L,
1L, 2L, 2L, 3L, 1L, 4L, 2L, 2L, 5L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 2L, 4L, 1L, 1L, 2L, 1L, 4L, 1L, 1L, 3L, 2L, 2L, 5L, 1L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 3L, 2L, 2L, 3L, 2L, 3L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 1L,
4L, 3L, 3L, 5L, 2L, 2L, 3L, 5L, 2L, 2L, 2L, 5L, 2L, 2L, 1L,
2L, 1L, 1L, 4L, 1L, 2L, 3L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 5L, 2L, 2L, 2L, 2L, 5L, 2L, 4L, 2L, 4L, 5L,
2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 2L, 2L, 4L, 1L, 2L, 2L, 4L,
2L, 2L, 1L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 4L, 1L, 1L, 2L, 2L,
2L, 1L, 2L, 2L), .Label = c("0", "1", "2", "3", "4"), class = "factor"),
CHK = structure(c(1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L), .Label = c("1",
"2"), class = "factor"), AGP1 = c(23, 16, 20, 21, 24, 20,
19, 23, 22, 20, 19, 18, 24, 26, 23, 25, 21, 20, 22, 16, 24,
19, 21, 25, 26, 20, 18, 22, 23, 20, 17, 23, 23, 21, 22, 24,
21, 26, 22, 33, 26, 18, 19, 21, 25, 27, 20, 25, 26, 21, 24,
25, 28, 21, 20, 21, 30, 25, 20, 23, 30, 21, 23, 24, 22, 34,
23, 19, 30, 28, 26, 25, 21, 24, 24, 24, 26, 26, 32, 22, 28,
26, 28, 27, 22, 30, 25, 26, 26, 33, 25, 29, 21, 18, 22, 23,
28, 25, 24, 33, 20, 25, 24, 24, 30, 30, 30, 24, 24, 23, 16,
26, 24, 28, 20, 25, 23, 21, 23, 20, 24, 24, 22, 24, 25, 25,
24, 25, 22, 22, 23, 19, 26, 20, 24, 22, 19, 23, 23, 21, 27,
19, 26, 15, 27, 23, 22, 17, 33, 25, 20, 22, 24, 23, 20, 30,
18, 22, 30, 22, 25, 23, 23, 23, 25, 27, 27, 25, 24, 22, 23,
18, 27, 31, 14, 20, 29, 22, 20, 23, 29, 28, 23, 26, 21, 27,
26, 25, 25, 20, 21, 22, 40, 21, 21, 26, 34, 21, 30, 21),
AGMN = c(13L, 11L, 12L, 11L, 14L, 15L, 11L, 13L, 15L, 14L,
13L, 13L, 14L, 13L, 14L, 16L, 17L, 12L, 13L, 14L, 12L, 12L,
10L, 8L, 13L, 11L, 10L, 13L, 14L, 12L, 10L, 12L, 14L, 11L,
14L, 11L, 12L, 13L, 12L, 16L, 11L, 13L, 11L, 12L, 10L, 13L,
11L, 16L, 14L, 11L, 12L, 12L, 14L, 12L, 13L, 13L, 13L, 11L,
9L, 16L, 14L, 14L, 11L, 13L, 12L, 14L, 13L, 12L, 14L, 14L,
11L, 10L, 15L, 12L, 14L, 11L, 16L, 15L, 12L, 12L, 14L, 13L,
15L, 14L, 16L, 11L, 15L, 13L, 17L, 11L, 13L, 13L, 15L, 13L,
17L, 15L, 17L, 11L, 13L, 15L, 12L, 16L, 12L, 10L, 16L, 13L,
12L, 14L, 14L, 14L, 12L, 15L, 12L, 12L, 14L, 13L, 14L, 12L,
11L, 11L, 16L, 12L, 13L, 13L, 14L, 12L, 13L, 13L, 11L, 11L,
12L, 11L, 14L, 12L, 14L, 13L, 12L, 15L, 13L, 12L, 15L, 11L,
13L, 13L, 12L, 12L, 11L, 13L, 14L, 13L, 11L, 11L, 12L, 11L,
12L, 12L, 15L, 17L, 13L, 10L, 16L, 12L, 13L, 12L, 12L, 13L,
14L, 13L, 15L, 15L, 12L, 17L, 15L, 12L, 12L, 14L, 12L, 12L,
11L, 16L, 12L, 11L, 12L, 11L, 17L, 11L, 13L, 12L, 16L, 13L,
14L, 12L, 15L, 16L, 12L, 14L, 13L, 13L, 12L, 12L), NLV = c(0,
1, 1, 0, 1, 0, 0, 0, 2, 0, 3, 0, 2, 1, 0, 0, 0, 1, 0, 0,
1, 0, 4, 0, 1, 2, 2, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 2,
0, 0, 2, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 2, 2, 1, 0, 2,
0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 4, 0, 0, 0, 0, 1, 1, 0, 1,
0, 0, 0, 4, 1, 0, 0, 1, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 1, 0,
0, 0, 0, 0, 2, 1, 1, 1, 0), LIV = c(5, 3, 3, 3, 3, 2, 5,
2, 2, 2, 2, 2, 3, 1, 4, 2, 2, 2, 2, 6, 3, 2, 3, 1, 4, 2,
3, 2, 2, 2, 3, 2, 3, 3, 4, 2, 2, 2, 3, 1, 4, 2, 3, 2, 1,
4, 3, 1, 4, 1, 2, 2, 5, 2, 2, 1, 1, 2, 2, 2, 0, 3, 2, 3,
3, 3, 3, 7, 3, 3, 5, 2, 5, 2, 3, 3, 3, 2, 2, 3, 3, 1, 3,
2, 4, 1, 4, 3, 2, 1, 3, 2, 3, 5, 2, 3, 2, 2, 2, 3, 5, 3,
3, 0, 2, 2, 2, 6, 4, 3, 3, 4, 2, 2, 6, 3, 3, 3, 2, 5, 5,
4, 2, 5, 4, 2, 3, 3, 3, 1, 2, 0, 4, 5, 2, 3, 1, 3, 2, 5,
11, 3, 7, 1, 4, 4, 6, 3, 2, 1, 1, 3, 3, 2, 1, 3, 4, 2, 2,
5, 4, 3, 3, 4, 3, 3, 1, 2, 1, 1, 5, 7, 2, 1, 2, 6, 3, 1,
2, 2, 4, 3, 4, 1, 6, 4, 4, 2, 3, 4, 5, 4, 1, 3, 4, 3, 2,
2, 2, 2), WT = c(118L, 175L, 135L, 125L, 118L, 183L, 218L,
192L, 125L, 123L, 140L, 160L, 150L, 130L, 140L, 130L, 150L,
148L, 134L, 138L, 116L, 145L, 195L, 180L, 137L, 135L, 155L,
120L, 126L, 191L, 185L, 119L, 129L, 170L, 110L, 155L, 105L,
115L, 120L, 150L, 135L, 110L, 170L, 145L, 170L, 140L, 240L,
100L, 92L, 160L, 155L, 132L, 110L, 145L, 155L, 110L, 129L,
131L, 218L, 115L, 110L, 130L, 97L, 120L, 130L, 150L, 123L,
145L, 135L, 132L, 205L, 127L, 120L, 145L, 175L, 144L, 123L,
170L, 134L, 155L, 125L, 140L, 120L, 134L, 150L, 117L, 147L,
124L, 129L, 170L, 153L, 130L, 145L, 140L, 155L, 116L, 115L,
175L, 179L, 119L, 153L, 185L, 280L, 140L, 126L, 193L, 140L,
116L, 140L, 138L, 175L, 155L, 125L, 113L, 110L, 190L, 114L,
126L, 159L, 170L, 156L, 161L, 150L, 115L, 95L, 235L, 145L,
123L, 145L, 155L, 115L, 190L, 120L, 110L, 148L, 120L, 132L,
115L, 125L, 120L, 155L, 170L, 180L, 179L, 137L, 107L, 144L,
189L, 80L, 142L, 150L, 154L, 90L, 150L, 102L, 110L, 101L,
109L, 210L, 198L, 124L, 133L, 120L, 165L, 130L, 240L, 125L,
183L, 130L, 105L, 123L, 180L, 130L, 104L, 158L, 160L, 108L,
127L, 145L, 127L, 132L, 140L, 178L, 130L, 130L, 265L, 195L,
125L, 105L, 161L, 135L, 185L, 115L, 140L, 145L, 195L, 138L,
118L, 129L, 180L), AGLP = c(39L, 39L, 39L, 40L, 39L, 38L,
38L, 37L, 38L, 38L, 37L, 38L, 38L, 38L, 38L, 38L, 38L, 38L,
39L, 38L, 39L, 35L, 35L, 38L, 37L, 37L, 37L, 38L, 36L, 36L,
37L, 37L, 36L, 34L, 36L, 35L, 29L, 36L, 36L, 36L, 35L, 35L,
36L, 36L, 34L, 35L, 34L, 35L, 33L, 33L, 32L, 33L, 33L, 29L,
29L, 33L, 32L, 32L, 26L, 32L, 30L, 30L, 31L, 31L, 50L, 53L,
35L, 46L, 53L, 44L, 42L, 50L, 52L, 46L, 51L, 50L, 33L, 39L,
53L, 39L, 53L, 50L, 41L, 45L, 56L, 36L, 52L, 52L, 34L, 54L,
50L, 55L, 53L, 56L, 55L, 43L, 51L, 42L, 50L, 47L, 53L, 55L,
42L, 25L, 44L, 50L, 55L, 47L, 52L, 50L, 47L, 50L, 36L, 45L,
40L, 48L, 50L, 43L, 42L, 42L, 52L, 50L, 45L, 51L, 49L, 44L,
44L, 49L, 48L, 48L, 48L, 29L, 47L, 47L, 45L, 45L, 47L, 29L,
47L, 39L, 46L, 45L, 46L, 40L, 46L, 46L, 46L, 39L, 45L, 38L,
45L, 46L, 45L, 45L, 28L, 45L, 45L, 40L, 40L, 33L, 45L, 45L,
46L, 35L, 44L, 45L, 44L, 44L, 44L, 44L, 33L, 44L, 43L, 43L,
21L, 39L, 29L, 27L, 27L, 29L, 50L, 49L, 43L, 49L, 47L, 42L,
50L, 47L, 27L, 31L, 36L, 41L, 41L, 41L, 40L, 41L, 42L, 41L,
41L, 41L), MST = structure(c(1L, 3L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 5L, 2L, 1L, 1L, 2L, 1L, 1L, 4L, 1L, 2L,
1L, 2L, 5L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L,
1L, 5L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 4L, 5L,
4L, 1L, 5L, 4L, 4L, 1L, 5L, 3L, 1L, 5L, 1L, 4L, 4L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 4L, 1L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 5L, 1L, 1L, 1L, 1L, 3L,
5L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 4L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 1L, 1L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 4L, 1L, 1L, 1L,
1L, 3L, 4L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5"), class = "factor")), .Names = c("STR",
"OBS", "AGMT", "FNDX", "HIGD", "DEG", "CHK", "AGP1", "AGMN",
"NLV", "LIV", "WT", "AGLP", "MST"), row.names = c(NA, -200L), class = "data.frame")
Could it be bbdm[train] that it can't find, rather than train itself? What error message do you get?
You can use the browser command to debug here. i.e.
gcv<-function(formula,data=NULL,method="rpart",cross=5,times=10,k=7,layer=5,seed=0)
{
set=data;
n=nrow(set);
set.seed(as.vector(Sys.time()));
bb1=1:n;
bb2=rep(1:cross,ceiling(n/cross))[1:n];
bb2=sample(bb2,n);
samp=sample(c(1:n),size=n);
m=ceiling(n/cross);
smp<-mat.or.vec(cross,m);
j=rep(0,cross)
for (i in 1:n)
{
smp[bb2[i],j[bb2[i]]]=i
j[bb2[i]]=j[bb2[i]]+1
}
# Here we separate the original set into 5(variable cross)sets,
# each time we take one out and treat it as the testing set
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula","data"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
response<-model.response(mf)
#code copied from function.lm
reslvl<-length(levels(response))
tra<-mat.or.vec(reslvl,reslvl);
tes<-mat.or.vec(reslvl,reslvl);
for (i in 1:cross)
{
test<-smp[i,];
train<-setdiff(1:200,test);
show(train); #THe 'train' set can be shown here.
#some "if" and "else"statements are hidden
if (method=="logistic")#logistic is running well
{
bb.log<-step(glm(formula,set,family=binomial),trace=FALSE)
tra<-tra+as.vector(t(table(response[train],
bin(predict.glm(bb.log,set[train,],type="response")))))
tes<-tes+as.vector(t(table(response[test],
bin(predict.glm(bb.log,set[test,],type="response")))))
}
else if (method=="clogit")#clogit is meeting a problem.
{
##### BROWSER() CALL ##########
browser()
library("survival")
bb.clog<-step(clogit(formula,bbdm[train,]),trace=FALSE)
tra<-tra+as.vector(t(table( response[train],
bin(predict(bb.clog,set[train,])))))
tes<-tes+as.vector(t(table( response[test],
bin(predict(bb.clog,set[test,])))))
}
}
tra<-tra/cross;
tes<-tes/cross;
trainrate=1-sum(diag(tra))/sum(tra)
testrate=1-sum(diag(tes))/sum(tes)
result<-list(Train=tra,TrainRate=trainrate,Test=tes,TestRate=testrate)
result
}
Browser can be used to debug functions like this. Essentially, when you run the code, you'll enter into the environment at the moment browser was called. This will allow you to explore and see if the variables are what you thought they were. You can do an ls() to see which objects are defined, or try to find the value of train or (my suspicion) bbdm to see that they're all properly defined.

Resources