I have a data frame that contains thousands of firms observed from 1998 to 2007 (firms do not necessarily have the same number of years of data), and I want to convert it into a tensor indexed by firm, year, and variable.
How can I achieve this?
I don't know how to extract a small part of this data set to post here so that we can discuss the problem. Does anyone know how to do it?
structure(list(year = c(1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998), firmid = c("QB3732337",
"113810712", "618851819", "619457768", "HU5176905", "618024813",
"617883552", "105679742", "230141773", "609442909", "HU6355534",
"617882832", "60088847X", "617881725", "618403506", "145665417",
"216582994", "14405557X", "103400293", "607369138", "617737408",
"177160683", "142418787", "245560903", "15112238X", "617880650",
"618354214", "226040099", "128955068", "61156047X", "617481385",
"226091312", "190380322", "617880255", "611567073", "GD6120293",
"617876061", "617875739", "126925703", "221461337", "614801582",
"617883931", "618129447", "101713181", "611209524", "617883974",
"706747835", "242727819", "608934944", "619723894", "139432377",
"152102399", "617866832", "614407067", "607282008", "117944574",
"617865629", "618354185", "228791275", "134789270", "113810632",
"EJ2468142", "169832427", "115319804", "602003890", "211551128",
"193929448", "105044755", "704448663", "21960081X"), provinceid = c(420000,
140000, 440000, 450000, 420000, 440000, 440000, 130000, 650000,
330000, 420000, 440000, 120000, 440000, 440000, 330000, 530000,
330000, 120000, 310000, 440000, 410000, 320000, 230000, 340000,
440000, 440000, 620000, 230000, 350000, 440000, 620000, 440000,
440000, 350000, 440000, 440000, 440000, 220000, 610000, 410000,
440000, 440000, 110000, 350000, 440000, 410000, 210000, 320000,
450000, 320000, 340000, 440000, 410000, 310000, 210000, 440000,
440000, 650000, 320000, 140000, 330000, 370000, 150000, 140000,
510000, 440000, 130000, 330000, 530000), industrycode2 = c(3400,
3500, 2900, 1900, 1500, 2200, 1400, 3600, 1500, 4000, 1500, 3000,
2400, 2100, 1800, 1300, 2900, 4000, 3600, 2300, 1900, 3700, 2200,
3400, 2600, 1800, 2400, 1300, 1800, 2400, 1900, 3100, 1400, 1700,
2400, 3400, 2600, 2600, 1400, 2600, 3100, 1800, 3100, 1400, 2600,
3300, 1300, 2200, 3000, 3100, 4100, 3000, 1500, 1400, 3500, 3500,
3700, 2600, 2300, 3200, 1700, 4000, 4200, 3600, 2500, 1300, 3500,
3600, 1700, 2600), sales = c(45860, 4050, 17034, 154721, 267,
7703, 47572, 846, 267, 5132, 1767, 8354, 5668, 75330, 8935, 1958,
154721, 13072, 10654, 40505, 20637, 1510, 12884, 10753, 45542,
5286, 27492, 267, 1557, 872, 10892, 1386, 32054, 7290, 6903,
8263, 6996, 12848, 460, 44823, 52000, 16353, 6225, 750, 10863,
35110, 10638, 154721, 18100, 16773, 2415, 8686, 14362, 19831,
46958, 1340, 79855, 61817, 1114, 154721, 7030, 9923, 599, 4060,
154721, 361, 72986, 445, 18080, 3682), cogs = c(44780, 2430,
13839, 144088, 246, 9310, 37863, 495, 52, 4170, 1582, 7416, 3964,
58090, 8639, 1667, 211569, 8066, 4960, 28399, 19831, 1280, 12564,
7540, 37058, 1855, 25519, 70, 1539, 700, 10398, 1190, 25048,
6779, 5500, 7656, 6078, 12519, 370, 39479, 26816, 16586, 6061,
534, 10064, 32783, 8519, 308403, 16000, 23833, 1282, 6918, 12097,
15663, 35182, 768, 76005, 58528, 775, 4362410, 5770, 9040, 417,
2630, 167668, 290, 64038, 306, 15898, 2511), inventory = c(2740,
280, 1950, 46914, 711, 9552, 3984, 4989, 497, 1249, 0, 4336,
1450, 3000, 284, 0, 134404, 5881, 9347, 4818, 1744, 377, 376,
12238, 11669, 835, 17355, 226, 1370, 360, 434, 1089, 12154, 4000,
2388, 7257, 1547, 808, 137, 5920, 8750, 5600, 179, 151, 1321,
3454, 5479, 135303, 7480, 5943, 565, 850, 3032, 1207, 11307,
474, 2574, 26104, 519, 604670, 400, 501, 106, 7040, 43568, 711,
6763, 558, 444, 564), fixedasset = c(8580, 460, 6750, 28874,
2878, 25901, 43081, 3065, 198, 1163, 2140, 8484, 1688, 6900,
631, 1290, 849666, 6545, 10075, 6658, 3089, 581, 114, 22299,
22499, 3967, 54033, 1106, 883, 435, 404, 1712, 29329, 7952, 3176,
10272, 533, 138, 854, 14151, 64252, 10672, 5023, 62, 213, 1068,
572, 1115119, 14090, 71451, 13017, 5390, 6657, 5840, 31943, 80,
26145, 41905, 517, 3801800, 1164, 1725, 220, 15550, 72000, 825,
4697, 1913, 735, 3415), totalasset = c(13610, 3220, 16090, 166501,
14319, 44739, 78920, 10394, 823, 4698, 3101, 25325, 4221, 14900,
3118, 1724, 1091978, 28912, 28272, 27222, 10000, 1178, 1413,
42394, 52156, 11284, 89191, 1582, 6514, 3531, 1495, 3978, 54618,
22352, 13733, 27088, 3247, 1450, 1164, 33419, 129957, 18000,
20163, 266, 3728, 15286, 17337, 1718823, 25650, 94590, 15418,
8430, 12425, 10060, 75576, 991, 46436, 75405, 1973, 5976610,
4604, 5720, 1327, 43440, 248715, 1710, 31723, 2799, 4616, 5417
), stateshare = c(0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,
1, 0.518154501914978, 1, 1, 0, 0, 1, 1, 0.699999988079071, 0.831946730613708,
0, 0, 1, 1, 0, 0, 1, 0.200000002980232, 0, 0, 0, 0, 0, 1, 1,
0.400013834238052, 0, 0, 0.25, 0, 0.400000005960464, 0, 1, 0,
0, 1, 1, 0.823567152023315, 0, 0, 1, 0.742925107479095, 0, 1,
0.661562383174896, 1, 0, 1, 1, 0, 1, 0, 1, 0.749309420585632,
1), foreignshare = c(0.571428596973419, 0, 1, 0.385093629360199,
0, 0.5, 1, 0, 0, 0.30011722445488, 0, 0.699992954730988, 1, 1,
0.5, 0, 0, 0, 0, 1, 0.300029307603836, 0, 0, 0, 0, 0.782930612564087,
1, 0, 0, 1, 1, 0, 0.416000008583069, 1, 0.899999976158142, 1,
1, 1, 0, 0, 0.59998619556427, 0.700012564659119, 0.233907759189606,
0, 1, 0.600000023841858, 1, 0, 0.509767174720764, 0.299807518720627,
0, 0, 0, 1, 0.531239151954651, 0, 0.257074922323227, 1, 0, 0.245536029338837,
0, 0.285785287618637, 0, 0, 0.245354115962982, 0, 0.219982624053955,
0, 0.25069060921669, 0), privateshare = c(0.428571432828903,
0, 0, 0.614906370639801, 0, 0.5, 0, 0, 0, 0.699882745742798,
0, 0.300007075071335, 0, 0, 0.5, 0, 0.481845527887344, 0, 0,
0, 0.699970722198486, 0, 0, 0.300000011920929, 0.168053239583969,
0.217069372534752, 0, 0, 0, 0, 0, 0, 0.38400000333786, 0, 0.100000001490116,
0, 0, 0, 0, 0, 0, 0.299987435340881, 0.766092240810394, 0.75,
0, 0, 0, 0, 0.490232825279236, 0.700192511081696, 0, 0, 0.176432847976685,
0, 0.468760877847672, 0, 0, 0, 0, 0.0929015725851059, 0, 0.714214682579041,
0, 0, 0.754645884037018, 0, 0.780017375946045, 0, 0, 0), stateown = c(0,
1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1,
0, 1, 0, 1, 0, 1), foreignown = c(0, 0, 1, 0, 0, 0, 1, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), privateown = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), mixown = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1,
0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0), stateonly = c(0,
1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1,
0, 1, 0, 1, 0, 0), mixonly = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0,
1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0), foreignonly = c(0,
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), privateonly = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), gs = c(0,
1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1,
0, 1, 0, 1, 0, 1), gm = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1,
0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0,
1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0), gf = c(0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), privatize = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), IR = c(0.061188030987978,
0.115226335823536, 0.140906140208244, 0.325592696666718, 2.8902440071106,
1.02599358558655, 0.105221457779408, 3.07859539985657, 3.07859539985657,
0.299520373344421, 0.0191550496965647, 0.584681749343872, 0.365792125463486,
0.0516440011560917, 0.0328741744160652, 0.0191550496965647, 0.635272681713104,
0.729109823703766, 1.8844758272171, 0.169653862714767, 0.0879431217908859,
0.294531255960464, 0.0299267750233412, 1.62307691574097, 0.314884781837463,
0.450134783983231, 0.680081486701965, 3.07859539985657, 0.890188455581665,
0.514285743236542, 0.0417387969791889, 0.915126025676727, 0.485228359699249,
0.590057551860809, 0.434181809425354, 0.947884023189545, 0.254524528980255,
0.0645418986678123, 0.370270282030106, 0.14995314180851, 0.326297730207443,
0.337634146213531, 0.0295330807566643, 0.282771527767181, 0.131259933114052,
0.105359487235546, 0.643150627613068, 0.438721418380737, 0.467500001192093,
0.249360129237175, 0.44071763753891, 0.12286788225174, 0.250640660524368,
0.0770605877041817, 0.321385949850082, 0.6171875, 0.0338661931455135,
0.446008741855621, 0.669677436351776, 0.138609156012535, 0.0693240910768509,
0.0554203540086746, 0.254196643829346, 2.67680597305298, 0.259846836328506,
2.4517240524292, 0.10560917109251, 1.82352936267853, 0.0279280412942171,
0.224611714482307), GM = c(0.0241179093718529, 0.666666686534882,
0.230869278311729, 0.25, 0, -0.0678684562444687, 0.256424486637115,
0.709090888500214, 0.0769230797886848, 0.230695441365242, 0.116940580308437,
0.126483276486397, 0.429868817329407, 0.296780854463577, 0.0342632234096527,
0.174565091729164, 0.25985848903656, 0.620629787445068, 0.807692289352417,
0.426282614469528, 0.0406434386968613, 0.1796875, 0.0254695955663919,
0.426127314567566, 0.228938415646553, 0.807692289352417, 0.0773149430751801,
-0.0678684562444687, 0.0116959065198898, 0.245714291930199, 0.0475091375410557,
0.164705887436867, 0.279702961444855, 0.0753798484802246, 0.255090922117233,
0.0792842209339142, 0.151036530733109, 0.0262800548225641, 0.243243247270584,
0.135363101959229, 0.807692289352417, -0.0140479924157262, 0.0270582418888807,
0.404494374990463, 0.0793918892741203, 0.0709819123148918, 0.24873811006546,
0.0649604573845863, 0.131249994039536, -0.0678684562444687, 0.807692289352417,
0.255565196275711, 0.187236502766609, 0.266104847192764, 0.334716618061066,
0.744791686534882, 0.0506545640528202, 0.0561953261494637, 0.437419354915619,
0.0327548310160637, 0.218370884656906, 0.0976769924163818, 0.436450839042664,
0.54372626543045, 0.595140397548676, 0.244827583432198, 0.139729529619217,
0.454248368740082, 0.137249961495399, 0.466348081827164), CI = c(0.630418837070465,
0.142857149243355, 0.41951522231102, 0.173416376113892, 0.200991690158844,
0.578935623168945, 0.545881927013397, 0.294881671667099, 0.240583226084709,
0.247552156448364, 0.690099954605103, 0.335004925727844, 0.399905234575272,
0.463087260723114, 0.202373310923576, 0.748259842395782, 0.778098106384277,
0.226376593112946, 0.356359660625458, 0.24458159506321, 0.308899998664856,
0.493208825588226, 0.0846758112311363, 0.52599424123764, 0.431378930807114,
0.351559728384018, 0.605812251567841, 0.699115037918091, 0.135554194450378,
0.123194560408592, 0.270234107971191, 0.430367022752762, 0.536984145641327,
0.355762362480164, 0.231267750263214, 0.379208505153656, 0.164151519536972,
0.0951724126935005, 0.733676970005035, 0.423441767692566, 0.494409680366516,
0.59288889169693, 0.24911966919899, 0.233082711696625, 0.0846758112311363,
0.0846758112311363, 0.0846758112311363, 0.648768961429596, 0.549317717552185,
0.755375862121582, 0.780426323413849, 0.639383137226105, 0.535774648189545,
0.580516874790192, 0.422660619020462, 0.0846758112311363, 0.563032984733582,
0.555732369422913, 0.262037515640259, 0.636113107204437, 0.252823621034622,
0.301573425531387, 0.165787488222122, 0.357965022325516, 0.289487957954407,
0.482456147670746, 0.148062914609909, 0.683458387851715, 0.159228771924973,
0.63042277097702), WACC = c(0.0587803088128567, 0.114285714924335,
0.0474829077720642, 0.089603066444397, 0, -0.0595453642308712,
-0.0409021787345409, 0.00990956369787455, -0.0255164038389921,
0.0834397599101067, 0.00515962578356266, -0.00363277364522219,
0.0127931768074632, 0.175039649009705, 0.0102629894390702, -0.00986078940331936,
-0.00165113247931004, 0.0121057005599141, 0.0084889642894268,
0.175039649009705, -0.0706999972462654, 0.000848896452225745,
0.0481245554983616, 0.0177619475871325, -0.00661477027460933,
-0.0334987565875053, -0.0147324288263917, -0.095448799431324,
0.00445194961503148, 0.0218068547546864, 0.0675585269927979,
0.0175967831164598, 0.0445274449884892, -0.00881352834403515,
0.0546129755675793, -0.0589929111301899, 0.0344933792948723,
0.0675862058997154, 0.0180412363260984, 0.028426943346858, 0.0992020443081856,
-0.016499999910593, -0.0906611084938049, 0.0338345877826214,
-0.103832200169563, 0.0640455335378647, 0.0919997692108154, -0.002492984989658,
0.0179337225854397, -0.103832200169563, -0.035867165774107, 0.0543297752737999,
0.0449094548821449, 0.120576545596123, 0.0157457403838634, -0.0847628638148308,
0.0742096677422523, -0.00257277372293174, 0.0309173855930567,
0.0231686513870955, 0.116203308105469, 0.0748251751065254, 0.11379050463438,
-0.024171270430088, 0.00290694180876017, -0.0760233923792839,
0.166220098733902, 0.0178635232150555, 0.175039649009705, 0.0304596647620201
), Salesgrowth = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), Export = c(0, 0, 0.998237371444702, 0.726109445095062,
0, 0.00252844509668648, 0.895112693309784, 0, 0, 0.0362807661294937,
0, 0.348896056413651, 1, 0.88113659620285, 0.868606626987457,
0, 0, 0, 0.0201816353946924, 0, 1, 0, 0, 0, 0.195195838809013,
1, 0.926722705364227, 0, 0, 0.743137240409851, 1, 0, 0.0486678741872311,
0.944032907485962, 0.501250028610229, 0, 0.999022483825684, 0.996436417102814,
0, 0, 0, 0.988813638687134, 0, 0, 1, 0.967754900455475, 0, 0,
0, 0.00271074729971588, 0, 0, 0.0384709499776363, 0, 0.0599414147436619,
0, 0, 1, 0, 0.000741015828680247, 0, 0.870000004768372, 0, 0,
0.905181586742401, 0, 0.599265575408936, 0, 0.931351482868195,
0), Leverage = c(14.4659090042114, 0.483870953321457, 0.5306316614151,
5.99260854721069, 21.3162212371826, 0.802828848361969, 1.31172561645508,
2.2685534954071, 0.796943247318268, 1.39327561855316, 2.87625002861023,
1.1588100194931, 0.973352015018463, 0.183956876397133, 0.769580006599426,
0.289454013109207, 5.48250532150269, 0.52344822883606, 0.197103783488274,
0.183956876397133, 0.570105195045471, 3.6015625, 3.71000003814697,
4.99886798858643, 5.34887409210205, 1.4610687494278, 0.733816742897034,
1.18206894397736, 21.3162212371826, 1.80238091945648, 1.5820380449295,
3.65263152122498, 0.705373585224152, 0.914846241474152, 0.916945815086365,
1.07761931419373, 0.183956876397133, 0.361502349376678, 2.07936501502991,
2.1812469959259, 0.760839521884918, 3.46650123596191, 1.53558850288391,
0.330000013113022, 3.10121011734009, 0.874662756919861, 1.48345506191254,
1.28475737571716, 1.05200004577637, 7.94806528091431, 0.203309133648872,
1.210857629776, 1.16275024414062, 2.03012037277222, 0.476988017559052,
0.45521292090416, 1.75912058353424, 0.23635022342205, 0.342176884412766,
0.406383603811264, 0.641940057277679, 1, 1.0321592092514, 0.459677428007126,
3.95616054534912, 4.42857122421265, 1.85715568065643, 3.64950156211853,
0.515927731990814, 0.516942024230957), Current = c(0.642147123813629,
0.260168313980103, 0.600883364677429, 0.956995725631714, 0.260168313980103,
1.23653173446655, 1.40639424324036, 1.07675218582153, 0.260168313980103,
0.801113069057465, 2.81943321228027, 0.843509554862976, 1.13398694992065,
0.260168313980103, 0.545235216617584, 2.81943321228027, 1.19198870658875,
0.474293291568756, 0.260168313980103, 0.260168313980103, 0.401427298784256,
1.21140944957733, 0.326404929161072, 1.71450614929199, 0.987657248973846,
1.09765684604645, 1.29162395000458, 1.80042016506195, 0.98863410949707,
0.73352712392807, 0.839596688747406, 1.08649599552155, 0.746986508369446,
0.519444465637207, 0.607772707939148, 0.260168313980103, 0.260168313980103,
0.293445110321045, 2.21290326118469, 0.673032999038696, 0.964383006095886,
2.21746039390564, 0.470989525318146, 0.323529422283173, 1.15627562999725,
0.506606042385101, 0.626914441585541, 1.53568696975708, 1.02733683586121,
2.81943321228027, 1.05705952644348, 1.7860734462738, 1.1391396522522,
2.3782639503479, 0.665584921836853, 0.340285390615463, 1.23362839221954,
0.490639895200729, 0.345467031002045, 0.696641504764557, 0.268895357847214,
0.741701245307922, 0.433604329824448, 0.887159526348114, 1.37459933757782,
0.593891382217407, 0.616396367549896, 2.4796838760376, 0.42551463842392,
0.725274741649628), Cover = c(0.649999976158142, 0.112769484519958,
0, 0.5163214802742, NA, -0.111111111938953, 0.976295828819275,
1, -0.461538463830948, 0.249363869428635, 0, -1.61016952991486,
0, 0.0135302441194654, 0.161290317773819, -0.396551728248596,
-1.61016952991486, 1.04938268661499, 0.804794549942017, -0.00240922393277287,
-0.0130399344488978, 0, 0.267605632543564, 0.831447958946228,
1.6728972196579, 0, -0.00519031146541238, 0, -0.0773333311080933,
0.862068951129913, -0.0281690135598183, 0.32051283121109, 0.332811266183853,
0, 0.304878056049347, 0, 0, 0, -0.0192307699471712, 0.822123885154724,
0.376334100961685, -0.10884353518486, -0.86366331577301, 0, -0.0272628143429756,
0.403465360403061, -0.00689655169844627, -0.935179531574249,
1.6728972196579, 0, -0.215469613671303, 0.818540453910828, 0.538461565971375,
0, 0.861155688762665, 0.0506329126656055, 0.00725478818640113,
0.204545453190804, -0.0206896550953388, 0.640307724475861, 0.0638686120510101,
0.584474861621857, -0.0592105276882648, 0, 0, -0.1875, 0.520370066165924,
0, 0.020833333954215, 0.528301894664764), Bank = c(1, 1, 0, 1,
0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,
0, 1, 1), Inctaxrate = c(0.321428567171097, 0.330000013113022,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.200000002980232,
0, 0, 0, 0, 0, 0.333333343267441, 0, 0, 0, 0, 0, 0, 0, 0.266666680574417,
0.121365360915661, 0, 0.122222222387791, 0, 0, 0, 0.333333343267441,
0.333333343267441, 0.0750062316656113, 0, 0, 0.111111111938953,
0, 0.120408162474632, 0, 0, 0, 0, 0, 0.279069781303406, 0.244000002741814,
0, 0, 0, 0.33002045750618, 0, 0, 0.0624906569719315, 0.330000013113022,
0, 0.324999988079071, 0, 0.149377599358559, 0, 0.0553662702441216,
0, 0, 0.113207548856735), ROA = c(0.0205731075257063, 0.0931676998734474,
0.0474829077720642, 0.0433390773832798, 0, -0.0661615133285522,
-0.0800430849194527, 0, -0.0328068025410175, 0.0625798180699348,
0.00515962578356266, -0.0123593285679817, 0.0127931768074632,
0.15384615957737, 0.0070558050647378, -0.0232018567621708, -0.00898461323231459,
0.000345877138897777, 0.00017685342754703, 0.15384615957737,
-0.0723000019788742, 0.000848896452225745, 0.0346779897809029,
0.000424588390160352, -0.050157219171524, -0.0334987603127956,
-0.0147996991872787, -0.095448799431324, 0, 0.000566411763429642,
0.0688963234424591, 0.011312217451632, 0.0289648100733757, -0.00881352927535772,
0.0327677838504314, -0.0589929111301899, 0.0344933792948723,
0.0675862058997154, 0.0171821303665638, 0.000628385052550584,
0.061758890748024, -0.0173888895660639, -0.126888483762741, 0.0338345877826214,
-0.126888483762741, 0.0320554748177528, 0.0926342532038689, -0.00428083632141352,
0.00701754400506616, -0.107918381690979, -0.0434557013213634,
0.00510083045810461, 0.0201207250356674, 0.120576538145542, 0.00154811050742865,
-0.0807265415787697, 0.0736712887883186, -0.00233406270854175,
0.0324379131197929, 0.0111919632181525, 0.108601219952106, 0.0300699304789305,
0.120572723448277, -0.024171270430088, 0.00290694157592952, -0.0900584831833839,
0.074015699326992, 0.0178635232150555, 0.15384615957737, 0.00978401303291321
), ROS = c(0.00610553845763206, 0.0740740746259689, 0.0448514744639397,
0.0400644056499004, 0, -0.384265869855881, -0.132788196206093,
0, -0.482142865657806, 0.0572876073420048, 0.00905489549040794,
-0.0374670810997486, 0.00952717009931803, 0.14293497800827, 0.00246222713030875,
-0.0204290095716715, -0.0368077680468559, 0.000764993892516941,
0.000469307298772037, 0.14293497800827, -0.0350341610610485,
0.000662251666653901, 0.00380316679365933, 0.00167395151220262,
-0.057441484183073, -0.0715096518397331, -0.0480139665305614,
-0.681996643543243, 0, 0.00229357788339257, 0.00945648178458214,
0.032467532902956, 0.0493542142212391, -0.0270233191549778, 0.0651890486478806,
-0.193392232060432, 0.0160091482102871, 0.00762764643877745,
0.0434782616794109, 0.000468509475467727, 0.14293497800827, -0.0191402193158865,
-0.527710855007172, 0.0120000001043081, -0.0867163762450218,
0.013956137932837, 0.14293497800827, -0.0224030781537294, 0.00994475092738867,
-0.608597159385681, -0.277432709932327, 0.00495049497112632,
0.0174070466309786, 0.0611668601632118, 0.00249158823862672,
-0.0597014911472797, 0.0428401492536068, -0.00284711318090558,
0.0574506297707558, 0.014846958220005, 0.0711237564682961, 0.0173334684222937,
0.14293497800827, -0.25862067937851, 0.00270326854661107, -0.426592797040939,
0.0321705527603626, 0.112359553575516, 0.0831858441233635, 0.0143943512812257
), num_ID = c(110811, 5231, 56906, 57829, 109353, 53870, 53447,
3268, 31728, 44842, 109431, 53437, 35768, 53429, 55421, 12707,
28753, 12371, 2213, 40407, 52997, 20414, 11831, 32734, 13940,
53414, 55153, 30755, 8398, 47905, 51851, 30780, 23354, 53407,
48003, 107459, 53394, 53390, 7955, 29718, 51021, 53453, 54348,
597, 46440, 53454, 62729, 32489, 43867, 57861, 11030, 14104,
53370, 50914, 39352, 5956, 53364, 55151, 31386, 9852, 5230, 106943,
18825, 5561, 36645, 27786, 24240, 2994, 61769, 29396)), .Names = c("year",
"firmid", "provinceid", "industrycode2", "sales", "cogs", "inventory",
"fixedasset", "totalasset", "stateshare", "foreignshare", "privateshare",
"stateown", "foreignown", "privateown", "mixown", "stateonly",
"mixonly", "foreignonly", "privateonly", "gs", "gm", "gf", "privatize",
"IR", "GM", "CI", "WACC", "Salesgrowth", "Export", "Leverage",
"Current", "Cover", "Bank", "Inctaxrate", "ROA", "ROS", "num_ID"
), datalabel = "", time.stamp = "25 Sep 2016 11:04", formats = c("%9.0g",
"%9s", "%9.0g", "%9.0g", "%12.0g", "%12.0g", "%12.0g", "%12.0g",
"%12.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g"), types = c(65527L, 9L, 65527L, 65527L, 65526L,
65526L, 65526L, 65526L, 65526L, 65527L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L,
65527L), val.labels = structure(c("", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), .Names = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "")), var.labels = c("", "<b7><a8><c8><U+02F4><fa><c2><eb>",
"", "", "<b2><fa><U+01B7><cf><fa><ca><db><ca><d5><c8><eb>", "<b2><fa><U+01B7><cf><fa><ca><U+06F3><U+0271><be>",
"<b4><e6><bb><f5>", "<b9><U+0336><a8><d7><U+02B2><fa><ba><U+03FC><c6>",
"<d7><U+02B2><fa><d7><U+073C><c6>", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "group(firmid)"), version = 118L, label.table = list(), expansion.fields = list(), byteorder = "LSF", row.names = c(NA,
70L), class = "data.frame")
Data added. I now know that I can convert a three-mode array into a three-mode tensor, so we could also consider how to convert the current data frame into a three-mode array with dimensions "firmid", "year", and "all other covariates except these two".
Or imagine a three-dimensional reference system X-Y-Z: I want X to be firms, Y to be covariates, and Z to be years.
Any suggestions?
Related
I would like to replace the 0 values in columns 10:37 with NA in the rows where year is 0.
I have tried the following code:
ACLED_PRIO_flanked[which(ACLED_PRIO_flanked$year == "0")] <- NA
ACLED_PRIO_flanked[c(10:37),][ACLED_PRIO_flanked$year == 0] <- NA
is.na(ACLED_PRIO_flanked[,10:37]) <- ACLED_PRIO_flanked$year== 0
But I get the following error messages:
Error in vectbl_as_col_location():
! Must assign to columns with a valid subscript vector.
ℹ Logical subscripts must match the size of the indexed input.
✖ Input has size 28 but subscript value has size 17494.
or
Error in numtbl_as_col_location_assign():
! Can't assign to columns beyond the end with non-consecutive locations.
ℹ Input has size 37.
✖ Subscript which(ACLED_PRIO_flanked$year == "0") contains non-consecutive locations 52, 96, 103, 137, 157, etc.
The dataset:
structure(list(gid = c("100467", "100468", "100469", "100470",
"100471", "100472", "100473", "100474", "100475", "100476", "100477",
"100478", "100479", "100480", "100481", "100482", "100483", "100484",
"100485", "100486", "100487", "100488", "100489", "100490", "100491",
"100492", "100493", "100494", "100495", "100496", "100496", "100497",
"100497", "100497", "100497", "100497", "100497", "100497", "100498",
"100498"), xcoord = c(13.25, 13.75, 14.25, 14.75, 15.25, 15.75,
16.25, 16.75, 17.25, 17.75, 18.25, 18.75, 19.25, 19.75, 20.25,
20.75, 21.25, 21.75, 22.25, 22.75, 23.25, 23.75, 24.25, 24.75,
25.25, 25.75, 26.25, 26.75, 27.25, 27.75, 27.75, 28.25, 28.25,
28.25, 28.25, 28.25, 28.25, 28.25, 28.75, 28.75), ycoord = c(-20.25,
-20.25, -20.25, -20.25, -20.25, -20.25, -20.25, -20.25, -20.25,
-20.25, -20.25, -20.25, -20.25, -20.25, -20.25, -20.25, -20.25,
-20.25, -20.25, -20.25, -20.25, -20.25, -20.25, -20.25, -20.25,
-20.25, -20.25, -20.25, -20.25, -20.25, -20.25, -20.25, -20.25,
-20.25, -20.25, -20.25, -20.25, -20.25, -20.25, -20.25), col = c("387",
"388", "389", "390", "391", "392", "393", "394", "395", "396",
"397", "398", "399", "400", "401", "402", "403", "404", "405",
"406", "407", "408", "409", "410", "411", "412", "413", "414",
"415", "416", "416", "417", "417", "417", "417", "417", "417",
"417", "418", "418"), row = c("140", "140", "140", "140", "140",
"140", "140", "140", "140", "140", "140", "140", "140", "140",
"140", "140", "140", "140", "140", "140", "140", "140", "140",
"140", "140", "140", "140", "140", "140", "140", "140", "140",
"140", "140", "140", "140", "140", "140", "140", "140"), gwno = c(565,
565, 565, 565, 565, 565, 565, 565, 565, 565, 565, 565, 565, 565,
565, 565, 571, 571, 571, 571, 571, 571, 571, 571, 571, 571, 571,
571, 571, 552, 552, 552, 552, 552, 552, 552, 552, 552, 552, 552
), country = c("Namibia", "Namibia", "Namibia", "Namibia", "Namibia",
"Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia",
"Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Botswana",
"Botswana", "Botswana", "Botswana", "Botswana", "Botswana", "Botswana",
"Botswana", "Botswana", "Botswana", "Botswana", "Botswana", "Botswana",
"Zimbabwe (Rhodesia)", "Zimbabwe (Rhodesia)", "Zimbabwe (Rhodesia)",
"Zimbabwe (Rhodesia)", "Zimbabwe (Rhodesia)", "Zimbabwe (Rhodesia)",
"Zimbabwe (Rhodesia)", "Zimbabwe (Rhodesia)", "Zimbabwe (Rhodesia)",
"Zimbabwe (Rhodesia)", "Zimbabwe (Rhodesia)"), km2 = c(3282.64642089,
3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089,
3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089,
3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089,
3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089,
3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089,
3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089,
3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089,
3282.64642089, 3282.64642089, 3282.64642089, 3282.64642089),
year = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2000, 2002, 2001,
2007, 2002, 2004, 2008, 2003, 2000, 2002, 2010), `Violence against civilians` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 1, 1, 1, 1, 1, 2, 1, 1, 26,
6), Protests = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
0, 0, 0, 0, 0, 0, 6), `Strategic developments` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0),
Riots = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), Battles = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), `Explosions/Remote violence` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), TotalConflicts = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 1,
1, 3, 1, 1, 2, 1, 1, 29, 13), IncidenceConflicts = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
LagIncidenceConflicrs = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 1, 1, 1, 1, 1, 0, 1), OnsetConflicts = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0), IncidenceBattles = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1), LagIncidenceBattles = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OnsetBattles = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
IncidenceERV = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), LagIncidenceERV = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OnsetERV = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), IncidenceRiots = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), LagIncidenceRiots = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), OnsetRiots = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), IncidenceSD = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0), LagIncidenceSD = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), OnsetSD = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0), IncidenceVAC = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1), LagIncidenceVAC = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1), OnsetVAC = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
0), IncidenceProtests = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 1), LagIncidenceProtests = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0), OnsetProtests = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 1)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"))
We can use across to loop over the columns and replace the values with NA where the value is 0 and the 'year' column value is also 0:
library(dplyr)
# Within columns 10 to 37, set a cell to NA when the cell itself is 0 and the
# `year` value on the same row is 0 as well; every other cell is kept as is.
df1 <- mutate(
  df1,
  across(.cols = 10:37, .fns = ~ replace(.x, .x == 0 & year == 0, NA))
)
Try this
# For each of columns 10 to 37, replace a value with NA when both the value and
# the row's `year` are 0. The lambda returns the ifelse() result directly; the
# former inner `x <-` assignment had no effect outside the lambda.
df[10:37] <- lapply(df[10:37], \(x) ifelse(x == 0 & df$year == 0, NA, x))
OK, found this working as well:
# Set columns 10 to 37 to NA on the rows whose `year` is exactly 0.
# The comparison must be `== 0`: with `>= 0` every row that has a non-negative
# year would be selected and the real observations would be wiped out too.
# which() keeps only the TRUE positions, so rows with a missing year are skipped.
DF[10:37][which(DF$year == 0), ] <- NA
So here's a sample of the data I am working with:
> dput(candidateEvokeDFYoung)
structure(list(youngTreatment = structure(c(NA, 1, 0, 1, 0, 1,
0, 1, 1, 0, 1, 1, 0, 0, NA, NA, NA, NA, 1, 1), format.stata = "%10.0g"),
candTrustworthy = structure(c(0, 0, 0, 0, 0, 0, 0, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%10.0g"),
candKnowledgeable = structure(c(1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), format.stata = "%10.0g"),
candQualified = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1), format.stata = "%10.0g"),
candConservative = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), format.stata = "%10.0g"),
candLiberal = structure(c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%10.0g"), candInexperienced = structure(c(0,
1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0), format.stata = "%10.0g"),
candPrincipled = structure(c(1, 1, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 1, 1, 1, 0, 0, 0, 0), format.stata = "%10.0g"),
candDistance = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0), format.stata = "%10.0g"),
candEfficacy = structure(c(1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 0, 0), format.stata = "%10.0g")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
What I am trying to do is generate a table using the tables package with results from a t.test. The trouble I am having is I've taken this dataset and have used lapply to calculate my t.tests on each of the variables with youngTreatment as my 'y' variable:
# Run one t.test per column of candidateEvokeDFYoung except the first, using the
# youngTreatment column as the right-hand side of the formula. The result is a
# list with one t.test result per tested column.
candidateEvokesDiffYoung = lapply(candidateEvokeDFYoung[-1], function(x) t.test(x ~ candidateEvokeDFYoung$youngTreatment))
This gives me a list of lists. I have no clue how to use tables::tabular to access
list[['statistic']]
and
list[['p.value']]
I could definitely just manually pull all of these out myself and put it in a dataframe for stargazer or something, but I was wondering if there was someone who knew how I could do this more efficiently and with the tables package.
t.test returns objects of the class htest. I believe the best way to gather the results of an object of the class htest is to use the function tidy of the package broom.
library(broom)
# One t.test per column (all columns but the first), with youngTreatment as the
# right-hand side of the formula.
candidateEvokesDiffYoung = lapply(
  candidateEvokeDFYoung[-1],
  function(x) t.test(x ~ candidateEvokeDFYoung$youngTreatment)
)
# Apply tidy() to every test result and transpose, so that `m` has one row per
# tested column and one column per reported field (e.g. "p.value").
m <- t(sapply(X = candidateEvokesDiffYoung, FUN = tidy))
This will allow you to refer to the elements in a way similar to what you seem to be attempting.
> m["candTrustworthy", "p.value"][[1]]
[1] 0.7875872
> unlist(m[, "p.value"])
candTrustworthy candKnowledgeable candQualified candConservative candLiberal candInexperienced candPrincipled candDistance candEfficacy
0.7875872 0.7875872 0.7875872 0.3632175 0.3465935 0.6933006 0.3790778 NaN 0.3632175
I am trying to run an NMDS on some data, using the metaMDS function in the R vegan package. I've managed to run it with a similar dataframe, but for some reason I'm getting the following error with this one:
>Error in cmdscale(dist, k = k) : NA values not allowed in 'd'
In addition: Warning messages:
1: In distfun(comm, method = distance, ...) :
you have empty rows: their dissimilarities may be meaningless in method “bray”
2: In distfun(comm, method = distance, ...) : missing values in results
As it's a large dataframe, I've put it into a Google sheet here
For context, the rows are samples and the columns are genes, with the value indicating the level of the gene in the sample.
With the NMDS, I want to see how similar the samples are, and as far as I understand, I've set the data up correctly for that.
So I tried running the following;
library(vegan)
NMDS <- metaMDS(NMDS, distance="bray")
where NMDS is the dataframe. This is where I get the above error, and I'm not sure what I've done wrong?
This also happens after I run the following code:
NMDS[is.na(NMDS)] = 0
Any ideas where I'm going wrong?
dput:
structure(list(X1 = c(0, 0, 0, 0, 0, 0), X2 = c(0, 0, 0, 0, 0,
0), X3 = c(0, 0, 0, 0, 0, 0), X4 = c(0, 0, 0, 0, 0, 0), X5 = c(0,
0, 0, 0, 0, 0), X6 = c(0, 28, 161, 688, 0, 0), X7 = c(0, 3, 14,
0, 0, 0), X8 = c(0, 0, 0, 0, 0, 0), X9 = c(3, 0, 2, 2, 0, 0),
X10 = c(12, 78, 602, 303, 900, 0), X11 = c(0, 52, 856, 28,
191, 0), X12 = c(0, 51, 12, 1, 0, 0), X13 = c(0, 0, 0, 0,
0, 0), X14 = c(0, 0, 2, 0, 0, 0), X15 = c(5, 17, 46, 39,
9, 0), X16 = c(5255, 1531, 6790, 3302, 5084, 0), X17 = c(0,
0, 0, 0, 0, 0), X18 = c(0, 0, 15, 0, 0, 0), X19 = c(0, 0,
0, 0, 0, 0), X20 = c(0, 0, 0, 0, 0, 0), X21 = c(0, 0, 0,
0, 0, 0), X22 = c(0, 0, 0, 0, 0, 0), X23 = c(0, 0, 0, 0,
0, 0), X24 = c(0, 0, 44, 0, 0, 0), X25 = c(0, 0, 0, 0, 0,
0), X26 = c(0, 6, 24, 185, 0, 0), X27 = c(0, 0, 0, 0, 0,
0), X28 = c(0, 0, 13, 0, 0, 0), X29 = c(0, 0, 0, 0, 0, 0),
X30 = c(0, 0, 0, 7, 0, 0), X31 = c(0, 0, 0, 0, 0, 0), X32 = c(0,
0, 0, 0, 0, 0), X33 = c(0, 0, 1, 2, 0, 0), X34 = c(0, 0,
0, 0, 0, 0), X35 = c(0, 0, 0, 0, 0, 0), X36 = c(0, 2, 0,
0, 0, 0), X37 = c(0, 0, 0, 0, 0, 0), X38 = c(0, 0, 0, 0,
0, 0), X39 = c(0, 0, 0, 0, 0, 0), X40 = c(0, 0, 0, 0, 0,
0), X41 = c(0, 0, 0, 0, 0, 0), X42 = c(0, 0, 0, 0, 0, 0),
X43 = c(0, 0, 0, 0, 0, 0), X44 = c(0, 0, 0, 0, 0, 0), X45 = c(0,
0, 0, 1, 0, 0), X46 = c(0, 0, 0, 63, 0, 0), X47 = c(0, 0,
0, 0, 0, 0), X48 = c(0, 0, 0, 0, 0, 0), X49 = c(0, 0, 0,
0, 0, 0), X50 = c(0, 0, 0, 0, 0, 0), X51 = c(0, 0, 0, 0,
0, 0), X52 = c(0, 0, 0, 0, 0, 0), X53 = c(0, 0, 0, 1, 0,
0), X54 = c(0, 0, 0, 0, 0, 0), X55 = c(0, 0, 0, 1, 0, 0),
X56 = c(0, 0, 0, 0, 0, 0), X57 = c(0, 0, 3, 0, 0, 0), X58 = c(0,
0, 0, 0, 0, 0), X59 = c(0, 0, 0, 0, 0, 0), X60 = c(0, 0,
0, 0, 0, 0), X61 = c(0, 0, 44, 0, 0, 0), X62 = c(0, 0, 15,
0, 0, 0), X63 = c(0, 0, 347, 0, 0, 0), X64 = c(0, 0, 0, 0,
0, 0), X65 = c(0, 0, 0, 5, 0, 0), X66 = c(0, 0, 0, 0, 0,
0), X67 = c(1, 8, 2, 11, 6, 0), X68 = c(0, 26, 0, 0, 0, 0
), X69 = c(0, 0, 0, 8, 0, 0), X70 = c(0, 0, 0, 13, 0, 0),
X71 = c(0, 0, 0, 0, 0, 0), X72 = c(0, 2, 0, 0, 0, 0), X73 = c(0,
0, 0, 0, 0, 0), X74 = c(341, 74, 0, 0, 0, 0), X75 = c(4,
6, 10, 17, 13, 0), X76 = c(0, 0, 0, 0, 0, 0), X77 = c(0,
0, 0, 0, 0, 0), X78 = c(0, 0, 0, 6, 0, 0), X79 = c(0, 0,
0, 0, 0, 0), X80 = c(0, 0, 0, 0, 0, 0), X81 = c(403, 86,
0, 0, 0, 0), X82 = c(20, 95, 54, 0, 0, 0), X83 = c(0, 2,
0, 1, 0, 0), X84 = c(0, 0, 3, 1, 0, 0), X85 = c(0, 0, 0,
0, 0, 0), X86 = c(40, 132, 39, 0, 1, 0), X87 = c(0, 0, 0,
0, 0, 0), X88 = c(0, 0, 0, 0, 0, 0), X89 = c(0, 0, 0, 0,
0, 0), X90 = c(0, 0, 0, 0, 0, 0), X91 = c(0, 0, 0, 0, 0,
0), X92 = c(0, 7, 0, 0, 0, 0), X93 = c(0, 0, 0, 0, 0, 0),
X94 = c(0, 0, 0, 0, 0, 0), X95 = c(0, 0, 0, 0, 0, 0), X96 = c(0,
0, 0, 0, 0, 0), X97 = c(0, 0, 0, 0, 0, 0), X98 = c(0, 0,
0, 0, 0, 0), X99 = c(0, 0, 0, 0, 0, 0), X100 = c(0, 0, 0,
0, 0, 0), X101 = c(0, 0, 0, 0, 0, 0), X102 = c(0, 8, 0, 1,
0, 0), X103 = c(0, 0, 0, 0, 0, 0), X104 = c(0, 0, 0, 0, 0,
0), X105 = c(0, 0, 0, 0, 0, 0), X106 = c(0, 0, 0, 0, 0, 0
), X107 = c(0, 0, 0, 0, 0, 0), X108 = c(0, 0, 0, 0, 0, 0),
X109 = c(0, 0, 0, 0, 0, 0), X110 = c(0, 0, 0, 0, 0, 0), X111 = c(0,
0, 0, 0, 0, 0), X112 = c(15, 47, 0, 1, 0, 0), X113 = c(0,
0, 0, 0, 0, 0), X114 = c(0, 0, 0, 0, 0, 0), X115 = c(0, 0,
0, 2, 0, 0), X116 = c(43, 0, 0, 1, 1, 0), X117 = c(0, 0,
0, 0, 0, 0), X118 = c(0, 0, 0, 0, 0, 0), X119 = c(0, 0, 0,
0, 0, 0), X120 = c(387, 0, 0, 0, 0, 0), X121 = c(0, 0, 0,
0, 0, 0), X122 = c(342, 1, 0, 72, 0, 0), X123 = c(0, 0, 0,
0, 0, 0), X124 = c(0, 0, 0, 76, 0, 0), X125 = c(0, 0, 0,
0, 0, 0), X126 = c(0, 0, 0, 0, 0, 0), X127 = c(0, 2, 0, 0,
0, 0), X128 = c(0, 0, 0, 0, 0, 0), X129 = c(0, 0, 0, 0, 0,
0), X130 = c(0, 0, 0, 0, 0, 0), X131 = c(0, 0, 0, 0, 0, 0
), X132 = c(0, 0, 0, 0, 0, 0), X133 = c(0, 0, 0, 0, 0, 0),
X134 = c(0, 0, 0, 11, 0, 0), X135 = c(13, 108, 0, 129, 192,
0), X136 = c(0, 0, 0, 0, 0, 0), X137 = c(18, 129, 0, 23,
0, 0), X138 = c(0, 0, 0, 32, 7, 0), X139 = c(1, 0, 0, 10,
0, 0), X140 = c(0, 0, 0, 3, 0, 0), X141 = c(0, 0, 0, 0, 0,
0), X142 = c(0, 0, 0, 14, 0, 0), X143 = c(0, 0, 0, 0, 0,
0), X144 = c(16, 74, 71, 0, 0, 0), X145 = c(0, 0, 0, 0, 392,
0), X146 = c(0, 24, 224, 1, 0, 0), X147 = c(0, 19, 224, 1,
0, 0), X148 = c(0, 13, 253, 0, 0, 0), X149 = c(49, 17, 17,
0, 0, 0), X150 = c(133, 70, 74, 0, 0, 0), X151 = c(0, 0,
0, 0, 0, 0), X152 = c(0, 0, 0, 0, 0, 0), X153 = c(0, 0, 0,
0, 0, 0), X154 = c(0, 0, 0, 0, 0, 0), X155 = c(0, 0, 0, 0,
0, 0), X156 = c(0, 1, 0, 0, 0, 0), X157 = c(0, 0, 0, 0, 0,
0), X158 = c(0, 0, 0, 22, 0, 0), X159 = c(0, 0, 0, 0, 0,
0), X160 = c(0, 0, 0, 10, 0, 0), X161 = c(0, 0, 0, 106, 0,
0), X162 = c(148, 27, 85, 0, 0, 0), X163 = c(0, 0, 0, 0,
0, 0), X164 = c(0, 0, 0, 0, 0, 0), X165 = c(0, 10, 0, 0,
0, 0), X166 = c(0, 5, 0, 0, 0, 0), X167 = c(0, 0, 0, 0, 0,
0), X168 = c(1, 0, 0, 0, 0, 0), X169 = c(0, 7, 0, 0, 0, 0
), X170 = c(0, 0, 0, 2, 0, 0), X171 = c(0, 0, 0, 0, 0, 0),
X172 = c(0, 0, 0, 0, 0, 0), X173 = c(0, 0, 0, 0, 0, 0), X174 = c(0,
0, 0, 0, 0, 0), X175 = c(0, 0, 0, 2, 0, 0), X176 = c(0, 0,
0, 0, 0, 0), X177 = c(0, 0, 0, 212, 0, 0), X178 = c(0, 1,
0, 0, 0, 0), X179 = c(0, 0, 0, 0, 0, 0), X180 = c(0, 0, 0,
0, 0, 0), X181 = c(0, 0, 0, 0, 0, 0), X182 = c(0, 0, 0, 0,
0, 0), X183 = c(0, 0, 0, 0, 0, 0), X184 = c(0, 0, 0, 0, 0,
0), X185 = c(0, 9, 0, 0, 0, 0), X186 = c(0, 0, 0, 0, 0, 0
), X187 = c(0, 0, 0, 0, 0, 0), X188 = c(0, 0, 0, 0, 0, 0),
X189 = c(0, 0, 0, 0, 0, 0), X190 = c(475, 108, 329, 14, 57,
0), X191 = c(0, 0, 8, 0, 0, 0), X192 = c(0, 0, 0, 0, 0, 0
), X193 = c(0, 0, 0, 0, 0, 0), X194 = c(0, 0, 0, 0, 0, 0),
X195 = c(0, 0, 0, 0, 0, 0), X196 = c(0, 0, 0, 0, 0, 0), X197 = c(0,
0, 0, 0, 0, 0), X198 = c(0, 0, 2, 0, 0, 0), X199 = c(0, 0,
0, 0, 0, 0), X200 = c(0, 0, 0, 0, 0, 0), X201 = c(0, 27,
647, 1, 0, 0), X202 = c(0, 0, 0, 0, 0, 0), X203 = c(0, 0,
0, 0, 0, 0), X204 = c(0, 0, 0, 0, 0, 0), X205 = c(251, 41,
58, 0, 1, 0), X206 = c(0, 0, 0, 0, 0, 0), X207 = c(0, 0,
0, 0, 0, 0), X208 = c(0, 0, 0, 0, 0, 0), X209 = c(0, 0, 0,
0, 0, 0), X210 = c(0, 0, 0, 0, 0, 0), X211 = c(0, 0, 0, 0,
0, 0), X212 = c(0, 0, 0, 0, 0, 0), X213 = c(0, 0, 0, 0, 0,
0), X214 = c(0, 0, 0, 0, 0, 0), X215 = c(0, 0, 0, 0, 0, 0
), X216 = c(0, 0, 0, 0, 0, 0), X217 = c(0, 0, 0, 0, 0, 0),
X218 = c(0, 0, 0, 0, 0, 0), X219 = c(0, 0, 0, 0, 0, 0), X220 = c(0,
0, 0, 0, 0, 0), X221 = c(0, 0, 0, 0, 0, 0), X222 = c(0, 0,
0, 0, 0, 0), X223 = c(0, 0, 0, 0, 0, 0), X224 = c(2, 0, 0,
0, 0, 0), X225 = c(0, 0, 0, 0, 0, 0), X226 = c(0, 0, 0, 0,
0, 0), X227 = c(0, 0, 0, 0, 0, 0), X228 = c(0, 0, 0, 0, 0,
0), X229 = c(0, 0, 0, 0, 0, 0), X230 = c(0, 0, 0, 0, 0, 0
), X231 = c(1, 0, 0, 0, 0, 0), X232 = c(0, 0, 0, 0, 0, 0),
X233 = c(0, 0, 0, 0, 0, 0), X234 = c(0, 0, 0, 0, 0, 0), X235 = c(0,
0, 0, 0, 0, 0), X236 = c(0, 0, 0, 0, 0, 0), X237 = c(0, 0,
0, 0, 0, 0), X238 = c(0, 0, 0, 0, 0, 0), X239 = c(0, 0, 0,
0, 0, 0), X240 = c(1, 0, 0, 0, 0, 0), X241 = c(445, 90, 0,
0, 1, 0), X242 = c(1, 70, 0, 0, 0, 0), X243 = c(23, 154,
11, 0, 0, 0), X244 = c(0, 0, 1, 0, 0, 0), X245 = c(174, 250,
192, 6, 0, 0), X246 = c(0, 2, 0, 1, 0, 0), X247 = c(0, 0,
0, 0, 0, 0), X248 = c(0, 0, 0, 0, 0, 0), X249 = c(29, 73,
20, 0, 0, 0), X250 = c(0, 99, 0, 0, 0, 0), X251 = c(20, 66,
4, 0, 0, 0), X252 = c(265, 48, 191, 0, 1, 0), X253 = c(112,
59, 0, 0, 0, 0), X254 = c(0, 3, 3, 0, 0, 0), X255 = c(0,
1, 0, 0, 0, 0), X256 = c(0, 0, 0, 0, 0, 0), X257 = c(0, 2,
0, 0, 0, 0), X258 = c(0, 0, 0, 0, 0, 0), X259 = c(86, 44,
69, 0, 0, 0), X260 = c(0, 0, 0, 0, 0, 0), X261 = c(13, 27,
0, 0, 1, 0), X262 = c(0, 5, 0, 0, 0, 0), X263 = c(0, 0, 0,
0, 0, 0), X264 = c(0, 0, 0, 0, 0, 0), X265 = c(0, 0, 0, 0,
0, 0), X266 = c(0, 0, 0, 0, 0, 0), X267 = c(0, 1, 0, 0, 0,
0), X268 = c(0, 0, 0, 0, 0, 0), X269 = c(0, 0, 0, 0, 0, 0
), X270 = c(0, 0, 0, 0, 0, 0), X271 = c(0, 0, 0, 4, 0, 0),
X272 = c(0, 0, 0, 0, 0, 0), X273 = c(0, 0, 0, 0, 0, 0), X274 = c(0,
0, 0, 0, 0, 0), X275 = c(291, 200, 115, 0, 0, 0), X276 = c(0,
5, 0, 0, 0, 0), X277 = c(0, 0, 0, 0, 0, 0), X278 = c(0, 5,
0, 5, 0, 0), X279 = c(0, 3, 2, 6, 0, 0), X280 = c(0, 0, 28,
0, 0, 0), X281 = c(0, 1, 0, 0, 0, 0), X282 = c(0, 8, 1, 5,
0, 0), X283 = c(0, 3, 0, 1, 0, 0), X284 = c(0, 0, 17, 0,
0, 0), X285 = c(0, 3, 0, 0, 0, 0), X286 = c(0, 0, 0, 0, 0,
0), X287 = c(0, 1, 1, 4, 0, 0), X288 = c(0, 0, 0, 0, 0, 0
), X289 = c(0, 2, 0, 0, 0, 0), X290 = c(0, 0, 0, 0, 0, 0),
X291 = c(0, 0, 0, 0, 0, 0), X292 = c(0, 0, 0, 4, 0, 0), X293 = c(0,
0, 0, 0, 0, 0), X294 = c(38, 10, 72, 0, 0, 0), X295 = c(0,
58, 0, 0, 0, 0), X296 = c(0, 20, 0, 0, 0, 0), X297 = c(69,
4, 39, 0, 1, 0), X298 = c(0, 15, 304, 3, 0, 0), X299 = c(0,
0, 0, 0, 0, 0), X300 = c(0, 6, 0, 0, 0, 0), X301 = c(0, 1,
0, 0, 0, 0), X302 = c(51, 28, 13, 0, 0, 0), X303 = c(96,
149, 28, 0, 0, 0), X304 = c(34, 25, 24, 0, 0, 0), X305 = c(0,
3, 1, 0, 0, 0), X306 = c(0, 3, 7, 0, 0, 0), X307 = c(0, 4,
0, 0, 0, 0), X308 = c(0, 0, 0, 0, 0, 0), X309 = c(0, 0, 35,
1, 0, 0), X310 = c(262, 9, 137, 0, 0, 0), X311 = c(3, 15,
0, 2, 9, 0), X312 = c(445, 139, 353, 48, 16, 0), X313 = c(0,
0, 0, 0, 0, 0), X314 = c(0, 0, 0, 0, 0, 0), X315 = c(0, 0,
0, 0, 0, 0), X316 = c(0, 0, 0, 0, 0, 0), X317 = c(0, 0, 0,
0, 0, 0), X318 = c(0, 0, 0, 0, 0, 0), X319 = c(0, 0, 0, 0,
0, 0), X320 = c(62, 138, 36, 0, 0, 0), X321 = c(3, 0, 0,
0, 0, 0), X322 = c(0, 0, 0, 0, 0, 0), X323 = c(0, 13, 0,
0, 0, 0), X324 = c(0, 0, 0, 0, 0, 0), X325 = c(142, 0, 104,
0, 0, 0), X326 = c(0, 2, 0, 0, 0, 0), X327 = c(56, 35, 101,
0, 0, 0), X328 = c(0, 0, 0, 10, 0, 0), X329 = c(0, 0, 0,
0, 0, 0), X330 = c(0, 2, 0, 0, 0, 0), X331 = c(259, 27, 107,
0, 2, 0), X332 = c(0, 0, 0, 0, 0, 0), X333 = c(0, 7, 0, 0,
0, 0), X334 = c(0, 0, 0, 0, 0, 0), X335 = c(98, 39, 95, 0,
0, 0), X336 = c(0, 0, 1, 0, 0, 0), X337 = c(0, 0, 0, 0, 0,
0), X338 = c(141, 28, 85, 0, 0, 0), X339 = c(15, 14, 20,
0, 0, 0), X340 = c(0, 6, 0, 0, 0, 0), X341 = c(0, 0, 0, 0,
0, 0), X342 = c(0, 2, 0, 0, 0, 0), X343 = c(0, 0, 0, 0, 0,
0), X344 = c(0, 0, 0, 0, 0, 0), X345 = c(0, 10, 232, 0, 0,
0), X346 = c(0, 4, 0, 0, 0, 0), X347 = c(0, 0, 0, 0, 0, 0
), X348 = c(0, 0, 0, 0, 0, 0), X349 = c(0, 0, 0, 0, 0, 0),
X350 = c(0, 0, 0, 0, 0, 0), X351 = c(0, 0, 0, 0, 0, 0), X352 = c(0,
0, 0, 0, 0, 0), X353 = c(0, 0, 0, 0, 4, 0), X354 = c(0, 0,
0, 0, 0, 0), X355 = c(0, 0, 0, 0, 1, 0), X356 = c(0, 0, 0,
0, 0, 0), X357 = c(0, 0, 0, 0, 0, 0), X358 = c(0, 0, 0, 0,
0, 0), X359 = c(0, 0, 0, 0, 0, 0), X360 = c(0, 0, 0, 0, 0,
0), X361 = c(0, 0, 0, 0, 0, 0), X362 = c(0, 0, 0, 0, 0, 0
), X363 = c(0, 0, 0, 0, 0, 0), X364 = c(0, 0, 0, 0, 2, 0),
X365 = c(0, 0, 0, 0, 0, 0), X366 = c(0, 0, 0, 0, 0, 0), X367 = c(0,
0, 0, 0, 0, 0), X368 = c(0, 0, 0, 0, 0, 0), X369 = c(0, 0,
0, 17, 0, 0), X370 = c(0, 0, 0, 0, 0, 0), X371 = c(0, 0,
0, 0, 0, 0), X372 = c(0, 0, 0, 0, 0, 0), X373 = c(0, 0, 0,
0, 0, 0), X374 = c(0, 0, 0, 0, 0, 0), X375 = c(0, 0, 0, 0,
0, 0), X376 = c(0, 0, 1, 0, 0, 0), X377 = c(0, 0, 0, 0, 0,
0), X378 = c(0, 0, 0, 0, 0, 0), X379 = c(0, 0, 0, 0, 0, 0
), X380 = c(0, 0, 0, 0, 0, 0), X381 = c(0, 0, 0, 0, 0, 0),
X382 = c(0, 0, 0, 0, 0, 0), X383 = c(0, 51, 0, 0, 0, 0),
X384 = c(0, 0, 0, 0, 0, 0), X385 = c(7, 0, 0, 11, 1, 0),
X386 = c(0, 0, 0, 0, 0, 0), X387 = c(0, 0, 1, 0, 0, 0), X388 = c(0,
0, 0, 0, 0, 0), X389 = c(0, 0, 0, 0, 0, 0), X390 = c(0, 5,
0, 0, 0, 0), X391 = c(0, 0, 0, 0, 0, 0), X392 = c(0, 0, 0,
0, 0, 0), X393 = c(2, 16, 0, 0, 0, 0), X394 = c(0, 6, 88,
0, 0, 0), X395 = c(0, 14, 136, 1, 0, 0), X396 = c(0, 41,
350, 2, 0, 0), X397 = c(0, 0, 0, 0, 0, 0), X398 = c(20, 413,
0, 12, 3, 0), X399 = c(0, 0, 0, 0, 0, 0), X400 = c(0, 3,
0, 0, 0, 0), X401 = c(0, 0, 0, 0, 0, 0), X402 = c(0, 2, 0,
0, 0, 0), X403 = c(0, 2, 0, 0, 0, 0), X404 = c(0, 0, 0, 0,
0, 0), X405 = c(0, 0, 0, 0, 0, 0), X406 = c(0, 0, 0, 0, 0,
0), X407 = c(0, 0, 39, 1, 0, 0), X408 = c(10, 73, 31, 0,
0, 0), X409 = c(0, 11, 0, 0, 0, 0), X410 = c(68, 58, 66,
1, 0, 0), X411 = c(4, 32, 3, 0, 0, 0), X412 = c(8, 66, 39,
0, 0, 0), X413 = c(0, 0, 0, 0, 0, 0), X414 = c(2, 53, 7,
0, 0, 0), X415 = c(120, 90, 109, 0, 0, 0), X416 = c(0, 80,
0, 0, 0, 0), X417 = c(62, 79, 24, 0, 0, 0), X418 = c(58,
156, 30, 0, 0, 0), X419 = c(72, 138, 50, 2, 0, 0), X420 = c(0,
0, 0, 0, 0, 0), X421 = c(0, 0, 0, 0, 0, 0), X422 = c(36,
143, 43, 0, 0, 0), X423 = c(0, 0, 0, 0, 0, 0), X424 = c(0,
0, 0, 0, 0, 0), X425 = c(0, 5, 0, 0, 0, 0), X426 = c(12,
109, 0, 18, 26, 0), X427 = c(0, 0, 0, 0, 0, 0), X428 = c(0,
0, 0, 0, 0, 0), X429 = c(0, 3, 0, 0, 0, 0), X430 = c(0, 0,
362, 0, 0, 0), X431 = c(0, 0, 0, 0, 0, 0), X432 = c(0, 0,
685, 0, 0, 0), X433 = c(0, 0, 0, 0, 0, 0), X434 = c(0, 0,
0, 0, 0, 0), X435 = c(0, 0, 0, 0, 0, 0), X436 = c(0, 0, 0,
0, 0, 0), X437 = c(0, 0, 15, 8, 0, 0), X438 = c(0, 0, 184,
0, 0, 0), X439 = c(0, 0, 0, 0, 0, 0), X440 = c(0, 0, 0, 0,
0, 0), X441 = c(0, 0, 0, 0, 0, 0), X442 = c(0, 0, 0, 0, 0,
0), X443 = c(0, 0, 0, 0, 0, 0), X444 = c(0, 6, 0, 0, 0, 0
), X445 = c(0, 0, 0, 0, 0, 0), X446 = c(0, 1, 1, 4, 0, 0),
X447 = c(0, 3, 0, 0, 0, 0), X448 = c(0, 1, 0, 0, 0, 0), X449 = c(616,
28, 368, 0, 0, 0), X450 = c(0, 0, 1, 0, 0, 0), X451 = c(4098,
2120, 3788, 2663, 3524, 0), X452 = c(0, 0, 0, 0, 0, 0), X453 = c(0,
66, 0, 0, 0, 0), X454 = c(0, 9, 0, 0, 0, 0), X455 = c(0,
1, 0, 0, 0, 0), X456 = c(0, 5, 0, 0, 0, 0), X457 = c(57,
111, 36, 0, 0, 0), X458 = c(0, 0, 0, 0, 0, 0), X459 = c(0,
54, 68, 0, 0, 0), X460 = c(0, 0, 0, 0, 0, 0), X461 = c(0,
0, 0, 0, 0, 0), X462 = c(0, 0, 0, 0, 0, 0), X463 = c(0, 0,
0, 0, 0, 0), X464 = c(0, 0, 0, 0, 0, 0), X465 = c(0, 0, 0,
0, 0, 0), X466 = c(0, 0, 0, 0, 0, 0), X467 = c(0, 1, 0, 2,
0, 0), X468 = c(48, 79, 52, 0, 0, 0), X469 = c(24, 244, 178,
0, 0, 0), X470 = c(24, 28, 13, 0, 0, 0), X471 = c(0, 0, 0,
0, 0, 0), X472 = c(96, 52, 45, 0, 0, 0), X473 = c(0, 0, 0,
102, 0, 0), X474 = c(196, 82, 130, 0, 0, 0), X475 = c(106,
30, 33, 0, 0, 0), X476 = c(12, 21, 22, 0, 0, 0), X477 = c(0,
0, 0, 0, 172, 0), X478 = c(0, 28, 280, 0, 0, 0), X479 = c(0,
27, 310, 0, 0, 0), X480 = c(0, 32, 366, 0, 0, 0), X481 = c(0,
7, 0, 0, 0, 0), X482 = c(0, 22, 0, 0, 0, 0), X483 = c(0,
1, 0, 0, 0, 0), X484 = c(0, 13, 0, 0, 0, 0), X485 = c(0,
2, 0, 0, 0, 0), X486 = c(0, 16, 0, 0, 0, 0), X487 = c(0,
6, 0, 0, 0, 0), X488 = c(0, 8, 0, 0, 0, 0), X489 = c(0, 20,
0, 0, 0, 0), X490 = c(0, 3, 0, 0, 0, 0), X491 = c(0, 14,
0, 0, 0, 0), X492 = c(0, 4, 0, 0, 0, 0), X493 = c(0, 2, 0,
0, 0, 0), X494 = c(0, 5, 0, 0, 0, 0), X495 = c(0, 1, 0, 0,
0, 0), X496 = c(0, 4, 0, 0, 0, 0), X497 = c(0, 15, 0, 0,
0, 0), X498 = c(0, 0, 0, 0, 0, 0), X499 = c(0, 7, 0, 0, 0,
0), X500 = c(0, 13, 0, 0, 0, 0), X501 = c(0, 11, 0, 0, 0,
0), X502 = c(0, 7, 0, 0, 0, 0), X503 = c(0, 4, 0, 0, 0, 0
), X504 = c(0, 0, 0, 0, 0, 0), X505 = c(0, 7, 0, 0, 0, 0),
X506 = c(0, 1, 0, 0, 0, 0), X507 = c(0, 1, 0, 0, 0, 0), X508 = c(0,
0, 0, 1, 0, 0), X509 = c(0, 6, 0, 0, 0, 0), X510 = c(0, 0,
0, 0, 0, 0), X511 = c(0, 2, 0, 0, 0, 0), X512 = c(0, 1, 0,
0, 0, 0), X513 = c(0, 14, 0, 0, 0, 0), X514 = c(0, 3, 0,
0, 0, 0), X515 = c(237, 171, 188, 0, 0, 0), X516 = c(291,
222, 163, 0, 0, 0), X517 = c(5, 36, 9, 0, 0, 0), X518 = c(5,
102, 0, 0, 0, 0), X519 = c(0, 0, 0, 0, 0, 0), X520 = c(0,
0, 0, 0, 0, 0), X521 = c(0, 0, 0, 0, 0, 0), X522 = c(96,
69, 109, 0, 0, 0), X523 = c(236, 0, 118, 0, 1, 0), X524 = c(0,
44, 0, 0, 0, 0), X525 = c(0, 0, 0, 0, 0, 0), X526 = c(0,
0, 0, 0, 0, 0), X527 = c(0, 0, 0, 0, 0, 0), X528 = c(0, 0,
0, 0, 0, 0), X529 = c(0, 62, 15, 0, 0, 0), X530 = c(4, 183,
16, 0, 0, 0), X531 = c(3, 187, 19, 0, 0, 0), X532 = c(197,
79, 64, 0, 0, 0), X533 = c(27, 255, 25, 0, 0, 0), X534 = c(0,
2, 0, 0, 0, 0), X535 = c(0, 20, 0, 0, 0, 0), X536 = c(0,
1, 0, 0, 0, 0), X537 = c(0, 10, 0, 0, 0, 0), X538 = c(0,
1, 0, 0, 0, 0), X539 = c(0, 4, 0, 0, 0, 0), X540 = c(0, 0,
0, 0, 0, 0), X541 = c(0, 6, 0, 0, 0, 0), X542 = c(0, 1, 0,
0, 0, 0), X543 = c(0, 12, 113, 0, 0, 0), X544 = c(0, 77,
990, 0, 0, 0), X545 = c(6, 27, 14, 0, 0, 0), X546 = c(0,
0, 0, 0, 0, 0), X547 = c(0, 0, 0, 0, 0, 0), X548 = c(0, 0,
0, 0, 0, 0), X549 = c(0, 0, 0, 0, 0, 0), X550 = c(0, 0, 0,
0, 0, 0), X551 = c(0, 0, 0, 0, 0, 0), X552 = c(0, 0, 0, 0,
0, 0), X553 = c(301, 0, 0, 0, 0, 0), X554 = c(444, 148, 305,
0, 0, 0), X555 = c(0, 0, 0, 0, 0, 0), X556 = c(0, 2, 2, 0,
0, 0), X557 = c(0, 0, 0, 0, 0, 0), X558 = c(0, 1, 0, 0, 0,
0), X559 = c(0, 0, 0, 0, 0, 0), X560 = c(0, 0, 0, 0, 0, 0
), X561 = c(0, 3, 4, 6, 1, 0), X562 = c(120, 77, 26, 0, 0,
0), X563 = c(0, 3, 628, 0, 0, 0), X564 = c(709, 104, 0, 0,
0, 0), X565 = c(0, 0, 0, 0, 0, 0), X566 = c(95, 59, 581,
175, 1219, 0), X567 = c(0, 0, 0, 0, 13, 0), X568 = c(26,
7, 0, 26, 39, 0), X569 = c(18, 33, 0, 35, 36, 0), X570 = c(0,
2, 41, 39, 1, 0), X571 = c(0, 8, 47, 97, 1, 0), X572 = c(216,
291, 52, 279, 688, 0), X573 = c(198, 504, 0, 5, 0, 0), X574 = c(0,
0, 0, 0, 0, 0), X575 = c(110, 102, 895, 254, 1682, 0), X576 = c(1,
2, 0, 0, 0, 0), X577 = c(10, 18, 0, 0, 0, 0), X578 = c(8,
40, 0, 0, 0, 0), X579 = c(0, 0, 0, 0, 0, 0), X580 = c(0,
0, 0, 0, 0, 0), X581 = c(0, 0, 0, 0, 0, 0), X582 = c(0, 0,
0, 0, 0, 0), X583 = c(0, 0, 216, 0, 0, 0), X584 = c(0, 0,
0, 0, 0, 0), X585 = c(0, 0, 0, 0, 0, 0), X586 = c(0, 0, 0,
0, 0, 0), X587 = c(0, 0, 0, 0, 0, 0), X588 = c(0, 0, 0, 0,
0, 0), X589 = c(0, 0, 0, 0, 0, 0), X590 = c(0, 0, 0, 0, 0,
0), X591 = c(31, 32, 0, 52, 213, 0), X592 = c(0, 0, 12, 0,
0, 0), X593 = c(0, 0, 0, 0, 0, 0), X594 = c(28, 77, 21, 0,
0, 0), X595 = c(0, 0, 0, 0, 0, 0), X596 = c(0, 0, 0, 0, 0,
0)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
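You have some rows in NMDS that contain only 0 values, which metaMDS cannot handle: the Bray-Curtis dissimilarity divides by the combined totals of the two rows being compared, so it is undefined (0/0) between empty rows, and the resulting missing values in the distance matrix trigger the error.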
You can remove rows containing all values == 0 using dplyr:
library(dplyr)
# Keep only the rows that have at least one non-zero value, i.e. drop the
# all-zero rows. filter_all()/any_vars() are superseded scoped verbs;
# filter(if_any(...)) is the current equivalent (needs dplyr >= 1.0.4).
NMDS <- NMDS %>%
  filter(if_any(everything(), ~ .x != 0))
# Run the ordination on the cleaned data using Bray-Curtis dissimilarities.
NMDS <- metaMDS(NMDS, distance = "bray")
I have some data which looks like:
# A tibble: 50 x 28
sanchinarro date holiday weekday weekend workday_on_holi… weekend_on_holi… protocol_active
<dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 -1.01 2010-01-01 1 1 0 1 0 0
2 0.832 2010-01-02 0 0 1 0 0 0
3 1.29 2010-01-03 0 0 1 0 0 0
4 1.04 2010-01-04 0 1 0 0 0 0
5 0.526 2010-01-05 0 1 0 0 0 0
6 -0.292 2010-01-06 1 1 0 1 0 0
7 -0.394 2010-01-07 0 1 0 0 0 0
8 -0.547 2010-01-08 0 1 0 0 0 0
9 -0.139 2010-01-09 0 0 1 0 0 0
10 0.628 2010-01-10 0 0 1 0 0 0
I want to run xgb.cv on the first 40 rows and validate it on the final 10 rows.
I try the following:
library(xgboost)
library(dplyr)
X_Val <- ddd %>% select(-c(1:2))
Y_Val <- ddd %>% select(c(1)) %>% pull()
dVal <- xgb.DMatrix(data = as.matrix(X_Val), label = as.numeric(Y_Val))
xgb.cv(data = dVal, nround = 30, folds = NA, params = list(eta = 0.1, max_depth = 5))
which gives me this error:
Error in xgb.cv(data = dVal, nround = 30, folds = NA, eta = 0.1,
max_depth = 5) : 'folds' must be a list with 2 or more elements
that are vectors of indices for each CV-fold
How can I run a simple xgb.cv on the first 40 rows and test it on the last 10 rows?
I eventually want to apply a grid search with a list of parameters and save the results in a list. Since I am dealing with time series data I do not want to mix the folds up; I just want a simple train and in-sample test split of 40:10.
Data:
ddd <- structure(list(sanchinarro = c(-1.00742964973274, 0.832453587904369,
1.29242439731365, 1.03688505875294, 0.525806381631517, -0.291919501762755,
-0.394135237187039, -0.547458840323464, -0.138595898626329, 0.628022117055801,
1.19020866188936, 1.5990716035865, 1.5990716035865, -0.70078244345989,
2.11015028070792, 1.95682667757149, 0.985777191040795, 0.883561455616511,
0.985777191040795, 0.270267043070807, 2.51901322240505, 2.41679748698077,
0.372482778495091, -0.291919501762755, -0.905213914308458, -0.905213914308458,
-0.649674575747748, 1.2413165296015, 1.54796373587436, -0.70078244345989,
-0.905213914308458, -0.0363801632020448, 1.54796373587436, 2.00793454528363,
1.54796373587436, -0.445243104899181, -0.445243104899181, 1.03688505875294,
0.628022117055801, -0.496350972611323, 0.168051307646523, -0.649674575747748,
0.0658355722222391, -1.00742964973274, -0.291919501762755, 0.0147277045100972,
0.168051307646523, -0.189703766338471, 0.219159175358665, 0.679129984767943
), date = structure(c(14610, 14611, 14612, 14613, 14614, 14615,
14616, 14617, 14618, 14619, 14620, 14621, 14622, 14623, 14624,
14625, 14626, 14627, 14628, 14629, 14630, 14631, 14632, 14633,
14634, 14635, 14636, 14637, 14638, 14639, 14640, 14641, 14642,
14643, 14644, 14645, 14646, 14647, 14648, 14649, 14650, 14651,
14652, 14653, 14654, 14655, 14656, 14657, 14658, 14659), class = "Date"),
holiday = c(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), weekday = c(1,
0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1), weekend = c(0, 1, 1, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
0, 1, 1, 0, 0, 0, 0, 0), workday_on_holiday = c(1, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), weekend_on_holiday = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), protocol_active = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), text_broken_clouds = c(0,
1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1), text_clear = c(0, 0, 0,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 1), text_fog = c(0, 1, 0, 1, 1, 0,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0), text_partly_cloudy = c(0, 1, 0, 0, 0,
1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), text_partly_sunny = c(1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 1), text_passing_clouds = c(1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1), text_scattered_clouds = c(1, 1,
0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1), text_sunny = c(0, 0, 0, 0,
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1), month_1 = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1), month_3 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_4 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_5 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), month_6 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_7 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), month_8 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_9 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_10 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), month_11 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_12 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-50L))
EDIT: List data:
The final data comes in the form of lists.
datalst <- list(structure(list(sanchinarro = c(-1.00742964973274, 0.832453587904369,
1.29242439731365, 1.03688505875294, 0.525806381631517, -0.291919501762755,
-0.394135237187039, -0.547458840323464, -0.138595898626329, 0.628022117055801,
1.19020866188936, 1.5990716035865, 1.5990716035865, -0.70078244345989
), date = structure(c(14610, 14611, 14612, 14613, 14614, 14615,
14616, 14617, 14618, 14619, 14620, 14621, 14622, 14623), class = "Date"),
holiday = c(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), weekday = c(1,
0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1), weekend = c(0, 1,
1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0), workday_on_holiday = c(1,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), weekend_on_holiday = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), protocol_active = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), text_broken_clouds = c(0,
1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), text_clear = c(0,
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0), text_fog = c(0, 1,
0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0), text_partly_cloudy = c(0,
1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0), text_partly_sunny = c(1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1), text_passing_clouds = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), text_scattered_clouds = c(1,
1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1), text_sunny = c(0,
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0), month_1 = c(1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), month_2 = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_3 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_4 = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), month_5 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), month_6 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), month_7 = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), month_8 = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_9 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), month_10 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), month_11 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), month_12 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-14L)), structure(list(sanchinarro = c(0.832179838392013, 1.29225734336885,
1.03665872949283, 0.525461501740789, -0.292454062662475, -0.394693508212883,
-0.548052676538495, -0.139094894336863, 0.627700947291197, 1.19001789781844,
1.59897568002007, 1.59897568002007, -0.701411844864107, 2.11017290777211
), date = structure(c(14611, 14612, 14613, 14614, 14615, 14616,
14617, 14618, 14619, 14620, 14621, 14622, 14623, 14624), class = "Date"),
holiday = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), weekday = c(0,
0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1), weekend = c(1, 1,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0), workday_on_holiday = c(0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), weekend_on_holiday = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), protocol_active = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), text_broken_clouds = c(1,
0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), text_clear = c(0,
0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1), text_fog = c(1, 0,
1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0), text_partly_cloudy = c(1,
0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0), text_partly_sunny = c(1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0), text_passing_clouds = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), text_scattered_clouds = c(1,
0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0), text_sunny = c(0,
0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1), month_1 = c(1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), month_2 = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_3 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_4 = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), month_5 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), month_6 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), month_7 = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), month_8 = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_9 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), month_10 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), month_11 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), month_12 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-14L)), structure(list(sanchinarro = c(1.29293502084952, 1.03729933727253,
0.526027970118536, -0.292006217327851, -0.394260490758649, -0.547641900904846,
-0.138624807181653, 0.628282243549334, 1.19068074741873, 1.59969784114192,
1.59969784114192, -0.701023311051044, 2.11096920829591, 1.95758779814971
), date = structure(c(14612, 14613, 14614, 14615, 14616, 14617,
14618, 14619, 14620, 14621, 14622, 14623, 14624, 14625), class = "Date"),
holiday = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), weekday = c(0,
1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0), weekend = c(1, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1), workday_on_holiday = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), weekend_on_holiday = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), protocol_active = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), text_broken_clouds = c(0,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), text_clear = c(0,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0), text_fog = c(0, 1,
1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0), text_partly_cloudy = c(0,
0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0), text_partly_sunny = c(1,
1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1), text_passing_clouds = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), text_scattered_clouds = c(0,
0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0), text_sunny = c(0,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0), month_1 = c(1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), month_2 = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_3 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month_4 = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), month_5 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), month_6 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), month_7 = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), month_8 = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), month_9 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), month_10 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), month_11 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), month_12 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-14L)))
EDIT:
I think this gives me what I am after, but I need to double/triple-check it. (If you see any errors, please let me know.)
# Assign every row of `ddd` to one of two contiguous folds: the first 80% of
# the rows get id 1, all remaining rows get id 2.
# floor() makes the split index an explicit integer, and seq_len() avoids the
# `1:0` trap (which would yield c(1, 0)) when the 80% cut falls below one row.
splt <- floor(0.80 * nrow(ddd))
ddd$id <- rep(2, nrow(ddd))
ddd$id[seq_len(splt)] <- 1
# One list element per fold id (in order of first appearance), holding the row
# indices that belong to that fold.
fold.ids <- unique(ddd$id)
custom.folds <- lapply(fold.ids, function(id) which(ddd$id %in% id))
custom.folds
# Cross-validate with the pre-defined folds; per the xgboost documentation each
# list element is treated as the indices of one test fold.
# `nrounds` is spelled out in full instead of relying on partial matching of `nround`.
cv <- xgb.cv(params = list(eta = 0.1, max_depth = 5), data = dVal, nrounds = 10, folds = custom.folds, prediction = TRUE)
cv$evaluation_log
I now need to find a way to apply this to all 3 list elements in the newly added data (`datalst`).
Firstly, you should split the data into dtrain (the first 40 rows) and dval (the last 10 rows). Secondly, you need xgb.train rather than xgb.cv.
So your code should be modified to something like this:
library(xgboost)
library(dplyr)
# your code that builds `ddd` goes here

# Features: everything except the first two columns. Label: the first column.
X <- ddd %>% select(-c(1:2))
Y <- ddd %>% select(c(1)) %>% pull()

# Chronological hold-out: first 40 rows for training, last 10 for validation.
train_rows <- 1:40
val_rows <- 41:50

# `Y` is a plain vector after pull(), so it is indexed with one subscript
# (`Y[rows]`); the two-subscript form `Y[rows, ]` fails with
# "incorrect number of dimensions".
dtrain <- xgb.DMatrix(data = as.matrix(X[train_rows, ]), label = as.numeric(Y[train_rows]))
dval <- xgb.DMatrix(data = as.matrix(X[val_rows, ]), label = as.numeric(Y[val_rows]))

# Report the evaluation metric on both sets after every boosting round.
watchlist <- list(train = dtrain, val = dval)

# Booster settings go in `params`, and `nrounds` is spelled out in full rather
# than relying on partial matching of `nround`.
model <- xgb.train(params = list(eta = 0.1, max_depth = 5), data = dtrain, watchlist = watchlist, nrounds = 30)
IMHO, with only 40+10 rows and such sparse features, there is little hope of obtaining good results with XGBoost.
I have data about gender and petitioning. I want to run a regression of the "Femme" (woman) variable on the different issues raised in the petitions. I have regrouped those issues under general themes, and those themes are what I want to use as predictors of the "Femme" (woman) variable.
P.S.: Some petitions have many issues (ex.: water + science). So one petition could be counted in two data frames at the same time.
1) Here is what I did for all issues, this one is an example with the "Aboriginal" issue to show you how I coded the initial issues (you can also see the "Femme" variable at the beginning, which is already coded "0" and "1" in the original dataset under "Female"):
# Copy the existing 0/1 `Female` column under the name `Femme`.
DataPetitions$Femme <- DataPetitions$Female
# Indicator column: 1 when the petition's `Issue` text contains "Aboriginal",
# 0 otherwise (grepl() yields TRUE/FALSE, converted here to 1/0).
DataPetitions$Aboriginal <- as.numeric(grepl("Aboriginal", DataPetitions$Issue))
# ... (same for all 24 specific issues)
2) Creating 7 data frames for general petitioning themes:
# Build one object per general theme from the per-issue indicator columns.
# c() concatenates the listed columns end to end, so each result is a single
# vector with (number of columns listed) * nrow(DataPetitions) elements, not a
# data frame with one row per petition.
EnvironmentalIssues <- with(DataPetitions,
                            c(AirQuality, Biological, Climate, Environmental,
                              Toxic, Waste, Water))
EconomicIssues <- with(DataPetitions, c(Natural, Transport))
SocialIssues <- with(DataPetitions, c(Aboriginal, Health))
AgriculturalIssues <- with(DataPetitions, c(Agriculture, Fisheries, Pesticides))
PoliticalIssues <- with(DataPetitions,
                        c(Compliance, Federal, Governance, International))
ScientificIssues <- with(DataPetitions, c(Science))
OtherIssues <- with(DataPetitions, c(Other))
3) Trying to do a regression. This is my glm code:
# Regress `Femme` on the seven theme objects.
# The predictors named in the formula are free-standing objects rather than
# columns of `DataPetitions`, so their lengths are whatever they were built
# with and are not tied to nrow(DataPetitions).
# No `family` argument is passed, so glm() falls back to its default family.
# NOTE(review): for a 0/1 outcome such as `Femme`, `family = binomial` is
# probably what is intended; confirm.
model7 <- glm(DataPetitions$Femme ~ SocialIssues + PoliticalIssues +
ScientificIssues + EnvironmentalIssues + EconomicIssues +
AgriculturalIssues + OtherIssues, data = DataPetitions)
# When I try to run it, I get this error message:
Error in model.frame.default(formula = DataPetitions$Femme ~
SocialIssues + : variable lengths differ (found for
'SocialIssues')
With dput(head(DataPetitions,20)), I get this:
[...] class = "factor"), Femme = c(1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1), AuMoinsUneFemme = c(1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1),
Homme = c(1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
1, 1, 1, 2), AuMoinsUnHomme = c(1, 0, 0, 0, 1, 1, 0, 1, 1,
1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1), Individual1 = c(0, 0, 0,
1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0), Group1 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1),
Organisation1 = c(1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0,
0, 0, 1, 0, 0, 0, 0), Aboriginal = c(1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0), Agriculture = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0),
AirQuality = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 0, 0), Biological = c(0, 1, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), Climate = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1), Compliance = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0),
Environmental = c(0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), Federal = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Fisheries = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0), Governance = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
Health = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), International = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), Natural = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0), Other = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
Pesticides = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), Science = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Toxic = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Transport = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
Waste = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), Water = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("Data.", "Title", "Number", "Issue", "Petitioner", "Individual", "Group", "Organisation",
"Female", "Male", "Unknown", "DateReceived", "Status", "Summary",
"Hyperlink", "Femme", "AuMoinsUneFemme", "Homme", "AuMoinsUnHomme",
"Individual1", "Group1", "Organisation1", "Aboriginal", "Agriculture",
"AirQuality", "Biological", "Climate", "Compliance", "Environmental",
"Federal", "Fisheries", "Governance", "Health", "International",
"Natural", "Other", "Pesticides", "Science", "Toxic", "Transport",
"Waste", "Water"), row.names = c(NA, 20L), class = "data.frame")