Related
I am estimating a panel regression model, and I need to add the cross sectional average of the dependent variable and regressors to the model.
I am struggling to implement the cross sectional averages in R. Can anyone help me out.
So I have a panel regression code below - using plm package.
I need to add cross sectional average of variable A, B, C and D to the right hand side of the regression
library(plm)
panel_fe <- plm(A ~ B+ C + D, model = "fd", effect="individual", data = PanelS)
So my final regression model would be like this A = B+ C+D + A_bar + B_bar + C_bar + D_bar, where A_bar, B_bar , C_bar and D_bar are the cross sectional averages of A, B,C and D respectively.
My panel datasets is below, PanelS.
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L), .Label = c("CountryA", "CountryB",
"CountryC", "CountryD", "CountryE", "CountryF", "CountryG", "CountryH",
"CountryI", "CountryJ"), class = "factor"), Year = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L), .Label = c("2000", "2001",
"2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2019"), class = "factor"), A = c(0.051539, 0.064525,
0.014292, 0.018774, 0.035449, 0.021988, 0.02396, 0.011415, 0.010358,
-0.029607, -0.020427, -0.012734, 0.006683, 0.007373, -0.039712,
-0.005499, 0.008682, 0.015326, 0.020524, 0.015101, 0.035355,
0.031157, 0.023387, 0.024198, 0.035353, 0.053873, 0.038743, 0.042338,
0.034935, 0.015377, 0.010599, 0.015154, 0.002919, 0.024291, 0.043819,
0.015901, 0.01897, 0.027767, 0.015992, 0.041976, 0.011223, 0.006144,
0.000778, 0.005873, 0.007194, -0.022017, -0.023338, -0.037765,
-0.049356, 0.026135, 0.035633, 0.015691, -0.006196, -0.00025,
0.001181, -0.001472, -0.009324, -0.022664, -0.022623, -0.019586,
-0.012207, -0.004603, -0.013073, -0.010771, -0.009882, -0.014417,
-0.031812, -0.043885, -0.050883, -0.039834, -0.020299, -0.000684,
0.011216, 0.005419, 0.000939, -0.005508, 0.006266, -0.008077,
-0.016137, -0.012681, 0.031612, 0.043729, 0.009314, 0.002734,
-0.012284, 0.002403, 0.016807, 0.019995, 0.033096, 0.024383,
0.010588, 0.019833, 0.031837, 0.03127, 0.029059, 0.020708, 0.019296,
0.017787, 0.032074, 0.027125, 0.005673, 0.003698, -5.3e-05, 0.001794,
-0.011977, -0.008686, -0.031588, -0.039411, -0.073931, -0.076715,
-0.039171, -0.025797, -0.007637, 0.00345, 0.009101, 0.01674,
-0.006968, -0.019178, -0.02438, -0.039663, 0.078313, 0.06707,
0.062822, 0.050771, 0.041274, 0.043921, 0.046429, 0.039418, 0.034671,
0.017356, 0.001054, 0.00414, 0.00226, 0.00275, 0.00085, 0.00495,
0.001276, -0.001446, -0.005771, -0.007513, 0.053734, 0.038679,
0.017375, 0.01438, 0.018403, 0.032943, 0.025539, 0.032463, 0.032267,
0.034009, 0.018229, 0.008958, 0.010079, 0.00749, 0.000604, 0.001948,
0.011782, 0.013253, 0.007898, 0.007546, 0.018052, -0.001123,
-0.012597, -0.042292, -0.058516, -0.022736, -0.03841, -0.050843,
-0.073979, -0.097242, -0.024712, 0.038037, 0.048685, -0.00624,
0.075575, 0.044947, 0.097171, 0.086809, 0.079856, 0.068521, 0.008062,
-0.00911, -0.010527, -4.3e-05, 0.002428, 0.004422, 0.008752,
0.019602, 0.01724, 0.01965, -0.008816, 0.011466, 0.020956, 0.021873,
0.021772, 0.024495, 0.021354, 0.015267, 0.018769, 0.016904),
C = c(0.75345, 0.70657, 0.645051, 0.510055, 0.433786, 0.35728,
0.265817, 0.208721, 0.163261, 0.130248, 0.136607, 0.153873,
0.152275, 0.166592, 0.170559, 0.27089, 0.259813, 0.292847,
0.253142, 0.222618, 0.56764082, 0.523543, 0.485083, 0.49081,
0.461501, 0.44156, 0.374122, 0.315494, 0.27346, 0.333132,
0.401818, 0.425879, 0.460709, 0.448942, 0.440456, 0.442703,
0.397737, 0.372338, 0.359446, 0.340254, 0.064305, 0.05107,
0.047682, 0.056584, 0.055981, 0.051134, 0.047025, 0.046318,
0.037655, 0.045041, 0.071989, 0.066074, 0.061057, 0.097641,
0.101621, 0.105545, 0.09996, 0.099131, 0.091119, 0.082012,
0.120817, 0.120871, 0.138383, 0.13023, 0.141247, 0.146088,
0.119133, 0.100396, 0.084592, 0.185873, 0.368416, 0.479167,
0.4367, 0.421837, 0.400428, 0.416259, 0.37072, 0.40398, 0.390126,
0.371126, 0.079576, 0.074647, 0.076712, 0.074295, 0.074504,
0.079053, 0.080224, 0.082991, 0.082006, 0.15357, 0.161465,
0.201522, 0.190049, 0.219974, 0.236873, 0.227428, 0.219862,
0.200938, 0.223426, 0.209529, 0.217219, 0.224867, 0.258694,
0.248207, 0.221093, 0.189452, 0.159052, 0.124236, 0.119492,
0.123362, 0.217807, 0.296186, 0.339882, 0.371345, 0.376212,
0.391509, 0.378059, 0.373931, 0.351043, 0.347354, 0.440547,
0.424547, 0.409236, 0.401795, 0.427482, 0.426416, 0.399297,
0.381117, 0.339041, 0.325607, 0.415314, 0.469047, 0.482712,
0.536225, 0.562292, 0.598259, 0.636417, 0.631764, 0.612668,
0.596271, 0.605061, 0.503479, 0.518971, 0.498057, 0.492731,
0.484527, 0.486885, 0.43596, 0.388967, 0.374978, 0.407324,
0.381025, 0.371731, 0.375149, 0.402248, 0.449982, 0.437387,
0.422554, 0.407331, 0.389125, 0.989067, 1.049344, 1.070812,
1.048631, 1.014561, 1.028734, 1.073949, 1.036117, 1.03103,
1.094155, 1.267447, 1.474942, 1.752192, 1.619444, 1.784347,
1.802256, 1.770079, 1.807951, 1.792139, 1.862386, 0.601394,
0.590658, 0.579365, 0.597035, 0.633089, 0.649877, 0.673465,
0.667047, 0.639942, 0.655222, 0.729901, 0.823816, 0.79801,
0.811354, 0.787169, 0.756694, 0.72207, 0.692768, 0.651024,
0.617801), B = c(0.147502302, 0.043680673, -0.212478849,
-0.266834333, -0.228099071, -0.199890362, -0.968175801, 1.047500546,
1.273127656, 1.227657506, -0.286068921, -1.356896168, -1.442625298,
-0.291748363, 2.029875219, 1.099611751, -1.112127832, -0.894025857,
0.103213651, 0.286801553, 0.756833023, 0.591945192, 0.525259532,
0.466656359, 0.706692697, -2.361722697, -2.777257989, -4.097114222,
-4.564987155, 2.317853991, 3.44030537, 3.034469093, 5.845290721,
0.403542521, 0.128582254, 0.817094156, -0.886707561, -2.998573025,
-0.491794488, -0.856367773, 0.023343476, -0.209503364, -0.084839186,
-0.146285026, -0.256672799, -0.093852713, 0.145824486, 0.434606031,
0.966980327, 0.67904687, -0.292659443, -0.487763914, -0.084930583,
-0.32722087, -0.442172133, -0.168366978, -0.186469629, 0.046322287,
0.181126569, 0.303486593, 0.171541123, -0.348150815, -0.407466419,
-0.624622679, -0.354132366, -0.15050691, 0.700892294, 0.67692383,
1.014111655, 0.862019536, 0.395600738, -0.256706715, -0.542246369,
-0.539422399, -0.405088653, -0.247954994, -0.497333992, -0.010723655,
0.393516751, 0.169750037, -0.581903347, -0.730163914, 0.351894514,
0.629568917, 0.882078894, 0.760041333, -0.564317727, -0.57799292,
-0.433736512, 0.513350369, 0.55464973, -0.224497194, -0.074326596,
-0.123301819, -0.432013928, -0.25316664, -0.374406673, 0.116449941,
0.308969388, 0.252824183, 2.398228162, -0.033362631, -1.681378615,
-3.655293426, -2.793256764, -3.636310622, 0.149490332, 3.951131246,
7.177449077, 4.831325877, 2.050070679, 1.314471427, -1.687424783,
-3.796189127, -3.329685346, -1.695252718, -3.010416797, -2.414597902,
1.199960369, 4.661041564, 0.531518012, -1.384184059, -0.64216453,
-0.13206166, 0.249287935, -0.153010531, -0.987952985, -1.71711917,
-0.678751076, 0.890062065, 1.663691535, 1.883735194, 2.171029985,
2.383501603, 1.490313839, -0.732542129, -0.291797363, -1.655272704,
-1.613245217, -1.275038743, -0.789256935, -3.589249982, 0.502475039,
1.840081099, 1.141218417, 3.130100399, 3.94751837, 0.97811035,
0.013586974, -3.245960526, -2.068241886, -1.82476664, -1.481654499,
0.37039449, -1.516414277, -1.722381744, 0.683458083, 0.153189319,
3.410781995, 0.067011953, -3.09418792, -4.09753755, -4.682167411,
-1.333607727, 2.505605899, -4.332639317, -2.190945016, 4.048457741,
11.60535564, 13.61047901, 5.145259686, -0.712611552, -3.385649938,
7.214394614, -10.34401695, -1.841542179, -6.437949187, -4.545422837,
-0.012548047, 2.881273043, 3.227611639, 10.96399365, 16.38843255,
14.72001327, -13.84595255, -10.51570643, -13.59695535, -36.70577424,
-12.07070647, 12.51742535, 52.88207865, 9.143152612, -7.818895359,
-15.57456939, -21.31957866, -23.55720863, -5.574415019, 5.783084584,
12.02189272, 22.93207708), D = c(0.77780751, 0.793229898,
0.80623893, 0.821155065, 0.836880111, 0.854312944, 0.873660631,
0.890537317, 0.907536298, 0.912375095, 0.929637942, 0.946439284,
0.965000087, 0.97726773, 0.986870808, 1, 1.019208507, 1.037842597,
1.054711181, 1.072171599, 0.534008473, 0.566583199, 0.58762954,
0.601043497, 0.63362178, 0.673913677, 0.719447102, 0.799187909,
0.864173776, 0.899162389, 0.909465125, 0.96350569, 0.978220642,
0.971679886, 0.976158221, 1, 1.025374896, 1.065804414, 1.108567186,
1.166769344, 0.588726028, 0.64526073, 0.733094431, 0.718268082,
0.746291144, 0.799900392, 0.846050389, 0.894179583, 1.015232882,
0.982856394, 1.012948099, 1.041332642, 1.032947106, 1.013566583,
0.980944689, 1, 1.020576612, 1.061740647, 1.117831183, 1.159906251,
0.750587042, 0.769670674, 0.790024355, 0.801712216, 0.817505148,
0.83991247, 0.856517319, 0.878345181, 0.914006005, 0.920044857,
0.949573071, 0.955207703, 0.978810398, 0.985618398, 0.996205139,
1, 1.004364708, 1.017159213, 1.021013703, 1.02682649, 0.825278825,
0.836048671, 0.847570474, 0.858769029, 0.86834942, 0.871868036,
0.875331803, 0.890827568, 0.898928134, 0.915485416, 0.921392822,
0.931246968, 0.945182975, 0.963702812, 0.981800571, 1, 1.013277522,
1.026999204, 1.044176589, 1.067069774, 0.490666665, 0.523850087,
0.54906662, 0.570457925, 0.597126217, 0.632406036, 0.689467717,
0.775073059, 0.828560075, 0.827109078, 0.842215091, 0.887572897,
0.923280339, 0.960610381, 0.988936452, 1, 1.022699304, 1.054533263,
1.098615084, 1.134067127, 0.757140805, 0.809228408, 0.851488047,
0.884918505, 0.889385715, 0.916751643, 0.948479832, 0.960072842,
0.956196673, 0.911566837, 0.884542463, 0.89644222, 0.917048164,
0.929279352, 0.929337342, 1, 1.010128912, 1.026719845, 1.029923385,
1.062349178, 0.786853444, 0.804351028, 0.831286834, 0.859995963,
0.886334727, 0.906191485, 0.937863282, 0.969963165, 1.012104032,
1.038112793, 1.036283847, 1.046222, 1.043339336, 1.02279939,
1.002888566, 1, 0.994233243, 0.998082845, 0.997049083, 0.998951287,
0.740171055, 0.770579402, 0.802054487, 0.833603662, 0.865965514,
0.90147914, 0.937354271, 0.969378485, 0.99123068, 0.992657113,
0.994179737, 0.993983379, 0.992844694, 0.99680058, 0.994574042,
1, 1.003228988, 1.016266499, 1.028341184, 1.04261954, 0.801617134,
0.817716283, 0.834621959, 0.850140657, 0.863935678, 0.880664424,
0.899645623, 0.9226463, 0.944486016, 0.945115307, 0.95522518,
0.964280334, 0.975483583, 0.983073825, 0.988745617, 1, 1.005225593,
1.010468623, 1.020086873, 1.032605559)), row.names = c("CountryA-2000",
"CountryA-2001", "CountryA-2002", "CountryA-2003", "CountryA-2004",
"CountryA-2005", "CountryA-2006", "CountryA-2007", "CountryA-2008",
"CountryA-2009", "CountryA-2010", "CountryA-2011", "CountryA-2012",
"CountryA-2013", "CountryA-2014", "CountryA-2015", "CountryA-2016",
"CountryA-2017", "CountryA-2018", "CountryA-2019", "CountryB-2000",
"CountryB-2001", "CountryB-2002", "CountryB-2003", "CountryB-2004",
"CountryB-2005", "CountryB-2006", "CountryB-2007", "CountryB-2008",
"CountryB-2009", "CountryB-2010", "CountryB-2011", "CountryB-2012",
"CountryB-2013", "CountryB-2014", "CountryB-2015", "CountryB-2016",
"CountryB-2017", "CountryB-2018", "CountryB-2019", "CountryC-2000",
"CountryC-2001", "CountryC-2002", "CountryC-2003", "CountryC-2004",
"CountryC-2005", "CountryC-2006", "CountryC-2007", "CountryC-2008",
"CountryC-2009", "CountryC-2010", "CountryC-2011", "CountryC-2012",
"CountryC-2013", "CountryC-2014", "CountryC-2015", "CountryC-2016",
"CountryC-2017", "CountryC-2018", "CountryC-2019", "CountryD-2000",
"CountryD-2001", "CountryD-2002", "CountryD-2003", "CountryD-2004",
"CountryD-2005", "CountryD-2006", "CountryD-2007", "CountryD-2008",
"CountryD-2009", "CountryD-2010", "CountryD-2011", "CountryD-2012",
"CountryD-2013", "CountryD-2014", "CountryD-2015", "CountryD-2016",
"CountryD-2017", "CountryD-2018", "CountryD-2019", "CountryE-2000",
"CountryE-2001", "CountryE-2002", "CountryE-2003", "CountryE-2004",
"CountryE-2005", "CountryE-2006", "CountryE-2007", "CountryE-2008",
"CountryE-2009", "CountryE-2010", "CountryE-2011", "CountryE-2012",
"CountryE-2013", "CountryE-2014", "CountryE-2015", "CountryE-2016",
"CountryE-2017", "CountryE-2018", "CountryE-2019", "CountryF-2000",
"CountryF-2001", "CountryF-2002", "CountryF-2003", "CountryF-2004",
"CountryF-2005", "CountryF-2006", "CountryF-2007", "CountryF-2008",
"CountryF-2009", "CountryF-2010", "CountryF-2011", "CountryF-2012",
"CountryF-2013", "CountryF-2014", "CountryF-2015", "CountryF-2016",
"CountryF-2017", "CountryF-2018", "CountryF-2019", "CountryG-2000",
"CountryG-2001", "CountryG-2002", "CountryG-2003", "CountryG-2004",
"CountryG-2005", "CountryG-2006", "CountryG-2007", "CountryG-2008",
"CountryG-2009", "CountryG-2010", "CountryG-2011", "CountryG-2012",
"CountryG-2013", "CountryG-2014", "CountryG-2015", "CountryG-2016",
"CountryG-2017", "CountryG-2018", "CountryG-2019", "CountryH-2000",
"CountryH-2001", "CountryH-2002", "CountryH-2003", "CountryH-2004",
"CountryH-2005", "CountryH-2006", "CountryH-2007", "CountryH-2008",
"CountryH-2009", "CountryH-2010", "CountryH-2011", "CountryH-2012",
"CountryH-2013", "CountryH-2014", "CountryH-2015", "CountryH-2016",
"CountryH-2017", "CountryH-2018", "CountryH-2019", "CountryI-2000",
"CountryI-2001", "CountryI-2002", "CountryI-2003", "CountryI-2004",
"CountryI-2005", "CountryI-2006", "CountryI-2007", "CountryI-2008",
"CountryI-2009", "CountryI-2010", "CountryI-2011", "CountryI-2012",
"CountryI-2013", "CountryI-2014", "CountryI-2015", "CountryI-2016",
"CountryI-2017", "CountryI-2018", "CountryI-2019", "CountryJ-2000",
"CountryJ-2001", "CountryJ-2002", "CountryJ-2003", "CountryJ-2004",
"CountryJ-2005", "CountryJ-2006", "CountryJ-2007", "CountryJ-2008",
"CountryJ-2009", "CountryJ-2010", "CountryJ-2011", "CountryJ-2012",
"CountryJ-2013", "CountryJ-2014", "CountryJ-2015", "CountryJ-2016",
"CountryJ-2017", "CountryJ-2018", "CountryJ-2019"), class = c("pdata.frame",
"data.frame"), index = structure(list(Country = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), .Label = c("CountryA",
"CountryB", "CountryC", "CountryD", "CountryE", "CountryF", "CountryG",
"CountryH", "CountryI", "CountryJ"), class = "factor"), Year = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L), .Label = c("2000", "2001",
"2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2019"), class = "factor")), class = c("pindex", "data.frame"
), row.names = c(NA, 200L)))
You can use function Between from package plm to calculate the cross sectional averages and add them to your data:
library(plm)
# PanelS is a pdata.frame (otherwise use pdata.frame(your_data, index))
PanelS$A_bar <- Between(PanelS$A)
PanelS$B_bar <- Between(PanelS$B)
PanelS$C_bar <- Between(PanelS$C)
PanelS$D_bar <- Between(PanelS$D)
mod <- plm(A ~ B + C + D + A_bar + B_bar + C_bar + D_bar, model = "pooling", effect="individual", data = PanelS)
summary(mod)
# Pooling Model
#
# Call:
# plm(formula = A ~ B + C + D + A_bar + B_bar + C_bar + D_bar,
# data = PanelS, effect = "individual", model = "pooling")
#
# Balanced Panel: n = 10, T = 20, N = 200
#
# Residuals:
# Min. 1st Qu. Median 3rd Qu. Max.
# -0.06143690 -0.01311792 0.00070253 0.01186605 0.05107105
#
# Coefficients:
# Estimate Std. Error t-value Pr(>|t|)
# (Intercept) -0.00000000000001042 0.03313743211380626 0.0000 1.000000
# B -0.00076930351859426 0.00020566635571130 -3.7405 0.000242 ***
# C 0.10827039012266901 0.00949296134830719 11.4053 < 0.00000000000000022 ***
# D -0.04222788490989914 0.01136058813979121 -3.7171 0.000264 ***
# A_bar 0.99999999999911215 0.09632471140222754 10.3816 < 0.00000000000000022 ***
# C_bar -0.10827039012256123 0.01033406661607372 -10.4770 < 0.00000000000000022 ***
# D_bar 0.04222788490990802 0.03874710199411169 1.0898 0.277145
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Total Sum of Squares: 0.17549
# Residual Sum of Squares: 0.07128
# R-Squared: 0.59382
# Adj. R-Squared: 0.58119
# F-statistic: 47.0268 on 6 and 193 DF, p-value: < 0.000000000000000222
Note that it seems like you want to estimate a fixed effects model but your estimation has model = "fd" to estimate a first-differenced model. Also note that the cross sectional averages will drop out of the estimation of a fixed effects model.
I have a cleaned data set with 26 nodes. I am placing these 26 nodes in an undirected network graph using tidygraph, where I use the centrality_degree() function to calculate the centrality degree. However, when I graph the resulting network, my highest possible centrality degree is 40, which should not be possible. When I change the graph to directed, this is corrected.
I somewhat confused, as other methods I have used in the past, where I manually calculated the centrality degree, I have never once come across this issue.
Is this regular behaviour, or am I doing something wrong?
Reproducible example:
library(tidygraph)
library(ggraph)
library(tidyverse)
nodes <- structure(list(id = 1:26, label = c("a", "b", "c", "d", "e",
"f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
"s", "t", "u", "v", "w", "x", "y", "z")), row.names = c(NA, -26L
), class = "data.frame")
edges <- structure(list(from = c(21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
25L, 25L, 25L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 17L, 17L, 17L, 17L, 17L),
to = c(1L, 12L, 3L, 16L, 24L, 4L, 10L, 6L, 22L, 2L, 8L, 1L,
12L, 13L, 3L, 18L, 16L, 24L, 5L, 7L, 14L, 4L, 10L, 6L, 9L,
22L, 15L, 2L, 20L, 8L, 21L, 12L, 13L, 3L, 16L, 24L, 5L, 7L,
14L, 4L, 10L, 6L, 22L, 15L, 2L, 8L, 17L, 21L, 1L, 13L, 3L,
16L, 5L, 7L, 14L, 10L, 6L, 9L, 22L, 15L, 2L, 20L, 8L, 17L,
21L, 1L, 3L, 18L, 16L, 5L, 7L, 14L, 4L, 10L, 6L, 25L, 9L,
22L, 15L, 20L, 8L, 17L, 21L, 11L, 1L, 12L, 13L, 18L, 16L,
24L, 5L, 7L, 14L, 4L, 10L, 6L, 25L, 9L, 22L, 15L, 20L, 8L,
17L, 1L, 3L, 10L, 6L, 22L, 20L, 8L, 21L, 11L, 1L, 13L, 3L,
18L, 24L, 7L, 4L, 10L, 6L, 25L, 9L, 22L, 15L, 2L, 20L, 8L,
17L, 21L, 11L, 1L, 12L, 13L, 18L, 16L, 5L, 7L, 14L, 10L,
6L, 25L, 9L, 22L, 15L, 20L, 8L, 17L, 1L, 3L, 18L, 16L, 7L,
14L, 4L, 10L, 6L, 9L, 22L, 15L, 2L, 20L, 8L, 17L, 21L, 11L,
1L, 12L, 13L, 3L, 18L, 16L, 24L, 14L, 4L, 10L, 6L, 25L, 9L,
22L, 15L, 2L, 20L, 8L, 11L, 1L, 3L, 18L, 16L, 7L, 10L, 6L,
9L, 22L, 15L, 2L, 20L, 8L, 17L, 21L, 11L, 1L, 12L, 13L, 3L,
18L, 16L, 24L, 5L, 7L, 14L, 10L, 6L, 25L, 9L, 22L, 15L, 2L,
20L, 8L, 17L, 21L, 11L, 1L, 12L, 13L, 3L, 18L, 16L, 24L,
5L, 7L, 14L, 4L, 6L, 25L, 9L, 22L, 15L, 2L, 20L, 8L, 17L,
21L, 11L, 1L, 12L, 13L, 3L, 18L, 24L, 5L, 7L, 14L, 4L, 10L,
25L, 9L, 22L, 15L, 2L, 20L, 8L, 21L, 1L, 13L, 3L, 18L, 5L,
10L, 6L, 22L, 2L, 20L, 8L, 21L, 1L, 13L, 3L, 18L, 16L, 24L,
4L, 10L, 6L, 22L, 15L, 2L, 20L, 8L, 11L, 1L, 12L, 13L, 3L,
16L, 24L, 5L, 7L, 14L, 4L, 10L, 6L, 25L, 9L, 15L, 2L, 20L,
8L, 17L, 21L, 1L, 12L, 3L, 18L, 16L, 24L, 7L, 10L, 6L, 25L,
9L, 22L, 2L, 20L, 8L, 17L, 21L, 11L, 1L, 12L, 13L, 3L, 18L,
16L, 24L, 5L, 7L, 14L, 4L, 6L, 25L, 9L, 22L, 15L, 20L, 8L,
17L, 21L, 11L, 1L, 3L, 16L, 24L, 7L, 10L, 6L, 22L, 2L, 8L,
21L, 11L, 1L, 12L, 13L, 3L, 18L, 16L, 24L, 14L, 4L, 10L,
6L, 25L, 9L, 22L, 2L, 20L, 7L, 6L, 25L, 22L, 8L), weight = c(3L,
1L, 3L, 2L, 1L, 1L, 5L, 1L, 8L, 2L, 1L, 2L, 3L, 2L, 5L, 1L,
4L, 1L, 4L, 4L, 4L, 1L, 5L, 13L, 3L, 7L, 3L, 2L, 3L, 8L,
1L, 1L, 1L, 15L, 10L, 7L, 2L, 4L, 2L, 5L, 19L, 23L, 6L, 2L,
11L, 7L, 1L, 1L, 2L, 3L, 3L, 5L, 4L, 5L, 4L, 4L, 21L, 2L,
9L, 8L, 1L, 1L, 12L, 1L, 2L, 1L, 3L, 1L, 6L, 6L, 5L, 6L,
1L, 6L, 22L, 2L, 2L, 9L, 8L, 3L, 13L, 1L, 5L, 6L, 4L, 10L,
13L, 3L, 41L, 46L, 11L, 39L, 9L, 55L, 2L, 108L, 2L, 8L, 31L,
30L, 13L, 39L, 2L, 2L, 1L, 3L, 4L, 8L, 5L, 1L, 8L, 1L, 6L,
1L, 8L, 2L, 3L, 23L, 2L, 12L, 96L, 1L, 3L, 21L, 1L, 6L, 12L,
38L, 4L, 5L, 4L, 4L, 8L, 8L, 3L, 29L, 3L, 11L, 3L, 3L, 63L,
2L, 5L, 18L, 19L, 4L, 25L, 1L, 2L, 3L, 1L, 7L, 6L, 7L, 1L,
3L, 17L, 1L, 3L, 6L, 1L, 4L, 11L, 1L, 5L, 1L, 5L, 1L, 1L,
15L, 4L, 7L, 3L, 1L, 4L, 12L, 8L, 1L, 9L, 32L, 3L, 7L, 5L,
35L, 1L, 1L, 3L, 1L, 6L, 4L, 4L, 12L, 2L, 5L, 4L, 2L, 2L,
9L, 1L, 2L, 3L, 4L, 9L, 13L, 2L, 1L, 25L, 25L, 10L, 14L,
10L, 4L, 59L, 4L, 5L, 21L, 19L, 1L, 8L, 27L, 3L, 5L, 8L,
8L, 11L, 12L, 111L, 5L, 50L, 45L, 15L, 32L, 10L, 49L, 109L,
1L, 8L, 28L, 39L, 53L, 13L, 48L, 5L, 13L, 2L, 20L, 3L, 3L,
27L, 10L, 8L, 1L, 58L, 1L, 7L, 32L, 13L, 21L, 110L, 1L, 17L,
27L, 124L, 1L, 1L, 1L, 2L, 3L, 1L, 1L, 2L, 7L, 1L, 1L, 1L,
2L, 2L, 1L, 5L, 2L, 2L, 2L, 1L, 3L, 3L, 14L, 2L, 2L, 4L,
1L, 3L, 14L, 5L, 8L, 44L, 16L, 14L, 4L, 12L, 4L, 19L, 41L,
47L, 2L, 1L, 11L, 24L, 2L, 18L, 1L, 7L, 5L, 1L, 7L, 3L, 27L,
3L, 15L, 7L, 54L, 1L, 4L, 17L, 5L, 6L, 27L, 1L, 1L, 2L, 3L,
4L, 10L, 56L, 3L, 25L, 25L, 7L, 16L, 5L, 29L, 59L, 3L, 3L,
20L, 17L, 5L, 31L, 3L, 6L, 1L, 4L, 7L, 1L, 3L, 1L, 6L, 5L,
13L, 1L, 2L, 9L, 1L, 15L, 2L, 1L, 16L, 4L, 4L, 3L, 1L, 6L,
17L, 10L, 1L, 13L, 63L, 11L, 12L, 1L, 5L, 1L, 2L, 3L)), row.names = c(NA,
-383L), class = c("tbl_df", "tbl", "data.frame"))
routes_tidy <- tbl_graph(nodes=nodes, edges=edges, directed=FALSE) %>% mutate(neighbors = centrality_degree())
# Filtering out 3 nodes out of the graph as they have no connections and zoom the figure way out
ggraph(routes_tidy, layout="graphopt") +
geom_node_point(aes(size=neighbors, filter=(label!="z" & label!="s" & label!="w"))) +
geom_edge_link(aes(width=weight, alpha=weight)) +
scale_edge_width(range=c(0.2, 2)) +
geom_node_text(aes(label=label, fontface="bold", size=neighbors, filter=(label!="z" & label!="s" & label!="w")), repel=TRUE) +
labs(edge_width="N") +
theme_graph()
I'm new to the whole tidygraph thing, stumbled over this question, got confused, and figured it'd be a nice way to get to know stuff. So, I don't know if it's a bug or a feature, but the behaviour is triggered because you have doubled edges:
# Given your edges
edges %>%
filter((from == 1 & to == 2) | from == 2 & to == 1)
# A tibble: 2 x 3
from to weight
<int> <int> <int>
1 1 2 11
2 2 1 3
And those count as 2 connections in the calculation of the degree centrality. One way to remove those double edges is to convert the network to a simple network:
routes_simple <-
routes_tidy %>%
morph(to_simple) %>%
crystallise() %>%
pull(graph) %>%
getElement(1) %>%
activate(nodes) %>%
mutate(neighbors = centrality_degree())
Now the maximum degree is 22 (and the heighest possible, presumably, 25).
I need some help regarding transforming a geom_bar into a geom_area plot. This is my df:
dput(df)
df <- structure(list(new_day = c(-25L, 3L, 7L, -7L, 3L, 7L, -7L, 0L,
-25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L,
0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L, 0L, 3L, 7L, -7L,
0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L,
-25L, 3L, 7L, -7L, 0L, 3L, -7L, 0L, -25L, 7L, 3L, 7L, -7L, 0L,
-25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, 3L, 7L, -7L, 0L, -25L, 3L,
7L, -7L, 0L, 7L, -25L, 3L, 7L, -7L, 0L, 3L, 7L, -25L, -25L, -25L,
-25L, -25L, -25L, -25L), order = structure(c(8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 11L, 11L, 11L, 11L, 11L, 13L, 13L, 13L, 13L,
13L, 10L, 10L, 10L, 10L, 10L, 7L, 7L, 7L, 7L, 7L, 2L, 2L, 2L,
2L, 2L, 7L, 7L, 7L, 7L, 9L, 9L, 9L, 9L, 9L, 1L, 1L, 1L, 1L, 1L,
9L, 9L, 9L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 13L, 13L, 14L, 14L,
14L, 14L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 13L, 13L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 1L, 7L, 5L, 2L,
12L, 2L, 2L), .Label = c("Alteromonadales", "Betaproteobacteriales",
"Caulobacterales", "Chitinophagales", "Flavobacteriales", "Parvibaculales",
"Pseudomonadales", "Rhizobiales", "Rhodobacterales", "Rhodospirillales",
"Sneathiellales", "Sphingobacteriales", "Sphingomonadales", "Thalassobaculales"
), class = "factor"), family = structure(c(13L, 13L, 13L, 13L,
12L, 12L, 12L, 12L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
16L, 7L, 7L, 7L, 7L, 7L, 11L, 11L, 11L, 11L, 11L, 1L, 1L, 1L,
1L, 1L, 11L, 11L, 11L, 11L, 14L, 14L, 14L, 14L, 14L, 4L, 4L,
4L, 4L, 4L, 14L, 14L, 14L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 16L,
16L, 17L, 17L, 17L, 17L, 8L, 8L, 8L, 8L, 8L, 5L, 5L, 5L, 16L,
16L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 8L, 8L, 8L, 8L,
8L, 10L, 10L, 6L, 11L, 3L, 1L, 9L, 1L, 1L), .Label = c("Burkholderiaceae",
"Chitinophagaceae", "Flavobacteriaceae", "Gallaecimonadaceae",
"Hyphomonadaceae", "Idiomarinaceae", "Magnetospiraceae", "Methylophilaceae",
"NS11-12_marine_group", "Parvibaculaceae", "Pseudomonadaceae",
"Rhizobiaceae", "Rhizobiales_unclassified", "Rhodobacteraceae",
"Sneathiellaceae", "Sphingomonadaceae", "Thalassobaculaceae"), class = "factor"),
genus = structure(c(16L, 16L, 16L, 16L, 7L, 7L, 7L, 7L, 3L,
3L, 3L, 3L, 3L, 19L, 19L, 19L, 19L, 19L, 24L, 24L, 24L, 24L,
24L, 14L, 14L, 14L, 14L, 14L, 17L, 17L, 17L, 17L, 17L, 14L,
14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 5L, 5L, 5L, 5L, 5L,
10L, 10L, 10L, 2L, 2L, 2L, 2L, 2L, 22L, 22L, 22L, 20L, 20L,
23L, 23L, 23L, 23L, 11L, 11L, 11L, 11L, 11L, 8L, 8L, 8L,
21L, 21L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 11L, 11L,
11L, 11L, 11L, 13L, 13L, 9L, 14L, 4L, 6L, 12L, 1L, 18L), .Label = c("Burkholderiaceae_unclassified",
"Cupriavidus", "Ferrovibrio", "Flavobacteriaceae_unclassified",
"Gallaecimonas", "GKS98_freshwater_group", "Hoeflea", "Hyphomonas",
"Idiomarina", "Marivivens", "Methylotenera", "NS11-12_marine_group_ge",
"Parvibaculum", "Pseudomonas", "Pseudorhodobacter", "Rhizobiales_unclassified",
"Rhodoferax", "RS62_marine_group", "Sphingomonadaceae_unclassified",
"Sphingopyxis", "Sphingorhabdus", "Terrimonas", "Thalassobaculum",
"uncultured"), class = "factor"), Abundance = c(0.758296593899054,
0.728046713738242, 0.421798852637834, 0.185971692147469,
7.36584152568739, 11.0004160226707, 1.93134577450352, 19.7144376530921,
46.2350237547082, 25.8715062086956, 22.1549641486618, 34.4112477828867,
20.4937613394223, 3.73518219692229, 15.9295990367068, 13.8490383262387,
13.3481723220855, 20.3866145291388, 0.165618346100574, 8.86991024549668,
8.5330814375361, 6.86819004205197, 5.72129192186814, 1.04512973253723,
3.77880217461655, 6.47871112880127, 1.12084852451492, 0.903754246093232,
19.0854333497858, 15.7152146349298, 12.3768753373503, 15.8790763239117,
10.2875187327705, 2.82159106304821, 4.22393981370602, 8.82452898193968,
4.8507226701533, 6.19619716749583, 8.28477594908417, 8.05201189383953,
9.7404731686272, 9.84535225459449, 1.7940554465653, 2.62276259756813,
2.74008811315788, 0.543937440677315, 0.55325167765205, 0.910457573040239,
0.451385497886567, 0.655661306732001, 6.59400178917785, 1.92570846362683,
2.62192443054515, 2.10049053655497, 2.13139299576524, 0.20799245164738,
0.324291631088576, 0.369492771993701, 1.52162438803598, 0.151864202275619,
0.420953084533189, 0.391517677365401, 0.29116200940885, 0.232440441774702,
4.21428798609281, 0.859779996836882, 1.33107018783728, 1.013155122065,
0.447286602320585, 0.165001492967355, 0.285983094976304,
0.377758692391269, 0.21556919104275, 0.314057858254493, 0.354649793637887,
0.338799824269294, 0.218027624939685, 0.914324162324944,
1.22932824654674, 0.731649603629864, 0.566393265064962, 0.247942012186621,
1.73171328618728, 0.636597714441988, 0.505393049999761, 0.491318560043637,
0.990988961717433, 0.195417142399681, 0.210412739808352,
0.476107780140271, 0.936663899397428, 0.251540964619117,
0.963667386912928, 0.504905545701818, 0.296220086916766,
0.240809811677774)), class = "data.frame", row.names = c(52L,
68L, 72L, 93L, 165L, 169L, 190L, 194L, 246L, 262L, 266L, 287L,
291L, 343L, 359L, 363L, 384L, 388L, 440L, 456L, 460L, 481L, 485L,
634L, 650L, 654L, 675L, 679L, 731L, 747L, 751L, 772L, 776L, 844L,
848L, 869L, 873L, 925L, 941L, 945L, 966L, 970L, 1022L, 1038L,
1042L, 1063L, 1067L, 1216L, 1232L, 1236L, 1313L, 1329L, 1333L,
1354L, 1358L, 1426L, 1451L, 1455L, 1507L, 1527L, 1717L, 1721L,
1742L, 1746L, 2186L, 2202L, 2206L, 2227L, 2231L, 2380L, 2396L,
2400L, 3075L, 3079L, 3294L, 3298L, 3350L, 3366L, 3370L, 3391L,
3395L, 3467L, 4223L, 4239L, 4243L, 4264L, 4268L, 4433L, 4437L,
4708L, 4805L, 4902L, 5193L, 5969L, 7909L, 8006L))
and this is the structure:
> str(df)
'data.frame': 96 obs. of 5 variables:
$ new_day : int -25 3 7 -7 3 7 -7 0 -25 3 ...
$ order : Factor w/ 14 levels "Alteromonadales",..: 8 8 8 8 8 8 8 8 11 11 ...
$ family : Factor w/ 17 levels "Burkholderiaceae",..: 13 13 13 13 12 12 12 12 15 15 ...
$ genus : Factor w/ 24 levels "Burkholderiaceae_unclassified",..: 16 16 16 16 7 7 7 7 3 3 ...
$ Abundance: num 0.758 0.728 0.422 0.186 7.366 ...
my data is about relative abundances of species over time, I removed rare species so it doesn't add up to 100 % anymore,
but that is fine, it is about 98 % per date. However, I get these weird free polygons and triangles which I recognize from incorrect grouping etc., but the group parameter did not change anything here. I also tried several position and stat arguments, which did not help. Maybe it is about the order of factors or something?
What I'm looking for is a stacked plot of the abundances of cumulated orders without empty spaces in between etc. Create proportional geom_area plot directly in ggplot2
# area plot combining species on order level
ggplot(df, aes(x = new_day, y = Abundance, fill = order)) +
geom_area(stat = "identity") +
geom_vline(aes(xintercept = 0), linetype = "dashed", size = 1.2)
I get fewer weird shapes when going to a more detailed hierarchical level (genus instead of order)
# area plot on genus level
ggplot(df, aes(x = new_day, y = Abundance, fill = genus)) +
geom_area(stat = "identity", position = "stack") +
geom_vline(aes(xintercept = 0), linetype = "dashed", size = 1.2)
but these are still more blank areas than there should be by the sum of abundances for a given time
# total abundance per day
sum(subset(df, new_day == -25)$Abundance)
[1] 98.03997
Any suggestions on how to fix this?
The problem is that you sometimes have several abundance values for one new_day, even with more detailed hierarchical levels.
This is what creates discontinuities in the area plot. You need to have only one unique value for each new_day. In my example below, I just take the first abundance value after grouping by new_day and order, but it is probably not relevant for what you want to show. (You may want to take the mean or attributes these values to other new_day points in between, whatever you need).
The remaining little gaps are caused by the missing abundance values, since as you said, it does not add up to 100%. This is not a big deal, but you can probably fix it by replacing the missing values by 0.
EDIT : Now doing the sum of abundance values as you mentioned, and removing the small remaining gaps by replacing missing values by 0.
library(tidyverse)
df %>%
# Sum abundance values, to only keep one per point
group_by(new_day, order) %>%
summarise(abundance=sum(Abundance)) %>%
ungroup() %>%
# Replace missing values by 0
spread(key=order, value=abundance) %>%
gather(key=order, value=abundance, -new_day) %>%
replace_na(list(abundance=0)) -> data
ggplot(data, aes(x = new_day, y = abundance, fill=order)) +
geom_area(stat = "identity") +
geom_vline(aes(xintercept = 0), linetype = "dashed", size = 1.2)
I have a long format dataframe of responses to a repeated question about puberty status vb_ asked approximately yearly at ages 9, 10, 11, 13, 14, 15, 16, and 17.
Each year participants were asked to rate their development from 1 to 5, with 1 being least developed and 5 being most developed.
I would like to use R's ifelse() to identify inconsistent responses i.e. those that report a stage at one year that is lower than any of the previous years.
Here is some fake example data for 20 people:
vb <- structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
19L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L), age = c(9L, 10L,
11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L,
17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L,
14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L,
10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L,
16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L,
13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L,
9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L,
15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L,
11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L,
17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L,
14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L,
10L, 11L, 13L, 14L, 15L, 16L, 17L, 9L, 10L, 11L, 13L, 14L, 15L,
16L, 17L), vb_ = c(1L, 1L, 1L, 3L, 4L, 4L, 4L, 5L, 2L, 2L, 3L,
4L, 5L, 5L, 5L, 5L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 2L, 1L, 3L,
4L, 4L, 5L, 5L, 5L, 2L, 2L, 1L, 3L, 4L, 3L, 4L, 4L, 1L, 1L, 1L,
3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 4L, 4L, 4L, 5L, 5L, 2L, 2L, 2L,
4L, 5L, 4L, 4L, 5L, 2L, 2L, 1L, 4L, 5L, 5L, 5L, 5L, 1L, 2L, 3L,
4L, 5L, 5L, 4L, 5L, 1L, 1L, 1L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
1L, 4L, 4L, 4L, 4L, 1L, 1L, 3L, 4L, 4L, 4L, 5L, 5L, 1L, 1L, 1L,
4L, 4L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 4L, 4L, 5L, 5L, 2L, 3L, 3L,
4L, 4L, 5L, 5L, 5L, 1L, 1L, 2L, 2L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
3L, 3L, 4L, 5L, 5L, 1L, 1L, 1L, 2L, 4L, 4L, 4L, 4L, 1L, 1L, 1L,
2L, 4L, 4L, 4L, 4L)), class = "data.frame", row.names = c(NA,
-160L), .Names = c("id", "age", "vb_"))
If you insist on a ifelse approach you can do:
vb <- vb[order(vb$id, vb$age), ]
vb$inconsistent <- ifelse(vb$id == lag(vb$id),
ifelse(vb$vb_ < lag(vb$vb_), "inconsistent", ""),
"")
vb$inconsistent[1] <- ""
id age vb_ inconsistent
1 1 9 1
2 1 10 1
3 1 11 1
4 1 13 3
5 1 14 4
6 1 15 4
7 1 16 4
8 1 17 5
9 2 9 2
10 2 10 2
11 2 11 3
12 2 13 4
13 2 14 5
14 2 15 5
15 2 16 5
16 2 17 5
17 3 9 2
18 3 10 3
19 3 11 3
20 3 13 3
21 3 14 4
22 3 15 4
23 3 16 4
24 3 17 5
25 4 9 2
26 4 10 1 inconsistent
27 4 11 3
...
Or one approach with dplyr is:
library(dplyr)
vb %>%
group_by(id) %>%
arrange(id, age) %>%
mutate(vb_diff = vb_ - lag(vb_)) %>%
filter(vb_diff < 0)
# A tibble: 6 x 4
# Groups: id [5]
id age vb_ vb_diff
<int> <int> <int> <int>
1 4 10 1 -1
2 5 11 1 -1
3 5 15 3 -1
4 8 15 4 -1
5 9 11 1 -1
6 10 16 4 -1
Here you go.
vb <- vb[order(vb$id, vb$age),]
vb$decreasingdevelopment <- c(0, diff(vb$vb_))<0 #difference between this score and previous <0
vb$sameperson <- c(0, diff(vb$id))==0 #is this the same participant than previous
vb$inconsistency <- vb$decreasingdevelopment&vb$sameperson #ifelse(vb$devdiff&vb$sameperson, T, F)
which(vb$inconsistency)
#[1] 26 35 38 62 67 79
Note that the use of ifelse() is possible but not necessary.
PS: for completeness of the answer, you should always use the following :
vb$inconsistency_robust <- apply(vb, 1, function(x) length(which(vb$vb_>x["vb_"]&vb$age<x["age"]&vb$id==x["id"]))>0)
#x["decreasingdevelopment"]&x["sameperson"])
all.equal(which(vb$inconsistency_robust), which(vb$inconsistency))
#> which(vb$inconsistency_robust)
#[1] 26 35 38 62 63 67 79
#> which(vb$inconsistency)
#[1] 26 35 38 62 67 79
Note how the robust method spots all the occurences of inconsistencies while my more naive ifelse() method here only compares line to line.
I have a data set with 4 columns, 2 of which are numeric, 1 is categorical and 1 is the label. The label has 13 levels (A to M). I tried to use knncat package in R to do classification, but every time I ran the code, I got the following error message:
Error in `[<-.data.frame`(`*tmp*`, factor.vars, value = c("M", "J", "K", :
replacement has 45500 rows, data has 1
The following is the code I used:
data <- read.csv('mosaic_data2.csv', header = T)
num <- dim(data)[1]
library(sampling)
set.seed(1234)
train_index <- sample(seq(1,num,1), floor(num * 0.7), replace = F)
test_index <- setdiff(seq(1,num,1), train_index)
train_data <- data[train_index,]
test_data <- data[test_index,]
library(knncat)
model <- knncat(train_data, classcol = 2)
Could anyone please take a look at the code and advise how I could eliminate this bug? Thank you very much!
The output of dput(head(data,100)) is as follows:
structure(list(latitude = c(52.7326028, 52.74287543, 52.82107841,
52.82025363, 52.81980596, 52.81721897, 52.81274172, 52.81274172,
52.8089586, 52.81424219, 52.8089586, 52.74007929, 52.77394023,
52.73659034, 52.73672518, 52.73764626, 52.73753744, 52.73659034,
52.73815233, 52.73679388, 52.73890319, 52.71697237, 52.63730282,
52.62720385, 52.63730282, 52.63543017, 52.63768035, 52.63510366,
52.6346578, 52.6346578, 52.6346578, 52.63447454, 52.63576418,
52.63447454, 52.6346578, 52.63447454, 52.69820719, 52.69603926,
52.68246919, 52.54600173, 52.54210198, 52.60628983, 52.61003275,
52.60278236, 52.60239604, 52.60348688, 52.60239604, 52.60382146,
52.60315644, 52.86047938, 52.86576353, 52.86954228, 52.81039471,
52.82094872, 52.82395073, 52.82444705, 52.88098384, 52.88469208,
52.88469208, 52.84979201, 52.84720159, 52.84831759, 52.82435938,
52.82319493, 52.82168337, 52.8230402, 52.8230402, 52.82513486,
52.82472379, 52.82756385, 52.82475438, 52.82434902, 52.82166611,
52.823712, 52.82401481, 52.82483489, 52.82103704, 52.82060763,
52.8208682, 52.82211317, 52.81868547, 52.8198332, 52.82023595,
52.81989134, 52.8196971, 52.82051066, 52.82463338, 52.82539131,
52.82580625, 52.82509199, 52.83759415, 52.83946254, 52.83946254,
52.83891871, 52.83821538, 52.84757879, 52.84663773, 52.8449371,
52.84592185, 52.84331619), longitude = c(-6.892397941, -6.915346343,
-6.922554014, -6.924997835, -6.926099967, -6.883340697, -6.897757597,
-6.897757597, -6.895500952, -6.883129556, -6.895500952, -6.703781864,
-6.680851783, -6.771845364, -6.773301282, -6.772958488, -6.77484647,
-6.771845364, -6.773422218, -6.772164896, -6.770622695, -6.784187251,
-6.901922588, -6.905109015, -6.901922588, -6.976679508, -6.973114498,
-6.974753462, -6.947990431, -6.947990431, -6.947990431, -6.976921427,
-6.958295227, -6.976921427, -6.947990431, -6.976921427, -6.902010609,
-6.915233457, -6.871160885, -6.832461149, -6.862126342, -6.943925285,
-6.93813643, -6.925128034, -6.932247524, -6.93461305, -6.932247524,
-6.934657053, -6.929283954, -6.845259603, -6.861188287, -6.866476268,
-6.940851164, -6.939203401, -6.930506188, -6.933317462, -6.929441954,
-6.922589037, -6.922589037, -6.926037258, -6.929423169, -6.917829279,
-6.938211918, -6.940658091, -6.940651748, -6.940107883, -6.940107883,
-6.938704642, -6.939084526, -6.933331264, -6.937496468, -6.937678962,
-6.940276221, -6.94018054, -6.939876475, -6.938983181, -6.934235666,
-6.93387209, -6.933134226, -6.934193569, -6.934383596, -6.933832641,
-6.937454656, -6.933818238, -6.93443811, -6.936913947, -6.920030341,
-6.920400963, -6.92215006, -6.910771124, -6.901500591, -6.899018998,
-6.899018998, -6.903007684, -6.90119821, -6.91063672, -6.909935672,
-6.90240965, -6.900066763, -6.901411136), mosaic_group = structure(c(10L,
10L, 8L, 8L, 8L, 7L, 7L, 7L, 7L, 7L, 7L, 10L, 10L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 10L, 10L, 10L, 13L, 13L, 13L, 13L,
9L, 6L, 6L, 6L, 6L, 6L, 10L, 8L, 8L, 9L, 9L, 9L, 9L, 7L, 7L,
7L, 9L, 9L, 9L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 8L, 8L, 8L, 8L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 8L,
6L, 6L, 6L, 6L, 6L, 8L, 8L, 10L, 10L, 10L), .Label = c("A", "B",
"C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M"), class = "factor"),
small_code = c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 11L,
11L, 12L, 12L, 13L, 14L, 14L, 14L, 14L, 14L, 15L, 16L, 16L,
17L, 17L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 23L, 23L, 24L, 24L,
24L, 25L, 26L, 26L, 26L, 26L, 26L, 27L, 27L, 28L, 28L, 28L
)), .Names = c("latitude", "longitude", "mosaic_group", "small_code"
), row.names = c(NA, 100L), class = "data.frame")
The function knncat::knncat accepts the argument classcol which is defined as:
Column with classification in it. Default: 1.
You have a data set with structure:
latitude longitude mosaic_group small_code
1 52.73260 -6.892398 J 1
2 52.74288 -6.915346 J 1
3 52.82108 -6.922554 H 2
4 52.82025 -6.924998 H 2
5 52.81981 -6.926100 H 2
6 52.81722 -6.883341 G 3
Therefore your argument should be classcol = 3 (or 4) I am assuming, but we can see that it certainly shouldn't be classcol = 2.