Running a regression of nested tibbles - r

I have a nested tibble which looks like the following:
# A tibble: 2 x 3
SCORE score1_rank score2_rank
<chr> <list> <list>
1 scr_rnk_1 <tibble [54 x 5]> <tibble [54 x 5]>
2 scr_rnk_2 <tibble [46 x 5]> <tibble [46 x 5]>
I want to construct regressions for each of the 4 tibbles. I can expand out the data by the following and run regressions individually:
# Pull the two nested tibbles out of the score1_rank list-column.
sub_data1 <- nested_df[["score1_rank"]][[1]]
sub_data2 <- nested_df[["score1_rank"]][[2]]
# Regression 1: keep only rows with an observed response, then fit.
sub_data1 <- sub_data1[complete.cases(sub_data1[["Y"]]), ]
lm(formula = Y ~ X1 + X2, data = sub_data1)
# Regression 2: same steps for the second tibble.
sub_data2 <- sub_data2[complete.cases(sub_data2[["Y"]]), ]
lm(formula = Y ~ X1 + X2, data = sub_data2)
However I would like to try to do this for the whole nested tibble.
i.e. I am trying to map the regression over the tibbles.
Data:
nested_df <- structure(list(SCORE = c("scr_rnk_1", "scr_rnk_2"), score1_rank = list(
structure(list(time = c("July_2013_June_2014", "July_2013_June_2014",
"July_2013_June_2014", "July_2013_June_2014", "July_2013_June_2014",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2016_June_2017", "July_2016_June_2017",
"July_2016_June_2017", "July_2016_June_2017", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2018_June_2019",
"July_2018_June_2019", "July_2018_June_2019", "July_2018_June_2019",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2011_June_2012", "July_2011_June_2012",
"July_2011_June_2012", "July_2011_June_2012", "July_2011_June_2012",
"July_2008_June_2009", "July_2008_June_2009", "July_2008_June_2009",
"July_2017_June_2018", "July_2017_June_2018", "July_2017_June_2018",
"July_2009_June_2010", "July_2009_June_2010", "July_2009_June_2010",
"July_2019_June_2020"), score1 = c(0.878385627705134, 0.829149886628575,
0.873633400824437, 0.873191548477804, 0.833360020840671,
0.821514348879447, 0.93893179382238, 0.902566094498171, 0.832521540654393,
0.904546026086165, 0.944312545893212, 0.90721438246816, 0.925563285777056,
0.837735581176652, 0.898314100598163, 0.881156591451732,
0.927432166201199, 0.810462622843289, 0.924966424794594,
0.54982486102469, 0.632637353015548, 0.93598101241571, 0.748712668464033,
0.887355002120062, 0.00606213355201044, 0.66570681669867,
0.809662797719473, 0.80883896141453, 0.410059100270974, 0.45097086832185,
0.855118540355703, 0.73792861592456, 0.582170697766921, 0.910913548399676,
0.909192361557635, 0.61000565934628, 0.541242004262667, 0.847840909074889,
0.838844407944549, 0.638014235742945, 0.948686837455938,
0.569343264654849, 0.942357992461572, 0.956483422999484,
0.716630105733463, 0.757677906984471, 0.840660131450953,
0.944095864840561, 0.74291963665858, 0.944596570938035, 0.916460742106468,
0.90890022256817, 0.895889262055934, 0.886515265060623),
Y = c(-0.0392143242061138, 0.00517332553863525, 0.0475661605596542,
-0.0140374358743429, -0.0235463473945856, 0.0460794232785702,
0.0647838711738586, -0.0257589742541313, 0.0539961569011211,
-0.170428335666656, 0.0925306528806686, 0.11557175219059,
0.0496749319136143, -0.11405622959137, 0.0666666403412819,
-0.0189777128398418, -0.00572755141183734, 0.0277173686772585,
-0.0241545476019383, 0.0328245237469673, 0.223529428243637,
0.0253662765026092, 0.0394621938467026, 0.0815821811556816,
0.0597507022321224, -0.0132956989109516, 0.0609685145318508,
0.0393742695450783, -0.00168346334248781, -0.000859459512867033,
0.0345749147236347, NA, 0.0327170714735985, 0.144188165664673,
0.0415891073644161, 0.0028026478830725, -0.0840985849499702,
0.00914959330111742, 0.0197730101644993, -0.0929021015763283,
0.0382972247898579, NA, 0.015947800129652, 0.0136986169964075,
-0.139593943953514, 0.113736107945442, 0.0216289088129997,
-0.209788918495178, 0.00545153254643083, 0.126438871026039,
0.0538020096719265, 0.0774460881948471, 0.0651820451021194,
NA), X1 = c(0.14, 5.52, 0.14, -3.29, 1.82, -1.17, 1.93,
2.7, -1.44, -1.74, 5.91, -2.05, 2.72, 1.86, 2.28, 1.39,
3.49, 4.47, -1.52, 4.47, 9.85, -0.68, -2.52, 5.46, -0.43,
-0.43, 2.3, 0.56, -8.19, 0.87, 2.53, NA, 7.32, 6.92,
6.92, -6.18, -3.91, -6.32, 0.45, -8.88, -0.44, NA, -0.44,
-1.11, -8.54, 7.28, -6.53, 1.93, 1.93, 1.93, 6.24, 8.62,
6.24, NA), X2 = c(-0.5, 2.22, -0.5, 2.93, -0.17, 1.42,
-0.53, 0.78, 1.67, -0.05, -0.39, -1.08, 0.46, 0.37, -0.62,
0.17, 0.18, -0.69, -0.42, -0.69, 1.48, 1.32, 0.21, 0.17,
-0.76, -0.76, 1.19, -0.66, -2.51, -0.38, -2.56, NA, -2.36,
1.33, 1.33, 1.16, -0.25, -2.16, 0.04, -0.53, -0.46, NA,
-0.46, 0.23, 2.23, -1.27, -0.57, -0.61, -0.61, -0.61,
-0.19, -1.37, -0.19, NA)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -54L)), structure(list(time = c("July_2013_June_2014",
"July_2013_June_2014", "July_2013_June_2014", "July_2013_June_2014",
"July_2013_June_2014", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2016_June_2017", "July_2016_June_2017",
"July_2016_June_2017", "July_2010_June_2011", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2010_June_2011",
"July_2012_June_2013", "July_2012_June_2013", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2018_June_2019",
"July_2018_June_2019", "July_2018_June_2019", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2011_June_2012", "July_2011_June_2012", "July_2011_June_2012",
"July_2011_June_2012", "July_2008_June_2009", "July_2008_June_2009",
"July_2008_June_2009", "July_2017_June_2018", "July_2017_June_2018",
"July_2017_June_2018", "July_2009_June_2010", "July_2009_June_2010"
), score1 = c(0.910630243821458, 0.887211746784698, 0.920092482844549,
0.94450683954903, 0.886972163304589, 0.991052738161695, 0.981619567238222,
0.977490375052585, 0.961036277360393, 0.985523653404714,
0.948091565971217, 0.959812930740014, 0.936269500157121,
0.948541666157695, 0.939675946745415, 0.995146212267317,
0.944554298851532, 0.982930629437269, 0.963858517802992,
0.92872841572452, 0.968099127001545, 0.945198156814004, 0.892947157198215,
0.906930889247629, 0.957790348580216, 0.928122479697648,
0.953267485671018, 0.963714595673124, 0.976914001156382,
0.973623547932495, 0.962870831719229, 0.978333062077069,
0.958765402277667, 0.959032891808224, 0.972965648015492,
0.982760065777063, 0.957170836537733, 0.961880715763936,
0.975885654717621, 0.924673632533321, 0.925318007280836,
0.987246011368269, 0.98249943727474, 0.980272445641619, 0.978206000922261,
0.929807352926533), Y = c(0.0737265646457672, 0.0278251487761736,
0.201131358742714, 0.125700861215591, 0.0777644738554955,
-0.0130416098982096, -0.0990565568208694, 0.0333333089947701,
-0.031569954007864, 0.0422280319035053, -0.0111790159717202,
-0.278726726770401, -0.139534845948219, -0.0800638571381569,
0.23757965862751, -0.0746169164776802, 0.0465963147580624,
0.0337920561432838, -0.0111621227115393, -0.0133928591385484,
0.0778210312128067, -0.0821536555886269, 0.00643268134444952,
NA, 0.152694001793861, 0.0409262739121914, 0.0360006913542747,
-0.0233012177050114, -0.211209982633591, -0.11425743252039,
-0.169167995452881, 0.0282719731330872, 0.161968618631363,
-0.0525752492249012, 0.0127659253776074, -0.0466842725872993,
-0.115001328289509, -0.00946897640824318, 0.114568591117859,
0.2675521671772, -0.0196253582835197, 0.123595483601093,
NA, 0.12380950897932, -0.0350765138864517, -0.16666667163372
), X1 = c(2.01, 0.14, 5.06, 5.52, 1.82, 2.7, -3.09, 1.65,
0.5, 1.93, -1.17, 2.25, 1.86, -1.88, 9.85, -3.9, 3.94, 7.6,
4.47, -2.52, 1.32, 2.78, 0.09, NA, 0.88, 2.53, 2.53, 7.32,
1.13, -6.18, -6.32, -0.3, 7.32, -6.18, 4.93, -1.11, -9.2,
-7.52, 11.42, 9.96, -0.26, 1.93, NA, 0.49, 8.62, 0.49), X2 = c(2.18,
-0.5, -1.03, 2.22, -0.17, 0.78, -2.72, -2.19, 1.22, -0.53,
1.42, -0.51, 0.37, -1.55, 1.48, -0.22, -0.02, 2.08, -0.69,
0.21, -1.2, -0.32, 0.35, NA, -0.57, -2.56, -2.56, -2.36,
-3.09, 1.16, -2.16, 1.75, -2.36, 1.16, -0.77, 0.23, -1.33,
-0.63, 1.64, 1.63, 2.85, -0.61, NA, 1.88, -1.37, 3.81)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -46L))), score2_rank = list(
structure(list(time = c("July_2013_June_2014", "July_2013_June_2014",
"July_2013_June_2014", "July_2013_June_2014", "July_2013_June_2014",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2016_June_2017", "July_2016_June_2017",
"July_2016_June_2017", "July_2016_June_2017", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2018_June_2019",
"July_2018_June_2019", "July_2018_June_2019", "July_2018_June_2019",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2011_June_2012", "July_2011_June_2012",
"July_2011_June_2012", "July_2011_June_2012", "July_2011_June_2012",
"July_2008_June_2009", "July_2008_June_2009", "July_2008_June_2009",
"July_2017_June_2018", "July_2017_June_2018", "July_2017_June_2018",
"July_2009_June_2010", "July_2009_June_2010", "July_2009_June_2010",
"July_2019_June_2020"), score2 = c(0.573384803196917, 0.95560973004494,
0.936151601862601, 0.940067094946625, 0.790149367637373,
0.885023225824309, 0.956490411723667, 0.918534374861312,
0.9660240615445, 0.961407533200788, 0.794743982673356, 0.926614681101157,
0.924390324452674, 0.838697174839086, 0.548480558835933,
0.928419789574611, 0.942229561212187, 0.808215644539813,
0.89946853678008, 0.931010276978734, 0.780385177969094, 0.945728847589739,
0.958939314931932, 0.101395325662518, 0.0547541695358364,
0.757995973046388, 0.815555744982054, 0.947726570770333,
0.589921893700343, 0.924114006154793, 0.164071857964122,
0.946752193254218, 0.801515206601873, 0.709037475517904,
0.730962189352849, 0.872901083488831, 0.958819700206169,
0.951829945538551, 0.924000702901887, 0.963439907199707,
0.94482417669742, 0.817381450384857, 0.977233364779766, 0.881676744287434,
0.820839678297149, 0.449214983785051, 0.536396658733052,
0.756705578897905, 0.904306523171427, 0.947974271863387,
0.947487349720247, 0.95821125132286, 0.890792036806817, 0.983129670844182
), Y = c(-0.0392143242061138, 0.0475661605596542, 0.0278251487761736,
-0.0235463473945856, 0.0777644738554955, 0.0333333089947701,
0.0460794232785702, 0.0647838711738586, -0.0257589742541313,
-0.170428335666656, 0.0925306528806686, 0.11557175219059,
-0.278726726770401, -0.139534845948219, -0.11405622959137,
0.0666666403412819, -0.00572755141183734, 0.0277173686772585,
0.23757965862751, -0.0241545476019383, 0.0465963147580624,
0.0253662765026092, 0.0394621938467026, 0.00643268134444952,
0.0597507022321224, -0.0132956989109516, 0.0609685145318508,
0.0393742695450783, -0.00168346334248781, 0.0345749147236347,
NA, 0.0360006913542747, 0.0327170714735985, -0.0233012177050114,
0.0028026478830725, -0.0840985849499702, 0.161968618631363,
0.00914959330111742, 0.0197730101644993, -0.0466842725872993,
-0.0929021015763283, 0.0382972247898579, 0.015947800129652,
0.0136986169964075, -0.139593943953514, 0.113736107945442,
0.0216289088129997, -0.209788918495178, 0.00545153254643083,
0.12380950897932, 0.0538020096719265, 0.0774460881948471,
-0.16666667163372, NA), X1 = c(0.14, 0.14, 0.14, 1.82, 1.82,
1.65, -1.17, 1.93, 2.7, -1.74, 5.91, -2.05, 2.25, 1.86, 1.86,
2.28, 3.49, 4.47, 9.85, -1.52, 3.94, -0.68, -2.52, 0.09,
-0.43, -0.43, 2.3, 0.56, -8.19, 2.53, NA, 2.53, 7.32, 7.32,
-6.18, -3.91, 7.32, -6.32, 0.45, -1.11, -8.88, -0.44, -0.44,
-1.11, -8.54, 7.28, -6.53, 1.93, 1.93, 0.49, 6.24, 8.62,
0.49, NA), X2 = c(-0.5, -0.5, -0.5, -0.17, -0.17, -2.19,
1.42, -0.53, 0.78, -0.05, -0.39, -1.08, -0.51, 0.37, 0.37,
-0.62, 0.18, -0.69, 1.48, -0.42, -0.02, 1.32, 0.21, 0.35,
-0.76, -0.76, 1.19, -0.66, -2.51, -2.56, NA, -2.56, -2.36,
-2.36, 1.16, -0.25, -2.36, -2.16, 0.04, 0.23, -0.53, -0.46,
-0.46, 0.23, 2.23, -1.27, -0.57, -0.61, -0.61, 1.88, -0.19,
-1.37, 3.81, NA)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -54L)), structure(list(time = c("July_2013_June_2014",
"July_2013_June_2014", "July_2013_June_2014", "July_2013_June_2014",
"July_2013_June_2014", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2016_June_2017", "July_2016_June_2017",
"July_2016_June_2017", "July_2010_June_2011", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2010_June_2011",
"July_2012_June_2013", "July_2012_June_2013", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2018_June_2019",
"July_2018_June_2019", "July_2018_June_2019", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2011_June_2012", "July_2011_June_2012", "July_2011_June_2012",
"July_2011_June_2012", "July_2008_June_2009", "July_2008_June_2009",
"July_2008_June_2009", "July_2017_June_2018", "July_2017_June_2018",
"July_2017_June_2018", "July_2009_June_2010", "July_2009_June_2010"
), score2 = c(0.977777238266838, 0.994161535248162, 0.973746623206586,
0.959737686390477, 0.960771840809366, 0.973573416279972,
0.971473417619078, 0.994362749200424, 0.998832204612857,
0.969953961861552, 0.974595202023975, 0.990460167618893,
0.977938934839813, 0.933720130788891, 0.997555980989323,
0.983534940461115, 0.961638641355128, 0.98302503175898, 0.955924205281728,
0.960588460795172, 0.980272014323638, 0.99319344527155, 0.990396166187007,
0.96928405964874, 0.958824291095735, 0.94735915935544, 0.956799713877734,
0.974313477760366, 0.959422857050319, 0.970981339110875,
0.986720965210939, 0.988119219123952, 0.987757971968369,
0.998331238333002, 0.985606980938901, 0.996309951852897,
0.978123949182993, 0.980322946112709, 0.870995840583191,
0.99620925825849, 0.952471805464684, 0.967521340577839, 0.997358168481063,
0.954089152398106, 0.99961257213601, 0.971649355774121),
Y = c(0.00517332553863525, 0.0737265646457672, 0.201131358742714,
-0.0140374358743429, 0.125700861215591, -0.0130416098982096,
-0.0990565568208694, 0.0539961569011211, -0.031569954007864,
0.0422280319035053, -0.0111790159717202, 0.0496749319136143,
-0.0189777128398418, -0.0800638571381569, -0.0746169164776802,
0.0328245237469673, 0.223529428243637, 0.0337920561432838,
-0.0111621227115393, -0.0133928591385484, 0.0815821811556816,
0.0778210312128067, -0.0821536555886269, NA, -0.000859459512867033,
0.152694001793861, 0.0409262739121914, -0.211209982633591,
0.144188165664673, 0.0415891073644161, -0.11425743252039,
-0.169167995452881, 0.0282719731330872, -0.0525752492249012,
0.0127659253776074, -0.115001328289509, -0.00946897640824318,
NA, 0.114568591117859, 0.2675521671772, -0.0196253582835197,
0.123595483601093, NA, 0.126438871026039, -0.0350765138864517,
0.0651820451021194), X1 = c(5.52, 2.01, 5.06, -3.29,
5.52, 2.7, -3.09, -1.44, 0.5, 1.93, -1.17, 2.72, 1.39,
-1.88, -3.9, 4.47, 9.85, 7.6, 4.47, -2.52, 5.46, 1.32,
2.78, NA, 0.87, 0.88, 2.53, 1.13, 6.92, 6.92, -6.18,
-6.32, -0.3, -6.18, 4.93, -9.2, -7.52, NA, 11.42, 9.96,
-0.26, 1.93, NA, 1.93, 8.62, 6.24), X2 = c(2.22, 2.18,
-1.03, 2.93, 2.22, 0.78, -2.72, 1.67, 1.22, -0.53, 1.42,
0.46, 0.17, -1.55, -0.22, -0.69, 1.48, 2.08, -0.69, 0.21,
0.17, -1.2, -0.32, NA, -0.38, -0.57, -2.56, -3.09, 1.33,
1.33, 1.16, -2.16, 1.75, 1.16, -0.77, -1.33, -0.63, NA,
1.64, 1.63, 2.85, -0.61, NA, -0.61, -1.37, -0.19)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -46L)))), row.names = c(NA,
-2L), class = c("tbl_df", "tbl", "data.frame"))

Two nested lapply() calls are what I would use to do it across the whole tibble:
# Iterate across the list-columns score1_rank and score2_rank. `[-1]` drops
# the first column (SCORE). The data object in this post is `nested_df`;
# a bare `df` would resolve to the stats::df() function in a fresh session.
lapply(nested_df[-1], function(x) {
  # Iterate within score1_rank, then score2_rank, fitting one regression
  # per nested tibble.
  lapply(x, function(y) {
    # Keep only the rows whose response Y is observed.
    sub_data1 <- y[!is.na(y$Y), ]
    lm(Y ~ X1 + X2, data = sub_data1)
  })
})
Output (4 regressions):
# $score1_rank
# $score1_rank[[1]]
#
# Call:
# lm(formula = Y ~ X1 + X2, data = sub_data1)
#
# Coefficients:
# (Intercept) X1 X2
# 0.010491 0.008486 -0.002082
#
#
# $score1_rank[[2]]
#
# Call:
# lm(formula = Y ~ X1 + X2, data = sub_data1)
#
# Coefficients:
# (Intercept) X1 X2
# -0.013118 0.013098 0.008622
#
#
#
# $score2_rank
# $score2_rank[[1]]
#
# Call:
# lm(formula = Y ~ X1 + X2, data = sub_data1)
#
# Coefficients:
# (Intercept) X1 X2
# -0.003704 0.007486 -0.009675
#
#
# $score2_rank[[2]]
#
# Call:
# lm(formula = Y ~ X1 + X2, data = sub_data1)
#
# Coefficients:
# (Intercept) X1 X2
# -0.002017 0.012093 0.014742

Another option would be to use the tidy model approach using tidyverse and broom.
library(tidyverse)
library(broom)
# Stack the list-columns into long form (one nested tibble per row), fit a
# linear model to each tibble, and expand the tidied coefficient tables.
nested_df %>%
  # pivot_longer() supersedes gather(); the nested tibbles end up in the
  # list-column `data`, and the originating column name in `key`.
  pivot_longer(-SCORE, names_to = "key", values_to = "data") %>%
  # pivot_longer() emits rows SCORE by SCORE; sorting by key restores the
  # column-by-column order that gather() produced.
  arrange(key) %>%
  mutate(tidymod = map(data, function(d) tidy(lm(Y ~ X1 + X2, data = d)))) %>%
  # Drop the raw data explicitly: unnest() in tidyr >= 1.0 keeps other
  # list-columns, which would otherwise add an eighth column to the result.
  select(-data) %>%
  unnest(tidymod)
# A tibble: 12 x 7
SCORE key term estimate std.error statistic p.value
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 scr_rnk_1 score1_rank (Intercept) 0.0105 0.00962 1.09 0.281
2 scr_rnk_1 score1_rank X1 0.00849 0.00212 4.00 0.000219
3 scr_rnk_1 score1_rank X2 -0.00208 0.00808 -0.258 0.798
4 scr_rnk_2 score1_rank (Intercept) -0.0131 0.0155 -0.848 0.402
5 scr_rnk_2 score1_rank X1 0.0131 0.00320 4.10 0.000192
6 scr_rnk_2 score1_rank X2 0.00862 0.00894 0.965 0.340
7 scr_rnk_1 score2_rank (Intercept) -0.00370 0.0125 -0.296 0.769
8 scr_rnk_1 score2_rank X1 0.00749 0.00291 2.57 0.0132
9 scr_rnk_1 score2_rank X2 -0.00968 0.00961 -1.01 0.319
10 scr_rnk_2 score2_rank (Intercept) -0.00202 0.0121 -0.166 0.869
11 scr_rnk_2 score2_rank X1 0.0121 0.00242 4.99 0.0000121
12 scr_rnk_2 score2_rank X2 0.0147 0.00774 1.91 0.0640

Related

How can I plot a 3D histogram in R?

I have a table with numbers and can plot a 3d histogram in excel.
Here is my histogram in excel:
How can I do the same in R with plot3d?
In their example they use three values (x, y, z) for each bar.
Here is their dataset and histogram in R:
But I have only one value per bar.
My table:
-2.88 -1.76 -0.41 -2.25 -0.83 -0.62 -1.25 -2.68 -2.41 -1.74 -2.51 -0.78 -1.97 -2.67 -1.41 -1.56 0.49 -1.54 -1.37 -1.47 -2.32 0.66
-2.39 -1.98 -0.65 -2.33 -1.98 -1.19 -2.44 -2.13 -2.16 -2.44 -2.20 -1.77 -0.60 -0.73 -0.77 -1.59 -1.01 -1.37 -1.68 -0.92 -1.28 -0.12
-1.99 -2.48 -0.43 -1.75 -1.81 -2.37 -1.08 -1.18 -0.80 -3.30 -2.04 -1.96 -0.65 -2.44 -0.83 -1.67 -0.48 -1.03 -1.76 0.04 -1.30 -0.71
-2.73 -2.22 -0.98 -1.24 -2.21 -1.29 -1.37 -0.89 -0.86 -2.22 -1.32 -2.13 -1.04 -1.12 -0.60 -1.58 0.20 0.01 -1.81 -0.17 -0.38 -1.74
-1.63 -1.29 -1.31 -1.94 -2.39 -1.20 -1.66 -0.14 -0.96 -1.10 -0.40 -1.29 -0.44 -0.26 0.01 -2.71 -0.55 0.17 -3.44 -0.95 0.75 -1.08
-0.95 -0.15 -1.13 -1.18 -1.74 0.09 -1.12 -0.37 -0.80 -0.44 -1.18 -1.53 -1.28 0.36 -0.56 -1.54 -0.58 0.71 -1.53 -0.57 -0.91 -1.29
-0.67 0.02 -1.82 -0.84 -2.11 -0.38 -1.12 -0.57 -0.81 -1.04 -1.22 -0.93 -1.29 -0.26 0.02 -0.76 -0.28 -0.24 -0.43 -0.37 -1.30 -1.61
-3.45 -2.79 -0.44 -2.25 -0.81 -1.00 -1.20 -2.90 -1.96 -2.79 -2.91 -0.58 -1.65 -3.10 -1.23 -2.20 -0.15 -1.60 -1.51 -0.97 -2.35 0.38
-3.03 -3.12 -0.62 -2.01 -2.25 -1.84 -2.29 -2.51 -1.86 -2.93 -2.32 -1.63 -0.35 -1.05 -1.09 -2.04 -0.79 -1.18 -2.39 -0.54 -0.60 -0.71
-2.78 -2.60 -0.49 -1.69 -1.96 -2.10 -1.70 -1.26 -0.37 -2.80 -2.40 -2.23 -0.61 -2.26 -0.80 -2.11 -0.17 -0.21 -2.61 -0.09 -1.18 -1.26
-3.13 -1.96 -1.19 -1.17 -2.76 -0.87 -1.96 -0.22 -0.49 -2.75 -1.81 -2.48 -1.26 -1.04 0.08 -2.52 0.21 0.80 -2.28 -0.14 -0.27 -1.69
-1.52 -1.85 -1.36 -1.42 -2.28 -0.49 -1.58 -0.34 -1.11 -0.59 -0.74 -1.63 -0.58 -0.23 0.12 -2.97 0.17 0.68 -3.14 -0.64 0.21 -1.70
-1.05 -0.42 -1.50 -1.46 -2.32 -0.57 -0.63 -0.17 -0.79 -0.92 -1.52 -1.69 -1.25 0.34 -0.46 -1.94 0.27 0.82 -1.48 0.35 -1.25 -1.89
-1.03 0.28 -1.39 -0.82 -2.44 -0.75 -0.86 -0.69 -1.07 -1.38 -1.46 -1.09 -1.71 -0.50 0.59 -1.42 -0.54 -0.13 -0.86 -0.14 -1.28 -1.84
UPD:
I tried to insert the full dataset into one of the examples, just to see how plot3d handles a huge number of bars. It gets pretty stuck.
Also, I don't see any negative bars. I expected positive bars to appear above 0 and negative bars below it, like in my first picture.
So I realize that I first need to render a large amount of data in order to choose the right library.
I also assume that full real-time 3D rendering may be impossible for that amount of data, so it would be fine if the library rendered just a single static picture, as hist3D does.
m <- structure(c(-2.88, -1.76, -0.41, -2.25, -0.83, -0.62, -1.25, -2.68, -2.41, -1.74, -2.51, -0.78, -1.97, -2.67, -1.41, -1.56, 0.49, -1.54, -1.37, -1.47, -2.32, 0.66,
-2.39, -1.98, -0.65, -2.33, -1.98, -1.19, -2.44, -2.13, -2.16, -2.44, -2.20, -1.77, -0.60, -0.73, -0.77, -1.59, -1.01, -1.37, -1.68, -0.92, -1.28, -0.12,
-1.99, -2.48, -0.43, -1.75, -1.81, -2.37, -1.08, -1.18, -0.80, -3.30, -2.04, -1.96, -0.65, -2.44, -0.83, -1.67, -0.48, -1.03, -1.76, 0.04, -1.30, -0.71,
<=-=-=-=-=-=-=-=-=-=-=-skipped ==============>>
-2.64, -0.89, -1.60, -2.28, -3.56, -0.84, 0.31, 0.48, -0.31, 0.03, -2.42, 0.92, -3.10, -2.35, 0.03, -2.56, -0.91, 1.01, -5.90, -0.40, 2.95, -1.32,
-3.06, -0.69, -0.74, -2.46, -4.16, 0.46, 0.97, 0.46, -0.47, -0.79, -3.12, 1.09, -3.53, -1.08, -0.25, -1.26, -0.57, 0.67, -4.76, 0.01, -0.08, -1.56,
-2.70, -0.89, -0.97, -2.40, -5.45, -1.26, 1.65, 0.24, -1.60, -1.79, -2.05, 0.18, -3.01, -0.39, 0.47, -2.21, -0.50, 0.77, -3.05, 0.81, -0.36, -1.98), .Dim = c(700L, 22L))
library(graph3d)
# Build a long table with one row per cell of the 700 x 22 grid:
# x and y are the grid indices, z is the flattened matrix `m`.
dat <- cbind(
  expand.grid(x = seq_len(700), y = seq_len(22)),
  z = as.vector(m)
)
# Draw one bar per (x, y) cell with height z.
graph3d(dat, ~x, ~y, ~z, type = "bar")
Please help me plot a histogram from the full txt file, with positive bars going up and negative bars going down.
My full txt file is here https://pastebin.com/2zyyRDy8
I've read my txt file into res_cut, but its data structure differs from your examples: mine has 700 obs. of 23 variables.
# Read the tab-separated file without treating the first line as a header.
res_cut <- read.delim(
  file = "d:/result_cut.txt",
  header = FALSE,
  sep = "\t"
)
With the graph3d package:
m <- structure(c(-2.88, -2.39, -1.99, -2.73, -1.63, -0.95, -0.67,
-3.45, -3.03, -2.78, -3.13, -1.52, -1.05, -1.03, -1.76, -1.98,
-2.48, -2.22, -1.29, -0.15, 0.02, -2.79, -3.12, -2.6, -1.96,
-1.85, -0.42, 0.28, -0.41, -0.65, -0.43, -0.98, -1.31, -1.13,
-1.82, -0.44, -0.62, -0.49, -1.19, -1.36, -1.5, -1.39, -2.25,
-2.33, -1.75, -1.24, -1.94, -1.18, -0.84, -2.25, -2.01, -1.69,
-1.17, -1.42, -1.46, -0.82, -0.83, -1.98, -1.81, -2.21, -2.39,
-1.74, -2.11, -0.81, -2.25, -1.96, -2.76, -2.28, -2.32, -2.44,
-0.62, -1.19, -2.37, -1.29, -1.2, 0.09, -0.38, -1, -1.84, -2.1,
-0.87, -0.49, -0.57, -0.75, -1.25, -2.44, -1.08, -1.37, -1.66,
-1.12, -1.12, -1.2, -2.29, -1.7, -1.96, -1.58, -0.63, -0.86,
-2.68, -2.13, -1.18, -0.89, -0.14, -0.37, -0.57, -2.9, -2.51,
-1.26, -0.22, -0.34, -0.17, -0.69, -2.41, -2.16, -0.8, -0.86,
-0.96, -0.8, -0.81, -1.96, -1.86, -0.37, -0.49, -1.11, -0.79,
-1.07, -1.74, -2.44, -3.3, -2.22, -1.1, -0.44, -1.04, -2.79,
-2.93, -2.8, -2.75, -0.59, -0.92, -1.38, -2.51, -2.2, -2.04,
-1.32, -0.4, -1.18, -1.22, -2.91, -2.32, -2.4, -1.81, -0.74,
-1.52, -1.46, -0.78, -1.77, -1.96, -2.13, -1.29, -1.53, -0.93,
-0.58, -1.63, -2.23, -2.48, -1.63, -1.69, -1.09, -1.97, -0.6,
-0.65, -1.04, -0.44, -1.28, -1.29, -1.65, -0.35, -0.61, -1.26,
-0.58, -1.25, -1.71, -2.67, -0.73, -2.44, -1.12, -0.26, 0.36,
-0.26, -3.1, -1.05, -2.26, -1.04, -0.23, 0.34, -0.5, -1.41, -0.77,
-0.83, -0.6, 0.01, -0.56, 0.02, -1.23, -1.09, -0.8, 0.08, 0.12,
-0.46, 0.59, -1.56, -1.59, -1.67, -1.58, -2.71, -1.54, -0.76,
-2.2, -2.04, -2.11, -2.52, -2.97, -1.94, -1.42, 0.49, -1.01,
-0.48, 0.2, -0.55, -0.58, -0.28, -0.15, -0.79, -0.17, 0.21, 0.17,
0.27, -0.54, -1.54, -1.37, -1.03, 0.01, 0.17, 0.71, -0.24, -1.6,
-1.18, -0.21, 0.8, 0.68, 0.82, -0.13, -1.37, -1.68, -1.76, -1.81,
-3.44, -1.53, -0.43, -1.51, -2.39, -2.61, -2.28, -3.14, -1.48,
-0.86, -1.47, -0.92, 0.04, -0.17, -0.95, -0.57, -0.37, -0.97,
-0.54, -0.09, -0.14, -0.64, 0.35, -0.14, -2.32, -1.28, -1.3,
-0.38, 0.75, -0.91, -1.3, -2.35, -0.6, -1.18, -0.27, 0.21, -1.25,
-1.28, 0.66, -0.12, -0.71, -1.74, -1.08, -1.29, -1.61, 0.38,
-0.71, -1.26, -1.69, -1.7, -1.89, -1.84), .Dim = c(14L, 22L))
library(graph3d)
# Build a long table with one row per matrix cell. The grid size is taken
# from `m` itself, so the same code works for a matrix of any dimensions.
# expand.grid() varies x (the row index) fastest, which matches the
# column-major order in which c(m) flattens the matrix.
dat <- cbind(
  expand.grid(x = seq_len(nrow(m)), y = seq_len(ncol(m))),
  z = c(m)
)
# Draw one bar per (x, y) cell with height z.
graph3d(
  dat,
  ~x, ~y, ~z,
  type = "bar"
)
You could use hist3D from the plot3D package with the z parameter:
m <- structure(c(-2.88, -2.39, -1.99, -2.73, -1.63, -0.95, -0.67,
-3.45, -3.03, -2.78, -3.13, -1.52, -1.05, -1.03, -1.76, -1.98,
-2.48, -2.22, -1.29, -0.15, 0.02, -2.79, -3.12, -2.6, -1.96,
-1.85, -0.42, 0.28, -0.41, -0.65, -0.43, -0.98, -1.31, -1.13,
-1.82, -0.44, -0.62, -0.49, -1.19, -1.36, -1.5, -1.39, -2.25,
-2.33, -1.75, -1.24, -1.94, -1.18, -0.84, -2.25, -2.01, -1.69,
-1.17, -1.42, -1.46, -0.82, -0.83, -1.98, -1.81, -2.21, -2.39,
-1.74, -2.11, -0.81, -2.25, -1.96, -2.76, -2.28, -2.32, -2.44,
-0.62, -1.19, -2.37, -1.29, -1.2, 0.09, -0.38, -1, -1.84, -2.1,
-0.87, -0.49, -0.57, -0.75, -1.25, -2.44, -1.08, -1.37, -1.66,
-1.12, -1.12, -1.2, -2.29, -1.7, -1.96, -1.58, -0.63, -0.86,
-2.68, -2.13, -1.18, -0.89, -0.14, -0.37, -0.57, -2.9, -2.51,
-1.26, -0.22, -0.34, -0.17, -0.69, -2.41, -2.16, -0.8, -0.86,
-0.96, -0.8, -0.81, -1.96, -1.86, -0.37, -0.49, -1.11, -0.79,
-1.07, -1.74, -2.44, -3.3, -2.22, -1.1, -0.44, -1.04, -2.79,
-2.93, -2.8, -2.75, -0.59, -0.92, -1.38, -2.51, -2.2, -2.04,
-1.32, -0.4, -1.18, -1.22, -2.91, -2.32, -2.4, -1.81, -0.74,
-1.52, -1.46, -0.78, -1.77, -1.96, -2.13, -1.29, -1.53, -0.93,
-0.58, -1.63, -2.23, -2.48, -1.63, -1.69, -1.09, -1.97, -0.6,
-0.65, -1.04, -0.44, -1.28, -1.29, -1.65, -0.35, -0.61, -1.26,
-0.58, -1.25, -1.71, -2.67, -0.73, -2.44, -1.12, -0.26, 0.36,
-0.26, -3.1, -1.05, -2.26, -1.04, -0.23, 0.34, -0.5, -1.41, -0.77,
-0.83, -0.6, 0.01, -0.56, 0.02, -1.23, -1.09, -0.8, 0.08, 0.12,
-0.46, 0.59, -1.56, -1.59, -1.67, -1.58, -2.71, -1.54, -0.76,
-2.2, -2.04, -2.11, -2.52, -2.97, -1.94, -1.42, 0.49, -1.01,
-0.48, 0.2, -0.55, -0.58, -0.28, -0.15, -0.79, -0.17, 0.21, 0.17,
0.27, -0.54, -1.54, -1.37, -1.03, 0.01, 0.17, 0.71, -0.24, -1.6,
-1.18, -0.21, 0.8, 0.68, 0.82, -0.13, -1.37, -1.68, -1.76, -1.81,
-3.44, -1.53, -0.43, -1.51, -2.39, -2.61, -2.28, -3.14, -1.48,
-0.86, -1.47, -0.92, 0.04, -0.17, -0.95, -0.57, -0.37, -0.97,
-0.54, -0.09, -0.14, -0.64, 0.35, -0.14, -2.32, -1.28, -1.3,
-0.38, 0.75, -0.91, -1.3, -2.35, -0.6, -1.18, -0.27, 0.21, -1.25,
-1.28, 0.66, -0.12, -0.71, -1.74, -1.08, -1.29, -1.61, 0.38,
-0.71, -1.26, -1.69, -1.7, -1.89, -1.84), .Dim = c(14L, 22L))
# Pass the matrix `m` as the z argument of hist3D(); the function is called
# through the plot3D namespace, so the package is not attached.
plot3D::hist3D(z = m)

boxplot of all columns of a matrix with ggplot and geom_boxplot() via aes()

I have a matrix in which each column is a variable. I want to plot all columns together using ggplot and geom_boxplot(), and I want to set aes() correctly. I only know how to do it for one column, as in the example below. How do I set aes() to plot all columns together, so that I get 6 boxplots in the same plot?
# Boxplot of the single column SPY; the empty string is mapped to x.
ggplot(data = Comparativa_Fondos_v1, mapping = aes(x = '', y = SPY)) +
  geom_boxplot()
This is the matrix I have
> print(Comparativa_Fondos_v1[10:25,8:13])
# A tibble: 16 x 6
SPY MSCI_Word Russell_3000 `Russell 2000` IWM RHS
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.206 0.253 0.209 0.213 NA NA
2 -0.0915 -0.129 -0.0746 -0.0302 NA NA
3 -0.119 -0.165 -0.115 0.0249 0.0197 NA
4 -0.221 -0.195 -0.215 -0.205 -0.205 NA
5 0.284 0.338 0.311 0.472 0.469 NA
6 0.108 0.152 0.120 0.183 0.182 NA
7 0.0479 0.100 0.0612 0.0455 0.0446 NA
8 0.157 0.206 0.157 0.184 0.182 NA
9 0.0539 0.0957 0.0514 -0.0157 -0.0147 0.0847
10 -0.370 -0.403 -0.373 -0.338 -0.336 -0.218
11 0.264 0.308 0.283 0.272 0.271 0.265
12 0.149 0.123 0.169 0.268 0.268 0.180
13 0.0206 -0.0502 0.0103 -0.0418 -0.0419 0.129
14 0.158 0.165 0.164 0.164 0.164 0.122
15 0.322 0.274 0.336 0.388 0.388 0.326
16 0.135 0.055 0.126 0.0489 0.0494 0.179
Try this approach, reshaping your data to long and then sketching the plot:
library(dplyr)
library(tidyr)
library(ggplot2)
#Code
# Reshape every column of `df` into name/value pairs, then draw one boxplot
# per original column, filled by column name.
ggplot(
  data = pivot_longer(df, cols = everything()),
  mapping = aes(x = name, y = value, fill = name)
) +
  geom_boxplot()
Output:
Some data used:
#Data
df <- structure(list(SPY = c(0.206, -0.0915, -0.119, -0.221, 0.284,
0.108, 0.0479, 0.157, 0.0539, -0.37, 0.264, 0.149, 0.0206, 0.158,
0.322, 0.135), MSCI_Word = c(0.253, -0.129, -0.165, -0.195, 0.338,
0.152, 0.1, 0.206, 0.0957, -0.403, 0.308, 0.123, -0.0502, 0.165,
0.274, 0.055), Russell_3000 = c(0.209, -0.0746, -0.115, -0.215,
0.311, 0.12, 0.0612, 0.157, 0.0514, -0.373, 0.283, 0.169, 0.0103,
0.164, 0.336, 0.126), Russell_2000 = c(0.213, -0.0302, 0.0249,
-0.205, 0.472, 0.183, 0.0455, 0.184, -0.0157, -0.338, 0.272,
0.268, -0.0418, 0.164, 0.388, 0.0489), IWM = c(NA, NA, 0.0197,
-0.205, 0.469, 0.182, 0.0446, 0.182, -0.0147, -0.336, 0.271,
0.268, -0.0419, 0.164, 0.388, 0.0494), RHS = c(NA, NA, NA, NA,
NA, NA, NA, NA, 0.0847, -0.218, 0.265, 0.18, 0.129, 0.122, 0.326,
0.179)), class = "data.frame", row.names = c(NA, -16L))
In base R, we can convert to a matrix and use boxplot
boxplot(as.matrix(df))
-output
data
df <- structure(list(SPY = c(0.206, -0.0915, -0.119, -0.221, 0.284,
0.108, 0.0479, 0.157, 0.0539, -0.37, 0.264, 0.149, 0.0206, 0.158,
0.322, 0.135), MSCI_Word = c(0.253, -0.129, -0.165, -0.195, 0.338,
0.152, 0.1, 0.206, 0.0957, -0.403, 0.308, 0.123, -0.0502, 0.165,
0.274, 0.055), Russell_3000 = c(0.209, -0.0746, -0.115, -0.215,
0.311, 0.12, 0.0612, 0.157, 0.0514, -0.373, 0.283, 0.169, 0.0103,
0.164, 0.336, 0.126), Russell_2000 = c(0.213, -0.0302, 0.0249,
-0.205, 0.472, 0.183, 0.0455, 0.184, -0.0157, -0.338, 0.272,
0.268, -0.0418, 0.164, 0.388, 0.0489), IWM = c(NA, NA, 0.0197,
-0.205, 0.469, 0.182, 0.0446, 0.182, -0.0147, -0.336, 0.271,
0.268, -0.0419, 0.164, 0.388, 0.0494), RHS = c(NA, NA, NA, NA,
NA, NA, NA, NA, 0.0847, -0.218, 0.265, 0.18, 0.129, 0.122, 0.326,
0.179)), class = "data.frame", row.names = c(NA, -16L))

Divide each column of a dataframe by one row of the dataframe

I would like to divide each column of my dataframe by the values of one row.
I tried to transform my dataframe into a matrix and to extract one row of the dataframe as a vector then divide the matrix by the vector but it did not work. Indeed, only the first row of the matrix got divided by the vector.
Here is my original dataframe.
And this is the code I tried to run :
data <- read_excel("Documents/TFB/xlsx_geochimie/solfatara_maj.xlsx")
View(data)
data.mat <- as.matrix(data[,2:20])
vector <- data[12,2:20]
data.mat/vector
We replicate the vector so that its length matches the matrix (one value per cell, chosen by column index) and then do the division
data.mat/unlist(vector)[col(data.mat)]
# FeO Total S SO4 Total N SiO2 Al2O3 Fe2O3 MnO MgO CaO Na2O K2O
#[1,] 0.10 16.5555556 NA NA 0.8908607 0.8987269 0.1835206 0.08333333 0.03680982 0.04175365 0.04823151 0.5738562
#[2,] 0.40 125.8333333 NA NA 0.5510204 0.4456019 0.2359551 0.08333333 0.04294479 0.01878914 0.04501608 0.2588235
#[3,] 0.85 0.6111111 NA NA 1.0021295 1.0162037 0.7715356 1.08333333 0.53987730 0.69728601 1.03858521 1.0457516
#[4,] 0.15 48.0555556 NA NA 1.1027507 0.2569444 NA 0.08333333 0.01840491 0.01878914 0.04180064 0.1647059
#[5,] 0.85 NA NA NA 1.0889086 1.0271991 0.6591760 0.75000000 0.59509202 0.53862213 1.02250804 1.1228758
#[6,] NA NA NA NA 1.3426797 0.6319444 0.0411985 0.08333333 0.03067485 0.11899791 0.65594855 0.7764706
# TiO2 P2O5 LOI LOI2 Total Total 2 Fe2O3(T)
#[1,] 0.7924528 0.3928571 7.0841837 6.6963855 0.9922233 0.9894632 0.14489796
#[2,] 0.5094340 0.3214286 14.5561224 13.7710843 0.9958126 0.9936382 0.31020408
#[3,] 0.8679245 0.6428571 1.5637755 1.5228916 0.9990030 0.9970179 0.80612245
#[4,] 1.4905660 0.2857143 7.4056122 7.0024096 0.9795613 0.9769384 0.05510204
#[5,] 1.0377358 0.2500000 0.3520408 0.3783133 0.9969093 0.9960239 0.74489796
#[6,] 0.3018868 0.2500000 1.2551020 1.1879518 1.0019940 1.0000000 0.04489796
Or use sweep
sweep(data.mat, MARGIN = 2, unlist(vector), FUN = `/`)
Or using mapply with asplit
mapply(`/`, asplit(data.mat, 2), vector)
data
# Example matrix (6 rows x 19 named columns). It is assigned to `data.mat`
# because that is the name the division snippets in this answer refer to.
data.mat <- structure(c(0.2, 0.8, 1.7, 0.3, 1.7, NA, 5.96, 45.3, 0.22, 17.3,
NA, NA, NA, 6.72, NA, 4.08, 0.06, 0.16, NA, NA, NA, NA, NA, NA,
50.2, 31.05, 56.47, 62.14, 61.36, 75.66, 15.53, 7.7, 17.56, 4.44,
17.75, 10.92, 0.49, 0.63, 2.06, NA, 1.76, 0.11, 0.01, 0.01, 0.13,
0.01, 0.09, 0.01, 0.06, 0.07, 0.88, 0.03, 0.97, 0.05, 0.2, 0.09,
3.34, 0.09, 2.58, 0.57, 0.15, 0.14, 3.23, 0.13, 3.18, 2.04, 4.39,
1.98, 8, 1.26, 8.59, 5.94, 0.42, 0.27, 0.46, 0.79, 0.55, 0.16,
0.11, 0.09, 0.18, 0.08, 0.07, 0.07, 27.77, 57.06, 6.13, 29.03,
1.38, 4.92, 27.79, 57.15, 6.32, 29.06, 1.57, 4.93, 99.52, 99.88,
100.2, 98.25, 99.99, 100.5, 99.54, 99.96, 100.3, 98.28, 100.2,
100.6, 0.71, 1.52, 3.95, 0.27, 3.65, 0.22), .Dim = c(6L, 19L), .Dimnames = list(
NULL, c("FeO", "Total S", "SO4", "Total N", "SiO2", "Al2O3",
"Fe2O3", "MnO", "MgO", "CaO", "Na2O", "K2O", "TiO2", "P2O5",
"LOI", "LOI2", "Total", "Total 2", "Fe2O3(T)")))
vector <- structure(list(FeO = 2, `Total S` = 0.36, SO4 = NA_real_, `Total N` = NA_real_,
SiO2 = 56.35, Al2O3 = 17.28, Fe2O3 = 2.67, MnO = 0.12, MgO = 1.63,
CaO = 4.79, Na2O = 3.11, K2O = 7.65, TiO2 = 0.53, P2O5 = 0.28,
LOI = 3.92, LOI2 = 4.15, Total = 100.3, `Total 2` = 100.6,
`Fe2O3(T)` = 4.9), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame"))
To divide data frame, df, by the third row:
df/df[rep(3, nrow(df)), ]

How to compute/plot efficient frontiers per time period in one graph in R?

Currently we compute and sort data of stocks (X1 to X10). Historical data is stored in Excel and R for the time period 1950-1980, 1980-1999 and for 1950-1999.
The dataset:
date X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
1 1950-01-01 5.92 6.35 4.61 4.08 5.47 3.90 2.35 1.49 2.27 0.82
2 1950-02-01 2.43 2.16 2.10 1.58 -0.05 1.14 1.51 1.52 2.02 1.12
3 1950-03-01 -0.81 0.21 -1.67 -0.02 -0.79 0.18 -0.22 1.03 0.12 1.75
4 1950-04-01 5.68 6.45 5.41 5.94 6.10 5.87 3.82 3.34 3.44 3.97
5 1950-05-01 3.84 1.60 1.64 3.33 2.54 2.12 4.46 2.83 3.82 4.75
6 1950-06-01 -9.88 -10.56 -8.02 -7.86 -7.27 -7.44 -7.13 -7.76 -6.32 -5.04
7 1950-07-01 9.09 8.76 7.31 5.88 3.84 4.61 3.09 3.07 1.41 0.42
598 1999-10-01 -0.95 -1.88 -1.25 -0.52 1.65 0.72 5.41 4.38 5.58 6.59
599 1999-11-01 11.57 9.15 8.17 7.14 6.15 4.95 5.78 4.21 1.55 2.15
600 1999-12-01 12.32 14.97 9.29 11.77 11.09 5.89 11.88 11.26 6.23 5.64
The main question is: we would like to compute and plot the efficient frontiers for these time periods in one graph, to see how the efficient frontier has evolved. Are there ways to do this in R?
The efficient frontier is the set of optimal portfolios that offers the highest expected return for a defined level of risk or the lowest risk for a given level of expected return.
In modern portfolio theory, the efficient frontier (or portfolio frontier) is an investment portfolio which occupies the 'efficient' parts of the risk-return spectrum. Formally, it is the set of portfolios which satisfy the condition that no other portfolio exists with a higher expected return but with the same standard deviation of return.
So, how would one go about computing this in R?
dput sample data (first 50 rows)
> dput(head(data,50))
structure(list(X__1 = structure(c(-631152000, -628473600, -626054400,
-623376000, -620784000, -618105600, -615513600, -612835200, -610156800,
-607564800, -604886400, -602294400, -599616000, -596937600, -594518400,
-591840000, -589248000, -586569600, -583977600, -581299200, -578620800,
-576028800, -573350400, -570758400, -568080000, -565401600, -562896000,
-560217600, -557625600, -554947200, -552355200, -549676800, -546998400,
-544406400, -541728000, -539136000, -536457600, -533779200, -531360000,
-528681600, -526089600, -523411200, -520819200, -518140800, -515462400,
-512870400, -510192000, -507600000, -504921600, -502243200), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), X__2 = c(5.92, 2.43, -0.81, 5.68,
3.84, -9.88, 9.09, 4.93, 3.99, -0.5, 3.09, 15.77, 8.22, 0.36,
-7.36, 3.84, -2.81, -7.12, 3.57, 6.59, 1.04, -1.41, -1.42, -0.53,
1.86, -3.25, 0.68, -4.4, 0.57, 2.5, -0.36, -0.74, -1.11, -0.58,
3.22, 0.33, 5.01, 2.75, -1.25, -2.13, 1.3, -4.42, 0.25, -5.56,
-4.09, 2.71, 2.01, -3.15, 8.48, -0.16), X__3 = c(6.35, 2.16,
0.21, 6.45, 1.6, -10.56, 8.76, 4.63, 3.52, -1.2, 3.36, 10.98,
8.41, 0.81, -4.01, 3.56, -4.27, -6.11, 4.7, 5.3, 2.73, -3.07,
-0.13, 0.6, 1.1, -2.77, 2.37, -4.5, 1.87, 3.18, 1.51, 0.43, -1.91,
-1.52, 4.91, 1.43, 3.4, 3.03, -2.25, -2, 0.34, -4.75, 2.24, -6.53,
-1.87, 1.97, 1.78, -2.96, 7.38, 0.43), X__4 = c(4.61, 2.1, -1.67,
5.41, 1.64, -8.02, 7.31, 4.56, 5.18, -0.46, 3.52, 10.78, 8.46,
0.28, -4.88, 4.26, -3.25, -6.76, 6.78, 4.99, 3.86, -2.57, 0.59,
0.16, 1.75, -2.04, 2.49, -5.29, 1.76, 2.88, 0.76, 0.67, -1.67,
-1.45, 5.69, 2.95, 3.66, 1.15, -1.58, -2.34, 0.51, -3.82, 0.72,
-6.25, -2.33, 3.1, 2.19, -2.63, 7.3, 1.82), X__5 = c(4.08, 1.58,
-0.02, 5.94, 3.33, -7.86, 5.88, 4.68, 5.99, 0.75, 2.68, 9.29,
8, 1.08, -3.13, 4.21, -3.35, -5.01, 5.77, 4.85, 2.73, -3.44,
0.27, 1.56, 1.62, -2.35, 2.93, -4.62, 2.36, 2.56, 0.86, 0.16,
-1.8, -2.04, 5.12, 2.72, 3.21, 1.21, -2.17, -1.84, 0.32, -3.63,
1.47, -5.16, -0.65, 3.33, 1.34, -1.36, 6.24, 1.19), X__6 = c(5.47,
-0.05, -0.79, 6.1, 2.54, -7.27, 3.84, 6.29, 4.46, -0.24, 2.42,
6.12, 8.63, 0.88, -3.31, 4.56, -2.14, -5.62, 5.73, 5.36, 2.44,
-1.88, 0.83, 0.65, 1.47, -1.81, 2.31, -4.48, 2.56, 2.69, 0.9,
0.34, -0.62, -1.58, 6.59, 0.86, 3.58, 1.92, -1.85, -2.79, 0.7,
-3.4, 1.26, -5.26, -1.18, 4.26, 1.35, -0.97, 6.66, 1.77), X__7 = c(3.9,
1.14, 0.18, 5.87, 2.12, -7.44, 4.61, 4.57, 6.14, -0.84, 4.22,
8.37, 7.44, 0.69, -4.26, 4.13, -2.24, -6.75, 5.81, 4.35, 1.98,
-2.87, 0.93, 0.61, 1.27, -2.18, 2.97, -4.09, 2.27, 2.96, 1.16,
-0.38, -2.37, -0.71, 5.53, 2.45, 1.3, 0.31, -0.47, -2.03, 0.14,
-3.26, 1.79, -5.5, -1.47, 4.18, 1.96, -1.35, 7.06, 1.69), X__8 = c(2.35,
1.51, -0.22, 3.82, 4.46, -7.13, 3.09, 5.01, 5.84, -1.05, 3.81,
7.54, 6.46, 0.71, -3.56, 4.42, -1.87, -4.52, 7.3, 3.66, 2.11,
-2.92, 2.25, 2.17, 1.32, -1.71, 3.17, -4.63, 2.59, 3.89, 0.49,
0.21, -1.71, -1.18, 4.95, 3.21, 1.41, 0.89, -1.02, -2.89, 0.59,
-2.67, 1.47, -4.62, -0.69, 4.07, 2.83, -1.44, 6.11, 1.58), X__9 = c(1.49,
1.52, 1.03, 3.34, 2.83, -7.76, 3.07, 3.72, 6.21, -1.66, 3.46,
6.14, 7.17, 2.13, -3.19, 4.59, -2.65, -3.5, 7.43, 3.5, 2.41,
-2.73, 1.35, 1.97, 1.72, -1.8, 4.06, -5.35, 2.57, 3.14, 1.89,
-0.86, -1.73, -0.95, 6.07, 1.73, 1.09, 0.37, -1.34, -2.48, 0.31,
-3.2, 1.34, -4.99, -0.18, 4.35, 3.03, 0.09, 5.65, 2.39), X__10 = c(2.27,
2.02, 0.12, 3.44, 3.82, -6.32, 1.41, 4.54, 5.55, -0.97, 3.8,
5.69, 5.65, 1.78, -2.6, 4.21, -1.29, -2.63, 7.15, 3.52, 1.85,
-2.32, 0.96, 2.74, 1.9, -2.6, 3.83, -4.31, 3.15, 2.76, 0.93,
-0.39, -1.86, -1.57, 7.05, 2.36, -0.33, -0.23, -0.54, -2.6, 0.61,
-2.37, 2.12, -3.76, 0.47, 3.98, 3.03, 0.2, 5.63, 1.26), X__11 = c(0.82,
1.12, 1.75, 3.97, 4.75, -5.04, 0.42, 4.96, 4.32, 0.25, 2.26,
4.71, 5.05, 1.63, -1.53, 5.12, -2.59, -1.92, 6.89, 4.48, -0.09,
-2.49, 0.26, 4.03, 1.37, -2.82, 4.95, -5.1, 3.4, 4.29, 0.89,
-1.06, -2.18, -0.31, 5.76, 3.32, -1.04, -0.63, -1.78, -2.97,
0.55, -1.3, 2.75, -4.47, 0.48, 4.83, 2.85, 0.27, 4.4, 1.93)), .Names = c("date",
"X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8",
"X9", "X10"), row.names = c(NA, 50L), class = c("tbl_df",
"tbl", "data.frame"))
After some correspondence in the comments with @Jonathan, I widened the example data from 3 columns to 12 columns with some sampling. The code in the "With short-selling" section of the blog scales well for 10K observations:
# using code at:
# https://www.r-bloggers.com/a-gentle-introduction-to-finance-using-r-efficient-frontier-and-capm-part-1/
# https://datashenanigan.wordpress.com/2016/05/24/a-gentle-introduction-to-finance-using-r-efficient-frontier-and-capm-part-1/
library(data.table)
#' Compute the scalar parameters used to describe an efficient frontier
#'
#' With `retbar` the vector of column means of `rets`, `S` the covariance
#' matrix of `rets` and `1` a column vector of ones, this computes
#' `alpha = 1' S^-1 1`, `beta = 1' S^-1 retbar`,
#' `gamma = retbar' S^-1 retbar` and `delta = alpha * gamma - beta^2`.
#'
#' @param rets A numeric matrix or data frame with one return series per
#'   column. Missing values are dropped (`na.rm = TRUE`) when computing the
#'   column means and the covariance matrix.
#' @return A named list with numeric elements `alpha`, `beta`, `gamma` and
#'   `delta`.
#' @details `solve()` signals an error if the covariance matrix is singular.
calcEFParams <- function(rets) {
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned, which
  # would silently switch off the NA handling.
  retbar <- colMeans(rets, na.rm = TRUE)
  covs <- var(rets, na.rm = TRUE) # covariance matrix of the returns
  invS <- solve(covs)
  ones <- matrix(1, nrow = length(retbar))

  alpha <- t(ones) %*% invS %*% ones
  beta <- t(ones) %*% invS %*% retbar
  gamma <- t(retbar) %*% invS %*% retbar
  delta <- alpha * gamma - beta * beta

  # Each quantity is a 1 x 1 matrix; return plain numerics.
  list(
    alpha = as.numeric(alpha),
    beta = as.numeric(beta),
    gamma = as.numeric(gamma),
    delta = as.numeric(delta)
  )
}
# load data
# Read the example CSV from the URL below and convert it to a data.table.
link <- "https://raw.githubusercontent.com/DavZim/Efficient_Frontier/master/data/mult_assets.csv"
df <- data.table(read.csv(link))
# Widen the data: apply sample() to every column (.SD) to get an independently
# shuffled copy of each one, then bind the copies on as extra columns.
# sample() is random, so results differ between runs unless a seed is set.
df2 <- df[,lapply(.SD, sample),]
df3 <- cbind(df, df2)
# Repeat the shuffle-and-bind step on the widened table, doubling the column
# count again.
df4 <- df3[,lapply(.SD, sample),]
df5 <- cbind(df3, df4)
Now loading the microbenchmark package, the performance is as such:
> library(microbenchmark)
> microbenchmark(calcEFParams(df5), times = 10)
Unit: milliseconds
expr min lq mean median uq max neval
calcEFParams(df5) 2.692514 2.764053 2.795127 2.777547 2.805447 3.024349 10
It seems that David Zimmermann's code is scalable and efficient enough!

Convert object list to obtain rownames R

I am having trouble extracting rownames from an object.
When I type in rownames(object), I obtain "null", but if I type in object, I obtain the matrix of information. If it helps, when I type in class(object), it tells me that it is a list. What I am looking for is a method to obtain the row names on the side. Thanks!
>
alpha84 alpha91 alpha98 alpha105 alpha112 alpha119
YBR088C 1.08 0.27 0.04 -0.51 -0.80 -0.89
YDL003W 0.62 -0.01 -0.36 -0.04 -0.55 -0.55
YDR097C 0.64 0.18 -0.05 0.03 -0.76 -0.66
YDR507C 0.53 0.13 0.07 0.14 -0.56 -0.41
YER070W 0.73 0.20 0.00 0.11 -0.53 -0.72
YER095W 0.28 -0.05 -0.11 -0.13 -0.87 -0.90
YER111C 0.37 -0.19 -0.11 -0.54 -0.34 -0.47
YGR189C 0.81 0.12 0.15 -0.39 -0.60 -1.20
YKL045W 0.46 -0.27 -0.10 -0.23 -0.42 -1.21
YLR183C 0.96 0.14 0.28 -0.17 -0.14 -0.68
YML027W 0.50 -0.01 0.11 -0.33 -0.44 -0.94
YMR179W 0.42 0.04 -0.40 -0.47 -0.12 -0.61
YNL300W 0.79 0.33 0.54 -0.09 -0.31 -1.01
YOR074C 0.73 0.09 -0.27 -0.22 -0.62 -0.80
YPL163C 1.61 0.84 0.82 -0.09 -0.48 -0.97
YPL256C 1.10 0.56 0.18 -0.32 -0.38 -1.04
structure(list(4 = structure(list(alpha0 = c(-1.15, -1.22,
-0.72, -1.76, -1.46, -0.57, -1.21, -0.32, -0.8, -1.7, -1.72,
-1.3, -1.24, -1.14, -2.42, -1.41), alpha7 = c(-0.86, -0.74, -0.85,
-0.34, -0.76, 0.42, -0.26, -0.65, 0.01, -1.46, -0.66, 0.07, -0.78,
-0.31, -2.15, -0.69), alpha14 = c(1.21, 1.34, 0.54, 0.18, 1.08,
1.03, 1.36, 0.87, 0.86, 0.93, 1.73, 0.98, 0.31, 0.57, 0.66, 1.39
), alpha21 = c(1.62, 1.5, 1.04, 1.07, 1.5, 1.35, 1.37, 1.1, 0.84,
1.12, 1.29, 1.12, 1.46, 1.08, 1.98, 1.98), alpha28 = c(1.12,
0.63, 0.84, 0.37, 0.74, 0.64, 0.54, 1.17, 0.51, 0.91, 0.51, 0.13,
1.11, 1.17, 1.55, 0.74), alpha35 = c(0.16, 0.29, 0.24, 0.32,
0.47, 0.42, 0.18, 0.44, 0.14, 0.11, 0.28, 0.19, 0.62, 0.57, 0.78,
0.21), alpha42 = c(-0.44, -0.55, -0.64, -0.5, -0.7, -0.4, -0.85,
0.37, -0.4, 0, 0.23, -0.58, 0.07, 0.31, 0.14, -0.36), alpha49 = c(-0.93,
-0.65, -0.83, -0.25, -0.68, -0.9, -0.82, -0.93, -0.64, -0.73,
-0.55, -0.63, -0.23, -0.74, -0.94, -1.32), alpha56 = c(-1.23,
-0.76, -0.36, -0.48, -1.03, -0.73, -0.75, -1.45, -0.8, -0.9,
-0.97, -0.9, -0.58, -0.68, -1.03, -1.5), alpha63 = c(-0.62, -0.88,
-0.7, -0.25, -0.55, -0.47, 0.07, -0.57, 0.41, -0.46, -0.48, 0.09,
-1.01, -0.1, -1.5, -1.07), alpha70 = c(0.62, 0.69, 0.99, 0.79,
0.35, 0.2, 0.89, 0.15, 0.88, 0.85, 0.57, 0.54, -0.24, -0.38,
-0.03, 0.35), alpha77 = c(1.3, 1.25, 1.08, 0.97, 1.24, 0.78,
0.78, 0.92, 0.75, 0.93, 0.88, 1.44, 0.23, 0.75, 1.25, 1.57),
alpha84 = c(1.08, 0.62, 0.64, 0.53, 0.73, 0.28, 0.37, 0.81,
0.46, 0.96, 0.5, 0.42, 0.79, 0.73, 1.61, 1.1), alpha91 = c(0.27,
-0.01, 0.18, 0.13, 0.2, -0.05, -0.19, 0.12, -0.27, 0.14,
-0.01, 0.04, 0.33, 0.09, 0.84, 0.56), alpha98 = c(0.04, -0.36,
-0.05, 0.07, 0, -0.11, -0.11, 0.15, -0.1, 0.28, 0.11, -0.4,
0.54, -0.27, 0.82, 0.18), alpha105 = c(-0.51, -0.04, 0.03,
0.14, 0.11, -0.13, -0.54, -0.39, -0.23, -0.17, -0.33, -0.47,
-0.09, -0.22, -0.09, -0.32), alpha112 = c(-0.8, -0.55, -0.76,
-0.56, -0.53, -0.87, -0.34, -0.6, -0.42, -0.14, -0.44, -0.12,
-0.31, -0.62, -0.48, -0.38), alpha119 = c(-0.89, -0.55, -0.66,
-0.41, -0.72, -0.9, -0.47, -1.2, -1.21, -0.68, -0.94, -0.61,
-1.01, -0.8, -0.97, -1.04)), .Names = c("alpha0", "alpha7",
"alpha14", "alpha21", "alpha28", "alpha35", "alpha42", "alpha49",
"alpha56", "alpha63", "alpha70", "alpha77", "alpha84", "alpha91",
"alpha98", "alpha105", "alpha112", "alpha119"), row.names = c("YBR088C",
"YDL003W", "YDR097C", "YDR507C", "YER070W", "YER095W", "YER111C",
"YGR189C", "YKL045W", "YLR183C", "YML027W", "YMR179W", "YNL300W",
"YOR074C", "YPL163C", "YPL256C"), class = "data.frame")), .Names = "4")
You have a list of one element. This single element is a data.frame.
If you are after the rownames from this object, then index the list appropriately
rownames(object[[1]])
## [1] "YBR088C" "YDL003W" "YDR097C" "YDR507C" "YER070W" "YER095W" "YER111C" "YGR189C" "YKL045W" "YLR183C" "YML027W"
## [12] "YMR179W" "YNL300W" "YOR074C" "YPL163C" "YPL256C"
For a more general list of data.frames
# get rownames from all data.frames in a list
lapply(object, rownames)
If you want a data.frame, not a list containing a data.frame then you could simply assign the results from the first element to a separate element
object.df <- object[[1]]
If it is a list of data.frames, it probably is more idiomatically R to keep it in the list, and use lapply to work on each element.

Resources