for loop a non-parametric test filtered by year and save each year's result as a data frame - r

I'm trying to create and print a list of data frames that are the result of the Mann-Whitney-Wilcoxon Test.
My code currently runs the Mann-Whitney-Wilcoxon Test on all the observations and compares the two data frames, ORATIOS and KFMARATIOS.
library(tidyverse)
library(devtools)
library(inspectdf)
library(readr)
library(broom)
library(knitr)
library(readxl)
library(skimr)
library(kableExtra)
list_ratio <- grep("RATIO",colnames(ORATIOS), value=TRUE)
MWU_pvalues <- unlist(Map(function(a,b) wilcox.test(a, b)$p.value, ORATIOS[list_ratio], KFMARATIOS[list_ratio]))
MWU_pvalues <- as.data.frame(MWU_pvalues) %>%
rename(`P VALUE` = MWU_pvalues)
MWU_pvalues <- tibble::rownames_to_column(MWU_pvalues, "RATIO") %>%
mutate(`Significance` = if_else(`P VALUE` > 0.05, "",
if_else(`P VALUE` <= 0.05 & `P VALUE` >= 0.01, "\\*",
if_else(`P VALUE` <= 0.01 & `P VALUE` >= 0.001, "**", "***"))))
kable(MWU_pvalues) %>%
kable_styling()
How would I create a for loop or lapply filtering on each year, running the above test, saving each result as a dataframe into a list of dataframes? I'd like to have each dataframe for each year printed using kable in my RMarkdown file.
Sample data:
ORATIOS:
structure(list(YEAR = c(2008, 2009, 2010, 2011, 2012, 2013, 2014,
2015, 2016, 2017, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
2016, 2017), FARM = c("D", "D", "D", "D", "D", "D", "D", "D",
"D", "D", "I", "I", "I", "I", "I", "I", "I", "I", "I", "I"),
`CURRENT RATIO` = c(0.568022785746452, 0.329854720020037,
0.832073159580644, 0.643108790851367, 25.1454874121908, 14.5975395062397,
5.12537888750377, 5.20160770260219, 7.64257374037806, 2.1580962424325,
1.31703632160198, 0.125166573684741, 0.0680923398879462,
0.100452384108057, 0.0998706900125819, 0.0907309088049343,
0.521537398114045, 0.773433351511582, 0.174099653043861,
0.0804425861373205), `WORKING CAPITAL TO GROSS FARMING INCOME` = c(-0.132573843177753,
-0.419436996986394, -0.031444400685141, -0.114022796397208,
1.22962822585944, 0.397841184148093, 0.239623650110705, 0.295681875030473,
0.502930206605254, 0.41862926754376, 0.0513905118422565,
-0.406448322702947, -0.343476652794216, -0.366684678854441,
-0.27321810774102, -0.306827980132377, -0.173010159020099,
-0.140768598200492, -0.367184395657858, -0.888263538055031
), `DEBT TO TOTAL ASSET RATIO` = c(0.0846892634197993, 0.102127561711337,
0.0750728145035032, 0.0797349374471145, 0.0122514875519798,
0.0162967044282012, 0.0165670856047258, 0.0188732833402721,
0.0150968780472965, 0.0275252089477482, 0.1123291162633,
0.151496340475165, 0.0960615511639704, 0.0985641068765839,
0.119816717131179, 0.121164074695269, 0.0970056997272376,
0.139114211255347, 0.0686657852466466, 0.17098484263781),
`DEBT TO FARM ASSET RATIO` = c(0.0935832744841849, 0.114259598684054,
0.0824723632268821, 0.08365143337564, 0.0129689938858425,
0.0191316764222117, 0.0216751963945452, 0.0225358439285237,
0.0167830935834987, 0.030821228954403, 0.140068283663094,
0.203393535891141, 0.133942894025292, 0.137887444914688,
0.17818477721901, 0.182143899668642, 0.141540075268137, 0.212926916788055,
0.0962721755129152, 0.172706971368876), `EQUITY TO ASSET RATIO` = c(0.915310736580201,
0.897872438288663, 0.924927185496497, 0.920265062552885,
0.98774851244802, 0.983703295571799, 0.983432914395274, 0.981126716659728,
0.984903121952704, 0.972474791052252, 0.8876708837367, 0.848503659524835,
0.90393844883603, 0.901435893123416, 0.880183282868821, 0.878835925304732,
0.902994300272762, 0.860885788744653, 0.931334214753353,
0.82901515736219), `DEBT TO EQUITY RATIO` = c(0.0925251502415636,
0.113743954437438, 0.0811661887343104, 0.0866434472975902,
0.0124034482437396, 0.0165666868267717, 0.0168461776723358,
0.0192363361631072, 0.0153282873318188, 0.0283042904566863,
0.126543652970169, 0.178545300040313, 0.106270013503315,
0.109341227289126, 0.13612700838927, 0.137868823072129, 0.107426702137473,
0.161594270778014, 0.0737284040024573, 0.206250562633691),
`RETURN ON FARM ASSETS` = c(0.0170145283510924, -0.00522377886147693,
0.0237250420249203, 0.00257743472229431, 0.0213365859181817,
0.0244609737360482, 0.0279373354305636, 0.0167869242322396,
0.0572363957452595, -0.00273821783417637, 0.0325678749005671,
-0.0532931806283685, 0.024215521265722, -0.0178636730481072,
0.0189254399688753, 0.00211416100547258, -0.00938005681041073,
0.0501921695586829, 0.0215269026374393, -0.0366154070757298
), `RETURN ON ASSETS` = c(0.0566608458884666, 0.0239054711694685,
0.0264084815850861, 0.00576204495548541, 0.179667366138176,
0.0246773695339781, 0.0246552659101915, 0.020526505137709,
0.0551370549195115, -5.05665725060606e-05, 0.0449112877923212,
-0.0284073208306705, 0.0249952584312144, -0.00283565027536605,
0.0360687362998932, 0.0080927754538142, -0.00331579015236834,
0.0457634829675583, 0.0229640648122328, -0.023016837706958
), `RETURN ON EQUITY` = c(0.0168221490501512, -0.00520020437367425,
0.023349291367177, 0.00266962346623839, 0.0204061503508897,
0.0211814836515069, 0.0217131742563291, 0.0143291246913213,
0.0522749822883451, -0.002514608130223, 0.0294232052511338,
-0.0467824450944562, 0.0192125442012039, -0.0141654371518756,
0.0144583817182496, 0.00160025611694793, -0.00711931632857772,
0.0380917883044123, 0.0164860113123938, -0.0437269454184399
), `FARM OPERATING PROFIT MARGIN RATIO` = c(0.113108456739495,
-0.0455472105804567, 0.199838203998892, 0.0234275923606582,
0.158472105656006, 0.183710042172317, 0.190582976791897,
0.124927655425634, 0.45847835351018, -0.0422031337055503,
0.122121670323183, -0.243017854350921, 0.11277681710057,
-0.0790679940692684, 0.076084143213901, 0.00890894198839937,
-0.0450368591167229, 0.204577659697265, 0.13619384495868,
-0.358538500350435), `ASSET TURNOVER RATIO` = c(0.0153974936379558,
-0.00466912018059027, 0.0215963943475807, 0.00245676120615052,
0.0201561446538819, 0.0208362952730876, 0.0213534502396742,
0.0140586870610039, 0.0514857932558134, -0.00244539301601691,
0.0261181226076402, -0.0396950758641658, 0.0173669574034299,
-0.0127692334904846, 0.0127260258857395, 0.00140636256526249,
-0.00642870206654449, 0.0327926792191383, 0.0153539864000432,
-0.0362503005370359), `OPERATING EXPENSE RATIO` = c(0.671535228245263,
0.773166498456329, 0.607985458258, 0.724432447012029, 0.67336000606662,
0.64796797949329, 0.589032574693052, 0.74988495257417, 0.461775664398759,
0.862141471389961, 0.672863504023624, 0.980455882037588,
0.669661413731221, 0.86690216270866, 0.670033358895902, 0.737005445439968,
0.783494244501376, 0.649760819934915, 0.706382908455109,
1.134948535946), `DEPRECIATION EXPENSE RATIO` = c(0.12660532789432,
0.132732814909818, 0.103826844188336, 0.144629676126728,
0.140059287930065, 0.157478624539652, 0.141620283491016,
0.0919194664659044, 0.0583370508964949, 0.133579109920113,
0.150646135557582, 0.183514628711121, 0.146236932328879,
0.16125312788589, 0.191531747619893, 0.197293862401247, 0.193527787561396,
0.0913809290148264, 0.0946887014018637, 0.145522583536315
), `INTEREST EXPENSE RATIO` = c(0.0887509871209225, 0.139647897214309,
0.0883494935547731, 0.107510284500585, 0.028108600347309,
0.0108433537947408, 0.0787641650240354, 0.0332679255342914,
0.0214089311945663, 0.0464825523954769, 0.0543686900956105,
0.0790473436022124, 0.0713248368393299, 0.0509127034747178,
0.0623507502703033, 0.0567917501703862, 0.068014827053951,
0.0542805913529945, 0.0627345451843474, 0.0780673808681226
), `NET FARM INCOME RATIO` = c(0.113108456739495, -0.0455472105804567,
0.199838203998892, 0.0234275923606582, 0.158472105656006,
0.183710042172317, 0.190582976791897, 0.124927655425634,
0.45847835351018, -0.0422031337055503, 0.122121670323183,
-0.243017854350921, 0.11277681710057, -0.0790679940692684,
0.076084143213901, 0.00890894198839937, -0.0450368591167229,
0.204577659697265, 0.13619384495868, -0.358538500350435)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -20L))
KFMARATIOS:
structure(list(YEAR = c(2008, 2008, 2008, 2008, 2008, 2008, 2008,
2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008,
2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008,
2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008
), FARM = c(11407100, 11484600, 11485100, 11495100, 11801800,
11806400, 11820000, 11885400, 11886000, 11897200, 11897300, 12004500,
12004501, 12303001, 12340101, 12398300, 13050001, 13700201, 13705601,
14089100, 14110900, 14130000, 14130002, 14184100, 14192300, 14330302,
14388200, 14783200, 14786200, 15094200, 15096200, 15584200, 15586100,
15682100, 15683100, 15689100, 16507002, 16580000, 16598200, 16601300
), `CURRENT RATIO` = c(-3, 0, 4.57, 15.94, 2.22, 0, 368.69, 1.86,
9.1, 3.45, 2, 0, 1.58, 6.26, 1.97, 1.54, 0, 3.39, 313.09, 5.59,
5.4, 0, 3.6, 5.78, 3.18, 207.1, 2.36, 28.31, 3.4, 3.68, 0.37,
3.5, 5.6, 13.64, 7.05, 0, 2.23, 0.89, 4.4, 1.11), `WORKING CAPITAL TO GROSS FARMING INCOME` = c(0.783990044655886,
0.939342207539837, 0.468883358203084, 0.53708199556795, 0.429230789973027,
0.856616290636639, 0.46085746623408, 0.019246546772549, 1.04338230212655,
0.318770448161572, 0.398058372857175, 0.506978780306214, 0.263816960947357,
0.4960655740923, 0.101962576323424, 0.220623464476751, 1.12676140487953,
0.533690322762107, 0.685276501922026, 0.703540899065169, 0.660869855557338,
0.71777803486123, 0.319578323479609, 0.722736340214157, 0.286630301648443,
0.818610240507597, 0.184477489966846, 0.78148168000963, 0.357891811040315,
0.289159422203956, -0.125641128630768, 0.392321597654173, 0.561996673317676,
0.353452531903466, 0.683345718597063, 0.804567295215173, 0.307398272114796,
-0.375449779668313, 0.186702574682293, -0.55737251721071), `DEBT TO TOTAL ASSET RATIO` = c(0.02,
0.07, 0.27, 0.37, 0.36, 0, 0.07, 0.37, 0.05, 0.33, 0.42, 0.08,
0.24, 0.34, 0.36, 0.51, 0.01, 0.11, 0.1, 0.07, 0.08, 0.01, 0.32,
0.14, 0.4, 0.52, 0.39, 0.06, 0.21, 0.32, 0.43, 0.52, 0.29, 0.12,
0.17, 0.1, 0.15, 0.87, 0.12, 0.69), `DEBT TO FARM ASSET RATIO` = c(0.0210960466847519,
0.0662443993261916, 0.270051570315789, 0.373240578143398, 0.359031265562519,
0, 0.0678176279710153, 0.369000587598404, 0.04831743727994, 0.33065743433488,
0.41680939549244, 0.0851067276205844, 0.245359588845858, 0.337912727823456,
0.356607488633417, 0.508663012923272, 0.0126098421632802, 0.10665178903834,
0.105106247793806, 0.0698908293989529, 0.0818483764283224, 0.00750932570017385,
0.319501072718455, 0.136757510256717, 0.400840648545665, 0.516753083750126,
0.389587948103612, 0.0577299469460252, 0.206521419569117, 0.315261383020663,
0.43256943562472, 0.520491208048298, 0.290288373137576, 0.120229338185664,
0.173192986515349, 0.104536048245734, 0.151997186500475, 0.868552025800098,
0.123958600776313, 0.692195974317741), `EQUITY TO ASSET RATIO` = c(0.98536882817945,
0.944215770167283, 0.736537746555766, 0.729860554651407, 0.642228778874089,
1, 0.94228148558872, 0.630999412401596, 0.95168256272006, 0.66934256566512,
0.592693562701164, 0.914893272379416, 0.813956784138156, 0.688995447780108,
0.725420084109645, 0.545241148972386, 0.988536562104007, 0.900124825958172,
0.90344241855196, 0.930936390469265, 0.92060316189968, 0.992490674299826,
0.758518009863028, 0.881474617998699, 0.600468426703118, 0.553595877267449,
0.667405715763261, 0.942270053053975, 0.842757601135073, 0.708413078986436,
0.56743056437528, 0.533041296742996, 0.743304732269968, 0.88511363093375,
0.831970255984885, 0.904591907651469, 0.876296809602567, 0.131447974199902,
0.890119750534961, 0.307804025682259), `DEBT TO EQUITY RATIO` = c(0.02,
0.07, 0.37, 0.6, 0.56, 0, 0.07, 0.58, 0.05, 0.49, 0.72, 0.09,
0.32, 0.51, 0.55, 1.04, 0.01, 0.12, 0.12, 0.08, 0.09, 0.01, 0.47,
0.16, 0.67, 1.07, 0.64, 0.06, 0.26, 0.46, 0.76, 1.08, 0.41, 0.14,
0.21, 0.12, 0.18, 6.61, 0.14, 2.25), `RETURN ON FARM ASSETS` = c(0.374484329540697,
0.0498819566035984, 0.181954755022922, 0.193161758267218, 0.0473627311001023,
0.327305563029612, 0.603037930741254, -0.0156737997438482, 0.10397858597475,
0.10789191406389, 0.180771277730155, 0.150007797084, 0.174196776278552,
0.120122100767257, 0.298096858936563, 0.0517125227815447, 0.111597414809764,
0.185024421154621, 0.239979711875599, 0.0808784377916965, 0.201436668181771,
0.135024051506645, 0.251851638310215, 0.103285147847268, 0.14207589091784,
0.247675592658745, 0.100067311604358, 0.308209326567443, 0.154555623216289,
0.174464204907127, 0.00457531564104158, 0.098141499884622, 0.251116584438097,
0.153198476415449, 0.183688952743912, 0.0838032420725189, 0.169288085631256,
0.0279120898963428, 0.147329195543669, 0.034801030826966), `RETURN ON ASSETS` = c(0.260063898261748,
0.0581159003954688, 0.186586004612603, 0.144217266907855, 0.0471965084015535,
0.203276288956977, 0.522691591931166, -0.0156737997438482, 0.104160943214225,
0.110451790466256, 0.178360409188664, 0.150089138729099, 0.134029707705111,
0.120565772385725, 0.229528019076799, 0.0697390623585822, 0.10198296142804,
0.192570247620748, 0.245119340816501, 0.115758491252085, 0.195889106965538,
0.138158444053898, 0.231674956423303, 0.0966027636728098, 0.141766843553559,
0.215113054221126, 0.135495862386357, 0.314351616201071, 0.133076845003381,
0.168262801476855, 0.00457531564104158, 0.0986664889666124, 0.242490501823923,
0.152124266735103, 0.201716489655936, 0.0786665142081486, 0.162659186669921,
0.0279454048764536, 0.134992616527726, 0.034801030826966), `RETURN ON EQUITY` = c(0.263580248064511,
0.0444871419402714, 0.241012793134955, 0.191549228659637, 0.0734886226747657,
0.186089113513671, 0.544673844576945, -0.0248396423765173, 0.109257634896201,
0.161190875342999, 0.298045789765326, 0.163962072531003, 0.162274234481587,
0.160460729376603, 0.31640703656353, 0.0847926292565323, 0.102628180483108,
0.192493344561337, 0.244023637469295, 0.0858503015508329, 0.212255623707772,
0.13604566269794, 0.250952374400512, 0.101551944180348, 0.235835707060263,
0.386487527831846, 0.128000474163853, 0.327092350614891, 0.139632557156543,
0.227780755169442, 0.0080632167674627, 0.165179790324242, 0.298742298993181,
0.165391606109475, 0.214205739228479, 0.084552656304169, 0.157224605882577,
0.212343248849882, 0.146717984157146, 0.113062299136044), `FARM OPERATING PROFIT MARGIN RATIO` = c(0.55,
0.18, 0.29, 0.33, 0.12, 0.46, 0.24, -0.1, 0.14, 0.23, 0.2, 0.22,
0.44, 0.25, 0.33, 0.13, 0.36, 0.44, 0.33, 0.05, 0.32, 0.16, 0.52,
0.3, 0.24, 0.35, 0.2, 0.32, 0.38, 0.29, 0.02, 0.24, 0.36, 0.25,
0.4, 0.18, 0.32, -0.01, 0.08, -0.01), `ASSET TURNOVER RATIO` = c(0.64,
0.2, 0.55, 0.58, 0.29, 0.64, 1.88, 0.39, 0.31, 0.34, 0.72, 0.58,
0.38, 0.41, 0.96, 0.38, 0.26, 0.4, 0.62, 0.41, 0.55, 0.67, 0.53,
0.29, 0.51, 0.86, 0.38, 0.94, 0.4, 0.54, 0.65, 0.49, 0.7, 0.49,
0.41, 0.3, 0.47, 0.62, 0.87, 0.79), `OPERATING EXPENSE RATIO` = c(0.29,
0.57, 0.61, 0.52, 0.69, 0.48, 0.57, 0.89, 0.64, 0.57, 0.72, 0.62,
0.45, 0.55, 0.52, 0.69, 0.49, 0.43, 0.5, 0.75, 0.53, 0.69, 0.38,
0.54, 0.6, 0.54, 0.55, 0.56, 0.5, 0.57, 0.87, 0.61, 0.54, 0.63,
0.44, 0.61, 0.56, 0.82, 0.77, 0.83), `DEPRECIATION EXPENSE RATIO` = c(0.08,
0.16, 0.01, 0.05, 0.07, 0.02, 0.03, 0.09, 0.02, 0.06, 0.03, 0.1,
0.04, 0.08, 0.06, 0.1, 0.06, 0.05, 0.03, 0.04, 0.08, 0.09, 0.04,
0.06, 0.05, 0.01, 0.11, 0.05, 0.04, 0.06, 0.05, 0.08, 0.04, 0.03,
0.06, 0.08, 0.01, 0.1, 0.05, 0.04), `INTEREST EXPENSE RATIO` = c(0.01,
0, 0.03, 0.07, 0.08, 0, 0, 0.06, 0, 0.02, 0.04, 0.01, 0.02, 0.06,
0.03, 0.06, 0, 0, 0.03, 0.01, 0.02, 0, 0.06, 0.01, 0.05, 0, 0.07,
0, 0.04, 0.01, 0.08, 0.1, 0.04, 0.02, 0.03, 0.02, 0.04, 0.04,
0.01, 0.09), `NET FARM INCOME RATIO` = c(0.62, 0.27, 0.35, 0.36,
0.16, 0.5, 0.39, -0.04, 0.34, 0.35, 0.22, 0.28, 0.49, 0.31, 0.39,
0.15, 0.45, 0.51, 0.44, 0.2, 0.37, 0.21, 0.52, 0.39, 0.29, 0.45,
0.27, 0.39, 0.43, 0.36, 0.01, 0.21, 0.37, 0.32, 0.47, 0.28, 0.38,
0.05, 0.17, 0.05)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-40L))

My solution is kind of convuluted but I guess it is never easy to work with list columns,
nested_oratios <- ORATIOS %>%
group_by(YEAR) %>%
nest() %>%
mutate(fake_year = 2008) %>%
ungroup()
nested_kfmaratios <- KFMARATIOS %>%
group_by(YEAR) %>%
nest() %>%
mutate(fake_year = 2008) %>%
ungroup() %>%
select(-YEAR)
nested_comb <- nested_oratios %>%
left_join(nested_kfmaratios,by = c('fake_year'),suffix = c(".oratios", ".kfmaratios")) %>%
select(-fake_year)
logic_pipe <- function(a,b) {
a <- a %>% select(contains('RATIO'))
b <- b %>% select(contains('RATIO'))
MWU_pvalues <- map2(a,b,function(a,b) wilcox.test(a, b)$p.value) %>% unlist()
MWU_pvalues <- as.data.frame(MWU_pvalues) %>%
rename(`P VALUE` = MWU_pvalues)
MWU_pvalues <- tibble::rownames_to_column(MWU_pvalues, "RATIO") %>%
mutate(`Significance` = if_else(`P VALUE` > 0.05, "",
if_else(`P VALUE` <= 0.05 & `P VALUE` >= 0.01, "\\*",
if_else(`P VALUE` <= 0.01 & `P VALUE` >= 0.001, "**", "***"))))
return(MWU_pvalues %>% as_tibble())
}
nested_comb %>%
mutate(result = map2(.x = data.oratios ,.y =data.kfmaratios,logic_pipe))

Consider the apply family with mapply and by (object-oriented wrapper to tapply) that can subset your data by year and pass into a user-defined function. Note: unlist + Map can be replaced with mapply (the underlying function of Map, its wrapper). Below demonstrates with base R where transform replaces mutate and ifelse replaces if_else:
proc_df <- function(df) {
yr <- df$YEAR[1]
MWU_pvalues <- mapply(function(a,b) wilcox.test(a, b)$p.value,
subset(ORATIOS, YEAR==yr)[list_ratio], df[list_ratio])
final_df <- transform(data.frame(ratio = names(MWU_pvalues),
p_value = unname(MWU_pvalues)),
significance = ifelse(p_value > 0.05, "",
ifelse(p_value <= 0.05 & p_value >= 0.01, "*",
ifelse(p_value <= 0.01 & p_value >= 0.001, "**", "***")
)
)
)
return(final_df)
}
df_list <- by(KFMARATIOS, KFMARATIOS$YEAR, proc_df)
Output
df_list$`2008`
# ratio p_value significance
# 1 CURRENT RATIO 0.20349856
# 2 DEBT TO TOTAL ASSET RATIO 0.39154322
# 3 DEBT TO FARM ASSET RATIO 0.52264808
# 4 EQUITY TO ASSET RATIO 0.42276423
# 5 DEBT TO EQUITY RATIO 0.39162003
# 6 FARM OPERATING PROFIT MARGIN RATIO 0.11726414
# 7 ASSET TURNOVER RATIO 0.01957554 *
# 8 OPERATING EXPENSE RATIO 0.24893798
# 9 DEPRECIATION EXPENSE RATIO 0.02588258 *
# 10 INTEREST EXPENSE RATIO 0.10127823
# 11 NET FARM INCOME RATIO 0.06262773

Related

Is there a way to filter out the row that has the highest of three different columns simultaneously?

Is there a way to filter out the row that has the highest of three different columns simultaneously? I am trying to filter out the row that has the best accuracy, specificity, and sensitivity in a data frame.
Pic of first few rows of data
in the data provided the highest for all 3 should be (aka the desired output)
"thresh_info.59 0.60 83.39 83.27684 83.557047"
data<- structure(list(threshold = c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06,
0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17,
0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28,
0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39,
0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5,
0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61,
0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72,
0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83,
0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94,
0.95, 0.96, 0.97, 0.98, 0.99), accuracy = c(61.72, 63.67, 65.29,
66.58, 67.86, 69.01, 69.75, 70.83, 71.51, 72.79, 73.87, 74.54,
75.02, 75.29, 75.83, 76.3, 76.7, 77.25, 77.65, 77.92, 78.33,
79, 79.14, 79.07, 79.41, 79.61, 79.68, 80.28, 80.69, 80.82, 80.89,
81.16, 81.3, 81.77, 81.9, 81.97, 82.17, 82.31, 82.44, 82.65,
82.58, 82.92, 82.98, 83.59, 83.52, 83.59, 83.25, 83.46, 83.39,
83.46, 83.66, 83.73, 83.52, 83.66, 83.93, 83.46, 83.25, 83.32,
83.32, 83.39, 83.39, 82.92, 82.24, 82.04, 81.77, 81.3, 81.5,
81.23, 81.03, 80.89, 80.49, 80.35, 80.01, 80.01, 79.2, 79.14,
78.87, 78.93, 78.6, 77.92, 77.25, 76.91, 76.37, 75.56, 74.81,
73.94, 73.13, 72.79, 71.84, 71.51, 69.89, 68.4, 66.44, 64.82,
63.13, 61.44, 59.08, 55.77, 52.8), sensitivity = c(100, 100,
100, 99.8870056497175, 99.8870056497175, 99.6610169491526, 99.6610169491526,
99.5480225988701, 99.3220338983051, 99.2090395480226, 99.0960451977401,
98.7570621468927, 98.6440677966102, 98.5310734463277, 98.1920903954802,
97.9661016949153, 97.7401129943503, 97.6271186440678, 96.8361581920904,
96.3841807909604, 96.045197740113, 95.9322033898305, 95.5932203389831,
95.2542372881356, 94.9152542372881, 94.6892655367232, 94.2372881355932,
94.1242937853107, 94.1242937853107, 93.6723163841808, 93.1073446327684,
92.8813559322034, 92.6553672316384, 92.4293785310735, 92.316384180791,
91.9774011299435, 91.864406779661, 91.5254237288136, 91.2994350282486,
90.8474576271186, 90.0564971751412, 89.9435028248588, 89.7175141242938,
89.2655367231638, 89.0395480225989, 88.8135593220339, 88.135593220339,
87.909604519774, 87.3446327683616, 87.0056497175141, 86.5536723163842,
86.3276836158192, 85.6497175141243, 85.5367231638418, 85.5367231638418,
84.7457627118644, 84.180790960452, 83.8418079096045, 83.6158192090395,
83.2768361581921, 83.0508474576271, 82.0338983050847, 80.6779661016949,
80.225988700565, 79.3220338983051, 78.1920903954802, 77.9661016949153,
76.9491525423729, 76.1581920903955, 75.4802259887006, 74.3502824858757,
73.7853107344633, 72.8813559322034, 72.4293785310735, 70.8474576271186,
70.1694915254237, 69.1525423728814, 68.8135593220339, 68.135593220339,
66.8926553672316, 65.6497175141243, 64.4067796610169, 63.3898305084746,
61.9209039548023, 60.4519774011299, 58.6440677966102, 57.2881355932203,
56.1581920903955, 54.3502824858757, 53.3333333333333, 50.6214689265537,
48.135593220339, 44.8587570621469, 41.9209039548023, 38.6440677966102,
35.7062146892655, 31.638418079096, 26.1016949152542, 21.1299435028249
), specificity = c(4.86577181208054, 9.73154362416107, 13.758389261745,
17.1140939597315, 20.3020134228188, 23.489932885906, 25.3355704697987,
28.1879194630872, 30.2013422818792, 33.5570469798658, 36.4093959731544,
38.5906040268456, 39.9328859060403, 40.7718120805369, 42.6174496644295,
44.1275167785235, 45.4697986577181, 46.9798657718121, 49.1610738255034,
50.503355704698, 52.0134228187919, 53.8590604026846, 54.6979865771812,
55.0335570469799, 56.3758389261745, 57.2147651006711, 58.0536912751678,
59.7315436241611, 60.738255033557, 61.744966442953, 62.751677852349,
63.758389261745, 64.4295302013423, 65.9395973154362, 66.4429530201342,
67.1140939597315, 67.7852348993289, 68.6241610738255, 69.2953020134228,
70.4697986577181, 71.4765100671141, 72.4832214765101, 72.9865771812081,
75.1677852348993, 75.3355704697987, 75.8389261744966, 76.006711409396,
76.8456375838926, 77.5167785234899, 78.1879194630873, 79.3624161073825,
79.8657718120805, 80.3691275167785, 80.8724832214765, 81.5436241610738,
81.5436241610738, 81.8791946308725, 82.5503355704698, 82.8859060402685,
83.5570469798658, 83.8926174496644, 84.2281879194631, 84.5637583892617,
84.7315436241611, 85.4026845637584, 85.9060402684564, 86.744966442953,
87.5838926174497, 88.255033557047, 88.9261744966443, 89.5973154362416,
90.1006711409396, 90.6040268456376, 91.2751677852349, 91.6107382550336,
92.4496644295302, 93.2885906040269, 93.9597315436242, 94.1275167785235,
94.2953020134228, 94.4630872483222, 95.4697986577181, 95.6375838926175,
95.8053691275168, 96.1409395973154, 96.6442953020134, 96.6442953020134,
97.4832214765101, 97.8187919463087, 98.489932885906, 98.489932885906,
98.489932885906, 98.489932885906, 98.8255033557047, 99.496644295302,
99.6644295302013, 99.8322147651007, 99.8322147651007, 99.8322147651007
)), row.names = c("thresh_info", "thresh_info.1", "thresh_info.2",
"thresh_info.3", "thresh_info.4", "thresh_info.5", "thresh_info.6",
"thresh_info.7", "thresh_info.8", "thresh_info.9", "thresh_info.10",
"thresh_info.11", "thresh_info.12", "thresh_info.13", "thresh_info.14",
"thresh_info.15", "thresh_info.16", "thresh_info.17", "thresh_info.18",
"thresh_info.19", "thresh_info.20", "thresh_info.21", "thresh_info.22",
"thresh_info.23", "thresh_info.24", "thresh_info.25", "thresh_info.26",
"thresh_info.27", "thresh_info.28", "thresh_info.29", "thresh_info.30",
"thresh_info.31", "thresh_info.32", "thresh_info.33", "thresh_info.34",
"thresh_info.35", "thresh_info.36", "thresh_info.37", "thresh_info.38",
"thresh_info.39", "thresh_info.40", "thresh_info.41", "thresh_info.42",
"thresh_info.43", "thresh_info.44", "thresh_info.45", "thresh_info.46",
"thresh_info.47", "thresh_info.48", "thresh_info.49", "thresh_info.50",
"thresh_info.51", "thresh_info.52", "thresh_info.53", "thresh_info.54",
"thresh_info.55", "thresh_info.56", "thresh_info.57", "thresh_info.58",
"thresh_info.59", "thresh_info.60", "thresh_info.61", "thresh_info.62",
"thresh_info.63", "thresh_info.64", "thresh_info.65", "thresh_info.66",
"thresh_info.67", "thresh_info.68", "thresh_info.69", "thresh_info.70",
"thresh_info.71", "thresh_info.72", "thresh_info.73", "thresh_info.74",
"thresh_info.75", "thresh_info.76", "thresh_info.77", "thresh_info.78",
"thresh_info.79", "thresh_info.80", "thresh_info.81", "thresh_info.82",
"thresh_info.83", "thresh_info.84", "thresh_info.85", "thresh_info.86",
"thresh_info.87", "thresh_info.88", "thresh_info.89", "thresh_info.90",
"thresh_info.91", "thresh_info.92", "thresh_info.93", "thresh_info.94",
"thresh_info.95", "thresh_info.96", "thresh_info.97", "thresh_info.98"
), class = "data.frame")
You can filter by the minimum variance across the 3 columns:
library(dplyr)
data |>
tibble::rownames_to_column() |>
rowwise() |>
mutate(var = var(c_across(3:5))) |>
ungroup() |>
filter(var == min(var))
# A tibble: 1 × 6
rowname threshold accuracy sensitivity specificity var
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 thresh_info.59 0.6 83.4 83.3 83.6 0.0199

How to change the a axis to a time series in ggplot2

I'm trying to replicate the graph provided at https://www.chicagofed.org/research/data/cfnai/current-data since I will be needing graphs for data sets soon that look like this. I'm almost there, I can't seem to figure out how to change the x axis to the dates when using ggplot2. Specifically, I would like to change it to the dates in the Date column. I tried about a dozen ways and nothing is working. The data for this graph is under indexes on the website. Here's my code and the graph where dataSet is the data from the website:
library(ggplot2)
library(reshape2)
library(tidyverse)
library(lubridate)
df = data.frame(time = index(dataSet), melt(as.data.frame(dataSet)))
df
str(df)
df$data1.Date = as.Date(as.character(df$data1.Date))
str(df)
replicaPlot1 = ggplot(df, aes(x = time, y = value)) +
geom_area(aes(colour = variable, fill = variable)) +
stat_summary(fun = sum, geom = "line", size = 0.4) +
labs(title = "Chicago Fed National Activity Index (CFNAI) Current Data")
replicaPlot1 + scale_x_continuous(name = "time", breaks = waiver(), labels = waiver(), limits =
df$data1.Date)
replicaPlot1
Any sort of help on this would be very much appreciated!
G:\BOS\Common\R-Projects\Graphs\Replica of Chicago Fed National Acitivty index (PCA)\dataSet
Not sure what's your intention with data.frame(time = index(dataSet), melt(as.data.frame(dataSet))). When I download the data and read via readxl::read_excel I got a nice tibble with a date(time) column which after reshaping via tidyr::pivot_longer could easily be plotted and by making use of scale_x_datetime has a nicely formatted date axis:
Using just the first 20 rows of data try this:
library(ggplot2)
library(readxl)
library(tidyr)
df <- pivot_longer(df, -Date, names_to = "variable")
ggplot(df, aes(x = Date, y = value)) +
geom_area(aes(colour = variable, fill = variable)) +
stat_summary(fun = sum, geom = "line", size = 0.4) +
labs(title = "Chicago Fed National Activity Index (CFNAI) Current Data") +
scale_x_datetime(name = "time")
#> Warning: Removed 4 rows containing non-finite values (stat_summary).
#> Warning: Removed 4 rows containing missing values (position_stack).
Created on 2021-01-28 by the reprex package (v1.0.0)
DATA
# Data downloaded from https://www.chicagofed.org/~/media/publications/cfnai/cfnai-data-series-xlsx.xlsx?la=en
# df <- readxl::read_excel("cfnai-data-series-xlsx.xlsx")
# dput(head(df, 20))
df <- structure(list(Date = structure(c(
-87004800, -84412800, -81734400,
-79142400, -76464000, -73785600, -71193600, -68515200, -65923200,
-63244800, -60566400, -58060800, -55382400, -52790400, -50112000,
-47520000, -44841600, -42163200, -39571200, -36892800
), tzone = "UTC", class = c(
"POSIXct",
"POSIXt"
)), P_I = c(
-0.26, 0.16, -0.43, -0.09, -0.19, 0.58, -0.05,
0.21, 0.51, 0.33, -0.1, 0.12, 0.07, 0.04, 0.35, 0.04, -0.1, 0.14,
0.05, 0.11
), EU_H = c(
-0.06, -0.09, 0.01, 0.04, 0.1, 0.22, -0.04,
0, 0.32, 0.16, -0.2, 0.34, 0.06, 0.17, 0.17, 0.07, 0.12, 0.12,
0.15, 0.18
), C_H = c(
-0.01, 0.01, -0.05, 0.08, -0.07, -0.01,
0.12, -0.11, 0.1, 0.15, -0.04, 0.04, 0.17, -0.03, 0.05, 0.08,
0.09, 0.05, -0.06, 0.09
), SO_I = c(
-0.01, -0.07, -0.08, 0.02,
-0.16, 0.22, -0.08, -0.07, 0.38, 0.34, -0.13, -0.1, 0.08, -0.07,
0.06, 0.07, 0.12, -0.3, 0.35, 0.14
), CFNAI = c(
-0.34, 0.02, -0.55,
0.04, -0.32, 1, -0.05, 0.03, 1.32, 0.97, -0.46, 0.39, 0.38, 0.11,
0.63, 0.25, 0.22, 0.01, 0.49, 0.52
), CFNAI_MA3 = c(
NA, NA, -0.29,
-0.17, -0.28, 0.24, 0.21, 0.33, 0.43, 0.77, 0.61, 0.3, 0.1, 0.29,
0.37, 0.33, 0.37, 0.16, 0.24, 0.34
), DIFFUSION = c(
NA, NA, -0.17,
-0.14, -0.21, 0.16, 0.11, 0.17, 0.2, 0.5, 0.41, 0.28, 0.2, 0.32,
0.36, 0.32, 0.33, 0.25, 0.31, 0.47
)), row.names = c(NA, -20L), class = c(
"tbl_df",
"tbl", "data.frame"
))

How do I make segments (of my probabilities?)

I was wondering if there is a function which can help me with segmentation. Via mixtools (logisregmixEM), I got an optimum of 3 segments with corresponding size of 2.5%, 40.3% and 57.2%. I also got posterior probabilities. Is there any way how to create three separate segments with corresponding observations based on the probabilities, in which I end up with 3 segments with the above called sizes?
For what its worth some background information of my coefficients, and probabilities:
> dput(head(betas))
structure(list(comp1 = c(4.57, 0.08, 0.91, -0.11, 0.09, 0.07),
comp2 = c(2.04, -0.22, 0.19, 0.34, -0.34, -0.01), comp3 = c(0.88,
0.03, 0.42, -0.02, -0.17, -0.01)), row.names = c("beta.0",
"beta.1", "beta.2", "beta.3", "beta.4", "beta.5"), class = "data.frame")
> dput(head(posteriorp))
structure(c(0.06, 0.03, 0, 0.03, 0, 0, 0.61, 0.42, 0.07, 0.41,
0.31, 0.41, 0.33, 0.56, 0.93, 0.56, 0.69, 0.59), .Dim = c(6L,
3L), .Dimnames = list(NULL, c("comp.1", "comp.2", "comp.3")))

Understanding and implementing numerical integration with a quantile function in R

I need to calculate this integral below, using R:
The q_theta(x) function I managed to do in R with quantile regression (package: quantreg).
matrix=structure(c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2,
0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31,
0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42,
0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53,
0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64,
0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75,
0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
0.98, 0.99, -22.2830664155772, -22.2830664155772, -19.9298291765612,
-18.2066426767652, -15.2657135034479, -14.921522915965, -13.5035945028536,
-13.1557269916064, -12.9495709618481, -11.6168348488161, -11.3999095021713,
-10.6962766764396, -10.0588239375837, -9.12944363439522, -8.15648778610587,
-8.04133299299019, -7.66558386420434, -7.50906566627427, -6.95626096568998,
-6.90630556403136, -6.53374879831376, -6.39324677042686, -6.20705804899049,
-6.09754765999465, -5.91272058217526, -5.75771166206242, -5.3770131257001,
-5.20892464393192, -5.07372162687422, -4.96706814289334, -4.64404095131293,
-4.1567394053577, -4.13209444755342, -3.85483644113723, -3.64855238293205,
-3.53054113507559, -3.46035383338799, -3.03155417364444, -2.93100183005178,
-2.90491824855193, -2.64056616049773, -2.51857727614607, -2.25163805172486,
-2.00934783937474, -1.89925824841417, -1.71405007411747, -1.65905834683964,
-1.47502511311988, -1.42755073292529, -1.20464216637298, -1.08574103345057,
-0.701134735371922, -0.590656010656201, -0.290335898959635, -0.0575062007348038,
0.0778328375033378, 0.165234593185889, 0.230651883848336, 0.316817885358695,
0.34841775605248, 0.516869604496075, 0.59743162507581, 0.857843937404964,
0.939734010162078, 1.12533017928147, 1.27037182428776, 1.52040854525927,
1.76577933448152, 2.07456447851822, 2.17389787235523, 2.27567786362425,
2.3850323163509, 2.55365596853891, 2.61208242890655, 2.77359226593771,
2.93275094039929, 3.07968072488942, 3.0822647851901, 3.26452177629061,
3.46223321951649, 3.66011832966054, 3.85710605543097, 4.05385887531972,
4.83943843494744, 5.05864734149161, 5.25501778319145, 5.38941130574907,
5.88571117751377, 6.5116611852713, 6.98632496342285, 7.21816245728101,
7.73244825971004, 7.80401007592906, 8.34648625541999, 9.83184090479964,
10.8324874884172, 11.3060100107816, 12.3048113953808, 13.1300123358331
), .Dim = c(99L, 2L), .Dimnames = list(NULL, c("Theta", "q(x)_(Theta)"
)))
This is my q_theta(x) function that I estimated in R. One of the question I have is:
a> If x is a standard normal distribution this integral is zero; Right?
b> Otherwise, in my case, the integral is not zero. How do I treat the q_1-Theta(x)? Its simply the sort(matrix[,"q(x)_(Theta)"],decreasing=TRUE) ?
And the integration would be:
sintegral(thau[1:50], (matrix[,"q(x)_(Theta)"][1:50] - sort(matrix[,"q(x)_(Theta)"],TRUE)[1:50])[1:50])$value
The median would be a comun point of this two functions. Right?
Thanks.
Recall your previous post Building a function by defining X and Y and then Integrating in R, we build a linear interpolation function
## note `rule = 2` to enable "extrapolation";
## otherwise `rule = 1` gives `NA` outside [0.01, 0.5]
integrand <- approxfun(mat[, 1], y, rule = 2)
Then we can perform numeric integration on [0, 0.5]:
integrate(integrand, lower = 0, upper = 0.5)
# -5.594405 with absolute error < 4e-04
Now for a>, let's have a proof first.
Note, your quantile function is not for normal distribution, so this result does not hold. You can actually verify this
quant <- approxfun(mat[, 1], mat[, 2], rule = 2)
integrate(quant, lower = 0, upper = 0.5)
# -3.737973 with absolute error < 0.00029
Compared with previous integration result -5.594405, the difference is not a factor of 2.

Building a function by defining X and Y and then Integrating in R

I need to construct a function with x values coming from the first column of this matrix below and y values coming from the second column from the same matrix, with the purpose of later calculating the integral in the desired range.:
matrix=structure(c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2,
0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31,
0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42,
0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53,
0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64,
0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75,
0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
0.98, 0.99, -7.38512004893287, -7.38512004893287, -6.4788834441613,
-5.63088940915783, -4.83466644123448, -4.68738146949482, -4.28638930290018,
-4.22411786604579, -3.59136848943044, -3.51706359680799, -3.39972014575003,
-3.28609348968074, -3.08569873266253, -2.99764447889508, -2.89470597729108,
-2.77488515429677, -2.67019029728821, -2.54646363628509, -2.48474483938047,
-2.30542896070156, -2.22485510301423, -2.16689229344011, -2.10316315192181,
-2.05135466960309, -1.90942757945567, -1.87863626704201, -1.82507998490407,
-1.75875817642096, -1.6919717645629, -1.62396997031953, -1.56159595204983,
-1.52152738173419, -1.46478394989911, -1.4590555309334, -1.21744398902807,
-1.21731951113139, -1.15003007559406, -1.07321513324935, -0.993364510081357,
-0.924402354306976, -0.885939210442384, -0.831155619244629, -0.80947326709303,
-0.786842719842383, -0.743834513319968, -0.721194178931262, -0.593033922802471,
-0.514780082129033, -0.50717184901095, -0.44223827942003, -0.403514759789576,
-0.296251921664, -0.204238424399985, -0.1463212643028, -0.0982036017275267,
-0.0705262020944892, 0.0275436976821241, 0.0601977432996216,
0.114959963559268, 0.182222546319913, 0.236503724954577, 0.272244043950984,
0.325188234828891, 0.347862804414816, 0.438932719815686, 0.630570414177834,
0.805087251137292, 0.904903847087405, 0.940702374334727, 0.958351604371838,
1.03920208406121, 1.25808734990267, 1.32634708210007, 1.34458194173569,
1.42693337001189, 1.55016591141652, 1.5710754638668, 1.61795101580197,
1.62472416407376, 1.70223430572367, 1.86164374636379, 1.94317125269006,
2.03941620499986, 2.12071850455654, 2.17753890907921, 2.22227616630581,
2.45586794615095, 2.66160802425205, 2.83084956697756, 2.94669126521054,
3.04536994227142, 3.09217816201639, 3.42405058020625, 3.45140184734503,
3.67343579954061, 4.64233570345934, 4.87075743677502, 5.27924539262207,
5.56822483595709), .Dim = c(99L, 2L), .Dimnames = list(NULL,
c("x", "y")))
So i would have a function like this:
plot(matrix[,1],matrix[,2])
And then, my idea is to calculate the integral of this function using this code in R:
integrating= function(x) return(myfunction(x));
integrate(integrating, lower=0.08, upper=0.15)
Is it possible?
I tried but it didnt work.
When I looked at you provide matrix (better use variable mat not matrix for it), I found that your x samples are evenly spaced, and y values are monotone and smooth against x. So a simple linear interpolation would be sufficiently good to model those data.
## read `?approx`
f <- approxfun(mat[, 1], mat[, 2])
Then you can do
integrate (f, lower = 0.08, upper = 0.15)
# -0.2343698 with absolute error < 1.3e-05

Resources