map_dfr and binding rows together based on columns in R - r

I am trying to create a list based on the column names and then bind all my lists together.
That is, I have for the first list:
> myList[[1]] %>% data.frame() %>% select(c(1:2))
WTS FMC
frequency 1 1
nperiods 0 0
seasonal_period 1 1
trend 0.1758021 0.140052
spike 0.04209651 0.04940537
I want to create new lists - one list for each of the columns in the data and then bind the lists 2010 Jul, 2010 Aug and 2010 Sep together using bind_rows.
I can do the following:
map_dfr(data.frame(myList), ~bind_rows(.), .id = "date")
Which almost gets what I want but I would like to do this and obtain one of these for each of the columns WTS and FMC etc.
Data:
myList <- list(`2010 Jul` = structure(list(1, 0, 1, 0.175802105278148,
0.0420965089715215, -0.597180003813241, -0.14766101736596,
0.101328352458739, NA_real_, 1, 0.163542974434028, NA_real_,
-0.0477740942262392, 0.109285246298631, -0.585925108800292,
0.349534758601262, 1, 0, 1, 0.140051954024691, 0.0494053672229871,
-0.731689686416635, -0.165607865331302, -0.252997419985073,
NA_real_, 1, -0.0505136284783927, NA_real_, -0.172891705413366,
0.171035553843115, -0.389685810850311, 0.395230400055788,
1, 0, 1, 0.0534461538613374, 0.0231700091040301, -0.356613468922694,
-0.0999668254541441, -0.127071388891534, NA_real_, 1, -0.0372310379765763,
NA_real_, -0.124065837653166, 0.214832600571785, -0.252551509468299,
0.123852141180675, 1, 0, 1, 0.041796656791166, 0.0262360694498456,
-0.266921718141474, -0.0803839036263304, -0.128269552651254,
NA_real_, 1, -0.0651244706731801, NA_real_, -0.205637974697809,
0.151555475533217, -0.292968222735457, 0.107723199237638,
1, 0, 1, 0.25433724307978, 0.0224412849894418, -1.12954982181859,
-0.146142537909, -0.165760782142423, NA_real_, 1, 0.151156560509677,
NA_real_, -0.166767943225804, 0.282321896787354, -0.513571168467497,
0.571934734212278, 1, 0, 1, 0.0754993659336637, 0.0387283712994059,
-0.853707662110111, -0.0251051264639037, -0.155640882435377,
NA_real_, 1, 0.0578132817097772, NA_real_, -0.061111476402639,
0.404959638504767, -0.0639404592330979, 0.201777982385734,
1, 0, 1, 0.174669741802688, 0.0236268612206601, -1.04169291855456,
-0.0843655048351571, -0.658325339642133, NA_real_, 1, -0.325360383314808,
NA_real_, -0.727093163070252, 0.937170983212177, -0.767714278501687,
0.873631983600454, 1, 0, 1, 0.0212342532070486, 0.0394007679441418,
-0.343036121758394, -0.0509252272227679, -0.119680074500327,
NA_real_, 1, -0.0455646776030852, NA_real_, -0.0398983609027588,
0.337412411363141, 0.0192485531321964, 0.223377235550471,
1, 0, 1, 0.0420262338532703, 0.129212727423114, -0.438100122508869,
-0.042771290337182, -0.389638024842517, NA_real_, 1, -0.326954900702078,
NA_real_, -0.555804439643753, 0.310736075437147, -0.677496652871654,
0.493116631796998, 1, 0, 1, 0.598184608626656, 0.00485326113018123,
-1.55755544029203, -0.0566390666856906, -0.177252150724023,
NA_real_, 1, 0.057995193957956, NA_real_, -0.289185837730521,
0.180050213764505, -0.414345078778786, 0.193535375761028,
1, 0, 1, 0.568485402318989, 0.0119368452769537, -1.67771371104516,
-0.0263928835552806, -0.454975191345, NA_real_, 1, 0.0686977616512836,
NA_real_, -0.533085306700341, 0.446147099224813, -0.416815928407965,
0.249040354012687, 1, 0, 1, 0.124079903347872, 0.059118600048602,
-0.885885264087633, -0.0808876385366851, -0.060659659056956,
NA_real_, 1, 0.148533886538717, NA_real_, -0.00173087492998668,
0.344674083224914, -0.182813884409897, 0.146185243416273,
1, 0, 1, 0.0930926158625137, 0.0245329908921137, 0.509061201314714,
-0.058666146601623, -0.0356918805242959, NA_real_, 1, 0.000950336898647261,
NA_real_, -0.245464366660066, 0.33682941009751, -0.254338263672044,
0.367239900683189, 1, 0, 1, 0.22638632247172, 0.0218317533978287,
-0.852854792551597, -0.16522406381938, 0.167205818008961,
NA_real_, 1, 0.299103246969011, NA_real_, 0.337090742253813,
0.426672282245496, -0.0631643700301268, 0.107558529054556,
1, 0, 1, 0.184189030274566, 0.0177470314855779, -1.16461284208247,
-0.0835786581051569, -0.318545876407814, NA_real_, 1, 0.0642963953901268,
NA_real_, -0.301483310526926, 0.36126076411659, -0.408014673266521,
0.38067311290122, 1, 0, 1, 0.0764619219562191, 0.0229456293092152,
-0.399483875437517, -0.109463724994312, -0.0624895855715813,
NA_real_, 1, 0.0247766231933698, NA_real_, -0.175710273625244,
0.148883400498395, -0.391350369491028, 0.164652945563837,
1, 0, 1, 0.157413400293104, 0.0210925522480966, 0.559184312376902,
-0.170376937825492, -0.463695060059251, NA_real_, 1, -0.398949758420571,
NA_real_, -0.343559615134694, 0.360798780983868, -0.254653149412353,
0.291717976532446, 1, 0, 1, 0.157638226870364, 0.0328482314858161,
-1.04113661683743, -0.11461389672605, -0.227655536180246,
NA_real_, 1, 0.0955560244689036, NA_real_, -0.0850108661597532,
0.249052330398167, -0.121962432488975, 0.270531142248378), .Dim = c(16L,
18L), .Dimnames = list(c("frequency", "nperiods", "seasonal_period",
"trend", "spike", "linearity", "curvature", "e_acf1", "e_acf10",
"entropy", "x_acf1", "x_acf10", "diff1_acf1", "diff1_acf10",
"diff2_acf1", "diff2_acf10"), c("WTS", "FMC", "WGL", "SCG", "GPS",
"AOS", "CVC", "EMF", "SSY", "MGA", "WEX", "MT", "HXM", "CNS",
"LCM", "KGN", "SIHI", "JLS"))), `2010 Aug` = structure(list(1,
0, 1, 0.0233905158348703, 0.0208562122467506, -0.534541260410219,
0.0724848038424846, -0.243808681545836, NA_real_, 1, -0.127516468307146,
NA_real_, -0.0882273545301255, 0.330583666477203, -0.0544514809293154,
0.179612938516917, 1, 0, 1, 0.298243851692594, 0.0288988085684842,
-1.17329074859827, 0.167794305134058, -0.129125081312144,
NA_real_, 1, 0.165909888118736, NA_real_, 0.0234017388186864,
0.126122636351595, -0.0910583855529177, 0.179360806895702,
1, 0, 1, 0.065724074574338, 0.0758937621405237, -0.399747739700934,
0.108429436039378, -0.0667990493250848, NA_real_, 1, 0.0146134416445858,
NA_real_, -0.0265326096067546, 0.238490359616056, -0.317300024293075,
0.136078891269167, 1, 0, 1, 0.0359901363825194, 0.0727713985049959,
-0.261713912883042, 0.0760994399652499, -0.133814042822593,
NA_real_, 1, -0.0827578051666984, NA_real_, -0.117826507182037,
0.252189340156553, -0.355540397423096, 0.151183673456332,
1, 0, 1, 0.73385650012555, 0.004162899076158, -2.00737648513829,
0.0557091477539321, -0.261660339901219, NA_real_, 1, 0.451470983541603,
NA_real_, -0.288519428211535, 0.150745403548235, -0.543122218246143,
0.296612522913519, 1, 0, 1, 0.0649402281700383, 0.0378619493291227,
-0.31449739260034, 0.0988460944383464, 0.0349741429158428,
NA_real_, 1, 0.098161238113042, NA_real_, 0.0300972574757304,
0.298075123956731, -0.0887625952498301, 0.232199321985802,
1, 0, 1, 0.0945945759860801, 0.0180084376802645, -0.469510406384772,
0.153730038064492, -0.399294299223668, NA_real_, 1, -0.305460924007922,
NA_real_, -0.415074250523729, 0.222830245111534, -0.606351673786828,
0.374684747936308, 1, 0, 1, 0.0591309037250093, 0.0649064539127003,
-0.0744983518809387, 0.0920811715896319, 0.00528921345437106,
NA_real_, 1, 0.0298619700426544, NA_real_, 0.037634026581331,
0.362140624457385, -0.107462109231618, 0.2254760279785, 1,
0, 1, 0.078011344771787, 0.0697267856529186, -0.895566956749497,
-0.0499496267058433, -0.565731971800641, NA_real_, 1, -0.340406856638686,
NA_real_, -0.537251820856348, 0.290072188692432, -0.418146742408836,
0.178344451571795, 1, 0, 1, 0.036755839364403, 0.0608765681761267,
-0.0373907046624393, 0.10003472183405, -0.36540942843628,
NA_real_, 1, -0.341604813341389, NA_real_, -0.184126527521471,
0.165571048572808, -0.082428918609678, 0.160459761431743,
1, 0, 1, 0.155954127781764, 0.0165284207980147, -0.542789032914492,
0.198320667255402, -0.492235313036742, NA_real_, 1, -0.39030915972297,
NA_real_, -0.361944799122207, 0.428712468490625, -0.275063299692073,
0.341636357201519, 1, 0, 1, 0.0677160966850889, 0.0284434829900084,
-0.359449385908881, 0.113540625475898, -0.0349872275718705,
NA_real_, 1, 0.0516285716355073, NA_real_, 0.0458827167347926,
0.280987645964838, -0.0563357760675071, 0.101938565072249,
1, 0, 1, 0.319347071530101, 0.00980494397260724, 0.952800719371358,
0.0593643213576319, -0.159931065778718, NA_real_, 1, -0.161861522802606,
NA_real_, 0.0861865077322326, 0.462009189577021, 0.288750047974667,
0.412369625453985, 1, 0, 1, 0.24423399870397, 0.0156919487915331,
-1.35672952181182, 0.0754111723913473, -0.177255831987036,
NA_real_, 1, 0.2702466551225, NA_real_, 0.072432653719567,
0.398793873454873, 0.299718556657641, 0.420115516359753,
1, 0, 1, 0.145010697778435, 0.061864744065635, -0.634783939837577,
0.156947702469577, -0.0325408000915056, NA_real_, 1, 0.0909718902406596,
NA_real_, 0.0168167770621337, 0.118454773755493, -0.42986156681522,
0.189447158128956, 1, 0, 1, 0.272662492338871, 0.0320020691693299,
-1.35057407751299, -0.000704120068878284, -0.305058370459884,
NA_real_, 1, 0.0989171620469294, NA_real_, -0.345872268685382,
0.225882532526285, -0.523296429442332, 0.274117925854473,
1, 0, 1, 0.133849706665592, 0.0234607726133869, 0.385342516199894,
0.171489465028886, -0.522328366590807, NA_real_, 1, -0.444297376125095,
NA_real_, -0.550596521310195, 0.466264657679625, -0.73127378610922,
0.69157532319532, 1, 0, 1, 0.202865549667432, 0.0443986075890144,
-0.807925019780012, 0.171442275242251, 0.0629959271618186,
NA_real_, 1, 0.213447359336486, NA_real_, 0.242973171792414,
0.265885818267854, -0.0620290037554373, 0.141877034992979), .Dim = c(16L,
18L), .Dimnames = list(c("frequency", "nperiods", "seasonal_period",
"trend", "spike", "linearity", "curvature", "e_acf1", "e_acf10",
"entropy", "x_acf1", "x_acf10", "diff1_acf1", "diff1_acf10",
"diff2_acf1", "diff2_acf10"), c("WTS", "FMC", "WGL", "SCG", "GPS",
"AOS", "CVC", "EMF", "SSY", "MGA", "WEX", "MT", "HXM", "CNS",
"LCM", "KGN", "SIHI", "JLS"))), `2010 Sep` = structure(list(1,
0, 1, 0.114407589475582, 0.0235377481165926, -0.728800100661772,
-0.0166684916231905, -0.319561503372181, NA_real_, 1, -0.252283814071854,
NA_real_, -0.433907428334825, 0.494174288679032, -0.259632053945162,
0.345158784209255, 1, 0, 1, 0.0554648365804654, 0.0297035032516045,
-0.301137665508776, 0.0371842617719873, -0.153107729568536,
NA_real_, 1, -0.148125951621602, NA_real_, -0.309974262769443,
0.282319970549421, -0.256060952169572, 0.260036721023129,
1, 0, 1, 0.0724574536186097, 0.0506702652874201, -0.397515147429409,
-0.000875929043770737, -0.226741580969926, NA_real_, 1, -0.239426915169087,
NA_real_, -0.26443608261244, 0.380280023570942, -0.181952275816044,
0.287613920079175, 1, 0, 1, 0.0366387433543232, 0.0828210443160761,
0.0346015782281233, 0.0584760824131681, -0.0807409978271288,
NA_real_, 1, -0.0640076873681771, NA_real_, -0.241606619566609,
0.28402059698436, -0.32704318254068, 0.255565332273312, 1,
0, 1, 0.506821906250132, 0.0117517625384047, -1.57158954102578,
0.0777902977231915, 0.0323140578792685, NA_real_, 1, 0.314866952104353,
NA_real_, -0.184027731637231, 0.113495496807055, -0.416317148005885,
0.221007409079218, 1, 0, 1, 0.0789008969637934, 0.0227004903617495,
-0.270300272577158, 0.0385921685543045, -0.163539848233482,
NA_real_, 1, -0.186957341754706, NA_real_, 0.0122408218485358,
0.405097502405729, 0.156234747286005, 0.273301919830479,
1, 0, 1, 0.0250014845596822, 0.0302514181589841, -0.338784673049847,
-0.0859332071221103, -0.497829122832546, NA_real_, 1, -0.501527437157675,
NA_real_, -0.476584045508235, 0.580274617020235, -0.250869906535054,
0.356695266531789, 1, 0, 1, 0.0319333349525267, 0.0556081429363308,
0.147813225937377, 0.0607115815601036, -0.075824219265655,
NA_real_, 1, -0.0426184206883323, NA_real_, -0.0207223789747501,
0.385629184963258, -0.0532538380902457, 0.237605631059521,
1, 0, 1, 0.381518944029993, 0.0321886406040401, -1.06752151081575,
0.131205784717954, -0.280480506945643, NA_real_, 1, -0.179355245047458,
NA_real_, -0.106920170519719, 0.0714415677242584, -0.0319868419486709,
0.118669624281828, 1, 0, 1, 0.0121834121844098, 0.068237303825428,
0.0536306750135053, 0.0336173618788365, -0.369652200763938,
NA_real_, 1, -0.351748579579802, NA_real_, -0.528968821125061,
0.281681886913385, -0.514355713006262, 0.266023990706781,
1, 0, 1, 0.0287643057822889, 0.044676101917498, -0.0138724727466971,
-0.134010559894424, -0.73335419052835, NA_real_, 1, -0.71539381603517,
NA_real_, -0.658713675985, 0.50928626741049, -0.619760970527367,
0.416625800834141, 1, 0, 1, 0.05210630828958, 0.0276550837203792,
0.190816750390097, 0.0978766034871588, -0.0805703916378234,
NA_real_, 1, -0.0268457813914789, NA_real_, -0.00549001926408891,
0.267785723185472, -0.0608182434517501, 0.100506808744203,
1, 0, 1, 0, 0.0627792442320371, 0.255196051933168, -0.0382711160010135,
-0.386591712415439, NA_real_, 1, -0.357984708839978, NA_real_,
-0.368112450163228, 0.267135781124212, -0.197560540922087,
0.182171367812671, 1, 0, 1, 0.269386209436678, 0.0148715672740464,
-0.968523363062877, 0.0763244158585192, 0.0185934939902807,
NA_real_, 1, 0.0765904658501373, NA_real_, -0.161248155686918,
0.20111491459834, -0.273002230573575, 0.258423208707053,
1, 0, 1, 0.018597597094501, 0.0552754657963658, 0.143897330819771,
0.0353124852994875, -0.125202166775784, NA_real_, 1, -0.10138717345503,
NA_real_, -0.232282311284955, 0.358150791920914, -0.202251311791963,
0.313200193280975, 1, 0, 1, 0.203989370024047, 0.0224128873339424,
0.502551786048769, 0.170091454126145, -0.0446367518715121,
NA_real_, 1, -0.0199614715680664, NA_real_, -0.0784148935207206,
0.256326721120486, -0.629406417417173, 0.680388906963932,
1, 0, 1, 0.166310912865401, 0.0515699413907982, -0.563616415630654,
-0.124142897096449, -0.570353166601179, NA_real_, 1, -0.511575482342321,
NA_real_, -0.502130427060656, 0.424780030379441, -0.561486820277065,
0.520329930641319, 1, 0, 1, 0.125538977433979, 0.0316092331640379,
0.644167550608129, 0.108886405075484, 0.10582508645383, NA_real_,
1, 0.208780092739966, NA_real_, 0.0489489788996666, 0.285296904348623,
-0.161944089572295, 0.294280045785781), .Dim = c(16L, 18L
), .Dimnames = list(c("frequency", "nperiods", "seasonal_period",
"trend", "spike", "linearity", "curvature", "e_acf1", "e_acf10",
"entropy", "x_acf1", "x_acf10", "diff1_acf1", "diff1_acf10",
"diff2_acf1", "diff2_acf10"), c("WTS", "FMC", "WGL", "SCG", "GPS",
"AOS", "CVC", "EMF", "SSY", "MGA", "WEX", "MT", "HXM", "CNS",
"LCM", "KGN", "SIHI", "JLS"))))

With tidyverse operations, row names are dropped or reset to the default integer sequence, so before binding the list elements together, use rownames_to_column to store the row names in a new column:
library(dplyr)
library(purrr)
library(tibble)
# For every matrix in the list: convert it to a data frame, keep the first two
# columns, and move the row names into an `rname` column so they survive the
# row-bind. The list names are stored in the `date` column of the result.
myList %>%
  map_dfr(
    function(mat) {
      wide <- as.data.frame(mat)
      rownames_to_column(select(wide, 1:2), 'rname')
    },
    .id = 'date'
  )
# date rname WTS FMC
#1 2010 Jul frequency 1 1
#2 2010 Jul nperiods 0 0
#3 2010 Jul seasonal_period 1 1
#4 2010 Jul trend 0.1758021 0.140052
#5 2010 Jul spike 0.04209651 0.04940537
#6 2010 Jul linearity -0.59718 -0.7316897
#7 2010 Jul curvature -0.147661 -0.1656079
#8 2010 Jul e_acf1 0.1013284 -0.2529974
#9 2010 Jul e_acf10 NA NA
# ...

Related

How to calculate a proportion in R

I have this reproducible DataFrame:
structure(list(age = c(62.84998, 60.33899, 52.74698, 42.38498,
79.88495, 93.01599, 62.37097, 86.83899, 85.65594, 42.25897),
death = c(0, 1, 1, 1, 0, 1, 1, 1, 1, 1), sex = c("male",
"female", "female", "female", "female", "male", "male", "male",
"male", "female"), hospdead = c(0, 1, 0, 0, 0, 1, 0, 0, 0,
0), slos = c(5, 4, 17, 3, 16, 4, 9, 7, 12, 8), d.time = c(2029,
4, 47, 133, 2029, 4, 659, 142, 63, 370), dzgroup = c("Lung Cancer",
"Cirrhosis", "Cirrhosis", "Lung Cancer", "ARF/MOSF w/Sepsis",
"Coma", "CHF", "CHF", "Lung Cancer", "Colon Cancer"), dzclass = c("Cancer",
"COPD/CHF/Cirrhosis", "COPD/CHF/Cirrhosis", "Cancer", "ARF/MOSF",
"Coma", "COPD/CHF/Cirrhosis", "COPD/CHF/Cirrhosis", "Cancer",
"Cancer"), num.co = c(0, 2, 2, 2, 1, 1, 1, 3, 2, 0), edu = c(11,
12, 12, 11, NA, 14, 14, NA, 12, 11), income = c("$11-$25k",
"$11-$25k", "under $11k", "under $11k", NA, NA, "$25-$50k",
NA, NA, "$25-$50k"), scoma = c(0, 44, 0, 0, 26, 55, 0, 26,
26, 0), charges = c(9715, 34496, 41094, 3075, 50127, 6884,
30460, 30460, NA, 9914), totcst = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), totmcst = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), avtisst = c(7, 29, 13, 7, 18.666656, 5, 8, 6.5, 8.5, 8
), race = c("other", "white", "white", "white", "white",
"white", "white", "white", "black", "hispanic"), sps = c(33.8984375,
52.6953125, 20.5, 20.0976562, 23.5, 19.3984375, 17.296875,
21.5976562, 15.8984375, 2.2998047), aps = c(20, 74, 45, 19,
30, 27, 46, 53, 17, 9), surv2m = c(0.262939453, 0.0009999275,
0.790893555, 0.698974609, 0.634887695, 0.284973145, 0.892944336,
0.670898438, 0.570922852, 0.952880859), surv6m = c(0.0369949341,
0, 0.664916992, 0.411987305, 0.532958984, 0.214996338, 0.820922852,
0.498962402, 0.24899292, 0.887939453), hday = c(1, 3, 4,
1, 3, 1, 1, 1, 1, 1), diabetes = c(0, 0, 0, 0, 0, 0, 0, 1,
0, 0), dementia = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), ca = c("metastatic",
"no", "no", "metastatic", "no", "no", "no", "no", "metastatic",
"metastatic"), prg2m = c(0.5, 0, 0.75, 0.899999619, 0.899999619,
0, NA, 0.799999714, 0.049999982, NA), prg6m = c(0.25, 0,
0.5, 0.5, 0.8999996, 0, 0.6999998, 0.3999999, 0.0001249999,
NA), dnr = c("no dnr", NA, "no dnr", "no dnr", "no dnr",
"no dnr", "no dnr", "no dnr", "dnr after sadm", "no dnr"),
dnrday = c(5, NA, 17, 3, 16, 4, 9, 7, 2, 8), meanbp = c(97,
43, 70, 75, 59, 110, 78, 72, 97, 84), wblc = c(6, 17.0976562,
8.5, 9.09960938, 13.5, 10.3984375, 11.6992188, 13.5996094,
9.69921875, 11.2988281), hrt = c(69, 112, 88, 88, 112, 101,
120, 100, 56, 94), resp = c(22, 34, 28, 32, 20, 44, 28, 26,
20, 20), temp = c(36, 34.59375, 37.39844, 35, 37.89844, 38.39844,
37.39844, 37.59375, 36.59375, 38.19531), pafi = c(388, 98,
231.65625, NA, 173.3125, 266.625, 309.5, 404.75, 357.125,
NA), alb = c(1.7998047, NA, NA, NA, NA, NA, 4.7998047, NA,
NA, 4.6992188), bili = c(0.19998169, NA, 2.19970703, NA,
NA, NA, 0.39996338, NA, 0.39996338, 0.19998169), crea = c(1.19995117,
5.5, 2, 0.79992676, 0.79992676, 0.69995117, 1.59985352, 2,
1, 0.79992676), sod = c(141, 132, 134, 139, 143, 140, 132,
139, 143, 139), ph = c(7.459961, 7.25, 7.459961, NA, 7.509766,
7.65918, 7.479492, 7.509766, 7.449219, NA), glucose = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), bun = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), urine = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), adlp = c(7, NA, 1, 0, NA, NA, 0, NA, NA, 0), adls = c(7,
1, 0, 0, 2, 1, 1, 0, 7, NA), sfdm2 = c(NA, "<2 mo. follow-up",
"<2 mo. follow-up", "no(M2 and SIP pres)", "no(M2 and SIP pres)",
"<2 mo. follow-up", "no(M2 and SIP pres)", NA, NA, NA), adlsc = c(7,
1, 0, 0, 2, 1, 1, 0, 7, 0.4947999)), row.names = c(NA, 10L
), class = "data.frame")
I am needing to calculate the proportion of patients who died in the hospital in patients with an active DNR order on day 3 and in patients without an active DNR order on day 3. To group which patients had an active DNR on day 3 and which did not, I used the subset function below:
# Drop rows with a missing dnrday, then split the remaining rows into those
# with dnrday <= 3 and those with dnrday > 3.
SB_xlsx1 <- SB_xlsx[!is.na(SB_xlsx$dnrday), ]
YesDNR <- SB_xlsx1[SB_xlsx1$dnrday <= 3, ]
NoDNR <- SB_xlsx1[SB_xlsx1$dnrday > 3, ]
However, I don't know how to calculate the proportion of patients that died in the hospital for those with a DNR and without a DNR. The 'hospdead' variable has all 0s and 1s, where 0 = not dead and 1 = dead. However, I don't know how to get the proportion that died for having a DNR at day 3 and did not have a DNR at day 3. What code could I use for my desired result. SB_xlsx also just represents my DataFrame name.
There are a few ways to do this, but the simplest is probably via the aggregate function.
> aggregate( hospdead ~ (dnrday<=3) , SB_xlsx1 , mean)
dnrday <= 3 hospdead
1 FALSE 0.1428571
2 TRUE 0.0000000
You may use xtabs to sum deaths by the condition dnrday <= 3, i.e. with an active DNR on day 3, and proportions to convert those sums into shares of all deaths.
(res <- proportions(xtabs(death ~ dnrday <= 3, SB_xlsx)))
# dnrday <= 3
# FALSE TRUE
# 0.7142857 0.2857143
where
sum(res)
# [1] 1
EDIT: I apologize; I misread your post when providing my original answer. I've revised it below.
You referred to the hospdead variable, but in the toy data set it has just one nonzero entry, so I'm using the death variable instead to demonstrate the principle.
First, a base R approach:
mean(SB_xlsx1[SB_xlsx1$death == 1, ]$dnrday <= 3)
mean(SB_xlsx1[SB_xlsx1$death == 1, ]$dnrday > 3)
The idea is to restrict to the subset of rows for which a death occurred, then perform a logical check to see which entries have dnrday greater than 3.
Note that if you have NA entries in death, you'll want to remove them first as you did with those in dnrday.
For a dplyr approach:
library(dplyr)
SB_xlsx1 %>%
filter(death == 1) %>%
summarize(mean(dnrday <= 3), mean(dnrday > 3))
or, for a slightly nicer-looking table,
SB_xlsx1 %>%
filter(death == 1) %>%
group_by(dnrday <= 3) %>%
summarize(prop = n() / nrow(.))

Layering ggplot

I have 3 pieces of data that I need to layer onto one plot. The first time series layer is coded:
p<-ggplot(MI_FL_Data, aes(realdate, FLday))+geom_line()
The next layer adds two geom_hlines at yintercept=15000 and 17000 respectively. This layer is coded:
q<-ggplot(MI_FL_Data, aes( realdate, FL_Actions))+geom_point(na.rm = TRUE)
The final layer plots the points based on a categorical variable FL_Actions at the yintercept produced in the second code. This code is:
# Plot with two conditional horizontal reference lines: a long-dashed line at
# 15000 drawn from the rows where FL_Actions == 1, and a solid line at 17000
# drawn from the rows where FL_Actions == 2.
# Fix: the original statement ended with an extra ")" that made it a syntax
# error; the parentheses are now balanced.
r <- ggplot(MI_FL_Data, aes(realdate, FLday)) +
  geom_hline(
    data = MI_FL_Data %>% filter(FL_Actions == 1),
    aes(yintercept = 15000), linetype = 5, na.rm = TRUE
  ) +
  geom_hline(
    data = MI_FL_Data %>% filter(FL_Actions == 2),
    aes(yintercept = 17000), linetype = 1, na.rm = TRUE
  )
Now I need to layer each of these saved vectors on top of each other in one graph. When I use the code:
ggplot(MI_FL_Data, aes(realdate, FLday))+
geom_hline(data=r)+
geom_point(data=r)
I get an error: data must be a data frame, or other object coercible by fortify(), not an S3 object with class gg/ggplot. I thought by saving each layer it would be fairly simple to just add them together. Any advice? I'm a bit of a novice with ggplot but what I want to do seems fairly intuitive so I'm stumped.
I've added images of each layer just in case.
# data
structure(list(Date = c("1/22/20", "1/23/20", "1/24/20", "1/25/20",
"1/26/20", "1/27/20", "1/28/20", "1/29/20", "1/30/20", "1/31/20",
"2/1/20", "2/2/20", "2/3/20", "2/4/20", "2/5/20", "2/6/20", "2/7/20",
"2/8/20", "2/9/20", "2/10/20", "2/11/20", "2/12/20", "2/13/20",
"2/14/20", "2/15/20", "2/16/20", "2/17/20", "2/18/20", "2/19/20",
"2/20/20"), Date2 = c("1/22/20", "1/23/20", "1/24/20", "1/25/20",
"1/26/20", "1/27/20", "1/28/20", "1/29/20", "1/30/20", "1/31/20",
"2/1/20", "2/2/20", "2/3/20", "2/4/20", "2/5/20", "2/6/20", "2/7/20",
"2/8/20", "2/9/20", "2/10/20", "2/11/20", "2/12/20", "2/13/20",
"2/14/20", "2/15/20", "2/16/20", "2/17/20", "2/18/20", "2/19/20",
"2/20/20"), Date3 = c("1/22/20", "1/23/20", "1/24/20", "1/25/20",
"1/26/20", "1/27/20", "1/28/20", "1/29/20", "1/30/20", "1/31/20",
"2/1/20", "2/2/20", "2/3/20", "2/4/20", "2/5/20", "2/6/20", "2/7/20",
"2/8/20", "2/9/20", "2/10/20", "2/11/20", "2/12/20", "2/13/20",
"2/14/20", "2/15/20", "2/16/20", "2/17/20", "2/18/20", "2/19/20",
"2/20/20"), FLORIDA = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), FLday = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), MICHIGAN = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), MIday = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), FL_Actions = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), MI_Actions = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), realdate = structure(c(18283,
18284, 18285, 18286, 18287, 18288, 18289, 18290, 18291, 18292,
18293, 18294, 18295, 18296, 18297, 18298, 18299, 18300, 18301,
18302, 18303, 18304, 18305, 18306, 18307, 18308, 18309, 18310,
18311, 18312), class = "Date")), row.names = c(NA, -30L), class = c("tbl_df",
"tbl", "data.frame"))
NOTE that FL_Actions shows up as NA in this snippet of the data. This is because policy actions did not occur until March and continued through November of 2020.
This is the current ggplot created with suggested code:
ggplot(MI_FL_Data, aes(realdate, FLday)) +
geom_line()+ geom_label(data=MI_FL_Data, aes(label=FL_Actions), nudge_x = 0.50, nudge_y=.25, size=2, na.rm=TRUE)+
geom_point(na.rm = TRUE) +
geom_point(na.rm = TRUE) +
geom_hline(data = MI_FL_Data %>% filter(FL_Actions == 1),aes(yintercept = 15000), linetype=5, na.rm=TRUE) +
geom_hline(data = MI_FL_Data %>% filter(FL_Actions == 2),aes(yintercept = 17000), linetype=1, na.rm=TRUE) +
labs(x=NULL, y="Number of Reported Daily COVID Cases", title="State of Florida",caption="1= closing actions, 2= opening actions")+theme_classic()
Updating with suggestion. This is the code: ggplot(MI_FL_Data, aes(realdate, FLday)) + geom_line()+ geom_label(data=MI_FL_Data, aes(label=FL_Actions), nudge_x = 0.50, nudge_y=.25, size=2, na.rm=TRUE, y=15000)+geom_point(aes(realdate, 17000),na.rm = TRUE) + geom_point(aes(realdate, 15000), na.rm = TRUE) + geom_hline(data = MI_FL_Data %>% filter(FL_Actions == 1),aes(yintercept = 15000), linetype=5, na.rm=TRUE) +geom_hline(data = MI_FL_Data %>% filter(FL_Actions == 2),aes(yintercept = 17000), linetype=1, na.rm=TRUE)+labs(x=NULL, y="Number of Reported Daily COVID Cases", title="State of Florida",caption="1= closing actions, 2= opening actions")+theme_classic() and this is the resulting graph:
You have 3 plots, not 3 layers. Every time you use ggplot(), you're creating a new plot. The layers are just the geoms. You need to add only the layers together, not the full plots:
# One plot, several layers: call ggplot() once and add each geom with `+`.
# Fixes relative to the original snippet:
#   * the second geom_hline() call was closed twice, leaving an unbalanced ")"
#     that made the whole expression a syntax error;
#   * geom_point() was added twice with identical arguments, so the redundant
#     copy is dropped (the drawn points are the same).
ggplot(MI_FL_Data, aes(realdate, FL_Actions)) +
  geom_point(na.rm = TRUE) +
  geom_hline(
    data = MI_FL_Data %>% filter(FL_Actions == 1),
    aes(yintercept = 15000), linetype = 5, na.rm = TRUE
  ) +
  geom_hline(
    data = MI_FL_Data %>% filter(FL_Actions == 2),
    aes(yintercept = 17000), linetype = 1, na.rm = TRUE
  )
I think the above should work. If it gives you trouble, please post a reproducible example - say 10 rows of data shared with dput, e.g., dput(MI_FL_Data[1:10, ]).
The code used to produce the following graph is: gplot(MI_FL_Data, aes(realdate, FLday)) + geom_line()+ geom_label(data=MI_FL_Data, aes(label=FL_Actions), na.rm=TRUE, y=15500)+ geom_point(aes(realdate, 15000), na.rm = TRUE) + geom_hline(data = MI_FL_Data %>% filter(FL_Actions >= 1),aes(yintercept = 15000), linetype=5, na.rm=TRUE)+labs(x=NULL, y="Number of Reported Daily COVID Cases", title="State of Florida",caption="1= closing actions, 2= opening actions")+theme_classic()
However, the size of the hline is still concerning and I'm not sure exactly how to get the bolded overlay to go away. Any suggestions on this are welcome.

Error bars on double Y-axis graph--ggplot2

I am trying to add error bars to my double y axis graph, but when run, it completely ruins the graph. I attached a picture below. I also added my code.
If you need the full data set, let me know! Thank you so much in advance!
# Dual-axis plot: precipitation on the primary y axis and temperature rescaled
# onto the same axis, with a secondary axis that undoes the rescaling.

# Ratio of the largest PRCP value to the largest Temp_C value; multiplying
# Temp_C by it puts temperature on the precipitation scale.
scalefactor <- max(Complete_Seasonality_Data$PRCP)/max(Complete_Seasonality_Data$Temp_C)
p <- ggplot(Complete_Seasonality_Data, aes(x = NewMonths5))
# Precipitation series, drawn in its own units.
p <- p + geom_point(aes(y = PRCP, colour = "Precipitation"))
p <- p + geom_line(aes(y = PRCP, colour = "Precipitation", group=1))
# Temperature series, drawn at Temp_C * scalefactor.
p <- p + geom_point(aes(y = Temp_C*scalefactor, colour = "Temperature"))
p <- p + geom_line(aes(y = Temp_C*scalefactor, colour = "Temperature", group=1))
# Secondary axis divides by scalefactor, i.e. maps plotted values back to Temp_C.
p <- p + scale_y_continuous(sec.axis = sec_axis(~./scalefactor, name = ylabseasonality))
p <- p + scale_colour_manual(values = c("blue", "red"))
p <- p + labs(y = "Precipitation (in)",
x = "Month",
colour = "Parameter")
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90), legend.position = c(.99, .01))
# NOTE(review): the temperature points/line above are plotted at
# Temp_C * scalefactor, but these error-bar bounds are NOT multiplied by
# scalefactor, so they are placed in raw temperature units on the
# precipitation axis. The bounds also come from the external vectors
# TempSummary$mean / StdErrorTemp rather than columns of the plot data, so
# they line up with the x values only if their length and order match the
# rows of Complete_Seasonality_Data -- TODO confirm.
p <- p + geom_errorbar(aes(ymin = TempSummary$mean - StdErrorTemp, ymax = TempSummary$mean + StdErrorTemp), position=position_dodge(.9), width=0.2)
# NOTE(review): ymin is built from PrecipSummary$mean but ymax from
# TempSummary$mean -- this looks like a copy-paste slip; presumably both
# bounds should use PrecipSummary$mean.
p <- p + geom_errorbar(aes(ymin = PrecipSummary$mean - StdErrorPrecip, ymax = TempSummary$mean + StdErrorPrecip), position=position_dodge(.9), width=0.2)
# Print the assembled plot.
p
How I computed the Std Errors
TempSummary<- Summarize(Temp_C~ Month,
data=Chara_Data,
digits=3)
View(TempSummary)
StdErrorTemp<- (TempSummary$sd)/ (sqrt(TempSummary$n))
View(StdErrorTemp)
PrecipSummary<- Summarize(PRCP ~ Group.1,
data=Complete_Seasonality_Data,
digits=3)
StdErrorPrecip<- (PrecipSummary$sd/ sqrt(PrecipSummary$n))
Complete data set!
structure(list(Group.1 = c("April", "August", "December", "February",
"January", "July", "June", "March", "May", "November", "October",
"September"), Season = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Month = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Year = c(2017.05882352941, 2016.6, 2016.6, 2017.6,
2017.6, 2016.6, 2017.05882352941, 2017, 2017.05882352941, 2016.6,
2016.6, 2016.6), Date = structure(c(1494315952.94118, 1490691600,
1500316560, 1506183120, 1504163520, 1487501280, 1499108611.76471,
1489840800, 1496798682.35294, 1498314240, 1496087280, 1493421840
), class = c("POSIXct", "POSIXt")), Site = c(8.17647058823529,
8.125, 7.775, 7.775, 6.375, 6.375, 8.20588235294118, 6.80555555555556,
6.55882352941176, 6.375, 8.1, 6.375), PercentCover = c(0.765882352941176,
0.7125, 0.7505, 0.7775, 0.8625, 0.867, 0.763529411764706, 0.83,
0.850588235294118, 0.848, 0.7065, 0.834), AveHt = c(60.1684438927086,
50.2311192279942, 58.9048701298701, 57.3448097041847, 55.2253291847042,
64.6965656565657, 57.9602622867329, 56.672138047138, 64.4076426024955,
57.1465322871573, 54.3781565656566, 58.3185831529582), SE = c(7.07246013321596,
7.79305525403115, 7.00224498332823, 6.46671176266333, 6.32495719718401,
7.04611575726224, 8.09695750051648, 5.65899377193264, 7.28959135811987,
6.24571692582705, 7.32819802238581, 7.05669314452393), MaxHt = c(88.3823529411765,
81.625, 87.75, 85, 85.875, 96.425, 92.9117647058823, 82.5, 98.6764705882353,
88.125, 79.75, 89.65), green = c(0.350962665193537, 0.278211058736042,
0.183934291894458, 0.197711422851132, 0.179043270311077, 0.335751664926552,
0.186533536107468, 0.256634190010066, 0.319397625619223, 0.204519948331115,
0.249063275007846, 0.277894684744482), yellow = c(0.556643767952726,
0.569690303836593, 0.686152813243381, 0.654331042886853, 0.594548585049017,
0.554485584960289, 0.581008683220038, 0.609988063809375, 0.594827659217835,
0.620510694031593, 0.633793562346056, 0.600527348262596), brown = c(0.0923935668537371,
0.14983619398845, 0.122185622134889, 0.145933312808728, 0.226114026992848,
0.10976275011316, 0.229212761734686, 0.132653108499399, 0.0857747151629417,
0.174675239990233, 0.114398064606882, 0.121577966992922), Temp = c(78.4411764705882,
82.975, 75.65, 74.75, 74.3, 82.2051282051282, 81.0882352941177,
75.8333333333333, 79.8823529411765, 78.6, 80.1944444444444, 83
), Temp_C = c(25.8006535947712, 28.3194444444444, 24.25, 23.75,
23.5, 27.8917378917379, 27.2712418300654, 24.3518518518519, 26.6013071895425,
25.8888888888889, 26.7746913580247, 28.3333333333333), Vis = c(1.98823529411765,
2.12820512820513, 2.2125, 2.07, 2.1625, 2.07179487179487, 2.05,
2.02777777777778, 2.11764705882353, 2.205, 2.11, 2.17375), Nests = c(12.4117647058824,
17.1, 7.1, 6.275, 4, 8.9, 13.8787878787879, 4.88888888888889,
7.38235294117647, 2.8, 13.025, 5.6), SickorDeadFish = c(0.0882352941176471,
0.2, 0.175, 0.075, 0.05, 0.117647058823529, 0.0882352941176471,
0.166666666666667, 0.0294117647058824, 0.25, 0.333333333333333,
0.275), Cladophora = c(0.0866666666666667, 0.0492857142857143,
0.0471428571428571, 0.0907142857142857, 0.0264285714285714, 0.0154545454545455,
0.0380952380952381, 0.0295238095238095, 0.0161904761904762, 0.0178571428571429,
0.0407142857142857, 0.03), Comments = c(NaN, NaN, NaN, NaN, NaN,
NaN, NaN, NaN, NaN, NaN, NaN, NaN), STATION = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), NAME = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), DATE = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), MONTH = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), AWND = c(6.52626966292135, 5.97866090712743, 5.85811926605505,
6.31656097560976, 6.181, 6.1103908045977, 6.23947727272727, 6.5154211663067,
6.0985313174946, 5.64997635933806, 5.43263157894737, 5.54940639269406
), FMTM = c(1412.13333333333, 1431.1935483871, 1411.77419354839,
1535.16666666667, 1339.24137931034, 1439.77419354839, 1378.3,
1398.8064516129, 1353.12903225806, 1362.96666666667, 1408.45161290323,
1381.46666666667), PGTM = c(1394.1095890411, 1394.96774193548,
1306.83333333333, 1412.0511627907, 1327.90350877193, 1435.51769911504,
1372.37674418605, 1389.12328767123, 1376.75576036866, 1373.45341614907,
1346.2774566474, 1396), PRCP = c(0.0205869074492099, 0.0248701298701299,
0.0663425925925926, 0.0481472684085511, 0.0360991379310345, 0.0101144164759725,
0.00790067720090293, 0.0762693156732892, 0.0298491379310345,
0.0472985781990521, 0.034965034965035, 0.0243778801843318), SNOW = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), SNWD = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), TAVG = c(78.5333333333333, NaN, NaN, 61.1052631578947,
68.6333333333333, 80.2903225806452, 79.4, 72.5161290322581, 77.8709677419355,
NaN, NaN, NaN), TMAX = c(83.6826484018265, 88.8509719222462,
81.4940617577197, 80.6938271604938, 80.8072562358277, 88.1520737327189,
86.8795454545455, 81.3290043290043, 84.6048034934498, 83.8289786223278,
86.3615560640732, 88.1009174311927), TMIN = c(67.5423340961098,
72.5917926565875, 66.4394299287411, 64.9283950617284, 64.5600907029478,
71.9654377880184, 70.6772727272727, 65.7597402597403, 68.6527472527472,
68.9643705463183, 70.558352402746, 71.7821100917431), TSUN = c(NaN,
NaN, NaN, 0, 0, NaN, NaN, NaN, NaN, NaN, NaN, NaN), WDF2 = c(115.538116591928,
100.905172413793, 133.577981651376, 143.965936739659, 149.438444924406,
91.141876430206, 99.5022624434389, 131.612903225806, 124.279569892473,
109.693396226415, 119.450800915332, 115.068493150685), WDF5 = c(107.545045045045,
97.6077586206897, 124.528735632184, 133.031784841076, 140.826086956522,
82.5229357798165, 90.972850678733, 120.634573304158, 115.714285714286,
103.720379146919, 109.266055045872, 104.736842105263), WSF2 = c(15.2026905829596,
14.8530172413793, 14.6919724770642, 15.4111922141119, 15.1332613390929,
14.9070938215103, 15.083257918552, 15.4161290322581, 14.8625806451613,
14.322641509434, 14.3432494279176, 14.5600456621005), WSF5 = c(22.1105855855856,
21.9961206896552, 20.8029885057471, 20.8081145584726, 20.4824675324675,
22.4052752293578, 22.2158371040724, 21.9317286652079, 21.130303030303,
20.8722748815166, 20.493119266055, 21.0052511415525), WT01 = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), WT02 = c(NaN, 1, NaN, 1, 1,
NaN, NaN, 1, 1, NaN, 1, NaN), WT08 = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1), WT10 = c(NaN, NaN, NaN, NaN, NaN, NaN, 1, NaN, NaN,
NaN, NaN, NaN), NewMonths2 = structure(c(17295, 17253, 17364,
17432, 17409, 17216, 17350, 17243, 17324, 17341, 17315, 17284
), class = "Date")), row.names = c(NA, -12L), class = "data.frame")
**Edited to add complete data set and how I did std error
Temp Summary
structure(list(Month = c("April", "August", "December", "February",
"January", "July", "June", "March", "May", "November", "October",
"September"), n = c(34, 40, 40, 40, 40, 40, 34, 36, 34, 40, 40,
40), nvalid = c(34, 40, 40, 40, 40, 39, 34, 36, 34, 40, 36, 40
), mean = c(25.801, 28.319, 24.25, 23.75, 23.5, 27.892, 27.271,
24.352, 26.601, 25.889, 26.775, 28.333), sd = c(0.478, 0.978,
0.921, 0.793, 0.551, 0.463, 0.632, 1.47, 0.905, 0.763, 0.928,
0.534), min = c(25, 26.667, 22.778, 21.667, 21.667, 27.222, 26.111,
22.778, 25, 25, 25.556, 27.222), Q1 = c(25.556, 27.778, 23.889,
23.333, 23.333, 27.778, 27.222, 23.333, 26.111, 25.556, 25.556,
27.778), median = c(25.556, 27.778, 23.889, 23.889, 23.333, 27.778,
27.222, 23.889, 26.667, 25.556, 27.222, 28.333), Q3 = c(25.972,
28.889, 25, 24.444, 23.889, 28.333, 27.639, 24.583, 27.222, 26.111,
27.361, 28.889), max = c(26.667, 30, 25.556, 25, 24.444, 28.889,
28.889, 27.222, 27.778, 27.778, 28.333, 29.444)), class = "data.frame", row.names = c(NA,
-12L))
Precip Summary
structure(list(MONTH = c("April", "August", "December", "February",
"January", "July", "June", "March", "May", "November", "October",
"September"), n = c(446, 464, 436, 422, 465, 437, 444, 465, 465,
424, 438, 439), nvalid = c(443, 462, 432, 421, 464, 437, 443,
453, 464, 422, 429, 434), mean = c(0.021, 0.025, 0.066, 0.048,
0.036, 0.01, 0.008, 0.076, 0.03, 0.047, 0.035, 0.024), sd = c(0.094,
0.184, 0.342, 0.211, 0.142, 0.047, 0.047, 0.343, 0.14, 0.24,
0.243, 0.112), min = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Q1 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), median = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), Q3 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
max = c(1.1, 3.06, 4.94, 2.61, 1.5, 0.47, 0.76, 3.32, 1.43,
3.29, 3.64, 1.25), percZero = c(81.264, 87.662, 76.389, 75.534,
77.802, 86.728, 86.682, 75.717, 84.267, 77.962, 83.916, 81.797
)), class = "data.frame", row.names = c(NA, -12L))
Temp Summary Results
enter image description here
Precip Summary Results
enter image description here
I would suggest the following approach. Be careful with the values of your error bars: the scaling factor must also be applied to them, which is why you got a messy plot. Here is the code, using the data you added:
library(ggplot2)

# Date-typed x variable used for the plot.
Complete_Seasonality_Data$NewMonths5 <- as.Date(Complete_Seasonality_Data$Date)

# Standard error of each monthly mean: sd / sqrt(n).
# NOTE(review): the summary tables also carry an `nvalid` column; if `sd` was
# computed on non-missing values only, `nvalid` may be the right denominator --
# confirm against how the summaries were produced.
StdErrorTemp <- TempSummary$sd / sqrt(TempSummary$n)
StdErrorPrecip <- PrecipSummary$sd / sqrt(PrecipSummary$n)

# Factor that maps temperature values onto the precipitation (primary) axis.
scalefactor <- max(Complete_Seasonality_Data$PRCP) /
  max(Complete_Seasonality_Data$Temp_C)

# The summary vectors used in the error bars are matched to the data by row
# position, so this assumes TempSummary / PrecipSummary rows are in the same
# order as Complete_Seasonality_Data -- TODO confirm.
p <- ggplot(Complete_Seasonality_Data, aes(x = NewMonths5))

# Precipitation series, drawn in its native units on the primary axis.
p <- p + geom_point(aes(y = PRCP, colour = "Precipitation"))
p <- p + geom_line(aes(y = PRCP, colour = "Precipitation", group = 1))
p <- p + geom_errorbar(
  aes(
    ymin = PrecipSummary$mean - StdErrorPrecip,
    ymax = PrecipSummary$mean + StdErrorPrecip
  ),
  position = position_dodge(.9), width = 0.2
)

# Temperature series, rescaled onto the primary axis; the secondary axis
# divides by the same factor so its labels read in temperature units.
p <- p + geom_point(aes(y = Temp_C * scalefactor, colour = "Temperature"))
p <- p + geom_line(aes(y = Temp_C * scalefactor, colour = "Temperature", group = 1))
p <- p + scale_y_continuous(sec.axis = sec_axis(~ . / scalefactor, name = "Temperature"))

# The whole interval (mean +/- SE) is rescaled, not just the mean: the standard
# error is in temperature units and must be mapped to the primary axis too.
p <- p + geom_errorbar(
  aes(
    ymin = (TempSummary$mean - StdErrorTemp) * scalefactor,
    ymax = (TempSummary$mean + StdErrorTemp) * scalefactor
  ),
  position = position_dodge(.9), width = 0.2
)

p <- p + scale_colour_manual(values = c("blue", "red"))
p <- p + labs(
  y = "Precipitation (in)",
  x = "Month",
  colour = "Parameter"
)
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90), legend.position = c(.99, .01))
p
Output:

Object saved using saveRDS not identical to the original?

I saved a data.table named dat using saveRDS and then read the saved object to a new variable named u. I expected the two to be identical, upon checking, here's what I found:
> identical(u$type, dat$type)
[1] TRUE
> identical(u$degc, dat$degc)
[1] TRUE
> identical(u$rh, dat$rh)
[1] TRUE
> identical(u$pres, dat$pres)
[1] TRUE
> identical(u$prec, dat$prec)
[1] TRUE
> identical(u$tme, dat$tme)
[1] TRUE
> identical(u, dat)
[1] FALSE
If all the columns are identical, and the column names are the same, why are the two not identical?
DATA
This should be reproducible with most datasets but here's a sample of what I have just in case:
> dput(dat[1:20])
structure(list(tme = structure(c(1512489600, 1512493200, 1512496800,
1512500400, 1512504000, 1512507600, 1512511200, 1512514800, 1512518400,
1512522000, 1512525600, 1512529200, 1512532800, 1512536400, 1512540000,
1512543600, 1512547200, 1512550800, 1512554400, 1512558000), class = c("POSIXct",
"POSIXt"), tzone = "America/Chicago"), degc = c(24, 21, 21, 19,
18, 17, 16, 15, 14, 14, 13, 12, 12, 12, 11, 11, 10, 10, 9, 9),
rh = c(89, 87, 88, 82, 81, 79, 76, 80, 80, 80, 78, 75, 71,
68, 67, 68, 71, 76, 78, 80), type = c("forecast_1", "forecast_1",
"forecast_1", "forecast_1", "forecast_1", "forecast_1", "forecast_1",
"forecast_1", "forecast_1", "forecast_1", "forecast_1", "forecast_1",
"forecast_1", "forecast_1", "forecast_1", "forecast_1", "forecast_1",
"forecast_1", "forecast_1", "forecast_1"), pres = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), prec = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_)), .Names = c("tme",
"degc", "rh", "type", "pres", "prec"), class = c("data.table",
"data.frame"), row.names = c(NA, -20L), .internal.selfref = <pointer: 0x00000000000b0788>)

r for loop with names mutate

The goal is to replace NAs with 0 values in a set of variables using a loop function. Obviously, this is a super simple loop function, but I have no idea why this is not doing what it should.
Two additional preferences: suggestions that use the variable names (as opposed to column numbers) and that use dplyr are preferred.
library
library(plyr)
library(dplyr)
sample data
y <- structure(list(pid = c(1002L, 1002L, 1002L, 1002L, 1002L, 1002L,1002L, 1002L, 1002L, 1002L), year = 1968:1977, weeks_hd_e = c(3,0, 50, 49, 50, 50, 50, 50, 50, 49), weeks_wf_e = c(4, 6, 0, 0,0, 0, 0, 0, 0, 0), weeks_hd_u = c(NA, NA, 0, 0, 0, 0, 0, 0, 0,0), weeks_hd = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), weeks_wf_u = c(NA,NA, NA, NA, NA, NA, NA, NA, 0, NA), weeks_wf = c(NA_real_, NA_real_,NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,NA_real_)), .Names = c("pid", "year", "weeks_hd_e", "weeks_wf_e","weeks_hd_u", "weeks_hd", "weeks_wf_u", "weeks_wf"), row.names = c(NA,10L), class = "data.frame")
this command works
y <- mutate(y, i = ifelse(!is.na(i), i, 0))
this loop does not
vars <- c("weeks_hd_e", "weeks_hd_u", "weeks_wf_e", "weeks_wf_u", "weeks_hd", "weeks_wf")
# NOTE(review): `vars` is an unnamed character vector, so names(vars) is NULL
# and the loop body below is never executed.
for (i in names(vars)) {
# NOTE(review): inside mutate(), the `i` on the left of `=` is taken as a
# literal column name, not as the value held by the loop variable.
y <- mutate(y, i = ifelse(!is.na(i), i, 0))
}
View(y)
I have been given two excellent answers from friends:
# Replace NA with 0 in every column of `y` named in `vars`.
# seq_along() iterates zero times when `vars` is empty, whereas
# 1:length(vars) would produce c(1, 0) and index past the vector.
for (i in seq_along(vars)) {
  # `[[` extracts the column itself, so the NA mask and the assignment both
  # operate on a plain vector.
  y[[vars[i]]][is.na(y[[vars[i]]])] <- 0
}
or
# Replace NA with 0 column by column.
# lapply() over y[vars] keeps each column's own type and still works when
# `vars` names a single column; apply() first coerces the data frame to a
# matrix and fails once y[, vars] drops to a plain vector.
y[vars] <- lapply(y[vars], function(x) replace(x, is.na(x), 0))
The replace_na command from the tidyr package does exactly what you want.
Use it like this:
install.packages("tidyr")
library(tidyr)
# your data
y <- structure(list(pid = c(1002L, 1002L, 1002L, 1002L, 1002L, 1002L,1002L, 1002L, 1002L, 1002L), year = 1968:1977, weeks_hd_e = c(3,0, 50, 49, 50, 50, 50, 50, 50, 49), weeks_wf_e = c(4, 6, 0, 0,0, 0, 0, 0, 0, 0), weeks_hd_u = c(NA, NA, 0, 0, 0, 0, 0, 0, 0,0), weeks_hd = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), weeks_wf_u = c(NA,NA, NA, NA, NA, NA, NA, NA, 0, NA), weeks_wf = c(NA_real_, NA_real_,NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,NA_real_)), .Names = c("pid", "year", "weeks_hd_e", "weeks_wf_e","weeks_hd_u", "weeks_hd", "weeks_wf_u", "weeks_wf"), row.names = c(NA,10L), class = "data.frame")
# Fill in the missing values of the data frame with replace_na().
# `replace` takes a named list: each name is a column of `y`, and each value
# is what that column's NAs are replaced with.
y <- replace_na(
  y,
  replace = list(
    weeks_hd_e = 0,
    weeks_hd_u = 0,
    weeks_wf_e = 0,
    weeks_wf_u = 0,
    weeks_hd = 0,
    weeks_wf = 0
  )
)
Note that this meets your preference to specify the variables by name and is more flexible in terms of replacement, i.e. you can replace NAs in numeric and character variables in the same command.

Resources