Related
I have this data (listed as reproducible):
structure(list(age = c(62.84998, 60.33899, 52.74698, 42.38498
), death = c(0, 1, 1, 1), sex = c("male", "female", "female",
"female"), hospdead = c(0, 1, 0, 0), slos = c(5, 4, 17, 3), d.time = c(2029,
4, 47, 133), dzgroup = c("Lung Cancer", "Cirrhosis", "Cirrhosis",
"Lung Cancer"), dzclass = c("Cancer", "COPD/CHF/Cirrhosis", "COPD/CHF/Cirrhosis",
"Cancer"), num.co = c(0, 2, 2, 2), edu = c(11, 12, 12, 11), income = c("$11-$25k",
"$11-$25k", "under $11k", "under $11k"), scoma = c(0, 44, 0,
0), charges = c(9715, 34496, 41094, 3075), totcst = c(NA_real_,
NA_real_, NA_real_, NA_real_), totmcst = c(NA_real_, NA_real_,
NA_real_, NA_real_), avtisst = c(7, 29, 13, 7), race = c("other",
"white", "white", "white"), sps = c(33.8984375, 52.6953125, 20.5,
20.0976562), aps = c(20, 74, 45, 19), surv2m = c(0.262939453,
0.0009999275, 0.790893555, 0.698974609), surv6m = c(0.0369949341,
0, 0.664916992, 0.411987305), hday = c(1, 3, 4, 1), diabetes = c(0,
0, 0, 0), dementia = c(0, 0, 0, 0), ca = c("metastatic", "no",
"no", "metastatic"), prg2m = c(0.5, 0, 0.75, 0.899999619), prg6m = c(0.25,
0, 0.5, 0.5), dnr = c("no dnr", NA, "no dnr", "no dnr"), dnrday = c(5,
NA, 17, 3), meanbp = c(97, 43, 70, 75), wblc = c(6, 17.0976562,
8.5, 9.09960938), hrt = c(69, 112, 88, 88), resp = c(22, 34,
28, 32), temp = c(36, 34.59375, 37.39844, 35), pafi = c(388,
98, 231.65625, NA), alb = c(1.7998047, NA, NA, NA), bili = c(0.19998169,
NA, 2.19970703, NA), crea = c(1.19995117, 5.5, 2, 0.79992676),
sod = c(141, 132, 134, 139), ph = c(7.459961, 7.25, 7.459961,
NA), glucose = c(NA_real_, NA_real_, NA_real_, NA_real_),
bun = c(NA_real_, NA_real_, NA_real_, NA_real_), urine = c(NA_real_,
NA_real_, NA_real_, NA_real_), adlp = c(7, NA, 1, 0), adls = c(7,
1, 0, 0), sfdm2 = c(NA, "<2 mo. follow-up", "<2 mo. follow-up",
"no(M2 and SIP pres)"), adlsc = c(7, 1, 0, 0)), row.names = c(NA,
4L), class = "data.frame")
I am wanting to estimate the population proportion of individuals who had lung cancer listed as their primary disease group (dzgroup). How would I do this? My original thought was to just divide the total number that have lung cancer by the whole dataset population, but I do not believe this is correct.
To get the proportion over the whole data set, create a logical vector and take its mean: because TRUE is treated as 1 and FALSE as 0, the mean is the proportion of TRUE values, and multiplying by 100 gives the percentage.
# Percentage of rows whose dzgroup equals "Lung Cancer", rounded to two
# decimals; rows with a missing dzgroup are left out of the mean.
round(mean(df1[["dzgroup"]] == "Lung Cancer", na.rm = TRUE) * 100, digits = 2)
I have this reproducible DataFrame:
structure(list(age = c(62.84998, 60.33899, 52.74698, 42.38498,
79.88495, 93.01599, 62.37097, 86.83899, 85.65594, 42.25897),
death = c(0, 1, 1, 1, 0, 1, 1, 1, 1, 1), sex = c("male",
"female", "female", "female", "female", "male", "male", "male",
"male", "female"), hospdead = c(0, 1, 0, 0, 0, 1, 0, 0, 0,
0), slos = c(5, 4, 17, 3, 16, 4, 9, 7, 12, 8), d.time = c(2029,
4, 47, 133, 2029, 4, 659, 142, 63, 370), dzgroup = c("Lung Cancer",
"Cirrhosis", "Cirrhosis", "Lung Cancer", "ARF/MOSF w/Sepsis",
"Coma", "CHF", "CHF", "Lung Cancer", "Colon Cancer"), dzclass = c("Cancer",
"COPD/CHF/Cirrhosis", "COPD/CHF/Cirrhosis", "Cancer", "ARF/MOSF",
"Coma", "COPD/CHF/Cirrhosis", "COPD/CHF/Cirrhosis", "Cancer",
"Cancer"), num.co = c(0, 2, 2, 2, 1, 1, 1, 3, 2, 0), edu = c(11,
12, 12, 11, NA, 14, 14, NA, 12, 11), income = c("$11-$25k",
"$11-$25k", "under $11k", "under $11k", NA, NA, "$25-$50k",
NA, NA, "$25-$50k"), scoma = c(0, 44, 0, 0, 26, 55, 0, 26,
26, 0), charges = c(9715, 34496, 41094, 3075, 50127, 6884,
30460, 30460, NA, 9914), totcst = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), totmcst = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), avtisst = c(7, 29, 13, 7, 18.666656, 5, 8, 6.5, 8.5, 8
), race = c("other", "white", "white", "white", "white",
"white", "white", "white", "black", "hispanic"), sps = c(33.8984375,
52.6953125, 20.5, 20.0976562, 23.5, 19.3984375, 17.296875,
21.5976562, 15.8984375, 2.2998047), aps = c(20, 74, 45, 19,
30, 27, 46, 53, 17, 9), surv2m = c(0.262939453, 0.0009999275,
0.790893555, 0.698974609, 0.634887695, 0.284973145, 0.892944336,
0.670898438, 0.570922852, 0.952880859), surv6m = c(0.0369949341,
0, 0.664916992, 0.411987305, 0.532958984, 0.214996338, 0.820922852,
0.498962402, 0.24899292, 0.887939453), hday = c(1, 3, 4,
1, 3, 1, 1, 1, 1, 1), diabetes = c(0, 0, 0, 0, 0, 0, 0, 1,
0, 0), dementia = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), ca = c("metastatic",
"no", "no", "metastatic", "no", "no", "no", "no", "metastatic",
"metastatic"), prg2m = c(0.5, 0, 0.75, 0.899999619, 0.899999619,
0, NA, 0.799999714, 0.049999982, NA), prg6m = c(0.25, 0,
0.5, 0.5, 0.8999996, 0, 0.6999998, 0.3999999, 0.0001249999,
NA), dnr = c("no dnr", NA, "no dnr", "no dnr", "no dnr",
"no dnr", "no dnr", "no dnr", "dnr after sadm", "no dnr"),
dnrday = c(5, NA, 17, 3, 16, 4, 9, 7, 2, 8), meanbp = c(97,
43, 70, 75, 59, 110, 78, 72, 97, 84), wblc = c(6, 17.0976562,
8.5, 9.09960938, 13.5, 10.3984375, 11.6992188, 13.5996094,
9.69921875, 11.2988281), hrt = c(69, 112, 88, 88, 112, 101,
120, 100, 56, 94), resp = c(22, 34, 28, 32, 20, 44, 28, 26,
20, 20), temp = c(36, 34.59375, 37.39844, 35, 37.89844, 38.39844,
37.39844, 37.59375, 36.59375, 38.19531), pafi = c(388, 98,
231.65625, NA, 173.3125, 266.625, 309.5, 404.75, 357.125,
NA), alb = c(1.7998047, NA, NA, NA, NA, NA, 4.7998047, NA,
NA, 4.6992188), bili = c(0.19998169, NA, 2.19970703, NA,
NA, NA, 0.39996338, NA, 0.39996338, 0.19998169), crea = c(1.19995117,
5.5, 2, 0.79992676, 0.79992676, 0.69995117, 1.59985352, 2,
1, 0.79992676), sod = c(141, 132, 134, 139, 143, 140, 132,
139, 143, 139), ph = c(7.459961, 7.25, 7.459961, NA, 7.509766,
7.65918, 7.479492, 7.509766, 7.449219, NA), glucose = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), bun = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), urine = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), adlp = c(7, NA, 1, 0, NA, NA, 0, NA, NA, 0), adls = c(7,
1, 0, 0, 2, 1, 1, 0, 7, NA), sfdm2 = c(NA, "<2 mo. follow-up",
"<2 mo. follow-up", "no(M2 and SIP pres)", "no(M2 and SIP pres)",
"<2 mo. follow-up", "no(M2 and SIP pres)", NA, NA, NA), adlsc = c(7,
1, 0, 0, 2, 1, 1, 0, 7, 0.4947999)), row.names = c(NA, 10L
), class = "data.frame")
I am needing to calculate the proportion of patients who died in the hospital in patients with an active DNR order on day 3 and in patients without an active DNR order on day 3. To group which patients had an active DNR on day 3 and which did not, I used the subset function below:
# Drop patients with no recorded DNR day, then split the remainder at day 3.
SB_xlsx1 <- subset(SB_xlsx, !is.na(dnrday))
# dnrday of 3 or less
YesDNR <- SB_xlsx1[SB_xlsx1$dnrday <= 3, ]
# dnrday greater than 3
NoDNR <- SB_xlsx1[SB_xlsx1$dnrday > 3, ]
However, I don't know how to calculate the proportion of patients that died in the hospital for those with a DNR and without a DNR. The 'hospdead' variable has all 0s and 1s, where 0 = not dead and 1 = dead. However, I don't know how to get the proportion that died for having a DNR at day 3 and did not have a DNR at day 3. What code could I use for my desired result. SB_xlsx also just represents my DataFrame name.
There are a few ways to do this, but the simplest is probably via the aggregate function.
> aggregate( hospdead ~ (dnrday<=3) , SB_xlsx1 , mean)
dnrday <= 3 hospdead
1 FALSE 0.1428571
2 TRUE 0.0000000
You may use xtabs to sum the deaths within each level of the condition dnrday <= 3 (i.e. with or without an active DNR by day 3), and proportions to express those sums as shares of all deaths.
# Sum `death` within each level of `dnrday <= 3`, then divide by the overall
# sum so the two entries add up to 1. The outer parentheses print the result.
(res <- prop.table(xtabs(death ~ dnrday <= 3, data = SB_xlsx)))
# dnrday <= 3
# FALSE TRUE
# 0.7142857 0.2857143
where
sum(res)
# [1] 1
EDIT: I apologize; I misread your post when providing my original answer. I've revised it below.
You referred to the hospdead variable, but in the toy data set it has just one nonzero entry, so I'm using the death variable instead to demonstrate the principle.
First, a base R approach:
# Among rows with death == 1: share whose dnrday is 3 or less ...
mean(SB_xlsx1$dnrday[SB_xlsx1$death == 1] <= 3)
# ... and share whose dnrday is greater than 3.
mean(SB_xlsx1$dnrday[SB_xlsx1$death == 1] > 3)
The idea is to restrict to the subset of rows for which a death occurred, then perform a logical check to see which entries have dnrday greater than 3.
Note that if you have NA entries in death, you'll want to remove them first as you did with those in dnrday.
For a dplyr approach:
library(dplyr)
# Keep only rows with death == 1, then report two shares of those rows:
# the mean of the logical `dnrday <= 3` and the mean of `dnrday > 3`.
# The result columns are named after the two expressions as written.
SB_xlsx1 %>%
filter(death == 1) %>%
summarize(mean(dnrday <= 3), mean(dnrday > 3))
or, for a slightly nicer-looking table,
# Same shares as a table with one row per value of `dnrday <= 3`.
# n() is the row count of the current group; `.` is the data piped into
# summarize(), so nrow(.) is the total number of rows with death == 1.
SB_xlsx1 %>%
filter(death == 1) %>%
group_by(dnrday <= 3) %>%
summarize(prop = n() / nrow(.))
I am trying to add error bars to my double-y-axis graph, but when I run the code, it completely ruins the graph. I have attached a picture below, along with my code.
If you need the full data set, let me know! Thank you so much in advance!
# Original attempt from the question, kept exactly as posted.
# Ratio of the two series' maxima; Temp_C is multiplied by it below so that
# temperature can be drawn against the precipitation axis.
scalefactor <- max(Complete_Seasonality_Data$PRCP)/max(Complete_Seasonality_Data$Temp_C)
p <- ggplot(Complete_Seasonality_Data, aes(x = NewMonths5))
p <- p + geom_point(aes(y = PRCP, colour = "Precipitation"))
p <- p + geom_line(aes(y = PRCP, colour = "Precipitation", group=1))
p <- p + geom_point(aes(y = Temp_C*scalefactor, colour = "Temperature"))
p <- p + geom_line(aes(y = Temp_C*scalefactor, colour = "Temperature", group=1))
# Secondary axis undoes the scaling; ylabseasonality is not defined in this
# snippet and must already exist.
p <- p + scale_y_continuous(sec.axis = sec_axis(~./scalefactor, name = ylabseasonality))
p <- p + scale_colour_manual(values = c("blue", "red"))
p <- p + labs(y = "Precipitation (in)",
x = "Month",
colour = "Parameter")
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90), legend.position = c(.99, .01))
# NOTE(review): the temperature points and lines above are drawn as
# Temp_C*scalefactor, but these bounds are not multiplied by scalefactor.
p <- p + geom_errorbar(aes(ymin = TempSummary$mean - StdErrorTemp, ymax = TempSummary$mean + StdErrorTemp), position=position_dodge(.9), width=0.2)
# NOTE(review): ymin is built from PrecipSummary$mean but ymax from
# TempSummary$mean -- confirm whether that is intended.
p <- p + geom_errorbar(aes(ymin = PrecipSummary$mean - StdErrorPrecip, ymax = TempSummary$mean + StdErrorPrecip), position=position_dodge(.9), width=0.2)
p
How I computed the Std Errors
# Per-month summary statistics and standard errors (sd / sqrt(n)).
# Summarize() is not defined in this snippet -- presumably it comes from an
# attached package; confirm which one.

# Temperature, summarised by Month from Chara_Data
TempSummary <- Summarize(Temp_C ~ Month, data = Chara_Data, digits = 3)
View(TempSummary)
StdErrorTemp <- with(TempSummary, sd / sqrt(n))
View(StdErrorTemp)

# Precipitation, summarised by Group.1 from Complete_Seasonality_Data
PrecipSummary <- Summarize(
  PRCP ~ Group.1,
  data = Complete_Seasonality_Data,
  digits = 3
)
StdErrorPrecip <- with(PrecipSummary, sd / sqrt(n))
Complete data set!
structure(list(Group.1 = c("April", "August", "December", "February",
"January", "July", "June", "March", "May", "November", "October",
"September"), Season = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Month = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Year = c(2017.05882352941, 2016.6, 2016.6, 2017.6,
2017.6, 2016.6, 2017.05882352941, 2017, 2017.05882352941, 2016.6,
2016.6, 2016.6), Date = structure(c(1494315952.94118, 1490691600,
1500316560, 1506183120, 1504163520, 1487501280, 1499108611.76471,
1489840800, 1496798682.35294, 1498314240, 1496087280, 1493421840
), class = c("POSIXct", "POSIXt")), Site = c(8.17647058823529,
8.125, 7.775, 7.775, 6.375, 6.375, 8.20588235294118, 6.80555555555556,
6.55882352941176, 6.375, 8.1, 6.375), PercentCover = c(0.765882352941176,
0.7125, 0.7505, 0.7775, 0.8625, 0.867, 0.763529411764706, 0.83,
0.850588235294118, 0.848, 0.7065, 0.834), AveHt = c(60.1684438927086,
50.2311192279942, 58.9048701298701, 57.3448097041847, 55.2253291847042,
64.6965656565657, 57.9602622867329, 56.672138047138, 64.4076426024955,
57.1465322871573, 54.3781565656566, 58.3185831529582), SE = c(7.07246013321596,
7.79305525403115, 7.00224498332823, 6.46671176266333, 6.32495719718401,
7.04611575726224, 8.09695750051648, 5.65899377193264, 7.28959135811987,
6.24571692582705, 7.32819802238581, 7.05669314452393), MaxHt = c(88.3823529411765,
81.625, 87.75, 85, 85.875, 96.425, 92.9117647058823, 82.5, 98.6764705882353,
88.125, 79.75, 89.65), green = c(0.350962665193537, 0.278211058736042,
0.183934291894458, 0.197711422851132, 0.179043270311077, 0.335751664926552,
0.186533536107468, 0.256634190010066, 0.319397625619223, 0.204519948331115,
0.249063275007846, 0.277894684744482), yellow = c(0.556643767952726,
0.569690303836593, 0.686152813243381, 0.654331042886853, 0.594548585049017,
0.554485584960289, 0.581008683220038, 0.609988063809375, 0.594827659217835,
0.620510694031593, 0.633793562346056, 0.600527348262596), brown = c(0.0923935668537371,
0.14983619398845, 0.122185622134889, 0.145933312808728, 0.226114026992848,
0.10976275011316, 0.229212761734686, 0.132653108499399, 0.0857747151629417,
0.174675239990233, 0.114398064606882, 0.121577966992922), Temp = c(78.4411764705882,
82.975, 75.65, 74.75, 74.3, 82.2051282051282, 81.0882352941177,
75.8333333333333, 79.8823529411765, 78.6, 80.1944444444444, 83
), Temp_C = c(25.8006535947712, 28.3194444444444, 24.25, 23.75,
23.5, 27.8917378917379, 27.2712418300654, 24.3518518518519, 26.6013071895425,
25.8888888888889, 26.7746913580247, 28.3333333333333), Vis = c(1.98823529411765,
2.12820512820513, 2.2125, 2.07, 2.1625, 2.07179487179487, 2.05,
2.02777777777778, 2.11764705882353, 2.205, 2.11, 2.17375), Nests = c(12.4117647058824,
17.1, 7.1, 6.275, 4, 8.9, 13.8787878787879, 4.88888888888889,
7.38235294117647, 2.8, 13.025, 5.6), SickorDeadFish = c(0.0882352941176471,
0.2, 0.175, 0.075, 0.05, 0.117647058823529, 0.0882352941176471,
0.166666666666667, 0.0294117647058824, 0.25, 0.333333333333333,
0.275), Cladophora = c(0.0866666666666667, 0.0492857142857143,
0.0471428571428571, 0.0907142857142857, 0.0264285714285714, 0.0154545454545455,
0.0380952380952381, 0.0295238095238095, 0.0161904761904762, 0.0178571428571429,
0.0407142857142857, 0.03), Comments = c(NaN, NaN, NaN, NaN, NaN,
NaN, NaN, NaN, NaN, NaN, NaN, NaN), STATION = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), NAME = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), DATE = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), MONTH = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), AWND = c(6.52626966292135, 5.97866090712743, 5.85811926605505,
6.31656097560976, 6.181, 6.1103908045977, 6.23947727272727, 6.5154211663067,
6.0985313174946, 5.64997635933806, 5.43263157894737, 5.54940639269406
), FMTM = c(1412.13333333333, 1431.1935483871, 1411.77419354839,
1535.16666666667, 1339.24137931034, 1439.77419354839, 1378.3,
1398.8064516129, 1353.12903225806, 1362.96666666667, 1408.45161290323,
1381.46666666667), PGTM = c(1394.1095890411, 1394.96774193548,
1306.83333333333, 1412.0511627907, 1327.90350877193, 1435.51769911504,
1372.37674418605, 1389.12328767123, 1376.75576036866, 1373.45341614907,
1346.2774566474, 1396), PRCP = c(0.0205869074492099, 0.0248701298701299,
0.0663425925925926, 0.0481472684085511, 0.0360991379310345, 0.0101144164759725,
0.00790067720090293, 0.0762693156732892, 0.0298491379310345,
0.0472985781990521, 0.034965034965035, 0.0243778801843318), SNOW = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), SNWD = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), TAVG = c(78.5333333333333, NaN, NaN, 61.1052631578947,
68.6333333333333, 80.2903225806452, 79.4, 72.5161290322581, 77.8709677419355,
NaN, NaN, NaN), TMAX = c(83.6826484018265, 88.8509719222462,
81.4940617577197, 80.6938271604938, 80.8072562358277, 88.1520737327189,
86.8795454545455, 81.3290043290043, 84.6048034934498, 83.8289786223278,
86.3615560640732, 88.1009174311927), TMIN = c(67.5423340961098,
72.5917926565875, 66.4394299287411, 64.9283950617284, 64.5600907029478,
71.9654377880184, 70.6772727272727, 65.7597402597403, 68.6527472527472,
68.9643705463183, 70.558352402746, 71.7821100917431), TSUN = c(NaN,
NaN, NaN, 0, 0, NaN, NaN, NaN, NaN, NaN, NaN, NaN), WDF2 = c(115.538116591928,
100.905172413793, 133.577981651376, 143.965936739659, 149.438444924406,
91.141876430206, 99.5022624434389, 131.612903225806, 124.279569892473,
109.693396226415, 119.450800915332, 115.068493150685), WDF5 = c(107.545045045045,
97.6077586206897, 124.528735632184, 133.031784841076, 140.826086956522,
82.5229357798165, 90.972850678733, 120.634573304158, 115.714285714286,
103.720379146919, 109.266055045872, 104.736842105263), WSF2 = c(15.2026905829596,
14.8530172413793, 14.6919724770642, 15.4111922141119, 15.1332613390929,
14.9070938215103, 15.083257918552, 15.4161290322581, 14.8625806451613,
14.322641509434, 14.3432494279176, 14.5600456621005), WSF5 = c(22.1105855855856,
21.9961206896552, 20.8029885057471, 20.8081145584726, 20.4824675324675,
22.4052752293578, 22.2158371040724, 21.9317286652079, 21.130303030303,
20.8722748815166, 20.493119266055, 21.0052511415525), WT01 = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), WT02 = c(NaN, 1, NaN, 1, 1,
NaN, NaN, 1, 1, NaN, 1, NaN), WT08 = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1), WT10 = c(NaN, NaN, NaN, NaN, NaN, NaN, 1, NaN, NaN,
NaN, NaN, NaN), NewMonths2 = structure(c(17295, 17253, 17364,
17432, 17409, 17216, 17350, 17243, 17324, 17341, 17315, 17284
), class = "Date")), row.names = c(NA, -12L), class = "data.frame")
**Edited to add complete data set and how I did std error
Temp Summary
structure(list(Month = c("April", "August", "December", "February",
"January", "July", "June", "March", "May", "November", "October",
"September"), n = c(34, 40, 40, 40, 40, 40, 34, 36, 34, 40, 40,
40), nvalid = c(34, 40, 40, 40, 40, 39, 34, 36, 34, 40, 36, 40
), mean = c(25.801, 28.319, 24.25, 23.75, 23.5, 27.892, 27.271,
24.352, 26.601, 25.889, 26.775, 28.333), sd = c(0.478, 0.978,
0.921, 0.793, 0.551, 0.463, 0.632, 1.47, 0.905, 0.763, 0.928,
0.534), min = c(25, 26.667, 22.778, 21.667, 21.667, 27.222, 26.111,
22.778, 25, 25, 25.556, 27.222), Q1 = c(25.556, 27.778, 23.889,
23.333, 23.333, 27.778, 27.222, 23.333, 26.111, 25.556, 25.556,
27.778), median = c(25.556, 27.778, 23.889, 23.889, 23.333, 27.778,
27.222, 23.889, 26.667, 25.556, 27.222, 28.333), Q3 = c(25.972,
28.889, 25, 24.444, 23.889, 28.333, 27.639, 24.583, 27.222, 26.111,
27.361, 28.889), max = c(26.667, 30, 25.556, 25, 24.444, 28.889,
28.889, 27.222, 27.778, 27.778, 28.333, 29.444)), class = "data.frame", row.names = c(NA,
-12L))
Precip Summary
structure(list(MONTH = c("April", "August", "December", "February",
"January", "July", "June", "March", "May", "November", "October",
"September"), n = c(446, 464, 436, 422, 465, 437, 444, 465, 465,
424, 438, 439), nvalid = c(443, 462, 432, 421, 464, 437, 443,
453, 464, 422, 429, 434), mean = c(0.021, 0.025, 0.066, 0.048,
0.036, 0.01, 0.008, 0.076, 0.03, 0.047, 0.035, 0.024), sd = c(0.094,
0.184, 0.342, 0.211, 0.142, 0.047, 0.047, 0.343, 0.14, 0.24,
0.243, 0.112), min = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Q1 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), median = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), Q3 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
max = c(1.1, 3.06, 4.94, 2.61, 1.5, 0.47, 0.76, 3.32, 1.43,
3.29, 3.64, 1.25), percZero = c(81.264, 87.662, 76.389, 75.534,
77.802, 86.728, 86.682, 75.717, 84.267, 77.962, 83.916, 81.797
)), class = "data.frame", row.names = c(NA, -12L))
Temp Summary Results
enter image description here
Precip Summary Results
enter image description here
I would suggest the following approach. Just be careful with the values of your error bars: the scaling factor must also be applied to the error bars of the rescaled series. That is why you got a messy plot. Here is the code, using the data you added:
library(ggplot2)

# Draw monthly precipitation (primary y axis) and temperature (secondary y
# axis) with +/- one standard error bars for both series.
# Expects Complete_Seasonality_Data, TempSummary and PrecipSummary to exist
# already. The summaries are matched to the plotted rows by position, so all
# three must list the months in the same order (as they do in the posted data).

# x variable: calendar date taken from the POSIXct Date column
Complete_Seasonality_Data$NewMonths5 <- as.Date(Complete_Seasonality_Data$Date)

# Standard error of each monthly mean: sd / sqrt(n)
StdErrorTemp <- TempSummary$sd / sqrt(TempSummary$n)
StdErrorPrecip <- PrecipSummary$sd / sqrt(PrecipSummary$n)

# Factor that maps temperature values onto the precipitation axis
scalefactor <- max(Complete_Seasonality_Data$PRCP) /
  max(Complete_Seasonality_Data$Temp_C)

p <- ggplot(Complete_Seasonality_Data, aes(x = NewMonths5))

# Precipitation is plotted in its own units, so its error bars need no scaling.
p <- p + geom_point(aes(y = PRCP, colour = "Precipitation"))
p <- p + geom_line(aes(y = PRCP, colour = "Precipitation", group = 1))
p <- p + geom_errorbar(
  aes(
    ymin = PrecipSummary$mean - StdErrorPrecip,
    ymax = PrecipSummary$mean + StdErrorPrecip
  ),
  position = position_dodge(.9), width = 0.2
)

# Temperature is drawn multiplied by scalefactor; the secondary axis divides
# by it again so the labels read in temperature units.
p <- p + geom_point(aes(y = Temp_C * scalefactor, colour = "Temperature"))
p <- p + geom_line(aes(y = Temp_C * scalefactor, colour = "Temperature", group = 1))
p <- p + scale_y_continuous(
  sec.axis = sec_axis(~ . / scalefactor, name = "Temperature")
)

# The whole bound (mean +/- SE) is rescaled. Scaling only the mean would leave
# the standard error in temperature units and make the bars far too long.
p <- p + geom_errorbar(
  aes(
    ymin = (TempSummary$mean - StdErrorTemp) * scalefactor,
    ymax = (TempSummary$mean + StdErrorTemp) * scalefactor
  ),
  position = position_dodge(.9), width = 0.2
)

p <- p + scale_colour_manual(values = c("blue", "red"))
p <- p + labs(
  y = "Precipitation (in)",
  x = "Month",
  colour = "Parameter"
)
p <- p + theme_bw()
# Vertical x labels; legend placed inside the panel near the bottom-right corner
p <- p + theme(
  axis.text.x = element_text(angle = 90),
  legend.position = c(.99, .01)
)
p
Output:
I am trying to create a list based on the column names and then bind all my lists together.
That is, I have for the first list:
> myList[[1]] %>% data.frame() %>% select(c(1:2))
WTS FMC
frequency 1 1
nperiods 0 0
seasonal_period 1 1
trend 0.1758021 0.140052
spike 0.04209651 0.04940537
I want to create new lists - one list for each of the columns in the data and then bind the lists 2010 Jul, 2010 Aug and 2010 Sep together using bind_rows.
I can do the following:
map_dfr(data.frame(myList), ~bind_rows(.), .id = "date")
Which almost gets what I want but I would like to do this and obtain one of these for each of the columns WTS and FMC etc.
Data:
myList <- list(`2010 Jul` = structure(list(1, 0, 1, 0.175802105278148,
0.0420965089715215, -0.597180003813241, -0.14766101736596,
0.101328352458739, NA_real_, 1, 0.163542974434028, NA_real_,
-0.0477740942262392, 0.109285246298631, -0.585925108800292,
0.349534758601262, 1, 0, 1, 0.140051954024691, 0.0494053672229871,
-0.731689686416635, -0.165607865331302, -0.252997419985073,
NA_real_, 1, -0.0505136284783927, NA_real_, -0.172891705413366,
0.171035553843115, -0.389685810850311, 0.395230400055788,
1, 0, 1, 0.0534461538613374, 0.0231700091040301, -0.356613468922694,
-0.0999668254541441, -0.127071388891534, NA_real_, 1, -0.0372310379765763,
NA_real_, -0.124065837653166, 0.214832600571785, -0.252551509468299,
0.123852141180675, 1, 0, 1, 0.041796656791166, 0.0262360694498456,
-0.266921718141474, -0.0803839036263304, -0.128269552651254,
NA_real_, 1, -0.0651244706731801, NA_real_, -0.205637974697809,
0.151555475533217, -0.292968222735457, 0.107723199237638,
1, 0, 1, 0.25433724307978, 0.0224412849894418, -1.12954982181859,
-0.146142537909, -0.165760782142423, NA_real_, 1, 0.151156560509677,
NA_real_, -0.166767943225804, 0.282321896787354, -0.513571168467497,
0.571934734212278, 1, 0, 1, 0.0754993659336637, 0.0387283712994059,
-0.853707662110111, -0.0251051264639037, -0.155640882435377,
NA_real_, 1, 0.0578132817097772, NA_real_, -0.061111476402639,
0.404959638504767, -0.0639404592330979, 0.201777982385734,
1, 0, 1, 0.174669741802688, 0.0236268612206601, -1.04169291855456,
-0.0843655048351571, -0.658325339642133, NA_real_, 1, -0.325360383314808,
NA_real_, -0.727093163070252, 0.937170983212177, -0.767714278501687,
0.873631983600454, 1, 0, 1, 0.0212342532070486, 0.0394007679441418,
-0.343036121758394, -0.0509252272227679, -0.119680074500327,
NA_real_, 1, -0.0455646776030852, NA_real_, -0.0398983609027588,
0.337412411363141, 0.0192485531321964, 0.223377235550471,
1, 0, 1, 0.0420262338532703, 0.129212727423114, -0.438100122508869,
-0.042771290337182, -0.389638024842517, NA_real_, 1, -0.326954900702078,
NA_real_, -0.555804439643753, 0.310736075437147, -0.677496652871654,
0.493116631796998, 1, 0, 1, 0.598184608626656, 0.00485326113018123,
-1.55755544029203, -0.0566390666856906, -0.177252150724023,
NA_real_, 1, 0.057995193957956, NA_real_, -0.289185837730521,
0.180050213764505, -0.414345078778786, 0.193535375761028,
1, 0, 1, 0.568485402318989, 0.0119368452769537, -1.67771371104516,
-0.0263928835552806, -0.454975191345, NA_real_, 1, 0.0686977616512836,
NA_real_, -0.533085306700341, 0.446147099224813, -0.416815928407965,
0.249040354012687, 1, 0, 1, 0.124079903347872, 0.059118600048602,
-0.885885264087633, -0.0808876385366851, -0.060659659056956,
NA_real_, 1, 0.148533886538717, NA_real_, -0.00173087492998668,
0.344674083224914, -0.182813884409897, 0.146185243416273,
1, 0, 1, 0.0930926158625137, 0.0245329908921137, 0.509061201314714,
-0.058666146601623, -0.0356918805242959, NA_real_, 1, 0.000950336898647261,
NA_real_, -0.245464366660066, 0.33682941009751, -0.254338263672044,
0.367239900683189, 1, 0, 1, 0.22638632247172, 0.0218317533978287,
-0.852854792551597, -0.16522406381938, 0.167205818008961,
NA_real_, 1, 0.299103246969011, NA_real_, 0.337090742253813,
0.426672282245496, -0.0631643700301268, 0.107558529054556,
1, 0, 1, 0.184189030274566, 0.0177470314855779, -1.16461284208247,
-0.0835786581051569, -0.318545876407814, NA_real_, 1, 0.0642963953901268,
NA_real_, -0.301483310526926, 0.36126076411659, -0.408014673266521,
0.38067311290122, 1, 0, 1, 0.0764619219562191, 0.0229456293092152,
-0.399483875437517, -0.109463724994312, -0.0624895855715813,
NA_real_, 1, 0.0247766231933698, NA_real_, -0.175710273625244,
0.148883400498395, -0.391350369491028, 0.164652945563837,
1, 0, 1, 0.157413400293104, 0.0210925522480966, 0.559184312376902,
-0.170376937825492, -0.463695060059251, NA_real_, 1, -0.398949758420571,
NA_real_, -0.343559615134694, 0.360798780983868, -0.254653149412353,
0.291717976532446, 1, 0, 1, 0.157638226870364, 0.0328482314858161,
-1.04113661683743, -0.11461389672605, -0.227655536180246,
NA_real_, 1, 0.0955560244689036, NA_real_, -0.0850108661597532,
0.249052330398167, -0.121962432488975, 0.270531142248378), .Dim = c(16L,
18L), .Dimnames = list(c("frequency", "nperiods", "seasonal_period",
"trend", "spike", "linearity", "curvature", "e_acf1", "e_acf10",
"entropy", "x_acf1", "x_acf10", "diff1_acf1", "diff1_acf10",
"diff2_acf1", "diff2_acf10"), c("WTS", "FMC", "WGL", "SCG", "GPS",
"AOS", "CVC", "EMF", "SSY", "MGA", "WEX", "MT", "HXM", "CNS",
"LCM", "KGN", "SIHI", "JLS"))), `2010 Aug` = structure(list(1,
0, 1, 0.0233905158348703, 0.0208562122467506, -0.534541260410219,
0.0724848038424846, -0.243808681545836, NA_real_, 1, -0.127516468307146,
NA_real_, -0.0882273545301255, 0.330583666477203, -0.0544514809293154,
0.179612938516917, 1, 0, 1, 0.298243851692594, 0.0288988085684842,
-1.17329074859827, 0.167794305134058, -0.129125081312144,
NA_real_, 1, 0.165909888118736, NA_real_, 0.0234017388186864,
0.126122636351595, -0.0910583855529177, 0.179360806895702,
1, 0, 1, 0.065724074574338, 0.0758937621405237, -0.399747739700934,
0.108429436039378, -0.0667990493250848, NA_real_, 1, 0.0146134416445858,
NA_real_, -0.0265326096067546, 0.238490359616056, -0.317300024293075,
0.136078891269167, 1, 0, 1, 0.0359901363825194, 0.0727713985049959,
-0.261713912883042, 0.0760994399652499, -0.133814042822593,
NA_real_, 1, -0.0827578051666984, NA_real_, -0.117826507182037,
0.252189340156553, -0.355540397423096, 0.151183673456332,
1, 0, 1, 0.73385650012555, 0.004162899076158, -2.00737648513829,
0.0557091477539321, -0.261660339901219, NA_real_, 1, 0.451470983541603,
NA_real_, -0.288519428211535, 0.150745403548235, -0.543122218246143,
0.296612522913519, 1, 0, 1, 0.0649402281700383, 0.0378619493291227,
-0.31449739260034, 0.0988460944383464, 0.0349741429158428,
NA_real_, 1, 0.098161238113042, NA_real_, 0.0300972574757304,
0.298075123956731, -0.0887625952498301, 0.232199321985802,
1, 0, 1, 0.0945945759860801, 0.0180084376802645, -0.469510406384772,
0.153730038064492, -0.399294299223668, NA_real_, 1, -0.305460924007922,
NA_real_, -0.415074250523729, 0.222830245111534, -0.606351673786828,
0.374684747936308, 1, 0, 1, 0.0591309037250093, 0.0649064539127003,
-0.0744983518809387, 0.0920811715896319, 0.00528921345437106,
NA_real_, 1, 0.0298619700426544, NA_real_, 0.037634026581331,
0.362140624457385, -0.107462109231618, 0.2254760279785, 1,
0, 1, 0.078011344771787, 0.0697267856529186, -0.895566956749497,
-0.0499496267058433, -0.565731971800641, NA_real_, 1, -0.340406856638686,
NA_real_, -0.537251820856348, 0.290072188692432, -0.418146742408836,
0.178344451571795, 1, 0, 1, 0.036755839364403, 0.0608765681761267,
-0.0373907046624393, 0.10003472183405, -0.36540942843628,
NA_real_, 1, -0.341604813341389, NA_real_, -0.184126527521471,
0.165571048572808, -0.082428918609678, 0.160459761431743,
1, 0, 1, 0.155954127781764, 0.0165284207980147, -0.542789032914492,
0.198320667255402, -0.492235313036742, NA_real_, 1, -0.39030915972297,
NA_real_, -0.361944799122207, 0.428712468490625, -0.275063299692073,
0.341636357201519, 1, 0, 1, 0.0677160966850889, 0.0284434829900084,
-0.359449385908881, 0.113540625475898, -0.0349872275718705,
NA_real_, 1, 0.0516285716355073, NA_real_, 0.0458827167347926,
0.280987645964838, -0.0563357760675071, 0.101938565072249,
1, 0, 1, 0.319347071530101, 0.00980494397260724, 0.952800719371358,
0.0593643213576319, -0.159931065778718, NA_real_, 1, -0.161861522802606,
NA_real_, 0.0861865077322326, 0.462009189577021, 0.288750047974667,
0.412369625453985, 1, 0, 1, 0.24423399870397, 0.0156919487915331,
-1.35672952181182, 0.0754111723913473, -0.177255831987036,
NA_real_, 1, 0.2702466551225, NA_real_, 0.072432653719567,
0.398793873454873, 0.299718556657641, 0.420115516359753,
1, 0, 1, 0.145010697778435, 0.061864744065635, -0.634783939837577,
0.156947702469577, -0.0325408000915056, NA_real_, 1, 0.0909718902406596,
NA_real_, 0.0168167770621337, 0.118454773755493, -0.42986156681522,
0.189447158128956, 1, 0, 1, 0.272662492338871, 0.0320020691693299,
-1.35057407751299, -0.000704120068878284, -0.305058370459884,
NA_real_, 1, 0.0989171620469294, NA_real_, -0.345872268685382,
0.225882532526285, -0.523296429442332, 0.274117925854473,
1, 0, 1, 0.133849706665592, 0.0234607726133869, 0.385342516199894,
0.171489465028886, -0.522328366590807, NA_real_, 1, -0.444297376125095,
NA_real_, -0.550596521310195, 0.466264657679625, -0.73127378610922,
0.69157532319532, 1, 0, 1, 0.202865549667432, 0.0443986075890144,
-0.807925019780012, 0.171442275242251, 0.0629959271618186,
NA_real_, 1, 0.213447359336486, NA_real_, 0.242973171792414,
0.265885818267854, -0.0620290037554373, 0.141877034992979), .Dim = c(16L,
18L), .Dimnames = list(c("frequency", "nperiods", "seasonal_period",
"trend", "spike", "linearity", "curvature", "e_acf1", "e_acf10",
"entropy", "x_acf1", "x_acf10", "diff1_acf1", "diff1_acf10",
"diff2_acf1", "diff2_acf10"), c("WTS", "FMC", "WGL", "SCG", "GPS",
"AOS", "CVC", "EMF", "SSY", "MGA", "WEX", "MT", "HXM", "CNS",
"LCM", "KGN", "SIHI", "JLS"))), `2010 Sep` = structure(list(1,
0, 1, 0.114407589475582, 0.0235377481165926, -0.728800100661772,
-0.0166684916231905, -0.319561503372181, NA_real_, 1, -0.252283814071854,
NA_real_, -0.433907428334825, 0.494174288679032, -0.259632053945162,
0.345158784209255, 1, 0, 1, 0.0554648365804654, 0.0297035032516045,
-0.301137665508776, 0.0371842617719873, -0.153107729568536,
NA_real_, 1, -0.148125951621602, NA_real_, -0.309974262769443,
0.282319970549421, -0.256060952169572, 0.260036721023129,
1, 0, 1, 0.0724574536186097, 0.0506702652874201, -0.397515147429409,
-0.000875929043770737, -0.226741580969926, NA_real_, 1, -0.239426915169087,
NA_real_, -0.26443608261244, 0.380280023570942, -0.181952275816044,
0.287613920079175, 1, 0, 1, 0.0366387433543232, 0.0828210443160761,
0.0346015782281233, 0.0584760824131681, -0.0807409978271288,
NA_real_, 1, -0.0640076873681771, NA_real_, -0.241606619566609,
0.28402059698436, -0.32704318254068, 0.255565332273312, 1,
0, 1, 0.506821906250132, 0.0117517625384047, -1.57158954102578,
0.0777902977231915, 0.0323140578792685, NA_real_, 1, 0.314866952104353,
NA_real_, -0.184027731637231, 0.113495496807055, -0.416317148005885,
0.221007409079218, 1, 0, 1, 0.0789008969637934, 0.0227004903617495,
-0.270300272577158, 0.0385921685543045, -0.163539848233482,
NA_real_, 1, -0.186957341754706, NA_real_, 0.0122408218485358,
0.405097502405729, 0.156234747286005, 0.273301919830479,
1, 0, 1, 0.0250014845596822, 0.0302514181589841, -0.338784673049847,
-0.0859332071221103, -0.497829122832546, NA_real_, 1, -0.501527437157675,
NA_real_, -0.476584045508235, 0.580274617020235, -0.250869906535054,
0.356695266531789, 1, 0, 1, 0.0319333349525267, 0.0556081429363308,
0.147813225937377, 0.0607115815601036, -0.075824219265655,
NA_real_, 1, -0.0426184206883323, NA_real_, -0.0207223789747501,
0.385629184963258, -0.0532538380902457, 0.237605631059521,
1, 0, 1, 0.381518944029993, 0.0321886406040401, -1.06752151081575,
0.131205784717954, -0.280480506945643, NA_real_, 1, -0.179355245047458,
NA_real_, -0.106920170519719, 0.0714415677242584, -0.0319868419486709,
0.118669624281828, 1, 0, 1, 0.0121834121844098, 0.068237303825428,
0.0536306750135053, 0.0336173618788365, -0.369652200763938,
NA_real_, 1, -0.351748579579802, NA_real_, -0.528968821125061,
0.281681886913385, -0.514355713006262, 0.266023990706781,
1, 0, 1, 0.0287643057822889, 0.044676101917498, -0.0138724727466971,
-0.134010559894424, -0.73335419052835, NA_real_, 1, -0.71539381603517,
NA_real_, -0.658713675985, 0.50928626741049, -0.619760970527367,
0.416625800834141, 1, 0, 1, 0.05210630828958, 0.0276550837203792,
0.190816750390097, 0.0978766034871588, -0.0805703916378234,
NA_real_, 1, -0.0268457813914789, NA_real_, -0.00549001926408891,
0.267785723185472, -0.0608182434517501, 0.100506808744203,
1, 0, 1, 0, 0.0627792442320371, 0.255196051933168, -0.0382711160010135,
-0.386591712415439, NA_real_, 1, -0.357984708839978, NA_real_,
-0.368112450163228, 0.267135781124212, -0.197560540922087,
0.182171367812671, 1, 0, 1, 0.269386209436678, 0.0148715672740464,
-0.968523363062877, 0.0763244158585192, 0.0185934939902807,
NA_real_, 1, 0.0765904658501373, NA_real_, -0.161248155686918,
0.20111491459834, -0.273002230573575, 0.258423208707053,
1, 0, 1, 0.018597597094501, 0.0552754657963658, 0.143897330819771,
0.0353124852994875, -0.125202166775784, NA_real_, 1, -0.10138717345503,
NA_real_, -0.232282311284955, 0.358150791920914, -0.202251311791963,
0.313200193280975, 1, 0, 1, 0.203989370024047, 0.0224128873339424,
0.502551786048769, 0.170091454126145, -0.0446367518715121,
NA_real_, 1, -0.0199614715680664, NA_real_, -0.0784148935207206,
0.256326721120486, -0.629406417417173, 0.680388906963932,
1, 0, 1, 0.166310912865401, 0.0515699413907982, -0.563616415630654,
-0.124142897096449, -0.570353166601179, NA_real_, 1, -0.511575482342321,
NA_real_, -0.502130427060656, 0.424780030379441, -0.561486820277065,
0.520329930641319, 1, 0, 1, 0.125538977433979, 0.0316092331640379,
0.644167550608129, 0.108886405075484, 0.10582508645383, NA_real_,
1, 0.208780092739966, NA_real_, 0.0489489788996666, 0.285296904348623,
-0.161944089572295, 0.294280045785781), .Dim = c(16L, 18L
), .Dimnames = list(c("frequency", "nperiods", "seasonal_period",
"trend", "spike", "linearity", "curvature", "e_acf1", "e_acf10",
"entropy", "x_acf1", "x_acf10", "diff1_acf1", "diff1_acf10",
"diff2_acf1", "diff2_acf10"), c("WTS", "FMC", "WGL", "SCG", "GPS",
"AOS", "CVC", "EMF", "SSY", "MGA", "WEX", "MT", "HXM", "CNS",
"LCM", "KGN", "SIHI", "JLS"))))
With tidyverse operations, row names are dropped or reset to the default sequence, so before binding the list elements together, use rownames_to_column to store the row names in a new column.
library(dplyr)
library(purrr)
library(tibble)

# For each element of myList: convert it to a data frame, keep its first two
# columns, and move the row names into a column called `rname`. The pieces are
# row-bound, with each element's list name stored in `date`.
map_dfr(
  myList,
  function(period) {
    period_df <- as.data.frame(period)
    rownames_to_column(select(period_df, 1:2), "rname")
  },
  .id = "date"
)
# date rname WTS FMC
#1 2010 Jul frequency 1 1
#2 2010 Jul nperiods 0 0
#3 2010 Jul seasonal_period 1 1
#4 2010 Jul trend 0.1758021 0.140052
#5 2010 Jul spike 0.04209651 0.04940537
#6 2010 Jul linearity -0.59718 -0.7316897
#7 2010 Jul curvature -0.147661 -0.1656079
#8 2010 Jul e_acf1 0.1013284 -0.2529974
#9 2010 Jul e_acf10 NA NA
# ...
The goal is to replace NAs with 0 values in a set of variables using a loop function. Obviously, this is a super simple loop function, but I have no idea why this is not doing what it should.
Two additional preferences: suggestions that use the variable names (as opposed to column numbers) and that use dplyr are preferred.
library
library(plyr)
library(dplyr)
sample data
# Sample data: ten rows (years 1968-1977) for a single pid, with NAs spread
# over several of the weeks_* columns.
y <- structure(
  list(
    pid = rep(1002L, 10L),
    year = 1968:1977,
    weeks_hd_e = c(3, 0, 50, 49, 50, 50, 50, 50, 50, 49),
    weeks_wf_e = c(4, 6, rep(0, 8L)),
    weeks_hd_u = c(NA, NA, rep(0, 8L)),
    weeks_hd = rep(NA_real_, 10L),
    weeks_wf_u = c(rep(NA, 8L), 0, NA),
    weeks_wf = rep(NA_real_, 10L)
  ),
  .Names = c(
    "pid", "year", "weeks_hd_e", "weeks_wf_e",
    "weeks_hd_u", "weeks_hd", "weeks_wf_u", "weeks_wf"
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)
this command works
# NOTE(review): `i` here presumably stands in for one real column name
# (e.g. weeks_hd_u); as literally written it refers to a column called `i`.
y <- mutate(y, i = ifelse(!is.na(i), i, 0))
this loop does not
# Names of the columns whose NAs should become 0.
vars <- c("weeks_hd_e", "weeks_hd_u", "weeks_wf_e", "weeks_wf_u", "weeks_hd", "weeks_wf")
# NOTE(review): `vars` is built with c() without element names, so names(vars)
# is NULL and this loop body never executes; iterating over `vars` itself
# would visit the column names.
for (i in names(vars)) {
# NOTE(review): inside mutate(), `i = ...` assigns to a column literally named
# `i`; it does not substitute the string held in the loop variable.
y <- mutate(y, i = ifelse(!is.na(i), i, 0))
}
# Open the data frame in the interactive data viewer to inspect the result.
View(y)
I have been given two excellent answers from friends:
# Replace NA with 0 in every column listed in `vars`, one column at a time.
# seq_along() is used instead of 1:length(vars) so that an empty `vars`
# skips the loop rather than iterating over c(1, 0).
for (i in seq_along(vars)) {
  # `[[` extracts the column as a plain vector, so the NA mask and the
  # assignment both operate on that vector directly.
  y[[vars[i]]][is.na(y[[vars[i]]])] <- 0
}
or
# Replace NA with 0 in every column listed in `vars`.
# lapply() over y[vars] keeps each column's own type; apply() would first
# coerce the selection to a matrix, and y[, vars] collapses to a bare vector
# (which apply() rejects) when `vars` names a single column.
y[vars] <- lapply(y[vars], function(x) replace(x, is.na(x), 0))
The replace_na command from the tidyr package does exactly what you want.
Use it like this:
install.packages("tidyr")
library(tidyr)
# your data: ten rows (years 1968-1977) for a single pid, with NAs spread
# over several of the weeks_* columns
y <- structure(
  list(
    pid = rep(1002L, 10L),
    year = 1968:1977,
    weeks_hd_e = c(3, 0, 50, 49, 50, 50, 50, 50, 50, 49),
    weeks_wf_e = c(4, 6, rep(0, 8L)),
    weeks_hd_u = c(NA, NA, rep(0, 8L)),
    weeks_hd = rep(NA_real_, 10L),
    weeks_wf_u = c(rep(NA, 8L), 0, NA),
    weeks_wf = rep(NA_real_, 10L)
  ),
  .Names = c(
    "pid", "year", "weeks_hd_e", "weeks_wf_e",
    "weeks_hd_u", "weeks_hd", "weeks_wf_u", "weeks_wf"
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)
# Replace the NAs in the data frame.
# The `replace` argument is a named list: each name is a variable to fix,
# each value is what its NAs are replaced with.
y <- replace_na(
  y,
  replace = list(
    weeks_hd_e = 0,
    weeks_hd_u = 0,
    weeks_wf_e = 0,
    weeks_wf_u = 0,
    weeks_hd = 0,
    weeks_wf = 0
  )
)
Note that this meets your preference to specify the variables by name and is more flexible in terms of replacement, i.e. you can replace NAs in numeric and character variables in the same command.