Related
I have this reproducible DataFrame:
structure(list(age = c(62.84998, 60.33899, 52.74698, 42.38498,
79.88495, 93.01599, 62.37097, 86.83899, 85.65594, 42.25897),
death = c(0, 1, 1, 1, 0, 1, 1, 1, 1, 1), sex = c("male",
"female", "female", "female", "female", "male", "male", "male",
"male", "female"), hospdead = c(0, 1, 0, 0, 0, 1, 0, 0, 0,
0), slos = c(5, 4, 17, 3, 16, 4, 9, 7, 12, 8), d.time = c(2029,
4, 47, 133, 2029, 4, 659, 142, 63, 370), dzgroup = c("Lung Cancer",
"Cirrhosis", "Cirrhosis", "Lung Cancer", "ARF/MOSF w/Sepsis",
"Coma", "CHF", "CHF", "Lung Cancer", "Colon Cancer"), dzclass = c("Cancer",
"COPD/CHF/Cirrhosis", "COPD/CHF/Cirrhosis", "Cancer", "ARF/MOSF",
"Coma", "COPD/CHF/Cirrhosis", "COPD/CHF/Cirrhosis", "Cancer",
"Cancer"), num.co = c(0, 2, 2, 2, 1, 1, 1, 3, 2, 0), edu = c(11,
12, 12, 11, NA, 14, 14, NA, 12, 11), income = c("$11-$25k",
"$11-$25k", "under $11k", "under $11k", NA, NA, "$25-$50k",
NA, NA, "$25-$50k"), scoma = c(0, 44, 0, 0, 26, 55, 0, 26,
26, 0), charges = c(9715, 34496, 41094, 3075, 50127, 6884,
30460, 30460, NA, 9914), totcst = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), totmcst = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), avtisst = c(7, 29, 13, 7, 18.666656, 5, 8, 6.5, 8.5, 8
), race = c("other", "white", "white", "white", "white",
"white", "white", "white", "black", "hispanic"), sps = c(33.8984375,
52.6953125, 20.5, 20.0976562, 23.5, 19.3984375, 17.296875,
21.5976562, 15.8984375, 2.2998047), aps = c(20, 74, 45, 19,
30, 27, 46, 53, 17, 9), surv2m = c(0.262939453, 0.0009999275,
0.790893555, 0.698974609, 0.634887695, 0.284973145, 0.892944336,
0.670898438, 0.570922852, 0.952880859), surv6m = c(0.0369949341,
0, 0.664916992, 0.411987305, 0.532958984, 0.214996338, 0.820922852,
0.498962402, 0.24899292, 0.887939453), hday = c(1, 3, 4,
1, 3, 1, 1, 1, 1, 1), diabetes = c(0, 0, 0, 0, 0, 0, 0, 1,
0, 0), dementia = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), ca = c("metastatic",
"no", "no", "metastatic", "no", "no", "no", "no", "metastatic",
"metastatic"), prg2m = c(0.5, 0, 0.75, 0.899999619, 0.899999619,
0, NA, 0.799999714, 0.049999982, NA), prg6m = c(0.25, 0,
0.5, 0.5, 0.8999996, 0, 0.6999998, 0.3999999, 0.0001249999,
NA), dnr = c("no dnr", NA, "no dnr", "no dnr", "no dnr",
"no dnr", "no dnr", "no dnr", "dnr after sadm", "no dnr"),
dnrday = c(5, NA, 17, 3, 16, 4, 9, 7, 2, 8), meanbp = c(97,
43, 70, 75, 59, 110, 78, 72, 97, 84), wblc = c(6, 17.0976562,
8.5, 9.09960938, 13.5, 10.3984375, 11.6992188, 13.5996094,
9.69921875, 11.2988281), hrt = c(69, 112, 88, 88, 112, 101,
120, 100, 56, 94), resp = c(22, 34, 28, 32, 20, 44, 28, 26,
20, 20), temp = c(36, 34.59375, 37.39844, 35, 37.89844, 38.39844,
37.39844, 37.59375, 36.59375, 38.19531), pafi = c(388, 98,
231.65625, NA, 173.3125, 266.625, 309.5, 404.75, 357.125,
NA), alb = c(1.7998047, NA, NA, NA, NA, NA, 4.7998047, NA,
NA, 4.6992188), bili = c(0.19998169, NA, 2.19970703, NA,
NA, NA, 0.39996338, NA, 0.39996338, 0.19998169), crea = c(1.19995117,
5.5, 2, 0.79992676, 0.79992676, 0.69995117, 1.59985352, 2,
1, 0.79992676), sod = c(141, 132, 134, 139, 143, 140, 132,
139, 143, 139), ph = c(7.459961, 7.25, 7.459961, NA, 7.509766,
7.65918, 7.479492, 7.509766, 7.449219, NA), glucose = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), bun = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), urine = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), adlp = c(7, NA, 1, 0, NA, NA, 0, NA, NA, 0), adls = c(7,
1, 0, 0, 2, 1, 1, 0, 7, NA), sfdm2 = c(NA, "<2 mo. follow-up",
"<2 mo. follow-up", "no(M2 and SIP pres)", "no(M2 and SIP pres)",
"<2 mo. follow-up", "no(M2 and SIP pres)", NA, NA, NA), adlsc = c(7,
1, 0, 0, 2, 1, 1, 0, 7, 0.4947999)), row.names = c(NA, 10L
), class = "data.frame")
I need to calculate the proportion of patients who died in the hospital among patients with an active DNR order on day 3 and among patients without an active DNR order on day 3. To group which patients had an active DNR on day 3 and which did not, I used the subset function below:
# Drop the rows with a missing dnrday, then split the remainder at day 3.
SB_xlsx1 <- subset(SB_xlsx, !is.na(dnrday))
YesDNR <- SB_xlsx1[SB_xlsx1$dnrday <= 3, ]
NoDNR <- SB_xlsx1[SB_xlsx1$dnrday > 3, ]
However, I don't know how to calculate the proportion of patients that died in the hospital for those with a DNR and without a DNR. The 'hospdead' variable has all 0s and 1s, where 0 = not dead and 1 = dead. How do I get the proportion that died among those who had a DNR at day 3 and among those who did not? What code could I use for my desired result? SB_xlsx is just the name of my DataFrame.
There are a few ways to do this, but the simplest is probably via the aggregate function.
# hospdead is coded 0/1, so its mean within each dnrday <= 3 group is the
# proportion of patients in that group who died in hospital.
aggregate(hospdead ~ (dnrday <= 3), data = SB_xlsx1, FUN = mean)
# dnrday <= 3 hospdead
# 1 FALSE 0.1428571
# 2 TRUE 0.0000000
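You may use tapply to group deaths by the condition dnrday <= 3, i.e. with an active DNR on day 3, and calculate the mean. The snippet below instead uses xtabs with proportions, which sums the deaths in each group and reports each group's share of all counted deaths (so the two values add up to 1).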
# Sum the 0/1 death flag within each dnrday <= 3 group, then express each
# group's total as a fraction of all deaths counted.
res <- proportions(xtabs(death ~ dnrday <= 3, SB_xlsx))
res
# dnrday <= 3
# FALSE TRUE
# 0.7142857 0.2857143
where
# The two proportions partition all counted deaths, so they add up to 1.
sum(res)
# [1] 1
EDIT: I apologize; I misread your post when providing my original answer. I've revised it below.
You referred to the hospdead variable, but in the toy data set it has just one nonzero entry, so I'm using the death variable instead to demonstrate the principle.
First, a base R approach:
# Keep only the rows where death == 1, then take the share of those rows
# with dnrday <= 3 and, separately, with dnrday > 3.
with(SB_xlsx1[SB_xlsx1$death == 1, ], mean(dnrday <= 3))
with(SB_xlsx1[SB_xlsx1$death == 1, ], mean(dnrday > 3))
The idea is to restrict to the subset of rows for which a death occurred, then perform a logical check to see which entries have dnrday at most 3 (first line) or greater than 3 (second line); the mean of that logical vector is the proportion.
Note that if you have NA entries in death, you'll want to remove them first as you did with those in dnrday.
For a dplyr approach:
library(dplyr)
# Among rows with death == 1: share with dnrday <= 3 and share with dnrday > 3,
# returned side by side in a one-row summary.
SB_xlsx1 %>%
  filter(death == 1) %>%
  summarise(mean(dnrday <= 3), mean(dnrday > 3))
or, for a slightly nicer-looking table,
# Same shares, one row per dnrday <= 3 group: the group's row count divided
# by the total number of rows with death == 1.
deaths <- SB_xlsx1 %>%
  filter(death == 1)
deaths %>%
  group_by(dnrday <= 3) %>%
  summarise(prop = n() / nrow(deaths))
I've tried creating a day-of-week variable using this code:
# Label each record "weekday" or "weekend" from the day name of completed_ts.
weekdays1 <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
is_weekday <- weekdays(fulldata$completed_ts) %in% weekdays1
fulldata$wDay <- factor(is_weekday, levels = c(FALSE, TRUE),
                        labels = c("weekend", "weekday"))
# Error message:
# Error in UseMethod("weekdays") :
# no applicable method for 'weekdays' applied to an object of class "character"
# NOTE(review): per the error above, completed_ts is a character column when
# this runs, and weekdays() needs a Date/POSIXt input. The sample values
# (e.g. 43946.47) look like Excel serial day numbers - confirm, and convert
# the column to a date-time before calling weekdays().
This issue arises only after adding a new variable (coded in SPSS) to the data file, and resolves when removing this added variable (however, I need this variable in my analyses). I'm unsure why this is the case.
Any suggestions would be really appreciated, cheers.
structure(list(participant_id = c(5237430, 5237430, 5237430),
participant_tz = c("UTC", "Australia/Melbourne", "Australia/Melbourne"
), study_id = c("s4lpHqswe", "s4lpHqswe", "s4lpHqswe"), study_name = c("Social Networks and Eating Behaviours",
"Social Networks and Eating Behaviours", "Social Networks and Eating Behaviours"
), study_version = c(7, 7, 7), survey_id = c("X81ypVgkcU",
"X81ypVgkcU", "X81ypVgkcU"), survey_name = c("Survey 1",
"Survey 1", "Survey 1"), trigger = c("scheduled", "scheduled",
"scheduled"), export_tz = c("Australia/Melbourne", "Australia/Melbourne",
"Australia/Melbourne"), start_end = c(1, 1, 1), created_ts = structure(c(1587937200,
1587813720, 1587820680), tzone = "UTC", class = c("POSIXct",
"POSIXt")), scheduled_ts = structure(c(1587935280, 1587813720,
1587820680), tzone = "UTC", class = c("POSIXct", "POSIXt"
)), started_ts = c("#NULL!", "43946.473611111112", "43946.554166666669"
), completed_ts = c(NA, 43946.4743055556, 43946.5548611111
), expired_ts = c("43947.901388888888", "#NULL!", "#NULL!"
), uploaded_ts = c("#NULL!", "43946.474305555574", "43946.554861111101"
), total_rt = c("NA", "56500", "33155"), rand_prob = c("NA",
"NA", "NA"), lonely1 = c(NA, 6, 5), lonely1_rt = c(NA, 4359,
1377), happy = c(NA, 5, 5), happy_rt = c(NA, 1071, 963),
lonely2 = c(NA, 4, 3), lonely2_rt = c(NA, 979, 2319), pos_feedback_1 = c(NA,
1, 1), pos_feedback_2 = c(NA, 0, 0), pos_feedback_3 = c(NA,
0, 0), pos_feedback_4 = c(NA, 0, 0), pos_feedback_5 = c(NA,
0, 0), pos_feedback_rt = c(NA, 7452, 1650), neg_feedback_1 = c(NA,
1, 1), neg_feedback_2 = c(NA, 0, 0), neg_feedback_3 = c(NA,
0, 0), neg_feedback_4 = c(NA, 0, 0), neg_feedback_5 = c(NA,
0, 0), neg_feedback_rt = c(NA, 2695, 3267), sat1 = c(NA,
4, 2), sat1_rt = c(NA, 3462, 1482), sat2 = c(NA, 5, 5), sat2_rt = c(NA,
1330, 948), comp1 = c(NA, 5, 4), comp1_rt = c(NA, 1043, 926
), comp2 = c(NA, 3, 3), comp2_rt = c(NA, 1134, 851), comp3 = c(NA,
2, 2), comp3_rt = c(NA, 2985, 2888), comp4 = c(NA, 6, 5),
comp4_rt = c(NA, 2221, 1253), selfie1 = c(NA, 1, 1), selfie1_rt = c(NA,
2315, 1241), selfie2 = c(NA, 102, 78), selfie2_rt = c(NA,
1393, 1078), selfie3 = c(NA, 1, 2), selfie3_rt = c(NA, 2589,
883), inspo1 = c(NA, 1, 2), inspo1_rt = c(NA, 1641, 788),
inspo2 = c(NA, 1, 2), inspo2_rt = c(NA, 1435, 968), inspo3 = c(NA,
2, 2), inspo3_rt = c(NA, 3953, 883), dating1 = structure(c(NA,
1L, 2L), .Label = c("1", "2"), class = "factor"), dating1_rt = c(NA,
2710, 1064), dating2_1 = c(NA, 0, NA), dating2_2 = c(NA,
1, NA), dating2_3 = c(NA, 0, NA), dating2_4 = c(NA, 0, NA
), dating2_5 = c(NA, 1, NA), dating2_6 = c(NA, 0, NA), dating2_7 = c(NA,
0, NA), dating2_rt = c(NA, 3988, NA), video = c(NA, 2, 2),
video_rt = c(NA, 2809, 1283), eating_1 = c(NA, 1, 0), eating_2 = c(NA,
0, 0), eating_3 = c(NA, 0, 0), eating_4 = c(NA, 0, 0), eating_5 = c(NA,
0, 0), eating_6 = c(NA, 0, 1), eating_7 = c(NA, 0, 0), eating_8 = c(NA,
0, 0), eating_none = c(NA, 0, 0), eating_rt = c(NA, 2226,
5979), dating = c(NA, 1, 2), dating_rt = c(NA, 2710, 1064
), partner_cat = c(NA_real_, NA_real_, NA_real_), partnerideal_dum = structure(c(NA,
1L, NA), .Label = c("0", "1"), class = "factor"), partnernonideal_dum = structure(c(NA,
1L, NA), .Label = c("0", "1"), class = "factor"), partnerboth_dum = structure(c(NA,
2L, NA), .Label = c("0", "1"), class = "factor"), qualtrics_sample = c(NA_character_,
NA_character_, NA_character_), start_date = structure(c(1587397597,
1587397597, 1587397597), tzone = "UTC", class = c("POSIXct",
"POSIXt")), end_date = structure(c(1587398241, 1587398241,
1587398241), tzone = "UTC", class = c("POSIXct", "POSIXt"
)), status = c(0, 0, 0), ip_address = c("101.182.17.165",
"101.182.17.165", "101.182.17.165"), progress = c(100, 100,
100), duration_in_seconds = c(644, 644, 644), finished = c(1,
1, 1), recorded_date = structure(c(1587398242, 1587398242,
1587398242), tzone = "UTC", class = c("POSIXct", "POSIXt"
)), response_id = c("R_3n97cmY4P1NXi92", "R_3n97cmY4P1NXi92",
"R_3n97cmY4P1NXi92"), user_language = c("EN", "EN", "EN"),
self_genid = c(NA_character_, NA_character_, NA_character_
), agree_share_email = c(1, 1, 1), consent = c(1, 1, 1),
age = c(20, 20, 20), gender = c(2, 2, 2), gender_other = c(NA_character_,
NA_character_, NA_character_), currentweight = c(55, 55,
55), currentheight = c(158, 158, 158), highestweight = c("63",
"63", "63"), highestheight = c("158", "158", "158"), lowestweight = c("55",
"55", "55"), lowestheight = c("158", "158", "158"), culture = c("southern asian",
"southern asian", "southern asian"), culture_other = c(NA_character_,
NA_character_, NA_character_), student = c("yes", "yes",
"yes"), international_student = c(NA_character_, NA_character_,
NA_character_), aus_international_student = c(NA_character_,
NA_character_, NA_character_), aus_years = c(NA_character_,
NA_character_, NA_character_), currentlive = c(NA_character_,
NA_character_, NA_character_), language = c("English", "English",
"English"), language_other = c(NA_character_, NA_character_,
NA_character_), maritalstatus = c("single", "single", "single"
), sexualorientation = c("heterosexual", "heterosexual",
"heterosexual"), sexualorientation_other = c(NA_character_,
NA_character_, NA_character_), education = c("bachelor degree",
"bachelor degree", "bachelor degree"), working_full = c(0,
0, 0), working_part = c(0, 0, 0), working_casual = c(0, 0,
0), working_unemployed = c(0, 0, 0), working_student = c(1,
1, 1), workhours = c("0", "0", "0"), taxes = c(4, 4, 4),
videoconferencing = c(1, 1, 1), zoom = c(1, 1, 1), team_viewer = c(NA_real_,
NA_real_, NA_real_), microsoft_teams = c(NA_real_, NA_real_,
NA_real_), skype = c(NA_real_, NA_real_, NA_real_), webex = c(NA_real_,
NA_real_, NA_real_), googlemeet = c(NA_real_, NA_real_, NA_real_
), joinme = c(NA, NA, NA), whats_app = c(NA_real_, NA_real_,
NA_real_), slack = c(NA_real_, NA_real_, NA_real_), houseparty = c(NA_real_,
NA_real_, NA_real_), videoconferencing_other = c(NA_real_,
NA_real_, NA_real_), videoconferencing_othertext = c(NA_character_,
NA_character_, NA_character_), videoconf_time = c(2, 2, 2
), fooddelivery = c(2, 2, 2), uber_eats = c(NA_real_, NA_real_,
NA_real_), deliveroo = c(NA_real_, NA_real_, NA_real_), menulog = c(NA_real_,
NA_real_, NA_real_), foodora = c(NA_real_, NA_real_, NA_real_
), door_dash = c(NA_real_, NA_real_, NA_real_), fooddelivery_other = c(NA_real_,
NA_real_, NA_real_), fooddelivery_othertext = c(NA_character_,
NA_character_, NA_character_), fooddeliverymonth = c(NA_real_,
NA_real_, NA_real_), serviceson = c(NA_real_, NA_real_, NA_real_
), serviceswith = c(NA_real_, NA_real_, NA_real_), fooddelivery_money = c(NA_real_,
NA_real_, NA_real_), facebook = c(1, 1, 1), instagram = c(1,
1, 1), snapchat = c(1, 1, 1), twitter = c(NA_real_, NA_real_,
NA_real_), tumblr = c(NA_real_, NA_real_, NA_real_), socialmedia_other = c(NA_real_,
NA_real_, NA_real_), socialmedia_othertext = c(NA_character_,
NA_character_, NA_character_), socialmediatime = c(1, 1,
1), socialmediaminutes_t = c("30", "30", "30"), socialmediaminutes_d = c("300",
"300", "300"), selfies = c(4, 4, 4), modifiedselfie = c(1,
1, 1), fitspiration = c(1, 1, 1), fitspirationtime = c(1,
1, 1), thinspiration = c(1, 1, 1), thinspirationtime = c(1,
1, 1), fatspiration = c(1, 1, 1), fatspirationtime = c(2,
2, 2), datingapp = c(1, 1, 1), tinder = c(1, 1, 1), hinge = c(NA_real_,
NA_real_, NA_real_), grindr = c(NA_real_, NA_real_, NA_real_
), bumble = c(NA_real_, NA_real_, NA_real_), ok_cupid = c(NA_real_,
NA_real_, NA_real_), her = c(NA_real_, NA_real_, NA_real_
), offee_meets = c(NA_real_, NA_real_, NA_real_), happn = c(NA_real_,
NA_real_, NA_real_), momo = c(NA_real_, NA_real_, NA_real_
), tantan = c(NA_real_, NA_real_, NA_real_), datingapp_other = c(NA_real_,
NA_real_, NA_real_), datingapp_other_t = c(NA_character_,
NA_character_, NA_character_), datingapp_time = c("multiple times a month",
"multiple times a month", "multiple times a month"), matchweek = c("10",
"10", "10"), match_month = c("40", "40", "40"), date_love = c(1,
1, 1), date_sex = c(1, 1, 1), date_comm = c(1, 1, 1), date_worth = c(NA_real_,
NA_real_, NA_real_), date_thrill = c(1, 1, 1), date_trend = c(NA_real_,
NA_real_, NA_real_), feat_thin = c(NA_real_, NA_real_, NA_real_
), feat_muscle = c(1, 1, 1), feat_face = c(1, 1, 1), feat_sex = c(NA_real_,
NA_real_, NA_real_), feat_health = c(1, 1, 1), feat_intell = c(1,
1, 1), feat_other = c(NA_real_, NA_real_, NA_real_), feat_other_t = c(NA_character_,
NA_character_, NA_character_), covid_food = c(1, 1, 1), covid_apps = c(3,
3, 3), covid_social = c(5, 5, 5), eatingdisorder_diagnosed = c("no",
"no", "no"), month_diagnosed = c(NA_character_, NA_character_,
NA_character_), year_diagnosed = c(NA_character_, NA_character_,
NA_character_), eatingdisorder = c(NA_real_, NA_real_, NA_real_
), eatingdisorder_other = c(NA_character_, NA_character_,
NA_character_), eatingdisorder_status = c(NA_real_, NA_real_,
NA_real_), ed_age = c(NA_character_, NA_character_, NA_character_
), ed_years = c(NA_character_, NA_character_, NA_character_
), socio_1 = c(3, 3, 3), socio_2 = c(4, 4, 4), socio_3 = c(4,
4, 4), socio_4 = c(3, 3, 3), socio_5 = c(2, 2, 2), socio_6 = c(4,
4, 4), socio_7 = c(5, 5, 5), socio_8 = c(5, 5, 5), socio_9 = c(4,
4, 4), socio_10 = c(1, 1, 1), bodysat_1 = c(6, 6, 6), bodysat_2 = c(6,
6, 6), bodysat_3 = c(4, 4, 4), bodysat_4 = c(6, 6, 6), bodysat_5 = c(6,
6, 6), bodysat_6 = c(6, 6, 6), bodysat_7 = c(6, 6, 6), bodysat_8 = c(6,
6, 6), bodyimage_1 = c(5, 5, 5), bodayimage_2 = c(5, 5, 5
), bodyimage_3 = c(5, 5, 5), bodyimage_4 = c(5, 5, 5), bodayimage_5 = c(5,
5, 5), bodayimage_6 = c(5, 5, 5), bodyimage_7 = c(5, 5, 5
), bodyimage_8 = c(5, 5, 5), bodyimage_9 = c(5, 5, 5), bodyimage_10 = c(5,
5, 5), media_1 = c(2, 2, 2), media_2 = c(1, 1, 1), media_3 = c(3,
3, 3), media_4 = c(1, 1, 1), media_5 = c(2, 2, 2), media_6 = c(3,
3, 3), critical_1 = c(4, 4, 4), critical_2 = c(4, 4, 4),
critical_3 = c(4, 4, 4), critical_4 = c(3, 3, 3), critical_5 = c(4,
4, 4), intro_aware_1 = c(2, 2, 2), intro_aware_2 = c(3, 3,
3), intro_aware_3 = c(3, 3, 3), intro_aware_4 = c(3, 3, 3
), cesd_1 = c(2, 2, 2), cesd_2 = c(2, 2, 2), cesd_3 = c(3,
3, 3), cesd_4 = c(3, 3, 3), cesd_5 = c(3, 3, 3), cesd_6 = c(2,
2, 2), cesd_7 = c(2, 2, 2), cesd_8 = c(3, 3, 3), cesd_9 = c(3,
3, 3), cesd_10 = c(2, 2, 2), eat26_1 = c(1, 1, 1), eat26_2 = c(5,
5, 5), eat26_3 = c(1, 1, 1), eat26_4 = c(1, 1, 1), eat26_5 = c(1,
1, 1), eat26_6 = c(1, 1, 1), eat26_7 = c(1, 1, 1), eat26_8 = c(4,
4, 4), eat26_9 = c(6, 6, 6), eat26_10 = c(2, 2, 2), eat26_11 = c(1,
1, 1), eat26_12 = c(1, 1, 1), eat26_13 = c(5, 5, 5), eat26_14 = c(2,
2, 2), eat26_15 = c(5, 5, 5), eat26_16 = c(3, 3, 3), eat26_17 = c(2,
2, 2), eat26_18 = c(2, 2, 2), eat26_19 = c(2, 2, 2), eat26_20 = c(4,
4, 4), eat26_21 = c(1, 1, 1), eat26_22 = c(1, 1, 1), eat26_23 = c(2,
2, 2), eat26_24 = c(2, 2, 2), eat26_25 = c(1, 1, 1), eat26_26 = c(2,
2, 2), eat26_a = c(2, 2, 2), eat26_b = c(2, 2, 2), eat26_c = c(1,
1, 1), eat26_d = c(1, 1, 1), eat26_e = c(1, 1, 1), neg_urg_1 = c(2,
2, 2), neg_urg_2 = c(2, 2, 2), neg_urg_3 = c(2, 2, 2), neg_urg_4 = c(2,
2, 2), neg_urg_5 = c(3, 3, 3), neg_urg_6 = c(2, 2, 2), neg_urg_7 = c(2,
2, 2), neg_urg_8 = c(3, 3, 3), neg_urg_9 = c(2, 2, 2), neg_urg_10 = c(2,
2, 2), neg_urg_11 = c(3, 3, 3), neg_urg_12 = c(2, 2, 2),
dis_tol_1 = c(3, 3, 3), dis_tol_2 = c(3, 3, 3), dis_tol_3 = c(2,
2, 2), dis_tol_4 = c(3, 3, 3), dis_tol_5 = c(4, 4, 4), dis_tol_6 = c(4,
4, 4), dis_tol_7 = c(3, 3, 3), dis_tol_8 = c(4, 4, 4), dis_tol_9 = c(3,
3, 3), dis_tol_10 = c(2, 2, 2), dis_tol_11 = c(2, 2, 2),
dis_tol_12 = c(3, 3, 3), dis_tol_13 = c(4, 4, 4), dis_tol_14 = c(4,
4, 4), dis_tol_15 = c(3, 3, 3), lone_1 = c(3, 3, 3), lone_2 = c(2,
2, 2), lone_3 = c(3, 3, 3), lone_4 = c(3, 3, 3), lone_5 = c(2,
2, 2), lone_6 = c(3, 3, 3), lone_7 = c(2, 2, 2), lone_8 = c(3,
3, 3), lone_9 = c(3, 3, 3), lone_10 = c(2, 2, 2), lone_11 = c(3,
3, 3), lone_12 = c(2, 2, 2), lone_13 = c(3, 3, 3), lone_14 = c(3,
3, 3), lone_15 = c(2, 2, 2), lone_16 = c(3, 3, 3), lone_17 = c(2,
2, 2), lone_18 = c(3, 3, 3), lone_19 = c(3, 3, 3), lone_20 = c(3,
3, 3), ucla_1 = c(3, 3, 3), ucla_2 = c(3, 3, 3), ucla_3 = c(3,
3, 3), appear_1a = c(5, 5, 5), appear_1e = c(4, 4, 4), appear_2a = c(5,
5, 5), appear_2e = c(5, 5, 5), appear_3a = c(4, 4, 4), appear_3e = c(4,
4, 4), appear_4a = c(5, 5, 5), appear_4e = c(5, 5, 5), appear_5a = c(5,
5, 5), appear_5e = c(4, 4, 4), appear_6a = c(5, 5, 5), appear_6e = c(4,
4, 4), appear_7a = c(4, 4, 4), appear_7e = c(5, 5, 5), appear_8a = c(5,
5, 5), appear_8e = c(5, 5, 5), appear_9a = c(5, 5, 5), appear_9e = c(4,
4, 4), appear_10a = c(5, 5, 5), appear_10e = c(5, 5, 5),
object_1 = c(3, 3, 3), object_2 = c(1, 1, 1), object_3 = c(2,
2, 2), object_4 = c(4, 4, 4), object_5 = c(12, 12, 12), object_6 = c(5,
5, 5), object_7 = c(11, 11, 11), object_8 = c(6, 6, 6), object_9 = c(10,
10, 10), object_10 = c(7, 7, 7), object_11 = c(8, 8, 8),
object_12 = c(9, 9, 9), rrs_1 = c(NA_real_, NA_real_, NA_real_
), rrs_2 = c(NA_real_, NA_real_, NA_real_), rrs_3 = c(NA_real_,
NA_real_, NA_real_), rrs_4 = c(NA_real_, NA_real_, NA_real_
), rrs_5 = c(NA_real_, NA_real_, NA_real_), rrs_6 = c(NA_real_,
NA_real_, NA_real_), rrs_7 = c(NA_real_, NA_real_, NA_real_
), rrs_8 = c(NA_real_, NA_real_, NA_real_), rrs_9 = c(NA_real_,
NA_real_, NA_real_), rrs_10 = c(NA_real_, NA_real_, NA_real_
), negative_urgency_tot = c(34, 34, 34), smartphone = c(NA_real_,
NA_real_, NA_real_), eat_26_total = c(48, 48, 48), eat26_oral_control = c(5,
5, 5), eat26_bulimia_food = c(11, 11, 11), eat26_diet = c(32,
32, 32), total_lone = c(49, 49, 49), total_object = c(78,
78, 78), rrs_total = c(NA_real_, NA_real_, NA_real_), total_dis_tol = c(45,
45, 45), total_body_sat = c(46, 46, 46), totalsocio = c(35,
35, 35), total_bodyimage = c(50, 50, 50), total_media = c(12,
12, 12), total_critical = c(19, 19, 19), total_intro_aware = c(7,
7, 7), intro_aware1_recoded = c(1, 1, 1), totalcesdrecoded = c(13,
13, 13), itro_aware_2recoded = c(2, 2, 2), intro_aware_3recoded = c(2,
2, 2), intro_aware4_recoded = c(2, 2, 2), cesd_1recoded = c(1,
1, 1), cesd_2recoded = c(1, 1, 1), cesd_3recoded = c(2, 2,
2), cesd_4recoded = c(2, 2, 2), cesd_5reversecoded = c(1,
1, 1), cesd_6recoded = c(1, 1, 1), cesd_7recoded = c(1, 1,
1), cesd_8reversecoded = c(1, 1, 1), cesd_9recoded = c(2,
2, 2), cesd_10recoded = c(1, 1, 1), recoded_eat26_q1 = c(3,
3, 3), recoded_eat26_q2 = c(0, 0, 0), recoded_eat26_q3 = c(3,
3, 3), recoded_eat26_q4 = c(3, 3, 3), recoded_eat26_q5 = c(3,
3, 3), recoded_eat26_q6 = c(3, 3, 3), recoded_eat26_q7 = c(3,
3, 3), recoded_eat26_q8 = c(0, 0, 0), recoded_eat26_q9 = c(0,
0, 0), recoded_eat26_q10 = c(2, 2, 2), recoded_eat26_q11 = c(3,
3, 3), recoded_eat26_q12 = c(3, 3, 3), recoded_eat26_q13 = c(0,
0, 0), recoded_eat26_q14 = c(2, 2, 2), recoded_eat26_q15 = c(0,
0, 0), recoded_eat26_q16 = c(1, 1, 1), recoded_eat26_q17 = c(2,
2, 2), recoded_eat26_q18 = c(2, 2, 2), recoded_eat26_q19 = c(2,
2, 2), recoded_eat26_q20 = c(0, 0, 0), recoded_eat26_q21 = c(3,
3, 3), recoded_eat26_q22 = c(3, 3, 3), recoded_eat26_q23 = c(2,
2, 2), recoded_eat26_q24 = c(2, 2, 2), recoded_eat26_q25 = c(3,
3, 3), recoded_eat26_q26 = c(0, 0, 0), dis_tol_6recoded = c(2,
2, 2), lone_1recoded = c(2, 2, 2), lone_5recoded = c(3, 3,
3), lone_6recoded = c(2, 2, 2), lone_9recoded = c(2, 2, 2
), lone_10recoded = c(3, 3, 3), lone_15recoded = c(3, 3,
3), lone_16recoded = c(2, 2, 2), lone_19recoded = c(2, 2,
2), lone_20recoded = c(2, 2, 2), lone_4recoded = c(2, 2,
2), neg_urg_1recoded = c(3, 3, 3), neg_urg_2recoded = c(3,
3, 3), neg_urg_3recoded = c(3, 3, 3), neg_urg_4recoded = c(3,
3, 3), neg_urg_5recoded = c(2, 2, 2), neg_urg_6recoded = c(3,
3, 3), neg_urg_7recoded = c(3, 3, 3), neg_urg_8recoded = c(2,
2, 2), neg_urg_9recoded = c(3, 3, 3), neg_urg_10recoded = c(3,
3, 3), neg_urg_12recoded = c(3, 3, 3), filter = c(1, 1, 1
), EatingSum = c(NA, 0, 1), EatingMean = c(NA, 0, 0.166666666666667
), appearsum = c(216, 216, 216), appearT = c(21.6, 21.6,
21.6), sexualorientation_col = c(NA_real_, NA_real_, NA_real_
), sat1r = c(NA, 6, 8), happyr = c(NA, 5, 5), dating1r = c(NA,
2, 1), datingappr = c(2, 2, 2), currentheight_metre = c(1.58,
1.58, 1.58), BMI = c(22.0317256849864, 22.0317256849864,
22.0317256849864), employed_sum = c(0, 0, 0), employed = c(0,
0, 0), unemployed = c(0, 0, 0), IDorder = 1:3, Date = structure(c(NA_real_,
NA_real_, NA_real_), class = c("POSIXct", "POSIXt"), tzone = ""),
FirstDate = structure(c(NA_real_, NA_real_, NA_real_), class = c("POSIXct",
"POSIXt"), tzone = ""), DaysElapsed = structure(c(NA_real_,
NA_real_, NA_real_), class = "difftime", units = "secs")), row.names = c(NA,
-3L), groups = structure(list(participant_id = 5237430, .rows = structure(list(
1:3), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr",
"list"))), row.names = 1L, class = c("tbl_df", "tbl", "data.frame"
), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
))
I am trying to add error bars to my double y axis graph, but when run, it completely ruins the graph. I attached a picture below. I also added my code.
If you need the full data set, let me know! Thank you so much in advance!
# Precipitation is drawn on the primary y axis. Temperature is multiplied by
# scalefactor so it shares that axis, and the secondary axis divides by the
# same factor to label it in temperature units.
scalefactor <- max(Complete_Seasonality_Data$PRCP) /
  max(Complete_Seasonality_Data$Temp_C)

p <- ggplot(Complete_Seasonality_Data, aes(x = NewMonths5)) +
  geom_point(aes(y = PRCP, colour = "Precipitation")) +
  geom_line(aes(y = PRCP, colour = "Precipitation", group = 1)) +
  geom_point(aes(y = Temp_C * scalefactor, colour = "Temperature")) +
  geom_line(aes(y = Temp_C * scalefactor, colour = "Temperature", group = 1)) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / scalefactor, name = ylabseasonality)
  ) +
  scale_colour_manual(values = c("blue", "red")) +
  labs(y = "Precipitation (in)", x = "Month", colour = "Parameter") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = c(.99, .01)
  ) +
  # NOTE(review): these bounds are in raw temperature units, while the
  # temperature points above are multiplied by scalefactor.
  geom_errorbar(
    aes(ymin = TempSummary$mean - StdErrorTemp,
        ymax = TempSummary$mean + StdErrorTemp),
    position = position_dodge(.9), width = 0.2
  ) +
  # NOTE(review): ymax is built from TempSummary$mean while ymin uses
  # PrecipSummary$mean - confirm which summary was intended.
  geom_errorbar(
    aes(ymin = PrecipSummary$mean - StdErrorPrecip,
        ymax = TempSummary$mean + StdErrorPrecip),
    position = position_dodge(.9), width = 0.2
  )
p
How I computed the Std Errors
# Grouped summary tables from Summarize(), followed by the standard error of
# each group mean computed as sd / sqrt(n).
TempSummary <- Summarize(Temp_C ~ Month, data = Chara_Data, digits = 3)
View(TempSummary)
StdErrorTemp <- with(TempSummary, sd / sqrt(n))
View(StdErrorTemp)

PrecipSummary <- Summarize(PRCP ~ Group.1, data = Complete_Seasonality_Data,
                           digits = 3)
StdErrorPrecip <- with(PrecipSummary, sd / sqrt(n))
Complete data set!
structure(list(Group.1 = c("April", "August", "December", "February",
"January", "July", "June", "March", "May", "November", "October",
"September"), Season = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Month = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Year = c(2017.05882352941, 2016.6, 2016.6, 2017.6,
2017.6, 2016.6, 2017.05882352941, 2017, 2017.05882352941, 2016.6,
2016.6, 2016.6), Date = structure(c(1494315952.94118, 1490691600,
1500316560, 1506183120, 1504163520, 1487501280, 1499108611.76471,
1489840800, 1496798682.35294, 1498314240, 1496087280, 1493421840
), class = c("POSIXct", "POSIXt")), Site = c(8.17647058823529,
8.125, 7.775, 7.775, 6.375, 6.375, 8.20588235294118, 6.80555555555556,
6.55882352941176, 6.375, 8.1, 6.375), PercentCover = c(0.765882352941176,
0.7125, 0.7505, 0.7775, 0.8625, 0.867, 0.763529411764706, 0.83,
0.850588235294118, 0.848, 0.7065, 0.834), AveHt = c(60.1684438927086,
50.2311192279942, 58.9048701298701, 57.3448097041847, 55.2253291847042,
64.6965656565657, 57.9602622867329, 56.672138047138, 64.4076426024955,
57.1465322871573, 54.3781565656566, 58.3185831529582), SE = c(7.07246013321596,
7.79305525403115, 7.00224498332823, 6.46671176266333, 6.32495719718401,
7.04611575726224, 8.09695750051648, 5.65899377193264, 7.28959135811987,
6.24571692582705, 7.32819802238581, 7.05669314452393), MaxHt = c(88.3823529411765,
81.625, 87.75, 85, 85.875, 96.425, 92.9117647058823, 82.5, 98.6764705882353,
88.125, 79.75, 89.65), green = c(0.350962665193537, 0.278211058736042,
0.183934291894458, 0.197711422851132, 0.179043270311077, 0.335751664926552,
0.186533536107468, 0.256634190010066, 0.319397625619223, 0.204519948331115,
0.249063275007846, 0.277894684744482), yellow = c(0.556643767952726,
0.569690303836593, 0.686152813243381, 0.654331042886853, 0.594548585049017,
0.554485584960289, 0.581008683220038, 0.609988063809375, 0.594827659217835,
0.620510694031593, 0.633793562346056, 0.600527348262596), brown = c(0.0923935668537371,
0.14983619398845, 0.122185622134889, 0.145933312808728, 0.226114026992848,
0.10976275011316, 0.229212761734686, 0.132653108499399, 0.0857747151629417,
0.174675239990233, 0.114398064606882, 0.121577966992922), Temp = c(78.4411764705882,
82.975, 75.65, 74.75, 74.3, 82.2051282051282, 81.0882352941177,
75.8333333333333, 79.8823529411765, 78.6, 80.1944444444444, 83
), Temp_C = c(25.8006535947712, 28.3194444444444, 24.25, 23.75,
23.5, 27.8917378917379, 27.2712418300654, 24.3518518518519, 26.6013071895425,
25.8888888888889, 26.7746913580247, 28.3333333333333), Vis = c(1.98823529411765,
2.12820512820513, 2.2125, 2.07, 2.1625, 2.07179487179487, 2.05,
2.02777777777778, 2.11764705882353, 2.205, 2.11, 2.17375), Nests = c(12.4117647058824,
17.1, 7.1, 6.275, 4, 8.9, 13.8787878787879, 4.88888888888889,
7.38235294117647, 2.8, 13.025, 5.6), SickorDeadFish = c(0.0882352941176471,
0.2, 0.175, 0.075, 0.05, 0.117647058823529, 0.0882352941176471,
0.166666666666667, 0.0294117647058824, 0.25, 0.333333333333333,
0.275), Cladophora = c(0.0866666666666667, 0.0492857142857143,
0.0471428571428571, 0.0907142857142857, 0.0264285714285714, 0.0154545454545455,
0.0380952380952381, 0.0295238095238095, 0.0161904761904762, 0.0178571428571429,
0.0407142857142857, 0.03), Comments = c(NaN, NaN, NaN, NaN, NaN,
NaN, NaN, NaN, NaN, NaN, NaN, NaN), STATION = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), NAME = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), DATE = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), MONTH = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), AWND = c(6.52626966292135, 5.97866090712743, 5.85811926605505,
6.31656097560976, 6.181, 6.1103908045977, 6.23947727272727, 6.5154211663067,
6.0985313174946, 5.64997635933806, 5.43263157894737, 5.54940639269406
), FMTM = c(1412.13333333333, 1431.1935483871, 1411.77419354839,
1535.16666666667, 1339.24137931034, 1439.77419354839, 1378.3,
1398.8064516129, 1353.12903225806, 1362.96666666667, 1408.45161290323,
1381.46666666667), PGTM = c(1394.1095890411, 1394.96774193548,
1306.83333333333, 1412.0511627907, 1327.90350877193, 1435.51769911504,
1372.37674418605, 1389.12328767123, 1376.75576036866, 1373.45341614907,
1346.2774566474, 1396), PRCP = c(0.0205869074492099, 0.0248701298701299,
0.0663425925925926, 0.0481472684085511, 0.0360991379310345, 0.0101144164759725,
0.00790067720090293, 0.0762693156732892, 0.0298491379310345,
0.0472985781990521, 0.034965034965035, 0.0243778801843318), SNOW = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), SNWD = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), TAVG = c(78.5333333333333, NaN, NaN, 61.1052631578947,
68.6333333333333, 80.2903225806452, 79.4, 72.5161290322581, 77.8709677419355,
NaN, NaN, NaN), TMAX = c(83.6826484018265, 88.8509719222462,
81.4940617577197, 80.6938271604938, 80.8072562358277, 88.1520737327189,
86.8795454545455, 81.3290043290043, 84.6048034934498, 83.8289786223278,
86.3615560640732, 88.1009174311927), TMIN = c(67.5423340961098,
72.5917926565875, 66.4394299287411, 64.9283950617284, 64.5600907029478,
71.9654377880184, 70.6772727272727, 65.7597402597403, 68.6527472527472,
68.9643705463183, 70.558352402746, 71.7821100917431), TSUN = c(NaN,
NaN, NaN, 0, 0, NaN, NaN, NaN, NaN, NaN, NaN, NaN), WDF2 = c(115.538116591928,
100.905172413793, 133.577981651376, 143.965936739659, 149.438444924406,
91.141876430206, 99.5022624434389, 131.612903225806, 124.279569892473,
109.693396226415, 119.450800915332, 115.068493150685), WDF5 = c(107.545045045045,
97.6077586206897, 124.528735632184, 133.031784841076, 140.826086956522,
82.5229357798165, 90.972850678733, 120.634573304158, 115.714285714286,
103.720379146919, 109.266055045872, 104.736842105263), WSF2 = c(15.2026905829596,
14.8530172413793, 14.6919724770642, 15.4111922141119, 15.1332613390929,
14.9070938215103, 15.083257918552, 15.4161290322581, 14.8625806451613,
14.322641509434, 14.3432494279176, 14.5600456621005), WSF5 = c(22.1105855855856,
21.9961206896552, 20.8029885057471, 20.8081145584726, 20.4824675324675,
22.4052752293578, 22.2158371040724, 21.9317286652079, 21.130303030303,
20.8722748815166, 20.493119266055, 21.0052511415525), WT01 = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), WT02 = c(NaN, 1, NaN, 1, 1,
NaN, NaN, 1, 1, NaN, 1, NaN), WT08 = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1), WT10 = c(NaN, NaN, NaN, NaN, NaN, NaN, 1, NaN, NaN,
NaN, NaN, NaN), NewMonths2 = structure(c(17295, 17253, 17364,
17432, 17409, 17216, 17350, 17243, 17324, 17341, 17315, 17284
), class = "Date")), row.names = c(NA, -12L), class = "data.frame")
**Edited to add complete data set and how I did std error
Temp Summary
structure(list(Month = c("April", "August", "December", "February",
"January", "July", "June", "March", "May", "November", "October",
"September"), n = c(34, 40, 40, 40, 40, 40, 34, 36, 34, 40, 40,
40), nvalid = c(34, 40, 40, 40, 40, 39, 34, 36, 34, 40, 36, 40
), mean = c(25.801, 28.319, 24.25, 23.75, 23.5, 27.892, 27.271,
24.352, 26.601, 25.889, 26.775, 28.333), sd = c(0.478, 0.978,
0.921, 0.793, 0.551, 0.463, 0.632, 1.47, 0.905, 0.763, 0.928,
0.534), min = c(25, 26.667, 22.778, 21.667, 21.667, 27.222, 26.111,
22.778, 25, 25, 25.556, 27.222), Q1 = c(25.556, 27.778, 23.889,
23.333, 23.333, 27.778, 27.222, 23.333, 26.111, 25.556, 25.556,
27.778), median = c(25.556, 27.778, 23.889, 23.889, 23.333, 27.778,
27.222, 23.889, 26.667, 25.556, 27.222, 28.333), Q3 = c(25.972,
28.889, 25, 24.444, 23.889, 28.333, 27.639, 24.583, 27.222, 26.111,
27.361, 28.889), max = c(26.667, 30, 25.556, 25, 24.444, 28.889,
28.889, 27.222, 27.778, 27.778, 28.333, 29.444)), class = "data.frame", row.names = c(NA,
-12L))
Precip Summary
structure(list(MONTH = c("April", "August", "December", "February",
"January", "July", "June", "March", "May", "November", "October",
"September"), n = c(446, 464, 436, 422, 465, 437, 444, 465, 465,
424, 438, 439), nvalid = c(443, 462, 432, 421, 464, 437, 443,
453, 464, 422, 429, 434), mean = c(0.021, 0.025, 0.066, 0.048,
0.036, 0.01, 0.008, 0.076, 0.03, 0.047, 0.035, 0.024), sd = c(0.094,
0.184, 0.342, 0.211, 0.142, 0.047, 0.047, 0.343, 0.14, 0.24,
0.243, 0.112), min = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Q1 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), median = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), Q3 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
max = c(1.1, 3.06, 4.94, 2.61, 1.5, 0.47, 0.76, 3.32, 1.43,
3.29, 3.64, 1.25), percZero = c(81.264, 87.662, 76.389, 75.534,
77.802, 86.728, 86.682, 75.717, 84.267, 77.962, 83.916, 81.797
)), class = "data.frame", row.names = c(NA, -12L))
Temp Summary Results
enter image description here
Precip Summary Results
enter image description here
I would suggest the following approach. Just be careful with the values of your error bars. Also, the scaling factor must be applied to the error bars as well. That is why you got a messy plot. Here is the code using the data you added:
library(ggplot2)

# Dual-axis plot of precipitation (primary y axis) and temperature (secondary
# y axis, rescaled onto the precipitation range), each with +/- 1 standard
# error bars taken from the PrecipSummary / TempSummary tables.

# Create a Date column to use on the x axis.
Complete_Seasonality_Data$NewMonths5 <- as.Date(Complete_Seasonality_Data$Date)

# Standard error of each monthly mean. Divide by `nvalid` (the number of
# non-missing observations) rather than `n`, which also counts missing rows.
StdErrorTemp <- TempSummary$sd / sqrt(TempSummary$nvalid)
StdErrorPrecip <- PrecipSummary$sd / sqrt(PrecipSummary$nvalid)

# Factor that maps temperature values onto the precipitation axis.
# na.rm = TRUE keeps a single missing value from turning the factor into NA.
scalefactor <- max(Complete_Seasonality_Data$PRCP, na.rm = TRUE) /
  max(Complete_Seasonality_Data$Temp_C, na.rm = TRUE)

# NOTE(review): the error bars below take their values straight from the
# summary tables, so those tables must have one row per row of
# Complete_Seasonality_Data, in the same order. PrecipSummary is sorted
# alphabetically by MONTH -- confirm the plotted data uses the same order.

# Precipitation series (drawn in its own units).
p <- ggplot(Complete_Seasonality_Data, aes(x = NewMonths5))
p <- p + geom_point(aes(y = PRCP, colour = "Precipitation"))
p <- p + geom_line(aes(y = PRCP, colour = "Precipitation", group = 1))
p <- p + geom_errorbar(
  aes(
    ymin = PrecipSummary$mean - StdErrorPrecip,
    ymax = PrecipSummary$mean + StdErrorPrecip
  ),
  position = position_dodge(.9), width = 0.2
)

# Temperature series, multiplied by the scale factor so it shares the primary
# axis; the secondary axis divides by the same factor to label it in degrees.
p <- p + geom_point(aes(y = Temp_C * scalefactor, colour = "Temperature"))
p <- p + geom_line(aes(y = Temp_C * scalefactor, colour = "Temperature", group = 1))
p <- p + scale_y_continuous(sec.axis = sec_axis(~ . / scalefactor, name = "Temperature"))

# The whole interval (mean +/- SE) is rescaled, not just the mean; otherwise
# the bar half-width would be in temperature units on a precipitation axis.
p <- p + geom_errorbar(
  aes(
    ymin = (TempSummary$mean - StdErrorTemp) * scalefactor,
    ymax = (TempSummary$mean + StdErrorTemp) * scalefactor
  ),
  position = position_dodge(.9), width = 0.2
)

# Colours, labels and theme.
p <- p + scale_colour_manual(values = c("blue", "red"))
p <- p + labs(
  y = "Precipitation (in)",
  x = "Month",
  colour = "Parameter"
)
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90), legend.position = c(.99, .01))
p
Output:
I'm new to R and am having trouble with a simple command. How do I find the proportion of demographic variables (for example, proportion of English speakers in my population, or proportion of White respondents)?
I'd like to create a large table with all of the proportions, and would hopefully include mean age and median education level, but am having trouble finding the command. This is what I've tried:
# Frequency table of each demographic column, followed by the same table
# expressed as proportions (printed at top level).

# English speakers
table2 <- table(VR_Data[["English"]])
prop.table(table2)

# Race categories
table3 <- table(VR_Data[["race"]])
prop.table(table3)

# Male indicator
table4 <- table(VR_Data[["male"]])
prop.table(table4)
If it helps, this is my data:
structure(list(study = c(4, 4, 4, 1, 1, 1), TREATMENT = c(0,
0, 0, 0, 0, 0), TREATMENT4 = c(0, 0, 0, 0, 0, 0), TREATMENT2 = c(0,
0, 0, 0, 0, 0), TREATMENT3 = c(0, 0, 0, 0, 0, 0), order = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), id = c(279,
238, 239, 135, 143, 138), treatment = c(0, 0, 0, 0, 0, 0), treatment_condition = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), control_condition = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), m_check1 = c(1,
1, 1, 1, 1, 1), relationship = c(NA, NA, NA, 7, 6, 5), payment = c(NA,
NA, NA, 10, 3, 3), educ_level = c(14, 14, 12, 16, 16, 18), golf = c(3,
5, 3, 3, 2, 3), male = c(1, 0, 1, 0, 0, 1), Asian = c(0, 1, 0,
0, 0, 0), Black = c(0, 0, 0, 0, 0, 0), Latino = c(1, 0, 0, 0,
0, 0), White = c(0, 0, 1, 1, 1, 1), age = c(27, 53, 49, 25, 28,
24), English = c(1, 1, 1, 1, 1, 1), education = c(16, 16, 14,
14, 14, 16), enjoy = c(4, 1, 3.5, 4.25, 3.25, 3.5), RELATIONSHIP = c(4.33333349227905,
1, 4.33333349227905, 3.66666674613953, 3.5, 3.66666674613953),
anxiety = c(3, 3.40000009536743, 2.20000004768372, 1.25,
2, 1.25), BEH_SIM = c(3, 1, 3.75, 2.75, 2.5, 1.75), sptconf = c(3.33333325386047,
1.5, 4, 4.83333349227905, 4, 3.66666674613953), NEG_EFFICACY = c(4,
1.16666662693024, 3.66666674613953, 4.83333349227905, 4.16666650772095,
4.5), spteffort = c(3.16666674613953, 3.5, 4.16666650772095,
3.16666674613953, 3.16666674613953, 3.5), SPTEFFORT_OTHER = c(3.16666674613953,
3.5, 3.5, 3.16666674613953, 3, 3.33333325386047), SIM_VALUES = c(3.75,
1, 3.75, 3.75, 1.5, 2.25), COOP_MOTIV = c(2.33333325386047,
3, 2.66666674613953, 5, 2.5, 2.66666674613953), COMP_MOTIV = c(5,
5, 3.20000004768372, 4.40000009536743, 2.40000009536743,
4.40000009536743), presence = c(NA, NA, NA, 2.79999995231628,
1.79999995231628, 2.59999990463257), environ = c(NA, NA,
NA, 3, 4, 3), openresponse = c(NA, NA, NA, 94.25, 86, 60),
TotalOwnerCommission = c(300, 266.666656494141, 258.333343505859,
266.666656494141, 383.333343505859, 325), TotalRangerComm = c(258.333343505859,
233.33332824707, 291.666656494141, 258.333343505859, 175,
166.66667175293), TotalComm = c(279.166687011719, 250, 275,
262.5, 279.166687011719, 245.833343505859), merge = c(1,
1, 1, 0, 0, 0), Control = c(1, 1, 1, NA, NA, NA), treatment_Shoes = c(0,
0, 0, NA, NA, NA), treatment_Instructions_Only = c(0, 0,
0, NA, NA, NA), treatment_Info_Only = c(0, 0, 0, NA, NA,
NA), treatment_Info_Instructions = c(0, 0, 0, NA, NA, NA),
group = c("OwnerOnly", "OwnerOnly", "OwnerOnly", "", "",
""), race = c(4, 2, 5, NA, NA, NA), race_a = c("", "", "",
"", "", ""), RELATIONSHIP_2 = c(9.02055358886719, 1, 9.02055358886719,
7.02113246917725, 6.54790019989014, 7.02113246917725), TotalOwnerCommission_2 = c(5196.15234375,
4354.64794921875, 4152.12744140625, 4354.64794921875, 7505.24560546875,
5859.02099609375)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
How can I put all of these proportions into one table, with mean and medians? Is this possible? Thank you so much in advance.
If I understand your question correctly, this should help you.
library(dplyr)

# One-row demographic summary of VR_Data.
# English, White and male are 0/1 indicator columns, so their mean is the
# proportion of respondents coded 1. na.rm = TRUE drops missing answers
# instead of letting a single NA turn the whole statistic into NA.
VR_Data %>%
  summarize(
    English_prop = mean(English, na.rm = TRUE),
    White_prop = mean(White, na.rm = TRUE),
    male_prop = mean(male, na.rm = TRUE),
    age_avg = mean(age, na.rm = TRUE),
    education_avg = mean(education, na.rm = TRUE)
  )
Should give you this...
# A tibble: 1 x 5
English_prop White_prop male_prop age_avg education_avg
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.667 0.5 34.3 15
This is the code that I used (with a lot of help from the StackOverflow community!) to create a simpler table using the same data:
library(here)

# Per-treatment descriptive statistics of TotalComm: group size, mean and
# standard deviation, one row per value of `treatment`.
# Every dplyr function is namespace-qualified (including dplyr::n()) and the
# calls are nested rather than piped, so the snippet does not depend on dplyr
# or magrittr being attached.
ANOVA_Relationship_Subset_sum <- dplyr::summarize(
  dplyr::group_by(ANOVA_Relationship_Subset, treatment),
  n = dplyr::n(),
  mean = mean(TotalComm),
  `std. dev` = sd(TotalComm)
)
ANOVA_Relationship_Subset_sum
Now I'm on to something a little more complicated; how can I create a table like this:
If it helps, this is my data:
structure(list(study = c(4, 4, 4, 1, 1, 1), TREATMENT = c(0,
0, 0, 0, 0, 0), TREATMENT4 = c(0, 0, 0, 0, 0, 0), TREATMENT2 = c(0,
0, 0, 0, 0, 0), TREATMENT3 = c(0, 0, 0, 0, 0, 0), order = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), id = c(279,
238, 239, 135, 143, 138), treatment = c(0, 0, 0, 0, 0, 0), treatment_condition = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), control_condition = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), m_check1 = c(1,
1, 1, 1, 1, 1), relationship = c(NA, NA, NA, 7, 6, 5), payment = c(NA,
NA, NA, 10, 3, 3), educ_level = c(14, 14, 12, 16, 16, 18), golf = c(3,
5, 3, 3, 2, 3), male = c(1, 0, 1, 0, 0, 1), Asian = c(0, 1, 0,
0, 0, 0), Black = c(0, 0, 0, 0, 0, 0), Latino = c(1, 0, 0, 0,
0, 0), White = c(0, 0, 1, 1, 1, 1), age = c(27, 53, 49, 25, 28,
24), English = c(1, 1, 1, 1, 1, 1), education = c(16, 16, 14,
14, 14, 16), enjoy = c(4, 1, 3.5, 4.25, 3.25, 3.5), RELATIONSHIP = c(4.33333349227905,
1, 4.33333349227905, 3.66666674613953, 3.5, 3.66666674613953),
anxiety = c(3, 3.40000009536743, 2.20000004768372, 1.25,
2, 1.25), BEH_SIM = c(3, 1, 3.75, 2.75, 2.5, 1.75), sptconf = c(3.33333325386047,
1.5, 4, 4.83333349227905, 4, 3.66666674613953), NEG_EFFICACY = c(4,
1.16666662693024, 3.66666674613953, 4.83333349227905, 4.16666650772095,
4.5), spteffort = c(3.16666674613953, 3.5, 4.16666650772095,
3.16666674613953, 3.16666674613953, 3.5), SPTEFFORT_OTHER = c(3.16666674613953,
3.5, 3.5, 3.16666674613953, 3, 3.33333325386047), SIM_VALUES = c(3.75,
1, 3.75, 3.75, 1.5, 2.25), COOP_MOTIV = c(2.33333325386047,
3, 2.66666674613953, 5, 2.5, 2.66666674613953), COMP_MOTIV = c(5,
5, 3.20000004768372, 4.40000009536743, 2.40000009536743,
4.40000009536743), presence = c(NA, NA, NA, 2.79999995231628,
1.79999995231628, 2.59999990463257), environ = c(NA, NA,
NA, 3, 4, 3), openresponse = c(NA, NA, NA, 94.25, 86, 60),
TotalOwnerCommission = c(300, 266.666656494141, 258.333343505859,
266.666656494141, 383.333343505859, 325), TotalRangerComm = c(258.333343505859,
233.33332824707, 291.666656494141, 258.333343505859, 175,
166.66667175293), TotalComm = c(279.166687011719, 250, 275,
262.5, 279.166687011719, 245.833343505859), merge = c(1,
1, 1, 0, 0, 0), Control = c(1, 1, 1, NA, NA, NA), treatment_Shoes = c(0,
0, 0, NA, NA, NA), treatment_Instructions_Only = c(0, 0,
0, NA, NA, NA), treatment_Info_Only = c(0, 0, 0, NA, NA,
NA), treatment_Info_Instructions = c(0, 0, 0, NA, NA, NA),
group = c("OwnerOnly", "OwnerOnly", "OwnerOnly", "", "",
""), race = c(4, 2, 5, NA, NA, NA), race_a = c("", "", "",
"", "", ""), RELATIONSHIP_2 = c(9.02055358886719, 1, 9.02055358886719,
7.02113246917725, 6.54790019989014, 7.02113246917725), TotalOwnerCommission_2 = c(5196.15234375,
4354.64794921875, 4152.12744140625, 4354.64794921875, 7505.24560546875,
5859.02099609375)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
Briefly, I do want to thank the SO community for all their help with R. I don't know how I would have gotten this far without all of your help.
Try the apaTables Package! Format your data as per the example, and use the apa.aov.table() function to transform your table to APA style.