Hanging Graphs w/ NA Values - r

I've been using this code
# Bar chart of age, with bars filled by subject_talk converted to a factor.
ggplot(Oak, aes(x = age, fill = factor(subject_talk))) +
# Bar height is each count divided by the sum of the counts; bars are stacked.
geom_bar(aes(y = (..count..)/sum(..count..)),position = "stack") +
# Limit the x axis to the range 18 to 29.
xlim(18,29) +
# Manual fill scale: breaks "0" and "1", coloured with two default hue colours.
scale_fill_manual(breaks=c("0","1"), values = scales::hue_pal()(2))
to create graphs that look like this
Recently, some graphs end up floating where NA values should be,
which I don't want.
Here's the code for 2
# Same chart as above, but filled by highcho converted to a factor.
# NOTE(review): highcho contains NA values in the sample data, and factor()
# does not turn NA into a level - presumably the source of the floating bars.
ggplot(Oak, aes(x = age, fill = factor(highcho))) +
# Bar height is each count divided by the sum of the counts; bars are stacked.
geom_bar(aes(y = (..count..)/sum(..count..)),position = "stack") +
# Limit the x axis to the range 18 to 29.
xlim(18,29) +
# Manual fill scale: breaks "0" and "1", coloured with two default hue colours.
scale_fill_manual(breaks=c("0","1"), values = scales::hue_pal()(2))
The output is too long to set as code, I can't post it otherwise.
dput(head(Oak,20))
structure(list(studyid = structure(c(1002, 1002, 1002, 1002,
1002, 1004, 1004, 1004, 1004, 1004, 1005, 1005, 1005, 1005, 1005,
1006, 1006, 1006, 1006, 1006), label = "Subject Study ID", format.stata = "%12.0g"),
post_flu = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0), label = "Receipt of Flu Vaccine - Encounter Survey", format.stata = "%10.0g"),
post_bmi = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "BMI Test Received - Encounter Survey", format.stata = "%9.0g"),
post_bp = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Blood Pressure Test Received - Encounter Survey", format.stata = "%9.0g"),
post_dia = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Diabetes Test Received - Encounter Survey", format.stata = "%9.0g"),
post_cho = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Cholesterol Test Received - Encounter Survey", format.stata = "%9.0g"),
post_flu_sl = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, NA, NA, NA, NA, NA), label = "Flu Shot Received (Subsidy Received) - Encounter Survey", format.stata = "%9.0g"),
post_flu_nosl = structure(c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0), label = "Flu Shot Received (No Subsidy Received) - Encounter Survey", format.stata = "%9.0g"),
post_shr_invasive = structure(c(1, 1, 1, 1, 1, 0.666666686534882,
0.666666686534882, 0.666666686534882, 0.666666686534882,
0.666666686534882, 0.333333343267441, 0.333333343267441,
0.333333343267441, 0.333333343267441, 0.333333343267441,
0, 0, 0, 0, 0), label = "Post Take-Up as Share of Invasive Services", format.stata = "%9.0g"),
post_share4 = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Post Take-Up as Share of Four Services", format.stata = "%9.0g"),
pre_bmi = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Weight and Height Selected - CTO Patient Survey", format.stata = "%8.0g"),
pre_bp = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Blood Pressure Selected - CTO Patient Survey", format.stata = "%8.0g"),
pre_dia = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Diabetes Selected - CTO Patient Survey", format.stata = "%8.0g"),
pre_cho = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Cholesterol Selected - CTO Patient Survey", format.stata = "%8.0g"),
pre_flu = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Flu Shot Selected - CTO Patient Survey", format.stata = "%8.0g", labels = c(No = 0,
Yes = 1, Unsure = 99), class = c("haven_labelled", "vctrs_vctr",
"double")), pre_flu_sl = structure(c(1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA), label = "Flu Shot Selected (Subsidy Received) - CTO Survey", format.stata = "%9.0g"),
pre_flu_nosl = structure(c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0), label = "Flu Shot Selected (No Subsidy Received) - CTO Survey", format.stata = "%9.0g"),
pre_shr_invasive = structure(c(0.333333343267441, 0.333333343267441,
0.333333343267441, 0.333333343267441, 0.333333343267441,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Pre Take-Up as Share of Invasive Services", format.stata = "%9.0g"),
pre_share4 = structure(c(0, 0, 0, 0, 0, 0.25, 0.25, 0.25,
0.25, 0.25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Pre Take-Up as Share of Four Services", format.stata = "%9.0g"),
delta_bmi = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Delta BMI: Post - Pre", format.stata = "%9.0g"),
delta_bp = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Delta Blood Pressure: Post - Pre", format.stata = "%9.0g"),
delta_dia = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Delta Diabetes: Post - Pre", format.stata = "%9.0g"),
delta_cho = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Delta Cholesterol: Post - Pre", format.stata = "%9.0g"),
delta_flu = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Delta Flu: Post - Pre", format.stata = "%9.0g"),
delta_shr_invasive = structure(c(0.666666686534882, 0.666666686534882,
0.666666686534882, 0.666666686534882, 0.666666686534882,
0.666666686534882, 0.666666686534882, 0.666666686534882,
0.666666686534882, 0.666666686534882, 0.333333343267441,
0.333333343267441, 0.333333343267441, 0.333333343267441,
0.333333343267441, 0, 0, 0, 0, 0), label = "Delta Take-Up as Share of Invasive Services", format.stata = "%9.0g"),
deltaind_test = structure(c(1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0), label = "Indicator for Delta Selection - Stacked Subject X Test", format.stata = "%9.0g"),
delta_share4 = structure(c(1, 1, 1, 1, 1, 0.75, 0.75, 0.75,
0.75, 0.75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Delta as Share of Four Services", format.stata = "%9.0g"),
friends_enrolled = structure(c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA), label = "Are Friends Enrolled in the Study - CTO Patient Survey", format.stata = "%8.0g", labels = c(No = 0,
Yes = 1, Unsure = 99), class = c("haven_labelled", "vctrs_vctr",
"double")), value_bmi = structure(c(25, 25, 25, 25, 25, 28,
28, 28, 28, 28, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Patient Test Value: bmi - Encounter Survey", format.stata = "%10.0g"),
value_dia = structure(c(5.9, 5.9, 5.9, 5.9, 5.9, 6.9, 6.9,
6.9, 6.9, 6.9, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Patient Test Value: hgb - Encounter Survey", format.stata = "%10.0g"),
value_cho = structure(c(208, 208, 208, 208, 208, 170, 170,
170, 170, 170, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Patient Test Value: cho - Encounter Survey", format.stata = "%10.0g"),
subject_talk = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Subject Tried to Talk About Other Health Problems - Encounter Survey", format.stata = "%10.0g"),
choice_care = structure(c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2,
2, 2, 2, 2, 3, 3, 3, 3, 3), label = "How Much Choice Subj Has in Choice of Medical Care - Barber Survey", format.stata = "%22.0g", labels = c(`prefer not to answer` = -99,
`don't know` = -98, `a great deal of choice` = 1, `some choice` = 2,
`very little choice` = 3, `no choice` = 4), class = c("haven_labelled",
"vctrs_vctr", "double")), rating = structure(c(4, 4, 4, 4,
4, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, NA, NA, NA, NA, NA), label = "Experience Rating (1 = Bad, 5 = Excellent) - Subject Feedback", format.stata = "%10.0g"),
doctor_id = structure(c("BL6", "BL6", "BL6", "BL6", "BL6",
"NB8", "NB8", "NB8", "NB8", "NB8", "NB4", "NB4", "NB4", "NB4",
"NB4", "NB4", "NB4", "NB4", "NB4", "NB4"), label = "Doctor Mask ID", format.stata = "%9s"),
nonpreventive_agree = structure(c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Doctor Notes Relating to Personal/Other Health (Coders Agree) - Encounter Survey", format.stata = "%9.0g"),
mecherror_cho = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Mechanical Error, Cholesterol Test - Encounter Survey", format.stata = "%9.0g"),
mecherror_dia = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Mechanical Error, Diabetes Test - Encounter Survey", format.stata = "%9.0g"),
value_systolic = structure(c(165, 165, 165, 165, 165, 168,
168, 168, 168, 168, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), label = "Blood Pressure Value: Systolic - Encounter Survey", format.stata = "%9.0g"),
hyptension = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Subj Has Hypertension, Test Value or MD Comments - Encounter Survey", format.stata = "%9.0g"),
diabetic = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Subj Has Diabetes, Test Value or MD Comments - Encounter Survey", format.stata = "%9.0g"),
highcho = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), label = "Subj Has High Cholesterol, Test Value or MD Comments - Encounter Survey", format.stata = "%9.0g"),
obese = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), label = "Subj Is Obese, Test Value or MD Comments - Encounter Survey", format.stata = "%9.0g"),
length_visit_dr = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), label = "Doctor Visit Duration, Time Out of Waiting Room to Time Out - Encounter Survey", format.stata = "%9.0g"),
RO_tablet_assist = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), label = "RO Assisted Subject with Tablet Survey - Encounter Survey", format.stata = "%9.0g"),
yes_recommend = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, NA, NA, NA, NA, NA), label = "Patient Would Recommend Doctor - Subject Feedback", format.stata = "%9.0g"),
dr_notes = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1), label = "Doctor Wrote Notes in 'Notable' About Subject - Encounter Survey", format.stata = "%9.0g"),
any_health_prob = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1), label = "Subject Reported Any Health Problems (A2-A11) - Barber Survey", format.stata = "%9.0g"),
hosp_visits_2years = structure(c(NA, NA, NA, NA, NA, 3, 3,
3, 3, 3, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3), label = "Number of Hospital Visits in Last 2 Years - CTO Survey", format.stata = "%9.0g"),
ER_2years = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 7, 7, 7, 7, 7), label = "Number of ER Visits in Last 2 Years - Barber Survey", format.stata = "%9.0g"),
nights_hosp_2years = structure(c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Nights in the Hospital in Last 2 Years - Barber Survey", format.stata = "%9.0g"),
has_PCP = structure(c(0, 0, 0, 0, 0, -9, -9, -9, -9, -9,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1), label = "Subject Has Primary Care Provider - Barber Survey", format.stata = "%9.0g"),
uninsured = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, -9,
-9, -9, -9, -9, 0, 0, 0, 0, 0), label = "Subject is Uninsured - Barber Survey", format.stata = "%9.0g"),
ER_visits_uninsured = structure(c(NA, NA, NA, NA, NA, 0,
0, 0, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Number of ER Visits in Last 2 Years for Uninsured - Barber Survey", format.stata = "%9.0g"),
mistrust_5levels = structure(c(3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 1, 1, 1, 1, 1, 4, 4, 4, 4, 4), label = "Doctor Mistrust (1 is Lowest, 5 is Highest) - Barber Survey", format.stata = "%9.0g"),
med_mistrust = structure(c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 3, 3, 3, 3, 3), label = "Doctor Mistrust (1 is Lowest, 3 is Highest) - Barber Survey", format.stata = "%9.0g"),
age = structure(c(50, 50, 50, 50, 50, 44, 44, 44, 44, 44,
33, 33, 33, 33, 33, 35, 35, 35, 35, 35), label = "Subject Age - Barber Survey", format.stata = "%9.0g"),
age2 = structure(c(2500, 2500, 2500, 2500, 2500, 1936, 1936,
1936, 1936, 1936, 1089, 1089, 1089, 1089, 1089, 1225, 1225,
1225, 1225, 1225), label = "Subject Age Squared - Barber Survey", format.stata = "%9.0g"),
married = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Subject Is Married - Barber Survey", format.stata = "%9.0g"),
unemployed = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Subject Is Unemployed - Barber Survey", format.stata = "%9.0g"),
benefits = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Subject Receives DI/SSI/UB - Barber Survey", format.stata = "%9.0g"),
sl0 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1), label = "Subsidy Level: $0 - CTO Survey", format.stata = "%9.0g"),
sl5 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 0, 0, 0, 0, 0), label = "Subsidy Level: $5 - CTO Survey", format.stata = "%9.0g"),
sl10 = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), label = "Subsidy Level: $10 - CTO Survey", format.stata = "%9.0g"),
subsidy_level = structure(c(10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0), label = "Subsidy Level, Categorical - CTO Survey", format.stata = "%9.0g"),
black_dr = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Randomized to Black Doctor - CTO Survey", format.stata = "%9.0g"),
black0 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doctor & Subsidy Level: $0 - CTO Survey", format.stata = "%9.0g"),
black5 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doctor & Subsidy Level: $5 - CTO Survey", format.stata = "%9.0g"),
black10 = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doctor & Subsidy Level: $10 - CTO Survey", format.stata = "%9.0g"),
white0 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1), label = "White Doctor & Subsidy Level: $0 - CTO Survey", format.stata = "%9.0g"),
white5 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0), label = "White Doctor & Subsidy Level: $5 - CTO Survey", format.stata = "%9.0g"),
white10 = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "White Doctor & Subsidy Level: $10 - CTO Survey", format.stata = "%9.0g"),
any_subsidy = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Subject Received $5 or $10 Subsidy - CTO Survey", format.stata = "%9.0g"),
age5 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), label = "Subject Age Within 5 Years of Doctor's Age - Baseline Survey", format.stata = "%9.0g"),
age10 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), label = "Subject Age Within 10 Years of Doctor's Age - Baseline Survey", format.stata = "%9.0g"),
educ_conc = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Education Concordance: Subject Has BA or Higher - Baseline Survey", format.stata = "%9.0g"),
good_sa_health = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Subject Rated Health as Good, Very Good, or Excellent - Barber Survey", format.stata = "%9.0g"),
no_rec_scr_interval = structure(c(1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Subject No Recent Screenings in Recommended Interval - Barber Survey", format.stata = "%9.0g"),
millenial = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1), label = "Subject Is Less Than 40 - Barber Survey", format.stata = "%9.0g"),
HSless = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0), label = "Subject Has a High School Degree or Less - Barber Survey", format.stata = "%9.0g"),
low_income = structure(c(0, 0, 0, 0, 0, -9, -9, -9, -9, -9,
0, 0, 0, 0, 0, 1, 1, 1, 1, 1), label = "Household Has Income Below $5k/Year - Barber Survey", format.stata = "%9.0g"),
long_wait = structure(c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), label = "Subject Waited Longer Than 1 Hour to See Doctor - Barber Survey", format.stata = "%9.0g"),
high_congestion = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), label = "More Than 8 People in Waiting Room When Subject Arrived - Congestion", format.stata = "%9.0g"),
long_driv = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Commute Via Car Above Median (18 Mins) - Barber Distance", format.stata = "%9.0g"),
atrisk_cho = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 1, 1, 1, 1, 1), label = "Subject Recommended to Get Cholesterol Test - CTO Survey", format.stata = "%9.0g"),
atrisk_dia = structure(c(NA, NA, NA, NA, NA, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Subject Recommended to Get Diabetes Test - CTO Survey", format.stata = "%9.0g"),
excuses = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1), label = "Subject Gave Excuse for Not Receiving Services - Suubject Feedback", format.stata = "%10.0g"),
length_dr_note = structure(c(9, 9, 9, 9, 9, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 61, 61, 61, 61, 61), label = "Length (Number of Characters) of Doctor Notes - Encounter Survey", format.stata = "%9.0g"),
mentioned_PCP = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Subject Mentioned PCP in Clinic Notes - Suubject Feedback", format.stata = "%9.0g"),
bl_ER_2years = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Number of ER Visits in Last 2 Years - Barber Survey", format.stata = "%9.0g"),
bl_med_mistrust = structure(c(2, 2, 2, 2, 2, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Doctor Mistrust (1 is Lowest, 3 is Highest) - Barber Survey", format.stata = "%9.0g"),
bl_millenial = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Is Less Than 40 - Barber Survey", format.stata = "%9.0g"),
bl_HSless = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Has a High School Degree or Less - Barber Survey", format.stata = "%9.0g"),
bl_low_income = structure(c(0, 0, 0, 0, 0, NA, NA, NA, NA,
NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Household Has Income Below $5k/Year - Barber Survey", format.stata = "%9.0g"),
bl_long_wait = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), label = "Black Doc * Subject Waited Longer Than 1 Hour to See Doctor - Barber Survey", format.stata = "%9.0g"),
bl_long_driv = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Commute Via Car Above Median (18 Mins) - Barber Distance", format.stata = "%9.0g"),
bl_high_congest = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), label = "Black Doc * More Than 8 People in Waiting Room When Subject Arrived - Congestion", format.stata = "%9.0g"),
bl_atrisk_cho = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Recommended to Get Cholesterol Test - CTO Survey", format.stata = "%9.0g"),
bl_atrisk_dia = structure(c(NA, NA, NA, NA, NA, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Recommended to Get Diabetes Test - CTO Survey", format.stata = "%9.0g"),
bl_no_rec_scr_interval = structure(c(1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject No Recent Screenings in Recommended Interval - Barber Survey", format.stata = "%9.0g"),
bl_ER_visits_uninsured = structure(c(NA, NA, NA, NA, NA,
0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Black Doc * Number of ER Visits in Last 2 Years for Uninsured - Barber Survey", format.stata = "%9.0g"),
bl_educ_conc = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Education Concordance: Subject Has BA or Higher - Baseline Survey", format.stata = "%9.0g"),
bl_age5 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Age Within 5 Years of Doctor's Age - Baseline Survey", format.stata = "%9.0g"),
bl_age10 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Age Within 10 Years of Doctor's Age - Baseline Survey", format.stata = "%9.0g"),
bl_sl10 = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subsidy Level: $10 - CTO Survey", format.stata = "%9.0g"),
RO_id = structure(c(6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3), label = "Blind ID, Reception Officer - Encounter Survey", format.stata = "%9.0g"),
location_id = structure(c(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9), label = "Blind ID, Recruitment Location - Barber Survey", format.stata = "%9.0g"),
date_visit_id = structure(c(2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1), label = "Blind ID, Date of Clinic Visit - Encounter Survey", format.stata = "%9.0g"),
tag = structure(c(0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0), label = "Tag for Study ID", format.stata = "%8.0g"),
bmi_test = structure(c(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 0), label = "Tag for BMI Test", format.stata = "%9.0g"),
bp_test = structure(c(0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
0, 0, 0, 0, 1, 0, 0, 0), label = "Tag for Blood Pressure Test", format.stata = "%9.0g"),
dia_test = structure(c(0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 0), label = "Tag for Diabetes Test", format.stata = "%9.0g"),
cho_test = structure(c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0), label = "Tag for Cholesterol Test", format.stata = "%9.0g"),
flu_test = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
0, 0, 1, 0, 0, 0, 0, 1), label = "Tag for Flu Shot", format.stata = "%9.0g"),
any_invasive = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Subject Chose At Least One Invasive Screening", format.stata = "%9.0g"),
bl_any_invasive = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Chose At Least One Invasive Screening", format.stata = "%9.0g"),
preind_test = structure(c(0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Indicator for Ex pre Selection - Stacked Subject X Test", format.stata = "%9.0g"),
postind_test = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0), label = "Indicator for Ex Post Selection - Stacked Subject X Test", format.stata = "%9.0g"),
bl_bp_test = structure(c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Tag for Blood Pressure Test", format.stata = "%9.0g"),
bl_bmi_test = structure(c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Tag for BMI Test", format.stata = "%9.0g"),
bl_dia_test = structure(c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Tag for Diabetes Test", format.stata = "%9.0g"),
bl_flu_test = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Tag for Flu Shot", format.stata = "%9.0g"),
bl_cho_test = structure(c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Tag for Cholesterol Test", format.stata = "%9.0g"),
missing_age = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for age", format.stata = "%9.0g"),
missing_HSless = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for HSless", format.stata = "%9.0g"),
missing_low_income = structure(c(0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for low_income", format.stata = "%9.0g"),
missing_has_PCP = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for has_PCP", format.stata = "%9.0g"),
missing_uninsured = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Missing Indicator for uninsured", format.stata = "%9.0g"),
missing_age2 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for age2", format.stata = "%9.0g"),
missing_good_sa_health = structure(c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for good_sa_health", format.stata = "%9.0g")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"), label = "Main Oakland Clinic Analysis Dataset")

I changed geom_bar to geom_histogram and that solved the problem. It's an alternative that works.

Related

Creating column that takes mean of X for specific group id when date1 < date2

in (R)
for an event study I'm trying to create a column that calculates the mean of ccu_avg for a specific combination of appid and Eventdate1. One appid has multiple events so it has to be divided by both appid and Eventdate1.
The difficult part is that I want the mean calculated only over the rows before the event date, because the estimation period stops once the event has happened.
The new column should look like est_ccu_avg:
picture of the dataset below for explanation
https://i.stack.imgur.com/ZPquW.png
Could someone help me figure the code for this out? I've been trying for hours and can't seem to get it to work.
I've now been trying things like this but without success:
study <- study %>%
mutate(est_ccu_avg=
mean(study[unique(study$appid) | study$Eventdate1 >
study$datefinal, "ccu_avg"])
)
Result of dput head:
structure(list(appid = c("105600", "105600", "105600", "105600",
"105600", "105600"), name = c("Terraria", "Terraria", "Terraria",
"Terraria", "Terraria", "Terraria"), ccu_avg = c(26825, 29058,
37842, 37525, 26484, 24377), ccu_min = c(21176, 21620, 28954,
32880, 19648, 19118), ccu_max = c(35827, 41322, 50012, 44071,
33241, 32060), pos_max = c(356186, 356363, 356508, 356712, 356921,
357092), neg_max = c(6756, 6756, 6758, 6768, 6766, 6768), Maj_Upt =
c(0,
0, 0, 0, 0, 0), Min_Upt = c(0, 0, 0, 0, 0, 0), Hotfix = c(0,
0, 0, 0, 0, 0), Bugfix = c(0, 0, 0, 0, 0, 0), Balance = c(0,
0, 0, 0, 0, 0), ExpBranch = c(0, 0, 0, 0, 0, 0), Promo = c(0,
1, 0, 0, 0, 0), Ev_Out = c(0, 0, 0, 0, 0, 0), Ev_In = c(0, 0,
0, 0, 0, 0), isfree = c(0, 0, 0, 0, 0, 0), developers1 = c("Re-
Logic",
"Re-Logic", "Re-Logic", "Re-Logic", "Re-Logic", "Re-Logic"),
publishers1 = c("Re-Logic", "Re-Logic", "Re-Logic", "Re-Logic",
"Re-Logic", "Re-Logic"), metascore = c(83, 83, 83, 83, 83,
83), singleplayer = c(1, 1, 1, 1, 1, 1), multiplayer = c(1,
1, 1, 1, 1, 1), coop = c(1, 1, 1, 1, 1, 1), mmo = c(0, 0,
0, 0, 0, 0), indie = c(1, 1, 1, 1, 1, 1), single_player_gen = c(0,
0, 0, 0, 0, 0), adventure = c(1, 1, 1, 1, 1, 1), casual = c(0,
0, 0, 0, 0, 0), strategy = c(0, 0, 0, 0, 0, 0), rpg = c(1,
1, 1, 1, 1, 1), simulation = c(0, 0, 0, 0, 0, 0), multi_player_gen =
c(0,
0, 0, 0, 0, 0), shooter = c(0, 0, 0, 0, 0, 0), platformer = c(0,
0, 0, 0, 0, 0), ea_min = c(0, 0, 0, 0, 0, 0), ea_max = c(0,
0, 0, 0, 0, 0), scifi = c(0, 0, 0, 0, 0, 0), sports = c(0,
0, 0, 0, 0, 0), racing = c(0, 0, 0, 0, 0, 0), inappurchase = c(0,
0, 0, 0, 0, 0), workshop = c(0, 0, 0, 0, 0, 0), f_release_date =
c("May 16, 2011",
"May 16, 2011", "May 16, 2011", "May 16, 2011", "May 16, 2011",
"May 16, 2011"), l_release_date = c("May 16, 2011", "May 16, 2011",
"May 16, 2011", "May 16, 2011", "May 16, 2011", "May 16, 2011"
), datefinal = structure(c(18942, 18943, 18944, 18945, 18946,
18947), class = "Date"), Eventdate = c("", "", "", "", "",
""), Eventdate1 = structure(c(18949, 18949, 18949, 18949,
18949, 18949), class = "Date"), est_ccu_avg = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
I figured it out, there probably is an easier way but this is how I did it:
# Keep only the estimation period: rows whose datefinal is strictly before
# the event date.
estmeans <- study[study$Eventdate1 > study$datefinal, ]
# Mean of ccu_avg for every appid / Eventdate1 combination. Naming the
# elements of the `by` list names the grouping columns in the result.
studymeans <- aggregate(
  estmeans$ccu_avg,
  list(appid = estmeans$appid, Eventdate1 = estmeans$Eventdate1),
  mean
)
# aggregate() calls the value column "x"; give it its final name.
names(studymeans)[3] <- "est_ccu_avg"
# Drop any empty est_ccu_avg placeholder already present in `study`, so the
# merge yields a single est_ccu_avg column instead of an
# est_ccu_avg.x / est_ccu_avg.y pair.
study_cols <- setdiff(names(study), "est_ccu_avg")
# merge() keeps only rows whose appid / Eventdate1 pair has a computed mean.
studynew <- merge(study[study_cols], studymeans, by = c("appid", "Eventdate1"))
When using data.table, you can use the special symbol .BY to refer to the current group's values of the grouping variables.
library(data.table)
# For each (appid, Eventdate1) group, average ccu_avg over the rows whose
# datefinal is on or before that group's Eventdate1, read from .BY.
# setDT() converts df to a data.table by reference.
setDT(df)[, mean(ccu_avg[datefinal<=.BY$Eventdate1]), by=.(appid, Eventdate1)]
The equivalent in dplyr is cur_group().
# dplyr version: cur_group() returns the keys of the current group, so each
# group's mean uses only the rows dated on or before its own Eventdate1.
df %>%
  group_by(appid, Eventdate1) %>%
  summarize(res = mean(ccu_avg[datefinal <= cur_group()$Eventdate1]))

Changing a character column into a continuous column, by dividing them into sections (1,2,3,4)

I have a data set I'm trying to run a glm regression on; however, it contains character columns for age limit, race, and comorbidity class. I would like to change those columns into continuous variables so the regression can accept them. With the data below, I want to recode TBI.iracecat2 as (Hispanic=1, Black=2, White=3, other=4), and likewise age (Age 18-28=1, 29-46=2, 47-64=3, >64=4) and NISS (NISS 0-10=1, NISS 11-20=2, NISS 21-30=3, NISS 31-40=4, NISS 41-50=5, NISS 51-60=6, NISS 61-70=7, NISS >70=8).
Please find summary of data below
TBI.crani = c(0, 0, 0, 0, 0, 0), TBI.vte = c(0,
0, 0, 0, 0, 0), TBI.FEMALE = c(0, 0, 1, 0, 1, 0), TBI.iracecat2 = c("Whites",
"Whites", "Whites", "Hispanics", "Whites", "Blacks"), TBI.agecat = c("Age 47-64",
"Age 29-46", "Age > 64", "Age 29-46", "Age 18-28", "Age 18-28"
), TBI.nisscategory = c("NISS 21-30", "NISS 11-20", "NISS 21-30",
"NISS 11-20", "NISS 11-20", "NISS 0-10"), TBI.LOS = c(5, 8, 1,
3, 19, 1), TBI.hospitalteach = c(0, 0, 1, 1, 1, 1), TBI.largebedsize = c(1,
1, 1, 1, 1, 1), TBI.CM_ALCOHOL = c(0, 0, 0, 1, 0, 0), TBI.CM_ANEMDEF = c(0,
0, 0, 0, 0, 0), TBI.CM_BLDLOSS = c(0, 0, 0, 0, 0, 0), TBI.CM_CHF = c(1,
0, 0, 0, 0, 0), TBI.CM_CHRNLUNG = c(0, 0, 0, 0, 0, 0), TBI.CM_COAG = c(0,
0, 0, 0, 1, 0), TBI.CM_HYPOTHY = c(0, 0, 0, 0, 0, 0), TBI.CM_LYTES = c(0,
0, 0, 0, 0, 0), TBI.CM_METS = c(0, 0, 0, 0, 0, 0), TBI.CM_NEURO = c(0,
0, 0, 0, 0, 0), TBI.CM_OBESE = c(0, 0, 0, 0, 0, 0), TBI.CM_PARA = c(0,
0, 0, 0, 0, 0), TBI.CM_PSYCH = c(0, 1, 0, 0, 0, 0), TBI.CM_TUMOR = c(0,
0, 0, 0, 0, 0), TBI.CM_WGHTLOSS = c(0, 0, 0, 0, 0, 0), TBI.UTI = c(0,
0, 0, 0, 0, 0), TBI.pneumonia = c(0, 0, 0, 0, 0, 0), TBI.AMI = c(0,
0, 0, 0, 0, 0), TBI.sepsis = c(0, 0, 0, 0, 0, 0), TBI.arrest = c(0,
0, 0, 0, 0, 0), TBI.spineinjury = c(0, 0, 0, 0, 0, 0), TBI.legfracture = c(0,
0, 0, 0, 0, 0), TBI_time_to_surg.NEW = c(0, 0, 0, 0, 0, 0)), row.names = c(NA,
6L), class = "data.frame")
A small tip: provide a sample data set that is just big enough to illustrate your question.
library(data.table)
# took a small sample and changed one value to Asian
dt <- data.table(
  TBI.FEMALE = c(0, 0, 1, 0, 1, 0),
  TBI.iracecat2 = c(
    "Whites", "Whites", "Asian", "Hispanics", "Whites", "Blacks"
  )
)
# define race groups, and note I did not define Asian
convert_race <- c("Hispanics" = 1, "Blacks" = 2, "Whites" = 3) # anything else is left undefined
# Vectorised lookup by name: labels that are not in convert_race come back as
# NA. unname() drops the lookup names so the column holds plain numbers.
dt[, TBI.irace2 := unname(convert_race[TBI.iracecat2])]
# Every label without a defined code falls into the "other" group.
dt[is.na(TBI.irace2), TBI.irace2 := 4]
dt
# TBI.FEMALE TBI.iracecat2 TBI.irace2
# 1: 0 Whites 3
# 2: 0 Whites 3
# 3: 1 Asian 4
# 4: 0 Hispanics 1
# 5: 1 Whites 3
# 6: 0 Blacks 2

t.test outputs in the `tables` package in R

So here's a sample of the data I am working with:
> dput(candidateEvokeDFYoung)
structure(list(youngTreatment = structure(c(NA, 1, 0, 1, 0, 1,
0, 1, 1, 0, 1, 1, 0, 0, NA, NA, NA, NA, 1, 1), format.stata = "%10.0g"),
candTrustworthy = structure(c(0, 0, 0, 0, 0, 0, 0, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%10.0g"),
candKnowledgeable = structure(c(1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), format.stata = "%10.0g"),
candQualified = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1), format.stata = "%10.0g"),
candConservative = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), format.stata = "%10.0g"),
candLiberal = structure(c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%10.0g"), candInexperienced = structure(c(0,
1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0), format.stata = "%10.0g"),
candPrincipled = structure(c(1, 1, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 1, 1, 1, 0, 0, 0, 0), format.stata = "%10.0g"),
candDistance = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0), format.stata = "%10.0g"),
candEfficacy = structure(c(1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 0, 0), format.stata = "%10.0g")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
What I am trying to do is generate a table using the tables package with results from a t.test. The trouble I am having is I've taken this dataset and have used lapply to calculate my t.tests on each of the variables with youngTreatment as my 'y' variable:
# For every column except the first (youngTreatment itself), run t.test() of
# that column split by youngTreatment. The result is a list with one test
# result per outcome column, named after the columns.
candidateEvokesDiffYoung <- lapply(
  X = candidateEvokeDFYoung[-1],
  FUN = function(x) {
    t.test(x ~ candidateEvokeDFYoung$youngTreatment)
  }
)
This gives me a list of lists. I have no clue how to use tables::tabular to access
list[['statistic']]
and
list[['p.value']]
I could definitely just manually pull all of these out myself and put it in a dataframe for stargazer or something, but I was wondering if there was someone who knew how I could do this more efficiently and with the tables package.
t.test returns objects of the class htest. I believe the best way to gather the results of an object of the class htest is to use the function tidy of the package broom.
library(broom)
# One t.test() per outcome column (everything but the first column), each
# split by youngTreatment.
candidateEvokesDiffYoung <- lapply(
  X = candidateEvokeDFYoung[-1],
  FUN = function(x) t.test(x ~ candidateEvokeDFYoung$youngTreatment)
)
# tidy() summarises each test result; sapply() lines the summaries up with one
# column per test, so transpose to get one row per outcome variable and one
# column per summary field (e.g. "p.value").
m <- t(sapply(X = candidateEvokesDiffYoung, FUN = tidy))
This will allow you to refer to the elements in a way similar to what you seem to be trying to do.
> m["candTrustworthy", "p.value"][[1]]
[1] 0.7875872
> unlist(m[, "p.value"])
candTrustworthy candKnowledgeable candQualified candConservative candLiberal candInexperienced candPrincipled candDistance candEfficacy
0.7875872 0.7875872 0.7875872 0.3632175 0.3465935 0.6933006 0.3790778 NaN 0.3632175

Hierarchical clustering of a time-series

I am struggling with hierarchical clustering. I have the following time series and I want to cluster it based on time. Would the transpose function work for this?
structure(list(`04:00` = c(0, 0, 0, 0, 0, 0), `04:10` = c(0,
0, 0, 0, 0, 0), `04:20` = c(0, 0, 0, 0, 0, 0), `04:30` = c(0,
0, 0, 0, 0, 0), `04:40` = c(0, 0, 0, 0, 0, 0), `04:50` = c(0,
0, 0, 0, 0, 0), `05:00` = c(0, 0, 0, 0, 0, 0), `05:10` = c(0,
0, 0, 0, 0, 0), `05:20` = c(0, 0, 0, 0, 0, 0), `05:30` = c(0,
0, 0, 0, 0, 0), `05:40` = c(0, 0, 0, 0, 0, 0), `05:50` = c(1,
0, 0, 0, 0, 0), `06:00` = c(1, 0, 0, 0, 0, 0), `06:10` = c(1,
0, 0, 0, 0, 0), `06:20` = c(2, 0, 0, 0, 0, 0), `06:30` = c(0,
0, 0, 0, 0, 0), `06:40` = c(0, 1, 0, 0, 0, 0), `06:50` = c(0,
2, 0, 0, 0, 1), `07:00` = c(0, 0, 0, 0, 0, 2), `07:10` = c(0,
0, 1, 0, 0, 2), `07:20` = c(0, 0, 0, 0, 0, 2), `07:30` = c(0,
0, 1, 0, 0, 0), `07:40` = c(1, 0, 1, 0, 0, 0), `07:50` = c(1,
0, 0, 0, 2, 0), `08:00` = c(1, 0, 0, 0, 0, 0), `08:10` = c(1,
0, 0, 0, 0, 0), `08:20` = c(2, 0, 0, 0, 0, 0), `08:30` = c(2,
0, 0, 0, 0, 0), `08:40` = c(2, 0, 0, 0, 0, 0), `08:50` = c(2,
0, 0, 0, 0, 0), `09:00` = c(0, 0, 0, 0, 0, 0), `09:10` = c(0,
0, 0, 0, 0, 0), `09:20` = c(0, 1, 0, 0, 0, 0), `09:30` = c(0,
1, 0, 2, 0, 0), `09:40` = c(0, 1, 0, 0, 0, 0), `09:50` = c(0,
1, 0, 0, 0, 0), `10:00` = c(0, 0, 0, 0, 0, 0), `10:10` = c(0,
0, 0, 0, 0, 0), `10:20` = c(0, 1, 0, 0, 0, 0), `10:30` = c(0,
1, 0, 0, 0, 0), `10:40` = c(0, 0, 0, 0, 0, 0), `10:50` = c(0,
0, 0, 0, 0, 0), `11:00` = c(2, 0, 0, 1, 0, 0), `11:10` = c(0,
0, 0, 1, 0, 0), `11:20` = c(0, 0, 0, 1, 0, 1), `11:30` = c(0,
0, 0, 1, 0, 1), `11:40` = c(0, 0, 0, 1, 0, 1), `11:50` = c(0,
0, 0, 1, 0, 0), `12:00` = c(0, 0, 0, 1, 2, 0), `12:10` = c(0,
0, 0, 1, 0, 0), `12:20` = c(0, 0, 0, 1, 0, 0), `12:30` = c(0,
0, 0, 1, 0, 0), `12:40` = c(0, 0, 0, 1, 0, 0), `12:50` = c(0,
0, 0, 1, 1, 0), `13:00` = c(0, 0, 0, 0, 1, 0), `13:10` = c(0,
0, 0, 0, 1, 0), `13:20` = c(0, 0, 0, 0, 1, 0), `13:30` = c(0,
0, 0, 0, 1, 0), `13:40` = c(0, 0, 0, 0, 1, 0), `13:50` = c(0,
0, 0, 0, 1, 0), `14:00` = c(0, 0, 0, 0, 1, 0), `14:10` = c(0,
0, 0, 0, 1, 0), `14:20` = c(0, 0, 0, 0, 1, 0), `14:30` = c(0,
0, 0, 0, 1, 0), `14:40` = c(0, 0, 0, 0, 1, 0), `14:50` = c(0,
0, 0, 0, 0, 0), `15:00` = c(0, 0, 0, 0, 0, 0), `15:10` = c(0,
2, 0, 0, 0, 0), `15:20` = c(0, 2, 0, 0, 1, 0), `15:30` = c(0,
2, 0, 0, 1, 1), `15:40` = c(0, 2, 0, 0, 1, 0), `15:50` = c(0,
2, 0, 0, 1, 0), `16:00` = c(0, 2, 0, 0, 1, 0), `16:10` = c(0,
2, 0, 0, 1, 0), `16:20` = c(2, 2, 0, 0, 1, 0), `16:30` = c(2,
2, 0, 0, 1, 2), `16:40` = c(2, 2, 0, 0, 1, 1), `16:50` = c(2,
2, 0, 0, 0, 1), `17:00` = c(0, 2, 0, 0, 2, 0), `17:10` = c(0,
0, 0, 0, 2, 0), `17:20` = c(0, 0, 0, 0, 2, 0), `17:30` = c(0,
0, 0, 0, 2, 0), `17:40` = c(0, 0, 0, 0, 0, 0), `17:50` = c(0,
0, 0, 0, 0, 0), `18:00` = c(0, 2, 0, 0, 0, 2), `18:10` = c(0,
2, 0, 0, 0, 2), `18:20` = c(0, 0, 0, 0, 2, 2), `18:30` = c(0,
0, 0, 0, 0, 2), `18:40` = c(0, 0, 0, 0, 0, 2), `18:50` = c(1,
0, 0, 0, 0, 2), `19:00` = c(1, 0, 0, 1, 1, 0), `19:10` = c(1,
0, 0, 1, 1, 0), `19:20` = c(1, 0, 0, 1, 1, 0), `19:30` = c(1,
0, 1, 1, 1, 0), `19:40` = c(1, 0, 1, 1, 1, 1), `19:50` = c(1,
0, 1, 1, 1, 1), `20:00` = c(0, 0, 1, 1, 1, 1), `20:10` = c(0,
0, 1, 1, 1, 1), `20:20` = c(0, 0, 1, 1, 1, 1), `20:30` = c(0,
1, 2, 1, 1, 1), `20:40` = c(0, 1, 0, 1, 1, 1), `20:50` = c(0,
1, 0, 1, 1, 1), `21:00` = c(0, 1, 0, 1, 1, 1), `21:10` = c(0,
1, 0, 0, 1, 1), `21:20` = c(0, 1, 0, 0, 1, 1), `21:30` = c(0,
1, 1, 0, 1, 1), `21:40` = c(0, 1, 1, 0, 1, 1), `21:50` = c(0,
1, 1, 0, 0, 1), `22:00` = c(0, 1, 1, 0, 0, 0), `22:10` = c(0,
1, 0, 0, 0, 0), `22:20` = c(0, 1, 0, 0, 0, 0), `22:30` = c(0,
1, 0, 0, 0, 0), `22:40` = c(0, 1, 0, 0, 0, 0), `22:50` = c(0,
1, 0, 0, 0, 0), `23:00` = c(0, 0, 0, 0, 1, 0), `23:10` = c(0,
0, 0, 0, 0, 1), `23:20` = c(0, 0, 0, 0, 0, 1), `23:30` = c(0,
0, 0, 0, 0, 1), `23:40` = c(0, 0, 0, 0, 0, 1), `23:50` = c(0,
0, 0, 0, 0, 0), `00:00` = c(0, 0, 0, 0, 0, 0), `00:10` = c(0,
0, 0, 0, 0, 0), `00:20` = c(0, 0, 0, 0, 0, 0), `00:30` = c(0,
0, 0, 0, 0, 0), `00:40` = c(0, 0, 0, 0, 0, 0), `00:50` = c(0,
0, 0, 0, 0, 0), `01:00` = c(0, 0, 0, 0, 0, 0), `01:10` = c(0,
0, 0, 0, 0, 0), `01:20` = c(0, 0, 0, 0, 0, 0), `01:30` = c(0,
0, 0, 0, 0, 0), `01:40` = c(0, 0, 0, 0, 0, 0), `01:50` = c(0,
0, 0, 0, 0, 0), `02:00` = c(0, 0, 0, 0, 0, 0), `02:10` = c(0,
0, 0, 0, 0, 0), `02:20` = c(0, 0, 0, 0, 0, 0), `02:30` = c(0,
0, 0, 0, 0, 0), `02:40` = c(0, 0, 0, 0, 0, 0), `02:50` = c(0,
0, 0, 0, 0, 0), `03:00` = c(0, 0, 0, 0, 0, 0), `03:10` = c(0,
0, 0, 0, 0, 0), `03:20` = c(0, 0, 0, 0, 0, 0), `03:30` = c(0,
0, 0, 0, 0, 0), `03:40` = c(0, 0, 0, 0, 0, 0), `03:50` = c(0,
0, 0, 0, 0, 0)), row.names = c("1", "2", "3", "4", "5", "6"), class = "data.frame")
I managed to run hierarchical clustering but only on cases and not on time
# Pairwise distances between the rows of df (the cases), using dist()'s
# default method.
d_distance <- dist(x = as.matrix(df))
# Cluster the cases hierarchically and draw the resulting dendrogram.
plot(x = hclust(d = d_distance))
The plot that I generated
As you can see in the plot, the end points of the structure are row indexes - how can I have times there instead of indexes (maybe by transposing)? I would also like to plot each time-series cluster separately, as in the plot below. Would dtw be better than hierarchical clustering?

convert a data frame into tensor in R

I have a data frame which contains thousands of firms from 1998 to 2007 (each firm does not necessarily cover the same number of years), and I want to convert it into a tensor indexed by firm, year, and variable.
How can I achieve this?
I don't know how to extract a small part of this data set to put here so we can discuss the problem; does anyone know how to do it?
structure(list(year = c(1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998), firmid = c("QB3732337",
"113810712", "618851819", "619457768", "HU5176905", "618024813",
"617883552", "105679742", "230141773", "609442909", "HU6355534",
"617882832", "60088847X", "617881725", "618403506", "145665417",
"216582994", "14405557X", "103400293", "607369138", "617737408",
"177160683", "142418787", "245560903", "15112238X", "617880650",
"618354214", "226040099", "128955068", "61156047X", "617481385",
"226091312", "190380322", "617880255", "611567073", "GD6120293",
"617876061", "617875739", "126925703", "221461337", "614801582",
"617883931", "618129447", "101713181", "611209524", "617883974",
"706747835", "242727819", "608934944", "619723894", "139432377",
"152102399", "617866832", "614407067", "607282008", "117944574",
"617865629", "618354185", "228791275", "134789270", "113810632",
"EJ2468142", "169832427", "115319804", "602003890", "211551128",
"193929448", "105044755", "704448663", "21960081X"), provinceid = c(420000,
140000, 440000, 450000, 420000, 440000, 440000, 130000, 650000,
330000, 420000, 440000, 120000, 440000, 440000, 330000, 530000,
330000, 120000, 310000, 440000, 410000, 320000, 230000, 340000,
440000, 440000, 620000, 230000, 350000, 440000, 620000, 440000,
440000, 350000, 440000, 440000, 440000, 220000, 610000, 410000,
440000, 440000, 110000, 350000, 440000, 410000, 210000, 320000,
450000, 320000, 340000, 440000, 410000, 310000, 210000, 440000,
440000, 650000, 320000, 140000, 330000, 370000, 150000, 140000,
510000, 440000, 130000, 330000, 530000), industrycode2 = c(3400,
3500, 2900, 1900, 1500, 2200, 1400, 3600, 1500, 4000, 1500, 3000,
2400, 2100, 1800, 1300, 2900, 4000, 3600, 2300, 1900, 3700, 2200,
3400, 2600, 1800, 2400, 1300, 1800, 2400, 1900, 3100, 1400, 1700,
2400, 3400, 2600, 2600, 1400, 2600, 3100, 1800, 3100, 1400, 2600,
3300, 1300, 2200, 3000, 3100, 4100, 3000, 1500, 1400, 3500, 3500,
3700, 2600, 2300, 3200, 1700, 4000, 4200, 3600, 2500, 1300, 3500,
3600, 1700, 2600), sales = c(45860, 4050, 17034, 154721, 267,
7703, 47572, 846, 267, 5132, 1767, 8354, 5668, 75330, 8935, 1958,
154721, 13072, 10654, 40505, 20637, 1510, 12884, 10753, 45542,
5286, 27492, 267, 1557, 872, 10892, 1386, 32054, 7290, 6903,
8263, 6996, 12848, 460, 44823, 52000, 16353, 6225, 750, 10863,
35110, 10638, 154721, 18100, 16773, 2415, 8686, 14362, 19831,
46958, 1340, 79855, 61817, 1114, 154721, 7030, 9923, 599, 4060,
154721, 361, 72986, 445, 18080, 3682), cogs = c(44780, 2430,
13839, 144088, 246, 9310, 37863, 495, 52, 4170, 1582, 7416, 3964,
58090, 8639, 1667, 211569, 8066, 4960, 28399, 19831, 1280, 12564,
7540, 37058, 1855, 25519, 70, 1539, 700, 10398, 1190, 25048,
6779, 5500, 7656, 6078, 12519, 370, 39479, 26816, 16586, 6061,
534, 10064, 32783, 8519, 308403, 16000, 23833, 1282, 6918, 12097,
15663, 35182, 768, 76005, 58528, 775, 4362410, 5770, 9040, 417,
2630, 167668, 290, 64038, 306, 15898, 2511), inventory = c(2740,
280, 1950, 46914, 711, 9552, 3984, 4989, 497, 1249, 0, 4336,
1450, 3000, 284, 0, 134404, 5881, 9347, 4818, 1744, 377, 376,
12238, 11669, 835, 17355, 226, 1370, 360, 434, 1089, 12154, 4000,
2388, 7257, 1547, 808, 137, 5920, 8750, 5600, 179, 151, 1321,
3454, 5479, 135303, 7480, 5943, 565, 850, 3032, 1207, 11307,
474, 2574, 26104, 519, 604670, 400, 501, 106, 7040, 43568, 711,
6763, 558, 444, 564), fixedasset = c(8580, 460, 6750, 28874,
2878, 25901, 43081, 3065, 198, 1163, 2140, 8484, 1688, 6900,
631, 1290, 849666, 6545, 10075, 6658, 3089, 581, 114, 22299,
22499, 3967, 54033, 1106, 883, 435, 404, 1712, 29329, 7952, 3176,
10272, 533, 138, 854, 14151, 64252, 10672, 5023, 62, 213, 1068,
572, 1115119, 14090, 71451, 13017, 5390, 6657, 5840, 31943, 80,
26145, 41905, 517, 3801800, 1164, 1725, 220, 15550, 72000, 825,
4697, 1913, 735, 3415), totalasset = c(13610, 3220, 16090, 166501,
14319, 44739, 78920, 10394, 823, 4698, 3101, 25325, 4221, 14900,
3118, 1724, 1091978, 28912, 28272, 27222, 10000, 1178, 1413,
42394, 52156, 11284, 89191, 1582, 6514, 3531, 1495, 3978, 54618,
22352, 13733, 27088, 3247, 1450, 1164, 33419, 129957, 18000,
20163, 266, 3728, 15286, 17337, 1718823, 25650, 94590, 15418,
8430, 12425, 10060, 75576, 991, 46436, 75405, 1973, 5976610,
4604, 5720, 1327, 43440, 248715, 1710, 31723, 2799, 4616, 5417
), stateshare = c(0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,
1, 0.518154501914978, 1, 1, 0, 0, 1, 1, 0.699999988079071, 0.831946730613708,
0, 0, 1, 1, 0, 0, 1, 0.200000002980232, 0, 0, 0, 0, 0, 1, 1,
0.400013834238052, 0, 0, 0.25, 0, 0.400000005960464, 0, 1, 0,
0, 1, 1, 0.823567152023315, 0, 0, 1, 0.742925107479095, 0, 1,
0.661562383174896, 1, 0, 1, 1, 0, 1, 0, 1, 0.749309420585632,
1), foreignshare = c(0.571428596973419, 0, 1, 0.385093629360199,
0, 0.5, 1, 0, 0, 0.30011722445488, 0, 0.699992954730988, 1, 1,
0.5, 0, 0, 0, 0, 1, 0.300029307603836, 0, 0, 0, 0, 0.782930612564087,
1, 0, 0, 1, 1, 0, 0.416000008583069, 1, 0.899999976158142, 1,
1, 1, 0, 0, 0.59998619556427, 0.700012564659119, 0.233907759189606,
0, 1, 0.600000023841858, 1, 0, 0.509767174720764, 0.299807518720627,
0, 0, 0, 1, 0.531239151954651, 0, 0.257074922323227, 1, 0, 0.245536029338837,
0, 0.285785287618637, 0, 0, 0.245354115962982, 0, 0.219982624053955,
0, 0.25069060921669, 0), privateshare = c(0.428571432828903,
0, 0, 0.614906370639801, 0, 0.5, 0, 0, 0, 0.699882745742798,
0, 0.300007075071335, 0, 0, 0.5, 0, 0.481845527887344, 0, 0,
0, 0.699970722198486, 0, 0, 0.300000011920929, 0.168053239583969,
0.217069372534752, 0, 0, 0, 0, 0, 0, 0.38400000333786, 0, 0.100000001490116,
0, 0, 0, 0, 0, 0, 0.299987435340881, 0.766092240810394, 0.75,
0, 0, 0, 0, 0.490232825279236, 0.700192511081696, 0, 0, 0.176432847976685,
0, 0.468760877847672, 0, 0, 0, 0, 0.0929015725851059, 0, 0.714214682579041,
0, 0, 0.754645884037018, 0, 0.780017375946045, 0, 0, 0), stateown = c(0,
1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1,
0, 1, 0, 1, 0, 1), foreignown = c(0, 0, 1, 0, 0, 0, 1, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), privateown = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), mixown = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1,
0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0), stateonly = c(0,
1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1,
0, 1, 0, 1, 0, 0), mixonly = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0,
1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0), foreignonly = c(0,
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), privateonly = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), gs = c(0,
1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1,
0, 1, 0, 1, 0, 1), gm = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1,
0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0,
1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0), gf = c(0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), privatize = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), IR = c(0.061188030987978,
0.115226335823536, 0.140906140208244, 0.325592696666718, 2.8902440071106,
1.02599358558655, 0.105221457779408, 3.07859539985657, 3.07859539985657,
0.299520373344421, 0.0191550496965647, 0.584681749343872, 0.365792125463486,
0.0516440011560917, 0.0328741744160652, 0.0191550496965647, 0.635272681713104,
0.729109823703766, 1.8844758272171, 0.169653862714767, 0.0879431217908859,
0.294531255960464, 0.0299267750233412, 1.62307691574097, 0.314884781837463,
0.450134783983231, 0.680081486701965, 3.07859539985657, 0.890188455581665,
0.514285743236542, 0.0417387969791889, 0.915126025676727, 0.485228359699249,
0.590057551860809, 0.434181809425354, 0.947884023189545, 0.254524528980255,
0.0645418986678123, 0.370270282030106, 0.14995314180851, 0.326297730207443,
0.337634146213531, 0.0295330807566643, 0.282771527767181, 0.131259933114052,
0.105359487235546, 0.643150627613068, 0.438721418380737, 0.467500001192093,
0.249360129237175, 0.44071763753891, 0.12286788225174, 0.250640660524368,
0.0770605877041817, 0.321385949850082, 0.6171875, 0.0338661931455135,
0.446008741855621, 0.669677436351776, 0.138609156012535, 0.0693240910768509,
0.0554203540086746, 0.254196643829346, 2.67680597305298, 0.259846836328506,
2.4517240524292, 0.10560917109251, 1.82352936267853, 0.0279280412942171,
0.224611714482307), GM = c(0.0241179093718529, 0.666666686534882,
0.230869278311729, 0.25, 0, -0.0678684562444687, 0.256424486637115,
0.709090888500214, 0.0769230797886848, 0.230695441365242, 0.116940580308437,
0.126483276486397, 0.429868817329407, 0.296780854463577, 0.0342632234096527,
0.174565091729164, 0.25985848903656, 0.620629787445068, 0.807692289352417,
0.426282614469528, 0.0406434386968613, 0.1796875, 0.0254695955663919,
0.426127314567566, 0.228938415646553, 0.807692289352417, 0.0773149430751801,
-0.0678684562444687, 0.0116959065198898, 0.245714291930199, 0.0475091375410557,
0.164705887436867, 0.279702961444855, 0.0753798484802246, 0.255090922117233,
0.0792842209339142, 0.151036530733109, 0.0262800548225641, 0.243243247270584,
0.135363101959229, 0.807692289352417, -0.0140479924157262, 0.0270582418888807,
0.404494374990463, 0.0793918892741203, 0.0709819123148918, 0.24873811006546,
0.0649604573845863, 0.131249994039536, -0.0678684562444687, 0.807692289352417,
0.255565196275711, 0.187236502766609, 0.266104847192764, 0.334716618061066,
0.744791686534882, 0.0506545640528202, 0.0561953261494637, 0.437419354915619,
0.0327548310160637, 0.218370884656906, 0.0976769924163818, 0.436450839042664,
0.54372626543045, 0.595140397548676, 0.244827583432198, 0.139729529619217,
0.454248368740082, 0.137249961495399, 0.466348081827164), CI = c(0.630418837070465,
0.142857149243355, 0.41951522231102, 0.173416376113892, 0.200991690158844,
0.578935623168945, 0.545881927013397, 0.294881671667099, 0.240583226084709,
0.247552156448364, 0.690099954605103, 0.335004925727844, 0.399905234575272,
0.463087260723114, 0.202373310923576, 0.748259842395782, 0.778098106384277,
0.226376593112946, 0.356359660625458, 0.24458159506321, 0.308899998664856,
0.493208825588226, 0.0846758112311363, 0.52599424123764, 0.431378930807114,
0.351559728384018, 0.605812251567841, 0.699115037918091, 0.135554194450378,
0.123194560408592, 0.270234107971191, 0.430367022752762, 0.536984145641327,
0.355762362480164, 0.231267750263214, 0.379208505153656, 0.164151519536972,
0.0951724126935005, 0.733676970005035, 0.423441767692566, 0.494409680366516,
0.59288889169693, 0.24911966919899, 0.233082711696625, 0.0846758112311363,
0.0846758112311363, 0.0846758112311363, 0.648768961429596, 0.549317717552185,
0.755375862121582, 0.780426323413849, 0.639383137226105, 0.535774648189545,
0.580516874790192, 0.422660619020462, 0.0846758112311363, 0.563032984733582,
0.555732369422913, 0.262037515640259, 0.636113107204437, 0.252823621034622,
0.301573425531387, 0.165787488222122, 0.357965022325516, 0.289487957954407,
0.482456147670746, 0.148062914609909, 0.683458387851715, 0.159228771924973,
0.63042277097702), WACC = c(0.0587803088128567, 0.114285714924335,
0.0474829077720642, 0.089603066444397, 0, -0.0595453642308712,
-0.0409021787345409, 0.00990956369787455, -0.0255164038389921,
0.0834397599101067, 0.00515962578356266, -0.00363277364522219,
0.0127931768074632, 0.175039649009705, 0.0102629894390702, -0.00986078940331936,
-0.00165113247931004, 0.0121057005599141, 0.0084889642894268,
0.175039649009705, -0.0706999972462654, 0.000848896452225745,
0.0481245554983616, 0.0177619475871325, -0.00661477027460933,
-0.0334987565875053, -0.0147324288263917, -0.095448799431324,
0.00445194961503148, 0.0218068547546864, 0.0675585269927979,
0.0175967831164598, 0.0445274449884892, -0.00881352834403515,
0.0546129755675793, -0.0589929111301899, 0.0344933792948723,
0.0675862058997154, 0.0180412363260984, 0.028426943346858, 0.0992020443081856,
-0.016499999910593, -0.0906611084938049, 0.0338345877826214,
-0.103832200169563, 0.0640455335378647, 0.0919997692108154, -0.002492984989658,
0.0179337225854397, -0.103832200169563, -0.035867165774107, 0.0543297752737999,
0.0449094548821449, 0.120576545596123, 0.0157457403838634, -0.0847628638148308,
0.0742096677422523, -0.00257277372293174, 0.0309173855930567,
0.0231686513870955, 0.116203308105469, 0.0748251751065254, 0.11379050463438,
-0.024171270430088, 0.00290694180876017, -0.0760233923792839,
0.166220098733902, 0.0178635232150555, 0.175039649009705, 0.0304596647620201
), Salesgrowth = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), Export = c(0, 0, 0.998237371444702, 0.726109445095062,
0, 0.00252844509668648, 0.895112693309784, 0, 0, 0.0362807661294937,
0, 0.348896056413651, 1, 0.88113659620285, 0.868606626987457,
0, 0, 0, 0.0201816353946924, 0, 1, 0, 0, 0, 0.195195838809013,
1, 0.926722705364227, 0, 0, 0.743137240409851, 1, 0, 0.0486678741872311,
0.944032907485962, 0.501250028610229, 0, 0.999022483825684, 0.996436417102814,
0, 0, 0, 0.988813638687134, 0, 0, 1, 0.967754900455475, 0, 0,
0, 0.00271074729971588, 0, 0, 0.0384709499776363, 0, 0.0599414147436619,
0, 0, 1, 0, 0.000741015828680247, 0, 0.870000004768372, 0, 0,
0.905181586742401, 0, 0.599265575408936, 0, 0.931351482868195,
0), Leverage = c(14.4659090042114, 0.483870953321457, 0.5306316614151,
5.99260854721069, 21.3162212371826, 0.802828848361969, 1.31172561645508,
2.2685534954071, 0.796943247318268, 1.39327561855316, 2.87625002861023,
1.1588100194931, 0.973352015018463, 0.183956876397133, 0.769580006599426,
0.289454013109207, 5.48250532150269, 0.52344822883606, 0.197103783488274,
0.183956876397133, 0.570105195045471, 3.6015625, 3.71000003814697,
4.99886798858643, 5.34887409210205, 1.4610687494278, 0.733816742897034,
1.18206894397736, 21.3162212371826, 1.80238091945648, 1.5820380449295,
3.65263152122498, 0.705373585224152, 0.914846241474152, 0.916945815086365,
1.07761931419373, 0.183956876397133, 0.361502349376678, 2.07936501502991,
2.1812469959259, 0.760839521884918, 3.46650123596191, 1.53558850288391,
0.330000013113022, 3.10121011734009, 0.874662756919861, 1.48345506191254,
1.28475737571716, 1.05200004577637, 7.94806528091431, 0.203309133648872,
1.210857629776, 1.16275024414062, 2.03012037277222, 0.476988017559052,
0.45521292090416, 1.75912058353424, 0.23635022342205, 0.342176884412766,
0.406383603811264, 0.641940057277679, 1, 1.0321592092514, 0.459677428007126,
3.95616054534912, 4.42857122421265, 1.85715568065643, 3.64950156211853,
0.515927731990814, 0.516942024230957), Current = c(0.642147123813629,
0.260168313980103, 0.600883364677429, 0.956995725631714, 0.260168313980103,
1.23653173446655, 1.40639424324036, 1.07675218582153, 0.260168313980103,
0.801113069057465, 2.81943321228027, 0.843509554862976, 1.13398694992065,
0.260168313980103, 0.545235216617584, 2.81943321228027, 1.19198870658875,
0.474293291568756, 0.260168313980103, 0.260168313980103, 0.401427298784256,
1.21140944957733, 0.326404929161072, 1.71450614929199, 0.987657248973846,
1.09765684604645, 1.29162395000458, 1.80042016506195, 0.98863410949707,
0.73352712392807, 0.839596688747406, 1.08649599552155, 0.746986508369446,
0.519444465637207, 0.607772707939148, 0.260168313980103, 0.260168313980103,
0.293445110321045, 2.21290326118469, 0.673032999038696, 0.964383006095886,
2.21746039390564, 0.470989525318146, 0.323529422283173, 1.15627562999725,
0.506606042385101, 0.626914441585541, 1.53568696975708, 1.02733683586121,
2.81943321228027, 1.05705952644348, 1.7860734462738, 1.1391396522522,
2.3782639503479, 0.665584921836853, 0.340285390615463, 1.23362839221954,
0.490639895200729, 0.345467031002045, 0.696641504764557, 0.268895357847214,
0.741701245307922, 0.433604329824448, 0.887159526348114, 1.37459933757782,
0.593891382217407, 0.616396367549896, 2.4796838760376, 0.42551463842392,
0.725274741649628), Cover = c(0.649999976158142, 0.112769484519958,
0, 0.5163214802742, NA, -0.111111111938953, 0.976295828819275,
1, -0.461538463830948, 0.249363869428635, 0, -1.61016952991486,
0, 0.0135302441194654, 0.161290317773819, -0.396551728248596,
-1.61016952991486, 1.04938268661499, 0.804794549942017, -0.00240922393277287,
-0.0130399344488978, 0, 0.267605632543564, 0.831447958946228,
1.6728972196579, 0, -0.00519031146541238, 0, -0.0773333311080933,
0.862068951129913, -0.0281690135598183, 0.32051283121109, 0.332811266183853,
0, 0.304878056049347, 0, 0, 0, -0.0192307699471712, 0.822123885154724,
0.376334100961685, -0.10884353518486, -0.86366331577301, 0, -0.0272628143429756,
0.403465360403061, -0.00689655169844627, -0.935179531574249,
1.6728972196579, 0, -0.215469613671303, 0.818540453910828, 0.538461565971375,
0, 0.861155688762665, 0.0506329126656055, 0.00725478818640113,
0.204545453190804, -0.0206896550953388, 0.640307724475861, 0.0638686120510101,
0.584474861621857, -0.0592105276882648, 0, 0, -0.1875, 0.520370066165924,
0, 0.020833333954215, 0.528301894664764), Bank = c(1, 1, 0, 1,
0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,
0, 1, 1), Inctaxrate = c(0.321428567171097, 0.330000013113022,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.200000002980232,
0, 0, 0, 0, 0, 0.333333343267441, 0, 0, 0, 0, 0, 0, 0, 0.266666680574417,
0.121365360915661, 0, 0.122222222387791, 0, 0, 0, 0.333333343267441,
0.333333343267441, 0.0750062316656113, 0, 0, 0.111111111938953,
0, 0.120408162474632, 0, 0, 0, 0, 0, 0.279069781303406, 0.244000002741814,
0, 0, 0, 0.33002045750618, 0, 0, 0.0624906569719315, 0.330000013113022,
0, 0.324999988079071, 0, 0.149377599358559, 0, 0.0553662702441216,
0, 0, 0.113207548856735), ROA = c(0.0205731075257063, 0.0931676998734474,
0.0474829077720642, 0.0433390773832798, 0, -0.0661615133285522,
-0.0800430849194527, 0, -0.0328068025410175, 0.0625798180699348,
0.00515962578356266, -0.0123593285679817, 0.0127931768074632,
0.15384615957737, 0.0070558050647378, -0.0232018567621708, -0.00898461323231459,
0.000345877138897777, 0.00017685342754703, 0.15384615957737,
-0.0723000019788742, 0.000848896452225745, 0.0346779897809029,
0.000424588390160352, -0.050157219171524, -0.0334987603127956,
-0.0147996991872787, -0.095448799431324, 0, 0.000566411763429642,
0.0688963234424591, 0.011312217451632, 0.0289648100733757, -0.00881352927535772,
0.0327677838504314, -0.0589929111301899, 0.0344933792948723,
0.0675862058997154, 0.0171821303665638, 0.000628385052550584,
0.061758890748024, -0.0173888895660639, -0.126888483762741, 0.0338345877826214,
-0.126888483762741, 0.0320554748177528, 0.0926342532038689, -0.00428083632141352,
0.00701754400506616, -0.107918381690979, -0.0434557013213634,
0.00510083045810461, 0.0201207250356674, 0.120576538145542, 0.00154811050742865,
-0.0807265415787697, 0.0736712887883186, -0.00233406270854175,
0.0324379131197929, 0.0111919632181525, 0.108601219952106, 0.0300699304789305,
0.120572723448277, -0.024171270430088, 0.00290694157592952, -0.0900584831833839,
0.074015699326992, 0.0178635232150555, 0.15384615957737, 0.00978401303291321
), ROS = c(0.00610553845763206, 0.0740740746259689, 0.0448514744639397,
0.0400644056499004, 0, -0.384265869855881, -0.132788196206093,
0, -0.482142865657806, 0.0572876073420048, 0.00905489549040794,
-0.0374670810997486, 0.00952717009931803, 0.14293497800827, 0.00246222713030875,
-0.0204290095716715, -0.0368077680468559, 0.000764993892516941,
0.000469307298772037, 0.14293497800827, -0.0350341610610485,
0.000662251666653901, 0.00380316679365933, 0.00167395151220262,
-0.057441484183073, -0.0715096518397331, -0.0480139665305614,
-0.681996643543243, 0, 0.00229357788339257, 0.00945648178458214,
0.032467532902956, 0.0493542142212391, -0.0270233191549778, 0.0651890486478806,
-0.193392232060432, 0.0160091482102871, 0.00762764643877745,
0.0434782616794109, 0.000468509475467727, 0.14293497800827, -0.0191402193158865,
-0.527710855007172, 0.0120000001043081, -0.0867163762450218,
0.013956137932837, 0.14293497800827, -0.0224030781537294, 0.00994475092738867,
-0.608597159385681, -0.277432709932327, 0.00495049497112632,
0.0174070466309786, 0.0611668601632118, 0.00249158823862672,
-0.0597014911472797, 0.0428401492536068, -0.00284711318090558,
0.0574506297707558, 0.014846958220005, 0.0711237564682961, 0.0173334684222937,
0.14293497800827, -0.25862067937851, 0.00270326854661107, -0.426592797040939,
0.0321705527603626, 0.112359553575516, 0.0831858441233635, 0.0143943512812257
), num_ID = c(110811, 5231, 56906, 57829, 109353, 53870, 53447,
3268, 31728, 44842, 109431, 53437, 35768, 53429, 55421, 12707,
28753, 12371, 2213, 40407, 52997, 20414, 11831, 32734, 13940,
53414, 55153, 30755, 8398, 47905, 51851, 30780, 23354, 53407,
48003, 107459, 53394, 53390, 7955, 29718, 51021, 53453, 54348,
597, 46440, 53454, 62729, 32489, 43867, 57861, 11030, 14104,
53370, 50914, 39352, 5956, 53364, 55151, 31386, 9852, 5230, 106943,
18825, 5561, 36645, 27786, 24240, 2994, 61769, 29396)), .Names = c("year",
"firmid", "provinceid", "industrycode2", "sales", "cogs", "inventory",
"fixedasset", "totalasset", "stateshare", "foreignshare", "privateshare",
"stateown", "foreignown", "privateown", "mixown", "stateonly",
"mixonly", "foreignonly", "privateonly", "gs", "gm", "gf", "privatize",
"IR", "GM", "CI", "WACC", "Salesgrowth", "Export", "Leverage",
"Current", "Cover", "Bank", "Inctaxrate", "ROA", "ROS", "num_ID"
), datalabel = "", time.stamp = "25 Sep 2016 11:04", formats = c("%9.0g",
"%9s", "%9.0g", "%9.0g", "%12.0g", "%12.0g", "%12.0g", "%12.0g",
"%12.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g"), types = c(65527L, 9L, 65527L, 65527L, 65526L,
65526L, 65526L, 65526L, 65526L, 65527L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L,
65527L), val.labels = structure(c("", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), .Names = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "")), var.labels = c("", "<b7><a8><c8><U+02F4><fa><c2><eb>",
"", "", "<b2><fa><U+01B7><cf><fa><ca><db><ca><d5><c8><eb>", "<b2><fa><U+01B7><cf><fa><ca><U+06F3><U+0271><be>",
"<b4><e6><bb><f5>", "<b9><U+0336><a8><d7><U+02B2><fa><ba><U+03FC><c6>",
"<d7><U+02B2><fa><d7><U+073C><c6>", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "group(firmid)"), version = 118L, label.table = list(), expansion.fields = list(), byteorder = "LSF", row.names = c(NA,
70L), class = "data.frame")
Data added. I now know that I can convert a three-mode array into a three-mode tensor, so we could also consider how to convert the current data frame into a three-mode array with dimensions "firmid", "year", and "all other covariates except these two".
Or imagine a three-dimensional coordinate system X-Y-Z: I want X to be firms, Y to be covariates, and Z to be years.
Any suggestions?

Resources