Related
I have a question about my project in RStudio.
In my research I assume that the presence of peacekeepers can positively influence the political representation of women. In this regard, I have the independent variable "pko_dummy" (0 = no peacekeeping mission, 1 = peacekeeping mission) and the two dependent variables "parl_wom.per" and "exe_wom.per", which measure the percentage of women elected to parliament and ministerial cabinets respectively.
The dataset in question includes all African countries that experienced a war between 1990 and 2018. I would like to run a difference-in-differences model. The treatment group would be the war-torn countries with a PKO, and the control group the war-torn countries without a PKO. The problem is that the year in which the peacekeeping mission intervened differs from country to country, so I don't know how to create a graph in RStudio to check whether the parallel trends assumption holds.
# Example data: ten yearly rows (1990-1999) for a single `cown` value (432).
# Constant columns and runs of repeated values are spelled with rep() so the
# pattern is visible at a glance; every column is a double vector.
data <- data.frame(
  cown = rep(432, 10),
  year = as.numeric(1990:1999),
  intensity_level = c(1, 1, 0, 0, 1, rep(0, 5)),
  pa_dummy = c(0, 1, 1, rep(0, 7)),
  pko_dummy = rep(0, 10),
  parl_wom.per = c(NA, NA, rep(0.023, 5), rep(0.122449, 3)),
  exe_wom.per = c(
    0.0588235, 0.1052632, 0.0526316, 0.0952381, 0.1111111,
    0.0555556, 0.125, 0.1176471, 0.2608696, 0.2727273
  ),
  gender_mean = c(0, 0, 1.75, rep(0, 7)),
  gender_art = c(0, 0, 7, rep(0, 7)),
  female_pko.per = rep(0, 10),
  lf_wom.per = c(
    0.60855, 0.60834, 0.6082, 0.60815, 0.6082,
    0.60838, 0.60806, 0.60798, 0.60804, 0.60811
  ),
  ss.per = c(
    0.0679799, 0.0723098, 0.0827134, 0.0837933, 0.0957365,
    0.1073224, 0.1127522, 0.1228388, 0.1336761, 0.1510765
  ),
  fdi.per = c(
    0.0021364, 0.0004424, -0.0077276, 0.001441, 0.0083661,
    0.0411724, 0.009786, 0.0275705, 0.0032724, 0.0090061
  ),
  ele.sy = rep(1, 10),
  polity = c(-7, NA, rep(4, 5), rep(5, 3)),
  mus.per = rep(0.944, 10),
  cons_ref = c(0, 0, rep(1, 8)),
  jud_ind.per = c(0.4763113, 0.5237863, 0.5575284, rep(0.548066, 7))
)
I've been using this code
# Stacked bar chart of `age`, filled by `subject_talk` (coerced to a factor).
# Bar heights are counts divided by the sum of counts, i.e. proportions rather
# than raw counts.
ggplot(Oak, aes(x = age, fill = factor(subject_talk))) +
  # after_stat() replaces the deprecated `..count..` dot-dot notation
  # (deprecated since ggplot2 3.4.0); the computed expression is the same.
  geom_bar(aes(y = after_stat(count / sum(count))), position = "stack") +
  # Restrict the x axis to 18-29.
  xlim(18, 29) +
  # Only the "0" and "1" levels are listed as breaks; they get the first two
  # default hue colours.
  scale_fill_manual(breaks = c("0", "1"), values = scales::hue_pal()(2))
to create graphs that look like this
Recently, some graphs end up with bars floating where the NA values should be,
which I don't want.
Here's the code for graph 2:
# Stacked bar chart of `age`, filled by `highcho` (coerced to a factor).
# Bar heights are counts divided by the sum of counts, i.e. proportions rather
# than raw counts.
ggplot(Oak, aes(x = age, fill = factor(highcho))) +
  # after_stat() replaces the deprecated `..count..` dot-dot notation
  # (deprecated since ggplot2 3.4.0); the computed expression is the same.
  geom_bar(aes(y = after_stat(count / sum(count))), position = "stack") +
  # Restrict the x axis to 18-29.
  xlim(18, 29) +
  # Only the "0" and "1" levels are listed as breaks; they get the first two
  # default hue colours.
  scale_fill_manual(breaks = c("0", "1"), values = scales::hue_pal()(2))
The output is too long to format as code; I can't post it otherwise.
dput(head(Oak,20))
structure(list(studyid = structure(c(1002, 1002, 1002, 1002,
1002, 1004, 1004, 1004, 1004, 1004, 1005, 1005, 1005, 1005, 1005,
1006, 1006, 1006, 1006, 1006), label = "Subject Study ID", format.stata = "%12.0g"),
post_flu = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0), label = "Receipt of Flu Vaccine - Encounter Survey", format.stata = "%10.0g"),
post_bmi = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "BMI Test Received - Encounter Survey", format.stata = "%9.0g"),
post_bp = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Blood Pressure Test Received - Encounter Survey", format.stata = "%9.0g"),
post_dia = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Diabetes Test Received - Encounter Survey", format.stata = "%9.0g"),
post_cho = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Cholesterol Test Received - Encounter Survey", format.stata = "%9.0g"),
post_flu_sl = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, NA, NA, NA, NA, NA), label = "Flu Shot Received (Subsidy Received) - Encounter Survey", format.stata = "%9.0g"),
post_flu_nosl = structure(c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0), label = "Flu Shot Received (No Subsidy Received) - Encounter Survey", format.stata = "%9.0g"),
post_shr_invasive = structure(c(1, 1, 1, 1, 1, 0.666666686534882,
0.666666686534882, 0.666666686534882, 0.666666686534882,
0.666666686534882, 0.333333343267441, 0.333333343267441,
0.333333343267441, 0.333333343267441, 0.333333343267441,
0, 0, 0, 0, 0), label = "Post Take-Up as Share of Invasive Services", format.stata = "%9.0g"),
post_share4 = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Post Take-Up as Share of Four Services", format.stata = "%9.0g"),
pre_bmi = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Weight and Height Selected - CTO Patient Survey", format.stata = "%8.0g"),
pre_bp = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Blood Pressure Selected - CTO Patient Survey", format.stata = "%8.0g"),
pre_dia = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Diabetes Selected - CTO Patient Survey", format.stata = "%8.0g"),
pre_cho = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Cholesterol Selected - CTO Patient Survey", format.stata = "%8.0g"),
pre_flu = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Flu Shot Selected - CTO Patient Survey", format.stata = "%8.0g", labels = c(No = 0,
Yes = 1, Unsure = 99), class = c("haven_labelled", "vctrs_vctr",
"double")), pre_flu_sl = structure(c(1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA), label = "Flu Shot Selected (Subsidy Received) - CTO Survey", format.stata = "%9.0g"),
pre_flu_nosl = structure(c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0), label = "Flu Shot Selected (No Subsidy Received) - CTO Survey", format.stata = "%9.0g"),
pre_shr_invasive = structure(c(0.333333343267441, 0.333333343267441,
0.333333343267441, 0.333333343267441, 0.333333343267441,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Pre Take-Up as Share of Invasive Services", format.stata = "%9.0g"),
pre_share4 = structure(c(0, 0, 0, 0, 0, 0.25, 0.25, 0.25,
0.25, 0.25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Pre Take-Up as Share of Four Services", format.stata = "%9.0g"),
delta_bmi = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Delta BMI: Post - Pre", format.stata = "%9.0g"),
delta_bp = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Delta Blood Pressure: Post - Pre", format.stata = "%9.0g"),
delta_dia = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Delta Diabetes: Post - Pre", format.stata = "%9.0g"),
delta_cho = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Delta Cholesterol: Post - Pre", format.stata = "%9.0g"),
delta_flu = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Delta Flu: Post - Pre", format.stata = "%9.0g"),
delta_shr_invasive = structure(c(0.666666686534882, 0.666666686534882,
0.666666686534882, 0.666666686534882, 0.666666686534882,
0.666666686534882, 0.666666686534882, 0.666666686534882,
0.666666686534882, 0.666666686534882, 0.333333343267441,
0.333333343267441, 0.333333343267441, 0.333333343267441,
0.333333343267441, 0, 0, 0, 0, 0), label = "Delta Take-Up as Share of Invasive Services", format.stata = "%9.0g"),
deltaind_test = structure(c(1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0), label = "Indicator for Delta Selection - Stacked Subject X Test", format.stata = "%9.0g"),
delta_share4 = structure(c(1, 1, 1, 1, 1, 0.75, 0.75, 0.75,
0.75, 0.75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Delta as Share of Four Services", format.stata = "%9.0g"),
friends_enrolled = structure(c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA), label = "Are Friends Enrolled in the Study - CTO Patient Survey", format.stata = "%8.0g", labels = c(No = 0,
Yes = 1, Unsure = 99), class = c("haven_labelled", "vctrs_vctr",
"double")), value_bmi = structure(c(25, 25, 25, 25, 25, 28,
28, 28, 28, 28, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Patient Test Value: bmi - Encounter Survey", format.stata = "%10.0g"),
value_dia = structure(c(5.9, 5.9, 5.9, 5.9, 5.9, 6.9, 6.9,
6.9, 6.9, 6.9, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Patient Test Value: hgb - Encounter Survey", format.stata = "%10.0g"),
value_cho = structure(c(208, 208, 208, 208, 208, 170, 170,
170, 170, 170, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Patient Test Value: cho - Encounter Survey", format.stata = "%10.0g"),
subject_talk = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Subject Tried to Talk About Other Health Problems - Encounter Survey", format.stata = "%10.0g"),
choice_care = structure(c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2,
2, 2, 2, 2, 3, 3, 3, 3, 3), label = "How Much Choice Subj Has in Choice of Medical Care - Barber Survey", format.stata = "%22.0g", labels = c(`prefer not to answer` = -99,
`don't know` = -98, `a great deal of choice` = 1, `some choice` = 2,
`very little choice` = 3, `no choice` = 4), class = c("haven_labelled",
"vctrs_vctr", "double")), rating = structure(c(4, 4, 4, 4,
4, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, NA, NA, NA, NA, NA), label = "Experience Rating (1 = Bad, 5 = Excellent) - Subject Feedback", format.stata = "%10.0g"),
doctor_id = structure(c("BL6", "BL6", "BL6", "BL6", "BL6",
"NB8", "NB8", "NB8", "NB8", "NB8", "NB4", "NB4", "NB4", "NB4",
"NB4", "NB4", "NB4", "NB4", "NB4", "NB4"), label = "Doctor Mask ID", format.stata = "%9s"),
nonpreventive_agree = structure(c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Doctor Notes Relating to Personal/Other Health (Coders Agree) - Encounter Survey", format.stata = "%9.0g"),
mecherror_cho = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Mechanical Error, Cholesterol Test - Encounter Survey", format.stata = "%9.0g"),
mecherror_dia = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Mechanical Error, Diabetes Test - Encounter Survey", format.stata = "%9.0g"),
value_systolic = structure(c(165, 165, 165, 165, 165, 168,
168, 168, 168, 168, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), label = "Blood Pressure Value: Systolic - Encounter Survey", format.stata = "%9.0g"),
hyptension = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Subj Has Hypertension, Test Value or MD Comments - Encounter Survey", format.stata = "%9.0g"),
diabetic = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Subj Has Diabetes, Test Value or MD Comments - Encounter Survey", format.stata = "%9.0g"),
highcho = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), label = "Subj Has High Cholesterol, Test Value or MD Comments - Encounter Survey", format.stata = "%9.0g"),
obese = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), label = "Subj Is Obese, Test Value or MD Comments - Encounter Survey", format.stata = "%9.0g"),
length_visit_dr = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), label = "Doctor Visit Duration, Time Out of Waiting Room to Time Out - Encounter Survey", format.stata = "%9.0g"),
RO_tablet_assist = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), label = "RO Assisted Subject with Tablet Survey - Encounter Survey", format.stata = "%9.0g"),
yes_recommend = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, NA, NA, NA, NA, NA), label = "Patient Would Recommend Doctor - Subject Feedback", format.stata = "%9.0g"),
dr_notes = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1), label = "Doctor Wrote Notes in 'Notable' About Subject - Encounter Survey", format.stata = "%9.0g"),
any_health_prob = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1), label = "Subject Reported Any Health Problems (A2-A11) - Barber Survey", format.stata = "%9.0g"),
hosp_visits_2years = structure(c(NA, NA, NA, NA, NA, 3, 3,
3, 3, 3, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3), label = "Number of Hospital Visits in Last 2 Years - CTO Survey", format.stata = "%9.0g"),
ER_2years = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 7, 7, 7, 7, 7), label = "Number of ER Visits in Last 2 Years - Barber Survey", format.stata = "%9.0g"),
nights_hosp_2years = structure(c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Nights in the Hospital in Last 2 Years - Barber Survey", format.stata = "%9.0g"),
has_PCP = structure(c(0, 0, 0, 0, 0, -9, -9, -9, -9, -9,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1), label = "Subject Has Primary Care Provider - Barber Survey", format.stata = "%9.0g"),
uninsured = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, -9,
-9, -9, -9, -9, 0, 0, 0, 0, 0), label = "Subject is Uninsured - Barber Survey", format.stata = "%9.0g"),
ER_visits_uninsured = structure(c(NA, NA, NA, NA, NA, 0,
0, 0, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Number of ER Visits in Last 2 Years for Uninsured - Barber Survey", format.stata = "%9.0g"),
mistrust_5levels = structure(c(3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 1, 1, 1, 1, 1, 4, 4, 4, 4, 4), label = "Doctor Mistrust (1 is Lowest, 5 is Highest) - Barber Survey", format.stata = "%9.0g"),
med_mistrust = structure(c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 3, 3, 3, 3, 3), label = "Doctor Mistrust (1 is Lowest, 3 is Highest) - Barber Survey", format.stata = "%9.0g"),
age = structure(c(50, 50, 50, 50, 50, 44, 44, 44, 44, 44,
33, 33, 33, 33, 33, 35, 35, 35, 35, 35), label = "Subject Age - Barber Survey", format.stata = "%9.0g"),
age2 = structure(c(2500, 2500, 2500, 2500, 2500, 1936, 1936,
1936, 1936, 1936, 1089, 1089, 1089, 1089, 1089, 1225, 1225,
1225, 1225, 1225), label = "Subject Age Squared - Barber Survey", format.stata = "%9.0g"),
married = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Subject Is Married - Barber Survey", format.stata = "%9.0g"),
unemployed = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Subject Is Unemployed - Barber Survey", format.stata = "%9.0g"),
benefits = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Subject Receives DI/SSI/UB - Barber Survey", format.stata = "%9.0g"),
sl0 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1), label = "Subsidy Level: $0 - CTO Survey", format.stata = "%9.0g"),
sl5 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 0, 0, 0, 0, 0), label = "Subsidy Level: $5 - CTO Survey", format.stata = "%9.0g"),
sl10 = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), label = "Subsidy Level: $10 - CTO Survey", format.stata = "%9.0g"),
subsidy_level = structure(c(10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0), label = "Subsidy Level, Categorical - CTO Survey", format.stata = "%9.0g"),
black_dr = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Randomized to Black Doctor - CTO Survey", format.stata = "%9.0g"),
black0 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doctor & Subsidy Level: $0 - CTO Survey", format.stata = "%9.0g"),
black5 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doctor & Subsidy Level: $5 - CTO Survey", format.stata = "%9.0g"),
black10 = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doctor & Subsidy Level: $10 - CTO Survey", format.stata = "%9.0g"),
white0 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1), label = "White Doctor & Subsidy Level: $0 - CTO Survey", format.stata = "%9.0g"),
white5 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0), label = "White Doctor & Subsidy Level: $5 - CTO Survey", format.stata = "%9.0g"),
white10 = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "White Doctor & Subsidy Level: $10 - CTO Survey", format.stata = "%9.0g"),
any_subsidy = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Subject Received $5 or $10 Subsidy - CTO Survey", format.stata = "%9.0g"),
age5 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), label = "Subject Age Within 5 Years of Doctor's Age - Baseline Survey", format.stata = "%9.0g"),
age10 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), label = "Subject Age Within 10 Years of Doctor's Age - Baseline Survey", format.stata = "%9.0g"),
educ_conc = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Education Concordance: Subject Has BA or Higher - Baseline Survey", format.stata = "%9.0g"),
good_sa_health = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Subject Rated Health as Good, Very Good, or Excellent - Barber Survey", format.stata = "%9.0g"),
no_rec_scr_interval = structure(c(1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Subject No Recent Screenings in Recommended Interval - Barber Survey", format.stata = "%9.0g"),
millenial = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1), label = "Subject Is Less Than 40 - Barber Survey", format.stata = "%9.0g"),
HSless = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0), label = "Subject Has a High School Degree or Less - Barber Survey", format.stata = "%9.0g"),
low_income = structure(c(0, 0, 0, 0, 0, -9, -9, -9, -9, -9,
0, 0, 0, 0, 0, 1, 1, 1, 1, 1), label = "Household Has Income Below $5k/Year - Barber Survey", format.stata = "%9.0g"),
long_wait = structure(c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), label = "Subject Waited Longer Than 1 Hour to See Doctor - Barber Survey", format.stata = "%9.0g"),
high_congestion = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), label = "More Than 8 People in Waiting Room When Subject Arrived - Congestion", format.stata = "%9.0g"),
long_driv = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Commute Via Car Above Median (18 Mins) - Barber Distance", format.stata = "%9.0g"),
atrisk_cho = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 1, 1, 1, 1, 1), label = "Subject Recommended to Get Cholesterol Test - CTO Survey", format.stata = "%9.0g"),
atrisk_dia = structure(c(NA, NA, NA, NA, NA, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Subject Recommended to Get Diabetes Test - CTO Survey", format.stata = "%9.0g"),
excuses = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1), label = "Subject Gave Excuse for Not Receiving Services - Suubject Feedback", format.stata = "%10.0g"),
length_dr_note = structure(c(9, 9, 9, 9, 9, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 61, 61, 61, 61, 61), label = "Length (Number of Characters) of Doctor Notes - Encounter Survey", format.stata = "%9.0g"),
mentioned_PCP = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Subject Mentioned PCP in Clinic Notes - Suubject Feedback", format.stata = "%9.0g"),
bl_ER_2years = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Number of ER Visits in Last 2 Years - Barber Survey", format.stata = "%9.0g"),
bl_med_mistrust = structure(c(2, 2, 2, 2, 2, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Doctor Mistrust (1 is Lowest, 3 is Highest) - Barber Survey", format.stata = "%9.0g"),
bl_millenial = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Is Less Than 40 - Barber Survey", format.stata = "%9.0g"),
bl_HSless = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Has a High School Degree or Less - Barber Survey", format.stata = "%9.0g"),
bl_low_income = structure(c(0, 0, 0, 0, 0, NA, NA, NA, NA,
NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Household Has Income Below $5k/Year - Barber Survey", format.stata = "%9.0g"),
bl_long_wait = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), label = "Black Doc * Subject Waited Longer Than 1 Hour to See Doctor - Barber Survey", format.stata = "%9.0g"),
bl_long_driv = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Commute Via Car Above Median (18 Mins) - Barber Distance", format.stata = "%9.0g"),
bl_high_congest = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), label = "Black Doc * More Than 8 People in Waiting Room When Subject Arrived - Congestion", format.stata = "%9.0g"),
bl_atrisk_cho = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Recommended to Get Cholesterol Test - CTO Survey", format.stata = "%9.0g"),
bl_atrisk_dia = structure(c(NA, NA, NA, NA, NA, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Recommended to Get Diabetes Test - CTO Survey", format.stata = "%9.0g"),
bl_no_rec_scr_interval = structure(c(1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject No Recent Screenings in Recommended Interval - Barber Survey", format.stata = "%9.0g"),
bl_ER_visits_uninsured = structure(c(NA, NA, NA, NA, NA,
0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "Black Doc * Number of ER Visits in Last 2 Years for Uninsured - Barber Survey", format.stata = "%9.0g"),
bl_educ_conc = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Education Concordance: Subject Has BA or Higher - Baseline Survey", format.stata = "%9.0g"),
bl_age5 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Age Within 5 Years of Doctor's Age - Baseline Survey", format.stata = "%9.0g"),
bl_age10 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Age Within 10 Years of Doctor's Age - Baseline Survey", format.stata = "%9.0g"),
bl_sl10 = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subsidy Level: $10 - CTO Survey", format.stata = "%9.0g"),
RO_id = structure(c(6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3), label = "Blind ID, Reception Officer - Encounter Survey", format.stata = "%9.0g"),
location_id = structure(c(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9), label = "Blind ID, Recruitment Location - Barber Survey", format.stata = "%9.0g"),
date_visit_id = structure(c(2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1), label = "Blind ID, Date of Clinic Visit - Encounter Survey", format.stata = "%9.0g"),
tag = structure(c(0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0), label = "Tag for Study ID", format.stata = "%8.0g"),
bmi_test = structure(c(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 0), label = "Tag for BMI Test", format.stata = "%9.0g"),
bp_test = structure(c(0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
0, 0, 0, 0, 1, 0, 0, 0), label = "Tag for Blood Pressure Test", format.stata = "%9.0g"),
dia_test = structure(c(0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 0), label = "Tag for Diabetes Test", format.stata = "%9.0g"),
cho_test = structure(c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0), label = "Tag for Cholesterol Test", format.stata = "%9.0g"),
flu_test = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
0, 0, 1, 0, 0, 0, 0, 1), label = "Tag for Flu Shot", format.stata = "%9.0g"),
any_invasive = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Subject Chose At Least One Invasive Screening", format.stata = "%9.0g"),
bl_any_invasive = structure(c(1, 1, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Subject Chose At Least One Invasive Screening", format.stata = "%9.0g"),
preind_test = structure(c(0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Indicator for Ex pre Selection - Stacked Subject X Test", format.stata = "%9.0g"),
postind_test = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0), label = "Indicator for Ex Post Selection - Stacked Subject X Test", format.stata = "%9.0g"),
bl_bp_test = structure(c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Tag for Blood Pressure Test", format.stata = "%9.0g"),
bl_bmi_test = structure(c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Tag for BMI Test", format.stata = "%9.0g"),
bl_dia_test = structure(c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Tag for Diabetes Test", format.stata = "%9.0g"),
bl_flu_test = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Tag for Flu Shot", format.stata = "%9.0g"),
bl_cho_test = structure(c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Black Doc * Tag for Cholesterol Test", format.stata = "%9.0g"),
missing_age = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for age", format.stata = "%9.0g"),
missing_HSless = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for HSless", format.stata = "%9.0g"),
missing_low_income = structure(c(0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for low_income", format.stata = "%9.0g"),
missing_has_PCP = structure(c(0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for has_PCP", format.stata = "%9.0g"),
missing_uninsured = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0), label = "Missing Indicator for uninsured", format.stata = "%9.0g"),
missing_age2 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for age2", format.stata = "%9.0g"),
missing_good_sa_health = structure(c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Missing Indicator for good_sa_health", format.stata = "%9.0g")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"), label = "Main Oakland Clinic Analysis Dataset")
I changed geom_bar to geom_histogram and that solved the problem. It's an alternative that works.
So I would like to combine the two bars from each of these graphs into one big graph. That is, I would like Black State Claim (from plot a) to be right next to Black Civil Rights Claim (from plot b), and likewise for all races, in one graph.
Since some of the counts, such as those for Asian, are so low, is there a better way to compare State Claim/Civil Rights Claim status with Race?
# a) State claim made?
# Cross-tabulate Race against State_Claim_Made, flatten the table to long
# form (one row per Race x Claim cell) and draw one stacked bar per race.
race_claim <- setNames(
  data.frame(table(jail$Race, jail$State_Claim_Made)),
  c("Race", "Claim", "Count")
)
ggplot(race_claim, aes(x = Race, y = Count, fill = Claim)) +
  geom_col()

# b) Civil rights claim?
# Same construction, using the Non_Statutory column as the claim indicator.
race_claim_civ <- setNames(
  data.frame(table(jail$Race, jail$Non_Statutory)),
  c("Race", "Claim", "Count")
)
ggplot(race_claim_civ, aes(x = Race, y = Count, fill = Claim)) +
  geom_col()
DATA SAMPLE:
# Data sample (20 rows) in dput() form; presumably the `jail` tibble used in
# the plotting code above.
# A missing comma between the last two values of State_Claim_Made has been
# restored so that the expression parses.
structure(list(Last_Name = c("Banks", "Beamon", "Dandridge",
"Deakle, Jr.", "Doyle", "Drinkard", "Ellis", "Embry", "Gaines",
"Gurley", "Hinton", "Holemon", "Holsomback", "Hunt", "Jones",
"Mahan", "Mahan", "McMillian", "Moore", "Padgett"), First_Name = c("Medell",
"Melvin Todd", "Beniah Alton", "Evan Lee", "Robert E.", "Gary",
"Andre", "Anthony", "Freddie Lee", "Timothy", "Anthony", "Jeffrey",
"John", "H. Guy", "Lydia Diane", "Dale", "Ronnie", "Walter",
"Daniel Wade", "Larry Randal"), Age = c("27", "24", "29", "59",
"44", "37", "35", "23", "22", "22", "29", "23", "33", "54", "40",
"22", "26", "45", "24", "40"), Race = c("Black", "Asian", "Caucasian",
"Caucasian", "Other", "Asian", "Black", "Black", "Black",
"Caucasian", "Black", "Caucasian", "Caucasian", "Other",
"Black", "Caucasian", "Asian", "Black", "Native American", "Caucasian"
), Sex = c("Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Female",
"Male", "Male", "Male", "Male", "Male"), State = c("Alabama",
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",
"Alabama"), CIU = c(0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0,
0, 0, 0, 0, 1, 0), Guilty_Plea = c(1, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), IO = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Worst_Crime = c(6, 1,
1, 4, 4, 1, 2, 1, 1, 6, 1, 2, 4, 6, 3, 2, 2, 1, 1, 1), Occurred = c(1999,
1988, 1994, 2014, 1991, 1993, 2012, 1992, 1972, 1999, 1985, 1987,
1987, 1987, 1997, 1983, 1983, 1986, 1999, 1990), Convicted = c(2001,
1989, 1996, 2015, 1992, 1995, 2013, 1993, 1974, 2000, 1986, 1988,
1988, 1993, 2000, 1986, 1986, 1988, 2002, 1992), Exonerated = c(2003,
1990, 2015, 2015, 2001, 2001, 2014, 1997, 1991, 2002, 2015, 1999,
2000, 1998, 2006, 1998, 1998, 1993, 2009, 1997), Sentence = c("15",
"25", "Life", "Not sentenced", "20", "Death", "85", "20", "30",
"35", "Death", "Life", "25", "Probation", "Life without parole",
"35", "Life without parole", "Death", "Death", "Death"), Death_Penalty = c(0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1), DNA_Only = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0), FC = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), MWID = c(0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0), F_MFE = c(0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1), P_FA = c(1,
1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0), OM = c(1,
1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1), ILD = c(0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0), State_Statute = c("Y",
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",
"Y", "Y", "Y", "Y", "Y", "Y"), State_Claim_Made = c(0, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0), Zero_time = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), Prem = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Pending = c(0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), Denied = c(0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), State_Award = c("0",
"0", "2", "0", "1", "0", "0", "0", "1", "0", "2", "0", "0", "0",
"0", "0", "0", "0", "0", "0"), Amount = c("0", "0", NA, "0",
"129041.88", "0", "0", "0", "1000000", "0", NA, "0", "0", "0",
"0", "0", "0", "0", "0", "0"), `Non-Statutory_Case_Filed` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0), No_Time = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), Unfiled = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1), Dismissed = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), Pending__1 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Award = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), Premature = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Amount__1 = c("0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "$ undisclosed", "0", "0"), Years_Lost = c(1.7,
0.1, 19.5, 0, 2.6, 5.7, 1.8, 4, 10.7, 1.5, 28.5, 10.6, 10.1,
0, 5.8, 11.4, 11.4, 4.5, 5.4, 5.5), State_Award2 = c("0", "0",
"0", "0", "1", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0")), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
I think there is a clash between two requirements: making the bar plot stacked and, at the same time, dodged. My solution is probably not the best, and someone may do better, but this is what I have right now:
Preprocessing
library(tidyverse)

# Reshape the two 0/1 claim indicators into long form and count the rows for
# every race x action x claim combination.
dat <- jail %>%
  rename_with(tolower) %>%
  select(race, state_claim_made, non_statutory_case_filed) %>%
  # Name the columns explicitly instead of relying on positions 2 and 3.
  pivot_longer(
    cols = c(state_claim_made, non_statutory_case_filed),
    names_to = "action",
    values_to = "claim"
  ) %>%
  count(race, action, claim) %>%
  mutate(
    action = if_else(action == "state_claim_made", "state", "civil"),
    # Give each race/action pair its own numeric x position, numbered in the
    # order the pairs appear in the (sorted) count table. seq_len(n()) is used
    # rather than 1:n() so a zero-row table does not yield c(1, 0).
    x = as.numeric(reorder(interaction(race, action), seq_len(n())))
  )
Output:
# # A tibble: 15 x 5
# race action claim n x
# <chr> <chr> <dbl> <int> <dbl>
# 1 Asian civil 0 3 1
# 2 Asian state 0 2 2
# 3 Asian state 1 1 2
# 4 Black civil 0 6 3
# 5 Black civil 1 1 3
# 6 Black state 0 3 4
# 7 Black state 1 4 4
# 8 Caucasian civil 0 7 5
# 9 Caucasian state 0 6 6
# 10 Caucasian state 1 1 6
# 11 Native American civil 1 1 7
# 12 Native American state 1 1 8
# 13 Other civil 0 2 9
# 14 Other state 0 1 10
# 15 Other state 1 1 10
Some necessary tweaks for x-axis labels:
Adapted from this answer:
# Tick positions: every bar position in `dat$x`, plus an extra tick offset by
# 0.5 at steps equal to the number of distinct actions (for the data shown,
# this lands midway between the civil and state bars of each race).
breaks <- sort(
  c(
    unique(dat$x),
    seq(
      from = min(dat$x) + .5,
      to = max(dat$x) + .5,
      by = length(unique(dat$action))
    )
  )
)
# Tick labels, three per race: "civil", the race name pushed down one line by
# a leading newline, then "state".
labels <- unlist(
  Map(
    function(race) c("civil", paste0("\n", race), "state"),
    unique(dat$race)
  ),
  use.names = FALSE
)
Plot data
# Stacked bars at the numeric positions in `x`, filled by claim status, with
# the custom `breaks`/`labels` vectors supplying the x-axis ticks.
ggplot(dat, aes(x = x, y = n, fill = factor(claim))) +
  # TRUE rather than T: `T` is an ordinary variable that can be reassigned.
  geom_col(show.legend = TRUE) +
  ggthemes::theme_few() +
  # Show the 0/1 claim levels as "false"/"true" in the legend, without a title.
  scale_fill_manual(
    name = NULL,
    values = c("gray75", "gray25"),
    breaks = c("0", "1"),
    labels = c("false", "true")
  ) +
  scale_x_continuous(breaks = breaks, labels = labels) +
  theme(axis.title.x = element_blank(), axis.ticks.x = element_blank()) +
  labs(title = "Jail Plot", y = "Count")
Data
The data you attached are corrupted - a missing comma or $ somewhere in the table (I don't remember what that was). Here are the same data, but without the variables we don't need to solve the problem.
# Trimmed 20-row data sample in dput() form, keeping only three columns:
# Race, State_Claim_Made (0/1) and Non_Statutory_Case_Filed (0/1).
# The expression is not assigned here; bind it to a name (e.g. `jail`) to use it.
structure(
list(Race = c("Black", "Asian", "Caucasian", "Caucasian", "Other", "Asian",
"Black", "Black", "Black", "Caucasian", "Black", "Caucasian",
"Caucasian", "Other", "Black", "Caucasian", "Asian", "Black",
"Native American", "Caucasian"),
State_Claim_Made = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1,
0, 1, 0),
Non_Statutory_Case_Filed = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0)
),
row.names = c(NA, -20L),
class = c("tbl_df", "tbl", "data.frame")
)
For an assignment, I would like to see the number of subjects who have 0 for the variable CIU vs. 1 for CIU.
# Data sample for the question, in dput() form: a 20-row data frame carrying
# the tibble classes. Among its columns are CIU and State_Claim_Made (both 0/1).
# NOTE(review): presumably this is the `jail` object used in the dplyr code
# that follows - confirm.
structure(list(Last_Name = c("Banks", "Beamon", "Dandridge",
"Deakle, Jr.", "Doyle", "Drinkard", "Ellis", "Embry", "Gaines",
"Gurley", "Hinton", "Holemon", "Holsomback", "Hunt", "Jones",
"Mahan", "Mahan", "McMillian", "Moore", "Padgett"), First_Name = c("Medell",
"Melvin Todd", "Beniah Alton", "Evan Lee", "Robert E.", "Gary",
"Andre", "Anthony", "Freddie Lee", "Timothy", "Anthony", "Jeffrey",
"John", "H. Guy", "Lydia Diane", "Dale", "Ronnie", "Walter",
"Daniel Wade", "Larry Randal"), Age = c("27", "24", "29", "59",
"44", "37", "35", "23", "22", "22", "29", "23", "33", "54", "40",
"22", "26", "45", "24", "40"), Race = c("Black", "Black", "Caucasian",
"Caucasian", "Caucasian", "Caucasian", "Black", "Black", "Black",
"Caucasian", "Black", "Caucasian", "Caucasian", "Caucasian",
"Black", "Caucasian", "Caucasian", "Black", "Caucasian", "Caucasian"
), Sex = c("Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Female",
"Male", "Male", "Male", "Male", "Male"), State = c("Alabama",
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",
"Alabama"), CIU = c(0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0,
0, 0, 0, 0, 1, 0), Guilty_Plea = c(1, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), IO = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Worst_Crime = c(6, 1,
1, 4, 4, 1, 2, 1, 1, 6, 1, 2, 4, 6, 3, 2, 2, 1, 1, 1), Occurred = c(1999,
1988, 1994, 2014, 1991, 1993, 2012, 1992, 1972, 1999, 1985, 1987,
1987, 1987, 1997, 1983, 1983, 1986, 1999, 1990), Convicted = c(2001,
1989, 1996, 2015, 1992, 1995, 2013, 1993, 1974, 2000, 1986, 1988,
1988, 1993, 2000, 1986, 1986, 1988, 2002, 1992), Exonerated = c(2003,
1990, 2015, 2015, 2001, 2001, 2014, 1997, 1991, 2002, 2015, 1999,
2000, 1998, 2006, 1998, 1998, 1993, 2009, 1997), Sentence = c("15",
"25", "Life", "Not sentenced", "20", "Death", "85", "20", "30",
"35", "Death", "Life", "25", "Probation", "Life without parole",
"35", "Life without parole", "Death", "Death", "Death"), Death_Penalty = c(0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1), DNA_Only = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0), FC = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), MWID = c(0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0), F_MFE = c(0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1), P_FA = c(1,
1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0), OM = c(1,
1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1), ILD = c(0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0), State_Statute = c("Y",
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",
"Y", "Y", "Y", "Y", "Y", "Y"), State_Claim_Made = c(0, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), Zero_time = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), Prem = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Pending = c(0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), Denied = c(0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), State_Award = c("0",
"0", "2", "0", "1", "0", "0", "0", "1", "0", "2", "0", "0", "0",
"0", "0", "0", "0", "0", "0"), Amount = c("0", "0", NA, "0",
"129041.88", "0", "0", "0", "1000000", "0", NA, "0", "0", "0",
"0", "0", "0", "0", "0", "0"), `Non-Statutory_Case_Filed` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0), No_Time = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), Unfiled = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1), Dismissed = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), Pending__1 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Award = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), Premature = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Amount__1 = c("0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "$ undisclosed", "0", "0"), Years_Lost = c(1.7,
0.1, 19.5, 0, 2.6, 5.7, 1.8, 4, 10.7, 1.5, 28.5, 10.6, 10.1,
0, 5.8, 11.4, 11.4, 4.5, 5.4, 5.5), State_Award2 = c("0", "0",
"0", "0", "1", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0")), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
Using the dplyr package, I accomplished this much:
# Number of rows for each value of CIU, returned in a column named `count`.
CUI <- count(jail, CIU, name = "count")
Now I would like to create a table showing the percentage of each group within the "State_Claim_Made" category, but I am unsure what to do from here. In the end I would like to see the percentage of CIU=0 rows that have State_Claim_Made=0 vs. State_Claim_Made=1, and the same for CIU=1: a 2x2 table of sorts. I would also prefer to keep using the dplyr package, but that is not necessary.
Your example doesn't really let us see the full picture, so let
# Toy data with 50 rows: CIU is 0 for the first 20 rows and 1 for the last 30;
# State_Claim_Made is 1 for the first 15 rows and 0 for the remaining 35.
df <- data.frame(
  CIU = c(rep(0L, 20), rep(1L, 30)),
  State_Claim_Made = c(rep(1L, 15), rep(0L, 35))
)
Then
# Cross-tabulated counts: CIU in rows, State_Claim_Made in columns.
with(df, table(CIU, State_Claim_Made))
#    State_Claim_Made
# CIU  0  1
#   0  5 15
#   1 30  0
# Row proportions: every cell divided by the total of its CIU row.
prop.table(with(df, table(CIU, State_Claim_Made)), margin = 1)
#    State_Claim_Made
# CIU    0    1
#   0 0.25 0.75
#   1 1.00 0.00
Using base R you can just use the table command:
# Cross-tabulate the two columns of `data`: CIU values form the rows and
# State_Claim_Made values form the columns; each cell is a row count.
table(data$CIU, data$State_Claim_Made)
Output:
0 1
0 15 5
If your data include rows with CIU = 1, then the output will be a 2x2 table like the one you need.
I have a data frame that contains thousands of firms from 1998 to 2007 (each firm does not necessarily cover the same number of years), and I want to convert it into a tensor indexed by firm, year, and variable.
How can I achieve this?
I don't know how to extract a small part of this data set to post here so that we can discuss the problem; does anyone know how to do it?
structure(list(year = c(1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998), firmid = c("QB3732337",
"113810712", "618851819", "619457768", "HU5176905", "618024813",
"617883552", "105679742", "230141773", "609442909", "HU6355534",
"617882832", "60088847X", "617881725", "618403506", "145665417",
"216582994", "14405557X", "103400293", "607369138", "617737408",
"177160683", "142418787", "245560903", "15112238X", "617880650",
"618354214", "226040099", "128955068", "61156047X", "617481385",
"226091312", "190380322", "617880255", "611567073", "GD6120293",
"617876061", "617875739", "126925703", "221461337", "614801582",
"617883931", "618129447", "101713181", "611209524", "617883974",
"706747835", "242727819", "608934944", "619723894", "139432377",
"152102399", "617866832", "614407067", "607282008", "117944574",
"617865629", "618354185", "228791275", "134789270", "113810632",
"EJ2468142", "169832427", "115319804", "602003890", "211551128",
"193929448", "105044755", "704448663", "21960081X"), provinceid = c(420000,
140000, 440000, 450000, 420000, 440000, 440000, 130000, 650000,
330000, 420000, 440000, 120000, 440000, 440000, 330000, 530000,
330000, 120000, 310000, 440000, 410000, 320000, 230000, 340000,
440000, 440000, 620000, 230000, 350000, 440000, 620000, 440000,
440000, 350000, 440000, 440000, 440000, 220000, 610000, 410000,
440000, 440000, 110000, 350000, 440000, 410000, 210000, 320000,
450000, 320000, 340000, 440000, 410000, 310000, 210000, 440000,
440000, 650000, 320000, 140000, 330000, 370000, 150000, 140000,
510000, 440000, 130000, 330000, 530000), industrycode2 = c(3400,
3500, 2900, 1900, 1500, 2200, 1400, 3600, 1500, 4000, 1500, 3000,
2400, 2100, 1800, 1300, 2900, 4000, 3600, 2300, 1900, 3700, 2200,
3400, 2600, 1800, 2400, 1300, 1800, 2400, 1900, 3100, 1400, 1700,
2400, 3400, 2600, 2600, 1400, 2600, 3100, 1800, 3100, 1400, 2600,
3300, 1300, 2200, 3000, 3100, 4100, 3000, 1500, 1400, 3500, 3500,
3700, 2600, 2300, 3200, 1700, 4000, 4200, 3600, 2500, 1300, 3500,
3600, 1700, 2600), sales = c(45860, 4050, 17034, 154721, 267,
7703, 47572, 846, 267, 5132, 1767, 8354, 5668, 75330, 8935, 1958,
154721, 13072, 10654, 40505, 20637, 1510, 12884, 10753, 45542,
5286, 27492, 267, 1557, 872, 10892, 1386, 32054, 7290, 6903,
8263, 6996, 12848, 460, 44823, 52000, 16353, 6225, 750, 10863,
35110, 10638, 154721, 18100, 16773, 2415, 8686, 14362, 19831,
46958, 1340, 79855, 61817, 1114, 154721, 7030, 9923, 599, 4060,
154721, 361, 72986, 445, 18080, 3682), cogs = c(44780, 2430,
13839, 144088, 246, 9310, 37863, 495, 52, 4170, 1582, 7416, 3964,
58090, 8639, 1667, 211569, 8066, 4960, 28399, 19831, 1280, 12564,
7540, 37058, 1855, 25519, 70, 1539, 700, 10398, 1190, 25048,
6779, 5500, 7656, 6078, 12519, 370, 39479, 26816, 16586, 6061,
534, 10064, 32783, 8519, 308403, 16000, 23833, 1282, 6918, 12097,
15663, 35182, 768, 76005, 58528, 775, 4362410, 5770, 9040, 417,
2630, 167668, 290, 64038, 306, 15898, 2511), inventory = c(2740,
280, 1950, 46914, 711, 9552, 3984, 4989, 497, 1249, 0, 4336,
1450, 3000, 284, 0, 134404, 5881, 9347, 4818, 1744, 377, 376,
12238, 11669, 835, 17355, 226, 1370, 360, 434, 1089, 12154, 4000,
2388, 7257, 1547, 808, 137, 5920, 8750, 5600, 179, 151, 1321,
3454, 5479, 135303, 7480, 5943, 565, 850, 3032, 1207, 11307,
474, 2574, 26104, 519, 604670, 400, 501, 106, 7040, 43568, 711,
6763, 558, 444, 564), fixedasset = c(8580, 460, 6750, 28874,
2878, 25901, 43081, 3065, 198, 1163, 2140, 8484, 1688, 6900,
631, 1290, 849666, 6545, 10075, 6658, 3089, 581, 114, 22299,
22499, 3967, 54033, 1106, 883, 435, 404, 1712, 29329, 7952, 3176,
10272, 533, 138, 854, 14151, 64252, 10672, 5023, 62, 213, 1068,
572, 1115119, 14090, 71451, 13017, 5390, 6657, 5840, 31943, 80,
26145, 41905, 517, 3801800, 1164, 1725, 220, 15550, 72000, 825,
4697, 1913, 735, 3415), totalasset = c(13610, 3220, 16090, 166501,
14319, 44739, 78920, 10394, 823, 4698, 3101, 25325, 4221, 14900,
3118, 1724, 1091978, 28912, 28272, 27222, 10000, 1178, 1413,
42394, 52156, 11284, 89191, 1582, 6514, 3531, 1495, 3978, 54618,
22352, 13733, 27088, 3247, 1450, 1164, 33419, 129957, 18000,
20163, 266, 3728, 15286, 17337, 1718823, 25650, 94590, 15418,
8430, 12425, 10060, 75576, 991, 46436, 75405, 1973, 5976610,
4604, 5720, 1327, 43440, 248715, 1710, 31723, 2799, 4616, 5417
), stateshare = c(0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,
1, 0.518154501914978, 1, 1, 0, 0, 1, 1, 0.699999988079071, 0.831946730613708,
0, 0, 1, 1, 0, 0, 1, 0.200000002980232, 0, 0, 0, 0, 0, 1, 1,
0.400013834238052, 0, 0, 0.25, 0, 0.400000005960464, 0, 1, 0,
0, 1, 1, 0.823567152023315, 0, 0, 1, 0.742925107479095, 0, 1,
0.661562383174896, 1, 0, 1, 1, 0, 1, 0, 1, 0.749309420585632,
1), foreignshare = c(0.571428596973419, 0, 1, 0.385093629360199,
0, 0.5, 1, 0, 0, 0.30011722445488, 0, 0.699992954730988, 1, 1,
0.5, 0, 0, 0, 0, 1, 0.300029307603836, 0, 0, 0, 0, 0.782930612564087,
1, 0, 0, 1, 1, 0, 0.416000008583069, 1, 0.899999976158142, 1,
1, 1, 0, 0, 0.59998619556427, 0.700012564659119, 0.233907759189606,
0, 1, 0.600000023841858, 1, 0, 0.509767174720764, 0.299807518720627,
0, 0, 0, 1, 0.531239151954651, 0, 0.257074922323227, 1, 0, 0.245536029338837,
0, 0.285785287618637, 0, 0, 0.245354115962982, 0, 0.219982624053955,
0, 0.25069060921669, 0), privateshare = c(0.428571432828903,
0, 0, 0.614906370639801, 0, 0.5, 0, 0, 0, 0.699882745742798,
0, 0.300007075071335, 0, 0, 0.5, 0, 0.481845527887344, 0, 0,
0, 0.699970722198486, 0, 0, 0.300000011920929, 0.168053239583969,
0.217069372534752, 0, 0, 0, 0, 0, 0, 0.38400000333786, 0, 0.100000001490116,
0, 0, 0, 0, 0, 0, 0.299987435340881, 0.766092240810394, 0.75,
0, 0, 0, 0, 0.490232825279236, 0.700192511081696, 0, 0, 0.176432847976685,
0, 0.468760877847672, 0, 0, 0, 0, 0.0929015725851059, 0, 0.714214682579041,
0, 0, 0.754645884037018, 0, 0.780017375946045, 0, 0, 0), stateown = c(0,
1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1,
0, 1, 0, 1, 0, 1), foreignown = c(0, 0, 1, 0, 0, 0, 1, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), privateown = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), mixown = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1,
0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0), stateonly = c(0,
1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1,
0, 1, 0, 1, 0, 0), mixonly = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0,
1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0), foreignonly = c(0,
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), privateonly = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), gs = c(0,
1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1,
0, 1, 0, 1, 0, 1), gm = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1,
0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0,
1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0), gf = c(0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), privatize = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), IR = c(0.061188030987978,
0.115226335823536, 0.140906140208244, 0.325592696666718, 2.8902440071106,
1.02599358558655, 0.105221457779408, 3.07859539985657, 3.07859539985657,
0.299520373344421, 0.0191550496965647, 0.584681749343872, 0.365792125463486,
0.0516440011560917, 0.0328741744160652, 0.0191550496965647, 0.635272681713104,
0.729109823703766, 1.8844758272171, 0.169653862714767, 0.0879431217908859,
0.294531255960464, 0.0299267750233412, 1.62307691574097, 0.314884781837463,
0.450134783983231, 0.680081486701965, 3.07859539985657, 0.890188455581665,
0.514285743236542, 0.0417387969791889, 0.915126025676727, 0.485228359699249,
0.590057551860809, 0.434181809425354, 0.947884023189545, 0.254524528980255,
0.0645418986678123, 0.370270282030106, 0.14995314180851, 0.326297730207443,
0.337634146213531, 0.0295330807566643, 0.282771527767181, 0.131259933114052,
0.105359487235546, 0.643150627613068, 0.438721418380737, 0.467500001192093,
0.249360129237175, 0.44071763753891, 0.12286788225174, 0.250640660524368,
0.0770605877041817, 0.321385949850082, 0.6171875, 0.0338661931455135,
0.446008741855621, 0.669677436351776, 0.138609156012535, 0.0693240910768509,
0.0554203540086746, 0.254196643829346, 2.67680597305298, 0.259846836328506,
2.4517240524292, 0.10560917109251, 1.82352936267853, 0.0279280412942171,
0.224611714482307), GM = c(0.0241179093718529, 0.666666686534882,
0.230869278311729, 0.25, 0, -0.0678684562444687, 0.256424486637115,
0.709090888500214, 0.0769230797886848, 0.230695441365242, 0.116940580308437,
0.126483276486397, 0.429868817329407, 0.296780854463577, 0.0342632234096527,
0.174565091729164, 0.25985848903656, 0.620629787445068, 0.807692289352417,
0.426282614469528, 0.0406434386968613, 0.1796875, 0.0254695955663919,
0.426127314567566, 0.228938415646553, 0.807692289352417, 0.0773149430751801,
-0.0678684562444687, 0.0116959065198898, 0.245714291930199, 0.0475091375410557,
0.164705887436867, 0.279702961444855, 0.0753798484802246, 0.255090922117233,
0.0792842209339142, 0.151036530733109, 0.0262800548225641, 0.243243247270584,
0.135363101959229, 0.807692289352417, -0.0140479924157262, 0.0270582418888807,
0.404494374990463, 0.0793918892741203, 0.0709819123148918, 0.24873811006546,
0.0649604573845863, 0.131249994039536, -0.0678684562444687, 0.807692289352417,
0.255565196275711, 0.187236502766609, 0.266104847192764, 0.334716618061066,
0.744791686534882, 0.0506545640528202, 0.0561953261494637, 0.437419354915619,
0.0327548310160637, 0.218370884656906, 0.0976769924163818, 0.436450839042664,
0.54372626543045, 0.595140397548676, 0.244827583432198, 0.139729529619217,
0.454248368740082, 0.137249961495399, 0.466348081827164), CI = c(0.630418837070465,
0.142857149243355, 0.41951522231102, 0.173416376113892, 0.200991690158844,
0.578935623168945, 0.545881927013397, 0.294881671667099, 0.240583226084709,
0.247552156448364, 0.690099954605103, 0.335004925727844, 0.399905234575272,
0.463087260723114, 0.202373310923576, 0.748259842395782, 0.778098106384277,
0.226376593112946, 0.356359660625458, 0.24458159506321, 0.308899998664856,
0.493208825588226, 0.0846758112311363, 0.52599424123764, 0.431378930807114,
0.351559728384018, 0.605812251567841, 0.699115037918091, 0.135554194450378,
0.123194560408592, 0.270234107971191, 0.430367022752762, 0.536984145641327,
0.355762362480164, 0.231267750263214, 0.379208505153656, 0.164151519536972,
0.0951724126935005, 0.733676970005035, 0.423441767692566, 0.494409680366516,
0.59288889169693, 0.24911966919899, 0.233082711696625, 0.0846758112311363,
0.0846758112311363, 0.0846758112311363, 0.648768961429596, 0.549317717552185,
0.755375862121582, 0.780426323413849, 0.639383137226105, 0.535774648189545,
0.580516874790192, 0.422660619020462, 0.0846758112311363, 0.563032984733582,
0.555732369422913, 0.262037515640259, 0.636113107204437, 0.252823621034622,
0.301573425531387, 0.165787488222122, 0.357965022325516, 0.289487957954407,
0.482456147670746, 0.148062914609909, 0.683458387851715, 0.159228771924973,
0.63042277097702), WACC = c(0.0587803088128567, 0.114285714924335,
0.0474829077720642, 0.089603066444397, 0, -0.0595453642308712,
-0.0409021787345409, 0.00990956369787455, -0.0255164038389921,
0.0834397599101067, 0.00515962578356266, -0.00363277364522219,
0.0127931768074632, 0.175039649009705, 0.0102629894390702, -0.00986078940331936,
-0.00165113247931004, 0.0121057005599141, 0.0084889642894268,
0.175039649009705, -0.0706999972462654, 0.000848896452225745,
0.0481245554983616, 0.0177619475871325, -0.00661477027460933,
-0.0334987565875053, -0.0147324288263917, -0.095448799431324,
0.00445194961503148, 0.0218068547546864, 0.0675585269927979,
0.0175967831164598, 0.0445274449884892, -0.00881352834403515,
0.0546129755675793, -0.0589929111301899, 0.0344933792948723,
0.0675862058997154, 0.0180412363260984, 0.028426943346858, 0.0992020443081856,
-0.016499999910593, -0.0906611084938049, 0.0338345877826214,
-0.103832200169563, 0.0640455335378647, 0.0919997692108154, -0.002492984989658,
0.0179337225854397, -0.103832200169563, -0.035867165774107, 0.0543297752737999,
0.0449094548821449, 0.120576545596123, 0.0157457403838634, -0.0847628638148308,
0.0742096677422523, -0.00257277372293174, 0.0309173855930567,
0.0231686513870955, 0.116203308105469, 0.0748251751065254, 0.11379050463438,
-0.024171270430088, 0.00290694180876017, -0.0760233923792839,
0.166220098733902, 0.0178635232150555, 0.175039649009705, 0.0304596647620201
), Salesgrowth = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), Export = c(0, 0, 0.998237371444702, 0.726109445095062,
0, 0.00252844509668648, 0.895112693309784, 0, 0, 0.0362807661294937,
0, 0.348896056413651, 1, 0.88113659620285, 0.868606626987457,
0, 0, 0, 0.0201816353946924, 0, 1, 0, 0, 0, 0.195195838809013,
1, 0.926722705364227, 0, 0, 0.743137240409851, 1, 0, 0.0486678741872311,
0.944032907485962, 0.501250028610229, 0, 0.999022483825684, 0.996436417102814,
0, 0, 0, 0.988813638687134, 0, 0, 1, 0.967754900455475, 0, 0,
0, 0.00271074729971588, 0, 0, 0.0384709499776363, 0, 0.0599414147436619,
0, 0, 1, 0, 0.000741015828680247, 0, 0.870000004768372, 0, 0,
0.905181586742401, 0, 0.599265575408936, 0, 0.931351482868195,
0), Leverage = c(14.4659090042114, 0.483870953321457, 0.5306316614151,
5.99260854721069, 21.3162212371826, 0.802828848361969, 1.31172561645508,
2.2685534954071, 0.796943247318268, 1.39327561855316, 2.87625002861023,
1.1588100194931, 0.973352015018463, 0.183956876397133, 0.769580006599426,
0.289454013109207, 5.48250532150269, 0.52344822883606, 0.197103783488274,
0.183956876397133, 0.570105195045471, 3.6015625, 3.71000003814697,
4.99886798858643, 5.34887409210205, 1.4610687494278, 0.733816742897034,
1.18206894397736, 21.3162212371826, 1.80238091945648, 1.5820380449295,
3.65263152122498, 0.705373585224152, 0.914846241474152, 0.916945815086365,
1.07761931419373, 0.183956876397133, 0.361502349376678, 2.07936501502991,
2.1812469959259, 0.760839521884918, 3.46650123596191, 1.53558850288391,
0.330000013113022, 3.10121011734009, 0.874662756919861, 1.48345506191254,
1.28475737571716, 1.05200004577637, 7.94806528091431, 0.203309133648872,
1.210857629776, 1.16275024414062, 2.03012037277222, 0.476988017559052,
0.45521292090416, 1.75912058353424, 0.23635022342205, 0.342176884412766,
0.406383603811264, 0.641940057277679, 1, 1.0321592092514, 0.459677428007126,
3.95616054534912, 4.42857122421265, 1.85715568065643, 3.64950156211853,
0.515927731990814, 0.516942024230957), Current = c(0.642147123813629,
0.260168313980103, 0.600883364677429, 0.956995725631714, 0.260168313980103,
1.23653173446655, 1.40639424324036, 1.07675218582153, 0.260168313980103,
0.801113069057465, 2.81943321228027, 0.843509554862976, 1.13398694992065,
0.260168313980103, 0.545235216617584, 2.81943321228027, 1.19198870658875,
0.474293291568756, 0.260168313980103, 0.260168313980103, 0.401427298784256,
1.21140944957733, 0.326404929161072, 1.71450614929199, 0.987657248973846,
1.09765684604645, 1.29162395000458, 1.80042016506195, 0.98863410949707,
0.73352712392807, 0.839596688747406, 1.08649599552155, 0.746986508369446,
0.519444465637207, 0.607772707939148, 0.260168313980103, 0.260168313980103,
0.293445110321045, 2.21290326118469, 0.673032999038696, 0.964383006095886,
2.21746039390564, 0.470989525318146, 0.323529422283173, 1.15627562999725,
0.506606042385101, 0.626914441585541, 1.53568696975708, 1.02733683586121,
2.81943321228027, 1.05705952644348, 1.7860734462738, 1.1391396522522,
2.3782639503479, 0.665584921836853, 0.340285390615463, 1.23362839221954,
0.490639895200729, 0.345467031002045, 0.696641504764557, 0.268895357847214,
0.741701245307922, 0.433604329824448, 0.887159526348114, 1.37459933757782,
0.593891382217407, 0.616396367549896, 2.4796838760376, 0.42551463842392,
0.725274741649628), Cover = c(0.649999976158142, 0.112769484519958,
0, 0.5163214802742, NA, -0.111111111938953, 0.976295828819275,
1, -0.461538463830948, 0.249363869428635, 0, -1.61016952991486,
0, 0.0135302441194654, 0.161290317773819, -0.396551728248596,
-1.61016952991486, 1.04938268661499, 0.804794549942017, -0.00240922393277287,
-0.0130399344488978, 0, 0.267605632543564, 0.831447958946228,
1.6728972196579, 0, -0.00519031146541238, 0, -0.0773333311080933,
0.862068951129913, -0.0281690135598183, 0.32051283121109, 0.332811266183853,
0, 0.304878056049347, 0, 0, 0, -0.0192307699471712, 0.822123885154724,
0.376334100961685, -0.10884353518486, -0.86366331577301, 0, -0.0272628143429756,
0.403465360403061, -0.00689655169844627, -0.935179531574249,
1.6728972196579, 0, -0.215469613671303, 0.818540453910828, 0.538461565971375,
0, 0.861155688762665, 0.0506329126656055, 0.00725478818640113,
0.204545453190804, -0.0206896550953388, 0.640307724475861, 0.0638686120510101,
0.584474861621857, -0.0592105276882648, 0, 0, -0.1875, 0.520370066165924,
0, 0.020833333954215, 0.528301894664764), Bank = c(1, 1, 0, 1,
0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,
0, 1, 1), Inctaxrate = c(0.321428567171097, 0.330000013113022,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.200000002980232,
0, 0, 0, 0, 0, 0.333333343267441, 0, 0, 0, 0, 0, 0, 0, 0.266666680574417,
0.121365360915661, 0, 0.122222222387791, 0, 0, 0, 0.333333343267441,
0.333333343267441, 0.0750062316656113, 0, 0, 0.111111111938953,
0, 0.120408162474632, 0, 0, 0, 0, 0, 0.279069781303406, 0.244000002741814,
0, 0, 0, 0.33002045750618, 0, 0, 0.0624906569719315, 0.330000013113022,
0, 0.324999988079071, 0, 0.149377599358559, 0, 0.0553662702441216,
0, 0, 0.113207548856735), ROA = c(0.0205731075257063, 0.0931676998734474,
0.0474829077720642, 0.0433390773832798, 0, -0.0661615133285522,
-0.0800430849194527, 0, -0.0328068025410175, 0.0625798180699348,
0.00515962578356266, -0.0123593285679817, 0.0127931768074632,
0.15384615957737, 0.0070558050647378, -0.0232018567621708, -0.00898461323231459,
0.000345877138897777, 0.00017685342754703, 0.15384615957737,
-0.0723000019788742, 0.000848896452225745, 0.0346779897809029,
0.000424588390160352, -0.050157219171524, -0.0334987603127956,
-0.0147996991872787, -0.095448799431324, 0, 0.000566411763429642,
0.0688963234424591, 0.011312217451632, 0.0289648100733757, -0.00881352927535772,
0.0327677838504314, -0.0589929111301899, 0.0344933792948723,
0.0675862058997154, 0.0171821303665638, 0.000628385052550584,
0.061758890748024, -0.0173888895660639, -0.126888483762741, 0.0338345877826214,
-0.126888483762741, 0.0320554748177528, 0.0926342532038689, -0.00428083632141352,
0.00701754400506616, -0.107918381690979, -0.0434557013213634,
0.00510083045810461, 0.0201207250356674, 0.120576538145542, 0.00154811050742865,
-0.0807265415787697, 0.0736712887883186, -0.00233406270854175,
0.0324379131197929, 0.0111919632181525, 0.108601219952106, 0.0300699304789305,
0.120572723448277, -0.024171270430088, 0.00290694157592952, -0.0900584831833839,
0.074015699326992, 0.0178635232150555, 0.15384615957737, 0.00978401303291321
), ROS = c(0.00610553845763206, 0.0740740746259689, 0.0448514744639397,
0.0400644056499004, 0, -0.384265869855881, -0.132788196206093,
0, -0.482142865657806, 0.0572876073420048, 0.00905489549040794,
-0.0374670810997486, 0.00952717009931803, 0.14293497800827, 0.00246222713030875,
-0.0204290095716715, -0.0368077680468559, 0.000764993892516941,
0.000469307298772037, 0.14293497800827, -0.0350341610610485,
0.000662251666653901, 0.00380316679365933, 0.00167395151220262,
-0.057441484183073, -0.0715096518397331, -0.0480139665305614,
-0.681996643543243, 0, 0.00229357788339257, 0.00945648178458214,
0.032467532902956, 0.0493542142212391, -0.0270233191549778, 0.0651890486478806,
-0.193392232060432, 0.0160091482102871, 0.00762764643877745,
0.0434782616794109, 0.000468509475467727, 0.14293497800827, -0.0191402193158865,
-0.527710855007172, 0.0120000001043081, -0.0867163762450218,
0.013956137932837, 0.14293497800827, -0.0224030781537294, 0.00994475092738867,
-0.608597159385681, -0.277432709932327, 0.00495049497112632,
0.0174070466309786, 0.0611668601632118, 0.00249158823862672,
-0.0597014911472797, 0.0428401492536068, -0.00284711318090558,
0.0574506297707558, 0.014846958220005, 0.0711237564682961, 0.0173334684222937,
0.14293497800827, -0.25862067937851, 0.00270326854661107, -0.426592797040939,
0.0321705527603626, 0.112359553575516, 0.0831858441233635, 0.0143943512812257
), num_ID = c(110811, 5231, 56906, 57829, 109353, 53870, 53447,
3268, 31728, 44842, 109431, 53437, 35768, 53429, 55421, 12707,
28753, 12371, 2213, 40407, 52997, 20414, 11831, 32734, 13940,
53414, 55153, 30755, 8398, 47905, 51851, 30780, 23354, 53407,
48003, 107459, 53394, 53390, 7955, 29718, 51021, 53453, 54348,
597, 46440, 53454, 62729, 32489, 43867, 57861, 11030, 14104,
53370, 50914, 39352, 5956, 53364, 55151, 31386, 9852, 5230, 106943,
18825, 5561, 36645, 27786, 24240, 2994, 61769, 29396)), .Names = c("year",
"firmid", "provinceid", "industrycode2", "sales", "cogs", "inventory",
"fixedasset", "totalasset", "stateshare", "foreignshare", "privateshare",
"stateown", "foreignown", "privateown", "mixown", "stateonly",
"mixonly", "foreignonly", "privateonly", "gs", "gm", "gf", "privatize",
"IR", "GM", "CI", "WACC", "Salesgrowth", "Export", "Leverage",
"Current", "Cover", "Bank", "Inctaxrate", "ROA", "ROS", "num_ID"
), datalabel = "", time.stamp = "25 Sep 2016 11:04", formats = c("%9.0g",
"%9s", "%9.0g", "%9.0g", "%12.0g", "%12.0g", "%12.0g", "%12.0g",
"%12.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g"), types = c(65527L, 9L, 65527L, 65527L, 65526L,
65526L, 65526L, 65526L, 65526L, 65527L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L, 65527L,
65527L), val.labels = structure(c("", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), .Names = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "")), var.labels = c("", "<b7><a8><c8><U+02F4><fa><c2><eb>",
"", "", "<b2><fa><U+01B7><cf><fa><ca><db><ca><d5><c8><eb>", "<b2><fa><U+01B7><cf><fa><ca><U+06F3><U+0271><be>",
"<b4><e6><bb><f5>", "<b9><U+0336><a8><d7><U+02B2><fa><ba><U+03FC><c6>",
"<d7><U+02B2><fa><d7><U+073C><c6>", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "group(firmid)"), version = 118L, label.table = list(), expansion.fields = list(), byteorder = "LSF", row.names = c(NA,
70L), class = "data.frame")
Data added. I now know that I can convert a three-mode array into a three-mode tensor, so we could also consider how to convert the current data frame into a three-mode array with dimensions "firmid", "year", and "all other covariates except these two".
Or imagine a three-dimensional coordinate system X-Y-Z: I want X to be firms, Y to be covariates, and Z to be years.
Any suggestions?