Related
I'm trying to build a table using tableGrob in gridExtra and can't figure out how to set the column width. I am using ttheme_minimal(base_size = 25) to set the size of the text, but I can't seem to find a way in that function to set the column widths. Below is the full code that I'm using:
team_free_agents <- structure(list(Player = c("Adrian Amos", "Dean Lowry", "Marcedes Lewis",
"Allen Lazard", "Robert Tonyan", "Jarran Reed", "Randall Cobb",
"Rudy Ford", "Yosh Nijman", "Keisean Nixon", "Justin Hollins"
), Position = c("S", "IDL", "TE", "WR", "TE", "IDL", "WR", "S",
"LT", "CB", "EDGE"), Age = c(30, 29, 39, 28, 29, 31, 33, 29,
27, 26, 27), `Snap %` = c("94.6%", "46.7%", "41.2%", "78.9%",
"54%", "68.2%", "33.9%", "42.8%", "69.1%", "28%", "40.7%"), `Current APY` = c("$9,000,000",
"$6,775,000", "$4,000,000", "$3,986,000", "$3,750,000", "$3,250,000",
"$3,000,000", "$1,137,500", "$965,000", "$965,000", "$706,724"
), `Current Guarantees` = c("$12,000,000", "$6,000,000", "$2,100,000",
"$0", "$1,000,000", "$1,865,000", "$0", "$0", "$0", "$0", "$306,896"
), `2022 PFF Grade` = c(54.2, 59.3, 65.6, 69, 57.7, 61.9, 70.1,
77.7, 63.1, 63.9, 54)), row.names = c(NA, -11L), class = c("tbl_df",
"tbl", "data.frame"))
# Minimal gridExtra table theme with a base font size of 25.
tt1 <- gridExtra::ttheme_minimal(base_size = 25)

# Build a table grob from the free-agent data, without row names.
fa_table <- gridExtra::tableGrob(
  team_free_agents,
  rows = NULL,
  theme = tt1
)

# Pass the table grob to cowplot::draw_grob(), requesting a width of 90.
cowplot::draw_grob(fa_table, width = 90)
Any help with this would be appreciated!
I get the following error message when using janitor::adorn_totals("row"):
"Error in adorn_totals(., "row") :
trying to re-add a totals dimension that is already been added"
Here is the head of my dataset :
structure(list(code_1 = c("M01", "C03", "M99", "C05", "O01",
"C07"), regroupement_elsan = c("Gastro", "Ophtalmo", "Divers médecine",
"Gynéco", "Accouchements", "bouche et dents"), actes_2019 = c(9179,
5589, 6024, 4150, 4028, 3458), actes_2020 = c(7933, 4167, 3740,
2994, 3348, 2206), actes_2021 = c(6504, 5505, 4682, 3376, 3226,
3035), sejours_2019 = c(1631, 2502, 1028, 852, 1455, 1288), sejours_2020 = c(1335,
1819, 726, 574, 1371, 801), sejours_2021 = c(1109, 2416, 825,
657, 1259, 1106), tx_0_nuit_2019 = c("3.92397302268547", "90.7673860911271",
"32.9766536964981", "57.5117370892019", "0.206185567010309",
"98.9130434782609"), tx_0_nuit_2020 = c("3.29588014981273", "92.9081913139087",
"47.1074380165289", "59.581881533101", "0.291757840991977", "99.250936329588"
), tx_0_nuit_2021 = c("3.6068530207394", "95.4470198675497",
"18.3030303030303", "60.2739726027397", "0.158856235107228",
"98.7341772151899"), pourcentage = c(5.37796226165473, 4.55191916519208,
3.87140518282095, 2.79151300666457, 2.66748251170021, 2.50955034811226
), pourcentage_cumule = c(78.4062908267046, 82.9582099918967,
86.8296151747176, 89.6211281813822, 92.2886106930824, 94.7981610411947
)), row.names = c(NA, -6L), class = c("tabyl", "tbl_df", "tbl",
"data.frame"), core = structure(list(code_1 = c("M01b", "C01",
"C02", "C04", "M01", "C03", "M99", "C05", "O01", "C07", "C08",
"C99", "C98", "C10", "C06", "M03", "O02", "M02", "M04", "C01b",
"O03", "S99", "***", "C10b", "M05", "M98", "O04"), regroupement_elsan = c("Endoscopies
digestives",
"Ortho (+ rhumato et rachis)", "Chirurgie digestive", "Uro-néphro",
"Gastro", "Ophtalmo", "Divers médecine", "Gynéco", "Accouchements",
"bouche et dents", "Tissus mou et chir plastique", "Divers chir",
"Chir esth et hors sécu", "Chir thoracique et vasculaire", "ORL Stomato sf bouche et
dent",
"Pneumologie", "Obstétrique autre (hors IVG)", "Cardio Vasc (médecine)",
"Neurologie", "Rachis", "IVG", "Séances autres", "Autres", "Chir thoracique",
"Soins palliatifs", "Vasculaire interventionnel", "Néo nat"),
actes_2019 = c(36079, 29520, 14618, 6515, 9179, 5589, 6024,
4150, 4028, 3458, 2137, 2180, 575, 449, 866, 388, 294, 311,
714, 395, 292, 1842, 10, 0, 4, 0, 1), actes_2020 = c(30192,
25451, 12845, 7376, 7933, 4167, 3740, 2994, 3348, 2206, 2107,
1477, 575, 437, 337, 897, 193, 218, 267, 308, 118, 737, 8,
4, 0, 11, 5), actes_2021 = c(42333, 24055, 13735, 8196, 6504,
5505, 4682, 3376, 3226, 3035, 2571, 1134, 689, 511, 352,
272, 181, 161, 138, 106, 82, 61, 18, 8, 7, 0, 0), sejours_2019 = c(6992,
5493, 2577, 1221, 1631, 2502, 1028, 852, 1455, 1288, 540,
397, 236, 158, 260, 63, 148, 101, 90, 44, 246, 1820, 4, 0,
1, 0, 1), sejours_2020 = c(5811, 4946, 2220, 1220, 1335,
1819, 726, 574, 1371, 801, 554, 269, 221, 140, 94, 42, 109,
79, 58, 34, 98, 720, 2, 1, 0, 1, 5), sejours_2021 = c(7922,
5144, 2523, 1451, 1109, 2416, 825, 657, 1259, 1106, 649,
264, 278, 162, 111, 51, 108, 69, 30, 21, 77, 54, 7, 1, 2,
0, 0), tx_0_nuit_2019 = c("96.0955377574371", "63.5718186783179",
"41.4435389988359", "36.2817362817363", "3.92397302268547",
"90.7673860911271", "32.9766536964981", "57.5117370892019",
"0.206185567010309", "98.9130434782609", "72.5925925925926",
"53.904282115869", "13.9830508474576", "96.2025316455696",
"50.7692307692308", "42.8571428571429", "85.1351351351351",
"72.2772277227723", "11.1111111111111", "4.54545454545455",
"100,0", "100,0", "100,0", "0,0", "0,0", "0,0", "0,0"), tx_0_nuit_2020 =
c("96.0936155567028",
"67.3069146785281", "40.5855855855856", "34.344262295082",
"3.29588014981273", "92.9081913139087", "47.1074380165289",
"59.581881533101", "0.291757840991977", "99.250936329588",
"76.3537906137184", "49.814126394052", "11.7647058823529",
"99.2857142857143", "53.1914893617021", "16.6666666666667",
"74.3119266055046", "81.0126582278481", "25.8620689655172",
"8.82352941176471", "98.9795918367347", "100,0", "100,0",
"100,0", "0,0", "0,0", "20,0"), tx_0_nuit_2021 = c("96.7053774299419",
"73.2892690513219", "51.0503369005153", "41.9021364576154",
"3.6068530207394", "95.4470198675497", "18.3030303030303",
"60.2739726027397", "0.158856235107228", "98.7341772151899",
"83.9753466872111", "60.2272727272727", "50,0", "94.4444444444444",
"72.972972972973", "1.96078431372549", "81.4814814814815",
"85.5072463768116", "43.3333333333333", "52.3809523809524",
"100,0", "100,0", "100,0", "100,0", "0,0", "0,0", "0,0")), row.names = c(NA,
-27L), class = "data.frame"), tabyl_type = "two_way", totals = "row")
And the code I tried :
library(janitor)
# Try to append a totals row to `autres`.
autres %>%
  adorn_totals("row")
Could anyone help? I had indeed used the adorn_totals function on the data frame from which "autres" was generated, but I made sure the "total" row isn't in "autres" anymore.
With the object you have shared as x:
# Strip the stored tabyl attributes first, then add the totals.
x %>%
  untabyl() %>%
  adorn_totals()
Why it works:
You can see at the end of the object you shared, tabyl_type = "two_way", totals = "row". Those attributes are stored with the data.frame you're working with. When you try to adorn_totals() a second time, janitor checks this and errors.
When you call untabyl() it strips those attributes. Then adorn_totals() succeeds.
I notice you have a cumulative percentage column. If desired, you can control exactly which columns get a totals value in adorn_totals() - see ?adorn_totals and the ... argument for how, and here's an example: https://stackoverflow.com/a/69759313.
I'm having issues joining two tables with a simple left_join even though all of my data is tidy. Below are the code and the error that I receive, and below that I will paste simple samples of my data.
library(tidyverse)
library(janitor)
# Keep only the region code and its name, and store the code as numeric.
regions_name = regions %>% select(region, name)
regions_name$region = as.numeric(regions_name$region)
# Standardise the postcode column names with janitor::clean_names().
postcode_clean = postcode %>% clean_names()
#postcode_clean$pr = as.double(postcode_clean$pr)
# NOTE(review): an unnamed `by = c("pr", "region")` asks dplyr to join on a
# column named `pr` AND a column named `region`, each present in both tables.
# In the dput samples, postcode_clean has pc/pr/cs_duid and regions_name has
# region/name, which is consistent with the error shown below. To match `pr`
# (left) against `region` (right), a named vector is needed:
# by = c("pr" = "region").
postcode_province = postcode_clean %>% left_join(y = regions_name, by = c("pr", "region"))
#> Error: Join columns must be present in data.
#> x Problem with `region`.
> dput(head(postcode_clean, 10))
structure(list(pc = structure(c("A0A1A0", "A0A1B0", "A0A1C0",
"A0A1C0", "A0A1C0", "A0A1C0", "A0A1C0", "A0A1C0", "A0A1E0", "A0A1G0"
), label = "Postal code", format.spss = "A6"), pr = structure(c(10,
10, 10, 10, 10, 10, 10, 10, 10, 10), label = "Province or territory code", format.spss = "F2.0", display_width = 4L, labels = c(Newfoundland = 10,
`Prince Edward Island` = 11, `Nova Scotia` = 12, `New Brunswick` = 13,
Quebec = 24, Ontario = 35, Manitoba = 46, Saskatchewan = 47,
Alberta = 48, `British Columbia` = 59, Yukon = 60, `Northwest Territories` = 61,
Nunavut = 62), class = c("haven_labelled", "vctrs_vctr", "double"
)), cs_duid = structure(c(1001144, 1001464, 1001557, 1001557,
1001557, 1001557, 1001557, 1001557, 1001347, 1001409), label = "Census subdivision unique identifier", format.spss = "F7.0", display_width = 9L)), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
> dput(head(regions_name, 10))
structure(list(region = c(1, 35, 24, 59, 48, 46, 47, 12, 13,
10), name = c("Canada", "Ontario", "Quebec", "British Columbia",
"Alberta", "Manitoba", "Saskatchewan", "Nova Scotia", "New Brunswick",
"Newfoundland and Labrador")), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"), last_updated = structure(1648783776.07826, class = c("POSIXct",
"POSIXt")))
I don't understand it. I'm not doing anything complicated, yet I am being thrown this error. Any suggestions?
I would like to plot two graphs together that share the same x-axis. How can I do that?
My data can be built using this code:
df <-structure(list(SDTM_LabN = c("ALP", "AST", "ALT", "AST", "ALT",
"ALT", "ALP", "AST", "ALP", "AST", "ALP", "ALT", "ALP", "ALP",
"ALT", "AST", "ALT", "ALT", "ALT", "AST", "AST", "ALP", "AST",
"ALT", "ALP", "ALP", "AST"), ADY = structure(c(45, 15, 1, 1,
30, 58, 30, 45, 46, -6, 23, 46, -6, 15, 23, 46, 45, -6, 8, 30,
58, 58, 23, 15, 8, 1, 8), class = "difftime", units = "days"),
result = c(0.841269841269841, 0.578947368421053, 0.625, 0.552631578947368,
0.416666666666667, 0.3125, 0.936507936507937, 0.447368421052632,
0.634920634920635, 0.657894736842105, 0.873015873015873,
0.291666666666667, 0.73015873015873, 0.857142857142857, 0.5,
0.447368421052632, 0.479166666666667, 0.625, 0.604166666666667,
0.5, 0.526315789473684, 0.849206349206349, 0.526315789473684,
0.5, 1.00793650793651, 0.896825396825397, 0.894736842105263
)), row.names = c(NA, -27L), class = "data.frame")
df2<-structure(list(ID = c(101, 101, 101, 101, 101, 101), AEDECOD = c("Diarrhoea",
"Vitreous floaters", "Musculoskeletal pain", "Diarrhoea", "Decreased appetite",
"Fatigue"), AESTDY = structure(c(101, 74, 65, 2, 33, 27), class = "difftime", units = "days"),
AEENDY = structure(c(105, 99, NA, 5, NA, NA), class = "difftime", units = "days")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
My plot code is:
# Lab results over study day, one coloured line per lab test.
ggplot(df, aes(x = ADY, y = result, colour = SDTM_LabN)) +
  geom_line()

# Adverse events as horizontal segments from start day to end day,
# one row (and colour) per event term.
ggplot(
  df2,
  aes(
    x = AESTDY, xend = AEENDY,
    y = AEDECOD, yend = AEDECOD,
    colour = AEDECOD
  )
) +
  geom_segment() +
  xlab("Duration")
How can I get something that looks like this:
First, compute a common x-range (xmin to xmax) that covers both series.
Then combine the plots with patchwork, as suggested in the comments, or with cowplot:
# Common x-range for both plots. The segment END days (AEENDY) are included
# as well as the start days, so the limits cover every drawn segment.
# AEENDY contains NA (events without an end day), hence na.rm = TRUE.
xmin <- min(df$ADY, df2$AESTDY, df2$AEENDY, na.rm = TRUE)
xmax <- max(df$ADY, df2$AESTDY, df2$AEENDY, na.rm = TRUE)

# Top panel: lab results over study day.
p1 <- ggplot(df, aes(colour = SDTM_LabN)) +
  geom_line(aes(x = ADY, y = result)) +
  coord_cartesian(xlim = c(xmin, xmax))

# Bottom panel: adverse-event durations, on the same x limits.
p2 <- ggplot(df2, aes(colour = AEDECOD)) +
  geom_segment(aes(x = AESTDY, xend = AEENDY, y = AEDECOD, yend = AEDECOD)) +
  xlab("Duration") +
  coord_cartesian(xlim = c(xmin, xmax))

library(cowplot)

# Stack the two plots in one column, vertically aligned.
plot_grid(plotlist = list(p1, p2), align = "v", ncol = 1)
I am trying to develop a shiny dashboard app that is able to produce a bar graph for different outcome variables that can be selected by the user. To do so, I need to subset my data reactively to generate aggregate data frames. I am able to have the code below successfully filter my data reactively, but I am running into trouble when I try to use dplyr::summarise() reactively.
Here is my data
dput(head(df))
structure(
list(
geoid = c(
"01001020200",
"01001020300",
"01001020700",
"01001020802",
"01001021000",
"01001021100"
),
state = c(
"Alabama",
"Alabama",
"Alabama",
"Alabama",
"Alabama",
"Alabama"
),
county = c(
"Autauga County",
"Autauga County",
"Autauga County",
"Autauga County",
"Autauga County",
"Autauga County"
),
ozzone = structure(
c(1L, 1L, 2L, 1L, 1L, 1L),
.Label = c("non.oz", "oz"),
class = "factor"
),
tract_type = c(
"LICs",
"Contiguous",
"LICs",
"Contiguous",
"Contiguous",
"LICs"
),
investment_score_1_low_10_high = c(4,
6, 9, 10, 5, 6),
socioeconomic_change_flag_1_yes_blank_no = c(0,
0, 0, 0, 0, 0),
fips_county = c("01001", "01001", "01001", "01001",
"01001", "01001"),
total_empl = c(51809L, 51809L, 51809L, 51809L,
51809L, 51809L),
total_payroll = c(338395L, 338395L, 338395L,
338395L, 338395L, 338395L),
total_establishments = c(5090L, 5090L,
5090L, 5090L, 5090L, 5090L),
largest_employer = c(72L, 72L, 72L,
72L, 72L, 72L),
largest_employer_bypayroll = c(44L, 44L, 44L,
44L, 44L, 44L),
trend_employee_change = c(
2735.60000000046,
2735.60000000046,
2735.60000000046,
2735.60000000046,
2735.60000000046,
2735.60000000046
),
trend_payroll_change = c(
23074.8000000037,
23074.8000000037,
23074.8000000037,
23074.8000000037,
23074.8000000037,
23074.8000000037
),
trend_establishment_change = c(
53.4000000000084,
53.4000000000084,
53.4000000000084,
53.4000000000084,
53.4000000000084,
53.4000000000084
),
damage_cost_weather_total = c(20000, 20000, 20000, 20000,
20000, 20000),
deaths_weather_total = c(0L, 0L, 0L, 0L, 0L, 0L),
medianrent = c(537, 633, 525, 680, 409, 303),
vacancyrate = c(
0.108200455580866,
0.113652113652114,
0.0436681222707424,
0.0512166859791425,
0.229962546816479,
0.21030303030303
),
total_pop = c(503, 827, 900, 2989, 740, 813),
undertwo_percent = c(
0.391650099403579,
0.351874244256348,
0.397777777777778,
0.17096018735363,
0.301351351351351,
0.263222632226322
),
mobility_rate = c(
0.133702166897188,
0.0737753882915173,
0.196514423076923,
0.172716680111141,
0.0641304347826087,
0.0681084570690769
),
unemploy_rate = c(
0.0176991150442478,
0.0273203592814371,
0.109881724532621,
0.0127906976744186,
0.0344982078853047,
0.0281910728269381
),
median_income = c(41287, 46806, 41250, 64439,
46607, 36450),
renter_percent = c(
0.337653478854025,
0.310596310596311,
0.331877729257642,
0.268110942458949,
0.328686327077748,
0.365986394557823
),
blackaa_percent = c(
0.5451197053407,
0.264697193500739,
0.145906432748538,
0.152916262243007,
0.258583690987124,
0.530922930542341
),
hispanic_percent = c(
0.0105893186003683,
0.0803545051698671,
0.0400584795321637,
0.0137651107385511,
0.00822603719599428,
0.00666032350142721
),
transit_score_mean = c(0, 0, 0, 0, 0, 0),
life_expectancy = c(75.67, 75.67, 75.67, 75.67, 75.67, 75.67),
trend_life_expectancy = c(5.1, 5.1, 5.1, 5.1, 5.1, 5.1),
median_monthly_housing_costs = c(885,
885, 885, 885, 885, 885),
pestilence_2018 = c(2, 2, 2, 2, 2,
2),
total_pop_county = c(6772, 6772, 6772, 6772, 6772, 6772),
deaths_weather_pop = c(0, 0, 0, 0, 0, 0),
cost_weather_pop = c(
2.95333727111636,
2.95333727111636,
2.95333727111636,
2.95333727111636,
2.95333727111636,
2.95333727111636
),
Male_HSgrad = c(75, 68, 211, 189, 97,
42),
Male_SomeCollege = c(28, 18, 51, 111, 74, 38),
Male_AssocDeg = c(4,
6, 0, 63, 0, 21),
Male_BachDeg = c(7, 9, 0, 11, 0, 9),
Male_GradDeg = c(0,
0, 0, 29, 6, 0),
MaleEduAboveHS = c(114, 101, 262, 403, 177,
110),
Total_Male18.24 = c(145, 123, 285, 455, 202, 110),
MaleEduHSAbove_pop = c(
0.786206896551724,
0.821138211382114,
0.919298245614035,
0.885714285714286,
0.876237623762376,
1
),
Female_HSgrad = c(11, 60, 87, 156, 23, 83),
Female_SomeCollege = c(22,
25, 13, 47, 54, 65),
Female_AssocDeg = c(0, 0, 20, 82, 0,
0),
Female_BachDeg = c(5, 26, 0, 19, 0, 11),
Female_GradDeg = c(5,
16, 0, 0, 0, 0),
FemaleEduAboveHS = c(43, 127, 120, 304,
77, 159),
Total_Female18.24 = c(53, 127, 192, 581, 92, 198),
FemaleEduHSAbove_pop = c(
0.811320754716981,
1,
0.625,
0.523235800344234,
0.83695652173913,
0.803030303030303
)
),
row.names = c(NA,
6L),
class = "data.frame"
)
Here is my code
# Outcome variables the user can choose to plot. Each entry is used to look up
# a column of `df` by name, so it must match a column name exactly
# (e.g. "MaleEduHSAbove_pop", not "MaleEduAboveHS_pop").
variables <- c(
  "total_empl",
  "total_payroll",
  "total_establishments",
  "largest_employer",
  "largest_employer_bypayroll",
  "trend_employee_change",
  "trend_payroll_change",
  "trend_establishment_change",
  "damage_cost_weather_total",
  "deaths_weather_total",
  "medianrent",
  "vacancyrate",
  "total_pop",
  "undertwo_percent",
  "mobility_rate",
  "unemploy_rate",
  "median_income",
  "renter_percent",
  "blackaa_percent",
  "hispanic_percent",
  "median_monthly_housing_costs",
  "MaleEduHSAbove_pop",
  "FemaleEduHSAbove_pop"
)
# User inputs: the state to filter on and the outcome column to summarise.
selectInput(
  inputId = "state_name",
  label = "Select a state",
  choices = lookup
)
selectInput(
  inputId = "DV",
  label = "Outcome Measure",
  choices = variables
)
# Reactive summary: filter `df` to the selected state, then compute the mean
# of the selected outcome column (input$DV) for each tract_type.
bar <- reactive({
# Rows for the state chosen in the 'state_name' input.
st <- df %>%
filter(state == input$state_name)
# NOTE(review): `st[,input$DV]` below indexes the whole filtered data frame
# `st` rather than the current group, so mean() sees the same full column for
# every tract_type and each group gets the identical overall mean.
bp <- st %>%
group_by(tract_type) %>%
summarise(Outcome = mean(st[,input$DV]))
return(bp)
})
bar
UPDATE
Right now, this code successfully filters the data by the input$state_name, but there is an issue with the calculation of means. The result is this:
# A tibble: 2 x 2
tract_type Outcome
<chr> <dbl>
1 Contiguous 468296.
2 LICs 468296.
As you can see, the means that are calculated are identical. In fact, these values correspond to the grand average mean for whichever variable is chosen for input$DV. Therefore, the filtered st data is not being successfully grouped into the two levels of tract_type.
I see what you are trying to do. The difference is that in your reactive part you try to calculate the mean of a string, which won't work. What you want to do is summarise one of the columns in df by providing its name.
In the following example, I specify the summarising variable manually. Note that investment_score_1_low_10_high does not have quotes. investment_score_1_low_10_high is what is called a symbol in R.
# Mean investment score per tract type for Alabama; the column is referred to
# by its bare (unquoted) name.
st <- df %>%
  filter(state == "Alabama") %>%
  group_by(tract_type) %>%
  summarise(
    Outcome = mean(investment_score_1_low_10_high)
  )
But I think this should work:
bar <- reactive({
  # input$DV is a column name held as a string; sym() turns it into a symbol
  # that !! can inject into summarise().
  outcome_sym <- sym(input$DV)

  # Mean of the chosen outcome per tract type within the selected state.
  df %>%
    filter(state == input$state_name) %>%
    group_by(tract_type) %>%
    summarise(Outcome = mean(!!outcome_sym, na.rm = TRUE))
})
Extra information about the use of !! and what it does can be found here: Here
And even better with examples Here
Solution derived from @dylanvanw's answer:
bar <- reactive({
  # Convert the selected outcome name (a string) into a symbol.
  outcome_col <- sym(input$DV)

  # Restrict to the selected state before grouping.
  state_rows <- filter(df, state == input$state_name)

  # One row per tract type with the mean of the chosen outcome (NAs ignored).
  state_rows %>%
    group_by(tract_type) %>%
    summarise(Outcome = mean(!!outcome_col, na.rm = TRUE))
})