How to summarise (dplyr) user specified variables reactively in flexdashboard/shiny? - r

I am trying to develop a shiny dashboard app that is able to produce a bar graph for different outcome variables that can be selected by the user. To do so, I need to subset my data reactively to generate aggregate data frames. I am able to have the code below successfully filter my data reactively, but I am running into trouble when I try to use dplyr::summarise() reactively.
Here is my data
dput(head(df))
structure(
list(
geoid = c(
"01001020200",
"01001020300",
"01001020700",
"01001020802",
"01001021000",
"01001021100"
),
state = c(
"Alabama",
"Alabama",
"Alabama",
"Alabama",
"Alabama",
"Alabama"
),
county = c(
"Autauga County",
"Autauga County",
"Autauga County",
"Autauga County",
"Autauga County",
"Autauga County"
),
ozzone = structure(
c(1L, 1L, 2L, 1L, 1L, 1L),
.Label = c("non.oz", "oz"),
class = "factor"
),
tract_type = c(
"LICs",
"Contiguous",
"LICs",
"Contiguous",
"Contiguous",
"LICs"
),
investment_score_1_low_10_high = c(4,
6, 9, 10, 5, 6),
socioeconomic_change_flag_1_yes_blank_no = c(0,
0, 0, 0, 0, 0),
fips_county = c("01001", "01001", "01001", "01001",
"01001", "01001"),
total_empl = c(51809L, 51809L, 51809L, 51809L,
51809L, 51809L),
total_payroll = c(338395L, 338395L, 338395L,
338395L, 338395L, 338395L),
total_establishments = c(5090L, 5090L,
5090L, 5090L, 5090L, 5090L),
largest_employer = c(72L, 72L, 72L,
72L, 72L, 72L),
largest_employer_bypayroll = c(44L, 44L, 44L,
44L, 44L, 44L),
trend_employee_change = c(
2735.60000000046,
2735.60000000046,
2735.60000000046,
2735.60000000046,
2735.60000000046,
2735.60000000046
),
trend_payroll_change = c(
23074.8000000037,
23074.8000000037,
23074.8000000037,
23074.8000000037,
23074.8000000037,
23074.8000000037
),
trend_establishment_change = c(
53.4000000000084,
53.4000000000084,
53.4000000000084,
53.4000000000084,
53.4000000000084,
53.4000000000084
),
damage_cost_weather_total = c(20000, 20000, 20000, 20000,
20000, 20000),
deaths_weather_total = c(0L, 0L, 0L, 0L, 0L, 0L),
medianrent = c(537, 633, 525, 680, 409, 303),
vacancyrate = c(
0.108200455580866,
0.113652113652114,
0.0436681222707424,
0.0512166859791425,
0.229962546816479,
0.21030303030303
),
total_pop = c(503, 827, 900, 2989, 740, 813),
undertwo_percent = c(
0.391650099403579,
0.351874244256348,
0.397777777777778,
0.17096018735363,
0.301351351351351,
0.263222632226322
),
mobility_rate = c(
0.133702166897188,
0.0737753882915173,
0.196514423076923,
0.172716680111141,
0.0641304347826087,
0.0681084570690769
),
unemploy_rate = c(
0.0176991150442478,
0.0273203592814371,
0.109881724532621,
0.0127906976744186,
0.0344982078853047,
0.0281910728269381
),
median_income = c(41287, 46806, 41250, 64439,
46607, 36450),
renter_percent = c(
0.337653478854025,
0.310596310596311,
0.331877729257642,
0.268110942458949,
0.328686327077748,
0.365986394557823
),
blackaa_percent = c(
0.5451197053407,
0.264697193500739,
0.145906432748538,
0.152916262243007,
0.258583690987124,
0.530922930542341
),
hispanic_percent = c(
0.0105893186003683,
0.0803545051698671,
0.0400584795321637,
0.0137651107385511,
0.00822603719599428,
0.00666032350142721
),
transit_score_mean = c(0, 0, 0, 0, 0, 0),
life_expectancy = c(75.67, 75.67, 75.67, 75.67, 75.67, 75.67),
trend_life_expectancy = c(5.1, 5.1, 5.1, 5.1, 5.1, 5.1),
median_monthly_housing_costs = c(885,
885, 885, 885, 885, 885),
pestilence_2018 = c(2, 2, 2, 2, 2,
2),
total_pop_county = c(6772, 6772, 6772, 6772, 6772, 6772),
deaths_weather_pop = c(0, 0, 0, 0, 0, 0),
cost_weather_pop = c(
2.95333727111636,
2.95333727111636,
2.95333727111636,
2.95333727111636,
2.95333727111636,
2.95333727111636
),
Male_HSgrad = c(75, 68, 211, 189, 97,
42),
Male_SomeCollege = c(28, 18, 51, 111, 74, 38),
Male_AssocDeg = c(4,
6, 0, 63, 0, 21),
Male_BachDeg = c(7, 9, 0, 11, 0, 9),
Male_GradDeg = c(0,
0, 0, 29, 6, 0),
MaleEduAboveHS = c(114, 101, 262, 403, 177,
110),
Total_Male18.24 = c(145, 123, 285, 455, 202, 110),
MaleEduHSAbove_pop = c(
0.786206896551724,
0.821138211382114,
0.919298245614035,
0.885714285714286,
0.876237623762376,
1
),
Female_HSgrad = c(11, 60, 87, 156, 23, 83),
Female_SomeCollege = c(22,
25, 13, 47, 54, 65),
Female_AssocDeg = c(0, 0, 20, 82, 0,
0),
Female_BachDeg = c(5, 26, 0, 19, 0, 11),
Female_GradDeg = c(5,
16, 0, 0, 0, 0),
FemaleEduAboveHS = c(43, 127, 120, 304,
77, 159),
Total_Female18.24 = c(53, 127, 192, 581, 92, 198),
FemaleEduHSAbove_pop = c(
0.811320754716981,
1,
0.625,
0.523235800344234,
0.83695652173913,
0.803030303030303
)
),
row.names = c(NA,
6L),
class = "data.frame"
)
Here is my code
#List of potential outcome variables to be plotted
variables <- c("total_empl", "total_payroll", "total_establishments", "largest_employer", "largest_employer_bypayroll", "trend_employee_change", "trend_payroll_change", "trend_establishment_change", "damage_cost_weather_total", "deaths_weather_total", "medianrent", "vacancyrate", "total_pop", "undertwo_percent", "mobility_rate", "unemploy_rate", "median_income", "renter_percent", "blackaa_percent", "hispanic_percent", "median_monthly_housing_costs", "MaleEduAboveHS_pop", "FemaleEduHSAbove_pop")
# Define inputs
selectInput('state_name', label = 'Select a state', choices = lookup)
selectInput('DV', label = 'Outcome Measure', choices = variables)
#Filter data based on the State and outcome measure the user would like to investigate.
bar <- reactive({
st <- df %>%
filter(state == input$state_name)
bp <- st %>%
group_by(tract_type) %>%
summarise(Outcome = mean(st[,input$DV]))
return(bp)
})
bar
UPDATE
Right now, this code successfully filters the data by the input$state_name, but there is an issue with the calculation of means. The result is this:
# A tibble: 2 x 2
tract_type Outcome
<chr> <dbl>
1 Contiguous 468296.
2 LICs 468296.
As you can see, the means that are calculated are identical. In fact, these values correspond to the grand average mean for whichever variable is chosen for input$DV. Therefore, the filtered st data is not being successfully grouped into the two levels of tract_type.

I see what you are trying to do. The difference is that in your reactive part you try to calculate the mean of a string, which won't work. What you want to do is summarise one of the columns in df by providing the name
In the following example, I specify the summarising variable manually. Note that investment_score_1_low_10_high does not have quotes. investment_score_1_low_10_high is what is called a symbol in R.
st <- df %>%
filter(state == "Alabama") %>%
group_by(tract_type) %>%
summarise(Outcome = mean(investment_score_1_low_10_high))
But I think this should work:
bar <- reactive({
# Create a symbol from string.
mean_variable <- sym(input$DV)
bp <- df %>%
filter(state == input$state_name) %>%
group_by(tract_type) %>%
summarise(Outcome = mean(!! mean_variable, na.rm = TRUE))
return(bp)
})
Extra information about the use of !! and what it does can be found here: Here
And even better with examples Here

Solution derived by #dylanvanw
bar <- reactive({
# Create a symbol from string.
mean_variable <- sym(input$DV)
bp <- df %>%
filter(state == input$state_name) %>%
group_by(tract_type) %>%
summarise(Outcome = mean(!! mean_variable, na.rm = TRUE))
return(bp)
})

Related

Create mean value plot without missing values count to total

Using a dataframe with missing values:
structure(list(id = c("id1", "test", "rew", "ewt"), total_frq_1 = c(54, 87, 10, 36), total_frq_2 = c(45, 24, 202, 43), total_frq_3 = c(24, NA, 25, 8), total_frq_4 = c(36, NA, 104, NA)), row.names = c(NA, 4L), class = "data.frame")
How is is possible to create a bar plot with the mean for every column, excluding the id column, but without filling the missing values with 0 but leaving out the row with missing values example for total_frq_3 24+25+8 = 57/3 = 19
You can use colMeans function and pass it the appropriate argument to ignore NA.
library(ggplot2)
xy <- structure(list(id = c("id1", "test", "rew", "ewt"),
total_frq_1 = c(54, 87, 10, 36), total_frq_2 = c(45, 24, 202, 43), total_frq_3 = c(24, NA, 25, 8),
total_frq_4 = c(36, NA, 104, NA)),
row.names = c(NA, 4L),
class = "data.frame")
xy.means <- colMeans(x = xy[, 2:ncol(xy)], na.rm = TRUE)
xy.means <- as.data.frame(xy.means)
xy.means$total <- rownames(xy.means)
ggplot(xy.means, aes(x = total, y = xy.means)) +
theme_bw() +
geom_col()
Or just use base image graphic
barplot(height = colMeans(x = xy[, 2:ncol(xy)], na.rm = TRUE))

Reshape long to wide with multiple variables

I have a long df in R that looks like this:
structure(list(pta = c("636", "899", "989", "1007", "561"), cafta_similarity = c(0.81468368791454,
0.68814557488039, 0.96371483934995, 0.71527668922595, 0.69435303348955
), iso3n = c(124, 124, 124, 124, 152), ccode = c(20, 20, 20,
20, 155), country = c("Canada", "Canada", "Canada", "Canada",
"Chile"), year = c("1992", "2016", "2018", "2018", "1960"), gdppc = c(20879.8483300891,
42315.6037056806, 46548.6384108296, 46548.6384108296, 505.349325487754
), polity2 = c(10, 10, 10, 10, 5), openness = c(52.7380309449972,
65.3636199818813, 66.5818530921921, 66.5818530921921, 46.9745037862152
), hog_right = c(3, 0, 0, 0, 3), hog_left = c(0L, 1L, 1L, 1L,
0L), hog_center = c(0, 0, 0, 0, 0)), class = c("data.table",
"data.frame"), row.names = c(NA, -5L), .internal.selfref = <pointer: 0x7fca56811ae0>)
What I am trying to do is to get it wide across all variables so that I can compute averages on a dyadic level. Basically what I want is pta.1, pta.2, iso3n.1, iso3n.2 and etc... Does anyone know how I can do this?
I have looked at most answers here on reshaping data and tried some but nothing seems to work.
Perhaps this helps
library(data.table)
dcast(df, country + year ~ rowid(country, year), value.var = c("pta", "iso3n"), sep = ".")

How to apply functions depending on the column, and mutate into new data frame?

I came up with the idea to represent stats on a chart like this. Example of the plot. And made it like this.
df_n <- df_normalized %>%
transmute(
Height_x = round(Height*cos_my(45), 2),
Height_y = round(Height*sin_my(45), 2),
Weight_x = round(Weight*cos_my(45*2), 2),
Weight_y = round(Weight*sin_my(45*2), 2),
Reach_x = round(Reach*cos_my(45*3), 2),
Reach_y = round(Reach*sin_my(45*3), 2),
SLpM_x = round(SLpM*cos_my(45*4), 2),
SLpM_y = round(SLpM*sin_my(45*4), 2),
Str_Def_x = round(`Str_Def %`*cos_my(45*5), 2),
Str_Def_y = round(`Str_Def %`*sin_my(45*5), 2),
TD_Avg_x = round(TD_Avg*cos_my(45*6), 2),
TD_Avg_y = round(TD_Avg*sin_my(45*6), 2),
TD_Acc_x = round(`TD_Acc %`*cos_my(45*7), 2),
TD_Acc_y = round(`TD_Acc %`*sin_my(45*7), 2),
Sub_Avg_x = round(Sub_Avg*cos_my(45*8), 2),
Sub_Avg_y = round(Sub_Avg*sin_my(45*8), 2))
Now I want to do this smart way, so I created a data frame with same number of rows empty_df, and later in for loop I try to mutate and array, with every iteration. So for example I want to multiply 1st column by cos(30), 2nd by cos(30*2), and so on
But...
It mutate only last column because all columns during iteration have the same name 'column'.
I want to name each column by the variable column, made with paste0().
reprex_df <- structure(list(Height = c(190, 180, 183, 196, 185),
Weight = c(120, 77, 93, 120, 84),
Reach = c(193, 180, 188, 203, 193),
SLpM = c(2.45, 3.8, 2.05, 7.09, 3.17),
`Str_Def %` = c(58, 56, 55, 34, 44),
TD_Avg = c(1.23, 0.33, 0.64, 0.91, 0),
`TD_Acc %` = c(24, 50, 20, 66, 0),
Sub_Avg = c(0.2, 0, 0, 0, 0)), row.names = c(NA, -5L),
class = c("tbl_df", "tbl", "data.frame"))
temp <- apply(reprex_df[,1], function(x) x*cos(60), MARGIN = 2)
temp
empty_df <- data.frame(first_column = replicate(length(temp),1))
for (x in 1:8) {
temp <- apply(df[,x], function(x) round(x*cos((360/8)*x),2), MARGIN = 2)
column <- paste0("Column_",x)
empty_df <- mutate(empty_df, column = temp)
}
Later I want to make it a function where I can pass data frame and receive data frame with X, and Y coordinates.
So, how should I make it?
Perhaps this helps
library(purrr)
library(stringr)
nm1 <- names(reprex_df)
nm_cos <- str_c(names(reprex_df), "_x")
nm_sin <- str_c(names(reprex_df), "_y")
reprex_df[nm_cos] <- map2(reprex_df, seq_along(nm1),
~ round(.x * cos(45 *.y ), 2))
reprex_df[nm_sin] <- map2(reprex_df[nm1], seq_along(nm1),
~ round(.x * sin(45 *.y ), 2))

Build curves of populations in function of time

In my work i'm studying a lot of varieties of maize.
I would like to determinate the area under the curve during flowering (male and female) of these varieties.
I used the package DescTools and the function AUC (area under the curve). I converted my dates as a numeric vector. So my scipt is:
a<-XAUC$Date.flowering.male
b<-XAUC$Date.flowering.female
c<- XAUC$....
Here is my issue, because i would like to identify c as the population as function of time. How can i do this?
In this picture: the first graph is what i have and the second is what i would like to have.
and then the end of my script will be:
AUCfemale<-AUC(b,c,method = c("trapezoid"))
AUCmale<-AUC(a,c,method = c("trapezoid"))
Airdiff<-AUCmale-AUCfemale
Data
XAUC <- structure(list(Varietes = c("Abelastone", "Abelastone", "Abelastone", "Abelastone", "Abelastone"), ligne.rep = c(1, 1, 1, 1, 1), Pied = c(1, 2, 3, 6, 7), `Date.floraison.mâle` = c(7.29, 8.02, 8.01, 8.03, 8.04), Date.floraison.femelle = c(8.1, 8.17, 8.11, 8.25, 8.17 ), ASIi = c(12, 15, 10, 22, 13), Hauteur.des.pieds = c(230, 228, 226, 240, 233), Hauteur.des.soies = c(123, 118, 116, 124, 122), Date.floraison.mâle.graph = c(29, 33, 32, 34, 35), Date.floraison.femelle.graph = c(41, 48, 42, 56, 48)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"), na.action = structure(c("6" = 6L, "10" = 10L, "20" = 20L, "21" = 21L, "24" = 24L), class = "omit"))

Label group of plots

I merged nine plots together and I would like to group them based on different characteristics (A,B,C). Is there a simple way to add labels or annotations at the bottom of plots? When using cowplot or GridExtra i receive the following error:
In as_grob.default(plot) :
Cannot convert object of class list into a grob.
Sample data
list(list(stats = structure(c(43, 96.5, 297.5, 707.5, 778), .Dim = c(5L,
1L)), n = 36, conf = structure(c(136.603333333333, 458.396666666667
), .Dim = 2:1), out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(2, 10.5, 55.5, 102, 128), .Dim = c(5L,
1L)), n = 36, conf = structure(c(31.405, 79.595), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(1, 3, 5.5, 77, 88), .Dim = c(5L,
1L)), n = 36, conf = structure(c(-13.9866666666667, 24.9866666666667
), .Dim = 2:1), out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(531, 632.5, 701, 726.5, 786), .Dim = c(5L,
1L)), n = 36, conf = structure(c(676.246666666667, 725.753333333333
), .Dim = 2:1), out = c(485, 464, 446), group = c(1, 1, 1
), names = ""), list(stats = structure(c(104,
109.5, 113.5, 121, 125), .Dim = c(5L, 1L)), n = 36, conf = structure(c(110.471666666667,
116.528333333333), .Dim = 2:1), out = c(91, 91, 88, 84, 84,
79), group = c(1, 1, 1, 1, 1, 1), names = ""),
list(stats = structure(c(28, 53.5, 83.5, 88, 91), .Dim = c(5L,
1L)), n = 36, conf = structure(c(74.415, 92.585), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(80, 89, 102.5, 153, 236), .Dim = c(5L,
1L)), n = 36, conf = structure(c(85.6466666666667, 119.353333333333
), .Dim = 2:1), out = c(343, 318, 299, 257), group = c(1,
1, 1, 1), names = """"), list(stats = structure(c(7,
12, 22.5, 44, 72), .Dim = c(5L, 1L)), n = 36, conf = structure(c(14.0733333333333,
30.9266666666667), .Dim = 2:1), out = numeric(0), group = numeric(0),
names = ""), list(stats = structure(c(5,
5, 6, 12.5, 21), .Dim = c(5L, 1L)), n = 36, conf = structure(c(4.025,
7.975), .Dim = 2:1), out = numeric(0), group = numeric(0),
names = ""))
Many thanks
I agree with the idea of using ggplot2 graphics with facets, but given your plot objects, you could do something like this (to get you started). I used ggplotify instead of cowplot because I ran into trouble with the figure margins, but you might be able to fix that by changing the null device (not tested).
Edit:
Added individual labels and y axis labels, as well as outer margins. You might have to adjust some of that depending on the output size of your composite plot. This may show you how you could adjust those settings for individual plots. Still, using ggplot2 to generate the plots would make things quite a bit easier.
library(grid)
library(gridExtra)
library(ggplotify)
sdt <- list(list(stats = structure(c(43, 96.5, 297.5, 707.5, 778), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(136.603333333333, 458.396666666667), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(2, 10.5, 55.5, 102, 128), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(31.405, 79.595), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(1, 3, 5.5, 77, 88), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(-13.9866666666667, 24.9866666666667), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(531, 632.5, 701, 726.5, 786), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(676.246666666667, 725.753333333333), .Dim = 2:1),
out = c(485, 464, 446), group = c(1, 1, 1), names = ""),
list(stats = structure(c(104, 109.5, 113.5, 121, 125), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(110.471666666667, 116.528333333333), .Dim = 2:1),
out = c(91, 91, 88, 84, 84, 79), group = c(1, 1, 1, 1, 1, 1), names = ""),
list(stats = structure(c(28, 53.5, 83.5, 88, 91), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(74.415, 92.585), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(80, 89, 102.5, 153, 236), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(85.6466666666667, 119.353333333333), .Dim = 2:1),
out = c(343, 318, 299, 257), group = c(1,1, 1, 1), names = ""),
list(stats = structure(c(7, 12, 22.5, 44, 72), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(14.0733333333333, 30.9266666666667), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""),
list(stats = structure(c(5, 5, 6, 12.5, 21), .Dim = c(5L, 1L)),
n = 36, conf = structure(c(4.025, 7.975), .Dim = 2:1),
out = numeric(0), group = numeric(0), names = ""))
sublabels <- paste0(rep(LETTERS[1:3], each=3), 1:3)
gplts <- lapply(1:9, function(x) as.grob(function(y=sdt[[x]]) {
par(oma=c(0,3,0,3))
bxp(y, ylab="values", main=sublabels[x])}))
grid.arrange(rectGrob(gp=gpar(col="red")), rectGrob(gp=gpar(col="green")),
rectGrob(gp=gpar(col="yellow")), nrow=1, newpage =T)
vp <- viewport(.33/2,0.45, gp = gpar(col="red"))
grid.text("Group A",
y = .1, just = c("center", "bottom"),
gp = gpar(fontsize=20), vp = vp)
vp <- viewport(.5,.45, gp = gpar(col="green"))
grid.text("Group B",
y = .1, just = c("center", "bottom"),
gp = gpar(fontsize=20), vp = vp)
vp <- viewport(1-(.33/2),.45, gp = gpar(col="yellow"))
grid.text("Group C",
y = .1, just = c("center", "bottom"),
gp = gpar(fontsize=20), vp = vp)
grid.arrange(grobs=gplts, nrow=1, newpage=F)
Created on 2021-03-25 by the reprex package (v1.0.0)

Resources