Disaggregate in the context of a time series - r

I have a dataset that I want to visualize overall and disaggregated by a few different variables. I created a flexdashboard with a toy shiny app to select the type of disaggregation, and working code to plot the correct subset.
My approach is repetitive, which is a hint to me that I'm missing out on a better way to do this. The piece that's tripping me up is the need to count by date and expand the matrix. I'm not sure how get group counts by week in one pipe. I do it in several steps and combine.
Thoughts?
(ps. I asked this question on RStudio Community, but I think it's probably more of a "SO question". I don't have permissions to delete it from RSC, so apologies for the cross-post.)
---
title: "test"
output:
flexdashboard::flex_dashboard:
theme: bootstrap
runtime: shiny
---
```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(tibbletime)
library(dygraphs)
library(magrittr)
library(xts)
```
```{r global, include=FALSE}
set.seed(1)
dat <- data.frame(date = seq(as.Date("2018-01-01"),
as.Date("2018-06-30"),
"days"),
sex = sample(c("male", "female"), 181, replace=TRUE),
lang = sample(c("english", "spanish"), 181, replace=TRUE),
age = sample(20:35, 181, replace=TRUE))
dat <- sample_n(dat, 80)
```
Sidebar {.sidebar}
=====================================
```{r}
radioButtons("diss", label = "Disaggregation",
choices = list("All" = 1, "By Sex" = 2, "By Language" = 3),
selected = 1)
```
Page 1
=====================================
```{r}
# all
all <- reactive(
dat %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
as_tbl_time(index = date) %>% # convert to tibble time object
select(date, new) %>%
collapse_by('1 week', side="start", clean=TRUE) %>%
group_by(date) %>%
mutate(total = sum(new, na.rm=TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(date = seq(date[1],
date[length(date)],
by = "1 week"),
fill = list(total = 0))
)
# males only
males <- reactive(
dat %>%
filter(sex=="male") %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
as_tbl_time(index = date) %>%
select(date, new) %>%
collapse_by('1 week', side="start", clean=TRUE) %>%
group_by(date) %>%
mutate(total_m = sum(new, na.rm=TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(date = seq(date[1],
date[length(date)],
by = "1 week"),
fill = list(total_m = 0))
)
# females only
females <- reactive(
dat %>%
filter(sex=="female") %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
as_tbl_time(index = date) %>%
select(date, new) %>%
collapse_by('1 week', side="start", clean=TRUE) %>%
group_by(date) %>%
mutate(total_f = sum(new, na.rm=TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(date = seq(date[1],
date[length(date)],
by = "1 week"),
fill = list(total_f = 0))
)
# english only
english <- reactive(
dat %>%
filter(lang=="english") %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
as_tbl_time(index = date) %>%
select(date, new) %>%
collapse_by('1 week', side="start", clean=TRUE) %>%
group_by(date) %>%
mutate(total_e = sum(new, na.rm=TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(date = seq(date[1],
date[length(date)],
by = "1 week"),
fill = list(total_e = 0))
)
# spanish only
spanish <- reactive(
dat %>%
filter(lang=="spanish") %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
as_tbl_time(index = date) %>%
select(date, new) %>%
collapse_by('1 week', side="start", clean=TRUE) %>%
group_by(date) %>%
mutate(total_s = sum(new, na.rm=TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(date = seq(date[1],
date[length(date)],
by = "1 week"),
fill = list(total_s = 0))
)
# combine
totals <- reactive({
all <- all()
females <- females()
males <- males()
english <- english()
spanish <- spanish()
all %>%
select(date, total) %>%
full_join(select(females, date, total_f), by = "date") %>%
full_join(select(males, date, total_m), by = "date") %>%
full_join(select(english, date, total_e), by = "date") %>%
full_join(select(spanish, date, total_s), by = "date")
})
# convert to xts
totals_ <- reactive({
totals <- totals()
xts(totals, order.by = totals$date)
})
# plot
renderDygraph({
totals_ <- totals_()
if (input$diss == 1) {
dygraph(totals_[, "total"],
main= "All") %>%
dySeries("total", label = "All") %>%
dyRangeSelector() %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
} else if (input$diss == 2) {
dygraph(totals_[, c("total_f", "total_m")],
main = "By sex") %>%
dyRangeSelector() %>%
dySeries("total_f", label = "Female") %>%
dySeries("total_m", label = "Male") %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
} else {
dygraph(totals_[, c("total_e", "total_s")],
main = "By language") %>%
dyRangeSelector() %>%
dySeries("total_e", label = "English") %>%
dySeries("total_s", label = "Spanish") %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
}
})
```
Update:
#Jon Spring suggested writing a function to reduce some repetition (applied below), which is a nice improvement. The basic approach is the same, however. Segment, calculate, combine, plot. Is there a way to do this without breaking apart and putting back together?
---
title: "test"
output:
flexdashboard::flex_dashboard:
theme: bootstrap
runtime: shiny
---
```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(tibbletime)
library(dygraphs)
library(magrittr)
library(xts)
```
```{r global, include=FALSE}
# generate data
set.seed(1)
dat <- data.frame(date = seq(as.Date("2018-01-01"),
as.Date("2018-06-30"),
"days"),
sex = sample(c("male", "female"), 181, replace=TRUE),
lang = sample(c("english", "spanish"), 181, replace=TRUE),
age = sample(20:35, 181, replace=TRUE))
dat <- sample_n(dat, 80)
# Jon Spring's function
prep_dat <- function(filtered_dat, col_name = "total") {
filtered_dat %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
tibbletime::as_tbl_time(index = date) %>% # convert to tibble time object
select(date, new) %>%
tibbletime::collapse_by("1 week", side = "start", clean = TRUE) %>%
group_by(date) %>%
mutate(total = sum(new, na.rm = TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(
date = seq(date[1], date[length(date)], by = "1 week"),
fill = list(total = 0)
)
}
```
Sidebar {.sidebar}
=====================================
```{r}
radioButtons("diss", label = "Disaggregation",
choices = list("All" = 1, "By Sex" = 2, "By Language" = 3),
selected = 1)
```
Page 1
=====================================
```{r}
# all
all <- reactive(
prep_dat(dat)
)
# males only
males <- reactive(
prep_dat(
dat %>%
filter(sex == "male")
) %>%
rename("total_m" = "total")
)
# females only
females <- reactive(
prep_dat(
dat %>%
filter(sex == "female")
) %>%
rename("total_f" = "total")
)
# english only
english <- reactive(
prep_dat(
dat %>%
filter(lang == "english")
) %>%
rename("total_e" = "total")
)
# spanish only
spanish <- reactive(
prep_dat(
dat %>%
filter(lang == "spanish")
) %>%
rename("total_s" = "total")
)
# combine
totals <- reactive({
all <- all()
females <- females()
males <- males()
english <- english()
spanish <- spanish()
all %>%
select(date, total) %>%
full_join(select(females, date, total_f), by = "date") %>%
full_join(select(males, date, total_m), by = "date") %>%
full_join(select(english, date, total_e), by = "date") %>%
full_join(select(spanish, date, total_s), by = "date")
})
# convert to xts
totals_ <- reactive({
totals <- totals()
xts(totals, order.by = totals$date)
})
# plot
renderDygraph({
totals_ <- totals_()
if (input$diss == 1) {
dygraph(totals_[, "total"],
main= "All") %>%
dySeries("total", label = "All") %>%
dyRangeSelector() %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
} else if (input$diss == 2) {
dygraph(totals_[, c("total_f", "total_m")],
main = "By sex") %>%
dyRangeSelector() %>%
dySeries("total_f", label = "Female") %>%
dySeries("total_m", label = "Male") %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
} else {
dygraph(totals_[, c("total_e", "total_s")],
main = "By language") %>%
dyRangeSelector() %>%
dySeries("total_e", label = "English") %>%
dySeries("total_s", label = "Spanish") %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
}
})
```

Thanks for explaining more about your goals. I think the approach #simon-s-a suggests will simplify things. If we can run the grouping dynamically, and structure it so that we don't need to know the possible components in those groups beforehand, it will be a lot easier to maintain.
Here's a minimum viable product that rebuilds the plotting function to include the grouping logic inside it.
Once grouped by date and whatever our grouping variable is, it counts how many rows each group has, then spreads those so each group gets a column.
Then I use padr::pad to pad out any missing time rows in between, and replace all the NA's with zeros.
Finally, that data frame is converted to an xts object and fed into dygraph, which seems to handle the multiple columns automatically.
Here:
---
title: "test"
output:
flexdashboard::flex_dashboard:
theme: bootstrap
runtime: shiny
---
```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(tibbletime)
library(dygraphs)
library(magrittr)
library(xts)
```
```{r global, include=FALSE}
# generate data
set.seed(1)
dat <- data.frame(date = seq(as.Date("2018-01-01"),
as.Date("2018-06-30"),
"days"),
sex = sample(c("male", "female"), 181, replace=TRUE),
lang = sample(c("english", "spanish"), 181, replace=TRUE),
age = sample(20:35, 181, replace=TRUE))
dat <- dplyr::sample_n(dat, 80)
```
Sidebar {.sidebar}
=====================================
```{r}
radioButtons("diss", label = "Disaggregation",
choices = list("All" = "Total",
"By Sex" = "sex",
"By Language" = "lang"),
selected = "Total")
```
Page 1
=====================================
```{r plot}
renderDygraph({
grp_col <- rlang::sym(input$diss) # This converts the input selection to a symbol
dat %>%
mutate(Total = 1) %>% # This is a hack to let us "group" by Total -- all one group
# Here's where we unquote the symbol so that dplyr can use it
# to refer to a column. In this case I make a dummy column
# that's a copy of whatever column we want to group
mutate(my_group = !!grp_col) %>%
# Now we make a group for every existing combination of week
# (using lubridate::floor_date) and level of our grouping column,
# count how many rows in each group, and spread that to wide format.
group_by(date = lubridate::floor_date(date, "1 week"), my_group) %>%
count() %>% spread(my_group, n) %>% ungroup() %>%
# padr:pad() fills in any missing weeks in the sequence with new rows
# Then we replace all the NA's with zeroes.
padr::pad() %>% replace(is.na(.), 0) %>%
# Finally we can convert to xts and feed the wide table into digraph.
xts::xts(order.by = .$date) %>%
dygraph() %>%
dyRangeSelector() %>%
dyOptions(
useDataTimezone = FALSE, stepPlot = TRUE,
drawGrid = FALSE, fillGraph = TRUE
)
})
```

This is a good place to make a function, to shorten your code and make it less prone to error.
http://r4ds.had.co.nz/functions.html
A complicating bit is that programming with dplyr often requires wading into a framework called tidyeval, which is very powerful but can be intimidating.
https://dplyr.tidyverse.org/articles/programming.html
(Here's an alternative approach that sidesteps tidyeval: https://cran.r-project.org/web/packages/seplyr/vignettes/using_seplyr.html)
In your scenario, it's possible to avoid these challenges entirely by doing a bit of manipulation before and after your function. It's not as elegant, but works.
BTW, I can't guarantee it'll work since you didn't share a verifiable reprex (e.g. including a sample of data with the same form as yours), but it worked with the fake data I made up. (See bottom.) Sorry, I missed the chunk where your sample data was provided.
prep_dat <- function(filtered_dat, col_name = "total") {
filtered_dat %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
tibbletime::as_tbl_time(index = date) %>% # convert to tibble time object
select(date, new) %>%
tibbletime::collapse_by("1 week", side = "start", clean = TRUE) %>%
group_by(date) %>%
mutate(total = sum(new, na.rm = TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(
date = seq(date[1], date[length(date)], by = "1 week"),
fill = list(total = 0)
)
}
Then you could call it with your filtered data and the name of the total column. This fragment should be able to replace the ~20 lines you're currently using:
males <- prep_dat(dat_fake %>%
filter(sex == "male")) %>%
rename("total_m" = "total")
Fake data that I tested on:
dat_fake <- tibble(
date = as.Date("2018-01-01") + runif(500, 0, 100),
new = runif(500, 0, 100),
sex = sample(c("male", "female"),
500, replace = TRUE),
lang = sample(c("english", "french", "spanish", "portuguese", "tagalog"),
500, replace = TRUE)
)

I think you can make some gains by changing the order of your preparation. Right now the flow of your app is approximately:
Data => prepare all combinations => select desired visualization => make plot
Consider instead:
Data => select desired visualization => prepare required combination => make plot
This would make use of Shiny's reactivity to (re)prepare the data required for the requested plot in response to changes in the user's selection.
By way of code snippets (Sorry, I don't have sufficient familiarity with flexdashboard and tibbletime to ensure this code runs, but I hope it is enough to highlight the approach):
Your control selects the column you want to focus on (note we use "All" = "'1'" so this evaluates to a constant in the group-by, else it has to be handled separately):
radioButtons("diss", label = "Disaggregation",
choices = list("All" = "'1'",
"By Sex" = "sex",
"By Language" = "lang",
"By other" = "column_name_of_'other'"),
selected = 1)
And then use this in your group by to prepare only the data required for the present visualization (you'll need to adjust the function suggested by #Jon_Spring in response to this earlier group-by):
preped_dat = reactive({
dat %>%
group_by_(input$diss) %>%
# etc
})
Before plotting (you'll need to adjust the plotting function in response to the possible change in data format):
renderDygraph({
totals = preped_data()
dygraph(totals) %>%
dySeries("total", label = ) %>%
dyRangeSelector()
})
With regard to group_by you can use group_by_ if all your arguments are text strings, or group_by(!! sym(input$diss), other_column_name) if you want to mix the text string input from your control with other column names.
One possible disadvantage of this change in approach is reduced responsiveness during interactivity if your data set is large. The present approach does all the computation up front and then minimal computation each selection - this may be preferable if you have a large amount of processing. My suggested approach will have minimal up front processing and moderate computation each selection.

Related

Error in as.data.frame.default: cannot coerce class ‘c("reactiveExpr", "reactive", "function")’ to a data.frame

I am trying to process a inputed data file by running calculations and organizing it. I then want to use that data in a couple of output functions to display a table and several plots. The problem I am having is how to store these variables so that I can just run ggplotly or select to get the desired data I want to display to the user. I am confused how to use reactive and was wondering if I am doing this right at all.
library(shiny)
library(readr)
library(fs)
library(tidyverse)
library(broom)
library(readxl) # to read in excel files
library(ggpubr) # for stat_cor function to plot linear model equations on graph
library(inflection) # to uses ese function to ID inflection point
library(plotly) # for interactive plots
library(gridExtra) # to combine plots
library(knitr) # to print tables
curves_dataframe_names <- c("dose_number","volume_mL", "mV", "pH", "Graphic", "Temp", "Letter" , "Time")
ui <- fluidPage(
titlePanel("Uploading Files"),
sidebarLayout(
sidebarPanel(
fileInput("file1", "Choose RPT File",
multiple = TRUE,
accept = c("text/rpt",
"text/comma-separated-values,text/plain",
".RPT")),
tags$hr(),
),
mainPanel(
tableOutput("contents"),
)
)
)
server <- function(input, output) {
proccess_data <- function(input) {
reactive ({
req(input$file1)
ALL_CURVES_FRAME1 <-
input$file1$datapath %>%
map_dfr(read_table, .id = "file_name", skip = 1, col_names = curves_dataframe_names)%>%
mutate(
file_id = case_when(grepl("Report", dose_number) ~ mV),
result = case_when(grepl("Result:", dose_number) ~ volume_mL),
#endpt_vol_mL = case_when(grepl("End" , dose_number) ~ pH),
#end_pt_ph = case_when(grepl("pH Fixed End Point: ", X1) ~ gsub("pH Fixed End Point: ", "", X1)),
init_ph = case_when(grepl("Initial", dose_number) ~ Graphic),
fin_ph = case_when(grepl("Initial", dose_number) ~ Letter),
Method = case_when(grepl("Method", dose_number) ~ paste(mV, pH, Graphic)),
date = case_when(grepl("Time", dose_number) ~ paste(Graphic, Temp, Letter))) %>% select(-Graphic, -Temp, -Letter)
ALL_CURVES_FRAME1$date = gsub(",", "", ALL_CURVES_FRAME1$date)
ALL_CURVES_FRAME1$date = as.Date(ALL_CURVES_FRAME1$date, format = "%b%d%Y")
file_ids <- ALL_CURVES_FRAME1 %>% select(file_name, file_id) %>% mutate(file = file_id) %>% select(file_name, file) %>% unique() %>% na.omit()
results <- ALL_CURVES_FRAME1 %>% select(file_name, result) %>% mutate(result_mL = result) %>% select(file_name, result_mL) %>% unique() %>% na.omit()
init_phs <- ALL_CURVES_FRAME1 %>% select(file_name, init_ph) %>% mutate(init_phs = init_ph) %>% select(file_name, init_phs) %>% unique() %>% na.omit()
fin_phs <- ALL_CURVES_FRAME1 %>% select(file_name, fin_ph) %>% mutate(fin_phs = fin_ph) %>% select(file_name, fin_phs) %>% unique() %>% na.omit()
methods <- ALL_CURVES_FRAME1 %>% select(file_name, Method) %>% mutate(method = Method) %>% select(file_name, method) %>% unique() %>% na.omit()
date <- ALL_CURVES_FRAME1 %>% select(file_name, date) %>% mutate(date_analyzed = date) %>% select(file_name, date_analyzed) %>% unique() %>% na.omit()
ALL_CURVES_FRAME <- full_join(ALL_CURVES_FRAME1, file_ids) %>% full_join(results) %>% full_join(init_phs) %>% full_join(fin_phs) %>% full_join(date) %>% select(-result, -file_name, -file_id, -Method, -init_ph, -fin_ph, -date) %>% rename(init_ph = init_phs, fin_ph = fin_phs, file_id = file) %>% filter(!is.na(Time))
raw_data_summary <- ALL_CURVES_FRAME %>% select(date_analyzed, result_mL, file_id) %>% unique()
metadata <- read_excel("~/Google Drive/My Drive/Houlton_Cornell/TitrationData/Autotitration Metadata Sheet.xlsx", sheet = "Growth Chamber Titrations") %>% select(file_id = report_ID , pot_name, date_collected, stock_date_created, date_lab_measured, tube, experiment_num, sample_grams = sample_vol_mL_g, amount_DI_added_mL, result_mL = endpt_vol_mL, process) %>% mutate(std_mL = 0) %>% filter(process == "yes") #not bringing in: pre_pH, sample_name
ALL_CURVES_FRAME$result_mL <- as.double(ALL_CURVES_FRAME$result_mL)
mapping_info <- read_excel("~/Google Drive/My Drive/Houlton_Cornell/Growth Chamber/Growth Chamber Notes.xlsx", sheet = "Mapping_info")
samples <-left_join(metadata, ALL_CURVES_FRAME, by = c("file_id", "result_mL"))
#b.Select Standards
standards_info <- read_excel("~/Google Drive/My Drive/Houlton_Cornell/TitrationData/Autotitration Metadata Sheet.xlsx", sheet = "Stock Solution", skip = 1)
standards_wt <- standards_info %>% select(stock_date_created = Date_Created, std_wt = Bicarbonate_mg)
standards <- samples %>% filter(stock_date_created != "NA") %>% right_join(standards_wt) %>% select(-experiment_num, -date_collected) %>% mutate(standard_name = paste(pot_name, tube, date_lab_measured)) %>% select(-date_lab_measured)
n_standards <- standards %>% filter(dose_number == "0") %>% unique()
#c.Remove bad Titration Curves
standards$pH <- as.numeric(standards$pH)
standards$volume_mL <- as.numeric(standards$volume_mL)
bad_curves_estimated <- standards %>%
mutate(
pH_change = pH - lag(pH)
) %>% filter(pH <5.8) %>% filter(pH_change > 0)%>% select(standard_name) %>% unique() %>% print()
#d. Calculate derivatives
# extrapolate extra data points for low dose curves
extrapolated_vol <- standards %>% group_by(standard_name) %>%
mutate(
n_dose = n()
) %>% filter(n_dose < 20) %>% mutate(
middle_pH = (pH + lag(pH)) / 2,
middle_mL = (volume_mL + lag(volume_mL))/2,
extrapolated = "yes"
) %>% select(-pH, -volume_mL, -n_dose) %>% rename (volume_mL = middle_mL, pH = middle_pH) %>% na.omit()
standards <- standards %>% bind_rows(extrapolated_vol) %>% group_by(standard_name)
#calculate derivatives of pH
all_curves_derived <- standards %>%
mutate(
dv = volume_mL - lag(volume_mL), # change in volume
dpH = abs(pH - lag(pH)), # change in pH
dpH_per_dv = dpH/dv, # 1st deriv, change in pH per change in vol
d2pH = dpH_per_dv - lag(dpH_per_dv), # change in 1st deriv
d2pH_per_dvsq = d2pH/dv^2 #2nd deriv, change in change of pH per change in vol
)
#e.Calculate bicarbonate and compare
#Calculate equivalence point based on derivative
derivative_eq1 <- all_curves_derived %>% group_by(standard_name) %>% filter(pH<6) %>%
mutate(
derivative_eq = case_when(dpH_per_dv == max(dpH_per_dv, na.rm = TRUE) ~ volume_mL)
) %>% select(standard_name, derivative_eq) %>% na.omit()
#Calculate bicarbonate
derivative_eq_first <- left_join(derivative_eq1, all_curves_derived) %>% mutate(
curve_alk_mg_L = ((50044 * derivative_eq*.1/sample_grams)*1.22)) %>% ungroup()
#Calculate for comparisons
standard_check() <- derivative_eq_first %>% select(pot_name, standard_name, derivative_eq, stock_date_created, sample_grams, tube, date_analyzed, std_wt, curve_alk_mg_L) %>% unique() %>% group_by(date_analyzed) %>% mutate(
daily_mean = mean(curve_alk_mg_L),
daily_std_dev = sd(curve_alk_mg_L)
) %>% ungroup() %>% mutate(
analyzed_actual_diff = curve_alk_mg_L - std_wt
)
standard_check_2<- standard_check() %>% select(pot_name, date_analyzed, tube, std_wt, curve_alk_mg_L, daily_mean, daily_std_dev, analyzed_actual_diff) %>% kable()
return(standard_check_2)
}) }
output$contents <- renderTable({
return(proccess_data(input))})
}
shinyApp(ui = ui, server = server)

R: How to create a Drilldown Highchart using loops

when doing a job I have found a problem that I don't know how to solve.
I have a data frame that has 2 columns:
date
value
And it has a total of 1303 rows.
For each date there are 12 values (1 for each month), except in the last year that only has 7
The work I have to do would be to create a 'drilldown' style chart using the 'highcharter' library. The problem is that I don't know how to do it efficiently.
The solution that comes to my mind is not very efficient, below I show my solution so you can see what I mean.
dataframe
# Load packages
library(tidyverse)
library(highcharter)
library(lubridate)
# Load dataset
df <- read.csv('example.csv')
# Prepare df to use
dfDD <- tibble(name = year(df$date),
y = round(df$value, digits = 2),
drilldown = name)
# Create a data frame to use in 'drilldown' (for each year)
df1913 <- df %>%
filter(year(date) == 1913) %>%
data.frame()
df1914 <- df %>%
filter(year(date) == 1914) %>%
data.frame()
# Create a drilldown chart using Highcharter library
highchart() %>%
hc_chart(type = "column") %>%
hc_title(text = "Example Drilldown") %>%
hc_xAxis(type = "category") %>%
hc_legend(enabled = FALSE) %>%
hc_plotOptions(series = list(boderWidth = 2,
dataLabels = list(enabled = TRUE))) %>%
hc_add_series(data = dfDD,
name = "Mean",
colorByPoint = TRUE) %>%
hc_drilldown(allowPointDrilldown = TRUE,
series = list(list(id = 1913,
data = list_parse2(df1913)),
list(id = 1914,
data = list_parse2(df1914))))
Seeing my solution for the first time, I realized that in order to complete the graph I would have to create a subset of values for each year. Having realized that I tried to find a more efficient solution using a 'for loop' but so far I can't get it to work.
Is there a more efficient way to create this graph using a 'loop'!?
If it can be done in another way than using loops, I would also like to know.
Thank you for reading my question and I hope I explained myself well.
Using split and purrr::imap you could split your data by years and loop over the resulting list to convert your data to the nested list object required by hc_drilldown. Note: It's important to make the id a numeric and to pass a unnamed list.
library(tidyverse)
library(highcharter)
library(lubridate)
series <- split(df, year(df$date)) %>%
purrr::imap(function(x, y) list(id = as.numeric(y), data = list_parse2(x)))
# Unname list
names(series) <- NULL
highchart() %>%
hc_chart(type = "column") %>%
hc_title(text = "Example Drilldown") %>%
hc_xAxis(type = "category") %>%
hc_legend(enabled = FALSE) %>%
hc_plotOptions(series = list(boderWidth = 2,
dataLabels = list(enabled = TRUE))) %>%
hc_add_series(data = dfDD,
name = "Mean",
colorByPoint = TRUE) %>%
hc_drilldown(allowPointDrilldown = TRUE,
series = series)

How to include a new geography in this table?

I got this code from someone else and so only know the basic framework. However, to reproduce this you would open a new R markdown document, delete everything below the YAML, and then paste in this. The items in bold below have to be moved to the left for this to knit.
My question is this, how would I bring the United States into the table as a 11th item? Would I do this action in the jolts section or the subtable? United states is code "00". Every state has a two digit state code with the US being "00"
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(readxl)
library(data.table)
library(tigris)
library(lubridate)
library(kableExtra)
library(zoo)
knitr::opts_chunk$set(echo = FALSE)
state_filter <- "Nevada"
all_state <- states(resolution = "20m", cb = TRUE) %>%
mutate(fips_num = as.integer(STATEFP)) %>%
filter(fips_num %in% c(1:56)) %>%
shift_geometry()
jolts_import <- fread("https://download.bls.gov/pub/time.series/jt/jt.data.1.AllItems")
jolts_series <- fread("https://download.bls.gov/pub/time.series/jt/jt.series")
jolts_states <- fread("https://download.bls.gov/pub/time.series/jt/jt.state")
jolts_elements <- fread("https://download.bls.gov/pub/time.series/jt/jt.dataelement")
jolts <- jolts_import %>%
filter(period != "M13") %>%
select(-c(footnote_codes)) %>%
left_join(jolts_series %>% select(-footnote_codes), by = "series_id") %>%
left_join(jolts_states %>% select(-c(display_level:sort_sequence)), by = "state_code") %>%
left_join(jolts_elements %>% select(-c(display_level:sort_sequence)), by =
"dataelement_code") %>%
filter(area_code == 0, sizeclass_code == 0, industry_code == 0) %>%
select(-c(area_code, sizeclass_code, industry_code)) %>%
mutate(date = ymd(paste(year, str_remove(period, "M"), "01", sep="-")))%>%
filter(!(state_code %in% c("MW", "NE", "SO", "WE"))) %>%
mutate(ratelevel_code = case_when(
ratelevel_code == "L" ~ "Level",
ratelevel_code == "R" ~ "Rate",
TRUE ~ "Other"),
periodname = format(date, "%B"),
value = if_else(ratelevel_code == "Rate", value/100, value*1000)) %>%
group_by(state_text, dataelement_code, ratelevel_code, seasonal) %>%
mutate(lag_1mo = lag(value, 1),
lag_12mo = lag(value, 12),
change_1mo = value - lag_1mo,
change_12mo = value - lag_12mo,
avg_12mo = rollapplyr(data = value, width = 12, FUN = mean, partial = TRUE)) %>%
ungroup() %>%
group_by(dataelement_code, ratelevel_code, seasonal, date) %>%
mutate(rank_value = floor(rank(-value)),
rank_1mo = floor(rank(-change_1mo)),
rank_12mo = floor(rank(-change_12mo))
)
subtitle <- paste0("Data for ",state_filter,", ",format(max(jolts$date), "%B %Y"))
jolts_state <- all_state %>%
left_join(jolts, by = c("NAME" = "state_text"))
**```**
---
subtitle: '`r subtitle`'
---
\newpage
<div class = "row">
### Hire Rate
<div class>
**```{r}**
data_filter <- "HI"
data_text <- jolts_elements %>% filter(dataelement_code == data_filter) %>%
pull(dataelement_text) %>% str_to_title()
sub_table <- jolts %>%
ungroup() %>%
filter(
rank_value <= 5 | rank_value >= 47 | state_text == "United States",
date == max(date),
seasonal == "S",
dataelement_code == data_filter,
ratelevel_code == "Rate"
) %>%
select(state_text, value, lag_1mo, lag_12mo, rank_value) %>%
arrange(rank_value)
sub_table %>%
mutate(value = scales::percent(value, accuracy = 0.1),
lag_1mo = scales::percent(lag_1mo, accuracy = 0.1),
lag_12mo = scales::percent(lag_12mo, accuracy = 0.1)) %>%
kable(col.names = c("State","Current","Prior Month","Prior Year","Rank"), align = "lcccr") %>%
kable_paper("hover", full_width = F, position = "float_left", font_size = 12) %>%
row_spec(row = which(sub_table$state_text == state_filter), background = "#005a9c", bold = TRUE, color = "white")
So the solution is two parts.
First, put the following code in after the four jolts elements.
jolts_states <- jolts_states%>%mutate(state_text = if_else(state_text == "Total
US", "United States", state_text))
second, one needs to modify the sub table code with the following
rank_value <= 5 | rank_value >= 47 | state_code == "00",

dygraph is blank but xts object has an observation

Shiny version of the problem (original question):
I am plotting a dygraph that is based on an xts object that is the result of filtering based on 2 inputs: age and language.
If I move the age slider to have a lower and upper bound each set at 32 AND enter "spanish" in the input box, the plot is empty. However, the filtered tibble and the filtered xts object both show 1 observation. This observation should appear in the plot but doesn't.
I feel like I am missing something very basic here, but I can't put my finger on it.
---
title: "test"
output:
flexdashboard::flex_dashboard:
theme: bootstrap
runtime: shiny
---
```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(tibbletime)
library(dygraphs)
library(magrittr)
library(xts)
library(DT)
```
```{r global, include=FALSE}
# generate data
set.seed(1)
dat <- data.frame(date = seq(as.Date("2018-01-01"),
as.Date("2018-06-30"),
"days"),
sex = sample(c("male", "female"), 181, replace=TRUE),
lang = sample(c("english", "spanish"), 181, replace=TRUE),
age = sample(20:35, 181, replace=TRUE))
dat <- sample_n(dat, 80)
```
Sidebar {.sidebar}
=====================================
```{r}
sliderInput("agerange", label = "Age",
min = 20,
max = 35,
value = c(20, 35),
step=1)
selectizeInput(
'foo', label = NULL,
choices = c("english", "spanish", "other"),
multiple = TRUE
)
```
Plot
=====================================
```{r}
# all
filtered <- reactive({
req((dat$lang %in% input$foo) | is.null(input$foo))
dat %>%
mutate(new = 1) %>%
arrange(date) %>%
filter(if(is.null(input$foo)) (new==1) else (lang %in% input$foo)) %>%
filter(age >= input$agerange[1] & age <= input$agerange[2])
})
totals <- reactive({
filtered <- filtered()
filtered %>%
# time series analysis
tibbletime::as_tbl_time(index = date) %>% # convert to tibble time object
select(date, new) %>%
tibbletime::collapse_by("1 week", side = "start", clean = TRUE) %>%
group_by(date) %>%
mutate(total = sum(new, na.rm = TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(
date = seq(date[1], date[length(date)], by = "1 week"),
fill = list(total = 0)
)
})
# convert to xts
totals_ <- reactive({
totals <- totals()
xts(totals, order.by = totals$date)
})
# plot
renderDygraph({
totals_ <- totals_()
dygraph(totals_[, "total"]) %>%
dyRangeSelector() %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
})
```
Filtered Tibble
=====================================
```{r}
DT::renderDataTable({
filtered <- filtered()
DT::datatable(filtered,
options = list(bPaginate = TRUE))
})
```
Filtered xts
=====================================
```{r}
DT::renderDataTable({
totals_ <- totals_()
DT::datatable(totals_[, c("date", "total")],
options = list(bPaginate = TRUE))
})
```
Non-shiny version:
I moved my example out of shiny (my actual use case) to isolate the problem.
library(tidyverse)
library(tibbletime)
library(dygraphs)
library(magrittr)
library(xts)
set.seed(1)
dat <- data.frame(date = seq(as.Date("2018-01-01"),
as.Date("2018-06-30"),
"days"),
sex = sample(c("male", "female"), 181, replace=TRUE),
lang = sample(c("english", "spanish"), 181, replace=TRUE),
age = sample(20:35, 181, replace=TRUE))
dat <- sample_n(dat, 80)
totals <-
dat %>%
mutate(new = 1) %>%
arrange(date) %>%
filter(lang=="spanish") %>%
filter(age>=32 & age<=32) %>%
{. ->> filtered} %>%
tibbletime::as_tbl_time(index = date) %>% # convert to tibble time object
select(date, new) %>%
tibbletime::collapse_by("1 week", side = "start", clean = TRUE) %>%
group_by(date) %>%
mutate(total = sum(new, na.rm = TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(
date = seq(date[1], date[length(date)], by = "1 week"),
fill = list(total = 0))
filtered
# date sex lang age new
#1 2018-01-25 male spanish 32 1
# convert to xts
totals_ <- xts(totals, order.by = totals$date)
totals_
# date new total
#2018-01-21 "2018-01-21" "1" "1"
# plot
dygraph(totals_[, "total"]) %>%
dyRangeSelector() %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
I think the fundamental issue is that dygraph will not plot an xts object consisting of 1 row. So whenever filters set via shiny inputs (or static filter calls) reduce the dataset to 1 match (1 row in the xts object), the plot will be empty.
(If there are zero matches, R throws an error in my example at the tibbletime step because there are no rows.)

filtering on shiny selectizeInput and showing blank plot when there are no observations in the dataset that meet the input

In my flexdashboard shiny app, I'm using selectizeInput() with three options: "english", "spanish", and "other". In my toy dataset, there are no observations of the variable lang that take the value "other". Therefore, when only "other" is selected in the input bar, R returns an evaluation error:
missing value where TRUE/FALSE needed.
This is caused by the following line of the pipe in the "Page 1" section:
filter(if(is.null(input$foo)) (new==1) else (lang %in% input$foo)) %>%
What is the right approach to show a blank plot when there are no observations in the dataset that take the value of the input?
---
title: "test"
output:
flexdashboard::flex_dashboard:
theme: bootstrap
runtime: shiny
---
```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(tibbletime)
library(dygraphs)
library(magrittr)
library(xts)
```
```{r global, include=FALSE}
# generate data
set.seed(1)
dat <- data.frame(date = seq(as.Date("2018-01-01"),
as.Date("2018-06-30"),
"days"),
sex = sample(c("male", "female"), 181, replace=TRUE),
lang = sample(c("english", "spanish"), 181, replace=TRUE),
age = sample(20:35, 181, replace=TRUE))
dat <- sample_n(dat, 80)
```
Sidebar {.sidebar}
=====================================
```{r}
selectizeInput(
'foo', label = NULL,
choices = c("english", "spanish", "other"),
multiple = TRUE
)
```
Page 1
=====================================
```{r}
# all
totals <- reactive({
dat %>%
mutate(new = 1) %>%
arrange(date) %>%
filter(if(is.null(input$foo)) (new==1) else (lang %in% input$foo)) %>%
# time series analysis
tibbletime::as_tbl_time(index = date) %>% # convert to tibble time object
select(date, new) %>%
tibbletime::collapse_by("1 week", side = "start", clean = TRUE) %>%
group_by(date) %>%
mutate(total = sum(new, na.rm = TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(
date = seq(date[1], date[length(date)], by = "1 week"),
fill = list(total = 0)
)
})
# convert to xts
totals_ <- reactive({
totals <- totals()
xts(totals, order.by = totals$date)
})
# plot
renderDygraph({
totals_ <- totals_()
dygraph(totals_[, "total"]) %>%
dyRangeSelector() %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
})
```
One way to do this is to use the shiny::req function to check the requirements before running the code block.
If you add:
req(dat$lang %in% input$foo)
to the top of your totals <- reactive({ expression, then it will check that the value of input$foo is in dat$lang before running the rest of that expression. If it's not found, then the operation will be stopped silently. No error will be displayed and the plot will remain blank.

Resources