How to conditionally count the number of occurrences using dplyr? - r

In the below reproducible example, the user stratifies the elements of a data frame based on inputs for (A) point-in-time period and (B) which of the 2 columns of values to stratify.
I'm trying to count only those rows marked with an "N" in the "Flag" column in those circumstances where the user has selected to stratify the "Values_2" elements; otherwise the Flag column is ignored when the user selects to stratify the "Values_1" elements.
The below code works except for counting elements flagged "N" when "Values_2" is chosen. I commented out my attempt to count elements flagged "N"... because it produces non-sensical results.
I will also only sum those "Value_2" elements flagged with "N" but I can figure that one out.
The image at the bottom better explains the question.
Any suggestions?
library(shiny)
library(tidyverse)
library(shinyWidgets)
ui <-
fluidPage(
uiOutput("stratPeriod"),
uiOutput("stratValues"),
h5(strong("Raw data frame:")), tableOutput("rawData"),
h5(strong("Stratified data:")), tableOutput("stratData")
)
server <- function(input, output, session) {
dat <- reactive({
data.frame(
ID = c(1,1,2,2,2,2,3,3,3,3),
Period = c("2020-03", "2020-04", "2020-01", "2020-02", "2020-03", "2020-04", "2020-01", "2020-02", "2020-03", "2020-04"),
Values_1 = c(-6, 26, 36, 46, 56, 86, 100, 10, 20, 30),
Values_2 = c(-6, 13, 18, 46, 28, 43, 100, 10, 10, 30),
Flag = c("N","Y","Y","N","Y","Y","N","N","Y","N")
)
})
output$stratPeriod <- renderUI({
chc <- unique(na.omit(dat()[[2]]))
selectInput(inputId = "stratPeriod",
label = "Choose point-in-time:",
choices = chc,
selected = chc[1])
})
output$stratValues <- renderUI({
selectInput("stratValues",
"Choose values type to sum:",
choices = c("Values_1","Values_2"),
selected = c("Values_1")
)
})
output$rawData <- renderTable({dat()})
output$stratData <- renderTable({
req(input$stratValues)
req(input$stratPeriod)
filter_exp1 <- parse(text=paste0("Period", "==", "'",input$stratPeriod, "'"))
dat_1 <- reactive({dat() %>% filter(eval(filter_exp1))})
min <- custom_min(dat_1()[[input$stratValues]])
max <- custom_max(dat_1()[[input$stratValues]])
breaks <- if(any(is.infinite(c(min,max)))) c(0, 10) else seq(min, max, length.out = 6)
tmp <- dat() %>%
filter(eval(filter_exp1)) %>%
mutate(Range = cut(!!sym(input$stratValues), breaks=breaks, include.lowest=TRUE, right = TRUE, dig.lab = 5)) %>%
group_by(Range) %>%
summarise(Count = n(),Values = sum(!!sym(input$stratValues))) %>%
# summarise(Count = if_else(!!sym(input$stratValues) == "Values_1",n(),sum(dat()[[5]]=="N")),Values = sum(!!sym(input$stratValues))) %>%
complete(Range, fill = list(Count = 0,Values = 0)) %>%
ungroup %>%
mutate(Count_pct = Count/sum(Count)*100, Values_pct = Values/sum(Values)*100) %>%
dplyr::select(everything(), Count, Count_pct, Values, Values_pct) %>%
bind_rows(summarise_all(., ~(if(is.numeric(.)) sum(.) else "Total")))
tmp
})
}
shinyApp(ui, server)

I think the easiest way might be to change the renderTable() function to the following:
output$stratData <- renderTable({
req(input$stratValues)
req(input$stratPeriod)
filter_exp1 <- parse(text=paste0("Period", "==", "'",input$stratPeriod, "'"))
dat_1 <- reactive({dat() %>% filter(eval(filter_exp1))})
min <- custom_min(dat_1()[[input$stratValues]])
max <- custom_max(dat_1()[[input$stratValues]])
breaks <- if(any(is.infinite(c(min,max)))) c(0, 10) else seq(min, max, length.out = 6)
tmp <- dat() %>%
filter(eval(filter_exp1)) %>%
mutate(Range = cut(!!sym(input$stratValues), breaks=breaks, include.lowest=TRUE, right = TRUE, dig.lab = 5)) %>%
group_by(Range)
if(input$stratValues == "Values_2"){
tmp <- tmp %>%
filter(Flag == "N")
}
tmp <- tmp %>%
summarise(Count = n(),Values = sum(!!sym(input$stratValues))) %>%
complete(Range, fill = list(Count = 0,Values = 0)) %>%
ungroup %>%
mutate(Count_pct = Count/sum(Count)*100, Values_pct = Values/sum(Values)*100) %>%
dplyr::select(everything(), Count, Count_pct, Values, Values_pct) %>%
bind_rows(summarise_all(., ~(if(is.numeric(.)) sum(.) else "Total")))
tmp
})
In the code above, there is an if() condition that identifies whether stratValues is Values_2. If so, it filters the data to only include the "N" observations on Flag. Then, it continues with the rest of the analysis. This will work if both Values and Countare calculated only on the observations whereFlag == "N"`.

Related

How to make selectInput behave dynamically with on user input?

I am trying to create an app that creates word frequency plot from text. The data I used is here: https://www.kaggle.com/datasets/bharadwaj6/kindle-reviews#:~:text=Never-,kindle_reviews,-.csv(701.88
Following is my shiny code:
library(shiny)
library(tidyverse)
library(tidytext)
# Source functions
source("wrd_freq_df.R")
source("bigrm_df.R")
# UI design ----
ui <- fluidPage(fluidRow(
column(
4,
fileInput("upload", NULL, accept = c(".csv", ".tsv")),
actionButton(
inputId = "submit",
label = "RUN",
class = "btn-primary"
),fluidRow(
column(4, numericInput(
"n",
"Top n words",
value = 5,
min = 1,
step = 1
)),
column(4, selectInput(
"myselect",
label = "Choose Words"
, choices = NULL
, multiple = TRUE
# , options = list(create = TRUE)
, selected = NULL
))),
plotOutput("wrd_frq_plot"),
tableOutput("head")
)
))
# SERVER ----
options(shiny.maxRequestSize = 60 * 1024 ^ 2)
server <- function(input, output, session) {
#upload csv data
data <- reactive({
req(input$upload)
ext <- tools::file_ext(input$upload$name)
switch(
ext,
csv = vroom::vroom(input$upload$datapath, delim = ","),
tsv = vroom::vroom(input$upload$datapath, delim = "\t"),
validate("Invalid file; Please upload a .csv or .tsv file")
)
})
#create different data subset to be used for plotting word_freq and bigrams
rv <- reactiveValues()
#run analysis on pressing run button
observeEvent(input$submit, {
rv$data_head <- data() %>%
mutate(ID = as.character(round(ID))) %>%
mutate(group = if_else(rating >= 4, "High", if_else(rating > 2, "Avg", "Low"))) %>%
select(ID, group, reviewText)
#get base analysis dataset
rv$word_freq <- wrd_freq_df(rv$data_head)
rv$bigrm_freq <- bigrm_freq_df(rv$data_head)
rv$bigrm_tf_idf <- bigrm_freq_tf_idf(rv$bigrm_freq)
#get all unique words from the document
rv$choices <- rv$word_freq %>%
select(word) %>%
unique() %>% unlist()
output$wrd_frq_plot <- renderPlot({
#ensure that if selectinput has no value then all words are used for analysis
if (is.null(input$myselect)) {
selected_choices = rv$choices
}
else{
selected_choices = input$myselect
}
updateSelectInput(session, "myselect", selected = selected_choices)
#get relevant document ID which contains chosen word for analysis
word <- input$myselect
word_df_chose <- data.frame(word)
relvant_ID <- word_df_chose %>%
#THIS IS PROBABLY WHERE THINGS FAILS
inner_join(rv$word_freq) %>%
select(ID) %>%
unique() %>% unlist()
rv$word_freq %>%
filter(ID %in% relevant_ID) %>%
left_join(rv$data_head %>%
select(ID, group) %>%
unique()) %>%
#remove low tf_IDF words
filter(tf_idf > quantile(tf_idf, 0.25)) %>%
group_by(group, word) %>%
summarise(n_tot = sum(n)) %>% ungroup() %>%
group_by(group) %>%
slice_max(n_tot, n = input$n) %>%
ungroup() %>%
mutate(group = as.factor(group),
word = reorder_within(word, n_tot, group)) %>%
ggplot(aes(x = word, y = n_tot, fill = group)) + geom_col() + facet_wrap(group ~
., scales = "free") +
coord_flip() + scale_x_reordered()
})
})
}
#executes app
shinyApp(ui, server)
The functions sourced are as follows:
wrd_freq_df <- function(df){
df %>%
# mutate(ID = as.character(round(ID))) %>%
# mutate(group = if_else(rating >= 4, "High", if_else(rating > 2, "Avg", "Low"))) %>%
# select(ID, group, reviewText) %>%
#get word freq
unnest_tokens(output = word, input = reviewText) %>% ungroup() %>%
anti_join(stop_words) %>%
count(ID, word) %>% ungroup() %>%
group_by(ID) %>%
mutate(total = sum(n)) %>% ungroup() %>%
bind_tf_idf(word, ID, n) }
bigrm_freq_df <- function(df) {
df %>%
# mutate(ID = as.character(round(ID))) %>%
# mutate(group = if_else(rating >= 4, "High", if_else(rating > 2, "Avg", "Low"))) %>%
# select(ID, group, reviewText) %>%
unnest_tokens(bigram, reviewText, token = "ngrams", n = 2) %>%
separate(bigram,
c("word1", "word2"),
sep = " ",
remove = FALSE) %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word)
}
bigrm_freq_tf_idf <- function(df) {
df %>%
select(ID, bigram) %>%
#get tf_idf
count(ID, bigram) %>%
bind_tf_idf(bigram, ID, n) %>%
arrange(desc(tf_idf))
}
i get following error message:
Joining, by = "word"
Warning: Error in inner_join: by must be supplied when x and y have no common variables.
ℹ use by = character()` to perform a cross-join.
181:
Input to asJSON(keep_vec_names=TRUE) is a named vector. In a future version of jsonlite, this option will not be supported, and named vectors will be translated into arrays instead of objects. If you want JSON object output, please use a named list instead. See ?toJSON.
I assume that the error must be arising when

How to make a plot axis label reactive?

In the below reproducible example Code 1, the user stratifies data and selects which variable to stratify the data by, Value_1 or Value_2; and also selects to view the stratification as a table or as a plot. Code 1 works as intended, and the stratification range label is a static "Range" (as shown in the table left-most column header and in the plot x-axis label, and as shown in the images at the bottom).
I am making the range label dynamic (reactive), because in the fuller App this example derives from the user has many variables to choose from for stratification.
In Code 2 below showing an amplified custom function stratData(), I have succeeded in making the table left-most column header for stratification ranges reactive, but I haven't yet figured out how to make the plot x-axis label for stratification ranges similarly reactive. Just replace the stratData() in Code 1 with the stratData() in Code 2 (and comment-out the plot code under renderPlot() to avoid seeing the error) to see how this is working (or not).
How could the plot x-axis label reflect the same reactivity as the table left-most column header?
Code 1:
library(ggplot2)
library(shiny)
library(tidyverse)
library(shinyWidgets)
ui <-
fluidPage(
uiOutput("strat_values"),
radioButtons(
inputId = 'stratsView',
label = NULL,
choices = list("Table view" = 1,"Plot view" = 2),
selected = 1,
inline = TRUE
),
conditionalPanel(condition = "input.stratsView == 1",h5(strong("Stratified data:")), tableOutput("stratData")),
conditionalPanel(condition = "input.stratsView == 2",h5(strong("Stratified data:")), plotOutput("stratPlot"))
)
server <- function(input, output, session) {
dat <- reactive({
data.frame(
ID = c(1,1,2,2,2,2,3,3,3,3),
Period = c("2020-03", "2020-04", "2020-01", "2020-02", "2020-03", "2020-04", "2020-01", "2020-02", "2020-03", "2020-04"),
Values_1 = c(-6, 26, 36, 46, 56, 86, 100, 10, 20, 30),
Values_2 = c(-3, 13, 18, 23, 28, 43, 50, 5, 10, 15)
)
})
output$strat_values <- renderUI({
selectInput("strat_values",
"Variable to range-spread (col 1):",
choices = c("Values_1","Values_2"),
selected = c("Values_1"))
})
stratData <- function(){
req(input$strat_values)
filter_exp1 <- parse(text=paste0("Period", "==", "'","2020-04", "'"))
dat_1 <- reactive({dat() %>% filter(eval(filter_exp1))})
breaks <- seq(min(dat_1()[[input$strat_values]]), max(dat_1()[[input$strat_values]]), length.out = 6)
tmp <- dat() %>%
filter(eval(filter_exp1)) %>%
mutate(Range = cut(!!sym(input$strat_values), breaks=breaks, include.lowest=TRUE, right = TRUE, dig.lab = 5)) %>%
group_by(Range)
tmp <- tmp %>%
summarise(Count = n(),Values = sum(!!sym("Values_2"))) %>%
complete(Range, fill = list(Count = 0,Values = 0)) %>%
ungroup %>%
mutate(Count_pct = Count/sum(Count)*100, Values_pct = Values/sum(Values)*100) %>%
dplyr::select(everything(), Count, Count_pct, Values, Values_pct) %>%
bind_rows(summarise_all(., ~(if(is.numeric(.)) sum(.) else "Total")))
Count <- tmp %>% pull(Count)
Range <- tmp %>% pull(Range)
list(data = tmp,
Range = Range[-length(Range)],
Count = as.data.frame(Count[-length(Count)]))
}
output$stratData <- renderTable({stratData()$data})
output$stratPlot <- renderPlot({
x <- factor(stratData()$Range, levels = c(stratData()$Range))
y <- as.matrix(stratData()$Count)
ggplot(stratData()$Count,aes(x,y)) +
geom_bar(stat="identity") +
labs(x = "Ranges") +
geom_text(aes(y = y + sign(y)/4,label = y))
})
}
shinyApp(ui, server)
Code 2:
stratData <- function(){
req(input$strat_values)
filter_exp1 <- parse(text=paste0("Period", "==", "'","2020-04", "'"))
dat_1 <- reactive({dat() %>% filter(eval(filter_exp1))})
breaks <- seq(min(dat_1()[[input$strat_values]]), max(dat_1()[[input$strat_values]]), length.out = 6)
tmp <- dat() %>%
filter(eval(filter_exp1)) %>%
mutate(sumvar = cut(!!sym(input$strat_values), breaks=breaks, include.lowest=TRUE, right = TRUE, dig.lab = 5)) %>%
group_by(sumvar)
tmp <- tmp %>%
summarise(Count = n(),Values = sum(!!sym("Values_2"))) %>%
complete(sumvar, fill = list(Count = 0,Values = 0)) %>%
ungroup %>%
mutate(Count_pct = Count/sum(Count)*100, Values_pct = Values/sum(Values)*100) %>%
dplyr::select(everything(), Count, Count_pct, Values, Values_pct) %>%
bind_rows(summarise_all(., ~(if(is.numeric(.)) sum(.) else "Total")))
names(tmp)[1] <- paste(input$strat_values, " Range")
Count <- tmp %>% pull(Count)
# Range <- tmp %>% pull(Range)
list(data = tmp,
# Range = Range[-length(Range)],
Count = as.data.frame(Count[-length(Count)]))
}
Resolved code:
Now with OP code correction and reflecting Stefan's comment for plot axis label reactivity; posting revised stratData() custom function where only one line was fixed (commented below) and the renderPlot() function where Stefan's axis-label solution is also commented:
stratData <- function(){
req(input$strat_values)
filter_exp1 <- parse(text=paste0("Period", "==", "'","2020-04", "'"))
dat_1 <- reactive({dat() %>% filter(eval(filter_exp1))})
breaks <- seq(min(dat_1()[[input$strat_values]]), max(dat_1()[[input$strat_values]]), length.out = 6)
tmp <- dat() %>%
filter(eval(filter_exp1)) %>%
mutate(Range = cut(!!sym(input$strat_values), breaks=breaks, include.lowest=TRUE, right = TRUE, dig.lab = 5)) %>%
group_by(Range)
tmp <- tmp %>%
summarise(Count = n(),Values = sum(!!sym("Values_2"))) %>%
complete(Range, fill = list(Count = 0,Values = 0)) %>%
ungroup %>%
mutate(Count_pct = Count/sum(Count)*100, Values_pct = Values/sum(Values)*100) %>%
dplyr::select(everything(), Count, Count_pct, Values, Values_pct) %>%
bind_rows(summarise_all(., ~(if(is.numeric(.)) sum(.) else "Total")))
names(tmp)[1] <- paste(input$strat_values, " Ranges")
Count <- tmp %>% pull(Count)
Range <- tmp %>% pull(var = 1) # var = 1 pulls left-most column from tmp dataframe
list(data = tmp,
Range = Range[-length(Range)],
Count = as.data.frame(Count[-length(Count)]))
}
output$stratPlot <- renderPlot({
x <- factor(stratData()$Range, levels = c(stratData()$Range))
y <- as.matrix(stratData()$Count)
ggplot(stratData()$Count,aes(x,y)) +
geom_bar(stat="identity") +
labs(x = paste(input$strat_values, " Ranges")) + # < Added line this per Stefan comment
geom_text(aes(y = y + sign(y)/4,label = y),
position = position_nudge(y = 0),
size = 5)
})

Why does this dplyr filter not work in shiny, but works fine when run without shiny?

The below code, run without Shiny, works fine for grouping data by 2 different methods of measuring time horizons (by calendar month ("Period_1") and by months elapsed since element origination ("Period_2")), and for expanding the data frame to true-up periods when grouping by Period_2:
library(tidyverse)
data <- data.frame(
ID = c(1,1,2,2,2,2),
Period_1 = c("2020-03", "2020-04", "2020-01", "2020-02", "2020-03", "2020-04"),
Period_2 = c(1, 2, 1, 2, 3, 4),
ColA = c(10, 20, 30, 40, 50, 52),
ColB = c(15, 25, 35, 45, 55, 87)
)
### Expand the dataframe to including missing rows ###
dataExpand <-
data %>%
tidyr::complete(ID, nesting(Period_2)) %>%
tidyr::fill(ColA, ColB, .direction = "down")
### Run the expanded data frame through grouping code ###
# Group by calendar month (Period_1)
groupData_1 <-
dataExpand %>%
group_by(Period_1) %>%
select("ColA","ColB") %>%
summarise(across(everything(), sum)) %>%
filter(!is.na(Period_1)) # << Add this code to delete NA row for calendar period
# Group by vintage month (Period_2)
groupData_2 <-
dataExpand %>%
group_by(Period_2) %>%
select("ColA","ColB") %>%
summarise(across(everything(), sum, na.rm = TRUE))
Results (which are correct when running the above code):
> groupData_1
# A tibble: 4 x 3
Period_1 ColA ColB
<chr> <dbl> <dbl>
1 2020-01 30 35
2 2020-02 40 45
3 2020-03 60 70
4 2020-04 72 112
> groupData_2
# A tibble: 4 x 3
Period_2 ColA ColB
<dbl> <dbl> <dbl>
1 1 40 50
2 2 60 70
3 3 70 80
4 4 72 112
However, when I throw the above into Shiny where the user can click on the radio button to select grouping by either Period_1 or Period_2, the App crashes. The problem appears to lie in the line if(input$grouping == 'Period_1'... because when I comment it out the App runs (but without removing the Period_1's that are NA like this line is suppose to do). How can this be fixed?
library(shiny)
library(tidyverse)
ui <-
fluidPage(
h3("Data table:"),
tableOutput("data"),
h3("Sum the data table columns:"),
radioButtons(
inputId = "grouping",
label = NULL,
choiceNames = c("By period 1", "By period 2"),
choiceValues = c("Period_1", "Period_2"),
selected = "Period_1",
inline = TRUE
),
tableOutput("sums")
)
server <- function(input, output, session) {
data <- reactive({
data.frame(
ID = c(1,1,2,2,2,2),
Period_1 = c("2020-03", "2020-04", "2020-01", "2020-02", "2020-03", "2020-04"),
Period_2 = c(1, 2, 1, 2, 3, 4),
ColA = c(10, 20, 30, 40, 50, 52),
ColB = c(15, 25, 35, 45, 55, 87)
)
})
dataExpand <- reactive({
data() %>%
tidyr::complete(ID, nesting(Period_2)) %>%
tidyr::fill(ColA, ColB, .direction = "down")
})
summed_data <- reactive({
dataExpand() %>%
group_by(!!sym(input$grouping)) %>%
select("ColA","ColB") %>%
summarise(across(everything(), sum, na.rm = TRUE)) #%>%
# Below removes Period_1 rows that are added due to Period_2 < 4 when grouping by Period_2
if(input$grouping == 'Period_1'){filter(!is.na(Period_1))}
})
output$data <- renderTable(data())
output$sums <- renderTable(summed_data())
}
shinyApp(ui, server)
Is this closer to what you need?
library(shiny)
library(tidyverse)
ui <-
fluidPage(
h3("Data table:"),
tableOutput("data"),
h3("Sum the data table columns:"),
radioButtons(
inputId = "grouping",
label = NULL,
choiceNames = c("By period 1", "By period 2"),
choiceValues = c("Period_1", "Period_2"),
selected = "Period_1",
inline = TRUE
),
tableOutput("sums")
)
server <- function(input, output, session) {
data <- reactive({
data.frame(
ID = c(1,1,2,2,2,2),
Period_1 = c("2020-03", "2020-04", "2020-01", "2020-02", "2020-03", "2020-04"),
Period_2 = c(1, 2, 1, 2, 3, 4),
ColA = c(10, 20, 30, 40, 50, 52),
ColB = c(15, 25, 35, 45, 55, 87)
)
})
dataExpand <- reactive({
data() %>%
tidyr::complete(ID, nesting(Period_2)) %>%
tidyr::fill(ColA, ColB, .direction = "down")
})
choice <- reactive(input$grouping)
summed_data <- reactive({
dataExpand() %>%
group_by(across(choice())) %>%
select("ColA","ColB") %>%
summarise(across(everything(), sum, na.rm = TRUE)) |>
filter(across(1,.fns = ~ .x |> negate(is.na)() ))
# Below removes Period_1 rows that are added due to Period_2 < 4 when grouping by Period_2
})
output$data <- renderTable(data())
output$sums <- renderTable(summed_data())
}
shinyApp(ui, server)
Your summed_data block doesn't return anything.
summed_data <- reactive({
dataExpand() %>%
group_by(!!sym(input$grouping)) %>%
select("ColA","ColB") %>%
summarise(across(everything(), sum, na.rm = TRUE)) %>%
# Below removes Period_1 rows that are added due to Period_2 < 4 when grouping by Period_2
if(input$grouping == 'Period_1'){ filter(!is.na(Period_1)) }
})
In fact should be failing with an error.
input <- list(grouping = "Period_2")
mtcars %>%
if (input$grouping == "Period_1") filter(cyl == 4L)
# Warning in if (.) F else filter(is.na(cyl)) :
# the condition has length > 1 and only the first element will be used
# Error in if (.) F else filter(is.na(cyl)) :
# argument is not interpretable as logical
One way to fix that would be
mtcars %>%
{ if (input$grouping == "Period_1") filter(., cyl == 4) else .; }
Done there:
wrapped in braces { ... };
used the special . in the call to filter, so that it actually has data to operate on; and
added an else that returns all data otherwise.
Another method:
mtcars %>%
filter(input$grouping != "Period_1" | cyl == 4L)
Notes:
Note that I inverted the logic. That is, your logic is to filter only if grouping is Period_1; here, input$grouping != "Period_1" returns TRUE when it is not Period_1, which means nothing in cyl == 4 will matter, all will be true; if it is Period_1, then that will return false, and then cyl == 4 will have an impact.
The other problem with your code is that you process the pipe data_Expand() %>% ... summarize(.) but because you do not capture that expression into a variable, it is never used. Like many things in R (including functions and reactive blocks), the last expression evaluated will be the return value (or whatever is in the explicit return(.) call, though often not required). In your case, the if statement is evaluated last. If the condition is true, then it tries to run filter(!is.na(Period_1)), but that has no data (it is not expressly in the pipe); if the condition is false, since there is no else block, it returns NULL (invisibly).
Try changing that block to:
summed_data <- reactive({
dataExpand() %>%
group_by(!!sym(input$grouping)) %>%
select("ColA","ColB") %>%
summarise(across(everything(), sum, na.rm = TRUE)) %>%
filter(input$grouping != "Period_1" | !is.na(Period_1))
})

Add a row with totals and percentages to a DT datatable

I'm trying to add two rows to the bottom of my datatable, one for the Grand Total, and one below that that calculates a percent.
Here's my example code:
if (interactive()) {
library(DT)
fruit <- c("Apple", "Orange", "Pear", "Banana")
num <- c(54, 25, 51, 32)
a <- c(10, 15, 20, 25)
b <- c(5, 7, 10, 15)
c <- c(7, 9, 12, 17)
data <- data.frame(fruit, num, a, b, c)
ui <- fluidPage(
DT::dataTableOutput(outputId = "dt_Fruit")
)
server <- function(input, output, session) {
output$dt_Fruit <- DT::renderDataTable({
df <- data %>%
bind_rows(summarise_all(., funs(if(is.numeric(.)) sum(., na.rm = TRUE) else "Total"))) %>% # calculates Grand Total
bind_rows(summarise_all(., funs(if(is.numeric(.)) sum(., na.rm = TRUE) else "%"))) # need help here
df$num[nrow(df)] = "" # makes last row in num column blank for percent; value not needed here
DT::datatable(
df,
rownames = FALSE,
options = list(
dom = 't',
searchHighlight = TRUE,
pageLength = 100,
scrollX = TRUE
)
)
})
}
shinyApp(ui, server)
}
The "Total" line calculates as expected. The last "%" row is where I need help creating a calculation that takes the Total from each column; a (70), b (37) and c (45), and divides each of them by the Total of num (162), then multiplying that by 100 to give a percent.
So for the last percent row:
A would be (70/162) * 100 = 43.21%
B would be (37/162) * 100 = 22.84%
C would be (45/162) * 100 = 27.78%
Showing the percent symbol would also be appreciated.
Here is my desired result:
I've tried some calculations using df$num[nrow(df)-1] but not quite sure how to incorporate that into the second bind_rows line. Thanks!
This could be achieved like so:
Make totals row
total <- data %>%
summarise(across(where(is.numeric), sum)) %>%
mutate(fruit = "Total")
Make percentages row (format as % via e.g. scales::percent)
total_pct <- total %>%
mutate(across(where(is.numeric), ~ .x / num),
across(where(is.numeric), ~ scales::percent(.x, accuracy = .01)),
fruit = "%")
Bind totals to the data table. As the columns in total_row are of type character we first have to convert data and total to character as well which I do via lapply and mutate_all
df <- lapply(list(data, total, total_pct), mutate_all, as.character) %>%
bind_rows()
Full reproducible code:
library(dplyr)
library(shiny)
library(DT)
fruit <- c("Apple", "Orange", "Pear", "Banana")
num <- c(54, 25, 51, 32)
a <- c(10, 15, 20, 25)
b <- c(5, 7, 10, 15)
c <- c(7, 9, 12, 17)
data <- data.frame(fruit, num, a, b, c)
ui <- fluidPage(
DT::dataTableOutput(outputId = "dt_Fruit")
)
server <- function(input, output, session) {
output$dt_Fruit <- DT::renderDataTable({
total <- data %>%
summarise(across(where(is.numeric), sum)) %>%
mutate(fruit = "Total")
total_pct <- total %>%
mutate(across(where(is.numeric), ~ .x / num),
across(where(is.numeric), ~ scales::percent(.x, accuracy = .01)),
fruit = "%")
df <- lapply(list(data, total, total_pct), mutate_all, as.character) %>%
bind_rows()
df$num[nrow(df)] = "" # makes last row in num column blank for percent; value not needed here
DT::datatable(
df,
rownames = FALSE,
options = list(
dom = 't',
searchHighlight = TRUE,
pageLength = 100,
scrollX = TRUE
)
)
})
}
shinyApp(ui, server)

Reactive Function

Prior to start creating my app with Shiny I've created a function (NextWordPrediction) that updates my dataframe based on an user's input as follows:
If input exists in df increase its Frequency by 1
If input does't exist in df add it
Function code:
NextWordPrediction <- function(input) {
dat <- dat %>%
filter(., N_gram == str_count(input, "\\S+") + 1) %>%
filter(grepl(paste("^", tolower(str_squish(input)), sep = ""), Word)) %>%
arrange(., desc(Prop))
if (nrow(dat) != 0) {
assign("dat",
dat %>%
mutate(Frequency = ifelse(Word == input &
N_gram == str_count(input, "\\S+"),
Frequency + 1,
Frequency)) %>%
group_by(., N_gram) %>%
mutate(., Prop = Frequency/ sum(Frequency)) %>%
data.frame(.),
envir = .GlobalEnv)
val <- dat$Word_to_Predict[1]
ans <- paste(str_squish(input), val)
return(list(ans, head(dat,5)))
} else if (nrow(dat) == 0 & word(input, 1) != "NA") {
assign("dat",
dat %>%
add_row(., Word = tolower(input), Frequency = + 1, N_gram = str_count(input, "\\S+"),
Word_to_Predict = word(input, -1)) %>%
group_by(., N_gram) %>%
mutate(., Prop = Frequency/ sum(Frequency)) %>%
data.frame(.),
envir = .GlobalEnv)
input_1 <- Reduce(paste, word(input, 2:str_count(input,"\\S+")))
return(NextWordPrediction(input_1))
} else if (word(input, 1) == "NA") {
ans <- paste("Word not in dictionary. We added this to our database!")
return(ans)
}
}
As a next step I want to extend this functionality to a Shiny app and I've tried the following without success. The function usability is functional but after an input my df is not updated accordingly.
server.R
library(shiny)
dat <- read.csv("dat_all.csv")
shinyServer(function(input, output) {
NextWordPrediction <- function(input) {
dat <- dat %>%
filter(., N_gram == str_count(input, "\\S+") + 1) %>%
filter(grepl(paste("^", tolower(str_squish(input)), sep = ""), Word)) %>%
arrange(., desc(Prop))
if (nrow(dat) != 0) {
assign("dat",
dat %>%
mutate(Frequency = ifelse(Word == input &
N_gram == str_count(input, "\\S+"),
Frequency + 1,
Frequency)) %>%
group_by(., N_gram) %>%
mutate(., Prop = Frequency/ sum(Frequency)) %>%
data.frame(.),
envir = .GlobalEnv)
val <- dat$Word_to_Predict[1]
ans <- paste(str_squish(input), val)
return(list(ans, head(dat,5)))
} else if (nrow(dat) == 0 & word(input, 1) != "NA") {
assign("dat",
dat %>%
add_row(., Word = tolower(input), Frequency = + 1, N_gram = str_count(input, "\\S+"),
Word_to_Predict = word(input, -1)) %>%
group_by(., N_gram) %>%
mutate(., Prop = Frequency/ sum(Frequency)) %>%
data.frame(.),
envir = .GlobalEnv)
input_1 <- Reduce(paste, word(input, 2:str_count(input,"\\S+")))
return(NextWordPrediction(input_1))
} else if (word(input, 1) == "NA") {
ans <- paste("Word not in dictionary. We added this to our database!")
return(ans)
}
}
output$predictiontext = reactive({
NextWordPrediction(input$text)[1]
})
output$predictiontable = renderTable({
NextWordPrediction(input$text)[2]
})
})
ui.R
library(shiny)
# Define UI for application that draws a histogram
shinyUI(fluidPage(
# Application title
titlePanel("NextWordPrediction"),
# Sidebar with a slider input for number of bins
sidebarLayout(
sidebarPanel(
textInput("text",
"Type something...",
"")
),
# Show a plot of the generated distribution
mainPanel(
wellPanel(
# Link to report
helpText(a('More information on the app',
href=link,
target = '_blank')
),
# Link to repo
helpText(a('Code repository',
href=link,
target = '_blank')
),
textOutput("predictiontext"),
tableOutput('predictiontable')
)
)
))
)
Update 1: Data
df<- data.frame(Word = c("hello", "she was great", "this is", "long time ago in"), Frequency = c(4, 3, 10, 1),
N_gram = c(1, 3, 2, 4), Prop = c(4/18, 3/18, 10/18, 1/18), Word_to_Predict = c(NA, "great", "is", "in"))
NextWordPrediction("she was") ## returns "she was" & "great"
NextWordPrediction("hours ago") ## returns "hours ago" & "in"
NextWordPrediction("words not in data") ## returns "Word not in dictionary. We added this to our database!" after trying "not in data", "in data" and adds "words not in data" to dataset

Resources