How to make selectInput behave dynamically with on user input? - r

I am trying to create an app that creates word frequency plot from text. The data I used is here: https://www.kaggle.com/datasets/bharadwaj6/kindle-reviews#:~:text=Never-,kindle_reviews,-.csv(701.88
Following is my shiny code:
library(shiny)
library(tidyverse)
library(tidytext)
# Source functions
source("wrd_freq_df.R")
source("bigrm_df.R")
# UI design ----
ui <- fluidPage(fluidRow(
column(
4,
fileInput("upload", NULL, accept = c(".csv", ".tsv")),
actionButton(
inputId = "submit",
label = "RUN",
class = "btn-primary"
),fluidRow(
column(4, numericInput(
"n",
"Top n words",
value = 5,
min = 1,
step = 1
)),
column(4, selectInput(
"myselect",
label = "Choose Words"
, choices = NULL
, multiple = TRUE
# , options = list(create = TRUE)
, selected = NULL
))),
plotOutput("wrd_frq_plot"),
tableOutput("head")
)
))
# SERVER ----
options(shiny.maxRequestSize = 60 * 1024 ^ 2)
server <- function(input, output, session) {
#upload csv data
data <- reactive({
req(input$upload)
ext <- tools::file_ext(input$upload$name)
switch(
ext,
csv = vroom::vroom(input$upload$datapath, delim = ","),
tsv = vroom::vroom(input$upload$datapath, delim = "\t"),
validate("Invalid file; Please upload a .csv or .tsv file")
)
})
#create different data subset to be used for plotting word_freq and bigrams
rv <- reactiveValues()
#run analysis on pressing run button
observeEvent(input$submit, {
rv$data_head <- data() %>%
mutate(ID = as.character(round(ID))) %>%
mutate(group = if_else(rating >= 4, "High", if_else(rating > 2, "Avg", "Low"))) %>%
select(ID, group, reviewText)
#get base analysis dataset
rv$word_freq <- wrd_freq_df(rv$data_head)
rv$bigrm_freq <- bigrm_freq_df(rv$data_head)
rv$bigrm_tf_idf <- bigrm_freq_tf_idf(rv$bigrm_freq)
#get all unique words from the document
rv$choices <- rv$word_freq %>%
select(word) %>%
unique() %>% unlist()
output$wrd_frq_plot <- renderPlot({
#ensure that if selectinput has no value then all words are used for analysis
if (is.null(input$myselect)) {
selected_choices = rv$choices
}
else{
selected_choices = input$myselect
}
updateSelectInput(session, "myselect", selected = selected_choices)
#get relevant document ID which contains chosen word for analysis
word <- input$myselect
word_df_chose <- data.frame(word)
relvant_ID <- word_df_chose %>%
#THIS IS PROBABLY WHERE THINGS FAILS
inner_join(rv$word_freq) %>%
select(ID) %>%
unique() %>% unlist()
rv$word_freq %>%
filter(ID %in% relevant_ID) %>%
left_join(rv$data_head %>%
select(ID, group) %>%
unique()) %>%
#remove low tf_IDF words
filter(tf_idf > quantile(tf_idf, 0.25)) %>%
group_by(group, word) %>%
summarise(n_tot = sum(n)) %>% ungroup() %>%
group_by(group) %>%
slice_max(n_tot, n = input$n) %>%
ungroup() %>%
mutate(group = as.factor(group),
word = reorder_within(word, n_tot, group)) %>%
ggplot(aes(x = word, y = n_tot, fill = group)) + geom_col() + facet_wrap(group ~
., scales = "free") +
coord_flip() + scale_x_reordered()
})
})
}
#executes app
shinyApp(ui, server)
The functions sourced are as follows:
wrd_freq_df <- function(df){
df %>%
# mutate(ID = as.character(round(ID))) %>%
# mutate(group = if_else(rating >= 4, "High", if_else(rating > 2, "Avg", "Low"))) %>%
# select(ID, group, reviewText) %>%
#get word freq
unnest_tokens(output = word, input = reviewText) %>% ungroup() %>%
anti_join(stop_words) %>%
count(ID, word) %>% ungroup() %>%
group_by(ID) %>%
mutate(total = sum(n)) %>% ungroup() %>%
bind_tf_idf(word, ID, n) }
bigrm_freq_df <- function(df) {
df %>%
# mutate(ID = as.character(round(ID))) %>%
# mutate(group = if_else(rating >= 4, "High", if_else(rating > 2, "Avg", "Low"))) %>%
# select(ID, group, reviewText) %>%
unnest_tokens(bigram, reviewText, token = "ngrams", n = 2) %>%
separate(bigram,
c("word1", "word2"),
sep = " ",
remove = FALSE) %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word)
}
bigrm_freq_tf_idf <- function(df) {
df %>%
select(ID, bigram) %>%
#get tf_idf
count(ID, bigram) %>%
bind_tf_idf(bigram, ID, n) %>%
arrange(desc(tf_idf))
}
i get following error message:
Joining, by = "word"
Warning: Error in inner_join: by must be supplied when x and y have no common variables.
ℹ use by = character()` to perform a cross-join.
181:
Input to asJSON(keep_vec_names=TRUE) is a named vector. In a future version of jsonlite, this option will not be supported, and named vectors will be translated into arrays instead of objects. If you want JSON object output, please use a named list instead. See ?toJSON.
I assume that the error must be arising when

Related

R How can I place the values of a second column under the values in the left column?

I have the following function with which I want to create summary statistics (for two data sets simultaneously):
mean.k <-function(x){round(mean(x, na.rm=TRUE), digits = 3)}
sd.k <-function(x){round(sd(x, na.rm=TRUE), digits = 3)}
sumstats<-function(x, y) { sumtable <- cbind(as.matrix(colSums(!is.na(x))),sapply(x,mean.k), paste("(",sapply(x,sd.k),")", sep = ""), as.matrix(colSums(!is.na(y))),sapply(y,mean.k), paste("(",sapply(y,sd.k),")", sep = ""))
sumtable=as.data.frame(sumtable)
names(sumtable)=c("Obs","Mean","Std.Dev", "Obs","Mean","Std.Dev");
sumtable}
On some data, the result looks like:
data(iris)
libary(dplyr)
iris_1 <- iris %>% filter(Species == "setosa") %>% select(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) # data set 1
iris_2 <- iris %>% filter(Species == "versicolor") %>% select(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) # data set 2
iris_desk_stats <- sumstats(iris_1, iris_2)
However, for me the optimal result would look like this:
So the two changes I need are the following:
Each standard deviation should be placed under the respective mean instead of in a separate column next to the means.
Obs should not be a proper column, but a row at the top in order to avoid redundancy.
Has anyone an idea? The first of the two desired changes would be the most important one.
I thought that I'd try to approach this "directly"; And here's what I've come up with:
iris %>%
as_tibble() %>%
summarise(
across(!where(is.factor), list(
mean = . %>% mean(na.rm = TRUE),
sd = . %>% sd(na.rm = TRUE)
)),
Obs = n()
) %>%
pivot_longer(
# c(contains(c("mean", "sd")), "Obs"),
everything(),
names_to = c("variable", "metric"),
values_to = "stats",
names_sep = "_") %>%
tidyr::replace_na(list(metric = "identity")) %>%
pivot_wider(names_from = metric,
values_from = stats) %>%
select(variable, everything()) %>%
mutate(entries = glue::glue("{mean}<br>({sd})",
mean = round(mean, 3),
sd = round(sd, 3)),
entries = if_else(!is.na(identity),
glue::glue("{identity}"),
entries)) %>%
select(-c("mean", "sd", "identity")) %>%
arrange(variable) %>%
gt::gt() %>%
gt::fmt_markdown(entries) %>%
identity()

How to make a plot axis label reactive?

In the below reproducible example Code 1, the user stratifies data and selects which variable to stratify the data by, Value_1 or Value_2; and also selects to view the stratification as a table or as a plot. Code 1 works as intended, and the stratification range label is a static "Range" (as shown in the table left-most column header and in the plot x-axis label, and as shown in the images at the bottom).
I am making the range label dynamic (reactive), because in the fuller App this example derives from the user has many variables to choose from for stratification.
In Code 2 below showing an amplified custom function stratData(), I have succeeded in making the table left-most column header for stratification ranges reactive, but I haven't yet figured out how to make the plot x-axis label for stratification ranges similarly reactive. Just replace the stratData() in Code 1 with the stratData() in Code 2 (and comment-out the plot code under renderPlot() to avoid seeing the error) to see how this is working (or not).
How could the plot x-axis label reflect the same reactivity as the table left-most column header?
Code 1:
library(ggplot2)
library(shiny)
library(tidyverse)
library(shinyWidgets)
ui <-
fluidPage(
uiOutput("strat_values"),
radioButtons(
inputId = 'stratsView',
label = NULL,
choices = list("Table view" = 1,"Plot view" = 2),
selected = 1,
inline = TRUE
),
conditionalPanel(condition = "input.stratsView == 1",h5(strong("Stratified data:")), tableOutput("stratData")),
conditionalPanel(condition = "input.stratsView == 2",h5(strong("Stratified data:")), plotOutput("stratPlot"))
)
server <- function(input, output, session) {
dat <- reactive({
data.frame(
ID = c(1,1,2,2,2,2,3,3,3,3),
Period = c("2020-03", "2020-04", "2020-01", "2020-02", "2020-03", "2020-04", "2020-01", "2020-02", "2020-03", "2020-04"),
Values_1 = c(-6, 26, 36, 46, 56, 86, 100, 10, 20, 30),
Values_2 = c(-3, 13, 18, 23, 28, 43, 50, 5, 10, 15)
)
})
output$strat_values <- renderUI({
selectInput("strat_values",
"Variable to range-spread (col 1):",
choices = c("Values_1","Values_2"),
selected = c("Values_1"))
})
stratData <- function(){
req(input$strat_values)
filter_exp1 <- parse(text=paste0("Period", "==", "'","2020-04", "'"))
dat_1 <- reactive({dat() %>% filter(eval(filter_exp1))})
breaks <- seq(min(dat_1()[[input$strat_values]]), max(dat_1()[[input$strat_values]]), length.out = 6)
tmp <- dat() %>%
filter(eval(filter_exp1)) %>%
mutate(Range = cut(!!sym(input$strat_values), breaks=breaks, include.lowest=TRUE, right = TRUE, dig.lab = 5)) %>%
group_by(Range)
tmp <- tmp %>%
summarise(Count = n(),Values = sum(!!sym("Values_2"))) %>%
complete(Range, fill = list(Count = 0,Values = 0)) %>%
ungroup %>%
mutate(Count_pct = Count/sum(Count)*100, Values_pct = Values/sum(Values)*100) %>%
dplyr::select(everything(), Count, Count_pct, Values, Values_pct) %>%
bind_rows(summarise_all(., ~(if(is.numeric(.)) sum(.) else "Total")))
Count <- tmp %>% pull(Count)
Range <- tmp %>% pull(Range)
list(data = tmp,
Range = Range[-length(Range)],
Count = as.data.frame(Count[-length(Count)]))
}
output$stratData <- renderTable({stratData()$data})
output$stratPlot <- renderPlot({
x <- factor(stratData()$Range, levels = c(stratData()$Range))
y <- as.matrix(stratData()$Count)
ggplot(stratData()$Count,aes(x,y)) +
geom_bar(stat="identity") +
labs(x = "Ranges") +
geom_text(aes(y = y + sign(y)/4,label = y))
})
}
shinyApp(ui, server)
Code 2:
stratData <- function(){
req(input$strat_values)
filter_exp1 <- parse(text=paste0("Period", "==", "'","2020-04", "'"))
dat_1 <- reactive({dat() %>% filter(eval(filter_exp1))})
breaks <- seq(min(dat_1()[[input$strat_values]]), max(dat_1()[[input$strat_values]]), length.out = 6)
tmp <- dat() %>%
filter(eval(filter_exp1)) %>%
mutate(sumvar = cut(!!sym(input$strat_values), breaks=breaks, include.lowest=TRUE, right = TRUE, dig.lab = 5)) %>%
group_by(sumvar)
tmp <- tmp %>%
summarise(Count = n(),Values = sum(!!sym("Values_2"))) %>%
complete(sumvar, fill = list(Count = 0,Values = 0)) %>%
ungroup %>%
mutate(Count_pct = Count/sum(Count)*100, Values_pct = Values/sum(Values)*100) %>%
dplyr::select(everything(), Count, Count_pct, Values, Values_pct) %>%
bind_rows(summarise_all(., ~(if(is.numeric(.)) sum(.) else "Total")))
names(tmp)[1] <- paste(input$strat_values, " Range")
Count <- tmp %>% pull(Count)
# Range <- tmp %>% pull(Range)
list(data = tmp,
# Range = Range[-length(Range)],
Count = as.data.frame(Count[-length(Count)]))
}
Resolved code:
Now with OP code correction and reflecting Stefan's comment for plot axis label reactivity; posting revised stratData() custom function where only one line was fixed (commented below) and the renderPlot() function where Stefan's axis-label solution is also commented:
stratData <- function(){
req(input$strat_values)
filter_exp1 <- parse(text=paste0("Period", "==", "'","2020-04", "'"))
dat_1 <- reactive({dat() %>% filter(eval(filter_exp1))})
breaks <- seq(min(dat_1()[[input$strat_values]]), max(dat_1()[[input$strat_values]]), length.out = 6)
tmp <- dat() %>%
filter(eval(filter_exp1)) %>%
mutate(Range = cut(!!sym(input$strat_values), breaks=breaks, include.lowest=TRUE, right = TRUE, dig.lab = 5)) %>%
group_by(Range)
tmp <- tmp %>%
summarise(Count = n(),Values = sum(!!sym("Values_2"))) %>%
complete(Range, fill = list(Count = 0,Values = 0)) %>%
ungroup %>%
mutate(Count_pct = Count/sum(Count)*100, Values_pct = Values/sum(Values)*100) %>%
dplyr::select(everything(), Count, Count_pct, Values, Values_pct) %>%
bind_rows(summarise_all(., ~(if(is.numeric(.)) sum(.) else "Total")))
names(tmp)[1] <- paste(input$strat_values, " Ranges")
Count <- tmp %>% pull(Count)
Range <- tmp %>% pull(var = 1) # var = 1 pulls left-most column from tmp dataframe
list(data = tmp,
Range = Range[-length(Range)],
Count = as.data.frame(Count[-length(Count)]))
}
output$stratPlot <- renderPlot({
x <- factor(stratData()$Range, levels = c(stratData()$Range))
y <- as.matrix(stratData()$Count)
ggplot(stratData()$Count,aes(x,y)) +
geom_bar(stat="identity") +
labs(x = paste(input$strat_values, " Ranges")) + # < Added line this per Stefan comment
geom_text(aes(y = y + sign(y)/4,label = y),
position = position_nudge(y = 0),
size = 5)
})

Automate script by changing str_detect?

I am using this script to produce a table. In the second line, for collectionName, I am using "Organization X". I have many different organizations (Org Y, Org Z, .....) to create this table for. Is there a way to automate this? And to automate the naming of the object (currently "orgx" below)?
orgx <- df %>%
filter(str_detect(collectionName, c("Organization X"))) %>%
filter(str_detect(Year, paste(years, collapse = "|"))) %>%
corpus(text_field = "text") %>%
tokens(remove_punct = TRUE) %>%
tokens_select(stopwords('english'),selection='remove') %>%
tokens_tolower(keep_acronyms = FALSE) %>%
tokens_lookup(dictionary = dict, nomatch = TRUE) %>%
dfm() %>%
dfm_group(groups = "Title") %>%
dfm_weight(scheme = "prop") %>%
as.data.frame() %>%
mutate_at(vars(keyterms, true), funs(round(., 4)))
Get the column names specific to that organizations as vector, use that as pattern in str_detect by looping over the vector in map and return the output in a list
library(dplyr)
library(purrr)
library(stringr)
vec <- c("Organization X", "Organization Y")
out <- map(vec, ~
df %>%
filter(str_detect(collectionName, .x)) %>%
filter(str_detect(Year, paste(years, collapse = "|"))) %>%
corpus(text_field = "text") %>%
tokens(remove_punct = TRUE) %>%
tokens_select(stopwords('english'),selection='remove') %>%
tokens_tolower(keep_acronyms = FALSE) %>%
tokens_lookup(dictionary = dict, nomatch = TRUE) %>%
dfm() %>%
dfm_group(groups = "Title") %>%
dfm_weight(scheme = "prop") %>%
as.data.frame() %>%
mutate_at(vars(keyterms, true), funs(round(., 4)))
)
names(out) <- sub("^(...).*\\s+(\\S)$", "\\1\\2", vec)
It may be better to keep the output in a list. But, if we need to assign it to different objects, it can be done with list2env or assign
list2env(out, .GlobalEnv)

Reactive Function

Prior to start creating my app with Shiny I've created a function (NextWordPrediction) that updates my dataframe based on an user's input as follows:
If input exists in df increase its Frequency by 1
If input does't exist in df add it
Function code:
NextWordPrediction <- function(input) {
dat <- dat %>%
filter(., N_gram == str_count(input, "\\S+") + 1) %>%
filter(grepl(paste("^", tolower(str_squish(input)), sep = ""), Word)) %>%
arrange(., desc(Prop))
if (nrow(dat) != 0) {
assign("dat",
dat %>%
mutate(Frequency = ifelse(Word == input &
N_gram == str_count(input, "\\S+"),
Frequency + 1,
Frequency)) %>%
group_by(., N_gram) %>%
mutate(., Prop = Frequency/ sum(Frequency)) %>%
data.frame(.),
envir = .GlobalEnv)
val <- dat$Word_to_Predict[1]
ans <- paste(str_squish(input), val)
return(list(ans, head(dat,5)))
} else if (nrow(dat) == 0 & word(input, 1) != "NA") {
assign("dat",
dat %>%
add_row(., Word = tolower(input), Frequency = + 1, N_gram = str_count(input, "\\S+"),
Word_to_Predict = word(input, -1)) %>%
group_by(., N_gram) %>%
mutate(., Prop = Frequency/ sum(Frequency)) %>%
data.frame(.),
envir = .GlobalEnv)
input_1 <- Reduce(paste, word(input, 2:str_count(input,"\\S+")))
return(NextWordPrediction(input_1))
} else if (word(input, 1) == "NA") {
ans <- paste("Word not in dictionary. We added this to our database!")
return(ans)
}
}
As a next step I want to extend this functionality to a Shiny app and I've tried the following without success. The function usability is functional but after an input my df is not updated accordingly.
server.R
library(shiny)
dat <- read.csv("dat_all.csv")
shinyServer(function(input, output) {
NextWordPrediction <- function(input) {
dat <- dat %>%
filter(., N_gram == str_count(input, "\\S+") + 1) %>%
filter(grepl(paste("^", tolower(str_squish(input)), sep = ""), Word)) %>%
arrange(., desc(Prop))
if (nrow(dat) != 0) {
assign("dat",
dat %>%
mutate(Frequency = ifelse(Word == input &
N_gram == str_count(input, "\\S+"),
Frequency + 1,
Frequency)) %>%
group_by(., N_gram) %>%
mutate(., Prop = Frequency/ sum(Frequency)) %>%
data.frame(.),
envir = .GlobalEnv)
val <- dat$Word_to_Predict[1]
ans <- paste(str_squish(input), val)
return(list(ans, head(dat,5)))
} else if (nrow(dat) == 0 & word(input, 1) != "NA") {
assign("dat",
dat %>%
add_row(., Word = tolower(input), Frequency = + 1, N_gram = str_count(input, "\\S+"),
Word_to_Predict = word(input, -1)) %>%
group_by(., N_gram) %>%
mutate(., Prop = Frequency/ sum(Frequency)) %>%
data.frame(.),
envir = .GlobalEnv)
input_1 <- Reduce(paste, word(input, 2:str_count(input,"\\S+")))
return(NextWordPrediction(input_1))
} else if (word(input, 1) == "NA") {
ans <- paste("Word not in dictionary. We added this to our database!")
return(ans)
}
}
output$predictiontext = reactive({
NextWordPrediction(input$text)[1]
})
output$predictiontable = renderTable({
NextWordPrediction(input$text)[2]
})
})
ui.R
library(shiny)
# Define UI for application that draws a histogram
shinyUI(fluidPage(
# Application title
titlePanel("NextWordPrediction"),
# Sidebar with a slider input for number of bins
sidebarLayout(
sidebarPanel(
textInput("text",
"Type something...",
"")
),
# Show a plot of the generated distribution
mainPanel(
wellPanel(
# Link to report
helpText(a('More information on the app',
href=link,
target = '_blank')
),
# Link to repo
helpText(a('Code repository',
href=link,
target = '_blank')
),
textOutput("predictiontext"),
tableOutput('predictiontable')
)
)
))
)
Update 1: Data
df<- data.frame(Word = c("hello", "she was great", "this is", "long time ago in"), Frequency = c(4, 3, 10, 1),
N_gram = c(1, 3, 2, 4), Prop = c(4/18, 3/18, 10/18, 1/18), Word_to_Predict = c(NA, "great", "is", "in"))
NextWordPrediction("she was") ## returns "she was" & "great"
NextWordPrediction("hours ago") ## returns "hours ago" & "in"
NextWordPrediction("words not in data") ## returns "Word not in dictionary. We added this to our database!" after trying "not in data", "in data" and adds "words not in data" to dataset

Shiny not detecting input in output

If I run the following code, everything runs fine:
library(dplyr)
library(tidyr)
library(shiny)
id <- 1:100
gender <- sample(c('M','F'), 100, replace=TRUE)
age <- sample(18:22, 100, replace=TRUE)
ethnicity <- sample(c('W','B','H','A','O'), 100, replace = TRUE)
grade <- sample(LETTERS[1:4], 100, replace=TRUE)
df <- cbind(id,gender,age,ethnicity,grade) %>% as.data.frame()
list1 <- list("id"="id","gender"="gender","age"="age","ethnicity"="ethnicity","grade"="grade")
list2 <- list("id"="id","gender"="gender","age"="age","ethnicity"="ethnicity","grade"="grade")
ui <-fluidPage(
selectInput("picker1", "PICKER 1", choices = list1, selected = "gender"),
selectInput("picker2", "PICKER 2", choices = list2, selected = "grade"),
tableOutput("crosstabs")
)
server <- function(input,output,session){
output$crosstabs <- renderTable({
t<-df %>% select_all() %>% select(-id) %>%
pivot_longer(cols = input$picker1) %>%
count(name,value, grade) %>% pivot_wider(names_from = grade, values_from = n)
t
})
}
shinyApp(ui,server)
But when I update output$crosstabs to include input$picker2, it returns an error. I'm rather confused, as I'm using the same structure, but simply with grade swapped out for input$picker2:
output$crosstabs <- renderTable({
t<-df %>% select_all() %>% select(-id) %>%
pivot_longer(cols = input$picker1) %>%
count(name,value, input$picker2) %>% pivot_wider(names_from = input$picker2, values_from = n)
t
})
What exactly is happening here?
It is a string and the count expects a unquoted column name, we can convert it to symbol and evaluate (!!)
server <- function(input,output,session){
output$crosstabs <- renderTable({
df %>%
select_all() %>%
select(-id) %>%
pivot_longer(cols = input$picker1) %>%
count(name,value, !!rlang::sym(input$picker2)) %>%
pivot_wider(names_from = input$picker2, values_from = n)
})
}
-testing

Resources