I have the following 3 way table I created in R.
with(dataset, ftable(xtabs(count ~ dos + sex + edu)))
The output looks like
edu high low medium unknown
dos sex
five-to-ten-years female 247776 44916 127133 23793
male 225403 37858 147821 20383
five-years-or-less female 304851 58018 182152 33649
male 253977 55720 193621 28972
more-than-ten-years female 709303 452605 539403 165675
male 629162 309193 689299 121336
native-born female 1988476 1456792 2094297 502153
male 1411509 1197395 2790522 395953
unknown female 57974 75480 73204 593141
male 40176 57786 93108 605542
I want to rename the variables and format the table so that I can include it in a report. I know that I can use dnn to rename the variables, but are there any other recommendations to rename the variables? And to format the table (similar to using kable)?
You could convert the output to a text matrix using the following function, after which you can style with kable however you choose:
ftab_to_matrix <- function(ft)
{
row_vars <- attr(ft, "row.vars")
for(i in seq_along(row_vars)){
row_vars[[i]] <- c(names(row_vars[i]), row_vars[[i]])}
rowvar_widths <- sapply(row_vars, function(x) max(nchar(x))) + 1
col_vars <- attr(ft, "col.vars")
rowvar_widths <- c(1, cumsum(c(rowvar_widths, max(nchar(names(col_vars))))))
ft_text <- capture.output(print(ft))
row_cols <- sapply(seq_along(rowvar_widths)[-1], function(x)
substr(ft_text, rowvar_widths[x - 1], rowvar_widths[x]))
ft_text <- substr(ft_text, rowvar_widths[length(rowvar_widths)] + 2, 100)
ft_breaks <- c(1, cumsum(lapply(strsplit(ft_text[length(ft_text)], "\\d "),
function(x) nchar(x) + 2)[[1]]))
col_cols <- sapply(seq_along(ft_breaks)[-1], function(x)
substr(ft_text, ft_breaks[x - 1], ft_breaks[x]))
trimws(cbind(row_cols, col_cols))
}
So, for example, using my example data from your last question, you could do something like:
my_tab <- with(`3waydata`, ftable(xtabs(count ~ duration + sex + education)))
as_image(kable_styling(kable(ftab_to_df(my_tab))), file = "kable.png")
Might have been easier had you given the full picture when you asked your first question... You could use gt to make fancy tables for reports. This is an edited version more fully demonstrating some capabilities.
library(dplyr)
library(gt)
way3data <- data %>%
group_by(duration, education, sex) %>%
summarise(count = sum(number)) %>%
ungroup
# Reorder with select and Titlecase with stringr
longer <- tidyr::pivot_wider(way3data,
values_from = count,
names_from = "education") %>%
select(duration, sex, high, medium, low, unknown) %>%
rename_with(stringr::str_to_title)
# Demonstrating some of the features of gt
# obviously could have done some of this
# to the original dataframe
myresults <- longer %>%
group_by(Duration) %>%
gt(rowname_col = "Sex") %>%
row_group_order(
groups = c("native-born",
"more-than-ten-years",
"five-to-ten-years",
"five-years-or-less",
"unknown")
) %>%
tab_spanner(label = "Education",
columns = matches("High|Low|Medium|Unknown")) %>%
tab_stubhead(label = "Duration or something") %>%
tab_style(
style = cell_text(style = "oblique", weight = "bold"),
locations = cells_row_groups()) %>%
tab_style(
style = cell_text(align = "right", style = "italic", weight = "bold"),
locations = cells_column_labels(
columns = vars(High, Low, Medium, Unknown)
)) %>%
tab_style(
style = cell_text(align = "right", weight = "bold"),
locations = cells_stub()) %>%
tab_header(
title = "Fancy table of counts with Duration, Education and Gender") %>%
tab_source_note(md("More information is available at https://stackoverflow.com/questions/62284264."))
# myresults
# Can save in other formats including .rtf
myresults %>%
gtsave(
"tab_1.png", expand = 10
)
You can read about all the formatting choices here
Data compliments of Allan
set.seed(69)
data <- data.frame(education = sample(c("high","low","medium","unknown"), 600, T),
sex = rep(c("Male", "Female"), 300),
duration = sample(c("unknown", "native-born",
"five-years-or-less", "five-to-ten-years",
"more-than-ten-years"), 600, T),
number = rpois(600, 10))
Related
I wanted to create a likert graph that is grouped by Question i. I can create the likert graph for total responses ungrouped, but im uncertain of how to reformat question 6 without losing the column for question i. (aka do the reformatting done below but also have it take into account who selected what in question i.)
What I want is the sufficiency of Q6 grouped by their answer in question i.
Sample Dataframe:
SurveyClean2 <- data.frame(i = c("Mail,Email", "Mail", "Mail,Email,Podcast", "Radio,Podcast", "Radio", "Mail,Radio"), Q6_3 = c("Not Sufficient", "Very Sufficient", "Completely Sufficient", "Moderately Sufficient", "Moderately Sufficient", "Not Sufficient"))
Unnesting Question i:
UnnestQi <- SurveyClean2 %>%
as_tibble() %>%
mutate(i = str_split(Q3, ",")) %>%
unnest(i)
Survey2Q6 <- UnnestQi |> drop_na(Q5) |> drop_na(i)
Reformating Question 6 to Likert-friendly format:
clean_survey <- function(data, column, question) {
data %>%
dplyr::select(all_of({{column}})) %>%
dplyr::mutate(Question = question) %>%
dplyr::group_by(Question, across(1)) %>%
dplyr::count() %>%
dplyr::ungroup() %>%
tidyr::pivot_wider(names_from = 2, values_from = n)
}
# table that contains survey questions/columns and the question name
survey_table <- dplyr::tibble(
column = c("Q6_3"),
question = c("Expert advice")
)
# loop through your data and clean it, then bind as dataframe
LikertGroupqi62 <- purrr::map2_df(survey_table$column, survey_table$question, function(x, y){
clean_survey(Survey2Q6, x, y)}) |>
mutate(across(everything(), ~ifelse(is.na(.), 0, .)))
## Likert
LikertGroupqi62 <- LikertGroupqi62 |> dplyr::select(Question, `Not Sufficient`, `Slightly Sufficient`, `Moderately Sufficient`, `Very Sufficient`, `Completely Sufficient`)
Likert WITHOUT grouping:
likert(Question~., LikertGroupqi62, ReferenceZero = 0, auto.key.in = list(columns = 1), main = list("Sufficiency of Cost-share Advice Based on Person or Agency Worked With"), col = c("#db6d00", "#924900", "#000000", "#004949", "#009292"),strip.left = FALSE, ylab = "", xlab = "Total Number of Respondents")
Issue
I'm trying to produce a visualisation using {echarts4r} that involves plotting points with labels displayed on the chart itself, where the labels are unrelated to the position of the points. This sounds like it should be simple, but so far I haven't found any viable method of doing this and I'm beginning to wonder if it's even possible.
Desired output
Here is a minimal example. I will use {ggplot2} to demonstrate what I'd (roughly) like to reproduce:
data <- data.frame(
date_eaten = as.Date(c("2020-01-01", "2020-01-02", "2020-01-03")),
tastiness = c(5, 7, 10),
fruit = c("apple", "orange", "mango")
)
data
#> date_eaten tastiness fruit
#> 1 2020-01-01 5 apple
#> 2 2020-01-02 7 orange
#> 3 2020-01-03 10 mango
library(ggplot2)
ggplot(data, aes(x = date_eaten, y = tastiness, label = fruit)) +
geom_point() +
geom_text(nudge_y = 0.2)
Attempt using e_labels()
This method is visually exactly what I want, however, it seems that there is no option to specify which columns to take the labels from.
library(echarts4r)
data %>%
e_chart(date_eaten) %>%
e_scatter(tastiness, symbol_size = 10) %>%
e_labels()
Attempt using e_mark_point()
This option allows for more customisation, however this is not really a viable solution as it is very clunky and doesn't strictly 'link back' to the original data:
data %>%
e_chart(date_eaten) %>%
e_scatter(tastiness, symbol_size = 10) %>%
e_mark_point(data = list(
xAxis = as.Date("2020-01-01"),
yAxis = 5,
value = "apple"
)) %>%
e_mark_point(data = list(
xAxis = as.Date("2020-01-02"),
yAxis = 7,
value = "orange"
)) %>%
e_mark_point(data = list(
xAxis = as.Date("2020-01-03"),
yAxis = 10,
value = "mango"
))
I think this is the solution. Currently I'm not sure exactly how it works as documentation is a bit limited, but it seems to work:
data %>%
e_chart(date_eaten) %>%
e_scatter(tastiness, symbol_size = 10, bind = fruit) %>%
e_labels(formatter = htmlwidgets::JS("
function(params) {
return(params.name)
}
"))
I'm trying to build a Shiny App, everything works ok, but my issue is at the beginning, the first time that my app is launched i get an error in my highcharts due the size of the data (more than 3M of rows),
After 10 seconds the error disapear and everithing looks ok, but i want to remove the error, now i'm using waiter package, loading screeen is displayed 1.5 seconds, then the error appear and later the graph is showed .
I want to use Waiter package to hide this error until every calculation is finished. This is the Error
Below here my code for the graph
# Graph for shortInterest tab By CvsI (bars) --Dynamic
output$graph_bars_shortInterest_hc <- renderHighchart({
waiter_show(
id = "graph_bars_shortInterest_hc",
html = tagList(spin_fading_circles(),
"Loading Model ..."),
color = "#63666a",
logo = "",
hide_on_render = !is.null(id)
)
Client <- subset(Data_russel, Metrics == "marketCap") %>%
filter(Value >= input$MC_bars_[1])%>%
filter(Value <= input$MC_bars_[2])%>%
select(Client_Name) %>% unique()
Client_2 <- subset(Data_russel, Metrics == "Annual_Limit_Adequacy") %>%
filter(Value >= input$AL_filter_[1])%>%
filter(Value <= input$AL_filter_[2])%>%
select(Client_Name) %>% unique()
Data_Metric <- subset(Data_russel, Metrics == "shortInterest" & Industry %in% input$industry_CvsI_bars)
Client_filtered <- inner_join(Client, Client_2, by = "Client_Name")
Data_ <- inner_join(Client_filtered, Data_Metric, by = "Client_Name") # Clients in the range of Selected Market cap
Data_c <- subset(Data_russel, Metrics == "shortInterest" & Industry %in% input$industry_CvsI_bars & Client_Name == input$clientname_CvsI_bars)
Table_ <- seq(input$perc_range_[1], input$perc_range_[2], 1) %>% as.data.frame()
names(Table_) <- "Percentile"
Table_$Value <- round( quantile(Data_c$Value, Table_$Percentile/100), digits = 2)
Table_$Industry <- round( quantile(Data_$Value, Table_$Percentile/100), digits = 2)
hc_1 <- Table_ %>%
hchart(. , type = "line", hcaes(x = Percentile, y = Value), name = "Client", color = "#FFB81C") %>%
hc_add_series(data = Table_ ,type = 'line' , color = "#00a0d2", name = "Industry", hcaes(x = Percentile, y = Industry))%>%
hc_yAxis(opposite = TRUE) %>%
hc_title(text = "shortInterest Benchmark", margin = 30,
align = "center",
style = list(color = "#702080", useHTML = TRUE)) %>%
hc_yAxis(max = max(Table_$Industry)+(sd(Table_$Industry)/5))%>%
hc_yAxis(min = min(Table_$Industry)-(sd(Table_$Industry)/5))%>%
hc_add_theme(hc_theme_google())
hc_1
})
Thanks !!
I fixed using next function, and using each output in the UI into this function
output %>% withSpinner(
type = getOption("spinner.type", default = 3),
color.background = getOption("spinner.color.background", default = "#C8D7DF" ),
color="#00A0D2")
}```
I am required to re-create the visualization but filter the data to keep only the businesses that had terms less than 360 months.
The data I am using is the SBA data from this link:
https://amstat.tandfonline.com/doi/full/10.1080/10691898.2018.1434342
library('magrittr')
library(dplyr)
library(tidyr)
library(ggplot2)
sba2 <- sba_data %>%
mutate(default_binary = ifelse(MIS_Status=="CHGOFF","Paid in Full","Default"), daysterm = Term*30, xx = as.Date(sba_data$DisbursementDate, format="%Y-%m-%d") + daysterm, recession_binary = ifelse(xx >= "2007-12-01" & xx <="2009-06-30","Active during Recession","Not Active during Recession"), smaller_business_binary = ifelse(NoEmp < 30, "Very Small Biz", "Not Very Small Biz"), business_length = ifelse(Term < 360, "Short Business", "Long Business"))
table(sba2$business_length)
sba_3 <- sba2 %>%
group_by(recession_binary, default_binary) %>%
summarise(frequencies=n()) %>%
drop_na() %>%
mutate(percents = round(frequencies/sum(frequencies),2))
ggplot(data = sba_3 ) +
geom_col(mapping = aes(x = recession_binary, y = percents, fill = default_binary)) +
coord_flip() +
scale_fill_manual(breaks = c("Default", "Paid in Full"),
values=c(rgb(232/255,74/255,39/255), rgb(19/255,41/255,75/255))) +
scale_y_continuous(labels = scales::percent)
This is my code so far to recreate the visualization. However, I am unsure how to filter the data to only keep business with a term less than 360 months. I had created the variable business_length when mutating sba2, but am not sure what the next steps are. Any help would be greatly appreciated, thanks!
Something like this?
sba_3 <- sba2 %>%
filter(Term < 360) %>%
group_by(recession_binary, default_binary) %>%
summarise(frequencies=n()) %>%
drop_na() %>%
mutate(percents = round(frequencies/sum(frequencies),2))
I have some tables in Microsoft Excel that I need to recreate in an R Shiny App. The formatting in R has to remain at least mostly the same as the original context.
Here are images of the original tables:
Table 1
Table 2
Notice the formatting: There are lines under table headers and above totals, headers and totals are bolded, numbers in the Monthly Bill column have thousands seperated by commas and have dollar symbols, and the final number in Table 2 is boxed in.
If the lines were not recreatable it would be fine, but I need to at least be able to bold the selected topics, headers, and totals, and be able to get the correct number format for the Monthly Bill column.
I have tried using the DT package but I can't figure out how to format rows instead of columns. I noticed DT uses wrappers for JavaScript functions but I don't personally know JavaScript myself. Is there a way to format this the way I that I need through R packages or Javascript?
Edit:
Although it would be simple, I cannot merely include an image of the tables because some of the numbers are going to be linked to user input and must have the ability to update.
pixiedust makes it easy to do cell-specific customizations.
T1 <- data.frame(Charge = c("Environmental", "Base Power Cost",
"Base Adjustment Cost", "Distribution Adder",
"Retail Rate Without Fuel", "Fuel Charge Adjustment",
"Retail Rate With Fuel"),
Summer = c(0.00303, 0.06018, 0.00492, 0.00501, 0.07314,
0.02252, 0.09566),
Winter = c(0.00303, 0.05707, 0.00468, 0.01264, 0.07742,
0.02252, 0.09994),
Transition = c(0.00303, 0.05585, 0.00459, 0.01264,
0.07611, 0.02252, 0.09863),
stringsAsFactors = FALSE)
T2 <- data.frame(Period = c("Summer", "Winter", "Transition", "Yearly Bill"),
Rate = c(0.09566, 0.09994, 0.09863, NA),
Monthly = c(118.16, 122.44, 121.13, 1446.92),
stringsAsFactors = FALSE)
library(shiny)
library(pixiedust)
library(dplyr)
options(pixiedust_print_method = "html")
shinyApp(
ui =
fluidPage(
uiOutput("table1"),
uiOutput("table2")
),
server =
shinyServer(function(input, output, session){
output$table1 <-
renderUI({
dust(T1) %>%
sprinkle(rows = 1,
border = "bottom",
part = "head") %>%
sprinkle(rows = c(5, 7),
cols = 2:4,
border = "top") %>%
sprinkle(rows = c(5, 7),
bold = TRUE) %>%
sprinkle(pad = 4) %>%
sprinkle_colnames(Charge = "") %>%
print(asis = FALSE) %>%
HTML()
})
output$table2 <-
renderUI({
T2 %>%
mutate(Monthly = paste0("$", trimws(format(Monthly, big.mark = ",")))) %>%
dust() %>%
sprinkle(rows = 1,
border = "bottom",
part = "head") %>%
sprinkle(rows = 4,
cols = 1,
bold = TRUE) %>%
sprinkle(rows = 4,
cols = 3,
border = "all") %>%
sprinkle(na_string = "",
pad = 4) %>%
sprinkle_colnames(Period = "",
Monthly = "Monthly Bill") %>%
print(asis = FALSE) %>%
HTML()
})
})
)
This would be easier if you provided an example of your data, but sticking with DT, you should be able to utilize formatStyle to change formatting of both rows and columns. For an example to bold the first row, see the following (assuming your data frame is called df):
df %>%
datatable() %>%
formatStyle(
0,
target = "row",
fontWeight = styleEqual(1, "bold")
)
The rstudio DT page offers more examples: http://rstudio.github.io/DT/010-style.html
Alternatively, I think you might be better off using the stargazer package.
The base plot would look very similar to your desired result.
stargazer::stargazer(df, type = "html", title = "Table 1")
That will get you started, but see here for a LOT more flexibility: https://www.jakeruss.com/cheatsheets/stargazer/