I have a DF and wanted to modify the y-axis, ordering my data by a variable from another object. I tried to use fct_reorder from forcats, but didn't work.
My code:
library(tidyverse)
library(ggridges)
library(zoo)
url <- httr::GET("https://xx9p7hp1p7.execute-api.us-east-1.amazonaws.com/prod/PortalGeral",
httr::add_headers("X-Parse-Application-Id" =
"unAFkcaNDeXajurGB7LChj8SgQYS2ptm")) %>%
httr::content() %>%
'[['("results") %>%
'[['(1) %>%
'[['("arquivo") %>%
'[['("url")
data <- openxlsx::read.xlsx(url) %>%
filter(is.na(municipio), is.na(codmun)) %>%
mutate_at(vars(contains(c("Acumulado", "Novos", "novos"))), ~ as.numeric(.))
data[,8] <- openxlsx::convertToDate(data[,8])
bigger_state <- data %>%
group_by(estado) %>%
mutate(diasposdez = 1:n(),
mm7d = rollmean(casosNovos, 7, fill = NA, allign = "right")) %>%
filter(data == data[which.max(mm7d)], !is.na(estado)) %>%
arrange(desc(casosNovos)) %>%
pull(estado)
data %>%
group_by(estado) %>%
mutate(height = rollmean(casosNovos, k = 7, align = "right", fill = NA) / sum(casosNovos),
estado = as_factor(estado)) %>%
filter(data >= "2020-05-01") %>%
ggplot(aes(x = data, y = fct_reorder(.f = estado, .x = bigger_state), height = scales::rescale(height))) +
geom_ridgeline() +
scale_x_date(date_breaks = "2 weeks",
date_labels = "%d/%b/%Y")
Error:
Error in fct_reorder(.f = estado, .x = bigger_state) :
length(f) == length(.x) is not TRUE
Just use factor(estado, bigger_state) instead of fct_reorder(.f = estado, .x = bigger_state). You are trying to match up all values with the factor levels, which gives you the error.
Related
I am trying to combine to different stratified tables made with tbl_strata()and tbl_summary from the gtsummary-package. I want to stratify by the same variable in both tables, but use different variables in the "by" argument in tbl_summary(). When I combine the tables using tbl_stack(), the column headers from the second table are lost. Is there any way to combine the tables and keep the column headers from both tables?
See reproducible example below
library(gtsummary)
library(tidyverse)
data("diamonds")
table1 <- diamonds %>%
filter(cut %in% c("Ideal", "Premium") & color %in% c("E", "I")) %>%
mutate(color = factor(color)) %>%
tbl_strata(strata = cut,
.tbl_fun =
~.x %>%
tbl_summary(by = color, include = price))
table2 <- diamonds %>%
filter(cut %in% c("Ideal", "Premium") & clarity %in% c("SI1", "SI2")) %>%
mutate(clarity = factor(clarity)) %>%
tbl_strata(strata = cut,
.tbl_fun =
~.x %>%
tbl_summary(by = clarity, include = price))
tbl_stack(list(table1, table2), group_header = c("Table 1", "Table 2"))
UPDATE:
Below is an (rough) example of the output I would like to be able to get from tbl_stack():
library(gtsummary)
library(tidyverse)
library(gt)
data("diamonds")
library(flextable)
table1_tibble <- diamonds %>%
filter(cut %in% c("Ideal", "Premium") & color %in% c("E", "I")) %>%
mutate(color = factor(color)) %>%
tbl_strata(strata = cut,
.tbl_fun =
~.x %>%
tbl_summary(by = color, include = price)) %>%
as_tibble()
table1_tibble <- rbind(colnames(table1_tibble), table1_tibble) %>%
rename(label = colnames(table1_tibble)[1],
premium_1 = colnames(table1_tibble)[2],
premium_2 = colnames(table1_tibble)[3],
ideal_1 = colnames(table1_tibble)[4],
ideal_2 = colnames(table1_tibble)[5]) %>%
add_row(label = "Table 1", .before = 1)
table2_tibble <- diamonds %>%
filter(cut %in% c("Ideal", "Premium") & clarity %in% c("SI1", "SI2")) %>%
mutate(clarity = factor(clarity)) %>%
tbl_strata(strata = cut,
.tbl_fun =
~.x %>%
tbl_summary(by = clarity, include = price)) %>%
as_tibble()
table2_tibble <- rbind(colnames(table2_tibble), table2_tibble) %>%
rename(label = colnames(table2_tibble)[1],
premium_1 = colnames(table2_tibble)[2],
premium_2 = colnames(table2_tibble)[3],
ideal_1 = colnames(table2_tibble)[4],
ideal_2 = colnames(table2_tibble)[5]) %>%
add_row(label = "Table 2", .before = 1)
bind_rows(table1_tibble, table2_tibble) %>%
flextable() %>%
merge_at(i = 1, j = 2:3, part = "header") %>%
merge_at(i = 1, j = 4:5, part = "header") %>%
set_header_labels(label = "", premium_1 = "Premium", ideal_1 = "Ideal")
Hope this clarifies matters.
Best regards,
Martin
R newbie here :)
I have recently started using R library Highcharter as an alternative to ggplot2.
This is the sample code I am currently working on:
library(highcharter)
library(dplyr)
## Sample dataframe
YEAR <- c(2019,2020,2021)
CATEGORY <- c("dog", "cat", "mouse")
SAMPLE_DATA <- expand.grid(YEAR, CATEGORY)
names(SAMPLE_DATA)[1] <- "CATEGORY"
names(SAMPLE_DATA)[2] <- "YEAR"
SAMPLE_DATA$VALUE <- runif(n = 9, min = 400, max = 900)
## Chart
SAMPLE_DATA <- SAMPLE_DATA %>%
group_by(YEAR, CATEGORY) %>%
summarise(VALUE = sum(VALUE, na.rm = T))
highchart() %>%
hc_add_series(data = SAMPLE_DATA, hcaes(x = YEAR, y = round(VALUE,0), group = CATEGORY), type = "column") %>%
hc_plotOptions(column = list(stacking = "normal"))
What I am trying to do is:
Sort how the group "CATEGORY" is piled in each column, based on ascending/descending "VALUE"
Have that effect which highlights the same group in all columns as you hover over it
Does anyone have an idea? Thank you!
This is a late answer but I believe this is what you want.
Adding the data again because I think you swapped some column names on accident:
YEAR <- c(2019,2020,2021)
CATEGORY <- c("dog", "cat", "mouse")
SAMPLE_DATA <- expand.grid(YEAR, CATEGORY)
names(SAMPLE_DATA)[1] <- "YEAR"
names(SAMPLE_DATA)[2] <- "CATEGORY"
SAMPLE_DATA$VALUE <- runif(n = 9, min = 400, max = 900)
## Chart
SAMPLE_DATA <- SAMPLE_DATA %>%
group_by(YEAR, CATEGORY) %>%
summarise(VALUE = sum(VALUE, na.rm = T))
Creating plot:
SAMPLE_DATA %>%
ungroup() %>%
mutate(YEAR = factor(YEAR) %>% fct_reorder(VALUE, .desc = TRUE)) %>%
mutate(year_index = as.numeric(YEAR)) %>%
hchart(
type = "column",
hcaes(x = year_index,
y = VALUE,
group = CATEGORY,
name = YEAR),
) %>%
hc_xAxis(type = "category", labels = list(step = 1)) %>%
hc_plotOptions(series = list(stacking = TRUE))
I am trying to create a 95% minimum convex polygon using sf in R. My code works fine as long as I only group my data on 1 variable, but when I group on two variables, the output loses its sf class and becomes a grouped_df instead.
Here is a toy dataset as an example
library(dplyr)
set.seed(12)
toy <- tibble::tibble(
ID = rep(c(1,2), each = 10),
year = rep(c(1,2), 10),
lat = runif(20, 1, 10),
long = runif(20, 1, 10)
) %>%
sf::st_as_sf(., coords = c("long", "lat"))
toy %>%
group_by(ID) %>%
summarize(.groups = "keep") %>%
mutate(cent = sf::st_centroid(geometry)) %>%
sf::st_cast(to = "POINT") %>%
mutate(dist = sf::st_distance(geometry, cent, by_element = TRUE)) %>%
filter(dist <= quantile(dist, .95)) %>%
summarize() %>%
sf::st_convex_hull() %>%
class()
This gives the output I want. But when I try to group by two variables, the result loses the sf class.
toy %>%
group_by(ID, year) %>%
summarize(.groups = "keep") %>%
mutate(cent = sf::st_centroid(geometry)) %>%
sf::st_cast(to = "POINT") %>%
mutate(dist = sf::st_distance(geometry, cent, by_element = TRUE)) %>%
filter(dist <= quantile(dist, .95)) %>%
summarize() %>%
sf::st_convex_hull() %>%
class
Is there something in my code keeping me from being able to group on two variables?
It's because the second summarize is regrouping on ID. You need a .groups = "keeps" there to pass the same grouping through and then convert back to an sf object. Alternatively you could create a grouping variable mutate(grp = paste0(ID, year)) and do group_by(grp) .
toy %>%
group_by(ID, year) %>%
summarize(.groups = "keep") %>%
mutate(cent = sf::st_centroid(geometry)) %>%
sf::st_cast(to = "POINT") %>%
mutate(dist = sf::st_distance(geometry, cent, by_element = TRUE)) %>%
filter(dist <= quantile(dist, .95)) %>%
summarize(.groups = "keep") %>%
sf::st_convex_hull() %>%
st_sf()
I want to create a summary table for some dichotomous variables using the expss package. Since the variables are dichotomous, one of the two levels would the sufficient to "show the picture".
I tried to use the function tab_net_cell, but was not able to get the right results. Here is some example code with BrCa (Breast cancer) with 1 or 0. I only want to show the number of patients with but not without breast cancer.
df <- data.frame(BrCa = c(1,1,1,0,0,0,NA,NA,0,0))
df$group <- c(1,2,1,2,1,2,1,2,1,2)
df %>%
expss::tab_cols(group) %>%
expss::tab_cells(BrCa) %>%
expss::tab_stat_cpct(total_row_position = "none",label = "%") %>%
expss::tab_stat_cases(total_row_position = "none",label = "N") %>%
expss::tab_pivot(stat_position = "inside_rows")
df %>%
expss::tab_cols(group) %>%
expss::tab_cells(BrCa) %>%
expss::tab_net_cells("BrCa" = eq(1)) %>%
expss::tab_stat_cpct(total_row_position = "none",label = "%") %>%
expss::tab_stat_cases(total_row_position = "none",label = "N") %>%
expss::tab_pivot(stat_position = "inside_rows")
The simplest way is to filter resulted table:
df <- data.frame(BrCa = c(1,1,1,0,0,0,NA,NA,0,0))
df$group <- c(1,2,1,2,1,2,1,2,1,2)
df %>%
expss::tab_cols(group) %>%
expss::tab_cells(BrCa) %>%
expss::tab_stat_cpct(total_row_position = "none",label = "%") %>%
expss::tab_stat_cases(total_row_position = "none",label = "N") %>%
expss::tab_pivot(stat_position = "inside_rows") %>%
expss::where(grepl(1, row_labels))
Another way is to use mean and sum instead of cpct and cases:
df %>%
expss::tab_cols(group) %>%
expss::tab_cells(BrCa*100) %>%
expss::tab_stat_mean(label = "%") %>%
expss::tab_stat_sum(label = "N") %>%
expss::tab_pivot(stat_position = "inside_rows")
I am trying to produce a formatted html table which has columns for frequency, cumulative frequency, column percentage, and cumulative column percentage. The table should also have the data subsetted by a grouping variable, and including a group total.
I can almost achieve this using a combination of dplyr and tidyr, but the output is a dataframe which doesn't look so pretty. I wonder if there is an easier way using the tables::tabulate command?
# Sample data
dat <- data.frame(
id = 1:100,
group = factor(sample(c("A", "B"), 100, replace = TRUE)),
sessions = factor(sample(1:10, 100, replace = TRUE))
)
# dplyr/tidyr solution
library(dplyr)
library(tidyr)
dat %>%
group_by(group, sessions) %>%
tally() %>%
spread(key = group, value = n) %>%
mutate(All = rowSums(.[-1])) %>%
gather(key = group, value = n, -sessions) %>%
group_by(group) %>%
mutate(
cum_n = cumsum(n),
p = round(n / sum(n)*100,1),
cum_p = round(cum_n / sum(n)*100,1),
) %>%
data.frame() %>%
reshape(timevar = "group", idvar = "sessions", direction = "wide")
# As far as I get using tables::tabulate
library(tables)
tabular(
Factor(sessions, "Sessions") ~
(Heading()*group + 1) *
(
(n = 1) +
# (cum_n = ??) +
Heading("%")*Percent(denom = "col")*Format(digits = 2)
# + Heading("cum_%")*??*Format(digits = 2)
),
data = dat
)
I would recommend using knitr::kable and kableExtra, amazing packages for producing tables. You can also set it up for multiple format outputs, for example using the same code to produce html and latex for pdf.
library(dplyr)
library(tidyr)
library(knitr)
library(kableExtra)
dat %>%
group_by(group, sessions) %>%
tally() %>%
spread(key = group, value = n) %>%
mutate(All = rowSums(.[-1])) %>%
gather(key = group, value = n, -sessions) %>%
group_by(group) %>%
mutate(
cum_n = cumsum(n),
p = round(n / sum(n)*100,1),
cum_p = round(cum_n / sum(n)*100,1),
) %>%
data.frame() %>%
reshape(timevar = "group", idvar = "sessions", direction = "wide") %>%
kable("html") %>%
kable_styling(bootstrap_options = c("striped", "hover"))