I would like to wrap the following pipeline in a function to make it easier to reuse:
# Mean of `c` for every (a, b) combination, drawn as one line per level of `b`.
df %>%
  group_by(a, b) %>%
  summarize(avg = mean(c)) %>%
  ggline(x = "a", y = "avg", color = "b")
However, the following function returns the error "Error in is.factor(x) : object 'b' not found", even though is.factor(df$b) == TRUE:
# Question's attempt at wrapping the group/summarise/plot pipeline in a function.
# `data_source` is the data frame to summarise; `var` is passed as a bare
# (unquoted) column name, as in the call `graph_var(df, b)` below.
graph_var <- function(data_source, var) {
# Capture the caller's expression for `var` so it can be unquoted in group_by().
var2 <- enquo(var)
data_source %>%
group_by(a, !!var2 )%>%
summarize(avg=mean(c)) %>%
# NOTE(review): shQuote(var) evaluates `var` itself rather than the captured
# `var2`, so R looks for an object named `b`; this is presumably where the
# reported "object 'b' not found" error comes from. The other ggline() calls in
# this file pass column names as plain strings.
ggline(x="a", y="avg", color=shQuote(var) )+ grids(linetype = 'dashed')
}
# Reproduces the error described in the question.
graph_var(df, b)
I'm sure the issue lies somewhere around ggpubr taking quoted column names in its arguments, but I can't track down exactly what I need to do to get this to work.
For reproducibility:
library(tidyverse)
library(ggpubr)
# Example data: three levels of `b`, each paired with a = 1..10, plus noise `c`.
set.seed(13)
df <- data.frame(
  a = rep(1:10, times = 3),
  b = factor(rep(c("X", "Y", "Z"), each = 10)),
  c = rnorm(30)
)

# Explicit declaration: the colour column is named by a string literal (works).
df %>%
  group_by(a, b) %>%
  summarize(avg = mean(c)) %>%
  ggline(x = "a", y = "avg", color = "b") +
  grids(linetype = "dashed")

# Declared via a variable holding the column name as a string (also works).
test_var <- "b"
df %>%
  group_by(a, b) %>%
  summarize(avg = mean(c)) %>%
  ggline(x = "a", y = "avg", color = test_var) +
  grids(linetype = "dashed")
# Declared via a function argument - yields the error
# "Error in is.factor(x) : object 'b' not found".
# `data_source` is the data frame to summarise; `var` is passed as a bare
# (unquoted) column name, as in the call `graph_var_ex(df, b)` below.
graph_var_ex <- function(data_source, var) {
# Capture the caller's expression for `var` so it can be unquoted in group_by().
var2 <- enquo(var)
data_source %>%
group_by(a, !!var2 )%>%
summarize(avg=mean(c)) %>%
# NOTE(review): shQuote(var) evaluates `var` itself rather than the captured
# `var2`, so R looks for an object named `b`; this is presumably the source of
# the error quoted above. The working examples pass the colour column as a
# plain string.
ggline(x="a", y="avg", color=shQuote(var) )+ grids(linetype = 'dashed')
}
# Reproduces the error.
graph_var_ex(df, b)
Try as.character(ensym(var)).
Other notes:
Your function included y="c" in the ggline() call, but this should be y="avg" since "c" no longer exists after your summarize().
You can use the {{ embracing operator as a shortcut for !!enquo() when passing var to group_by().
library(dplyr)
library(ggpubr)
# Example data: three groups in `b`, each paired with a = 1..10, plus noise `c`.
set.seed(13)
df <- data.frame(
  a = rep(1:10, times = 3),
  b = rep(c("X", "Y", "Z"), each = 10),
  c = rnorm(30)
)
# Plot the mean of `c` against `a`, with one line per level of a grouping column.
#
# data_source: data frame containing columns `a`, `c` and the grouping column.
# var:         grouping column, given as a bare (unquoted) name.
# Returns the ggline() plot with dashed grid lines added.
graph_var <- function(data_source, var) {
  # ggline() takes column names as strings, so turn the bare name into one.
  colour_col <- as.character(ensym(var))

  group_means <- data_source %>%
    group_by(a, {{ var }}) %>%
    summarize(avg = mean(c))

  ggline(group_means, x = "a", y = "avg", color = colour_col) +
    grids(linetype = "dashed")
}

graph_var(df, b)
Related
I have a function in R that generates a table from a data frame, and every time I want to use a different variable (a column name from the data frame) I have to repeat the code. Sometimes the column is variablea, sometimes variableb, other times variablec, and so on.
# Builds a per-(a, b, c, d) summary of the rows where `variablea` is negative.
# The column name `variablea` is hard-coded throughout; the question is how to
# turn it into an argument, e.g. function(data, column = variablea).
generates_table_variablea <- function(data) { ## how can the column (variablea) be passed in here?
####### e.g. function(data, column = variablea) ... ?
big_data <- data %>%
group_by(a, b, c, d) %>%
# Group total of |f|, used later as the denominator of frac_vendas.
mutate(total_categoria_abs = sum(abs(f))) %>%
# Split f into the part belonging to negative / positive values of variablea.
mutate(volume_negativo = if_else(variablea < 0, f, 0)) %>%
mutate(volume_positivo = if_else(variablea > 0, f, 0)) %>%
mutate(total = sum(volume_positivo) - sum(volume_negativo)) %>%
# Group sum of variablea on rows where it is negative, 0 elsewhere.
mutate(e = if_else(variablea < 0, sum(variablea), 0)) %>%
ungroup() %>%
# Keep only the rows where variablea is negative.
filter (variablea < 0) %>%
group_by(a, b, c, d) %>%
summarise(e = mean(e), vendas = sum(f*-1), frac_vendas = vendas*-1/mean(total_categoria_abs)) %>%
arrange(e) %>%
ungroup()
big_data$frac_vendas <- round(big_data$frac_vendas, digits = 2)
# NOTE(review): the function ends on this assignment, so its value is the
# rounded `e` vector (returned invisibly), not `big_data` - confirm whether the
# table itself is meant to be returned.
big_data$e <- round(big_data$e, digits = 0)
}
If I want to change this variable, I have to do the following:
# Builds a per-(a, b, c, d) summary of the rows where `variableb` is negative.
#
# This is the `variableb` copy of generates_table_variablea(): the column name
# is hard-coded and has to be edited by hand in every expression, which is the
# duplication the question wants to remove (the desired signature would be
# something like function(data, column = variableb)).
#
# data: data frame with columns a, b, c, d, f and variableb.
generates_table_variableb <- function(data) {
  big_data <- data %>%
    group_by(a, b, c, d) %>%
    # Group total of |f|, used later as the denominator of frac_vendas.
    mutate(total_categoria_abs = sum(abs(f))) %>%
    # Every `variableb` below is what has to be changed by hand per column.
    mutate(volume_negativo = if_else(variableb < 0, f, 0)) %>%
    mutate(volume_positivo = if_else(variableb > 0, f, 0)) %>%
    mutate(total = sum(volume_positivo) - sum(volume_negativo)) %>%
    # Fixed: the condition previously still tested `variablea`, a leftover from
    # copying the variablea version; it must test the same column it sums.
    mutate(e = if_else(variableb < 0, sum(variableb), 0)) %>%
    ungroup() %>%
    # Keep only the rows where variableb is negative.
    filter(variableb < 0) %>%
    group_by(a, b, c, d) %>%
    summarise(
      e = mean(e),
      vendas = sum(f * -1),
      frac_vendas = vendas * -1 / mean(total_categoria_abs)
    ) %>%
    arrange(e) %>%
    ungroup()

  big_data$frac_vendas <- round(big_data$frac_vendas, digits = 2)
  # NOTE(review): the function ends on this assignment, so its value is the
  # rounded `e` vector (returned invisibly), not `big_data`. Left unchanged so
  # existing callers are unaffected - confirm whether the table is intended.
  big_data$e <- round(big_data$e, digits = 0)
}
Having multiple functions doing the same thing is slowing down my code...
How could this be better? All that I want is to pass this column dynamically.
This is one way to do it:
library(dplyr)
x <- data.frame(v1 = 1:3, v2 = 4:6)

# Select a single column from `data`.
#
# data: data frame to select from.
# var1: the column, supplied as a quosure (e.g. quo(v1)); it is unquoted
#       with !! inside select().
# Returns `data` reduced to the selected column.
f <- function(data, var1) {
  # Fixed: operate on the `data` argument; the body previously read the global
  # `x`, so the first argument was silently ignored.
  data %>% select(!!var1)
}

f(x, quo(v1))
You can see more explanation in https://adv-r.hadley.nz/quasiquotation.html
I found another way that works too:
# Builds a per-(a, b, c, d) summary of the rows where a chosen column is negative.
#
# dataframe: data frame with columns a, b, c, d, f and the column named by
#            `variable`.
# variable:  name of the column to analyse, as a string; it is looked up with
#            the `.data[[variable]]` pronoun inside the dplyr verbs.
generates_table_variablea <- function(dataframe, variable) {
  big_data <- dataframe %>%
    group_by(a, b, c, d) %>%
    # Group total of |f|, used later as the denominator of frac_vendas.
    mutate(total_categoria_abs = sum(abs(f))) %>%
    # Split f into the part belonging to negative / positive column values.
    mutate(volume_negativo = if_else(.data[[variable]] < 0, f, 0)) %>%
    mutate(volume_positivo = if_else(.data[[variable]] > 0, f, 0)) %>%
    mutate(total = sum(volume_positivo) - sum(volume_negativo)) %>%
    # Fixed: the sum previously used the hard-coded column `variablea`, so the
    # result ignored `variable` (or failed when `variablea` was absent).
    mutate(e = if_else(.data[[variable]] < 0, sum(.data[[variable]]), 0)) %>%
    ungroup() %>%
    # Keep only the rows where the chosen column is negative.
    filter(.data[[variable]] < 0) %>%
    group_by(a, b, c, d) %>%
    summarise(
      e = mean(e),
      vendas = sum(f * -1),
      frac_vendas = vendas * -1 / mean(total_categoria_abs)
    ) %>%
    arrange(e) %>%
    ungroup()

  big_data$frac_vendas <- round(big_data$frac_vendas, digits = 2)
  # NOTE(review): the function ends on this assignment, so its value is the
  # rounded `e` vector (returned invisibly), not `big_data`. Left unchanged so
  # existing callers are unaffected - confirm whether the table is intended.
  big_data$e <- round(big_data$e, digits = 0)
}
Just replace the hard-coded column with .data[[variable]], and you can pass any column name (as a string) into the function.
Question
I have the following ddply code, where the inner function works well when run on a single element i of modelNames. However, when I run the entire ddply call, it gives me this error:
Error in str2lang(x) : <text>:2:0: unexpected end of input
1: ~
^
Do you have any idea what part of code may cause the issue?
Current Code
# For each `model` group of `modelNames`, fit a linear model on `d` and collect
# its coefficients, R-squared and number of observations in long format.
# `d`, `models`, chain.filter(), chain_stars() and easy.round4() are defined
# outside this snippet.
# NOTE(review): the result is assigned to `t`, which masks base::t() here.
t <-
modelNames %>%
# NOTE(review): ddply() presumably hands the callback the data-frame piece for
# each `model` value rather than the model name itself, so `i` may not be a
# scalar when used in `model == i` below - confirm, since empty filter results
# would produce the " ~ " formula string shown in the reported error.
ddply('model', function(i){
print(i)
# Column names available in the modelling data.
colnames <- names(d)
# Dependent variable: `variable` of the rows of `models` for model `i`
# flagged with dep == 1.
dep <-
models %>%
chain.filter('dep') %>%
filter(model == i) %>%
filter(dep == 1) %>%
.$variable
# Candidate independent variables: rows for model `i` flagged with indep == 1.
indep <-
models %>%
chain.filter('dep') %>%
filter(model == i) %>%
filter(indep == 1) %>%
.$variable
# Main-effect terms: independent variables that exist as columns of `d`.
base <-
Reduce(intersect, list(indep, colnames))
# All pairwise "x*y" strings built from the main-effect terms ...
interaction <-
expand.grid(base, base) %>%
mutate(
interaction = paste0(Var1, '*', Var2)
) %>%
.$interaction
# ... restricted to the interaction terms actually listed for this model.
interaction <-
Reduce(intersect, list(indep, interaction))
indep <-
c(base, interaction)
# Assemble "dep ~ term1 + term2 + ..." and convert it to a formula.
# If `indep` is empty the right-hand side is the empty string, which
# as.formula() cannot parse.
eq <-
paste(indep, collapse = ' + ') %>%
paste(dep, ., sep = ' ~ ') %>%
as.formula
# Coefficient table of the fitted model as a data frame.
s <-
lm(eq, d) %>%
summary(.) %>%
.$coefficient %>%
as.data.frame
# R-squared of the same model (the model is fitted again here).
r <-
lm(eq, d) %>%
summary(.) %>%
.$r.squared
# Number of observations used (third fit of the same model).
n <- nobs(lm(eq, d))
# One row per coefficient (columns 1 and 2 of the coefficient table), then
# extra rows for R-squared and the observation count, reshaped to long format.
t <- data.frame(
model = i,
variable = rownames(s),
estimate = s[, 1],
se = s[, 2],
group = 'Estimates'
) %>%
chain_stars(asterisk = c('', '', '\\textsuperscript{*}', '\\textsuperscript{**}')) %>%
rows_insert(tibble(variable = 'rsqr', estimate = as.character(easy.round4(r)), se = NA, group = 'Overall')) %>%
rows_insert(tibble(variable = 'obs', estimate = as.character(prettyNum(n, big.mark = ",", scientific = F)), se = NA, group = 'Overall')) %>%
gather(state, value, -c(group, variable))
# Value returned for this group.
t
}, .progress = 'text')
I have a data frame and want to reorder the y-axis of my plot according to a variable from another object. I tried to use fct_reorder from forcats, but it didn't work.
My code:
library(tidyverse)
library(ggridges)
library(zoo)
# Query the API and drill into results[[1]]$arquivo$url of the parsed response
# to obtain the URL of the spreadsheet.
url <- httr::GET("https://xx9p7hp1p7.execute-api.us-east-1.amazonaws.com/prod/PortalGeral",
httr::add_headers("X-Parse-Application-Id" =
"unAFkcaNDeXajurGB7LChj8SgQYS2ptm")) %>%
httr::content() %>%
'[['("results") %>%
'[['(1) %>%
'[['("arquivo") %>%
'[['("url")
# Read the spreadsheet, keep the rows with no municipio and no codmun
# (presumably the state-level and national rows), and convert the
# "Acumulado"/"Novos"/"novos" columns to numeric.
data <- openxlsx::read.xlsx(url) %>%
filter(is.na(municipio), is.na(codmun)) %>%
mutate_at(vars(contains(c("Acumulado", "Novos", "novos"))), ~ as.numeric(.))
# Column 8 holds spreadsheet date serials; convert it to Date.
data[,8] <- openxlsx::convertToDate(data[,8])
# Vector of states: for each state keep the row on the date where the 7-point
# rolling mean of casosNovos peaks, then order those rows by casosNovos
# (descending) and pull the state names.
bigger_state <- data %>%
group_by(estado) %>%
# NOTE(review): `allign` is spelled differently from the `align` argument used
# in the plotting pipeline below; presumably a typo, in which case the
# requested alignment may not take effect - confirm.
mutate(diasposdez = 1:n(),
mm7d = rollmean(casosNovos, 7, fill = NA, allign = "right")) %>%
filter(data == data[which.max(mm7d)], !is.na(estado)) %>%
arrange(desc(casosNovos)) %>%
pull(estado)
# Ridgeline plot: per state, the 7-point rolling mean of new cases as a share
# of the state's total, from 2020-05-01 onwards.
data %>%
group_by(estado) %>%
mutate(height = rollmean(casosNovos, k = 7, align = "right", fill = NA) / sum(casosNovos),
estado = as_factor(estado)) %>%
filter(data >= "2020-05-01") %>%
# This fct_reorder() call is the one named in the reported error:
# `bigger_state` is a separate vector of states, so its length presumably
# differs from the number of rows of `estado` being plotted.
ggplot(aes(x = data, y = fct_reorder(.f = estado, .x = bigger_state), height = scales::rescale(height))) +
geom_ridgeline() +
scale_x_date(date_breaks = "2 weeks",
date_labels = "%d/%b/%Y")
Error:
Error in fct_reorder(.f = estado, .x = bigger_state) :
length(f) == length(.x) is not TRUE
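Just use factor(estado, bigger_state) instead of fct_reorder(.f = estado, .x = bigger_state). fct_reorder() expects .x to contain one value per element of .f, whereas bigger_state is only the vector of states in the desired order; that length mismatch is what triggers the error. Passing bigger_state as the factor levels sets the order directly.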
I want to create a summary table for some dichotomous variables using the expss package. Since the variables are dichotomous, showing one of the two levels would be sufficient to "show the picture".
I tried to use the function tab_net_cells, but was not able to get the right results. Here is some example code with a variable BrCa (breast cancer) coded as 1 or 0. I only want to show the number of patients with breast cancer, not those without.
# Example data: BrCa is 1/0 (with two missing values), group alternates 1, 2.
df <- data.frame(
  BrCa = c(1, 1, 1, 0, 0, 0, NA, NA, 0, 0),
  group = rep(c(1, 2), times = 5)
)

# Full table: column percentages and case counts for both levels of BrCa.
df %>%
  expss::tab_cols(group) %>%
  expss::tab_cells(BrCa) %>%
  expss::tab_stat_cpct(total_row_position = "none", label = "%") %>%
  expss::tab_stat_cases(total_row_position = "none", label = "N") %>%
  expss::tab_pivot(stat_position = "inside_rows")

# Attempt to show only BrCa == 1 by netting the cells.
df %>%
  expss::tab_cols(group) %>%
  expss::tab_cells(BrCa) %>%
  expss::tab_net_cells("BrCa" = eq(1)) %>%
  expss::tab_stat_cpct(total_row_position = "none", label = "%") %>%
  expss::tab_stat_cases(total_row_position = "none", label = "N") %>%
  expss::tab_pivot(stat_position = "inside_rows")
The simplest way is to filter the resulting table:
# Example data: BrCa is 1/0 (with two missing values), group alternates 1, 2.
df <- data.frame(
  BrCa = c(1, 1, 1, 0, 0, 0, NA, NA, 0, 0),
  group = rep(c(1, 2), times = 5)
)

# Build the full table, then keep only the rows whose label contains "1".
df %>%
  expss::tab_cols(group) %>%
  expss::tab_cells(BrCa) %>%
  expss::tab_stat_cpct(total_row_position = "none", label = "%") %>%
  expss::tab_stat_cases(total_row_position = "none", label = "N") %>%
  expss::tab_pivot(stat_position = "inside_rows") %>%
  expss::where(grepl(1, row_labels))
Another way is to use mean and sum instead of cpct and cases:
# Derive both figures from the 0/1 coding: the mean of BrCa * 100 is the
# percentage of patients with BrCa == 1, and the sum of BrCa is their count.
df %>%
  expss::tab_cols(group) %>%
  expss::tab_cells(BrCa * 100) %>%
  expss::tab_stat_mean(label = "%") %>%
  # Fixed: switch back to the unscaled variable before summing; summing
  # BrCa * 100 reported 100 times the number of cases under the "N" label.
  expss::tab_cells(BrCa) %>%
  expss::tab_stat_sum(label = "N") %>%
  expss::tab_pivot(stat_position = "inside_rows")
I have some variable names specified as string (e.g. input from Shiny app) and I would like to use them in my dplyr and ggplot2 code as if they were variables.
I got it to work by trial and error, but I feel like there must be a better way. What is a better way to perform these operations?
library(rlang)
library(ggplot2)
library(dplyr)
# Column name held as a string, standing in for input that arrives as text
# (e.g. from a Shiny app).
someString <- "g1"
# Example data: two grouping columns (g1, g2) and two value columns (a, b),
# the latter being random permutations of 1..5.
df <- tibble(
g1 = c(1, 1, 2, 2, 2),
g2 = c(1, 2, 1, 2, 1),
a = sample(5),
b = sample(5)
)
# Mean of `a` within each group.
#
# df:        data frame with a column `a` and the grouping column.
# group_var: grouping column as a quosure; it is unquoted with !! below.
# Prints `group_var`, then returns the summarised data frame.
my_summarise <- function(df, group_var) {
  print(group_var)
  by_group <- group_by(df, !!group_var)
  summarise(by_group, a = mean(a))
}
# Bar chart of the mean of `a` within each group.
#
# df:        data frame with a column `a` and the grouping column.
# group_var: grouping column as a quosure; unquoted for group_by() and turned
#            into a string with quo_name() for aes_string().
# Prints `group_var`, then returns the ggplot object.
my_plot <- function(df, group_var) {
  print(group_var)
  group_means <- df %>%
    group_by(!!group_var) %>%
    summarise(a = mean(a))
  ggplot(
    data = group_means,
    aes_string(x = quo_name(group_var), y = "a")
  ) +
    geom_bar(stat = "identity")
}
# Turn the string into a symbol, unquote it and wrap it in a quosure before
# passing it on - the construction the question asks how to simplify.
# NOTE(review): UQ() is presumably the older functional spelling of `!!` in
# rlang; confirm it is still supported by the installed version.
my_summarise(df, quo(UQ(sym(someString))))
my_plot(df, quo(UQ(sym(someString))))
Either of these options is probably simpler:
# Mean of `a` within each group.
#
# df:        data frame with a column `a` and the grouping column.
# group_var: name of the grouping column, as a string.
# Prints `group_var`, then returns the summarised data frame.
my_summarise <- function(df, group_var) {
  print(group_var)
  # Per the answer, group_by(!!sym(group_var)) works here as well.
  by_group <- group_by_at(df, .vars = group_var)
  summarise(by_group, a = mean(a))
}

my_summarise(df, someString)
# Bar chart of the mean of `a` within each group.
#
# df:        data frame with a column `a` and the grouping column.
# group_var: name of the grouping column, as a string; used both for grouping
#            and as the x aesthetic.
# Prints `group_var`, then returns the ggplot object.
my_plot <- function(df, group_var) {
  print(group_var)
  # Per the answer, group_by(!!sym(group_var)) works here as well.
  group_means <- df %>%
    group_by_at(.vars = group_var) %>%
    summarise(a = mean(a))
  ggplot(
    data = group_means,
    aes_string(x = group_var, y = "a")
  ) +
    geom_bar(stat = "identity")
}

my_plot(df, someString)
...where you could use either group_by or group_by_at.
What about calling with my_summarise(df, as.name(someString))?