Variable column names in the pipe - r

I have the following code:
install.packages('tidyverse')
library(tidyverse)
x <- 1:10
y <- x^2
df <- data.frame(first_column = x, second_column = y)
tibble <- as_tibble(df)
tibble %>%
filter(second_column != 16) %>%
ggplot(aes(x = first_column, y = second_column)) +
geom_line()
Now I would like to create the following function
test <- function(colname) {
tibble %>%
filter(colname != 16) %>%
ggplot(aes(x = first_column, y = colname)) +
geom_line()
}
test('second_column')
But running it creates a vertical line instead of the function. How can I make this function work?
Edit: My focus is on getting the pipe to work, not ggplot.

In order to pass character strings for variable names, you have to use the standard evaluation version of each function. It is aes_string for aes, and filter_ for filter. See the NSE vignette for more details.
Your function could look like:
test <- function(colname) {
tibble %>%
filter_(.dots= paste0(colname, "!= 16")) %>%
ggplot(aes_string(x = "first_column", y = colname)) +
geom_line()
}

Related

R/Shiny: Group by and Summarise on column selected in dropdown?

I am very new to R and Shiny, so I apologize in advance if this is too basic.
I am trying to render a bar graph where I select the X and Y off dropdowns. I need to aggregate by average values of Y for each X I choose. When I ran the below code, I got an error stating "Column 'y' not found". I cannot pass the actual column name selected as that can change. How do I solve this issue?
output$MultivariatePlot <- renderPlotly({
if (input$Apply > 0){
isolate({
req(data$Policies)
x <- input$MultivariateX
y <- input$MultivariateY
rv$g <- data$Policies %>%
group_by(y) %>% summarise(y = mean(y)) %>%
ggplot2::ggplot(aes_(x = sym(x), y = sym(y))) +
# ggplot2::stat_summary(fun.y = "mean", geom = "bar") +
ggplot2::geom_bar(stat='identity') +
ggplot2::scale_fill_manual(values = rara::ColorSelect(2)) +
ggplot2::theme_classic() +
ggplot2::theme(panel.grid.major = element_line(color = 'gray80', linetype = 'longdash', size = 0.3)) +
ggplot2::labs(title = paste('Comparison Between', x, 'and', y))
})
You can use !!sym(x) in places that you would normally use the symbol version of x, and !!sym(y) in places where you would normally use the symbol version of y.
This turns your code into the following:
rv$g <- data$Policies %>%
group_by(!!sym(x)) %>%
summarise(!!sym(y) := mean(!!sym(y))) %>%
ggplot2::ggplot(aes(x = !!sym(x), y = !!sym(y))) +
ggplot2::geom_bar(stat='identity') +
# etc
There's one last complication in there, which is that you had to use := instead of = when it's to the right of a sym(). (Also note that aes_ isn't usually recommended anymore, so I used x = !!sym(x) in regular aes(), and geom_col() is a shortcut for ggplot2::geom_bar(stat='identity')).
Here's a reproducible example of the above, which takes two strings for x and y and aggregates + plots them:
x <- "cyl"
y <- "mpg"
mtcars %>%
group_by(!!sym(x)) %>%
summarise(!!sym(y) := mean(!!sym(y))) %>%
ggplot2::ggplot(aes(x = !!sym(x), y = !!sym(y))) +
ggplot2::geom_col()

Passing params to distribution in geom_qq with facets

I'd like to create QQ-plots of a t-distribution using ggplot2's geom_qq() function. Hadley provides a nice example of how to do this here, but it's only for a single distribution. I wish to extend this to multiple groups with a facet and distribution for each group. I found a similar and related question here, but it doesn't really answer the question.
Passing either a list or vector of greater than length 1 does not seem to work.
library(ggplot2)
a <- 1:10
df <- data.frame(a = a, b = rt(1000, df = a))
deg_free <-
lapply(a, function(x) {
return(MASS::fitdistr(subset(df, a == x)$b,
"t")$estimate["df"])
})
g <-
ggplot(data=df, aes(sample=b)) +
geom_qq(distribution = qt, dparams = deg_free) +
geom_qq_line(distribution = qt, dparams = deg_free) +
facet_wrap(~a)
Does anyone know how to do this without resorting to computing quantiles for the data and manually plotting the QQ points and lines?
For ggplot to take degrees of freedom into account in facets, the dataframe passed into ggplot() should contain that as a column:
library(dplyr)
set.seed(123) # for reproducibility
a <- 1:10
df <- data.frame(a = a, b = rt(1000, df = a))
deg_free <-
lapply(a, function(x) {
return(MASS::fitdistr(subset(df, a == x)$b,
"t")$estimate["df"])
})
df <- df %>%
left_join(data.frame(d = unlist(deg_free), a = a),
by = "a")
rm(a, deg_free)
> head(df)
a b d
1 1 -0.2624269 1.526920
2 2 -3.4784976 1.447293
3 3 1.6535141 2.819679
4 4 2.3848622 3.240377
5 5 0.4233105 3.946170
6 6 1.4423866 5.893569
With that out of the way, we can try to define modified versions of geom_qq / geom_qq_line that look for degrees of freedom df as a mapped aesthetic. Here's how the result can look like:
ggplot(df,
aes(sample=b, df = d)) +
geom_qq2(distribution = qt) +
geom_qq_line2(distribution = qt) +
facet_wrap(~a, scales = "free")
Code to create geom_qq2 / geom_qq_line2:
library(magrittr)
library(ggplot2)
# take reference from the compute_group functions for StatQq / StatQqLine
# but modify the code to include df in dparams, if it's a mapped aesthetic
compute_group_StatQq2 <- environment(StatQq$compute_group)$f
compute_group_StatQqLine2 <- environment(StatQqLine$compute_group)$f
body(compute_group_StatQq2) <- body(compute_group_StatQq2) %>% as.list() %>%
append(quote(if("df" %in% colnames(data)) dparams <- append(dparams, list("df" = data$df[1]))),
after = 1L) %>%
as.call()
body(compute_group_StatQqLine2) <- body(compute_group_StatQqLine2) %>% as.list() %>%
append(quote(if("df" %in% colnames(data)) dparams <- append(dparams, list("df" = data$df[1]))),
after = 1L) %>%
as.call()
# define modified ggproto classes
# which inherit from StatQq / StatQqLine, but use modified compute_group functions
StatQq2 <- ggproto("StatQq2", StatQq, compute_group = compute_group_StatQq2)
StatQqLine2 <- ggproto("StatQqLine2", StatQqLine, compute_group = compute_group_StatQqLine2)
# define modified geom functions
# which are based on geom_qq / geom_qq_line, but use Stat = modified Stat
geom_qq2 <- geom_qq
geom_qq_line2 <- geom_qq_line
body(geom_qq2) <- body(geom_qq) %>% as.list() %>%
inset2(2, (.) %>% extract2(2) %>% as.list() %>%
modifyList(val = list(stat = quote(StatQq2))) %>%
as.call()) %>%
as.call()
body(geom_qq_line2) <- body(geom_qq_line2) %>% as.list() %>%
inset2(2, (.) %>% extract2(2) %>% as.list() %>%
modifyList(val = list(stat = quote(StatQqLine2))) %>%
as.call()) %>%
as.call()
Code used to modify the body of a function took reference from MrFlick's answer to How to insert expression into the body of a function in R.
Disclaimer: I've never used geom_qq** before today. If I've missed out things while modifying the computation functions in StatQq**, let me know & I'll try to sort them out.
I don't think geom_qq is set up to handle having different parameters per facet, so the way to do this might be to produce a plot separately for each subset of the data and combine them with something like cowplot::plot_grid:
library(tidyverse)
plots = df %>%
group_by(a) %>%
mutate(deg_free = MASS::fitdistr(b, "t")$estimate["df"]) %>%
# This second group_by is just used to keep the deg_free value
# in the final dataframe, could be removed
group_by(a, deg_free) %>%
do(
plot = ggplot(data=., aes(sample=b)) +
geom_qq(distribution = qt, dparams = list(.$deg_free)) +
geom_qq_line(distribution = qt, dparams = list(.$deg_free)) +
ggtitle(.$a)
)
# Using map to unpack the list-column into a list, there's
# probably a better way
cowplot::plot_grid(plotlist=map(plots$plot, ~ .))
Example output:

r - make plotting function with ggplot2, aes_string and reorder

I'm trying to make a function that will use ggplot2inside,aes_stringand reorder but with no luck so far.
Basically if we have a sample dataset like the following:
library(ggplot2)
library(dplyr)
set.seed(123)
dt <- data.frame(
id = c(1,1,1,2,2),
a = c("b", "d", "c", "a", "b"),
b = sample(1:10, 5, replace = F),
cat = c(1,1,2,2,2)) %>%
mutate(a = as.factor(a)) %>%
as_tibble()
I want the function to accept the following arguments: the dataset, a filtering variable, and two variables for plotting.
This is what I managed to do:
myplot <- function(df, filtval, var1, var2) {
data <- df %>% filter(id == filtval)
ggplot(data) +
geom_point(
aes_string(
x = reorder(var1, var2),
y = var2)
)
}
Unfortunately when running it returns the error:
myplot(dt, 1, "a", "b")
Warning message:
In mean.default(X[[i]], ...) :
argument is not numeric or logical: returning NA
This is what I want the function to do:
data <- dt %>% filter(id == 1)
ggplot(data) +
geom_col(aes(x = reorder(a, - b), y = b))
With the latest version of ggplot, you should be use aes with !! and sym() to turn your strings into symbols.
myplot <- function(df, filtval, var1, var2) {
data <- df %>% filter(id == filtval)
ggplot(data) +
geom_point(
aes(
x = reorder(!!sym(var1), !!sym(var2)),
y = !!sym(var2))
)
}
After discussing with mr Flick (see below), this should do NOT be used:
myplot <- function(df, filtval, var1, var2) {
data <- df %>% filter(id == filtval)
data$new_order <- reorder(data[[var1]], data[[var2]])
ggplot(data) +
geom_point(mapping=
aes_string(
x = "new_order",
y = var2)
)
}
Take his solution instead :)

Functional programming with dplyr

Looking for a more efficient / elegant way to pass multiple arguments to a group-by using non-standard evaluation in a function using dplyr. I don't want to use the ... operator, but to specify the functions individually.
My specific use case is a function which takes a data frame and creates a ggplot object with simpler syntax. Here is an example of the code I want to automate with my function:
# create data frame
my_df <- data.frame(month = sample(1:12, 1000, replace = T),
category = sample(head(letters, 3), 1000, replace = T),
approved = as.numeric(runif(1000) < 0.5))
my_df$converted <- my_df$approved * as.numeric(runif(1000) < 0.5)
my_df %>%
group_by(month, category) %>%
summarize(conversion_rate = sum(converted) / sum(approved)) %>%
ggplot + geom_line(aes(x = month, y = conversion_rate, group = category,
color = category))
I want to combine that group_by, summarize, ggplot, and geom_line into a simple function that I can feed an x, y, and group, and have it perform all the dirty work under the hood. Here's what I've gotten to work:
# create the function that does the grouping and plotting
plot_lines <- function(df, x, y, group) {
x <- enquo(x)
group <- enquo(group)
group_bys <- quos(!! x, !! group)
df %>%
group_by(!!! group_bys) %>%
my_smry %>%
ggplot + geom_line(aes_(x = substitute(x), y = substitute(y),
group = substitute(group), color = substitute(group)))
}
# create a function to do the summarization
my_smry <- function(x) {
x %>%
summarize(conversion_rate = sum(converted) / sum(approved))
}
# use my function
my_df %>%
plot_lines(x = month, y = conversion_rate, group = category)
I feel like the group_by handling is pretty inelegant: quoting x and group with enquo, then unquoting them with !! inside of another quoting function quos, only to re-unquote them with !!! on the next line, but it's the only thing I've been able to get to work. Is there a better way to do this?
Also, is there a way to get ggplot to take !! instead of substitute? What I'm doing feels inconsistent.
You could just do a straight eval.parent(substitute(...)) like this. Being base R it works consistently across R and is simple to do. One can even use an ordinary aes.
plot_lines <- function(df, x, y, group) eval.parent(substitute(
df %>%
group_by(x, group) %>%
my_smry %>%
ggplot + geom_line(aes(x = x, y = y, group = group, color = group))
))
plot_lines(my_df, month, conversion_rate, category)
The problem is that ggplot hasn't been updated to handle quosures yet, so you've got to pass it expressions, which you can create from quosures with rlang::quo_expr:
library(tidyverse)
set.seed(47)
my_df <- data_frame(month = sample(1:12, 1000, replace = TRUE),
category = sample(head(letters, 3), 1000, replace = TRUE),
approved = as.numeric(runif(1000) < 0.5),
converted = approved * as.numeric(runif(1000) < 0.5))
plot_lines <- function(df, x, y, group) {
x <- enquo(x)
y <- enquo(y)
group <- enquo(group)
df %>%
group_by(!! x, !! group) %>%
summarise(conversion_rate = sum(converted) / sum(approved)) %>%
ggplot(aes_(x = rlang::quo_expr(x),
y = rlang::quo_expr(y),
color = rlang::quo_expr(group))) +
geom_line()
}
my_df %>% plot_lines(month, conversion_rate, category)
However, keep in mind that ggplot will almost inevitably be updated from lazyeval to rlang, so while this interface will probably keep working, a simpler, more consistent one will probably be possible shortly.

Using purrr to plot multiple items against each other

I am trying to learn purrr from the tidyverse
I have set up a piece of code to attempt to plot all variables in the iris data-set against each other to see if they are linearly related. Unfortunately I don't seem to get anything back except blank plots. Below is my example. Can anyone help
library(tidyverse)
mydf <- iris %>%
as_tibble %>%
dplyr::select(everything(), -Species)
# Create a grid of names of columns
mynames <- names(mydf)
mygrid <- expand.grid(x=mynames, y =mynames)
# Define function
plot_my_data <- function(mydata, x, y){
ggplot(mydata, aes(x, y)) +
geom_smooth()}
map2(.x = mygrid$x,
.y = mygrid$y,
.f = ~ plot_my_data(mydf, .x,.y))
You have 2 issues in your code.
First one is that you use aes where you should use aes_string, and second is that you have factors and not characters in mygrid.
This works:
mygrid <- expand.grid(x=mynames, y =mynames,stringsAsFactors = F)
# Define function
plot_my_data <- function(mydata, x, y){
ggplot(mydata, aes_string(x, y)) +
geom_smooth()}
map2(.x = mygrid$x,
.y = mygrid$y,
.f = ~ plot_my_data(mydf, .x,.y))

Resources