Adding Title Formatter to ggplot2 - r

I'm working on a package that will make it easier for other users at my company to use ggplot2. One of the things I would like to do is to add a function that automatically formats ugly variable names to pretty titles.
I have a function that does this already. Let's assume there's some dummy data and a basic plot:
data <- data.frame(
place_name = c("Los Angeles","New York"),
some_amount = c(5,10)
)
g <- ggplot(data, aes(x = place_name, y = some_amount)) +
geom_bar(stat = 'identity') +
labs(title = "test_of_function")
So I have my ggplot, and I want to format the titles. This function works fine once I apply it to the labels of a function.
format_title <- function(...,sep = "[^[:alnum:]]+"){
args <- list(...)
if (is.list(args[[1]]))
args <- args[[1]]
lapply(args, function(x, sep){
stringr::str_to_title(stringr::str_replace_all(x,sep," "))
}, sep = sep)
}
format_plot_titles <- function(g){
g$labels <- format_title(g$labels)
g
}
So now if we compare them:
g
format_plot_titles(g)
What I was hoping to do is add it via the ggplot2 +, but in order to do that I need access to what the previous labels of the plot were.
It would look something like this (with better names):
ggplot(data, aes(x = place_name, y = some_amount)) +
geom_bar() +
title_labels()
I'm having a lot of trouble figuring out how to access the labels of the previous plot as I attempt to layer the new labels on top of the old one. Any help is appreciated!
Edit: Solved this. There was no slick solution though. I basically had to overwrite the default s3 method for the + that ggplot2 exports to take in a new type of object I'm calling a "formatter". This allows me to construct a method that checks for the formatter class, and if my object does inherit the formatter class, it applies that formatter to the plot labels. Here's the code:
`+.gg` <- function (e1, e2) {
e2name <- deparse(substitute(e2))
if (ggplot2::is.theme(e1))
ggplot2:::add_theme(e1, e2, e2name)
else if (ggplot2::is.ggplot(e1) & is.formatter(e2)){
add_formatter(e1, e2, e2name)
}
else if (ggplot2::is.ggplot(e1))
ggplot2:::add_ggplot(e1, e2, e2name)
}
update_format <- function(p, formatter){
p <- ggplot2:::plot_clone(p)
p$labels <- formatter(p$labels)
p
}
add_formatter <- function(p, formatter, objectname) {
update_format(p, formatter)
}
is.formatter <- function(x){
inherits(x,"formatter")
}
format_title <- function(...,sep = "[^[:alnum:]]+"){
args <- list(...)
if (is.list(args[[1]]))
args <- args[[1]]
lapply(args, function(x, sep){
stringr::str_to_title(stringr::str_replace_all(x,sep," "))
}, sep = sep)
}
title_labels <- function(...){
structure(format_title, class = "formatter")
}
ggplot(data, aes(x = place_name, y = some_amount)) +
geom_bar(stat = 'identity') +
title_labels()

Posting edit as a formal answer.
Solved this. There was no slick solution though. I basically had to overwrite the default s3 method for the + that ggplot2 exports to take in a new type of object I'm calling a "formatter". This allows me to construct a method that checks for the formatter class, and if my object does inherit the formatter class, it applies that formatter to the plot labels. Here's the code:
`+.gg` <- function (e1, e2) {
e2name <- deparse(substitute(e2))
if (ggplot2::is.theme(e1))
ggplot2:::add_theme(e1, e2, e2name)
else if (ggplot2::is.ggplot(e1) & is.formatter(e2)){
add_formatter(e1, e2, e2name)
}
else if (ggplot2::is.ggplot(e1))
ggplot2:::add_ggplot(e1, e2, e2name)
}
update_format <- function(p, formatter){
p <- ggplot2:::plot_clone(p)
p$labels <- formatter(p$labels)
p
}
add_formatter <- function(p, formatter, objectname) {
update_format(p, formatter)
}
is.formatter <- function(x){
inherits(x,"formatter")
}
format_title <- function(...,sep = "[^[:alnum:]]+"){
args <- list(...)
if (is.list(args[[1]]))
args <- args[[1]]
lapply(args, function(x, sep){
stringr::str_to_title(stringr::str_replace_all(x,sep," "))
}, sep = sep)
}
title_labels <- function(...){
structure(format_title, class = "formatter")
}
ggplot(data, aes(x = place_name, y = some_amount)) +
geom_bar(stat = 'identity') +
title_labels()

Related

Using ellipsis parameters alongside other parameters

I want to use a function to modify a plot based on the parameters passed. However, I'm not sure what format I should convert the list of modifications parameters into, to be used as parameters within the respective 'geom'.
library(ggplot2)
data <- tibble(a = 1:3, b = 11:13)
p <- ggplot(data)
plot_modify <- function(p, geom = "", ...){
modifications <- list(...)
if(geom == "point"){
p <- p + geom_point(aes(x = a, y = b), modifications)
}
return(p)
}
plot_modify(p, "point", alpha=0.1, size = 0.3)
P.s. I intend to validate the list of parameters based on each 'geom' later on.
After you capture the values in the ellipse, you are responsible for passing them along. In order to inject those values into subsequent calls, you'll need to build the call using something like do.call using base R methods
plot_modify <- function(p, geom = "", ...){
modifications <- list(...)
if(geom == "point"){
p <- p + do.call("geom_point", c(list(aes(x = a, y = b)), modifications))
}
return(p)
}
But do note that list(...) will force evaluation of all your parameters which is slightly different than how the function behaves normally.
If you wanted to use more of an rlang strategy you can do
plot_modify <- function(p, geom = "", ...){
modifications <- list(...)
if(geom == "point"){
p <- p + rlang::inject(geom_point(aes(x = a, y = b), !!!modifications))
}
return(p)
}

R Metaprogramming: return function body with arguments values filled in

I am looking for a function that will return the body of function with the arguments filled in. The goal is to have a function, capture_code such that
my_scatterplot <- function(my_title, xlab = "mpg", ylab = "hp") {
g <- ggplot(mtcars, aes(x = mpg, y = hp)) +
geom_point()
g + labs(x = xlab, y = ylab, title = my_title)
}
capture_code(my_scatterplot("My title", xlab = "MPG"))
Will return
g <- ggplot(mtcars, aes(x = mpg, y = hp)) +
geom_point()
g + labs(x = "MPG", y = ylab, title = "My title")
I am using the code from advanced R Walking AST with recursive functions.
expr_type <- function(x) {
if (rlang::is_syntactic_literal(x)) {
"constant"
} else if (is.symbol(x)) {
"symbol"
} else if (is.call(x)) {
"call"
} else if (is.pairlist(x)) {
"pairlist"
} else {
typeof(x)
}
}
switch_expr <- function(x, ...) {
switch(expr_type(x),
...,
stop("Don't know how to handle type ", typeof(x), call. = FALSE)
)
}
replace_vars <- function(x, envi) {
switch_expr(x,
# Base cases
constant = x,
symbol = {
# Get the variable from the environment
rlang::env_get(nm = as.character(x), default = x, env = envi)
},
# Recursive cases
pairlist = purrr::map(x, replace_vars, envi),
call = {
res <- purrr::map(x, replace_vars, envi)
class(res) <- class(x)
res
}
)
}
capture_code <- function(e) {
e <- rlang::enexpr(e)
cf <- get(toString(e[[1]]))
if(typeof(cf) != "closure") stop(e[[1]], "is not a function")
# Evalation the named functions first
# Then fill in the unnamed
cf_args <- formals(cf)
called_args <- as.list(e[-1])
if(!is.null(names(called_args))) {
not_named <- names(called_args) == ""
named_args <- called_args[!not_named]
unnamed_args <- called_args[not_named]
new_args <- modifyList(cf_args, named_args)
missing_args <- unlist(lapply(new_args, rlang::is_missing))
missing_indices <- seq_along(new_args)[missing_args]
} else {
new_args <- cf_args
unnamed_args <- called_args
missing_indices <- seq_along(new_args)
}
# Add the unnamed arguments
for(i in seq_along(unnamed_args)) {
new_args[[missing_indices[[i]]]] <- unnamed_args[[i]]
}
# Get the function body from
cf_func_body <- functionBody(cf)[-1]
# Pass the arguments as an environment for lookup
replace_vars(cf_func_body, rlang::new_environment( as.list(new_args)))
}
res <- capture_code(my_scatterplot("My title", xlab = "MPG"))
res
I have included the View call from the function body expression as well as my results. It looks almost correct, except I am unable to get the call and <- classes to be of type language. I would like to be able to get back the code from my AST.
Grab the call into mc and extract the function fun. Then wrap its body in substitute(...), replace the function name in the call with fun and run it. No packages are used.
capture_code <- function(call) {
mc <- match.call()[[2]]
fun <- match.fun(mc[[1]])
body(fun) <- substitute(substitute(b), list(b = body(fun)))
mc[[1]] <- as.name("fun")
eval(mc)
}
# test
capture_code(my_scatterplot("My title", xlab = "MPG"))
giving:
{
g <- ggplot(mtcars, aes(x = mpg, y = hp)) + geom_point()
g + labs(x = "MPG", y = "hp", title = "My title")
}
Here's a mildly-hacky approach:
library(rlang)
my_scatterplot <- function(my_title, xlab = "mpg", ylab = "hp") {
g <- ggplot(mtcars, aes(x = mpg, y = hp)) +
geom_point()
g + labs(x = xlab, y = ylab, title = my_title)
}
capture_code <- function(call){
call <- call_standardise(enquo(call)) # capture call and fill in params and default args
args <- call_args(call) # extract cleaned args
body <- fn_body(call_fn(call)) # extract function body
eval(substitute(substitute(body, args))) # substitute args in body
}
capture_code(my_scatterplot("My title", xlab = "MPG"))
#> {
#> g <- ggplot(mtcars, aes(x = mpg, y = hp)) + geom_point()
#> g + labs(x = "MPG", y = ylab, title = "My title")
#> }
The hacky bit is the last line, which uses substitute to replace parameters with arguments wherever they are within the function body. As far as I can tell, there's no simple way to do this with rlang, because the quosure idiom requires you to specify exactly what you'd like to substitute; base::substitute is more of a shotgun approach.
You can also use pryr::modify_lang, which traverses the AST like you've started writing above:
capture_code <- function(call){
call <- call_standardise(enquo(call))
args <- call_args(call)
body <- fn_body(call_fn(call))
pryr::modify_lang(body, function(leaf){
expr_string <- expr_name(leaf)
if (expr_string %in% names(args)) {
args[[expr_string]]
} else {
leaf
}
})
}
capture_code(my_scatterplot("My title", xlab = "MPG"))
#> {
#> g <- ggplot(mtcars, aes(x = mpg, y = hp)) + geom_point()
#> g + labs(x = "MPG", y = ylab, title = "My title")
#> }
Look at its source code if to see how to structure the recursion, but note that there are some weird bits of the language you have to account for to do this right.
If you want to roll your own recursion, ignoring the weirder bits (like formulas, pairlists, etc.) that won't matter for this call anyway,
capture_code <- function(call){
call <- call_standardise(enquo(call))
args <- call_args(call)
body <- fn_body(call_fn(call))
modify_expr <- function(node){
node_string <- expr_name(node)
if (length(node) > 1) {
node <- lapply(node, modify_expr) # recurse
as.call(node)
} else if (node_string %in% names(args)) {
args[[node_string]] # substitute
} else {
node # ignore
}
}
modify_expr(body)
}
capture_code(my_scatterplot("My title", xlab = "MPG"))
#> {
#> g <- ggplot(mtcars, aes(x = mpg, y = hp)) + geom_point()
#> g + labs(x = "MPG", y = ylab, title = "My title")
#> }

How to write a ggplot '+'-pipeable function that can refer to the input plot

I'm trying to write a function that can be called using the '+'-based ggplot2 syntax.
myplot + myfunction
Specifically, the function I'm writing symmetrizes the y-axis about zero, so it needs to determine the y-axis range for the input plot.
So let,
ylim_sym <- function(p){
get_y_range <- function(p){
ggplot2::ggplot_build(p)$layout$panel_ranges[[1]]$y.range
}
max_offset <- max(abs(get_y_range(p)))
p + ylim(- max_offset, max_offset)
}
With this function, the following works:
qplot(x = 1:10, y = exp(rnorm(10))) %>% ylim_sym()
But this doesn't work because of some precedence issue between +.gg and %>%:
qplot(x = 1:10, y = exp(rnorm(10))) +
geom_abline(slope = 0) %>%
ylim_sym()
(I could write the latter (all_my_ggplot_pipeline) %>% ylim_sym() but it's pretty ugly syntax).
Ideally, I'd like to be able to write ylim_sym such that it can be piped like so,
qplot(x = 1:10, y = exp(rnorm(10))) + ylim_sym()
but I can't work out how to access the plot on the LHS of + within ylim_sym
Any ideas?
I was able to solve it by doing the following.
StatSymYLim <- ggproto(
"StatSymYLim", Stat,
compute_group = function(data, scales) {
out <- data.frame(
x = median(data$x),
y = c(-1, 1) * max(abs(data$y))
)
out
},
required_aes = c("x", "y")
)
ylim_sym <- function(...){
geom_blank(..., stat = StatSymYLim)
}
Then the following works as required:
qplot(x = 1:10, y = exp(rnorm(10))) +
geom_abline(slope = 0) +
ylim_sym()
My understanding of ggplot2 internals is pretty shaky to be fair, so this might be a naive solution.
Note: your function needs an update as the structure of the object has slightly changed
Using package ggfun this would work:
# devtools::install_github("moodymudskipper/ggfun")
library(ggfun)
ylim_sym <- function(p){
get_y_range <- function(p){
ggplot2::ggplot_build(p)$layout$panel_params[[1]]$y.range
}
max_offset <- max(abs(get_y_range(p)))
p + ylim(- max_offset, max_offset)
}
qplot(x = 1:10, y = exp(rnorm(10))) +
geom_abline(slope = 0) +
ylim_sym

Function argument to call only part of a column name (ggplot)

I have succeded in building my first function i R.
I would now like to improve it, but don´t know how.
My dataset contains many variabels that have "mirror"variabels with almost the same name. The only naming difference is that the "mirror" variable has a "c" in front of the name.
The function plots comparisons of a variabel (VAR) and it´s "mirror" (cVAR).
Simplified dataset and simplified function code that reproduces the challenge:
library(ggplot2)
df <- data.frame(
X = 1:10+rnorm(10,mean=1,sd=0.5),
cX = 1:10+rnorm(10,mean=1,sd=0.5),
Y = 1:10+rnorm(10,mean=1,sd=0.5),
cY = 1:10-rnorm(10,mean=1,sd=0.5))
compare <- function(VAR, cVAR) {
VAR <- deparse(substitute(VAR))
cVAR <- deparse(substitute(cVAR))
ggplot(df, aes_string(x=VAR, y=cVAR))+
geom_point()+
geom_smooth(method="lm")+
geom_abline(intercept = 0, slope = 1)
}
compare(Y, cY)
I would like the function to do exactly the same as it does above, but I would like to just have to write compare(Y) instead.
In STATA I would try something like this:
y=c`VAR'
but I can´t find a similar approach in R.
How about this
compare <- function(VAR, cVAR) {
VAR <- deparse(substitute(VAR))
cVAR <- if(missing(cVAR)) {
paste0("c", VAR)
} else {
deparse(substitute(cVAR))
}
stopifnot(all(c(VAR, cVAR) %in% names(df)))
ggplot(df, aes_string(x=VAR, y=cVAR))+
geom_point()+
geom_smooth(method="lm")+
geom_abline(intercept = 0, slope = 1)
}
Basically we just use paste0() to add in the "c" to the first parameter when the second parameter is not specified.
Then you can run any of these
compare(Y) # to cY
compare(X) # to cX
compare(Y, cY)
compare(Y, cX)
Hope this is what you wanted. I simply paste0 VAR with defined myLetter and pass VAR to compare() as character.
compare <- function(VAR, myLetter = "c") {
library(ggplot2)
VAR2 <- paste0(myLetter, VAR)
ggplot(df, aes_string(VAR, VAR2))+
geom_point() +
geom_smooth(method = "lm")+
geom_abline(intercept = 0, slope = 1)
}
compare("Y")

Use to 2 parameters ... in one function

I know that the title of question sounds silly but I want to create a function test_f which can use several parameters of functions used within test_f ( I used latest version of ggplot2 with new function aes_ ).
E.g.
devtools::install_github('hadley/scales')
devtools::install_github('hadley/ggplot2')
test <- function(data,x,y,...){
ggplot(data, aes_(substitute(x), substitute(y)))+
geom_point(...)+
scale_y_continuous(...)
}
When
test(mtcars, qsec, mpg,limit = c(1,100))
everything works, but
test(mtcars, qsec, mpg,size = 5)
it shows an error: Error in scale_y_continuous: Unused parameter (size = 5).
I know why it happened but I wonder whether it is any possiblity to use ... for more than one internal function instead of putting all parameters into test_f like below?
test <- function(data,x,y,..., size = 5, limit = c(1,100){
...
...
}
You can, but it just depends on how the receiving functions handle things:
f2 <- function(three, ...) {
g <- as.list(match.call())
print(sprintf("three (from named args) = %d", three))
if ("five" %in% names(g)) print(sprintf("five (from ...) = %d", g$five))
}
f1 <- function(x, y, ...) {
if (missing(x)) stop("x is missing", call.=FALSE)
if (missing(y)) stop("y is missing", call.=FALSE)
g <- as.list(match.call())
print(sprintf("x = %d", x))
print(sprintf("y = %d", y))
f2(...)
}
f1(1, 2, three=4, five=6)
## [1] "x = 1"
## [1] "y = 2"
## [1] "three (from named args) = 4"
## [1] "five (from ...) = 6"
Since what you're getting stuck on is scale_y_continuous (and, hence, continuous_scale) complaining about the unused parameter, you can pass in only what it will accept from the ... list. It means some internal legwork for your function, but it's definitely doable:
mygg <- function(data, x, y, ...) {
gg <- ggplot(data=data, aes_(substitute(x), substitute(y)))
# get what geom_point accepts
geom_point_aes <- c("x", "y", "alpha", "colour", "color", "fill", "shape", "size", "stroke")
point_params <- unique(c(geom_point_aes,
names(formals(geom_point)),
names(formals(layer))))
# get what scale_y_continuous accepts
scale_y_params <- unique(c(names(formals(scale_y_continuous)),
names(formals(continuous_scale))))
# get all ... params passed in (if any)
args <- list(...)
if (length(args) > 0) {
# get all the arg names
arg_names <- names(args)
# which ones are left for point
gg <- gg + do.call(geom_point,
sapply(intersect(arg_names, point_params),
function(x) { list(args[[x]]) }))
# which ones are left for scale_y
gg <- gg + do.call(scale_y_continuous,
sapply(intersect(arg_names, scale_y_params),
function(x) { list(args[[x]]) }))
} else {
gg <- gg + geom_point() + scale_y_continuous()
}
return(gg)
}
I won't clutter up the answer with pngs but if you run the following you should see what the modified function does.
mygg(mtcars, mpg, wt)
mygg(mtcars, mpg, wt, color="blue")
mygg(mtcars, mpg, wt, limits=c(3,4))
mygg(mtcars, mpg, wt, fill="green", color="blue", shape=21, limits=c(3,4), left="over")
it's probably worth mentioning the alternative strategy of using one or two lists to wrap the optional arguments. Borrowing from the other answer,
mygg <- function(data, x, y,
geom_pars = list(),
scale_pars = list()) {
p <- ggplot(data=data, aes_(substitute(x), substitute(y)))
g <- do.call(geom_point, geom_pars)
s <- do.call(scale_y_continuous, scale_pars)
p + list(g, s)
}
Calling the function is a bit more verbose, but often less confusing because we're explicit about where the arguments should go.
mygg(mtcars, mpg, wt)
mygg(mtcars, mpg, wt, geom_pars=list(color="blue"))
mygg(mtcars, mpg, wt, scale_pars=list(limits=c(3,4)))
mygg(mtcars, mpg, wt,
geom_pars=list(fill="green", color="blue", shape=21),
scale_pars=list(limits=c(3,4)))

Resources