How to modify breaks and lables within ggproto? - r

I have a ggplot function (ggproto) to generate a manhattan plot. The code works, but I can't get my function to correctly position the breaks and labels on the x-axis. So I would really appreciate help on: How I can get my function to plot the axis that I want. Is it possible within the stat_manhattan function or would it be better to code an extra scale_x_chr(or so) function? Many thanks in advance! P.S hope my question is clear.
This is the code for the function:
StatManhattan <- ggplot2::ggproto("StatManhattan",
ggplot2::Stat,
# set up parameters, e.g. unpack from list
setup_params = function(data, params) {
params
},
# Compute group is the most granular component to be called
compute_group = function(data, scales, params, col_chrom) {
message("My param has value ", scales)
data_1 <- data %>%
dplyr::arrange(chr, pos) %>%
dplyr::transmute(x = cumsum(as.numeric(pos)),
y,
chr)
data_2 <- data_1 %>%
dplyr::distinct(chr) %>%
dplyr::mutate(colour = rep(col_chrom, length.out = dplyr::n()))
# the final df
data_1 %>%
dplyr::left_join(data_2, by = "chr") %>%
as.data.frame()
},
required_aes = c("y", "pos", "chr"),
default_aes = aes(y = stat(y),
x = stat(x),
colour = stat(colour),
size = 0.2)
)
stat_manhattan <- function(mapping = NULL, data = NULL, geom = "point",
position = "identity", na.rm = FALSE, show.legend = FALSE,
inherit.aes = TRUE, col_chrom = c("magenta2", "grey60"),
...) {
ggplot2::layer(
stat = StatManhattan, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm,
col_chrom = col_chrom,
...)
)
}
This can generate the desired plot, but the x-axis has continuous labels - which is not what I want.
# test data
test_data <- tibble(chr = rep(1:3, each = 1000),
bp = c(sort(sample.int(1e6, 1000)),
sort(sample.int(1e6, 1000)),
sort(sample.int(1e6, 1000))),
p = runif(3000, min=0, max=1))
# plotting the test data
ggplot(test_data)+aes(chr = chr, pos = bp, y =-log10(p))+stat_manhattan()
What I want to achieve is something like this, where the chromosome (chr) numbers are centered on the x-axis.
axisdf = test_data %>%
mutate(bp_cum = cumsum(as.numeric(bp))) %>%
group_by(chr) %>%
summarize(center=( max(bp_cum) + min(bp_cum) ) / 2 )
test_data %>%
mutate(bp_cum = cumsum(as.numeric(bp))) %>%
ggplot(., aes(x=bp_cum, y=-log10(p), colour = as.character(chr))) +
geom_point() +
# custom X axis:
scale_x_continuous(label = axisdf$chr, breaks= axisdf$center )

Related

plotly did not show legend when converted from ggplot

I don't quite understand why the legend disappeared when I converted a plot made by ggplot to plotly using ggplotly. The plotly help page did not have any information. I don't think their examples even worked properly on that page.
Any help is greatly appreciated!
Sample data
library(scales)
packageVersion("ggplot2")
#> [1] '3.4.0'
library(plotly)
packageVersion("plotly")
#> [1] '4.10.1'
data <- data.frame(
stringsAsFactors = FALSE,
Level = c("Fast","Fast","Fast","Fast",
"Fast","Fast","Slow","Slow","Slow",
"Slow","Slow","Slow"),
Period = c("1Year","3Month","1Year","3Month",
"1Year","3Month","1Year","3Month",
"1Year","3Month","1Year","3Month"),
X = c(0.002,0.002,0.1,0.1,0.9,0.9,
0.002,0.002,0.1,0.1,0.9,0.9),
Y = c(1.38,1.29,1.61,1.61,1.74,0.98,
1.14,0.97,1.09,1.1,0.94,0.58)
)
ggplot2
plt <- ggplot(data = data,
aes(x = X,
y = Y,
shape = Period,
color = Level)) +
geom_point(alpha = 0.6, size = 3) +
labs(x = " ",
y = "Value") +
scale_y_continuous(labels = number_format(accuracy = 0.1)) +
guides(color = guide_legend(title = "Level", order = 1),
shape = guide_legend(title = "Period", order = 2)) +
theme(axis.text.x = element_text(angle = 90))
plt
Convert to plotly, legend disappeared
ggplotly(plt, height = 500) %>%
layout(xaxis = list(autorange = "reversed"))
Edit
There was an issue with guides(). If I removed it, the legend in ggplotly showed up
plt2 <- ggplot(data = data,
aes(x = X,
y = Y,
shape = Period,
color = Level)) +
geom_point(alpha = 0.6, size = 3) +
labs(x = " ",
y = "Value") +
scale_y_continuous(labels = number_format(accuracy = 0.1)) +
theme(axis.text.x = element_text(angle = 90))
plt2
ggplotly(plt2, height = 500) %>%
layout(
xaxis = list(autorange = "reversed"),
legend = list(
title = list(text = '(Period, Level)'))
)
After OPs Edit:
Here is a workaround using basic R {plotly} to modify the legend according to #Tung's requirements:
library(scales)
library(ggplot2)
library(plotly)
library(data.table)
DT <- data.frame(
stringsAsFactors = FALSE,
Level = c("Fast","Fast","Fast","Fast",
"Fast","Fast","Slow","Slow","Slow",
"Slow","Slow","Slow"),
Period = c("1Year","3Month","1Year","3Month",
"1Year","3Month","1Year","3Month",
"1Year","3Month","1Year","3Month"),
X = c(0.002,0.002,0.1,0.1,0.9,0.9,
0.002,0.002,0.1,0.1,0.9,0.9),
Y = c(1.38,1.29,1.61,1.61,1.74,0.98,
1.14,0.97,1.09,1.1,0.94,0.58)
)
setDT(DT)
LevelDT <- unique(DT, by = "Level")
PeriodDT <- unique(DT, by = "Period")
LevelDT[, Y := min(DT$Y)-1]
PeriodDT[, Y := min(DT$Y)-1]
plt2 <- ggplot(data = DT,
aes(x = X,
y = Y,
shape = Period,
color = Level)) +
geom_point(alpha = 0.6, size = 3) +
labs(x = " ",
y = "Value") +
scale_y_continuous(labels = number_format(accuracy = 0.1)) +
theme(axis.text.x = element_text(angle = 90))
plt2
markercolors <- hue_pal()(2)
ggplotly(plt2, height = 500) |>
layout(
xaxis = list(autorange = "reversed"),
legend = list(
title = list(text = ''),
itemclick = FALSE,
itemdoubleclick = FALSE,
groupclick = FALSE
)
) |>
add_trace(
data = LevelDT,
x = ~ X,
y = ~ Y,
inherit = FALSE,
type = "scatter",
mode = "markers",
marker = list(
color = markercolors,
size = 14,
opacity = 0.6,
symbol = "circle"
),
name = ~ Level,
legendgroup = "Level",
legendgrouptitle = list(text = "Level")
) |>
add_trace(
data = PeriodDT,
x = ~ X,
y = ~ Y,
inherit = FALSE,
type = "scatter",
mode = "markers",
marker = list(
color = "darkgrey",
size = 14,
opacity = 0.6,
symbol = c("circle", "triangle-up")
),
name = ~Period,
legendgroup = "Period",
legendgrouptitle = list(text = "Period")
) |> style(showlegend = FALSE, traces = 1:4)
Original answer:
I'm not sure why they are set to FALSE in the first place, but setting showlegend = TRUE in layout() and style() (for the traces) brings back the legend:
library(scales)
library(ggplot2)
library(plotly)
data <- data.frame(
stringsAsFactors = FALSE,
Level = c("Fast","Fast","Fast","Fast",
"Fast","Fast","Slow","Slow","Slow",
"Slow","Slow","Slow"),
Period = c("1Year","3Month","1Year","3Month",
"1Year","3Month","1Year","3Month",
"1Year","3Month","1Year","3Month"),
X = c(0.002,0.002,0.1,0.1,0.9,0.9,
0.002,0.002,0.1,0.1,0.9,0.9),
Y = c(1.38,1.29,1.61,1.61,1.74,0.98,
1.14,0.97,1.09,1.1,0.94,0.58)
)
# ggplot2
plt <- ggplot(data = data,
aes(x = X,
y = Y,
shape = Period,
color = Level)) +
geom_point(alpha = 0.6, size = 3) +
labs(x = " ",
y = "Value") +
scale_y_continuous(labels = number_format(accuracy = 0.1)) +
guides(color = guide_legend(title = "Period", order = 1),
shape = guide_legend(title = "", order = 2)) +
theme(axis.text.x = element_text(angle = 90))
plt
# Convert to plotly, legend disappeared
fig <- ggplotly(plt, height = 500) %>%
layout(showlegend = TRUE, xaxis = list(autorange = "reversed")) %>%
style(showlegend = TRUE)
fig
This answer is for plotly 4.10.1. I have defined two functions:
set_legend_names() This edits the names of the htmlwidget created by ggplotly(), before it is passed to plotly.js.
set_legend_symbols(). This appends some js to the htmlwidget object to change the symbols after plotly.js has drawn them.
plt2 |>
ggplotly(height = 500) |>
layout(xaxis = list(autorange = "reversed")) |>
set_legend_names() |>
set_legend_symbols()
Function definitions:
1. set_legend_names()
set_legend_names <- function(p,
new_legend_names = c(
"Fast", "Slow", "One Year", "Three Month"
)) {
# Update legend names and put in one group
for (i in seq_along(p$x$data)) {
p$x$data[[i]]$name <- new_legend_names[i]
}
p$x$layout$legend$title <- ""
return(p)
}
2. set_legend_symbols()
set_legend_symbols <- function(p,
symbol_nums_change_color = c(3, 4),
new_color_string = "rgb(105, 105, 105)",
symbols_num_change_shape = 3,
symbols_nums_target_shape = 1) {
js_get_legend <- htmltools::HTML(
'let legend = document.querySelector(".scrollbox");
let symbols = legend.getElementsByClassName("legendsymbols");
const re = new RegExp("fill: rgb.+;", "i");\n
'
)
js_symbol_const <- paste0(
'const shape_re = new RegExp(\'d=".*?"\');\n',
"const correct_shape = symbols[",
symbols_nums_target_shape,
"].innerHTML.match(shape_re)[0];\n"
)
# subtract 1 for 0-indexed js
change_symbol_color_code <- lapply(
symbol_nums_change_color - 1,
\(i)
paste0(
"symbols[", i, "].innerHTML = ",
"symbols[", i, "].innerHTML.replace(re,",
' "fill: ', new_color_string, ';");'
)
) |>
paste(collapse = "\n")
# subtract 1 for 0-indexed js
change_symbols_shape_code <- lapply(
symbols_num_change_shape - 1,
\(i)
paste0(
"symbols[", i, "].innerHTML = symbols[",
symbols_nums_target_shape, "].innerHTML.replace(shape_re, correct_shape);"
)
) |>
paste(collapse = "\n")
all_js <- htmltools::HTML(
unlist(c(
js_get_legend,
js_symbol_const,
change_symbols_shape_code,
change_symbol_color_code
))
)
# Add it to the plot
p <- htmlwidgets::prependContent(
p,
htmlwidgets::onStaticRenderComplete(all_js)
)
return(p)
}
I've never posted a second answer before but it seems substantially different in plotly 4.10.1. I eagerly anticipate the release of plotly 4.10.2 so I can post a third answer.
Plotly generates a different legend from ggplot2 - this can be fixed with R and and a little javascript
The first thing to do is ensure that you have a reasonably current version of the packages:
packageVersion("ggplot2") # 3.4.0
packageVersion("plotly") # 4.10.0
With these versions, like #Quentin, I do get a legend, although it is different to the one generated by ggplot2.
ggplotly(plt, height = 500) %>%
layout(xaxis = list(autorange = "reversed"))
Steps to replicate the ggplot2 legend:
Change the legend text. This can be done by editing the R object before it is passed to plotly.js.
Remove the color from the shape guide. This can only be done with javascript after the plot has rendered.
Change the third circle into a triangle. This also needs to be done in javascript.
Changing the legend text
To do this manually, we could do p$x$data[[1]]$name <- "Fast", and replicate for each layer.
Fortunately, you have manually specified the legend order, making it easy to know where to access the correct legend names before passing to plotly. If we just do this step, it will create a legend which looks like this, i.e. still wrong (the first triangle should be a circle and neither should be have a color):
Changing the symbol shape and colors
We cannot do this in R. I have written an R helper function to generate some javascript to do this for us:
get_symbol_change_js <- function(symbol_nums,
new_color_string = "rgb(105, 105, 105)") {
js_get_legend <- htmltools::HTML(
'let legend = document.querySelector(".scrollbox");
let symbols = legend.getElementsByClassName("legendsymbols");
const re = new RegExp("fill: rgb.+;", "i");
'
)
change_symbol_color_code <- lapply(
symbol_nums,
\(i)
paste0(
"symbols[", i, "].innerHTML = ",
"symbols[", i, "].innerHTML.replace(re,",
' "fill: ', new_color_string, ';");'
)
) |>
paste(collapse = "\n")
# shape to change
shape_change_num <- symbol_nums[1]
# shape to replace with
shape_change_from <- shape_change_num - 1
change_symbols_shape_code <- paste0(
'const shape_re = new RegExp(\'d=".*?"\');\n',
"const correct_shape = symbols[", shape_change_from, "].innerHTML.match(shape_re)[0];\n",
"symbols[2].innerHTML = symbols[", shape_change_num, "].innerHTML.replace(shape_re, correct_shape);"
)
all_js <- htmltools::HTML(
unlist(c(
js_get_legend,
change_symbol_color_code,
change_symbols_shape_code
))
)
return(all_js)
}
We can put this all together to generate the plot as desired:
draw_plotly_with_legend(plt)
Final draw_plotly_with_legend() function
Note this function calls get_symbol_change_js(), as defined above. It also uses htmlwidgets::prependContent() to attach our custom html to the widget before rendering.
draw_plotly_with_legend <- function(gg = plt,
guide_types = c("colour", "shape")) {
# Period, Level
legend_categories <- lapply(
guide_types, \(x) rlang::quo_get_expr(plt$mapping[[x]])
)
new_legend_names <- lapply(legend_categories, \(category) {
unique(data[[category]])
}) |> setNames(guide_types)
# Work out which symbols need to have color removed
symbols_to_remove_color <- new_legend_names[
names(new_legend_names) != "colour"
] |> unlist()
new_legend_names <- unlist(new_legend_names)
symbol_num_remove_color <- which(
new_legend_names %in% symbols_to_remove_color
)
# Create plot
p <- ggplotly(gg, height = 500) %>%
layout(xaxis = list(autorange = "reversed"))
# Show legend
p$x$layout$showlegend <- TRUE
# Update legend names and put in one group
for (i in seq_along(p$x$data)) {
p$x$data[[i]]$name <- new_legend_names[i]
p$x$data[[1]]$legendgroup <- "Grouped legend"
}
# Get the js code to change legend color
# js is 0 indexed
js_symbol_nums <- symbol_num_remove_color - 1
js_code <- get_symbol_change_js(js_symbol_nums)
# Add it to the plot
p <- htmlwidgets::prependContent(
p,
htmlwidgets::onStaticRenderComplete(js_code)
)
return(p)
}

How to align scale transformation across geoms?

I have a geom_foo() which will do some transformation of the input data and I have a scale transformation. My problem is that these work not as I would expect together with other geom_*s in terms of scaling.
To illustrate the behavior, consider foo() which will be used in the setup_data method of GeomFoo, defined at the end of the question.
foo <- function(x, y) {
data.frame(
x = x + 2,
y = y + 2
)
}
foo(1, 1)
The transformer is:
foo_trans <- scales::trans_new(
name = "foo",
transform = function(x) x / 5,
inverse = function(x) x * 5
)
Given this input data:
df1 <- data.frame(x = c(1, 2), y = c(1, 2))
Here is a basic plot:
library(ggplot2)
ggplot(df1, aes(x = x, y = y)) +
geom_foo()
When I apply the transformation to the vertical scale, I get this
ggplot(df1, aes(x = x, y = y)) +
geom_foo() +
scale_y_continuous(trans = foo_trans)
What I can say is that the y-axis limits are calculate as 11 = 1 + (2*5) and 12 = 2 + (2*5), where 1 and 2 are df1$y, and (2 * 5) are taken from the setup_data method and from trans_foo.
My real problem is, that I would like add a text layer with labels. These labels and their coordinates come from another dataframe, as below.
df_label <- foo(df1$x, df1$y)
df_label$label <- c("A", "B")
Label and point layers are on same x-y positions without the scale transformation
p <- ggplot(df1, aes(x = x, y = y)) +
geom_foo(color = "red", size = 6) +
geom_text(data = df_label, aes(x, y, label = label))
p
But when I apply the transformation, the coordinates do not match anymore
p +
scale_y_continuous(trans = foo_trans)
How do I get the to layer to match in x-y coordinates after the transformation? Thanks
ggproto object:
GeomFoo <- ggproto("GeomFoo", GeomPoint,
setup_data = function(data, params) {
cols_to_keep <- setdiff(names(data), c("x", "y"))
cbind(
foo(data$x, data$y),
data[, cols_to_keep]
)
}
)
geom constructor:
geom_foo <- function(mapping = NULL, data = NULL, ...,
na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = "identity",
geom = GeomFoo,
position = "identity",
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
na.rm = na.rm,
...
)
)
}
Doing data transformations isn't really the task of a geom, but a task of a stat instead. That said, the larger issue is that scale transformations are applied before the GeomFoo$setup_data() method is called. There are two ways one could accomplish this task that I could see.
Apply foo() before scale transformation. I don't think geoms or stats ever have access to the data before scale transformation. A possible place for this is in the ggplot2:::Layer$setup_layer() method. However, this isn't exported, which probably means the devs would like to discourage this even before we make an attempt.
Inverse the scale transformation, apply foo(), and transform again. For this, you need a method with access to the scales. AFAIK, no geom method has this access. However Stat$compute_panel() does have access, so we can use this.
To give an example of (2), I think you could get away with the following:
StatFoo <- ggproto(
"StatFoo", Stat,
compute_panel = function(self, data, scales) {
cols_to_keep <- setdiff(names(data), c("x", "y"))
food <- foo(scales$x$trans$inverse(data$x),
scales$y$trans$inverse(data$y))
cbind(
data.frame(x = scales$x$trans$transform(food$x),
y = scales$y$trans$transform(food$y)),
data[, cols_to_keep]
)
}
)
geom_foo <- function(mapping = NULL, data = NULL, ...,
na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = StatFoo,
geom = GeomPoint,
position = "identity",
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
na.rm = na.rm,
...
)
)
}
If someone else has brighter ideas to do this, I'd also like to know!

Shaded violin plot by group

I'm trying to produce a variation of a grouped violin plot in R (preferably using ggplot2), similar to the one below:
which was produced by the following reproducible example code:
# Load libraries #
library(tidyverse)
# Create dummy data #
set.seed(321)
df <- data.frame(X = rep(c("X1", "X2"), each = 100),
Y = rgamma(n = 200, shape = 2, rate = 2),
Z = rep(c("Za", "Zb"), rep = 100),
stringsAsFactors = FALSE)
# Grouped violin plot #
df %>%
ggplot(., aes(x = X, y = Y, fill = Z)) +
geom_violin(draw_quantiles = 0.5) +
scale_fill_manual(values = c("Za" = "red", "Zb" = "blue"))
The variation I'd like to have is that the density above the median should have a different shade compared to the density below the median, as in the following plot:
I produced the above (single) violin plot for the combination X = X1 and Z = Za in the data, using the following code:
## Shaded violin plot ##
# Calculate limits and median #
df.lim <- df %>%
filter(X == "X1", Z == "Za") %>%
summarise(Y_min = min(Y),
Y_qnt = quantile(Y, 0.5),
Y_max = max(Y))
# Calculate density, truncate at limits and assign shade category #
df.dens <- df %>%
filter(X == "X1", Z == "Za") %>%
do(data.frame(LOC = density(.$Y)$x,
DENS = density(.$Y)$y)) %>%
filter(LOC >= df.lim$Y_min, LOC <= df.lim$Y_max) %>%
mutate(COL = ifelse(LOC > df.lim$Y_qnt, "Empty", "Filled"))
# Find density values at limits #
df.lim.2 <- df.dens %>%
filter(LOC == min(LOC) | LOC == max(LOC))
# Produce shaded single violin plot #
df.dens %>%
ggplot(aes(x = LOC)) +
geom_area(aes(y = DENS, alpha = COL), fill = "red") +
geom_area(aes(y = -DENS, alpha = COL), fill = "red") +
geom_path(aes(y = DENS)) +
geom_path(aes(y = -DENS)) +
geom_segment(data = df.lim.2, aes(x = LOC, y = DENS, xend = LOC, yend = -DENS)) +
coord_flip() +
scale_alpha_manual(values = c("Empty" = 0.1, "Filled" = 1))
As you will notice in the code, I'm building the violin plot from scratch using the density function horizontally and then flipping the axes. The problem arises when I try to produce a grouped violin plot mainly because the axis in which the groups X and Z will appear, is already used for the "height" of the density. I did try to reach the same result by repeating all the calculations by groups but I'm stuck in the final step:
## Shaded grouped violin plot ##
# Calculate limits and median by group #
df.lim <- df %>%
group_by(X, Z) %>%
summarise(Y_min = min(Y),
Y_qnt = quantile(Y, 0.5),
Y_max = max(Y))
# Calculate density, truncate at limits and assign shade category by group #
df.dens <- df %>%
group_by(X, Z) %>%
do(data.frame(LOC = density(.$Y)$x,
DENS = density(.$Y)$y)) %>%
left_join(., df.lim, by = c("X", "Z")) %>%
filter(LOC >= Y_min, LOC <= Y_max) %>%
mutate(COL = ifelse(LOC > Y_qnt, "Empty", "Filled"))
# Find density values at limits by group #
df.lim.2 <- df.dens %>%
group_by(X, Z) %>%
filter(LOC == min(LOC) | LOC == max(LOC))
# Produce shaded grouped violin plot #
df.dens %>%
ggplot(aes(x = LOC, group = interaction(X, Z))) +
# The following two lines don't work when included #
#geom_area(aes(y = DENS, alpha = COL), fill = "red") +
#geom_area(aes(y = -DENS, alpha = COL), fill = "red") +
geom_path(aes(y = DENS)) +
geom_path(aes(y = -DENS)) +
geom_segment(data = df.lim.2, aes(x = LOC, y = DENS, xend = LOC, yend = -DENS)) +
coord_flip() +
scale_alpha_manual(values = c("Empty" = 0.1, "Filled" = 1))
Running the code above will produce the outline of the violin plots for each group, each one on top of the other. But once I try to include the geom_area lines, the code fails.
My gut feeling tells me that I would need to somehow produce the "shaded" violin plot as a new geom which can then be used under the general structure of ggplot2 graphics but I have no idea how to do that, as my coding skills don't extend that far. Any help or pointers, either along my line of thought or in a different direction would be much appreciated. Thank you for your time.
Idea
For the fun of it, I hacked a quick half-violin geom. It is basically a lot of copy & paste from GeomViolin and in order to make it run I had to access some of the internal ggplot2 function, which are not exported via ::: which means that this solution may not run in the future (if the ggplot team decides to change their internal functions).
However, this solution works and you can specify the alpha level of both the upper and the lower part. The geom assumes that you are providing just one quantile. The code is only superficially tested but it gives you an idea of how this can be done. As said it is in large part a simple copy & paste from GeomViolin where I added some code which finds out which values are below and above the quantile and splits the underlying GeomPolygon in 2 parts, as this function uses just a single alpha value. It works with groups and coord_flip likewise.
Code
library(grid)
GeomHalfViolin <- ggproto("GeomHalfViolin", GeomViolin,
draw_group = function (self, data, ..., draw_quantiles = NULL,
alpha_upper = .5, alpha_lower = 1) {
data <- transform(data, xminv = x - violinwidth * (x - xmin),
xmaxv = x + violinwidth * (xmax - x))
newdata <- rbind(transform(data, x = xminv)[order(data$y),
], transform(data, x = xmaxv)[order(data$y, decreasing = TRUE),
])
newdata <- rbind(newdata, newdata[1, ])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
stopifnot(length(draw_quantiles) <= 1)
## need to add ggplot::: to access ggplot2 internal functions here and there
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
###------------------------------------------------
## find out where the quantile is supposed to be
quantile_line <- unique(quantiles$y)
## which y values are below this quantile?
ind <- newdata$y <= quantile_line
## set the alpha values accordingly
newdata$alpha[!ind] <- alpha_upper
newdata$alpha[ind] <- alpha_lower
###------------------------------------------------
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data),
c("x", "y", "group")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
both <- both[!is.na(both$group), , drop = FALSE]
quantile_grob <- if (nrow(both) == 0) {
zeroGrob()
}
else {
GeomPath$draw_panel(both, ...)
}
###------------------------------------------------
## GeomPolygon uses a single alpha value by default
## Hence, split the violin in two parts
ggplot2:::ggname("geom_half_violin",
grobTree(GeomPolygon$draw_panel(newdata[ind, ], ...),
GeomPolygon$draw_panel(newdata[!ind, ], ...),
quantile_grob))
###------------------------------------------------
}
else {
ggplot2:::ggname("geom_half_violin", GeomPolygon$draw_panel(newdata,
...))
}
}
)
geom_half_violin <- function(mapping = NULL, data = NULL, stat = "ydensity",
position = "dodge", ..., draw_quantiles = NULL,
alpha_upper = .5, alpha_lower = 1,
trim = TRUE, scale = "area",
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomHalfViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles,
alpha_lower = alpha_lower, alpha_upper = alpha_upper,
na.rm = na.rm, ...))
}
library(tidyverse)
# Create dummy data #
set.seed(321)
df <- data.frame(X = rep(c("X1", "X2"), each = 100),
Y = rgamma(n = 200, shape = 2, rate = 2),
Z = rep(c("Za", "Zb"), rep = 100),
stringsAsFactors = FALSE)
# Grouped violin plot #
df %>%
ggplot(., aes(x = X, y = Y, fill = Z)) +
geom_half_violin(draw_quantiles = 0.5, alpha_upper = .1) +
scale_fill_manual(values = c("Za" = "red", "Zb" = "blue"))
# no groups
df %>% filter(Z == "Za") %>%
ggplot(., aes(x = X, y = Y)) +
geom_half_violin(draw_quantiles = 0.5, alpha_upper = .1, fill = "red") +
scale_fill_manual(values = c("Za" = "red", "Zb" = "blue")) +
coord_flip()
Graphs

Q-Q plot with ggplot2::stat_qq, colours, single group

I'm looking for a more convenient way to get a Q-Q plot in ggplot2 where the quantiles are computed for the data set as a whole. but I can use mappings (colour/shapes) for groups in the data.
library(dplyr)
library(ggplot2)
library(broom) ## for augment()
Make up some data:
set.seed(1001)
N <- 1000
G <- 10
dd <- data_frame(x=runif(N),
f=factor(sample(1:G,size=N,replace=TRUE)),
y=rnorm(N)+2*x+as.numeric(f))
m1 <- lm(y~x,data=dd)
dda <- cbind(augment(m1),f=dd$f)
Basic plot:
ggplot(dda)+stat_qq(aes(sample=.resid))
if I try to add colour, the groups get separated for the quantile computation (which I don't want):
ggplot(dda)+stat_qq(aes(sample=y,colour=f))
If I use stat_qq(aes(sample=y,colour=f,group=1)) ggplot ignores the colour specification and I get the first plot back.
I want a plot where the points are positioned as in the first case, but coloured as in the second case. I have a qqnorm-based manual solution that I can post but am looking for something nicer ...
You could calculate the quantiles yourself and then plot using geom_point:
dda = cbind(dda, setNames(qqnorm(dda$.resid, plot.it=FALSE), c("Theoretical", "Sample")))
ggplot(dda) +
geom_point(aes(x=Theoretical, y=Sample, colour=f))
Ah, I guess I should have read to the end of your question. This is the manual solution you were referring to, right? Although you could just package it as a function:
my_stat_qq = function(data, colour.var) {
data=cbind(data, setNames(qqnorm(data$.resid, plot.it=FALSE), c("Theoretical", "Sample")))
ggplot(data) +
geom_point(aes_string(x="Theoretical", y="Sample", colour=colour.var))
}
my_stat_qq(dda, "f")
Here's a ggproto-based approach that attempts to change StatQq, since the underlying issue here (colour specification gets ignored when group is specified explicitly) is due to how its compute_group function is coded.
Define alternate version of StatQq with modified compute_group (last few lines of code):
StatQq2 <- ggproto("StatQq", Stat,
default_aes = aes(y = after_stat(sample), x = after_stat(theoretical)),
required_aes = c("sample"),
compute_group = function(data, scales, quantiles = NULL,
distribution = stats::qnorm, dparams = list(),
na.rm = FALSE) {
sample <- sort(data$sample)
n <- length(sample)
# Compute theoretical quantiles
if (is.null(quantiles)) {
quantiles <- stats::ppoints(n)
} else if (length(quantiles) != n) {
abort("length of quantiles must match length of data")
}
theoretical <- do.call(distribution, c(list(p = quote(quantiles)), dparams))
res <- ggplot2:::new_data_frame(list(sample = sample,
theoretical = theoretical))
# NEW: append remaining columns from original data
# (e.g. if there were other aesthetic variables),
# instead of returning res directly
data.new <- subset(data[rank(data$sample), ],
select = -c(sample, PANEL, group))
if(ncol(data.new) > 0) res <- cbind(res, data.new)
res
}
)
Define geom_qq2 / stat_qq2 to use modified StatQq2 instead of StatQq for their stat:
geom_qq2 <- function (mapping = NULL, data = NULL, geom = "point",
position = "identity", ..., distribution = stats::qnorm,
dparams = list(), na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = StatQq2, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(distribution = distribution, dparams = dparams,
na.rm = na.rm, ...))
}
stat_qq2 <- function (mapping = NULL, data = NULL, geom = "point",
position = "identity", ..., distribution = stats::qnorm,
dparams = list(), na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = StatQq2, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(distribution = distribution, dparams = dparams,
na.rm = na.rm, ...))
}
Usage:
cowplot::plot_grid(
ggplot(dda) + stat_qq(aes(sample = .resid)), # original
ggplot(dda) + stat_qq2(aes(sample = .resid, # new
color = f, group = 1))
)

ggplot2 2.0 new stat_ function: setting default scale for given aesthetics

I try to use the new functionality of ggplot2 in R that allows creating our own stat_ functions. I'm creating a simple one to compute and plot an interpolated surface between points arranged on a 2d array.
I would like to create a stat_topo() requiring x, y, and val aesthetics, plotting a simple geom_raster of interpolated val mapped to fill.
library(ggplot2)
library(dplyr)
library(akima)
cpt_grp <- function(data, scales) {
#interpolate data in 2D
itrp <- akima::interp(data$x,data$y,data$val,linear=F,extrap=T)
out <- expand.grid(x=itrp$x, y=itrp$y,KEEP.OUT.ATTRS = F)%>%
mutate(fill=as.vector(itrp$z))
# str(out)
return(out)
}
StatTopo <- ggproto("StatTopo", Stat,
compute_group = cpt_grp,
required_aes = c("x","y","val")
)
stat_topo <- function(mapping = NULL, data = NULL, geom = "raster",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...) {
layer(
stat = StatTopo, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
set.seed(1)
nchan <- 30
d <- data.frame(val = rnorm(nchan), # some random values to be mapped to fill color
x = 1:nchan*cos(1:nchan), # the x and y position of the points to interpolate
y = 1:nchan*sin(1:nchan))
plot(d$x,d$y)
ggplot(d,aes(x=x,y=y,val=val)) +
stat_topo() +
geom_point()
When I run this, I get the following error:
Error: numerical color values must be >= 0, found -1
I understand that this is because somehow the scale of the fill aesthetic is set to discrete.
If I enter this:
ggplot(d,aes(x=x,y=y,val=val)) +
stat_topo() +
scale_fill_continuous() +
geom_point()
I get what I wanted: the expected raster with a continuous color scale, which I want the stat_ to do by default...
So I guess the question is:
How can I prevent ggplot from setting a discrete scale here, and ideally set a default scale within the call to my new stat_ function.
Apparently, when creating a new variable inside a stat_ function, one needs to explicitly associate it to the aesthetic it will be mapped to with the parameter default_aes = aes(fill = ..fill..) within the ggproto definition.
This is telling ggplot that it is a calculated aesthetic and it will pick a scale based on the data type.
So here we need to define the stat_ as follows:
cpt_grp <- function(data, scales) {
# interpolate data in 2D
itrp <- akima::interp(data$x,data$y,data$val,linear=F,extrap=T)
out <- expand.grid(x=itrp$x, y=itrp$y,KEEP.OUT.ATTRS = F)%>%
mutate(fill=as.vector(itrp$z))
# str(out)
return(out)
}
StatTopo <- ggproto("StatTopo", Stat,
compute_group = cpt_grp,
required_aes = c("x","y","val"),
default_aes = aes(fill = ..fill..)
)
stat_topo <- function(mapping = NULL, data = NULL, geom = "raster",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...) {
layer(
stat = StatTopo, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
Then the following code:
set.seed(1)
nchan <- 30
d <- data.frame(val = rnorm(nchan),
x = 1:nchan*cos(1:nchan),
y = 1:nchan*sin(1:nchan))
ggplot(d,aes(x=x,y=y,val=val)) +
stat_topo() +
geom_point()
Produces as expected:
Without the need to specify a scale_ manually, but leaving the possibility to adapt the scale easily as usual with e.g. scale_fill_gradient2(low = 'blue',mid='white',high='red')
I got this answer here: https://github.com/hadley/ggplot2/issues/1481
Okay, slept on it, and had an idea, and I think this might do what you want. In your stat_topo layer function instead of the ggproto I returned a list with it as the first element and then added to that list another ggproto with a call to scale_fill_continuous().
library(ggplot2)
library(dplyr)
library(akima)
cpt_grp <- function(data, scales) {
#interpolate data in 2D
itrp <- akima::interp(data$x,data$y,data$val,linear=F,extrap=T)
out <- expand.grid(x=itrp$x, y=itrp$y,KEEP.OUT.ATTRS = F)%>%
mutate(fill=as.vector(itrp$z))
return(out)
}
StatTopo <- ggproto("StatTopo", Stat,
compute_group = cpt_grp,
required_aes = c("x","y","val")
)
stat_topo <- function(mapping = NULL, data = NULL, geom = "raster",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...) {
list(
layer(
stat = StatTopo, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm )
),
scale_fill_continuous()
)
}
set.seed(1)
nchan <- 30
d <- data.frame(val = rnorm(nchan), # some random values to be mapped to fill color
x = 1:nchan*cos(1:nchan), # the x and y position of interp points
y = 1:nchan*sin(1:nchan))
ggplot(d,aes(x=x,y=y,val=val)) +
stat_topo() +
geom_point()
yielding the same picture as above.

Resources