Plot a discontinuous function in R without connecting a "jump" - r

I'd like to plot a discontinuous function without connecting a jump. For example, in the following plot, I'd like to delete the line connecting (0.5, 0.5) and (0.5, 1.5).
f <- function(x){
(x < .5) * (x) + (x >= .5) * (x + 1)
}
ggplot()+
geom_function(fun = f)
Edit: I'm looking for a solution that works even if the discountinuous point is not a round number, say pi/10.

You could write a little wrapper function which finds discontinuities in the given function and plots them as separate groups:
plot_fun <- function(fun, from = 0, to = 1, by = 0.001) {
x <- seq(from, to, by)
groups <- cut(x, c(-Inf, x[which(abs(diff(fun(x))) > 0.1)], Inf))
df <- data.frame(x, groups, y = fun(x))
ggplot(df, aes(x, y, group = groups)) +
geom_line()
}
This allows
plot_fun(f)
plot_fun(floor, 0, 10)

This answer is based on Allan Cameron's answer, but depicts the jump using open and closed circles. Whether the function is right or left continuous is controlled by an argument.
library("ggplot2")
plot_fun <- function(fun, from = 0, to = 1, by = 0.001, right_continuous = TRUE) {
x <- seq(from, to, by)
tol_vertical <- 0.1
y <- fun(x)
idx_break <- which(abs(diff(y)) > tol_vertical)
x_break <- x[idx_break]
y_break_l <- y[idx_break]
y_break_r <- y[idx_break + 1]
groups <- cut(x, c(-Inf, x_break, Inf))
df <- data.frame(x, groups, y = fun(x))
plot_ <- ggplot(df, aes(x, y, group = groups)) +
geom_line()
# add open and closed points showing jump
dataf_l <- data.frame(x = x_break, y = y_break_l)
dataf_r <- data.frame(x = x_break, y = y_break_r)
shape_open_circle <- 1
# this is the default of shape, but might as well specify.
shape_closed_circle <- 19
shape_size <- 4
if (right_continuous) {
shape_l <- shape_open_circle
shape_r <- shape_closed_circle
} else {
shape_l <- shape_closed_circle
shape_r <- shape_open_circle
}
plot_ <- plot_ +
geom_point(data = dataf_l, aes(x = x, y = y), group = NA, shape = shape_l, size = shape_size) +
geom_point(data = dataf_r, aes(x = x, y = y), group = NA, shape = shape_r, size = shape_size)
return(plot_)
}
Here's the OP's original example:
f <- function(x){
(x < .5) * (x) + (x >= .5) * (x + 1)
}
plot_fun(f)
Here's Allan's additional example using floor, which shows multiple discontinuities:
plot_fun(floor, from = 0, to = 10)
And here's an example showing that the function does not need to be piecewise linear:
f_curved <- function(x) ifelse(x > 0, yes = 0.5*(2-exp(-x)), no = 0)
plot_fun(f_curved, from = -1, to = 5)

You can insert everything inside an ifelse:
f <- function(x){
ifelse(x==0.5,
NA,
(x < .5) * (x) + (x >= .5) * (x + 1))
}
ggplot()+
geom_function(fun = f)

Related

How to set a logarithmic scale across multiple ggplot2 contour plots?

I am attempting to create three contour plots, each illustrating the following function applied to two input vectors and a fixed alpha:
alphas <- c(1, 5, 25)
x_vals <- seq(0, 25, length.out = 100)
y_vals <- seq(0, 50, length.out = 100)
my_function <- function(x, y, alpha) {
z <- (1 / (x + alpha)) * (1 / (y + alpha))
}
for each alpha in the vector alphas, I am creating a contour plot of z values—relative to the minimal z value—over x and y axes.
I do so with the following code (probably not best practices; I'm still learning the basics with R):
plots <- list()
for(i in seq_along(alphas)) {
z_table <- sapply(x_vals, my_function, y = y_vals, alpha = alphas[i])
x <- rep(x_vals, each = 100)
y <- rep(y_vals, 100)
z <- unlist(flatten(list(z_table)))
z_rel <- z / min(z)
d <- data.frame(cbind(x, y, z_rel))
plots[[i]] <- ggplot(data = d, aes(x = x, y = y, z = z_rel)) +
geom_contour_filled()
}
When alpha = 1:
When alpha = 25:
I want to display these plots in one grouping using ggarrange(), with one logarithmic color scale (as relative z varies so much from plot to plot). Is there a way to do this?
You can build a data frame with all the data for all alphas combined, with a column indicating the alpha, so you can facet your graph:
I basically removed the plot[[i]] part, and stacked up the d's created in the former loop:
d = numeric()
for(i in seq_along(alphas)) {
z_table <- sapply(x_vals, my_function, y = y_vals, alpha = alphas[i])
x <- rep(x_vals, each = 100)
y <- rep(y_vals, 100)
z <- unlist(flatten(list(z_table)))
z_rel <- z / min(z)
d <- rbind(d, cbind(x, y, z_rel))}
d = as.data.frame(d)
Then we create the alphas column:
d$alpha = factor(paste("alpha =", alphas[rep(1:3, each=nrow(d)/length(alphas))]),
levels = paste("alpha =", alphas[1:3]))
Then build the log scale inside the contour:
ggplot(data = d, aes(x = x, y = y, z = z_rel)) +
geom_contour_filled(breaks=round(exp(seq(log(1), log(1400), length = 14)),1)) +
facet_wrap(~alpha)
Output:

How to find the geom_curve control points in ggplot

I wonder how can I find the control points of geom_curve in ggplot2? e.g.
p <- ggplot(mtcars, aes(wt, mpg)) +
geom_curve(aes(x = 2.62, y = 21.0, xend = 3.57, yend = 15.0),curvature = -0.2, data = df) +
geom_point()
b <- ggplot_build(p)
b$data[[1]]
p$layers[[1]]$geom_params
b$data[[1]] gives the starting and ending points and p$layers[[1]]$geom_params gives the curve information (angle, curvature, ...).
But how can I find the control points, so I can reproduce the graphics?
library(ggplot)
library(grid)
library(stringr)
df <- data.frame(x = 1:3, y = 1:3)
df2 <- data.frame(x = c(1,3), y = c(1,3),
xend = c(2,2), yend = c(2,2))
g <- ggplot(df, aes(x, y)) +
geom_point() +
geom_curve(aes(x = x ,y = y,
xend = xend, yend = yend),
data = df2,
color = c("red", "blue"))
g
getCurve_controlPoints <- function(ggplotObject) {
len_layers <- length(ggplotObject$layers)
layerNames <- lapply(seq_len(len_layers),
function(j) {
className <- class(ggplotObject$layers[[j]]$geom)
className[-which(className %in% c("ggproto" ,"gg", "Geom"))]
})
curveLayerId <- which(sapply(layerNames,
function(layerName){
"GeomCurve" %in% layerName
}) == TRUE
)
gg_build <- ggplot_build(ggplotObject)
# you can also add yes or no in your code
# answer <- utils::menu(c("y", "n"), title="Do you want to draw the ggplot?")
grid.draw(ggplotObject)
grid.force()
gridList <- grid.ls(print = FALSE)
gridList.name <- gridList$name
xspline.name <- gridList.name[which(str_detect(gridList.name, "curve") == TRUE)]
xspline.len <- length(xspline.name)
controlPoints <- lapply(seq_len(length(curveLayerId)),
function (j) {
# curve data
curve_data <- gg_build$data[[curveLayerId[j]]]
# avoid duplicated rows
curve_data <- curve_data[!duplicated(curve_data), ]
n <- dim(curve_data)[1]
# here we go! But wait a second, it seems like the starting and ending position do not match
xsplinePoints <- xsplinePoints(grid.get("xspline", grep=TRUE))
# mapping data to our coordinates
control_data <- lapply(seq_len(n),
function(i){
if (n == 1) {
xy <- lapply(xsplinePoints,
function(coord){
as.numeric(coord)
})
} else {
xy <- lapply(xsplinePoints[[i]],
function(coord){
as.numeric(coord)
})
}
x.start <- curve_data[i, ]$x
x.end <- curve_data[i, ]$xend
y.start <- curve_data[i, ]$y
y.end <- curve_data[i, ]$yend
# mapping to ggplot coordinates
xy_x.diff <- xy$x[length(xy$x)] - xy$x[1]
xy_y.diff <- xy$y[length(xy$y)] - xy$y[1]
# maybe there is a better way?
if(xy_x.diff == 0){
xy_x.diff <- 1e-16
}
if(xy_y.diff == 0){
xy_y.diff <- 1e-16
}
x <- (x.end - x.start) / (xy_x.diff) * (xy$x - xy$x[1]) + x.start
y <- (y.end - y.start) / (xy_y.diff) * (xy$y - xy$y[1]) + y.start
list(x = x, y = y)
})
# grid remove
grid.remove(xspline.name[j], redraw = FALSE)
control_data
})
controlPoints
}
controlPoints <- getCurve_controlPoints(g)
# check the points
plot(controlPoints[[1]][[1]]$x, controlPoints[[1]][[1]]$y,
xlim = c(1,3),
ylim = c(1,3),
xlab = "x",
ylab = "y",
pch = 19)
points(controlPoints[[1]][[2]]$x, controlPoints[[1]][[2]]$y, pch = 19)
I think it works well so far and the ggplot version I used is 3.0.0. If you use any version less than 2.2.1, error may occur.
This idea is suggested by Prof Paul Murrell. Perhaps the easiest way to capture the control points of geom_curve? The cons are that grobs(ggplot object) must be drawn at first, since these points are only generated at the drawing time.

Does ggplot use dynamic function for statistic functions?

So I'm running an optimization problem and am trying to add the function at each point in time to a plot. I'm able to plot the function but I have the variables stored and it seems like r doesn't evaluate the function until it renders it. It's hard to explain, but I have a simple example that shows it.
data = data.frame(x = runif(20, -10, 10), y = runif(20, -10,10))
p <- ggplot(data, aes(x = x, y =y))
slope = 0.5
yoff = 1
p <- p + stat_function(fun = function(x) slope*x+yoff)
slope = 1
yoff = -1
p <- p + stat_function(fun = function(x) slope*x+yoff)
p
And what I want is two lines on the graph with the slope and y-intercept that I had when I added the function to the graph.
If you have a lot of them, make a list of functions:
make_fun <- function(slope,yoff) {slope; yoff; function(x) x*slope + yoff}
> l <- mapply(FUN = make_fun,slope = 1:2,yoff = 3:4)
> l[[1]](1)
[1] 4
> l[[2]](1)
[1] 6
A function is evaluated when used, so there it is at render time.
You can rename your parameters to have different function:
p <- ggplot(data, aes(x = x, y =y))
slope1 = 0.5
yoff1 = 1
p <- p + stat_function(fun = function(x) slope1*x+yoff1)
slope2 = 1
yoff2 = -1
p <- p + stat_function(fun = function(x) slope2*x+yoff2)
Many parameters in ggplot aren't evaluated until the plot is actually rendered. Here we can make the slope and yoff values arguments to the functions and then pass in values via the args= parameter which does get evaluated earlier.
library(ggplot2)
data = data.frame(x = runif(20, -10, 10), y = runif(20, -10,10))
p <- ggplot(data, aes(x = x, y =y))
slope = 0.5
yoff = 1
p <- p + stat_function(fun = function(x, slope, yoff) slope*x+yoff, args=list(slope=slope, yoff=yoff))
slope = 1
yoff = -1
p <- p + stat_function(fun = function(x, slope, yoff) slope*x+yoff, args=list(slope=slope, yoff=yoff))
p

How can one (easier) create nice x-axis ticks (i.e. pi/2, pi, 3pi/2, ...) in ggplot2?

I would like to create a plot, where one can see an alternative ticking of the x-axis, e.g. pi/2, pi, 3pi/2, etc. So far, this works for me only with a rather unhandy code (look at the lines where I create pi.halfs, pi.fulls and merge them later into vec.expr):
require (ggplot2)
# Create vectors: breaks and corresponding labels as multiples of pi/2
vec.breaks <- seq(from = pi/2, to = 7*pi/2, by = pi/2)
pi.halfs <- c(paste(expression(pi), "/2"),
paste(seq(from = 3, to = 21, by = 2), "*" , expression(pi), "/2"))
pi.fulls <- c(paste(expression(pi)),
paste(seq(from = 2, to = 11, by = 1), "*" , expression(pi)))
vec.expr <- parse(text = c(rbind(pi.halfs, pi.fulls)))[1:7]
# Create some time and signal
time <- seq(from = 0, to = 4*pi, by = 0.01)
signal <- sin(time)
df <- data.frame(time,signal)
# Now plot the signal with the new x axis labels
fig <- ggplot(data = df, aes(x = time, y = signal)) +
geom_line() +
scale_x_continuous(breaks=vec.breaks, labels=vec.expr)
print(fig)
... resulting in ...
Is anyone aware of an easier approach, where one can change the base of some x-axis labeling in ggplot2, e.g. like here from decimals to multiples of pi? Are there any nice packages, that I missed so far? I found some duplicates of this question, but only in other languages...
You are looking for the scales package, which lets you create arbitrary formatting functions for scales and also has a number of helpful formatting functions already built in. Looking through the scales package help, I was surprised not to find a radian scale, but you should be able to create one using math_formatter(). This code gets the same results, though not with the fractions.
library(ggplot2)
library(scales)
time <- seq(from = 0, to = 4*pi, by = 0.01)
signal <- sin(time)
df <- data.frame(time,signal)
pi_scales <- math_format(.x * pi, format = function(x) x / pi)
fig <- ggplot(data = df, aes(x = time, y = signal)) +
geom_line() +
scale_x_continuous(labels = pi_scales, breaks = seq(pi / 2, 7 * pi / 2, pi / 2))
print(fig)
Here is a function to make fractional labels (maybe a little clunky). It uses fractions from MASS package and allows you to change the multiplier you want to use on the x-axis. You just pass it a symbol (ie. "pi"). If the symbol has a value, the ticks will be scaled by width*value, otherwise just by width.
# Now plot the signal with the new x axis labels
p <- ggplot(data = df, aes(x = time, y = signal)) +
geom_line()
## Convert x-ticks to fractional x-ticks with a symbol multiplier
fracAx <- function(p, symbol, width=0.5) {
require(MASS) # for fractions
val <- tryCatch(eval(parse(text=symbol)), error=function(e) 1)
info <- ggplot_build(p)
xrange <- info[[2]]$ranges[[1]]$x.range/val # get the x-range of figure
vec.breaks <- seq(floor(xrange[1]), ceiling(xrange[2]), by=width)
fracs <- strsplit(attr(fractions(vec.breaks), "fracs"), "/") # convert to fractions
labels <- sapply(fracs, function(i)
if (length(i) > 1) { paste(i[1], "*", symbol, "/", i[2]) }
else { paste(i, "*", symbol) })
p + scale_x_continuous(breaks=vec.breaks*val, labels=parse(text=labels))
}
## Make the graph with pi axis
fracAx(p, "pi")
## Make the graph with e axis, for example
e <- exp(1)
fracAx(p, "e")
## Make the graph with a symbol that has no value
fracAx(p, "theta", width=2)
Based on the other answers here I was able to piece together some functions which implement a general radians format that can be used independently of mucking about with the internals of ggplot2 objects.
numerator <- function(x) {
f = attr(x, "fracs")
s <- as.integer(sign(x))
ifelse(is.finite(x), as.integer(stringr::str_extract(f, "^[^/]*")), s)
}
denominator <- function(x) {
f = attr(x, "fracs")
s <- as.integer(sign(x))
ratio <- str_detect(f, "/")
d <- as.integer(stringr::str_extract(f, "[^/]*$"))
ifelse(is.finite(x), ifelse(ratio, d, 1L), 0L)
}
char_sign <- function(x) {
s <- sign(x)
ifelse(s == 1, "+",
ifelse(s == -1, "-", ""))
}
#' Convert value to radians formatting
radians <- function(x) {
y = x/pi
f = suppressWarnings(MASS::as.fractions(y))
n = suppressWarnings(numerator(f))
d = suppressWarnings(denominator(f))
s <- char_sign(x)
o <- vector(mode = "character", length = length(x))
o <- ifelse(d == 0 & n != 0, paste0(s, "∞"), o)
o <- ifelse(n == 0 & d != 0, "0", o)
o <- ifelse(n != 0 & d != 0, paste0(n, "π/", d), o)
o <- ifelse(n == 1 & d != 0, paste0("π/", d), o)
o <- ifelse(n == -1 & d == 1, paste0(s, "π"), o)
o <- ifelse(n == -1 & d != 0 & d != 1, paste0(s, "π/", d), o)
o <- ifelse(d == 1 & n != 0 & abs(n) != 1, paste0(n, "π"), o)
o <- ifelse(n == d & is.finite(n), "π", o)
o
}
Here it is in use:
```r
time <- seq(from = 0, to = 4*pi, by = 0.01)
signal <- sin(time)
df <- data.frame(time,signal)
ggplot(df, aes(time, signal)) +
geom_line() +
scale_x_continuous(labels = trans_format(radians, force),
breaks = seq(0, 4*pi, pi/2))

How to get something like Matplotlib's symlog scale in ggplot or lattice?

For very heavy-tailed data of both positive and negative sign, I sometimes like to see all the data on a plot without hiding structure in the unit interval.
When plotting with Matplotlib in Python, I can achieve this by selecting a symlog scale, which uses a logarithmic transform outside some interval, and linear plotting inside it.
Previously in R I have constructed similar behavior by transforming the data with an arcsinh on a one-off basis. However, tick labels and the like are very tricky to do right (see below).
Now, I am faced with a bunch of data where the subsetting in lattice or ggplot would be highly convenient. I don't want to use Matplotlib because of the subsetting, but I sure am missing symlog!
Edit:
I see that ggplot uses a package called scales, which solves a lot of this problem (if it works). Automatically choosing tick mark and label placing still looks pretty hard to do nicely though. Some combination of log_breaks and cbreaks perhaps?
Edit 2:
The following code is not too bad
sinh.scaled <- function(x,scale=1){ sinh(x)*scale }
asinh.scaled <- function(x,scale=1) { asinh(x/scale) }
asinh_breaks <- function (n = 5, scale = 1, base=10)
{
function(x) {
log_breaks.callable <- log_breaks(n=n,base=base)
rng <- rng <- range(x, na.rm = TRUE)
minx <- floor(rng[1])
maxx <- ceiling(rng[2])
if (maxx == minx)
return(sinh.scaled(minx, scale=scale))
big.vals <- 0
if (minx < (-scale)) {
big.vals = big.vals + 1
}
if (maxx>scale) {
big.vals = big.vals + 1
}
brk <- c()
if (minx < (-scale)) {
rbrk <- log_breaks.callable( c(-min(maxx,-scale), -minx ) )
rbrk <- -rev(rbrk)
brk <- c(brk,rbrk)
}
if ( !(minx>scale | maxx<(-scale)) ) {
rng <- c(max(minx,-scale), min(maxx,scale))
minc <- floor(rng[1])
maxc <- ceiling(rng[2])
by <- floor((maxc - minc)/(n-big.vals)) + 1
cb <- seq(minc, maxc, by = by)
brk <- c(brk,cb)
}
if (maxx>scale) {
brk <- c(brk,log_breaks.callable( c(max(minx,scale), maxx )))
}
brk
}
}
asinh_trans <- function(scale = 1) {
trans <- function(x) asinh.scaled(x, scale)
inv <- function(x) sinh.scaled(x, scale)
trans_new(paste0("asinh-", format(scale)), trans, inv,
asinh_breaks(scale = scale),
domain = c(-Inf, Inf))
}
A solution based on the package scales and inspired by Brian Diggs' post mentioned by #Dennis:
symlog_trans <- function(base = 10, thr = 1, scale = 1){
trans <- function(x)
ifelse(abs(x) < thr, x, sign(x) *
(thr + scale * suppressWarnings(log(sign(x) * x / thr, base))))
inv <- function(x)
ifelse(abs(x) < thr, x, sign(x) *
base^((sign(x) * x - thr) / scale) * thr)
breaks <- function(x){
sgn <- sign(x[which.max(abs(x))])
if(all(abs(x) < thr))
pretty_breaks()(x)
else if(prod(x) >= 0){
if(min(abs(x)) < thr)
sgn * unique(c(pretty_breaks()(c(min(abs(x)), thr)),
log_breaks(base)(c(max(abs(x)), thr))))
else
sgn * log_breaks(base)(sgn * x)
} else {
if(min(abs(x)) < thr)
unique(c(sgn * log_breaks()(c(max(abs(x)), thr)),
pretty_breaks()(c(sgn * thr, x[which.min(abs(x))]))))
else
unique(c(-log_breaks(base)(c(thr, -x[1])),
pretty_breaks()(c(-thr, thr)),
log_breaks(base)(c(thr, x[2]))))
}
}
trans_new(paste("symlog", thr, base, scale, sep = "-"), trans, inv, breaks)
}
I am not sure whether the impact of a parameter scale is the same as in Python, but here are a couple of comparisons (see Python version here):
data <- data.frame(x = seq(-50, 50, 0.01), y = seq(0, 100, 0.01))
data$y2 <- sin(data$x / 3)
# symlogx
ggplot(data, aes(x, y)) + geom_line() + theme_bw() +
scale_x_continuous(trans = symlog_trans())
# symlogy
ggplot(data, aes(y, x)) + geom_line() + theme_bw()
scale_y_continuous(trans="symlog")
# symlog both, threshold = 0.015 for y
# not too pretty because of too many breaks in short interval
ggplot(data, aes(x, y2)) + geom_line() + theme_bw()
scale_y_continuous(trans=symlog_trans(thr = 0.015)) +
scale_x_continuous(trans = "symlog")
# Again symlog both, threshold = 0.15 for y
ggplot(data, aes(x, y2)) + geom_line() + theme_bw()
scale_y_continuous(trans=symlog_trans(thr = 0.15)) +
scale_x_continuous(trans = "symlog")

Resources