Related
I'd like to plot a discontinuous function without connecting a jump. For example, in the following plot, I'd like to delete the line connecting (0.5, 0.5) and (0.5, 1.5).
# Piecewise-linear example with a unit jump at x = 0.5:
# returns x for x < 0.5 and x + 1 for x >= 0.5 (vectorised over x).
f <- function(x) {
  # Add the indicator instead of multiplying by logical masks: the masked
  # form computes 0 * Inf = NaN for infinite inputs.
  x + as.numeric(x >= 0.5)
}
# Plot f as a single curve; the two sides of the jump end up joined by a line.
ggplot() +
  geom_function(fun = f)
Edit: I'm looking for a solution that works even if the discontinuity is not at a round number, say pi/10.
You could write a little wrapper function which finds discontinuities in the given function and plots them as separate groups:
# Plot `fun` on a regular grid without joining the two sides of a jump.
#
# fun           vectorised function of one numeric argument.
# from, to, by  grid definition passed to seq().
# tol_vertical  a change in `fun` larger than this between neighbouring grid
#               points is treated as a discontinuity (default 0.1, the value
#               that used to be hard-coded).
#
# Returns a ggplot object in which every continuous stretch is its own group.
plot_fun <- function(fun, from = 0, to = 1, by = 0.001, tol_vertical = 0.1) {
  x <- seq(from, to, by)
  # Evaluate once so the grouping and the plotted values come from the same call.
  y <- fun(x)
  jump_after <- which(abs(diff(y)) > tol_vertical)
  # cut() builds right-closed intervals, so x[i] stays with the left-hand piece
  # and x[i + 1] starts the next one.
  groups <- cut(x, c(-Inf, x[jump_after], Inf))
  df <- data.frame(x, groups, y)
  ggplot(df, aes(x, y, group = groups)) +
    geom_line()
}
This allows
# Examples: the jump in f and each step of floor() become separate line pieces.
plot_fun(fun = f)
plot_fun(fun = floor, from = 0, to = 10)
This answer is based on Allan Cameron's answer, but depicts the jump using open and closed circles. Whether the function is right or left continuous is controlled by an argument.
library("ggplot2")
# Plot `fun` on a regular grid, breaking the line at every jump and marking
# the two ends of each jump with an open and a closed circle.
#
# fun               vectorised function of one numeric argument.
# from, to, by      grid definition passed to seq().
# right_continuous  TRUE: closed circle on the right-hand value, open circle on
#                   the left-hand value; FALSE: the other way round.
# tol_vertical      a change in `fun` larger than this between neighbouring
#                   grid points counts as a jump (default 0.1, the value that
#                   used to be hard-coded).
#
# Returns a ggplot object.
plot_fun <- function(fun, from = 0, to = 1, by = 0.001, right_continuous = TRUE,
                     tol_vertical = 0.1) {
  x <- seq(from, to, by)
  # Evaluate once; the same values feed the jump detection and the plot.
  y <- fun(x)
  idx_break <- which(abs(diff(y)) > tol_vertical)
  x_break <- x[idx_break]

  groups <- cut(x, c(-Inf, x_break, Inf))
  df <- data.frame(x, groups, y)
  plot_ <- ggplot(df, aes(x, y, group = groups)) +
    geom_line()

  # Nothing to mark when the function has no jump on the grid.
  if (length(idx_break) == 0L) {
    return(plot_)
  }

  # Both markers are placed at the same x (the last grid point before the
  # jump) so they line up vertically.
  dataf_l <- data.frame(x = x_break, y = y[idx_break])
  dataf_r <- data.frame(x = x_break, y = y[idx_break + 1])

  shape_open_circle <- 1
  shape_closed_circle <- 19
  shape_size <- 4
  if (right_continuous) {
    shape_l <- shape_open_circle
    shape_r <- shape_closed_circle
  } else {
    shape_l <- shape_closed_circle
    shape_r <- shape_open_circle
  }

  # group = NA is set as a constant so the point layers do not pick up the
  # plot-level `group = groups` mapping.
  plot_ +
    geom_point(data = dataf_l, aes(x = x, y = y), group = NA,
               shape = shape_l, size = shape_size) +
    geom_point(data = dataf_r, aes(x = x, y = y), group = NA,
               shape = shape_r, size = shape_size)
}
Here's the OP's original example:
# The example from the question: x below 0.5, x + 1 from 0.5 onwards.
f <- function(x) {
  # Adding the 0/1 indicator keeps infinite inputs finite-safe; multiplying
  # by logical masks would compute 0 * Inf = NaN.
  x + as.numeric(x >= 0.5)
}
# Default grid on [0, 1]
plot_fun(fun = f)
Here's Allan's additional example using floor, which shows multiple discontinuities:
# floor() jumps at every integer in (0, 10]
plot_fun(fun = floor, from = 0, to = 10)
And here's an example showing that the function does not need to be piecewise linear:
# Curved example: 0 for x <= 0, and 0.5 * (2 - exp(-x)) for x > 0.
f_curved <- function(x) {
  is_positive <- x > 0
  ifelse(is_positive, yes = 0.5 * (2 - exp(-x)), no = 0)
}
# Grid from -1 to 5 covers both branches of f_curved
plot_fun(fun = f_curved, from = -1, to = 5)
You can insert everything inside an ifelse:
# Same piecewise function, but NA at the jump so that a line drawn through the
# evaluated points is interrupted there.
f <- function(x) {
  # Compare with a tolerance: an exact `==` test misses the break whenever the
  # evaluation grid hits 0.5 only up to floating-point rounding.
  at_jump <- abs(x - 0.5) < sqrt(.Machine$double.eps)
  ifelse(at_jump, NA_real_, x + as.numeric(x >= 0.5))
}
# Plot f; the NA it returns at the jump is where the curve is meant to break.
ggplot() +
  geom_function(fun = f)
I am attempting to create three contour plots, each illustrating the following function applied to two input vectors and a fixed alpha:
# Shift parameters and the evaluation grid used for the contour plots.
alphas <- c(1, 5, 25)
x_vals <- seq(0, 25, length.out = 100)
y_vals <- seq(0, 50, length.out = 100)

# Surface value (1 / (x + alpha)) * (1 / (y + alpha)); vectorised over x and y.
my_function <- function(x, y, alpha) {
  # Return the expression itself: ending on an assignment made the result
  # invisible, so a call at the console printed nothing.
  (1 / (x + alpha)) * (1 / (y + alpha))
}
For each alpha in the vector alphas, I am creating a contour plot of z values—relative to the minimal z value—over x and y axes.
I do so with the following code (probably not best practices; I'm still learning the basics with R):
# One filled-contour plot per alpha. z is expressed relative to its minimum
# over the grid for that alpha.
plots <- lapply(alphas, function(alpha) {
  # expand.grid() varies its first factor fastest, so y cycles within each x;
  # the grid size follows x_vals / y_vals instead of a hard-coded 100.
  grid <- expand.grid(y = y_vals, x = x_vals)
  z <- my_function(grid$x, grid$y, alpha)
  d <- data.frame(x = grid$x, y = grid$y, z_rel = z / min(z))
  ggplot(data = d, aes(x = x, y = y, z = z_rel)) +
    geom_contour_filled()
})
When alpha = 1:
When alpha = 25:
I want to display these plots in one grouping using ggarrange(), with one logarithmic color scale (as relative z varies so much from plot to plot). Is there a way to do this?
You can build a data frame with all the data for all alphas combined, with a column indicating the alpha, so you can facet your graph:
I basically removed the plots[[i]] part, and stacked up the d's created in the former loop:
# Stack the (x, y, z_rel) table of every alpha into one data frame; the rows
# of each alpha form one contiguous block, in the order of `alphas`.
d <- do.call(rbind, lapply(alphas, function(alpha) {
  # expand.grid() varies its first factor fastest, so y cycles within each x.
  grid <- expand.grid(y = y_vals, x = x_vals)
  z <- my_function(grid$x, grid$y, alpha)
  # z relative to the minimum over the grid for this alpha
  data.frame(x = grid$x, y = grid$y, z_rel = z / min(z))
}))
Then we create the alphas column:
# Label each block of rows with its alpha. The block size is nrow(d) divided
# by the number of alphas, and the labels follow `alphas` rather than a
# hard-coded 1:3, so the code keeps working if `alphas` changes length.
alpha_labels <- paste("alpha =", alphas)
d$alpha <- factor(
  rep(alpha_labels, each = nrow(d) / length(alphas)),
  levels = alpha_labels
)
Then build the log scale inside the contour:
# One panel per alpha, all sharing 14 log-spaced fill breaks from 1 to 1400.
ggplot(data = d, aes(x = x, y = y, z = z_rel)) +
  geom_contour_filled(
    breaks = round(exp(seq(log(1), log(1400), length.out = 14)), 1)
  ) +
  facet_wrap(~alpha)
Output:
I am trying to plot a heatmap with the library pheatmap in R.
I think that by default the branch length is proportional to the "dissimilarity" of the clusters that got merged at this step. I would like to change that so it is a fixed value, because for my purpose it looks very weird!
If anyone has an idea how I can fix this, I would be very happy.
Here is a sample code
library(pheatmap)
# 100 x 60 matrix of standard-normal draws as demo input
test <- matrix(rnorm(6000), nrow = 100, ncol = 60)
pheatmap(test)
Cheers!
Here is an example of two column groups with high dissimilarity:
library(pheatmap)
# Two blocks of 30 columns each; the second block is shifted by +10.
test <- cbind(
  matrix(rnorm(3000), nrow = 100, ncol = 30),
  matrix(rnorm(3000) + 10, nrow = 100, ncol = 30)
)
pheatmap(test)
In pheatmap, the dendrogram is plotted by the pheatmap:::draw_dendrogram function
and branch lengths are stored in the h object.
Below I define equal-length branches adding the command
# With k = length(hc$height), cumsum(rep(1/k, k)) is 1/k, 2/k, ..., 1:
# evenly spaced, increasing merge heights.
hc$height <- cumsum(rep(1/length(hc$height), length(hc$height)))
as follows:
# Variant of pheatmap's internal draw_dendrogram() that draws every merge one
# equal step above the previous one instead of at its clustering height.
#
# hc          hclust-like object; $height, $merge and $order are used.
# gaps        forwarded unchanged to pheatmap:::find_coordinates().
# horizontal  if FALSE the x and y coordinates are swapped and mirrored
#             (1 npc minus the value).
#
# Returns a grid polyline grob with one four-point path per merge.
draw_dendrogram <- function(hc, gaps, horizontal = TRUE) {
  # Define equal-length branches
  hc$height <- cumsum(rep(1/length(hc$height), length(hc$height)))

  h <- hc$height / max(hc$height) / 1.05
  m <- hc$merge
  o <- hc$order
  n <- length(o)
  n_merge <- nrow(m)

  # Re-index the merge table into rows of `dist`: leaves (negative entries)
  # map to rows 1..n, earlier merges (positive entries) to rows n + 1 onwards.
  m[m > 0] <- n + m[m > 0]
  m[m < 0] <- abs(m[m < 0])

  dist <- matrix(0, nrow = 2 * n - 1, ncol = 2,
                 dimnames = list(NULL, c("x", "y")))
  # Leaves sit at height 0, centred in equal-width slots in plotting order.
  dist[seq_len(n), 1] <- 1 / n / 2 + (1 / n) * (match(seq_len(n), o) - 1)
  # Must stay sequential: a merge may refer to a node created by an earlier one.
  for (i in seq_len(n_merge)) {
    dist[n + i, 1] <- (dist[m[i, 1], 1] + dist[m[i, 2], 1]) / 2
    dist[n + i, 2] <- h[i]
  }

  # Each merge is the path (x1, y1) -> (x1, h) -> (x2, h) -> (x2, y2).
  # rbind() + as.vector() interleaves the four points merge by merge.
  left <- m[, 1]
  right <- m[, 2]
  merge_h <- h[seq_len(n_merge)]
  x <- as.vector(rbind(dist[left, 1], dist[left, 1],
                       dist[right, 1], dist[right, 1]))
  y <- as.vector(rbind(dist[left, 2], merge_h, merge_h, dist[right, 2]))
  id <- rep(seq_len(n_merge), each = 4)

  x <- pheatmap:::find_coordinates(n, gaps, x * n)$coord
  # grid functions are namespace-qualified so this replacement also works
  # when the grid package is not attached.
  y <- grid::unit(y, "npc")
  if (!horizontal) {
    a <- x
    x <- grid::unit(1, "npc") - y
    y <- grid::unit(1, "npc") - a
  }
  grid::polylineGrob(x = x, y = y, id = id)
}
# Replace the non-exported function `draw_dendrogram` in `pheatmap`:
# Put the customised function in place of pheatmap's own (non-exported)
# draw_dendrogram, then redraw the heatmap.
assignInNamespace(
  x = "draw_dendrogram",
  value = draw_dendrogram,
  ns = "pheatmap"
)
pheatmap(test)
The result is:
I am currently working with B-splines using R's function bs from the package splines, and as a graphic example I would like to provide a figure showing the differences between sets of splines with different degrees.
The problem is that bs only supports degrees bigger than 0.
A spline of degree zero is nothing more than an indicator function for the given region defined by the knots, but I don't really know how to generate it.
This is what I've done so far
# Draw the B-spline bases of degree 1, 2 and 3 on [0, 1], one panel per
# degree, with dashed vertical lines at the knots.
x <- seq(0, 1, length.out = 1000)
knot_grid <- seq(0, 1, length.out = 11)
par(mfrow = c(3, 1))

# Evaluate the basis of the given degree on x, plot it, and return the matrix.
# The first and last grid values are the boundary knots, the rest are interior.
plot_bspline_basis <- function(degree) {
  basis <- splines::bs(
    x,
    knots = knot_grid[-c(1, 11)],
    Boundary.knots = c(0, 1),
    intercept = TRUE,
    degree = degree
  )
  matplot(x, basis, type = "l", lty = 1, ylim = c(-0.1, 1.2),
          xlab = "", ylab = "")
  abline(v = knot_grid, lty = 2)
  # The legend reports the order, i.e. degree + 1.
  legend("top", legend = paste("B-splines of order", degree + 1))
  basis
}

B1 <- plot_bspline_basis(1)
B2 <- plot_bspline_basis(2)
B3 <- plot_bspline_basis(3)
This image, taken from Hastie et al. (2017), is basically what I am missing.
Thanks in advance
As I understand from the comments, you want a function that given an input vector x of n points returns a series of n-1 "splines"; where the ith spline is defined as having the value 1 in the range x[i] < x < x[i+1] or 0 elsewhere.
We can do this so:
x <- seq(0, 1, length.out = 10)

# Degree-zero "splines": one 0/1 indicator per pair of consecutive knots.
#
# x     knot positions, used in the order given.
# xout  points at which to evaluate; defaults to `n` equally spaced points
#       from min(x) to max(x).
# n     grid size, used only when `xout` is missing.
#
# Returns a long data frame with columns xout, yi (1 strictly inside
# (x[i], x[i + 1]), 0 elsewhere, so the knots themselves get 0) and
# interval (the index i). With fewer than two knots there is no interval and
# an empty data frame with the same columns is returned.
zero_spline <- function(x, xout, n = 1000) {
  n_intervals <- length(x) - 1L
  # Guard first: 1:(length(x) - 1L) would count downwards (1, 0) here.
  if (n_intervals < 1L) {
    return(data.frame(xout = numeric(0), yi = numeric(0),
                      interval = integer(0)))
  }
  if (missing(xout)) xout <- seq(min(x), max(x), length.out = n)

  # Build every piece first and bind once instead of growing with rbind().
  pieces <- lapply(seq_len(n_intervals), function(i) {
    inside <- (xout > x[i]) & (xout < x[i + 1])
    data.frame(xout, yi = as.numeric(inside), interval = i)
  })
  do.call(rbind, pieces)
}
library(ggplot2)

# Evaluate the indicators on a 100-point grid and draw one coloured line per
# interval.
zs <- zero_spline(x, n = 100)
ggplot(zs, aes(xout, yi, color = factor(interval))) +
  geom_line()
I'm trying to plot a principal component analysis using prcomp and ggbiplot. I'm getting data values outside of the unit circle, and haven't been able to rescale the data prior to calling prcomp in such a way that I can constrain the data to the unit circle.
# library() instead of require(): stop immediately if ggbiplot is missing.
# It is attached before data(wine) because the wine data (and wine.class) are
# presumably provided by ggbiplot -- confirm.
library(ggbiplot)
data(wine)

# PCA on the first three columns, scaled to unit variance by prcomp itself.
wine.pca <- prcomp(wine[, 1:3], scale. = TRUE)
ggbiplot(wine.pca, obs.scale = 1, var.scale = 1,
         groups = wine.class, ellipse = TRUE, circle = TRUE)
I tried scaling by subtracting mean and dividing by standard deviation before calling prcomp:
# Standardise the first three columns before the PCA: scale() subtracts each
# column's mean and divides by its standard deviation. (The hand-written loop
# divided by the column mean because `sd` was computed with mean(), and it
# created variables named `mean` and `sd` that shadowed the base functions.)
wine2 <- as.data.frame(scale(wine[, 1:3]))

wine2.pca <- prcomp(wine2, scale. = TRUE)
ggbiplot(wine2.pca, obs.scale = 1, var.scale = 1,
         groups = wine.class, ellipse = TRUE, circle = TRUE)
ggbiplot package installed as follows:
# Install ggbiplot from GitHub. The repository is given as "user/repo";
# the older two-argument form (repo, username) is not part of the current
# install_github() interface.
library(devtools)
install_github("vqv/ggbiplot")
Output of either code chunk:
Per @Brian Hanson's comment below, I'm adding an additional image reflecting the output I'm trying to get.
I edited the code for the plot function and was able to get the functionality I wanted.
# Biplot of a principal component analysis, built with ggplot2. Compared with
# a plain biplot, this version rescales the observation scores so that every
# point lies within radius 0.99 of the origin, and draws the optional circle
# with a fixed radius of 1.
#
# Arguments
#   pcobj          object of class prcomp, princomp or PCA; anything else
#                  raises an error.
#   choices        the two components to plot (length must be 2).
#   scale          only used to derive the defaults of obs.scale (1 - scale)
#                  and var.scale (scale).
#   pc.biplot      if TRUE the scores are multiplied by nobs.factor before the
#                  final rescaling.
#   obs.scale      exponent applied to the component standard deviations when
#                  scaling the scores.
#   var.scale      exponent applied to the component standard deviations when
#                  scaling the variable directions.
#   groups         optional grouping, stored as df.u$groups and mapped to colour.
#   ellipse        draw one ellipse per group (only when groups is given).
#   ellipse.prob   probability passed to qchisq() to size the ellipses.
#   labels         optional labels; when given, text is drawn instead of points.
#   labels.size    text size of those labels.
#   alpha          alpha of the points.
#   var.axes       draw the variable arrows and their names; the circle is only
#                  drawn inside this branch too.
#   circle         draw the circle of radius r = 1.
#   circle.prob    not referenced anywhere in the body of this version.
#   varname.size   text size of the variable names.
#   varname.adjust controls the horizontal justification of the variable names.
#   varname.abbrev abbreviate the variable names with abbreviate().
#   ...            not referenced in the body.
#
# Returns the ggplot object.
ggbiplot2=function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
obs.scale = 1 - scale, var.scale = scale,
groups = NULL, ellipse = FALSE, ellipse.prob = 0.68,
labels = NULL, labels.size = 3, alpha = 1,
var.axes = TRUE,
circle = FALSE, circle.prob = 0.69,
varname.size = 3, varname.adjust = 1.5,
varname.abbrev = FALSE, ...)
{
# NOTE(review): these attach packages on every call, which changes the
# caller's search path; consider namespace-qualified calls instead.
library(ggplot2)
library(plyr)
library(scales)
library(grid)
stopifnot(length(choices) == 2)
# Recover the SVD
# For each supported class: d = component standard deviations, u = scores
# divided by (d * nobs.factor), v = variable loadings.
if(inherits(pcobj, 'prcomp')){
nobs.factor <- sqrt(nrow(pcobj$x) - 1)
d <- pcobj$sdev
u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = '*')
v <- pcobj$rotation
} else if(inherits(pcobj, 'princomp')) {
nobs.factor <- sqrt(pcobj$n.obs)
d <- pcobj$sdev
u <- sweep(pcobj$scores, 2, 1 / (d * nobs.factor), FUN = '*')
v <- pcobj$loadings
} else if(inherits(pcobj, 'PCA')) {
nobs.factor <- sqrt(nrow(pcobj$call$X))
d <- unlist(sqrt(pcobj$eig)[1])
u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = '*')
v <- sweep(pcobj$var$coord,2,sqrt(pcobj$eig[1:ncol(pcobj$var$coord),1]),FUN="/")
} else {
stop('Expected a object of class prcomp, princomp or PCA')
}
# Scores
df.u <- as.data.frame(sweep(u[,choices], 2, d[choices]^obs.scale, FUN='*'))
# Directions
v <- sweep(v, 2, d^var.scale, FUN='*')
df.v <- as.data.frame(v[, choices])
names(df.u) <- c('xvar', 'yvar')
names(df.v) <- names(df.u)
if(pc.biplot) {
df.u <- df.u * nobs.factor
}
# The circle radius is fixed at 1 in this version (circle.prob is not used).
r <- 1
# Scale directions
# v.scale holds the squared row norms of the scaled loadings over ALL
# components, not only `choices`; dividing by the largest norm keeps every
# plotted arrow at length <= 1.
v.scale <- rowSums(v^2)
df.v <- df.v / sqrt(max(v.scale))
## Scale Scores
# r.scale is the largest distance of any score from the origin; after the
# division the farthest point sits at radius 0.99, just inside the circle.
r.scale=sqrt(max(df.u[,1]^2+df.u[,2]^2))
df.u=.99*df.u/r.scale
# Change the labels for the axes
if(obs.scale == 0) {
u.axis.labs <- paste('standardized PC', choices, sep='')
} else {
u.axis.labs <- paste('PC', choices, sep='')
}
# Append the proportion of explained variance to the axis labels
# NOTE(review): this reads pcobj$sdev directly rather than d; for the 'PCA'
# branch above d is derived from pcobj$eig -- confirm pcobj$sdev exists there.
u.axis.labs <- paste(u.axis.labs,
sprintf('(%0.1f%% explained var.)',
100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2)))
# Score Labels
if(!is.null(labels)) {
df.u$labels <- labels
}
# Grouping variable
if(!is.null(groups)) {
df.u$groups <- groups
}
# Variable Names
if(varname.abbrev) {
df.v$varname <- abbreviate(rownames(v))
} else {
df.v$varname <- rownames(v)
}
# Variables for text label placement
# angle: direction of each arrow in degrees, from atan(yvar / xvar);
# hjust: depends on the sign of xvar and on varname.adjust.
df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar))
df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar)) / 2)
# Base plot
g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) +
xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal()
if(var.axes) {
# Draw circle
if(circle)
{
# theta runs from -pi to pi and back again.
# NOTE(review): `length = 50` relies on partial matching of seq()'s
# length.out argument.
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
# From here on the logical argument `circle` is replaced by this data frame.
circle <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta))
g <- g + geom_path(data = circle, color = muted('white'),
size = 1/2, alpha = 1/3)
}
# Draw directions
g <- g +
geom_segment(data = df.v,
aes(x = 0, y = 0, xend = xvar, yend = yvar),
arrow = arrow(length = unit(1/2, 'picas')),
color = muted('red'))
}
# Draw either labels or points
if(!is.null(df.u$labels)) {
if(!is.null(df.u$groups)) {
g <- g + geom_text(aes(label = labels, color = groups),
size = labels.size)
} else {
g <- g + geom_text(aes(label = labels), size = labels.size)
}
} else {
if(!is.null(df.u$groups)) {
g <- g + geom_point(aes(color = groups), alpha = alpha)
} else {
g <- g + geom_point(alpha = alpha)
}
}
# Overlay a concentration ellipse if there are groups
if(!is.null(df.u$groups) && ellipse) {
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
# Unit circle as a two-column matrix; reassigns `circle` once more.
circle <- cbind(cos(theta), sin(theta))
# Per group: groups with fewer than 2 rows are skipped; a group of exactly
# 2 rows uses the full covariance matrix; larger groups use only the two
# variances (diagonal matrix).
ell <- ddply(df.u, 'groups', function(x) {
if(nrow(x) < 2) {
return(NULL)
} else if(nrow(x) == 2) {
sigma <- var(cbind(x$xvar, x$yvar))
} else {
sigma <- diag(c(var(x$xvar), var(x$yvar)))
}
mu <- c(mean(x$xvar), mean(x$yvar))
ed <- sqrt(qchisq(ellipse.prob, df = 2))
# Transform the unit circle by chol(sigma), scale by ed, shift to the mean.
data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'),
groups = x$groups[1])
})
names(ell)[1:2] <- c('xvar', 'yvar')
g <- g + geom_path(data = ell, aes(color = groups, group = groups))
}
# Label the variable axes
if(var.axes) {
g <- g +
geom_text(data = df.v,
aes(label = varname, x = xvar, y = yvar,
angle = angle, hjust = hjust),
color = 'darkred', size = varname.size)
}
# Change the name of the legend for groups
# if(!is.null(groups)) {
# g <- g + scale_color_brewer(name = deparse(substitute(groups)),
# palette = 'Dark2')
# }
# TODO: Add a second set of axes
return(g)
}