Increasing vertical space between segments in base R - r

I'm trying to increase the space between each segment for each successive interval to avoid overplotting. Not sure how to approach this so that it will increment vertically in the loop.
Here is my code and some reproducible data:
set.seed(200)
x <- rnorm(100, 10)
truemean <- mean(x)
mat <- replicate(100, t.test(sample(x, rep = T))$conf.int)
mat <- t(mat)
myfunc <- function(mat, truemean) {
plot(x = c(min(mat[ , 1]), max(mat[ , 2])),
y = c(1, 100),
type = "n",
xlab = "0:100",
ylab = "0:100")
abline(v = truemean)
for (i in 1:nrow(mat)) {
if (mat[i, 1] <= truemean & mat[i, 2] >= truemean) {
segments(x0 = mat[i, 1], y0 = i,
x1 = mat[i, 2], y1 = i,
col = "blue",
lwd = 2)
} else {
segments(x0 = mat[i, 1], y0 = i,
x1 = mat[i, 2], y1 = i,
col = "red",
lwd = 2)
}
}
}
myfunc(mat, truemean)

You can certainly add anything you want in the segment call, but I'm not sure what you are asking. First we can greatly simplify your code:
set.seed(200)
x <- rnorm(100, 10)
truemean <- mean(x)
mat <- replicate(100, t.test(sample(x, rep = T))$conf.int)
mat <- t(mat)
yval <- seq(dim(mat)[1])
clr <- ifelse(mat[, 1] <= truemean & mat[, 2] >= truemean, "blue", "red")
plot(NA, xlim=c(min(mat[ , 1]), max(mat[ , 2])), ylim=c(0, length(yval)), type="n",
xlab="Conf Int", ylab="Trials")
abline(v=truemean)
segments(mat[, 1], yval, mat[, 2], yval, col=clr)
This produces the following plot:
You could replace yval with yval+.1 in the segments function to shift everything up. If there are so many lines that they overlap you can increase the height of the plot to make more room.

Related

Changing branch length in dendrogram (pheatmap)

I am trying to plot a heatmap with the library pheatmap in R.
I think that by default the branch length is proportional to the "dissimilarity" of the clusters that got merged at this step. I would like to chance that, so it is a fixed value because for my purpose it looks very weird!
If anyone has an idea how I can fix this, I would be very happy.
Here is a sample code
library(pheatmap)
test = matrix(rnorm(6000), 100, 60)
pheatmap(test)
Cheers!
Here is an example of two column groups with high dissimilarity:
library(pheatmap)
test = cbind(matrix(rnorm(3000), 100, 30),
matrix(rnorm(3000)+10, 100, 30))
pheatmap(test)
TIn pheatmapthe dendrogram is plotted by the pheatmap:::draw_dendrogram function
and branch lengths are stored in the h object.
Below I define equal-length branches adding the command
hc$height <- cumsum(rep(1/length(hc$height), length(hc$height)))
as follows:
draw_dendrogram <- function(hc, gaps, horizontal = T) {
# Define equal-length branches
hc$height <- cumsum(rep(1/length(hc$height), length(hc$height)))
h = hc$height/max(hc$height)/1.05
m = hc$merge
o = hc$order
n = length(o)
m[m > 0] = n + m[m > 0]
m[m < 0] = abs(m[m < 0])
dist = matrix(0, nrow = 2 * n - 1, ncol = 2, dimnames = list(NULL,
c("x", "y")))
dist[1:n, 1] = 1/n/2 + (1/n) * (match(1:n, o) - 1)
for (i in 1:nrow(m)) {
dist[n + i, 1] = (dist[m[i, 1], 1] + dist[m[i, 2], 1])/2
dist[n + i, 2] = h[i]
}
draw_connection = function(x1, x2, y1, y2, y) {
res = list(x = c(x1, x1, x2, x2), y = c(y1, y, y, y2))
return(res)
}
x = rep(NA, nrow(m) * 4)
y = rep(NA, nrow(m) * 4)
id = rep(1:nrow(m), rep(4, nrow(m)))
for (i in 1:nrow(m)) {
c = draw_connection(dist[m[i, 1], 1], dist[m[i, 2], 1],
dist[m[i, 1], 2], dist[m[i, 2], 2], h[i])
k = (i - 1) * 4 + 1
x[k:(k + 3)] = c$x
y[k:(k + 3)] = c$y
}
x = pheatmap:::find_coordinates(n, gaps, x * n)$coord
y = unit(y, "npc")
if (!horizontal) {
a = x
x = unit(1, "npc") - y
y = unit(1, "npc") - a
}
res = polylineGrob(x = x, y = y, id = id)
return(res)
}
# Replace the non-exported function `draw_dendrogram` in `pheatmap`:
assignInNamespace(x="draw_dendrogram", value=draw_dendrogram, ns="pheatmap")
pheatmap(test)
The result is:

Adding annotation to R plots with layout

I am plotting multiple panels of variable size in R, and using layout to accomplish this. I, however, would like to add arrows and text between the plots but seem to, in my best attempts using arrow and text functions, have the annotations being cut-off at the end of the axis I'm plotting on.
MWE:
circle <- function(){
theta <- seq(from = 0, to = 2*pi - pi/1000, by = 2*pi/1000)
x <- cos(theta)
y <- sin(theta)
circ <- list(x, y)
}
shape_plot <- function(xs, ys, typ){
plot(xs, ys, type=typ, axes=FALSE, xlab='', ylab='', asp=1, lwd=4)
}
sample1 <- function(data, n){
pts <- sort(floor(runif(n, min=1, max=1000)))
xs <- data[[1]]
x <- xs[pts] + .1*runif(n, min=-1, max=1)
x <- c(x, x[1])
ys <- data[[2]]
y <- ys[pts] + .1*runif(n, min=-1, max=1)
y <- c(y, y[1])
samp <- list(x, y)
}
layout(matrix(c(1,2, 1,3, 1,4), 3, 2, byrow = TRUE))
circ <- circle()
shape_plot(circ[[1]], circ[[2]], 'l')
label <- rep('circle', 3)
for (i in 1:3){
samp <- sample1(circ, 50)
shape_plot(samp[[1]], samp[[2]], 'p')
}
This returns the following figure:
What I would like is something like this (but less ugly, and not created in GIMP):
Thanks!
In response to a comment by #rawr, I came up with the following modifications which enabled me to accomplish this goal:
...
layout(matrix(c(1,2, 1,3, 1,4), 3, 2, byrow = TRUE))
par(xpd=NA)
...
...
arrows(x0=1.5, x1=2.5, y0=1, y1=2, length=0.1, lwd = lweight)
arrows(x0=1.5, x1=2.5, y0=0, y1=0, length=0.1, lwd = lweight)
arrows(x0=1.5, x1=2.5, y0=-1, y1=-2, length=0.1, lwd = lweight)
text(x=1.8, y=1.8, expression('s'[1]^'a'), cex=2)
text(x=1.8, y=0.3, expression('s'[2]^'a'), cex=2)
text(x=1.8, y=-1.8, expression('s'[3]^'a'), cex=2)
...
Result:
Using layout seemed to cause problems.
Below is a modified version of your MWE that I think does what you are looking for.
circle <- function(){
theta <- seq(from = 0, to = 2*pi - pi/1000, by = 2*pi/1000)
x <- cos(theta)
y <- sin(theta)
circ <- data.frame(x, y)
}
shape_plot <- function(xs, ys, typ){
plot(xs, ys, type=typ, axes=FALSE, xlab='', ylab='', asp=1, lwd=4)
}
sample1 <- function(data, n){
pts <- sort(floor(runif(n, min=1, max=1000)))
xs <- data[[1]]
x <- xs[pts] + .1*runif(n, min=-1, max=1)
x <- c(x, x[1])
ys <- data[[2]]
y <- ys[pts] + .1*runif(n, min=-1, max=1)
y <- c(y, y[1])
samp <- data.frame(x, y)
}
circ <- circle()
shape_plot(circ[[1]], circ[[2]], 'l')
label <- rep('circle', 3)
samp <- list()
nudge <- cbind(5,c(3,0,-3))
for (i in 1:3){
s <- sample1(circ, 50)
samp[[i]] <- t(apply(s, 1, function(x) x + nudge[i,]))
}
samp <- Reduce(rbind, samp)
tot <- rbind(circ, samp)
shape_plot(tot[[1]], tot[[2]], typ = 'n')
points(circ, type = 'l')
points(samp, type = 'p')
arrows(1.2,1,3.7,2.5)
arrows(1.2,0,3.7,0)
arrows(1.2,-1,3.7,-2.5)
text(2.5,2, label = "arrow 1", srt = atan(1/1.5)*180/pi)
text(2.5,0.25, label = "arrow 2")
text(2.5,-2, label = "arrow 3", srt = atan(-1/1.5)*180/pi)

How to plot a CDF functon from PDF in R

I have the following function:
fx <- function(x) {
if(x >= 0 && x < 3) {
res <- 0.2;
} else if(x >=3 && x < 5) {
res <- 0.05;
} else if(x >= 5 && x < 6) {
res <- 0.15;
} else if(x >= 7 && x < 10) {
res <- 0.05;
} else {
res <- 0;
}
return(res);
}
How can I plot it's CDF function on the interval [0,10]?
Try
fx <- Vectorize(fx)
grid <- 0:10
p <- fx(grid)
cdf <- cumsum(p)
plot(grid, cdf, type = 'p', ylim = c(0, 1), col = 'steelblue',
xlab = 'x', ylab = expression(F(x)), pch = 19, las = 1)
segments(x0 = grid, x1 = grid + 1, y0 = cdf)
segments(x0 = grid + 1, y0 = c(cdf[-1], 1), y1 = cdf, lty = 2)
To add a bit accuracy to #Martin Schmelzer's answer. A cummulative distribution function(CDF)
evaluated at x, is the probability that X will take a value less than
or equal to x
So to get CDF from Probability Density Function(PDF), you need to integrate on PDF:
fx <- Vectorize(fx)
dx <- 0.01
x <- seq(0, 10, by = dx)
plot(x, cumsum(fx(x) * dx), type = "l", ylab = "cummulative probability", main = "My CDF")
Just adding up on the previous answers and using ggplot
# cdf
Fx <- function(x, dx) {
cumsum(fx(x)*dx)
}
fx <- Vectorize(fx)
dx <- 0.01
x <- seq(0, 10, dx)
df <- rbind(data.frame(x, value=fx(x), func='pdf'),
data.frame(x, value=Fx(x, dx), func='cdf'))
library(ggplot2)
ggplot(df, aes(x, value, col=func)) +
geom_point() + geom_line() + ylim(0, 1)

R mark quantiles in a plot

I would like to point out multiple quantiles in this plot as in the this link.
x <- rnorm(1000,0,1)
me <- mean(x)
s <- sd(x)
y <- 1/sqrt(2*pi)/s*exp(-(x-me)^2/2/s/s)
sx <- sort(x)
n <- seq(1, length(sx),1)
Px <- (3*n-1)/(3*length(sx)+1)
plot(sx,Px)
lines(sx, pnorm(sx, mean=me, sd = s), lwd=3, col="red")
Which function would you recommend? I calculated so quantiles so far.
qq<-quantile(x,c(0.5,0.95))
You can try:
# data
set.seed(1221)
x <- rnorm(1000,0,1)
# [your code to plot the graph]
# Quantiles
P <- c(0.05, .25, 0.5, 0.75, 0.95) # The quantiles you want to calculate
qq <- quantile(x, P)
df <- cbind(P, qq)
# the segments
apply(df, 1, function(x) segments(x0 = x[2], x1 = x[2], y0 = -10, y1 = x[1], lty = 2, col = 2))
apply(df, 1, function(x, y) segments(x0 = y-10, x1 = x[2], y0 = x[1], y1 = x[1], lty = 2, col = 2), min(x))
Edit
# add the text, not that elegant, but it works:
sapply(1:length(names(qq)), function(x) text(df[x,2], -0.08, bquote(Q[ ~ .(names(qq)[x])]), xpd = TRUE))
At its crudest, you can:
lines(x=c(qq[1], qq[1]), y=c(-2, 0.5), col="darkgreen", lwd=2, lty="dotted")
lines(x=c(-5, 0), c(0.5, 0.5), col="darkgreen", lwd=2, lty="dotted")
And have a play with axis to add custom labels.

How to plot bivariate normal distribution with expanding ellipses

How to plot bivariate normal distribution with expanding ellipses and add 5%, 25%, 50%, 75% and
95% label in the plot? Thank you!
You can create a contour plot using an R package called mvtnorm.
Let's say you're trying to plot a bivariate normal distribution where mu_x = 1 and mu_y = 1 and variance matrix is c(2,1,1,1). Generate 100 observations for x,y,z. You can create a contour plot for this scenario as such:
library(mvtnorm)
x.points <- seq(-3,3,length.out=100)
y.points <- x.points
z <- matrix(0,nrow=100,ncol=100)
mu <- c(1,1)
sigma <- matrix(c(2,1,1,1),nrow=2)
for (i in 1:100) {
for (j in 1:100) {
z[i,j] <- dmvnorm(c(x.points[i],y.points[j]),
mean=mu,sigma=sigma)
}
}
contour(x.points,y.points,z)
Here is a solution that computes the contours at the levels that you want
#####
# Compute points and rotation matrix
# input
theta <- c(1, 2)
sigma <- diag(c(3^2, 2^2))
sigma[2, 1] <- sigma[1, 2] <- sqrt(sigma[1, 1] * sigma[2, 2]) * .5
# we start from points on the unit circle
n_points <- 100
xy <- cbind(sin(seq(0, 2 * pi, length.out = n_points)),
cos(seq(0, 2 * pi, length.out = n_points)))
# then we scale the dimensions
ev <- eigen(sigma)
xy[, 1] <- xy[, 1] * 1
xy[, 2] <- xy[, 2] * sqrt(min(ev$values) / max(ev$values))
# then rotate
phi <- atan(ev$vectors[2, 1] / ev$vectors[1, 1])
R <- matrix(c(cos(phi), sin(phi), -sin(phi), cos(phi)), 2)
xy <- tcrossprod(R, xy)
# the quantiles you ask for
chi_vals <- qchisq(c(.05, .25, .50, .75, .95), df = 2) * max(ev$values)
#####
# Plot contours
par(mar = c(4.5, 4, .5, .5))
plot(c(-8, 10), c(-4, 8), type = "n", xlab = "x", ylab = "y")
for(r in sqrt(chi_vals))
lines(r * xy[1, ] + theta[1], r * xy[2, ] + theta[2], lty = 1)
Detailed Explanation
theta <- c(1, 2)
sigma <- diag(c(3^2, 2^2))
sigma[2, 1] <- sigma[1, 2] <- sqrt(sigma[1, 1] * sigma[2, 2]) * .5
# we start from points on the unit circle
n_points <- 100
xy <- cbind(sin(seq(0, 2 * pi, length.out = n_points)),
cos(seq(0, 2 * pi, length.out = n_points)))
par(mar = c(5, 5, 3, 1), mfcol = c(2, 2))
plot(xy[, 1], xy[, 2], xlab = "x", ylab = "y", xlim = c(-8, 10), bty = "l",
ylim = c(-4, 8), main = "Unit circle", type = "l")
arrows(c(0, 0), c(0, 0), c(0, 1), c(1, 0), length = .05)
# this is very much like PCA. We scale the dimensions such that the first
# dimension has length one and the others are scaled proportional to the square
# root of their eigenvalue relative to the largest eigenvalue
ev <- eigen(sigma)
scal <- sqrt(min(ev$values) / max(ev$values))
xy[, 2] <- xy[, 2] * scal
plot(xy[, 1], xy[, 2], xlab = "x", ylab = "y", xlim = c(-8, 10), bty = "l",
ylim = c(-4, 8), main = "Scaled", type = "l")
arrows(c(0, 0), c(0, 0), c(0, 1), c(scal, 0),
length = .05)
# then we rotate phi degrees to account for the correlation coefficient. See
# https://en.wikipedia.org/wiki/Rotation_matrix
# and notice that we compute the angle of the first eigenvector
phi <- atan(ev$vectors[2, 1] / ev$vectors[1, 1])
R <- matrix(c(cos(phi), sin(phi), -sin(phi), cos(phi)), 2)
xy <- tcrossprod(R, xy) # R %*% t(xy)
plot(xy[1, ], xy[2, ], xlab = "x", ylab = "y", xlim = c(-8, 10), bty = "l",
ylim = c(-4, 8), main = "Rotated", type = "l")
arrs <- tcrossprod(R, matrix(c(0, 1, scal, 0), 2L))
arrows(c(0, 0), c(0, 0), arrs[1, ], arrs[2, ], length = .05)
# the right size of each circle can now be found by taking the wanted
# quantile from the chi-square distribution with two degrees of freedom
# multiplied by the largest eigenvalue
plot(c(-8, 10), c(-4, 8), type = "n", xlab = "x", ylab = "y", main = "Final",
bty = "l")
chi_vals <- qchisq(c(.05, .25, .50, .75, .95), df = 2) * max(ev$values)
for(r in sqrt(chi_vals))
lines(r * xy[1, ] + theta[1], r * xy[2, ] + theta[2], lty = 1)

Resources