R mark quantiles in a plot - r

I would like to point out multiple quantiles in this plot as in the this link.
x <- rnorm(1000,0,1)
me <- mean(x)
s <- sd(x)
y <- 1/sqrt(2*pi)/s*exp(-(x-me)^2/2/s/s)
sx <- sort(x)
n <- seq(1, length(sx),1)
Px <- (3*n-1)/(3*length(sx)+1)
plot(sx,Px)
lines(sx, pnorm(sx, mean=me, sd = s), lwd=3, col="red")
Which function would you recommend? I calculated so quantiles so far.
qq<-quantile(x,c(0.5,0.95))

You can try:
# data
set.seed(1221)
x <- rnorm(1000,0,1)
# [your code to plot the graph]
# Quantiles
P <- c(0.05, .25, 0.5, 0.75, 0.95) # The quantiles you want to calculate
qq <- quantile(x, P)
df <- cbind(P, qq)
# the segments
apply(df, 1, function(x) segments(x0 = x[2], x1 = x[2], y0 = -10, y1 = x[1], lty = 2, col = 2))
apply(df, 1, function(x, y) segments(x0 = y-10, x1 = x[2], y0 = x[1], y1 = x[1], lty = 2, col = 2), min(x))
Edit
# add the text, not that elegant, but it works:
sapply(1:length(names(qq)), function(x) text(df[x,2], -0.08, bquote(Q[ ~ .(names(qq)[x])]), xpd = TRUE))

At its crudest, you can:
lines(x=c(qq[1], qq[1]), y=c(-2, 0.5), col="darkgreen", lwd=2, lty="dotted")
lines(x=c(-5, 0), c(0.5, 0.5), col="darkgreen", lwd=2, lty="dotted")
And have a play with axis to add custom labels.

Related

Is there a way to add a line to a specific subplot in R

I'm new to R, trying to understand how plotting works.
I'm trying to plot a graph consisting of three subgraphs. I use the par(mfrow=c(1, 3)) to deal with that. But inside a loop, I want to add various lines to the three graphs. How do I choose which of the three subplots I apply the lines command to?
As an example, see the code below. All the lines commands get applied to the third subgraph, but this is of course not desired. See also the commands # apply lines to first figure! but how? and # apply lines to second figure! but how?
set.seed(1)
n <- 100
x <- seq(0, 4, length.out = n)
no_datasets <- 50
par(mfrow=c(1, 3))
for (i in 1:no_datasets) {
x <- seq(0, 4, length.out = n) # x <- seq(-pi, pi, length.out = n)
y <- sin(x)
errs <- rnorm(n, mean = 0, sd = 0.25) # rnorm(n) generates random numbers whose distribution is normal
t <- y + errs
df <- data.frame(x, y, t, errs)
model1 <- lm(t ~ poly(x, 1), data = df)
model5 <- lm(t ~ poly(x, 5), data = df)
model25 <- lm(t ~ poly(x, 25), data = df)
if (i == 1) {
plot(df$x, df$y, xlab = "x", ylab="", col="black", type="l")
plot(df$x, df$y, xlab = "x", ylab="", col="black", type="l")
plot(df$x, df$y, xlab = "x", ylab="", col="black", type="l")
}
t_hat <- predict(model1, df)
# apply lines to first figure! but how?
lines(df$x, t_hat, col="blue")
t_hat <- predict(model5, df)
# apply lines to second figure! but how?
lines(df$x, t_hat, col="blue")
t_hat <- predict(model25, df)
# apply lines to third figure!
lines(df$x, t_hat, col="blue")
}
Since the third plot is last, then it is just adding all the lines to the last plot. But if you nest everything in an if statement for each plot, then you will get the lines on each respective plot.
set.seed(1)
n <- 100
x <- seq(0, 4, length.out = n)
no_datasets <- 50
par(mfrow=c(1, 3))
for (i in 1:no_datasets) {
x <- seq(0, 4, length.out = n) # x <- seq(-pi, pi, length.out = n)
y <- sin(x)
errs <- rnorm(n, mean = 0, sd = 0.25) # rnorm(n) generates random numbers whose distribution is normal
t <- y + errs
df <- data.frame(x, y, t, errs)
model1 <- lm(t ~ poly(x, 1), data = df)
model5 <- lm(t ~ poly(x, 5), data = df)
model25 <- lm(t ~ poly(x, 25), data = df)
if (i == 1) {
plot(df$x, df$y, xlab = "x", ylab="", col="black", type="l")
t_hat <- predict(model1, df)
lines(df$x, t_hat, col="blue")
}
if (i == 2) {
plot(df$x, df$y, xlab = "x", ylab="", col="black", type="l")
t_hat <- predict(model5, df)
lines(df$x, t_hat, col="blue")
}
if (i == 3) {
plot(df$x, df$y, xlab = "x", ylab="", col="black", type="l")
t_hat <- predict(model25, df)
lines(df$x, t_hat, col="blue")
}
}
Ok, if I understand AndrewGB, it is not possible. So then the answer should be:
set.seed(1)
n <- 100
no_datasets <- 50
par(mfrow=c(1, 3))
polynomials <- c(1, 5, 25)
x <- seq(0, 4, length.out = n) # x <- seq(-pi, pi, length.out = n)
y <- sin(x)
for (i in 1:length(polynomials)) {
degree <- polynomials[i]
for (j in 1:no_datasets) {
errs <- rnorm(n, mean = 0, sd = 0.25) # rnorm(n) generates random numbers whose distribution is normal
t <- y + errs
df <- data.frame(x, y, t, errs)
model <- lm(t ~ poly(x, degree), data = df)
if (j == 1) {
plot(df$x, df$y, xlab = "x", ylab="", col="black", type="l")
}
t_hat <- predict(model, df)
lines(df$x, t_hat, col="blue")
}
}

Increasing vertical space between segments in base R

I'm trying to increase the space between each segment for each successive interval to avoid overplotting. Not sure how to approach this so that it will increment vertically in the loop.
Here is my code and some reproducible data:
set.seed(200)
x <- rnorm(100, 10)
truemean <- mean(x)
mat <- replicate(100, t.test(sample(x, rep = T))$conf.int)
mat <- t(mat)
myfunc <- function(mat, truemean) {
plot(x = c(min(mat[ , 1]), max(mat[ , 2])),
y = c(1, 100),
type = "n",
xlab = "0:100",
ylab = "0:100")
abline(v = truemean)
for (i in 1:nrow(mat)) {
if (mat[i, 1] <= truemean & mat[i, 2] >= truemean) {
segments(x0 = mat[i, 1], y0 = i,
x1 = mat[i, 2], y1 = i,
col = "blue",
lwd = 2)
} else {
segments(x0 = mat[i, 1], y0 = i,
x1 = mat[i, 2], y1 = i,
col = "red",
lwd = 2)
}
}
}
myfunc(mat, truemean)
You can certainly add anything you want in the segment call, but I'm not sure what you are asking. First we can greatly simplify your code:
set.seed(200)
x <- rnorm(100, 10)
truemean <- mean(x)
mat <- replicate(100, t.test(sample(x, rep = T))$conf.int)
mat <- t(mat)
yval <- seq(dim(mat)[1])
clr <- ifelse(mat[, 1] <= truemean & mat[, 2] >= truemean, "blue", "red")
plot(NA, xlim=c(min(mat[ , 1]), max(mat[ , 2])), ylim=c(0, length(yval)), type="n",
xlab="Conf Int", ylab="Trials")
abline(v=truemean)
segments(mat[, 1], yval, mat[, 2], yval, col=clr)
This produces the following plot:
You could replace yval with yval+.1 in the segments function to shift everything up. If there are so many lines that they overlap you can increase the height of the plot to make more room.

How to plot bivariate normal distribution with expanding ellipses

How to plot bivariate normal distribution with expanding ellipses and add 5%, 25%, 50%, 75% and
95% label in the plot? Thank you!
You can create a contour plot using an R package called mvtnorm.
Let's say you're trying to plot a bivariate normal distribution where mu_x = 1 and mu_y = 1 and variance matrix is c(2,1,1,1). Generate 100 observations for x,y,z. You can create a contour plot for this scenario as such:
library(mvtnorm)
x.points <- seq(-3,3,length.out=100)
y.points <- x.points
z <- matrix(0,nrow=100,ncol=100)
mu <- c(1,1)
sigma <- matrix(c(2,1,1,1),nrow=2)
for (i in 1:100) {
for (j in 1:100) {
z[i,j] <- dmvnorm(c(x.points[i],y.points[j]),
mean=mu,sigma=sigma)
}
}
contour(x.points,y.points,z)
Here is a solution that computes the contours at the levels that you want
#####
# Compute points and rotation matrix
# input
theta <- c(1, 2)
sigma <- diag(c(3^2, 2^2))
sigma[2, 1] <- sigma[1, 2] <- sqrt(sigma[1, 1] * sigma[2, 2]) * .5
# we start from points on the unit circle
n_points <- 100
xy <- cbind(sin(seq(0, 2 * pi, length.out = n_points)),
cos(seq(0, 2 * pi, length.out = n_points)))
# then we scale the dimensions
ev <- eigen(sigma)
xy[, 1] <- xy[, 1] * 1
xy[, 2] <- xy[, 2] * sqrt(min(ev$values) / max(ev$values))
# then rotate
phi <- atan(ev$vectors[2, 1] / ev$vectors[1, 1])
R <- matrix(c(cos(phi), sin(phi), -sin(phi), cos(phi)), 2)
xy <- tcrossprod(R, xy)
# the quantiles you ask for
chi_vals <- qchisq(c(.05, .25, .50, .75, .95), df = 2) * max(ev$values)
#####
# Plot contours
par(mar = c(4.5, 4, .5, .5))
plot(c(-8, 10), c(-4, 8), type = "n", xlab = "x", ylab = "y")
for(r in sqrt(chi_vals))
lines(r * xy[1, ] + theta[1], r * xy[2, ] + theta[2], lty = 1)
Detailed Explanation
theta <- c(1, 2)
sigma <- diag(c(3^2, 2^2))
sigma[2, 1] <- sigma[1, 2] <- sqrt(sigma[1, 1] * sigma[2, 2]) * .5
# we start from points on the unit circle
n_points <- 100
xy <- cbind(sin(seq(0, 2 * pi, length.out = n_points)),
cos(seq(0, 2 * pi, length.out = n_points)))
par(mar = c(5, 5, 3, 1), mfcol = c(2, 2))
plot(xy[, 1], xy[, 2], xlab = "x", ylab = "y", xlim = c(-8, 10), bty = "l",
ylim = c(-4, 8), main = "Unit circle", type = "l")
arrows(c(0, 0), c(0, 0), c(0, 1), c(1, 0), length = .05)
# this is very much like PCA. We scale the dimensions such that the first
# dimension has length one and the others are scaled proportional to the square
# root of their eigenvalue relative to the largest eigenvalue
ev <- eigen(sigma)
scal <- sqrt(min(ev$values) / max(ev$values))
xy[, 2] <- xy[, 2] * scal
plot(xy[, 1], xy[, 2], xlab = "x", ylab = "y", xlim = c(-8, 10), bty = "l",
ylim = c(-4, 8), main = "Scaled", type = "l")
arrows(c(0, 0), c(0, 0), c(0, 1), c(scal, 0),
length = .05)
# then we rotate phi degrees to account for the correlation coefficient. See
# https://en.wikipedia.org/wiki/Rotation_matrix
# and notice that we compute the angle of the first eigenvector
phi <- atan(ev$vectors[2, 1] / ev$vectors[1, 1])
R <- matrix(c(cos(phi), sin(phi), -sin(phi), cos(phi)), 2)
xy <- tcrossprod(R, xy) # R %*% t(xy)
plot(xy[1, ], xy[2, ], xlab = "x", ylab = "y", xlim = c(-8, 10), bty = "l",
ylim = c(-4, 8), main = "Rotated", type = "l")
arrs <- tcrossprod(R, matrix(c(0, 1, scal, 0), 2L))
arrows(c(0, 0), c(0, 0), arrs[1, ], arrs[2, ], length = .05)
# the right size of each circle can now be found by taking the wanted
# quantile from the chi-square distribution with two degrees of freedom
# multiplied by the largest eigenvalue
plot(c(-8, 10), c(-4, 8), type = "n", xlab = "x", ylab = "y", main = "Final",
bty = "l")
chi_vals <- qchisq(c(.05, .25, .50, .75, .95), df = 2) * max(ev$values)
for(r in sqrt(chi_vals))
lines(r * xy[1, ] + theta[1], r * xy[2, ] + theta[2], lty = 1)

Bivariate normal with marginal and conditional densities

I am trying to create a figure in R. It consists of the contour plot of a bivariate normal distribution for the vector variable (x,y) along with the marginals f(x), f(y); the conditional distribution f(y|x) and the line through the conditioning value X=x (it will be a simple abline(v=x)).
I already got the contour and the abline:
but I don't know how to continue.
Here is the code I used so far:
bivariate.normal <- function(x, mu, Sigma) {
exp(-.5 * t(x-mu) %*% solve(Sigma) %*% (x-mu)) / sqrt(2 * pi * det(Sigma))
}
mu <- c(0,0)
Sigma <- matrix(c(1,.8,.8,1), nrow=2)
x1 <- seq(-3, 3, length.out=50)
x2 <- seq(-3, 3, length.out=50)
z <- outer(x1, x2, FUN=function(x1, x2, ...){
apply(cbind(x1,x2), 1, bivariate.normal, ...)
}, mu=mu, Sigma=Sigma)
contour(x1, x2, z, col="blue", drawlabels=FALSE, nlevels=4,
xlab=expression(x[1]), ylab=expression(x[2]), lwd=1)
abline(v=.7, col=1, lwd=2, lty=2)
text(2, -2, labels=expression(x[1]==0.7))
It would have been helpful if you had provided the function to calculate the marginal distribution. I may have got the marginal distribution function wrong, but I think this gets you what you want:
par(lwd=2,mgp=c(1,1,0))
# Modified to extract diagonal.
bivariate.normal <- function(x, mu, Sigma)
exp(-.5 * diag(t(x-mu) %*% solve(Sigma) %*% (x-mu))) / sqrt(2 * pi * det(Sigma))
mu <- c(0,0)
Sigma <- matrix(c(1,.8,.8,1), nrow=2)
x1 <- seq(-3, 3, length.out=50)
x2 <- seq(-3, 3, length.out=50)
plot(1:10,axes=FALSE,frame.plot=TRUE,lwd=1)
# z can now be calculated much easier.
z<-bivariate.normal(t(expand.grid(x1,x2)),mu,Sigma)
dim(z)<-c(length(x1),length(x2))
contour(x1, x2, z, col="#4545FF", drawlabels=FALSE, nlevels=4,
xlab=expression(x[1]), ylab=expression(x[2]), lwd=2,xlim=range(x1),ylim=range(x2),frame.plot=TRUE,axes=FALSE,xaxs = "i", yaxs = "i")
axis(1,labels=FALSE,lwd.ticks=2)
axis(2,labels=FALSE,lwd.ticks=2)
abline(v=.7, col=1, lwd=2, lty=2)
text(2, -2, labels=expression(x[1]==0.7))
# Dotted
f<-function(x1,x2) bivariate.normal(t(cbind(x1,x2)),mu,Sigma)
x.s<-seq(from=min(x1),to=max(x1),by=0.1)
vals<-f(x1=0.7,x2=x.s)
lines(vals-abs(min(x1)),x.s,lty=2,lwd=2)
# Marginal probability distribution: http://mpdc.mae.cornell.edu/Courses/MAE714/biv-normal.pdf
# Please check this, I'm not sure it is correct.
marginal.x1<-function(x) exp((-(x-mu[1])^2)/2*(Sigma[1,2]^2)) / (Sigma[1,2]*sqrt(2*pi))
marginal.x2<-function(x) exp((-(x-mu[1])^2)/2*(Sigma[2,1]^2)) / (Sigma[2,1]*sqrt(2*pi))
# Left side solid
vals<-marginal.x2(x.s)
lines(vals-abs(min(x1)),x.s,lty=1,lwd=2)
# Bottom side solid
vals<-marginal.x1(x.s)
lines(x.s,vals-abs(min(x2)),lty=1,lwd=2)
My solution in ggplot2, inspired in this post
rm(list=ls())
options(max.print=999999)
library(pacman)
p_load(tidyverse)
p_load(mvtnorm)
my_mean<-c(25,65)
mycors<-seq(-1,1,by=.25)
sd_vec<-c(5,7)
i<-3
temp_cor<-matrix(c(1,mycors[i],
mycors[i],1),
byrow = T,ncol=2)
V<-sd_vec %*% t(sd_vec) *temp_cor
###data for vertical curve
my_dnorm<- function(x, mean = 0, sd = 1, log = FALSE, new_loc, multplr){
new_loc+dnorm(x, mean , sd, log)*multplr
}
##margina Y distribution
yden<-data.frame(y=seq(48,82,length.out = 100),x=my_dnorm(seq(48,82,length.out = 100),my_mean[2],sd_vec[2],new_loc=8,multplr=100))
##conditional distribution
my_int<-(my_mean[2]-(V[1,2]*my_mean[1]/V[1,1]))
my_slp<-V[1,2]/V[1,1]
givenX<-34
mu_givenX<-my_int+givenX*my_slp
sigma2_givenX<-(1-mycors[i]^2)*V[2,2]
y_givenX_range<-seq(mu_givenX-3*sqrt(sigma2_givenX),mu_givenX+3*sqrt(sigma2_givenX),length.out = 100)
yden_x<-data.frame(y=y_givenX_range, x=my_dnorm(y_givenX_range,mu_givenX,sqrt(sigma2_givenX),new_loc=givenX,multplr=80))
yden_x<-data.frame(y=y_givenX_range, x=my_dnorm(y_givenX_range,mu_givenX,sqrt(sigma2_givenX),new_loc=8,multplr=80))
###data for drawing ellipse
data.grid <- expand.grid(x = seq(my_mean[1]-3*sd_vec[1], my_mean[1]+3*sd_vec[1], length.out=200),
y = seq(my_mean[2]-3*sd_vec[2], my_mean[2]+3*sd_vec[2], length.out=200))
q.samp <- cbind(data.grid, prob = dmvnorm(data.grid, mean = my_mean, sigma = V))
###plot
ggplot(q.samp, aes(x=x, y=y, z=prob)) +
geom_contour() + theme_bw()+
geom_abline(intercept = my_int, slope = my_slp, color="red",
linetype="dashed")+
stat_function(fun = my_dnorm, n = 101, args = list(mean = my_mean[1], sd = sd_vec[1], new_loc=35,multplr=100),color=1) +
geom_path(aes(x=x,y=y), data = yden,inherit.aes = FALSE) +
geom_path(aes(x=x,y=y), data = yden_x,inherit.aes = FALSE,color=1,linetype="dashed") +
geom_vline(xintercept = givenX,linetype="dashed")
Created on 2020-10-31 by the reprex package (v0.3.0)

Ellipse representing horizontal and vertical error bars with R

In R, how to use ellipses to represent error bars (standard deviation) for x and y variables if only summary data, i.e. mean and SD for different data sets, are available. Any feedback is appreciated.
You can write your own function like this one:
draw_ellipse = function (mean_x, mean_y, sd_x, sd_y)
{
ellipse <- function (x) { sin(acos(x)) }
t = seq(-1, 1, length.out = 100)
el_y = sd_y*ellipse(t)
newx = mean_x + sd_x * t
polygon(c(newx, rev(newx)), c(mean_y + el_y, rev(mean_y - el_y)), col = "grey", border = NA)
}
You can use it very easily using apply():
x = runif(10)
y = runif(10)
sd_x = abs(rnorm(10, 0.1, 0.02))
sd_y = abs(rnorm(10, 0.05, 0.01))
plot(x, y)
df = data.frame(x, y, sd_x, sd_y)
apply(df, 1, function (x) { draw_ellipse(x[1], x[2], x[3], x[4]) })
points(x, y, pch = 3)
Solution for plotting ellipses with different colors:
draw_ellipse = function (mean_x, mean_y, sd_x, sd_y, colidx)
{
ellipse <- function (x) { sin(acos(x)) }
t = seq(-1, 1, length.out = 100)
el_y = sd_y*ellipse(t)
newx = mean_x + sd_x * t
polygon(c(newx, rev(newx)), c(mean_y + el_y, rev(mean_y - el_y)), col = as.character(colors[colidx]), border = NA)
}
x = runif(10)
y = runif(10)
sd_x = abs(rnorm(10, 0.1, 0.02))
sd_y = abs(rnorm(10, 0.05, 0.01))
plot(x, y)
colors = rainbow(length(x))
df = data.frame(x, y, sd_x, sd_y, colidx = 1:length(x))
apply(df, 1, function (x) { draw_ellipse(x[1], x[2], x[3], x["sd_y"], x["colidx"]) })
points(x, y, pch = 3)
You might like the function car::ellipse , i.e., the ellipse() function in the car package.
The ellipse function in the ellipse package will take summary information (including correlation) and provide the ellipse representing the confidence region.

Resources