Understanding "levels" in r contour function of bivariate distribution - r

I have trouble understanding how to set the levels in the plot of a bivariate distribution in R. The documentation states that I can choose the levels by setting a
numeric vector of levels at which to draw contour lines
Now I would like the contour to show the limit containing 95% of the density or mass. But if, in the example below (adapted from here), I set the vector as a <- c(.95, .90), the code runs without error yet no contour is drawn. If instead I set the vector as a <- c(.01, .05), the contours are displayed, but I am not sure I understand what the labels "0.01" and "0.05" mean with respect to the density.
library(mnormt)
x <- seq(-5, 5, 0.25)
y <- seq(-5, 5, 0.25)
mu1 <- c(0, 0)
sigma1 <- matrix(c(2, -1, -1, 2), nrow = 2)
f <- function(x, y) dmnorm(cbind(x, y), mu1, sigma1)
z <- outer(x, y, f)
a <- c(.01,.05)
contour(x, y, z, levels = a)

But I am not sure I understand what the labels "0.01" and "0.05" mean with respect to the density.
It means the points where the density is equal to 0.01 and 0.05. From help("contour"):
numeric vector of levels at which to draw contour lines.
So these are the function values at which to draw the lines (contours), i.e. the lines along which the function (here, the density) equals those levels. A simple example which may help is the function x + y:
y <- x <- seq(0, 1, length.out = 50)
z <- outer(x, y, `+`)
par(mar = c(5, 5, 1, 1))
contour(x, y, z, levels = c(0.5, 1, 1.5))
Now I would like the contour to show the limit containing 95% of the density or mass.
In your example, you can follow my answer here and draw the exact points:
# input
mu1 <- c(0, 0)
sigma1 <- matrix(c(2, -1, -1, 2), nrow = 2)
# we start from points on the unit circle
n_points <- 100
xy <- cbind(sin(seq(0, 2 * pi, length.out = n_points)),
            cos(seq(0, 2 * pi, length.out = n_points)))
# then we scale the dimensions
ev <- eigen(sigma1)
xy[, 1] <- xy[, 1] * 1
xy[, 2] <- xy[, 2] * sqrt(min(ev$values) / max(ev$values))
# then rotate
phi <- atan(ev$vectors[2, 1] / ev$vectors[1, 1])
R <- matrix(c(cos(phi), sin(phi), -sin(phi), cos(phi)), 2)
xy <- tcrossprod(R, xy)
# find the right length. You can change .95 to whichever quantile you want
chi_vals <- qchisq(.95, df = 2) * max(ev$values)
s <- sqrt(chi_vals)
par(mar = c(5, 5, 1, 1))
plot(s * xy[1, ] + mu1[1], s * xy[2, ] + mu1[2], lty = 1,
     type = "l", xlab = "x", ylab = "y")

The levels argument indicates where the lines are drawn, in terms of the z values of the bivariate normal density. Since max(z) is 0.09188815, the levels a <- c(.95, .90) lie above every value of z, so no contour lines can be drawn.
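As an aside (a sketch, not part of the original answers): for a multivariate normal the 95% region is the set where the squared Mahalanobis distance is at most qchisq(.95, 2), so the density value on its boundary is exp(-qchisq(.95, 2) / 2) / (2 * pi * sqrt(det(sigma1))). Passing that value as a level draws the 95% contour directly, reusing x, y, z and sigma1 from the question's code:
lvl95 <- exp(-qchisq(.95, df = 2) / 2) / (2 * pi * sqrt(det(sigma1)))
contour(x, y, z, levels = lvl95, labels = "95%")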
To draw the line delimiting 95% of the mass I used the ellipse() function from the mixtools package, as suggested in this post (second answer from the top).
library(mixtools)
library(mnormt)
x <- seq(-5, 5, 0.25)
y <- seq(-5, 5, 0.25)
mu1 <- c(0, 0)
sigma1 <- matrix(c(2, -1, -1, 2), nrow = 2)
f <- function(x, y) dmnorm(cbind(x, y), mu1, sigma1)
z <- outer(x, y, f)
a <- c(.01,.05)
contour(x, y, z, levels = a)
ellipse(mu=mu1, sigma=sigma1, alpha = .05, npoints = 250, col="red")

I also found another solution in the book "Applied Multivariate Statistics with R" by Daniel Zelterman. Note that this code draws a joint confidence ellipse for the mean (the chi-squared critical value is divided by n), which is a different region from an ellipse containing 95% of the data.
# Figure 6.5: Bivariate confidence ellipse
library(datasets)
library(MASS)
library(MVA)
#> Loading required package: HSAUR2
#> Loading required package: tools
biv <- swiss[, 2 : 3] # Extract bivariate data
bivCI <- function(s, xbar, n, alpha, m) {
  # returns m (x, y) coordinates of the 1 - alpha joint confidence ellipse of the mean
  x <- sin(2 * pi * (0:(m - 1)) / (m - 1))   # m points on a unit circle
  y <- cos(2 * pi * (0:(m - 1)) / (m - 1))
  cv <- qchisq(1 - alpha, 2)                 # chi-squared critical value
  cv <- cv / n                               # value of the quadratic form
  for (i in 1:m) {
    pair <- c(x[i], y[i])                    # ith (x, y) pair
    q <- pair %*% solve(s, pair)             # quadratic form
    x[i] <- x[i] * sqrt(cv / q) + xbar[1]
    y[i] <- y[i] * sqrt(cv / q) + xbar[2]
  }
  return(cbind(x, y))
}
### pdf(file = "bivSwiss.pdf")
plot(biv, col = "red", pch = 16, cex.lab = 1.5)
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .01, 1000), type = "l",
      col = "blue")
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .05, 1000),
      type = "l", col = "green", lwd = 1)
lines(colMeans(biv)[1], colMeans(biv)[2], pch = 3, cex = .8, type = "p",
      lwd = 1)
Created on 2021-03-15 by the reprex package (v0.3.0)

Related

Differences between plotting with contour() in base R and using geom_contour() or stat_contour() in ggplot2

I have plotted a density function in base R and I would like to replicate the plot in ggplot2.
This is the plot in base R:
library(tidyverse)
library(mvtnorm)
sd <- 1 / 2
# sigma
s1 <- sd^2
# first two vectors
x.points <- seq(-3, 3, length.out = 100)
y.points <- seq(-3, 3, length.out = 100)
# the third vector is a density
z <- matrix(0, nrow = 100, ncol = 100)
mu1 <- c(0, 0)
sigma1 <- matrix(c(s1^2, 0, 0, s1^2), nrow = 2)
for (i in 1:100) {
  for (j in 1:100) {
    z[i, j] <- dmvnorm(c(x.points[i], y.points[j]),
                       mean = mu1, sigma = sigma1)
  }
}
contour(x.points, y.points, z, xlim = range(-3, 3), ylim = c(-3, 3), nlevels = 5, drawlabels = TRUE)
To obtain the same result in ggplot2, I am following this example:
library(ggplot2)
library(reshape2) # for melt
volcano3d <- melt(volcano)
names(volcano3d) <- c("x", "y", "z")
# Basic plot
v <- ggplot(volcano3d, aes(x, y, z = z))
v + stat_contour()
But in my case z is a 100 x 100 matrix, so it does not have the same length as x.points and y.points. From the errors I get below, it looks like all three aesthetics must have the same length as the data. How can I transform the data above so that it can be plotted with ggplot2?
data1 <- as.data.frame(cbind(x.points, y.points))
p <- ggplot(data = data1, mapping = aes(x.points, y.points, z=z))
p + geom_contour()
#> Error: Aesthetics must be either length 1 or the same as the data (100): z
p + stat_contour()
#> Error: Aesthetics must be either length 1 or the same as the data (100): z
p + stat_function(fun = contour) + xlim(-3,3)
#> Error: Aesthetics must be either length 1 or the same as the data (100): z
Created on 2021-04-08 by the reprex package (v0.3.0)
The problem is likely that your data isn't in long format: for every value of the z matrix, you need the x and y position, which is different from the base R approach, wherein you just need these positions for every row/column.
We can transform the matrix z to a long format using reshape2::melt and then grab the correct positions from your vectors.
library(tidyverse)
library(mvtnorm)
sd <- 1 / 2
# sigma
s1 <- sd^2
# first two vectors
x.points <- seq(-3, 3, length.out = 100)
y.points <- seq(-3, 3, length.out = 100)
# the third vector is a density
z <- matrix(0, nrow = 100, ncol = 100)
mu1 <- c(0, 0)
sigma1 <- matrix(c(s1^2, 0, 0, s1^2), nrow = 2)
for (i in 1:100) {
  for (j in 1:100) {
    z[i, j] <- dmvnorm(c(x.points[i], y.points[j]),
                       mean = mu1, sigma = sigma1)
  }
}
# Here be the reshaping bit
df <- reshape2::melt(z)
df <- transform(
  df,
  x = x.points[Var1],
  y = y.points[Var2]
)
ggplot(df, aes(x, y)) +
  geom_contour(aes(z = value))
Created on 2021-04-08 by the reprex package (v1.0.0)
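A small aside (not part of the original answer): since ggplot2 wants long-format data anyway, you can also build it directly with expand.grid() and evaluate the density on every (x, y) pair, which avoids both the double loop and the melt step:
library(ggplot2)
library(mvtnorm)
# every (x, y) combination, already in long format
df <- expand.grid(x = x.points, y = y.points)
df$z <- dmvnorm(as.matrix(df), mean = mu1, sigma = sigma1)
ggplot(df, aes(x, y, z = z)) +
  geom_contour()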

Performing residual bootstrap using kernel regression in R

Kernel regression is a non-parametric technique for estimating the conditional expectation of a random variable. It uses local averaging of the response Y to capture a possibly non-linear relationship between X and Y.
I have used the bootstrap for kernel density estimation and now want to use it for kernel regression as well. I have been told to use residual bootstrapping for kernel regression and have read a couple of papers on this, but I am unsure how to perform it (a sketch of the residual-bootstrap scheme is given after the code below). The programming is done in R using the FKSUM package. Here is my attempt at standard resampling for kernel regression:
library(FKSUM)
set.seed(1)
n <- 5000
sample.size <- 500
B.replications <- 200
x <- rbeta(n, 2, 2) * 10
y <- 3 * sin(2 * x) + 10 * (x > 5) * (x - 5)
y <- y + rnorm(n) + (rgamma(n, 2, 2) - 1) * (abs(x - 5) + 3)
#taking x.y to be the population
x.y <- data.frame(x, y)
xs <- seq(min(x), max(x), length = 1000)
ftrue <- 3 * sin(2 * xs) + 10 * (xs > 5) * (xs - 5)
#Sample from the population
seqx<-seq(1,5000,by=1)
sample.ind <- sample(seqx, size = sample.size, replace = FALSE)
sample.reg<-x.y[sample.ind,]
x_s <- sample.reg$x
y_s <- sample.reg$y
fhat_loc_lin.pop <- fk_regression(x, y)
fhat_loc_lin.sample <- fk_regression(x = x_s, y = y_s)
plot(x, y, col = rgb(.7, .7, .7, .3), pch = 16, xlab = 'x',
     ylab = 'y', main = 'Local linear estimator with amise bandwidth')
lines(xs, ftrue, col = 2, lwd = 3)
lines(fhat_loc_lin.pop, lty = 2, lwd = 2)  # the population-level fit
#Bootstrap
n.B.sample = sample.size # sample bootstrap size
boot.reg.mat.X <- matrix(0,ncol=B.replications, nrow=n.B.sample)
boot.reg.mat.Y <- matrix(0,ncol=B.replications, nrow=n.B.sample)
fhat_loc_lin.boot <- matrix(0,ncol = B.replications, nrow=100)
Temp.reg.y <- matrix(0,ncol = B.replications,nrow = 1000)
for(i in 1:B.replications){
  sequence.x.boot <- seq(from = 1, to = n.B.sample, by = 1)
  sample.ind.boot <- sample(sequence.x.boot, size = sample.size, replace = TRUE)
  boot.reg.mat <- sample.reg[sample.ind.boot, ]
  boot.reg.mat.X <- boot.reg.mat$x
  boot.reg.mat.Y <- boot.reg.mat$y
  fhat_loc_lin.boot <- fk_regression(x = boot.reg.mat.X,
                                     y = boot.reg.mat.Y,
                                     h = fhat_loc_lin.sample$h)
  lines(y = fhat_loc_lin.boot$y, x = fhat_loc_lin.sample$x, col = c(i))
  Temp.reg.y[, i] <- fhat_loc_lin.boot$y
}
quan.reg.l <- vector()
quan.reg.u <- vector()
for(i in 1:length(xs)){
  quan.reg.l[i] <- quantile(x = Temp.reg.y[i, ], probs = 0.025)
  quan.reg.u[i] <- quantile(x = Temp.reg.y[i, ], probs = 0.975)
}
# Lower Bound
Temp.reg.2 <- quan.reg.l
lines(y=Temp.reg.2,x=fhat_loc_lin.boot$x ,col="red",lwd=4,lty=1)
# Upper Bound
Temp.reg.3 <- quan.reg.u
lines(y=Temp.reg.3,x=fhat_loc_lin.boot$x ,col="navy",lwd=4,lty=1)
Asking the question on here now since I haven't received any response on CV. Any help would be greatly appreciated!
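Since the question is specifically about the residual bootstrap, here is a minimal sketch of that scheme (an editorial sketch, not an accepted answer). It assumes, as the question's code does, that fk_regression() returns the evaluation grid in $x, the fitted curve in $y and the bandwidth in $h, and that the grid depends only on x; approx() interpolates fitted values back to the observed x values.
library(FKSUM)
fit <- fk_regression(x_s, y_s)                     # fit on the original sample
fitted_s <- approx(fit$x, fit$y, xout = x_s)$y     # fitted values at the observed x
res <- y_s - fitted_s                              # residuals
res <- res - mean(res)                             # centre them
B <- B.replications
boot_fits <- matrix(NA_real_, nrow = length(fit$x), ncol = B)
for (b in seq_len(B)) {
  y_star <- fitted_s + sample(res, replace = TRUE)  # resample residuals, keep x fixed
  fit_b <- fk_regression(x_s, y_star, h = fit$h)    # refit with the original bandwidth
  boot_fits[, b] <- fit_b$y                         # same grid because x is unchanged
}
# pointwise 95% bootstrap band on the grid fit$x
lower <- apply(boot_fits, 1, quantile, probs = 0.025)
upper <- apply(boot_fits, 1, quantile, probs = 0.975)
plot(x_s, y_s, col = rgb(.7, .7, .7, .3), pch = 16, xlab = "x", ylab = "y")
lines(fit$x, fit$y, lwd = 2)
lines(fit$x, lower, col = "red", lwd = 2)
lines(fit$x, upper, col = "navy", lwd = 2)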

Reproduce Fisher linear discriminant figure

Many books illustrate the idea of Fisher linear discriminant analysis using the following figure (this particular one is from Pattern Recognition and Machine Learning, p. 188).
I wonder how to reproduce this figure in R (or in any other language). Pasted below is my initial effort in R: I simulate two groups of data and draw the linear discriminant using the abline() function. Any suggestions are welcome.
set.seed(2014)
library(MASS)
library(DiscriMiner) # For scatter matrices
# Simulate bivariate normal distribution with 2 classes
mu1 <- c(2, -4)
mu2 <- c(2, 6)
rho <- 0.8
s1 <- 1
s2 <- 3
Sigma <- matrix(c(s1^2, rho * s1 * s2, rho * s1 * s2, s2^2), byrow = TRUE, nrow = 2)
n <- 50
X1 <- mvrnorm(n, mu = mu1, Sigma = Sigma)
X2 <- mvrnorm(n, mu = mu2, Sigma = Sigma)
y <- rep(c(0, 1), each = n)
X <- rbind(x1 = X1, x2 = X2)
X <- scale(X)
# Scatter matrices
B <- betweenCov(variables = X, group = y)
W <- withinCov(variables = X, group = y)
# Eigenvectors
ev <- eigen(solve(W) %*% B)$vectors
slope <- - ev[1,1] / ev[2,1]
intercept <- ev[2,1]
par(pty = "s")
plot(X, col = y + 1, pch = 16)
abline(a = intercept, b = slope, lwd = 2, lty = 2)
MY (UNFINISHED) WORK
I pasted my current solution below. The main question is how to rotate (and move) the density plot so that it lies along the decision boundary. Any suggestions are still welcome.
require(ggplot2)
library(grid)
library(MASS)
# Simulation parameters
mu1 <- c(5, -9)
mu2 <- c(4, 9)
rho <- 0.5
s1 <- 1
s2 <- 3
Sigma <- matrix(c(s1^2, rho * s1 * s2, rho * s1 * s2, s2^2), byrow = TRUE, nrow = 2)
n <- 50
# Multivariate normal sampling
X1 <- mvrnorm(n, mu = mu1, Sigma = Sigma)
X2 <- mvrnorm(n, mu = mu2, Sigma = Sigma)
# Combine into data frame
y <- rep(c(0, 1), each = n)
X <- rbind(x1 = X1, x2 = X2)
X <- scale(X)
X <- data.frame(X, class = y)
# Apply lda()
m1 <- lda(class ~ X1 + X2, data = X)
m1.pred <- predict(m1)
# Compute intercept and slope for abline
gmean <- m1$prior %*% m1$means
const <- as.numeric(gmean %*% m1$scaling)
z <- as.matrix(X[, 1:2]) %*% m1$scaling - const
slope <- - m1$scaling[1] / m1$scaling[2]
intercept <- const / m1$scaling[2]
# Projected values
LD <- data.frame(predict(m1)$x, class = y)
# Scatterplot
p1 <- ggplot(X, aes(X1, X2, color = as.factor(class))) +
  geom_point() +
  theme_bw() +
  theme(legend.position = "none") +
  scale_x_continuous(limits = c(-5, 5)) +
  scale_y_continuous(limits = c(-5, 5)) +
  geom_abline(intercept = intercept, slope = slope)
# Density plot
p2 <- ggplot(LD, aes(x = LD1)) +
  geom_density(aes(fill = as.factor(class), y = ..scaled..)) +
  theme_bw() +
  theme(legend.position = "none")
grid.newpage()
print(p1)
vp <- viewport(width = .7, height = 0.6, x = 0.5, y = 0.3, just = c("centre"))
pushViewport(vp)
print(p2, vp = vp)
Basically you need to project the data along the direction of the classifier, plot a histogram for each class, and then rotate the histogram so its x axis is parallel to the classifier. Some trial and error with scaling the histogram is needed to get a nice result. Here's an example of how to do it in Matlab, for the naive classifier (difference of the class means); for the Fisher classifier it is of course similar, you just use a different direction w. I changed the parameters from your code so the plot is more similar to the one you gave. A rough R sketch of the same idea follows the Matlab code.
rng('default')
n = 1000;
mu1 = [1,3]';
mu2 = [4,1]';
rho = 0.3;
s1 = .8;
s2 = .5;
Sigma = [s1^2, rho*s1*s2; rho*s1*s2, s2^2];
X1 = mvnrnd(mu1,Sigma,n);
X2 = mvnrnd(mu2,Sigma,n);
X = [X1; X2];
Y = [zeros(n,1);ones(n,1)];
scatter(X1(:,1), X1(:,2), [], 'b' );
hold on
scatter(X2(:,1), X2(:,2), [], 'r' );
axis equal
m1 = mean(X(1:n,:))';
m2 = mean(X(n+1:end,:))';
plot(m1(1),m1(2),'bx','markersize',18)
plot(m2(1),m2(2),'rx','markersize',18)
plot([m1(1),m2(1)], [m1(2),m2(2)],'g')
%% classifier taking only means into account
w = m2 - m1;
w = w / norm(w);
% project data onto w
X1_projected = X1 * w;
X2_projected = X2 * w;
% plot histogram and rotate it
angle = 180/pi * atan(w(2)/w(1));
[hy1, hx1] = hist(X1_projected);
[hy2, hx2] = hist(X2_projected);
hy1 = hy1 / sum(hy1); % normalize
hy2 = hy2 / sum(hy2); % normalize
scale = 4; % set manually
h1 = bar(hx1, scale*hy1,'b');
h2 = bar(hx2, scale*hy2,'r');
set([h1, h2],'ShowBaseLine','off')
% rotate around the origin
rotate(get(h1,'children'),[0,0,1], angle, [0,0,0])
rotate(get(h2,'children'),[0,0,1], angle, [0,0,0])
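A rough R sketch of the same idea (not a full reproduction of the figure, and using the same naive difference-of-means direction as the Matlab code; the helper names w_perp and curve_xy are mine): project each class onto w, estimate a 1-D density of the projections, and map that curve back into the plane using the perpendicular direction. The scaling factor is set manually, as above.
library(MASS)
set.seed(1)
n <- 1000
s1 <- .8; s2 <- .5; rho <- 0.3
Sigma <- matrix(c(s1^2, rho * s1 * s2, rho * s1 * s2, s2^2), 2)
X1 <- mvrnorm(n, c(1, 3), Sigma)
X2 <- mvrnorm(n, c(4, 1), Sigma)
w <- colMeans(X2) - colMeans(X1)           # difference-of-means direction
w <- w / sqrt(sum(w^2))
w_perp <- c(-w[2], w[1])                   # perpendicular direction
plot(rbind(X1, X2), col = rep(c("blue", "red"), each = n), pch = 16,
     cex = .4, asp = 1, xlab = "x", ylab = "y")
scale <- 4                                 # set manually
for (cls in list(list(X = X1, col = "blue"), list(X = X2, col = "red"))) {
  d <- density(drop(cls$X %*% w))          # 1-D density of the projections
  # points d$x * w along the direction, offset by the scaled density along w_perp
  curve_xy <- cbind(d$x, scale * d$y) %*% rbind(w, w_perp)
  lines(curve_xy, col = cls$col, lwd = 2)
}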

How to plot bivariate normal distribution with expanding ellipses

How can I plot a bivariate normal distribution with expanding ellipses and add 5%, 25%, 50%, 75% and 95% labels to the plot? Thank you!
You can create a contour plot using the R package mvtnorm.
Let's say you're trying to plot a bivariate normal distribution where mu_x = 1, mu_y = 1 and the covariance matrix is matrix(c(2, 1, 1, 1), nrow = 2). Evaluate the density on a 100 x 100 grid of (x, y) values and create the contour plot as follows:
library(mvtnorm)
x.points <- seq(-3,3,length.out=100)
y.points <- x.points
z <- matrix(0,nrow=100,ncol=100)
mu <- c(1,1)
sigma <- matrix(c(2,1,1,1),nrow=2)
for (i in 1:100) {
  for (j in 1:100) {
    z[i, j] <- dmvnorm(c(x.points[i], y.points[j]),
                       mean = mu, sigma = sigma)
  }
}
contour(x.points,y.points,z)
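This draws contours at whatever levels contour() picks by default. To place the contours at the 5%, 25%, 50%, 75% and 95% probability levels asked about (an editorial follow-up, not part of the original answer): for a bivariate normal, the region containing probability p is bounded by the density value exp(-qchisq(p, 2) / 2) / (2 * pi * sqrt(det(sigma))), so those values can be passed to contour() together with percentage labels:
p <- c(.05, .25, .50, .75, .95)
lev <- exp(-qchisq(p, df = 2) / 2) / (2 * pi * sqrt(det(sigma)))
contour(x.points, y.points, z, levels = lev, labels = paste0(100 * p, "%"))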
Here is a solution that computes the contours (ellipses) at the probability levels that you want.
#####
# Compute points and rotation matrix
# input
theta <- c(1, 2)
sigma <- diag(c(3^2, 2^2))
sigma[2, 1] <- sigma[1, 2] <- sqrt(sigma[1, 1] * sigma[2, 2]) * .5
# we start from points on the unit circle
n_points <- 100
xy <- cbind(sin(seq(0, 2 * pi, length.out = n_points)),
            cos(seq(0, 2 * pi, length.out = n_points)))
# then we scale the dimensions
ev <- eigen(sigma)
xy[, 1] <- xy[, 1] * 1
xy[, 2] <- xy[, 2] * sqrt(min(ev$values) / max(ev$values))
# then rotate
phi <- atan(ev$vectors[2, 1] / ev$vectors[1, 1])
R <- matrix(c(cos(phi), sin(phi), -sin(phi), cos(phi)), 2)
xy <- tcrossprod(R, xy)
# the quantiles you ask for
chi_vals <- qchisq(c(.05, .25, .50, .75, .95), df = 2) * max(ev$values)
#####
# Plot contours
par(mar = c(4.5, 4, .5, .5))
plot(c(-8, 10), c(-4, 8), type = "n", xlab = "x", ylab = "y")
for(r in sqrt(chi_vals))
  lines(r * xy[1, ] + theta[1], r * xy[2, ] + theta[2], lty = 1)
Detailed Explanation
theta <- c(1, 2)
sigma <- diag(c(3^2, 2^2))
sigma[2, 1] <- sigma[1, 2] <- sqrt(sigma[1, 1] * sigma[2, 2]) * .5
# we start from points on the unit circle
n_points <- 100
xy <- cbind(sin(seq(0, 2 * pi, length.out = n_points)),
            cos(seq(0, 2 * pi, length.out = n_points)))
par(mar = c(5, 5, 3, 1), mfcol = c(2, 2))
plot(xy[, 1], xy[, 2], xlab = "x", ylab = "y", xlim = c(-8, 10), bty = "l",
     ylim = c(-4, 8), main = "Unit circle", type = "l")
arrows(c(0, 0), c(0, 0), c(0, 1), c(1, 0), length = .05)
# this is very much like PCA. We scale the dimensions such that the first
# dimension has length one and the others are scaled proportional to the square
# root of their eigenvalue relative to the largest eigenvalue
ev <- eigen(sigma)
scal <- sqrt(min(ev$values) / max(ev$values))
xy[, 2] <- xy[, 2] * scal
plot(xy[, 1], xy[, 2], xlab = "x", ylab = "y", xlim = c(-8, 10), bty = "l",
     ylim = c(-4, 8), main = "Scaled", type = "l")
arrows(c(0, 0), c(0, 0), c(0, 1), c(scal, 0), length = .05)
# then we rotate phi degrees to account for the correlation coefficient. See
# https://en.wikipedia.org/wiki/Rotation_matrix
# and notice that we compute the angle of the first eigenvector
phi <- atan(ev$vectors[2, 1] / ev$vectors[1, 1])
R <- matrix(c(cos(phi), sin(phi), -sin(phi), cos(phi)), 2)
xy <- tcrossprod(R, xy) # R %*% t(xy)
plot(xy[1, ], xy[2, ], xlab = "x", ylab = "y", xlim = c(-8, 10), bty = "l",
     ylim = c(-4, 8), main = "Rotated", type = "l")
arrs <- tcrossprod(R, matrix(c(0, 1, scal, 0), 2L))
arrows(c(0, 0), c(0, 0), arrs[1, ], arrs[2, ], length = .05)
# the right size of each circle can now be found by taking the wanted
# quantile from the chi-square distribution with two degrees of freedom
# multiplied by the largest eigenvalue
plot(c(-8, 10), c(-4, 8), type = "n", xlab = "x", ylab = "y", main = "Final",
     bty = "l")
chi_vals <- qchisq(c(.05, .25, .50, .75, .95), df = 2) * max(ev$values)
for(r in sqrt(chi_vals))
  lines(r * xy[1, ] + theta[1], r * xy[2, ] + theta[2], lty = 1)
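If you also want the 5%-95% labels asked for in the question (a small addition, not in the original answer), one option is to label the topmost point of each ellipse with text():
probs <- c(.05, .25, .50, .75, .95)
for (k in seq_along(chi_vals)) {
  r <- sqrt(chi_vals[k])
  top <- which.max(xy[2, ])   # topmost point of the (common) ellipse shape
  text(r * xy[1, top] + theta[1], r * xy[2, top] + theta[2],
       labels = paste0(100 * probs[k], "%"), pos = 3, cex = .8)
}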

Bivariate normal with marginal and conditional densities

I am trying to create a figure in R. It consists of the contour plot of a bivariate normal distribution for the vector variable (x, y), along with the marginals f(x) and f(y), the conditional distribution f(y|x), and the line through the conditioning value X = x (a simple abline(v = x)).
I already have the contour and the abline, but I don't know how to continue.
Here is the code I used so far:
bivariate.normal <- function(x, mu, Sigma) {
  exp(-.5 * t(x - mu) %*% solve(Sigma) %*% (x - mu)) / sqrt(2 * pi * det(Sigma))
}
mu <- c(0, 0)
Sigma <- matrix(c(1, .8, .8, 1), nrow = 2)
x1 <- seq(-3, 3, length.out = 50)
x2 <- seq(-3, 3, length.out = 50)
z <- outer(x1, x2, FUN = function(x1, x2, ...) {
  apply(cbind(x1, x2), 1, bivariate.normal, ...)
}, mu = mu, Sigma = Sigma)
contour(x1, x2, z, col = "blue", drawlabels = FALSE, nlevels = 4,
        xlab = expression(x[1]), ylab = expression(x[2]), lwd = 1)
abline(v = .7, col = 1, lwd = 2, lty = 2)
text(2, -2, labels = expression(x[1] == 0.7))
It would have been helpful if you had provided the function to calculate the marginal distribution. I may have got the marginal distribution function wrong, but I think this gets you what you want:
par(lwd=2,mgp=c(1,1,0))
# Modified to extract diagonal.
bivariate.normal <- function(x, mu, Sigma)
  exp(-.5 * diag(t(x - mu) %*% solve(Sigma) %*% (x - mu))) / sqrt(2 * pi * det(Sigma))
mu <- c(0,0)
Sigma <- matrix(c(1,.8,.8,1), nrow=2)
x1 <- seq(-3, 3, length.out=50)
x2 <- seq(-3, 3, length.out=50)
plot(1:10,axes=FALSE,frame.plot=TRUE,lwd=1)
# z can now be calculated much easier.
z<-bivariate.normal(t(expand.grid(x1,x2)),mu,Sigma)
dim(z)<-c(length(x1),length(x2))
contour(x1, x2, z, col="#4545FF", drawlabels=FALSE, nlevels=4,
xlab=expression(x[1]), ylab=expression(x[2]), lwd=2,xlim=range(x1),ylim=range(x2),frame.plot=TRUE,axes=FALSE,xaxs = "i", yaxs = "i")
axis(1,labels=FALSE,lwd.ticks=2)
axis(2,labels=FALSE,lwd.ticks=2)
abline(v=.7, col=1, lwd=2, lty=2)
text(2, -2, labels=expression(x[1]==0.7))
# Dotted
f<-function(x1,x2) bivariate.normal(t(cbind(x1,x2)),mu,Sigma)
x.s<-seq(from=min(x1),to=max(x1),by=0.1)
vals<-f(x1=0.7,x2=x.s)
lines(vals-abs(min(x1)),x.s,lty=2,lwd=2)
# Marginal distributions of a bivariate normal: X1 ~ N(mu[1], Sigma[1,1]) and
# X2 ~ N(mu[2], Sigma[2,2]); see http://mpdc.mae.cornell.edu/Courses/MAE714/biv-normal.pdf.
# The marginals are therefore ordinary normal densities.
marginal.x1 <- function(x) dnorm(x, mean = mu[1], sd = sqrt(Sigma[1, 1]))
marginal.x2 <- function(x) dnorm(x, mean = mu[2], sd = sqrt(Sigma[2, 2]))
# Left side solid
vals<-marginal.x2(x.s)
lines(vals-abs(min(x1)),x.s,lty=1,lwd=2)
# Bottom side solid
vals<-marginal.x1(x.s)
lines(x.s,vals-abs(min(x2)),lty=1,lwd=2)
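As a small addition (not in the original answer), the conditional density f(x2 | x1 = 0.7) has a closed form: for a bivariate normal, X2 | X1 = x1 is normal with mean mu[2] + Sigma[1,2] / Sigma[1,1] * (x1 - mu[1]) and variance Sigma[2,2] - Sigma[1,2]^2 / Sigma[1,1] (the same formulas the ggplot2 answer below uses), so it can be drawn with dnorm() along the dashed vertical line:
x1.cond  <- 0.7
mu.cond  <- mu[2] + Sigma[1, 2] / Sigma[1, 1] * (x1.cond - mu[1])
var.cond <- Sigma[2, 2] - Sigma[1, 2]^2 / Sigma[1, 1]
vals <- dnorm(x.s, mean = mu.cond, sd = sqrt(var.cond))
lines(x1.cond + vals, x.s, lty = 3, lwd = 2)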
My solution in ggplot2, inspired by this post:
rm(list=ls())
options(max.print=999999)
library(pacman)
p_load(tidyverse)
p_load(mvtnorm)
my_mean<-c(25,65)
mycors<-seq(-1,1,by=.25)
sd_vec<-c(5,7)
i<-3
temp_cor <- matrix(c(1, mycors[i],
                     mycors[i], 1),
                   byrow = TRUE, ncol = 2)
V <- sd_vec %*% t(sd_vec) * temp_cor
### data for vertical curve
my_dnorm <- function(x, mean = 0, sd = 1, log = FALSE, new_loc, multplr){
  new_loc + dnorm(x, mean, sd, log) * multplr
}
## marginal Y distribution
yden <- data.frame(y = seq(48, 82, length.out = 100),
                   x = my_dnorm(seq(48, 82, length.out = 100), my_mean[2], sd_vec[2],
                                new_loc = 8, multplr = 100))
##conditional distribution
my_int<-(my_mean[2]-(V[1,2]*my_mean[1]/V[1,1]))
my_slp<-V[1,2]/V[1,1]
givenX<-34
mu_givenX<-my_int+givenX*my_slp
sigma2_givenX<-(1-mycors[i]^2)*V[2,2]
y_givenX_range <- seq(mu_givenX - 3 * sqrt(sigma2_givenX), mu_givenX + 3 * sqrt(sigma2_givenX), length.out = 100)
yden_x <- data.frame(y = y_givenX_range, x = my_dnorm(y_givenX_range, mu_givenX, sqrt(sigma2_givenX), new_loc = givenX, multplr = 80))
# this second call overrides the previous one and places the conditional curve at x = 8, next to the marginal
yden_x <- data.frame(y = y_givenX_range, x = my_dnorm(y_givenX_range, mu_givenX, sqrt(sigma2_givenX), new_loc = 8, multplr = 80))
### data for drawing ellipse
data.grid <- expand.grid(x = seq(my_mean[1] - 3 * sd_vec[1], my_mean[1] + 3 * sd_vec[1], length.out = 200),
                         y = seq(my_mean[2] - 3 * sd_vec[2], my_mean[2] + 3 * sd_vec[2], length.out = 200))
q.samp <- cbind(data.grid, prob = dmvnorm(data.grid, mean = my_mean, sigma = V))
### plot
ggplot(q.samp, aes(x = x, y = y, z = prob)) +
  geom_contour() + theme_bw() +
  geom_abline(intercept = my_int, slope = my_slp, color = "red",
              linetype = "dashed") +
  stat_function(fun = my_dnorm, n = 101,
                args = list(mean = my_mean[1], sd = sd_vec[1], new_loc = 35, multplr = 100),
                color = 1) +
  geom_path(aes(x = x, y = y), data = yden, inherit.aes = FALSE) +
  geom_path(aes(x = x, y = y), data = yden_x, inherit.aes = FALSE, color = 1, linetype = "dashed") +
  geom_vline(xintercept = givenX, linetype = "dashed")
Created on 2020-10-31 by the reprex package (v0.3.0)
