Plot 3D plane (true regression surface) - r

I'm trying to simulate some data (x1 and x2 - my explanatory variables), calculate y using a specified function + random noise and plot the resulting observations AND the true regression surface. Here's what I have so far:
set.seed(1)
library(rgl)
# Simulate some data
x1 <- runif(50)
x2 <- runif(50)
y <- sin(x1)*x2+x1*x2 + rnorm(50, sd=0.3)
# 3D scatterplot of observations
plot3d(x1,x2,y, type="p", col="red", xlab="X1", ylab="X2", zlab="Y", site=5, lwd=15)
Now I'm not sure how I can add the "true" regression plane. I'm basically looking for something like curve() where I can plug in my (true) model formula.
Thanks!

If you wanted a plane, you could use planes3d.
Since your model is not linear, it is not a plane: you can use surface3d instead.
my_surface <- function(f, n=10, ...) {
ranges <- rgl:::.getRanges()
x <- seq(ranges$xlim[1], ranges$xlim[2], length=n)
y <- seq(ranges$ylim[1], ranges$ylim[2], length=n)
z <- outer(x,y,f)
surface3d(x, y, z, ...)
}
library(rgl)
f <- function(x1, x2)
sin(x1) * x2 + x1 * x2
n <- 200
x1 <- 4*runif(n)
x2 <- 4*runif(n)
y <- f(x1, x2) + rnorm(n, sd=0.3)
plot3d(x1,x2,y, type="p", col="red", xlab="X1", ylab="X2", zlab="Y", site=5, lwd=15)
my_surface(f, alpha=.2 )

Apologies: ( I didn't read the question very carefllly and now see that I rushed into estimation when you wanted to plot the Truth.)
Here's an approach to estimation followed by surface plotting using loess:
mod2 <- loess(y~x1+x2)
grd<- data.frame(x1=seq(range(x1)[1],range(x1)[2],len=20),
x2=seq(range(x2)[1],range(x2)[2],len=20))
grd$pred <- predict(mod2, newdata=grd)
grd <- grd[order(grd$x1,grd$x2),]
x1 <- unique(grd$x1)
x2 <- unique(grd$x2) # shouldn't have used y
surface3d(x1, x2, z=matrix(grd$pred,length(x1),length(x2)) )

IRTFM's somewhat imperfect answers above let me to a thread on the CRAN help pages. https://stat.ethz.ch/pipermail/r-help/2013-December/364037.html
I extracted the relevant bits of code and turned them into a function like so:
require(rgl)
pred.surf.3d <- function(df, x.nm,y.nm,z.nm, ...){
x <- df[,x.nm]; y <- df[,y.nm]; z<-df[,z.nm]
fit <- lm(z ~ x + y + x*y + x^2 + y^2)
xnew <- seq(range(x)[1],range(x)[2],len=20)
ynew <- seq(range(y)[1],range(y)[2],len=20)
df <- expand.grid(x=xnew, y=ynew)
df$z <- predict(fit, newdata=df)
with(df, surface3d(xnew, ynew, z=df$z))
}
I may end up bundling this into my CRAN utility package at some point.
In the mean time, I hope you find it useful! (Run it on IRTFM's first code chunk like so:)
pred.surf.3d(data.frame(x1,x2,y),'x1','x2','y')

Related

Plotting quantile regression estimates as function of their quantiles in R

The following code is a reproducible example based on the Swiss dataset (datasets::swiss).
My question is that I would like to plot the betas, which are the quantile regression estimates provided by the smrq() function on the y-axis, according to the tau values (the quantiles) ranging from [0:1]; but unfortunately I am not succeeding. Many thanks for the precious help, of course I can edit my post if I have forgotten anything.
Code:
library(quantreg)
library(limma)
#Generalized Functions
minimize.logcosh <- function(par, X, y, tau) {
diff <- y-(X %*% par)
check <- (tau-0.5)*diff+(0.5/0.7)*logcosh(0.7*diff)+0.4
return(sum(check))
}
smrq <- function(X, y, tau){
p <- ncol(X)
op.result <- optim(
rep(0, p),
fn = minimize.logcosh,
method = 'BFGS',
X = X,
y = y,
tau = tau
)
beta <- op.result$par
return(beta)
}
run_smrq <- function(data, fml, response) {
x <- model.matrix(fml, data)[,-1]
y <- data[[response]]
X <- cbind(x, rep(1,nrow(x)))
n <- 99
betas <- sapply(1:n, function(i) smrq(X, y, tau=i/(n+1)))
return(betas)
}
#Callers
swiss <- datasets::swiss
smrq_models <- run_smrq(data=swiss, fml=Fertility~., response="Fertility")
#langtang's solution gives this graphical output:
Without making any comment on the "correctness" of the output of run_smrq(), you can try this:
library(dplyr)
library(tidyr)
library(ggplot2)
as.data.frame(t(smrq_models)) %>%
mutate(q=row_number()) %>%
pivot_longer(!q,names_to="beta",values_to = "coef") %>%
ggplot(aes(q,coef,color=beta)) +
geom_point()
Also, if the betas are on largely different scales, this your visualization approach may not be the most appropriate. As as starting point, you might add + facet_wrap(~beta, scales="free_y")

+- before a square root in R

I'm trying to write an equation in R (see code below). I was wondering how I can correctly use +- before sqrt() in my code?
x <- seq(0,1,by=0.01)
y <- %+-%sqrt((.5^2)-(x-.5)^2)+.5
Need to plot them separately but the %+-% operator can be used in plotmath expressions. Needs to be flanked by two values, however, hence the need to use the non-printing phantom():
x <- c( seq(0,1,by=0.01) )
y <- c( sqrt((.5^2)-(x-.5)^2)+.5, -sqrt((.5^2)-(x-.5)^2)+.5)
plot( rep(x,times=2), y)
title(main= bquote( phantom(0) %+-% sqrt((.5^2)-(x-.5)^2)+.5))
You may want to have the equation in parametric form, without requiring +- of sqrt.
theta <- seq(0,2*pi,0.01)
x <- 0.5 + 0.5*sin(theta)
y <- 0.5 + 0.5*cos(theta)
plot(x, y)
title(main= substitute(paste('x=(1+sin',theta,')/2, y=(1+cos', theta, ')/2')))
Try this:
draw.circle <- function(stepsize=.01) {
theta <- seq(0,2*pi,by=stepsize)
x <- 0.5 + 0.5*sin(theta)
y <- 0.5 + 0.5*cos(theta)
plot(x, y,type="n",xlim = c(0,1),ylim = c(0,1))
segments(x,y,.5,.5)
}
draw.circle(.01)
draw.circle(.02)
draw.circle(.05)

obtaining components of plot from rms calibrate

Are the values for the plot available from rms calibrate? From ?calibrate
library("rms")
set.seed(1)
n <- 200
d.time <- rexp(n)
x1 <- runif(n)
x2 <- factor(sample(c('a', 'b', 'c'), n, TRUE))
f <- cph(Surv(d.time) ~ pol(x1,2) * x2, x=TRUE, y=TRUE, surv=TRUE,time.inc=1.5)
cal <- calibrate(f, u=1.5, cmethod='KM', m=50, B=20)
plot(cal)
cal
I would like to obtain the values that make up this plot. The y-values can be seen on the screen, but it is not clear to me how to obtain the x and y values. Thanks.

R plot heatmap of matrix with superposed line

I have a matrix whose values I want to plot. Using the image function it looks like this.
How can I plot a line over the image?
(In my case, it want to plot a line over the maximum values along the x axes)
Edit
The image and the line I want to plot over are the output from the Bayesian Online Changepoint detection. Since it is quite short I'll share the whole code. The plotting part is at the end:
# Bayesian Online Changepoint Detection
# Adams, MacKay 2007
# http://hips.seas.harvard.edu/content/bayesian-online-changepoint-detection
#######################################
# Other python and matlab implementations
# https://github.com/JackKelly/bayesianchangepoint
# http://hips.seas.harvard.edu/content/bayesian-online-changepoint-detection
# http://www.inference.phy.cam.ac.uk/rpa23/cp/gaussdemo.m
# http://www.inference.phy.cam.ac.uk/rpa23/cp/studentpdf.m
# http://www.inference.phy.cam.ac.uk/rpa23/cp/constant_hazard.m
# Even more commented, but different paper:
# https://github.com/davyfeng/ipdata/blob/master/csv/bocpd/core/bocpd.m
# Generate data
x1 <- rnorm(100, 10.5, 0.1)
x2 <- rnorm(100, 1, 0.1)
x3 <- rnorm(100, -10, 0.1)
x4 <- rnorm(100, -1, 0.1)
x5 <- rnorm(100, 5, 0.1)
x6 <- rnorm(30, 1, 0.1)
x7 <- rnorm(100, 8, 0.1)
x <- c(x1,x2,x3,x4,x5, x6,x7)
##############
# Algorithm
##############
# Prepare the scaled and shifted student-t
dt.scaled.shifted <- function(x, m, s, df) stats::dt((x-m)/s, df)/s
# Prepare the Hazard function
hazard <-function(x, lambda=200){rep(1/lambda, length(x))}
L <- length(x)
R <- matrix(rep(0,(L+1)*(L+1)), L+1, L+1)
R[1,1] <- 1 # for t=1 where are sure that p(r=1)=1
mu0 <- 0; kappa0 <- 1; alpha0 <-1; beta0 <- 1;
muT <- mu0
kappaT <- kappa0
alphaT <- alpha0
betaT <- beta0
maxes <- rep(0, L)
# Process data as they come in
for(t in 1:L){
# Evaluate predictive probability
predprobs <- dt.scaled.shifted(x[t], muT, betaT*(kappaT+1)/(alphaT*-kappaT), 2*alphaT)
H <- hazard(x[1:t])
# Calculate growth probabilities
R[2:(t+1), t+1] <- R[1:t,t]*predprobs*(1-H)
# Calculate changepoint (reset) probabilities
R[1,t+1] <- sum(R[1:t,t]*predprobs*H)
# Renormalize
R[,t+1] <- R[,t+1] / sum(R[,t+1])
# Update parameters for each possible run length
# keep the past ones since they will be used iteratively
muT0 <- c(mu0, (kappaT*muT + x[t])/(kappaT+1))
kappaT0 <- c(kappa0,kappaT+1)
alphaT0 <- c(alpha0, alphaT + 0.5)
betaT0 <- c(beta0, kappaT + (kappaT * (x[t]-muT)^2)/(2*(kappaT+1)))
muT <- muT0
kappaT <- kappaT0
alphaT <- alphaT0
betaT <- betaT0
# Store the maximum, to plot later
maxes[t] <- which.max(R[,t])
}
# Plot results
par(mfrow=c(2,1))
plot(x, type='l')
image((-t(log(R))), col = grey(seq(0,1,length=256)), axes=T)
par(new=T)
plot(1:(dim(R)[1]-1), maxes,type='l', col="red")
On the top there is the original data. On the bottom, the probability of a current run to have length y. The red line on the bottom should fit the dark shades.
(to be deleted. It does not work. I leave it temporaly to save the comments.)
I got it, I thought I had already tried par(new=T) but obviously I hadn't:
m <- matrix(rnorm(100,1,1),50,50)
image(m, col = grey(seq(0,1,length=256)))
par(new=T)
plot(seq(0,1, length=50), type='l', col="red", lwd=5)
Quick example simulating the whole process:
data <- vector()
for(i in 1:50){
data <- rbind(data, dpois(1:50, i^1.2))
}
maxes <- apply(data, 1, which.max)
image(-data, col = grey(seq(0,1,length=256)))
par(new=T)
plot(1:dim(data)[1], c(maxes),type='l')

Adding two random variables via convolution in R

I would like to compute the convolution of two probability distributions in R and I need some help. For the sake of simplicity, let's say I have a variable x that is normally distributed with mean = 1.0 and stdev = 0.5, and y that is log-normally distributed with mean = 1.5 and stdev = 0.75. I want to determine z = x + y. I understand that the distribution of z is not known a priori.
As an aside the real world example I am working with requires addition to two random variables that are distributed according to a number of different distributions.
Does anyone know how to add two random variables by convoluting the probability density functions of x and y?
I have tried generating n normally distributed random values (with above parameters) and adding them to n log-normally distributed random values. However, I wish to know if I can use the convolution method instead. Any help would be greatly appreciated.
EDIT
Thank you for these answers. I define a pdf, and try to do the convolution integral, but R complains on the integration step. My pdfs are Log Pearson 3 and are as follows
dlp3 <- function(x, a, b, g) {
p1 <- 1/(x*abs(b) * gamma(a))
p2 <- ((log(x)-g)/b)^(a-1)
p3 <- exp(-1* (log(x)-g) / b)
d <- p1 * p2 * p3
return(d)
}
f.m <- function(x) dlp3(x,3.2594,-0.18218,0.53441)
f.s <- function(x) dlp3(x,9.5645,-0.07676,1.184)
f.t <- function(z) integrate(function(x,z) f.s(z-x)*f.m(x),-Inf,Inf,z)$value
f.t <- Vectorize(f.t)
integrate(f.t, lower = 0, upper = 3.6)
R complains at the last step since the f.t function is bounded and my integration limits are probably not correct. Any ideas on how to solve this?
Here is one way.
f.X <- function(x) dnorm(x,1,0.5) # normal (mu=1.5, sigma=0.5)
f.Y <- function(y) dlnorm(y,1.5, 0.75) # log-normal (mu=1.5, sigma=0.75)
# convolution integral
f.Z <- function(z) integrate(function(x,z) f.Y(z-x)*f.X(x),-Inf,Inf,z)$value
f.Z <- Vectorize(f.Z) # need to vectorize the resulting fn.
set.seed(1) # for reproducible example
X <- rnorm(1000,1,0.5)
Y <- rlnorm(1000,1.5,0.75)
Z <- X + Y
# compare the methods
hist(Z,freq=F,breaks=50, xlim=c(0,30))
z <- seq(0,50,0.01)
lines(z,f.Z(z),lty=2,col="red")
Same thing using package distr.
library(distr)
N <- Norm(mean=1, sd=0.5) # N is signature for normal dist
L <- Lnorm(meanlog=1.5,sdlog=0.75) # same for log-normal
conv <- convpow(L+N,1) # object of class AbscontDistribution
f.Z <- d(conv) # distribution function
hist(Z,freq=F,breaks=50, xlim=c(0,30))
z <- seq(0,50,0.01)
lines(z,f.Z(z),lty=2,col="red")
I was having trouble getting integrate() to work for different density parameters, so I came up with an alternative to #jlhoward's using Riemann approximation:
set.seed(1)
#densities to be convolved. could also put these in the function below
d1 <- function(x) dnorm(x,1,0.5) #
d2 <- function(y) dlnorm(y,1.5, 0.75)
#Riemann approximation of convolution
conv <- function(t, a, b, d) { #a to b needs to cover the range of densities above. d needs to be small for accurate approx.
z <- NA
x <- seq(a, b, d)
for (i in 1:length(t)){
print(i)
z[i] <- sum(d1(x)*d2(t[i]-x)*d)
}
return(z)
}
#check against sampled convolution
X <- rnorm(1000, 1, 0.5)
Y <- rlnorm(1000, 1.5, 0.75)
Z <- X + Y
t <- seq(0, 50, 0.05) #range to evaluate t, smaller increment -> smoother curve
hist(Z, breaks = 50, freq = F, xlim = c(0,30))
lines(t, conv(t, -100, 100, 0.1), type = "s", col = "red")

Resources