Calculate the 5th percentile of a curve generated from vectors of X, Y points in R

I have the curves below, generated with the discreteRV package:
library(discreteRV)

placebo.rate <- 0.5
mmm.rate <- 0.3
mmm.power <- power.prop.test(p1 = placebo.rate, p2 = mmm.rate, power = 0.8,
                             alternative = "one.sided")
n <- as.integer(ceiling(mmm.power$n))
patients <- seq(from = 0, to = n, by = 1)

placebo_distribution <- dbinom(patients, size = n, prob = placebo.rate)
mmm_distribution <- dbinom(patients, size = n, prob = mmm.rate)

get_pmf <- function(p1, p2) {
  X1 <- RV(patients, p1, fractions = FALSE)
  X2 <- RV(patients, p2, fractions = FALSE)
  joint(X1, X2, fractions = FALSE)
}

extract <- function(string) {
  ints <- unlist(strsplit(string, ","))
  as.integer(ints[1]) - as.integer(ints[2])
}

diff_prob <- function(pmf) {
  diff <- unname(sapply(outcomes(pmf), FUN = extract) / n)
  probabilities <- unname(probs(pmf))
  df <- data.frame(diff, probabilities)
  aggregate(. ~ diff, data = df, FUN = sum)
}

most_likely_rate <- function(x) {
  x[which(x$probabilities == max(x$probabilities)), ]$diff
}

mmm_rate_diffs <- diff_prob(get_pmf(mmm_distribution, placebo_distribution))
placebo_rate_diffs <- diff_prob(get_pmf(placebo_distribution, placebo_distribution))

plot(mmm_rate_diffs$diff, mmm_rate_diffs$probabilities * 100, type = "l", lty = 2,
     xlab = "Rate difference", ylab = "# of trials per 100",
     main = paste("Trials with", n, "patients per treatment arm"))
lines(placebo_rate_diffs$diff, placebo_rate_diffs$probabilities * 100, lty = 1)
abline(v = c(most_likely_rate(placebo_rate_diffs), most_likely_rate(mmm_rate_diffs)),
       lty = c(1, 2))
legend("topleft", legend = c("Alternative hypothesis", "Null hypothesis"), lty = c(2, 1))
Basically, I took two binomial discrete random variables, created their joint probability mass function, and computed the probability of each possible rate difference. I then plotted the results to show the distribution of those rate differences over 100 identical trials, under either the null or the alternative hypothesis.
Now I want to illustrate the 5th percentile on the null-hypothesis curve, but I don't know how to do this. If I simply use quantile(x = placebo_rate_diffs$diff, probs = 0.05), I get -0.377027, which can't be correct looking at the graph. I want to calculate the 5th percentile the way I would with pbinom(), but I don't know how to do that with a curve built from what are essentially just x and y vectors.
Maybe I can approximate these two curves as binomial, since they appear to be, but I am still not sure how to do that.
Any help would be appreciated.
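One way to do this (a minimal sketch, not from the original post, reusing the question's placebo_rate_diffs) is to build the discrete CDF by cumulatively summing the probabilities and take the smallest rate difference at which it reaches 0.05, which is essentially what qbinom() does for a plain binomial:
# Sketch: invert the discrete CDF built from the x (values) and y (probabilities) vectors.
sorted <- placebo_rate_diffs[order(placebo_rate_diffs$diff), ]
cdf <- cumsum(sorted$probabilities)
q05 <- sorted$diff[min(which(cdf >= 0.05))]
q05
abline(v = q05, lty = 3)  # mark the 5th percentile on the existing plot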

Related

Plotting with GLMMadaptive for zero-inflated, semi-continuous data?

I'm trying to utilize the effectPlotData as described here: https://cran.r-project.org/web/packages/GLMMadaptive/vignettes/Methods_MixMod.html
But, I'm trying to apply it to a model (two-part mixed model for zero-inflated semi-continuous data) that includes random/fixed effects for both a linear and logistic portion (hurdle lognormal). I get the following error:
'Error in Qs[1, ] : incorrect number of dimensions'
I think this comes from having more than one set of random/fixed-effect outcomes, but if anyone else has come across this error or can advise, it would be appreciated. I've tried changing the terms in the new data frame, and tried a couple of different options for length.out (the number of subjects, and the total number of observations across all subjects), but I get the same error each time.
Code below, specifies the model into m and new data frame into nDF:
m <- mixed_model(Y ~ X, random = ~ 1 | Subject,
                 data = data_combined_temp_Fix_Num3,
                 family = hurdle.lognormal,
                 n_phis = 1, zi_fixed = ~ X, zi_random = ~ 1 | Subject,
                 na.action = na.exclude)
nDF <- with(data_combined_temp_Fix_Num3,
            expand.grid(X = seq(min(X), max(X), length.out = 908),
                        Y = levels(Y)))
effectPlotData(m, nDF)
It seems to work with the following example:
library("GLMMadaptive")
set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time
# we constuct a data frame with the design:
# everyone has a baseline measurment, and then measurements at random follow-up times
DF <- data.frame(id = rep(seq_len(n), each = K),
time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))
# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = DF)
Z <- model.matrix(~ time, data = DF)
# design matrices for the fixed and random effects zero part
X_zi <- model.matrix(~ sex, data = DF)
Z_zi <- model.matrix(~ 1, data = DF)
betas <- c(-2.13, -0.25, 0.24, -0.05) # fixed effects coefficients non-zero part
sigma <- 0.5 # standard deviation error terms non-zero part
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.1 # variance of random slopes non-zero part
D33 <- 0.4 # variance of random intercepts zero part
# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)), rnorm(n, sd = sqrt(D33)))
# linear predictor non-zero part
eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF$id, 1:2, drop = FALSE]))
# linear predictor zero part
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[DF$id, 3, drop = FALSE]))
# we simulate log-normal longitudinal data
DF$y <- exp(rnorm(n * K, mean = eta_y, sd = sigma))
# we set the zeros from the logistic regression
DF$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0
###############################################################################
km1 <- mixed_model(y ~ sex * time, random = ~ 1 | id, data = DF,
family = hurdle.lognormal(),
zi_fixed = ~ sex)
km1
nDF <- with(DF, expand.grid(time = seq(min(time), max(time), length.out = 15),
sex = levels(sex)))
plot_data <- effectPlotData(km1, nDF)
library("lattice")
xyplot(pred + low + upp ~ time | sex, data = plot_data,
type = "l", lty = c(1, 2, 2), col = c(2, 1, 1), lwd = 2,
xlab = "Follow-up time", ylab = "")
local({
km1$Funs$mu_fun <- function (eta) {
pmax(exp(eta + 0.5 * exp(2 * km1$phis)), .Machine$double.eps)
}
km1$family$linkfun <- function (mu) log(mu)
plot_data <- effectPlotData(km1, nDF)
xyplot(exp(pred) + exp(low) + exp(upp) ~ time | sex, data = plot_data,
type = "l", lty = c(1, 2, 2), col = c(2, 1, 1), lwd = 2,
xlab = "Follow-up time", ylab = "")
})
In case someone comes across the same error: I was filtering data from my data frame within the model call, which caused the dimensions of the model and of the variables in the data frame to not match. I applied the same filtering to the new data frame. (I've since moved to a completely new data frame that only includes trials actually used by the model, so that no filtering is needed at any step.)
m <- mixed_model(Y ~ X, random = ~ 1 | Subject,
                 data = data_combined_temp_Fix_Num3[data_combined_temp_Fix_Num3$Z >= 4 &
                                                      data_combined_temp_Fix_Num3$ZZ >= 4, ],
                 family = hurdle.lognormal,
                 n_phis = 1, zi_fixed = ~ X, zi_random = ~ 1 | Subject,
                 na.action = na.exclude)
nDF <- with(data_combined_temp_Fix_Num3,
            expand.grid(X = seq(min(X[data_combined_temp_Fix_Num3$Z >= 4 &
                                        data_combined_temp_Fix_Num3$ZZ >= 4]),
                                max(X[data_combined_temp_Fix_Num3$Z >= 4 &
                                        data_combined_temp_Fix_Num3$ZZ >= 4]),
                                length.out = 908),
                        Y = levels(Y)))
effectPlotData(m, nDF)
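A minimal sketch of the "filter once" approach described above (dat is a name I've introduced; everything else follows the question's code):
# Sketch: filter a single data frame up front so the model and the new data
# frame are guaranteed to see the same observations.
dat <- subset(data_combined_temp_Fix_Num3, Z >= 4 & ZZ >= 4)
m <- mixed_model(Y ~ X, random = ~ 1 | Subject, data = dat,
                 family = hurdle.lognormal,
                 n_phis = 1, zi_fixed = ~ X, zi_random = ~ 1 | Subject,
                 na.action = na.exclude)
nDF <- with(dat, expand.grid(X = seq(min(X), max(X), length.out = 908),
                             Y = levels(Y)))
effectPlotData(m, nDF)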

Adding probability curve on top of density histogram

I am trying to add a probability curve on top of the histogram, but it does not seem to work. I want to add a probability line so I can show the density on the left axis and the probability of the event on the right.
The code I have so far:
x <- Delays_weather0$dif
h <- hist(x, breaks = 10, col = "red", xlab = "Delays",
          main = "Flight Delays")
and the probability curve that I want to add:
my <- pnorm(-18:265, mean = mean(Delays_weather0$dif),
            sd = sd(Delays_weather0$dif), lower.tail = FALSE)
plot(my, type = "l")
I hope this is understandable
We don't have access to the Delays_weather0 dataset, so I'll use the first 1,000 observations of dep_delay from the flights dataset in the nycflights13 package.
Since hist() in R plots frequencies by default, I'll multiply the probabilities by the number of observations, i.e. 1,000, to make the two graphs comparable.
First, I use the lines() function.
library(nycflights13)

dataset <- flights$dep_delay[1:1000]

hist(x = dataset,
     breaks = 10,
     col = "red",
     xlab = "Delays",
     main = "Flight Delays")

range_dataset <- range(dataset, na.rm = TRUE)
equidistant_points_in_range <- seq(from = range_dataset[1],
                                   to = range_dataset[2],
                                   length.out = length(dataset))
upper_cdf_probabilities <- pnorm(q = equidistant_points_in_range,
                                 mean = mean(dataset, na.rm = TRUE),
                                 sd = sd(dataset, na.rm = TRUE),
                                 lower.tail = FALSE)
# plot the scaled upper-tail probabilities against the same x values
lines(x = equidistant_points_in_range,
      y = length(dataset) * upper_cdf_probabilities,
      col = "blue")
Another way using the curve function.
dataset <- nycflights13::flights$dep_delay[1:1000]
range_dataset <- range(dataset,
na.rm = TRUE)
upper_tail_probability <- function(x)
{
pnorm(q = x,
mean = mean(x = dataset,
na.rm = TRUE),
sd = sd(x = dataset,
na.rm = TRUE),
lower.tail = FALSE)
}
vectorized_upper_tail_probability <- Vectorize(FUN = upper_tail_probability)
hist(x = dataset,
freq = FALSE,
col = "red",
xlab = "Delays",
main = "Flight Delays")
curve(expr = vectorized_upper_tail_probability,
from = range_dataset[1],
to = range_dataset[2],
n = 1000,
add = TRUE,
col = "blue")
Following up on @yarnabrina's reproducible example, here's how to (1) use a kernel density estimator rather than assuming normality, and (2) put a probability axis on the right side:
library(nycflights13)

npts <- 1000
dataset <- flights$dep_delay[1:npts]

par(las = 1, bty = "l",   ## cosmetic
    mar = c(5, 4, 2, 5))  ## expand right margin to make room for second axis
h0 <- hist(x = dataset,
           breaks = 100,
           col = "red",
           xlab = "Delay (min)",
           ylab = "",
           main = "",
           xlim = c(-50, 200))  ## cosmetic: leave out a few extreme values
## put axis label at *top* of axis
mtext(side = 2, at = 550, "Frequency")
## compute kernel density estimate
dd <- density(na.omit(dataset), adjust = 3)
dx <- diff(h0$mids)[1]  ## histogram bin width
## scale density to match count values
lines(dd$x, dd$y * npts * dx, lwd = 2, col = "blue")
## set up auxiliary axis
dbrks <- seq(0, 0.05, by = 0.01)
axis(side = 4, at = dbrks * npts * dx, labels = dbrks)
mtext(side = 4, at = 550, "Probability")  ## axis label

Creating a normal distribution from fitted parameters after fitting the cumulative distribution in R

After successfully fitting my cumulative data with a Gompertz function, I need to create a normal distribution from the fitted function.
This is the code so far:
df <- data.frame(x = c(0.01,0.011482,0.013183,0.015136,0.017378,0.019953,0.022909,0.026303,0.0302,0.034674,0.039811,0.045709,0.052481,0.060256,0.069183,0.079433,0.091201,0.104713,0.120226,0.138038,0.158489,0.18197,0.20893,0.239883,0.275423,0.316228,0.363078,0.416869,0.47863,0.549541,0.630957,0.724436,0.831764,0.954993,1.096478,1.258925,1.44544,1.659587,1.905461,2.187762,2.511886,2.884031,3.311311,3.801894,4.365158,5.011872,5.754399,6.606934,7.585776,8.709636,10,11.481536,13.182567,15.135612,17.378008,19.952623,22.908677,26.30268,30.199517,34.673685,39.810717,45.708819,52.480746,60.255959,69.183097,79.432823,91.201084,104.712855,120.226443,138.038426,158.489319,181.970086,208.929613,239.883292,275.42287,316.227766,363.078055,416.869383,478.630092,549.540874,630.957344,724.43596,831.763771,954.992586,1096.478196),
y = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00044816,0.00127554,0.00221488,0.00324858,0.00438312,0.00559138,0.00686054,0.00817179,0.00950625,0.01085188,0.0122145,0.01362578,0.01514366,0.01684314,0.01880564,0.02109756,0.0237676,0.02683182,0.03030649,0.0342276,0.03874555,0.04418374,0.05119304,0.06076553,0.07437854,0.09380666,0.12115065,0.15836926,0.20712933,0.26822017,0.34131335,0.42465413,0.51503564,0.60810697,0.69886817,0.78237651,0.85461023,0.91287236,0.95616228,0.98569093,0.99869001,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999))
library(drc)
fm <- drm(y ~ x, data = df, fct = G.3())

options(scipen = 10)  # avoid scientific notation on the x axis
plot(df$x, predict(fm), type = "l", log = "x", col = "blue",
     main = "Cumulative distribution function", xlab = "x", ylab = "y")
points(df, col = "red")
legend("topleft", inset = .05, legend = c("exp", "fit"),
       lty = c(NA, 1), col = c("red", "blue"), pch = c(1, NA), lwd = 1, bty = "n")
summary(fm)
This produces the following plot:
My idea now is to somehow transform this cumulative fit into a normal distribution. Any ideas on how I could do that?
While your original intention might be non-parametric, I suggest a parametric estimation method: the method of moments, which is widely used for problems like this because you have a specific parametric distribution (the normal) to fit. The idea is quite simple: from the fitted cumulative distribution function you can calculate the mean (E1 in my code) and the variance (the square of SD in my code), and then the problem is solved, because a normal distribution is completely determined by its mean and variance.
df <- data.frame(x=c(0.01,0.011482,0.013183,0.015136,0.017378,0.019953,0.022909,0.026303,0.0302,0.034674,0.039811,0.045709,0.052481,0.060256,0.069183,0.079433,0.091201,0.104713,0.120226,0.138038,0.158489,0.18197,0.20893,0.239883,0.275423,0.316228,0.363078,0.416869,0.47863,0.549541,0.630957,0.724436,0.831764,0.954993,1.096478,1.258925,1.44544,1.659587,1.905461,2.187762,2.511886,2.884031,3.311311,3.801894,4.365158,5.011872,5.754399,6.606934,7.585776,8.709636,10,11.481536,13.182567,15.135612,17.378008,19.952623,22.908677,26.30268,30.199517,34.673685,39.810717,45.708819,52.480746,60.255959,69.183097,79.432823,91.201084,104.712855,120.226443,138.038426,158.489319,181.970086,208.929613,239.883292,275.42287,316.227766,363.078055,416.869383,478.630092,549.540874,630.957344,724.43596,831.763771,954.992586,1096.478196),
y=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00044816,0.00127554,0.00221488,0.00324858,0.00438312,0.00559138,0.00686054,0.00817179,0.00950625,0.01085188,0.0122145,0.01362578,0.01514366,0.01684314,0.01880564,0.02109756,0.0237676,0.02683182,0.03030649,0.0342276,0.03874555,0.04418374,0.05119304,0.06076553,0.07437854,0.09380666,0.12115065,0.15836926,0.20712933,0.26822017,0.34131335,0.42465413,0.51503564,0.60810697,0.69886817,0.78237651,0.85461023,0.91287236,0.95616228,0.98569093,0.99869001,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999))
library(drc)
fm <- drm(y ~ x, data = df, fct = G.3())

options(scipen = 10)  # avoid scientific notation on the x axis
plot(df$x, predict(fm), type = "l", log = "x", col = "blue",
     main = "Cumulative distribution function", xlab = "x", ylab = "y")
points(df, col = "red")

# method of moments: approximate E(X) and E(X^2) from the fitted CDF
E1 <- sum((df$x[-1] + df$x[-length(df$x)]) / 2 * diff(predict(fm)))
E2 <- sum((df$x[-1] + df$x[-length(df$x)]) ^ 2 / 4 * diff(predict(fm)))
SD <- sqrt(E2 - E1 ^ 2)

points(df$x, pnorm((df$x - E1) / SD), col = "green")
legend("topleft", inset = .05, legend = c("exp", "fit", "method of moments"),
       lty = c(NA, 1, NA), col = c("red", "blue", "green"), pch = c(1, NA, 1),
       lwd = 1, bty = "n")
summary(fm)
And the estimation results:
## > E1 (mean of fitted normal distribution)
## [1] 65.78474
## > E2 (second moment of fitted normal distribution)
## [1] 5792.767
## > SD (standard deviation of fitted normal distribution)
## [1] 38.27707
## > SD ^ 2 (variance of fitted normal distribution)
## [1] 1465.134
Edit: updated method to calculate moments from the CDF fitted by drc. The function moment defined below computes moment estimates using the formula for a non-negative continuous random variable, E(X^k) = k * integral of x^(k-1) * (1 - cdf(x)) dx. These are the best estimates I can get from the fitted CDF. The fit is not very good when x is near zero, for the reasons in the original dataset that I discussed in the comments.
df <- data.frame(x=c(0.01,0.011482,0.013183,0.015136,0.017378,0.019953,0.022909,0.026303,0.0302,0.034674,0.039811,0.045709,0.052481,0.060256,0.069183,0.079433,0.091201,0.104713,0.120226,0.138038,0.158489,0.18197,0.20893,0.239883,0.275423,0.316228,0.363078,0.416869,0.47863,0.549541,0.630957,0.724436,0.831764,0.954993,1.096478,1.258925,1.44544,1.659587,1.905461,2.187762,2.511886,2.884031,3.311311,3.801894,4.365158,5.011872,5.754399,6.606934,7.585776,8.709636,10,11.481536,13.182567,15.135612,17.378008,19.952623,22.908677,26.30268,30.199517,34.673685,39.810717,45.708819,52.480746,60.255959,69.183097,79.432823,91.201084,104.712855,120.226443,138.038426,158.489319,181.970086,208.929613,239.883292,275.42287,316.227766,363.078055,416.869383,478.630092,549.540874,630.957344,724.43596,831.763771,954.992586,1096.478196),
y=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00044816,0.00127554,0.00221488,0.00324858,0.00438312,0.00559138,0.00686054,0.00817179,0.00950625,0.01085188,0.0122145,0.01362578,0.01514366,0.01684314,0.01880564,0.02109756,0.0237676,0.02683182,0.03030649,0.0342276,0.03874555,0.04418374,0.05119304,0.06076553,0.07437854,0.09380666,0.12115065,0.15836926,0.20712933,0.26822017,0.34131335,0.42465413,0.51503564,0.60810697,0.69886817,0.78237651,0.85461023,0.91287236,0.95616228,0.98569093,0.99869001,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999))
library(drc)
fm <- drm(y ~ x, data = df, fct = G.3())

# k-th moment via E(X^k) = k * integral of x^(k-1) * (1 - F(x)) dx
moment <- function(k) {
  f <- function(x) {
    x ^ (k - 1) * pmax(0, 1 - predict(fm, data.frame(x = x)))
  }
  k * integrate(f, lower = min(df$x), upper = max(df$x))$value
}

E1 <- moment(1)
E2 <- moment(2)
SD <- sqrt(E2 - E1 ^ 2)
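As a quick check (my addition, not part of the original answer), the normal CDF implied by these moment estimates can be overlaid on the fitted curve:
# Sketch: compare the fitted Gompertz CDF with the normal CDF implied by
# the moment estimates E1 (mean) and SD (standard deviation).
plot(df$x, predict(fm), type = "l", log = "x", col = "blue",
     xlab = "x", ylab = "CDF")
curve(pnorm(x, mean = E1, sd = SD), add = TRUE, col = "green")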
I was thinking of the cumdiff (for lack of a better term). The link helped a lot.
EDIT
plot(df$x[-1], Mod(df$y[-length(df$y)] - df$y[-1]), log = "x", type = "b",
     main = "Normal distribution for original data",
     xlab = "x", ylab = "y")
yielding:
ADDITION
In order to get the Gaussian from the fitted function:
df$y_pred <- predict(fm)
plot(df$x[-1], Mod(df$y_pred[-length(df$y_pred)] - df$y_pred[-1]), log = "x",
     type = "b", main = "Normal distribution for fitted function",
     xlab = "x", ylab = "y")
yielding:
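One caveat worth adding (my note, not from the original answers): diff() of a CDF gives the probability mass in each interval, and since the x grid here is log-spaced, the intervals have very different widths. Dividing by the interval widths turns the differences into an actual density estimate:
# Sketch: convert differenced CDF values into a density estimate by dividing
# by the (unequal, log-spaced) interval widths.
y_pred <- predict(fm)
dens <- diff(y_pred) / diff(df$x)
plot(df$x[-1], dens, log = "x", type = "b",
     main = "Density recovered from the fitted CDF",
     xlab = "x", ylab = "density")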

Plotting the CDF and Quantile Functions Given the PDF

How would I plot the CDF and quantile functions in R, given the PDF? Currently I have the following (but I think there must be a better way to do it):
## Probability Density Function
p <- function(x) {
  result <- (x^2) / 9
  result[x < 0 | x > 3] <- 0
  result
}
plot(p, xlim = c(0, 3), main = "Probability Density Function")

## Cumulative Distribution Function
F <- function(a = 0, b) {
  result <- ((b^3) / 27) - ((a^3) / 27)
  result[a < 0] <- 0
  result[b > 3] <- 1
  result
}
plot(function(x) F(0, x), xlim = c(0, 3), main = "Cumulative Distribution Function")

## Quantile Function
Finv <- function(p) {
  3 * p^(1/3)
}
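(Side note, not from the original post: since F(x) = x^3/27 on [0, 3], the closed-form inverse is indeed Finv(p) = 3 * p^(1/3), and the quantile function can be plotted directly.)
# Sketch: plot the closed-form quantile function over probabilities in [0, 1].
plot(Finv, xlim = c(0, 1), main = "Quantile Function", xlab = "p", ylab = "x")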
As @dash2 suggested, getting the CDF requires integrating the PDF, in essence finding the area under the curve.
Here's a generic solution which should help. I am using a Gaussian distribution as an example; you should be able to feed it any generic function.
Note that the quantiles reported are approximations only. Also, don't forget to look into the documentation for integrate().
# CDF Function
CDF <- function(FUNC = p, plot = TRUE, area = 0.5, LOWER = -10, UPPER = 10, SIZE = 1000) {
  # Create data
  x <- seq(LOWER, UPPER, length.out = SIZE)
  y <- FUNC(x)
  area.vec <- numeric(length(x))  # cumulative area up to each x
  for (i in 2:length(x)) {
    area.vec[i] <- integrate(FUNC, lower = x[1], upper = x[i])$value
  }
  # Quantile: the x value where the cumulative area is closest to 'area'
  quantile <- x[which.min(abs(area.vec - area))]
  # Plot if requested
  if (plot == TRUE) {
    # PDF
    par(mfrow = c(1, 2))
    plot(x, y, type = "l", main = "PDF", col = "indianred", lwd = 2)
    grid()
    # CDF
    plot(x, area.vec, type = "l", main = "CDF", col = "slateblue",
         xlab = "X", ylab = "CDF", lwd = 2)
    # Quantile
    mtext(text = paste("Quantile at", area, "=", round(quantile, 3)), side = 3)
    grid()
    par(mfrow = c(1, 1))
  }
}
# Sample data
# PDF Function - Gaussian distribution
p <- function(x, SD = 1, MU = 0) {
  (1 / (SD * sqrt(2 * pi))) * exp(-0.5 * ((x - MU) / SD)^2)
}

# Call to function
CDF(p, area = 0.5, LOWER = -5, UPPER = 5)
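The function above reports a single approximate quantile. To get a full quantile function, one option (a sketch of mine, not from the original answer) is to invert the numerically integrated CDF with uniroot():
# Sketch: quantile function by numerical inversion of the integrated PDF.
cdf <- function(q, LOWER = -10) integrate(p, lower = LOWER, upper = q)$value
quantile_fn <- function(prob, LOWER = -10, UPPER = 10) {
  uniroot(function(q) cdf(q, LOWER) - prob, interval = c(LOWER, UPPER))$root
}
quantile_fn(0.5)    # ~0 for the Gaussian p() above
quantile_fn(0.975)  # ~1.96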

Drawing a regression surface with an interaction in a 3D figure in R

Using car::scatter3d(), I am trying to create a 3D figure with a regression surface showing an interaction between a categorical and a continuous variable. Partly following the code here, I obtained the figure below.
The figure is obviously wrong in that the regression surface does not reach one of the two values of the categorical variable. The problem probably lies in the call to rgl::persp3d() (the last block of the code below), but I have not been able to identify what exactly I'm doing wrong. Could someone let me know what I'm missing and how to fix the problem?
library(rgl)
library(car)

n <- 100
set.seed(1)
x <- runif(n, 0, 10)
set.seed(1)
z <- sample(c(0, 1), n, replace = TRUE)
set.seed(1)
y <- 0.5 * x + 0.1 * z + 0.3 * x * z + rnorm(n, sd = 1.5)
d <- data.frame(x, z, y)

scatter3d(y ~ x + z, data = d,
          xlab = "continuous", zlab = "categorical", ylab = "outcome",
          residuals = FALSE, surface = FALSE)

d2 <- d
d2$x <- d$x / (max(d$x) - min(d$x))
d2$y <- d$y / (max(d$y) - min(d$y))
mod <- lm(y ~ x * z, data = d2)

grd <- expand.grid(x = unique(d2$x), z = unique(d2$z))
grd$pred <- predict(mod, newdata = grd)
grd <- grd[order(grd$z, grd$x), ]

# The problem is likely to lie somewhere below.
persp3d(x = unique(grd$x), y = unique(grd$z),
        z = matrix(grd$pred, length(unique(grd$z)), length(unique(grd$x))),
        alpha = 0.5,
        col = "blue",
        add = TRUE,
        xlab = "", ylab = "", zlab = "")
I prefer sticking to car::scatter3d() for the original graph because I have already made several figures with it and want this one to be consistent with them.
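I am not one of the original posters, but one likely culprit (a sketch under that assumption, untested against the exact figure): rgl::persp3d() follows persp() in expecting z to be a matrix with dim c(length(x), length(y)), whereas the code above builds the transpose. Something along these lines should orient the surface correctly, although aligning it with scatter3d()'s internal axis scaling may still need extra work:
# Sketch: persp3d() expects dim(z) == c(length(x), length(y)).
xs <- sort(unique(grd$x))
zs <- sort(unique(grd$z))
# grd is ordered by (z, x), so column-major filling puts each z level in its
# own column, matching rows = xs and columns = zs.
pred_mat <- matrix(grd$pred, nrow = length(xs), ncol = length(zs))
persp3d(x = xs, y = zs, z = pred_mat,
        alpha = 0.5, col = "blue", add = TRUE,
        xlab = "", ylab = "", zlab = "")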
