Fitting a 3D surface to a dataset of points [R] - r

I have been trying to fit a polynomial surface to a set of point with 3 coordinates.
Let the data be:
DATA <- with(mtcars, as.data.frame(cbind(1:32, wt,disp,mpg)))
I have been trying to draw a surface using:
plot3d from rgl package,
using rsm package,
scatterplot3d package.
For example:
library(scatterplot3d)
attach(mtcars)
DATA <- as.data.frame(cbind(1:32, wt,disp,mpg))
scatterplot3d(wt,disp,mpg, main="3D Scatterplot")
model <- loess(mpg ~wt + disp, data=DATA)
x <-range(DATA$wt)
x <- seq(x[1], x[2], length.out=50)
y <- range(DATA$disp)
y <- seq(y[1], y[2], length.out=50)
z <- outer(x,y,
function(wt,disp)
predict(model, data.frame(wt,disp)))
z
p <- persp(x,y,z, theta=30, phi=30,
col="lightblue",expand = 0.5,shade = 0.2,
xlab="wt", ylab="disp", zlab="mpg")
I have also tried using surf.ls function:
surf.ls(2,DATA[,2],DATA[,3],DATA[,4])
But what I got looks like this:
I don't really know how to transform it to a 3D plot and more importantly, how to get the formula for the best fit surface obtained.
I would really appreciate your help.
PS I have deleted my last post and included more details in this one.

Try this:
attach(mtcars)
DATA <- as.data.frame(cbind(1:32, wt,disp,mpg))
x_wt <- DATA$wt
y_disp <- DATA$disp
z_mpg <- DATA$mpg
fit <- lm(z_mpg ~ poly(x_wt, y_disp, degree = 2), data = DATA)
To plot with rsm, use the following:
library(rsm)
image(fit, y_disp ~ x_wt)
contour(fit, y_disp ~ x_wt)
persp(fit, y_disp ~ x_wt, zlab = "z_mpg")
To plot with ggplot, use the following:
## ggplot
# Use rsm package to create surface model.
library(rsm)
SurfMod <- contour(fit, y_disp ~ x_wt)
# extract list values from rsm Surface Model
Xvals <- SurfMod$`x_wt ~ y_disp`[1]
Yvals <- SurfMod$`x_wt ~ y_disp`[2]
Zvals <- SurfMod$`x_wt ~ y_disp`[3]
# Construct matrix with col and row names
SurfMatrix <- Zvals$z
colnames(SurfMatrix) <- Yvals$y
rownames(SurfMatrix) <- Xvals$x
# Convert matrix to data frame
library(reshape2)
SurfDF <- melt(SurfMatrix)
library(ggplot2)
gg <- ggplot(data = SurfDF) +
geom_tile(data = SurfDF, aes(Var1, Var2,z = value, fill = value)) +
stat_contour(data = SurfDF, aes(Var1, Var2, z = value, color = ..level..)) +
scale_colour_gradient(low = "green", high = "red") +
geom_point(data = DATA, aes(wt, disp, z = mpg, color = mpg)) +
geom_text(data = DATA, aes(wt, disp,label=mpg),hjust=0, vjust=0) +
scale_fill_continuous(name="mpg") +
xlab("x_wt") +
ylab("y_disp")
library(directlabels)
direct.label.ggplot(gg, "angled.endpoints")
To see all of the available direct.label methods, go to http://directlabels.r-forge.r-project.org/docs/index.html

Related

Adding loess regresion line on a hexbin plot

I have been trying to find method to add a loess regression line on a hexbin plot. So far I do not have any success... Any suggestions?
My code is as follow:
bin<-hexbin(Dataset$a, Dataset$b, xbins=40)
plot(bin, main="Hexagonal Binning",
xlab = "a", ylab = "b",
type="l")
I would suggest using ggplot2 to build the plot.
Since you didn't include any example data, I've used the palmerpenguins package dataset for the example below.
library(palmerpenguins) # For the data
library(ggplot2) # ggplot2 for plotting
ggplot(penguins, aes(x = body_mass_g,
y = bill_length_mm)) +
geom_hex(bins = 40) +
geom_smooth(method = 'loess', se = F, color = 'red')
Created on 2021-01-05 by the reprex package (v0.3.0)
I don't have a solution for base, but it's possible to do this with ggplot. It should be possible with base too, but if you look at the documentation for ?hexbin, you can see the quote:
Note that when plotting a hexbin object, the grid package is used. You must use its graphics (or those from package lattice if you know how) to add to such plots.
I'm not familiar with how to modify these. I did try ggplotify to convert the base to ggplot and edit that way, but couldn't get the loess line added to the plot window properly.
So here is a solution with ggplot with some fake data that you can try on your Datasets:
library(hexbin)
library(ggplot2)
# fake data with a random walk, replace with your data
set.seed(100)
N <- 1000
x <- rnorm(N)
x <- sort(x)
y <- vector("numeric", length=N)
for(i in 2:N){
y[i] <- y[i-1] + rnorm(1, sd=0.1)
}
# current method
# In documentation for ?hexbin it says:
# "You must use its graphics (or those from package lattice if you know how) to add to such plots."
(bin <- hexbin(x, y, xbins=40))
plot(bin)
# ggplot option. Can play around with scale_fill_gradient to
# get the colour scale similar or use other ggplot options
df <- data.frame(x=x, y=y)
d <- ggplot(df, aes(x, y)) +
geom_hex(bins=40) +
scale_fill_gradient(low = "grey90", high = "black") +
theme_bw()
d
# easy to add a loess fit to the data
# span controls the degree of smoothing, decrease to make the line
# more "wiggly"
model <- loess(y~x, span=0.2)
fit <- predict(model)
loess_data <- data.frame(x=x, y=fit)
d + geom_line(data=loess_data, aes(x=x, y=y), col="darkorange",
size=1.5)
Here are two options; you will need to decide if you want to smooth over the raw data or the binned data.
library(hexbin)
library(grid)
# Some data
set.seed(101)
d <- data.frame(x=rnorm(1000))
d$y <- with(d, 2*x^3 + rnorm(1000))
Method A - binned data
# plot hexbin & smoother : need to grab plot viewport
# From ?hexVP.loess : "Fit a loess line using the hexagon centers of mass
# as the x and y coordinates and the cell counts as weights."
bin <- hexbin(d$x, d$y)
p <- plot(bin)
hexVP.loess(bin, hvp = p$plot.vp, span = 0.4, col = "red", n = 200)
Method B - raw data
# calculate loess predictions outside plot on raw data
l = loess(y ~ x, data=d, span=0.4)
xp = with(d, seq(min(x), max(x), length=200))
yp = predict(l, xp)
# plot hexbin
bin <- hexbin(d$x, d$y)
p <- plot(bin)
# add loess line
pushHexport(p$plot.vp)
grid.lines(xp, yp, gp=gpar(col="red"), default.units = "native")
upViewport()

Plotting multiple lm() models in one plot

I have fitted 6 lm() models and 1 gam() model on the same dataset.
Now I want to plot them all in one plot on top of each other. Can I do this without defining the models again in ggplot?
My case is this
I have
model1 <- lm(y~1, data = data) %>% coef()
model2 <- lm(y~x, data = data) %>% coef()
model3 <- lm(y~abs(x), data = data) %>% coef()
...
model7 <- gam(y~s(x), data = data) %>% coef()
can I feed the stored coefficients of my models to ggplot?
ggplot(data, mapping = aes(x = x, y = y)) +
geom_point() +
geom_abline(model1) +
geom_abline(model2) +
....
Or do Is the only way to plot the model prediction lines to manualy fill out the parameters like this:
ggplot(data, mapping = aes(x = x, y = y)) +
geom_point() +
geom_abline(intercept = model1[1]) +
geom_abline(slope = model2[2], intercept = model2[1]) +
geom_abline(slope = model3[2], intercept = model3[1]) +
...
Example code
set.seed(123)
x <- rnorm(50)
y <- rweibull(50,1)
d <- as.data.frame(cbind(x,y))
model1 <- coef(lm(y~1, data = d))
model2 <- coef(lm(y~x, data = d))
model3 <- coef(lm(y~abs(x), data = d))
Including the SE for each line/model and a legend would be welcome as well.
In order for this to work, you really need to save the whole model. So if we assume you have the entire model
# set.seed(101) used for sample data
model1 <- lm(y~1, data = d)
model2 <- lm(y~x, data = d)
model3 <- lm(y~abs(x), data = d)
We can write a helper function to predict new values from these models over a the given range of x values. Here's such a function
newvalsforx <- function(x) {
xrng <- seq(min(x), max(x), length.out=100)
function(m) data.frame(x=xrng, y=predict(m, data.frame(x=xrng)))
}
pred <- newvals(d$x)
This pred() will make predictions from the models over the observed range of x. We can then use these as new data to pass to geom_lines that we can add to a plot. For example
ggplot(d, aes(x,y)) +
geom_point() +
geom_line(data=pred(model1), color="red") +
geom_line(data=pred(model2), color="blue") +
geom_line(data=pred(model3), color="green")
This gives me

visreg: overlay two models in a single plot

I'm trying to draw in a single plot crude and adjusted GAM models using library visreg:
# Create DF
set.seed(123)
x1 = rnorm(2000)
z = 1 + 3*x1 + 3*exp(x1)
pr = 1/(1+exp(-z))
y = rbinom(2000,1,pr)
df = data.frame(y=y,x1=x1, x2=exp(x1)*z)
# Fitting GAMs
library(mgcv)
crude <- gam(y ~ s(x1), family=binomial(link=logit), data=df)
adj <- gam(y ~ s(x1) + s(x2), family=binomial(link=logit), data=df)
# Plot results using 'visreg'
library(visreg)
p.crude <- visreg(crude, scale='response', "x1", line.par = list(col = 'red'), gg=TRUE) + theme_bw()
p.adj <- visreg(adj, scale='response', "x1", gg=TRUE) + theme_bw()
Using gridExtra I can produce a two columns plot, however I would have a single plot which overlays the two model plots.
You can use the plot=FALSE parameter to get the data without the plots:
p.crude <- visreg(crude, scale='response', "x1", line.par = list(col = 'red'), plot=FALSE)
p.adj <- visreg(adj, scale='response', "x1", plot = FALSE)
And, then re-create it by hand:
dplyr::bind_rows(
dplyt::mutate(p.crude$fit, plt = "crude"),
dplyr::mutate(p.adj$fit, plt = "adj")
) -> fits
ggplot() +
geom_ribbon(
data = fits,
aes(x1, ymin=visregLwr, ymax=visregUpr, group=plt), fill="gray90"
) +
geom_line(data = fits, aes(x1, visregFit, group=plt, color=plt)) +
theme_bw()
https://github.com/pbreheny/visreg/blob/master/R/ggFactorPlot.R has all the other computations and geoms/aesthetics you can use in the recreation.

Having several fits in one plot (in R)

I was wondering how I can modify the following code to have a plot something like
data(airquality)
library(quantreg)
library(ggplot2)
library(data.table)
library(devtools)
# source Quantile LOESS
source("https://www.r-statistics.com/wp-content/uploads/2010/04/Quantile.loess_.r.txt")
airquality2 <- na.omit(airquality[ , c(1, 4)])
#'' quantreg::rq
rq_fit <- rq(Ozone ~ Temp, 0.95, airquality2)
rq_fit_df <- data.table(t(coef(rq_fit)))
names(rq_fit_df) <- c("intercept", "slope")
#'' quantreg::lprq
lprq_fit <- lapply(1:3, function(bw){
fit <- lprq(airquality2$Temp, airquality2$Ozone, h = bw, tau = 0.95)
return(data.table(x = fit$xx, y = fit$fv, bw = paste0("bw=", bw), fit = "quantreg::lprq"))
})
#'' Quantile LOESS
ql_fit <- Quantile.loess(airquality2$Ozone, jitter(airquality2$Temp), window.size = 10,
the.quant = .95, window.alignment = c("center"))
ql_fit_df <- data.table(x = ql_fit$x, y = ql_fit$y.loess, bw = "bw=1", fit = "Quantile LOESS")
I want to have all these fits in a plot.
geom_quantile can calculate quantiles using the rq method internally, so we don't need to create the rq_fit_df separately. However, the lprq and Quantile LOESS methods aren't available within geom_quantile, so I've used the data frames you provided and plotted them using geom_line.
In addition, to include the rq line in the color and linetype mappings and in the legend we add aes(colour="rq", linetype="rq") as a sort of "artificial" mapping inside geom_quantile.
library(dplyr) # For bind_rows()
ggplot(airquality2, aes(Temp, Ozone)) +
geom_point() +
geom_quantile(quantiles=0.95, formula=y ~ x, aes(colour="rq", linetype="rq")) +
geom_line(data=bind_rows(lprq_fit, ql_fit_df),
aes(x, y, colour=paste0(gsub("q.*:","",fit),": ", bw),
linetype=paste0(gsub("q.*:","",fit),": ", bw))) +
theme_bw() +
scale_linetype_manual(values=c(2,4,5,1,1)) +
labs(colour="Method", linetype="Method",
title="Different methods of estimating the 95th percentile by quantile regression")

Plot two regression equations (or more) on the same graph ggplot

I need to display on the same graph two linear regression equations and the coefficients (r, r², p, N). I did this using the facet_grid, but now the two curves can not be displayed separately.
I modified the code that was like facet_grid function:
equation = function(file) {
mod = lm(y ~ x,data=file)
mod_sum = summary(mod)
formula = sprintf("y= %.3f %+.3f*x", coef(mod)[1], coef(mod)[2])
r = mod_sum$r.squared
r2 = sprintf("r2= %.3f", r)
x = cor.test(~x + y,data=file)
r0 = sprintf("r= %.3f", x[4])
p1 = pf(mod_sum$fstatistic[1],mod_sum$fstatistic[2],mod_sum$fstatistic[3],lower.tail=F)
p =sprintf("p = %.3f", p1)
n0 = length(mod_sum$residual)
n1 = sprintf("N = %.f", n0)
data.frame(formula=formula, r=r0,r2=r2, p=p,n=n1, stringsAsFactors=FALSE)
}
equation_end = ddply(file, c("outlier"), equation)
The data of the two regressions are in the same column and are separated by the factor "outlier"
How can I display these equations on the same graph?
You can use annotate to place text on your figure
library(ggplot2)
ggplot(file, aes(x, y, color=outlier)) +
geom_point() +
annotate("text", c(-1,-1), c(3,4), label=equation_end$formula)
If you want the text the same color as some lines, try using geom_text,
ggplot(file, aes(x, y, color=outlier)) +
geom_point() +
geom_smooth(fill=NA) +
geom_text(data=equation_end, aes(x=c(-1,-1), y=c(3,4), label=formula), show_guide=F)
Data:
library(plyr)
x <- rnorm(100)
file <- data.frame(x=x, y=2*x + rnorm(100), outlier=factor(sample(0:1, 100, rep=T)))
equation_end = ddply(file, c("outlier"), equation)

Resources