Plotting multiple lm() models in one plot - r

I have fitted 6 lm() models and 1 gam() model on the same dataset.
Now I want to plot them all in one plot on top of each other. Can I do this without defining the models again in ggplot?
My case is this
I have
model1 <- lm(y~1, data = data) %>% coef()
model2 <- lm(y~x, data = data) %>% coef()
model3 <- lm(y~abs(x), data = data) %>% coef()
...
model7 <- gam(y~s(x), data = data) %>% coef()
can I feed the stored coefficients of my models to ggplot?
ggplot(data, mapping = aes(x = x, y = y)) +
geom_point() +
geom_abline(model1) +
geom_abline(model2) +
....
Or do Is the only way to plot the model prediction lines to manualy fill out the parameters like this:
ggplot(data, mapping = aes(x = x, y = y)) +
geom_point() +
geom_abline(intercept = model1[1]) +
geom_abline(slope = model2[2], intercept = model2[1]) +
geom_abline(slope = model3[2], intercept = model3[1]) +
...
Example code
set.seed(123)
x <- rnorm(50)
y <- rweibull(50,1)
d <- as.data.frame(cbind(x,y))
model1 <- coef(lm(y~1, data = d))
model2 <- coef(lm(y~x, data = d))
model3 <- coef(lm(y~abs(x), data = d))
Including the SE for each line/model and a legend would be welcome as well.

In order for this to work, you really need to save the whole model. So if we assume you have the entire model
# set.seed(101) used for sample data
model1 <- lm(y~1, data = d)
model2 <- lm(y~x, data = d)
model3 <- lm(y~abs(x), data = d)
We can write a helper function to predict new values from these models over a the given range of x values. Here's such a function
newvalsforx <- function(x) {
xrng <- seq(min(x), max(x), length.out=100)
function(m) data.frame(x=xrng, y=predict(m, data.frame(x=xrng)))
}
pred <- newvals(d$x)
This pred() will make predictions from the models over the observed range of x. We can then use these as new data to pass to geom_lines that we can add to a plot. For example
ggplot(d, aes(x,y)) +
geom_point() +
geom_line(data=pred(model1), color="red") +
geom_line(data=pred(model2), color="blue") +
geom_line(data=pred(model3), color="green")
This gives me

Related

Plot combining regression coefficients (partial derivatives) with CIs in R, lincom + coefplot or plotbeta?

Most of the time we run a regression with interactive terms, we are interested in a partial derivative. For example, consider the model below,
If I am interested to know the effect of X1 on P(Y), or the partial derivative of X1 on P(Y), I need the following combination of coefficients:
Instead of calculating it by hand, I can use, for example, the lincom function in R to calculate linear combination of regression parameters. But I would like not only to know the numbers from calculations like this; I would like to plot them. The problem is, if I am using a R package to plot coefficients (e.g., coefplot) it plots the coefficients from my model, but with no option for linear combination of coefficients. Is there any way to combine the lincom function (or other function that calculates combination of parameter) with coefplot (or other coefficient plot packages with this option)?
Of course, in the example above I only consider the derivative of X1, and if I plot it I will have a plot with one dot and its confidence intervals only, but I would like to show in the plot the coefficients for the partial derivatives of X1, X2, and Z, as in the example below.
Coefficients plot (the one I have):
Combination of parameters or partial derivatives plot (the one I am trying to get):
I discovered that Stata has a function that does what I am looking for, called "plotbeta." Does R have something similar?
Here's a start. This defined a function called plotBeta(), the ... are arguments that get passed down to geom_text() for the estimate text.
plotBeta <- function(mod, confidence_level = .95, include_est=TRUE, which.terms=NULL, plot=TRUE, ...){
require(glue)
require(ggplot2)
b <- coef(mod)
mains <- grep("^[^:]*$", names(b), value=TRUE)
mains.ind <- grep("^[^:]*$", names(b))
if(!is.null(which.terms)){
if(!(all(which.terms %in% mains)))stop("Not all terms in which.terms are in the model\n")
ins <- match(which.terms, mains)
mains <- mains[ins]
mains.ind <- mains.ind[ins]
}
icept <- grep("Intercept", mains)
if(length(icept) > 0){
mains <- mains[-icept]
mains.ind <- mains.ind[-icept]
}
if(inherits(mod, "lm") & !inherits(mod, "glm")){
crit <- qt(1-(1-confidence_level)/2, mod$df.residual)
}else{
crit <- qnorm(1-(1-confidence_level)/2)
}
out.df <- NULL
for(i in 1:length(mains)){
others <- grep(glue("^{mains[i]}:"), names(b))
others <- c(others, grep(glue(":{mains[i]}:"), names(b)))
others <- c(others, grep(glue(":{mains[i]}$"), names(b)))
all.inds <- c(mains.ind[i], others)
ones <- rep(1, length(all.inds))
est <- c(b[all.inds] %*% ones)
se.est <- sqrt(c(ones %*% vcov(mod)[all.inds, all.inds] %*% ones))
lower <- est - crit*se.est
upper <- est + crit*se.est
tmp <- data.frame(var = mains[i],
lab = glue("dy/d{mains[i]} = {paste('B', all.inds, sep='', collapse=' + ')}"),
labfac = i,
est = est,
se.est = se.est,
lower = lower,
upper=upper)
tmp$est_text <- sprintf("%.2f (%.2f, %.2f)", tmp$est, tmp$lower, tmp$upper)
out.df <- rbind(out.df, tmp)
}
out.df$labfac <- factor(out.df$labfac, labels=out.df$lab)
if(!plot){
return(out.df)
}else{
g <- ggplot(out.df, aes(x=est, y=labfac, xmin=lower, xmax=upper)) +
geom_vline(xintercept=0, lty=2, size=.25, col="gray50") +
geom_errorbarh(height=0) +
geom_point() +
ylab("") + xlab("Estimates Combined") +
theme_classic()
if(include_est){
g <- g + geom_text(aes(label=est_text), vjust=0, ...)
}
g
}
}
Here's an example with some made-up data:
set.seed(2101)
dat <- data.frame(
X1 = rnorm(500),
X2 = rnorm(500),
Z = rnorm(500),
W = rnorm(500)
)
dat <- dat %>%
mutate(yhat = X1 - X2 + X1*X2 - X1*Z + .5*X2*Z - .75*X1*X2*Z + W,
y = yhat + rnorm(500, 0, 1.5))
mod <- lm(y ~ X1*X2*Z + W, data=dat)
plotBeta(mod, position=position_nudge(y=.1), size=3) + xlim(-2.5,2)
EDIT: comparing two models
Using the newly-added plot=FALSE, we can generate the data and then combine and plot.
mod <- lm(y ~ X1*X2*Z + W, data=dat)
p1 <- plotBeta(mod, plot=FALSE)
mod2 <- lm(y ~ X1*X2 + Z + W, data=dat)
p2 <- plotBeta(mod2, plot=FALSE)
p1 <- p1 %>% mutate(model = factor(1, levels=1:2,
labels=c("Model 1", "Model 2")))
p2 <- p2 %>% mutate(model = factor(2, levels=1:2,
labels=c("Model 1", "Model 2")))
p_both <- bind_rows(p1, p2)
p_both <- p_both %>%
arrange(var, model) %>%
mutate(labfac = factor(1:n(), labels=paste("dy/d", var, sep="")))
ggplot(p_both, aes(x=est, y=labfac, xmin=lower, xmax=upper)) +
geom_vline(xintercept=0, lty=2, size=.25, col="gray50") +
geom_linerange(position=position_nudge(y=c(-.1, .1))) +
geom_point(aes(shape=model),
position=position_nudge(y=c(-.1, .1))) +
geom_text(aes(label=est_text), vjust=0,
position=position_nudge(y=c(-.2, .15))) +
scale_shape_manual(values=c(1,16)) +
ylab("") + xlab("Estimates Combined") +
theme_classic()

How to convert log function in RStudio?

fit1 = lm(price ~ . , data = car)
fit2 = lm(log(price) ~ . , data = car)
I'm not sure how to convert log(price) to price in fit2 Won't it just become the same thing as fit1 if I do convert it? Please help.
Let's take a very simple example. Suppose I have some data points like this:
library(ggplot2)
df <- data.frame(x = 1:10, y = (1:10)^2)
(p <- ggplot(df, aes(x, y)) + geom_point())
I want to try to fit a model to them, but don't know what form this should take. I try a linear regression first and plot the resultant prediction:
mod1 <- lm(y ~ x, data = df)
(p <- p + geom_line(aes(y = predict(mod1)), color = "blue"))
Next I try a linear regression on log(y). Whatever results I get from predictions from this model will be predicted values of log(y). But I don't want log(y) predictions, I want y predictions, so I need to take the 'anti-log' of the prediction. We do this in R by doing exp:
mod2 <- lm(log(y) ~ x, data = df)
(p <- p + geom_line(aes(y = exp(predict(mod2))), color = "red"))
But we can see that we have different regression lines. That's because when we took the log of y, we were effectively fitting a straight line on the plot of log(y) against x. When we transform the axis back to a non-log axis, our straight line becomes an exponential curve. We can see this more clearly by drawing our plot again with a log-transformed y axis:
p + scale_y_log10(limits = c(1, 500))
Created on 2020-08-04 by the reprex package (v0.3.0)

How to plot 3 models in one Figure in R?

I'm new with R and I have fit 3 models for my data as follows:
Model 1: y = a(x) + b
lm1 = lm(data$CBI ~ data$dNDVI)
Model 2: y = a(x)2 + b(x) + c
lm2 <- lm(CBI ~ dNDVI + I(dNDVI^2), data=data)
Model 3: y = x(a|x| + b)–1
lm3 = nls(CBI ~ dNDVI*(a*abs(dNDVI) + b) - 1, start = c(a = 1.5, b = 2.7), data = data)
Now I would like to plot all these three models in R but I could not find the way to do it, can you please help me? I have tried with the first two models as follow and it work but I don't know how to add the Model 3 on it:
ggplot(data = data, aes(x = dNDVI, y = CBI)) +
geom_point() +
geom_smooth(method = lm, formula = y ~ x, size = 1, se = FALSE) +
geom_smooth(method = lm, formula = y ~ x + I(x^2), size = 1, se = FALSE ) +
theme_bw()
I also would like to add a legend which show 3 different colours or types of lines/curves for the 3 models as well. Can you please guide me how to make it in the figure?
Using iris as a dummy set to represent the three models:
new.dat <- data.frame(Sepal.Length=seq(min(iris$Sepal.Length),
max(iris$Sepal.Length), length.out=50)) #new data.frame to predict the fitted values for each model
m1 <- lm(Petal.Length ~ Sepal.Length, iris)
m2 <- lm(Petal.Length ~ Sepal.Length + I(Sepal.Length^2), data=iris)
m3 <- nls(Petal.Length ~ Sepal.Length*(a*abs(Sepal.Length) + b) - 1,
start = c(a = 1.5, b = 2.7), data = iris)
new.dat$m1.fitted <- predict(m1, new.dat)
new.dat$m2.fitted <- predict(m2, new.dat)
new.dat$m3.fitted <- predict(m3, new.dat)
new.dat <- new.dat %>% gather(var, val, m1.fitted:m3.fitted) #stacked format of fitted data of three models (to automatically generate the legend in ggplot)
ggplot(new.dat, aes(Sepal.Length, val, colour=var)) +
geom_line()

Fitting a 3D surface to a dataset of points [R]

I have been trying to fit a polynomial surface to a set of point with 3 coordinates.
Let the data be:
DATA <- with(mtcars, as.data.frame(cbind(1:32, wt,disp,mpg)))
I have been trying to draw a surface using:
plot3d from rgl package,
using rsm package,
scatterplot3d package.
For example:
library(scatterplot3d)
attach(mtcars)
DATA <- as.data.frame(cbind(1:32, wt,disp,mpg))
scatterplot3d(wt,disp,mpg, main="3D Scatterplot")
model <- loess(mpg ~wt + disp, data=DATA)
x <-range(DATA$wt)
x <- seq(x[1], x[2], length.out=50)
y <- range(DATA$disp)
y <- seq(y[1], y[2], length.out=50)
z <- outer(x,y,
function(wt,disp)
predict(model, data.frame(wt,disp)))
z
p <- persp(x,y,z, theta=30, phi=30,
col="lightblue",expand = 0.5,shade = 0.2,
xlab="wt", ylab="disp", zlab="mpg")
I have also tried using surf.ls function:
surf.ls(2,DATA[,2],DATA[,3],DATA[,4])
But what I got looks like this:
I don't really know how to transform it to a 3D plot and more importantly, how to get the formula for the best fit surface obtained.
I would really appreciate your help.
PS I have deleted my last post and included more details in this one.
Try this:
attach(mtcars)
DATA <- as.data.frame(cbind(1:32, wt,disp,mpg))
x_wt <- DATA$wt
y_disp <- DATA$disp
z_mpg <- DATA$mpg
fit <- lm(z_mpg ~ poly(x_wt, y_disp, degree = 2), data = DATA)
To plot with rsm, use the following:
library(rsm)
image(fit, y_disp ~ x_wt)
contour(fit, y_disp ~ x_wt)
persp(fit, y_disp ~ x_wt, zlab = "z_mpg")
To plot with ggplot, use the following:
## ggplot
# Use rsm package to create surface model.
library(rsm)
SurfMod <- contour(fit, y_disp ~ x_wt)
# extract list values from rsm Surface Model
Xvals <- SurfMod$`x_wt ~ y_disp`[1]
Yvals <- SurfMod$`x_wt ~ y_disp`[2]
Zvals <- SurfMod$`x_wt ~ y_disp`[3]
# Construct matrix with col and row names
SurfMatrix <- Zvals$z
colnames(SurfMatrix) <- Yvals$y
rownames(SurfMatrix) <- Xvals$x
# Convert matrix to data frame
library(reshape2)
SurfDF <- melt(SurfMatrix)
library(ggplot2)
gg <- ggplot(data = SurfDF) +
geom_tile(data = SurfDF, aes(Var1, Var2,z = value, fill = value)) +
stat_contour(data = SurfDF, aes(Var1, Var2, z = value, color = ..level..)) +
scale_colour_gradient(low = "green", high = "red") +
geom_point(data = DATA, aes(wt, disp, z = mpg, color = mpg)) +
geom_text(data = DATA, aes(wt, disp,label=mpg),hjust=0, vjust=0) +
scale_fill_continuous(name="mpg") +
xlab("x_wt") +
ylab("y_disp")
library(directlabels)
direct.label.ggplot(gg, "angled.endpoints")
To see all of the available direct.label methods, go to http://directlabels.r-forge.r-project.org/docs/index.html

Stack coefficient plots in R

I'm running a set of models with the same independent variables but different dependent variables and would like to create a set of coefficient plots in one figures in which each model gets its own panel. The following code provides intuition but in this all of the models are integrated into one figure rather than have 3 unique panels side-by-side in one figure:
require("coefplot")
set.seed(123)
dat <- data.frame(x = rnorm(100), z = rnorm(100), y1 = rnorm(100), y2 = rnorm(100), y3 = rnorm(100))
mod1 <- lm(y1 ~ x + z, data = dat)
mod2 <- lm(y2 ~ x + z, data = dat)
mod3 <- lm(y3 ~ x + z, data = dat)
multiplot(mod1,mod2, mod3)
Which generates this plot:
Any thoughts on how to get them to panel next to each other in one figure? Thanks!
I haven't used the coefplot package before, but you can create a coefficient plot directly in ggplot2.
set.seed(123)
dat <- data.frame(x = rnorm(100), z = rnorm(100), y1 = rnorm(100), y2 = rnorm(100), y3 = rnorm(100))
mod1 <- lm(y1 ~ x + z, data = dat)
mod2 <- lm(y2 ~ x + z, data = dat)
mod3 <- lm(y3 ~ x + z, data = dat)
## Create data frame of model coefficients and standard errors
# Function to extract what we need
ce = function(model.obj) {
extract = summary(get(model.obj))$coefficients[ ,1:2]
return(data.frame(extract, vars=row.names(extract), model=model.obj))
}
# Run function on the three models and bind into single data frame
coefs = do.call(rbind, sapply(paste0("mod",1:3), ce, simplify=FALSE))
names(coefs)[2] = "se"
# Faceted coefficient plot
ggplot(coefs, aes(vars, Estimate)) +
geom_hline(yintercept=0, lty=2, lwd=1, colour="grey50") +
geom_errorbar(aes(ymin=Estimate - se, ymax=Estimate + se, colour=vars),
lwd=1, width=0) +
geom_point(size=3, aes(colour=vars)) +
facet_grid(. ~ model) +
coord_flip() +
guides(colour=FALSE) +
labs(x="Coefficient", y="Value") +
theme_grey(base_size=15)

Resources