Use ggplot to plot partial effects obtained with effects library - r

I would like to use ggplot to replicate the plots partial effects (with partial residuals), as obtained with the "effect" package. To do this I need to retrieve some information.
This is the plot I want to replicate with ggplot.
library(effects)
mod <- lm(log(prestige) ~ income:type + education, data=Prestige)
eff = effect("education", mod, partial.residuals=T)
plot(eff)
From the eff object I am able to retrieve the partial residuals, as eff$residuals, but they are not sufficient to replicate the plot. I think that what I need is the both the residuals, AND the marginal predicted effect. However I was not able to retrieve them from my eff object.
Otherwise I only have the residuals scores that cannot be plotted against the line of the marginal effect.
Any hint on how to retrieve this information?

You have almost all the information available. This would take some more time to generalize, but here's some code that results in a figure approximately like from the effects package. Notice that the smoother is off, but I didn't bother to dig up why.
The code should be self explanatory. I only copied function closest from the package.
mod <- lm(log(prestige) ~ income:type + education, data=Prestige)
eff = effect("education", mod, partial.residuals=T)
library(ggplot2)
library(gridExtra)
closest <- function(x, x0) apply(outer(x, x0, FUN=function(x, x0) abs(x - x0)), 1, which.min)
x.fit <- unlist(eff$x.all)
trans <- I
x <- data.frame(lower = eff$lower, upper = eff$upper, fit = eff$fit, education = eff$x$education)
xy <- data.frame(x = x.fit, y = x$fit[closest(trans(x.fit), x$education)] + eff$residuals)
g <- ggplot(x, aes(x = education, y = fit)) +
theme_bw() +
geom_line(size = 1) +
geom_point(data = xy, aes(x = x, y = y), shape = 1, col = "blue", size = 2) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.5) +
geom_smooth(data = xy, aes(x = trans(x), y = y),
method = "loess", span = 2/3, linetype = "dashed", se = FALSE)
grid.arrange(plot(eff), g, ncol = 2)

Related

How to fit non-linear function to data in ggplot2 using maximum likelihood model in R?

The data set (x.test, y.test) is an exponential fit. I'm trying to fit a custom non-linear function and attached is the code. The regular points plot just fine but I'm unable to get the fit line to work. Any suggestions?
x.test <- runif(50,2,8)
y.test <- 0.5^(x.test)
df <- data.frame(x.test, y.test)
library(ggpmisc)
my.formula <- y ~ lambda/ (1 + aii*x)
ggplot(data = df, aes(x=x.test,y=y.test)) +
geom_point(shape=21, fill="white", color="red", size=3) +
stat_smooth(method="nls",formula = y.test ~ lambda/ (1 + aii*x.test), method.args=list(start=c(lambda=1000,aii=-816.39)),se=F,color="red") +
geom_smooth(method="lm", formula = my.formula , col = "red") + stat_poly_eq(formula = my.formula, aes(label = stringr::str_wrap(paste(..eq.label.., ..rr.label.., sep = "~~~"))), parse = TRUE, size = 2.5, col = "red") + stat_function(fun=function (x.test){
y.test ~ lambda/ (1 + aii*x.test)}, color = "blue")
A few things:
you need to use y and x as the variable names in the formula argument to geom_smooth, regardless of what the names are in your data set
you need better starting values (see below)
there's a GLM trick you can use to fit this model; doesn't always work (can be numerically unstable), but it doesn't need starting values and will work more often than nls()
I don't think lm() and stat_poly_eq() are going to work as expected (or maybe at all) with a nonlinear formula ...
simulate data
(same as your code but using set.seed() - probably not important here but good practice)
set.seed(101)
x.test <- runif(50,2,8)
y.test <- 0.5^(x.test)
df <- data.frame(x.test, y.test)
attempt nls fit with your starting values
It's usually a good idea to troubleshoot by fitting any smoothing terms outside of ggplot2, so you have fewer layers to dig through to find the problems:
nls(y.test ~ lambda/(1+ aii*x.test),
start = list(lambda=1000,aii=-816.39),
data = df)
Error in nls(y.test ~ lambda/(1 + aii * x.test), start = list(lambda = 1000, :
singular gradient
OK, still doesn't work. Let's use glm() to get better starting values: we use an inverse-link GLM:
1/y = b0 + b1*x
y = 1/(b0 + b1*x)
= (1/b0)/(1 + (b1/b0)*x)
So:
g1 <- glm(y.test ~ x.test, family = gaussian(link = "inverse"))
s0 <- with(as.list(coef(g1)), list(lambda = 1/`(Intercept)`, aii = x.test/`(Intercept)`))
This gives lambda = -0.09, aii = -0.638 (with a little bit more work we could probably also figure out how to eyeball these by looking at the starting point and scale of the curve).
ggplot(data = df, aes(x=x.test,y=y.test)) +
geom_point(shape=21, fill="white", color="red", size=3) +
stat_smooth(method="nls",
formula = y ~ lambda/ (1 + aii*x),
method.args=list(start=s0),
se=FALSE,color="red") +
stat_smooth(method = "glm",
formula = y ~ x,
method.args = list(gaussian(link = "inverse")),
color = "blue", linetype = 2)

Plotting different models for different x value ranges in ggplot()

I am attempting to display a linear model for low x values and a non-linear model for higher x values. To do this, I will use DNase as an example:
library(ggplot2)
#Assinging DNase as a new dataframe:
data_1 <- DNase
#Creating a column that can distinguish low and high range values:
data_1$range <- ifelse(data_1$conc <5, "low", "high")
#Attempting to plot separate lines for low and high range values, and also facet_wrap by run:
ggplot(data_1, aes(x = conc, y = density, colour = range)) +
geom_point(size = 0.5) + stat_smooth(method = "nls",
method.args = list(formula = y ~ a*exp(b*x),
start = list(a = 0.8, b = 0.1)),
data = data_1,
se = FALSE) +
stat_smooth(method = 'lm', formula = 'y~0+x') +
facet_wrap(~Run)
However, as you can see, it seems to plot both the linear model and the non-linear model for both, and I can't quite figure out where to put information that would tell it to only plot one for each. Also, if possible, can I extend these models out to the full range of values on the x axis?
You can provide specific data to each geom. In this case use subset data_1 using range to only provide the relevant data to each stat_smooth() call (and the whole frame to geom_point()
ggplot(NULL, aes(x = conc, y = density, colour = range)) +
geom_point(data = data_1, size = 0.5) +
stat_smooth(data = subset(data_1, range == "high"),
method = "nls",
method.args = list(formula = y ~ a*exp(b*x),
start = list(a = 0.8, b = 0.1)),
se = FALSE) +
stat_smooth(data = subset(data_1, range == "low"), method = 'lm', formula = 'y~0+x') +
facet_wrap(~Run)
If you want to fit both models on all the data, then just calculate those manually in data_1 and plot manually.

How can I add confidence intervals to a scatterplot for a regression on two variables?

I need to create an insightful graphic with a regression line, data points, and confidence intervals. I am not looking for smoothed lines. I have tried multiple codes, but I just can't get it right.
I am looking for something like this:
Some codes I have tried:
p <- scatterplot(df.regsoft$w ~ df.regsoft$b,
data = df.regsoft,
boxplots = FALSE,
regLine = list(method=lm, col="red"),
pch = 16,
cex = 0.7,
xlab = "Fitted Values",
ylab = "Residuals",
legend = TRUE,
smooth = FALSE)
abline(coef = confint.lm(result.rs))
But this doesn't create what I want to create, however it is closest to what I intended. Notice that I took out "smooth" since this is not really what I am looking for.
How can I make this plot interactive?
If you don't mind switch to ggplot and the tidyverse, then this is simply a geom_smooth(method = "lm"):
library(tidyverse)
d <- tibble( #random stuff
x = rnorm(100, 0, 1),
y = 0.25 * x + rnorm(100, 0, 0.25)
)
m <- lm(y ~ x, data = d) #linear model
d %>%
ggplot() +
aes(x, y) + #what to plot
geom_point() +
geom_smooth(method = "lm") +
theme_bw()
without method = "lm" it draws a smoothed line.
As for the Conf. interval (Obs 95%) lines, it seems to me that's simply a quantile regression. In that case, you can use the quantreg package.
If you want to make it interactive, you can use the plotly package:
library(plotly)
p <- d %>%
ggplot() +
aes(x, y) +
geom_point() +
geom_smooth(method = "lm") +
theme_bw()
ggplotly(p)
================================================
P.S.
I am not completely sure this is what the figure you posted is showing (I guess so), but to add the quantile lines, I would just perform two quantile regressions (upper and lower) and then calculate the values of the quantile lines for your data:
library(tidyverse)
library(quantreg)
d <- tibble( #random stuff
x = rnorm(100, 0, 1),
y = 0.25 * x + rnorm(100, 0, 0.25)
)
m <- lm(y ~ x, data = d) #linear model
# 95% quantile, two tailed
rq_low <- rq(y ~ x, data = d, tau = 0.025) #lower quantile
rq_high <- rq(y ~ x, data = d, tau = 0.975) #upper quantile
d %>%
mutate(low = rq_low$coefficients[1] + x * rq_low$coefficients[2],
high = rq_high$coefficients[1] + x * rq_high$coefficients[2]) %>%
ggplot() +
geom_point(aes(x, y)) +
geom_smooth(aes(x, y), method = "lm") +
geom_line(aes(x, low), linetype = "dashed") +
geom_line(aes(x, high), linetype = "dashed") +
theme_bw()

Customize how the smooth confidence interval is computed

I use to plot the loess estimation of a bunch of points along with the confidence interval by means of the geom_smooth function.
Now I need to change the method by which the confidence bounds are computed (i.e. I need to change the shape of the blur band). Is there a way to do that in geom_smooth?
Or, how can I emulate it with ggplot2? How can I such a blur band?
If you need a to plot something that isn't one of the options in geom_smooth your best bet is to manually fit the model yourself.
You haven't said what method you need.
But here is an example of fitting the loess with family symmetric and computing the standard errors of that.
d <- data.frame(x = rnorm(100), y = rnorm(100))
# The original plot using the default loess method
p <- ggplot(d, aes(x, y)) + geom_smooth(method = 'loess', se = TRUE)
# Fit loess model with family = 'symmetric'
# Replace the next 2 lines with whatever different method you need
loess_smooth <- loess(d$x ~ d$y, family = 'symmetric')
# Predict the model over the range of data you are interested in.
loess_pred <- predict(loess_smooth,
newdata = seq(min(d$x), max(d$x), length.out = 1000),
se = TRUE)
loess.df <- data.frame(fit = loess_pred$fit,
x = seq(min(d$x), max(d$x), length.out = 1000),
upper = loess_pred$fit + loess_pred$se.fit,
lower = loess_pred$fit - loess_pred$se.fit)
# plot to compare
p +
geom_ribbon(data = loess.df, aes(x = x, y = fit, ymax = upper, ymin = lower), alpha = 0.6) +
geom_line(data = loess.df, aes(x = x, y = fit))

graphing confidence intervals nls r

I'm in the process of putting some incidence data together for a proposal. I know that the data takes on a sigmoid shape overall so I fit it using NLS in R. I was trying to get some confidence intervals to plot as well so I used bootstrapping for the parameters, made three lines and here's where I'm having my problem. The bootstrapped CIs give me three sets of values, but because of equation the lines they are crossing.
Picture of Current Plot with "Ideal" Lines in Black
NLS is not my strong suit so perhaps I'm not going about this the right way. I've used mainly a self start function to this point just to get something down on the plot. The second NLS equation will give the same output, but I've put it down now so that I can alter later if needed.
Here is my code thus far:
data <- readRDS(file = "Incidence.RDS")
inc <- nls(y ~ SSlogis(x, beta1, beta2, beta3),
data = data,
control = list(maxiter = 100))
b1 <- summary(inc)$coefficients[1,1]
b2 <- summary(inc)$coefficients[2,1]
b3 <- summary(inc)$coefficients[3,1]
inc2 <- nls(y ~ phi1 / (1 + exp(-(x - phi2) / phi3)),
data = data,
start = list(phi1 = b1, phi2 = b2, phi3 = b3),
control = list(maxiter = 100))
inc2.boot <- nlsBoot(inc2, niter = 1000)
phi1 <- summary(inc2)$coefficients[1,1]
phi2 <- summary(inc2)$coefficients[2,1]
phi3 <- summary(inc2)$coefficients[3,1]
phi1_L <- inc2.boot$bootCI[1,2]
phi2_L <- inc2.boot$bootCI[2,2]
phi3_L <- inc2.boot$bootCI[3,2]
phi1_U <- inc2.boot$bootCI[1,3]
phi2_U <- inc2.boot$bootCI[2,3]
phi3_U <- inc2.boot$bootCI[3,3]
#plot lines
age <- c(20:95)
mean_incidence <- phi1 / (1 + exp(-(age - phi2) / phi3))
lower_incidence <- phi1_L / (1 + exp(-(age - phi2_L) / phi3_L))
upper_incidence <- phi1_U / (1 + exp(-(age - phi2_U) / phi3_U))
inc_line <- data.frame(age, mean_incidence, lower_incidence, upper_incidence)
p <- ggplot()
p <- (p
+ geom_point(data = data, aes(x = x, y = y), color = "darkgreen")
+ geom_line(data = inc_line,
aes(x = age, y = mean_incidence),
color = "blue",
linetype = "solid")
+ geom_line(data = inc_line,
aes(x = age, y = lower_incidence),
color = "blue",
linetype = "dashed")
+ geom_line(data = inc_line,
aes(x = age, y = upper_incidence),
color = "blue",
linetype = "dashed")
+ geom_ribbon(data = inc_line,
aes(x = age, ymin = lower_incidence, ymax = upper_incidence),
fill = "blue", alpha = 0.20)
+ labs(x = "\nAge", y = "Incidence (per 1,000 person years)\n")
)
print(p)
Here's a link to the data.
Any help on what to do next or if this is even possible given my current set up would be appreciated.
Thanks
Try plot.drc in the drc package.
library(drc)
fm <- drm(y ~ x, data = data, fct = LL.3())
plot(fm, type = "bars")
P.S. Please include the library calls in your questions so that the code is self contained and complete. In the case of the question here: library(ggplot2); library(nlstools) .

Resources