Plotting lmer() in ggplot2 - r

I'm trying to adjust my graph to make it suitable for a scientific report. See example below (from here: http://glmm.wikidot.com/faq).
How do I change the ggplot settings so the lines are displayed in greyscale?
library("lme4")
library("ggplot2") # Plotting
data("Orthodont",package="MEMSS")
fm1 <- lmer(
formula = distance ~ age*Sex + (age|Subject)
, data = Orthodont
)
newdat <- expand.grid(
age=c(8,10,12,14)
, Sex=c("Female","Male")
, distance = 0
)
mm <- model.matrix(terms(fm1),newdat)
newdat$distance <- predict(fm1,newdat,re.form=NA)
pvar1 <- diag(mm %*% tcrossprod(vcov(fm1),mm))
tvar1 <- pvar1+VarCorr(fm1)$Subject[1]
cmult <- 2 ## could use 1.96
newdat <- data.frame(
newdat
, plo = newdat$distance-cmult*sqrt(pvar1)
, phi = newdat$distance+cmult*sqrt(pvar1)
, tlo = newdat$distance-cmult*sqrt(tvar1)
, thi = newdat$distance+cmult*sqrt(tvar1)
)
g0 <- ggplot(newdat, aes(x=age, y=distance, colour=Sex))+geom_point()
g0 + geom_errorbar(aes(ymin = plo, ymax = phi))+
labs(title="CI based on fixed-effects uncertainty ONLY") + theme_bw()
I'm also unsure why sqrt() is used in this line of code:
plo = newdat$distance-cmult*sqrt(pvar1)
Thanks

#aosmith is right - scale_color_grey is probably what you're looking for.
g0 <- ggplot(newdat, aes(x=age, y=distance, colour=Sex))+geom_point()
g0 + geom_errorbar(aes(ymin = plo, ymax = phi)) +
labs(title="CI based on fixed-effects uncertainty ONLY") +
theme_bw() + scale_color_grey(start = 0.2, end = 0.5)
If you're able to (which you are here), it's generally best to use redundant encoding, i.e., encoding sex with two variables (such as color and linetype). It makes it easier to perceive the difference between the two.
g0 <- ggplot(newdat, aes(x=age, y=distance, colour=Sex, linetype = Sex)) + geom_point()
g0 + geom_errorbar(aes(ymin = plo, ymax = phi)) +
labs(title="CI based on fixed-effects uncertainty ONLY") +
theme_bw() + scale_color_grey(start = 0.2, end = 0.5) + scale_linetype()

Related

Expanding confidence interval in R ggplots using coord_cartesian -- not working [duplicate]

my ggplot R-code works perfectly ok with my other datasets but I'm stumbled with why it's not working for one particular data set. See image below where the filled confidence interval stops at 0.10:
For reproducing the problem:
library(nlme)
library(ggeffects)
library(ggplot2)
SurfaceCoverage <- c(0.02,0.04,0.06,0.08,0.1,0.12,0.02,0.04,0.06,0.08,0.1,0.12)
SpecificSurfaceEnergy <- c(18.0052997,15.9636971,14.2951057,13.0263081,13.0816591,13.3825573,2.9267577,2.2889628,1.8909175,1.0083036,0.5683574,0.1681063)
sample <- c(1,1,1,1,1,1,2,2,2,2,2,2)
highW <- data.frame(sample,SurfaceCoverage,SpecificSurfaceEnergy)
highW$sample <- sub("^", "Wettable", highW$sample)
highW$RelativeHumidity <- "High relative humidity"; highW$group <- "Wettable"
highW$sR <- paste(highW$sample,highW$RelativeHumidity)
dfhighW <- data.frame(
"y"=c(highW$SpecificSurfaceEnergy),
"x"=c(highW$SurfaceCoverage),
"b"=c(highW$sample),
"sR"=c(highW$sR)
)
mixed.lme <- lme(y~log(x),random=~1|b,data=dfhighW)
pred.mmhighW <- ggpredict(mixed.lme, terms = c("x"))
(ggplot(pred.mmhighW) +
geom_line(aes(x = x, y = predicted)) + # slope
geom_ribbon(aes(x = x, ymin = predicted - std.error, ymax = predicted + std.error),
fill = "lightgrey", alpha = 0.5) + # error band
geom_point(data = dfhighW, # adding the raw data (scaled values)
aes(x = x, y = y, shape = b)) +
xlim(0.01,0.2) +
ylim(0,30) +
labs(title = "") +
ylab(bquote('Specific Surface Energy ' (mJ/m^2))) +
xlab(bquote('Surface Coverage ' (n/n[m]) )) +
theme_minimal()
)
Can someone advise me how to fix this? Thanks.
The last part of your ribbon has disappeared because you have excluded it from the plot. The lower edge of your ribbon is the following vector:
pred.mmhighW$predicted - pred.mmhighW$std.error
#> [1] 3.91264018 2.37386628 1.47061258 0.82834206 0.32935718 -0.07886245
Note the final value is a small negative number, but you have set the y axis limits with:
ylim(0, 30)
So anything negative will be cut off. If you change to
ylim(-2, 30)
You get
I don't know whether this is already answered previously, but coord_cartesian and scales::squish are two solutions to this problem.
coord_cartesian adjusts the viewport without adjusting the spacing of grid lines etc. (unlike xlim()/scale_*_continuous(limits = ...), which will "zoom")
scales::squish() is suboptimal if you are "squishing" lines and points, not just edgeless polygons (in the case of fill/polygons, squishing and clipping produce the same results)
gg0 <- (ggplot(pred.mmhighW)
+ geom_ribbon(aes(x = x, ymin = predicted - std.error,
ymax = predicted + std.error),
fill = "lightgrey", alpha = 0.5)
+ theme_minimal()
)
## set lower limit to 5 for a more obvious effect
gg0 + coord_cartesian(ylim = c(5, 30))
gg0 + scale_y_continuous(limits = c(5, 30),
## oob = "out of bounds" behaviour
oob = scales::squish)

ggplot confidence interval not filling the whole dataset for my linear mixed model

my ggplot R-code works perfectly ok with my other datasets but I'm stumbled with why it's not working for one particular data set. See image below where the filled confidence interval stops at 0.10:
For reproducing the problem:
library(nlme)
library(ggeffects)
library(ggplot2)
SurfaceCoverage <- c(0.02,0.04,0.06,0.08,0.1,0.12,0.02,0.04,0.06,0.08,0.1,0.12)
SpecificSurfaceEnergy <- c(18.0052997,15.9636971,14.2951057,13.0263081,13.0816591,13.3825573,2.9267577,2.2889628,1.8909175,1.0083036,0.5683574,0.1681063)
sample <- c(1,1,1,1,1,1,2,2,2,2,2,2)
highW <- data.frame(sample,SurfaceCoverage,SpecificSurfaceEnergy)
highW$sample <- sub("^", "Wettable", highW$sample)
highW$RelativeHumidity <- "High relative humidity"; highW$group <- "Wettable"
highW$sR <- paste(highW$sample,highW$RelativeHumidity)
dfhighW <- data.frame(
"y"=c(highW$SpecificSurfaceEnergy),
"x"=c(highW$SurfaceCoverage),
"b"=c(highW$sample),
"sR"=c(highW$sR)
)
mixed.lme <- lme(y~log(x),random=~1|b,data=dfhighW)
pred.mmhighW <- ggpredict(mixed.lme, terms = c("x"))
(ggplot(pred.mmhighW) +
geom_line(aes(x = x, y = predicted)) + # slope
geom_ribbon(aes(x = x, ymin = predicted - std.error, ymax = predicted + std.error),
fill = "lightgrey", alpha = 0.5) + # error band
geom_point(data = dfhighW, # adding the raw data (scaled values)
aes(x = x, y = y, shape = b)) +
xlim(0.01,0.2) +
ylim(0,30) +
labs(title = "") +
ylab(bquote('Specific Surface Energy ' (mJ/m^2))) +
xlab(bquote('Surface Coverage ' (n/n[m]) )) +
theme_minimal()
)
Can someone advise me how to fix this? Thanks.
The last part of your ribbon has disappeared because you have excluded it from the plot. The lower edge of your ribbon is the following vector:
pred.mmhighW$predicted - pred.mmhighW$std.error
#> [1] 3.91264018 2.37386628 1.47061258 0.82834206 0.32935718 -0.07886245
Note the final value is a small negative number, but you have set the y axis limits with:
ylim(0, 30)
So anything negative will be cut off. If you change to
ylim(-2, 30)
You get
I don't know whether this is already answered previously, but coord_cartesian and scales::squish are two solutions to this problem.
coord_cartesian adjusts the viewport without adjusting the spacing of grid lines etc. (unlike xlim()/scale_*_continuous(limits = ...), which will "zoom")
scales::squish() is suboptimal if you are "squishing" lines and points, not just edgeless polygons (in the case of fill/polygons, squishing and clipping produce the same results)
gg0 <- (ggplot(pred.mmhighW)
+ geom_ribbon(aes(x = x, ymin = predicted - std.error,
ymax = predicted + std.error),
fill = "lightgrey", alpha = 0.5)
+ theme_minimal()
)
## set lower limit to 5 for a more obvious effect
gg0 + coord_cartesian(ylim = c(5, 30))
gg0 + scale_y_continuous(limits = c(5, 30),
## oob = "out of bounds" behaviour
oob = scales::squish)

Transfer calculated value from stat_smooth to other geom like linerange

I have a question about ggplot2.
I want to connect data point with ols result via vertical line, like the code listed below.
Can I transfer ..y.., the value calculated by stat_smooth, to geom_linerange directly?
I tried stat_smooth(..., geom = "linerange", mapping(aes(ymin=pmin(myy, ..y..), ymax=pmax(myy,..y..)) but it is not the result I want.
library(ggplot2)
df <- data.frame(myx = 1:10,
myy = c(1:10) * 5 + 2 * rnorm(10, 0, 1))
lm.fit <- lm("myy~myx", data = df)
pred <- predict(lm.fit)
ggplot(df, aes(myx, myy)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
geom_linerange(mapping = aes(ymin = pmin(myy, pred),
ymax = pmax(myy, pred)))
stat_smooth evaluates the values at n evenly spaced points, with n = 80 by default. These points may not coincide with the original x values in your data frame.
Since you are calculating predicted values anyway, it would probably be more straightforward to add that back to your data frame and plot all geom layers based on that as your data source, for example:
df$pred <- pred
ggplot(df, aes(myx, myy)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
geom_linerange(aes(ymin = myy, ymax = pred))

geom_smooth gives different fit than nls alone

Because I wanted to have the nls model separately, I did a fit to my data inside the geom_smooth function and outside ggplot:
library(ggplot2)
set.seed(1)
data <- data.frame(x=rnorm(100))
a <- 4
b <- -2
data$y <- with(data, exp(a + b * x) + rnorm(100) + 100)
mod <- nls(formula = y ~ (exp(a + b * x)), data = data, start = list(a = a, b = b))
data$fit <- predict(mod, newdata=data)
plot <- ggplot(data, aes(x=x, y=y)) +
geom_point() +
geom_smooth(method = "nls", colour = "red", formula=y ~ exp(a + b * x),
method.args = list(start = c(a = a, b = b)), se=F, span=0) +
geom_line(aes(x=x, y=fit), colour="blue") +
scale_y_log10()
I just wondering why both methods, though with the same parameters, give a different fit? Does geom_smooth use some transformation?
geom_smooth doesn't make predictions from the original dataset, but instead makes a dataset for prediction. By default this dataset has 80 rows, but you can change this with the n argument.
To see that the model fit via geom_smooth and the model fit by nls are the same, you need to use the same dataset for prediction. You can pull the one used by geom_smooth out via ggplot_build. The dataset used for prediction is the second in the list.
dat2 = ggplot_build(plot)$data[[2]]
Now use dat2 for making predictions from the nls model and remake the plot.
dat2$fit2 = predict(mod, newdata = dat2)
ggplot(data, aes(x=x, y=y)) +
geom_point() +
geom_smooth(method = "nls", colour = "red", formula=y ~ exp(a + b * x),
method.args = list(start = c(a = 4, b = -2)), se = FALSE) +
geom_line(data = dat2, aes(x=x, y=fit2), colour="blue")
Note that if you want to display on the log10 scale when comparing geom_smooth to a predicted line you'll want to use coord_trans(y = "log10") instead of scale_y_log10. Scale transformation happens prior to model fitting, so you would be fitting a model to a log10-transformed y if you use scale_y_log10.

Is it possible to plot the smooth components of a gam fit with ggplot2?

I am fitting a model using gam from the mgcv package and store the result in model and so far I have been looking at the smooth components using plot(model). I have recently started using ggplot2 and like its output. So I am wondering, is it possible to plot these graphs using ggplot2?
Here is an example:
x1 = rnorm(1000)
x2 = rnorm(1000)
n = rpois(1000, exp(x1) + x2^2)
model = gam(n ~ s(x1, k=10) + s(x2, k=20), family="poisson")
plot(model, rug=FALSE, select=1)
plot(model, rug=FALSE, select=2)
And I am interest in s(x1, k=10) and s(x2, k=20) not in the fit.
Partial answer:
I dug deeper into plot.gam and mgcv:::plot.mgcv.smooth and built my own function which extracts the predicted effects and standard errors from the smooth components. It doesn't handle all options and cases of plot.gam so I only consider it a partial solution, but it works well for me.
EvaluateSmooths = function(model, select=NULL, x=NULL, n=100) {
if (is.null(select)) {
select = 1:length(model$smooth)
}
do.call(rbind, lapply(select, function(i) {
smooth = model$smooth[[i]]
data = model$model
if (is.null(x)) {
min = min(data[smooth$term])
max = max(data[smooth$term])
x = seq(min, max, length=n)
}
if (smooth$by == "NA") {
by.level = "NA"
} else {
by.level = smooth$by.level
}
range = data.frame(x=x, by=by.level)
names(range) = c(smooth$term, smooth$by)
mat = PredictMat(smooth, range)
par = smooth$first.para:smooth$last.para
y = mat %*% model$coefficients[par]
se = sqrt(rowSums(
(mat %*% model$Vp[par, par, drop = FALSE]) * mat
))
return(data.frame(
label=smooth$label
, x.var=smooth$term
, x.val=x
, by.var=smooth$by
, by.val=by.level
, value = y
, se = se
))
}))
}
This returns a "molten" data frame with the smooth components, so it is now possible to use ggplot with the example above :
smooths = EvaluateSmooths(model)
ggplot(smooths, aes(x.val, value)) +
geom_line() +
geom_line(aes(y=value + 2*se), linetype="dashed") +
geom_line(aes(y=value - 2*se), linetype="dashed") +
facet_grid(. ~ x.var)
If anyone knows a package which allows this in the general case I would be very grateful.
You can use the visreg package combined with the plyr package. visreg basically plots any model that you can use predict() on.
library(mgcv)
library(visreg)
library(plyr)
library(ggplot2)
# Estimating gam model:
x1 = rnorm(1000)
x2 = rnorm(1000)
n = rpois(1000, exp(x1) + x2^2)
model = gam(n ~ s(x1, k=10) + s(x2, k=20), family="poisson")
# use plot = FALSE to get plot data from visreg without plotting
plotdata <- visreg(model, type = "contrast", plot = FALSE)
# The output from visreg is a list of the same length as the number of 'x' variables,
# so we use ldply to pick the objects we want from the each list part and make a dataframe:
smooths <- ldply(plotdata, function(part)
data.frame(Variable = part$meta$x,
x=part$fit[[part$meta$x]],
smooth=part$fit$visregFit,
lower=part$fit$visregLwr,
upper=part$fit$visregUpr))
# The ggplot:
ggplot(smooths, aes(x, smooth)) + geom_line() +
geom_line(aes(y=lower), linetype="dashed") +
geom_line(aes(y=upper), linetype="dashed") +
facet_grid(. ~ Variable, scales = "free_x")
We can put the whole thing into a function, and add an option to show the residuals from the model (res = TRUE):
ggplot.model <- function(model, type="conditional", res=FALSE,
col.line="#7fc97f", col.point="#beaed4", size.line=1, size.point=1) {
require(visreg)
require(plyr)
plotdata <- visreg(model, type = type, plot = FALSE)
smooths <- ldply(plotdata, function(part)
data.frame(Variable = part$meta$x,
x=part$fit[[part$meta$x]],
smooth=part$fit$visregFit,
lower=part$fit$visregLwr,
upper=part$fit$visregUpr))
residuals <- ldply(plotdata, function(part)
data.frame(Variable = part$meta$x,
x=part$res[[part$meta$x]],
y=part$res$visregRes))
if (res)
ggplot(smooths, aes(x, smooth)) + geom_line(col=col.line, size=size.line) +
geom_line(aes(y=lower), linetype="dashed", col=col.line, size=size.line) +
geom_line(aes(y=upper), linetype="dashed", col=col.line, size=size.line) +
geom_point(data = residuals, aes(x, y), col=col.point, size=size.point) +
facet_grid(. ~ Variable, scales = "free_x")
else
ggplot(smooths, aes(x, smooth)) + geom_line(col=col.line, size=size.line) +
geom_line(aes(y=lower), linetype="dashed", col=col.line, size=size.line) +
geom_line(aes(y=upper), linetype="dashed", col=col.line, size=size.line) +
facet_grid(. ~ Variable, scales = "free_x")
}
ggplot.model(model)
ggplot.model(model, res=TRUE)
Colors are picked from http://colorbrewer2.org/.
FYI, visreg can directly output a gg object:
visreg(model, "x1", gg=TRUE)
There is now also the gratia package by #GavinSimpson and available on CRAN: https://cran.r-project.org/web/packages/gratia/index.html
Information is also on Gavin's Github site and a getting started vignette can be found here.
Updated to allow user to choose which variables are plotted.
Changed 'residuals' term to 'res_data' to avoid conflict with residuals function.
ggplot.model <- function(model, type="conditional", res=FALSE,
col.line="#7fc97f", col.point="#beaed4", size.line=1, size.point=1, no_col = NULL,
what = "all") {
require(visreg)
require(plyr)
plotdata <- visreg(model, type = type, plot = FALSE)
smooths <- ldply(plotdata, function(part)
data.frame(Variable = part$meta$x,
x=part$fit[[part$meta$x]],
smooth=part$fit$visregFit,
lower=part$fit$visregLwr,
upper=part$fit$visregUpr))
res_data <- ldply(plotdata, function(part)
data.frame(Variable = part$meta$x,
x=part$res[[part$meta$x]],
y=part$res$visregRes))
if (what != "all") {
smooths <- smooths %>%
filter(lapply(Variable,as.character)%in% what)
res_data <- res_data%>%
filter(lapply(Variable,as.character)%in% what)
}
if (res)
ggplot(smooths, aes(x, smooth)) + geom_line(col=col.line, size=size.line) +
geom_line(aes(y=lower), linetype="dashed", col=col.line, size=size.line) +
geom_line(aes(y=upper), linetype="dashed", col=col.line, size=size.line) +
geom_point(data = res_data, aes(x, y), col=col.point, size=size.point) +
facet_wrap(. ~ Variable, scales = "free_x", ncol = no_col) + theme_bw()
else
ggplot(smooths, aes(x, smooth)) + geom_line(col=col.line, size=size.line) +
geom_line(aes(y=lower), linetype="dashed", col=col.line, size=size.line) +
geom_line(aes(y=upper), linetype="dashed", col=col.line, size=size.line) +
facet_wrap(. ~ Variable, scales = "free_x", ncol=no_col)
}

Resources