I try to make a plot for standard purposes with zero inflated model and zero inflated mixed model using ggplot2 without success. For this, I try:
#Packages
library(pscl)
library(glmmTMB)
library(ggplot2)
library(gridExtra)
# Artificial data set
set.seed(007)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time
DF <- data.frame(id = rep(seq_len(n), each = K),
time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))
DF$y <- rnbinom(n * K, size = 2, mu = exp(1.552966))
str(DF)
Using zero inflated poisson model with pscl package
time2<-(DF$time)^2
mZIP <- zeroinfl(y~time+time2+sex|time+sex, data=DF)
summary(mZIP)
If I imagine thal all coefficients are significant
# Y estimated
pred.data1 = data.frame(
time<-DF$time,
time2<-(DF$time)^2,
sex<-DF$sex)
pred.data1$y = predict(mZIP, newdata=pred.data1, type="response")
Now using zero inflated poisson mixed model with glmmTMB package
mZIPmix<- glmmTMB(y~time+time2+sex+(1|id),
data=DF, ziformula=~1,family=poisson)
summary(mZIPmix)
#
# new Y estimated
pred.data2 = data.frame(
time<-DF$time,
time2<-(DF$time)^2,
sex<-DF$sex,
id<-DF$id)
pred.data2$y = predict(mZIPmix, newdata=pred.data2, type="response")
Plot zero inflated poisson model and mixed poisson model
par(mfrow=c(1,2))
plot1<-ggplot(DF, aes(time, y, colour=sex)) +
labs(title="Zero inflated model") +
geom_point() +
geom_line(data=pred.data1) +
stat_smooth(method="glm", family=poisson(link="log"), formula = y~poly(x,2),fullrange=TRUE)
plot2<-ggplot(DF, aes(time, y, colour=sex)) +
labs(title="Zero inflated mixed model") +
geom_point() +
geom_line(data=pred.data2) +
stat_smooth(method="glm", family=poisson(link="log"), formula = y~poly(x,2),fullrange=TRUE)## here a don't find any method to mixed glm
grid.arrange(plot1, plot2, ncol=2)
#-
Doesn't work of sure. Is possible to make this using ggplot2?
Thanks in advance
I'm not sure, but it looks to me that you're looking for marginal effects. You can do this with the ggeffects-package. Here are two examples, using your simulated data, that create a ggplot-object, one with and one w/o raw data.
library(glmmTMB)
library(ggeffects)
mZIPmix<- glmmTMB(y~poly(time,2)+sex+(1|id), data=DF, ziformula=~1,family=poisson)
# compute marginal effects and create a plot.
# the tag "[all]" is useful for polynomial terms, to produce smoother plots
ggpredict(mZIPmix, c("time [all]", "sex")) %>% plot(rawdata = TRUE, jitter = .01)
ggpredict(mZIPmix, c("time [all]", "sex")) %>% plot(rawdata = FALSE)
Created on 2019-05-16 by the reprex package (v0.2.1)
Note that sex only has an "additive" effect. Maybe you want to model an intercation between time and sex?
mZIPmix<- glmmTMB(y~poly(time,2)*sex+(1|id), data=DF, ziformula=~1,family=poisson)
ggpredict(mZIPmix, c("time [all]", "sex")) %>% plot(rawdata = TRUE, jitter = .01)
ggpredict(mZIPmix, c("time [all]", "sex")) %>% plot()
Created on 2019-05-16 by the reprex package (v0.2.1)
Related
I am trying to plot 95% confidence intervals on some simulated values but am running into so issues when i am trying to plot the CIs using the geom_ribbon() argument. The trouble I'm having it that my model does not show the CIs when i plot them, like so;
I have included all of my code below if anyone knows where i have gone wrong here;
set.seed(20220520)
#simulating 200 values between 0 and 1 from a uniform distribution
x = runif(200, min = 0, max = 1)
lam = exp(0.3+5*x)
y = rpois(200, lambda = lam)
#before we do this each Yi may contain zeros so we need to add a small constant
y <- y + .1
#combining x and y into a dataframe so we can plot
df = data.frame(x, y)
#fitting a Poisson GLM
model2 <- glm(y ~ x,
data = df,
family = poisson(link='log'))
#make predictions (this may be the same as predictions_mod2)
preds <- predict(model2, type = "response")
#making CI predictions
predictions_mod2 = predict(model2, df, se.fit = TRUE, type = 'response')
#calculate confidence intervals limit
upper_mod2 = predictions_mod2$fit+1.96*predictions_mod2$se.fit
lower_mod2 = predictions_mod2$fit-1.96*predictions_mod2$se.fit
#transform the CI limit to get one at the level of the mean
upper_mod2 = exp(upper_mod2)/(1+exp(upper_mod2))
lower_mod2 = exp(lower_mod2)/(1+exp(lower_mod2))
#combining into a df
predframe = data.frame(lwr=lower_mod2,upr=upper_mod2, x = df$x, y = df$y)
#plot model with 95% confidence intervals using ggplot
ggplot(df, aes(x, y)) +
geom_ribbon(data = predframe, aes(ymin=lwr, ymax=upr), alpha = 0.4) +
geom_point() +
geom_line(aes(x, preds2), col = 'blue')
In a comment to the question, it's asked why not to logit transform the predicted values. The reason why is that the type of prediction asked for is "response". From the documentation, my emphasis.
type
the type of prediction required. The default is on the scale of the linear predictors; the alternative "response" is on the scale of the response variable. Thus for a default binomial model the default predictions are of log-odds (probabilities on logit scale) and type = "response" gives the predicted probabilities. The "terms" option returns a matrix giving the fitted values of each term in the model formula on the linear predictor scale.
There is a good way to answer, to show the code.
library(ggplot2, quietly = TRUE)
set.seed(20220520)
#simulating 200 values between 0 and 1 from a uniform distribution
x = runif(200, min = 0, max = 1)
lam = exp(0.3+5*x)
y = rpois(200, lambda = lam)
#before we do this each Yi may contain zeros so we need to add a small constant
y <- y + 0.1
#combining x and y into a dataframe so we can plot
df = data.frame(x, y)
#fitting a Poisson GLM
suppressWarnings(
model2 <- glm(y ~ x,
data = df,
family = poisson(link='log'))
)
#make predictions (this may be the same as predictions_mod2)
preds <- predict(model2, type = "response")
#making CI predictions
predictions_mod2 = predict(model2, df, se.fit = TRUE, type = 'response')
#calculate confidence intervals limit
upper_mod2 = predictions_mod2$fit+1.96*predictions_mod2$se.fit
lower_mod2 = predictions_mod2$fit-1.96*predictions_mod2$se.fit
#combining into a df
predframe = data.frame(lwr=lower_mod2,upr=upper_mod2, x = df$x, y = df$y)
#plot model with 95% confidence intervals using ggplot
ggplot(df, aes(x, y)) +
geom_ribbon(data = predframe, aes(ymin=lwr, ymax=upr), alpha = 0.4) +
geom_point() +
geom_line(aes(x, preds), col = 'blue')
Created on 2022-05-29 by the reprex package (v2.0.1)
I noticed that most codes provided for survival curve plot are about the trend of survival by time, is there any r package that can set "time" as fixed, e.g 10 years, and plot the 10-year survival probability with the change of covariates, e.g. age?
I'm using a COX regression.
Thanks!
You can use predict to get the predicted survival at any time and any level of covariates you wish. You haven't supplied your model or your data, so I will create a demo from the survival package's built-in lung data set.
library(survival)
df <- survival::lung[c(2, 3, 4, 5)]
df$time <- (df$time * 2) / 365
df$status <- df$status - 1
model <- coxph(Surv(time, status) ~ age + strata(sex), df)
To get predictions at ten years for both sexes at a range of ages, we create a little data frame with all the variables used in our model but set to the values we want (including status, which is ignored, so we can set that to 0)
new_data <- data.frame(age = rep(50:90, 2), sex = rep(1:2, each = 41),
time = 10, status = 0)
Now we plug this new data into predict with type = "survival" and store it in our new data frame.
new_data$survival <- predict(model, newdata = new_data, type = "survival")
Now we can just plot the result. Here, I'll use ggplot:
library(ggplot2)
ggplot(new_data, aes(age, survival, color = c("male", "female")[sex])) +
geom_line(size = 1.5) +
scale_color_manual(values = c("orangered", "deepskyblue3"), name = "Sex") +
scale_y_continuous(labels = scales::percent) +
labs(title = "10 year survival according to age") +
theme_minimal() +
theme(text = element_text(size = 16))
Created on 2022-03-10 by the reprex package (v2.0.1)
Using the 'iris' dataset (sightly modified as below), I plot the results of an LME.
PLEASE NOTE: I am only using the iris dataset as mock data for the purpose of plotting, so please do not critique the appropriateness of this test. I'm not interested in the statistics, rather the plotting.
Using ggpredict function and plotting the results, the plot extends the predictions beyond the range of the data. Is there a systematic way plot predictions only within the range of each faceted data?
I can plot each facet separately, limit the axis per plot manually, and cowplot them back together, but if there is way to say 'predict only to the max. and min. of the data for that group', this would be great.
Given that these are facets of a single model, perhaps not showing the predictions for different groups is in fact misleading, and I should rather create three different models if I only want predictions within those data subsets?
library(lme4)
library(ggeffects)
library(ggplot2)
data(iris)
glimpse(iris)
df = iris
glimpse(df)
df_ed = df %>% group_by(Species) %>% mutate(Sepal.Length = ifelse(Species == "setosa",Sepal.Length+10,Sepal.Length+0))
df_ed = df_ed %>% group_by(Species) %>% mutate(Sepal.Length = ifelse(Species == "versicolor",Sepal.Length-3,Sepal.Length+0))
glimpse(df_ed)
m_test =
lmer(Sepal.Width ~ Sepal.Length * Species +
(1|Petal.Width),
data = df_ed, REML = T)
summary(m_test)
test_plot = ggpredict(m_test, c("Sepal.Length", "Species"), type = "re") %>% plot(rawdata = T, dot.alpha = 0.6, facet = T, alpha = 0.3)
As per the OP's comment, I think this will provide a solution. In this example, I use data from the sleepstudy dataset that comes with the lme4 package. First, we have to postulate a mixed model, which I generically call fit.
Note that I do not perform any hypothesis test to formally select an appropriate random-effects structure. Of course, this is essential to adequately capture the correlations in the repeated measurements, but falls outside the scope of this post.
library(lme4)
library(splines)
# quantiles of Days
quantile(sleepstudy$Days, c(0.05, 0.95))
# 5% 95%
# 0 9
# mixed model
fit <- lmer(Reaction ~ ns(Days, df = 2, B = c(0, 9)) +
(Days | Subject), data = sleepstudy)
# new data.frame for prediction
ND <- with(sleepstudy, expand.grid(Days = seq(0L, 9L, len = 50)))
Then, we need a fucntion that enables us to obtain predictions from fit for certain values of the covariates. The function effectPlot_lmer() takes the following arguments:
object: a character string indicating the merMod object that was fitted (the mixed model).
ND: a character string indicating the new data.frame, which specifies the values of the covariates for which we want to obtain predictions.
orig_data: a character string specifying the data on which the mixed model was fitted.
# function to obtain predicted reaction times
effectPlot_lmer <- function (object, ND, orig_data) {
form <- formula(object, fixed.only = TRUE)
namesVars <- all.vars(form)
betas <- fixef(object)
V <- vcov(object)
orig_data <- orig_data[complete.cases(orig_data[namesVars]), ]
Terms <- delete.response(terms(form))
mfX <- model.frame(Terms, data = orig_data)
Terms_new <- attr(mfX, "terms")
mfX_new <- model.frame(Terms_new, ND, xlev = .getXlevels(Terms, mfX))
X <- model.matrix(Terms_new, mfX_new)
pred <- c(X %*% betas)
ses <- sqrt(diag(X %*% V %*% t(X)))
ND$pred <- pred
ND$low <- pred - 1.96 * ses
ND$upp <- pred + 1.96 * ses
return(ND)
}
Finally, we can make an effect plot with ggplot.
# effect plot
library(ggplot2)
ggplot(effectPlot_lmer(fit, ND, orig_data = sleepstudy),
aes(x = Days, y = pred)) +
geom_line(size = 1.2, colour = 'blue4') +
geom_ribbon(aes(ymin = low, ymax = upp), colour = NA,
fill = adjustcolor('blue4', 0.2)) +
theme_bw() + ylab('Expected Reaction (ms)')
i have the following data and created a model with the package glmmTMB in R for plant diameters ~ plant density (number of plants) with a random plot effect:
d <- data.frame (diameter = c(17,16,15,13,11, 19,17,15,11,11, 19,15,14,11,8),
plant_density = c(1000,2000,3000,4000,5000, 1000,2000,3000,4000,5000, 1000,2000,3000,4000,5000),
plot = c(1,1,1,1,1, 2,2,2,2,2, 3,3,3,3,3))
glmm.model <- glmmTMB(diameter ~ plant_density + (1|plot),
data = d,
na.action = na.omit,
family="gaussian",
ziformula = ~ 0)
My intention was to create a plot with predicted diameter data for different plant densities with an included random plot effect. So i tried to predict the data:
new.dat <- data.frame(diameter= d$diameter,
plant_density = d$plant_density,
plot= d$plot)
new.dat$prediction <- predict(glmm.model, new.data = new.dat,
type = "response", re.form = NA)
Unfortunately I get an output for every plot but wanted a generalized prediction for the diameter ~ plant density.
My goal is to create a plot like here, but with a regression model from glmmTMB which consider the random effect.
Thanks for ur help!
The ggeffects package makes this type of thing very easy to implement and customize.
For example
library('ggplot2')
library('glmmTMB')
library('ggeffects')
d <- data.frame (diameter = c(17,16,15,13,11, 19,17,15,11,11, 19,15,14,11,8),
plant_density = c(1000,2000,3000,4000,5000, 1000,2000,3000,4000,5000, 1000,2000,3000,4000,5000),
plotx = as.factor( c(1,1,1,1,1, 2,2,2,2,2, 3,3,3,3,3)))
glmm.model <- glmmTMB(diameter ~ plant_density + (1|plotx),
data = d,
family="gaussian")
# basically what your looking for
plot(ggpredict(glmm.model, terms = "plant_density"))
# with additional a change of limits on the y-axis
plot(ggpredict(glmm.model, terms = "plant_density")) +
scale_y_continuous(limits = c(0, 20))
You can really do whatever you'd like with it from there, changing colors, themes, scales, the package has some nice vignettes as well.
I use lme4 in R to fit the mixed model
lmer(value~status+(1|experiment)))
where value is continuous, status(N/D/R) and experiment are factors, and I get
Linear mixed model fit by REML
Formula: value ~ status + (1 | experiment)
AIC BIC logLik deviance REMLdev
29.1 46.98 -9.548 5.911 19.1
Random effects:
Groups Name Variance Std.Dev.
experiment (Intercept) 0.065526 0.25598
Residual 0.053029 0.23028
Number of obs: 264, groups: experiment, 10
Fixed effects:
Estimate Std. Error t value
(Intercept) 2.78004 0.08448 32.91
statusD 0.20493 0.03389 6.05
statusR 0.88690 0.03583 24.76
Correlation of Fixed Effects:
(Intr) statsD
statusD -0.204
statusR -0.193 0.476
I would like to graphically represent the fixed effects evaluation. However the seems to be no plot function for these objects. Is there any way I can graphically depict the fixed effects?
Using coefplot2 (on r-forge):
Stealing the simulation code from #Thierry:
set.seed(101)
dataset <- expand.grid(experiment = factor(seq_len(10)),
status = factor(c("N", "D", "R"), levels = c("N", "D", "R")),
reps = seq_len(10))
X <- model.matrix(~status,dataset)
dataset <- transform(dataset,
value=rnorm(nrow(dataset), sd = 0.23) + ## residual
rnorm(length(levels(experiment)), sd = 0.256)[experiment] + ## block effects
X %*% c(2.78,0.205,0.887)) ## fixed effects
Fit model:
library(lme4)
model <- lmer(value~status+(1|experiment), data = dataset)
Plot:
install.packages("coefplot2",repos="http://r-forge.r-project.org")
library(coefplot2)
coefplot2(model)
edit:
I have frequently been having problems with the R-Forge build. This fallback should work if the R-Forge build is not working:
install.packages("coefplot2",
repos="http://www.math.mcmaster.ca/bolker/R",
type="source")
Note that the coda dependency must already be installed.
I like the coefficient confidence interval plots, but it may be useful to consider some additional plots to understand the fixed effects..
Stealing the simulation code from #Thierry:
library(ggplot2)
library(lme4)
library(multcomp)
dataset <- expand.grid(experiment = factor(seq_len(10)), status = factor(c("N", "D", "R"), levels = c("N", "D", "R")), reps = seq_len(10))
dataset$value <- rnorm(nrow(dataset), sd = 0.23) + with(dataset, rnorm(length(levels(experiment)), sd = 0.256)[experiment] + ifelse(status == "D", 0.205, ifelse(status == "R", 0.887, 0))) + 2.78
model <- lmer(value~status+(1|experiment), data = dataset)
Get a look at the structure of the data...looks balanced..
library(plotrix); sizetree(dataset[,c(1,2)])
It might be interesting to track the correlation between fixed effects, especially if you fit different correlation structures. There's some cool code provided at the following link...
http://hlplab.wordpress.com/2012/03/20/correlation-plot-matrices-using-the-ellipse-library/
my.plotcorr(
matrix(c(1, .891, .891,
.891, 1, .891,
.891, .891, 1), nrow=3)
)
Finally it seems relevant to look at the variability across the 10 experiments as well as the variability across "status" within experiments. I'm still working on the code for this as I break it on unbalanced data, but the idea is...
My2Boxes(m=4,f1=dataset$experiment,f2=dataset$status,x=dataset$value,color=c("red","yellow","green"))
Finally the already mentioned Piniero and Bates (2000) book strongly favored lattice from what little I've skimmed.. So you might give that a shot. Maybe something like plotting the raw data...
lattice::xyplot(value~status | experiment, groups=experiment, data=dataset, type=c('p','r'), auto.key=F)
And then plotting the fitted values...
lattice::xyplot(fitted(model)~status | experiment, groups=experiment, data=dataset, type=c('p','r'), auto.key=F)
Here are a few suggestions.
library(ggplot2)
library(lme4)
library(multcomp)
# Creating datasets to get same results as question
dataset <- expand.grid(experiment = factor(seq_len(10)),
status = factor(c("N", "D", "R"),
levels = c("N", "D", "R")),
reps = seq_len(10))
dataset$value <- rnorm(nrow(dataset), sd = 0.23) +
with(dataset, rnorm(length(levels(experiment)),
sd = 0.256)[experiment] +
ifelse(status == "D", 0.205,
ifelse(status == "R", 0.887, 0))) +
2.78
# Fitting model
model <- lmer(value~status+(1|experiment), data = dataset)
# First possibility
tmp <- as.data.frame(confint(glht(model, mcp(status = "Tukey")))$confint)
tmp$Comparison <- rownames(tmp)
ggplot(tmp, aes(x = Comparison, y = Estimate, ymin = lwr, ymax = upr)) +
geom_errorbar() + geom_point()
# Second possibility
tmp <- as.data.frame(confint(glht(model))$confint)
tmp$Comparison <- rownames(tmp)
ggplot(tmp, aes(x = Comparison, y = Estimate, ymin = lwr, ymax = upr)) +
geom_errorbar() + geom_point()
# Third possibility
model <- lmer(value ~ 0 + status + (1|experiment), data = dataset)
tmp <- as.data.frame(confint(glht(model))$confint)
tmp$Comparison <- rownames(tmp)
ggplot(tmp, aes(x = Comparison, y = Estimate, ymin = lwr, ymax = upr)) +
geom_errorbar() + geom_point()
This answer illustrates the newer dotwhisker::dwplot + broom.mixed solution.
Adding one more variable in the simulation:
dataset <- transform(dataset,
value=rnorm(nrow(dataset), sd = 0.23) + ## residual
rnorm(length(levels(experiment)), sd = 0.256)[experiment] + ## block effects
X %*% c(2.78,0.205,0.887),
var2=rnorm(nrow(dataset))) ## fixed effects
Fitting two different models:
library(lme4)
model <- lmer(value~status+var2 + (1|experiment), data = dataset)
model2 <- update(model, . ~ . -var2)
Plotting:
library(broom.mixed)
library(dotwhisker)
dwplot(list(first=model,second=model2), effects="fixed")+
geom_vline(xintercept=0, lty=2)
(using effects="fixed" gets us just the fixed-effect parameters, dropping the intercept by default).
broom.mixed has many other options. When I want to do something complex I may use ggplot + ggstance::geom_pointrangeh (+ position="position_dodgev") to make my own custom plot rather than relying on dotwhisker::dwplot().