Plotting Growth Curve with Quadratic Growth - r

I am trying to see how I can plot quadratic growth in R for a growth curve model I've been running.
Model:
m1 <- lmer(score ~ Time + Group + Time_Sqaure +
(1 + School | Subject), data=df, REML = FALSE)
tab_model(m1)
Both Time (B = 9.58, p<.01) and Time_Square (B = - 0.51, p <.01) along with Group (B = 2.77, p <.01) differences are significant.
If I use plot_model, it gives me the best fit line for each group.
plot_model(m1, type = "pred", terms = c("Time", "Group"))
Is there a way to plot the fitted curves or quadratic growth that shows the rate of growth slowing over time?
Thanks!

For sjPlot::plot_model to understand what is going on, you have to enter Time_Square as I(Time^2) not as a separate predictor.
Given that df$Time_Square <- df$Time^2, the following two models should give you the same results:
m1 <- lmer(score ~ Time + Group + Time_Square +
(1 + School | Subject), data=df, REML = FALSE)
m2 <- lmer(score ~ Time + Group + I(Time^2) +
(1 + School | Subject), data=df, REML = FALSE)
However, in the second model, it is clear that the predictor Time is entered twice and so it can be taken into account when plotting it with sjPlot::plot_model(...).
To make sure, I tested it with the following simulated data:
library(dplyr)
grps <- 2 #number of groups
subj <- 100 #number of subjects within group
obs <- 10 #number of observations/times per subjects
b_0 <- 0 #overall intercept
b_1 <- 9.58 #linear time effect
b_2 <- -0.51 #quadratic time effect
sd_b0 <- 0.4 #SD of random intercept per subject
sd_b1 <- 3 #SD of random slope per subject
sd_b3 <- 1 #SD of group effect (you can simulate more than 2 groups)
sd_resid <- 10 #SD of residuals
df <- list(Group = factor(rep(letters[1:grps], each=obs*subj)),
Subject = factor(rep(1:subj, times=grps, each=obs)),
Time = rep(1:obs, times=subj*grps)
) %>% as.data.frame()
df$TimeSq <- df$Time^2
subj_b0 <- rnorm(subj, b_0, sd_b0) %>% rep(times=grps, each=obs)
subj_b1 <- rnorm(subj, b_1, sd_b1) %>% rep(times=grps, each=obs)
grp_m <- rnorm(grps, 0, sd_b3) %>% rep(times=, each=subj*obs)
df$Score <- with(df, subj_b0 + Time*subj_b1 + (Time^2)*b_2 + grp_m + rnorm(grps*subj*obs, 0, sd_resid))
fit1 <- lme4::lmer(Score ~ Time + I(Time^2) + Group + (Time | Subject), data=df)
sjPlot::plot_model(fit1, type="pred", terms=c("Time"))

Related

How to specify all levels for R{stats} predict() function in non linear mixed model after model fit with package medrc

I have 3 trials (trial: e1, e2, e3), 2 products/trial (products: A, B), 5 rates/product (.1,1,10,100,1000), total of 6 curves (curve: c1,...,c6).
After fitting a non linear mixed model, I want to plot all the curves and the curves resulting from the model in the same chart.
This is the reference (package medrc in github): https://doseresponse.github.io/medrc/articles/medrc.html
This is the code to generate the non-linear mixed model.
#packages
library(drc)
library(medrc)
library(dplyr)
library(tidyr)
#my data
trial <- c("e1","e1","e1","e1","e1","e1","e1","e1","e1","e1","e1","e1","e1","e1","e1",
"e1","e1","e1","e1","e1","e1","e1","e1","e1","e1","e1","e1","e1","e1","e1",
"e2","e2","e2","e2","e2","e2","e2","e2","e2","e2","e2","e2","e2","e2","e2",
"e2","e2","e2","e2","e2","e2","e2","e2","e2","e2","e2","e2","e2","e2","e2",
"e3","e3","e3","e3","e3","e3","e3","e3","e3","e3","e3","e3","e3","e3","e3",
"e3","e3","e3","e3","e3","e3","e3","e3","e3","e3","e3","e3","e3","e3","e3")
curve <- c("c1","c1","c1","c1","c1","c1","c1","c1","c1","c1","c1","c1","c1","c1","c1",
"c2","c2","c2","c2","c2","c2","c2","c2","c2","c2","c2","c2","c2","c2","c2",
"c3","c3","c3","c3","c3","c3","c3","c3","c3","c3","c3","c3","c3","c3","c3",
"c4","c4","c4","c4","c4","c4","c4","c4","c4","c4","c4","c4","c4","c4","c4",
"c5","c5","c5","c5","c5","c5","c5","c5","c5","c5","c5","c5","c5","c5","c5",
"c6","c6","c6","c6","c6","c6","c6","c6","c6","c6","c6","c6","c6","c6","c6")
rates <- c(.1,.1,.1,1,1,1,10,10,10,100,100,100,1000,1000,1000,
.1,.1,.1,1,1,1,10,10,10,100,100,100,1000,1000,1000,
.1,.1,.1,1,1,1,10,10,10,100,100,100,1000,1000,1000,
.1,.1,.1,1,1,1,10,10,10,100,100,100,1000,1000,1000,
.1,.1,.1,1,1,1,10,10,10,100,100,100,1000,1000,1000,
.1,.1,.1,1,1,1,10,10,10,100,100,100,1000,1000,1000)
product <- c("A","A","A","A","A","A","A","A","A","A","A","A","A","A","A",
"B","B","B","B","B","B","B","B","B","B","B","B","B","B","B",
"A","A","A","A","A","A","A","A","A","A","A","A","A","A","A",
"B","B","B","B","B","B","B","B","B","B","B","B","B","B","B",
"A","A","A","A","A","A","A","A","A","A","A","A","A","A","A",
"B","B","B","B","B","B","B","B","B","B","B","B","B","B","B")
resp <- c(.295,.3232,.3015,.155,.1501,.1483,.0511,.036,.0445,.0021,.0022,.0035,.0015,.0025,.0009,
.312,.3373,.2994,.265,.2501,.2482,.1022,.103,.1142,.0220,.0198,.0159,.0036,.0099,.0100,
.289,.3122,.3093,.141,.1612,.1398,.0722,.022,.0581,.0019,.0015,.0011,.0018,.0009,.0014,
.325,.3451,.2952,.267,.2412,.2398,.1125,.109,.1019,.0554,.0547,.0118,.0029,.0075,.0078,
.294,.2452,.2991,.121,.1925,.1485,.0871,.025,.0658,.0019,.0019,.0010,.0025,.0019,.0008,
.285,.3412,.3069,.124,.1861,.1958,.1276,.132,.1985,.0325,.0201,.0225,.0031,.0089,.0094)
data.test <- data.frame(trial,curve,rates,product,resp) #my data frame
#my model
m1 <- medrm(resp ~ rates,
curveid=b + c + d + e ~ product,
data = data.test,
fct=LL.4(),
random = c + d ~ 1|trial,
start=NULL)
To make the plot:
#plotting
pdata <- data.test%>%
group_by(curve, product) %>%
expand(rates=exp(seq(-3, 10, length=50)))
#pdata$resp_ind <- predict(m1, newdata=pdata)
pdata$resp <- predict(m1, newdata=pdata, level=0)
ggplot(data.test, aes(x=log(rates), y=resp,
colour=product, group=curve, shape=product)) +
geom_point() +
geom_line(data=pdata) +
#geom_line(data=pdata, aes(y=resp_ind), linetype=2) +
theme_bw() +
scale_x_continuous("DOSE",
breaks=log(c(.1, 1, 10, 100, 1000)),
labels=c(.1, 1, 10, 100, 1000))
Note that two rows of code are commented.
When extracting the predict data per curve, I cannot specify the levels, i.e. the curves that gave the random components. What am I missing?
pdata$resp_ind <- predict(m1, newdata=pdata)
is resulting the error:
Error in predict.nlme(object$fit, newdata = newdata, level = level) :
cannot evaluate groups for desired levels on 'newdata'
So I cannot use this row of code to plot each curve line
geom_line(data=pdata, aes(y=resp_ind), linetype=2) +
These are similar questions, but I'm getting the average trend with the code:
pdata$resp <- predict(m1, newdata=pdata, level=0)
I wanted to specify the levels to get all curves.
R: lme, cannot evaluate groups for desired levels on 'newdata'
https://stats.stackexchange.com/questions/58031/prediction-on-mixed-effect-models-what-to-do-with-random-effects
I could identify the problem and I'll share what I found.
To have the plot code to working as it is in the question, the random factor row in the model should have curve instead of trial
#my model
m1 <- medrm(resp ~ rates,
curveid=b + c + d + e ~ product,
data = data.test,
fct=LL.4(),
random = c + d ~ 1|curve,
start=NULL)
#plotting
pdata <- data.test%>%
group_by(curve, product) %>%
expand(rates=exp(seq(-3, 10, length=50)))
pdata$resp_ind <- predict(m1, newdata=pdata)
pdata$resp <- predict(m1, newdata=pdata, level=0)
ggplot(data.test, aes(x=log(rates), y=resp,
colour=product, group=curve, shape=product)) +
geom_point() +
geom_line(data=pdata) +
geom_line(data=pdata, aes(y=resp_ind), linetype=2) +
theme_bw() +
scale_x_continuous("DOSE",
breaks=log(c(.1, 1, 10, 100, 1000)),
labels=c(.1, 1, 10, 100, 1000))
Other models with different random parameters with trial should have the trial in the group_by to plot:
#my model
m2 <- medrm(resp ~ rates,
curveid=b + c + d + e ~ product,
data = data.test,
fct=LL.4(),
random = c + d ~ 1|trial/curve,
start=NULL)
#plotting
pdata <- data.test%>%
group_by(trial, curve, product) %>%
expand(rates=exp(seq(-3, 10, length=50)))
pdata$resp_ind <- predict(m2, newdata=pdata)
pdata$resp <- predict(m2, newdata=pdata, level=0)
ggplot(data.test, aes(x=log(rates), y=resp,
colour=product, group=curve, shape=product)) +
geom_point() +
geom_line(data=pdata) +
geom_line(data=pdata, aes(y=resp_ind), linetype=2) +
theme_bw() +
scale_x_continuous("DOSE",
breaks=log(c(.1, 1, 10, 100, 1000)),
labels=c(.1, 1, 10, 100, 1000))
The correct model to use depends on each case and it is another subject.

How to obtain SEs or CIs for predicted probabilities for Heckman models

Question: How to get SEs (or confidence intervals) for predicted probabilities from Heckman model
Reproducible example using Mroz87 dataset contained within sampleSelection package to get predicted probability of lfp (labor force participation) for women with kids and without kids:
library(sampleSelection)
data("Mroz87")
Mroz87$kids <- (Mroz87$kids5 + Mroz87$kids618 > 0)
ml1 <- selection(lfp ~ age + I(age^2) + kids + huswage + educ,
log(wage) ~ educ + exper + I(exper^2) + city, method = "2step", data = Mroz87)
ndata <- data.frame(
age = median(Mroz87$age),
kids = c(0, 1),
huswage = median(Mroz87$huswage),
educ = median(Mroz87$educ)
)
pred_probs <- cbind(ndata, probs = predict(ml1, ndata, part = "selection", type = "response"))
# Predicted prob of lfp (labor force participation):
# with kids (kids = 1): 59%;
# without kids (kids = 0): 73%
# Upper and lower bounds?

How to add linear segments to ggplot in Interrupted segmented time series regression

I have fitted an interrupted time series regression to count data, and would like to display the results similar to this
Taken from: Lindstrand A, Bennet R, Galanis I, et al. Sinusitis and pneumonia hospitalization after introduction of pneumococcal conjugate vaccine. Pediatrics. 2014;134(6):e1528-36. doi:10.1542/peds.2013-4177.
Specifically, what I am trying (and failing) to reproduce is the added magenta, and cyan trend-lines respectively. I have been attempting to do this in ggplot. The problem is that my model is fit as glm(family = poisson) so that the coefficients are not on the original scale. Further complicating this is that I have supplied the at risk population as an offset i.e. glm(count ~ ., offset(log(at_risk)), family = poisson, data = df) but would like to display the data as (count / at_risk)*1000 on the Y-axis.
set.seed(42)
int = 85
df <- data.frame(
count = as.integer(rpois(132, 9) + rnorm(132, 1, 1)),
time = 1:132,
at_risk = rep(
c(4305, 4251, 4478, 4535, 4758, 4843, 4893, 4673, 4522, 4454, 4351),
each = 12
)
)
df$month <- factor(month.name, levels = month.name)
df$intv <- ifelse(df$time >= int, 1, 0)
df$intv_trend <- c(rep(0, (int - 1)),
1:(length(unique(df$time)) - (int - 1)))
df <-
df %>%
mutate(lag_count = dplyr::lag(count))
fit <- glm(
count ~ month + time + intv + intv_trend +
log(lag_count) + offset(log(at_risk)),
family = "poisson",
data = df
)
df$fit <- exp(c(NA, predict(fit)))
ggplot(df, aes(x = time, y = (fit / at_risk) * 1000)) +
geom_line()
(I have drawn the lines I want to be able to create into the resulting ggplot lineplot)
There is a continuous secular trend time that is given by the pseudo equation count ~ intercept + B1 * time which I would like to truncate such that it stops at roughly time = 72. This would be analogous to the magenta line in the above plot. An intervention intv occurs at time = 85 which causes a change in level intv and change in slope intv_trend. The pseudo code for the line of the intv effect with respect to time is count ~ intercept + intv + B1 * time + B2* intv_trend, analogous to the cyan line above.
I have attempted geom_abline() with different versions of exp(coef(fit)[1] ... etc. but I can't get the line to even show in the plot.
Any thoughts?
As I said in my comment, if you have a way of identifying the change point, you can add a column called, say, group and label the first part of the prediction line Control and the second Intervention (or whatever labels you prefer). Then use group as a color aesthetic in your plot to get two different lines. In the code below, I've added the grouping variable manually. To get predictions on the scale of the data, add type="response" to predict.
First, set up the data:
library(ggplot2)
library(dplyr)
int = 85
set.seed(42)
df <- data.frame(
count = as.integer(rpois(132, 9) + rnorm(132, 1, 1)),
time = 1:132,
at_risk = rep(
c(4305, 4251, 4478, 4535, 4758, 4843, 4893, 4673, 4522, 4454, 4351),
each = 12
)
)
df$month <- factor(month.name, levels = month.name)
df$intv <- ifelse(df$time >= int, 1, 0)
df$intv_trend <- c(rep(0, (int - 1)),
1:(length(unique(df$time)) - (int - 1)))
df <- df %>%
mutate(lag_count = dplyr::lag(count))
Create model and get predictions:
fit <- glm(
count ~ month + time + intv + intv_trend +
log(lag_count) + offset(log(at_risk)),
family = "poisson",
data = df
)
df$fit <- exp(c(NA, predict(fit)))
# Get predictions on the same scale as the data
df$fit2 = c(NA, predict(fit, type="response"))
# Add a grouping variable manually
df$group = rep(c("Control","Intervention"), c(72, 132 - 72))
Plot:
ggplot(df, aes(x = time, y = fit2)) +
geom_line() +
geom_smooth(method="lm", se=FALSE, aes(colour=group)) +
theme_bw() +
labs(colour="")

In R, is there a parsimonious or efficient way to get a regression prediction holding all covariates at their means?

I'm wondering if there is essentially a faster way of getting predictions from a regression model for certain values of the covariates without manually specifying the formulation. For example, if I wanted to get a prediction for a given dependent variable at means of the covariates, I can do something like this:
glm(ins ~ retire + age + hstatusg + qhhinc2 + educyear + married + hisp,
family = binomial, data = dat)
meanRetire <- mean(dat$retire)
meanAge <- mean(dat$age)
meanHStatusG <- mean(dat$hStatusG)
meanQhhinc2 <- mean(dat$qhhinc2)
meanEducyear <- mean(dat$educyear)
meanMarried <- mean(dat$married)
meanYear <- mean(dat$year)
ins_predict <- coef(r_3)[1] + coef(r_3)[2] * meanRetire + coef(r_3)[3] * meanAge +
coef(r_3)[4] * meanHStatusG + coef(r_3)[5] * meanQhhinc2 +
coef(r_3)[6] * meanEducyear + coef(r_3)[7] * meanMarried +
coef(r_3)[7] * meanHisp
Oh... There is a predict function:
fit <- glm(ins ~ retire + age + hstatusg + qhhinc2 + educyear + married + hisp,
family = binomial, data = dat)
newdat <- lapply(dat, mean) ## column means
lppred <- predict(fit, newdata = newdat) ## prediction of linear predictor
To get predicted response, use:
predict(fit, newdata = newdat, type = "response")
or (more efficiently from lppred):
binomial()$linkinv(lppred)

How to plot the results of a mixed model

I use lme4 in R to fit the mixed model
lmer(value~status+(1|experiment)))
where value is continuous, status(N/D/R) and experiment are factors, and I get
Linear mixed model fit by REML
Formula: value ~ status + (1 | experiment)
AIC BIC logLik deviance REMLdev
29.1 46.98 -9.548 5.911 19.1
Random effects:
Groups Name Variance Std.Dev.
experiment (Intercept) 0.065526 0.25598
Residual 0.053029 0.23028
Number of obs: 264, groups: experiment, 10
Fixed effects:
Estimate Std. Error t value
(Intercept) 2.78004 0.08448 32.91
statusD 0.20493 0.03389 6.05
statusR 0.88690 0.03583 24.76
Correlation of Fixed Effects:
(Intr) statsD
statusD -0.204
statusR -0.193 0.476
I would like to graphically represent the fixed effects evaluation. However the seems to be no plot function for these objects. Is there any way I can graphically depict the fixed effects?
Using coefplot2 (on r-forge):
Stealing the simulation code from #Thierry:
set.seed(101)
dataset <- expand.grid(experiment = factor(seq_len(10)),
status = factor(c("N", "D", "R"), levels = c("N", "D", "R")),
reps = seq_len(10))
X <- model.matrix(~status,dataset)
dataset <- transform(dataset,
value=rnorm(nrow(dataset), sd = 0.23) + ## residual
rnorm(length(levels(experiment)), sd = 0.256)[experiment] + ## block effects
X %*% c(2.78,0.205,0.887)) ## fixed effects
Fit model:
library(lme4)
model <- lmer(value~status+(1|experiment), data = dataset)
Plot:
install.packages("coefplot2",repos="http://r-forge.r-project.org")
library(coefplot2)
coefplot2(model)
edit:
I have frequently been having problems with the R-Forge build. This fallback should work if the R-Forge build is not working:
install.packages("coefplot2",
repos="http://www.math.mcmaster.ca/bolker/R",
type="source")
Note that the coda dependency must already be installed.
I like the coefficient confidence interval plots, but it may be useful to consider some additional plots to understand the fixed effects..
Stealing the simulation code from #Thierry:
library(ggplot2)
library(lme4)
library(multcomp)
dataset <- expand.grid(experiment = factor(seq_len(10)), status = factor(c("N", "D", "R"), levels = c("N", "D", "R")), reps = seq_len(10))
dataset$value <- rnorm(nrow(dataset), sd = 0.23) + with(dataset, rnorm(length(levels(experiment)), sd = 0.256)[experiment] + ifelse(status == "D", 0.205, ifelse(status == "R", 0.887, 0))) + 2.78
model <- lmer(value~status+(1|experiment), data = dataset)
Get a look at the structure of the data...looks balanced..
library(plotrix); sizetree(dataset[,c(1,2)])
It might be interesting to track the correlation between fixed effects, especially if you fit different correlation structures. There's some cool code provided at the following link...
http://hlplab.wordpress.com/2012/03/20/correlation-plot-matrices-using-the-ellipse-library/
my.plotcorr(
matrix(c(1, .891, .891,
.891, 1, .891,
.891, .891, 1), nrow=3)
)
Finally it seems relevant to look at the variability across the 10 experiments as well as the variability across "status" within experiments. I'm still working on the code for this as I break it on unbalanced data, but the idea is...
My2Boxes(m=4,f1=dataset$experiment,f2=dataset$status,x=dataset$value,color=c("red","yellow","green"))
Finally the already mentioned Piniero and Bates (2000) book strongly favored lattice from what little I've skimmed.. So you might give that a shot. Maybe something like plotting the raw data...
lattice::xyplot(value~status | experiment, groups=experiment, data=dataset, type=c('p','r'), auto.key=F)
And then plotting the fitted values...
lattice::xyplot(fitted(model)~status | experiment, groups=experiment, data=dataset, type=c('p','r'), auto.key=F)
Here are a few suggestions.
library(ggplot2)
library(lme4)
library(multcomp)
# Creating datasets to get same results as question
dataset <- expand.grid(experiment = factor(seq_len(10)),
status = factor(c("N", "D", "R"),
levels = c("N", "D", "R")),
reps = seq_len(10))
dataset$value <- rnorm(nrow(dataset), sd = 0.23) +
with(dataset, rnorm(length(levels(experiment)),
sd = 0.256)[experiment] +
ifelse(status == "D", 0.205,
ifelse(status == "R", 0.887, 0))) +
2.78
# Fitting model
model <- lmer(value~status+(1|experiment), data = dataset)
# First possibility
tmp <- as.data.frame(confint(glht(model, mcp(status = "Tukey")))$confint)
tmp$Comparison <- rownames(tmp)
ggplot(tmp, aes(x = Comparison, y = Estimate, ymin = lwr, ymax = upr)) +
geom_errorbar() + geom_point()
# Second possibility
tmp <- as.data.frame(confint(glht(model))$confint)
tmp$Comparison <- rownames(tmp)
ggplot(tmp, aes(x = Comparison, y = Estimate, ymin = lwr, ymax = upr)) +
geom_errorbar() + geom_point()
# Third possibility
model <- lmer(value ~ 0 + status + (1|experiment), data = dataset)
tmp <- as.data.frame(confint(glht(model))$confint)
tmp$Comparison <- rownames(tmp)
ggplot(tmp, aes(x = Comparison, y = Estimate, ymin = lwr, ymax = upr)) +
geom_errorbar() + geom_point()
This answer illustrates the newer dotwhisker::dwplot + broom.mixed solution.
Adding one more variable in the simulation:
dataset <- transform(dataset,
value=rnorm(nrow(dataset), sd = 0.23) + ## residual
rnorm(length(levels(experiment)), sd = 0.256)[experiment] + ## block effects
X %*% c(2.78,0.205,0.887),
var2=rnorm(nrow(dataset))) ## fixed effects
Fitting two different models:
library(lme4)
model <- lmer(value~status+var2 + (1|experiment), data = dataset)
model2 <- update(model, . ~ . -var2)
Plotting:
library(broom.mixed)
library(dotwhisker)
dwplot(list(first=model,second=model2), effects="fixed")+
geom_vline(xintercept=0, lty=2)
(using effects="fixed" gets us just the fixed-effect parameters, dropping the intercept by default).
broom.mixed has many other options. When I want to do something complex I may use ggplot + ggstance::geom_pointrangeh (+ position="position_dodgev") to make my own custom plot rather than relying on dotwhisker::dwplot().

Resources