lmer and plot_coefs: add values for estimates - r

I need to plot my coefficient values for a linear model (lm). I use plot_coef() to plot, but plot only selected variables. But plot_coef() does not allow to add values of those estimates to the plot so that they actually show as numbers?
states <- as.data.frame(state.x77)
fit1 <- lm(Income ~ Frost + Illiteracy + Murder +
Population + Area + `Life Exp` + `HS Grad`,
data = states, weights = runif(50, 0.1, 3))
plot_summs(fit1,
coefs = c("Frost Days" = "Frost", "% Illiterate" = "Illiteracy"),
scale = TRUE)

This may get you started:
library(jtools)
library(ggplot2)
p <- plot_summs(fit1,
coefs = c("Frost Days" = "Frost", "% Illiterate" = "Illiteracy"),
scale = TRUE)
p +
geom_label(aes(label = round(estimate)))+
theme(legend.position = "none")
You get this:
If you want to be in better control of the final product, it would be easier to get the data using:
df <- broom::tidy(fit1, conf.int = TRUE)
Then, use ggplot().

Related

How to remove variables in plot_summs?

I use plot_summs to plot my regression coefficients. below is a reproducible sample. I want to do two things. First, I want to remove Frost and Murder from the graph. Second, I want to change the color of Illiteracy and Population to Green and Life Exp and HS Grad to red. I’d appreciate any help I can get here.
fit1 <- lm(Income ~ Frost + Illiteracy + Murder +
Population + Area + Life Exp + HS Grad,
data = states, weights = runif(50, 0.1, 3))
plot_summs(fit1, scale = TRUE)
Without using the jtools package:
If I'm not wrong, plot_summ with scale - TRUE, scales the independent variables and plots the summary of the variable estimates, with mean as point and 2*SE as segments.
states2 <- states
states2[,-1] <- scale(states2[,-1]) # Considering first column is Income
fit2 <- lm(Income ~ ., data = states2, weights = runif(50, 0.1, 3))
df <- as.data.frame(summary(fit2)[["coefficients"]][-1,1:2])
df$variable <- rownames(df)
df <- df[!df$variable %in% c("Frost", "Murder"), ]
library(ggplot2)
ggplot(df) +
geom_point(aes(x = variable, y = Estimate,
color = variable), size = 6) +
geom_segment(aes(x = variable, xend = variable,
y = Estimate - (2 * `Std. Error`),
yend = Estimate + (2 * `Std. Error`),
color = variable), lwd = 2) +
scale_color_manual(values = c("Illiteracy" = "green","Population" = "green",
"Area" = "blue",
"`Life Exp`" = "red", "`HS Grad`" = "red")) +
coord_flip() +
theme_classic()

Fit and plot a Weibull model to a survival data

I want to achieve the exact same thing asked in this question:
How to plot the survival curve generated by survreg (package survival of R)?
Except for the fact that I don't want the data to be stratified by a variable (in the question above it was stratified by sex).
I just want the progression free survival for the whole group of treated patients.
So when I copy the code from the other question, here is where I get stuck:
library(survminer)
library(tidyr)
s <- with(lung,Surv(time,status))
fKM <- survfit(s ~ sex,data=lung)
sWei <- survreg(s ~ as.factor(sex),dist='weibull',data=lung) # in my case here I would replace as.factor(sex) by 1
pred.sex1 = predict(sWei, newdata=list(sex=1),type="quantile",p=seq(.01,.99,by=.01)) #Since I don't want to stratify, what do I do with these 2 lines of code?
pred.sex2 = predict(sWei, newdata=list(sex=2),type="quantile",p=seq(.01,.99,by=.01))
df = data.frame(y=seq(.99,.01,by=-.01), sex1=pred.sex1, sex2=pred.sex2)
df_long = gather(df, key= "sex", value="time", -y)
p = ggsurvplot(fKM, data = lung, risk.table = T)
p$plot = p$plot + geom_line(data=df_long, aes(x=time, y=y, group=sex))
I tried replacing as.factor(sex) by 1 and then the rest of the code just does not make sense, can someone help me with this?
Many thanks in advance!
If you just want to plot the overall empirical survival curve, you might do something like this:
library(survival)
library(survminer)
library(tidyr)
s <- with(lung, Surv(time, status))
fKM <- survfit(s ~ 1, data = survival::lung)
ggsurvplot(fKM, ggtheme = theme_bw())
However, if you want to fit a Weibull model with no predictors, then your formula is fine.
sWei <- survreg(s ~ 1, dist = 'weibull', data = lung)
probs <- seq(0.01, 1, by = 0.01)
time <- predict(sWei, type = "quantile", se = TRUE, p = probs)
The only problem is that time is now a named list of two matrices: fit and se.fit. Both have the same number of rows as lung, but all rows are identical, so we just take one from each and calculate the confidence interval in a data frame which we can then use to create a ggplot:
ggplot(data = data.frame(p = 1 - probs,
time = time$fit[1,],
upper = time$fit[1,] + 1.96 * time$se.fit[1,],
lower = time$fit[1,] - 1.96 * time$se.fit[1,])) +
geom_step(aes(p, time, colour = "All"), size = 1) +
geom_ribbon(aes(p, ymin = lower, ymax = upper, fill = "All"), alpha = 0.2) +
coord_flip(ylim = c(0, 1000)) +
scale_fill_discrete(name = "Strata") +
scale_color_discrete(name = "Strata") +
theme_bw() +
theme(legend.position = "top")
Which we can see looks like a pretty good fit.
If you want both in the same plot you can do something like:
df <- data.frame(p = 1 - probs,
time = time$fit[1,],
upper = time$fit[1,] + 1.96 * time$se.fit[1,],
lower = time$fit[1,] - 1.96 * time$se.fit[1,])
ggsurvplot(fKM, ggtheme = theme_bw())$plot +
geom_line(data = df, aes(time, p), linetype = 2, size = 1) +
geom_line(data = df, aes(upper, p), linetype = 2, size = 1) +
geom_line(data = df, aes(lower, p), linetype = 2, size = 1)
Created on 2020-08-18 by the reprex package (v0.3.0)

Model predictions: how to manipulate dummy variables across time?

I'm trying to predict insect populations across a year. I've built my model (a GAM, using the package mgcv). I then used the predict() function after I built a dummy dataset to build this prediction off of . This is where I'm struggling.
My question is: how can I build a new dummy dataset that will simulate, say cold winters vs. warm winters? I have just a "temperature" parameter, and I'm not sure how to manipulate that through time (or seasons). Ideally, I'd like to create a cold winter with mean summer temperatures and a warm winter with mean summer temperatures. Any suggestions would be greatly appreciated!
Quickly, my smoothing parameters in the model are: Average temperature, humidity, and day of year (doy). I have three random effect parameters in the model too. My model, prediction, and graph generated are below.
m1 <- gam(total ~ s(temp.avg) + s(humid) + s(doy, bs="cc", k=5) +
s(trap, bs="re")+s(site, bs="re")+s(year, bs="re"),
family=nb(),gamma=1.4,method="REML",data=dfe)
N <- 200
M <- 365
pdat1 <- with(dfe, expand.grid(year = c("2013","2014","2015","2016","2017"),
humid = mean(humid, na.rm = TRUE),
temp.avg = mean(temp.avg, na.rm = TRUE),
doy = seq(min(doy), max(doy), length = M),
trap = c("a","b","c","d"),
site = c("A","B", "C", "D")))
pred1 <- predict(m1, newdata = pdat1, type = "response", se.fit=TRUE)
crit <- qt(0.975, df = df.residual(m1)) # ~95% interval critical t
pdat1 <- transform(pdat1, fitted = pred1$fit, se = pred1$se.fit)
pdat1 <- transform(pdat1,
upper = fitted + (crit * se),
lower = fitted - (crit * se))
ggplot(pdat1, aes(x = doy, y = fitted)) +
geom_line() + theme_classic()+
labs(y = "Predicted Population", x = "Day of Year") +
theme(legend.position = "top")

Hazard ratio plot with confidence "waist" in ggplot2

When fitting a cox model that includes spline terms for a continuous covariate, I would like to be able to produce a plot of the hazard ratio across range of that covariate (relative to a fixed reference value) using ggplot2.
I have adapted an example from Terry Therneau's splines vignette here (see page 3). The only issue with this approach is the lack of a "waist" in the confidence interval at the reference value, as in this plot:
The example below produces the following plot, without the narrowing of the CI at the reference value.
library(survival)
library(splines)
library(ggplot2)
# colon cancer death dataset
ccd <- na.omit(subset(colon, etype == 2))
# fit model with ns() term for age
cox <- coxph(Surv(time, status) ~ rx + sex + ns(age, knots = c(20, 50, 70)), data = ccd)
# get data for plot
tp <- termplot(cox, se = TRUE, plot = FALSE)
# hazard ratio plot for natural spline of age, with reference # 50 yrs
ref <- tp$age$y[tp$age$x == 50]
ggplot() +
geom_line(data = tp$age, aes(x = x, y = exp(y - ref))) +
geom_line(data = tp$age, aes(x = x, y = exp(y - 1.96 * se - ref)), linetype = 2) +
geom_line(data = tp$age, aes(x = x, y = exp(y + 1.96 * se - ref)), linetype = 2) +
geom_hline(aes(yintercept = 1), linetype = 3) +
geom_rug(data = ccd, aes(x = age), sides = "b") +
labs(x = "Age at baseline, years",
y = "Hazard Ratio (95% CI) vs. 50 years",
title = "Mortality hazard ratio as a function of age",
subtitle = "Natural spline: knots at 20, 50, and 70 years")
I am aware that there are features in the rms package and the smoothHRpackage that produce these types of plots, but I am looking for a solution that is amenable to ggplot2 graphics and the coxph() function in the survival package. My question therefore boils down to:
Is there a way to adapt the output of termplot() to produce a plot with a "waist" at the reference value?
If termplot() cannot be used, how can I obtain the relevant plotting data by other means?
Edit 1: As the first comment suggested, this can be accomplished using rms and ggplot2 together. For example:
library(rms)
dd <- datadist(ccd)
dd$limits$age[2] <- 50
options(datadist = "dd")
cph <- cph(Surv(time, status) ~ rx + sex + rcs(age, c(20, 50, 70)), data = ccd, x = TRUE, y = TRUE)
pdata <- Predict(cph, age, ref.zero = TRUE, fun = exp)
ggplot(data = pdata) +
geom_hline(aes(yintercept = 1), linetype = 3) +
labs(x = "Age at baseline, years",
y = "Hazard Ratio (95% CI) vs. 50 years",
title = "Mortality hazard ratio as a function of age",
subtitle = "Natural spline: knots at 20, 50, and 70 years")
Which produces a plot very close to what I am after:
However, I would still like to know if there is a way to do this using coxph() and ns(). Not that I have anything against the rms package, I just have a bunch of old code based on survivalfunctionality.

Visualizing multiple curves in ggplot from bootstrapping, curve fitting

I have time series data that is well modeled using a sinusoidal curve. I'd like to visualize the uncertainty in the fitted model using bootstrapping.
I adapted the approach from here. I am also interested in this approach too, using nlsBoot. I can get the first approach to run, but the resulting plot contains curves that are not continuous, but jagged.
library(dplyr)
library(broom)
library(ggplot2)
xdata <- c(-35.98, -34.74, -33.46, -32.04, -30.86, -29.64, -28.50, -27.29, -26.00,
-24.77, -23.57, -22.21, -21.19, -20.16, -18.77, -17.57, -16.47, -15.35,
-14.40, -13.09, -11.90, -10.47, -9.95,-8.90,-7.77,-6.80, -5.99,
-5.17, -4.21, -3.06, -2.29, -1.04)
ydata <- c(-4.425, -4.134, -5.145, -5.411, -6.711, -7.725, -8.087, -9.059, -10.657,
-11.734, NA, -12.803, -12.906, -12.460, -12.128, -11.667, -10.947, -10.294,
-9.185, -8.620, -8.025, -7.493, -6.713, -6.503, -6.316, -5.662, -5.734, -4.984,
-4.723, -4.753, -4.503, -4.200)
data <- data.frame(xdata,ydata)
bootnls_aug <- data %>% bootstrap(100) %>%
do(augment(nls(ydata ~ A*cos(2*pi*((xdata-x_0)/z))+M, ., start=list(A=4,M=-7,x_0=-10,z=30),.)))
ggplot(bootnls_aug, aes(xdata, ydata)) +
geom_line(aes(y=.fitted, group=replicate), alpha=.1, color="blue") +
geom_point(size=3) +
theme_bw()
ggplot output
Can anyone offer help? Why are the displayed curves not smooth? Is there a better way to implement?
broom::augment is merely returning fitted values for each of the available data points. Therefore, the resolution of x is limited to the resolution of the data. You can predict values from the model with a much higher resolution:
x_range <- seq(min(xdata), max(xdata), length.out = 1000)
fitted_boot <- data %>%
bootstrap(100) %>%
do({
m <- nls(ydata ~ A*cos(2*pi*((xdata-x_0)/z))+M, ., start=list(A=4,M=-7,x_0=-10,z=30))
f <- predict(m, newdata = list(xdata = x_range))
data.frame(xdata = x_range, .fitted = f)
} )
ggplot(data, aes(xdata, ydata)) +
geom_line(aes(y=.fitted, group=replicate), fitted_boot, alpha=.1, color="blue") +
geom_point(size=3) +
theme_bw()
Some more work is needed to add the mean and 95% confidence interval:
quants <- fitted_boot %>%
group_by(xdata) %>%
summarise(mean = mean(.fitted),
lower = quantile(.fitted, 0.025),
upper = quantile(.fitted, 0.975)) %>%
tidyr::gather(stat, value, -xdata)
ggplot(mapping = aes(xdata)) +
geom_line(aes(y = .fitted, group = replicate), fitted_boot, alpha=.05) +
geom_line(aes(y = value, lty = stat), col = 'red', quants, size = 1) +
geom_point(aes(y = ydata), data, size=3) +
scale_linetype_manual(values = c(lower = 2, mean = 1, upper = 2)) +
theme_bw()

Resources