How to modify the plot when ggpredict() is used - r

The ggpredict() function is part of the ggeffects package. I like to change the names of the coefficients in the following plot:
lme1<- lme(mpg ~ cyl + disp + hp,
random = ~1|disp, method = "ML", data=mtcars)
plot(ggpredict(lme1, terms = c("cyl", "disp", "hp[80,150,215]")))
How to change the name of the y and x axis can be achieved by adding "labs(x = "Number of cylinders",
y = "Miles/(US) gallon")".
plot(ggpredict(lme1, terms = c("cyl", "disp", "hp[80,150,215]"))) +
labs(x = "Number of cylinders",
y = "Miles/(US) gallon")
But how can the third coefficients "hp" be changed into e.g. "Gross horsepower"?

You could build the plot by yourself, and then use a custom labeller-function. Another solution might be the ggggeffects-package, which provides utilities to build own plots.
The package maintainer has proposed two solutions:
library(ggggeffects)
#> Loading required package: ggeffects
#> Loading required package: ggplot2
library(nlme)
lme1<- lme(mpg ~ cyl + disp + hp,
random = ~1|disp, method = "ML", data=mtcars)
gge <- ggpredict(lme1, terms = c("cyl", "disp", "hp[80,150,215]"))
hp_labeller <- as_labeller(c("80" = "Gross horsepower = 80",
"150" = "Gross horsepower = 150",
"215" = "Gross horsepower = 215"))
autoplot(gge, labeller = labeller(hp = hp_labeller)) +
geom_expected_line() +
geom_CI_ribbon() +
labs(x = "Number of cylinders",
y = "Miles/(US) gallon")
library(ggeffects)
library(ggplot2)
library(nlme)
lme1<- lme(mpg ~ cyl + disp + hp,
random = ~1|disp, method = "ML", data=mtcars)
gge <- ggpredict(lme1, terms = c("cyl", "disp", "hp[80,150,215]"))
p <- plot(gge)
hp_labeller <- as_labeller(c("hp = 80" = "Gross horsepower = 80",
"hp = 150" = "Gross horsepower = 150",
"hp = 215" = "Gross horsepower = 215"))
p +
facet_grid(cols = vars(facet),
labeller = labeller(facet = hp_labeller))

Rename the column before plotting?
library(dplyr)
library(nlme)
library(ggeffects)
df <- mtcars
df <- df %>% rename(Horse_Power = hp)
lme1<- lme(mpg ~ cyl + disp + Horse_Power,
random = ~1|disp, method = "ML", data=df)
plot(ggpredict(lme1, terms = c("cyl", "disp", "Horse_Power[80,150,215]")))

Related

persp add factor group in R

Following the margins vignette https://cran.r-project.org/web/packages/margins/vignettes/Introduction.html#Motivation I would like to know how to plot using persp after a logit containing a triple interaction.
Using only persp and effect only part of the interaction is shown (drat and wt)
x1 <- lm(mpg ~ drat * wt * am, data = mtcars)
head(mtcars)
persp(x1, what = "effect")
However I would like to see the same graph above but at am=0 and am=1. I tried:
persp(x1,"drat","wt", at = list(am = 0:1), what = "effect")
But the same graph is produced. How to see two graphs at am=0 and am=1? or at least two curves representing am=0 and am=1 in the same cube.
Thanks
It doesn't look like you can do it with the persp.glm() function in the margins package. You will probably have to do it "by hand".
data(mtcars)
mtcars$hihp <- as.numeric(mtcars$hp > quantile(mtcars$hp,.5))
x1 <- glm(hihp ~ drat * wt * am + disp + qsec, data = mtcars, family=binomial)
#> Warning: glm.fit: algorithm did not converge
#> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
drat_s <- with(mtcars, seq(min(drat), max(drat),length=25))
wt_s <- with(mtcars, seq(min(wt), max(wt), length=25))
pred_fun <- function(x,y, am=0){
tmp <- data.frame(drat = x, wt = y, am=am,
disp = mean(mtcars$disp, na.rm=TRUE),
qsec = mean(mtcars$qsec, na.rm=TRUE))
predict(x1, newdata=tmp, type="response")
}
p0 <- outer(drat_s, wt_s, pred_fun)
p1 <- outer(drat_s, wt_s, pred_fun, am=1)
persp(drat_s, wt_s, p0, zlim=c(0,1), theta=-80, col=rgb(.75,.75, .75, .75),
xlab = "Axle Ratio",
ylab="Weight",
zlab="Predicted Probability")
par(new=TRUE)
persp(drat_s, wt_s, p1, zlim=c(0,1), theta=-80, col=rgb(1,0,0,.75), xlab="", ylab="", zlab="")
Created on 2022-05-16 by the reprex package (v2.0.1)
Edit: what if you add a factor to the model?
If we turn cyl into a factor and add it to the model, we also have to add it to the tmp object in the predfun() function, however it has to have the same properties that it has in the data, i.e., it has to be a factor (that has a single value) that has the same levels and labels as the one in the data. Here's an example:
data(mtcars)
mtcars$hihp <- as.numeric(mtcars$hp > quantile(mtcars$hp,.5))
mtcars$cyl <- factor(mtcars$cyl)
x1 <- glm(hihp ~ drat * wt * am + disp + qsec + cyl, data = mtcars, family=binomial)
#> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
drat_s <- with(mtcars, seq(min(drat), max(drat),length=25))
wt_s <- with(mtcars, seq(min(wt), max(wt), length=25))
pred_fun <- function(x,y, am=0){
tmp <- data.frame(drat = x, wt = y, am=am,
disp = mean(mtcars$disp, na.rm=TRUE),
qsec = mean(mtcars$qsec, na.rm=TRUE),
cyl = factor(2, levels=1:3, labels=levels(mtcars$cyl)))
predict(x1, newdata=tmp, type="response")
}
p0 <- outer(drat_s, wt_s, pred_fun)
p1 <- outer(drat_s, wt_s, pred_fun, am=1)
persp(drat_s, wt_s, p0, zlim=c(0,1), theta=-80, col=rgb(.75,.75, .75, .75),
xlab = "Axle Ratio",
ylab="Weight",
zlab="Predicted Probability")
par(new=TRUE)
persp(drat_s, wt_s, p1, zlim=c(0,1), theta=-80, col=rgb(1,0,0,.75), xlab="", ylab="", zlab="")
Created on 2022-06-06 by the reprex package (v2.0.1)

Path diagram in r

I am trying to plot a path diagram of a Structural Equation Model(SEM) in R. I was able to plot it using semPlot::semPaths(). The output is similar to The SEM was modeled using lavaan package.
I want a plot similar to . with estimates and p values. Can anyone help me out?
My suggestion would be lavaanPlot (see more of it in the author's personal website):
library(lavaan)
library(lavaanPlot)
# path model
model <- 'mpg ~ cyl + disp + hp
qsec ~ disp + hp + wt'
fit1 <- sem(model, data = mtcars)
labels1 <- list(mpg = "Miles Per Gallon", cyl = "Cylinders", disp = "Displacement", hp = "Horsepower", qsec = "Speed", wt = "Weight") #define labels
lavaanPlot(model = fit1, labels = labels1, coefs = TRUE, stand = TRUE, sig = 0.05) #standardized regression paths, showing only paths with p<= .05
check this example, it might be helpful
https://rstudio-pubs-static.s3.amazonaws.com/78926_5aa94ae32fae49f3a384ce885744ef4a.html

R logistic regression extracting coefficients in a loop: error with setting up loop

I'm trying to build a logistic regression model with 3 predictors, and I have a list of IDs for each predictor like below. (using mtcars dataset as an example)
var1 <- c("mpg", "cyl", "disp")
var2 <- c("mpg", "hp", "wt")
var3 <- c("drat", "wt", "gear", "carb")
I want to build multiple regression models with each of these IDs used. am is a fixed variable that I want to predict, so each of my model would look like:
mod1 <- glm(am ~ mpg + mpg + drat, data=mtcars, ...)
mod2 <- glm(am ~ mpg + mpg + wt, data=mtcars, ...)
mod3 <- glm(am ~ mpg + mpg + gear, data=mtcars, ...)
...
mod5 <- glm(am ~ mpg + hp + drat, data=mtcars, ...)
...
mod9 <- glm(am ~ mpg + wt + drat, data=mtcars, ...)
...
mod36 <- glm(am ~ disp + wt + carb, data=mtcars, ...)
So in this case it would be 3*3*4 = 36 models total. I'm trying to use apply like below.
coefs_mat <- expand.grid(var1, var2, var3)
mods = apply(coefs_mat, 1, function(row) {
glm(as.formula(am ~ row[1] + row[2] + row[3]), data = mtcars,
family = "binomial",control=list(maxit=20))
})
(+ Edit: coefs_mat looks like below:
>coefs_mat
var1 var2 var3
1 mpg mpg drat
2 cyl mpg drat
3 disp mpg drat
4 mpg hp drat
...
36 disp wt carb
This gives the following error: "object of type 'closure' is not subsettable".
I searched for other Stackoverflow posts that had similar problems, and tried this instead:
mods = apply(coefs_mat, 1, function(row) {
glm(as.formula(paste("am~", row[1] + row[2] + row[3])), data = mtcars,
family = "binomial",control=list(maxit=20))
})
But this gave another error: "Error in row[1] + row[2] : non-numeric argument to binary operator". What's causing these errors in my code?
I solved this by using sprintf.
var1 <- c("mpg", "cyl", "disp")
var2 <- c("mpg", "hp", "wt")
var3 <- c("drat", "wt", "gear", "carb")
coefs_mat <- expand.grid(var1, var2, var3)
vars_comb <- apply(coefs_mat, 1, function(x){paste(sort(x), collapse = '+')})
formula_vec <- sprintf("am ~ %s", vars_comb)
glm_res <- lapply(formula_vec, function(x) {
fit1 <- glm(x, data = mtcars, family = binomial("logit"))
return(fit1)
})

broom::augment: Evaluation error: object not found with gamlss but all good with lm

I'm wrestling with collecting gamlss results into a data frame. This continues the example here
Working example using lm
library(tidyverse)
library(broom)
library(gamlss)
library(datasets)
# working
mro <- mtcars %>%
nest(-am) %>%
mutate(am = factor(am, levels = c(0, 1), labels = c("automatic", "manual")),
fit = map(data, ~lm(mpg ~ hp + wt + disp, data = .)),
results = map(fit, augment))
Broken example using gamlss
# GAMLSS model.frame workaround for dplyr
# See https://stackoverflow.com/q/48979322/152860
model.frame.gamlss <- function(formula, what = c("mu", "sigma", "nu", "tau"), parameter = NULL, ...) {
object <- formula
dots <- list(...)
what <- if (!is.null(parameter)) {
match.arg(parameter, choices = c("mu", "sigma", "nu", "tau"))
} else match.arg(what)
Call <- object$call
parform <- formula(object, what)
data <- if (!is.null(Call$data)) {
## problem here, as Call$data is .
#eval(Call$data)
# instead, this would work:
eval(Call$data, environment(formula$mu.terms))
} else {
environment(formula$terms)
}
Terms <- terms(parform)
mf <- model.frame(
Terms,
data,
xlev = object[[paste(what, "xlevels", sep = ".")]]
)
mf
}
# broken
mro <- mtcars %>%
nest(-am) %>%
mutate(am = factor(am, levels = c(0, 1), labels = c("automatic", "manual")),
fit = map(data, ~gamlss(mpg ~ hp + wt + disp, data = .)),
results = map(fit, augment))
Appreciate any hints or tips.
So far this is the most elegant approach I have discovered (trial-and-error). Happy to stand corrected.
aug_func <- function(df){
augment(gamlss(mpg ~ hp + wt + disp, data=df))
}
mtcars %>%
mutate(am = factor(am, levels = c(0, 1), labels = c("automatic", "manual"))) %>%
group_by(am) %>%
do(aug_func(df=.)) %>%
ggplot(aes(x = mpg, y = .fitted)) +
geom_abline(intercept = 0, slope = 1, alpha = .2) + # Line of perfect fit
geom_point() +
facet_grid(am ~ .) +
labs(x = "Miles Per Gallon", y = "Predicted Value") +
theme_bw()

R print equation of linear regression on the plot itself

How do we print the equation of a line on a plot?
I have 2 independent variables and would like an equation like this:
y=mx1+bx2+c
where x1=cost, x2 =targeting
I can plot the best fit line but how do i print the equation on the plot?
Maybe i cant print the 2 independent variables in one equation but how do i do it for say
y=mx1+c at least?
Here is my code:
fit=lm(Signups ~ cost + targeting)
plot(cost, Signups, xlab="cost", ylab="Signups", main="Signups")
abline(lm(Signups ~ cost))
I tried to automate the output a bit:
fit <- lm(mpg ~ cyl + hp, data = mtcars)
summary(fit)
##Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.90833 2.19080 16.847 < 2e-16 ***
## cyl -2.26469 0.57589 -3.933 0.00048 ***
## hp -0.01912 0.01500 -1.275 0.21253
plot(mpg ~ cyl, data = mtcars, xlab = "Cylinders", ylab = "Miles per gallon")
abline(coef(fit)[1:2])
## rounded coefficients for better output
cf <- round(coef(fit), 2)
## sign check to avoid having plus followed by minus for negative coefficients
eq <- paste0("mpg = ", cf[1],
ifelse(sign(cf[2])==1, " + ", " - "), abs(cf[2]), " cyl ",
ifelse(sign(cf[3])==1, " + ", " - "), abs(cf[3]), " hp")
## printing of the equation
mtext(eq, 3, line=-2)
Hope it helps,
alex
You use ?text. In addition, you should not use abline(lm(Signups ~ cost)), as this is a different model (see my answer on CV here: Is there a difference between 'controling for' and 'ignoring' other variables in multiple regression). At any rate, consider:
set.seed(1)
Signups <- rnorm(20)
cost <- rnorm(20)
targeting <- rnorm(20)
fit <- lm(Signups ~ cost + targeting)
summary(fit)
# ...
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.1494 0.2072 0.721 0.481
# cost -0.1516 0.2504 -0.605 0.553
# targeting 0.2894 0.2695 1.074 0.298
# ...
windows();{
plot(cost, Signups, xlab="cost", ylab="Signups", main="Signups")
abline(coef(fit)[1:2])
text(-2, -2, adj=c(0,0), labels="Signups = .15 -.15cost + .29targeting")
}
Here's a solution using tidyverse packages.
The key is the broom package, whcih simplifies the process of extracting model data. For example:
fit1 <- lm(mpg ~ cyl, data = mtcars)
summary(fit1)
fit1 %>%
tidy() %>%
select(estimate, term)
Result
# A tibble: 2 x 2
estimate term
<dbl> <chr>
1 37.9 (Intercept)
2 -2.88 cyl
I wrote a function to extract and format the information using dplyr:
get_formula <- function(object) {
object %>%
tidy() %>%
mutate(
term = if_else(term == "(Intercept)", "", term),
sign = case_when(
term == "" ~ "",
estimate < 0 ~ "-",
estimate >= 0 ~ "+"
),
estimate = as.character(round(abs(estimate), digits = 2)),
term = if_else(term == "", paste(sign, estimate), paste(sign, estimate, term))
) %>%
summarize(terms = paste(term, collapse = " ")) %>%
pull(terms)
}
get_formula(fit1)
Result
[1] " 37.88 - 2.88 cyl"
Then use ggplot2 to plot the line and add a caption
mtcars %>%
ggplot(mapping = aes(x = cyl, y = mpg)) +
geom_point() +
geom_smooth(formula = y ~ x, method = "lm", se = FALSE) +
labs(
x = "Cylinders", y = "Miles per Gallon",
caption = paste("mpg =", get_formula(fit1))
)
Plot using geom_smooth()
This approach of plotting a line really only makes sense to visualize the relationship between two variables. As #Glen_b pointed out in the comment, the slope we get from modelling mpg as a function of cyl (-2.88) doesn't match the slope we get from modelling mpg as a function of cyl and other variables (-1.29). For example:
fit2 <- lm(mpg ~ cyl + disp + wt + hp, data = mtcars)
summary(fit2)
fit2 %>%
tidy() %>%
select(estimate, term)
Result
# A tibble: 5 x 2
estimate term
<dbl> <chr>
1 40.8 (Intercept)
2 -1.29 cyl
3 0.0116 disp
4 -3.85 wt
5 -0.0205 hp
That said, if you want to accurately plot the regression line for a model that includes variables that don't appear included in the plot, use geom_abline() instead and get the slope and intercept using broom package functions. As far as I know geom_smooth() formulas can't reference variables that aren't already mapped as aesthetics.
mtcars %>%
ggplot(mapping = aes(x = cyl, y = mpg)) +
geom_point() +
geom_abline(
slope = fit2 %>% tidy() %>% filter(term == "cyl") %>% pull(estimate),
intercept = fit2 %>% tidy() %>% filter(term == "(Intercept)") %>% pull(estimate),
color = "blue"
) +
labs(
x = "Cylinders", y = "Miles per Gallon",
caption = paste("mpg =", get_formula(fit2))
)
Plot using geom_abline()

Resources