Combine correlation plot with coefficient table (ggplot2 -> ggstatsplot) - r

What are your preferred techniques for combining a table with a plot in one image using R? I remember using tableGrob() and either patchwork or cowplot months ago but cannot remember the details.
This example uses the ggstatsplot package. I would like to add the correlation coefficients to the correlogram (correlation plot).
if (!('ggstatsplot' %in% installed_packages)) {
devtools::install_github('https://github.com/IndrajeetPatil/ggstatsplot')
}
needed_pkgs <- setdiff(c('ggstatsplot', 'statsExpressions',
'dplyr', 'nnet', 'MASS'),
installed_packages)
if (length(needed_pkgs) > 0) {
install.packages(needed_pkgs)
}
library(ggstatsplot)
library(statsExpressions)
library(dplyr)
library(nnet)
library(MASS)
utils::example(topic = birthwt, echo = FALSE)
# model
bwt.mu <-
nnet::multinom(
formula = low ~ .,
data = bwt,
trace = FALSE
)
original_cols <- colnames(bwt)
bwt.mu_coefstats <- ggcoefstats(x = bwt.mu, output = "tidy") %>%
# skipping first row = intercept
slice(2:n()) %>%
dplyr::filter(term %in% original_cols) %>%
arrange(desc(p.value)) %>%
dplyr::select(term, estimate, p.value)
# Correlogram
cor_plot_out <-
ggstatsplot::ggcorrmat(bwt %>% dplyr::select(low, lwt, age))
Want to combine
bwt.mu_coefstats
cor_plot_out

The key elemnent is tableGrob() from gridExtra package!
We could use grid.arrange().
For the table use tableGrob() to create a table like the plot of a data frame. Then you can use it with grid.arrange() function.
library(gridExtra)
bwt.mu_coefstats <- tableGrob(
bwt.mu_coefstats,
theme = ttheme_default(
base_size = 10,
base_colour = "grey25",
parse = T
),
rows = NULL
)
grid.arrange(cor_plot_out, bwt.mu_coefstats,
heights = c(10, 4))
OR with patchwork:
library(patchwork)
cor_plot_out + bwt.mu_coefstats

Related

How can I select one plot from the partial dependence plot in R?

Here is the code for the partial dependence plot. I use the example data for this. First of all, I made a random forest model. Then I made a partial dependence plot.
rm(list = ls())
library(tidyverse)
library(mlbench)
library(randomForest)
library(caret)
library(edarf)
data("Sonar")
df<-Sonar
rm(Sonar)
# Clean up variable names (becuz I'm a bit OCD)
df <- df %>% rename(V01 = V1, V02 = V2, V03 = V3, V04 = V4,
V05 = V5, V06 = V6, V07 = V7, V08 = V8,
V09 = V9)
# Get minimum class frequency
min <- min(table(df$Class))
set.seed(223)
df_rf <- df %>% na.omit()
fit_rf <- randomForest(data = df_rf,
Class ~ .,
ntree = 500,
importance = TRUE,
sampsize = c(min, min))
# Add predicted values to data frame
df_rf <- df_rf %>%
mutate(predicted = predict(fit_rf))
# Get performance measures
confusionMatrix(df_rf$predicted, df_rf$Class, positive = "R")
# Get variable importance measures
imp_df <- data.frame(importance(fit_rf, scale = FALSE, type = 1))
# Tidy up and sort the data frame
imp_df <- imp_df %>%
mutate(names = rownames(imp_df)) %>%
arrange(desc(MeanDecreaseAccuracy))
# Save top predictor names as character vector
nm <- as.character(imp_df$names)[1:10]
# Get partial depedence values for top predictors
pd_df <- partial_dependence(fit = fit_rf,
vars = nm,
data = df_rf,
n = c(100, 200))
# Plot partial dependence using edarf
plot_pd(pd_df)
Then I got the result as follows.
I successfully got the multiple images that combined as one big plot. However, I need to select any one of these plots. Is there any way I can try?
You could use the dataframe your pd_df where you first have to make it a longer format by the columns M and R to visualize it in ggplot by a variable you want like this with example of V11:
library(ggplot2)
library(tidyr)
library(dplyr)
pd_df %>%
pivot_longer(cols = c(M, R)) %>%
ggplot(aes(x = V11, y = value, color = name)) +
geom_line() +
geom_point() +
labs(x = "value", y = "prediction")
Created on 2023-01-09 with reprex v2.0.2
You can replace V11 with other variables like you want.

Changing aesthetics in ggplot generated by svars package in R

I'm using the svars package to generate some IRF plots. The plots are rendered using ggplot2, however I need some help with changing some of the aesthetics.
Is there any way I can change the fill and alpha of the shaded confidence bands, as well as the color of the solid line? I know in ggplot2 you can pass fill and alpha arguments to geom_ribbon (and col to geom_line), just unsure of how to do the same within the plot function of this package's source code.
# Load Dataset and packages
library(tidyverse)
library(svars)
data(USA)
# Create SVAR Model
var.model <- vars::VAR(USA, lag.max = 10, ic = "AIC" )
svar.model <- id.chol(var.model)
# Wild Bootstrap
cores <- parallel::detectCores() - 1
boot.svar <- wild.boot(svar.model, n.ahead = 30, nboot = 500, nc = cores)
# Plot the IRFs
plot(boot.svar)
I'm also looking at the command for a historical decomposition plot (see below). Is there any way I could omit the first two facets and plot only the bottom three lines on the same facet?
hist.decomp <- hd(svar.model, series = 1)
plot(hist.decomp)
Your first desired result is easily achieved by resetting the aes_params after calling plot. For your second goal. There is probably an approach to manipulate the ggplot object. Instead my approach below constructs the plot from scratch. Basically I copy and pasted the data wrangling code from vars:::plot.hd and filtered the prepared dataset for the desired series:
# Plot the IRFs
p <- plot(boot.svar)
p$layers[[1]]$aes_params$fill <- "pink"
p$layers[[1]]$aes_params$alpha <- .5
p$layers[[2]]$aes_params$colour <- "green"
p
# Helper to convert to long dataframe. Source: svars:::plot.hd
hd2PlotData <- function(x) {
PlotData <- as.data.frame(x$hidec)
if (inherits(x$hidec, "ts")) {
tsStructure = attr(x$hidec, which = "tsp")
PlotData$Index <- seq(from = tsStructure[1], to = tsStructure[2],
by = 1/tsStructure[3])
PlotData$Index <- as.Date(yearmon(PlotData$Index))
}
else {
PlotData$Index <- 1:nrow(PlotData)
PlotData$V1 <- NULL
}
dat <- reshape2::melt(PlotData, id = "Index")
dat
}
hist.decomp <- hd(svar.model, series = 1)
dat <- hd2PlotData(hist.decomp)
dat %>%
filter(grepl("^Cum", variable)) %>%
ggplot(aes(x = Index, y = value, color = variable)) +
geom_line() +
xlab("Time") +
theme_bw()
EDIT One approach to change the facet labels is via a custom labeller function. For a different approach which changes the facet labels via the data see here:
myvec <- LETTERS[1:9]
mylabel <- function(labels, multi_line = TRUE) {
data.frame(variable = labels)
}
p + facet_wrap(~variable, labeller = my_labeller(my_labels))

Number of displayed decimals in annotation_customs(tableGrob) in R

I want to add a summary table to plot with ggplot. I am using annotation_custom to add a previous created table.
My problem is that the table shows a different number of decimals.
As an example I am using the mtcars database and my lines of code are the following:
rm(list=ls()) #Clear environment console
data(mtcars)
head(mtcars)
library(dplyr)
library(tidyr)
library(ggplot2)
library(gridExtra)
table <- mtcars %>% #summary table that needs to be avelayed to the plot
select(wt) %>%
summarise(
Obs = length(mtcars$wt),
Q05 = quantile(mtcars$wt, prob = 0.05),
Mean = mean(mtcars$wt),
Med = median(mtcars$wt),
Q95 = quantile(mtcars$wt, prob = 0.95),
SD = sd(mtcars$wt))
dens <- ggplot(mtcars) + #Create example density plot for wt variable
geom_density(data = mtcars, aes(mtcars$wt))+
labs(title = "Density plot")
plot(dens)
dens1 <- dens + #Overlaping summary table to density plot
annotation_custom(tableGrob(t(table),
cols = c("WT"),
rows=c("Obs", "Q-05", "Mean", "Median", "Q-95", "S.D." ),
theme = ttheme_default(base_size = 11)),
xmin=4.5, xmax=5, ymin=0.2, ymax=0.5)
print(dens1)
Running the previous I obtain the following picture
density plot
I would like to fix the number of displayed decimals to only 2.
I already tried adding sprintf
annotation_custom(tableGrob(t(sprintf("%0.2f",table)),
But obtained the following error "Error in sprintf("%0.2f", table_pet) :
(list) object cannot be coerced to type 'double'"
I have been looking without any look. Any idea how can I do this.
Thank you in advance
grid.table leaves the formatting up to you,
d = data.frame(x = "pi", y = pi)
d2 = d %>% mutate_if(is.numeric, ~sprintf("%.3f",.))
grid.table(d2)

Controlling facet column number with visreg

While I am enjoying using package visreg to visualize my regressions, there's one thing that I can't yet control: the number of columns when faceting. See the following factor-by-curve generalized additive regression for example:
library(dplyr)
library(mgcv)
library(visreg)
data(airquality)
test <- gam(
Ozone ~ s(Temp, by = Month),
data = airquality %>% mutate(Month = as.factor(Month))
)
If I do
visreg(test, xvar = "Temp", by = "Month", gg = TRUE)
I get a 1-row, 5-column factor-by-curves.
Funnily enough, if I take the gg = TRUE out, it becomes 2-row. But whichever is the case I would like to be able to control the number of columns and rows when faceting. So far I have been unsuccessful, by either manipulating the ellipsis argument of visreg or by directly trying to manipulate the resulting ggplot object.
So for example, if I wanted to do visreg with gg = TRUE with 3-row, 2-column, what would be my best chance---or is there another package that is recommended?
You can just modify the ggplot object and add facet_wrap in the usual way:
p <- visreg(test, xvar = "Temp", by = "Month", gg = TRUE)
p + facet_wrap(vars(Month), nrow = 3)
You don't actually need to create p first, this gives the same result:
visreg(test, xvar = "Temp", by = "Month", gg = TRUE) +
facet_wrap(vars(Month), nrow = 3)

Predicted vs. Actual plot

I'm new to R and statistics and haven't been able to figure out how one would go about plotting predicted values vs. Actual values after running a multiple linear regression. I have come across similar questions (just haven't been able to understand the code). I would greatly appreciate it if you explain the code.
This is what I have done so far:
# Attach file containing variables and responses
q <- read.csv("C:/Users/A/Documents/Design.csv")
attach(q)
# Run a linear regression
model <- lm(qo~P+P1+P4+I)
# Summary of linear regression results
summary(model)
The plot of predicted vs. actual is so I can graphically see how well my regression fits on my actual data.
It would be better if you provided a reproducible example, but here's an example I made up:
set.seed(101)
dd <- data.frame(x=rnorm(100),y=rnorm(100),
z=rnorm(100))
dd$w <- with(dd,
rnorm(100,mean=x+2*y+z,sd=0.5))
It's (much) better to use the data argument -- you should almost never use attach() ..
m <- lm(w~x+y+z,dd)
plot(predict(m),dd$w,
xlab="predicted",ylab="actual")
abline(a=0,b=1)
Besides predicted vs actual plot, you can get an additional set of plots which help you to visually assess the goodness of fit.
--- execute previous code by Ben Bolker ---
par(mfrow = c(2, 2))
plot(m)
A tidy way of doing this would be to use modelsummary::augment():
library(tidyverse)
library(cowplot)
library(modelsummary)
set.seed(101)
# Using Ben's data above:
dd <- data.frame(x=rnorm(100),y=rnorm(100),
z=rnorm(100))
dd$w <- with(dd,rnorm(100,mean=x+2*y+z,sd=0.5))
m <- lm(w~x+y+z,dd)
m %>% augment() %>%
ggplot() +
geom_point(aes(.fitted, w)) +
geom_smooth(aes(.fitted, w), method = "lm", se = FALSE, color = "lightgrey") +
labs(x = "Actual", y = "Fitted") +
theme_bw()
This will work nicely for deep nested regression lists especially.
To illustrate this, consider some nested list of regressions:
Reglist <- list()
Reglist$Reg1 <- dd %>% do(reg = lm(as.formula("w~x*y*z"), data = .)) %>% mutate( Name = "Type 1")
Reglist$Reg2 <- dd %>% do(reg = lm(as.formula("w~x+y*z"), data = .)) %>% mutate( Name = "Type 2")
Reglist$Reg3 <- dd %>% do(reg = lm(as.formula("w~x"), data = .)) %>% mutate( Name = "Type 3")
Reglist$Reg4 <- dd %>% do(reg = lm(as.formula("w~x+z"), data = .)) %>% mutate( Name = "Type 4")
Now is where the power of the above tidy plotting framework comes to life...:
Graph_Creator <- function(Reglist){
Reglist %>% pull(reg) %>% .[[1]] %>% augment() %>%
ggplot() +
geom_point(aes(.fitted, w)) +
geom_smooth(aes(.fitted, w), method = "lm", se = FALSE, color = "lightgrey") +
labs(x = "Actual", y = "Fitted",
title = paste0("Regression Type: ", Reglist$Name) ) +
theme_bw()
}
Reglist %>% map(~Graph_Creator(.)) %>%
cowplot::plot_grid(plotlist = ., ncol = 1)
Same as #Ben Bolker's solution but getting a ggplot object instead of using base R
#first generate the dd data set using the code in Ben's solution, then...
require(ggpubr)
m <- lm(w~x+y+z,dd)
ggscatter(x = "prediction",
y = "actual",
data = data.frame(prediction = predict(m),
actual = dd$w)) +
geom_abline(intercept = 0,
slope = 1)

Resources