ggplot: Adding Regression Line Equation and R2 with Facet [duplicate] - r

This question already has answers here:
Add regression line equation and R^2 on graph
(10 answers)
Closed 9 years ago.
I've created a faceted scatterplot with ggplot but I'm struggling to add the regression line equation to each of the facets. The simple case where there is no faceting has been answered here but this method won't extend to a faceted plot.
Any ideas how to accomplish this in a clean fashion?

Here is an example starting from this answer
require(ggplot2)
require(plyr)
df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
lm_eqn = function(df){
m = lm(y ~ x, df);
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(coef(m)[1], digits = 2),
b = format(coef(m)[2], digits = 2),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}
Create two groups on which you want to facet
df$group <- c(rep(1:2,50))
Create the equation labels for the two groups
eq <- ddply(df,.(group),lm_eqn)
And plot
p <- ggplot(data = df, aes(x = x, y = y)) +
geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
geom_point()
p1 = p + geom_text(data=eq,aes(x = 25, y = 300,label=V1), parse = TRUE, inherit.aes=FALSE) + facet_grid(group~.)
p1

Does this do what you want?
library(ggplot2); library(gridExtra)
ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
geom_point() +
geom_smooth(method="lm") +
facet_wrap(~ Species)
grid.newpage()
vpa_ <- viewport(width = 1, height = 1)
print(p, vp = vpa_)
grid.text("y ~ mx + b", x=0.3, y=0.8)
grid.text("y ~ mx + b", x=0.5, y=0.8)
grid.text("y ~ mx + b", x=0.8, y=0.8)

Using gridExtra you can arrange yours plots like this.
library(ggplot2)
library(ggplot2)
iris$x = iris$Sepal.Length
iris$y = iris$Sepal.Width
xx <- range(iris$x)
yy <- range(iris$y)
ll <- by(iris,iris$Species,function(df){
x.eq <- max(xx)-mean(xx)/2
y.eq <- max(yy)*0.95
p <- ggplot(df, aes(x, y)) +
geom_point() +
geom_smooth(method="lm") +
annotate(x=x.eq, y =y.eq , geom='text',
label = lm_eqn(df), size=5,parse=TRUE) +
xlim(xx[1],xx[2])+ylim(yy[1],yy[2])
})
library(gridExtra)
do.call(grid.arrange,ll)

Related

Multiple Polynomial fits in ggplot using facetwrap

So I would like to use multiple polynomial curves to fit 2 dimensional data,
I am able to plot one polynomial function but I would like to use for example 4 and then plot all of them at the same time using facet_wrap.
Now I am using simple 2 order polynomial:
library(ggplot2)
df <- mtcars
df <- data.frame("x"=df$mpg, "y"=df$hp)
my.formula <- y ~ x + I(x^2)
p <- ggplot(df, aes(x, y)) +
geom_point(shape=21, fill="blue", colour="black", size=2, alpha = 0.7) +
geom_smooth(method = "lm", se = F,
formula = my.formula,
colour = "red")
m <- lm(my.formula, df)
my.eq <- as.character(signif(as.polynomial(coef(m)), 3))
label.text <- paste(gsub("x", "~italic(x)", my.eq, fixed = TRUE),
paste("italic(R)^2",
format(summary(m)$r.squared, digits = 2),
sep = "~`=`~"),
sep = "~~~~")
p + annotate(geom = "text", label = label.text,
family = "serif", hjust = 0, parse = TRUE, size = 4)
lets say we would like to use another formulas such as:
my.formula2 <- y ~ x + I(x^2) + I(x^3)
my.formula4 <- y ~ x + I(x^2) + I(x^3) + I(x^4)
my.formula5 <- y ~ x + I(x^2) + I(x^3) + I(x^4) + I(x^5)
And plot it in the base plot above using facet_wrap so we would have 4 seperate plots and each has to have its own label text and anotation.
Here is an answer that first fits polynomial regression and gets the predicted values, then plots them all with geom_line, not geom_smooth.
library(ggplot2)
df <- mtcars
df <- data.frame("x"=df$mpg, "y"=df$hp)
tmp <- sapply(2:5, function(d){
predict(lm(y ~ poly(x, d), df))
})
df2 <- df
df2 <- cbind(df2, tmp)
rm(tmp)
names(df2)[-(1:2)] <- paste0("degree", 2:5)
long <- reshape2::melt(df2, id.vars = c("x", "y"))
ggplot(long, aes(x, y)) +
geom_point(shape=21, fill="blue", colour="black", size=2, alpha = 0.7) +
geom_line(aes(y = value), colour = "red") +
facet_wrap(~ variable)
Edit.
Another way, without fitting the models previously, is the following, inspired in a RStudio community post.
library(tidyverse)
cbind(df, tmp) %>%
gather(degree, value, -x, -y) %>%
{
reduce2(.init = ggplot(., aes(x = x, y = y)),
.x = .$degree,
.y = .$value,
function(prev, .x, .y) {
force(.y) # The formula below won't evaluate .y by itself
prev + geom_smooth(
data = . %>% filter(degree == .x),
method = "lm",
se = FALSE,
formula = y ~ poly(x, .y))
})
} +
geom_point(fill = "blue", colour = "black",size = 2, alpha = 0.7) +
facet_wrap(~ degree)
It's pretty simple with the stat_function function
I know you said you wanted to use facet_wrap but I would suggest using ggarrange in the ggpubr library
mylm1 <- lm(hp ~ mpg + I(mpg^2), data = df)
mylm2 <- lm(hp ~ mpg + I(mpg^2) + I(mpg^3), data = df)
mylm3 <- lm(hp ~ mpg + I(mpg^2) + I(mpg^3) + I(mpg^4), data = df)
mylm4 <- lm(hp ~ mpg + I(mpg^2) + I(mpg^3) + I(mpg^4) + I(mpg^5), data = df)
b1 <- coef(mylm1)
b2 <- coef(mylm2)
b3 <- coef(mylm3)
b4 <- coef(mylm4)
p1 <- df %>%
ggplot() +
geom_point(aes(x = mpg, y = hp)) +
stat_function(fun = function(x) b1[1] + b1[2]*x + b1[3]*x^2)
p2 <- df %>%
ggplot() +
geom_point(aes(x = mpg, y = hp)) +
stat_function(fun = function(x) b2[1] + b2[2]*x + b2[3]*x^2 + b2[4]*x^3)
p3 <- df %>%
ggplot() +
geom_point(aes(x = mpg, y = hp)) +
stat_function(fun = function(x) b3[1] + b3[2]*x + b3[3]*x^2 + b3[4]*x^3 + b3[5]*x^4)
p4 <- df %>%
ggplot() +
geom_point(aes(x = mpg, y = hp)) +
stat_function(fun = function(x) b4[1] + b4[2]*x + b4[3]*x^2 + b4[4]*x^3 + b4[5]*x^4 + b4[6]*x^5)
library(ggpubr)
ggarrange(p1,p2,p3,p4)

Can I mimick facet_wrap() with 5 completely separate ggplots?

I like the neatness of using facet_wrap() or facet_grid() with ggplot since the plots are all made to be the same size and are fitted row and column wise automatically.
I have a data frame and I am experimenting with various transformations and their impact on fit as measured by R2
dm1 <- lm(price ~ x, data = diamonds)
dm1R2 <- summary(dm1)$r.squared #0.78
dm2 <- lm(log(price) ~ x, data = diamonds)
dm2R2 <- summary(dm2)$r.squared # 0.9177831
dm3 <- lm(log(price) ~ x^2, data = diamonds)
dm3R2 <- summary(dm3)$r.squared # also 0.9177831. Aside, why?
ggplot(diamonds, aes(x = x, y = price)) +
geom_point() +
geom_smooth(method = "lm", se = F) +
geom_text(x = 3.5, y = 10000, label = paste0('R-Squared: ', round(dm1R2, 3)))
ggplot(diamonds, aes(x = x, y = log(price))) +
geom_point() +
geom_smooth(method = "lm", se = F) +
geom_text(x = 3, y = 9, label = paste0('R-Squared: ', round(dm2R2, 3)))
ggplot(diamonds, aes(x = x^2, y = log(price))) +
geom_point() +
geom_smooth(method = "lm", se = F) +
geom_text(x = 3, y = 20, label = paste0('R-Squared: ', round(dm3R2, 3)))
This produces 3 completely separate plots. Within Rmd file they will appear one after the other.
Is there a way to add them to a grid like when using facet_wrap?
You can use ggplot2's built-in faceting if you generate a "long" data frame from the regression model objects. The model object returned by lm includes the data used to fit the model, so we can extract the data and the r-squared for each model, stack them into a single data frame, and generate a faceted plot.
The disadvantage of this approach is that you lose the ability to easily set separate x-axis and y-axis titles for each panel, which is important, because the x and y values have different transformations in different panels. In an effort to mitigate that problem, I've used the model formulas as the facet labels.
Also, the reason you got the same r-squared for the models specified by log(price) ~ x and log(price) ~ x^2 is that R treats them as the same model. To tell R that you literally mean x^2 in a model formula, you need to wrap it in the I() function, making the formula log(price) ~ I(x^2). You could also do log(price) ~ poly(x, 2, raw=TRUE).
library(tidyverse)
theme_set(theme_bw(base_size=14))
# Generate a small subset of the diamonds data frame
set.seed(2)
dsub = diamonds[sample(1:nrow(diamonds), 2000), ]
dm1 <- lm(price ~ x, data = dsub)
dm2 <- lm(log(price) ~ x, data = dsub)
dm3 <- lm(log(price) ~ I(x^2), data = dsub)
# Create long data frame from the three model objects
dat = list(dm1, dm2, dm3) %>%
map_df(function(m) {
tibble(r2=summary(m)$r.squared,
form=as_label(formula(m))) %>%
cbind(m[["model"]] %>% set_names(c("price","x")))
}, .id="Model") %>%
mutate(form=factor(form, levels=unique(form)))
# Create data subset for geom_text
text.dat = dat %>% group_by(form) %>%
summarise(x = quantile(x, 1),
price = quantile(price, 0.05),
r2=r2[1])
dat %>%
ggplot(aes(x, price)) +
geom_point(alpha=0.3, colour="red") +
geom_smooth(method="lm") +
geom_text(data=text.dat, parse=TRUE,
aes(label=paste0("r^2 ==", round(r2, 2))),
hjust=1, size=3.5, colour="grey30") +
facet_wrap(~ form, scales="free")
ggarrange from the ggpubr package can do this:
p1 = ggplot(diamonds, aes(x = x, y = price)) +
geom_point() +
geom_smooth(method = "lm", se = F) +
geom_text(x = 3.5, y = 10000, label = paste0('R-Squared: ', round(dm1R2, 3)))
p2 = ggplot(diamonds, aes(x = x, y = log(price))) +
geom_point() +
geom_smooth(method = "lm", se = F) +
geom_text(x = 3, y = 9, label = paste0('R-Squared: ', round(dm2R2, 3)))
p3 = ggplot(diamonds, aes(x = x^2, y = log(price))) +
geom_point() +
geom_smooth(method = "lm", se = F) +
geom_text(x = 3, y = 20, label = paste0('R-Squared: ', round(dm3R2, 3)))
ggpubr::ggarrange(p1, p2, p3, ncol = 2, nrow = 2, align = "hv")
Other packages that have been suggested in the comments like cowplot and patchwork also offer good options for this.

Is it possible to force a scale on geom_raster()?

I am trying to compare three plots using geom_raster(). The problem is that I would like to maintain the same scale in all three plots that was set in the first plot [-3,3].
Here is my code:
#raster plots
box <- .05
df <- expand.grid(x1 = seq(-1, 1, box), x2 = seq(-1, 1, box))
df$risk <- df$x1 + 2*df$x2
p1 <- ggplot(df, aes(x1, x2, fill = risk)) + geom_raster() +
scale_fill_gradientn(colours=c("#FFFFFF","#046380","#000000")) +
theme_minimal() +
ggtitle("True Risk")
df <- expand.grid(x1 = seq(-1, 1, box), x2 = seq(-1, 1, box))
df$risk <- .99*df$x1 + 1.98*df$x2
p2 <- ggplot(df, aes(x1, x2, fill = risk)) + geom_raster() +
scale_fill_gradientn(colours=c("#FFFFFF","#046380","#000000")) +
theme_minimal() +
ggtitle("Estimated Risk")
df <- expand.grid(x1 = seq(-1, 1, box), x2 = seq(-1, 1, box))
df$risk <- .01*df$x1 + .02*df$x2
p3 <- ggplot(df, aes(x1, x2, fill = risk)) + geom_raster() +
scale_fill_gradientn(colours=c("#FFFFFF","#046380","#000000")) +
theme_minimal() +
ggtitle("Difference")
library(gridExtra)
grid.arrange(p1, p2, p3, ncol=1)
This is my output
It is hard to see that the difference is minimal because the scale changes to [0.03, -0.03]. How can I show the correct surface, but on the original scale?
You can keep all the three variables in one data.frame and use facet_grid or facet_wrap to maintain the scale.
library(ggplot2)
library(reshape2)
box <- .05
df <- expand.grid(x1 = seq(-1, 1, box), x2 = seq(-1, 1, box))
# Calculate each field
df$TrueRisk <- df$x1 + 2*df$x2
df$EstimatedRisk <- .99*df$x1 + 1.98*df$x2
df$Difference <- .01*df$x1 + .02*df$x2
# Transform the data into long format for ggplot2
df <- melt(df, c("x1", "x2"))
# Use facet_grid/facet_wrap to create the plot
ggplot(df, aes(x1, x2, fill = value)) + geom_raster() +
facet_grid(variable ~ .) +
scale_fill_gradientn(colours=c("#FFFFFF","#046380","#000000")) +
theme_minimal() +
ggtitle("Risk")

"Quick" Scatterplot Legend with ggplot? [duplicate]

This question already has answers here:
Closed 11 years ago.
Possible Duplicate:
ggplot2: Adding Regression Line Equation and R2 on graph
I'm graphing data in a scatter plot with
ggplot(work.rootsfnp.h1, aes(x=fnpltrfac, y=rootsscore, group=1)) +
geom_smooth(method=lm, se = F) + geom_point(shape=1)
Is there a "quick" way to add a basic legend that includes the formula of the line of best fit as well as the correlation coefficient?
Not quick, but possible:
First, fit a model with lm
model <- lm(mpg ~ wt + factor(cyl), data=mtcars)
Then extract the coefficients and R^2, and construct expressions for each
x <- coef(model)
intercept <- signif(x[1], 3)
terms <- paste(signif(x[-1], 3), names(x[-1]), sep="*", collapse= " + ")
e1 <- paste(intercept, terms, collapse = " + ")
e2 <- paste("R^2 = ", round(summary(model)$r.squared, 3))
Finally, plot with ggplot and use annotate to place labels.
ggplot(mtcars, aes(x=wt, y=mpg)) +
geom_point() +
geom_smooth(method=lm) +
annotate("text", label=e1, x=max(mtcars$wt), y=max(mtcars$mpg),
hjust=1, size=3, vjust=0) +
annotate("text", label=e2, x=max(mtcars$wt), y=max(mtcars$mpg),
hjust=1, size=3, vjust=1)
See Ramnath's answer to similar question that I asked sometime ago.
library(ggplot2)
df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
# GET EQUATION AND R-SQUARED AS STRING
# SOURCE: http://goo.gl/K4yh
lm_eqn = function(df){
m = lm(y ~ x, df);
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(coef(m)[1], digits = 2),
b = format(coef(m)[2], digits = 2),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}
p <- ggplot(data = df, aes(x = x, y = y)) +
geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
geom_point()
p <- p + geom_text(aes(x = 25, y = 300, label = lm_eqn(df)), parse = TRUE)
print(p)

How do I place an identical smooth on each facet of a ggplot2 object?

Here's an example:
eg <- data.frame(x = c(1:50, 50:1),
y = c(1:50, 1:50) + rnorm(100),
g = rep(c("a","b"), each=50))
qplot(x, y, data = eg) +
facet_wrap(~ g) +
geom_smooth()
I'd like to be able to plot the overall smooth on both facets as well as having the facet-specific smooths.
Edit: here's one way.
my.smooth <- gam(y ~ s(x), data = eg)
my.data <- data.frame(x = 1:50)
my.data$y <- predict(my.smooth, newdata = my.data)
qplot(x, y, data = eg) +
facet_wrap(~ g) +
geom_smooth() +
geom_smooth(data = my.data)
Thanks for any help!
Andrew
Clever trick: setting the faceting variable to NULL
library(ggplot2)
eg <- data.frame(x = c(1:50, 50:1),
y = c(1:50, 1:50) + rnorm(100),
g = rep(c("a","b"), each=50))
p <- qplot(x, y, data = eg) +
facet_wrap(~ g) +
geom_smooth()
p + geom_smooth(data=within(eg, g <- NULL), fill="red")
Or if you prefer, use facet_grid(..., margins=TRUE):
p + facet_grid(.~g, margins=TRUE)

Resources