Using R how do I add superscript in a title in ggplot2? - r

I've looked through some other threads and tried using expression and bquote but without any luck. I'm a beginner with R and would appreciate help with this specific example. I've found the following code on another website and it is working for me, but I can't make the 2 in R2 a superscript.
This is the initial code:
# Scatter plot of a fitted model's data with an lm smoother, and the fit
# statistics pasted into the plot title.
#
# fit: a fitted model object such as lm(y ~ x, data = ...). The first column
#      of fit$model goes on the y axis, the second on the x axis.
# Returns the ggplot object.
ggplotRegression <- function(fit) {
  require(ggplot2)

  model_frame <- fit$model
  column_names <- names(model_frame)
  fit_summary <- summary(fit)

  # Title pieces: R2, first and second coefficient, and entry [2, 4] of the
  # summary's coefficient table (labelled "P").
  plot_title <- paste(
    "R2 = ", signif(fit_summary$r.squared, 5),
    "Intercept =", signif(fit$coef[[1]], 5),
    " Slope =", signif(fit$coef[[2]], 5),
    " P =", signif(fit_summary$coef[2, 4], 5)
  )

  ggplot(model_frame, aes_string(x = column_names[2], y = column_names[1])) +
    geom_point() +
    stat_smooth(method = "lm") +
    labs(title = plot_title)
}
ggplotRegression(lm(TOA_NDVI ~ Field_NDVI, data = NDVI_type))
This is one of the things I've tried:
# Attempt from the question, kept exactly as posted.
# NOTE(review): the parentheses look unbalanced. paste( is already closed at
# the end of its first line, so the "Intercept", "Slope" and "P" pieces fall
# outside the title, and the last line closes one bracket more than is open.
# expression() is also handed a plain string here rather than plotmath such
# as R^2, so no superscript would be produced.
ggplotRegression <- function (fit) {
require(ggplot2)
ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) +
geom_point() +
stat_smooth(method = "lm") +
labs(title = paste(expression("R^2 *="),signif(summary(fit)$r.squared, 5)),
"Intercept =",signif(fit$coef[[1]],5 ),
" Slope =",signif(fit$coef[[2]], 5),
" P =",signif(summary(fit)$coef[2,4], 5)))
}
ggplotRegression(lm(TOA_NDVI ~ Field_NDVI, data = NDVI_type))
Any help would be very much appreciated. Thank you.

You can use "bquote". Here is the code:
# Plot a simple linear regression and report its statistics in the title,
# with the "2" of R-squared typeset as a superscript.
#
# fit: a fitted lm object with one predictor, e.g. lm(speed ~ dist, data = cars).
#      The first column of fit$model is the response (y axis), the second the
#      predictor (x axis).
# Returns the ggplot object, so further layers can be added with +.
ggplotRegression <- function(fit) {
  # library() stops with an error when ggplot2 is missing; require() would
  # only warn and return FALSE, deferring the failure to the ggplot() call.
  library(ggplot2)

  response <- names(fit$model)[1]
  predictor <- names(fit$model)[2]
  fit_summary <- summary(fit)

  # coef() is used instead of `$coef`, which only works through partial
  # matching of the `coefficients` element.
  estimates <- coef(fit)
  coef_table <- coef(fit_summary)

  r_squared <- signif(fit_summary$r.squared, 5)
  intercept <- signif(estimates[[1]], 5)
  slope <- signif(estimates[[2]], 5)
  p_value <- signif(coef_table[2, 4], 5)

  # .data[[name]] looks the column up by its exact name in fit$model, so it
  # also works for columns such as "log(x)"; aes_string() is deprecated and
  # would re-parse such a name as code.
  ggplot(fit$model, aes(x = .data[[predictor]], y = .data[[response]])) +
    geom_point() +
    stat_smooth(method = "lm") +
    labs(
      # bquote() builds a plotmath expression: R^2 gives the superscript,
      # .() splices in the computed values and ~ inserts a space.
      title = bquote(
        R^2 == .(r_squared) ~ "Intercept = " ~ .(intercept) ~
          " Slope = " ~ .(slope) ~ " P = " ~ .(p_value)
      ),
      # Keep the plain column names as axis titles.
      x = predictor,
      y = response
    )
}
ggplotRegression(lm(speed ~ dist, data = cars))
Here is the generated plot:

Related

Customize formula in geom-smooth / ggplot2 / R

I want to customize the formula used in geom_smooth like this:
library(MASS)
library(ggplot2)
data("Cars93", package = "MASS")
str(Cars93)
Cars93.log <- transform(Cars93, log.price = log(Price))
log.model <- lm(log.price ~ Horsepower*Origin, data = Cars93.log)
summary(log.model)
plot(log.model)
p <- ggplot(data = Cars93.log, aes(x = Horsepower, y = log.price, colour = Origin)) +
geom_point(aes(shape = Origin, color = Origin)) + # Punkte
facet_grid(~ Origin) +
theme(axis.title.x = element_text(margin=margin(15,0,0,0)),
axis.title.y = element_text(margin=margin(0,15,0,0))) +
scale_y_continuous(n.breaks = 7) +
scale_colour_manual(values = c("USA" = "red","non-USA" = "black")) +
scale_shape_manual(values = c(16,16)) +
ylab("Price(log)")
# Formula helper from the question. The `df` argument is never used: the
# returned formula always refers to the complete Cars93.log$Origin column,
# whatever subset is passed in.
# NOTE(review): this is presumably the source of the reported length mismatch,
# because the formula is handed to geom_smooth() together with per-Origin
# subsets of the data - confirm.
lm.mod <- function(df) {
y ~ x*Cars93.log$Origin
}
p_smooth <- by(Cars93.log, Cars93.log$Origin,
function(x) geom_smooth(data=x, method = lm, formula = lm.mod(x)))
p + p_smooth
However, I receive the error that the computation failed because of different lengths of my used variables.
length(Cars93.log$log.price)
length(Cars93.log$Origin)
length(Cars93.log$Horsepower)
But when I check the length for each variable they're all the same... Any ideas, what's wrong?
Thanks a lot, Martina
I agree with @Rui Barradas: the issue seems to be in the lines defining lm.mod and p_smooth, and in the use of the by function.
Once you are making a distinction by Origin (e.g., by doing either facet_wrap or color = Origin) then geom_smooth will automatically run different models for those facets.
p <- ggplot(data = Cars93.log,
aes(x = Horsepower, y = log.price, color = Origin)) +
geom_point(aes(shape = Origin)) +
facet_wrap(~ Origin) +
theme(axis.title.x = element_text(margin=margin(15,0,0,0)),
axis.title.y = element_text(margin=margin(0,15,0,0))) +
scale_y_continuous(n.breaks = 7) +
scale_colour_manual(values = c("USA" = "red","non-USA" = "black")) +
scale_shape_manual(values = c(16,16)) +
ylab("Price(log)")
p + geom_smooth(method = lm, formula = y ~ x)
you can convince yourself that this is the same as the output of log.model by extending the x-axis limits to see where the geom_smooth line would cross the y axis (e.g., + coord_cartesian(xlim = c(0, 300)))
You can also see the difference in the graph if you don't pass color = Origin to the geom_smooth function (essentially what is happening if you comment this out from the first ggplot() initialization):
p <- ggplot(data = Cars93.log,
aes(x = Horsepower, y = log.price)) + # color = Origin)) +
geom_point(aes(shape = Origin)) +
#facet_wrap(~ Origin) +
theme(axis.title.x = element_text(margin=margin(15,0,0,0)),
axis.title.y = element_text(margin=margin(0,15,0,0))) +
scale_y_continuous(n.breaks = 7) +
scale_colour_manual(values = c("USA" = "red","non-USA" = "black")) +
scale_shape_manual(values = c(16,16)) +
ylab("Price(log)")
p + geom_smooth(method = lm, formula = y ~ x)

Automatically placing of text in ggplot2

I need to plot a lot of variables against each other. In each of these plots I would like to automatically place information from a linear regression model in the upper left corner of the graph.
Taking the mtcars dataset as an example, I would like a piece of code that gives me the R2 and p-value from a linear regression model in the top left corner of the graph, no matter which variables I plot against each other. I have made a solution where I put R2 and P in the title, but since I need another title it's not optimal.
# Scatter plot of a fitted model's data with a red lm smoother; the adjusted
# R2 and the "P" value are shown in the title, each rounded to one
# significant digit.
#
# fit: a fitted model object such as lm(y ~ x, data = ...).
# Returns the ggplot object.
ggplotRegression <- function(fit) {
  require(ggplot2)

  model_stats <- summary(fit)
  adj_r2 <- signif(model_stats$adj.r.squared, 1)
  p_val <- signif(model_stats$coef[2, 4], 1)
  vars <- names(fit$model)

  # First model-frame column on y, second on x.
  base_plot <- ggplot(fit$model, aes_string(x = vars[2], y = vars[1]))

  base_plot +
    geom_point() +
    stat_smooth(method = "lm", col = "red") +
    labs(title = paste("Adj R2 = ", adj_r2, " P =", p_val))
}
disp_vs_wt_cyl4 <- mtcars %>%
filter(cyl=="4")
ggplotRegression(lm(disp ~ wt, data = disp_vs_wt_cyl4)) +
geom_point(size = 3.74, colour = "#0c4c8a") +
theme_bw()
You could use annotation_custom in your plot, which would allow you to have a separate title. In this example, we allow a title to be passed to your function:
# Plot a simple linear regression with its own title, and place the model
# formula, adjusted R² and p-value as a text annotation near the top-left
# corner of the panel.
#
# fit:   a fitted lm object with one predictor.
# title: plot title, passed on to ggtitle().
# Returns the ggplot object, so further layers or themes can be added with +.
ggplotRegression <- function(fit, title) {
  # library() stops with an error if ggplot2 is not installed; require() would
  # only warn and return FALSE.
  library(ggplot2)

  response <- names(fit$model)[1]
  predictor <- names(fit$model)[2]
  fit_summary <- summary(fit)

  label_text <- paste0(
    # formula(fit) also works when lm() received its formula in a variable,
    # where fit$call$formula would only hold that variable's name.
    paste(deparse(formula(fit)), collapse = " "), "\n",
    "Adj R\u00b2 = ", signif(fit_summary$adj.r.squared, 1),
    # coef() avoids relying on `$coef` partially matching `coefficients`.
    ", p = ", signif(coef(fit_summary)[2, 4], 1)
  )

  lab <- grid::textGrob(
    label = label_text,
    # "npc" units are normalised coordinates (0 to 1) of the drawing area.
    x = grid::unit(0.05, "npc"),
    y = grid::unit(0.9, "npc"),
    just = "left",
    # gpar() calls the text size `fontsize`; the former `size = 14` is not a
    # gpar setting, so the requested size was not applied.
    gp = grid::gpar(fontsize = 14, fontface = "bold")
  )

  # .data[[name]] looks columns up by exact name; aes_string() is deprecated.
  ggplot(fit$model, aes(x = .data[[predictor]], y = .data[[response]])) +
    ggtitle(title) +
    geom_point() +
    stat_smooth(method = "lm", col = "red") +
    # Keep the plain column names as axis titles.
    labs(x = predictor, y = response) +
    annotation_custom(lab)
}
So we can do:
disp_vs_wt_cyl4 <- mtcars %>% filter(cyl=="4")
ggplotRegression(lm(disp ~ wt, data = disp_vs_wt_cyl4), "My Title") +
geom_point(size = 3.74, colour = "#0c4c8a") +
theme_bw()

Coefficients per facet with output.type="numeric" in ggpmisc::stat_poly_eq

ggpmisc::stat_poly_eq has an option output.type = "numeric" allowing to get the estimates of the parameters of the fitted model. Below is my attempt to use it with facet_wrap. I get a different R² per facet but the coefficients are the same in the two facets. Do I do something wrong, or is it a bug?
library(ggpmisc)
set.seed(4321)
x <- 1:100
y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
my.data <- data.frame(x = x,
y = y,
group = c("A", "B"))
my.data[my.data$group=="A",]$y <- my.data[my.data$group=="A",]$y + 200000
formula <- y ~ poly(x, 1, raw = TRUE)
myformat <- "Intercept: %s\nSlope: %s\nR²: %s"
ggplot(my.data, aes(x, y)) +
facet_wrap(~ group) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(formula = formula, output.type = "numeric",
mapping = aes(label =
sprintf(myformat,
formatC(stat(coef.ls)[[1]][[1, "Estimate"]]),
formatC(stat(coef.ls)[[1]][[2, "Estimate"]]),
formatC(stat(r.squared)))))
Edit
We have to catch the panel number. It is strange that formatC(stat(as.integer(PANEL))) returns the panel number per facet:
However, formatC(stat(coef.ls)[[stat(as.integer(PANEL))]][[1, "Estimate"]]) does not work, because here PANEL = c(1,2).
Ok, I figured it out.
ggplot(my.data, aes(x, y)) +
facet_wrap(~ group) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(
formula = formula, output.type = "numeric",
mapping = aes(label =
sprintf(myformat,
c(formatC(stat(coef.ls)[[1]][[1, "Estimate"]]),
formatC(stat(coef.ls)[[2]][[1, "Estimate"]])),
c(formatC(stat(coef.ls)[[1]][[2, "Estimate"]]),
formatC(stat(coef.ls)[[2]][[2, "Estimate"]])),
formatC(stat(r.squared)))))
Version 0.3.2 of 'ggpmisc' is now in CRAN. Submitted earlier this week. In the documentation I now give some examples of the use of geom_debug() from my package 'gginnards' to have a look at the data frame returned by stats (usable with any ggplot stat or by itself). For your example, it would work like this:
library(ggpmisc)
library(gginnards)
set.seed(4321)
x <- 1:100
y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
my.data <- data.frame(x = x,
y = y,
group = c("A", "B"))
my.data[my.data$group=="A",]$y <- my.data[my.data$group=="A",]$y + 200000
formula <- y ~ poly(x, 1, raw = TRUE)
myformat <- "Intercept: %s\nSlope: %s\nR²: %s"
ggplot(my.data, aes(x, y)) +
facet_wrap(~ group) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(formula = formula, output.type = "numeric",
aes(label = ""),
geom = "debug")
Which prints to the console, two tibbles, one for each panel:
Example below added to address comment:
ggplot(my.data, aes(x, y)) +
facet_wrap(~ group) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(formula = formula, output.type = "numeric",
aes(label = ""),
summary.fun = function(x) {x[["coef.ls"]][[1]]})
This prints just coef.ls.
I added the "numeric" option recently in response to a suggestion and with this example I noticed a bug: aes(label = "") should not have been needed, but is needed because the default mapping for the label aesthetic is wrong. I will fix this for the next release.

ggplot2: Add the p-value, Rsq and slope for multiple columns

Let's say I have this data frame:
library(ggplot2)
Y <- rnorm(100)
df <- data.frame(A = rnorm(100), B = runif(100), C = rlnorm(100),
Y = Y)
colNames <- names(df)[1:3]
for(i in colNames){
plt <- ggplot(df, aes_string(x=i, y = Y)) +
geom_point(color="#B20000", size=4, alpha=0.5) +
geom_hline(yintercept=0, size=0.06, color="black") +
geom_smooth(method=lm, alpha=0.25, color="black", fill="black")
print(plt)
Sys.sleep(2)
}
I want to fit an lm model and display, for each column, the adjusted Rsq, intercept, slope and p-value. I found an example below:
data(iris)
# Scatter plot of a fitted model's data with a red lm smoother; the title
# lists adjusted R2, intercept, slope and the "P" value, each rounded to five
# significant digits.
#
# fit: a fitted model object such as lm(Sepal.Length ~ Petal.Width, data = iris).
# Returns the ggplot object.
ggplotRegression <- function(fit) {
  require(ggplot2)

  fit_stats <- summary(fit)
  title_text <- paste(
    "Adj R2 = ", signif(fit_stats$adj.r.squared, 5),
    "Intercept =", signif(fit$coef[[1]], 5),
    " Slope =", signif(fit$coef[[2]], 5),
    " P =", signif(fit_stats$coef[2, 4], 5)
  )

  # First model-frame column is drawn on y, the second on x.
  axis_vars <- names(fit$model)

  # Assemble the plot one layer at a time.
  scatter <- ggplot(fit$model, aes_string(x = axis_vars[2], y = axis_vars[1]))
  scatter <- scatter + geom_point()
  scatter <- scatter + stat_smooth(method = "lm", col = "red")
  scatter + labs(title = title_text)
}
fit1 <- lm(Sepal.Length ~ Petal.Width, data = iris)
ggplotRegression(fit1)
But it's working only for one column.
(I took the examples from this question and this one over here.)
Thanks!
Building on the comment above you can put the fit inside the function and then loop through with lapply.
library(ggplot2)
Y <- rnorm(100)
df <- data.frame(A = rnorm(100), B = runif(100), C = rlnorm(100),
Y = Y)
colNames <- names(df)[1:3]
# One regression plot per predictor column named in colNames. Each plot is
# titled with the statistics of the model Y ~ <column> fitted on df.
plot_ls <- lapply(colNames, function(x) {
  # reformulate() builds Y ~ <column name>, so the model frame carries the
  # real column name. With lm(Y ~ df[[x]]) the column was literally called
  # "df[[x]]" and had to be re-evaluated against variables outside the model.
  fit <- lm(reformulate(x, response = "Y"), data = df)
  fit_summary <- summary(fit)

  # .data[[name]] looks the columns up by name in fit$model.
  ggplot(fit$model, aes(x = .data[[x]], y = .data[["Y"]])) +
    geom_point() +
    # Name the x axis after the predictor column.
    scale_x_continuous(x) +
    stat_smooth(method = "lm", col = "red") +
    labs(y = "Y") +
    # coef() replaces `$coef`, which relied on partial matching.
    ggtitle(paste("Adj R2 = ", signif(fit_summary$adj.r.squared, 5),
                  "Intercept =", signif(coef(fit)[[1]], 5),
                  " Slope =", signif(coef(fit)[[2]], 5),
                  " P =", signif(coef(fit_summary)[2, 4], 5)))
})
gridExtra::grid.arrange(plot_ls[[1]],plot_ls[[2]],plot_ls[[3]])

"Quick" Scatterplot Legend with ggplot? [duplicate]

This question already has answers here:
Closed 11 years ago.
Possible Duplicate:
ggplot2: Adding Regression Line Equation and R2 on graph
I'm graphing data in a scatter plot with
ggplot(work.rootsfnp.h1, aes(x=fnpltrfac, y=rootsscore, group=1)) +
geom_smooth(method=lm, se = F) + geom_point(shape=1)
Is there a "quick" way to add a basic legend that includes the formula of the line of best fit as well as the correlation coefficient?
Not quick, but possible:
First, fit a model with lm
model <- lm(mpg ~ wt + factor(cyl), data=mtcars)
Then extract the coefficients and R^2, and construct expressions for each
# Build the two annotation labels from the fitted `model`:
#   e1 - the fitted equation, "intercept + coef*term + coef*term ...";
#   e2 - the R^2 of the fit, rounded to three decimals.
x <- coef(model)
intercept <- signif(x[1], 3)
# One "coef*name" piece per non-intercept coefficient, joined by " + ".
terms <- paste(signif(x[-1], 3), names(x[-1]), sep = "*", collapse = " + ")
# sep (not collapse) is what joins the intercept to the terms: both arguments
# have length one, so collapse had nothing to join and the default sep = " "
# dropped the "+" after the intercept.
e1 <- paste(intercept, terms, sep = " + ")
e2 <- paste("R^2 = ", round(summary(model)$r.squared, 3))
Finally, plot with ggplot and use annotate to place labels.
ggplot(mtcars, aes(x=wt, y=mpg)) +
geom_point() +
geom_smooth(method=lm) +
annotate("text", label=e1, x=max(mtcars$wt), y=max(mtcars$mpg),
hjust=1, size=3, vjust=0) +
annotate("text", label=e2, x=max(mtcars$wt), y=max(mtcars$mpg),
hjust=1, size=3, vjust=1)
See Ramnath's answer to a similar question that I asked some time ago.
library(ggplot2)
df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
# GET EQUATION AND R-SQUARED AS STRING
# SOURCE: http://goo.gl/K4yh
# Build a plotmath label of the form "y = a + b . x, r^2 = ..." for the
# regression of y on x.
#
# df: data frame with numeric columns `x` and `y`.
# Returns a length-one character string holding the deparsed expression, to
# be drawn by a text layer with parse = TRUE.
lm_eqn <- function(df) {
  m <- lm(y ~ x, data = df)

  # coef() returns a named vector and format() keeps those names. Dropping
  # them first keeps names such as "(Intercept)" from leaking into the
  # deparsed label (seen as c(...) in the plot on some R versions).
  estimates <- unname(coef(m))

  eq <- substitute(
    italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
    list(
      a = format(estimates[1], digits = 2),
      b = format(estimates[2], digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3)
    )
  )
  as.character(as.expression(eq))
}
# Scatter plot of df with the fitted straight line (no confidence band).
p <- ggplot(data = df, aes(x = x, y = y)) +
  geom_smooth(method = "lm", se = FALSE, color = "black", formula = y ~ x) +
  geom_point()
# annotate() draws the equation exactly once. geom_text(aes(label = ...))
# would repeat the same label for every row of df, overplotting itself.
# parse = TRUE renders the string returned by lm_eqn() as plotmath.
p <- p + annotate("text", x = 25, y = 300, label = lm_eqn(df), parse = TRUE)
print(p)

Resources