I am adding the regression line equation to my ggplot. However, I would like to remove the intercept from plot and keep only the slope and R^2.
Here is the code I am using to generate the plot and equation. Do you have any idea how can I remove the intercept?
library(ggpmisc)
df <- data.frame(x = c(1:100))
df$y <- 20 * c(0, 1) + 3 * df$x + rnorm(100, sd = 40)
df$group <- factor(rep(c("A", "B"), 50))
df <- df %>% group_by(group) %>% mutate(ymax = max(y))
df %>%
group_by(group) %>%
do(tidy(lm(y ~ x, data = .)))
p <- ggplot(data = df, aes(x = x, y = y, colour = group)) +
geom_smooth(method = "lm", se=FALSE, formula = y ~ x) +
stat_regline_equation(
aes( x = x, y = y , label = paste(..eq.label..,..rr.label.., sep = "~~~~")),
formula=y~x, size=3,
)
p
Thanks,
You can use stat_fit_tidy from the ggpmisc package:
df <- data.frame(x = c(1:100))
df$y <- 20 * c(0, 1) + 3 * df$x + rnorm(100, sd = 40)
df$group <- factor(rep(c("A", "B"), 50))
library(ggpmisc)
my_formula <- y ~ x
ggplot(df, aes(x = x, y = y, colour = group)) +
geom_point() +
geom_smooth(method = "lm", formula = my_formula, se = FALSE) +
stat_fit_tidy(
method = "lm",
method.args = list(formula = my_formula),
mapping = aes(label = sprintf('slope~"="~%.3g',
after_stat(x_estimate))),
parse = TRUE)
EDIT
If you want the R squared as well:
ggplot(df, aes(x = x, y = y, colour = group)) +
geom_point() +
geom_smooth(method = "lm", formula = my_formula, se = FALSE) +
stat_fit_tidy(
method = "lm",
method.args = list(formula = my_formula),
mapping = aes(label = sprintf('slope~"="~%.3g',
after_stat(x_estimate))),
parse = TRUE) +
stat_poly_eq(formula = my_formula,
aes(label = ..rr.label..),
parse = TRUE,
label.x = 0.6)
EDIT
Another way:
myformat <- "Slope: %s --- R²: %s"
ggplot(df, aes(x, y, colour = group)) +
geom_point() +
geom_smooth(method = "lm", formula = my_formula, se = FALSE) +
stat_poly_eq(
formula = my_formula, output.type = "numeric",
mapping = aes(label =
sprintf(myformat,
formatC(stat(coef.ls)[[1]][[2, "Estimate"]]),
formatC(stat(r.squared)))),
vstep = 0.1
)
I am trying to fit an exponential model through my data using ggplot2 and the package plotly, further I want to display the regression line and also obtain an R^2 to check the model assumption
This is my data
SR.irrig<-c(67.39368816,28.7369497,60.18499455,49.32404863,166.393182,222.2902192 ,271.8357323,241.7224707,368.4630364,220.2701789,169.9234274,56.49579274,38.183813,49.337,130.9175233,161.6353594,294.1473982,363.910286,358.3290509,239.8411217,129.6507822 ,32.76462234,30.13952285,52.8365588,67.35426966,132.2303449,366.8785687,247.4012487
,273.1931613,278.2790213,123.2425639,45.98362999,83.50199402,240.9945866
,308.6981358,228.3425602,220.5131914,83.97942185,58.32171185,57.93814837,94.64370151 ,264.7800652,274.258633,245.7294036,155.4177734,77.4523639,70.44223322,104.2283817 ,312.4232116,122.8083088,41.65770103,242.2266084,300.0714687,291.5990173,230.5447786,89.42497778,55.60525466,111.6426307,305.7643166,264.2719213,233.2821407,192.7560296,75.60802862,63.75376269)
temp.pred<-c(2.8,8.1,12.6,7.4,16.1,20.5,20.4,18.4,25.8,14.8,13,5.3,9.4,6.8,15.2,14.3,22.4,23.7,20.8,16.5,7.4,4.61,4.79,8.3,12.1,18.4,22,14.6,15.4,15.5,8.2,10.2,14.8,23.4,20.9,14.5,13,9,2,11.6,13,21,24.7,22.3,10.8,13.2,9.7,15.6,21,10.6,8.3,20.7,24.3,17.9,14.7,5.5,7.,11.7,22.3,17.8,15.5,14.8,2.1,7.3)
temp2 <- data.frame(SR.irrig,temp.pred)
This is my code:
gg1 <- ggplot(temp2, aes(x=temp.pred, y=SR.irrig)) +
geom_point() + #show points
stat_smooth(method = 'lm', aes(colour = 'linear'), se = FALSE) +
stat_smooth(method = 'lm', formula = y ~ poly(x,2), aes(colour = 'polynomial'), se= FALSE)+
stat_smooth(method = 'nls', formula = y ~ a*exp(b*x), aes(colour = 'Exponential'), se = FALSE, start = list(a=1,b=1))+
stat_smooth(method = 'nls', formula = y ~ a * log(x) +b, aes(colour = 'logarithmic'), se = FALSE, start = list(a=1,b=1))
For the starting values I tried multiple different options and nothing works for the exponential model.
As an output I get following graph, where all the models are included expect the exponential one
What am I missing that no exp. curve is displayed? and how can I check how good the exponential fit is?
You can try with better initial values for nls and also considering what #RichardTelford suggested:
library(tidyverse)
#Data
SR.irrig<-c(67.39368816,28.7369497,60.18499455,49.32404863,166.393182,222.2902192 ,271.8357323,241.7224707,368.4630364,220.2701789,169.9234274,56.49579274,38.183813,49.337,130.9175233,161.6353594,294.1473982,363.910286,358.3290509,239.8411217,129.6507822 ,32.76462234,30.13952285,52.8365588,67.35426966,132.2303449,366.8785687,247.4012487
,273.1931613,278.2790213,123.2425639,45.98362999,83.50199402,240.9945866
,308.6981358,228.3425602,220.5131914,83.97942185,58.32171185,57.93814837,94.64370151 ,264.7800652,274.258633,245.7294036,155.4177734,77.4523639,70.44223322,104.2283817 ,312.4232116,122.8083088,41.65770103,242.2266084,300.0714687,291.5990173,230.5447786,89.42497778,55.60525466,111.6426307,305.7643166,264.2719213,233.2821407,192.7560296,75.60802862,63.75376269)
temp.pred<-c(2.8,8.1,12.6,7.4,16.1,20.5,20.4,18.4,25.8,14.8,13,5.3,9.4,6.8,15.2,14.3,22.4,23.7,20.8,16.5,7.4,4.61,4.79,8.3,12.1,18.4,22,14.6,15.4,15.5,8.2,10.2,14.8,23.4,20.9,14.5,13,9,2,11.6,13,21,24.7,22.3,10.8,13.2,9.7,15.6,21,10.6,8.3,20.7,24.3,17.9,14.7,5.5,7.,11.7,22.3,17.8,15.5,14.8,2.1,7.3)
temp2 <- data.frame(SR.irrig,temp.pred)
#Try with better initial vals
fm0 <- nls(log(SR.irrig) ~ log(a*exp(b*temp.pred)), temp2, start = c(a = 1, b = 1))
#Plot
gg1 <- ggplot(temp2, aes(x=temp.pred, y=SR.irrig)) +
geom_point() + #show points
stat_smooth(method = 'lm', aes(colour = 'linear'), se = FALSE) +
stat_smooth(method = 'lm', formula = y ~ poly(x,2), aes(colour = 'polynomial'), se= FALSE)+
stat_smooth(method = 'nls', formula = y ~ a*exp(b*x), aes(colour = 'Exponential'), se = FALSE,
method.args = list(start=coef(fm0)))+
stat_smooth(method = 'nls', formula = y ~ a * log(x) +b, aes(colour = 'logarithmic'), se = FALSE, start = list(a=1,b=1))
#Display
gg1
Output:
You can do this within ggplot without needing to get the nls model first (though the end result is the same). You need to decrease the minFactor and increase the maximum iterations of the nls control to get the model to converge, but the results seem reasonable. Note how the arguments are passed from stat_smooth to nls.
ggplot(temp2, aes(x=temp.pred, y=SR.irrig)) +
geom_point() +
stat_smooth(method = 'lm',
formula = y ~ x,
mapping = aes(colour = 'linear'),
se = FALSE) +
stat_smooth(method = 'lm',
formula = y ~ poly(x,2),
mapping = aes(colour = 'polynomial'),
se= FALSE)+
stat_smooth(method = 'nls',
formula = y ~ a*exp(b*x),
mapping = aes(colour = 'Exponential'),
se = FALSE,
method.args = list(start = list(a = 1, b = 1),
control = list(minFactor = 1/ 8192,
maxiter = 100))) +
stat_smooth(method = 'nls',
formula = y ~ a * log(x) +b,
mapping = aes(colour = 'logarithmic'),
se = FALSE,
method.args = list(start = list(a=1,b=1)))
ggpmisc::stat_poly_eq has an option output.type = "numeric" allowing to get the estimates of the parameters of the fitted model. Below is my attempt to use it with facet_wrap. I get a different R² per facet but the coefficients are the same in the two facets. Do I do something wrong, or is it a bug?
library(ggpmisc)
set.seed(4321)
x <- 1:100
y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
my.data <- data.frame(x = x,
y = y,
group = c("A", "B"))
my.data[my.data$group=="A",]$y <- my.data[my.data$group=="A",]$y + 200000
formula <- y ~ poly(x, 1, raw = TRUE)
myformat <- "Intercept: %s\nSlope: %s\nR²: %s"
ggplot(my.data, aes(x, y)) +
facet_wrap(~ group) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(formula = formula, output.type = "numeric",
mapping = aes(label =
sprintf(myformat,
formatC(stat(coef.ls)[[1]][[1, "Estimate"]]),
formatC(stat(coef.ls)[[1]][[2, "Estimate"]]),
formatC(stat(r.squared)))))
Edit
We have to catch the panel number. It is strange that formatC(stat(as.integer(PANEL))) returns the panel number per facet:
but however formatC(stat(coef.ls)[[stat(as.integer(PANEL))]][[1, "Estimate"]]) does not work, because here PANEL = c(1,2).
Ok, I figured it out.
ggplot(my.data, aes(x, y)) +
facet_wrap(~ group) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(
formula = formula, output.type = "numeric",
mapping = aes(label =
sprintf(myformat,
c(formatC(stat(coef.ls)[[1]][[1, "Estimate"]]),
formatC(stat(coef.ls)[[2]][[1, "Estimate"]])),
c(formatC(stat(coef.ls)[[1]][[2, "Estimate"]]),
formatC(stat(coef.ls)[[2]][[2, "Estimate"]])),
formatC(stat(r.squared)))))
Version 0.3.2 of 'ggpmisc' is now in CRAN. Submitted earlier this week. In the documentation I now give some examples of the use of geom_debug() from my package 'gginnards' to have a look at the data frame returned by stats (usable with any ggplot stat or by itself). For your example, it would work like this:
library(ggpmisc)
library(gginnards)
set.seed(4321)
x <- 1:100
y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
my.data <- data.frame(x = x,
y = y,
group = c("A", "B"))
my.data[my.data$group=="A",]$y <- my.data[my.data$group=="A",]$y + 200000
formula <- y ~ poly(x, 1, raw = TRUE)
myformat <- "Intercept: %s\nSlope: %s\nR²: %s"
ggplot(my.data, aes(x, y)) +
facet_wrap(~ group) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(formula = formula, output.type = "numeric",
aes(label = ""),
geom = "debug")
Which prints to the console, two tibbles, one for each panel:
Example below added to address comment:
ggplot(my.data, aes(x, y)) +
facet_wrap(~ group) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(formula = formula, output.type = "numeric",
aes(label = ""),
summary.fun = function(x) {x[["coef.ls"]][[1]]})
prints just the coefs.ls.
I added the "numeric" option recently in response to a suggestion and with this example I noticed a bug: aes(label = "") should not have been needed, but is needed because the default mapping for the label aesthetic is wrong. I will fix this for the next release.
For example, I have a couple of plots like this:
ggplot(mpg, aes(displ, hwy)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ splines::bs(x, 3), se = T)
Is it possible to plot 2 of these kind into the same plot?
If you only want to add another fonction, add another layer: + geom_smooth()
ggplot(mpg, aes(displ, hwy)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ splines::bs(x, 3), se = T) +
geom_smooth(method = "lm", formula = y ~ splines::bs(x, 4), se = T)
If you want to add data from a different data frame, add df information inside geom_smooth :
ggplot(mpg, aes(displ, hwy)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ splines::bs(x, 3), se = T) +
geom_smooth(data = mpg, aes(x = displ, y = cyl), method = "lm", formula = y ~ splines::bs(x, 4), se = T)
Finally, customize colors and legend :
color argument needs to be inside aes to appear in the legend
ggplot(mpg, aes(displ, hwy)) + geom_point() +
geom_smooth(aes(color = "B"),method = "lm", formula = y ~ splines::bs(x, 3), se = T) +
geom_smooth(data = mpg, aes(x = displ, y = cyl, color = "A"), method = "lm", formula = y ~ splines::bs(x, 4), se = T) +
scale_color_manual("Legend Title", values = c("A" = "red", "B" = "blue"))