Adding regression line to plotted matrix - r

How do I add a regression line to this graph? I tried abline() but it seems to only work with dataframes and I'm working with a matrix.
https://i.stack.imgur.com/fAeyL.png
This is my code for the graph:
# Scatter plot of column 14 against column 1 of the matrix `Extended`.
plot(
  Extended[, 1], Extended[, 14],
  xlim = c(1877, 2017), ylim = c(-12, 15),
  pch = 19, col = "blue"
)

This is in general easier with ggplot.
library(ggplot2)

# ggplot2 works on data frames, so convert the matrix and give every column a
# predictable name (V1, V2, ...). seq_len() stays correct even for zero columns.
Ex <- as.data.frame(Extended)
names(Ex) <- paste0("V", seq_len(ncol(Ex)))

ggplot(Ex, aes(x = V1, y = V14)) +
  geom_point(size = 4, colour = "blue") +
  # Least-squares regression line; the formula is spelled out explicitly.
  geom_smooth(method = "lm", formula = y ~ x) +
  # Same axis limits as the base plot() call.
  coord_cartesian(xlim = c(1877, 2017), ylim = c(-12, 15))
For base R there are myriad ways of visualizing the effects. The effects package is a very general way of doing this: fit the model, then call the predictorEffect function, specifying which predictor's effect to plot.
library(effects)

# Simple linear regression of mpg on hp, using the mtcars data set.
data(mtcars)
model <- lm(mpg ~ hp, data = mtcars)

# Plot the effect of the predictor `hp` in that model.
plot(predictorEffect("hp", model))
The effects package has some very general implementations that can do quite a lot of things. I'd recommend reading the vignettes to get an idea how the package works.
For a manual base plot version we could do something like:
# Add the fitted values and the lower/upper prediction-interval bounds as
# columns. The interval type is spelled out in full instead of relying on
# partial matching of "predict".
mtcars[, c("fit", "lwr", "upr")] <- predict(model, interval = "prediction")

# lines() joins points in row order, so sort by the predictor first.
mtcars <- mtcars[order(mtcars$hp), ]

plot(y = mtcars$mpg, x = mtcars$hp, pch = 19, col = "blue")
lines(y = mtcars$fit, x = mtcars$hp, col = "green")  # fitted line
lines(y = mtcars$upr, x = mtcars$hp, col = "red")    # upper bound
lines(y = mtcars$lwr, x = mtcars$hp, col = "red")    # lower bound

Related

log linear model in ggplot?

The data is from: http://www.principlesofeconometrics.com/poe5/poe5rdata.html, in the file: collegetown.csv
A log linear model is of the form: ln(y) = b1 + b2x
library(ggthemes)
library(ggplot2)

# Read the data set from a local copy of collegetown.csv.
theUrl <- "../poedata/collegetown.csv"
collegetown <- read.csv(theUrl)

# Scatter plot of price against sqft.
g1 <- ggplot(data = collegetown, aes(x = sqft, y = price)) +
  geom_point(colour = "blue")
plot(g1)

# Log-linear model: log(price) regressed on sqft.
logLinearModel <- lm(log(price) ~ sqft, data = collegetown)

# Attempted overlay of the fitted curve on the scatter plot.
g1 +
  geom_smooth(
    method = "lm", formula = y ~ exp(x),
    se = FALSE, colour = "green"
  ) +
  theme_economist()

summary(logLinearModel)
This gives me the weird plot below:
How do I plot the proper curve? Do I need to store the predicted values explicitly in the data frame?
PS: I want the axis to stay untransformed i.e. in their original scales.
The model y~exp(x) is not the same as the model log(y)~x, so you're not getting the smoother you expect. You can specify that the smoother is a generalised linear model with a log-link function using the code:
g1 <- ggplot(data = collegetown, aes(x = sqft, y = price)) +
  geom_point(colour = "blue")

# A Gaussian GLM with a log link models price as exp(b1 + b2 * sqft), so the
# curve is drawn on the untransformed axes.
g1 +
  geom_smooth(
    method = "glm", formula = y ~ x,
    method.args = list(family = gaussian(link = "log")),
    se = FALSE,  # FALSE rather than F, which is an ordinary reassignable variable
    colour = "green"
  ) +
  theme_economist()
which gives what you're wanting. If that doesn't seem intuitive, you can fit the lm outside the plotting with:
# Fit on the log scale, then back-transform the fitted values with exp() so
# they can be drawn against the original price axis.
logLinearModel <- lm(log(price) ~ sqft, data = collegetown)
collegetown$pred <- exp(predict(logLinearModel))

ggplot(data = collegetown, aes(x = sqft, y = price)) +
  geom_point(colour = "blue") +
  geom_line(aes(y = pred), colour = "green") +
  theme_economist()
Warning: the two versions aren't the same if you want the standard errors. The first approach (the log-link GLM) gives errors that are symmetric on the original scale, whereas the standard errors you would get from the lm prediction are symmetric on the log scale. See here.
I think a relatively simpler method to build the curve is using stat_function() method.
# LOG LINEAR MODEL
logLinearModel <- lm(log(price) ~ sqft, data = collegetown)
smodloglinear <- summary(logLinearModel)
logLinearModel
names(logLinearModel)

# Squared correlation between the back-transformed fitted values and the
# observed prices.
yn <- exp(logLinearModel$fitted.values)
rgloglinear <- cor(yn, collegetown$price)
rgloglinear^2

# Coefficient estimates and residual variance, read from the model summary.
# These previously referenced `smod`, which is never defined; the summary
# object is `smodloglinear`.
b1 <- coef(smodloglinear)["(Intercept)", "Estimate"]
b2 <- coef(smodloglinear)["sqft", "Estimate"]
sighat2 <- smodloglinear$sigma^2

# NOTE(review): dark_theme_bw() is not loaded by any library() call shown
# here; it presumably comes from the ggdark package, which must be attached.
g2 <- ggplot(data = collegetown, aes(x = sqft, y = price)) +
  geom_point(colour = "white") +
  # "yn": exp(b1 + b2 * x)
  stat_function(
    fun = function(x) exp(b1 + b2 * x),
    aes(colour = "red")
  ) +
  # "yc": the same curve with sighat2 / 2 added inside the exponent
  stat_function(
    fun = function(x) exp(b1 + b2 * x + sighat2 / 2),
    aes(colour = "green")
  ) +
  dark_theme_bw() +
  # The mapped values are literal colour names, so use the identity scale and
  # request a legend for it explicitly.
  scale_colour_identity(
    name = "Model fit",
    breaks = c("red", "green"),
    labels = c("yn", "yc"),
    guide = "legend"
  )
g2
which gives:

How to visualize spline regression with ggplot2?

I'm working with the Wage dataset in the ISLR library. My objective is to perform a spline regression with knots at 3 locations (see code below). I can do this regression. That part is fine.
My issue concerns the visualization of the regression curve. Using base R functions, I seem to get the correct curve. But I can't seem to get quite the right curve using the tidyverse. This is what is expected, and what I get with the base functions:
This is what ggplot spits out
It's noticeably different. R gives me the following message when running the ggplot functions:
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
What does this mean and how do I fix it?
library(tidyverse)
library(ISLR)
# attach() puts the columns of Wage (age, wage, ...) on the search path, so
# they are referred to by bare name below.
attach(Wage)
# Grid over the observed age range, used for predictions.
agelims <- range(age)
age.grid <- seq(from = agelims[1], to = agelims[2])
# Cubic spline regression of wage on age with knots at 25, 40 and 60.
# NOTE(review): bs() is presumably splines::bs(); no library(splines) call is
# visible in this snippet -- confirm it is loaded.
fit <- lm(wage ~ bs(age, knots = c(25, 40, 60), degree = 3), data = Wage) #Default is 3
# Base graphics: raw data, the fitted curve evaluated on age.grid, and dashed
# vertical lines at the knots.
plot(age, wage, col = 'grey', xlab = 'Age', ylab = 'Wages')
points(age.grid, predict(fit, newdata = list(age = age.grid)), col = 'darkgreen', lwd = 2, type = "l")
abline(v = c(25, 40, 60), lty = 2, col = 'darkgreen')
# ggplot2: geom_smooth() receives the model's fitted values as y and, with no
# method given, applies its own default smoother to them.
ggplot(data = Wage) +
geom_point(mapping = aes(x = age, y = wage), color = 'grey') +
geom_smooth(mapping = aes(x = age, y = fit$fitted.values), color = 'red')
I also tried
# Second attempt: smooth the predictions computed on age.grid instead of the
# in-sample fitted values.
ggplot() +
  geom_point(
    data = Wage,
    mapping = aes(x = age, y = wage),
    colour = "grey"
  ) +
  geom_smooth(
    mapping = aes(
      x = age.grid,
      y = predict(fit, newdata = list(age = age.grid))
    ),
    colour = "red"
  )
but that looks very similar to the 2nd picture.
Thanks for any help!
splines::bs() and s(., bs = "cs") from mgcv do very different things; the latter is a penalized regression spline. I would try (untested!)
# Let geom_smooth() fit the linear model with the B-spline basis itself.
geom_smooth(
  method = "lm",
  formula = y ~ splines::bs(x, knots = c(25, 40, 60), degree = 3)
)

How to plot multiple Poisson distribution in one plot

I would like to plot multiple Poisson (with different lambdas (1:10))
I found the following function to draw a plot
# Plot the Poisson(lambda) probabilities for the counts 0..20 and overlay the
# normal density with mean lambda and standard deviation sqrt(lambda).
# The overlay colour is taken from lambda.
plot_pois <- function(lambda = 5) {
  # Starts a new plot each time it is called.
  plot(0:20, dpois(x = 0:20, lambda = lambda), xlim = c(-2, 20))

  normal_density <- function(x) {
    dnorm(x, mean = lambda, sd = sqrt(lambda))
  }

  # add = TRUE draws the curve on top of the points plotted above.
  curve(normal_density, from = -4, to = 20, add = TRUE, col = lambda)
}
# Start a new plot frame, then draw the lambda = 2 case.
plot.new()
plot_pois(2)
But I can't plot another Poisson over it. I tried changing plot to points or lines, but that totally changes the plot. I would also like to add a legend showing a different color for each value of lambda.
If I could plot it using ggplot, it would be a better option.
Another possible tidyverse solution:
library(tidyverse)

# Poisson probabilities on 0..20, one set of rows per lambda in 1..10
p_dat <- map_df(1:10, function(lambda) {
  tibble(
    l = paste(lambda),
    x = 0:20,
    y = dpois(x, lambda)
  )
})

# Matching normal densities (mean lambda, sd sqrt(lambda)) on a fine grid
n_dat <- map_df(1:10, function(lambda) {
  tibble(
    l = paste(lambda),
    x = seq(0, 20, by = 0.001),
    y = dnorm(x, lambda, sqrt(lambda))
  )
})

# Lines for the normal curves, points for the Poisson probabilities; the point
# layer inherits the x/y/colour mapping from ggplot().
ggplot(n_dat, aes(x, y, colour = factor(l, levels = 1:10))) +
  geom_line() +
  geom_point(data = p_dat) +
  labs(colour = "Lambda:") +
  theme_minimal()
Created on 2019-05-06 by the reprex package (v0.2.1)
In ggplot2 you can use lapply to loop over different lambdas:
library(ggplot2)

lambdas <- c(5, 2)

# Adding a list of layers to a plot adds each layer in turn, so lapply() can
# build one point layer and one curve layer per lambda.
ggplot(data = data.frame(x = 0:20)) +
  # Poisson probabilities as points
  lapply(lambdas, function(l) {
    geom_point(aes(x = x, y = dpois(x, lambda = l), colour = factor(l)))
  }) +
  # Normal density with mean l and sd sqrt(l) as a curve
  lapply(lambdas, function(l) {
    stat_function(
      fun = dnorm,
      args = list(mean = l, sd = sqrt(l)),
      aes(x = x, colour = factor(l))
    )
  })
Axes titles and limits, the legend title etc. can then be customized as usual in ggplot2.

Replicating lattice graph for a mixed model

I am trying to replicate a lattice graph using ggplot2 for a mixed model. My ggplot graph looks very similar, but I am not sure about the loess line that is fitted.
My goal is to add a loess line from the mixed model using ggplot2. Below is an example of my commands :
library(nlme)
library(ggplot2)
library(lattice)
library(lme4)
data(MathAchieve)
# attach() makes the MathAchieve columns (SES, School, ...) visible by bare name.
attach(MathAchieve)
# Mean SES within each school.
mses <- tapply(SES, School, mean)
mses[as.character(MathAchSchool$School[1:10])]
# Student-level working copy with lower-case column names.
Bryk <- as.data.frame(MathAchieve[, c("School", "SES", "MathAch")])
names(Bryk) <- c("school", "ses", "mathach")
# NOTE(review): sample20 is not used again in this snippet.
sample20 <- sort(sample(7185, 20)) # 20 randomly sampled students
# School mean SES for every student, and the student's deviation from it.
Bryk$meanses <- mses[as.character(Bryk$school)]
Bryk$cses <- Bryk$ses - Bryk$meanses
# Look-up vector for sector, indexed by name, then mapped onto every student.
# Assumes the row names of MathAchSchool are the school identifiers -- TODO confirm.
sector <- MathAchSchool$Sector
names(sector) <- row.names(MathAchSchool)
Bryk$sector <- sector[as.character(Bryk$school)]
attach(Bryk)
# NOTE(review): the global `sector` vector created above is found before the
# attached Bryk$sector column, so `school` (one entry per student) is indexed
# with a logical vector that has one entry per row of MathAchSchool -- confirm
# this is intended.
# Sample 20 school identifiers (no seed is set) and keep those schools' rows.
cat <- sample(unique(school[sector=="Catholic"]), 20)
Cat.20 <- groupedData(mathach ~ ses | school, data=Bryk[is.element(school, cat),])
Graph with Lattice:
trellis.device(color = TRUE)

# One panel per school: loess curve, raw points and a dashed least-squares line.
xyplot(
  mathach ~ ses | school,
  data = Cat.20,
  main = "Catholic",
  panel = function(x, y) {
    panel.loess(x, y, span = 1)
    panel.xyplot(x, y)
    panel.lmline(x, y, lty = 2)
  }
)
Graph with ggplot:
# One facet per school: raw points, a linear fit and the default smoother.
ggplot(Cat.20, aes(x = ses, y = mathach)) +
  geom_point(size = 1, shape = 1) +
  stat_smooth(method = "lm", se = FALSE) +
  # No method given, so ggplot2 picks its default smoother; the stray empty
  # first argument has been removed.
  stat_smooth(colour = "Red", se = FALSE) +
  # `scales` is spelled out in full rather than relying on partial matching.
  facet_wrap(school ~ ., scales = "free_y")
Please any advice will be appreciated.
Preamble
Before going into the explanation, allow me to refer you to this question: Why is it not advisable to use attach() in R, and what should I use instead?
While it's commendable that you made your question reproducible, the code you used could do with some clean-up. For example:
Don't include packages that aren't used in the code (I didn't see a need for the lme4 package);
There's no need to use data(...) to load MathAchieve. See the "Good Practices" section from ?data for more details.
As mentioned above, don't use attach().
For complete reproducibility, use set.seed() before any random sampling.
For a minimal example, don't plot 20 schools when a smaller number would do.
Since you are using one of the tidyverse packages for plotting, I recommend another from its collection for data manipulation:
library(nlme)
library(ggplot2)
library(lattice)
library(dplyr)

# Student-level data with the school mean of SES, the school-centred SES and
# the school's sector joined on.
Bryk <- MathAchieve %>%
  select(School, SES, MathAch) %>%
  group_by(School) %>%
  mutate(
    meanses = mean(SES),
    cses = SES - meanses
  ) %>%
  ungroup() %>%
  left_join(select(MathAchSchool, School, Sector), by = "School")
names(Bryk) <- tolower(names(Bryk))

# Fix the seed so the two sampled schools are reproducible.
set.seed(123)
cat <- sample(unique(Bryk$school[Bryk$sector == "Catholic"]), 2)

Cat.2 <- groupedData(
  mathach ~ ses | school,
  data = filter(Bryk, school %in% cat)
)
Explanation
Now that that's out of the way, let's look at the relevant functions for loess:
from ?panel.loess:
panel.loess(x, y, span = 2/3, degree = 1,
family = c("symmetric", "gaussian"),
... # omitted for space
)
from ?stat_smooth:
stat_smooth(mapping = NULL, data = NULL, geom = "smooth",
method = "auto", formula = y ~ x, span = 0.75, method.args = list(),
... # omitted for space
)
where method = "auto" defaults to loess from the stats package for <1000 observations.
from ?loess:
loess(formula, data, span = 0.75, degree = 2,
family = c("gaussian", "symmetric"),
... #omitted for space
)
In short, a loess plot's default parameters are span = 2/3, degree = 1, family = "symmetric" for the lattice package, and span = 0.75, degree = 2, family = "gaussian" for the ggplot2 package. You have to specify matching parameters if you want the resulting plots to match:
# Lattice version; panel.loess() is called with span = 1 and otherwise keeps
# its own defaults.
xyplot(
  mathach ~ ses | school,
  data = Cat.2,
  main = "Catholic",
  panel = function(x, y) {
    panel.loess(x, y, span = 1, col = "red")  # match ggplot's colours
    panel.xyplot(x, y, col = "black")         # to facilitate comparison
    panel.lmline(x, y, lty = 2, col = "blue")
  }
)
# ggplot2 version with the loess settings matched to the lattice call:
# span = 1, degree = 1, family = "symmetric".
ggplot(Cat.2, aes(x = ses, y = mathach)) +
  geom_point(size = 2, shape = 1) +
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  stat_smooth(
    # Request loess explicitly: `degree` and `family` in method.args are loess
    # arguments, so the call should not depend on what method = "auto" picks.
    method = "loess", formula = y ~ x,
    span = 1,
    method.args = list(degree = 1, family = "symmetric"),
    colour = "red", se = FALSE
  ) +
  facet_wrap(school ~ .) +
  theme_classic() # less cluttered background to facilitate comparison

How to plot the result of a regression prediction in R

I am beginning with ML in R, and I really like the idea of visualizing the results of my calculations. I am wondering how to plot a prediction.
library("faraway")
library(tibble)
library(stats)

# Load the sat data and keep only the rows without missing values.
data("sat")
df <- sat[complete.cases(sat), ]

# Simple linear regression of total on salary.
mod_sat_sal <- lm(total ~ salary, data = df)

# Predict total for a single new observation with salary = 40.
new_teacher <- tibble(salary = 40)
predict(mod_sat_sal, new_teacher)
Expected result:
Data and Regression Model
# Load sat from the faraway package without attaching the package, drop the
# incomplete rows and fit the regression of total on salary.
data(sat, package = "faraway")
df <- sat[complete.cases(sat), ]
model <- lm(total ~ salary, data = df)
Method (1) : graphics way
# Compute the confidence band
x <- seq(min(df$salary), max(df$salary), length.out = 300)
x.conf <- predict(model, data.frame(salary = x),
interval = 'confidence')
# Plot
plot(total ~ salary, data = df, pch = 16, xaxs = "i")
polygon(c(x, rev(x)), c(x.conf[, 2], rev(x.conf[, 3])),
col = gray(0.5, 0.5), border = NA)
abline(model, lwd = 3, col = "darkblue")
Method (2) : ggplot2 way
library(ggplot2)
ggplot(df, aes(x = salary, y = total)) +
geom_point() +
geom_smooth(method = "lm")

Resources