Drawing only boundaries of stat_smooth in ggplot2 - r

When using stat_smooth() with geom_point is there a way to remove the shaded fit region, but only draw its outer bounds? I know I can remove the shaded region with something like:
# stat_smooth() is the smoothing layer (ggplot2 has no geom_stat()); alpha = 0 makes its band fully transparent
geom_point(aes(x = x, y = y)) + stat_smooth(aes(x = x, y = y), alpha = 0)
but how can I make the outer bounds of it (outer curves) still visible as faint black lines?

You can also use geom_ribbon with fill = NA.
# Build the plot once so the values computed by the smoother can be read back.
gg <- ggplot(mtcars, aes(qsec, wt)) +
  geom_point() +
  stat_smooth(alpha = 0, method = "loess")

# Layer 2 is stat_smooth(); its built data carries x, ymin and ymax of the band.
rib_data <- ggplot_build(gg)$data[[2]]

ggplot(mtcars) +
  stat_smooth(aes(qsec, wt), alpha = 0, method = "loess") +
  geom_point(aes(qsec, wt)) +
  # The outline colour is a constant, so it is set outside aes(). Inside aes()
  # the string "blue" would be mapped as a one-level variable (default palette
  # colour plus a legend) instead of being used as the colour.
  geom_ribbon(
    data = rib_data,
    aes(x = x, ymin = ymin, ymax = ymax),
    colour = "blue", fill = NA, linetype = 1
  )
...and if for some reason you don't want the vertical bars, you can just use two geom_line layers:
# Same plot, with the upper and lower band edges drawn as two separate lines
ggplot(data = mtcars) +
  stat_smooth(mapping = aes(x = qsec, y = wt), method = "loess", alpha = 0) +
  geom_point(mapping = aes(x = qsec, y = wt)) +
  geom_line(mapping = aes(x = x, y = ymax), data = rib_data) +
  geom_line(mapping = aes(x = x, y = ymin), data = rib_data)

There are most likely easier ways, but you may try this as a start. I grab the data for the confidence interval with ggplot_build(), which I then use in geom_line().
# create a ggplot object with a linear smoother and a CI
library(ggplot2)

gg <- ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm")
gg

# grab the data from the plot object
# (ggplot_build() returns the data computed for each layer; layer 2 is geom_smooth)
gg_data <- ggplot_build(gg)
str(gg_data)
head(gg_data$data[[2]])
gg2 <- gg_data$data[[2]]

# plot with 'CI-lines' and the shaded confidence area
# Line thickness is set with `linewidth`; using `size` for lines is deprecated
# since ggplot2 3.4.0.
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
  geom_line(data = gg2, aes(x = x, y = ymin), linewidth = 0.02) +
  geom_line(data = gg2, aes(x = x, y = ymax), linewidth = 0.02)

# plot with 'CI-lines' but without confidence area
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1) +
  geom_line(data = gg2, aes(x = x, y = ymin), linewidth = 0.02) +
  geom_line(data = gg2, aes(x = x, y = ymax), linewidth = 0.02)

Related

Why is the shape of the legend's elements not the same as that of the ones on the chart with `ggplot` in R?

library(ggplot2)
# size is mapped at the plot level, so both the point and the smoother layer inherit it
ggplot(data = mtcars, mapping = aes(x = mpg, y = disp, size = cyl)) +
  geom_point() +
  geom_smooth(method = "loess", level = 0.99)
As you can see there are circles in the charts, but there are rectangles in the legend.
How to have circles in the legend as well?
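You should not put size in the plot-wide aes(): every layer inherits it, so geom_smooth() also contributes to the size legend and its key is drawn as a rectangle. Map size only inside geom_point() instead: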
library(ggplot2)
# Only the point layer maps size; the smoother inherits just x and y
ggplot(data = mtcars, mapping = aes(x = mpg, y = disp)) +
  geom_point(mapping = aes(size = cyl)) +
  geom_smooth(method = "loess", level = 0.99)
#> `geom_smooth()` using formula = 'y ~ x'

Cut off geom_vline at geom_density height

I am trying to highlight a specific point in a distribution. I am using geom_vline, which works fine except that it extends beyond the curve drawn by geom_density. Is there any way to cut that vertical line off at the point where it meets the curve drawn by geom_density?
library(ggplot2)
# Keep the row names as a regular column so one car can be looked up by name
mtcars[["car"]] <- rownames(mtcars)
javelin <- subset(mtcars, car == "AMC Javelin")

# Density of mpg with a full-height vertical line at the selected car's mpg
ggplot(data = mtcars) +
  geom_density(mapping = aes(x = mpg)) +
  geom_vline(mapping = aes(xintercept = mpg), data = javelin)
You can't use geom_vline here. You'll have to calculate the correct point and use geom_segment, but this really isn't that complex:
# Evaluate the kernel density on its grid and keep the grid point closest to
# the highlighted mpg value. Base subsetting is used so the snippet runs with
# only ggplot2 attached (no `%>%` / filter() from dplyr required).
mpg_density <- as.data.frame(density(mtcars$mpg)[1:2])
javelin_point <- mpg_density[which.min(abs(mpg_density$x - javelin$mpg)), ]

ggplot(mtcars) +
  geom_density(aes(x = mpg)) +
  # The segment runs from the x-axis up to the density height at that point
  geom_segment(data = javelin_point, aes(x, 0, xend = x, yend = y))
If you want this to be easily generalised, you can define a little helper function:
#' Vertical segment(s) from the x-axis up to a density curve
#'
#' Estimates the kernel density of `data` with `stats::density()` (default
#' settings) and returns a `geom_segment()` layer that rises from y = 0 to the
#' estimated density at each value in `at`.
#'
#' @param data Numeric vector whose density is being plotted.
#' @param at Numeric vector of x positions at which to draw a segment.
#' @param ... Further arguments forwarded to `ggplot2::geom_segment()`
#'   (e.g. `color`, `linetype`).
#' @return A ggplot2 layer that can be added to a plot with `+`.
geom_vdensity <- function(data, at, ...) {
  stopifnot(is.numeric(data), is.numeric(at))
  dens <- stats::density(data)
  # Interpolate linearly between grid points so each segment sits exactly at
  # `at` rather than at the nearest grid point; this is also vectorised over
  # `at`. Positions outside the density grid yield NA heights.
  heights <- stats::approx(dens$x, dens$y, xout = at)$y
  ggplot2::geom_segment(
    data = data.frame(x = at, y = heights),
    ggplot2::aes(x = x, y = 0, xend = x, yend = y),
    ...
  )
}
This allows:
# mpg density with a red marker segment at the selected car's mpg
ggplot(data = mtcars) +
  geom_density(mapping = aes(x = mpg)) +
  geom_vdensity(data = mtcars$mpg, at = javelin$mpg, color = "red")
and
# Petal length density with a marker segment at x = 4
ggplot(data = iris) +
  geom_density(mapping = aes(x = Petal.Length)) +
  geom_vdensity(data = iris$Petal.Length, at = 4)

ggplot: adding a frequency plot over a percentage plot

I am interested in doing a plot showing percentages by group.
something like this:
data(iris)
# position = "fill" stacks each bin to a height of 1, showing species shares
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, group = factor(Species), fill = factor(Species))
) +
  geom_histogram(position = "fill") +
  theme_bw()
however, I would also like to plot a histogram showing the frequency distribution on top of this graph.
something like the plot below.
# Plain frequency histogram of sepal length, all species pooled
ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_histogram() +
  theme_bw()
Does anyone know how to do this?
Note I know how to do a frequency plot by group: ggplot(iris,aes(x = Sepal.Length, group = factor(Species), fill = factor(Species))) + geom_histogram()+theme_bw(). But this is not what I want. Rather I would like a small frequency distribution at the bottom of the percentage plot presented at the beginning.
Thank you very much
Something like this?
library(gridExtra)

# Top panel: per-bin species shares, legend placed above the panel
p1 <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, group = factor(Species), fill = factor(Species))
) +
  geom_histogram(position = "fill") +
  theme_bw() +
  theme(legend.position = "top")

# Bottom panel: stacked counts per species, legend suppressed
p2 <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, group = factor(Species), fill = factor(Species))
) +
  geom_histogram() +
  theme_bw() +
  theme(legend.position = "none")

# Stack the two panels vertically, with relative heights 4 : 1.5
grid.arrange(p1, p2, heights = c(4, 1.5))
Edit: So you are looking for this then? Note that in this case the absolute values of the smaller histogram become meaningless since they were scaled down to be ~25% of the vertical chart range.
ggplot() +
  # Background: per-bin species shares, each bin stacked to a height of 1
  geom_histogram(
    data = iris,
    aes(
      x = Sepal.Length,
      group = factor(Species),
      fill = factor(Species)
    ),
    position = "fill",
    alpha = 1
  ) +
  # Overlay: pooled counts rescaled to a maximum of 1 (ncount), then divided
  # by 4 so the tallest bar reaches 0.25 on the shared y-axis.
  # after_stat() replaces the deprecated `..ncount..` notation.
  geom_histogram(
    data = iris,
    aes(
      x = Sepal.Length,
      y = after_stat(ncount) / 4
    ),
    alpha = 0.5,
    fill = "black"
  )

Combined bar plot and points in ggplot2

I would like to plot a "combined" bar plot with points.
Consider to following dummy data:
library(ggplot2)
library(gridExtra)
library(dplyr)
# Standard error of the mean: sample standard deviation divided by sqrt(n)
se <- function(x) {
  n_obs <- length(x)
  sd(x) / sqrt(n_obs)
}
# Raw observations: displacement against cylinder count
p1 <- ggplot(data = mtcars, mapping = aes(x = cyl, y = disp, fill = cyl)) +
  geom_point() +
  theme_classic() +
  ylim(c(0, 500))

# Mean displacement and its standard error for each cylinder count
my_dat <- mtcars %>%
  group_by(cyl) %>%
  summarise(my_mean = mean(disp), my_se = se(disp))

# Bars at the group means with error bars of +/- one standard error
p2 <- ggplot(
  data = my_dat,
  mapping = aes(x = cyl, y = my_mean, ymin = my_mean - my_se, ymax = my_mean + my_se)
) +
  geom_bar(stat = "identity", width = 0.75) +
  geom_errorbar(stat = "identity", width = 0.75) +
  theme_classic() +
  ylim(c(0, 500))
The final plot should look like that:
You can add layers together, but if they have different data and/or aesthetics you'll want to include the data and aes arguments in each graphical layer.
# Bars, error bars and raw points combined; each layer carries its own data.
# Every layer gets only the aesthetics it understands: bars take x/y, error
# bars take x/ymin/ymax. Passing the others triggers "Ignoring unknown
# aesthetics" warnings.
p3 <- ggplot() +
  geom_col(data = my_dat, aes(x = cyl, y = my_mean), width = 0.75) +
  geom_errorbar(
    data = my_dat,
    aes(x = cyl, ymin = my_mean - my_se, ymax = my_mean + my_se),
    width = 0.75
  ) +
  geom_point(data = mtcars, aes(y = disp, x = cyl, fill = cyl)) +
  ylim(c(0, 500)) +
  theme_classic()
If you want the points to sit off to the side of the bars, you can subtract an offset from the cyl values to shift the points over. As @LukeA mentioned, change the geom_point to geom_point(data=mtcars, aes(y=disp, x=cyl-.5, fill=cyl)).
You can specify each layer individually to ggplot2. Often you are using the same data frame and options for each geom, so it makes sense to set defaults in ggplot(). In your case you should specify each geom separately:
library(ggplot2)
library(gridExtra)
library(dplyr)
#' Standard error of the mean
#'
#' @param x Numeric vector.
#' @param na.rm If `TRUE`, missing values are dropped before both the standard
#'   deviation and the sample size are computed. Defaults to `FALSE`, in which
#'   case any `NA` in `x` gives `NA`.
#' @return `sd(x) / sqrt(n)` as a single numeric value.
se <- function(x, na.rm = FALSE) {
  if (na.rm) {
    # Drop NAs up front so that n counts only the observed values
    x <- x[!is.na(x)]
  }
  sd(x) / sqrt(length(x))
}
# Mean displacement and its standard error for each cylinder count
my_dat <- summarise(group_by(mtcars, cyl),
                    my_mean = mean(disp),
                    my_se = se(disp))

# Each layer names its own data and only the aesthetics it understands:
# bars take x/y, error bars take x/ymin/ymax. Passing the others triggers
# "Ignoring unknown aesthetics" warnings.
p1 <- ggplot() +
  geom_col(data = my_dat,
           aes(x = cyl, y = my_mean),
           width = 0.75) +
  geom_errorbar(data = my_dat,
                aes(x = cyl,
                    ymin = my_mean - my_se,
                    ymax = my_mean + my_se),
                width = 0.75) +
  geom_point(data = mtcars, aes(y = disp, x = cyl, fill = cyl)) +
  theme_classic() + ylim(c(0, 500))
p1

Plot mean in an R plot

I would like to add the mean of the values within windows to a scatter plot I have. I created the scatter plot with ggplot2
# Base plot object: weight on x, fuel economy on y; then draw it as a scatter plot
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))
p + geom_point()
This will give the scatter plot, but I would like to add the mean of each window (say, of size 1) and plot these mean points as a line. Additionally, I would like to have vertical bars at each point to indicate the variance.
mtcars is a standard data set that ships with R.
This uses the new dplyr library.
library(dplyr)

# Bin the cars into six equal-width weight intervals and average each bin.
# `%.%` was dplyr's original chaining operator and has since been removed;
# `%>%` is its replacement.
forLines <- mtcars %>%
  group_by(cut(wt, breaks = 6)) %>%
  summarise(mean_mpg = mean(mpg), mean_wt = mean(wt))

# Points, one boxplot per weight bin, and a line through the bin means
p +
  geom_point(size = 5) +
  geom_boxplot(aes(group = cut(wt, breaks = 6))) +
  geom_line(data = forLines, aes(x = mean_wt, y = mean_mpg))
Maybe this is what you're looking for:
library(ggplot2)

# Breaks for unit-width weight windows, from 0 up to the first integer >= max(wt)
s <- seq(0, ceiling(max(mtcars$wt)), 1)
# Integer window index for each car; window k covers (k - 1, k]
ind <- as.integer(cut(mtcars$wt, s))

# Summary for one window: the mean, with bars extending one variance either side
myfun <- function(i)
  c(y = mean(i), ymin = mean(i) - var(i), ymax = mean(i) + var(i))

ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  # x = ind - .5 places each summary at the midpoint of its window
  stat_summary(fun.data = myfun, aes(group = ind, x = ind - .5),
               colour = "red") +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0)
  stat_summary(fun = mean, aes(x = ind - .5), geom = "line",
               colour = "red")
Is this what you want?
# Scatter plot with one linear-model fit drawn through all points (group = 1)
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))
p +
  geom_point() +
  geom_smooth(mapping = aes(x = wt, y = mpg, group = 1), method = "lm")

Resources