Cut off geom_vline at geom_density height - r

I am trying to highlight a specific point in a distribution. I am using geom_vline, which works fine except that it extends beyond the curve drawn by geom_density. Is there any way to cut that vertical line off at the point where it meets the curve drawn by geom_density?
# Attach ggplot2 and expose the car names as an ordinary column.
library(ggplot2)
mtcars[["car"]] <- rownames(mtcars)

# The single row whose mpg should be highlighted on the density curve.
javelin <- subset(mtcars, car == "AMC Javelin")

# geom_vline() is drawn at the highlighted mpg value.
ggplot(data = mtcars) +
  geom_density(mapping = aes(x = mpg)) +
  geom_vline(data = javelin, mapping = aes(xintercept = mpg))

You can't use geom_vline here. You'll have to calculate the correct point and use geom_segment, but this really isn't that complex:
# `%>%` and the data-frame verbs used below come from dplyr, so it has to be
# attached in addition to ggplot2.
library(dplyr)

ggplot(mtcars) +
  geom_density(aes(x = mpg)) +
  geom_segment(
    # Evaluate the default kernel density of mpg on its grid and keep the one
    # grid point whose x value is closest to the highlighted car's mpg.
    data = as.data.frame(density(mtcars$mpg)[1:2]) %>%
      slice(which.min(abs(x - javelin$mpg))),
    # Draw from the x-axis (y = 0) up to the density height at that point.
    aes(x, 0, xend = x, yend = y)
  )
If you want this to be easily generalised, you can define a little helper function:
#' Draw vertical segments from the x-axis up to a density curve
#'
#' Computes the default kernel density estimate of `data` with
#' [stats::density()] and linearly interpolates its height at each value of
#' `at`, so every segment sits exactly at the requested x position and ends on
#' the estimated curve.
#'
#' @param data Numeric vector whose density is being plotted. Non-finite
#'   values are ignored.
#' @param at Numeric vector of x positions at which to draw a segment.
#'   Positions outside the range covered by the density estimate are dropped.
#' @param ... Further arguments forwarded to [ggplot2::geom_segment()], such
#'   as `color` or `linetype`.
#' @return A ggplot2 layer that can be added to a plot with `+`.
geom_vdensity <- function(data, at, ...) {
  stopifnot(is.numeric(data), is.numeric(at))

  dens <- stats::density(data[is.finite(data)])
  # Interpolate instead of snapping to the nearest grid point: the segment is
  # then drawn at `at` itself, and several `at` values work in a single call.
  height <- stats::approx(dens$x, dens$y, xout = at)$y
  on_curve <- !is.na(height)
  segments <- data.frame(x = at[on_curve], y = height[on_curve])

  ggplot2::geom_segment(
    data = segments,
    ggplot2::aes(x, 0, xend = x, yend = y),
    ...
  )
}
This allows:
# Mark the AMC Javelin's mpg with a red segment that ends on the curve.
ggplot(data = mtcars) +
  geom_density(mapping = aes(x = mpg)) +
  geom_vdensity(data = mtcars$mpg, at = javelin$mpg, color = "red")
and
# Same helper on another data set, with a fixed x position of 4.
ggplot(data = iris) +
  geom_density(mapping = aes(x = Petal.Length)) +
  geom_vdensity(data = iris$Petal.Length, at = 4)

Related

Plotting multiple plots with geom_violin and lapply

I'm trying to use lapply to make multiple violin plots, stacked side by side.
The base code is:
# Violin plot of cyl per vehicle class with the raw points jittered on top.
# (The closing parenthesis of geom_jitter() was missing, which is a syntax
# error.)
ggplot(mpg, aes(x = class, y = cyl, fill = class)) +
  geom_violin() +
  ggtitle("cyl") +
  geom_jitter(shape = 16, position = position_jitter(0.1))
So I'm trying to use lapply:
# Attempt 1: iterate over the names of columns 3 to 5 of mpg, so `n` is a
# character string inside the function.
plots_list <- lapply(
  names(mpg[, 3:5]),
  function(n) {
    ggplot(mpg, aes(x = class, y = n, fill = class)) +
      geom_violin() +
      geom_jitter(shape = 16, position = position_jitter(0.1)) +
      ggtitle(n)
  }
)
plots_list[[1]]
But y = n gives no violin plot.
If I use:
# Attempt 2: iterate over the columns themselves, so `n` holds the column's
# values and is also what gets passed to ggtitle().
plots_list <- lapply(
  mpg[, 3:5],
  function(n) {
    ggplot(mpg, aes(x = class, y = n, fill = class)) +
      geom_violin() +
      geom_jitter(shape = 16, position = position_jitter(0.1)) +
      ggtitle(n)
  }
)
plots_list[[1]]
Then the plots titles are not correct.
Also, when using:
grid.arrange(plots_list[1:3], ncol = 2)
I get errors, but plotting with:
plots_list[1:3]
works like a charm
Your n is not a symbol, it's a string, so you need aes_string:
# Iterate over column names and hand them to aes_string(), which accepts the
# aesthetics as strings; the same string is reused as the plot title.
plots_list <- lapply(
  names(mpg[, 3:5]),
  function(n) {
    ggplot(mpg, aes_string(x = "class", y = n, fill = "class")) +
      geom_violin() +
      geom_jitter(shape = 16, position = position_jitter(0.1)) +
      ggtitle(n)
  }
)
plots_list[[1]]

Pass changed geom from object to other ggplot

I first make a plot
# Sample data and a filled 2d-density plot of x against itself.
df <- data.frame(x = c(1:40, rep(1:20, 3), 15:40))
p <- ggplot(df, aes(x = x, y = x)) +
  stat_density2d(
    aes(fill = "red", alpha = ..level..),
    geom = "polygon",
    show.legend = FALSE
  )
Then I want to change the geom_density values and use these in another plot.
# Build the plot so that the computed layer data becomes available.
q <- ggplot_build(p)
# Take the data of the first layer and subtract x from y.
dens <- q[["data"]][[1]]
dens[["y"]] <- dens[["y"]] - dens[["x"]]
Build the other plot using the changed densities, something like this:
# Build a second plot and try to pass the modified densities to it.
ggplot(data = df, mapping = aes(x = x, y = 1)) +
  geom_point(alpha = 0.3) +
  geom_density2d(dens)
This does not work, however. Is there a way of doing this?
EDIT: doing it when there are multiple groups:
# Same data with a group label per observation (40 x A, 60 x B, 26 x C).
df <- data.frame(
  x = c(1:40, rep(1:20, 3), 15:40),
  group = rep(c("A", "B", "C"), times = c(40, 60, 26))
)

# Filled 2d density per group.
p <- ggplot(df, aes(x = x, y = x)) +
  stat_density2d(
    aes(fill = group, alpha = ..level..),
    geom = "polygon",
    show.legend = FALSE
  )

# Pull the first layer's computed data and subtract x from y.
q <- ggplot_build(p)
dens <- q[["data"]][[1]]
dens[["y"]] <- dens[["y"]] - dens[["x"]]

# Redraw the shifted shapes as polygons over the points.
ggplot(df, aes(x = x, y = 1)) +
  geom_point(aes(col = group), alpha = 0.3) +
  geom_polygon(
    data = dens,
    aes(x, y, fill = fill, group = piece, alpha = alpha)
  ) +
  scale_alpha_identity() +
  guides(fill = FALSE, alpha = FALSE)
Results when applied to my own dataset
Although this is exactly what I'm looking for the fill colors seem not to correspond to the initial colors (linked to A, B and C):
Like this? It is possible to plot a transformation of the shapes plotted by geom_density. But that's not quite the same as manipulating the underlying density...
# Draw the modified layer data as polygons; alpha is used as stored in `dens`.
ggplot(data = df, mapping = aes(x = x, y = 1)) +
  geom_point(alpha = 0.3) +
  geom_polygon(
    data = dens,
    mapping = aes(x, y, fill = fill, group = piece, alpha = alpha)
  ) +
  scale_alpha_identity() +
  guides(fill = FALSE, alpha = FALSE)
Edit - OP now has multiple groups. We can plot those with the code below, which produces an artistic plot of questionable utility. It does what you propose, but I would suggest it would be more fruitful to transform the underlying data and summarize that, if you are looking for representative output.
# Multi-group variant: points coloured by group, polygons filled by the
# `group` column of the layer data.
ggplot(data = df, mapping = aes(x = x, y = 1)) +
  geom_point(mapping = aes(col = group), alpha = 0.3) +
  geom_polygon(
    data = dens,
    mapping = aes(x, y, fill = group, group = piece, alpha = alpha)
  ) +
  scale_alpha_identity() +
  guides(fill = FALSE, alpha = FALSE) +
  theme_minimal()

Avoid changing ggplot

Q1.Is there a way of avoiding the following behavior in a ggplot?
# Coordinates of the chain vertices.
x <- c(1, 3, 4, 5, 6)
y <- c(0.5, 2, 3, 7, 1)
n <- 2
library(ggplot2)

# Two red segments whose end points index into the global x and y.
p <- ggplot(mtcars, aes(wt, mpg)) +
  geom_segment(
    aes(x = x[1], y = y[1], xend = x[2], yend = y[2]),
    colour = "red"
  ) +
  geom_segment(
    aes(x = x[2], y = y[2], xend = x[3], yend = y[3]),
    colour = "red"
  )
p

# Change a coordinate after the plot object was created, then draw it again.
x[1] <- 10
p
The drawing on the plot remains anchored to the value of the numeric values given in input. Do I have to create other variables for storing the coordinates?
Q2.How to draw a polygonal chain in ggplot?
library(ggplot2)

# Keep the vertices in a data frame that belongs to the layer. geom_path()
# connects the rows in order, so one layer draws the whole chain. Adding one
# geom_segment() per loop iteration with aes(x[i], ...) does not work: the
# expressions are only evaluated when the plot is drawn, at which point every
# layer sees the final value of i. The layer data is also a copy, so later
# changes to the global x and y no longer alter the plot.
chain <- data.frame(x = x, y = y)
p <- ggplot(mtcars, aes(wt, mpg)) +
  geom_path(data = chain, aes(x = x, y = y), colour = "red")
Answer here: Drawing polygonal chains in ggplot
This seems like a hack but apparently works: before modifying x, do
p$plot_env <- list2env(list(x=x,y=y))
Then
x[1] <- 10
p
leaves the plot unchanged.

Plot mean in an R plot

I would like to add the mean of the values within windows to a scatter plot I have. I created the scatter plot with ggplot2:
# Base plot object, then the scatter plot itself.
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))
p +
  geom_point()
This will give the scatter plot, but I would like to add the mean of each window (say, of size 1) and plot these means as a line. Additionally, I would like to have vertical bars at each point to indicate the variance.
Mtcars is the data set standard available in ggplot 2
This uses the new dplyr library.
library(dplyr)

# Mean mpg and mean wt within six equal-width wt bins. The chain uses `%>%`;
# the original `%.%` operator is no longer part of dplyr.
forLines <- mtcars %>%
  group_by(cut(wt, breaks = 6)) %>%
  summarise(mean_mpg = mean(mpg), mean_wt = mean(wt))

# Points, one boxplot per wt bin, and a line through the bin means.
p +
  geom_point(size = 5) +
  geom_boxplot(aes(group = cut(wt, breaks = 6))) +
  geom_line(data = forLines, aes(x = mean_wt, y = mean_mpg))
Maybe this is what you're looking for:
library(ggplot2)

# Unit-width windows from 0 up to the largest wt (rounded up), and the index
# of the window each car falls into.
s <- seq(0, ceiling(max(mtcars$wt)), 1)
ind <- as.integer(cut(mtcars$wt, s))

# Summary for one window: the mean, and the mean minus/plus the variance.
myfun <- function(i) {
  centre <- mean(i)
  spread <- var(i)
  c(y = centre, ymin = centre - spread, ymax = centre + spread)
}

# x = ind - .5 places each summary at the centre of its window.
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  stat_summary(
    fun.data = myfun,
    aes(group = ind, x = ind - .5),
    colour = "red"
  ) +
  stat_summary(
    fun.y = mean,
    aes(x = ind - .5),
    geom = "line",
    colour = "red"
  )
Is this what you want?
# Scatter plot with a single linear fit across all points.
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))
p +
  geom_point() +
  geom_smooth(mapping = aes(wt, mpg, group = 1), method = "lm")

Drawing only boundaries of stat_smooth in ggplot2

When using stat_smooth() with geom_point is there a way to remove the shaded fit region, but only draw its outer bounds? I know I can remove the shaded region with something like:
# The smoother layer is stat_smooth(); there is no geom_stat() in ggplot2.
geom_point(aes(x = x, y = y)) + stat_smooth(aes(x = x, y = y), alpha = 0)
but how can I make the outer bounds of it (outer curves) still visible as faint black lines?
You can also use geom_ribbon with fill = NA.
# First build a plot with the smoother so its computed band can be extracted.
gg <- ggplot(mtcars, aes(qsec, wt)) +
  geom_point() +
  stat_smooth(alpha = 0, method = "loess")

# Layer 2 is the smoother; its data holds x, ymin and ymax of the band.
rib_data <- ggplot_build(gg)$data[[2]]

ggplot(mtcars) +
  stat_smooth(aes(qsec, wt), alpha = 0, method = "loess") +
  geom_point(aes(qsec, wt)) +
  # The colour is a fixed setting, so it goes outside aes(); inside aes() the
  # string "blue" would be mapped through a colour scale and add a legend.
  geom_ribbon(
    data = rib_data,
    aes(x = x, ymin = ymin, ymax = ymax),
    colour = "blue",
    fill = NA,
    linetype = 1
  )
...and if for some reason you don't want the vertical bars, you can just use two geom_line layers:
# Draw the upper and lower bounds of the band as two separate lines.
ggplot(data = mtcars) +
  stat_smooth(mapping = aes(qsec, wt), alpha = 0, method = "loess") +
  geom_point(mapping = aes(qsec, wt)) +
  geom_line(data = rib_data, mapping = aes(x = x, y = ymax)) +
  geom_line(data = rib_data, mapping = aes(x = x, y = ymin))
There are most likely easier ways, but you may try this as a start. I grab the data for the confidence interval with ggplot_build, which I then use in geom_line:
# Scatter plot with a linear smoother and its confidence band.
library(ggplot2)
gg <- ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  geom_smooth(method = "lm")
gg

# Build the plot and inspect what was computed for each layer.
gg_data <- ggplot_build(gg)
str(gg_data)
head(gg_data[["data"]][[2]])
# Data of the second layer (the smoother), which has the ymin/ymax columns.
gg2 <- gg_data[["data"]][[2]]

# Variant 1: thin lines along the band edges, shaded band kept.
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, size = 1) +
  geom_line(data = gg2, mapping = aes(x = x, y = ymin), size = 0.02) +
  geom_line(data = gg2, mapping = aes(x = x, y = ymax), size = 0.02)

# Variant 2: the same edge lines, but with the shaded band switched off.
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, size = 1) +
  geom_line(data = gg2, mapping = aes(x = x, y = ymin), size = 0.02) +
  geom_line(data = gg2, mapping = aes(x = x, y = ymax), size = 0.02)

Resources