multiple kernel densities in ggplot2 - r

I would like to add a kernel density estimate for 2 types of data to a ggplot. If I use the following code, it displays a kernel density estimate for the 2nd factor level only. How do I get a kernel density estimate for both factor levels (preferably different colors)?
# Scatter of mpg against disp from mtcars, with colour mapped to factor(vs).
ggplot(mtcars, aes(x = disp, y=mpg, color=factor(vs))) +
theme_bw() +
geom_point(size=.5) +
# Loess smoother without the confidence ribbon.
geom_smooth(method = 'loess', se = FALSE) +
# A single 2-D density layer drawn as a raster; both fill and alpha are mapped
# to the computed density.
stat_density_2d(geom = "raster", aes(fill = ..density.., alpha = ..density..), contour = FALSE) +
scale_alpha(range = c(0,1)) +
# Suppress the legend for alpha.
guides(alpha=FALSE)

One approach is to use two stat_density_2d layers with subsets of the data and manually color them. It is not exactly what you are after but with tweaking it can be solid:
# One raster density layer per level of `vs`: each layer gets its own subset of
# mtcars and a fixed fill colour, while alpha follows the estimated density.
ggplot(mtcars, aes(x = disp, y = mpg, color = factor(vs))) +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess", se = FALSE) +
  stat_density_2d(
    data = subset(mtcars, vs == 0),
    mapping = aes(alpha = ..density..),
    geom = "raster",
    fill = "#F8766D",
    contour = FALSE
  ) +
  stat_density_2d(
    data = subset(mtcars, vs == 1),
    mapping = aes(alpha = ..density..),
    geom = "raster",
    fill = "#00BFC4",
    contour = FALSE
  ) +
  scale_alpha(range = c(0, 1)) +
  theme_bw()

This might do what you want :
```
# Overlay one kernel density raster per level of `vs`, each in its own colour.
ggplot(mtcars, aes(x = disp, y = mpg, color = factor(vs))) +
  theme_bw() +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess", se = FALSE) +
  # The fill is a fixed colour per layer, so only alpha is mapped to the
  # density; a `fill` inside aes() would be overridden by the constant anyway.
  stat_density_2d(
    data = subset(mtcars, vs == 1),
    mapping = aes(alpha = after_stat(density)),
    geom = "raster", fill = "blue", contour = FALSE
  ) +
  scale_alpha(range = c(0, 0.8)) +
  stat_density_2d(
    data = subset(mtcars, vs == 0),
    mapping = aes(alpha = after_stat(density)),
    geom = "raster", fill = "red", contour = FALSE
  ) +
  # "none" is the supported way to drop a legend; a logical FALSE is deprecated.
  guides(alpha = "none")
```

Another potential solution that I discovered in this post is to use geom="tile" in the stat_density2d() call instead of geom="raster".
# Single density layer drawn with tiles: fill follows the `vs` level while
# alpha follows the estimated density.
ggplot(mtcars, aes(x = disp, y = mpg, color = factor(vs))) +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess", se = FALSE) +
  stat_density_2d(
    mapping = aes(fill = factor(vs), alpha = ..density..),
    geom = "tile",
    contour = FALSE,
    linetype = 0
  ) +
  scale_alpha(range = c(0, 1)) +
  theme_bw()

Related

Selective Labelling of Points in ggplot

I'm currently working on creating a funnel plot in R for a set of mortality rates. I've used the following code to create my funnel plot, and got the following plot:
# Funnel plot built as a single chain: smoothed limit curves, the smoothed
# overall rate, and one point per row of agg.hd2.
fp1 <- ggplot(data = agg.hd2, mapping = aes(x = total, group = OverallDR)) +
  # lcl95 / ucl95 curves in red
  geom_smooth(
    aes(y = lcl95),
    se = FALSE, linetype = "solid", color = "red", size = 0.5
  ) +
  geom_smooth(
    aes(y = ucl95),
    se = FALSE, linetype = "solid", color = "red", size = 0.5
  ) +
  # lcl99.8 / ucl99.8 curves in blue
  geom_smooth(
    aes(y = lcl99.8),
    se = FALSE, linetype = "solid", color = "blue", size = 0.5
  ) +
  geom_smooth(
    aes(y = ucl99.8),
    se = FALSE, linetype = "solid", color = "blue", size = 0.5
  ) +
  # Overall death rate in black
  geom_smooth(aes(y = OverallDR), se = FALSE, color = "black", size = 0.5) +
  geom_point(aes(y = DRbyhosp), color = "black") +
  theme_classic() +
  scale_x_continuous(breaks = seq(0, 6000, by = 500)) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Funnel Plot showing Death Rate for Each Hospital",
    x = "Operations Performed",
    y = "Death Rate"
  )
fp1
I wish to display labels for all of the points which are above or below the blue (99.8%) line. I've tried the subsetting solutions suggested on other threads, but haven't been able to make them work. Does anyone have any suggestions of how I can achieve this?
Here's a demonstration of my suggestion in the comments:
data(iris)
library(ggplot2)
# Define model
mymod <- lm(Petal.Length ~ Sepal.Length, data = iris)
# predict.lm() takes `newdata` and `se.fit`; spell both out rather than relying
# on `...` or partial argument matching. The returned list (fit, se.fit, df,
# residual.scale) is bound to iris column-wise.
mydat <- cbind(iris, predict(mymod, newdata = iris, se.fit = TRUE))
# Band of one standard error either side of the fitted value
mydat$upr <- mydat$fit + mydat$se.fit
mydat$lwr <- mydat$fit - mydat$se.fit
# Visualize: observations, fitted line (black), upper/lower bands (blue), and
# species labels for points lying more than 1.3 above the upper band.
ggplot(mydat, aes(x = Sepal.Length)) +
  geom_point(aes(y = Petal.Length)) +
  geom_line(aes(y = upr), colour = "blue") +
  geom_line(aes(y = lwr), colour = "blue") +
  geom_line(aes(y = fit), colour = "black") +
  geom_text(
    data = subset(mydat, Petal.Length > upr + 1.3),
    mapping = aes(y = Petal.Length, label = Species),
    nudge_y = 0.1
  )

R weird behavior with geom_abline()

I am using this code:
# Linear-model smoother of mpg on wt, plus a dashed reference line given by an
# explicit intercept and slope.
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_smooth(method = "lm", se = FALSE, color = '#376795', size = 1) +
geom_abline(intercept = 34.232237, slope = -4.539474, linetype = 'dashed')
And I get this graph:
Then I comment out the middle line of code with command + shift + c
# Same plot with the geom_smooth() layer commented out, leaving geom_abline()
# as the only layer.
ggplot(mtcars, aes(x = wt, y = mpg)) +
# geom_smooth(method = "lm", se = FALSE, color = '#376795', size = 1) +
geom_abline(intercept = 34.232237, slope = -4.539474, linetype = 'dashed')
I get a graph without any lines. Where did the line from geom_abline() go?
I then switch the order, being careful with the + signs...
# Both layers again, this time with geom_abline() added before geom_smooth().
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_abline(intercept = 34.232237, slope = -4.539474, linetype = 'dashed') +
geom_smooth(method = "lm", se = FALSE, color = '#376795', size = 1)
Both lines are back. So the code for geom_abline() seemed fine, right?
So I then comment out the middle line:
# geom_abline() is commented out here, so geom_smooth() is the only layer added.
ggplot(mtcars, aes(x = wt, y = mpg)) +
# geom_abline(intercept = 34.232237, slope = -4.539474, linetype = 'dashed') +
geom_smooth(method = "lm", se = FALSE, color = '#376795', size = 1)
The geom_smooth() is there but not the abline. I'm really confused by this behavior. I really just want the abline and not the smooth but this doesn't work:
# Only geom_abline() is added to the plot.
# NOTE(review): per the ggplot2 documentation, abline/hline/vline layers do not
# inherit the plot's aesthetics and do not affect the x and y scales, which
# would explain why no line is visible here - confirm.
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_abline(intercept = 34.232237, slope = -4.539474, linetype = 'dashed')
There must be a simple reason. But also - why is the behavior inconsistent? It feels like a bug because the same code in one place seems to work and in another place doesn't.
You can use this code to plot only the abline:
# geom_blank() draws nothing itself; it gives the plot a layer that uses the
# wt/mpg mapping before the dashed reference line is added.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_blank() +
  geom_abline(
    slope = -4.539474,
    intercept = 34.232237,
    linetype = "dashed"
  )
Output:

How to colour and move R squared equation to next line in ggplot2 facet_grid?

For the graph dimensions I need, I want the R squared to appear on the next line. I also want the colour of the text to correspond to the colour of the factor z.
# Toy data: 50 x values, 50 normal draws, and a label vector repeated in runs of 5
x <- 1:50
y <- rnorm(50, mean = 4, sd = 1)
z <- rep(c("J", "F", "H", "I", "J", "K", "L", "M", "N", "O"), each = 5)
df <- data.frame(x, y, z)
my.formula <- y ~ x

# Points, bootstrap error bars and means in black, a linear fit per colour
# group, and the equation plus R squared printed on one line in black.
ggplot(data = df, mapping = aes(x = x, y = y, color = z)) +
  geom_point() +
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "errorbar",
    width = 0.2,
    colour = "black"
  ) +
  stat_summary(
    fun = mean,
    geom = "point",
    color = "black",
    size = 3,
    show.legend = FALSE
  ) +
  geom_smooth(method = "lm", formula = y ~ x) +
  stat_poly_eq(
    mapping = aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    formula = my.formula,
    parse = TRUE,
    size = 2.5,
    col = "black"
  ) +
  facet_grid(. ~ z, scales = "free") +
  theme_classic()
Here is a way.
To have the colors right, comment out color = "black";
To have r-squared below the regression equation, use atop, see ?plotmath.
The result of atop is not left aligned, but here it is.
# Same plot as in the question, with the equation label changed in two ways:
# the colour override is removed and the two label parts are stacked.
ggplot(mapping = aes(x = x, y = y, color = z), data = df) +
geom_point() +
stat_summary(fun.data=mean_cl_boot, geom="errorbar", width=0.2, colour="black") +
stat_summary(fun = mean, color = "black", geom ="point", size = 3,show.legend = FALSE) +
geom_smooth(method="lm", formula = y ~ x ) +
stat_poly_eq(
formula = my.formula,
# Wrap both labels in a plotmath atop() call so that, once parsed, the
# equation sits above the R squared value.
aes(label = paste("atop(", ..eq.label.., ",", ..rr.label.., ")")),
# NOTE(review): presumably a relative (0-1) vertical position for the label - confirm.
label.y = 0.9,
parse = TRUE,
size = 2.5
# Left commented out so the label is no longer forced to black.
#, col = "black"
)+
facet_grid(.~z, scales = "free") +
theme_classic()

geom_freqpoly not plotting when I run this chunk of code with ggplot2

I don't know why geom_freqpoly is not graphing. When I run the chunk I only see the histogram but not the line graph.
library(ggplot2)
# Exponential density values for two rates, evaluated at x = 1, 2, ..., 10000.
lambda1 = 1/2
# `sequence` is not referenced again in this snippet.
sequence = seq(1, 10000, by=1)
df1 = tibble(x=seq(1, 10000, by=1), expo1 = lambda1*exp(-(lambda1)*x))
lambda2 = 1/5
df2 = tibble(x=seq(1, 10000, by=1), expo2 = lambda2*exp(-(lambda2)*x))
# ggplot() is called without data, so only the layers that pass `data =`
# themselves (the two geom_freqpoly() calls) name a data frame.
ggplot() +
geom_histogram(aes(x=expo2, y=..density..), binwidth=.5, colour="blue", fill="white") +
geom_histogram(aes(x=expo1, y=..density..), binwidth=.5, colour="red", fill="white") +
geom_freqpoly(data=df2, aes(x=expo2, y=..density..), binwidth=.5, colour="blue") +
geom_vline(aes(xintercept=mean(expo2)), color="blue", linetype="dashed", size=1) +
geom_density(alpha=.2, fill="#FF6666") +
geom_freqpoly(data=df1, aes(x=expo1, y=..density..), binwidth=.5, colour="red") +
geom_vline(aes(xintercept=mean(expo1)), color="red", linetype="dashed", size=1) +
geom_density(alpha=.2, fill="#FF6666") +
# Axis limits: x in [0, 10], y in [0, 0.5].
xlim(0,10) +
ylim(0,0.5)+
ggtitle("Distribution of averages of random exponential distribution with their means", "theta = 2 in red and theta = 5 in ")+
xlab("Averages of the distribution")+
ylab("Density")
You are missing the data object in many of your geom lines. Each geom needs data. What are you trying to do? Plot two dataframes on one plot? Your scales are all over the place. Try this.
library(ggplot2)
library(tibble)
# Evaluate both exponential densities on a common grid over [0, 10].
x <- seq(0, 10, by = 0.1)
lambda1 <- 1 / 2
df1 <- tibble(x = x, expo1 = lambda1 * exp(-(lambda1) * x))
lambda2 <- 1 / 5
df2 <- tibble(x = x, expo2 = lambda2 * exp(-(lambda2) * x))
# Every layer names its own data frame and uses stat = "identity", so the
# precomputed density values are drawn as they are rather than re-binned.
ggplot() +
  ggtitle("Distribution of averages of random exponential distribution with their means", "theta = 2 in red and theta = 5 in blue") +
  xlab("Averages of the distribution") +
  ylab("Density") +
  geom_histogram(data = df2, aes(x = x, y = expo2), stat = "identity", colour = "blue", fill = "white") +
  geom_freqpoly(data = df2, aes(x = x, y = expo2), stat = "identity", colour = "blue") +
  # The mean of an exponential distribution with rate lambda is 1 / lambda, so
  # the dashed mean markers belong at theta = 5 (blue) and theta = 2 (red).
  geom_vline(xintercept = 1 / lambda2, color = "blue", linetype = "dashed", size = 1) +
  geom_density(data = df2, aes(x = x, y = expo2), stat = "identity", alpha = .2, fill = "#FF6666") +
  geom_histogram(data = df1, aes(x = x, y = expo1), stat = "identity", colour = "red", fill = "white") +
  geom_freqpoly(data = df1, aes(x = x, y = expo1), stat = "identity", colour = "red") +
  geom_vline(xintercept = 1 / lambda1, color = "red", linetype = "dashed", size = 1) +
  geom_density(data = df1, aes(x = x, y = expo1), stat = "identity", alpha = .2, fill = "#FF6666")
#> Warning: Ignoring unknown parameters: binwidth, bins, pad
#> Warning: Ignoring unknown parameters: binwidth, bins, pad
Created on 2020-11-02 by the reprex package (v0.3.0)

How to use ggplot2 legend to denote different geoms

I am using ggplot2 to plot points from a .csv file that has just one column used as x values and one column used as y values. I am a little confused as to how ggplot decides what to make a legend for and haven't found any good examples online.
I would like the legend to show that geom_point is stress vs strain, and my geom_smooth is the best fit line.
Here is my code:
library(ggplot2)
# Read the measurements and copy the two columns into standalone vectors.
imported = read.csv("data.csv")
Strain = imported$Strain
Stress = imported$Stress..N.m.2.
# Half-width used for the horizontal error bars below.
err = .0005
gg <-
ggplot(imported, aes(x=Strain, y=Stress)) +
# `group` is mapped to a constant string; colour, shape and size are fixed values.
geom_point(aes(group = "Points"), shape = 79, colour = "black", size = 2, stroke = 4) +
# Linear fit without a confidence band, in a fixed orange colour.
geom_smooth(method = "lm", se = FALSE, color = "orange") +
# xmin/xmax are passed as fixed parameters computed from the Strain vector,
# not mapped inside aes().
geom_errorbarh(xmin = Strain - err, xmax = Strain + err, show.legend = TRUE) +
theme_gray() + ggtitle("Stress vs Strain") +
theme(legend.position = "top")
gg
And it is producing the following plot:
my plot
Edit: added approach at top to create legend for each geom, by creating dummy mapping to separate aesthetics.
library(ggplot2)
# Each geom gets a legend entry by mapping a constant label to an aesthetic
# that only that geom uses; the manual scales then supply the title and the
# actual appearance.
ggplot(mtcars, aes(x = mpg, y = wt)) +
  # constant mapped to colour -> legend entry for the points
  geom_point(mapping = aes(color = "point")) +
  # constant mapped to linetype -> legend entry for the fitted line
  geom_smooth(
    mapping = aes(linetype = "best fit"),
    method = "lm",
    se = FALSE,
    color = "orange"
  ) +
  geom_errorbarh(mapping = aes(xmin = mpg - 2, xmax = mpg + 1)) +
  scale_color_manual(name = "Stress vs. Strain", values = "black") +
  scale_linetype_manual(name = "Best fit line", values = "solid")
original answer:
Note the difference in legend here:
library(ggplot2)

# Plot 1: the error-bar layer is included in the colour legend.
ggplot(mtcars, aes(x = mpg, y = wt, color = as.character(cyl))) +
  geom_point() +
  geom_errorbarh(
    mapping = aes(xmin = mpg - 2, xmax = mpg + 1),
    show.legend = TRUE
  )

# Plot 2: identical layers, but the error bars are kept out of the legend.
ggplot(mtcars, aes(x = mpg, y = wt, color = as.character(cyl))) +
  geom_point() +
  geom_errorbarh(
    mapping = aes(xmin = mpg - 2, xmax = mpg + 1),
    show.legend = FALSE
  )

Resources