I'm currently working on creating a funnel plot in R for a set of mortality rates. I've used the following code to create my funnel plot, and got the following plot:
fp1<-ggplot(data=agg.hd2,
aes(x=total, group=OverallDR))
fp1<-fp1 + geom_smooth(aes(y=lcl95),
se = FALSE,
linetype="solid",
color = "red",
size=0.5)
fp1<-fp1 + geom_smooth(aes(y=ucl95),
se = FALSE,
linetype="solid",
color = "red",
size=0.5)
fp1<-fp1 + geom_smooth(aes(y=lcl99.8),
se = FALSE,
linetype="solid",
color="blue",
size=0.5)
fp1<-fp1 + geom_smooth(aes(y=ucl99.8),
se = FALSE,
linetype="solid",
color="blue",
size=0.5)
fp1<-fp1+geom_smooth(aes(y=OverallDR),
se=FALSE,
color="black",
size=0.5)
fp1<-fp1 + geom_point(aes(y=DRbyhosp),
color ="black")
fp1<-fp1 + theme_classic()
fp1<-fp1 + scale_x_continuous(breaks=seq(0,6000, by=500))
fp1<-fp1 + scale_y_continuous(labels = scales::percent)
fp1<-fp1 + labs(title="Funnel Plot showing Death Rate for Each Hospital")
fp1<-fp1 + labs(x="Operations Performed")
fp1<-fp1 + labs(y="Death Rate")
fp1
I wish to display labels for all of the points which are above or below the blue (99.8%) line. I've tried the subsetting solutions suggested on other threads, but haven't been able to make them work. Does anyone have any suggestions of how I can achieve this?
Here's a demonstration of my suggestion in the comments:
data(iris)
library(ggplot2)
# Define model
mymod <- lm(Petal.Length ~ Sepal.Length, data = iris)
mydat <- cbind(iris, predict(mymod, data = iris, se = TRUE))
mydat$upr <- mydat$fit + mydat$se.fit
mydat$lwr <- mydat$fit - mydat$se.fit
# Visualize
ggplot(data = mydat) +
geom_point(aes(x = Sepal.Length, y = Petal.Length)) +
geom_line(aes(x = Sepal.Length, y = upr), col = "blue") +
geom_line(aes(x = Sepal.Length, y = lwr), col = "blue") +
geom_line(aes(x = Sepal.Length, y = fit), col = "black") +
geom_text(data = mydat[mydat$Petal.Length > mydat$upr+1.3,], aes(x = Sepal.Length, y = Petal.Length, label = Species), nudge_y = 0.1)
Related
So I have a reproducible code below, I only want to color the points below and above the dotted black lines and nothing in between. The points in between should be grey or whatever default color without aes(color = x)
color_above <- mean(iris$Sepal.Width) + sd(iris$Sepal.Width)
color_below <- mean(iris$Sepal.Width) - sd(iris$Sepal.Width)
ggplot(data=iris, aes(x = Sepal.Length, y = Sepal.Width)) + geom_point(aes(color=Species, shape=Species)) +
xlab("Sepal Length") + ylab("Sepal Width")+
geom_hline(yintercept = color_above, linetype = "dashed", color = "black") +
geom_hline(yintercept = color_below, linetype = "dashed", color = "black")
One option would be to add a column to your dataframe where using an ifelse assigns an NA to obs. between your threshold lines and map this new column on the color aes. These values will automatically be assigned the na.value of scale_color_discrete. To get rid of the NA entry in the legend I use the breaks argument and set the name for the color legend to Species so that it still gets merged with the shape legend.
library(ggplot2)
color_above <- mean(iris$Sepal.Width) + sd(iris$Sepal.Width)
color_below <- mean(iris$Sepal.Width) - sd(iris$Sepal.Width)
iris$color <- ifelse(iris$Sepal.Width > color_above | iris$Sepal.Width < color_below, as.character(iris$Species), NA_character_)
ggplot(data=iris, aes(x = Sepal.Length, y = Sepal.Width, color = color)) +
geom_point(aes(shape=Species)) +
scale_color_discrete(breaks = unique(iris$Species), name = "Species") +
xlab("Sepal Length") +
ylab("Sepal Width")+
geom_hline(yintercept = color_above, linetype = "dashed", color = "black") +
geom_hline(yintercept = color_below, linetype = "dashed", color = "black")
You could add a new layer:
grey <- iris[iris$Sepal.Width < color_above & iris$Sepal.Width > color_below,]
ggplot(data=iris, aes(x = Sepal.Length, y = Sepal.Width)) + geom_point(aes(color=Species, shape=Species)) +
xlab("Sepal Length") + ylab("Sepal Width")+
geom_hline(yintercept = color_above, linetype = "dashed", color = "black") +
geom_hline(yintercept = color_below, linetype = "dashed", color = "black") +
geom_point(data=grey,aes(x = Sepal.Length, y = Sepal.Width),color='grey')
We could also use gghighlight package: I think this made for such situations:
p + gghighlight::gghighlight(Sepal.Width > 3.5 | Sepal.Width < 2.6)
I don't know why geom_freqpoly is not graphing. When I run the chunk I only see the histogram but not the line graph.
library(ggplot2)
lambda1 = 1/2
sequence = seq(1, 10000, by=1)
df1 = tibble(x=seq(1, 10000, by=1), expo1 = lambda1*exp(-(lambda1)*x))
lambda2 = 1/5
df2 = tibble(x=seq(1, 10000, by=1), expo2 = lambda2*exp(-(lambda2)*x))
ggplot() +
geom_histogram(aes(x=expo2, y=..density..), binwidth=.5, colour="blue", fill="white") +
geom_histogram(aes(x=expo1, y=..density..), binwidth=.5, colour="red", fill="white") +
geom_freqpoly(data=df2, aes(x=expo2, y=..density..), binwidth=.5, colour="blue") +
geom_vline(aes(xintercept=mean(expo2)), color="blue", linetype="dashed", size=1) +
geom_density(alpha=.2, fill="#FF6666") +
geom_freqpoly(data=df1, aes(x=expo1, y=..density..), binwidth=.5, colour="red") +
geom_vline(aes(xintercept=mean(expo1)), color="red", linetype="dashed", size=1) +
geom_density(alpha=.2, fill="#FF6666") +
xlim(0,10) +
ylim(0,0.5)+
ggtitle("Distribution of averages of random exponential distribution with their means", "theta = 2 in red and theta = 5 in ")+
xlab("Averages of the distribution")+
ylab("Density")
You are missing the data object in many of your geom lines. Each geom needs data. What are you trying to do? Plot two dataframes on one plot? Your scales are all over the place. Try this.
library(ggplot2)
library(tibble)
x <- seq(0, 10, by = 0.1)
lambda1 <- 1 / 2
df1 <- tibble(x = x, expo1 = lambda1 * exp(-(lambda1) * x))
lambda2 <- 1 / 5
df2 <- tibble(x = x, expo2 = lambda2 * exp(-(lambda2) * x))
ggplot() +
ggtitle("Distribution of averages of random exponential distribution with their means", "theta = 2 in red and theta = 5 in blue") +
xlab("Averages of the distribution") +
ylab("Density") +
geom_histogram(data = df2, aes(x = x, y = expo2), stat = "identity", colour = "blue", fill = "white") +
geom_freqpoly(data = df2, aes(x = x, y = expo2), stat = "identity", colour = "blue") +
geom_vline(xintercept = lambda2, color = "blue", linetype = "dashed", size = 1) +
geom_density(data = df2, aes(x = x, y = expo2), stat = "identity", alpha = .2, fill = "#FF6666") +
geom_histogram(data = df1, aes(x = x, y = expo1), stat = "identity", colour = "red", fill = "white") +
geom_freqpoly(data = df1, aes(x = x, y = expo1), stat = "identity", colour = "red") +
geom_vline(xintercept = lambda1, color = "red", linetype = "dashed", size = 1) +
geom_density(data = df1, aes(x = x, y = expo1), stat = "identity", alpha = .2, fill = "#FF6666")
#> Warning: Ignoring unknown parameters: binwidth, bins, pad
#> Warning: Ignoring unknown parameters: binwidth, bins, pad
Created on 2020-11-02 by the reprex package (v0.3.0)
I am having some trouble with ggplot and stat_summary.
Please consider following data:
head(mtcars)
data<-mtcars
data$hp2<-mtcars$hp+50
Please consider following code:
ggplot(mtcars, aes(x = cyl, y = hp)) +
stat_summary(aes(y = hp, group = 1), fun.y=mean, colour="red", geom="line",group=1) +
stat_summary(fun.y=mean, colour="red", geom="text", show_guide = FALSE, vjust=-0.7, aes( label=round(..y.., digits=0)))
The code will produce line plot with means of hp and text labels for means ans well. If we would like to add another line/curve we simply have to add:
ggplot(mtcars, aes(x = cyl, y = hp)) +
stat_summary(aes(y = hp, group = 1), fun.y=mean, colour="red", geom="line",group=1) +
stat_summary(fun.y=mean, colour="red", geom="text", show_guide = FALSE, vjust=-0.7, aes( label=round(..y.., digits=0)))+
stat_summary(aes(y = hp2), fun.y=mean, colour="blue", geom="line",group=1)
Now comes the tricky part:
How to use stat_summary with geom="text" but for the hp2 i.e. how to technically force stat_summary to calculate means on hp2 and print the text labels? It seems that I can only use it for the "main" y.
This type of problem, that asks for graphs of related vector columns, is almost always a wide-to-long data format reshaping problem.
library(ggplot2)
data_long <- reshape2::melt(data[c('cyl', 'hp', 'hp2')], id.vars = 'cyl')
head(data_long)
ggplot(data_long, aes(x = cyl, y = value, colour = variable)) +
stat_summary(fun.y = mean, geom = "line", show.legend = FALSE) +
stat_summary(fun.y = mean, geom = "text", show.legend = FALSE, vjust=-0.7, aes( label=round(..y.., digits=0))) +
scale_color_manual(values = c("red", "blue"))
I would like to add a kernel density estimate for 2 types of data to a ggplot. If I use the following code, it displays a kernel density estimate for the 2nd factor level only. How do I get a kernel density estimate for both factor levels (preferably different colors)?
ggplot(mtcars, aes(x = disp, y=mpg, color=factor(vs))) +
theme_bw() +
geom_point(size=.5) +
geom_smooth(method = 'loess', se = FALSE) +
stat_density_2d(geom = "raster", aes(fill = ..density.., alpha = ..density..), contour = FALSE) +
scale_alpha(range = c(0,1)) +
guides(alpha=FALSE)
One approach is to use two stat_density_2d layers with subsets of the data and manually color them. It is not exactly what you are after but with tweaking it can be solid:
ggplot(mtcars, aes(x = disp, y=mpg, color=factor(vs))) +
theme_bw() +
geom_point(size=.5) +
geom_smooth(method = 'loess', se = FALSE) +
stat_density_2d(data = subset(mtcars, vs == 0), geom = "raster", aes(alpha = ..density..), fill = "#F8766D" , contour = FALSE) +
stat_density_2d(data = subset(mtcars, vs == 1), geom = "raster", aes(alpha = ..density..), fill = "#00BFC4" , contour = FALSE) +
scale_alpha(range = c(0, 1))
This might do what you want :
```
ggplot(mtcars, aes(x = disp, y=mpg, color=factor(vs))) +
theme_bw() +
geom_point(size=.5) +
geom_smooth(method = 'loess', se = FALSE) +
stat_density_2d(data = subset(mtcars, vs==1), geom = "raster", fill='blue', aes(fill = ..density.., alpha = ..density..), contour = FALSE) +
scale_alpha(range = c(0,0.8)) +
stat_density_2d(data = subset(mtcars, vs==0), geom = "raster", fill='red', aes(fill = ..density.., alpha = ..density..), contour = FALSE) +
guides(alpha=FALSE)
```
Another potential solution that I discovered in this post is to use geom="tile" in the stat_density2d() call instead of geom="raster".
ggplot(mtcars, aes(x = disp, y=mpg, color=factor(vs))) +
theme_bw() +
geom_point(size=.5) +
geom_smooth(method = 'loess', se = FALSE) +
stat_density_2d(geom = "tile", aes(fill = factor(vs), alpha = ..density..), contour = FALSE, linetype=0) +
scale_alpha(range = c(0,1))
I have a following scatterplot with a smooth spline
a<-rep(1:50,len=500)
b<-sample(0:5000,500)
c<-round(seq(0,600,len=500))
data_frame<-as.data.frame(cbind(a,b,c))
names(data_frame)<-c("ID","toxin_level","days_to_event")
plot(data_frame$days_to_event,data_frame$toxin_level, xlim=c(600,0),xlab="days before the event",ylab="Toxin level",type="p")
abline(v=0,col="red")
x <- data_frame$days_to_event
y <- data_frame$toxin_level
fit.sp = smooth.spline(y ~ x, nknots=20)
lines(fit.sp, col="blue")
This is the resulting plot
I was wondernig if it is possible to somehow add confidence bands to this curve? I deally I would like it to be in a transparent blue, but any color including gray is OK.
Updated: using scale_x_reverse to match your graph more precisely...
How about this using ggplot2?
library(ggplot2)
ggplot(data_frame, aes(x = days_to_event, y = toxin_level)) + geom_point() +
geom_vline(xintercept = 0, color = "red") + scale_x_reverse() +
xlab("Days before the event") + ylab("Toxin Level") +
geom_smooth(method = lm, se = TRUE)
Which gives this:
Or to match your question a bit more:
ggplot(data_frame, aes(x = days_to_event, y = toxin_level)) + geom_point(shape = 1) +
geom_vline(xintercept = 0, color = "red") + scale_x_reverse() +
xlab("Days before the event") + ylab("Toxin Level") +
geom_smooth(method = lm, se = TRUE, color = "blue", fill = "lightblue") +
theme_bw()