I am trying to shade the area under a lognormal density plot for a certain interval using the code below. This has worked for me in the past using other density functions and intervals, but for some reason now it produces the defect you can see in the graphic.
library(ggplot2)
library(plyr)
library(dplyr)
library(tidyr)
# Interval whose area under the curve should be highlighted, and the grid on
# which the density is evaluated.
x_min <- 16
x_max <- 22
x <- seq(0, 43, 0.1)

# Lognormal density on the grid; every grid point is labelled by whether it
# falls in the half-open interval [x_min, x_max).
df <- data.frame(
  x = x,
  f = dlnorm(x, meanlog = 2.5, sdlog = 0.24)
) %>%
  mutate(
    area = ifelse(x >= x_min & x < x_max, "Participating", "Not Participating")
  )

# A single ribbon layer with the fill mapped to the label; the legend title is
# suppressed.
gg <- ggplot(data = df, aes(x = x, ymin = 0, ymax = f, fill = area)) +
  geom_ribbon() +
  theme(legend.title = element_blank())
gg
The issue here is that you are trying to have a ribbon consisting of two pieces. Consequently, the two intended red areas try to connect to each other: ymax where the left area ends and ymax where the right one starts, and the same for ymin. Probably in the past you always used this method for the distribution tails and this problem never arose.
To solve this, you will need to draw two separate geom_ribbon layers manually. A not particularly intrusive way would be
# Draw the ribbon as two layers so that no single fill group has to span a gap:
# first the whole curve as background, then only the highlighted interval on
# top of it.
gg <- ggplot(data = df, aes(x = x, ymin = 0, ymax = f)) +
  # Background layer. The constant label is spelled exactly like the value in
  # df$area so the legend text matches the data.
  geom_ribbon(aes(fill = "Not Participating")) +
  # Foreground layer: only the rows inside the interval.
  geom_ribbon(data = df[df$area == "Participating", ], aes(fill = area)) +
  theme(legend.title = element_blank())
Depending on what you wish to do with the colors, it can potentially be further modified/simplified.
Related
I want to plot the gradient plot of intensities, something like this:
I thought about creating a gradient grid whose distribution was my "I" function, but I have no idea how to do it or whether there is a dedicated package in R to accomplish this task.
Thank you so much for even thinking about this.
# Constants shared by both intensity functions, and the angle grid.
a <- 5 * 10^(-6)
d <- 0.5 * 0.005
l <- 500 * 10^(-9)
n <- pi
theta <- seq(-n, n, length = 3500)

# Full intensity: squared cosine term times the squared sin(u)/u term.
I <- function(x) {
  u <- (pi * a * sin(x)) / l
  (cos((pi * d * sin(x)) / l))^2 * (sin(u) / u)^2
}

# Squared sin(u)/u term on its own.
I2 <- function(x) {
  u <- (pi * a * sin(x)) / l
  (sin(u) / u)^2
}

# Evaluate both functions point by point over the grid.
y <- unlist(lapply(theta, I))
df <- data.frame(theta, y)
y2 <- unlist(lapply(theta, I2))
df2 <- data.frame(theta, y2)

# Both curves on one set of axes, x range limited through xlim().
p <- ggplot()
p +
  geom_line(data = df, mapping = aes(theta, y)) +
  xlim(-0.3, 0.3) +
  geom_line(data = df2, mapping = aes(theta, y2))
Making use of patchwork this could be achieved like so:
For the gradient make a second ggplot of rectangles using e.g. geom_rect where you map intensity on color and/or fill
This gradient plot could then be glued to the main plot via patchwork
To get a nice gradient plot
I tripled the number of grid points for the gradient plot,
mapped the cubic root of intensity on color and
got rid of all unnecessary elements like the y-axis, color guide, ...
BTW:
As your functions are vectorized you don't need lapply to compute the intensities.
Instead of adjusting the limits via xlim() (which removes rows falling outside of the range), set them using coord_cartesian.
library(ggplot2)
library(tibble)
library(patchwork)
# Constants shared by both intensity functions, and the angle grid.
a <- 5 * 10^(-6)
d <- 0.5 * 0.005
l <- 500 * 10^(-9)
n <- pi
theta <- seq(-n, n, length.out = 3500)

# Full intensity: squared cosine term times the squared sin(u)/u term.
I <- function(x) {
  (cos((pi * d * sin(x)) / l))^2 *
    (sin((pi * a * sin(x)) / l) / ((pi * a * sin(x)) / l))^2
}
# Both functions are vectorised, so they are applied to the whole grid at once.
y <- I(theta)
df <- data.frame(theta, y)

# Squared sin(u)/u term on its own.
I2 <- function(x) {
  (sin((pi * a * sin(x)) / l) / ((pi * a * sin(x)) / l))^2
}
y2 <- I2(theta)
df2 <- data.frame(theta, y2)

# Main plot: both curves, zoomed with coord_cartesian() so that no rows are
# dropped from the data.
p1 <- ggplot() +
  geom_line(data = df, aes(theta, y)) +
  geom_line(data = df2, aes(theta, y2)) +
  coord_cartesian(xlim = c(-0.3, 0.3))

# Gradient strip: one thin rectangle per grid step, on a grid three times as
# fine as the one used for the curves.
g <- tibble(
  xmin = seq(-n, n, length.out = 3 * 3500),
  xmax = dplyr::lead(xmin),
  y = I(xmin)
)
# The last grid point has no right-hand neighbour, so lead() leaves its xmax
# as NA; drop that row here instead of letting geom_rect() warn about it.
g <- g[!is.na(g$xmax), ]

# Colour is mapped to the cubic root of the intensity; the colour guide and
# the y-axis ticks and labels are removed.
p2 <- ggplot(
  g,
  aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, color = y^(1 / 3))
) +
  geom_rect() +
  coord_cartesian(xlim = c(-0.3, 0.3)) +
  guides(color = "none") +
  theme_minimal() +
  theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())

# Stack the strip under the main plot at a 10:1 height ratio.
p1 / p2 + plot_layout(heights = c(10, 1))
ggplot2 can create a very attractive filled violin plot:
# Baseline: one violin of 10^5 standard-normal draws, filled light grey with a
# black outline.
ggplot() +
  geom_violin(
    mapping = aes(x = x, y = y),
    data = data.frame(x = 1, y = rnorm(10^5)),
    color = "black",
    fill = "gray90"
  ) +
  theme_classic()
I'd like to restrict the fill to the central 95% of the distribution if possible, leaving the outline intact. Does anyone have suggestions on how to accomplish this?
Does this do what you want? It requires some data-processing and the drawing of two violins.
set.seed(1)
dat <- data.frame(x = 1, y = rnorm(10^5))

# Flag every observation that lies strictly between the 2.5% and 97.5%
# sample quantiles.
dat_q <- quantile(dat$y, probs = c(0.025, 0.975))
dat[["central"]] <- dat$y > dat_q[1] & dat$y < dat_q[2]

# Two violins on top of each other: a filled one built from the central
# observations only (no outline), and an outline-only one built from all data.
p1 <- ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_violin(
    data = subset(dat, central),
    color = "transparent",
    fill = "gray90"
  ) +
  geom_violin(color = "black", fill = "transparent") +
  theme_classic()
Edit: the rounded edges bothered me, so here is a second approach. If I were doing this, I would want straight lines. So I did some playing with the density (which is what violin plots are based on)
# Kernel density estimate of the sample.
d_y <- density(dat$y)

# Right half of the violin. The density height goes on the x axis and the data
# values on the y axis, so no coord_flip() is needed afterwards.
right_side <- data.frame(x = d_y$y, y = d_y$x)
right_side$central <- right_side$y > dat_q[1] & right_side$y < dat_q[2]

# Left half: the same points in reverse order with x mirrored, so that path
# and polygon run once around the whole outline.
left_side <- right_side[rev(seq_len(nrow(right_side))), ]
left_side$x <- 0 - left_side$x
density_dat <- rbind(right_side, left_side)

# Fill only the central points, then draw the complete outline on top.
p2 <- ggplot(density_dat, aes(x = x, y = y)) +
  geom_polygon(data = subset(density_dat, central), fill = "red") +
  geom_path()
p2
Just make a selection first. Proof of concept:
# Full sample, and the subset strictly between its 2.5% and 97.5% quantiles.
df1 <- data.frame(x = 1, y = rnorm(10^5))
keep_central <- df1$y > quantile(df1$y, 0.025) & df1$y < quantile(df1$y, 0.975)
df2 <- df1[keep_central, ]

# One violin per data set; the fill labels feed a grey scale titled 'level'.
ggplot(mapping = aes(x = x, y = y)) +
  geom_violin(data = df1, mapping = aes(fill = "100%"), color = NA) +
  geom_violin(data = df2, mapping = aes(fill = "95%"), color = "black") +
  theme_classic() +
  scale_fill_grey(name = "level")
#Heroka gave a great answer. Here is a more general function, based on his answer, that allows filling the violin plot according to arbitrary ranges (not just quantiles).
#' Draw a violin-shaped density outline filled by value ranges
#'
#' Estimates the kernel density of `x`, mirrors it into a violin outline and
#' fills every part of the outline whose value lies in `(from, to]` with the
#' matching colour.
#'
#' @param x Numeric vector of observations.
#' @param from,to Numeric vectors giving the lower (exclusive) and upper
#'   (inclusive) bound of each range.
#' @param col Colours, one per range; recycled to the number of ranges. When
#'   ranges overlap, the first matching range wins.
#' @return A ggplot object.
violincol <- function(x, from = -Inf, to = Inf, col = 'grey') {
  d <- density(x)
  # Density height goes on the x axis and the data values on the y axis, so no
  # coord_flip() is needed later.
  right <- data.frame(x = d$y, y = d$x)

  ranges <- cbind(from, to)
  col <- rep_len(as.character(col), nrow(ranges))

  # Colour of the first range (from, to] containing `y`, or NA if none does.
  # Always returns exactly one string, so the column stays an atomic vector
  # even when ranges overlap.
  colour_for <- function(y) {
    hit <- which(y > ranges[, 1] & y <= ranges[, 2])
    if (length(hit) == 0L) NA_character_ else col[hit[1L]]
  }
  right$col <- vapply(right$y, colour_for, character(1))

  # Left half: same points in reverse order with x mirrored, so path and
  # polygon run once around the outline.
  left <- right[rev(seq_len(nrow(right))), ]
  left$x <- 0 - left$x
  dat <- rbind(right, left)

  # Name the palette by its own values: an unnamed vector would be assigned to
  # the fill levels in sorted order rather than to the colour each row asked
  # for.
  palette <- unique(col)
  ggplot(dat, aes(x = x, y = y)) +
    geom_polygon(aes(fill = col), show.legend = FALSE) +
    geom_path() +
    scale_fill_manual(values = setNames(palette, palette))
}
# Demo sample.
x <- rnorm(10^5)

# Default call: a single range from -Inf to Inf in the default colour.
violincol(x = x)

# Two ranges split at zero.
violincol(
  x = x,
  from = c(-Inf, 0),
  to = c(0, Inf),
  col = c("green", "red")
)

# Bands of width 0.5 starting at each value of r, one rainbow colour per band.
r <- seq(from = -5, to = 5, by = 0.5)
violincol(x = x, from = r, to = r + 0.5, col = rainbow(length(r)))
I am trying to overlay a plot and a histogram in R, using the ggplot2 package.
The plot contains a set of curves (visualized as straight lines due to the logarithmic axis) and a horizontal line.
I would like to plot on the same image a histogram showing the density distribution of the crossing points between the curves and the horizontal line. I can plot the histogram alone but not on the graph, because the aes-length is not the same (the last intersection is at x = 800, while the x axis is much longer).
the code I wrote is:
# Attempt to add a histogram of the crossing points `v` on top of the existing
# plot, with the x range set through xlim().
baseplot +
  geom_histogram(mapping = aes(x = v), data = timesdf) +
  xlim(0, 2000)
where v contains the intersections between the curves and the dashed line.
Any ideas?
edited: as suggested I wrote a little reproducible example:
library(ggplot2)
# Common x grid and four polynomial curves evaluated on it.
xvalues <- 0:100
yvalues1 <- xvalues^2 - 1000
yvalues2 <- xvalues^3 - 100
yvalues3 <- xvalues^4 - 10
yvalues4 <- xvalues^5 - 50

# One row per x value: the constant horizontal level plus the four curves.
plotdf <- data.frame(
  xvalues = xvalues,
  horiz = 5,
  vert1 = yvalues1,
  vert2 = yvalues2,
  vert3 = yvalues3,
  vert4 = yvalues4
)

# Dashed horizontal line plus the four curves; the view is restricted to
# x in [0, 100] and y in [0, 1000]. Data and the x mapping are inherited from
# the ggplot() call.
baseplot <- ggplot(plotdf, aes(x = xvalues, y = horiz)) +
  geom_line(linetype = "dashed", size = 1) +
  geom_line(aes(y = vert1)) +
  geom_line(aes(y = vert2)) +
  geom_line(aes(y = vert3)) +
  geom_line(aes(y = vert4)) +
  coord_cartesian(xlim = c(0, 100), ylim = c(0, 1000))
baseplot
# For each curve column (vert1..vert4), record the largest x value at which
# the curve is still below the dashed line at y = 5. Only the curve columns
# are scanned: the x column and the constant `horiz` column have no crossing
# to report.
curve_cols <- grep("^vert", names(plotdf), value = TRUE)
v <- vapply(curve_cols, function(col) {
  below <- which(plotdf[[col]] < 5)
  # A curve that is never below the line has no crossing: report NA rather
  # than taking max() of an empty set.
  if (length(below) == 0L) NA_real_ else as.numeric(plotdf$xvalues[max(below)])
}, numeric(1))
v <- as.integer(unname(v))
timesdf <- as.data.frame(v)
# my wish: visualize baseplot and histplot on the same image
# Stand-alone histogram of the crossing points, viewed on x in [0, 100] and
# y in [0, 10].
histplot <- ggplot(timesdf, aes(x = v)) +
  geom_histogram() +
  coord_cartesian(xlim = c(0, 100), ylim = c(0, 10))
ggplot2 can create a very attractive filled violin plot:
# Baseline: one violin of 10^5 standard-normal draws, filled light grey with a
# black outline.
ggplot() +
  geom_violin(
    mapping = aes(x = x, y = y),
    data = data.frame(x = 1, y = rnorm(10^5)),
    color = "black",
    fill = "gray90"
  ) +
  theme_classic()
I'd like to restrict the fill to the central 95% of the distribution if possible, leaving the outline intact. Does anyone have suggestions on how to accomplish this?
Does this do what you want? It requires some data-processing and the drawing of two violins.
set.seed(1)
dat <- data.frame(x = 1, y = rnorm(10^5))

# Flag every observation that lies strictly between the 2.5% and 97.5%
# sample quantiles.
dat_q <- quantile(dat$y, probs = c(0.025, 0.975))
dat[["central"]] <- dat$y > dat_q[1] & dat$y < dat_q[2]

# Two violins on top of each other: a filled one built from the central
# observations only (no outline), and an outline-only one built from all data.
p1 <- ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_violin(
    data = subset(dat, central),
    color = "transparent",
    fill = "gray90"
  ) +
  geom_violin(color = "black", fill = "transparent") +
  theme_classic()
Edit: the rounded edges bothered me, so here is a second approach. If I were doing this, I would want straight lines. So I did some playing with the density (which is what violin plots are based on)
# Kernel density estimate of the sample.
d_y <- density(dat$y)

# Right half of the violin. The density height goes on the x axis and the data
# values on the y axis, so no coord_flip() is needed afterwards.
right_side <- data.frame(x = d_y$y, y = d_y$x)
right_side$central <- right_side$y > dat_q[1] & right_side$y < dat_q[2]

# Left half: the same points in reverse order with x mirrored, so that path
# and polygon run once around the whole outline.
left_side <- right_side[rev(seq_len(nrow(right_side))), ]
left_side$x <- 0 - left_side$x
density_dat <- rbind(right_side, left_side)

# Fill only the central points, then draw the complete outline on top.
p2 <- ggplot(density_dat, aes(x = x, y = y)) +
  geom_polygon(data = subset(density_dat, central), fill = "red") +
  geom_path()
p2
Just make a selection first. Proof of concept:
# Full sample, and the subset strictly between its 2.5% and 97.5% quantiles.
df1 <- data.frame(x = 1, y = rnorm(10^5))
keep_central <- df1$y > quantile(df1$y, 0.025) & df1$y < quantile(df1$y, 0.975)
df2 <- df1[keep_central, ]

# One violin per data set; the fill labels feed a grey scale titled 'level'.
ggplot(mapping = aes(x = x, y = y)) +
  geom_violin(data = df1, mapping = aes(fill = "100%"), color = NA) +
  geom_violin(data = df2, mapping = aes(fill = "95%"), color = "black") +
  theme_classic() +
  scale_fill_grey(name = "level")
#Heroka gave a great answer. Here is a more general function, based on his answer, that allows filling the violin plot according to arbitrary ranges (not just quantiles).
#' Draw a violin-shaped density outline filled by value ranges
#'
#' Estimates the kernel density of `x`, mirrors it into a violin outline and
#' fills every part of the outline whose value lies in `(from, to]` with the
#' matching colour.
#'
#' @param x Numeric vector of observations.
#' @param from,to Numeric vectors giving the lower (exclusive) and upper
#'   (inclusive) bound of each range.
#' @param col Colours, one per range; recycled to the number of ranges. When
#'   ranges overlap, the first matching range wins.
#' @return A ggplot object.
violincol <- function(x, from = -Inf, to = Inf, col = 'grey') {
  d <- density(x)
  # Density height goes on the x axis and the data values on the y axis, so no
  # coord_flip() is needed later.
  right <- data.frame(x = d$y, y = d$x)

  ranges <- cbind(from, to)
  col <- rep_len(as.character(col), nrow(ranges))

  # Colour of the first range (from, to] containing `y`, or NA if none does.
  # Always returns exactly one string, so the column stays an atomic vector
  # even when ranges overlap.
  colour_for <- function(y) {
    hit <- which(y > ranges[, 1] & y <= ranges[, 2])
    if (length(hit) == 0L) NA_character_ else col[hit[1L]]
  }
  right$col <- vapply(right$y, colour_for, character(1))

  # Left half: same points in reverse order with x mirrored, so path and
  # polygon run once around the outline.
  left <- right[rev(seq_len(nrow(right))), ]
  left$x <- 0 - left$x
  dat <- rbind(right, left)

  # Name the palette by its own values: an unnamed vector would be assigned to
  # the fill levels in sorted order rather than to the colour each row asked
  # for.
  palette <- unique(col)
  ggplot(dat, aes(x = x, y = y)) +
    geom_polygon(aes(fill = col), show.legend = FALSE) +
    geom_path() +
    scale_fill_manual(values = setNames(palette, palette))
}
# Demo sample.
x <- rnorm(10^5)

# Default call: a single range from -Inf to Inf in the default colour.
violincol(x = x)

# Two ranges split at zero.
violincol(
  x = x,
  from = c(-Inf, 0),
  to = c(0, Inf),
  col = c("green", "red")
)

# Bands of width 0.5 starting at each value of r, one rainbow colour per band.
r <- seq(from = -5, to = 5, by = 0.5)
violincol(x = x, from = r, to = r + 0.5, col = rainbow(length(r)))
I have following code to draw my logistic distribution:
# Logistic density evaluated on a grid of 1000 points.
x <- seq(-2000, 2000, length.out = 1000)
dat <- data.frame(x = x)
dat$value <- dlogis(x, location = 200, scale = 400 / log(10))
dat$type <- "Expected score"

# Density curve, zoomed to the region of interest with ticks every 100 units.
p <- ggplot(data = dat, aes(x = x, y = value)) +
  geom_line(col = "blue", size = 1) +
  coord_cartesian(xlim = c(-500, 900), ylim = c(0, 0.0016)) +
  scale_x_continuous(breaks = seq(-500, 800, 100))

# Vertical dashed marker at x = 0 from y = 0 to y = 0.0011. It is drawn as an
# annotation because a layer that inherits `dat` cannot take length-2 vectors
# as aesthetics: aesthetics must have length 1 or match the data's row count.
pp <- p +
  annotate(
    "segment",
    x = 0, xend = 0, y = 0, yend = 0.0011,
    size = 0.9, colour = "green", linetype = 2, alpha = 0.7
  )
Now what I would like to do is to highlight the area to the left of x = 0.
I tried to do it like this:
# Ten points on [-500, 0] and the logistic density at each of them.
x <- seq(-500, 0, length.out = 10)
y <- dlogis(x, location = 200, scale = 400 / log(10))
pol <- data.frame(x, y)

# Attempt to shade the area with a polygon layer.
# NOTE(review): `data = pol` sits inside aes(), so it is passed as an aesthetic
# rather than as the layer's data -- presumably why this attempt fails; confirm.
pp +
  geom_polygon(
    aes(data = pol, x = x, y = y),
    alpha = 0.6,
    fill = "light blue"
  )
But this does not work. Not sure what I am doing wrong. Any help?
I haven't diagnosed the problem with your polygon (although I think you would need to give the full path around the outside, i.e. attach rep(0,length(x)) to the end of y and rev(x) to the end of x), but geom_ribbon (as in Shading a kernel density plot between two points. ) seems to do the trick:
# Shade between y = 0 and the density over the selected x values with a ribbon
# layer. The layer gets its own data, and the inherited y mapping is cleared
# with y = NULL.
pp +
  geom_ribbon(
    mapping = aes(x = x, ymax = y, y = NULL),
    data = data.frame(x = x, y = y),
    ymin = 0,
    alpha = 0.5,
    fill = "light blue"
  )