I have a contour plot and I would like to add a geom_path with a different set of data over it.
Right now I have the below code, but as soon as it gets to the geom_path, it overwrites the contour plot. Is there a way to prevent this from happening?
v <- ggplot(pts, aes(theta_1, theta_2, z = z))
v + stat_contour(aes(colour = ..level..),bins=50) + xlab(expression(Theta[1])) + ylab(expression(Theta[2]))
v+geom_path(aes(x=x,y=y,z=z), data=some.mat)
probably you can do by:
v <- ggplot(pts, aes(theta_1, theta_2, z = z))
v <- v + stat_contour(aes(colour = ..level..),bins=50) + xlab(expression(Theta[1])) + ylab(expression(Theta[2]))
v + geom_path(aes(x=x,y=y,z=z), data=some.mat)
Related
ggplot2 can create a very attractive filled violin plot:
ggplot() + geom_violin(data=data.frame(x=1, y=rnorm(10 ^ 5)),
aes(x=x, y=y), fill='gray90', color='black') +
theme_classic()
I'd like to restrict the fill to the central 95% of the distribution if possible, leaving the outline intact. Does anyone have suggestions on how to accomplish this?
Does this do what you want? It requires some data-processing and the drawing of two violins.
set.seed(1)
dat <- data.frame(x=1, y=rnorm(10 ^ 5))
#calculate for each point if it's central or not
dat_q <- quantile(dat$y, probs=c(0.025,0.975))
dat$central <- dat$y>dat_q[1] & dat$y < dat_q[2]
#plot; one'95' violin and one 'all'-violin with transparent fill.
p1 <- ggplot(data=dat, aes(x=x,y=y)) +
geom_violin(data=dat[dat$central,], color="transparent",fill="gray90")+
geom_violin(color="black",fill="transparent")+
theme_classic()
Edit: the rounded edges bothered me, so here is a second approach. If I were doing this, I would want straight lines. So I did some playing with the density (which is what violin plots are based on)
d_y <- density(dat$y)
right_side <- data.frame(x=d_y$y, y=d_y$x) #note flip of x and y, prevents coord_flip later
right_side$central <- right_side$y > dat_q[1]&right_side$y < dat_q[2]
#add the 'left side', this entails reversing the order of the data for
#path and polygon
#and making x negative
left_side <- right_side[nrow(right_side):1,]
left_side$x <- 0 - left_side$x
density_dat <- rbind(right_side,left_side)
p2 <- ggplot(density_dat, aes(x=x,y=y)) +
geom_polygon(data=density_dat[density_dat$central,],fill="red")+
geom_path()
p2
Just make a selection first. Proof of concept:
df1 <- data.frame(x=1, y=rnorm(10 ^ 5))
df2 <- subset(df1, y > quantile(df1$y, 0.025) & y < quantile(df1$y, 0.975))
ggplot(mapping = aes(x = x, y = y)) +
geom_violin(data = df1, aes(fill = '100%'), color = NA) +
geom_violin(data = df2, aes(fill = '95%'), color = 'black') +
theme_classic() +
scale_fill_grey(name = 'level')
#Heroka gave a great answer. Here is a more general function based on his answer that allows to fill the violin plot according to any ranges (not just quantiles).
violincol <- function(x,from=-Inf,to=Inf,col='grey'){
d <- density(x)
right <- data.frame(x=d$y, y=d$x) #note flip of x and y, prevents coord_flip later
whichrange <- function(r,x){x <= r[2] & x > r[1]}
ranges <- cbind(from,to)
right$col <- sapply(right$y,function(y){
id <- apply(ranges,1,whichrange,y)
if(all(id==FALSE)) NA else col[which(id)]
})
left <- right[nrow(right):1,]
left$x <- 0 - left$x
dat <- rbind(right,left)
p <- ggplot(dat, aes(x=x,y=y)) +
geom_polygon(data=dat,aes(fill=col),show.legend = F)+
geom_path()+
scale_fill_manual(values=col)
return(p)
}
x <- rnorm(10^5)
violincol(x=x)
violincol(x=x,from=c(-Inf,0),to=c(0,Inf),col=c('green','red'))
r <- seq(-5,5,0.5)
violincol(x=x,from=r,to=r+0.5,col=rainbow(length(r)))
I have a table with a binning variable VAR2_BY_NS_BIN and an x-y data pair (MP_BIN,CORRECT_PROP). I want to plot the data point binned, and also draw a different line for each bin using stat_function, taking a different reference each time using the for loop.
test_tab <- data.table(VAR2_BY_NS_BIN=c(0.0005478, 0.0005478, 0.002266, 0.002266, 0.006783, 0.006783, 0.020709, 0.020709, 0.142961, 0.142961),
MP_BIN=rep(c(0.505, 0.995), 5),
CORRECT_PROP=c(0.5082, 0.7496, 0.5024, 0.8627, 0.4878, 0.9368, 0.4979, 0.9826, 0.4811, 0.9989))
VAR2_BIN <- sort(unique(test_tab$VAR2_BY_NS_BIN)) #get unique bin values
LEN_VAR2_BIN <- length(VAR2_BIN) #get number of bins
col_base <- c("#FF0000", "#BB0033", "#880088", "#3300BB", "#0000FF") #mark bins with different colours
p <- ggplot(data = test_tab)
for (i in 1:LEN_VAR2_BIN) {
p <- p + geom_point(data = test_tab[test_tab$VAR2_BY_NS_BIN==VAR2_BIN[i],],
aes(x = MP_BIN, y = CORRECT_PROP),
col = col_base[i],
alpha = 0.5) +
stat_function(fun = function(t) {VAR2_BIN[i]*(t-0.5)+0.5}, col = col_base[i])
}
p <- p + xlab("MP") + ylab("Observed proportion")
print(p)
The above code (a reproducible example), however, always returns a plot with only the last stat_function line drawn (which is the 5th line in the above case).
The following code (without using the for loop) works, but I in fact have a large number of bins so it is not very feasible...
p <- p + stat_function(fun = function(t) {VAR2_BIN[1]*(t-0.5)+0.5}, col = col_base[1])
p <- p + stat_function(fun = function(t) {VAR2_BIN[2]*(t-0.5)+0.5}, col = col_base[2])
p <- p + stat_function(fun = function(t) {VAR2_BIN[3]*(t-0.5)+0.5}, col = col_base[3])
p <- p + stat_function(fun = function(t) {VAR2_BIN[4]*(t-0.5)+0.5}, col = col_base[4])
p <- p + stat_function(fun = function(t) {VAR2_BIN[5]*(t-0.5)+0.5}, col = col_base[5])
Thanks in advance!
You don't need a for loop or stat_function. To plot the points, just map MP_BIN and CORRECT_PROP to x and y and the points can be plotted with a single call to geom_point. For the lines, you can create the necessary values on the fly (as done in the code below) and plot those with geom_line.
library(tidyverse)
ggplot(test_tab %>% mutate(model=VAR2_BY_NS_BIN*(MP_BIN - 0.5) + 0.5),
aes(x=MP_BIN, colour=factor(VAR2_BY_NS_BIN))) +
geom_point(aes(y=CORRECT_PROP)) +
geom_line(aes(y=model)) +
labs(colour="VAR2_BY_NS_BIN") +
guides(colour=guide_legend(reverse=TRUE))
In terms of the problem you were having with the for loop, what's going on is that ggplot doesn't actually evaluate the loop variable (i) until you print the plot. The value of i is 5 at the end of the loop when the plot is printed, so that's the only line you get. You can find several questions related to this issue on Stack Overflow. Here's one of them.
I'm trying to plot isoclines under a scatterplot using ggplot but I can't figure out how to use stat_functioncorrectly.
The isoclines are based on the distance formula:
sqrt((x1-x2)^2 + (y1-y2)^2)
and would look like these
concentric circles, except the center would be the origin of the plot:
What I've tried so far is calling the distance function within ggplot like so (Note: I use x1=1 and y1=1 because in my real problem I also have fixed values)
distance <- function(x, y) {sqrt((x - 1)^2 + (y - 1)^2)}
ggplot(my_data, aes(x, y))+
geom_point()+
stat_function(fun=distance)
but R returns the error:
Computation failed in 'stat_function()': argument "y" is missing, with
no default
How do I correctly feed x and y values to stat_function so that it plots a generic plot of the distance formula, with the center at the origin?
For anything a bit complicated, I avoid the use of the stat functions. They are mostly aimed at quick calculations. They are usually limited to calculating y based on x. I would just pre-calculate the data and the plot with stat_contour instead:
distance <- function(x, y) {sqrt((x - 1)^2 + (y - 1)^2)}
d <- expand.grid(x = seq(0, 2, 0.02), y = seq(0, 2, 0.02))
d$dist <- mapply(distance, x = d$x, y = d$y)
ggplot(d, aes(x, y)) +
geom_raster(aes(fill = dist), interpolate = T) +
stat_contour(aes(z = dist), col = 'white') +
coord_fixed() +
viridis::scale_fill_viridis(direction = -1)
ggplot2 can create a very attractive filled violin plot:
ggplot() + geom_violin(data=data.frame(x=1, y=rnorm(10 ^ 5)),
aes(x=x, y=y), fill='gray90', color='black') +
theme_classic()
I'd like to restrict the fill to the central 95% of the distribution if possible, leaving the outline intact. Does anyone have suggestions on how to accomplish this?
Does this do what you want? It requires some data-processing and the drawing of two violins.
set.seed(1)
dat <- data.frame(x=1, y=rnorm(10 ^ 5))
#calculate for each point if it's central or not
dat_q <- quantile(dat$y, probs=c(0.025,0.975))
dat$central <- dat$y>dat_q[1] & dat$y < dat_q[2]
#plot; one'95' violin and one 'all'-violin with transparent fill.
p1 <- ggplot(data=dat, aes(x=x,y=y)) +
geom_violin(data=dat[dat$central,], color="transparent",fill="gray90")+
geom_violin(color="black",fill="transparent")+
theme_classic()
Edit: the rounded edges bothered me, so here is a second approach. If I were doing this, I would want straight lines. So I did some playing with the density (which is what violin plots are based on)
d_y <- density(dat$y)
right_side <- data.frame(x=d_y$y, y=d_y$x) #note flip of x and y, prevents coord_flip later
right_side$central <- right_side$y > dat_q[1]&right_side$y < dat_q[2]
#add the 'left side', this entails reversing the order of the data for
#path and polygon
#and making x negative
left_side <- right_side[nrow(right_side):1,]
left_side$x <- 0 - left_side$x
density_dat <- rbind(right_side,left_side)
p2 <- ggplot(density_dat, aes(x=x,y=y)) +
geom_polygon(data=density_dat[density_dat$central,],fill="red")+
geom_path()
p2
Just make a selection first. Proof of concept:
df1 <- data.frame(x=1, y=rnorm(10 ^ 5))
df2 <- subset(df1, y > quantile(df1$y, 0.025) & y < quantile(df1$y, 0.975))
ggplot(mapping = aes(x = x, y = y)) +
geom_violin(data = df1, aes(fill = '100%'), color = NA) +
geom_violin(data = df2, aes(fill = '95%'), color = 'black') +
theme_classic() +
scale_fill_grey(name = 'level')
#Heroka gave a great answer. Here is a more general function based on his answer that allows to fill the violin plot according to any ranges (not just quantiles).
violincol <- function(x,from=-Inf,to=Inf,col='grey'){
d <- density(x)
right <- data.frame(x=d$y, y=d$x) #note flip of x and y, prevents coord_flip later
whichrange <- function(r,x){x <= r[2] & x > r[1]}
ranges <- cbind(from,to)
right$col <- sapply(right$y,function(y){
id <- apply(ranges,1,whichrange,y)
if(all(id==FALSE)) NA else col[which(id)]
})
left <- right[nrow(right):1,]
left$x <- 0 - left$x
dat <- rbind(right,left)
p <- ggplot(dat, aes(x=x,y=y)) +
geom_polygon(data=dat,aes(fill=col),show.legend = F)+
geom_path()+
scale_fill_manual(values=col)
return(p)
}
x <- rnorm(10^5)
violincol(x=x)
violincol(x=x,from=c(-Inf,0),to=c(0,Inf),col=c('green','red'))
r <- seq(-5,5,0.5)
violincol(x=x,from=r,to=r+0.5,col=rainbow(length(r)))
I have a contour plot in ggplot2 that I want to map one point to.
My contour plot looks like this:
v = ggplot(pts, aes(theta_1, theta_2, z = z))
v + stat_contour(aes(colour = ..level..),bins=50)
+ xlab(expression(Theta[1])) + ylab(expression(Theta[2]))
and I have a point that looks like this:
p = ggplot(ts,aes(x,y))
p + geom_point()
unfortunately the second overwrites the first.
Is there a way to get them to show up on the same plot, similar to MATLAB's "hold on;"?
Thanks!
You can provide the points directly to geom_point():
set.seed(1000)
x = rnorm(1000)
g = ggplot(as.data.frame(x), aes(x = x))
g + stat_bin() + geom_point(data = data.frame(x = -1, y = 40), aes(x=x,y=y))
Not sure if this is still of interest, but I think you just needed to save the updated v object then add the point to that, rather than create a new ggplot2 object. For example
v <- ggplot(pts, aes(theta_1, theta_2, z = z))
v <- v + stat_contour(aes(colour = ..level..),bins=50)
+ xlab(expression(Theta[1])) + ylab(expression(Theta[2]))
v <- v + geom_point(aes(x=ts$x, y=ts$y))
v # to display
ggplot2 is very good at adding layers incrementally, not all have to be based on the same dataset specified in the first ggplot call.