ggplot() lines transparency - r

How to change the transparency level of lines in ggplot() diagram (i.e. histogram, line plot, etc.)?
For instance consider the code below:
data <- data.frame(a=rnorm(100), b = rnorm(100,.5,1.2))
data <- melt(data)
colnames(data) <- c("Category", "Intensity")
p <- ggplot(data, aes(x=Intensity))
p <- p + geom_density(aes(color=Category), size=2, alpha=.4)
print(p)
I expected the lines would be transparent (as alpha=.4), but they're not.

Simply following #baptiste's directions,
data <- data.frame(a=rnorm(100), b = rnorm(100,.5,1.2))
data <- melt(data)
colnames(data) <- c("Category", "Intensity")
p <- ggplot(data, aes(x=Intensity))
p + geom_line(aes(color=Category), stat="density", size=2, alpha=0.4)

Related

Can I use Annotate in ggplot2 to fill my violin graph? [duplicate]

ggplot2 can create a very attractive filled violin plot:
ggplot() + geom_violin(data=data.frame(x=1, y=rnorm(10 ^ 5)),
aes(x=x, y=y), fill='gray90', color='black') +
theme_classic()
I'd like to restrict the fill to the central 95% of the distribution if possible, leaving the outline intact. Does anyone have suggestions on how to accomplish this?
Does this do what you want? It requires some data-processing and the drawing of two violins.
set.seed(1)
dat <- data.frame(x=1, y=rnorm(10 ^ 5))
#calculate for each point if it's central or not
dat_q <- quantile(dat$y, probs=c(0.025,0.975))
dat$central <- dat$y>dat_q[1] & dat$y < dat_q[2]
#plot; one'95' violin and one 'all'-violin with transparent fill.
p1 <- ggplot(data=dat, aes(x=x,y=y)) +
geom_violin(data=dat[dat$central,], color="transparent",fill="gray90")+
geom_violin(color="black",fill="transparent")+
theme_classic()
Edit: the rounded edges bothered me, so here is a second approach. If I were doing this, I would want straight lines. So I did some playing with the density (which is what violin plots are based on)
d_y <- density(dat$y)
right_side <- data.frame(x=d_y$y, y=d_y$x) #note flip of x and y, prevents coord_flip later
right_side$central <- right_side$y > dat_q[1]&right_side$y < dat_q[2]
#add the 'left side', this entails reversing the order of the data for
#path and polygon
#and making x negative
left_side <- right_side[nrow(right_side):1,]
left_side$x <- 0 - left_side$x
density_dat <- rbind(right_side,left_side)
p2 <- ggplot(density_dat, aes(x=x,y=y)) +
geom_polygon(data=density_dat[density_dat$central,],fill="red")+
geom_path()
p2
Just make a selection first. Proof of concept:
df1 <- data.frame(x=1, y=rnorm(10 ^ 5))
df2 <- subset(df1, y > quantile(df1$y, 0.025) & y < quantile(df1$y, 0.975))
ggplot(mapping = aes(x = x, y = y)) +
geom_violin(data = df1, aes(fill = '100%'), color = NA) +
geom_violin(data = df2, aes(fill = '95%'), color = 'black') +
theme_classic() +
scale_fill_grey(name = 'level')
#Heroka gave a great answer. Here is a more general function based on his answer that allows to fill the violin plot according to any ranges (not just quantiles).
violincol <- function(x,from=-Inf,to=Inf,col='grey'){
d <- density(x)
right <- data.frame(x=d$y, y=d$x) #note flip of x and y, prevents coord_flip later
whichrange <- function(r,x){x <= r[2] & x > r[1]}
ranges <- cbind(from,to)
right$col <- sapply(right$y,function(y){
id <- apply(ranges,1,whichrange,y)
if(all(id==FALSE)) NA else col[which(id)]
})
left <- right[nrow(right):1,]
left$x <- 0 - left$x
dat <- rbind(right,left)
p <- ggplot(dat, aes(x=x,y=y)) +
geom_polygon(data=dat,aes(fill=col),show.legend = F)+
geom_path()+
scale_fill_manual(values=col)
return(p)
}
x <- rnorm(10^5)
violincol(x=x)
violincol(x=x,from=c(-Inf,0),to=c(0,Inf),col=c('green','red'))
r <- seq(-5,5,0.5)
violincol(x=x,from=r,to=r+0.5,col=rainbow(length(r)))

Creating graphs in a loop: one graph different from the others

I have an elaborate code to create a series of graphs. I would like to put a vertical line in one of the many graphs I create.
Consider the following simple code:
library(ggplot2)
library(grid)
library(gridExtra)
plots <- list()
for (i in 1:4) {
V1 <- rnorm(1000)
V2 <- seq(1000)
df <- data.frame(V1, V2)
plots[[i]] <- ggplot(df, aes(x= V2, y=V1)) +
geom_point()+
geom_vline(xintercept = 500, color="red")
}
grid.arrange(grobs=plots, nrow=2)
I would like to have the red vertical line for graph 4 but not the others. How would I do this efficiently?
You don't need a for-loop and if-statement for this matter. You can use faceting;
library(ggplot2)
library(grid)
library(gridExtra)
library(dplyr)
set.seed(123) ## set the seed for random numbers to be reproducible
df <- bind_rows(lapply(1:4, function(x)
data.frame(V1=rnorm(1000), V2=seq(1000))), .id = 'facet')
ggplot(df, aes(x= V2, y=V1)) +
geom_point() +
facet_wrap(~facet) +
geom_vline(data=data.frame(xint=500,facet=4), aes(xintercept = xint), color = "red")
just split your plot production and set a condition :)
library(ggplot2)
library(grid)
library(gridExtra)
plots <- list()
for (i in 1:4) {
V1 <- rnorm(1000)
V2 <- seq(1000)
df <- data.frame(V1, V2)
plots[[i]] <- ggplot(df, aes(x= V2, y=V1)) +
geom_point()
if (i == 4) plots[[i]] <- plots[[i]] + geom_vline(xintercept = 500, color="red")
}
grid.arrange(grobs=plots, nrow=2)

Getting even figure widths in grid.arrange

I'm trying to plot three figures using grid.arrange of R'sgridExtra package. I want them to appear as 3 columns in one row, where the left most figure should have the y-axis but no legend, the middle figure no y-axis and no legend, and the right most figure should have no y-axis but should include the legend. That way the legend and y-axis, which are identical to all figures, appear only once.
Here are the data - they relate to gene ontology enrichment tests:
First, the color scheme of the legend - a color for each enrichment p-value range:
color.order <- c("#7d4343","#B20000","#C74747","#E09898","#EBCCD6","#C8C8C8")
names(color.order) <- c("(0-0.05]","(0.05-0.1]","(0.1-0.15]","(0.15-0.2]","(0.2-0.25]","(0.25-1]")
Then the figure data.frames:
df.g1 <- data.frame(category=c("C1-up","C1-down","C2-up","C2-down"),
p.value=c(0.4833,0.5114,0.3487,0.6522),log10.p.value=c(3.157832,2.912393,4.575481,1.856192),
col=c("(0.25-1]","(0.25-1]","(0.25-1]","(0.25-1]"),
col.cat=c("(0.25-1]","(0.25-1]","(0.25-1]","(0.25-1]"))
df.g2 <- data.frame(category=c("C1-up","C1-down","C2-up","C2-down"),
p.value=c(0.5345,0.4819,0.9986,0.0013),log10.p.value=c(2.720522905,3.170430737,0.006084383,28.860566477),
col=c("(0.25-1]","(0.25-1]","(0.25-1]","(0-0.05]"),
col.cat=c("(0.25-1]","(0.25-1]","(0.25-1]","(0-0.05]"))
df.g3 <- data.frame(category=c("C1-up","C1-down","C2-up","C2-down"),
p.value=c(0.2262,0.7703,0.9926,0.0080),log10.p.value=c(6.45507399,1.13340102,0.03225729,20.96910013),
col=c("(0.2-0.25]","(0.25-1]","(0.25-1]","(0-0.05]"),
col.cat=c("(0.2-0.25]","(0.25-1]","(0.25-1]","(0-0.05]"))
Putting them together in a list:
df.list <- list(g1=df.g1,g2=df.g2,g3=df.g3)
This is for the legend which associates p-value ranges with colors:
color.order <- c("#7d4343","#B20000","#C74747","#E09898","#EBCCD6","#C8C8C8")
names(color.order) <- c("(0-0.05]","(0.05-0.1]","(0.1-0.15]","(0.15-0.2]","(0.2-0.25]","(0.25-1]")
And the plot creation code:
library(ggplot2)
library(gridExtra)
ggplot.list <- vector(mode="list", length(df.list))
for(g in 1:length(df.list))
{
if(g==1){ #draw y-axis but no legend
ggplot.list[[g]] <- ggplot(df.list[[g]], aes(y=log10.p.value,x=category,fill=col))+
scale_fill_manual(drop=FALSE,values=color.order,name="Enrichment P-value",guide=F)+
geom_bar(stat="identity",width=0.2)+scale_y_continuous(limits=c(0,30),labels=c(seq(0,20,10)," >30"),expand=c(0,0))+
theme_bw()+theme(panel.border=element_blank(),axis.text=element_text(size=8),axis.title=element_text(size=8,face="bold"))+coord_flip()+theme(plot.margin=unit(c(0.1,1,0.1,0.1),"cm"),axis.title.y = element_text(size=8),axis.title.x = element_text(size=8))+labs(x="Category",y="-10log10(P-value)")+ggtitle(names(df.list)[g])
} else if(g==2){ #no y-axis and no legend
ggplot.list[[g]] <- ggplot(df.list[[g]], aes(y=log10.p.value,x=category,fill=col))+
scale_fill_manual(drop=FALSE,values=color.order,name="Enrichment P-value",guide=F)+
geom_bar(stat="identity",width=0.2)+scale_y_continuous(limits=c(0,30),labels = c(seq(0,20,10)," >30"),expand=c(0,0))+
theme_bw()+theme(panel.border=element_blank(),axis.text=element_text(size=8),axis.title=element_text(size=8,face="bold"))+coord_flip()+theme(plot.margin=unit(c(0.1,1,0.1,0.1),"cm"),axis.title.y = element_blank(),axis.text.y=element_blank(),axis.title.x = element_text(size=8))+labs(y="-10log10(P-value)")+ggtitle(names(df.list)[g])
} else if(g==3){ #only legend
ggplot.list[[g]] <- ggplot(df.list[[g]], aes(y=log10.p.value,x=category,fill=col))+
scale_fill_manual(drop=FALSE,values=color.order,name="Enrichment P-value")+
geom_bar(stat="identity",width=0.2)+scale_y_continuous(limits=c(0,30),labels = c(seq(0,20,10)," >30"),expand=c(0,0))+
theme_bw()+theme(panel.border=element_blank(),axis.text=element_text(size=8),axis.title=element_text(size=8,face="bold"))+coord_flip()+theme(plot.margin=unit(c(0.1,1,0.1,0.1),"cm"),axis.title.y = element_blank(),axis.text.y=element_blank(),axis.title.x = element_text(size=8))+labs(y="-10log10(P-value)")+ggtitle(names(df.list)[g])
}
}
This gives me almost what I need:
My problem is that the three figures have different widths. So my question is how do I make the widths identical?
This data seems tailor-made for faceting:
library(dplyr)
library(ggplot2)
color.order <- c("#7d4343","#B20000","#C74747","#E09898","#EBCCD6","#C8C8C8")
names(color.order) <- c("(0-0.05]","(0.05-0.1]","(0.1-0.15]","(0.15-0.2]","(0.2-0.25]","(0.25-1]")
df <- bind_rows(df.list, .id="grp")
df <- mutate(df, col=factor(col, levels=names(color.order)))
gg <- ggplot(df, aes(y=log10.p.value, x=category, fill=col))
gg <- gg + geom_bar(stat="identity", width=0.2)
gg <- gg + scale_y_continuous(limits=c(0,30), labels=c(seq(0,20,10)," >30"), expand=c(0,0))
gg <- gg + scale_fill_manual(drop=FALSE, values=color.order, name="Enrichment P-value")
gg <- gg + coord_flip()
gg <- gg + facet_wrap(~grp)
gg <- gg + labs(x="Category", y="-10log10(P-value)")
gg <- gg + theme_bw()
gg <- gg + theme(panel.border=element_blank(),
panel.margin=margin(1,1,1,1, unit="cm"),
axis.text=element_text(size=8),
axis.title=element_text(size=8,face="bold"),
axis.title.y=element_text(size=8),
axis.title.x=element_text(size=8),
strip.background=element_blank(),
plot.margin=margin(0.1, 0.1, 0.1, 0.1, unit="cm"))
gg

ggplot2 violin plot: fill central 95% only?

ggplot2 can create a very attractive filled violin plot:
ggplot() + geom_violin(data=data.frame(x=1, y=rnorm(10 ^ 5)),
aes(x=x, y=y), fill='gray90', color='black') +
theme_classic()
I'd like to restrict the fill to the central 95% of the distribution if possible, leaving the outline intact. Does anyone have suggestions on how to accomplish this?
Does this do what you want? It requires some data-processing and the drawing of two violins.
set.seed(1)
dat <- data.frame(x=1, y=rnorm(10 ^ 5))
#calculate for each point if it's central or not
dat_q <- quantile(dat$y, probs=c(0.025,0.975))
dat$central <- dat$y>dat_q[1] & dat$y < dat_q[2]
#plot; one'95' violin and one 'all'-violin with transparent fill.
p1 <- ggplot(data=dat, aes(x=x,y=y)) +
geom_violin(data=dat[dat$central,], color="transparent",fill="gray90")+
geom_violin(color="black",fill="transparent")+
theme_classic()
Edit: the rounded edges bothered me, so here is a second approach. If I were doing this, I would want straight lines. So I did some playing with the density (which is what violin plots are based on)
d_y <- density(dat$y)
right_side <- data.frame(x=d_y$y, y=d_y$x) #note flip of x and y, prevents coord_flip later
right_side$central <- right_side$y > dat_q[1]&right_side$y < dat_q[2]
#add the 'left side', this entails reversing the order of the data for
#path and polygon
#and making x negative
left_side <- right_side[nrow(right_side):1,]
left_side$x <- 0 - left_side$x
density_dat <- rbind(right_side,left_side)
p2 <- ggplot(density_dat, aes(x=x,y=y)) +
geom_polygon(data=density_dat[density_dat$central,],fill="red")+
geom_path()
p2
Just make a selection first. Proof of concept:
df1 <- data.frame(x=1, y=rnorm(10 ^ 5))
df2 <- subset(df1, y > quantile(df1$y, 0.025) & y < quantile(df1$y, 0.975))
ggplot(mapping = aes(x = x, y = y)) +
geom_violin(data = df1, aes(fill = '100%'), color = NA) +
geom_violin(data = df2, aes(fill = '95%'), color = 'black') +
theme_classic() +
scale_fill_grey(name = 'level')
#Heroka gave a great answer. Here is a more general function based on his answer that allows to fill the violin plot according to any ranges (not just quantiles).
violincol <- function(x,from=-Inf,to=Inf,col='grey'){
d <- density(x)
right <- data.frame(x=d$y, y=d$x) #note flip of x and y, prevents coord_flip later
whichrange <- function(r,x){x <= r[2] & x > r[1]}
ranges <- cbind(from,to)
right$col <- sapply(right$y,function(y){
id <- apply(ranges,1,whichrange,y)
if(all(id==FALSE)) NA else col[which(id)]
})
left <- right[nrow(right):1,]
left$x <- 0 - left$x
dat <- rbind(right,left)
p <- ggplot(dat, aes(x=x,y=y)) +
geom_polygon(data=dat,aes(fill=col),show.legend = F)+
geom_path()+
scale_fill_manual(values=col)
return(p)
}
x <- rnorm(10^5)
violincol(x=x)
violincol(x=x,from=c(-Inf,0),to=c(0,Inf),col=c('green','red'))
r <- seq(-5,5,0.5)
violincol(x=x,from=r,to=r+0.5,col=rainbow(length(r)))

ggplot2 Scatter Plot Labels

I'm trying to use ggplot2 to create and label a scatterplot. The variables that I am plotting are both scaled such that the horizontal and the vertical axis are plotted in units of standard deviation (1,2,3,4,...ect from the mean). What I would like to be able to do is label ONLY those elements that are beyond a certain limit of standard deviations from the mean. Ideally, this labeling would be based off of another column of data.
Is there a way to do this?
I've looked through the online manual, but I haven't been able to find anything about defining labels for plotted data.
Help is appreciated!
Thanks!
BEB
Use subsetting:
library(ggplot2)
x <- data.frame(a=1:10, b=rnorm(10))
x$lab <- letters[1:10]
ggplot(data=x, aes(a, b, label=lab)) +
geom_point() +
geom_text(data = subset(x, abs(b) > 0.2), vjust=0)
The labeling can be done in the following way:
library("ggplot2")
x <- data.frame(a=1:10, b=rnorm(10))
x$lab <- rep("", 10) # create empty labels
x$lab[c(1,3,4,5)] <- LETTERS[1:4] # some labels
ggplot(data=x, aes(x=a, y=b, label=lab)) + geom_point() + geom_text(vjust=0)
Subsetting outside of the ggplot function:
library(ggplot2)
set.seed(1)
x <- data.frame(a = 1:10, b = rnorm(10))
x$lab <- letters[1:10]
x$lab[!(abs(x$b) > 0.5)] <- NA
ggplot(data = x, aes(a, b, label = lab)) +
geom_point() +
geom_text(vjust = 0)
Using qplot:
qplot(a, b, data = x, label = lab, geom = c('point','text'))

Resources