Adding reference lines to a bar-plot with ggplot in R - r

This is a minimal example that shows the plots I am trying to make.
Data looks like this:
plot1 = data.frame(
Factor1 = as.factor(rep('A', 4)),
Factor2 = as.factor(rep(c('C', 'D'), 2)),
Factor3 = as.factor(c( rep('E', 2), rep('F', 2))),
Y = c(0.225490, 0.121958, 0.218182, 0.269789)
)
plot2 = data.frame(
Factor1 = as.factor(rep('B', 4)),
Factor2 = as.factor(rep(c('C', 'D'), 2)),
Factor3 = as.factor(c( rep('E', 2), rep('F', 2))),
Y = c(-0.058585, -0.031686, 0.013141, 0.016249)
)
While the basic code for plotting looks like this:
require(ggplot2)
require(grid)
p1 <- ggplot(data=plot1, aes(x=Factor2, y=Y, fill=factor(Factor3))) +
ggtitle('Type: A') +
coord_cartesian(ylim = c(-0.10, 0.30)) +
geom_bar(position=position_dodge(.9), width=0.5, stat='identity') +
scale_x_discrete(name='Regime',
labels=c('C', 'D')) +
scale_y_continuous('Activations') +
scale_fill_brewer(palette='Dark2', name='Background:',
breaks=c('E','F'),
labels=c('E','F')) +
theme(axis.text=element_text(size=11),
axis.title.x=element_text(size=13, vjust=-0.75),
axis.title.y=element_text(size=13, vjust=0.75),
legend.text=element_blank(),
legend.title=element_blank(),
legend.position='none',
plot.title=element_text(hjust=0.5))
p2 <- ggplot(data=plot2, aes(x=Factor2, y=Y, fill=factor(Factor3))) +
ggtitle('Type: B') +
coord_cartesian(ylim = c(-0.10, 0.30)) +
geom_bar(position=position_dodge(.9), width=0.5, stat='identity') +
scale_x_discrete(name='Regime',
labels=c('C', 'D')) +
scale_y_continuous('Activations') +
scale_fill_brewer(palette='Dark2', name='Background:',
breaks=c('E','F'),
labels=c('E','F')) +
theme(axis.text=element_text(size=11),
axis.title.x=element_text(size=13, vjust=-0.75),
axis.title.y=element_blank(),
legend.text=element_text(size=11),
legend.title=element_text(size=13),
plot.title=element_text(hjust=0.5))
pushViewport(viewport(
layout=grid.layout(1, 2, heights=unit(4, 'null'),
widths=unit(c(1,1.17), 'null'))))
print(p1, vp=viewport(layout.pos.row=1, layout.pos.col=1))
print(p2, vp=viewport(layout.pos.row=1, layout.pos.col=2))
And the figure looks like this:
However, I would need something like this:
Thick black lines are the reference values. They are constant and the Figure presents that "reference situation". However, in other plots that I need to produce bars will change but the reference values should remain the same to make the comparisons straightforward and easy. I know I should be using geom_segment() but those lines in my attempts to make this work are just missing the bars.
Any help/advice? Thanks!

I was able to do this using geom_errorbarh. For instance, with the second figure:
p1 +
geom_errorbarh(
aes(xmin = as.numeric(Factor2)-.2,xmax = as.numeric(Factor2)+.2), #+/-.2 for width
position = position_dodge(0.9), size = 2, height = 0
)
OUTPUT:
And, if I understand the other plots you describe, you can specify the reference data in those, eg data = plot1

If your references are not going to be changed, you can create a second dataset and merge it to the dataset you are going to plot.
Here, I first add plot1 and plot2. Then, I create a new dataset that will be the reference dataset.
library(dplyr)
new_df = rbind(plot1, plot2)
ref_plot = new_df
ref_plot <- ref_plot %>% rename(Ref_value = Y)
Then, now you have the new_df which is the dataset to be plot and ref_plot that contains references values for each conditions.
Instead of using grid and create two different plot that I will merge after, I preferred to use facet_wrap which put all plots on the same figure. It is much more convenient and don't require to write twice the same thing.
As mentioned by #AHart few minutes before me, you can use geom_errorbar to define your reference values on the plot. The difference is I prefere to use geom_errorbar instead of geom_errobarh.
Here is for the plot:
library(ggplot2)
new_df %>% left_join(ref_plot) %>%
ggplot(aes(x = Factor2, y = Y, fill = Factor3))+
geom_bar(stat = "identity", position = position_dodge())+
geom_errorbar(aes(ymin = Ref_value-0.00001, ymax = Ref_value+0.0001, group = Factor3), position = position_dodge(.9),width = 0.2)+
facet_wrap(.~Factor1, labeller = labeller(Factor1 = c(A = "Type A", B = "Type B"))) +
scale_x_discrete(name='Regime',
labels=c('C', 'D')) +
scale_fill_brewer(palette='Dark2', name='Background:',
breaks=c('E','F'),
labels=c('E','F')) +
theme(axis.text=element_text(size=11),
axis.title.x=element_text(size=13, vjust=-0.75),
axis.title.y=element_blank(),
legend.text=element_text(size=11),
legend.title=element_text(size=13),
plot.title=element_text(hjust=0.5))

Related

How can I get the real scale from a facet_grid plot in R?

I am trying to add captions as it appears in this post.
For that reason, I need the real scale of the plot (x and y axis) when I am using facet_grid. I know that I can use layer_data, since it saves everything from the plot... However, it is not really accurate, because when I try to establish the limits using min and max from that output, the plot changes.
Here you have an example:
library(ggplot2)
library(dplyr)
val1 <- c(2.1490626,2.2035281,1.5927854,3.1399245,2.3967338,3.7915825,4.6691277,3.0727319,2.9230937,2.6239759,3.7664386,4.0160378,1.2500835,4.7648343,0.0000000,5.6740227,2.7510256,3.0709322,2.7998003,4.0809085,2.5178086,5.9713330,2.7779843,3.6724801,4.2648527,3.6841084,2.5597235,3.8477471,2.6587736,2.2742209,4.5862788,6.1989269,4.1167091,3.1769325,4.2404515,5.3627032,4.1576810,4.3387921,1.4024381,0.0000000,4.3999099,3.4381837,4.8269218,2.6308474,5.3481382,4.9549753,4.5389650,1.3002293,2.8648220,2.4015338,2.0962332,2.6774765,3.0581759,2.5786137,5.0539080,3.8545796,4.3429043,4.2233248,2.0434363,4.5980727)
val2 <- c(3.7691229,3.6478055,0.5435826,1.9665861,3.0802654,1.2248374,1.7311236,2.2492826,2.2365337,1.5726119,2.0147144,2.3550348,1.9527204,3.3689502,1.7847986,3.5901329,1.6833872,3.4240479,1.8372175,0.0000000,2.5701453,3.6551315,4.0327091,3.8781182)
df1 <- data.frame(value = val1)
df2 <- data.frame(value = val2)
data <- bind_rows(lst(df1, df2), .id = 'id')
data$Sex <- rep(c("Male", "Female"), times=84/2)
p <- data %>%
ggplot(aes(value)) +
geom_density(lwd = 1.2, colour="red", show.legend = FALSE) +
geom_histogram(aes(y=..density.., fill = id), bins=10, col="black", alpha=0.2) +
facet_grid(id ~ Sex ) +
xlab("type_data") +
ylab("Density") +
ggtitle("title") +
guides(fill=guide_legend(title="legend_title")) +
theme(strip.text.y = element_blank())
p
plot_info <- layer_data(p)
> min(plot_info$density)
[1] 7.166349e-09
> max(plot_info$density)
[1] 0.5738021
As you can see in the plot, the y-axis starts at 0 and if finishes around 0.7 more less. However, the maximum density is 0.57.
If I try to use the info from layer_data:
p + coord_cartesian(clip="off", ylim=c(min(plot_info$density), max(plot_info$density)),
xlim = c(min(plot_info$x), max(plot_info$x)))
The plot changes completely.
Does anyone know how can I get the scales that ggplot2 and facet_grid are using? I need the information of the density (y_axis) and the info from the x_axis.
Yes, to get the scales directly, use layer_scales(p), which gives you the range of the axes rather than just the range of the data, which is what you get from layer_data(p)
p + coord_cartesian(clip = "off",
ylim = layer_scales(p)$y$range$range,
xlim = layer_scales(p)$x$range$range)
Or, to combine this question with your last, where you add the text labels outside of the plotting panels, your result might be something like:
p + coord_cartesian(clip = "off",
ylim = layer_scales(p)$y$range$range,
xlim = layer_scales(p)$x$range$range) +
geom_text(data = data.frame(value = c(0, 6), id = c("df2", "df2"),
Sex = c('Female', 'Male')),
aes(y = -0.15, label = c('Female', 'Male')))
Does this help?
?layer_data
summary(layer_data(p, i = 2))
i is the layer you want to return
Can min the xmin and max the xmax etc

How to add captions outside the plot on individual facets in ggplot2?

I am trying to add a caption in each facet (I am using facet_grid). I have seen these approach and this one: but nothing gives me what I need. Also, the first approach returns a warning message that I didn't find any solution:
Warning message:
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
My example:
library(ggplot2)
library(datasets)
mydf <- CO2
a <- ggplot(data = mydf, aes(x = conc)) + geom_histogram(bins = 15, alpha = 0.75) +
labs(y = "Frequency") + facet_grid(Type ~ Treatment)
a
caption_df <- data.frame(
cyl = c(4,6),
txt = c("1st=4", "2nd=6")
)
a + coord_cartesian(clip="off", ylim=c(0, 3)) +
geom_text(
data=caption_df, y=1, x=100,
mapping=aes(label=txt), hjust=0,
fontface="italic", color="red"
) +
theme(plot.margin = margin(b=25))
The idea is to have 1 caption per plot, but with this approach it repeats the caption and it is overwritten.
Is it possible to have something like this? (caption OUTSIDE the plot) (but without the previous warning)
a + labs(caption = c("nonchilled=4", "chilled=6")) + theme(plot.caption = element_text(hjust=c(0, 1)))
NOTE: This is only an example, but I may need to put long captions (sentences) for each plot.
Example:
a + labs(caption = c("This is my first caption that maybe it will be large. Color red, n= 123", "This is my second caption that maybe it will be large. Color blue, n= 22")) +
theme(plot.caption = element_text(hjust=c(1, 0)))
Does anyone know how to do it?
Thanks in advance
You need to add the same faceting variable to your additional caption data frame as are present in your main data frame to specify the facets in which each should be placed. If you want some facets unlabelled, simply have an empty string.
caption_df <- data.frame(
cyl = c(4, 6, 8, 10),
conc = c(0, 1000, 0, 1000),
Freq = -1,
txt = c("1st=4", "2nd=6", '', ''),
Type = rep(c('Quebec', 'Mississippi'), each = 2),
Treatment = rep(c('chilled', 'nonchilled'), 2)
)
a + coord_cartesian(clip="off", ylim=c(0, 3), xlim = c(0, 1000)) +
geom_text(data = caption_df, aes(y = Freq, label = txt)) +
theme(plot.margin = margin(b=25))

R : ggplot2 plot several data frames in one plot

I'm little bit stuck on ggplot2 trying to plot several data frame in one plot.
I have several data frame here I'll present just two exemples.
The data frame have the same Header but are different. Let say that I want to count balls that I have in 2 boxes.
name=c('red','blue','green','purple','white','black')
value1=c(2,3,4,2,6,8)
value2=c(1,5,7,3,4,2)
test1=data.frame("Color"=name,"Count"=value1)
test2=data.frame("Color"=name,"Count"=value2)
What I'm trying to do it's to make a bar plot of my count.
At the moment what I did it's :
(plot_test=ggplot(NULL, aes(x= Color, y=Count)) +
geom_bar(data=test1,stat = "identity",color='green')+
geom_bar(data=test2,stat = "identity",color='blue')
)
I want to have x=Color and y=Count, and barplot of test2 data frame next to test1. Here there are overlapping themselves. So I'll have same name twice in x but I want to plot the data frames in several color and got in legend the name.
For example "Green bar" = test1
"Blue bar" = test2
Thank you for your time and your help.
Best regards
You have two options here:
Either tweak the size and position of the bars
ggplot(NULL, aes(x= Color, y=Count)) +
geom_bar(data=test1, aes(color='test1'), stat = "identity",
width=.4, position=position_nudge(x = -0.2)) +
geom_bar(data=test2, aes(color='test2'), stat = "identity",
width=.4, position=position_nudge(x = 0.2))
or what I recommend is join the two data frames together and then plot
library(dplyr)
test1 %>%
full_join(test2, by = 'Color') %>%
data.table::melt(id.vars = 'Color') %>%
ggplot(aes(x= Color, y=value, fill = variable)) +
geom_bar(stat = "identity", position = 'dodge')
Try this:
name=c('red','blue','green','purple','white','black')
value1=c(2,3,4,2,6,8)
value2=c(1,5,7,3,4,2)
test1=data.frame("Color"=name,"Count"=value1)
test2=data.frame("Color"=name,"Count"=value2)
test1$var <- 'test1'
test2$var <- 'test2'
test_all <- rbind(test1,test2)
(plot_test=ggplot(data=test_all) +
geom_bar(aes(x=Color,y=Count,color=var),
stat = "identity", position=position_dodge(1))+
scale_color_manual(values = c('green', 'blue'))
)
This will do what you were trying to do:
balls <- data.frame(
count = c(c(2,3,4,2,6,8),c(1,5,7,3,4,2)),
colour = c(c('red','blue','green','purple','white','black'),c('red','blue','green','purple','white','black')),
box = c(rep("1", times = 6), rep("2", times = 6))
)
ggplot(balls, aes(x = colour, y = count, fill = box)) +
geom_col() +
scale_fill_manual(values = c("green","blue"))
This is better because it facilitates comparisons between the box counts:
ggplot(balls, aes(x = colour, y = count)) +
geom_col() +
facet_wrap(~ box, ncol = 1, labeller = as_labeller(c("1" = "Box #1", "2" = "Box #2")))

ggplot pie charts / bar graph, force a legend [duplicate]

I'm struggling with ggplot (I always do). There are a number of very similar questions about forcing ggplot to include zero value categories in legends - here and here (for example). BUT I (think I) have a slightly different requirement to which all my mucking about with scale_x_discrete and scale_fill_manual has not helped.
Requirement: As you can see; the right-hand plot has no data in the TM=5 category - so is missing. What I need is for that right plot to have category 5 shown on the axis but obviously with no points or box.
Current Plot Script:
#data
plotData <- data.frame("TM" = c(3,2,3,3,3,4,3,2,3,3,4,3,4,3,2,3,2,2,3,2,3,3,3,2,3,1,3,2,2,4,4,3,2,3,4,2,3),
"Score" = c(5,4,4,4,3,5,5,5,5,5,5,3,5,5,4,4,5,4,5,4,5,4,5,4,4,4,4,4,5,4,4,5,3,5,5,5,5))
#vars
xTitle <- bquote("T"["M"])
v.I <- plotData$TM
depVar <- plotData$Score
#plot
p <- ggplot(plotData, aes_string(x=v.I,y=depVar,color=v.I)) +
geom_point() +
geom_jitter(alpha=0.8, position = position_jitter(width = 0.2, height = 0.2)) +
geom_boxplot(width=0.75,alpha=0.5,aes_string(group=v.I)) +
theme_bw() +
labs(x=xTitle) +
labs(y=NULL) +
theme(legend.position='none',
axis.text=element_text(size=10, face="bold"),
axis.title=element_text(size=16))
Attempted Solutions:
drop=False to scales (suggested by #Jarretinha here) totally borks margins and x-axis labels
> plot + scale_x_discrete(drop=FALSE) + scale_fill_manual(drop=FALSE)
Following logic from here and manually setting the labels in scale_fill_manual does nothing and results in the same right-hand plot from example above.
> p + scale_fill_manual(values = c("red", "blue", "green", "purple", "pink"),
labels = c("Cat1", "Cat2", "Cat3", "Cat4", "Cat5"),
drop=FALSE)
Playing with this logic and trying something with scale_x_discrete results in a change to category names on x-axis but the fifth is still missing AND the margins (as attempt 1) are borked again. BUT apparent that scale_x_discrete is important and NOT the whole answer
> p + scale_x_discrete(limits = c("Cat1", "Cat2", "Cat3", "Cat4", "Cat5"), drop=FALSE)
ANSWER for above example courtesy of input from #Bouncyball & #aosmith
#data
plotData <- data.frame("TM" = c(3,2,3,3,3,4,3,2,3,3,4,3,4,3,2,3,2,2,3,2,3,3,3,2,3,1,3,2,2,4,4,3,2,3,4,2,3),
"Score" = c(5,4,4,4,3,5,5,5,5,5,5,3,5,5,4,4,5,4,5,4,5,4,5,4,4,4,4,4,5,4,4,5,3,5,5,5,5))
plotData$TM <- factor(plotData$TM, levels=1:5) # add correct (desired number of factors to input data)
#vars
xTitle <- bquote("T"["M"])
v.I <- plotData$TM
depVar <- plotData$Score
myPalette <- c('#5c9bd4','#a5a5a4','#4770b6','#275f92','#646464','#002060')
#plot
ggplot(plotData, aes_string(x=v.I,y=depVar,color=v.I)) +
geom_jitter(alpha=0.8, position = position_jitter(width = 0.2, height = 0.2)) +
geom_boxplot(width=0.75,alpha=0.5,aes_string(group=v.I)) +
scale_colour_manual(values = myPalette, drop=F) + # new line added here
scale_x_discrete(drop=F) + # new line added here
theme_bw() +
labs(x=xTitle) +
labs(y=NULL) +
theme(legend.position='none',
axis.text=element_text(size=10, face="bold"),
axis.title=element_text(size=16))
Here's a workaround you could use:
# generate dummy data
set.seed(123)
df1 <- data.frame(lets = sample(letters[1:4], 20, replace = T),
y = rnorm(20), stringsAsFactors = FALSE)
# define factor, including the missing category as a level
df1$lets <- factor(df1$lets, levels = letters[1:5])
# make plot
ggplot(df1, aes(x = lets, y = y))+
geom_boxplot(aes(fill = lets))+
geom_point(data = NULL, aes(x = 'e', y = 0), pch = NA)+
scale_fill_brewer(drop = F, palette = 'Set1')+
theme_bw()
Basically, we plot an "empty" point (i.e. pch = NA) so that the category shows up on the x-axis, but has no visible geom associated with it. We also define our discrete variable, lets as a factor with five levels when only four are present in the data.frame. The missing category is the letter e.
NB: You'll have to adjust the positioning of this "empty" point so that it doesn't skew your y axis.
Otherwise, you could use the result from this answer to avoid having to plot an "empty" point.
# generate dummy data
set.seed(123)
df1 <- data.frame(lets = sample(letters[1:4], 20, replace = T),
y = rnorm(20), stringsAsFactors = FALSE)
# define factor, including the missing category as a level
df1$lets <- factor(df1$lets, levels = letters[1:5])
# make plot
ggplot(df1, aes(x = lets, y = y)) +
geom_boxplot(aes(fill = lets)) +
scale_x_discrete(drop = F) +
scale_fill_brewer(drop = F, palette = 'Set1') +
theme_bw()

ggplot2 geom_tile diagonal line overlay

I'm looking for a way to produce a diagonal slash from the bottom left the to top right corner of a cell within a plot made using geom_tile.
The input is a melted data frame with two categorical factor columns, sample and gene. I'd like to use something like geom_segment, but I'm not able to specify fractional increments. Any ideas on the best way to accomplish this?
edit: Here is a reproducible example, I can't share one from my own data, as it's protected patient information.
df <- data_frame( gene = c('TP53','TP53','MTOR','BRACA1'),
sample = c('A','B','A','B'),
diagonal = c(FALSE,TRUE,TRUE,FALSE),
effect = c('missense', 'nonsense', 'missense', 'silent') )
ggplot(df, aes(sample, gene)) + geom_tile(aes(fill = effect))
what I'm looking for:
One way to do it:
library(ggplot2)
df <- data.frame(
x = rep(c(2, 5, 7, 9, 12), 2),
y = rep(c(1, 2), each = 5),
z = factor(1:10),
w = rep(diff(c(0, 4, 6, 8, 10, 14)), 2)
)
p <- ggplot(df, aes(x, y)) + geom_tile(aes(fill = z))
gb <- ggplot_build(p)
p + geom_segment(data=gb$data[[1]][1:2, ],
aes(x=xmin, xend=xmax, y=ymin, yend=ymax),
color="white")
In your example, could also rely on the indices of the factor levels like this:
library(ggplot2)
df <- data.frame( gene = c('TP53','TP53','MTOR','BRACA1'),
sample = c('A','B','A','B'),
diagonal = c(FALSE,TRUE,TRUE,FALSE),
effect = c('missense', 'nonsense', 'missense', 'silent') )
df$cross <- c(F,T,T,F)
ggplot(df, aes(sample, gene)) +
geom_tile(aes(fill = effect)) +
geom_segment(data=transform(subset(df, !!cross), sample=as.numeric(sample), gene=as.numeric(gene)),
aes(x=sample-.49, xend=sample+.49, y=gene-.49, yend=gene+.49),
color="white", size=2)
(Note that I used data.frame and not dplyr::data_frame, so that both columns become factors.)
If you want a legend:
ggplot(df, aes(sample, gene)) +
geom_tile(aes(fill = effect)) +
geom_segment(data=transform(subset(df, !!cross), sample=as.numeric(sample), gene=as.numeric(gene)),
aes(x=sample-.49, xend=sample+.49, y=gene-.49, yend=gene+.49, color=cross),
size=2) +
scale_color_manual(values=c("TRUE"="white", "FALSE"=NA))
You can use geom_abline. You can tweak intercept and slope to get what you want. More info and examples here.
ggplot(df, aes(sample, gene)) +
geom_tile(aes(fill = effect)) +
geom_abline(intercept = 1, slope = 1, color="white", size=2)
If you don't actually want specific lines, but just want to highlight, you can simply draw dots:
ggplot(df, aes(sample, gene)) + geom_tile(aes(fill = effect)) +
geom_point(aes(sample, gene))
You can make it look like a line: geom_point(aes(sample, gene), shape='/', size=10, color='white')
To have the lines be only on some tiles, simply pass only the rows with those coordinates to geom_point: geom_point(data=filter(df, diagonal), aes(sample, gene))
Alternatively, you can hack it with a manual shape scale: geom_point(aes(sample, gene, shape=diagonal)) + scale_shape_manual(values=c(' ', '/'))

Resources