ggplot2 Barplot with broken Y axis - r

I am trying to make a ggplot2 Barplot with broken Y axis using the script provided HERE
My script
library(ggplot2)
# Read the expression table; check.names = FALSE leaves column names untouched.
dat <- read.csv(file = "expression", header = TRUE, check.names = FALSE)
# Echo the table so the loaded values can be inspected.
dat
# Dodge specification with a width of 0.7.
pd <- position_dodge(width = 0.7)
# Map data values to y display positions: values up to 2 are kept as they are,
# and the part of a value above 2 is compressed by a factor of 0.05.
trans <- function(x) {
  kept <- pmin(x, 2)
  excess <- pmax(x - 2, 0)
  kept + 0.05 * excess
}
# Tick labels for the y-axis, given in original data units; the plot places them
# at trans(yticks).
yticks <- c(0, 1, 2, 10,15,20,25,30)
# Transform the data onto the display scale
# Bar height: the value itself, mapped through trans().
dat$mean_t <- trans(dat$value)
# Upper error-bar end: value + sd, mapped through trans().
dat$sd_up_t <- trans(dat$value + dat$sd)
# Lower error-bar end: value - sd mapped through trans(), then floored at 1 by
# pmax(), so any lower end that falls below 1 is drawn at exactly 1.
dat$sd_low_t <- pmax(trans(dat$value - dat$sd),1)
# Open a 6 x 4 inch, 300 dpi PNG device that writes to test.png.
# NOTE(review): no dev.off() is visible in this snippet; the device must be
# closed for the file to be finalised - confirm it is called afterwards.
png("test.png", units="in", family="Times", width=6, height=4, res=300) #pointsize is font size| increase image size to see the key
# Bars are grouped and filled by treatment; heights are the transformed means.
ggplot(data=dat, aes(x=gene, y=mean_t, group=treatment, fill=treatment)) +
# NOTE(review): this layer is added before geom_col(), so the columns are drawn
# on top of it. It also dodges by 0.7 while geom_col() uses position="dodge"
# with its own default width - presumably why bars and error bars do not line
# up; confirm.
geom_errorbar(aes(ymin=sd_low_t, ymax=sd_up_t),size=.5,
width=.2,
position=position_dodge(0.7)) +
geom_col(position="dodge") +
# White strip between display heights 2.2 and 2.3, spanning x from 0 to 4, that
# marks the axis break.
# NOTE(review): it is a panel layer, so it presumably cannot cover the axis line
# itself, which is drawn by the theme - confirm.
geom_rect(aes(xmin=0, xmax=4, ymin=2.2, ymax=2.3), fill="white") +
# Breaks sit at the transformed positions but are labelled with the original
# values. The upper limit is NA, i.e. not fixed here.
scale_y_continuous(limits=c(0,NA),expand = c(0,0), breaks=trans(yticks), labels=yticks) +
labs(y="Relative titer of CLas")+
theme_classic()
My data looks like this
gene,treatment,value,sd
tar1,EV,1,0.1
tar1,OX6,25.4,3
tar1,OX102,18.3,3
tar2,EV,1,0.1
tar2,OX6,1.6,0.2
tar2,OX102,1.5,0.2
tar3,EV,1,0.1
tar3,OX6,3.98,0.3
tar3,OX102,1.7,0.1
My plot
I have run into 3 problems:
Set maximum on the y-axis is not working.
My error bars are not properly placed.
My y-axis is not covered with white geom_rect
Thanks for your help.

Related

Issue with log_2 scaling using ggplot2 and log2_trans()

I am trying to plot data using ggplot2 in R.
The datapoints occur for each 2^i-th x-value (4, 8, 16, 32,...). For that reason, I want to scale my x-Axis by log_2 so that my datapoints are spread out evenly. Currently most of the datapoints are clustered on the left side, making my plot hard to read (see first image).
I used the following command to get this image:
# Points, connecting lines and +/- se error bars per group, all shifted by the
# shared dodge object pd (defined outside this snippet).
ggplot(summary, aes(x=xData, y=yData, colour=groups)) +
# NOTE(review): width=2000 is presumably expressed in x data units - confirm.
geom_errorbar(aes(ymin=yData-se, ymax=yData+se), width=2000, position=pd) +
geom_line(position=pd) +
geom_point(size=3, position=pd)
However trying to scale my x-axis with log2_trans yields the second image, which is not what I expected and does not follow my data.
Code used:
# Same three layers as before, plus a log2-transformed x-axis with breaks at
# powers of two and labels written as 2^n.
# NOTE(review): log2_trans(), trans_breaks(), trans_format() and math_format()
# are called unqualified; they appear to come from the scales package, which
# would have to be attached - confirm.
ggplot(summary, aes(x=settings.numPoints, y=benchmark.costs.average, colour=solver.name)) +
# The error-bar width (2000) and the dodge in pd are unchanged from the
# linear-axis version of this plot.
geom_errorbar(aes(ymin=benchmark.costs.average-se, ymax=benchmark.costs.average+se), width=2000, position=pd) +
geom_line(position=pd) +
geom_point(size=3, position=pd) +
scale_x_continuous(trans = log2_trans(),
breaks = trans_breaks("log2", function(x) 2^x),
labels = trans_format("log2", math_format(2^.x)))
Using scale_x_continuous(trans = log2_trans()) only doesn't help either.
EDIT:
Attached the data for reproducing the results:
https://pastebin.com/N1W0z11x
EDIT 2:
I have used the function pd <- position_dodge(1000) to avoid overlapping of my error bars, which caused the problem.
Removing the position=pd statements solved the issue
Here is a way you could format your x-axis:
# Generate dummy data: the powers of two from 2^1 to 2^10, repeated for three
# series (a, b, c) whose y values are 0.5x, x and 1.5x respectively.
x <- 2^(1:10)
df <- data.frame(
  x = rep(x, times = 3),
  y = as.vector(outer(x, c(0.5, 1, 1.5))),
  z = rep(c("a", "b", "c"), each = length(x))
)
The plot of this would look like this:
# Baseline plot on a linear x-axis: one coloured point-and-line series per z.
ggplot(data = df, mapping = aes(x = x, y = y, colour = z)) +
  geom_point() +
  geom_line()
Adjusting the x-axis would work like so:
# Same plot on a log2-transformed x-axis; tick labels are rendered as 2^n.
ggplot(data = df, mapping = aes(x = x, y = y, colour = z)) +
  geom_point() +
  geom_line() +
  scale_x_continuous(
    trans = "log2",
    labels = scales::math_format(2^.x, format = log2)
  )
The labels argument is just so you have labels in the format 2^x, you could change that to whatever you like.
I have used the function pd <- position_dodge(1000) to avoid overlapping of my error bars, which caused the problem.
Adjusting the amount of position dodge and the width of the error bars according to the new scaling solved the problem.
# Dodge width of 0.2, sized for the log2-scaled x-axis.
pd <- position_dodge(width = 0.2)
ggplot(
  summary,
  aes(x = settings.numPoints, y = benchmark.costs.average, colour = algorithm)
) +
  geom_errorbar(
    aes(
      ymin = benchmark.costs.average - se,
      ymax = benchmark.costs.average + se
    ),
    width = 0.4,
    position = pd
  ) +
  geom_line(position = pd) +
  geom_point(size = 3, position = pd) +
  scale_x_continuous(
    trans = "log2",
    labels = scales::math_format(2^.x, format = log2)
  )
Adding scale_y_continuous(trans="log2") yields the results I was looking for:

X axis label is not showing in clustering dendrogram in ggplot

I have made a clustering dendrogram following code I found online, but the x-axis is not being shown in the graph. I would like to have the dissimilarity value shown on the x-axis, but I have not been successful.
# Keep only the rows of cervidae whose Sex is "female".
females<-cervidae[cervidae$Sex=="female",]
# Euclidean dissimilarities computed on columns 9 to 14, without standardisation.
dstf <- daisy(females[,9:14], metric = "euclidean", stand = FALSE)
# Hierarchical clustering of those dissimilarities.
# NOTE(review): "ave" presumably abbreviates the "average" linkage - confirm.
hcaf <- hclust(dstf, method = "ave")
k <- 3
clustf <- cutree(hcaf,k=k) # k clusters
dendrf <- dendro_data(hcaf, type="rectangle") # convert for ggplot
# Lookup table from leaf label to cluster; the two unnamed columns end up as
# females.Genus and females.Species (females.Genus is used in geom_text below).
clust.dff <- data.frame(label=rownames(females), cluster=factor(clustf),
females$Genus, females$Species)
# Attach cluster / genus / species to the dendrogram's label table.
dendrf[["labels"]] <- merge(dendrf[["labels"]],clust.dff, by="label")
# Range of leaf x positions per cluster, then flattened into plain columns
# (X1 / X2 are the names geom_rect uses below).
rectf <- aggregate(x~cluster,label(dendrf),range)
rectf <- data.frame(rectf$cluster,rectf$x)
# Mean of the two merge heights at positions length-(k-1) and length-(k-2) of
# hcaf$height; used as the top of the cluster rectangles.
ymax <- mean(hcaf$height[length(hcaf$height)-((k-2):(k-1))])
# The plot is stored in fem, so nothing is printed by this statement.
fem=ggplot() +
geom_segment(data=segment(dendrf), aes(x=x, y=y, xend=xend, yend=yend)) +
geom_text(data=label(dendrf), aes(x, y, label= females.Genus, hjust=0,
color=females.Genus),
size=3) +
geom_rect(data=rectf, aes(xmin=X1-.3, xmax=X2+.3, ymin=0, ymax=ymax),
color="red", fill=NA)+
coord_flip() + scale_y_reverse(expand=c(0.2, 0)) +
# No later theme() call re-enables any axis element after theme_dendro().
theme_dendro() + scale_color_discrete(name="Genus") +
theme(legend.position="none")
Here is how my dendrogram looks:
Your code included theme_dendro(), which is described in its help file as:
Sets most of the ggplot options to blank, by returning blank theme
elements for the panel grid, panel background, axis title, axis text,
axis line and axis ticks.
You can force the x-axis line / text / ticks to be visible in theme():
# Dendrogram with cluster rectangles; the final theme() call switches the x-axis
# text, ticks and line back on after theme_dendro().
ggplot() +
  geom_segment(
    data = segment(dendrf),
    mapping = aes(x = x, y = y, xend = xend, yend = yend)
  ) +
  geom_text(
    data = label(dendrf),
    mapping = aes(x = x, y = y, label = label, hjust = 0, color = cluster),
    size = 3
  ) +
  geom_rect(
    data = rectf,
    mapping = aes(xmin = X1 - 0.3, xmax = X2 + 0.3, ymin = 0, ymax = ymax),
    color = "red",
    fill = NA
  ) +
  coord_flip() +
  scale_y_reverse(expand = c(0.2, 0)) +
  theme_dendro() +
  scale_color_discrete(name = "Cluster") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(),   # show x-axis labels
    axis.ticks.x = element_line(),  # show x-axis tick marks
    axis.line.x = element_line()    # show x-axis lines
  )
(This demonstration uses a built-in dataset, since I'm not sure what's cervidae. Code used to create this is reproduced below:)
library(cluster); library(ggdendro); library(ggplot2)
# Hierarchical clustering of the built-in USArrests data on dist() distances.
# NOTE(review): "ave" presumably abbreviates the "average" linkage - confirm.
hcaf <- hclust(dist(USArrests), "ave")
k <- 3
clustf <- cutree(hcaf,k=k) # k clusters
dendrf <- dendro_data(hcaf, type="rectangle") # convert for ggplot
# Lookup table from leaf label (row name) to its cluster.
clust.dff <- data.frame(label=rownames(USArrests),
cluster=factor(clustf))
# Attach the cluster to the dendrogram's label table.
dendrf[["labels"]] <- merge(dendrf[["labels"]],clust.dff, by="label")
# Range of leaf x positions per cluster, then flattened into plain columns
# (the plotting code refers to them as X1 / X2).
rectf <- aggregate(x~cluster,label(dendrf),range)
rectf <- data.frame(rectf$cluster,rectf$x)
# Mean of the two merge heights at positions length-(k-1) and length-(k-2) of
# hcaf$height; used as the top of the cluster rectangles.
ymax <- mean(hcaf$height[length(hcaf$height)-((k-2):(k-1))])

How to fill the area under the lines in ggplot2 geom_freqpoly graph? [duplicate]

This question already has answers here:
What is the simplest method to fill the area under a geom_freqpoly line?
(4 answers)
Closed 6 years ago.
I am plotting a continuous variable on the X-axis against the corresponding counts (not the density) on the Y-axis using ggplot2.
This is my code
# Frequency polygons of AGE in bins of width 5, one coloured line per DRUG_KEY.
p <- ggplot(matched.frame, aes(x = AGE, color = as.factor(DRUG_KEY))) +
  geom_freqpoly(binwidth = 5)
# Apply the minimal theme, then draw the plot.
p1 <- p + theme_minimal()
plot(p1)
This produces a graph like this:
I want the areas under these lines to be filled with colors and with little bit of transparency. I know to do this for density plots in ggplot2, but I am stuck with this frequency polygon.
Also, how do I change the legend on the right side? For example, I want 'Cases' instead of 26 and 'Controls' instead of 27. Instead of as.factor(DRUG_KEY), I want it to appear as 'Colors'.
Sample data
# Sample data: 25 ages; the first 10 rows get DRUG_KEY 26, the other 15 get 27.
matched.frame <- data.frame(
  AGE = c(
    18, 19, 20, 21, 22, 23, 24, 25, 26, 26,
    27, 18, 19, 20, 24, 23, 23, 23, 22, 30,
    28, 89, 30, 20, 23
  ),
  DRUG_KEY = rep(c(26, 27), times = c(10, 15))
)
You can use geom_ribbon to fill the area under the curves and scale_fill_discrete (fill color) as well as scale_color_discrete (line color) to change the legend labels:
library(ggplot2)
# Toy data: two groups (f) sharing x positions 1..10, with random heights y.
set.seed(1)
df <- data.frame(x = rep(1:10, times = 2), y = runif(20), f = gl(2, 10))
# Half-transparent ribbon from 0 up to y for each group; the fill legend is
# retitled "Labels" and its entries are renamed.
ggplot(df, aes(x = x, ymin = 0, ymax = y, fill = f)) +
  geom_ribbon(alpha = 0.5) +
  scale_fill_discrete(labels = c("1" = "foo", "2" = "bar"), name = "Labels")
With regards to your edit:
# Binned counts drawn twice: as a half-transparent ribbon from 0 up to the count
# and as a frequency polygon on top; both legends share the same title and labels.
ggplot(
  matched.frame,
  aes(x = AGE, fill = as.factor(DRUG_KEY), color = as.factor(DRUG_KEY))
) +
  stat_bin(
    aes(ymax = ..count..),
    alpha = 0.5,
    ymin = 0,
    geom = "ribbon",
    binwidth = 5,
    position = "identity",
    pad = TRUE
  ) +
  geom_freqpoly(binwidth = 5, size = 2) +
  scale_fill_discrete(labels = c("26" = "foo", "27" = "bar"), name = "Labels") +
  scale_color_discrete(labels = c("26" = "foo", "27" = "bar"), name = "Labels")

How to get vertical lines in legend key using ggplot2 for geom_pointrange() type graphic

UPDATE: The question is moot. The vertical lines in the legend key are now default for geom_pointrange() in ggplot2.
For ggplot2 graphics that have a symbol for a point estimate and a vertical line representing a range about that estimate (95% confidence interval, Inter-quartile Range, Minimum and Maximum, etc) I cannot get the legend key to show the symbol with a vertical line. Since geom_pointrange() only has arguments for ymin and ymax, I would think the intended (default) functionality of geom_pointrange(show_guide=T) would be to have vertical lines (I say default because I understand that with coord_flip one could make horizontal lines in the plot). I also understand that having vertical lines in the legend key when the legend position is right or left will have the vertical lines "run together"...but for legends in the top or bottom having a vertical line through the symbol means that the key will match what appears in the plot.
Yet the approaches I've tried still put horizontal lines in the legend key:
## set up
library(ggplot2)
# Reproducible example data: ten points whose centre (5), lower end (1) and upper
# end (9) are all shifted by the same uniform noise in [-1, 1]; s and f split the
# rows into two halves of five.
set.seed(123)
ru <- 2 * runif(10) - 1
dt <- data.frame(
  x = 1:10,
  y = 5 + ru,
  ylo = 1 + ru,
  yhi = 9 + ru,
  s = rep(c("A", "B"), each = 5),
  f = rep(c("facet1", "facet2"), each = 5)
)
Default show_guide=T for geom_pointrange yields desired plot but has horizontal lines in legend key where vertical is desired (so as to match the plot):
# Point ranges from ylo to yhi with the symbol at y, shaped by s; legend at the
# bottom.
ggplot(data = dt) +
  geom_pointrange(
    aes(x = x, y = y, ymin = ylo, ymax = yhi, shape = s),
    size = 1.1,
    show_guide = TRUE
  ) +
  theme(legend.position = "bottom")
An attempt with geom_point and geom_segment together yields desired plot but has horizontal lines in legend key where vertical is desired (so as to match the plot):
# Same picture assembled from two layers: the symbols, then vertical segments
# running from ylo to yhi at each x.
ggplot(data = dt) +
  geom_point(aes(x = x, y = y, shape = s), size = 3, show_guide = TRUE) +
  geom_segment(aes(x = x, xend = x, y = ylo, yend = yhi), show_guide = TRUE) +
  theme(legend.position = "bottom")
An attempt with geom_point and geom_vline together yields desired legend key but does not respect the ymin and ymax values in the plot:
# Symbols plus a vertical line at every x; ymin / ymax are passed to the vline
# layer as well.
ggplot(data = dt) +
  geom_point(aes(x = x, y = y, shape = s), show_guide = TRUE, size = 3) +
  geom_vline(aes(xintercept = x, ymin = ylo, ymax = yhi), show_guide = TRUE) +
  theme(legend.position = "bottom")
How do I get the legend key of the 3rd graph but the plot of one of the first two?
My solution involves plotting a vertical line with geom_vline(show_guide=T) for an x-value that is out of the bounds of the displayed x-axis along with plotting geom_segment(show_guide=F):
# Segments draw the ranges without a legend entry; a single vline at x = -1 is
# kept only for its legend key and is cut out of view by the xlim of
# coord_cartesian().
ggplot(data = dt) +
  geom_point(aes(x = x, y = y, shape = s), show_guide = TRUE, size = 3) +
  geom_segment(aes(x = x, xend = x, y = ylo, yend = yhi), show_guide = FALSE) +
  geom_vline(xintercept = -1, show_guide = TRUE) +
  theme(legend.position = "bottom") +
  coord_cartesian(xlim = c(0.5, 10.5))
The solution with coord_cartesian() for a numeric x axis is fine but facet_grid(scales='free_x') can be problematic:
# problem: coord_cartesian with numeric x and facetting with scales=free_x
# (same layers as the unfacetted version, split into one panel per f)
ggplot(data = dt) +
  geom_point(aes(x = x, y = y, shape = s), show_guide = TRUE, size = 3) +
  geom_segment(aes(x = x, xend = x, y = ylo, yend = yhi), show_guide = FALSE) +
  geom_vline(xintercept = -1, show_guide = TRUE) +
  theme(legend.position = "bottom") +
  coord_cartesian(xlim = c(0.5, 10.5)) +
  facet_grid(. ~ f, scales = "free_x")
So in that situation, another solution (which might not apply in every case) is to change the x values to some meaningful character or factor and then adjust the xlim:
## hack solution: adjust xlim after change x to factor or character
## (carefully -- double check conversion):
# x becomes a factor, so each facet only has five discrete positions and the
# xlim is narrowed to 0.5..5.5 accordingly.
dt$x <- factor(dt$x)
ggplot(data = dt) +
  geom_point(aes(x = x, y = y, shape = s), show_guide = TRUE, size = 3) +
  geom_segment(aes(x = x, xend = x, y = ylo, yend = yhi), show_guide = FALSE) +
  geom_vline(xintercept = -1, show_guide = TRUE) +
  theme(legend.position = "bottom") +
  coord_cartesian(xlim = c(0.5, 5.5)) +
  facet_grid(. ~ f, scales = "free_x")
If you don't mind having to use grid to draw the plot, you can manipulate the guide grobs directly:
library(grid)
library(gtable)
library(ggplot2)
# Rebuild the example data.
set.seed(123)
ru <- 2 * runif(10) - 1
dt <- data.frame(
  x = 1:10,
  y = 5 + ru,
  ylo = 1 + ru,
  yhi = 9 + ru,
  s = rep(c("A", "B"), each = 5),
  f = rep(c("facet1", "facet2"), each = 5)
)
# Point-range plot with the legend at the bottom, kept in gg for post-processing.
gg <- ggplot(data = dt) +
  geom_pointrange(
    aes(x = x, y = y, ymin = ylo, ymax = yhi, shape = s),
    size = 1.1,
    show_guide = TRUE
  ) +
  theme(legend.position = "bottom")
gb <- ggplot_build(gg)
gt <- ggplot_gtable(gb)
# In legend grobs 4 and 6 (hard-coded positions inside gt$grobs[[8]]), find the
# "segments" child and turn it into a vertical line at the horizontal centre of
# the key, running from 10% to 90% of the key height.
for (key in c(4, 6)) {
  seg <- grep(
    "segments",
    names(gt$grobs[[8]]$grobs[[1]]$grobs[[key]]$children)
  )
  gt$grobs[[8]]$grobs[[1]]$grobs[[key]]$children[[seg]]$x0 <- unit(0.5, "npc")
  gt$grobs[[8]]$grobs[[1]]$grobs[[key]]$children[[seg]]$x1 <- unit(0.5, "npc")
  gt$grobs[[8]]$grobs[[1]]$grobs[[key]]$children[[seg]]$y0 <- unit(0.1, "npc")
  gt$grobs[[8]]$grobs[[1]]$grobs[[key]]$children[[seg]]$y1 <- unit(0.9, "npc")
}
# Draw the modified table on a fresh page.
grid.newpage()
grid.draw(gt)

How to use scale from previous plot in current plot with ggplot2?

I am using ggplot2 to produce a plot that has 3 facets. Because I am comparing two different data sets, I would like to then be able to plot a second data set using the same y scale for the facets as in the first plot. However, I cannot find a simple way to save the settings of the first plot to then re-use them with the second plot. Since each facet has its own y scale, it will be a pain to specify them by hand for the second plot. Does anyone know of a quick way of re-using scales? To make this concrete, here is how I am generating first my plot:
# Scatter plot of wt against mpg, then one panel per cyl, each with its own
# y scale.
p <- ggplot(mtcars, aes(x = mpg, y = wt)) +
  geom_point()
p + facet_wrap(~ cyl, scales = "free_y")
EDIT
When applying one of the suggestions below, I found out that my problem was more specific than described in the original post, and it had to do specifically with the scaling of the error bars. Concretely, the error bars look weird when I rescale the second plot as suggested. Does anyone have any suggestions on how to keep the same scale for both plots and still display the error bars correctly? I am attaching an example below for concreteness:
# Create sample data: two frames with the same fixtype x detype layout but
# different scores and standard errors.
d1 <- data.frame(
  fixtype = rep(c("ff", "fp"), each = 2),
  detype = rep(c("det", "pro"), times = 2),
  diffscore = c(-1, -15, 3, -17),
  se = c(2, 3, 1, 2)
)
d2 <- data.frame(
  fixtype = rep(c("ff", "fp"), each = 2),
  detype = rep(c("det", "pro"), times = 2),
  diffscore = c(-1, -3, -2, -1),
  se = c(4, 3, 5, 3)
)
#Plot for data frame 1, this is the scale I want to keep
# Error-bar limits: diffscore +/- se, evaluated against whatever data the layer uses.
lim_d1 <- aes(ymax = diffscore + se, ymin=diffscore - se)
ggplot(d1, aes(colour=detype, y=diffscore, x=detype)) +
# size is set inside aes() here, i.e. as a mapping rather than a fixed parameter.
geom_point(aes(size=1), shape=15) +
geom_errorbar(lim_d1, width=0.2,size=1) +
facet_wrap(~fixtype, nrow=2, ncol=2, scales = "free_y")
#Plot for data frame 2 original scale
lim_d2 <- aes(ymax = diffscore + se, ymin=diffscore - se)
ggplot(d2, aes(colour=detype, y=diffscore, x=detype)) +
geom_point(aes(size=1), shape=15) +
geom_errorbar(lim_d2, width=0.2,size=1) +
facet_wrap(~fixtype, nrow=2, ncol=2, scales = "free_y")
#Plot for data frame 2 adjusted scale. This is where things go wrong!
#As suggested below, first I plot the first plot, then I draw a blank screen and try
#to plot the second data frame on top.
lim_d2 <- aes(ymax = diffscore + se, ymin=diffscore - se)
ggplot(d1, aes(colour=detype, y=diffscore, x=detype)) +
geom_blank() +
geom_point(data=d2, aes(size=1), shape=15) +
# No data= argument here, so this layer uses the plot's default data (d1), not d2.
geom_errorbar(lim_d2, width=0.2,size=1) +
facet_wrap(~fixtype, nrow=2, ncol=2, scales = "free_y")
#If the error bars are fixed, by adding data=d2 to geom_errorbar(), then
#the error bars are displayed correctly but the scale gets distorted again
lim_d2 <- aes(ymax = diffscore + se, ymin=diffscore - se)
ggplot(d1, aes(colour=detype, y=diffscore, x=detype)) +
# geom_blank() inherits only y=diffscore from d1; d1's error-bar limits
# (diffscore +/- se) are not part of this layer.
geom_blank() +
geom_point(data=d2, aes(size=1), shape=15) +
geom_errorbar(data=d2,lim_d2, width=0.2,size=1) +
facet_wrap(~fixtype, nrow=2, ncol=2, scales = "free_y")
You may first call ggplot on your original data where you add a geom_blank as a first layer. This sets up a plot area, with axes and legends based on the data provided in ggplot.
Then add geoms which use data other than the original data. In the example, I use a simple subset of the original data.
From ?geom_blank: "The blank geom draws nothing, but can be a useful way of ensuring common scales between different plots.".
# The blank layer uses the full mtcars data; only the subset with wt < 3 is
# actually drawn as points, one panel per cyl.
ggplot(data = mtcars, mapping = aes(x = mpg, y = wt)) +
  geom_blank() +
  geom_point(data = subset(mtcars, wt < 3)) +
  facet_wrap(~ cyl, scales = "free_y")
Here is an ugly hack that assumes you have an identical facetting layout in both plots.
It replaces the panel element of the ggplot build.
# Reference plot: wt against mpg, one panel per cyl with free y scales.
p <- ggplot(mtcars, aes(mpg, wt)) + geom_point()
p1 <- p + facet_wrap(~ cyl, scales = "free_y") + labs(title = 'original')
# create "other" data.frame
n <- nrow(mtcars)
# Fixed seed so the same n-15 rows are sampled every time.
set.seed(201405)
mtcars2 <- mtcars[sample(seq_len(n ),n-15),]
# create this second plot
# (%+% swaps the data of p1 for mtcars2 while keeping its layers and facets)
p2 <- p1 %+% mtcars2 + labs(title = 'new data')
# and a copy so we can attempt to fix
p3 <- p2 + labs(title = 'new data original scale')
# use ggplot_build to construct the plots for rendering
p1b <- ggplot_build(p1)
p3b <- ggplot_build(p3)
# replace the 'panel' information in plot 2 with that
# from plot 1
# NOTE(review): this relies on ggplot_build() returning a list with a 'panel'
# element, which presumably depends on the installed ggplot2 version - confirm.
p3b[['panel']] <- p1b[['panel']]
# render the revised plot
# for comparison
library(gridExtra)
# Stack the original, the new-data plot and the patched plot in one figure.
grid.arrange(p1 , p2, ggplot_gtable(p3b))

Resources