add y=0 line in some plots facet_grid ggplot2 - r

I have a big plot, using facet_grid().
I want to add a vertical line to indicate y=0, but only in some of the plot.
Reproducible example -
df <- data.frame(x = 1:100, y = rnorm(100,sd=0.5), type = rep(c('A','B'), 50))
ggplot(df) + facet_grid(type~.) +
geom_point(data = df[df$type == 'A',], mapping = aes(x=x, y=y)) +
geom_rect(data = df[df$type == 'B',], mapping=aes(xmin=x,ymin=0,xmax=(x+2),ymax=y)) +
theme(panel.background=element_rect(fill="white"))
I want the line only in the top ptot for example.

Just create another data object for an hline geom and make sure to include the relevant faceted variable.
df <- data.frame(x = 1:100, y = rnorm(100,sd=0.5), type = rep(c('A','B'), 50))
ggplot(df) + facet_grid(type~.) +
geom_point(data = df[df$type == 'A',], mapping = aes(x=x, y=y)) +
geom_rect(data = df[df$type == 'B',], mapping=aes(xmin=x,ymin=0,xmax=(x+2),ymax=y)) +
geom_hline(data = data.frame(type="A", y=0), mapping=aes(yintercept=y)) +
theme(panel.background=element_rect(fill="white"))

Related

How to add legend of boxplot and points in ggplot2?

I have the following to plot a boxplot of some data "Samples" and add points of the "Baseline" and "Theoretical" data.
library(reshape2)
library(ggplot2)
meltshear <- melt(Shear)
samples <- rep(c("Samples"), each = 10)
baseline <- c("Baseline",samples)
method <- rep(baseline, 4)
xlab <- rep(c("EXT.Single","EXT.Multi","INT.Single","INT.Multi"), each = 11)
plotshear <- data.frame(Source = c(method,"theoretical","theoretical","theoretical"),
Shear = c(xlab,"EXT.Multi","INT.Single","INT.Multi"),
LLDF = c(meltshear[,2],0.825,0.720,0.884))
data <- subset(plotshear, Source %in% c("Samples"))
baseline <- subset(plotshear, Source %in% c("Baseline"))
theoretical <- subset(plotshear, Source %in% c("theoretical"))
ggplot(data = data, aes(x = Shear, y = LLDF)) + geom_boxplot(outlier.shape = NA) +
stat_summary(fun = mean, geom="point", shape=23, size=3) +
stat_boxplot(geom='errorbar', linetype=1, width=0.5) +
geom_jitter(data = baseline, colour = "green4") +
geom_jitter(data = theoretical, colour = "red")
I get the following plot but I cannot add the legend to the plot. I want to have the legend showing labels = c("Samples","Baseline","Theoretical") for the boxplot shape, green dot, and red dot respectively.
You could try to add fill into aes.
ggplot(data = data, aes(x = Shear, y = LLDF, fill = Shear))
Or you can see this resource, maybe it is useful http://www.cookbook-r.com/Graphs/

overlaying plots from different dataframes in ggplot without messing with legend

I want to overlay two plots: one is a simple point plot where a variable is used to control the dot size; and another is a simple curve.
Here is a dummy example for the first plot;
library(ggplot2)
x <- seq(from = 1, to = 10, by = 1)
df = data.frame(x=x, y=x^2, v=2*x)
ggplot(df, aes(x, y, size = v)) + geom_point() + theme_classic() + scale_size("blabla")
Now lets overlay a curve to this plot with data from another dataframe:
df2 = data.frame(x=x, y=x^2-x+2)
ggplot(df, aes(x, y, size = v)) + geom_point() + theme_classic() + scale_size("blabla") + geom_line(data=df2, aes(x, y), color = "blue") + scale_color_discrete(name = "other", labels = c("nanana"))
It produces the error:
Error in FUN(X[[i]], ...) : object 'v' not found
The value in v is not used to draw the intended curse, but anyway, I added a dummy v to df2.
df2 = data.frame(x=x, y=x^2-x+2, v=replicate(length(x),0)) # add a dummy v
ggplot(df, aes(x, y, size = v)) + geom_point() + theme_classic() + scale_size("blabla") + geom_line(data=df2, aes(x, y), color = "blue") + scale_color_discrete(name = "other", labels = c("nanana"))
An the result has a messed legend:
What is the right way to achieve the desired plot?
You can put the size aes in the geom_point() call to make it so that you don't need the dummy v in df2.
Not sure exactly what you want regarding the legend. If you replace the above, then the blue portion goes away. If you want to have a legend for the line color, then you have to place color inside the geom_line aes call.
x <- seq(from = 1, to = 10, by = 1)
df = data.frame(x=x, y=x^2, v=2*x)
df2 = data.frame(x=x, y=x^2-x+2)
ggplot(df, aes(x, y)) +
geom_point(aes(size = v)) +
theme_classic() +
scale_size("blabla") +
geom_line(data=df2, aes(x, y, color = "blue")) +
scale_color_manual(values = "blue", labels = "nanana", name = "other")

Make overlapping histogram in with geom_histogram

I am trying to make an overlapping histogram like this:
ggplot(histogram, aes = (x), mapping = aes(x = value)) +
geom_histogram(data = melt(tpm_18_L_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_S_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_N_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
My code can only make them plot side by side and I would like to also make them overlap. Thank you! I based mine off of the original post where this came from but it did not work for me. It was originally 3 separate graphs which I combined with grid and ggarrange. It looks like this right now.
Here is the code of the three separate graphs.
SD_18_L <- ggplot(data = melt(tpm_18_L_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
SD_18_S <- ggplot(data = melt(tpm_18_S_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
SD_18_N <- ggplot(data = melt(tpm_18_N_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
What my graphs look like now:
ggplot expects dataframes in a long format. I'm not sure what your data looks like, but you shouldn't have to call geom_histogram for each category. Instead, get all your data into a single dataframe (you can use rbind for this) in long format (what you're doing already with melt) first, then feed it into ggplot and map fill to whatever your categorical variable is.
Your call to facet_wrap is what puts them in 3 different plots. If you want them all on the same plot, take that line out.
An example using the iris data:
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
geom_histogram(alpha = 0.6, position = "identity")
I decreased alpha in geom_histogram so you can see where colors overlap, and added position = "identity" so observations aren't being stacked. Hope that helps!

Plot with multiple breaks of different sizes

I would like to create a plot with multiple breaks of different sized intervals on the y axis. The closest post I could find is this Show customised X-axis ticks in ggplot2 But it doesn't fully solve my problem.
# dummy data
require(ggplot2)
require(reshape2)
a<-rnorm(mean=15,sd=1.5, n=100)
b<-rnorm(mean=1500,sd=150, n=100)
df<-data.frame(a=a,b=b)
df$x <- factor(seq(100), ordered = T)
df.m <- melt(df)
ggplot(data = df.m, aes(x = x, y=value, colour=variable, group=variable)) +
geom_line() + scale_y_continuous(breaks = c(seq(from = 0, to = 20, by = 1),
seq(from = 1100, to = max(y), by = 100))) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
The problem is how to get the first set of breaks to be proportional to the second (thus visible).
Any pointer would be very much appreciated, thanks!
You can try something like this:
# Rearrange the factors in the data.frame
df.m$variable <- factor(df.m$variable, levels = c("b", "a"))
ggplot(data = df.m, aes(x = x, y=value, colour=variable, group=variable)) +
geom_line() + facet_grid(variable~., scales = "free")
Hope this helps

Adding lines to a qplot that don't cover whole axes

I want to add partial lines (dont expand whole plot) to a scatterplot. I have two separate plots working for this, but can't figure out how to join them together:
First plot -
p = qplot(x, y, data=data) + theme_bw() + theme(aspect.ratio=1)
lines = data.frame(x = c(-2,-2,5), y = c(0,2,2))
Second plot -
lines = data.frame(x = c(-2,-2,5), y = c(0,2,2))
base = ggplot(lines, aes(x, y))
base + geom_path(size = 1)
How to overlay them? Thanks.
Can you try:
p = qplot(x, y, data=data) + theme_bw() + theme(aspect.ratio=1)
lines = data.frame(x = c(-2,-2,5), y = c(0,2,2))
p+geom_line(data = lines, aes(x=x, y=y))
Taking an example dataset mydf:
Dept <- c("Res","Bankings","Customer","Legal","Collection","Business")
YScore <- c(1,2,3,1,2,3)
XScore <- c(2,1,3,1,2,4)
mydf<-data.frame(Dept,YScore,XScore)
p = qplot(XScore, YScore, data=mydf) + theme_bw() + theme(aspect.ratio=1)
lines = data.frame(x = c(-2,-2,5), y = c(0,2,2))
p+geom_line(data = lines, aes(x=x, y=y))

Resources