R running average for non-time data - r

This is the plot I'm having now.
It's generated from this code:
ggplot(data1, aes(x=POS,y=DIFF,colour=GT)) +
geom_point() +
facet_grid(~ CHROM,scales="free_x",space="free_x") +
theme(strip.text.x = element_text(size=40),
strip.background = element_rect(color='lightblue',fill='lightblue'),
legend.position="top",
legend.title = element_text(size=40,colour="lightblue"),
legend.text = element_text(size=40),
legend.key.size = unit(2.5, "cm")) +
guides(fill = guide_legend(title.position="top",
title = "Legend:GT='REF'+'ALT'"),
shape = guide_legend(override.aes=list(size=10))) +
scale_y_log10(breaks=trans_breaks("log10", function(x) 10^x, n=10)) +
scale_x_continuous(breaks = pretty_breaks(n=3)) +
geom_line(stat = "hline",
yintercept = "mean",
size = 1)
The last line, geom_line creates the mean line for each panel.
But now I want to have the more specific running average inside each panel.
i.e. If panel1('chr01') has x-axis range from 0 to 100,000,000, I would want to have the mean value for each 1,000,000 range.
mean1 = mean(x=0 to x=1,000,000)
mean2 = mean(x=1,000,001 to x=2,000,000)

One way to provide a running mean is with geom_smooth() using the loess local regression method. In order to demonstrate my proposed solution, I created a fake genomic dataset using R functions. You can adjust the span parameter of geom_smooth to make the running mean smoother (closer to 1.0) or rougher (closer to 1/number of data points).
# Create example data: three chromosomes of different lengths, each with its
# own signal layered on top of Gaussian noise.
set.seed(27182)
# chr01: 10,000 points; a bell-shaped bump is added to the middle 8,000.
y1 <- rnorm(10000) +
  c(rep(0, 1000), dnorm(seq(-2, 5, length.out = 8000)) * 3, rep(0, 1000))
# chr02: 6,000 points made of four segments with different mean / sd.
y2 <- c(rnorm(2000), rnorm(1000, mean = 1.5), rnorm(1000, mean = -1, sd = 2),
        rnorm(2000, sd = 2))
# chr03: 4,000 points of pure noise.
y3 <- rnorm(4000)
# Positions are sorted within each chromosome and line up with y1 / y2 / y3.
pos <- c(sort(runif(10000, min = 0, max = 1e8)),
         sort(runif(6000, min = 0, max = 6e7)),
         sort(runif(4000, min = 0, max = 4e7)))
chr <- rep(c("chr01", "chr02", "chr03"), c(10000, 6000, 4000))
data1 <- data.frame(CHROM = chr, POS = pos, DIFF = c(y1, y2, y3))
# Plot.
# loess() tuning arguments that are not geom_smooth() parameters (here
# `degree`) have to be supplied through `method.args`. Passed directly,
# `degree = 0` is dropped as an unknown parameter and loess falls back to its
# default local-quadratic fit instead of a local (running) mean.
p <- ggplot(data1, aes(x = POS, y = DIFF)) +
  geom_point(alpha = 0.1, size = 1.5) +
  geom_smooth(colour = "darkgoldenrod1", size = 1.5, method = "loess",
              method.args = list(degree = 0), span = 0.1, se = FALSE) +
  # One tick every 10 Mb, labelled in megabases.
  scale_x_continuous(breaks = seq(1e7, 3e8, 1e7),
                     labels = paste(seq(10, 300, 10)), expand = c(0, 0)) +
  xlab("Position, Megabases") +
  theme(axis.text.x = element_text(size = 8)) +
  facet_grid(. ~ CHROM, scales = "free", space = "free")
ggsave(filename = "plot_1.png", plot = p, width = 10, height = 5, dpi = 150)

Related

Generate two plots with same x axis breaks

I have two plots, and I want their x axes to have breaks in the same positions.
This is the code for plot 1:
m <- read.csv('Finalfor1lowergreaterthan1.csv', header=T, row.names=1)
m <- m[m$SVM.Count >= 40,]
boxOdds = m$Odd
df <- data.frame(
yAxis = length(boxOdds):1,
boxnucleotide = m$Position,
boxCILow = m$lower,
boxCIHigh = m$upper,
Mutation = m$Resistance)
ticksy <- c(seq(0,0.3,by=.1), seq(0, 1, by =.5), seq(0, 20, by =5), seq(0, 150, by =50))
ticksx <- c(seq(0,300,by=25))
p <- ggplot(df,
aes(x = boxnucleotide, y = boxOdds, colour=Mutation, label=rownames(m)))
p1 <- p +
geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5, height = .01) +
geom_point(size = 1) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_y_continuous(breaks=ticksy, labels = ticksy) +
scale_x_continuous(breaks=ticksx, labels = ticksx) +
coord_trans(y = "log10") +
ylab("Odds ratio (log scale)") +
scale_color_manual(values=c("#00BFC4","#F8766D","#619CFF")) +
xlab("Integrase nucleotide position") +
geom_text(size=2,hjust=0, vjust=0)
Then I have another plot:
m <- read.csv('Finalfor20lowergreaterthan1.csv', header=T, row.names=1)
#m <- m[m$SVM.Count >= 40, ]
boxOdds = m$Odd
df <- data.frame(
yAxis = length(boxOdds):1,
boxnucleotide = m$Position,
boxCILow = m$lower,
boxCIHigh = m$upper,
Mutation = m$Resistance)
ticksy <- c(seq(0,0.3,by=.1), seq(0, 1, by =.5), seq(0, 20, by =5), seq(0, 150, by =50))
ticksx <- c(seq(0,300,by=25))
p <- ggplot(df,
aes(x = boxnucleotide, y = boxOdds, colour=Mutation, label=rownames(m)))
p1 <- p +
geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5, height = .01) +
geom_point(size = 1) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_y_continuous(breaks=ticksy, labels = ticksy) +
scale_x_continuous(breaks=ticksx, labels = ticksx) +
coord_trans(y = "log10") +
ylab("Odds ratio (log scale)") +
scale_color_manual(values=c("#00BFC4","#F8766D","#619CFF")) +
xlab("Integrase nucleotide position") +
geom_text(size=2,hjust=0, vjust=0)
Why does plot 1 start at 75 on the x axis while plot 2 starts at 100? How can I make plot 2 start at 75 as well, so that it is scaled like plot 1?
Both snippets contain the same line: ticksx <- c(seq(0, 300, by=25))
A good technique to align the axis range on different plots is to use expand_limits.
You can simply use p1 + expand_limits(x=c(0, 300)). This will ensure the x-axis contains at least 0 and 300 on all your plots. You can also control the y-axis range by using the y argument.
From ?expand_limits:
Sometimes you may want to ensure limits include a single value, for all panels or all plots. This function is a thin wrapper around geom_blank() that makes it easy to add such values.

Why wont it let me add a legend to my graph on ggplot2 in R? [duplicate]

This question already has answers here:
Construct a manual legend for a complicated plot
(2 answers)
ggplot legends when plot is built from two data frames
(1 answer)
Closed 4 years ago.
I would like to add a legend to this but am struggling to find out how. Can someone please help?
df1 <- data.frame(Rate=rnorm(10, 500, 100), Damage=rnorm(10, 50, 15))
x1 <- data.frame(R=rnorm(20, 550, 50), V=rnorm(20, 35, 10))
ggplot(df1,aes(x=Rate,y=Damage)) +
geom_smooth(method="auto",se=FALSE) +
geom_smooth(data=x1, mapping=aes(x=R, y=V),
method="auto",col="firebrick",se=FALSE) +
coord_cartesian(xlim=c(0,1000), ylim=c(0, 100)) +
ggtitle("", subtitle="PPS post-emergence") +
theme_bw() +
scale_y_continuous(breaks=seq(0, 100, 20),) +
xlab("Rate (mg/Ha)") +
ylab("")
The easiest way to do what you want is to merge your data. But you can also do a manual color mapping. I'll show you both below.
Without merging your data
You want to create a manual color scale. The trick is to pass the color in aes then add a scale_color_manual to map names to colors.
ggplot(df1,aes(x=Rate,y=Damage)) +
geom_smooth(aes(col = "val1"), method="auto",se=FALSE) +
geom_smooth(data=x1, mapping=aes(x=R, y=V, col="val2"),
method="auto",se=FALSE) +
coord_cartesian(xlim=c(0,1000), ylim=c(0, 100)) +
ggtitle("", subtitle="PPS post-emergence") +
theme_bw() +
scale_y_continuous(breaks=seq(0, 100, 20),) +
xlab("Rate (mg/Ha)") +
ylab("") +
scale_color_manual("My legend", values=c("val1" = "firebrick",
"val2" = "steelblue"))
Fewer lines by using labs
By the way, there is a simpler way to set the title (or subtitle) and axis labels with labs. You don't have to pass a title so you gain some vertical space and passing NULL (instead of "") as the y label actually removes it which gains some horizontal space.
Below, the picture is the same size but the graph occupies a larger part of it.
ggplot(df1,aes(x=Rate,y=Damage)) +
geom_smooth(aes(col = "val1"), method="auto",se=FALSE) +
geom_smooth(data=x1, mapping=aes(x=R, y=V, col="val2"),
method="auto",se=FALSE) +
coord_cartesian(xlim=c(0,1000), ylim=c(0, 100)) +
theme_bw() +
scale_y_continuous(breaks=seq(0, 100, 20),) +
labs(subtitle="PPS post-emergence",
x = "Rate (mg/Ha)",
y = NULL) +
scale_color_manual("My legend", values=c("val1" = "firebrick",
"val2" = "steelblue"))
Merging your data
The best way of doing it would actually be to merge your data while keeping track of the source, then use source as the color. Much cleaner but not always possible.
df <- bind_rows(
mutate(df1, source="df1"),
x1 %>% rename(Rate = R, Damage = V) %>%
mutate(source="x1")
)
ggplot(df, aes(x=Rate, y=Damage, col=source)) +
geom_smooth(method="auto", se=FALSE) +
coord_cartesian(xlim=c(0,1000), ylim=c(0, 100)) +
theme_bw() +
scale_y_continuous(breaks=seq(0, 100, 20),) +
labs(subtitle="PPS post-emergence",
x = "Rate (mg/Ha)",
y = NULL)
We could row bind then plot:
library(ggplot2)
library(dplyr)
set.seed(1)
df1 <- data.frame(Rate=rnorm(10, 500, 100), Damage=rnorm(10, 50, 15))
x1 <- data.frame(R=rnorm(20, 550, 50), V=rnorm(20, 35, 10))
plotDat <- rbind(df1 %>% transmute(x = Rate, y = Damage, grp = "df1"),
x1 %>% transmute(x = R, y = V, grp = "x1"))
ggplot(plotDat, aes(x, y, col = grp)) +
geom_smooth(se = FALSE)

Dealing with different sample sizes for facet histograms in ggplot2

I would like to make stacked (facet_grid) size histograms in ggplot2 by Year. The years have different sample sizes. I have not been able to get ..density.. to produce correct proportions for each histogram bin, so I've been using ..count../(sample size number). From my reading about the stat transformation ..count.., you cannot perform an operation on it with an object (e.g. nrow(data)). How can I get these stacked histograms with different sample sizes? The format in the code below would produce a figure that matches other figures for a report, which is why I would like to stick with ggplot2, but maybe there are other packages. Here is an example:
d1 <- as.data.frame(round(rnorm(121, 86, 28), 0))
colnames(d1) <- "Length"
d1$Year <- "2015"
d2 <- as.data.frame(round(rnorm(86, 70, 32), 0))
colnames(d2) <- "Length"
d2$Year <- "2016"
D <- rbind(d1, d2)
ggplot(D, aes(x = Length)) +
geom_histogram(aes(y = ..count../nrow(D)),
breaks=seq(0, 160, by = 3),
col="black",
fill="grey48",
alpha = .8)+
labs(title = "Size by Year", x = "Length", y = "frequency") +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
theme_bw() +
theme(text = element_text(size=16),
axis.text.y = element_text(size=12)) +
geom_vline(aes(xintercept = 95.25),
colour = "red", size = 1.3)+
facet_grid(Year ~ .)
The ..count../nrow(D) part won't work: it needs the sample size for each year when I facet with facet_grid(Year ~ .).
Is this what you are looking for? You didn't specify what went wrong when you used ..density.., but it seems like you just need to scale by the binwidth. ..density.. scales so that the total bar area is 1, meaning that each bar has height ..count.. / (n * binwidth). You just want the height to be ..count.. / n, which is ..density.. * binwidth. So set the binwidth manually (you should do this anyway) and multiply by it.
set.seed(1234)
d1 <- as.data.frame(round(rnorm(121, 86, 28), 0))
colnames(d1) <- "Length"
d1$Year <- "2015"
d2 <- as.data.frame(round(rnorm(86, 70, 32), 0))
colnames(d2) <- "Length"
d2$Year <- "2016"
D <- rbind(d1, d2)
library(ggplot2)
# Single definition of the bin width: it sets the bins AND rescales the
# density, so the two values can never drift apart.
bin_width <- 5
ggplot(D, aes(x = Length)) +
  # ..density.. is count / (n * binwidth), so multiplying by the bin width
  # gives count / n, i.e. the proportion of that panel's sample in each bin.
  geom_histogram(aes(y = ..density.. * bin_width), binwidth = bin_width) +
  geom_vline(aes(xintercept = 95.25), colour = "red", size = 1.3) +
  facet_grid(Year ~ .) +
  labs(title = "Size by Year", x = "Length", y = "frequency") +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    axis.text.y = element_text(size = 12)
  )
Created on 2018-09-19 by the reprex package (v0.2.0).

how to set ticks axes text size using geom_boxplot in ggplot?

I'm trying, without success, to increase the size of the status labels (the yes/no tick labels) on the boxplot below.
What am I doing wrong?
Thanks in advance.
status <- sample(c("yes", "no"), 50, replace=T)
value <- rnorm(50, mean = 5, sd = 3)
data <- melt(data.frame(status, value),measure.vars="value")
p <- ggplot ( data, aes(x=status, y=value) ) +
geom_boxplot(outlier.shape=10, outlier.size=4) +
geom_jitter(aes(colour=value), size=3, position = position_jitter(width = .3)) +
coord_flip() +
scale_colour_gradient(limits=c(min(data$value), max(data$value)),low="#0000FF", high="#FF0000") +
labs(title="Title",x="status", y="value", colour="value")
p + theme(plot.title = element_text(size = rel(5)),
axis.title.x =element_text(size=10)
)
p
If I understand your question right, you need to use axis.text rather than axis.title.
library(reshape2)
library(ggplot2)
# Example data: 50 yes/no labels with a numeric value each.
status <- sample(c("yes", "no"), 50, replace = TRUE)
value <- rnorm(50, mean = 5, sd = 3)
data <- melt(data.frame(status, value), measure.vars = "value")
ggplot(data, aes(x = status, y = value)) +
  geom_boxplot(outlier.shape = 10, outlier.size = 4) +
  geom_jitter(aes(colour = value), size = 3,
              position = position_jitter(width = .3)) +
  coord_flip() +
  scale_colour_gradient(limits = c(min(data$value), max(data$value)),
                        low = "#0000FF", high = "#FF0000") +
  labs(title = "Title", x = "status", y = "value", colour = "value") +
  # axis.text.* styles the tick labels (axis.title.* only styles the axis
  # name). After coord_flip() the status labels sit on the vertical axis,
  # hence axis.text.y.
  theme(plot.title = element_text(size = rel(5)),
        axis.text.y = element_text(size = 20))

Histogram with "negative" logarithmic scale in R

I have a dataset with some outliers, such as the following
x <- rnorm(1000,0,20)
x <- c(x, 500, -500)
If we plot this on a linear x-axis scale, we see:
histogram(x)
I worked out a nice way to put it on a log scale using this useful thread:
how to use a log scale for y-axis of histogram in R? :
# Wrap the vector in a data frame for ggplot; the column is named `x`.
mat <- data.frame(x)
# Histogram of mat$x on a log10 x axis. log10 is undefined for x <= 0, so the
# non-positive values cannot be placed on this axis.
ggplot(mat, aes(x = x)) +
  geom_histogram(colour = "darkblue", size = 1, fill = "blue") +
  scale_x_log10()
However, I would like the x axis labels from this 2nd example to match that of the first example, except with a kind of "negative log" - i.e. first tick (moving from the centre to the left) could be -1, then the next could be -10, the next -100, but all equidistant. Does that make sense?
I am not sure I understand your goal, but when you want a log-like transformation yet have zeroes or negative values, the inverse hyperbolic sine transformation asinh() is often a good option. It is log-like for large values and is defined for all real values. See Rob Hyndman's blog and this question on stats.stackexchange.com for discussion, details, and other options.
If this is an acceptable approach, you can create a custom scale for ggplot. The code below demonstrates how to create and use a custom scale (with custom breaks), along with a visualization of the asinh() transformation.
library(ggplot2)
library(scales)
limits <- 100
step <- 0.005
demo <- data.frame(x=seq(from=-1*limits,to=limits,by=step))
# Transformation object for an inverse-hyperbolic-sine axis: asinh() maps data
# values onto the axis and sinh() maps axis positions back to data values.
asinh_trans <- function() {
  to_axis <- function(x) asinh(x)
  from_axis <- function(x) sinh(x)
  trans_new(name = 'asinh', transform = to_axis, inverse = from_axis)
}
ggplot(demo,aes(x,x))+geom_point(size=2)+
scale_y_continuous(trans = 'asinh',breaks=c(-100,-50,-10,-1,0,1,10,50,100))+
theme_bw()
ggplot(demo,aes(x,x))+geom_point(size=2)+
scale_x_continuous(trans = 'asinh',breaks=c(0,1,10,50,100))+
scale_y_log10(breaks=c(0,1,10,50,100))+ # zero won't plot
xlab("asinh() scale")+ylab("log10 scale")+
theme_bw()
Realizing that the question is fairly old, I decided to answer it anyway since I ran into exactly the same problem.
I see that some answers above misunderstood your original question. I think it is a valid visualization question and I outline below my solution that will hopefully be useful for others as well.
My approach was to use ggplot and create custom log transform for x and y axis (as well as custom break generators)
library(ggplot2)
library(scales)
# Symmetric log-style x-axis transformation (..., -10, -3, -1, 0, 1, 3, 10, ...).
# Forward map: sign(x) * log(|x| + 1); defined for negative, zero and positive x.
# Inverse map: sign(y) * (exp(|y|) - 1).
custom_log_x_trans <- function() {
  signed_log <- function(x) {
    sign(x) * log(abs(x) + 1)
  }
  signed_exp <- function(y) {
    sign(y) * (exp(abs(y)) - 1)
  }
  trans_new("custom_log_x",
            transform = signed_log,
            inverse = signed_exp,
            domain = c(-Inf, Inf))
}
# Custom log x breaker (..., -10, -3, -1, 0, 1, 3, 10, ...).
#
# x: numeric vector of the data shown on the x axis.
# Returns a sorted numeric vector holding 0 plus +/-1*10^k and +/-3*10^k, with
# the powers k chosen from the largest finite absolute value in `x`.
# Returns just 0 when `x` has no finite non-zero value (empty, all NA, all
# zero), because 0:log10(.) cannot be formed in that case.
custom_x_breaks <- function(x)
{
  finite_abs <- abs(x[is.finite(x)])
  if (length(finite_abs) == 0 || max(finite_abs) == 0) {
    return(0)
  }
  max_abs <- max(finite_abs)
  # 0:log10(.) truncates towards the largest whole power of ten in range.
  ones <- 10^(0:log10(max_abs))
  threes <- 3 * 10^(0:log10(max_abs / 3))
  sort(c(0, ones, threes, -ones, -threes))
}
# Log-style y-axis transformation for non-negative values (0, 1, 3, 10, ...).
# Forward map: log(|x| + 1); inverse map: exp(|y|) - 1.
custom_log_y_trans <- function() {
  shifted_log <- function(x) {
    log(abs(x) + 1)
  }
  shifted_exp <- function(y) {
    exp(abs(y)) - 1
  }
  trans_new("custom_log_y",
            transform = shifted_log,
            inverse = shifted_exp,
            domain = c(0, Inf))
}
# Custom log y breaker (0, 1, 3, 10, ...).
#
# x: the data vector being binned. Only its length is used: the number of
#    observations serves as the upper bound for the count axis.
# Returns a sorted numeric vector holding 0 plus 1*10^k and 3*10^k.
# Returns just 0 for empty input, where 0:log10(0) cannot be formed.
custom_y_breaks <- function(x)
{
  n_obs <- length(x)
  if (n_obs == 0) {
    return(0)
  }
  ones <- 10^(0:log10(n_obs))
  threes <- 3 * 10^(0:log10(n_obs / 3))
  sort(c(0, ones, threes))
}
# Histogram of mat$x drawn on the custom signed-log x axis and the log-style
# count axis; bars are coloured by their count.
ggplot(data=mat) +
  geom_histogram(aes(x=x,fill=..count..),
                 binwidth = 1, color="black", size=0.1) +
  scale_fill_gradient("Count", low = "steelblue", high = "red") +
  # coord_trans() applies the transformation when drawing, after the
  # histogram has been binned on the original data scale.
  coord_trans(x="custom_log_x",y="custom_log_y") +
  scale_x_continuous(breaks = custom_x_breaks(mat$x)) +
  scale_y_continuous(breaks = custom_y_breaks(mat$x)) +
  # theme_bw() is a complete theme and replaces all earlier theme settings,
  # so it must come before the axis-text tweak for the rotation to take effect.
  theme_bw() +
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
which gives me the following plot.
Note that:
the plot also includes coloring scheme to show visually the absolute value of each bar.
the bins become increasingly thinner as x increases (side effect of log-transform)
In either case, the two outliers are clearly visible.
I found a way to cheat. I say "cheat" because it actually plots the negative and positive parts of the data separately. Thus you cannot compare the negative and positive data directly; you can only show the distributions of the negative and positive parts side by side.
One problem is that if there are zero values in your data, they will not be shown in the plot.
# Reversed log transformation for a ggplot2 axis: values are mapped to
# -log(x, base), so larger values get smaller axis positions.
#
# base: logarithm base (defaults to e).
# Returns the transformation object built by trans_new().
reverselog_trans <- function(base = exp(1)) {
  trans <- function(x) -log(x, base)
  inv <- function(x) base^(-x)
  # `breaks` is passed by name: newer scales releases place other parameters
  # (d_transform, d_inverse) right after `inverse`, so a positional fourth
  # argument would no longer be taken as the break function.
  trans_new(paste0("reverselog-", format(base)), trans, inv,
            breaks = log_breaks(base = base),
            domain = c(1e-100, Inf))
}
quartz();
dist1 <- ggplot(data=df.meltFUAC) +
geom_point(alpha=1,aes(x=deltaU.deltaUltrasensitivity,y=deltaF.deltaFitness,
colour=deltaF.w_c)) +
scale_x_continuous(name = expression(Delta * S[ult]),
limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05),
labels=c("1e-01","1e-03","1e-05")) +
scale_y_continuous(name = expression(paste(Delta, " Fitness")),trans = "log10",
limits = c(1e-7,1), breaks=c(1e-01,1e-03,1e-05),
labels=c("1e-01","1e-03","1e-05")) +
theme_bw() +
theme(legend.position = "none", axis.title.x=element_blank(),strip.background=element_blank(),
panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),plot.background=element_blank(),
plot.margin=unit(c(0,0,0,-11),"mm"))
dist2 <- ggplot(data=df.meltFUAC, aes(x=-deltaU.deltaUltrasensitivity,y=deltaF.deltaFitness,
colour=deltaF.w_c)) +
geom_point(alpha=1) +
scale_x_continuous(name = expression(Delta * sqrt(S[ult] %.% S[amp])),limits=c(1,1e-7),
trans = reverselog_trans(10),breaks=c(1e-01,1e-03,1e-05),
labels=c("-1e-01","-1e-03","-1e-05")) +
scale_y_continuous(name = expression(paste(Delta, " Fitness")),trans = "log10",
limits = c(1e-7,1), breaks=c(1e-01,1e-03,1e-05),
labels=c("1e-01","1e-03","1e-05")) +
theme_bw() +
theme(legend.position = "none",strip.background=element_blank(),panel.border=element_rect(colour = "black"),
axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),
axis.line.y=element_line(colour="black",size=1,linetype="solid"),axis.title.x=element_blank(),
panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),
plot.margin=unit(c(0,-8,0,2.5),"mm"))
# Marginal density of deltaF.deltaFitness, filled and outlined by deltaF.w_c,
# on a log10 axis and flipped with coord_flip(). All axis text, ticks and
# titles are blanked.
hist0 <- ggplot(data=df.meltFUAC, aes(deltaF.deltaFitness,fill=deltaF.w_c)) +
  #geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') +
  geom_density(alpha = 0.5, aes(colour=deltaF.w_c)) +
  scale_x_continuous(name = expression(paste(Delta, " Fitness")),
                     limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05),
                     labels=c("1e-01","1e-03","1e-05")) +
  scale_y_continuous(name = "Density", limits=c(0,0.6)) +
  theme_bw() +
  # Each theme element is given exactly once: theme() rejects a call in which
  # the same named argument (axis.title.x) appears twice.
  theme(legend.position = "none", strip.background=element_blank(),
        axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),
        axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title.x=element_blank(),
        panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),plot.background=element_blank(),
        plot.margin=unit(c(0,5,2.5,-2.5),"mm")) +
  coord_flip()
# Marginal density of deltaU.deltaUltrasensitivity (positive side, log10
# axis), filled and outlined by deltaF.w_c. All axis text, ticks and titles
# are blanked; only the x axis line is drawn.
hist1 <- ggplot(data=df.meltFUAC, aes(deltaU.deltaUltrasensitivity,fill=deltaF.w_c)) +
  #geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') +
  geom_density(alpha = 0.5, aes(colour=deltaF.w_c)) +
  scale_x_continuous(name = expression(Delta * S[ult]),
                     limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05),
                     labels=c("1e-01","1e-03","1e-05")) +
  scale_y_continuous(name = "Density", limits=c(0,0.6)) +
  theme_bw() +
  # Each theme element is given exactly once: theme() rejects a call in which
  # the same named argument (axis.title.x) appears twice.
  theme(legend.position = "none", strip.background=element_blank(),
        axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),
        axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title.x=element_blank(),
        axis.line.x=element_line(colour="black",size=1,linetype="solid"),
        panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),plot.background=element_blank(),
        plot.margin=unit(c(5,0,-2.5,2),"mm"))
hist2 <- ggplot(data=df.meltFUAC, aes(-deltaU.deltaUltrasensitivity,fill=deltaF.w_c)) +
#geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') +
geom_density(alpha = 0.5, aes(colour=deltaF.w_c)) +
scale_x_continuous(name = expression(Delta * S[ult]),limits=c(1,1e-7),
trans = reverselog_trans(10),breaks=c(1e-01,1e-03,1e-05),
labels=c("-1e-01","-1e-03","-1e-05")) +
scale_y_continuous(name = "Density", limits=c(0,0.6)) +
theme_bw() +
theme(legend.position = "none",strip.background=element_blank(),panel.border=element_rect(colour = "black"),
axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),
axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title.x=element_blank(),
axis.line.y=element_line(colour="black",size=1,linetype="solid"),
axis.line.x=element_line(colour="black",size=1,linetype="solid"),
panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),
plot.margin=unit(c(5,-8,-2.5,2.5),"mm"))
grid.newpage();
pushViewport(viewport(layout = grid.layout(3, 3, widths = unit(c(4,4,2),"null"),
heights=unit(c(2,7.5,0.5),"null"))));
vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y);
print(dist2, vp = vplayout(2, 1));
print(dist1, vp = vplayout(2, 2));
print(hist2, vp = vplayout(1, 1));
print(hist1, vp = vplayout(1, 2));
print(hist0, vp = vplayout(2, 3));
grid.text(expression(Delta * Ultrasensitivity),vp = vplayout(3,1:2),x = unit(0.55, "npc"),
y = unit(0.9, "npc"),gp=gpar(fontsize=12, col="black"));
dev.copy2pdf(file=sprintf("%s/_dist/dist_hist_deltaF_deltaU_wc_01vs10.pdf", resultDir));
dev.off();
Here is the graph I got (but you need to put the legend on manually):
Or a simpler one:
# Reversed log transformation for a ggplot2 axis: values are mapped to
# -log(x, base), so larger values get smaller axis positions.
#
# base: logarithm base (defaults to e).
# Returns the transformation object built by trans_new().
reverselog_trans <- function(base = exp(1)) {
  trans <- function(x) -log(x, base)
  inv <- function(x) base^(-x)
  # `breaks` is passed by name: newer scales releases place other parameters
  # (d_transform, d_inverse) right after `inverse`, so a positional fourth
  # argument would no longer be taken as the break function.
  trans_new(paste0("reverselog-", format(base)), trans, inv,
            breaks = log_breaks(base = base),
            domain = c(1e-100, Inf))
}
quartz();
hist1 <- ggplot(deltaF, aes(deltaFitness,fill=w_c)) + guides(fill=guide_legend(title=expression(omega[c]))) + geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.05, aes(colour=w_c)) + scale_x_continuous(name = expression(paste(Delta, " Fitness")),trans = "log10");
hist1 <- hist1 + scale_y_continuous(name = "Density", limits=c(0,1));
#hist1 <- hist1 + theme(panel.background=element_blank(),panel.border=element_blank(),axis.line.x=element_blank(),axis.line.y=element_line(colour="black",linetype="solid",size=1),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(5,5,0,5),"mm"));
hist1 <- hist1 + theme_bw();
hist1 <- hist1 + theme(strip.background=element_blank(),panel.border=element_rect(colour = "black"),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(5,5,0,5),"mm"));
hist1 <- hist1 + scale_color_discrete(name=expression(omega[c]));# + geom_vline(xintercept=0, colour="grey", size = 1);# + geom_hline(yintercept=0, colour="grey", size = 0.5);
hist2 <- ggplot(deltaU, aes(deltaUltrasensitivity,fill=w_c)) + geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.05, aes(colour=w_c)) + scale_x_continuous(name = expression(paste(Delta, " Ultrasensitivity")), limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05),labels=c("1e-01","1e-03","1e-05"));
hist2 <- hist2 + scale_y_continuous(name = "Density",limits=c(0,1)) ;#+ geom_vline(xintercept=0, colour="grey", size = 1);# + geom_hline(yintercept=0, colour="grey", size = 0.5);
#hist2 <- hist2 + theme(legend.position = "none", axis.title.x=element_blank(),panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,5,0,-7.5),"mm"));
hist2 <- hist2 + theme_bw();
hist2 <- hist2 + theme(legend.position = "none", axis.title.x=element_blank(),strip.background=element_blank(),panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,5,0,-7.5),"mm"));
# + ggtitle("Positive part")
hist3 <- ggplot(deltaU, aes(-deltaUltrasensitivity,fill=w_c)) + geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.05, aes(colour=w_c)) + scale_x_continuous(name = expression(paste(Delta, " Ultrasensitivity")), limits=c(1,1e-7),trans = reverselog_trans(10),breaks=c(1e-01,1e-03,1e-05),labels=c("-1e-01","-1e-03","-1e-05"));
hist3 <- hist3 + scale_y_continuous(name = "Density", limits=c(0,1));# + geom_hline(yintercept=0, colour="black", size = 0.5);
#hist3 <- hist3 + theme(legend.position = "none",panel.background=element_blank(),axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),axis.line.y=element_line(colour="black",size=1,linetype="solid"),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,-7.5,0,5),"mm"));
hist3 <- hist3 + theme_bw();
hist3 <- hist3 + theme(legend.position = "none",strip.background=element_blank(),panel.border=element_rect(colour = "black"),axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),axis.line.y=element_line(colour="black",size=1,linetype="solid"),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,-7.5,0,5),"mm"));
# + ggtitle("Negative part")
grid.newpage();
pushViewport(viewport(layout = grid.layout(4, 2, widths = unit(c(5,5),"null"),heights=unit(c(4.6,0.4,4.6,0.4),"null"))));
vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y);
print(hist1, vp = vplayout(1, 1:2)); # key is to define vplayout
grid.text(expression(paste(Delta, " Fitness")),vp = vplayout(2,1:2),x = unit(0.5, "npc"), y = unit(0.9, "npc"),gp=gpar(fontsize=12, col="black"));
print(hist3, vp = vplayout(3, 1));
print(hist2, vp = vplayout(3, 2));
grid.text(expression(paste(Delta, " Ultrasensitivity")),vp = vplayout(4,1:2),x = unit(0.5, "npc"), y = unit(0.9, "npc"),gp=gpar(fontsize=12, col="black"));
dev.copy2pdf(file=sprintf("%s/deltaF_deltaU_wc_01vs10.pdf", resultDir));
dev.off();
Here is the graph I got:
Why suffer with ggplot2 solution? Your first plot was done with lattice histogram function, and this is where you should stay. Just apply logarithmic transformation directly within histogram function, use nint argument to specify the number of histogram bins, and type argument to choose between "count", or "density". I think that you got everything you need there, but maybe I'm missing some crucial detail of your question...
library(lattice)
histogram(log10(x), nint=50, type="count")

Resources