label only subset of points in a ggplot where x was modified - r

I have this kind of table:
dt <- data.table(titles=c('B','C','A','C'),
labs =c('b','c','a','c'),
values=c( 3, 2, 3, 4))
In order to plot the points without collapsing and re-ordering, I had to do the following trick with ggplot(): instead of aes(x=titles, y=values) I use aes(x=seq_len(nrow(dt)), y=values):
ggplot(data = dt,
aes(x=seq_len(nrow(dt)), y=values)) +
geom_point() +
geom_text(aes(label=labs)) +
scale_x_discrete(labels=dt$titles) + xlab('titles')
Now I want to have labels not for all points but only for a subset of them (for example, where values>2). This call doesn't work:
ggplot(data = dt,
aes(x=seq_len(nrow(dt)), y=values)) +
geom_point() +
geom_text(data=subset(dt, values>2), aes(label=labs)) +
scale_x_discrete(labels=dt$titles) + xlab('titles')
# Error: Aesthetics must be either length 1 or the same as the data (2): label, x, y
How to call geom_text() in this case?

I think you are looking for the inherit.aes=F option:
dt2 <- subset(dt, values>2)
ggplot(data = dt,
aes(x=seq_len(nrow(dt)), y=values)) +
geom_point() +
geom_text(data=dt2, aes(x=values, y=values, label=labs), inherit.aes=F) +
scale_x_discrete(labels=dt$titles) + xlab('titles')

Related

Change axis breaks without defining sequence - ggplot

Is there any way to set the break step size in ggplot without defining a sequence. For example:
x <- 1:10
y <- 1:10
df <- data.frame(x, y)
# Plot with auto scale
ggplot(df, aes(x,y)) + geom_point()
# Plot with breaks defined by sequence
ggplot(df, aes(x,y)) + geom_point() +
scale_y_continuous(breaks = seq(0,10,1))
# Plot with automatic sequence for breaks
ggplot(df, aes(x,y)) + geom_point() +
scale_y_continuous(breaks = seq(min(df$y),max(df$y),1))
# Does this exist?
ggplot(df, aes(x,y)) + geom_point() +
scale_y_continuous(break_step = 1)
You may say I am being lazy but there have been a few occasions where I have had to change the min and max limits of my seq due to the addition of error bars. So I just want to say...use a break size of x, with automatic scale limits.
You can define your own function to pass to the breaks argument. An example that would work in your case would be
f <- function(y) seq(floor(min(y)), ceiling(max(y)))
Then
ggplot(df, aes(x,y)) + geom_point() + scale_y_continuous(breaks = f)
gives
You could modify this to pass the step of the breaks, e.g.
f <- function(k) {
step <- k
function(y) seq(floor(min(y)), ceiling(max(y)), by = step)
}
then
ggplot(df, aes(x,y)) + geom_point() + scale_y_continuous(breaks = f(2))
would create a y-axis with ticks at 2, 4, .., 10, etc.
You can take this even further by writing your own scale function
my_scale <- function(step = 1, ...) scale_y_continuous(breaks = f(step), ...)
and just call it like
ggplot(df, aes(x,y)) + geom_point() + my_scale()
> # Does this exist?
> ggplot(df, aes(x,y)) + geom_point() +
> scale_y_continuous(break_step = 1)
If you're looking for an off-the-shelf solution, then you can use the scales::breaks_width() function like so:
scale_y_continuous(breaks = scales::breaks_width(1))
The scales package also includes handy functions to control breaks easily in "special" scales such as date-time, e.g. scale_x_datetime(breaks='6 hours').

ggplot2 geom_tile diagonal line overlay

I'm looking for a way to produce a diagonal slash from the bottom left the to top right corner of a cell within a plot made using geom_tile.
The input is a melted data frame with two categorical factor columns, sample and gene. I'd like to use something like geom_segment, but I'm not able to specify fractional increments. Any ideas on the best way to accomplish this?
edit: Here is a reproducible example, I can't share one from my own data, as it's protected patient information.
df <- data_frame( gene = c('TP53','TP53','MTOR','BRACA1'),
sample = c('A','B','A','B'),
diagonal = c(FALSE,TRUE,TRUE,FALSE),
effect = c('missense', 'nonsense', 'missense', 'silent') )
ggplot(df, aes(sample, gene)) + geom_tile(aes(fill = effect))
what I'm looking for:
One way to do it:
library(ggplot2)
df <- data.frame(
x = rep(c(2, 5, 7, 9, 12), 2),
y = rep(c(1, 2), each = 5),
z = factor(1:10),
w = rep(diff(c(0, 4, 6, 8, 10, 14)), 2)
)
p <- ggplot(df, aes(x, y)) + geom_tile(aes(fill = z))
gb <- ggplot_build(p)
p + geom_segment(data=gb$data[[1]][1:2, ],
aes(x=xmin, xend=xmax, y=ymin, yend=ymax),
color="white")
In your example, could also rely on the indices of the factor levels like this:
library(ggplot2)
df <- data.frame( gene = c('TP53','TP53','MTOR','BRACA1'),
sample = c('A','B','A','B'),
diagonal = c(FALSE,TRUE,TRUE,FALSE),
effect = c('missense', 'nonsense', 'missense', 'silent') )
df$cross <- c(F,T,T,F)
ggplot(df, aes(sample, gene)) +
geom_tile(aes(fill = effect)) +
geom_segment(data=transform(subset(df, !!cross), sample=as.numeric(sample), gene=as.numeric(gene)),
aes(x=sample-.49, xend=sample+.49, y=gene-.49, yend=gene+.49),
color="white", size=2)
(Note that I used data.frame and not dplyr::data_frame, so that both columns become factors.)
If you want a legend:
ggplot(df, aes(sample, gene)) +
geom_tile(aes(fill = effect)) +
geom_segment(data=transform(subset(df, !!cross), sample=as.numeric(sample), gene=as.numeric(gene)),
aes(x=sample-.49, xend=sample+.49, y=gene-.49, yend=gene+.49, color=cross),
size=2) +
scale_color_manual(values=c("TRUE"="white", "FALSE"=NA))
You can use geom_abline. You can tweak intercept and slope to get what you want. More info and examples here.
ggplot(df, aes(sample, gene)) +
geom_tile(aes(fill = effect)) +
geom_abline(intercept = 1, slope = 1, color="white", size=2)
If you don't actually want specific lines, but just want to highlight, you can simply draw dots:
ggplot(df, aes(sample, gene)) + geom_tile(aes(fill = effect)) +
geom_point(aes(sample, gene))
You can make it look like a line: geom_point(aes(sample, gene), shape='/', size=10, color='white')
To have the lines be only on some tiles, simply pass only the rows with those coordinates to geom_point: geom_point(data=filter(df, diagonal), aes(sample, gene))
Alternatively, you can hack it with a manual shape scale: geom_point(aes(sample, gene, shape=diagonal)) + scale_shape_manual(values=c(' ', '/'))

R - How to overlay the average of a set of iid RVs

In the code below I build a 40x1000 data frame where in each column I have the cumulative means for successive random draws from an exponential distribution with parameter lambda = 0.2.
I add an additional column to host the specific number of the "draw".
I also calculate the rowmeans as df_means.
How do I add df_means (as a black line) on top of all my simulated RVs? I don't understand ggplot well enough to do this.
df <- data.frame(replicate(1000,cumsum(rexp(40,lambda))/(1:40)))
df$draw <- seq(1,40)
df_means <- rowMeans(df)
Molten <- melt(df, id.vars="draw")
ggplot(Molten, aes(x = draw, y = value, colour = variable)) + geom_line() + theme(legend.position = "none") + geom_line(df_means)
How would I add plot(df_means, type="l") to my ggplot, below?
Thank you,
You can make another data.frame with the means and ids and use that to draw the line,
df_means <- rowMeans(df)
means <- data.frame(id=1:40, mu=df_means)
ggplot(Molten, aes(x=draw, y=value, colour=variable)) +
geom_line() +
theme(legend.position = "none") +
geom_line(data=means, aes(x=id, y=mu), color="black")
As described here
stat_sum_df <- function(fun, geom="crossbar", ...) {
stat_summary(fun.data=fun, colour="red", geom=geom, width=0.2, ...)
}
k<-ggplot(Molten, aes(x = draw, y = value, colour = variable)) + geom_line() + theme(legend.position = "none")
k+stat_sum_single(mean) #gives you the required plot

Varying factor order in each facet of ggplot2

I am trying to create a Cleveland Dot Plot given for two categories in this case J and K. The problem is the elements A,B,C are in both categories so R keeps farting. I have made a simple example:
x <- c(LETTERS[1:10],LETTERS[1:3],LETTERS[11:17])
type <- c(rep("J",10),rep("K",10))
y <- rnorm(n=20,10,2)
data <- data.frame(x,y,type)
data
data$type <- as.factor(data$type)
nameorder <- data$x[order(data$type,data$y)]
data$x <- factor(data$x,levels=nameorder)
ggplot(data, aes(x=y, y=x)) +
geom_segment(aes(yend=x), xend=0, colour="grey50") +
geom_point(size=3, aes(colour=type)) +
scale_colour_brewer(palette="Set1", limits=c("J","K"), guide=FALSE) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(type ~ ., scales="free_y", space="free_y")
Ideally, I would want a dot plot for both categories(J,K) individually with each factor(vector x) decreasing with respect to the y vector. What ends up happening is that both categories aren't going from biggest to smallest and are erratic at the end instead. Please help!
Unfortunately factors can only have one set of levels. The only way i've found to do this is actually to create two separate data.frames from your data and re-level the factor in each. For example
data <- data.frame(
x = c(LETTERS[1:10],LETTERS[1:3],LETTERS[11:17]),
y = rnorm(n=20,10,2),
type= c(rep("J",10),rep("K",10))
)
data$type <- as.factor(data$type)
J<-subset(data, type=="J")
J$x <- reorder(J$x, J$y, max)
K<-subset(data, type=="K")
K$x <- reorder(K$x, K$y, max)
Now we can plot them with
ggplot(mapping = aes(x=y, y=x, xend=0, yend=x)) +
geom_segment(data=J, colour="grey50") +
geom_point(data=J, size=3, aes(colour=type)) +
geom_segment(data=K, colour="grey50") +
geom_point(data=K, size=3, aes(colour=type)) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(type ~ ., scales="free_y", space="free_y")
which results in

Grouping labels when x is a factor variable in ggplot2

I'm trying to replace the x-axis labels "A0" and "A1" by one "A" which can be placed in the middle of "A0" and "A1". It would be better if there is a method which works like the following question:
grouping of axis labels ggplot2
By that, I mean to redraw the x-axis only for each group, and leave a blank between groups.
Here is the code I'm working on:
y = 1*round(runif(20)*10,1)
x1 = c("A","B")
x2 = c(0,1)
x = expand.grid(x1,x2)
xy = cbind(x,y)
xy$z = paste(xy$Var1,xy$Var2,sep="")
p <- ggplot(xy, aes(x=factor(z), y=y,fill=factor(Var2)))
p + geom_boxplot() + geom_jitter(position=position_jitter(width=.2)) + theme_bw() + xlab("X") + ylab("Y") + scale_fill_discrete(name="Var2",breaks=c(0, 1),labels=c("T", "C"))
Try this. No need for the variable z, just use position="dodge":
p <- ggplot(xy, aes(x=factor(Var1), y=y,fill=factor(Var2)))
p + geom_boxplot(position="dodge") + geom_jitter(position=position_jitter(width=.2)) + theme_bw() + xlab("X") + ylab("Y") + scale_fill_discrete(name="Var2",breaks=c(0, 1),labels=c("T", "C"))

Resources