Is it possible to avoid axis label overlapping by ggrepel? - r

I am drawing a heatmap with ggplot2. Several ticks on the y axis need to be labeled. However, some of them are too close together and overlap. I know ggrepel can separate text labels, but I have not yet worked out how to apply it to my problem.
My code is as follows. Any suggestion is welcome. Thanks.
Code:
# Simulated long-format expression table: 50 cells x 100 genes, one row per
# (cell, gene) pair. Each cell is assigned to one of five identity groups
# (i %% 5), which the plot uses for faceting.
n_cells <- 50
n_genes <- 100
gene_names <- paste0("gene", seq_len(n_genes))

# Build one data frame per cell and bind them once at the end, instead of
# growing `df` with rbind() on every iteration.
cell_frames <- lapply(seq_len(n_cells), function(i) {
  data.frame(
    cell = paste0("cell", i),
    # Explicit levels keep the genes in numeric order (gene1, gene2, ...).
    # Left as plain strings they sort alphabetically (gene1, gene10,
    # gene100, ...), so the positional labels in labelRow would be attached
    # to the wrong rows of the heatmap.
    gene = factor(gene_names, levels = gene_names),
    exp = rnorm(n_genes),
    ident = i %% 5
  )
})
df <- do.call(rbind, cell_frames)

# Axis labels, one per gene in level order: blank everywhere except for the
# handful of genes that should be called out.
labelRow <- rep("", n_genes)
labelled_genes <- c(2, 5, 7, 11, 19, 23)
labelRow[labelled_genes] <- gene_names[labelled_genes]
library(ggplot2)
# Tile heatmap of expression values: cells on x, genes on y, one facet column
# per identity group. Only the y axis (drawn on the right) keeps its text; its
# labels are taken from labelRow.
heatmap_theme <- theme(
  axis.line = element_blank(),
  axis.ticks = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  axis.text.x = element_blank(),
  axis.text.y = element_text(),
  strip.text.x = element_text(angle = -90)
)

heatmap <- ggplot(df, aes(x = cell, y = gene, fill = exp)) +
  geom_tile() +
  scale_fill_gradient2(name = "Expression") +
  scale_x_discrete(expand = c(0, 0), drop = TRUE) +
  scale_y_discrete(position = "right", labels = labelRow) +
  facet_grid(
    facets = ~ident,
    scales = "free",
    space = "free",
    switch = "x",
    drop = TRUE
  ) +
  heatmap_theme
heatmap

For these kinds of problems, I prefer to draw the axis as a separate plot and then combine. It takes a bit of fiddling but allows you to draw pretty much any axis you want.
In my solution, I'm using the functions get_legend(), align_plots(), and plot_grid() from the cowplot package. Disclaimer: I'm the package author.
library(ggplot2)
library(cowplot); theme_set(theme_gray()) # undo cowplot theme setting
library(ggrepel)
# Simulated long-format expression table: 50 cells x 100 genes, one row per
# (cell, gene) pair; `ident` (i %% 5) puts each cell in one of five groups.
n_cells <- 50
n_genes <- 100
gene_names <- paste0("gene", seq_len(n_genes))

# One data frame per cell, bound once at the end rather than grown with
# rbind() inside a loop.
cell_frames <- lapply(seq_len(n_cells), function(i) {
  data.frame(
    cell = paste0("cell", i),
    # Explicit levels keep the heatmap rows in numeric gene order. As plain
    # strings the genes sort alphabetically (gene1, gene10, gene100, ...),
    # and row k of the heatmap would no longer be gene k -- which is what the
    # separate axis plot (labels placed at y = 1:100) relies on.
    gene = factor(gene_names, levels = gene_names),
    exp = rnorm(n_genes),
    ident = i %% 5
  )
})
df <- do.call(rbind, cell_frames)

# Label text per gene position: blank except for the genes to call out.
labelRow <- rep("", n_genes)
genes <- c(2, 5, 7, 11, 19, 23)
labelRow[genes] <- paste0("gene ", genes)
# Heatmap panel: expression tiles faceted by identity group, with every axis
# decoration removed (the gene labels are drawn by a separate plot).
# The right-hand plot margin is set to 0 and the legend is left-justified.
heatmap_theme <- theme(
  axis.line = element_blank(),
  axis.ticks = element_blank(),
  axis.text = element_blank(),
  axis.title = element_blank(),
  strip.text.x = element_text(angle = -90),
  legend.justification = "left",
  plot.margin = margin(5.5, 0, 5.5, 5.5, "pt")
)

heatmap <- ggplot(df, aes(x = cell, y = gene, fill = exp)) +
  geom_tile() +
  scale_fill_gradient2(name = "Expression") +
  scale_x_discrete(expand = c(0, 0), drop = TRUE) +
  facet_grid(
    facets = ~ident,
    scales = "free",
    space = "free",
    switch = "x",
    drop = TRUE
  ) +
  heatmap_theme
# Stand-alone "axis" panel: one row per gene position, carrying the label text
# from labelRow at x = 0 and letting ggrepel move the labels apart.
axis_labels <- data.frame(y = 1:100, gene = labelRow)

# Both position scales are stripped of breaks, labels, titles and padding.
# NOTE(review): the y limits of 0.5 to 100.5 presumably mirror the extent of
# the 100 heatmap rows so the two panels line up -- confirm visually.
bare_x <- scale_x_continuous(
  limits = c(0, 1), expand = c(0, 0),
  breaks = NULL, labels = NULL, name = NULL
)
bare_y <- scale_y_continuous(
  limits = c(0.5, 100.5), expand = c(0, 0),
  breaks = NULL, labels = NULL, name = NULL
)

axis <- ggplot(axis_labels, aes(x = 0, y = y, label = gene)) +
  geom_text_repel(
    min.segment.length = grid::unit(0, "pt"),
    color = "grey30",       ## ggplot2 theme_grey() axis text
    size = 0.8 * 11 / .pt   ## ggplot2 theme_grey() axis text
  ) +
  bare_x +
  bare_y +
  theme(
    panel.background = element_blank(),
    plot.margin = margin(0, 0, 0, 0, "pt")
  )
# Align the legend-free heatmap with the axis panel (horizontal alignment on
# their top and bottom edges), then add the heatmap's legend as a third
# element and lay all three out in one row.
heatmap_no_legend <- heatmap + theme(legend.position = "none")
heatmap_legend <- get_legend(heatmap)

aligned <- align_plots(heatmap_no_legend, axis, align = "h", axis = "tb")
aligned <- c(aligned, list(heatmap_legend))

plot_grid(
  plotlist = aligned,
  nrow = 1,
  rel_widths = c(5, .5, .7)
)

Related

How to remove unwanted horizontal lines added to raster image by ggsave?

I can't solve a problem I found when plotting and saving an image from raster with the ggsave() function in R.
When I plot it, it works well. When I use ggsave() to export it, horizontal gray lines are added to the plot.
I want to remove them but I don't know how to do it.
That's an example image with the options and the code I used:
# Reusable list of plot components: the tile layer, a three-colour fill scale
# shown as a legend, a stripped-down theme with a black panel border, and
# fixed 0-1 axes relabelled as 0-10.
gg.tema <- theme(
  plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
  axis.title.x = element_text(size = 12),
  axis.title.y = element_text(size = 12),
  plot.margin = unit(c(2, 2, 2, 2), "mm"),
  panel.background = element_blank(),
  panel.border = element_rect(colour = "black", fill = NA, size = 1),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.major.x = element_blank(),
  panel.grid.major.y = element_blank(),
  panel.grid.minor.x = element_blank(),
  panel.grid.minor.y = element_blank(),
  aspect.ratio = 11 / 10
)

gg.opzioni <- list(
  geom_tile(aes(x, y, fill = values)),
  scale_fill_gradientn(
    n.breaks = 3,
    colours = c("#52647A", "#2C413C", "#646859"),
    guide = "legend",
    na.value = "white"
  ),
  gg.tema,
  scale_x_continuous(
    limits = c(0, 1), expand = c(0, 0),
    breaks = seq(0, 1, 0.1), labels = seq(0, 10, 1)
  ),
  scale_y_continuous(
    limits = c(0, 1), expand = c(0, 0),
    breaks = seq(0, 1, 0.1), labels = seq(0, 10, 1)
  ),
  coord_fixed()
)

# Build the plot from the shared components and write it out as a PNG.
r.sam <- ggplot(df) +
  gg.opzioni +
  labs(title = "Campione ricostruito", x = "", y = "", fill = "classe:")
ggsave(
  filename = "lapalma_sam.png",
  plot = r.sam,
  device = "png",
  path = "/Users/Francesco/Downloads/"
)
I tried to remove the possible grid with the panel.grid options, but it didn't work.
Originally three variables are included in the df object: two of coordinates and one with the pixel class.
library(tidyverse)
# Example raster: a 100 x 100 grid of cells. `val` is a sine wave sampled at
# 100 points along x and repeated unchanged for each of the 100 y rows.
df <- tibble(
  # `length.out` spelled out in full; `length =` only worked through partial
  # argument matching.
  val = rep(sin(seq(0, 4 * pi, length.out = 100)), times = 100),
  x = rep(1:100, times = 100),
  y = rep(1:100, each = 100)
)
The following replicates your problem, where horizontal lines are visible around each cell:
# Plain geom_tile() version of the plot, saved to disk.
plot.tiles <- ggplot(df, aes(x = x, y = y, fill = val)) + geom_tile()
ggsave(filename = "plot_tile.png", plot = plot.tiles)
This arises because geom_tile() has a border color property. One solution is to make the "color" aesthetic match the "fill" aesthetic:
# Same tiles, but the outline colour is mapped to the same variable as the
# fill, so each tile's border is coloured from `val` as well.
tile_mapping <- aes(x = x, y = y, fill = val, color = val)
plot.border <- ggplot(df, tile_mapping) + geom_tile()
ggsave(filename = "plot_border.png", plot = plot.border)
Or you can use geom_raster(), which does not have a cell border, but functions similarly to geom_tile():
# geom_raster() alternative to geom_tile(), using the same x/y/fill mapping.
plot.raster <- ggplot(df, aes(x = x, y = y, fill = val)) + geom_raster()
ggsave(filename = "plot_raster.png", plot = plot.raster)

How to separate aesthetics of two different geom_lines?

I'm trying to plot a line along the x axis which is basically a bunch of zeros and ones. Ones are green and zeros are red. When I try to do that, the scale_colour_gradient of the ggplot basically colors on top of the line.
It looks like this
Where the line should be colored as follows:
colorbar is a vector of zeros and ones.
# Main line coloured by its own y value on a red-to-green gradient, plus a
# second, thicker line drawn from colorFrame whose colour is mapped to
# `colorbar`. Both layers map to the colour aesthetic of the same plot.
p <- ggplot(data1, aes(newx, newy, group = 1, colour = newy)) +
  geom_line(size = 1.5, show.legend = FALSE) +
  scale_colour_gradient(low = "red2", high = "green3") +
  geom_line(
    data = colorFrame,
    mapping = aes(as.numeric(x) - 5, as.numeric(ys), color = colorbar),
    size = 3,
    show.legend = FALSE
  ) +
  xlim(0, 1300) +
  # Blank panel, no x ticks or labels, black y axis line and ticks.
  theme(
    panel.background = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.line.y = element_line(colour = "black"),
    axis.ticks.y.left = element_line(colour = "black")
  ) +
  scale_y_continuous(
    breaks = seq(0, 12, 1),
    limits = c(-1, 12),
    expand = c(0, 0)
  )
One solution would be to create two subplots and stitch them together. I use cowplot and theme_void here, but really the second plot below could look however you want it to.
# Two independent plots, each with its own colour scale, stacked vertically.
red_green <- function() scale_colour_gradient(low = "red2", high = "green3")

# Upper panel: the data line, coloured by its y value.
p1_theme <- theme(
  panel.background = element_blank(),
  axis.ticks.x = element_blank(),
  axis.text.x = element_blank(),
  axis.line.y = element_line(colour = "black"),
  axis.ticks.y.left = element_line(colour = "black")
)
p1 <- ggplot(df, aes(x, y, group = 1, colour = y)) +
  geom_line(size = 1.5, show.legend = FALSE) +
  red_green() +
  p1_theme +
  scale_y_continuous(
    breaks = seq(0, 12, 1),
    limits = c(-1, 12),
    expand = c(0, 0)
  ) +
  labs(x = NULL)

# Lower panel: a flat line at y = 0 whose colour follows z, with no axes or
# background at all.
p2 <- ggplot(df, aes(x, y = 0, colour = z)) +
  geom_line(size = 1.5, show.legend = FALSE) +
  red_green() +
  theme_void()

# One column, vertically aligned; the strip gets 5% of the main panel height.
cowplot::plot_grid(
  p1, p2,
  ncol = 1,
  align = "v",
  rel_heights = c(1, .05)
)
Data
# Example data: 50 observations with a uniform random y in [0, 12] and a
# random 0/1 indicator z.
n_obs <- 50
x_vals <- seq_len(n_obs)
y_vals <- runif(n_obs, min = 0, max = 12)
z_vals <- sample(c(0, 1), size = n_obs, replace = TRUE)
df <- data.frame(x = x_vals, y = y_vals, z = z_vals)

Right align horizontal y axis titles for multiple plots using R ggplot2

I'm having trouble right aligning horizontal y axis titles for multiple plots in R ggplot2. I have a main plot which is a dendrogram with leaf labels created using the ggdendro package, and I have multiple color bars below the main plot with titles to the left. If I use grid.arrange to place the plots on the same page, I'm able to get good vertical spacing between the plots, but I'm not able to right-align the y axis titles for the color bars consistently. If I use plot_grid, I can right-align the y axis titles consistently, but I'm having trouble getting appropriate vertical spacing between plots. Any help would be appreciated!
Update: Two suggested solutions work equally well so I'm accepting the first one as the answer. Using ggarrange from the egg package and using plot_grid with align = "v" instead of align = "hv" both fixed my problem.
Create main plot and color bars:
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later.
library(ggplot2)
library(gridExtra)
library(cowplot)
library(ggdendro)

# Average-linkage clustering of the USArrests data, cut into six clusters.
hc <- hclust(dist(USArrests), "average")
# One row per state; factor levels follow the dendrogram leaf order so the
# colour bars line up with the leaves.
df <- data.frame(
  cluster = cutree(hc, 6),
  states = factor(hc$labels, levels = hc$labels[hc$order])
)

# Main plot: the dendrogram, with the x range padded by one unit each side.
p1_dendro <- dendro_data(hc)
p1 <- ggdendrogram(hc) +
  coord_cartesian(
    xlim = c(-1, nrow(df) + 1),
    ylim = c(-1, max(p1_dendro$segments$y)),
    expand = FALSE
  )

# Build one colour bar: a single row of tiles filled by cluster, with a
# horizontal, right-justified y axis title and everything else removed.
cluster_color_bar <- function(data, y_label) {
  ggplot(data, aes(states, y = 1, fill = factor(cluster))) +
    ylab(y_label) +
    geom_tile() +
    theme_minimal() +
    coord_cartesian(xlim = c(-1, nrow(data) + 1), expand = FALSE) +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_text(angle = 0, vjust = 0.5, hjust = 1),
      axis.ticks = element_blank(),
      axis.text = element_blank(),
      legend.position = "none",
      line = element_blank()
    )
}

# Two colour bars that differ only in the length of their y axis title.
p2 <- cluster_color_bar(df, "y label")
p3 <- cluster_color_bar(df, "a longer y label")
grid.arrange approach:
# Convert each plot to a grob so its layout widths can be edited directly.
gp1 <- ggplotGrob(p1)
gp2 <- ggplotGrob(p2)
gp3 <- ggplotGrob(p3)

# Take the element-wise maximum of layout widths 2 to 5 across the three
# grobs and write it back into each, so all three share those widths.
# NOTE(review): columns 2:5 are presumably the left-hand axis/title columns of
# the layout -- confirm for the ggplot2 version in use.
width_cols <- 2:5
maxWidth <- grid::unit.pmax(
  gp1$widths[width_cols],
  gp2$widths[width_cols],
  gp3$widths[width_cols]
)
shared_widths <- as.list(maxWidth)
gp1$widths[width_cols] <- shared_widths
gp2$widths[width_cols] <- shared_widths
gp3$widths[width_cols] <- shared_widths

# Stack the grobs in one column with an 8:1:1 height split.
grid.arrange(gp1, gp2, gp3, ncol = 1, heights = c(8, 1, 1))
plot_grid approach:
# Stack the three plots in one column (8:1:1 heights), aligning them both
# horizontally and vertically on all four axes.
plot_grid(
  p1, p2, p3,
  ncol = 1,
  rel_heights = c(8, 1, 1),
  align = "hv",
  axis = "tblr"
)
egg package will get the job done
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later.
library(ggplot2)
library(ggdendro)

# Average-linkage clustering of the USArrests data, cut into six clusters.
hc <- hclust(dist(USArrests), "average")
# One row per state; factor levels follow the dendrogram leaf order so the
# colour bars line up with the leaves.
df <- data.frame(
  cluster = cutree(hc, 6),
  states = factor(hc$labels, levels = hc$labels[hc$order])
)

# Main plot: the dendrogram, with the x range padded by one unit each side.
p1_dendro <- dendro_data(hc)
p1 <- ggdendrogram(hc) +
  coord_cartesian(
    xlim = c(-1, nrow(df) + 1),
    ylim = c(-1, max(p1_dendro$segments$y)),
    expand = FALSE
  )

# Build one colour bar: a single row of tiles filled by cluster, with a
# horizontal, right-justified y axis title and everything else removed.
cluster_color_bar <- function(data, y_label) {
  ggplot(data, aes(states, y = 1, fill = factor(cluster))) +
    ylab(y_label) +
    geom_tile() +
    theme_minimal() +
    coord_cartesian(xlim = c(-1, nrow(data) + 1), expand = FALSE) +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_text(angle = 0, vjust = 0.5, hjust = 1),
      axis.ticks = element_blank(),
      axis.text = element_blank(),
      legend.position = "none",
      line = element_blank()
    )
}

# Two colour bars that differ only in the length of their y axis title.
p2 <- cluster_color_bar(df, "y label")
p3 <- cluster_color_bar(df, "a longer y label")
Stack p1, p2 and p3 together using ggarrange()
# install.packages("egg", dependencies = TRUE)
library(egg)

# Stack the dendrogram and the two colour bars in a single column; the
# dendrogram gets eight parts of the height to one part for each bar.
panel_heights <- c(8, 1, 1)
egg::ggarrange(p1, p2, p3, ncol = 1, heights = panel_heights)
Created on 2020-08-06 by the reprex package (v0.3.0)

ggplot2 Create shaded area with gradient below curve

I would like to create the plot below using ggplot.
Does anyone know of any geom that create the shaded region below the line chart?
Thank you
I think you're just looking for geom_area. However, I thought it might be a useful exercise to see how close we can get to the graph you are trying to produce, using only ggplot:
Pretty close. Here's the code that produced it:
Data
library(ggplot2)
library(lubridate)
# Seven monthly observations starting 2019-10-01 (values estimated from the
# plot in the question).
points <- data.frame(
  x = seq(as.Date("2019-10-01"), by = "month", length.out = 7),
  y = c(2, 2.5, 3.8, 5.4, 6, 8.5, 6.2)
)

# Natural-spline interpolation through the observations at 200 points. The
# interpolated x values come back as plain numbers, so convert them to Date
# again (days since 1970-01-01), then drop the first and last point.
spline_fit <- spline(points$x, points$y, n = 200, method = "natural")
spline_df <- as.data.frame(spline_fit)
spline_df$x <- as.Date(spline_df$x, origin = as.Date("1970-01-01"))
spline_df <- spline_df[-c(1, 200), ]

# 200 horizontal lines from y = 0 to y = 8 whose alpha falls linearly from 0.3
# to 0, used to fake a gradient over the filled area.
n_bands <- 200
grad_df <- data.frame(
  yintercept = seq(0, 8, length.out = n_bands),
  alpha = seq(0.3, 0, length.out = n_bands)
)
Labelling functions
# Two-line date label: upper-case English month abbreviation on the first
# line, four-digit year on the second (e.g. "OCT\n2019").
xlabeller <- function(d) {
  month_abbr <- toupper(month.abb[month(d)])
  paste(month_abbr, year(d), sep = "\n")
}
# Format y axis values as labels: non-zero values whose text is a single
# character get a leading "0" (2 -> "02"); everything else is left as is.
# Always returns a character vector. The previous ifelse() form returned the
# numeric input unchanged whenever no value needed padding, so the result
# type depended on the data.
ylabeller <- function(d) {
  labels <- as.character(d)
  # is.na() guard: NA values are never padded and stay NA.
  needs_pad <- !is.na(d) & nchar(labels) == 1 & d != 0
  labels[needs_pad] <- paste0("0", labels[needs_pad])
  labels
}
Plot
# Area chart with a faked vertical gradient: a green filled area under the
# spline curve, overlaid with white horizontal lines whose alpha comes from
# grad_df, then the curve itself and ring-style markers at the observations.
ggplot(points, aes(x, y)) +
  geom_area(data = spline_df, fill = "#80C020", alpha = 0.35) +
  # White lines with per-line alpha (used as-is via scale_alpha_identity()).
  geom_hline(
    data = grad_df,
    aes(yintercept = yintercept, alpha = alpha),
    size = 2.5, colour = "white"
  ) +
  geom_line(data = spline_df, colour = "#80C020", size = 1.2) +
  # Each marker is a large green dot with a smaller white dot on top.
  geom_point(shape = 16, size = 4.5, colour = "#80C020") +
  geom_point(shape = 16, size = 2.5, colour = "white") +
  geom_hline(aes(yintercept = 2), alpha = 0.02) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.border = element_blank(),
    axis.line.x = element_line(),
    text = element_text(size = 15),
    # margin() takes the four sides as separate arguments. The previous
    # margin(unit(c(20, 20, 20, 20), "pt")) passed the whole unit vector as
    # the top margin only.
    plot.margin = margin(20, 20, 20, 20, unit = "pt"),
    axis.ticks = element_blank(),
    axis.text.y = element_text(margin = margin(0, 15, 0, 0, unit = "pt"))
  ) +
  scale_alpha_identity() +
  labs(x = "", y = "") +
  scale_y_continuous(
    limits = c(0, 10), breaks = 0:5 * 2, expand = c(0, 0),
    labels = ylabeller
  ) +
  scale_x_date(breaks = "months", expand = c(0.02, 0), labels = xlabeller)

Prevent geom_points and their corresponding labels from overlapping

Thanks for the suggested duplicate, this is however not only about the labels, but is also about adjusting the points themselves so they do not overlap.
Have a quick look at the plot below...
I need the coloured points, and their corresponding labels, to never overlap. They should be clustered together and all visible, perhaps with some indication that they are spaced and not 100% accurate, perhaps some sort of call out? Open to suggestions on that.
I've tried adding position = 'jitter' to both geom_point and geom_text, but that doesn't seem to be working (assume it is only for small overlaps?)
Ideas?
# TEST DATA
# 20 keyed points with random integer coordinates (X in 40-80, Y in 30-65)
# and Z = |X - Y|, which is later mapped to point colour.
srvc_data <- data.frame(
  Key = 1:20,
  # TRUE spelled out: T is an ordinary variable that can be reassigned.
  X = sample(40:80, 20, replace = TRUE),
  Y = sample(30:65, 20, replace = TRUE)
)
srvc_data$Z <- with(srvc_data, abs(X - Y))
# Theme overrides: no plot/panel background, no grid lines, no panel border,
# and thin axis lines instead.
t1 <- theme(
  plot.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  axis.line = element_line(size = .4)
)

# Scatter of the keyed points on fixed 0-100 axes, coloured by Z, with a wide
# translucent band and a dashed line along Y = X, and each point labelled
# with its Key.
# The stray xlim/ylim arguments to ggplot() were dropped: ggplot() does not
# use them, and the axis ranges are set by the two scale_*_continuous() calls.
main_plot <- ggplot(srvc_data, aes(x = X, y = Y)) +
  # theme_bw() is a complete theme and replaces any theme settings added
  # before it, so it must come first for the t1 overrides to take effect.
  theme_bw() +
  t1 +
  labs(x = "X", y = "Y") +
  scale_x_continuous(limits = c(0, 100)) +
  scale_y_continuous(limits = c(0, 100)) +
  geom_abline(
    intercept = 0, slope = 1,
    colour = "blue", size = 34, alpha = .1
  ) +
  geom_abline(
    intercept = 0, slope = 1,
    colour = "black", size = .2, alpha = .5, linetype = "dashed"
  ) +
  geom_point(size = 7, aes(color = Z), alpha = .7) +
  scale_color_gradient("Gap %\n", low = "green", high = "red") +
  coord_fixed() +
  # size is a constant, so it is set outside aes(); show.legend replaces the
  # deprecated show_guide argument.
  geom_text(aes(label = Key), size = 6, show.legend = FALSE)
main_plot
Produces this plot (of course with your random data it will vary)
Thanks in advance.
Here's your plot with ggrepel geom_text_repel:
library(ggrepel)
# TEST DATA
# Seeded so the example is reproducible: 20 keyed points with random integer
# coordinates (X in 40-80, Y in 30-65) and Z = |X - Y|.
set.seed(42)
srvc_data <- data.frame(
  Key = 1:20,
  # TRUE spelled out: T is an ordinary variable that can be reassigned.
  X = sample(40:80, 20, replace = TRUE),
  Y = sample(30:65, 20, replace = TRUE)
)
srvc_data$Z <- with(srvc_data, abs(X - Y))
# Theme overrides: no plot/panel background, no grid lines, no panel border,
# and thin axis lines instead.
t1 <- theme(
  plot.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  axis.line = element_line(size = .4)
)

# Scatter of the keyed points on fixed 0-100 axes, coloured by Z, with a wide
# translucent band and a dashed line along Y = X. Key labels are drawn with
# geom_text_repel() instead of geom_text().
# The stray xlim/ylim arguments to ggplot() were dropped: ggplot() does not
# use them, and the axis ranges are set by the two scale_*_continuous() calls.
ggplot(srvc_data, aes(x = X, y = Y)) +
  # theme_bw() is a complete theme and replaces any theme settings added
  # before it, so it must come first for the t1 overrides to take effect.
  theme_bw() +
  t1 +
  labs(x = "X", y = "Y") +
  scale_x_continuous(limits = c(0, 100)) +
  scale_y_continuous(limits = c(0, 100)) +
  geom_abline(
    intercept = 0, slope = 1,
    colour = "blue", size = 34, alpha = .1
  ) +
  geom_abline(
    intercept = 0, slope = 1,
    colour = "black", size = .2, alpha = .5, linetype = "dashed"
  ) +
  geom_point(size = 7, aes(color = Z), alpha = .7) +
  scale_color_gradient("Gap %\n", low = "green", high = "red") +
  coord_fixed() +
  # size is a constant, so it is set outside aes(); show.legend replaces the
  # deprecated show_guide argument.
  geom_text_repel(aes(label = Key), size = 6, show.legend = FALSE)

Resources