Add small arrows instead of axis to UMAP - r

More often I see in publications that instead of printing the UMAP axis in scRNAseq experiments (or even t-SNE or PCA) they just add two small arrows in the bottom left corner.
Something like this:
I really like the aesthetics of it but I donĀ“t know how to replicate this in R. I guess this is normally done separately with some image editor but it can probably be done with ggplot2 package to make it more reproducible.
So far I only got the arrows in the axis:
x <- data.frame(UMAP1=rnorm(300),UMAP2=rnorm(300))
ggplot(x, aes(UMAP1,UMAP2)) + geom_point() + theme_minimal() +
theme(axis.line = element_line(arrow = arrow(type='closed',
length = unit(10,'pt'))))
But I don't know how to make them smaller and with the title underneath. Does anyone have any suggestions on how to do this?

In code below, adjust unit(3, "cm") and hjust = 0 to taste.
Disclaimer: I wrote ggh4x.
library(ggplot2)
axis <- ggh4x::guide_axis_truncated(
trunc_lower = unit(0, "npc"),
trunc_upper = unit(3, "cm")
)
x <- data.frame(UMAP1=rnorm(300),UMAP2=rnorm(300))
ggplot(x, aes(UMAP1, UMAP2)) +
geom_point() +
guides(x = axis, y = axis) +
theme(axis.line = element_line(arrow = arrow()),
axis.title = element_text(hjust = 0))
Created on 2022-12-07 by the reprex package (v2.0.1)
Optionally, add the code below if you want to get rid of the ticks and labels (which don't make any sense in terms of UMAP/tSNE anyway)
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL)

I'd suggest faking it with an annotation:
library(dplyr); library(umap); library(ggplot2)
arr <- list(x = -10, y = -15, x_len = 5, y_len = 5)
ggplot(storms_umap_extract, aes(x,y, color = category, alpha = wind)) +
geom_point() +
annotate("segment",
x = arr$x, xend = arr$x + c(arr$x_len, 0),
y = arr$y, yend = arr$y + c(0, arr$y_len),
arrow = arrow(type = "closed", length = unit(10, 'pt'))) +
theme_void()
Here's the umap data:
storms_umap <- storms |>
select(lat,long, wind, pressure) |>
umap() # this took about a minute to run
storms_umap_extract <- tibble(
x = storms_umap$layout[,1],
y = storms_umap$layout[,2],
wind = storms_umap$data[,3],
category = storms$category
)

Related

Creating exact squares with panel.grid.major in ggplot2

i have the following issue right now;
I want to create plots with ggplot2 where the elements panel.grid.major.x and panel.grid.major.y form squares within the plot.
My solution so far includes defining the amount of major lines from the x- and y-axis of the plot as well as the option aspect.ratio in the theme options. Following code is a MWE, my actual code right now contains more options:
library(ggplot2)
#remotes::install_github("allisonhorst/palmerpenguins")
library(palmerpenguins)
equal_breaks2 <- function(n = 3, s = 0.05, ...){
function(x){
# rescaling
d <- s * diff(range(x)) / (1+2*s)
seq(min(x)+d, max(x)-d, length=n)
}
}
# This functions comes from a great answer here
# https://stackoverflow.com/questions/28436855/change-the-number-of-breaks-using-facet-grid-in-ggplot2
n_x <- 5
n_y <- 3
ggplot(palmerpenguins::penguins, aes(x = bill_depth_mm, y= bill_length_mm)) +
geom_point(aes(colour = species, shape = sex)) +
scale_color_viridis_d() +
scale_x_continuous(breaks = equal_breaks2(n = n_x, s = 0.00), expand = c(0,0)) +
scale_y_continuous(breaks = equal_breaks2(n = n_y, s = 0.00), expand = c(0,0)) +
theme(aspect.ratio = n_y/n_x,
panel.grid.minor = element_blank()) +
coord_fixed()
This plot unfortunately does not produce exact squares from the grid lines. One has to manually adjust the aspect ratio (in this example n_y=3.7 looks pretty good).
Does anyone have an idea how to solve this, without having to adjust values manually?
Edit: I forgot to mention this in my initial request; Ideally my plot limits are the min and max value of my breaks, so i also have squares at the borders of the plot.
To get a nice scale, I used scales::pretty_breaks.
Let d_x and d_y be the step size between breaks calculated by the scale function.
Let range_x and range_y be the x and y range of the data to plot.
To get squares, aspect.ratio should be :
d_x * range_y / ( d_y * range_x)
Try :
library(ggplot2)
library(scales)
data <- palmerpenguins::penguins
scale_x <- scales::pretty_breaks(n = 5)(data$bill_depth_mm)
scale_y <- scales::pretty_breaks(n = 3)(data$bill_length_mm)
d_x <- diff(scale_x)[1]
d_y <- diff(scale_y)[1]
range_x <- diff(range(scale_x))
range_y <- diff(range(scale_y))
ggplot(data, aes(x = bill_depth_mm, y= bill_length_mm)) +
geom_point(aes(colour = species, shape = sex)) +
scale_color_viridis_d() +
scale_x_continuous(breaks = scale_x, expand = c(0,0)) +
scale_y_continuous(breaks = scale_y, expand = c(0,0)) +
theme(aspect.ratio = d_x * range_y / ( d_y * range_x),
panel.grid.minor = element_blank()) +
coord_fixed(xlim=range(scale_x),ylim=range(scale_y))
So, with the great help of #Waldi, i came up with an automatic solution. Its totally viable to do all the calculation beforehand, but i wanted an automatic solution, within the ggplot-chain.
I created my own coord-ggproto object, which can calculate the aspect ratio from the internals in ggplot (According to the Formula of #Waldi).
CoordOwn <- ggproto("CoordOwn", CoordCartesian,
is_free = function() FALSE,
aspect = function(self, ranges) {
d_x = diff(ranges$x.major_source)[1]
d_y = diff(ranges$y.major_source)[1]
(d_x * diff(ranges$y.range)) / (d_y * diff(ranges$x.range))
}
)
coord_own <- function(ratio = 1, xlim = NULL, ylim = NULL, expand = TRUE, clip = "on") {
ggproto(NULL, CoordOwn,
limits = list(x = xlim, y = ylim),
ratio = ratio,
expand = expand,
clip = clip
)
}
Now i can change n_x and n_y however i want them to, and coord_own fixes the aspect ratio accordingly:
n_x <- 5
n_y <- 5
ggplot(palmerpenguins::penguins, aes(x = bill_depth_mm, y= bill_length_mm)) +
geom_point(aes(colour = species, shape = sex)) +
scale_color_viridis_d() +
scale_x_continuous(breaks = equal_breaks2(n = n_x, s = 0.00), expand = c(0,0)) +
scale_y_continuous(breaks = equal_breaks2(n = n_y, s = 0.00), expand = c(0,0)) +
theme(panel.grid.minor = element_blank()) +
coord_own()

R Windrose percent label on figure

I am using the windrose function posted here: Wind rose with ggplot (R)?
I need to have the percents on the figure showing on the individual lines (rather than on the left side), but so far I have not been able to figure out how. (see figure below for depiction of goal)
Here is the code that makes the figure:
p.windrose <- ggplot(data = data,
aes(x = dir.binned,y = (..count..)/sum(..count..),
fill = spd.binned)) +
geom_bar()+
scale_y_continuous(breaks = ybreaks.prct,labels=percent)+
ylab("")+
scale_x_discrete(drop = FALSE,
labels = waiver()) +
xlab("")+
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE)+
theme_bw(base_size = 12, base_family = "Helvetica")
I marked up the figure I have so far with what I am trying to do! It'd be neat if the labels either auto-picked the location with the least wind in that direction, or if it had a tag for the placement so that it could be changed.
I tried using geom_text, but I get an error saying that "aesthetics must be valid data columns".
Thanks for your help!
One of the things you could do is to make an extra data.frame that you use for the labels. Since the data isn't available from your question, I'll illustrate with mock data below:
library(ggplot2)
# Mock data
df <- data.frame(
x = 1:360,
y = runif(360, 0, 0.20)
)
labels <- data.frame(
x = 90,
y = scales::extended_breaks()(range(df$y))
)
ggplot(data = df,
aes(x = as.factor(x), y = y)) +
geom_point() +
geom_text(data = labels,
aes(label = scales::percent(y, 1))) +
scale_x_discrete(breaks = seq(0, 1, length.out = 9) * 360) +
coord_polar() +
theme(axis.ticks.y = element_blank(), # Disables default y-axis
axis.text.y = element_blank())
#teunbrand answer got me very close! I wanted to add the code I used to get everything just right in case anyone in the future has a similar problem.
# Create the labels:
x_location <- pi # x location of the labels
# Get the percentage
T_data <- data %>%
dplyr::group_by(dir.binned) %>%
dplyr::summarise(count= n()) %>%
dplyr::mutate(y = count/sum(count))
labels <- data.frame(x = x_location,
y = scales::extended_breaks()(range(T_data$y)))
# Create figure
p.windrose <- ggplot() +
geom_bar(data = data,
aes(x = dir.binned, y = (..count..)/sum(..count..),
fill = spd.binned))+
geom_text(data = labels,
aes(x=x, y=y, label = scales::percent(y, 1))) +
scale_y_continuous(breaks = waiver(),labels=NULL)+
scale_x_discrete(drop = FALSE,
labels = waiver()) +
ylab("")+xlab("")+
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE)+
theme_bw(base_size = 12, base_family = "Helvetica") +
theme(axis.ticks.y = element_blank(), # Disables default y-axis
axis.text.y = element_blank())

Unit-independent position_nudge

I would like to apply a position_nudge to an object, but it should always be a certain distance (e.g. in "cm") rather than relative to the scale of the measured variable.
data <- data.frame(
name=c("de","gb","cn","ir","ru") ,
value=c(3,12,5,18,45)*1
)
ggplot(data,
aes(x=name, y=value)) +
geom_bar(stat = "identity") +
geom_text(aes(y = 0,
label = paste0(name,value)),
position = position_nudge(y = -12)) +
coord_cartesian(ylim = c(0, 50), # This focuses the x-axis on the range of interest
clip = 'off') + # This keeps the labels from disappearing
theme(plot.margin = unit(c(1,1,1,1), "lines"))
When changing the scale of the variable, that adjustment should not need to be made in the position_nudge argument, e.g.
factor = 100
data <- data.frame(
name=c("de","gb","cn","ir","ru") ,
value=c(3,12,5,18,45)*factor
)
ggplot(data,
aes(x=name, y=value)) +
geom_bar(stat = "identity") +
geom_text(aes(y = 0,
label = paste0(name,value)),
position = position_nudge(y = -12)) +
coord_cartesian(ylim = c(0, 50*factor), # This focuses the x-axis on the range of interest
clip = 'off') + # This keeps the labels from disappearing
theme(plot.margin = unit(c(1,1,1,1), "lines"))
Currently, this does not work, so that I need to manually change -12 to -1200 to achieve this:
This is of course only a short reproducible example, the actual use-case is placing country flags as x-axis labels below the plot.
The final product will look somewhat like this, but currently requires updating the nudges each time the y-values change:
Thank you very much!
The easiest "hack" is to make this two plots and bind them with patchwork or cowplot. If you try it differently, you'd soon get into deep grid ... trouble.
Related
baptiste on github
baptiste on stackoverflow
Sandy Muspratt's answer
The easy way:
library(ggplot2)
library(patchwork)
foo <- data.frame(
name=c("de","gb","cn","ir","ru") ,
value=c(3,12,5,18,45)*1
)
foo_label = paste(foo$name, foo$value)
p <- ggplot(foo, aes(x=name, y=value)) +
geom_blank() # essential, so that both plots have same scaling
p_1 <-
p + geom_col() +
coord_cartesian(ylim = c(0, 50),clip = 'off') +
theme(plot.margin = margin())
p_text <-
p + annotate("text", label = foo_label, x = 1:5, y = 0, col="red") +
theme_void() +
coord_cartesian(clip = "off") +
theme(plot.margin = margin(1,0,1,0, unit = "lines"))
p_1/p_text + plot_layout(heights = c(1,0)) #this is a workaround to make the height of the text plot minimal!
You can then of course annotate with anything.
For your stated goal, the ggtext library may be more appropriate, as it allows you to embed images directly into the x axis labels. See also here for another example.
library(ggplot2)
library(ggtext)
labels <- c(
setosa = "<img src='https://upload.wikimedia.org/wikipedia/commons/thumb/8/86/Iris_setosa.JPG/180px-Iris_setosa.JPG'
width='100' /><br>*I. setosa*",
virginica = "<img src='https://upload.wikimedia.org/wikipedia/commons/thumb/3/38/Iris_virginica_-_NRCS.jpg/320px-Iris_virginica_-_NRCS.jpg'
width='100' /><br>*I. virginica*",
versicolor = "<img src='https://upload.wikimedia.org/wikipedia/commons/thumb/2/27/20140427Iris_versicolor1.jpg/320px-20140427Iris_versicolor1.jpg'
width='100' /><br>*I. versicolor*"
)
ggplot(iris, aes(Species, Sepal.Width)) +
geom_boxplot() +
scale_x_discrete(
name = NULL,
labels = labels
) +
theme(
axis.text.x = element_markdown(color = "black", size = 11)
)

Left-adjust (hjust = 0) vertical x axis labels on facets with free scale

I have decided to rephrase this question. (Editing would have taken more time and in my opinion would also not have helped the OP.)
How can one left-adjust (hjust = 0, i.e., in text direction) over facets, when scale = 'free_x'?
I don't really think that left-adjustment of x-labels is a very necessary thing to do (long labels generally being difficult to read, and right-adjusting probably the better choice) - but I find the problem interesting enough.
I tried with empty padding to the maximum character length, but this doesn't result in the same length for all strings. Also, setting axis.text.x = element.text(margin = margin()) doesn't help. Needless to say, hjust = 0 does not help, because it is adjusting within each facet.
library(ggplot2)
diamonds$cut_label <- paste("Super Dee-Duper", as.character(diamonds$cut))
ggplot(data = diamonds, aes(cut_label, carat)) +
facet_grid(~ cut, scales = "free_x") +
theme(axis.text.x = element_text(angle = 90))
The red arrows and dashed line indicate how the labels should adjust. hjust = 0 or margins or empty padding do not result in adjustment of those labels over all facets.
Data modification from this famous question
I tried with empty padding to the maximum character length, but this
doesn't result in the same length for all strings.
This caught my attention. Actually, it would result in the same length for all strings if you padded the labels with spaces, made them all the same length, and ensured the font family was non-proportionally spaced.
First, pad the labels with spaces such that all labels have the same length. I'm going to ustilise the str_pad function from the stringr package.
library(ggplot2)
data("diamonds")
diamonds$cut_label <- paste("Super Dee-Duper", as.character(diamonds$cut))
library(stringr)
diamonds$cut_label <- str_pad(diamonds$cut_label, side="right",
width=max(nchar(diamonds$cut_label)), pad=" ")
Then, you may need to load a non-proportionally-spaced font using the extrafont package.
library(extrafont)
font_import(pattern='consola') # Or any other of your choice.
Then, run the ggplot command and specify a proportionally spaced font using the family argument.
ggplot(data = diamonds, aes(cut_label, carat)) +
facet_grid(~cut, scales = "free_x") +
theme(axis.text.x = element_text(angle = 90, family="Consolas"))
One way, and possibly the most straight forward hack, would be to annotate outside the coordinates.
Disadvantage is that the parameters would need manual adjustments (y coordinate, and plot margin), and I don't see how to automate this.
library(ggplot2)
diamonds$cut_label <- paste("Super Dee-Duper", as.character(diamonds$cut))
ann_x <- data.frame(x = unique(diamonds$cut_label), y = -16, cut = unique(diamonds$cut))
ggplot(data = diamonds, aes(cut_label, carat)) +
facet_grid(~cut, scales = "free_x") +
geom_text(data = ann_x, aes(x, y, label = x), angle = 90, hjust = 0) +
theme(
axis.text.x = element_blank(),
plot.margin = margin(t = 0.1, r = 0.1, b = 2.2, l = 0.1, unit = "in")
) +
coord_cartesian(ylim = c(0, 14), clip = "off")
Created on 2020-03-14 by the reprex package (v0.3.0)
I'd approach this by making 2 plots, one of the plot area and one of the axis labels, then stick them together with a package like cowplot. You can use some theme settings to disguise the fact that the axis labels are actually made by a geom_text.
The first plot is fairly straightforward. For the second which becomes the axis labels, use dummy data with the same variables and adjust spacing how you want via text size and scale expansion. You'll probably also want to mess with the rel_heights argument in plot_grid to change the ratio of the two charts' heights.
library(ggplot2)
library(cowplot)
p1 <- ggplot(diamonds, aes(x = cut_label, y = carat)) +
facet_grid(cols = vars(cut), scales = "free_x") +
theme(axis.text.x = element_blank()) +
labs(x = NULL)
axis <- ggplot(dplyr::distinct(diamonds, cut_label, cut), aes(x = cut_label, y = 1)) +
geom_text(aes(label = cut_label), angle = 90, hjust = 0, size = 3.5) +
facet_grid(cols = vars(cut), scales = "free_x") +
scale_x_discrete(breaks = NULL) +
scale_y_continuous(expand = expansion(add = c(0.1, 1)), breaks = NULL) +
labs(y = NULL) +
theme(strip.text = element_blank(),
axis.text.x = element_blank(),
axis.ticks = element_blank(),
panel.background = element_blank())
plot_grid(p1, axis, ncol = 1, axis = "lr", align = "v")
We can edit the text grobs after generating the plot, using library(grid).
g <- ggplot(data = diamonds, aes(cut_label, carat)) +
facet_grid(~cut, scales = "free_x") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
gt <- cowplot::as_gtable(g)
axis_grobs <- which(grepl("axis-b", gt$layout$name))
labs <- levels(factor(diamonds$cut_label))[order(levels(diamonds$cut))]
for (i in seq_along(axis_grobs)) {
gt$grobs[axis_grobs[i]][[1]] <-
textGrob(labs[i], y = unit(0, "npc"), just = "left", rot = 90, gp = gpar(fontsize = 9))
}
grid.draw(gt)

Gradient fill for geom_bar scaled relative to each bar and not mapped to a variable

My goal is to reproduce this plot, and my problem is reproducing the gradient fill in each bar.
ADDED after COMMENT
The good comment of #PavoDive directs us to a question that basically says "You can't do it with ggplot2 and furthermore it is a bad idea even if you could do it." Agreed as to it being a poor graphical choice, but for didactic purposes I wanted to re-create the original and then show improvements. So, is there a programming solution nevertheless?
With the data that follows the ggplot code I have gotten close, other than the consistent gradient coloring that is the same for each bar (and the tiny tick marks). But my efforts result in bars that are filled to match the y value, while in the original plot each bar is filled with the same pattern. How do I achieve that effect?
ggplot(df, aes(x = x, y = y, fill = y)) +
geom_hline(yintercept = seq(0, .35, .05), color = "grey30", size = 0.5, linetype = "solid") +
geom_bar(stat = "identity", width = 0.4) +
scale_fill_gradient(low='green4', high='green1', guide = FALSE) +
theme(legend.position = "none") +
theme_minimal() +
geom_text(data = df, aes(label = scales::percent(y), vjust = -.5)) +
theme(axis.text.y = element_blank()) +
theme(axis.ticks = element_blank()) +
labs(y = "", x = "") +
ggtitle("Question 15: Do you feel prepared to comply with the upcoming December
2015 updated requirements of the FRCP that relate to ediscovery") +
theme(plot.title = element_text(face = "bold", size = 18)) +
theme(panel.border = element_blank())
Data
df <- data.frame(x = c("Prepared", "Somewhat\nprepared", "Not prepared", "Very prepared"),
y = c(.32, .31, .20, .17))
df$x <- factor(df$x, levels = c("Prepared", "Somewhat\nPrepared", "Not Prepared", "Very Prepared"))
This may be acheived with functions from package gridSVG. I use a stripped-down version of your example with only the most necessary parts for the actual problem:
# load additional packages
library(grid)
library(gridSVG)
# create a small data set
df <- data.frame(x = factor(1:3), y = 1:3)
# a basic bar plot to be modified
ggplot(data = df, aes(x = x, y = y)) +
geom_bar(stat = "identity")
# create a linear color gradient
cols <- linearGradient(col = c("green4", "green1"),
x0 = unit(0.5, "npc"), x1 = unit(0.5, "npc"))
# create a definition of a gradient fill
registerGradientFill(label = "cols", gradient = cols)
# list the names of grobs and look for the relevant geometry
grid.ls()
# GRID.gTableParent.76
# ...snip...
# panel.3-4-3-4
# geom_rect.rect.2 # <~~~~ this is the grob! Note that the number may differ
# apply the gradient to each bar
grid.gradientFill("geom_rect.rect", label = rep("cols", length(unique(df$x))),
group = FALSE, grep = TRUE)
# generate SVG output from the grid graphics
grid.export("myplot.svg")
You find more gridSVG examples here, here, and here.
Here is an option, using geom_path with a scaled y to color by instead of bars. This creates some new data (dat), sequences from 0 to each df$y value (length 100 here, in column dat$y). Then, a scaled version of each sequence is created (from 0 to 1), that is used as the color gradient (called dat$scaled). The scaling is done by simply dividing each sequence by its maximum value.
## Make the data for geom_path
mat <- mapply(seq, 0, df$y, MoreArgs = list(length=100)) # make sequences 0 to each df$y
dat <- stack(data.frame(lapply(split(mat, col(mat)), function(x) x/tail(x,1)))) # scale each, and reshape
dat$x <- rep(df$x, each=100) # add the x-factors
dat$y <- stack(as.data.frame(mat))[,1] # add the unscaled column
names(dat)[1] <- "scaled" # rename
## Make the plot
ggplot(dat, aes(x, y, color=scaled)) + # use different data
## *** removed some code here ***
geom_hline(yintercept = seq(0, .35, .05), color = "grey30", size = 0.5, linetype = "solid") +
theme(legend.position = "none") +
theme_minimal() +
geom_text(data = df, aes(label = scales::percent(y), vjust = -.5), color="black") +
theme(axis.text.y = element_blank()) +
theme(axis.ticks = element_blank()) +
labs(y = "", x = "") +
ggtitle("Question 15: Do you feel prepared to comply with the upcoming December
2015 updated requirements of the FRCP that relate to ediscovery") +
theme(plot.title = element_text(face = "bold", size = 18)) +
theme(panel.border = element_blank()) +
## *** Added this code ***
geom_path(lwd=20) +
scale_color_continuous(low='green4', high='green1', guide=F)

Resources