Make text labels span multiple ggplot facet grid panes - r

How can geom_text_repel() labels be made to span multiple facet_grid() panes? For instance, if there are many long labels that do not fit within the proper dimensions of each grid plot, the label should be positioned as if the facet_grid() were a single plot.
For example:
# Toy data: five groups of three points, each point carrying a long label.
df <- data.frame(
  x = rep(1:3, times = 5),
  y = runif(15),
  label = paste0("very long label ", 1:15),
  group = rep(paste0("group ", 1:5), each = 3)
)
library(ggplot2)
library(ggrepel)

# Scatter plot with one column of panels per group and repelled text labels.
ggplot(data = df, mapping = aes(x = x, y = y, label = label)) +
  geom_point() +
  geom_text_repel() +
  facet_grid(cols = vars(group))
If there is another way to group samples on the x-axis that would mimic a column-wise facet-grid, that's perfectly fine too. In my case, I need to group samples by a grouping variable (correspondingly labeled), and then within each group order them by a continuous variable.

Use clip = "off" from coord_cartesian:
library(ggplot2)
library(ggrepel)

# Same plot as in the question, with clipping switched off on the
# coordinate system so labels may be drawn beyond their own panel.
ggplot(data = df, mapping = aes(x = x, y = y, label = label)) +
  geom_point() +
  geom_text_repel() +
  facet_grid(cols = vars(group)) +
  coord_cartesian(clip = "off")
If this is not enough, another option is to wrap the labels onto multiple lines with stringr::str_wrap:
library(stringr)
library(dplyr)

# Wrap each label at a width of 20 characters, then draw the faceted plot
# with clipping switched off.
df %>%
  mutate(label_wrapped = str_wrap(label, width = 20)) %>%
  ggplot(mapping = aes(x = x, y = y, label = label_wrapped)) +
  geom_point() +
  geom_text_repel() +
  facet_grid(cols = vars(group)) +
  coord_cartesian(clip = "off")
data
# Reproducible example data: five groups of three points with long labels.
set.seed(2)
df <- data.frame(
  x = rep(1:3, times = 5),
  y = runif(15),
  label = paste0("very very very long label ", 1:15),
  group = rep(paste0("group ", 1:5), each = 3)
)

Related

R ggplot label number of observations per ordered violin with facet wrap

I've got a plot that looks like the output of the following code using the iris data
# library() stops with an error when a package is missing; require() only
# returns FALSE and lets the script fail later with a less helpful message.
library(tidyverse)
library(purrr)
library(forcats) # fct_cross() / fct_reorder() are used to order the violins

# Reshape iris to long format (one row per flower and measurement) and build
# a Species x measurement factor whose levels are reordered by the values.
tbl <- iris %>%
  pivot_longer(cols = 1:4, names_to = "Msr", values_to = "Vls") %>%
  mutate(Msr = factor(Msr)) %>%
  mutate(plot_fct = fct_cross(Species, Msr)) %>%
  mutate(plot_fct = fct_reorder(plot_fct, Vls))
# Function factory for minor breaks on a log axis.
#
# base: the logarithm base of the axis.
# Returns a function of `limits` (axis limits, lower then upper) that yields
# the `value` column computed by ggplot2's internal calc_logticks() helper.
minor_breaks_log <- function(base) {
  # Evaluate `base` now instead of lazily inside the returned closure
  force(base)

  function(limits) {
    lowest_power <- floor(log(limits[1], base = base))
    highest_power <- ceiling(log(limits[2], base = base))
    ticks <- ggplot2:::calc_logticks(
      base = base,
      minpow = lowest_power,
      maxpow = highest_power
    )
    ticks$value
  }
}
# Violin plot of the measurements: flipped coordinates, log10 value axis,
# one free-scaled facet per species.
ggplot(data = tbl, mapping = aes(x = plot_fct, y = Vls, fill = Species)) +
  geom_violin() +
  coord_flip() + # swap the x and y axes
  scale_y_log10(
    labels = function(brk) sprintf("%g", brk), # compact label format
    minor_breaks = minor_breaks_log(10)
  ) +
  theme_bw(base_size = 12) +
  annotation_logticks(base = 10, sides = "b") +
  facet_wrap(~Species, nrow = 1, scales = "free")
I would now like to list the number of observations per violin on the right side of each facet just inside the maximum border, which I'm sure is possible but cannot seem to find an example that does this sort of labeling, with violins and facets.
# Violins drawn horizontally (factor on y, values on x) with a text layer
# that prints the row count computed by stat = "count" for each factor level.
ggplot(data = tbl, mapping = aes(y = plot_fct, fill = Species)) +
  geom_violin(mapping = aes(x = Vls)) +
  geom_text(
    mapping = aes(label = after_stat(count)),
    stat = "count",
    position = "fill",
    hjust = 1
  ) +
  scale_x_log10(
    labels = function(brk) sprintf("%g", brk), # compact label format
    minor_breaks = minor_breaks_log(10)
  ) +
  theme_bw(base_size = 12) +
  annotation_logticks(base = 10, sides = "b") +
  facet_wrap(~Species, nrow = 1, scales = "free")

Make geom_histogram display x-axis labels as integers instead of numerics

I have a data.frame that has counts for several groups:
# Example data: 200 rows, a two-level group and an integer count `n`.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned; the integer conversion is done while building the column.
set.seed(1)
df <- data.frame(
  group = sample(c("a", "b"), 200, replace = TRUE),
  n = as.integer(round(runif(200, 1, 2)))
)
And I'm trying to display a histogram of df$n, facetted by the group using ggplot2's geom_histogram:
library(ggplot2)

# Histogram of n with default binning, one panel per group.
ggplot(data = df, mapping = aes(x = n)) +
  geom_histogram() +
  facet_grid(~group) +
  theme_minimal()
Any idea how to get ggplot2 to label the x-axis ticks with the integers the histogram is summarizing rather than the numeric values it is currently showing?
You could tweak this by the binwidth argument of geom_histogram:
library(ggplot2)

# Same histogram, with an explicit bin width of 0.5.
ggplot(data = df, mapping = aes(x = n)) +
  geom_histogram(binwidth = 0.5) +
  facet_grid(~group) +
  theme_minimal()
Another example:
# Second example: counts rounded from a uniform draw between 1 and 5.
# TRUE is spelled out because `T` can be reassigned.
set.seed(1)
df <- data.frame(
  group = sample(c("a", "b"), 200, replace = TRUE),
  n = round(runif(200, 1, 5))
)
library(ggplot2)

# Histogram of the second example, bin width 0.5, one panel per group.
ggplot(data = df, mapping = aes(x = n)) +
  geom_histogram(binwidth = 0.5) +
  facet_grid(~group) +
  theme_minimal()
You can manually specify the breaks with scale_x_continuous(breaks = seq(1, 2)). Alternatively, you can set the breaks and labels separately as well.

Draw only specific axis text labels

I have the following data.table:
# library() fails loudly if a package is missing, unlike require().
library(data.table)
library(ggplot2)

# 2000 ids spread over five groups, each with a logical flag that is TRUE
# with probability 0.25.
set.seed(1234)
dt <- data.table(
  id = paste0("ID_", 1:2000),
  group = rep(LETTERS[1:5], 400),
  value = as.logical(rbinom(2000, 1, prob = 0.25))
)
I would like to create a ggplot like the one below in which the y-axis shows text labels only for the ids whose value is TRUE, without removing the other data. Additionally, if possible, the text labels should not overlap, so that they make use of all the empty y-axis space.
# draws all categorical id values
# Tile plot with every id on the y-axis (ordered by -value), filled by value.
ggplot(data = dt, mapping = aes(x = group, y = reorder(id, -value), fill = value)) +
  geom_tile() +
  theme_bw()
Update: For the first 100 cases:
library(dplyr) # provides %>%, mutate() and slice() used below

# First 100 rows; ylabel holds the id where value is TRUE and "" otherwise.
dt1 <- dt %>%
  mutate(ylabel = ifelse(value, id, "")) %>%
  slice(1:100)

ggplot(dt1, aes(y = reorder(id, -value), x = group, fill = value)) +
  geom_tile() +
  scale_y_discrete(
    # Look each label up by id. The axis follows the reordered factor
    # levels, not the row order of dt1, so passing dt1$ylabel positionally
    # would attach labels to the wrong rows.
    labels = function(ids) dt1$ylabel[match(ids, dt1$id)]
  ) +
  theme_bw()

Color outlier dots above a specific value in R

How do I color outliers that are above a specific value using ggplot2 in R?
(Sorry for the seemingly easy question, I am a beginner.) The outliers arise because some rows have a frequency of 0, and I transform this column by taking -log10(), so anything with a frequency of 0 is transformed into Inf. Attached is a screenshot of my plot; essentially, I want all the outlier points above 10 on the y-axis to be a different color.
# Draw, print and save box plots of -log10(frequency) for three Type groups.
#
# df: data frame with a `frequency` column and a `Type` column.
# Side effects: assigns `plot.data` and `s` with `<<-`, prints the plot and
# writes it to "frequency_by_type.png".
boxplots <- function(df) {
  # Missing frequencies are treated as 0; -log10(0) is Inf
  df$'frequency'[is.na(df$'frequency')] <- 0.00
  df$'-log10(frequency)' <- -log10(df$'frequency')

  # One small data frame per Type, stacked into long format
  x <- data.frame(group = 'x', value = df$'-log10(frequency)'[df$'Type'=='x'])
  y <- data.frame(group = 'y', value = df$'-log10(frequency)'[df$'Type'=='y'])
  # NOTE(review): the Type value 'c=z' looks like a typo for 'z' -- confirm
  # against the data before changing it.
  z <- data.frame(group = 'z', value = df$'-log10(frequency)'[df$'Type'=='c=z'])
  plot.data <<- rbind(x, y, z)

  # Axis labels in the same order as the groups x, y, z (the first label
  # was previously "z", which mislabelled group x)
  labels <- c("x", "y", "z")

  t <- plot.data %>%
    ggplot(aes(x = group, y = value, fill = group)) +
    geom_boxplot() +
    scale_fill_viridis(discrete = TRUE, alpha = 0.6) +
    geom_jitter(color = "black", size = 0.4, alpha = 0.9) +
    theme_ipsum() +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 11)
    ) +
    ggtitle("Distribution of -log10(frequency) by Type") +
    xlab("Type") +
    ylab("-log10(frequency)") +
    scale_x_discrete(labels = labels) +
    scale_y_continuous(limits = c(0, 10), breaks = seq(0, 10, by = 2))

  print(t)
  s <<- t
  ggsave("frequency_by_type.png", plot = t)
}
You could just create a new column indicating whether each point is an outlier or not and map it to the geom_jitter color. I reduced the answer to a smaller example, but you should be able to adapt it accordingly:
library(ggplot2)
library(viridis)
library(dplyr) # %>% and mutate() come from dplyr

# Small example: two groups with the same five values each
plot.data <- data.frame(
  group = c("1", "1", "1", "1", "1", "2", "2", "2", "2", "2"),
  value = c(1, 5, 10, 6, 3, 1, 5, 10, 6, 3)
)

# Flag values above 9 and map the flag to the colour of the jittered points
t <- plot.data %>%
  mutate(outlier = ifelse(value > 9, "YES", "NO")) %>%
  ggplot(aes(x = group, y = value, fill = group)) +
  geom_boxplot() +
  geom_jitter(aes(group, value, color = outlier), size = 2, alpha = 0.9) +
  scale_fill_viridis(discrete = TRUE, alpha = 0.6)
t
library(ggplot2)

# dose is numeric in the built-in data set; convert it to a factor so that
# geom_boxplot() draws one box per dose instead of a single box over a
# continuous x axis.
ToothGrowth$dose <- as.factor(ToothGrowth$dose)

# Basic box plot
p <- ggplot(ToothGrowth, aes(x = dose, y = len)) +
  geom_boxplot()
p

# Rotate the box plot
p + coord_flip()

# Notched box plot
ggplot(ToothGrowth, aes(x = dose, y = len)) +
  geom_boxplot(notch = TRUE)

# Change outlier colour, shape and size
ggplot(ToothGrowth, aes(x = dose, y = len)) +
  geom_boxplot(
    outlier.colour = "red",
    outlier.shape = 8,
    outlier.size = 4
  )

Directlabels package-- labels do not fit in plot area

I want to explore the directlabels package with ggplot. I am trying to plot labels at the endpoint of a simple line chart; however, the labels are clipped by the plot panel. (I intend to plot about 10 financial time series in one plot and I thought directlabels would be the best solution.)
I would imagine there may be another solution using annotate or some other geoms. But I would like to solve the problem using directlabels. Please see code and image below. Thanks.
library(ggplot2)
library(directlabels)
library(tidyr)

# Generate a data frame with random data, for illustration and plotting
x <- seq_len(100)
y <- cumsum(rnorm(n = 100, mean = 6, sd = 15))
y2 <- cumsum(rnorm(n = 100, mean = 2, sd = 4))
data <- data.frame(month = x, stocks = y, bonds = y2)

# Long format with columns month, asset, value. The columns are named
# explicitly: gather(data, month) reused the existing column name `month`
# as the key and only worked because the columns were renamed afterwards.
tidy_data <- pivot_longer(
  data,
  cols = c(stocks, bonds),
  names_to = "asset",
  values_to = "value"
)
# Line chart with a direct label at the last point of each series
p <- ggplot(data = tidy_data, mapping = aes(x = month, y = value, colour = asset)) +
  geom_line() +
  geom_dl(mapping = aes(colour = asset, label = asset), method = "last.points") +
  theme_bw()
On data visualization principles, I would like to avoid extending the x-axis to make the labels fit--this would mean having data space with no data. Rather, I would like the labels to extend toward the white space beyond the chart box/panel (if that makes sense).
In my opinion, direct labels is the way to go. Indeed, I would position labels at the beginning and at the end of the lines, creating space for the labels using expand(). Also note that with the labels, there is no need for the legend.
This is similar to answers here and here.
library(ggplot2)
library(directlabels)
library(grid)
library(tidyr)

# Random illustrative data: two cumulative series over 100 months
x <- seq_len(100)
y <- cumsum(rnorm(n = 100, mean = 6, sd = 15))
y2 <- cumsum(rnorm(n = 100, mean = 2, sd = 4))
data <- data.frame(month = x, stocks = y, bonds = y2)

# Long format with columns month, asset, value. The columns are named
# explicitly: gather(data, month) reused the existing column name `month`
# as the key and only worked because the columns were renamed afterwards.
tidy_data <- pivot_longer(
  data,
  cols = c(stocks, bonds),
  names_to = "asset",
  values_to = "value"
)
# Labels at both ends of each line; the x scale is expanded to make room
# for them and the colour legend is dropped.
ggplot(
  data = tidy_data,
  mapping = aes(x = month, y = value, colour = asset, group = asset)
) +
  geom_line() +
  scale_colour_discrete(guide = "none") +
  scale_x_continuous(expand = c(0.15, 0)) +
  geom_dl(
    mapping = aes(label = asset),
    method = list(dl.trans(x = x + .3), "last.bumpup")
  ) +
  geom_dl(
    mapping = aes(label = asset),
    method = list(dl.trans(x = x - .3), "first.bumpup")
  ) +
  theme_bw()
If you prefer to push the labels into the plot margin, direct labels will do that. But because the labels are positioned outside the plot panel, clipping needs to be turned off.
# No x-axis expansion; labels are shifted right of the last point and the
# right plot margin is widened.
p1 <- ggplot(
  data = tidy_data,
  mapping = aes(x = month, y = value, colour = asset, group = asset)
) +
  geom_line() +
  scale_colour_discrete(guide = "none") +
  scale_x_continuous(expand = c(0, 0)) +
  geom_dl(
    mapping = aes(label = asset),
    method = list(dl.trans(x = x + .3), "last.bumpup")
  ) +
  theme_bw() +
  theme(plot.margin = unit(c(1, 4, 1, 1), "lines"))

# Turn off clipping on the panel entry of the plot's layout, then draw it
gt1 <- ggplotGrob(p1)
gt1$layout$clip[which(gt1$layout$name == "panel")] <- "off"
grid.draw(gt1)
This effect can also be achieved using geom_text (and probably also annotate), that is, without the need for direct labels.
# Same idea with geom_text: one label per asset taken from the month-100
# rows, anchored at x = Inf and nudged with hjust.
p2 <- ggplot(
  data = tidy_data,
  mapping = aes(x = month, y = value, group = asset, colour = asset)
) +
  geom_line() +
  geom_text(
    data = subset(tidy_data, month == 100),
    mapping = aes(label = asset, colour = asset, x = Inf, y = value),
    hjust = -.2
  ) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_colour_discrete(guide = "none") +
  theme_bw() +
  theme(plot.margin = unit(c(1, 3, 1, 1), "lines"))

# Turn off clipping on the panel entry of the plot's layout, then draw it
gt2 <- ggplotGrob(p2)
gt2$layout$clip[which(gt2$layout$name == "panel")] <- "off"
grid.draw(gt2)
Since you didn't provide a reproducible example, it's hard to say what the best solution is. However, I would suggest trying to manually adjust the x-scale: use a "buffer" to increase the plot area.
# Extend the x-axis past the last data point so the end-of-line labels fit.
# The limits are derived from the data; `buffer` is the extra room on the
# right in x-axis units -- adjust it to the width of the labels.
minimum_value <- min(tidy_data$month)
maximum_value <- max(tidy_data$month)
buffer <- 10

p <- ggplot(tidy_data, aes(x = month, y = value, colour = asset)) +
  geom_line() +
  geom_dl(aes(colour = asset, label = asset), method = "last.points") +
  theme_bw() +
  xlim(minimum_value, maximum_value + buffer)
Using scale_x_discrete() or scale_x_continuous() would likely also work well here if you want to use the direct labels package. Alternatively, annotate or a simple geom_text would also work well.

Resources