Below is a working example
library(ggplot2)
set.seed(926)
df <- data.frame(expression = rnorm(900),
time = c(rnorm(300), rnorm(300, 1, 2), rnorm(300, 2,0.5)),
membership = factor(rep(1:3, each = 300)))
ggplot(df, aes(x = time, y = expression, fill = membership)) +
geom_point(shape=21, size = 3) +
geom_rug(data = subset(df, membership ==3), sides = "b", color = "green", length = unit(1.5, "cm")) +
geom_rug(data = subset(df, membership ==2), sides = "b", color = "blue", length = unit(1, "cm")) +
geom_rug(data = subset(df, membership ==1), sides = "b", color = "red") +
scale_y_continuous(expand = c(0.3, 0))
My hope is something like
.
Note that I know the options of outside = TRUE, side = "tb" out there. But placing all rug plots at the bottom is what I really hope for.
geom_rug is designed to be drawn at the margins of a plot. It's probably best to use geom_point with a custom symbol in this case:
ggplot(df, aes(x = time, y = expression, fill = membership)) +
geom_point(shape=21, size = 3) +
geom_point(aes(y = -as.numeric(membership) - 2.5, color = membership),
shape = "|", size = 8) +
geom_hline(yintercept = -3) +
theme_classic(base_size = 20) +
scale_y_continuous(breaks = c(-2, 0, 2))
I don't think the position of geom_rug() can be easily customised. I'd recommend to use geom_segment() instead to draw the rugs like you'd want them.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.2.2
set.seed(926)
df <- data.frame(expression = rnorm(900),
time = c(rnorm(300), rnorm(300, 1, 2), rnorm(300, 2,0.5)),
membership = factor(rep(1:3, each = 300)))
# Helper variables
limits <- range(df$expression)
step <- diff(limits) * 0.1
size <- 0.45 * step
ggplot(df, aes(x = time, y = expression, fill = membership)) +
geom_point(shape=21, size = 3) +
geom_segment(
aes(
colour = membership,
xend = time,
y = limits[1] - as.numeric(membership) * step + size,
yend = limits[1] - as.numeric(membership) * step - size
)
)
Created on 2022-12-12 by the reprex package (v2.0.1)
Related
First we prepare some toy data that sufficiently resembles the one I am working with.
rawdata <- data.frame(Score = rnorm(1000, seq(1, 0, length.out = 10), sd = 1),
Group = rep(LETTERS[1:3], 10000))
stdev <- c(10.78,10.51,9.42)
Now we plot the estimated densities via geom_density_ridges. I also add a grey highlight around zero via geom_rect. I also flip the chart with coord_flip.
p <- ggplot(rawdata, aes(x = Score, y = Group)) +
scale_y_discrete() +
geom_rect(inherit.aes = FALSE, mapping = aes(ymin = 0, ymax = Inf, xmin = -0.1 * min(stdev), xmax = 0.1 * max(stdev)),
fill = "grey", alpha = 0.5) +
geom_density_ridges(aes(fill = Group), scale = 0.5, size = 1, alpha=0.5) +
scale_color_manual(values = col) +
scale_fill_manual(values = col) +
labs(title="Toy Graph", y="Group", x="Value") +
coord_flip(xlim = c(-8, 8), ylim = NULL, expand = TRUE, clip = "on")
p
And this is the solution I get, which is close to what I was expecting, despite the detail of this enormous gap between the y axis an the start of the first factor in the x axis A. I tried using expand=c(0,0) inside scale_y_discrete() following some suggestions from other posts, but it does not make the gap smaller at all. If possible I would still like to have a certain gap, although minimal. I've been also trying to flip the densities in the y axis so the gap is filled by first factor density plot but I have been unsuccessful as it does not seem as trivial as one could expect.
Sorry, I know this might be technically two different questions, "How to reduce the gap from the y axis to the first density plot?" and "How to flip the densities from y axis to reduce the gap?" But I would really be happy with the first one as I understand the second question seems to be apparently less straightforward.
Thanks in advance! Any help is appreciated.
Flipping the densities also effectively reduces the space, so this might be all you need to do. You can achieve it with a negative scale parameter:
ggplot(rawdata, aes(x = Score, y = Group)) +
scale_y_discrete() +
geom_rect(inherit.aes = FALSE,
mapping = aes(ymin = 0, ymax = Inf,
xmin = -0.1 * min(stdev),
xmax = 0.1 * max(stdev)),
fill = "grey", alpha = 0.5) +
geom_density_ridges(aes(fill = Group), scale = -0.5, size = 1, alpha = 0.5) +
scale_color_manual(values = col) +
scale_fill_manual(values = col) +
labs(title = "Toy Graph", y = "Group", x = "Value") +
coord_flip(xlim = c(-8, 8), ylim = NULL, expand = TRUE, clip = "on")
If you want to keep the densities pointing the same way but just reduce space on the left side, simply set hard limits in your coord_flip, with no expansion:
ggplot(rawdata, aes(x = Score, y = Group)) +
geom_rect(inherit.aes = FALSE,
mapping = aes(ymin = 0, ymax = Inf,
xmin = -0.1 * min(stdev),
xmax = 0.1 * max(stdev)),
fill = "grey", alpha = 0.5) +
geom_density_ridges(aes(fill = Group), scale = 0.5, size = 1, alpha = 0.5) +
scale_color_manual(values = col) +
scale_fill_manual(values = col) +
scale_y_discrete() +
labs(title = "Toy Graph", y = "Group", x = "Value") +
coord_flip(xlim = c(-8, 8), ylim = c(0.8, 4), expand = FALSE)
so far I can manage to build the following geom_density figure using ggpot2:
cuts1 <- data.frame(Ref="p", vals=c(140))
cuts2 <- data.frame(Ref="s", vals=c(300))
cuts3 <- data.frame(Ref="m", vals=c(250))
cuts <- rbind(cuts1, cuts2, cuts3)
ggplot(mtcars, aes(x=disp)) +
geom_density(color = "black",
fill = 4,
alpha = 1) +
geom_vline(data = cuts , aes(xintercept=vals, color= Ref) )
And I wondered if someone knew a way to plot the geom_vline much more like that :
Where the lines do not reach the top and bottom of the figure and where the labels are all displayed with a rotation.
Here is one potential solution:
library(ggplot2)
cuts1 <- data.frame(Ref="p", vals=c(140))
cuts2 <- data.frame(Ref="s", vals=c(300))
cuts3 <- data.frame(Ref="m", vals=c(250))
cuts <- rbind(cuts1, cuts2, cuts3)
ggplot(mtcars, aes(x=disp)) +
geom_density(color = "black",
fill = 4,
alpha = 1) +
geom_segment(data = cuts, aes(x=vals, xend = vals,
y = 0, yend = max(density(mtcars$disp)[[2]]),
color= Ref), key_glyph = "vpath") +
geom_text(data = cuts, aes(x = vals, y = max(density(mtcars$disp)[[2]]) * 1.02,
label = Ref), nudge_x = 5, angle = 45)
Created on 2022-08-29 by the reprex package (v2.0.1)
Take a look at geom_segment, you can set the yend parameter to where you want your lines to end.
I try to establish R as data visualisation tool in my company. A typical graph type used in my department are waterfall charts (https://en.wikipedia.org/wiki/Waterfall_chart).
In R, there are some packages and hints for ggplot to generate a waterfall chart (https://learnr.wordpress.com/2010/05/10/ggplot2-waterfall-charts/), which I used already.
Unfortunately, a common feature for the used waterfall charts are annotations with arrows to indicate the percentage changes within the steps.
See an example below:
Or here in this video (https://www.youtube.com/watch?v=WMHf7uFR6Rk)
The software used to produce such kind of plots is think cell (https://www.think-cell.com/), which is an add-on to Excel and Powerpoint.
The problem I have is that I don't know how to start to tackle the topic. My first thoughts are going in this direction:
Use geom_segment for generating the arrows and boxes
Use ggplot's annotate funktion to place the text at the arrows or in the boxes
Calculate the positions automatically based on the data provided to the waterfall chart.
May I ask you, if you have additional thoughts/ideas to implement such graphs in ggplot?
Best Regards Markus
Here's an example of the approach I would take.
Step 1. Pick which elements should be added, and add them one at a time.
Let's say we're starting with this simple chart:
df <- data.frame(x = c(2007, 2008, 2009),
y = c(100, 120, 140))
ggplot(df, aes(x, y, label = y)) +
geom_col() +
geom_text(vjust = -0.5)
First of all, we need some extra vertical space:
ggplot(df, aes(x, y, label = y)) +
geom_col() +
geom_text(vjust = -0.5) +
scale_y_continuous(expand = expand_scale(add = c(10, 50))) # Add 50 y padding
Now, I incrementally add layers until it looks like I want:
# Semi-manual proof of concept
ggplot(df, aes(x, y, label = y)) +
geom_col() +
geom_text(vjust = -0.5) +
scale_y_continuous(expand = expand_scale(add = c(10, 50))) + # Add 50 y padding
# Line with arrow
geom_segment(aes(x = df$x[3], y = df$y[3] + 50,
xend = df$x[3], yend = df$y[3] + 50),
arrow = arrow(length = unit(0.02, "npc"), type = "closed")) +
# Background box
geom_tile(aes(x = mean(c(df$x[3], df$x[3])),
y = mean(c(df$y[3], df$y[3])) + 50, width = 1, height = 40),
fill = "white", color = "black", size = 0.5) +
# Text
geom_text(aes(x = mean(c(df$x[3], df$x[3])),
y = mean(c(df$y[3], df$y[3])) + 50,
label = paste0("CAGR\n",
df$x[3], "-", df$x[3], "\n",
scales::percent((df$y[3] / df$y[3]) ^ (1/(df$x[3]-df$x[3])) - 1))))
Step 2. Make it into a function
Now I move the CAGR-related layers into a function, replacing most of the constants with function parameters.
add_CAGR <- function(df, first_val_pos, second_val_pos,
y_offset, box_width = 1, box_height) {
list(
# Line with arrow
geom_segment(aes(x = df$x[first_val_pos],
xend = df$x[second_val_pos],
y = df$y[first_val_pos] + y_offset,
yend = df$y[second_val_pos] + y_offset),
arrow = arrow(length = unit(0.02, "npc"), type = "closed")),
# Background box
geom_tile(aes(x = mean(c(df$x[first_val_pos], df$x[second_val_pos])),
y = mean(c(df$y[first_val_pos], df$y[second_val_pos])) + y_offset,
width = box_width, height = box_height),
fill = "white", color = "black", size = 0.5),
# Text
geom_text(aes(x = mean(c(df$x[first_val_pos], df$x[second_val_pos])),
y = mean(c(df$y[first_val_pos], df$y[second_val_pos])) + y_offset,
label = paste0("CAGR\n",
df$x[first_val_pos], "-", df$x[second_val_pos], "\n",
scales::percent((df$y[second_val_pos] / df$y[1]) ^
(1/(df$x[second_val_pos]-df$x[first_val_pos])) - 1))),
lineheight = 0.8)
)
}
Step 3: Use in plot
ggplot(df, aes(x, y, label = y)) +
geom_col() +
geom_text(vjust = -0.5) +
scale_y_continuous(expand = expand_scale(add = c(0, 50))) + # Add 50 y padding
add_CAGR(df, first_val_pos = 1, second_val_pos = 3,
y_offset = 50,
box_width = 0.7, box_height = 40)
Or the same thing just between the first two bars:
ggplot(df, aes(x, y, label = y)) +
geom_col() +
geom_text(vjust = -0.5) +
scale_y_continuous(expand = expand_scale(add = c(0, 50))) + # Add 50 y padding
add_CAGR(df, first_val_pos = 1, second_val_pos = 2,
y_offset = 50,
box_width = 0.7, box_height = 40)
I have the following code, which produces the following plot:
cols <- brewer.pal(n = 3, name = 'Dark2')
p4 <- ggplot(all.m, aes(x=xval, y=yval, colour = Approach, ymax = 0.95)) + theme_bw() +
geom_errorbar(aes(ymin= yval - se, ymax = yval + se), width=5, position=pd) +
geom_line(position=pd) +
geom_point(aes(shape=Approach, colour = Approach), size = 4) +
geom_hline(aes(yintercept = cp.best$slope, colour = "C2P"), show_guide = FALSE) +
scale_color_manual(name="Approach", breaks=c("C2P", "P2P", "CP2P"), values = cols[c(1,3,2)]) +
scale_y_continuous(breaks = seq(0.4, 0.95, 0.05), "Test AUROC") +
scale_x_continuous(breaks = seq(10, 150, by = 20), "# Number of Patient Samples in Training")
p4 <- p4 + theme(legend.direction = 'horizontal',
legend.position = 'top',
plot.margin = unit(c(5.1, 7, 4.5, 3.5)/2, "lines"),
text = element_text(size=15), axis.title.x=element_text(vjust=-1.5), axis.title.y=element_text(vjust=2))
p4 <- p4 + guides(colour=guide_legend(override.aes=list(shape=c(NA,17,16))))
p4
When I try show_guide = FALSE in geom_point, the shape of the point in the upper legend are all set to default solid circles.
How can I make the lower legend to disappear, without affecting the upper legend?
This is a solution, complete with reproducible data:
library("ggplot2")
library("grid")
library("RColorBrewer")
cp2p <- data.frame(xval = 10 * 2:15, yval = cumsum(c(0.55, rnorm(13, 0.01, 0.005))), Approach = "CP2P", stringsAsFactors = FALSE)
p2p <- data.frame(xval = 10 * 1:15, yval = cumsum(c(0.7, rnorm(14, 0.01, 0.005))), Approach = "P2P", stringsAsFactors = FALSE)
pd <- position_dodge(0.1)
cp.best <- list(slope = 0.65)
all.m <- rbind(p2p, cp2p)
all.m$Approach <- factor(all.m$Approach, levels = c("C2P", "P2P", "CP2P"))
all.m$se <- rnorm(29, 0.1, 0.02)
all.m[nrow(all.m) + 1, ] <- all.m[nrow(all.m) + 1, ] # Creates a new row filled with NAs
all.m$Approach[nrow(all.m)] <- "C2P"
cols <- brewer.pal(n = 3, name = 'Dark2')
p4 <- ggplot(all.m, aes(x=xval, y=yval, colour = Approach, ymax = 0.95)) + theme_bw() +
geom_errorbar(aes(ymin= yval - se, ymax = yval + se), width=5, position=pd) +
geom_line(position=pd) +
geom_point(aes(shape=Approach, colour = Approach), size = 4, na.rm = TRUE) +
geom_hline(aes(yintercept = cp.best$slope, colour = "C2P")) +
scale_color_manual(values = c(C2P = cols[1], P2P = cols[2], CP2P = cols[3])) +
scale_shape_manual(values = c(C2P = NA, P2P = 16, CP2P = 17)) +
scale_y_continuous(breaks = seq(0.4, 0.95, 0.05), "Test AUROC") +
scale_x_continuous(breaks = seq(10, 150, by = 20), "# Number of Patient Samples in Training")
p4 <- p4 + theme(legend.direction = 'horizontal',
legend.position = 'top',
plot.margin = unit(c(5.1, 7, 4.5, 3.5)/2, "lines"),
text = element_text(size=15), axis.title.x=element_text(vjust=-1.5), axis.title.y=element_text(vjust=2))
p4
The trick is to make sure that all of the desired levels of all.m$Approach appear in all.m, even if one of them gets dropped out of the graph. The warning about the omitted point is suppressed by the na.rm = TRUE argument to geom_point.
Short answer:
Just add a dummy geom_point layer (transparent points) where shape is mapped to the same level as in geom_hline.
geom_point(aes(shape = "int"), alpha = 0)
Longer answer:
Whenever possible, ggplot merges / combines legends of different aesthetics. For example, if colour and shape is mapped to the same variable, then the two legends are combined into one.
I illustrate this using simple data set with 'x', 'y' and a grouping variable 'grp' with two levels:
df <- data.frame(x = rep(1:2, 2), y = 1:4, grp = rep(c("a", "b"), each = 2))
First we map both color and shape to 'grp'
ggplot(data = df, aes(x = x, y = y, color = grp, shape = grp)) +
geom_line() +
geom_point(size = 4)
Fine, the legends for the aesthetics, color and shape, are merged into one.
Then we add a geom_hline. We want it to have a separate color from the geom_lines and to appear in the legend. Thus, we map color to a variable, i.e. put color inside aes of geom_hline. In this case we do not map the color to a variable in the data set, but to a constant. We may give the constant a desired name, so we don't need to rename the legend entries afterwards.
ggplot(data = df, aes(x = x, y = y, color = grp, shape = grp)) +
geom_line() +
geom_point(size = 4) +
geom_hline(aes(yintercept = 2.5, color = "int"))
Now two legends appears, one for the color aesthetics of geom_line and geom_hline, and one for the shape of the geom_points. The reason for this is that the "variable" which color is mapped to now contains three levels: the two levels of 'grp' in the original data, plus the level 'int' which was introduced in the geom_hline aes. Thus, the levels in the color scale differs from those in the shape scale, and by default ggplot can't merge the two scales into one legend.
How to combine the two legends?
One possibility is to introduce the same, additional level for shape as for color by using a dummy geom_point layer with transparent points (alpha = 0) so that the two aesthetics contains the same levels:
ggplot(data = df, aes(x = x, y = y, color = grp, shape = grp)) +
geom_line() +
geom_point(size = 4) +
geom_hline(aes(yintercept = 2.5, color = "int")) +
geom_point(aes(shape = "int"), alpha = 0) # <~~~~ a blank geom_point
Another possibility is to convert the original grouping variable to a factor, and add the "geom_hline level" to the original levels. Then use drop = FALSE in scale_shape_discrete to include "unused factor levels from the scale":
datadf$grp <- factor(df$grp, levels = c(unique(df$grp), "int"))
ggplot(data = df, aes(x = x, y = y, color = grp, shape = grp)) +
geom_line() +
geom_point(size = 4) +
geom_hline(aes(yintercept = 2.5, color = "int")) +
scale_shape_discrete(drop = FALSE)
Then, as you already know, you may use the guides function to "override" the shape aesthetics in the legend, and remove the shape from the geom_hline entry by setting it to NA:
guides(colour = guide_legend(override.aes = list(shape = c(16, 17, NA))))
Similar to geom_area plot with areas and outlines ggplot, I'm trying to construct a stacked area plot with outlines. Since my variables are discrete I'm using geom_bar() for stacking them. The code is as follows:
require(ggplot2)
require(reshape)
x = 0:4
y1 = c(3,2,2,1,0)
y2 = c(1,1,0,0,0)
data = data.frame(x,y1,y2)
data.plot <-melt(data, id.vars = "x")
cols = c(y1="darkgrey",y2="lightgrey")
p = ggplot(data.plot,aes(x=x,y=value,fill=variable))
p + geom_bar(aes(width=1),stat = "identity") + theme_bw() + scale_fill_manual(values=cols)
Which gives
My problem is now adding the outlines as in the example I referred to. I can use colour="black" in geom_bar() but this adds vertical lines between the bars which look quite ugly.
Does anyone have a suggestion to get these outlines? The solution doesn't have to be based on geom_bar.
If possible, I am also interested in a solution where only the dark grey part has an outline, since this outline has an important interpretation. Perhaps this could be based on some shifted version of geom_line()?
Here is another approach, using annotate("path"). This suggestion has hard-coded values for some of the path components, but I suspect there is a way to algorithmically fill in those values (perhaps with gg_build().
p <- ggplot(data.plot,aes(x=x, y=value, fill=variable))
p <- p + geom_bar(aes(width=1), stat = "identity") + theme_bw() + scale_fill_manual(values=cols)
p <- p + annotate(x=c(-.5, 0.5, 0.5, 2.5, 2.5, 3.5, 3.5),
y=c(3, 3, 2, 2, 1, 1, 0 ), group = 1, "path", color = "black", size = 2)
p <- p + annotate(x=c(min(x)-.5, min(x)+0.5, min(x)+0.5, min(x)+2.5, min(x)+2.5, min(x)+3.5, min(x)+3.5),
y=c(max(value), max(value), max(value)- 1, max(value)- 1, max(value)- 2, max(value)- 2, min(value)), group = 1, "path", color = "black", size = 2)
p
Your plotting code (I don't want to use c since that's a function):
p <- ggplot(data.plot, aes(x = x, y = value, fill = variable))
p <- p + geom_bar(aes(width = 1), stat = "identity") + theme_bw() + scale_fill_manual(values = cols)
Now add a stepping line along the bars:
p <- p + geom_step(aes(x = x - 0.5), position = "stack")
It's a bit more work to fix a line along the axes:
library (dplyr)
y.max <- data.plot %>% group_by(x) %>% summarize(s = sum(value))
y.max <- max(y.max$s)
p + geom_step(aes(x = x - 0.5, ymax = value), position = "stack") +
annotate('segment',
x = min(data.plot$x) - 0.5,
xend = min(data.plot$x) - 0.5,
y = 0,
yend = y.max) +
annotate('segment',
x = min(data.plot$x) - 0.5,
xend = max(data.plot$x) - 0.5,
y = 0,
yend = 0)
I'd be interested in simpler solutions!