Related
I am using plot_grid and cowplot to arrange plots in a grid. I need to have some "empty" plots in the grid. Using NULL works fine - but the space still gets labelled. Is there a way to make NULL plots have no label automatically? I know I can do all plot labels manually.
MWE (adapted from this page)
library(ggplot2)
df <- data.frame(
x = 1:10, y1 = 1:10, y2 = (1:10)^2, y3 = (1:10)^3, y4 = (1:10)^4
)
p1 <- ggplot(df, aes(x, y1)) + geom_point()
p2 <- ggplot(df, aes(x, y2)) + geom_point()
p3 <- ggplot(df, aes(x, y3)) + geom_point()
p4 <- ggplot(df, aes(x, y4)) + geom_point()
p5 <- ggplot(mpg, aes(as.factor(year), hwy)) +
geom_boxplot() +
facet_wrap(~class, scales = "free_y")
# simple grid
plot_grid(p1, NULL, p3, p4, labels = "AUTO")
I'm looking for the behaviour you could get with plot_grid(p1, NULL, p3, p4, labels = c("A","","B","C"), but I don't want to have to set up each plot individually
Here is a potential solution using a modified cowplot::plot_grid():
plot_grid_modified <- function(..., plotlist = NULL, align = c("none", "h", "v", "hv"),
axis = c("none", "l", "r", "t", "b", "lr", "tb", "tblr"),
nrow = NULL, ncol = NULL, rel_widths = 1,
rel_heights = 1, labels = NULL, label_size = 14,
label_fontfamily = NULL, label_fontface = "bold", label_colour = NULL,
label_x = 0, label_y = 1,
hjust = -0.5, vjust = 1.5, scale = 1., greedy = TRUE,
byrow = TRUE, cols = NULL, rows = NULL) {
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
num_plots <- length(plots)
if (!is.null(cols)){
warning("Argument 'cols' is deprecated. Use 'ncol' instead.")
}
if (!is.null(rows)){
warning("Argument 'rows' is deprecated. Use 'nrow' instead.")
}
scale <- rep_len(scale, num_plots)
if (sum(scale <= 0) > 1){
stop("Argument 'scale' needs to be greater than 0.")
}
# internally, this function operates with variables cols and rows instead of ncol and nrow
if (!is.null(ncol)){
cols <- ncol
}
if (!is.null(nrow)){
rows <- nrow
}
# calculate grid dimensions
if (is.null(cols) && is.null(rows)){
# if neither rows nor cols are given, we make a square grid
cols <- ceiling(sqrt(num_plots))
rows <- ceiling(num_plots/cols)
}
# alternatively, we know at least how many rows or how many columns we need
if (is.null(cols)) cols <- ceiling(num_plots/rows)
if (is.null(rows)) rows <- ceiling(num_plots/cols)
# if the user wants to layout the plots by column, we use the calculated rows to reorder plots
if (!isTRUE(byrow)) plots <- plots[c(t(matrix(c(1:num_plots, rep(NA, (rows * cols) - num_plots)), nrow = rows, byrow = FALSE)))]
# Align the plots (if specified)
grobs <- align_plots(plotlist = plots, align = align, axis = axis, greedy = greedy)
if ("AUTO" %in% labels) {
count <- 0
labels <- c()
for (idx in seq_along(plots)) {
if (!is.null(unlist(plots[idx]))) {
count <- count + 1
labels <- c(labels, LETTERS[count])
} else {
labels <- c(labels, "")
}
}
} else if ("auto" %in% labels) {
count <- 0
labels <- c()
for (idx in seq_along(plots)) {
if (!is.null(unlist(plots[idx]))) {
count <- count + 1
labels <- c(labels, letters[count])
} else {
labels <- c(labels, "")
}
}
}
# label adjustments can be provided globally for all labels
# or individually for each label
hjust <- rep_len(hjust, length(labels))
vjust <- rep_len(vjust, length(labels))
label_x <- rep_len(label_x, length(labels))
label_y <- rep_len(label_y, length(labels))
# calculate appropriate vectors of rel. heights and widths
rel_heights <- rep(rel_heights, length.out = rows)
rel_widths <- rep(rel_widths, length.out = cols)
# calculate the appropriate coordinates and deltas for each row and column
x_deltas <- rel_widths/sum(rel_widths)
y_deltas <- rel_heights/sum(rel_heights)
xs <- cumsum(rel_widths)/sum(rel_widths) - x_deltas
ys <- 1 - cumsum(rel_heights)/sum(rel_heights)
# now place all the plots
p <- ggdraw() # start with nothing
col_count <- 0
row_count <- 1
for (i in 1:(rows*cols)){
if (i > num_plots) break
x_delta <- x_deltas[col_count+1]
y_delta <- y_deltas[row_count]
x <- xs[col_count+1]
y <- ys[row_count]
# place the plot
p_next <- grobs[[i]]
if (!is.null(p_next)){
p <- p + draw_grob(p_next, x, y, x_delta, y_delta, scale[i])
}
# place a label if we have one
if (i <= length(labels)){
p <- p + draw_plot_label(labels[i], x + label_x[i]*x_delta, y + label_y[i]*y_delta, size = label_size,
family = label_fontfamily, fontface = label_fontface, colour = label_colour,
hjust = hjust[i], vjust = vjust[i])
}
# move on to next grid position
col_count <- col_count + 1
if (col_count >= cols){
col_count <- 0
row_count <- row_count + 1
}
}
p
}
library(ggplot2)
library(cowplot)
df <- data.frame(
x = 1:10, y1 = 1:10, y2 = (1:10)^2, y3 = (1:10)^3, y4 = (1:10)^4
)
p1 <- ggplot(df, aes(x, y1)) + geom_point()
p2 <- ggplot(df, aes(x, y2)) + geom_point()
p3 <- ggplot(df, aes(x, y3)) + geom_point()
p4 <- ggplot(df, aes(x, y4)) + geom_point()
p5 <- ggplot(mpg, aes(as.factor(year), hwy)) +
geom_boxplot() +
facet_wrap(~class, scales = "free_y")
# simple grid
plot_grid_modified(p1, NULL, p3, p4, labels = "AUTO")
I am trying to align three plots (with different scales on the y-axis) on the left y-axis. In other words, I would like the red axis to be aligned:
However, the y-axis of the first plot does not align with the y-axis of the bottom left plot.
Code
# Libraries
library(tidyverse)
library(cowplot)
df1 <- data.frame(x = seq(0, 100, 1),
y = seq(100, 0, -1))
df2 <- data.frame(x = seq(0, 10, 0.1),
y = seq(1, 10^9, length.out = 101 ) )
p1 <- ggplot(data = df1) +
geom_line(aes(x = x, y = y))
p2 <- ggplot(data = df2) +
geom_line(aes(x = x, y = y))
combi_p2 <- plot_grid(p2, p2, nrow = 1)
plot_grid(p1, combi_p2, ncol = 1, axis = "l", align = "v")
Attempt to fix it
Using the information provided here, I rewrote the last part of the code:
require(grid) # for unit.pmax()
p1 <- ggplotGrob(p1) # convert to gtable
combi_p2 <- ggplotGrob(combi_p2) # convert to gtable
p1.widths <- p1$widths[1:3] # extract the first three widths,
# corresponding to left margin, y lab, and y axis
combi_p2.widths <- combi_p2$widths[1:3] # same for combi_p2 plot
max.widths <- unit.pmax(p1.widths, combi_p2.widths) # calculate maximum widths
p1$widths[1:3] <- max.widths # assign max. widths to p1 gtable
combi_p2$widths[1:3] <- max.widths # assign max widths to combi_p2 gtable
# plot_grid() can work directly with gtables, so this works
plot_grid(p1, combi_p2, labels = "AUTO", ncol = 1)
Sadly, I was not able to fix the alignment:
Question
How do I align the y-axis of the top plot with the left bottom plot using cowplot in R?
I think you can use ggplotGrob and put them together with gtable_rbind and gtable_cbind. Finally, you can draw the plot with grid.draw()
df1 <- data.frame(x = seq(0, 100, 1),
y = seq(100, 0, -1))
df2 <- data.frame(x = seq(0, 10, 0.1),
y = seq(1, 10^9, length.out = 101 ) )
p1 <- ggplot(data = df1) +
geom_line(aes(x = x, y = y))
p2 <- ggplot(data = df2) +
geom_line(aes(x = x, y = y))
g1 <- ggplotGrob(p1)
g2 <- ggplotGrob(p2)
frame_g2 <- gtable_frame(g2 , debug = TRUE)
frame_combi <- gtable_frame(gtable_cbind(frame_g2,frame_g2),
width = unit(2, "null"),
height = unit(1, "null"))
frame_g1 <-
gtable_frame(
g1,
width = unit(1, "null"),
height = unit(1, "null"),
debug = TRUE
)
grid.newpage()
all_frames <- gtable_rbind(frame_g1, frame_combi)
grid.draw(all_frames)
And this is how the plot looks.
A cowplot solution by Claus O. Wilke is presented here.
It is based on the align_plot function, which first aligns the top plot with the left bottom plot along the y-axis. Then the aligned plots are passed to the plot_grid function.
# Libraries
library(tidyverse)
library(cowplot)
df1 <- data.frame(x = seq(0, 100, 1),
y = seq(100, 0, -1))
df2 <- data.frame(x = seq(0, 10, 0.1),
y = seq(1, 10^9, length.out = 101 ) )
p1 <- ggplot(data = df1) +
geom_line(aes(x = x, y = y))
p2 <- ggplot(data = df2) +
geom_line(aes(x = x, y = y))
plots <- align_plots(p1, p2, align = 'v', axis = 'l')
bottom_row <- plot_grid(plots[[2]], p2, nrow = 1)
plot_grid(plots[[1]], bottom_row, ncol = 1)
I need to reproduce plots generated in InDesign in ggplot for reproducibility.
In this particular example, I have two plots that are combined into one composite plot (I have used the package {patchwork} for this).
I then need to overlay lines joining key points on one plot with the corresponding points on the bottom plot.
The two plots are generated from the same data, have the same x-axis values, but different y-axis values.
I have seen these examples on Stack Overflow, but these deal with drawing lines across facets, which doesn't work here as I'm attempting to draw lines across separate plots:
ggplot, drawing multiple lines across facets
ggplot, drawing line between points across facets
I've tried several approaches, and my closest so far has been to:
Add the lines with grobs using {grid} package
Convert the second plot to a gtable using {gtable} and set the clip of the panel to off so that I can extend the lines upwards beyond the panel of the plot.
Combine the plots again into a single image with {patchwork}.
The problem comes in the last step as the x-axes now do not line up anymore as they did before adding the lines and setting the clip to off (see example in code).
I have also tried combining the plots with ggarrange, {cowplot} and {egg} and {patchwork} comes the closest.
Following is my attempt at the best minimal reprex I can create, but still capturing the nuances of what it is I want to achieve.
library(ggplot2)
library(dplyr)
library(tidyr)
library(patchwork)
library(gtable)
library(grid)
# DATA
x <- 1:20
data <- data.frame(
quantity = x,
curve1 = 10 + 50*exp(-0.2 * x),
curve2 = 5 + 50*exp(-0.5 * x),
profit = c(seq(10, 100, by = 10),
seq(120, -240, by = -40))
)
data_long <- data %>%
gather(key = "variable", value = "value", -quantity)
# POINTS AND LINES
POINTS <- data.frame(
label = c("B", "C"),
quantity = c(5, 10),
value = c(28.39397, 16.76676),
profit = c(50, 100)
)
GROB <- linesGrob()
# Set maximum y-value to extend lines to outside of plot area
GROB_MAX <- 200
# BASE PLOTS
# Plot 1
p1 <- data_long %>%
filter(variable != "profit") %>%
ggplot(aes(x = quantity, y = value)) +
geom_line(aes(color = variable)) +
labs(x = "") +
coord_cartesian(xlim = c(0, 20), ylim = c(0, 30), expand = FALSE) +
theme(legend.justification = "top")
p1
# Plot 2
p2 <- data_long %>%
filter(variable == "profit") %>%
ggplot(aes(x = quantity, y = value)) +
geom_line(color = "darkgreen") +
coord_cartesian(xlim = c(0, 20), ylim = c(-100, 120), expand = FALSE) +
theme(legend.position = "none")
p2
# PANEL A
panel_A <- p1 + p2 + plot_layout(ncol = 1)
panel_A
# PANEL B
# ATTEMPT - adding grobs to plot 1 that end at x-axis of p1
p1 <- p1 +
annotation_custom(GROB,
xmin = 0,
xmax = POINTS$quantity[POINTS$label == "B"],
ymin = POINTS$value[POINTS$label == "B"],
ymax = POINTS$value[POINTS$label == "B"]) +
annotation_custom(GROB,
xmin = POINTS$quantity[POINTS$label == "B"],
xmax = POINTS$quantity[POINTS$label == "B"],
ymin = 0,
ymax = POINTS$value[POINTS$label == "B"]) +
geom_point(data = POINTS %>% filter(label == "B"), size = 1)
# ATTEMPT - adding grobs to plot 2 that extend up to meet plot 1
p2 <- p2 + annotation_custom(GROB,
xmin = POINTS$quantity[POINTS$label == "B"],
xmax = POINTS$quantity[POINTS$label == "B"],
ymin = POINTS$profit[POINTS$label == "B"],
ymax = GROB_MAX)
# Create gtable from ggplot
g2 <- ggplotGrob(p2)
# Turn clip off for panel so that line can extend above
g2$layout$clip[g2$layout$name == "panel"] <- "off"
panel_B <- p1 + g2 + plot_layout(ncol = 1)
panel_B
# Problems:
# 1. Note the shift in axes when turning the clip off so now they do not line up anymore.
# 2. Turning the clip off mean plot 2 extends below the axis. Tried experimenting with various clips.
The expectation is that the plots in panel_B should still appear as they do in panel_A but have the joining lines linking points between the plots.
I am looking for help with solving the above, or else, alternative approaches to try out.
As a reference without running the code above - links to images as I can't post them.
Panel A
Panel B: What it currently looks like
Panel B: What I want it to look like!
My solution is a little ad hoc, but it seems to work. I based it on the following previous answer Left align two graph edges (ggplot).
I will break the solution in three parts to address some of the issues you were facing separately.
The solution that matches what you want is the third one!
First trial
Here I get the axis aligned using the same approach as this answer Left align two graph edges (ggplot).
# first trial
# plots are aligned but line in bottom plot extends to the bottom
#
p1_1 <- p1 +
annotation_custom(GROB,
xmin = 0,
xmax = POINTS$quantity[POINTS$label == "B"],
ymin = POINTS$value[POINTS$label == "B"],
ymax = POINTS$value[POINTS$label == "B"]) +
annotation_custom(GROB,
xmin = POINTS$quantity[POINTS$label == "B"],
xmax = POINTS$quantity[POINTS$label == "B"],
ymin = 0,
ymax = POINTS$value[POINTS$label == "B"]) +
geom_point(data = POINTS %>% filter(label == "B"), size = 1)
p2_1 <- p2 + annotation_custom(GROB,
xmin = POINTS$quantity[POINTS$label == "B"],
xmax = POINTS$quantity[POINTS$label == "B"],
ymin = POINTS$profit[POINTS$label == "B"],
ymax = GROB_MAX)
# Create gtable from ggplot
gA <- ggplotGrob(p1_1)
gB <- ggplotGrob(p2_1)
# Turn clip off for panel so that line can extend above
gB$layout$clip[gB$layout$name == "panel"] <- "off"
# get max width of left axis between both plots
maxWidth = grid::unit.pmax(gA$widths[2:5], gB$widths[2:5])
# set maxWidth to both plots (to align left axis)
gA$widths[2:5] <- as.list(maxWidth)
gB$widths[2:5] <- as.list(maxWidth)
# now apply all widths from plot A to plot B
# (this is specific to your case because we know plot A is the one with the legend)
gB$widths <- gA$widths
grid.arrange(gA, gB, ncol=1)
Second trial
The problem now is that the line in the bottom plot extends beyond the plot area. One way to deal with this is to change coord_cartesian() to scale_y_continuous() and scale_x_continuous() because this will remove data that falls out of the plot area.
# second trial
# using scale_y_continuous and scale_x_continuous to remove data out of plot limits
# (this could resolve the problem of the bottom plot, but creates another problem)
#
p1_2 <- p1_1
p2_2 <- data_long %>%
filter(variable == "profit") %>%
ggplot(aes(x = quantity, y = value)) +
geom_line(color = "darkgreen") +
scale_x_continuous(limits = c(0, 20), expand = c(0, 0)) +
scale_y_continuous(limits=c(-100, 120), expand=c(0,0)) +
theme(legend.position = "none") +
annotation_custom(GROB,
xmin = POINTS$quantity[POINTS$label == "B"],
xmax = POINTS$quantity[POINTS$label == "B"],
ymin = POINTS$profit[POINTS$label == "B"],
ymax = GROB_MAX)
# Create gtable from ggplot
gA <- ggplotGrob(p1_2)
gB <- ggplotGrob(p2_2)
# Turn clip off for panel so that line can extend above
gB$layout$clip[gB$layout$name == "panel"] <- "off"
# get max width of left axis between both plots
maxWidth = grid::unit.pmax(gA$widths[2:5], gB$widths[2:5])
# set maxWidth to both plots (to align left axis)
gA$widths[2:5] <- as.list(maxWidth)
gB$widths[2:5] <- as.list(maxWidth)
# now apply all widths from plot A to plot B
# (this is specific to your case because we know plot A is the one with the legend)
gB$widths <- gA$widths
# but now the line does not go all the way to the bottom y axis
grid.arrange(gA, gB, ncol=1)
Third trial
The problem now is that the line does not extend all the way to the bottom of the y-axis (because the point below y=-100 was removed). The way I solved this (very ad hoc) was to interpolate the point at y=-100 and add this to the data frame.
# third trial
# modify the data set so value data stops at bottom of plot
#
p1_3 <- p1_1
# use approx() function to interpolate value of x when y value == -100
xvalue <- approx(x=data_long$value, y=data_long$quantity, xout=-100)$y
p2_3 <- data_long %>%
filter(variable == "profit") %>%
# add row with interpolated point!
rbind(data.frame(quantity=xvalue, variable = "profit", value=-100)) %>%
ggplot(aes(x = quantity, y = value)) +
geom_line(color = "darkgreen") +
scale_x_continuous(limits = c(0, 20), expand = c(0, 0)) +
scale_y_continuous(limits=c(-100, 120), expand=c(0,0)) +
theme(legend.position = "none") +
annotation_custom(GROB,
xmin = POINTS$quantity[POINTS$label == "B"],
xmax = POINTS$quantity[POINTS$label == "B"],
ymin = POINTS$profit[POINTS$label == "B"],
ymax = GROB_MAX)
# Create gtable from ggplot
gA <- ggplotGrob(p1_3)
gB <- ggplotGrob(p2_3)
# Turn clip off for panel so that line can extend above
gB$layout$clip[gB$layout$name == "panel"] <- "off"
# get max width of left axis between both plots
maxWidth = grid::unit.pmax(gA$widths[2:5], gB$widths[2:5])
# set maxWidth to both plots (to align left axis)
gA$widths[2:5] <- as.list(maxWidth)
gB$widths[2:5] <- as.list(maxWidth)
# now apply all widths from plot A to plot B
# (this is specific to your case because we know plot A is the one with the legend)
gB$widths <- gA$widths
# Now line goes all the way to the bottom y axis
grid.arrange(gA, gB, ncol=1)
This makes use of facet_grid to force the x-axis to match.
grobbing_lines <- tribble(
~facet, ~x, ~xend, ~y, ~yend,
'profit', 5, 5, 50, Inf,
# 'curve', 5, 5, -Inf, 28.39397
'curve', -Inf, 5, 28.39397, 28.39397
)
grobbing_points <- tribble(
~facet, ~x, ~y,
'curve', 5, 28.39397
)
data_long_facet <- data_long%>%
mutate(facet = if_else(variable == 'profit', 'profit', 'curve'))
p <- ggplot(data_long_facet, aes(x = quantity, y = value)) +
geom_line(aes(color = variable))+
facet_grid(rows = vars(facet), scales = 'free_y')+
geom_segment(data = grobbing_lines, aes(x = x, xend = xend, y = y, yend = yend),inherit.aes = F)+
geom_point(data = grobbing_points, aes(x = x, y = y), size = 3, inherit.aes = F)
pb <- ggplot_build(p)
pg <- ggplot_gtable(pb)
#formulas to determine points in x and y locations
data2npc <- function(x, panel = 1L, axis = "x") {
range <- pb$layout$panel_params[[panel]][[paste0(axis,".range")]]
scales::rescale(c(range, x), c(0,1))[-c(1,2)]
}
data_y_2npc <- function(y, panel, axis = 'y') {
range <- pb$layout$panel_params[[panel]][[paste0(axis,".range")]]
scales::rescale(c(range, y), c(0,1))[-c(1,2)]
}
# add the new grob
pg <- gtable_add_grob(pg,
segmentsGrob(x0 = data2npc(5),
x1 = data2npc(5),
y0=data_y_2npc(50, panel = 2)/2,
y1 = data_y_2npc(28.39397, panel = 1L)+ 0.25) ,
t = 7, b = 9, l = 5)
#print to page
grid.newpage()
grid.draw(pg)
The legend and the scales are what do not match your intended output.
I would like to draw a hollow histogram that has no vertical bars drawn inside of it, but just an outline. I couldn't find any way to do it with geom_histogram. The geom_step+stat_bin combination seemed like it could do the job. However, the bins of geom_step+stat_bin are shifted by a half bin either to the right or to the left, depending on the step's direction= parameter value. It seems like it is doing its "steps" WRT bin centers. Is there any way to change this behavior so it would do the "steps" at bin edges?
Here's an illustration:
d <- data.frame(x=rnorm(1000))
qplot(x, data=d, geom="histogram",
breaks=seq(-4,4,by=.5), color=I("red"), fill = I("transparent")) +
geom_step(stat="bin", breaks=seq(-4,4,by=.5), color="black", direction="vh")
I propose making a new Geom like so:
library(ggplot2)
library(proto)
geom_stephist <- function(mapping = NULL, data = NULL, stat="bin", position="identity", ...) {
GeomStepHist$new(mapping=mapping, data=data, stat=stat, position=position, ...)
}
GeomStepHist <- proto(ggplot2:::Geom, {
objname <- "stephist"
default_stat <- function(.) StatBin
default_aes <- function(.) aes(colour="black", size=0.5, linetype=1, alpha = NA)
reparameterise <- function(., df, params) {
transform(df,
ymin = pmin(y, 0), ymax = pmax(y, 0),
xmin = x - width / 2, xmax = x + width / 2, width = NULL
)
}
draw <- function(., data, scales, coordinates, ...) {
data <- as.data.frame(data)[order(data$x), ]
n <- nrow(data)
i <- rep(1:n, each=2)
newdata <- rbind(
transform(data[1, ], x=xmin, y=0),
transform(data[i, ], x=c(rbind(data$xmin, data$xmax))),
transform(data[n, ], x=xmax, y=0)
)
rownames(newdata) <- NULL
GeomPath$draw(newdata, scales, coordinates, ...)
}
guide_geom <- function(.) "path"
})
This also works for non-uniform breaks. To illustrate the usage:
d <- data.frame(x=runif(1000, -5, 5))
ggplot(d, aes(x)) +
geom_histogram(breaks=seq(-4,4,by=.5), color="red", fill=NA) +
geom_stephist(breaks=seq(-4,4,by=.5), color="black")
This isn't ideal, but it's the best I can come up with:
h <- hist(d$x,breaks=seq(-4,4,by=.5))
d1 <- data.frame(x = h$breaks,y = c(h$counts,NA))
ggplot() +
geom_histogram(data = d,aes(x = x),breaks = seq(-4,4,by=.5),
color = "red",fill = "transparent") +
geom_step(data = d1,aes(x = x,y = y),stat = "identity")
Yet another one. Use ggplot_build to build a plot object of the histogram for rendering. From this object x and y values are extracted, to be used for geom_step. Use by to offset x values.
by <- 0.5
p1 <- ggplot(data = d, aes(x = x)) +
geom_histogram(breaks = seq(from = -4, to = 4, by = by),
color = "red", fill = "transparent")
df <- ggplot_build(p1)$data[[1]][ , c("x", "y")]
p1 +
geom_step(data = df, aes(x = x - by/2, y = y))
Edit following comment from #Vadim Khotilovich (Thanks!)
The xmin from the plot object can be used instead (-> no need for offset adjustment)
df <- ggplot_build(p1)$data[[1]][ , c("xmin", "y")]
p1 +
geom_step(data = df, aes(x = xmin, y = y))
An alternative, also less than ideal:
qplot(x, data=d, geom="histogram", breaks=seq(-4,4,by=.5), color=I("red"), fill = I("transparent")) +
stat_summary(aes(x=round(x * 2 - .5) / 2, y=1), fun.y=length, geom="step")
Missing some bins that you can probably add back if you mess around a bit. Only (somewhat meaningless) advantage is it is more in ggplot than #Joran's answer, though even that is debatable.
I answer my own comment earlier today: here is a modified version of #RosenMatev's answer updated for the v2 (ggplot2_2.0.0) using ggproto:
GeomStepHist <- ggproto("GeomStepHist", GeomPath,
required_aes = c("x"),
draw_panel = function(data, panel_scales, coord, direction) {
data <- as.data.frame(data)[order(data$x), ]
n <- nrow(data)
i <- rep(1:n, each=2)
newdata <- rbind(
transform(data[1, ], x=x - width/2, y=0),
transform(data[i, ], x=c(rbind(data$x-data$width/2, data$x+data$width/2))),
transform(data[n, ], x=x + width/2, y=0)
)
rownames(newdata) <- NULL
GeomPath$draw_panel(newdata, panel_scales, coord)
}
)
geom_step_hist <- function(mapping = NULL, data = NULL, stat = "bin",
direction = "hv", position = "stack", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE, ...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomStepHist,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
direction = direction,
na.rm = na.rm,
...
)
)
}
TLDR: use geom_step(..., direction = "mid")
This has become much easier since Daniel Mastropietro and Dewey Dunnington implemented the "mid" as an additional option for the direction argument of geom_step for ggplot2 v3.3.0:
library(ggplot2)
set.seed(1)
d <- data.frame(x = rnorm(1000))
ggplot(d, aes(x)) +
geom_histogram(breaks = seq(-4, 4, by=.5), color="red", fill = "transparent") +
geom_step(stat="bin", breaks=seq(-4, 4, by=.5), color = "black", direction = "mid")
Below, for reference, the code from the question formatted like above answer:
ggplot(d, aes(x)) +
geom_histogram(breaks = seq(-4, 4, by=.5), color = "red", fill = "transparent") +
geom_step(stat="bin", breaks = seq(-4, 4, by=.5), color = "black", direction = "vh")
Created on 2020-09-02 by the reprex package (v0.3.0)
a simple way to do something similar to #Rosen Matev (that does not work with ggplot2_2.0.0 as mentioned by #julou), I would just
1) calculate manually the value of the bins (using a small function as shown below)
2) use geom_step()
Hope this helps !
geom_step_hist<- function(d,binw){
dd=NULL
bin=min(d$y) # this enables having a first value that is = 0 (to have the left vertical bar of the plot when using geom_step)
max=max(d$y)+binw*2 # this enables having a last value that is = 0 (to have the right vertical bar of the plot when using geom_step)
xx=NULL
yy=NULL
while(bin<=max){
n=length(temp$y[which(temp$y<bin & temp$y>=(bin-binw))])
yy=c(yy,n)
xx=c(xx,bin-binw)
bin=bin+binw
rm(n)
}
dd=data.frame(xx,yy)
return(dd)
}
hist=ggplot(dd,aes(x=xx,y=yy))+
geom_step()
I would like to draw a hollow histogram that has no vertical bars drawn inside of it, but just an outline. I couldn't find any way to do it with geom_histogram. The geom_step+stat_bin combination seemed like it could do the job. However, the bins of geom_step+stat_bin are shifted by a half bin either to the right or to the left, depending on the step's direction= parameter value. It seems like it is doing its "steps" WRT bin centers. Is there any way to change this behavior so it would do the "steps" at bin edges?
Here's an illustration:
d <- data.frame(x=rnorm(1000))
qplot(x, data=d, geom="histogram",
breaks=seq(-4,4,by=.5), color=I("red"), fill = I("transparent")) +
geom_step(stat="bin", breaks=seq(-4,4,by=.5), color="black", direction="vh")
I propose making a new Geom like so:
library(ggplot2)
library(proto)
geom_stephist <- function(mapping = NULL, data = NULL, stat="bin", position="identity", ...) {
GeomStepHist$new(mapping=mapping, data=data, stat=stat, position=position, ...)
}
GeomStepHist <- proto(ggplot2:::Geom, {
objname <- "stephist"
default_stat <- function(.) StatBin
default_aes <- function(.) aes(colour="black", size=0.5, linetype=1, alpha = NA)
reparameterise <- function(., df, params) {
transform(df,
ymin = pmin(y, 0), ymax = pmax(y, 0),
xmin = x - width / 2, xmax = x + width / 2, width = NULL
)
}
draw <- function(., data, scales, coordinates, ...) {
data <- as.data.frame(data)[order(data$x), ]
n <- nrow(data)
i <- rep(1:n, each=2)
newdata <- rbind(
transform(data[1, ], x=xmin, y=0),
transform(data[i, ], x=c(rbind(data$xmin, data$xmax))),
transform(data[n, ], x=xmax, y=0)
)
rownames(newdata) <- NULL
GeomPath$draw(newdata, scales, coordinates, ...)
}
guide_geom <- function(.) "path"
})
This also works for non-uniform breaks. To illustrate the usage:
d <- data.frame(x=runif(1000, -5, 5))
ggplot(d, aes(x)) +
geom_histogram(breaks=seq(-4,4,by=.5), color="red", fill=NA) +
geom_stephist(breaks=seq(-4,4,by=.5), color="black")
This isn't ideal, but it's the best I can come up with:
h <- hist(d$x,breaks=seq(-4,4,by=.5))
d1 <- data.frame(x = h$breaks,y = c(h$counts,NA))
ggplot() +
geom_histogram(data = d,aes(x = x),breaks = seq(-4,4,by=.5),
color = "red",fill = "transparent") +
geom_step(data = d1,aes(x = x,y = y),stat = "identity")
Yet another one. Use ggplot_build to build a plot object of the histogram for rendering. From this object x and y values are extracted, to be used for geom_step. Use by to offset x values.
by <- 0.5
p1 <- ggplot(data = d, aes(x = x)) +
geom_histogram(breaks = seq(from = -4, to = 4, by = by),
color = "red", fill = "transparent")
df <- ggplot_build(p1)$data[[1]][ , c("x", "y")]
p1 +
geom_step(data = df, aes(x = x - by/2, y = y))
Edit following comment from #Vadim Khotilovich (Thanks!)
The xmin from the plot object can be used instead (-> no need for offset adjustment)
df <- ggplot_build(p1)$data[[1]][ , c("xmin", "y")]
p1 +
geom_step(data = df, aes(x = xmin, y = y))
An alternative, also less than ideal:
qplot(x, data=d, geom="histogram", breaks=seq(-4,4,by=.5), color=I("red"), fill = I("transparent")) +
stat_summary(aes(x=round(x * 2 - .5) / 2, y=1), fun.y=length, geom="step")
Missing some bins that you can probably add back if you mess around a bit. Only (somewhat meaningless) advantage is it is more in ggplot than #Joran's answer, though even that is debatable.
I answer my own comment earlier today: here is a modified version of #RosenMatev's answer updated for the v2 (ggplot2_2.0.0) using ggproto:
GeomStepHist <- ggproto("GeomStepHist", GeomPath,
required_aes = c("x"),
draw_panel = function(data, panel_scales, coord, direction) {
data <- as.data.frame(data)[order(data$x), ]
n <- nrow(data)
i <- rep(1:n, each=2)
newdata <- rbind(
transform(data[1, ], x=x - width/2, y=0),
transform(data[i, ], x=c(rbind(data$x-data$width/2, data$x+data$width/2))),
transform(data[n, ], x=x + width/2, y=0)
)
rownames(newdata) <- NULL
GeomPath$draw_panel(newdata, panel_scales, coord)
}
)
geom_step_hist <- function(mapping = NULL, data = NULL, stat = "bin",
direction = "hv", position = "stack", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE, ...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomStepHist,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
direction = direction,
na.rm = na.rm,
...
)
)
}
TLDR: use geom_step(..., direction = "mid")
This has become much easier since Daniel Mastropietro and Dewey Dunnington implemented the "mid" as an additional option for the direction argument of geom_step for ggplot2 v3.3.0:
library(ggplot2)
set.seed(1)
d <- data.frame(x = rnorm(1000))
ggplot(d, aes(x)) +
geom_histogram(breaks = seq(-4, 4, by=.5), color="red", fill = "transparent") +
geom_step(stat="bin", breaks=seq(-4, 4, by=.5), color = "black", direction = "mid")
Below, for reference, the code from the question formatted like above answer:
ggplot(d, aes(x)) +
geom_histogram(breaks = seq(-4, 4, by=.5), color = "red", fill = "transparent") +
geom_step(stat="bin", breaks = seq(-4, 4, by=.5), color = "black", direction = "vh")
Created on 2020-09-02 by the reprex package (v0.3.0)
a simple way to do something similar to #Rosen Matev (that does not work with ggplot2_2.0.0 as mentioned by #julou), I would just
1) calculate manually the value of the bins (using a small function as shown below)
2) use geom_step()
Hope this helps !
geom_step_hist<- function(d,binw){
dd=NULL
bin=min(d$y) # this enables having a first value that is = 0 (to have the left vertical bar of the plot when using geom_step)
max=max(d$y)+binw*2 # this enables having a last value that is = 0 (to have the right vertical bar of the plot when using geom_step)
xx=NULL
yy=NULL
while(bin<=max){
n=length(temp$y[which(temp$y<bin & temp$y>=(bin-binw))])
yy=c(yy,n)
xx=c(xx,bin-binw)
bin=bin+binw
rm(n)
}
dd=data.frame(xx,yy)
return(dd)
}
hist=ggplot(dd,aes(x=xx,y=yy))+
geom_step()