I would like to draw a hollow histogram that has no vertical bars drawn inside of it, but just an outline. I couldn't find any way to do it with geom_histogram. The geom_step+stat_bin combination seemed like it could do the job. However, the bins of geom_step+stat_bin are shifted by a half bin either to the right or to the left, depending on the step's direction= parameter value. It seems like it is doing its "steps" WRT bin centers. Is there any way to change this behavior so it would do the "steps" at bin edges?
Here's an illustration:
d <- data.frame(x=rnorm(1000))
qplot(x, data=d, geom="histogram",
breaks=seq(-4,4,by=.5), color=I("red"), fill = I("transparent")) +
geom_step(stat="bin", breaks=seq(-4,4,by=.5), color="black", direction="vh")
I propose making a new Geom like so:
library(ggplot2)
library(proto)
geom_stephist <- function(mapping = NULL, data = NULL, stat="bin", position="identity", ...) {
GeomStepHist$new(mapping=mapping, data=data, stat=stat, position=position, ...)
}
GeomStepHist <- proto(ggplot2:::Geom, {
objname <- "stephist"
default_stat <- function(.) StatBin
default_aes <- function(.) aes(colour="black", size=0.5, linetype=1, alpha = NA)
reparameterise <- function(., df, params) {
transform(df,
ymin = pmin(y, 0), ymax = pmax(y, 0),
xmin = x - width / 2, xmax = x + width / 2, width = NULL
)
}
draw <- function(., data, scales, coordinates, ...) {
data <- as.data.frame(data)[order(data$x), ]
n <- nrow(data)
i <- rep(1:n, each=2)
newdata <- rbind(
transform(data[1, ], x=xmin, y=0),
transform(data[i, ], x=c(rbind(data$xmin, data$xmax))),
transform(data[n, ], x=xmax, y=0)
)
rownames(newdata) <- NULL
GeomPath$draw(newdata, scales, coordinates, ...)
}
guide_geom <- function(.) "path"
})
This also works for non-uniform breaks. To illustrate the usage:
d <- data.frame(x=runif(1000, -5, 5))
ggplot(d, aes(x)) +
geom_histogram(breaks=seq(-4,4,by=.5), color="red", fill=NA) +
geom_stephist(breaks=seq(-4,4,by=.5), color="black")
This isn't ideal, but it's the best I can come up with:
h <- hist(d$x,breaks=seq(-4,4,by=.5))
d1 <- data.frame(x = h$breaks,y = c(h$counts,NA))
ggplot() +
geom_histogram(data = d,aes(x = x),breaks = seq(-4,4,by=.5),
color = "red",fill = "transparent") +
geom_step(data = d1,aes(x = x,y = y),stat = "identity")
Yet another one. Use ggplot_build to build a plot object of the histogram for rendering. From this object x and y values are extracted, to be used for geom_step. Use by to offset x values.
by <- 0.5
p1 <- ggplot(data = d, aes(x = x)) +
geom_histogram(breaks = seq(from = -4, to = 4, by = by),
color = "red", fill = "transparent")
df <- ggplot_build(p1)$data[[1]][ , c("x", "y")]
p1 +
geom_step(data = df, aes(x = x - by/2, y = y))
Edit following comment from #Vadim Khotilovich (Thanks!)
The xmin from the plot object can be used instead (-> no need for offset adjustment)
df <- ggplot_build(p1)$data[[1]][ , c("xmin", "y")]
p1 +
geom_step(data = df, aes(x = xmin, y = y))
An alternative, also less than ideal:
qplot(x, data=d, geom="histogram", breaks=seq(-4,4,by=.5), color=I("red"), fill = I("transparent")) +
stat_summary(aes(x=round(x * 2 - .5) / 2, y=1), fun.y=length, geom="step")
Missing some bins that you can probably add back if you mess around a bit. Only (somewhat meaningless) advantage is it is more in ggplot than #Joran's answer, though even that is debatable.
I answer my own comment earlier today: here is a modified version of #RosenMatev's answer updated for the v2 (ggplot2_2.0.0) using ggproto:
GeomStepHist <- ggproto("GeomStepHist", GeomPath,
required_aes = c("x"),
draw_panel = function(data, panel_scales, coord, direction) {
data <- as.data.frame(data)[order(data$x), ]
n <- nrow(data)
i <- rep(1:n, each=2)
newdata <- rbind(
transform(data[1, ], x=x - width/2, y=0),
transform(data[i, ], x=c(rbind(data$x-data$width/2, data$x+data$width/2))),
transform(data[n, ], x=x + width/2, y=0)
)
rownames(newdata) <- NULL
GeomPath$draw_panel(newdata, panel_scales, coord)
}
)
geom_step_hist <- function(mapping = NULL, data = NULL, stat = "bin",
direction = "hv", position = "stack", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE, ...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomStepHist,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
direction = direction,
na.rm = na.rm,
...
)
)
}
TLDR: use geom_step(..., direction = "mid")
This has become much easier since Daniel Mastropietro and Dewey Dunnington implemented the "mid" as an additional option for the direction argument of geom_step for ggplot2 v3.3.0:
library(ggplot2)
set.seed(1)
d <- data.frame(x = rnorm(1000))
ggplot(d, aes(x)) +
geom_histogram(breaks = seq(-4, 4, by=.5), color="red", fill = "transparent") +
geom_step(stat="bin", breaks=seq(-4, 4, by=.5), color = "black", direction = "mid")
Below, for reference, the code from the question formatted like above answer:
ggplot(d, aes(x)) +
geom_histogram(breaks = seq(-4, 4, by=.5), color = "red", fill = "transparent") +
geom_step(stat="bin", breaks = seq(-4, 4, by=.5), color = "black", direction = "vh")
Created on 2020-09-02 by the reprex package (v0.3.0)
a simple way to do something similar to #Rosen Matev (that does not work with ggplot2_2.0.0 as mentioned by #julou), I would just
1) calculate manually the value of the bins (using a small function as shown below)
2) use geom_step()
Hope this helps !
geom_step_hist<- function(d,binw){
dd=NULL
bin=min(d$y) # this enables having a first value that is = 0 (to have the left vertical bar of the plot when using geom_step)
max=max(d$y)+binw*2 # this enables having a last value that is = 0 (to have the right vertical bar of the plot when using geom_step)
xx=NULL
yy=NULL
while(bin<=max){
n=length(temp$y[which(temp$y<bin & temp$y>=(bin-binw))])
yy=c(yy,n)
xx=c(xx,bin-binw)
bin=bin+binw
rm(n)
}
dd=data.frame(xx,yy)
return(dd)
}
hist=ggplot(dd,aes(x=xx,y=yy))+
geom_step()
Related
I have a geom_foo() which will do some transformation of the input data and I have a scale transformation. My problem is that these work not as I would expect together with other geom_*s in terms of scaling.
To illustrate the behavior, consider foo() which will be used in the setup_data method of GeomFoo, defined at the end of the question.
foo <- function(x, y) {
data.frame(
x = x + 2,
y = y + 2
)
}
foo(1, 1)
The transformer is:
foo_trans <- scales::trans_new(
name = "foo",
transform = function(x) x / 5,
inverse = function(x) x * 5
)
Given this input data:
df1 <- data.frame(x = c(1, 2), y = c(1, 2))
Here is a basic plot:
library(ggplot2)
ggplot(df1, aes(x = x, y = y)) +
geom_foo()
When I apply the transformation to the vertical scale, I get this
ggplot(df1, aes(x = x, y = y)) +
geom_foo() +
scale_y_continuous(trans = foo_trans)
What I can say is that the y-axis limits are calculate as 11 = 1 + (2*5) and 12 = 2 + (2*5), where 1 and 2 are df1$y, and (2 * 5) are taken from the setup_data method and from trans_foo.
My real problem is, that I would like add a text layer with labels. These labels and their coordinates come from another dataframe, as below.
df_label <- foo(df1$x, df1$y)
df_label$label <- c("A", "B")
Label and point layers are on same x-y positions without the scale transformation
p <- ggplot(df1, aes(x = x, y = y)) +
geom_foo(color = "red", size = 6) +
geom_text(data = df_label, aes(x, y, label = label))
p
But when I apply the transformation, the coordinates do not match anymore
p +
scale_y_continuous(trans = foo_trans)
How do I get the to layer to match in x-y coordinates after the transformation? Thanks
ggproto object:
GeomFoo <- ggproto("GeomFoo", GeomPoint,
setup_data = function(data, params) {
cols_to_keep <- setdiff(names(data), c("x", "y"))
cbind(
foo(data$x, data$y),
data[, cols_to_keep]
)
}
)
geom constructor:
geom_foo <- function(mapping = NULL, data = NULL, ...,
na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = "identity",
geom = GeomFoo,
position = "identity",
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
na.rm = na.rm,
...
)
)
}
Doing data transformations isn't really the task of a geom, but a task of a stat instead. That said, the larger issue is that scale transformations are applied before the GeomFoo$setup_data() method is called. There are two ways one could accomplish this task that I could see.
Apply foo() before scale transformation. I don't think geoms or stats ever have access to the data before scale transformation. A possible place for this is in the ggplot2:::Layer$setup_layer() method. However, this isn't exported, which probably means the devs would like to discourage this even before we make an attempt.
Inverse the scale transformation, apply foo(), and transform again. For this, you need a method with access to the scales. AFAIK, no geom method has this access. However Stat$compute_panel() does have access, so we can use this.
To give an example of (2), I think you could get away with the following:
StatFoo <- ggproto(
"StatFoo", Stat,
compute_panel = function(self, data, scales) {
cols_to_keep <- setdiff(names(data), c("x", "y"))
food <- foo(scales$x$trans$inverse(data$x),
scales$y$trans$inverse(data$y))
cbind(
data.frame(x = scales$x$trans$transform(food$x),
y = scales$y$trans$transform(food$y)),
data[, cols_to_keep]
)
}
)
geom_foo <- function(mapping = NULL, data = NULL, ...,
na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = StatFoo,
geom = GeomPoint,
position = "identity",
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
na.rm = na.rm,
...
)
)
}
If someone else has brighter ideas to do this, I'd also like to know!
I can plot a function in ggplot2, like so:
library(ggplot2)
ggplot(data.frame(x=0), aes(x)) + geom_function(fun = sin) + xlim(c(-5,5))
Can I use ggplot2's facetting to make the plot for multiple functions, one in each facet? (for example sin and cos)
Looks like you in fact can facet by the function if you feed each layer its own data with the faceting variable specified:
library(ggplot2) # using ggplot2 3.3.5
ggplot(data.frame(x=0), aes(x)) +
geom_function(fun = sin, data = data.frame(x = -5:5, fun_name = "sin")) +
geom_function(fun = cos, data = data.frame(x = -5:5, fun_name = "cos")) +
facet_wrap(~fun_name)
... fun is not an aesthetic ... you can make it one :)
It's a bit of an overkill, but just a quick demonstration what is possible by modifying the Geoms and Stats. The below is a quick hack and I've referenced with quite a lot of ugly :::, which you wouldn't do if you would properly package this. Also this is clearly not properly tested on many use cases. Few more comments in the code.
This was quite ... fun :)
df <- data.frame(x = 0, fun = c("sin", "cos", "tan", "mean"))
ggplot(df, aes(x)) +
stat_function2(aes(fun = fun)) +
xlim(c(-5,5)) +
facet_wrap(~fun, scales = "free_y")
Modifying Geom and Stat - StatFunction2
StatFunction2 <- ggproto(NULL, StatFunction)
## removing fun from the arguments
StatFunction2$compute_group <- function (data, scales, xlim = NULL, n = 101, args = list())
{
if (is.null(scales$x)) {
## need to change that here a bit
range <- rlang::`%||%`(xlim, c(0, 1))
xseq <- seq(range[1], range[2], length.out = n)
x_trans <- xseq
}
else {
## same same
range <- rlang::`%||%`(xlim, scales$x$dimension())
xseq <- seq(range[1], range[2], length.out = n)
if (scales$x$is_discrete()) {
x_trans <- xseq
}
else {
x_trans <- scales$x$trans$inverse(xseq)
}
}
## get the function, this is the trick :)
fun <- unique(data$fun)
if (plyr::is.formula(fun))
fun <- as_function(fun)
y_out <- do.call(fun, c(list(quote(x_trans)), args))
if (!is.null(scales$y) && !scales$y$is_discrete()) {
y_out <- scales$y$trans$transform(y_out)
}
ggplot2:::new_data_frame(list(x = xseq, y = y_out))
}
## update stat_function - remove fun argument and reference new geom_function2
stat_function2 <- function (mapping = NULL, data = NULL, geom = "function2", position = "identity",
..., fun, xlim = NULL, n = 101, args = list(), na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE)
{
if (is.null(data)) {
### those ::: are just for to make it work here
data <- ggplot2:::ensure_nonempty_data
}
layer(data = data, mapping = mapping, stat = StatFunction2,
geom = geom, position = position, show.legend = show.legend,
## fun needs to be removed here too.
inherit.aes = inherit.aes, params = list(n = n,
args = args, na.rm = na.rm, xlim = xlim, ...))
}
## This is the correct way to create copies (children) of ggproto objects
## see https://stackoverflow.com/a/70637511/7941188
GeomFunction2 <- ggproto(NULL, GeomFunction)
## change the required aesthetics - this removes the warning that aesthetics are not known
GeomFunction2$required_aes <- c("x", "y", "fun")
## update the corresponding geom (two locations in this function definition)
geom_function2 <- function (mapping = NULL, data = NULL, stat = "function2", position = "identity",
..., na.rm = FALSE, show.legend = NA, inherit.aes = TRUE)
{
if (is.null(data)) {
data <- ensure_nonempty_data
}
layer(data = data, mapping = mapping, stat = stat, geom = GeomFunction2,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...))
}
Because fun= is not an aesthetic, I think you cannot facet on it. However, you can faux-facet using the patchwork package.
library(ggplot2)
gsin <- ggplot(data.frame(x=0), aes(x)) +
geom_function(fun = sin) +
xlim(c(-5,5)) +
labs(title = "sin()")
gtan <- ggplot(data.frame(x=0), aes(x)) +
geom_function(fun = tan) +
xlim(c(-5,5)) +
labs(title = "tan()")
gsin + gtan
If you prefer the "look" of ggplot2's facets, you can choose this method instead:
gsin <- ggplot(data.frame(x=0, fun="sin"), aes(x)) +
facet_wrap(~fun) +
geom_function(fun = sin) +
xlim(c(-5,5))
gtan <- ggplot(data.frame(x=0, fun="tan"), aes(x)) +
facet_wrap(~fun) +
geom_function(fun = tan) +
xlim(c(-5,5))
gsin + gtan
All of this so far has the effect of facet_*(scales="free_y") (because we fixed xlim(.)). If you want to mimic faceting more closely, you need to control the limits of all facets:
ylims <- c(-1, 1)
gsin <- ggplot(data.frame(x=0, fun="sin"), aes(x)) +
facet_wrap(~fun) +
geom_function(fun = sin) +
xlim(c(-5,5)) +
scale_y_continuous(limits = ylims)
gtan <- ggplot(data.frame(x=0, fun="tan"), aes(x)) +
facet_wrap(~fun) +
geom_function(fun = tan) +
xlim(c(-5,5)) +
scale_y_continuous(name = NULL, guide = NULL, limits = ylims)
gsin + gtan
# Warning: Removed 22 row(s) containing missing values (geom_path).
Technically you are not required here to set the y-limits on all, but ... unless you know with certainty that the limits on unconstrained y-axes will be what you need, it is possible that they could be slightly off. For instance, if you change the initial function (in a quick hack) to be 2*sin(x) but forget to update the remaining facets' y-limits, then your plots will be lying. It's best to set the limits in a single place (ylims <- ...) and reference in all plots.
I have been stuck with this for hours. When I run this :
library(ggmap)
set.seed(1)
n=100
df <- data.frame(x=rnorm(n, 0, 1), y=rnorm(n, 0, 1))
TestData <- ggplot (data = df) +
stat_density2d(aes(x = x, y = y,fill = as.factor(..level..)),bins=4, geom = "polygon",) +
geom_point(aes(x = x, y = y)) +
scale_fill_manual(values = c("yellow","red","green","royalblue", "black"))
I get this error message :
Error: Unknown parameters: bins
Does anyone know why?
Okay, adding this one as a second answer because I think the descriptions and comments in the first answer are useful and I don't feel like merging them. Basically I figured there must be an easy way to restore the regressed functionality. And after awhile, and learning some basics about ggplot2, I got this to work by overriding some ggplot2 functions:
library(ggmap)
library(ggplot2)
# -------------------------------
# start copy from stat-density-2d.R
stat_density_2d <- function(mapping = NULL, data = NULL, geom = "density_2d",
position = "identity", contour = TRUE,
n = 100, h = NULL, na.rm = FALSE,bins=0,
show.legend = NA, inherit.aes = TRUE, ...) {
layer(
data = data,
mapping = mapping,
stat = StatDensity2d,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
na.rm = na.rm,
contour = contour,
n = n,
bins=bins,
...
)
)
}
stat_density2d <- stat_density_2d
StatDensity2d <-
ggproto("StatDensity2d", Stat,
default_aes = aes(colour = "#3366FF", size = 0.5),
required_aes = c("x", "y"),
compute_group = function(data, scales, na.rm = FALSE, h = NULL,
contour = TRUE, n = 100,bins=0) {
if (is.null(h)) {
h <- c(MASS::bandwidth.nrd(data$x), MASS::bandwidth.nrd(data$y))
}
dens <- MASS::kde2d(
data$x, data$y, h = h, n = n,
lims = c(scales$x$dimension(), scales$y$dimension())
)
df <- data.frame(expand.grid(x = dens$x, y = dens$y), z = as.vector(dens$z))
df$group <- data$group[1]
if (contour) {
# StatContour$compute_panel(df, scales,bins=bins,...) # bad dots...
if (bins>0){
StatContour$compute_panel(df, scales,bins)
} else {
StatContour$compute_panel(df, scales)
}
} else {
names(df) <- c("x", "y", "density", "group")
df$level <- 1
df$piece <- 1
df
}
}
)
# end copy from stat-density-2d.R
# -------------------------------
set.seed(1)
n=100
df <- data.frame(x=rnorm(n, 0, 1), y=rnorm(n, 0, 1))
TestData <- ggplot (data = df) +
stat_density2d(aes(x = x, y = y,fill = as.factor(..level..)),bins=5,geom = "polygon") +
geom_point(aes(x = x, y = y)) +
scale_fill_manual(values = c("yellow","red","green","royalblue", "black"))
print(TestData)
Which yields the result. Note that varying the bins parameter has the desired effect now, which cannot be replicated by varying the n parameter.
update:
After an extended discussion with Roland (see the comments), he determined it is probably a regression bug and filed a bug report.
As the question is "why is the bins parameter unknown?", and I spent a fair amount of time researching it, I will answer it.
Your example obviously comes from this link from Oct 2013, where the parameter is used. How to correctly interpret ggplot's stat_density2d
However it was never a documented parameter, and it is not clear that it is being used there. It is probably a parameter that is being passed through to other libraries (like MASS) that are being used by stat_density2d.
We can get the code to work by getting rid of the scale_fill_manual call and using this:
library(ggmap)
set.seed(1)
n=100
df <- data.frame(x=rnorm(n, 0, 1), y=rnorm(n, 0, 1))
TestData <- ggplot (data = df) +
stat_density2d(aes(x = x, y = y,fill = as.factor(..level..)), geom = "polygon",) +
geom_point(aes(x = x, y = y))
# scale_fill_manual(values = c("yellow","red","green","royalblue", "black"))
print(TestData)
which yields this:
Since this looks rather different from the plot posted in the original Oct 2013 link, I would say that stat_density2d has been extensively rewritten since then, or maybe MASS:kde2d (or another MASS routine), and that the bin parameter is no longer accepted. It is also not clear that that parameter ever did anything (read the link).
You can however vary the parameters n and h - also undocumented from the stat_density2d point-of-view (as far as I can tell).
I would like to draw a hollow histogram that has no vertical bars drawn inside of it, but just an outline. I couldn't find any way to do it with geom_histogram. The geom_step+stat_bin combination seemed like it could do the job. However, the bins of geom_step+stat_bin are shifted by a half bin either to the right or to the left, depending on the step's direction= parameter value. It seems like it is doing its "steps" WRT bin centers. Is there any way to change this behavior so it would do the "steps" at bin edges?
Here's an illustration:
d <- data.frame(x=rnorm(1000))
qplot(x, data=d, geom="histogram",
breaks=seq(-4,4,by=.5), color=I("red"), fill = I("transparent")) +
geom_step(stat="bin", breaks=seq(-4,4,by=.5), color="black", direction="vh")
I propose making a new Geom like so:
library(ggplot2)
library(proto)
geom_stephist <- function(mapping = NULL, data = NULL, stat="bin", position="identity", ...) {
GeomStepHist$new(mapping=mapping, data=data, stat=stat, position=position, ...)
}
GeomStepHist <- proto(ggplot2:::Geom, {
objname <- "stephist"
default_stat <- function(.) StatBin
default_aes <- function(.) aes(colour="black", size=0.5, linetype=1, alpha = NA)
reparameterise <- function(., df, params) {
transform(df,
ymin = pmin(y, 0), ymax = pmax(y, 0),
xmin = x - width / 2, xmax = x + width / 2, width = NULL
)
}
draw <- function(., data, scales, coordinates, ...) {
data <- as.data.frame(data)[order(data$x), ]
n <- nrow(data)
i <- rep(1:n, each=2)
newdata <- rbind(
transform(data[1, ], x=xmin, y=0),
transform(data[i, ], x=c(rbind(data$xmin, data$xmax))),
transform(data[n, ], x=xmax, y=0)
)
rownames(newdata) <- NULL
GeomPath$draw(newdata, scales, coordinates, ...)
}
guide_geom <- function(.) "path"
})
This also works for non-uniform breaks. To illustrate the usage:
d <- data.frame(x=runif(1000, -5, 5))
ggplot(d, aes(x)) +
geom_histogram(breaks=seq(-4,4,by=.5), color="red", fill=NA) +
geom_stephist(breaks=seq(-4,4,by=.5), color="black")
This isn't ideal, but it's the best I can come up with:
h <- hist(d$x,breaks=seq(-4,4,by=.5))
d1 <- data.frame(x = h$breaks,y = c(h$counts,NA))
ggplot() +
geom_histogram(data = d,aes(x = x),breaks = seq(-4,4,by=.5),
color = "red",fill = "transparent") +
geom_step(data = d1,aes(x = x,y = y),stat = "identity")
Yet another one. Use ggplot_build to build a plot object of the histogram for rendering. From this object x and y values are extracted, to be used for geom_step. Use by to offset x values.
by <- 0.5
p1 <- ggplot(data = d, aes(x = x)) +
geom_histogram(breaks = seq(from = -4, to = 4, by = by),
color = "red", fill = "transparent")
df <- ggplot_build(p1)$data[[1]][ , c("x", "y")]
p1 +
geom_step(data = df, aes(x = x - by/2, y = y))
Edit following comment from #Vadim Khotilovich (Thanks!)
The xmin from the plot object can be used instead (-> no need for offset adjustment)
df <- ggplot_build(p1)$data[[1]][ , c("xmin", "y")]
p1 +
geom_step(data = df, aes(x = xmin, y = y))
An alternative, also less than ideal:
qplot(x, data=d, geom="histogram", breaks=seq(-4,4,by=.5), color=I("red"), fill = I("transparent")) +
stat_summary(aes(x=round(x * 2 - .5) / 2, y=1), fun.y=length, geom="step")
Missing some bins that you can probably add back if you mess around a bit. Only (somewhat meaningless) advantage is it is more in ggplot than #Joran's answer, though even that is debatable.
I answer my own comment earlier today: here is a modified version of #RosenMatev's answer updated for the v2 (ggplot2_2.0.0) using ggproto:
GeomStepHist <- ggproto("GeomStepHist", GeomPath,
required_aes = c("x"),
draw_panel = function(data, panel_scales, coord, direction) {
data <- as.data.frame(data)[order(data$x), ]
n <- nrow(data)
i <- rep(1:n, each=2)
newdata <- rbind(
transform(data[1, ], x=x - width/2, y=0),
transform(data[i, ], x=c(rbind(data$x-data$width/2, data$x+data$width/2))),
transform(data[n, ], x=x + width/2, y=0)
)
rownames(newdata) <- NULL
GeomPath$draw_panel(newdata, panel_scales, coord)
}
)
geom_step_hist <- function(mapping = NULL, data = NULL, stat = "bin",
direction = "hv", position = "stack", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE, ...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomStepHist,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
direction = direction,
na.rm = na.rm,
...
)
)
}
TLDR: use geom_step(..., direction = "mid")
This has become much easier since Daniel Mastropietro and Dewey Dunnington implemented the "mid" as an additional option for the direction argument of geom_step for ggplot2 v3.3.0:
library(ggplot2)
set.seed(1)
d <- data.frame(x = rnorm(1000))
ggplot(d, aes(x)) +
geom_histogram(breaks = seq(-4, 4, by=.5), color="red", fill = "transparent") +
geom_step(stat="bin", breaks=seq(-4, 4, by=.5), color = "black", direction = "mid")
Below, for reference, the code from the question formatted like above answer:
ggplot(d, aes(x)) +
geom_histogram(breaks = seq(-4, 4, by=.5), color = "red", fill = "transparent") +
geom_step(stat="bin", breaks = seq(-4, 4, by=.5), color = "black", direction = "vh")
Created on 2020-09-02 by the reprex package (v0.3.0)
a simple way to do something similar to #Rosen Matev (that does not work with ggplot2_2.0.0 as mentioned by #julou), I would just
1) calculate manually the value of the bins (using a small function as shown below)
2) use geom_step()
Hope this helps !
geom_step_hist<- function(d,binw){
dd=NULL
bin=min(d$y) # this enables having a first value that is = 0 (to have the left vertical bar of the plot when using geom_step)
max=max(d$y)+binw*2 # this enables having a last value that is = 0 (to have the right vertical bar of the plot when using geom_step)
xx=NULL
yy=NULL
while(bin<=max){
n=length(temp$y[which(temp$y<bin & temp$y>=(bin-binw))])
yy=c(yy,n)
xx=c(xx,bin-binw)
bin=bin+binw
rm(n)
}
dd=data.frame(xx,yy)
return(dd)
}
hist=ggplot(dd,aes(x=xx,y=yy))+
geom_step()
I have this dataframe:
set.seed(1)
x <- c(rnorm(50, mean = 1), rnorm(50, mean = 3))
y <- c(rep("site1", 50), rep("site2", 50))
xy <- data.frame(x, y)
And I have made this density plot:
library(ggplot2)
ggplot(xy, aes(x, color = y)) + geom_density()
For site1 I need to shade the area under the curve that > 1% of the data. For site2 I need to shade the area under the curve that < 75% of the data.
I'm expecting the plot to look something like this (photoshopped). Having been through stack overflow, I'm aware that others have asked how to shade part of the area under a curve, but I cannot figure out how to shade the area under a curve by group.
Here is one way (and, as #joran says, this is an extension of the response here):
# same data, just renaming columns for clarity later on
# also, use data tables
library(data.table)
set.seed(1)
value <- c(rnorm(50, mean = 1), rnorm(50, mean = 3))
site <- c(rep("site1", 50), rep("site2", 50))
dt <- data.table(site,value)
# generate kdf
gg <- dt[,list(x=density(value)$x, y=density(value)$y),by="site"]
# calculate quantiles
q1 <- quantile(dt[site=="site1",value],0.01)
q2 <- quantile(dt[site=="site2",value],0.75)
# generate the plot
ggplot(dt) + stat_density(aes(x=value,color=site),geom="line",position="dodge")+
geom_ribbon(data=subset(gg,site=="site1" & x>q1),
aes(x=x,ymax=y),ymin=0,fill="red", alpha=0.5)+
geom_ribbon(data=subset(gg,site=="site2" & x<q2),
aes(x=x,ymax=y),ymin=0,fill="blue", alpha=0.5)
Produces this:
The problem with #jlhoward's solution is that you need to manually add goem_ribbon for each group you have. I wrote my own ggplot stat wrapper following this vignette. The benefit of this is that it automatically works with group_by and facet and you don't need to manually add geoms for each group.
StatAreaUnderDensity <- ggproto(
"StatAreaUnderDensity", Stat,
required_aes = "x",
compute_group = function(data, scales, xlim = NULL, n = 50) {
fun <- approxfun(density(data$x))
StatFunction$compute_group(data, scales, fun = fun, xlim = xlim, n = n)
}
)
stat_aud <- function(mapping = NULL, data = NULL, geom = "area",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, n = 50, xlim=NULL,
...) {
layer(
stat = StatAreaUnderDensity, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(xlim = xlim, n = n, ...))
}
Now you can use stat_aud function just like other ggplot geoms.
set.seed(1)
x <- c(rnorm(500, mean = 1), rnorm(500, mean = 3))
y <- c(rep("group 1", 500), rep("group 2", 500))
t_critical = 1.5
tibble(x=x, y=y)%>%ggplot(aes(x=x,color=y))+
geom_density()+
geom_vline(xintercept = t_critical)+
stat_aud(geom="area",
aes(fill=y),
xlim = c(0, t_critical),
alpha = .2)
tibble(x=x, y=y)%>%ggplot(aes(x=x))+
geom_density()+
geom_vline(xintercept = t_critical)+
stat_aud(geom="area",
fill = "orange",
xlim = c(0, t_critical),
alpha = .2)+
facet_grid(~y)
You need to use fill. color controls the outline of the density plot, which is necessary if you want non-black outlines.
ggplot(xy, aes(x, color=y, fill = y, alpha=0.4)) + geom_density()
To get something like that. Then you can remove the alpha part of the legend by using
ggplot(xy, aes(x, color = y, fill = y, alpha=0.4)) + geom_density()+ guides(alpha='none')