Related
It seems like there has to be a way to retain editable text when exporting graphics from R into a vector format (e.g., pdf, eps, svg), but I have not been able to locate it. My graphics are primarily created using ggplot2, and I am running R via RStudio on Windows.
#generate a plot
set.seed(1)
df <- data.frame(
gp = factor(rep(letters[1:3], each = 10)),
y = rnorm(30)
)
ds <- do.call(rbind, lapply(split(df, df$gp), function(d) {
data.frame(mean = mean(d$y), sd = sd(d$y), gp = d$gp)
}))
ggplot(df, aes(gp, y)) +
geom_point() +
geom_point(data = ds, aes(y = mean), colour = 'red', size = 3)
#export
ggsave("plot.eps")
ggsave("plot.pdf")
ggsave("plot_cairo.pdf", device=cairo_pdf)
ggsave("plot.svg")
All of these options generate a vector file with text (axis labels, etc) converted to outlines, which are no longer editable as text - which defeats a major point of the vector format, at least for my use case.
Ok, so typical use cases, the svglite library will retain text - see plot 1 export below. If you put two plots together using the patchwork library, the text is converted to outlines and no longer retained as editable text.
set.seed(1)
df <- data.frame(
gp = factor(rep(letters[1:3], each = 10)),
y = rnorm(30)
)
ds <- do.call(rbind, lapply(split(df, df$gp), function(d) {
data.frame(mean = mean(d$y), sd = sd(d$y), gp = d$gp)
}))
p1<-ggplot(df, aes(gp, y)) +
geom_point() +
geom_point(data = ds, aes(y = mean), colour = 'red', size = 3)
p2<-ggplot(df, aes(gp, y)) +
geom_point() +
geom_point(data = ds, aes(y = mean), colour = 'green', size = 3)
library(patchwork)
p3 <- p1|p2
ggsave(plot = p1, "p1.svg", device = svglite)
ggsave(plot = p3, "p3.svg", device = svglite)
I'm trying to graph multiple nonlinear least squares regression in r in different colors based on the value of a variable.
However, I also display the equation of the last one, and I would like the color in the nonlinear regression corresponding to the equation to be black as well.
What I've tried is shown in the geom_smooth() layer - I tried to include an ifelse() statement, but this doesn't work because of reasons described here: Different between colour argument and aes colour in ggplot2?
test <- function() {
require(ggplot2)
set.seed(1);
master <- data.frame(matrix(NA_real_, nrow = 0, ncol = 3))
for( i in 1:5 ) {
df <- data.frame(matrix(NA_real_, nrow = 50, ncol = 3))
colnames(df) <- c("xdata", "ydata", "test")
df$xdata = as.numeric(sample(1:100, size = nrow(df), replace = FALSE))
df$ydata = as.numeric(sample(1:3, size = nrow(df), prob=c(.60, .25, .15), replace = TRUE))
# browser()
df$test = i
master <- rbind(master, df)
}
df <- master
last <- 5
# based on https://stackoverflow.com/questions/18305852/power-regression-in-r-similar-to-excel
power_eqn = function(df, start = list(a=300,b=1)) {
m = nls(as.numeric(reorder(xdata,-ydata)) ~ a*ydata^b, start = start, data = df)
# View(summary(m))
# browser()
# eq <- substitute(italic(hat(y)) == a ~italic(x)^b*","~~italic(r)^2~"="~r2*","~~p~"="~italic(pvalue),
eq <- substitute(italic(y) == a ~italic(x)^b*","~~italic('se')~"="~se*","~~italic(p)~"="~pvalue,
list(a = format(coef(m)[1], digits = 6), # a
b = format(coef(m)[2], digits = 6), # b
# r2 = format(summary(m)$r.squared, digits = 3),
se = format(summary(m)$parameters[2,'Std. Error'], digits = 6), # standard error
pvalue = format(summary(m)$coefficients[2,'Pr(>|t|)'], digits=6) )) # p value (based on t statistic)
as.character(as.expression(eq))
}
plot1 <- ggplot(df, aes(x = as.numeric(reorder(xdata,-ydata)), y = ydata ) ) +
geom_point(color="black", shape=1 ) +
# PROBLEM LINE
stat_smooth(aes(color=ifelse(test==5, "black", test)), method = 'nls', formula = 'y~a*x^b', method.args = list(start= c(a =1,b=1)),se=FALSE, fullrange=TRUE) +
geom_text(x = quantile(df$xdata)[4], y = max(df$ydata), label = power_eqn(df), parse = TRUE, size=4, color="black") + # make bigger? add border around?
theme(legend.position = "none", axis.ticks.x = element_blank() ) + #, axis.title.x = "family number", axis.title.y = "number of languages" ) # axis.text.x = element_blank(),
labs( x = "xdata", y = "ydata", title="test" )
plot1
}
test()
This is the graph I got.
I would like the line corresponding to the points and equation to be black as well. Does anyone know how to do this?
I do not want to use a scale_fill_manual, etc., because my real data would have many, many more lines - unless the scale_fill_manual/etc. can be randomly generated.
You could use scale_color_manual using a custom created palette where your level of interest (in your example where test equals 5) is set to black. Below I use palettes from RColorBrewer, extend them if necessary to the number of levels needed and sets the last color to black.
library(RColorBrewer) # provides several great palettes
createPalette <- function(n, colors = 'Greens') {
max_colors <- brewer.pal.info[colors, ]$maxcolors # Get maximum colors in palette
palette <- brewer.pal(min(max_colors, n), colors) # Get RColorBrewer palette
if (n > max_colors) {
palette <- colorRampPalette(palette)(n) # make it longer i n > max_colros
}
# assume that n-th color should be black
palette[n] <- "#000000"
# return palette
palette[1:n]
}
# create a palette with 5 levels using the Spectral palette
# change from 5 to the needed number of levels in your real data.
mypalette <- createPalette(5, 'Spectral') # palettes from RColorBrewer
We can then use mypalette with scale_color_manual(values=mypalette) to color points and lines according to the test variable.
Please note that I have updated geom_point and stat_smooth to so that they use aes(color=as.factor(test)). I have also changed the call to power_eqn to only use data points where df$test==5. The black points, lines and equation should now be based on the same data.
plot1 <- ggplot(df, aes(x = as.numeric(reorder(xdata,-ydata)), y = ydata )) +
geom_point(aes(color=as.factor(test)), shape=1) +
stat_smooth(aes(color=as.factor(test)), method = 'nls', formula = 'y~a*x^b', method.args = list(start= c(a =1,b=1)),se=FALSE, fullrange=TRUE) +
geom_text(x = quantile(df$xdata)[4], y = max(df$ydata), label = power_eqn(df[df$test == 5,]), parse = TRUE, size=4, color="black") +
theme(legend.position = "none", axis.ticks.x = element_blank() ) +
labs( x = "xdata", y = "ydata", title="test" ) +
scale_color_manual(values = mypalette)
plot1
See resulting figure here (not reputation enough to include them)
I hope you find my answer useful.
I just encountered such graph attached where two colors of geom_point are used (I believe it is made by ggplot2). Similarly, I would like to have dots of one color to range from size 1 to 5, and have another color for a series of dots for the range 10 to 50. I have however no clue on how to add two different ranges of point in one graph.
At the basic step I have:
a <- c(1,2,3,4,5)
b <- c(10,20,30,40,50)
Species <- factor(c("Species1","Species2","Species3","Species4","Species5"))
bubba <- data.frame(Sample1=a,Sample2=b,Species=Species)
bubba$Species=factor(bubba$Species, levels=bubba$Species)
xm=melt(bubba,id.vars = "Species", variable.name="Samples", value.name = "Size")
str(xm)
ggplot(xm,aes(x= Samples,y= fct_rev(Species)))+geom_point(aes(size=Size))+scale_size(range = range(xm$Size))+theme_bw()
Any would have clues where I should look into ? Thanks!
I've got an approach that gets 90% of the way there, but I'm not sure how to finish the deed. To get a single legend for size, I used a transformation to convert input size to display size. That makes the legend appearance conform to the display. What I don't have figured out yet is how to apply a similar transformation to the fill so that both can be integrated into the same legend.
Here's the transformation, which in this case shrinks everything 10 or more:
library(scales)
shrink_10s_trans = trans_new("shrink_10s",
transform = function(y){
yt = if_else(y >= 10, y*0.1, y)
return(yt)
},
inverse = function(yt){
return(yt) # Not 1-to-1 function, picking one possibility
}
)
Then we can use this transformation on the size to selectively shink only the dots that are 10 or larger. This works out nicely for the legend, aside from integrating the fill encoding with the size encoding.
ggplot(xm,aes(x= Samples,y= fct_rev(Species), fill = Size < 10))+
geom_point(aes(size=Size), shape = 21)+
scale_size_area(trans = shrink_10s_trans, max_size = 10,
breaks = c(1,2,3,10,20,30,40),
labels = c(1,2,3,10,20,30,40)) +
scale_fill_manual(values = c(rgb(136,93,100, maxColorValue = 255),
rgb(236,160,172, maxColorValue = 255))) +
theme_bw()
a <- c(1, 2, 3, 4, 5)
b <- c(10, 20, 30, 40, 50)
Species <- factor(c("Species1", "Species2", "Species3", "Species4", "Species5"))
bubba <- data.frame(Sample1 = a, Sample2 = b, Species = Species)
bubba$Species <- factor(bubba$Species, levels = bubba$Species)
xm <- reshape2::melt(bubba, id.vars = "Species", variable.name = "Samples", value.name = "Size")
ggplot(xm, aes(x = Samples, y = fct_rev(Species))) +
geom_point(aes(size = Size, color = Size)) +
scale_color_continuous(breaks = c(1,2,3,10,20,30), guide = guide_legend()) +
scale_size(range = range(xm$Size), breaks = c(1,2,3,10,20,30)) +
theme_bw()
Here's a cludge. I haven't got time to figure out the legend at the moment. Note that 1 and 10 are the same size, but a different colour, as are 3 and 40.
# Create data frame
a <- c(1, 2, 3, 4, 5)
b <- c(10, 20, 30, 40, 50)
Species <- factor(c("Species1", "Species2", "Species3", "Species4", "Species5"))
bubba <- data.frame(Sample1 = a, Sample2 = b, Species = Species)
# Restructure data
xm <- reshape2::melt(bubba, id.vars = "Species", variable.name = "Samples", value.name = "Size")
# Calculate bubble size
bubble_size <- function(val){
ifelse(val > 3, (1/15) * val + (1/3), val)
}
# Calculate bubble colour
bubble_colour <- function(val){
ifelse(val > 3, "A", "B")
}
# Calculate bubble size and colour
xm %<>%
mutate(bub_size = bubble_size(Size),
bub_col = bubble_colour(Size))
# Plot data
ggplot(xm, aes(x = Samples, y = fct_rev(Species))) +
geom_point(aes(size = bub_size, fill = bub_col), shape = 21, colour = "black") +
theme(panel.grid.major = element_line(colour = alpha("gray", 0.5), linetype = "dashed"),
text = element_text(family = "serif"),
legend.position = "none") +
scale_size(range = c(1, 20)) +
scale_fill_manual(values = c("brown", "pink")) +
ylab("Species")
I think you are looking for bubble plots in R
https://www.r-graph-gallery.com/bubble-chart/
That said, you probably want to build the right and left the side of the graphic separately and then combine.
I'm working with the ggmap package in R and I am relatively new to geospatial data visualizations. I have a data frame of eleven latitude and longitude pairs that I would like to plot on a map, each with a label. Here is the dummy data:
lat<- c(47.597157,47.656322,47.685928,47.752365,47.689297,47.628128,47.627071,47.586349,47.512684,47.571232,47.562283)
lon<-c(-122.312187,-122.318039,-122.31472,-122.345345,-122.377045,-122.370117,-122.368462,-122.331734,-122.294395,-122.33606,-122.379745)
labels<-c("Site 1A","Site 1B","Site 1C","Site 2A","Site 3A","Site 1D","Site 2C","Site 1E","Site 2B","Site 1G","Site 2G")
df<-data.frame(lat,lon,labels)
Now I use annotate to create the data point labels and plot these on a map;
map.data <- get_map(location = c(lon=-122.3485,lat=47.6200),
maptype = 'roadmap', zoom = 11)
pointLabels<-annotate("text",x=uniqueReach$lon,y=c(uniqueReach$lat),size=5,font=3,fontface="bold",family="Helvetica",label=as.vector(uniqueReach$label))
dataPlot <- ggmap(map.data) +
geom_point(data = uniqueReach,aes(x = df$lon, y = df$lat), alpha = 1,fill="red",pch=21,size = 6) + labs(x = 'Longitude', y = 'Latitude')+pointLabels
This produces a plot of the data points
As you can see, there are two data points that overlap around (-122.44,47.63), and their labels also overlap. Now I can manually add a shift to each label point to keep the labels from overlapping (see this post), but this is not a great technique when I need to produce many of these plots for different sets of latitude and longitude pairs.
Is there a way I can automatically keep data labels from overlapping? I realize whether the labels overlap is dependent on the actual figure size, so I'm open to fixing the figure size at certain dimensions if need be. Thank you in advance for any insights!
EDIT
The following is modified code using the answer given by Sandy Mupratt
# Defining function to draw text boxes
draw.rects.modified <- function(d,...){
if(is.null(d$box.color))d$box.color <- NA
if(is.null(d$fill))d$fill <- "grey95"
for(i in 1:nrow(d)){
with(d[i,],{
grid.rect(gp = gpar(col = box.color, fill = fill,alpha=0.7),
vp = viewport(x, y, w, h, "cm", c(hjust, vjust=0.25), angle=rot))
})
}
d
}
# Defining function to determine text box borders
enlarge.box.modified <- function(d,...){
if(!"h"%in%names(d))stop("need to have already calculated height and width.")
calc.borders(within(d,{
w <- 0.9*w
h <- 1.1*h
}))
}
Generating the plot:
dataplot<-ggmap(map.data) +
geom_point(data = df,aes(x = df$lon, y = df$lat),
alpha = 1, fill = "red", pch = 21, size = 6) +
labs(x = 'Longitude', y = 'Latitude') +
geom_dl(data = df,
aes(label = labels),
list(dl.trans(y = y + 0.3), "boxes", cex = .8, fontface = "bold"))
This is a MUCH more readable plot, but with one outstanding issue. You'll note that the label "Site 1E" begins to overlap the data point associated with "Site 1A". Does directlabels have a way with dealing with labels overlapping data points belonging to another label?
A final question I have regarding this is how can I plot several duplicate labels using this method. Suppose the labels for data.frame are all the same:
df$labels<-rep("test",dim(df)[1])
When I use the same code, directlabels removes the duplicate label names:
But I want each data point to have a label of "test". Any suggestions?
Edit 11 Jan 2016: using ggrepel package with ggplot2 v2.0.0 and ggmap v2.6
ggrepel works well. In the code below, geom_label_repel() shows some of the available parameters.
lat <- c(47.597157,47.656322,47.685928,47.752365,47.689297,47.628128,47.627071,
47.586349,47.512684,47.571232,47.562283)
lon <- c(-122.312187,-122.318039,-122.31472,-122.345345,-122.377045,-122.370117,
-122.368462,-122.331734,-122.294395,-122.33606,-122.379745)
labels <- c("Site 1A","Site 1B","Site 1C","Site 2A","Site 3A","Site 1D",
"Site 2C","Site 1E","Site 2B","Site 1G","Site 2G")
df <- data.frame(lat,lon,labels)
library(ggmap)
library(ggrepel)
library(grid)
map.data <- get_map(location = c(lon = -122.3485, lat = 47.6200),
maptype = 'roadmap', zoom = 11)
ggmap(map.data) +
geom_point(data = df, aes(x = lon, y = lat),
alpha = 1, fill = "red", pch = 21, size = 5) +
labs(x = 'Longitude', y = 'Latitude') +
geom_label_repel(data = df, aes(x = lon, y = lat, label = labels),
fill = "white", box.padding = unit(.4, "lines"),
label.padding = unit(.15, "lines"),
segment.color = "red", segment.size = 1)
Original answer but updated for ggplot v2.0.0 and ggmap v2.6
If there is only a small number of overlapping points, then using the "top.bumpup" or "top.bumptwice" method from the direct labels package can separate them. In the code below, I use the geom_dl() function to create and position the labels.
lat <- c(47.597157,47.656322,47.685928,47.752365,47.689297,47.628128,47.627071,
47.586349,47.512684,47.571232,47.562283)
lon <- c(-122.312187,-122.318039,-122.31472,-122.345345,-122.377045,-122.370117,
-122.368462,-122.331734,-122.294395,-122.33606,-122.379745)
labels <- c("Site 1A","Site 1B","Site 1C","Site 2A","Site 3A","Site 1D",
"Site 2C","Site 1E","Site 2B","Site 1G","Site 2G")
df <- data.frame(lat,lon,labels)
library(ggmap)
library(directlabels)
map.data <- get_map(location = c(lon = -122.3485, lat = 47.6200),
maptype = 'roadmap', zoom = 11)
ggmap(map.data) +
geom_point(data = df, aes(x = lon, y = lat),
alpha = 1, fill = "red", pch = 21, size = 6) +
labs(x = 'Longitude', y = 'Latitude') +
geom_dl(data = df, aes(label = labels), method = list(dl.trans(y = y + 0.2),
"top.bumptwice", cex = .8, fontface = "bold", family = "Helvetica"))
Edit: Adjusting for underlying labels
A couple of methods spring to mind, but neither is entirely satisfactory. But I don't think you will find a solution that will apply to all situations.
Adding a background colour to each label
This is a bit of a workaround, but directlabels has a "box" function (i.e., the labels are placed inside a box). It looks like one should be able to modify background fill and border colour in the list in geom_dl, but I can't get it to work. Instead, I take two functions (draw.rects and enlarge.box) from the directlabels website; modify them; and combine the modified functions with the "top.bumptwice" method.
draw.rects.modified <- function(d,...){
if(is.null(d$box.color))d$box.color <- NA
if(is.null(d$fill))d$fill <- "grey95"
for(i in 1:nrow(d)){
with(d[i,],{
grid.rect(gp = gpar(col = box.color, fill = fill),
vp = viewport(x, y, w, h, "cm", c(hjust, vjust=0.25), angle=rot))
})
}
d
}
enlarge.box.modified <- function(d,...){
if(!"h"%in%names(d))stop("need to have already calculated height and width.")
calc.borders(within(d,{
w <- 0.9*w
h <- 1.1*h
}))
}
boxes <-
list("top.bumptwice", "calc.boxes", "enlarge.box.modified", "draw.rects.modified")
ggmap(map.data) +
geom_point(data = df,aes(x = lon, y = lat),
alpha = 1, fill = "red", pch = 21, size = 6) +
labs(x = 'Longitude', y = 'Latitude') +
geom_dl(data = df, aes(label = labels), method = list(dl.trans(y = y + 0.3),
"boxes", cex = .8, fontface = "bold"))
Add an outline to each label
Another option is to use this method to give each label an outline, although it is not immediately clear how it would work with directlabels. Therefore, it would need a manual adjustment of the coordinates, or a search of the dataframe for coordinates that are within a given threshold then adjust. However, here, I use the pointLabel function from maptools package to position the labels. No guarantee that it will work every time, but I got a reasonable result with your data. There is a random element built into it, so you can run it a few time until you get a reasonable result. Also, note that it positions labels in a base plot. The label locations then have to extracted and loaded into the ggplot/ggmap.
lat<- c(47.597157,47.656322,47.685928,47.752365,47.689297,47.628128,47.627071,47.586349,47.512684,47.571232,47.562283)
lon<-c(-122.312187,-122.318039,-122.31472,-122.345345,-122.377045,-122.370117,-122.368462,-122.331734,-122.294395,-122.33606,-122.379745)
labels<-c("Site 1A","Site 1B","Site 1C","Site 2A","Site 3A","Site 1D","Site 2C","Site 1E","Site 2B","Site 1G","Site 2G")
df<-data.frame(lat,lon,labels)
library(ggmap)
library(maptools) # pointLabel function
# Get map
map.data <- get_map(location = c(lon=-122.3485,lat=47.6200),
maptype = 'roadmap', zoom = 11)
bb = t(attr(map.data, "bb")) # the map's bounding box
# Base plot to plot points and using pointLabels() to position labels
plot(df$lon, df$lat, pch = 20, cex = 5, col = "red", xlim = bb[c(2,4)], ylim = bb[c(1,3)])
new = pointLabel(df$lon, df$lat, df$labels, pos = 4, offset = 0.5, cex = 1)
new = as.data.frame(new)
new$labels = df$labels
## Draw the map
map = ggmap(map.data) +
geom_point(data = df, aes(x = lon, y = lat),
alpha = 1, fill = "red", pch = 21, size = 5) +
labs(x = 'Longitude', y = 'Latitude')
## Draw the label outlines
theta <- seq(pi/16, 2*pi, length.out=32)
xo <- diff(bb[c(2,4)])/400
yo <- diff(bb[c(1,3)])/400
for(i in theta) {
map <- map + geom_text(data = new,
aes_(x = new$x + .01 + cos(i) * xo, y = new$y + sin(i) * yo, label = labels),
size = 3, colour = 'black', vjust = .5, hjust = .8)
}
# Draw the labels
map +
geom_text(data = new, aes(x = x + .01, y = y, label=labels),
size = 3, colour = 'white', vjust = .5, hjust = .8)
I would like to draw a hollow histogram that has no vertical bars drawn inside of it, but just an outline. I couldn't find any way to do it with geom_histogram. The geom_step+stat_bin combination seemed like it could do the job. However, the bins of geom_step+stat_bin are shifted by a half bin either to the right or to the left, depending on the step's direction= parameter value. It seems like it is doing its "steps" WRT bin centers. Is there any way to change this behavior so it would do the "steps" at bin edges?
Here's an illustration:
d <- data.frame(x=rnorm(1000))
qplot(x, data=d, geom="histogram",
breaks=seq(-4,4,by=.5), color=I("red"), fill = I("transparent")) +
geom_step(stat="bin", breaks=seq(-4,4,by=.5), color="black", direction="vh")
I propose making a new Geom like so:
library(ggplot2)
library(proto)
geom_stephist <- function(mapping = NULL, data = NULL, stat="bin", position="identity", ...) {
GeomStepHist$new(mapping=mapping, data=data, stat=stat, position=position, ...)
}
GeomStepHist <- proto(ggplot2:::Geom, {
objname <- "stephist"
default_stat <- function(.) StatBin
default_aes <- function(.) aes(colour="black", size=0.5, linetype=1, alpha = NA)
reparameterise <- function(., df, params) {
transform(df,
ymin = pmin(y, 0), ymax = pmax(y, 0),
xmin = x - width / 2, xmax = x + width / 2, width = NULL
)
}
draw <- function(., data, scales, coordinates, ...) {
data <- as.data.frame(data)[order(data$x), ]
n <- nrow(data)
i <- rep(1:n, each=2)
newdata <- rbind(
transform(data[1, ], x=xmin, y=0),
transform(data[i, ], x=c(rbind(data$xmin, data$xmax))),
transform(data[n, ], x=xmax, y=0)
)
rownames(newdata) <- NULL
GeomPath$draw(newdata, scales, coordinates, ...)
}
guide_geom <- function(.) "path"
})
This also works for non-uniform breaks. To illustrate the usage:
d <- data.frame(x=runif(1000, -5, 5))
ggplot(d, aes(x)) +
geom_histogram(breaks=seq(-4,4,by=.5), color="red", fill=NA) +
geom_stephist(breaks=seq(-4,4,by=.5), color="black")
This isn't ideal, but it's the best I can come up with:
h <- hist(d$x,breaks=seq(-4,4,by=.5))
d1 <- data.frame(x = h$breaks,y = c(h$counts,NA))
ggplot() +
geom_histogram(data = d,aes(x = x),breaks = seq(-4,4,by=.5),
color = "red",fill = "transparent") +
geom_step(data = d1,aes(x = x,y = y),stat = "identity")
Yet another one. Use ggplot_build to build a plot object of the histogram for rendering. From this object x and y values are extracted, to be used for geom_step. Use by to offset x values.
by <- 0.5
p1 <- ggplot(data = d, aes(x = x)) +
geom_histogram(breaks = seq(from = -4, to = 4, by = by),
color = "red", fill = "transparent")
df <- ggplot_build(p1)$data[[1]][ , c("x", "y")]
p1 +
geom_step(data = df, aes(x = x - by/2, y = y))
Edit following comment from #Vadim Khotilovich (Thanks!)
The xmin from the plot object can be used instead (-> no need for offset adjustment)
df <- ggplot_build(p1)$data[[1]][ , c("xmin", "y")]
p1 +
geom_step(data = df, aes(x = xmin, y = y))
An alternative, also less than ideal:
qplot(x, data=d, geom="histogram", breaks=seq(-4,4,by=.5), color=I("red"), fill = I("transparent")) +
stat_summary(aes(x=round(x * 2 - .5) / 2, y=1), fun.y=length, geom="step")
Missing some bins that you can probably add back if you mess around a bit. Only (somewhat meaningless) advantage is it is more in ggplot than #Joran's answer, though even that is debatable.
I answer my own comment earlier today: here is a modified version of #RosenMatev's answer updated for the v2 (ggplot2_2.0.0) using ggproto:
GeomStepHist <- ggproto("GeomStepHist", GeomPath,
required_aes = c("x"),
draw_panel = function(data, panel_scales, coord, direction) {
data <- as.data.frame(data)[order(data$x), ]
n <- nrow(data)
i <- rep(1:n, each=2)
newdata <- rbind(
transform(data[1, ], x=x - width/2, y=0),
transform(data[i, ], x=c(rbind(data$x-data$width/2, data$x+data$width/2))),
transform(data[n, ], x=x + width/2, y=0)
)
rownames(newdata) <- NULL
GeomPath$draw_panel(newdata, panel_scales, coord)
}
)
geom_step_hist <- function(mapping = NULL, data = NULL, stat = "bin",
direction = "hv", position = "stack", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE, ...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomStepHist,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
direction = direction,
na.rm = na.rm,
...
)
)
}
TLDR: use geom_step(..., direction = "mid")
This has become much easier since Daniel Mastropietro and Dewey Dunnington implemented the "mid" as an additional option for the direction argument of geom_step for ggplot2 v3.3.0:
library(ggplot2)
set.seed(1)
d <- data.frame(x = rnorm(1000))
ggplot(d, aes(x)) +
geom_histogram(breaks = seq(-4, 4, by=.5), color="red", fill = "transparent") +
geom_step(stat="bin", breaks=seq(-4, 4, by=.5), color = "black", direction = "mid")
Below, for reference, the code from the question formatted like above answer:
ggplot(d, aes(x)) +
geom_histogram(breaks = seq(-4, 4, by=.5), color = "red", fill = "transparent") +
geom_step(stat="bin", breaks = seq(-4, 4, by=.5), color = "black", direction = "vh")
Created on 2020-09-02 by the reprex package (v0.3.0)
a simple way to do something similar to #Rosen Matev (that does not work with ggplot2_2.0.0 as mentioned by #julou), I would just
1) calculate manually the value of the bins (using a small function as shown below)
2) use geom_step()
Hope this helps !
geom_step_hist<- function(d,binw){
dd=NULL
bin=min(d$y) # this enables having a first value that is = 0 (to have the left vertical bar of the plot when using geom_step)
max=max(d$y)+binw*2 # this enables having a last value that is = 0 (to have the right vertical bar of the plot when using geom_step)
xx=NULL
yy=NULL
while(bin<=max){
n=length(temp$y[which(temp$y<bin & temp$y>=(bin-binw))])
yy=c(yy,n)
xx=c(xx,bin-binw)
bin=bin+binw
rm(n)
}
dd=data.frame(xx,yy)
return(dd)
}
hist=ggplot(dd,aes(x=xx,y=yy))+
geom_step()