Change axis breaks without defining sequence - ggplot - r

Is there any way to set the break step size in ggplot without defining a sequence. For example:
x <- 1:10
y <- 1:10
df <- data.frame(x, y)
# Plot with auto scale
ggplot(df, aes(x,y)) + geom_point()
# Plot with breaks defined by sequence
ggplot(df, aes(x,y)) + geom_point() +
scale_y_continuous(breaks = seq(0,10,1))
# Plot with automatic sequence for breaks
ggplot(df, aes(x,y)) + geom_point() +
scale_y_continuous(breaks = seq(min(df$y),max(df$y),1))
# Does this exist?
ggplot(df, aes(x,y)) + geom_point() +
scale_y_continuous(break_step = 1)
You may say I am being lazy but there have been a few occasions where I have had to change the min and max limits of my seq due to the addition of error bars. So I just want to say...use a break size of x, with automatic scale limits.

You can define your own function to pass to the breaks argument. An example that would work in your case would be
f <- function(y) seq(floor(min(y)), ceiling(max(y)))
Then
ggplot(df, aes(x,y)) + geom_point() + scale_y_continuous(breaks = f)
gives
You could modify this to pass the step of the breaks, e.g.
f <- function(k) {
step <- k
function(y) seq(floor(min(y)), ceiling(max(y)), by = step)
}
then
ggplot(df, aes(x,y)) + geom_point() + scale_y_continuous(breaks = f(2))
would create a y-axis with ticks at 2, 4, .., 10, etc.
You can take this even further by writing your own scale function
my_scale <- function(step = 1, ...) scale_y_continuous(breaks = f(step), ...)
and just call it like
ggplot(df, aes(x,y)) + geom_point() + my_scale()

> # Does this exist?
> ggplot(df, aes(x,y)) + geom_point() +
> scale_y_continuous(break_step = 1)
If you're looking for an off-the-shelf solution, then you can use the scales::breaks_width() function like so:
scale_y_continuous(breaks = scales::breaks_width(1))
The scales package also includes handy functions to control breaks easily in "special" scales such as date-time, e.g. scale_x_datetime(breaks='6 hours').

Related

ggplot scale_x_log10() with bigger base than 10 for the logarithm

Is there an easy way to scale a ggplot by log base 20 or higher? This works great:
ggplot(data, aes(x, y)) + geom_line() + scale_x_log10()
Unfortunately base 10 too small. There's a more general scale_x_continuous function that takes a trans argument, but there doesn't appear to be any log transforms higher than base 10.
Here's a worked example of creating a new trans object to use in your plot:
Initial plot
library(ggplot2)
df <- data.frame(x = 1:10, y = 10^(1:10))
p <- ggplot(df, aes(x, y)) + geom_line()
p
With log scale using base 100
p + scale_y_continuous(trans = scales::trans_new(name = "log100",
transform = function(x) log(x, 100),
inverse = function(x) 100^x,
domain = c(1e-16, Inf)),
breaks = scales::breaks_log(5, 100),
labels = scales::comma)
Created on 2020-12-07 by the reprex package (v0.3.0)
The scales package provides transforms to the scale_x_continuous() function. You can either use the built-in flexible log transform or create your own using the trans_new() function.
Built-in with base-20:
require(scales)
base=20
p1 <- ggplot(mpg, aes(displ, hwy)) +
geom_point()
p1 + scale_y_continuous(trans = scales::log_trans(base))
Make your own transform:
require(scales)
logTrans <- function(base=20){
trans_new(name='logT',
transform=function(y){
return(log(y, base=base))
},
inverse=function(y){
return(base^y)
})
}
base=20
p1 + scale_y_p1 <- ggplot(mpg, aes(displ, hwy)) +
geom_point()
p1 + continuous(trans = logTrans(base=base))

ggplot2, introduce breaks on a x log scale

I have a plot like this:
p<-ggplot() +
geom_line(data= myData, aes(x = myData$x , y = myData$y)) +
scale_x_log10()+
scale_y_log10()
My x value is seq(9880000, 12220000, 10000)
There is only one break on the x-axis of the plot, what should I do if to get at least 3 breaks on the plot x-axis?
Here is fully reproducible example of the original poster's problem where a log-scaled plot only displays one break value on the x-axis. I demonstrate three possible solutions below.
library(ggplot2)
# Create a reproducible example data.frame using R functions.
x = seq(9880000, 12220000, 10000)
# Use set.seed() so that anyone who runs this code
# will get the same sequence of 'random' values.
set.seed(31415)
y = cumsum(runif(n=length(x), min=-1e5, max=1e5)) + 1e6
dat = data.frame(x=x, y=y)
# Original poster's plot.
p1 = ggplot(data=dat, aes(x=x, y=y)) +
geom_line() +
scale_x_log10() +
scale_y_log10() +
labs(title="1. Plot has only one x-axis break.")
# Add extra x-axis breaks manually.
x_breaks = c(10^7.0, 10^7.04, 10^7.08)
p2 = ggplot(data=dat, aes(x=x, y=y)) +
geom_line() +
scale_x_log10(breaks=x_breaks) +
scale_y_log10() +
labs(title="2. Add some x-axis breaks manually.")
# Add extra x-axis breaks in semi-automated manner.
x_breaks = 10^pretty(log10(x))
x_labels = formatC(x_breaks, format = "e", digits = 2)
p3 = ggplot(data=dat, aes(x=x, y=y)) +
geom_line() +
scale_x_log10(breaks=x_breaks, labels=x_labels) +
scale_y_log10() +
labs(title="3. Create x-axis breaks with R functions.")
# Skip the log10 scale because the x-values don't span multiple orders of magnitude.
p4 = ggplot(data=dat, aes(x=x, y=y)) +
geom_line() +
scale_y_log10() +
labs(title="4. Check appearance without log10 scale for x-axis.")
library(gridExtra)
ggsave("example.png", plot=arrangeGrob(p1, p2, p3, p4, nrow=2),
width=10, height=5, dpi=150)
I add: scale_x_log10(breaks=seq(9880000, 12220000, 1000000)).
This is my reproducible example:
library(random)
library(ggplot2)
z <- randomStrings(n=235, len=5, digits=TRUE, upperalpha=TRUE, loweralpha=TRUE, unique=TRUE, check=TRUE)
x <- seq(9880000, 12220000, 10000)
y <- randomNumbers(n=235, min=9880000, max=12220000, col=1)
df <- data.frame(z, x, y)
head(df)
V1 x V1.1
1 378VO 9880000 11501626
2 AStRK 9890000 10929705
3 sotp4 9900000 11305700
4 AS4DR 9910000 11302110
5 7iFdk 9920000 11611918
6 HIS7z 9930000 11175074
p<-ggplot() + geom_line(data= df, aes(x = df$x , y = df$V1.1)) + scale_y_log10()
p + scale_x_log10(breaks=seq(9880000, 12220000, 1000000))
Hope it is useful...
Add this between your parenthesis: breaks=seq(specify, breaks, here)
For example, if you wanted a break at 0, 10, 100:
scale_x_log10((breaks=seq(0,10,100))

facet_zoom can't change breaks of zoomed plot

I currently have a plot and have used facet_zoom to focus on records between 0 and 10 in the x axis. The following code reproduces an example:
require(ggplot2)
require(ggforce)
require(dplyr)
x <- rnorm(10000, 50, 25)
y <- rexp(10000)
data <- data.frame(x, y)
ggplot(data, aes(x = x, y = y)) +
geom_point() +
facet_zoom(x = dplyr::between(x, 0, 10))
I want to change the breaks on the zoomed portion of the graph to be the equivalent of:
ggplot(data, aes(x = x, y = y)) +
geom_point() +
facet_zoom(x = dplyr::between(x, 0, 10)) +
scale_x_continuous(breaks = seq(0,10,2))
But this changes the breaks of the original plot as well. Is it possible to just change the breaks of the zoomed portion whilst leaving the original plot as default?
This works for your use case:
ggplot(data, aes(x = x, y = y)) +
geom_point() +
facet_zoom(x = between(x, 0, 10)) +
scale_x_continuous(breaks = pretty)
From ?scale_x_continuous, breaks would accept the following (emphasis added):
One of:
NULL for no breaks
waiver() for the default breaks computed by the transformation object
A numeric vector of positions
A function that takes the limits as input and returns breaks as output
pretty() is one such function. It doesn't offer very fine control, but does allow you to have some leeway to specify breaks across different facets with very different scales.
For illustration, here are two examples with different desired number of breaks. See ?pretty for more details on the other arguments this function accepts.
p <- ggplot(data, aes(x = x, y = y)) +
geom_point() +
facet_zoom(x = between(x, 0, 10))
cowplot::plot_grid(
p + scale_x_continuous(breaks = function(x) pretty(x, n = 3)),
p + scale_x_continuous(breaks = function(x) pretty(x, n = 10)),
labels = c("n = 3", "n = 10"),
nrow = 1
)
Of course, you can also define your own function to convert plot limits into desired breaks, (e.g. something like p + scale_x_continuous(breaks = function(x) seq(min(x), max(x), length.out = 5))), but I generally find these functions require more tweaking to get right, & pretty() is often good enough.

label only subset of points in a ggplot where x was modified

I have this kind of table:
dt <- data.table(titles=c('B','C','A','C'),
labs =c('b','c','a','c'),
values=c( 3, 2, 3, 4))
In order to plot the points without collapsing and re-ordering, I had to do the following trick with ggplot(): instead of aes(x=titles, y=values) I use aes(x=seq_len(nrow(dt)), y=values):
ggplot(data = dt,
aes(x=seq_len(nrow(dt)), y=values)) +
geom_point() +
geom_text(aes(label=labs)) +
scale_x_discrete(labels=dt$titles) + xlab('titles')
Now I want to have labels not for all points but only for a subset of them (for example, where values>2). This call doesn't work:
ggplot(data = dt,
aes(x=seq_len(nrow(dt)), y=values)) +
geom_point() +
geom_text(data=subset(dt, values>2), aes(label=labs)) +
scale_x_discrete(labels=dt$titles) + xlab('titles')
# Error: Aesthetics must be either length 1 or the same as the data (2): label, x, y
How to call geom_text() in this case?
I think you are looking for the inherit.aes=F option:
dt2 <- subset(dt, values>2)
ggplot(data = dt,
aes(x=seq_len(nrow(dt)), y=values)) +
geom_point() +
geom_text(data=dt2, aes(x=values, y=values, label=labs), inherit.aes=F) +
scale_x_discrete(labels=dt$titles) + xlab('titles')

Different breaks per facet in ggplot2 histogram

A ggplot2-challenged latticist needs help: What's the syntax to request variable per-facet breaks in a histogram?
library(ggplot2)
d = data.frame(x=c(rnorm(100,10,0.1),rnorm(100,20,0.1)),par=rep(letters[1:2],each=100))
# Note: breaks have different length by par
breaks = list(a=seq(9,11,by=0.1),b=seq(19,21,by=0.2))
ggplot(d, aes(x=x) ) +
geom_histogram() + ### Here the ~breaks should be added
facet_wrap(~ par, scales="free")
As pointed out by jucor, here some more solutions.
On special request, and to show why I am not a great ggplot fan, the lattice version
library(lattice)
d = data.frame(x=c(rnorm(100,10,0.1),rnorm(100,20,0.1)),par=rep(letters[1:2],each=100))
# Note: breaks have different length by par
myBreaks = list(a=seq(8,12,by=0.1),b=seq(18,22,by=0.2))
histogram(~x|par,data=d,
panel = function(x,breaks,...){
# I don't know of a generic way to get the
# grouping variable with histogram, so
# this is not very generic
par = levels(d$par)[which.packet()]
breaks = myBreaks[[par]]
panel.histogram(x,breaks=breaks,...)
},
breaks=NULL, # important to force per-panel compute
scales=list(x=list(relation="free")))
Here is one alternative:
hls <- mapply(function(x, b) geom_histogram(data = x, breaks = b),
dlply(d, .(par)), myBreaks)
ggplot(d, aes(x=x)) + hls + facet_wrap(~par, scales = "free_x")
If you need to shrink the range of x, then
hls <- mapply(function(x, b) {
rng <- range(x$x)
bb <- c(rng[1], b[rng[1] <= b & b <= rng[2]], rng[2])
geom_histogram(data = x, breaks = bb, colour = "white")
}, dlply(d, .(par)), myBreaks)
ggplot(d, aes(x=x)) + hls + facet_wrap(~par, scales = "free_x")
I don't think that it is possible to give different break points in each facet.
As workaround you can make two plots and then with grid.arrange() function from library gridExtra put them together. To set break points in geom_histogram() use binwidth= and set one value for width of bin.
p1<-ggplot(subset(d,par=="a"), aes(x=x) ) +
geom_histogram(binwidth=0.1) +
facet_wrap(~ par)
p2<-ggplot(subset(d,par=="b"), aes(x=x) ) +
geom_histogram(binwidth=0.2) +
facet_wrap(~ par)
library(gridExtra)
grid.arrange(p1,p2,ncol=2)
Following on from Didzis example:
ggplot(dat=d, aes(x=x, y=..ncount..)) +
geom_histogram(data = d[d$par == "a",], binwidth=0.1) +
geom_histogram(data = d[d$par == "b",], binwidth=0.01) +
facet_grid(.~ par, scales="free")
EDIT: This works for more levels but of course there are already better solutions
# More facets
d <- data.frame(x=c(rnorm(200,10,0.1),rnorm(200,20,0.1)),par=rep(letters[1:4],each=100))
# vector of binwidths same length as number of facets - need a nicer way to calculate these
my.width=c(0.5,0.25,0.1,0.01)
out<-lapply(1:length(my.width),function(.i) data.frame(par=levels(d$par)[.i],ggplot2:::bin(d$x[d$par==levels(d$par)[.i]],binwidth=my.width[.i])))
my.df<-do.call(rbind , out)
ggplot(my.df) + geom_histogram(aes(x, y = density, width = width), stat = "identity") + facet_wrap(~par,scales="free")
from https://groups.google.com/forum/?fromgroups=#!searchin/ggplot2/bin$20histogram$20by$20facet/ggplot2/xlqRIFPP-zE/CgfigIkgAAkJ
It is not, strictly speaking, possible to give different breaks in the different facets. But you can get the same effect by having a different layer for each facet (much as in user20650's answer), but mostly automating the multiple geom_histogram calls:
d <- data.frame(x=c(rnorm(100,10,0.1),rnorm(100,20,0.1)),
par=rep(letters[1:2],each=100))
breaks <- list(a=seq(9,11,by=0.1),b=seq(19,21,by=0.2))
ggplot(d, aes(x=x)) +
mapply(function(d, b) {geom_histogram(data=d, breaks=b)},
split(d, d$par), breaks) +
facet_wrap(~ par, scales="free_x")
The mapply call creates a list of geom_histograms which can be added to the plot. The tricky part is that you have to manually split the data (split(d, d$par)) into the data that goes into each facet.

Resources