box plot in R with additional point - r

I have a dataframe of multiple columns (let's say n) with different range and a vector of length n. I want different x-axis for each variable to be shown below each box plot. I tried facet_grid and facet_wrap but it gives common x axis.
This is what I have tried:
d <- data.frame(matrix(rnorm(10000), ncol = 20))
point_var <- rnorm(20)
plot.data <- gather(d, variable, value)
plot.data$test_data <- rep(point_var, each = nrow(d))
ggplot(plot.data, aes(x=variable, y=value)) +
geom_boxplot() +
geom_point(aes(x=factor(variable), y = test_data), color = "red") +
coord_flip() +
xlab("Variables") +
theme(legend.position="none")

If you can live with having the text of the x axis above the plot, and having the order of the graphs a bit messed-up this could work:
library(grid)
p = ggplot(plot.data, aes(x = 0, y=value)) +
geom_boxplot() +
geom_point(aes(x = 0, y = test_data), color = "red") +
facet_wrap(~variable, scales = "free_y", switch = "y") +
xlab("Variables") +
theme(legend.position="none") + theme_bw() + theme(axis.text.x=element_blank())
print(p, vp=viewport(angle=270, width = unit(.75, "npc"), height = unit(.75, "npc")))
I'm actually just creating the graph without flipping coords, so that scales = 'free_y' works, swithcing the position of the strip labels, and then rotating the graph.
If you don't like the text above graph (which is understandable), I would consider creating a list of single plots and then putting them together with grid.arrange.
HTH,
Lorenzo

Related

plot_grid function removes axis breaks from ggbreak in plots

I'm struggling with a problem:
I created two volcano plots in ggplot2, but due to the fact that I had one outlier point in both plot, I need to add y axis break for better visualization.
The problem arises when I WANT TO plot both in the same page using plot_grid from cowplot::, because it visualizes the original plot without the breaks that I set.
p<- c1 %>%
ggplot(aes(x = avg_log2FC,
y = -log10(p_val_adj),
fill = gene_type,
size = gene_type,
alpha = gene_type)) +
geom_point(shape = 21, # Specify shape and colour as fixed local parameters
colour = "black") +
geom_hline(yintercept = 0,
linetype = "dashed") +
scale_fill_manual(values = cols) +
scale_size_manual(values = sizes) +
scale_alpha_manual(values = alphas) +
scale_x_continuous(limits=c(-1.5,1.5), breaks=seq(-1.5,1.5,0.5)) +
scale_y_continuous(limits=c(0,110),breaks=seq(0,110,25))+
labs(title = "Gene expression",
x = "log2(fold change)",
y = "-log10(adjusted P-value)",
colour = "Expression \nchange") +
theme_bw() + # Select theme with a white background
theme(panel.border = element_rect(colour = "black", fill = NA, size= 0.5),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank())
p1 <- p + scale_y_break(breaks = c(30, 100))
p1
p plot without breaks:
and p1 plot with breaks:
The same I did for the second plot. But this is the result using plot_grid(p1,p3, ncol = 2)
Can you help me understanding if I'm doing something wrong? or it is just a limitation of the package?
OP, it seems in that ggbreak is not compatible with functions that arrange multiple plots, as indicated in the documentation for the package here. There does seem to be a workaround via either print() (I didn't get this to work) or aplot::plot_list(...), which did work for me. Here's an example using built-in datasets.
# setting up the plots
library(ggplot2)
library(ggbreak)
library(cowplot)
p1 <-
ggplot(mtcars, aes(x=mpg, disp)) + geom_point() +
scale_y_break(c(200, 220))
p2 <-
ggplot(iris, aes(x=Sepal.Length, y=Sepal.Width, color=Species)) +
geom_point() + scale_y_break(c(3.5, 3.7))
Plots p1 and p2 yield breaks in the y axis like you would expect, but plot_grid(p1,p2) results in the plots placed side-by-side without the y axis breaks.
The following does work to arrange the plots without disturbing the y axis breaks:
aplot::plot_list(p1,p2)

Create plot with only x axis

I want to create a bubble plot without the y axis, meaning the x axis represents a range between certain values and the size of the bubbles corresponds to a "number" variable.
Since geom_point() requires a y variable, I created a new column with only zero values and assigned it to the y axis.
ggplot(df, aes(x=range, y=new, size = numberPoints)) +
geom_point(alpha=0.5, shape=19) +
scale_size(range = c(.1, 24)) +
scale_y_continuous(breaks = NULL)
However, it gave the following result (the y axis is too large):
I only wanted the bubbles above the x axis (without too much space), but I can't find a way to do it.
You can use coord_fixed to "reduce" your axis
library(dplyr)
library(ggplot2)
data.frame(x = c(1,2,3,4), size = c(1,1,4,8)) %>%
ggplot(aes(x=x, y=1, size = size)) +
geom_point(alpha=0.5, shape=19) +
scale_size(range = c(.1, 24)) +
scale_y_continuous(breaks = NULL)+
coord_fixed(6)

overlaying plots from different dataframes in ggplot without messing with legend

I want to overlay two plots: one is a simple point plot where a variable is used to control the dot size; and another is a simple curve.
Here is a dummy example for the first plot;
library(ggplot2)
x <- seq(from = 1, to = 10, by = 1)
df = data.frame(x=x, y=x^2, v=2*x)
ggplot(df, aes(x, y, size = v)) + geom_point() + theme_classic() + scale_size("blabla")
Now lets overlay a curve to this plot with data from another dataframe:
df2 = data.frame(x=x, y=x^2-x+2)
ggplot(df, aes(x, y, size = v)) + geom_point() + theme_classic() + scale_size("blabla") + geom_line(data=df2, aes(x, y), color = "blue") + scale_color_discrete(name = "other", labels = c("nanana"))
It produces the error:
Error in FUN(X[[i]], ...) : object 'v' not found
The value in v is not used to draw the intended curse, but anyway, I added a dummy v to df2.
df2 = data.frame(x=x, y=x^2-x+2, v=replicate(length(x),0)) # add a dummy v
ggplot(df, aes(x, y, size = v)) + geom_point() + theme_classic() + scale_size("blabla") + geom_line(data=df2, aes(x, y), color = "blue") + scale_color_discrete(name = "other", labels = c("nanana"))
An the result has a messed legend:
What is the right way to achieve the desired plot?
You can put the size aes in the geom_point() call to make it so that you don't need the dummy v in df2.
Not sure exactly what you want regarding the legend. If you replace the above, then the blue portion goes away. If you want to have a legend for the line color, then you have to place color inside the geom_line aes call.
x <- seq(from = 1, to = 10, by = 1)
df = data.frame(x=x, y=x^2, v=2*x)
df2 = data.frame(x=x, y=x^2-x+2)
ggplot(df, aes(x, y)) +
geom_point(aes(size = v)) +
theme_classic() +
scale_size("blabla") +
geom_line(data=df2, aes(x, y, color = "blue")) +
scale_color_manual(values = "blue", labels = "nanana", name = "other")

How to break axis in R/ggplot2? [duplicate]

I'm generating plots for some data, but the number of ticks is too small, I need more precision on the reading.
Is there some way to increase the number of axis ticks in ggplot2?
I know I can tell ggplot to use a vector as axis ticks, but what I want is to increase the number of ticks, for all data. In other words, I want the tick number to be calculated from the data.
Possibly ggplot do this internally with some algorithm, but I couldn't find how it does it, to change according to what I want.
You can override ggplots default scales by modifying scale_x_continuous and/or scale_y_continuous. For example:
library(ggplot2)
dat <- data.frame(x = rnorm(100), y = rnorm(100))
ggplot(dat, aes(x,y)) +
geom_point()
Gives you this:
And overriding the scales can give you something like this:
ggplot(dat, aes(x,y)) +
geom_point() +
scale_x_continuous(breaks = round(seq(min(dat$x), max(dat$x), by = 0.5),1)) +
scale_y_continuous(breaks = round(seq(min(dat$y), max(dat$y), by = 0.5),1))
If you want to simply "zoom" in on a specific part of a plot, look at xlim() and ylim() respectively. Good insight can also be found here to understand the other arguments as well.
Based on Daniel Krizian's comment, you can also use the pretty_breaks function from the scales library, which is imported automatically:
ggplot(dat, aes(x,y)) + geom_point() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10))
All you have to do is insert the number of ticks wanted for n.
A slightly less useful solution (since you have to specify the data variable again), you can use the built-in pretty function:
ggplot(dat, aes(x,y)) + geom_point() +
scale_x_continuous(breaks = pretty(dat$x, n = 10)) +
scale_y_continuous(breaks = pretty(dat$y, n = 10))
You can supply a function argument to scale, and ggplot will use
that function to calculate the tick locations.
library(ggplot2)
dat <- data.frame(x = rnorm(100), y = rnorm(100))
number_ticks <- function(n) {function(limits) pretty(limits, n)}
ggplot(dat, aes(x,y)) +
geom_point() +
scale_x_continuous(breaks=number_ticks(10)) +
scale_y_continuous(breaks=number_ticks(10))
Starting from v3.3.0, ggplot2 has an option n.breaks to automatically generate breaks for scale_x_continuous and scale_y_continuous
library(ggplot2)
plt <- ggplot(mtcars, aes(x = mpg, y = disp)) +
geom_point()
plt +
scale_x_continuous(n.breaks = 5)
plt +
scale_x_continuous(n.breaks = 10) +
scale_y_continuous(n.breaks = 10)
Additionally,
ggplot(dat, aes(x,y)) +
geom_point() +
scale_x_continuous(breaks = seq(min(dat$x), max(dat$x), by = 0.05))
Works for binned or discrete scaled x-axis data (I.e., rounding not necessary).
A reply to this question and How set labels on the X and Y axises by equal intervals in R ggplot?
mtcars %>%
ggplot(aes(mpg, disp)) +
geom_point() +
geom_smooth() +
scale_y_continuous(limits = c(0, 500),
breaks = seq(0,500,50)) +
scale_x_continuous(limits = c(0,40),
breaks = seq(0,40,5))

ggExtra plot format: similar marginal plots for different plot dimensions

This question in regarding formatting plots produced using ggplot2 + ggExtra and is not related to any bug.
require(ggplot2)
#> Loading required package: ggplot2
require(ggExtra)
#> Loading required package: ggExtra
p1 <- ggplot(data = mpg,aes(x = cty,y = cty)) +
geom_point()+
xlab("City driving (miles/gallon)") +
ylab("City driving (miles/gallon)")
ggMarginal(p = p1,type= "boxplot")
The y-axis marginal plot in this chart is usually not similar to the x-axis marginal plot i.e. the width of the 2 boxplots are not similar. This problem become more acute when I change plot dimensions (in my case, using RStudio). Any suggestions how to make the width of the 2 boxplots similar while using different plot dimensions (width x height).
I face similar problems with other marginal plot type options provided by ggExtra package: histogram, density.
I suggest the axis_canvas function from the cowplot package. (Disclaimer: I'm the package author.) It requires a little more work, but it allows you to draw any marginals you want. And you can specify the size exactly, in output units (e.g. inch).
require(cowplot)
pmain <- ggplot(data = mpg, aes(x = cty, y = hwy)) +
geom_point() +
xlab("City driving (miles/gallon)") +
ylab("Highway driving (miles/gallon)")
xbox <- axis_canvas(pmain, axis = "x", coord_flip = TRUE) +
geom_boxplot(data = mpg, aes(y = cty, x = 1)) + coord_flip()
ybox <- axis_canvas(pmain, axis = "y") +
geom_boxplot(data = mpg, aes(y = hwy, x = 1))
p1 <- insert_xaxis_grob(pmain, xbox, grid::unit(1, "in"), position = "top")
p2 <- insert_yaxis_grob(p1, ybox, grid::unit(1, "in"), position = "right")
ggdraw(p2)
See how the boxplots retain their width/height in the following two images with different aspect ratios. (Unfortunately Stackoverflow rescales the images, so the effect is somewhat obscured, but you can see that the height of the top boxplot is always equal to the width of the side one.)
The second advantage is that because you can use full-blown ggplot2 for your marginal plots, you can draw anything you want, e.g. grouped box plots.
require(cowplot)
pmain <- ggplot(data = mpg, aes(x = cty, y = hwy, color = factor(cyl))) +
geom_point() +
xlab("City driving (miles/gallon)") +
ylab("Highway driving (miles/gallon)") +
theme_minimal()
xbox <- axis_canvas(pmain, axis = "x", coord_flip = TRUE) +
geom_boxplot(data = mpg, aes(y = cty, x = factor(cyl), color = factor(cyl))) +
scale_x_discrete() + coord_flip()
ybox <- axis_canvas(pmain, axis = "y") +
geom_boxplot(data = mpg, aes(y = hwy, x = factor(cyl), color = factor(cyl))) +
scale_x_discrete()
p1 <- insert_xaxis_grob(pmain, xbox, grid::unit(1, "in"), position = "top")
p2 <- insert_yaxis_grob(p1, ybox, grid::unit(1, "in"), position = "right")
ggdraw(p2)
I'm not entirely sure what you mean. Setting width=height of your output plot ensures the same width of the boxplots.
For example, in RMarkdown, if I include
```{r, fig.width = 5, fig.height = 5}
ggMarginal(p1, type = "boxplot", size = 2);
```
I get the following output
The box widths are identical.
Alternatively, if you save your plot make sure to set the same width and height.
ggsave(file = "test.png", ggMarginal(p1, type = "boxplot", size = 2), width = 5, height = 5);

Resources