I'm trying to make a combine some plots into a single plot using the patchwork package, but I'm unfamiliar with it and am having trouble figuring out to how to properly scale the size of the plots. There's one core plot, the scatter plot, and then a boxplot for each continuous variable to some the distributions as well. Here's a reproducible example below:
library(dplyr)
library(ggplot2)
library(patchwork)
set.seed(100)
dat1 <- tibble(x = runif(1000, 0, 10),
y = runif(1000, 0, 20),
group1 = sample(rep(letters[1:5], each = 200)),
group2 = sample(rep(letters[-22:-1], each = 250)))
plot1 <- ggplot(data = dat1) +
geom_point(aes(x = x, y = y, color = group1)) +
facet_wrap(~group2) +
theme(legend.position = 'bottom')
xbox <- ggplot(data = dat1) +
geom_boxplot(aes(x = x, y = group1, fill = group1, color = group1)) +
scale_x_continuous(position = 'top') +
theme(legend.position = 'none',
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
title = element_blank())
ybox <- ggplot(data = dat1) +
geom_boxplot(aes(x = group1, y = y, fill = group1, color = group1)) +
scale_y_continuous(position = 'right') +
theme(legend.position = 'none',
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
title = element_blank())
(xbox + plot_spacer()) / (plot1 + ybox)
This is close to what I'm attempting, but the boxplots need to be rescaled such that the top one is approximately 20% of its shown height and the righthand one is approximately 20% of its shown width. I've tried using patchwork::plot_layout to handle the scaling, but I keep getting unexpected results. I inserted the patchwork::plot_spacer() so that the righthand boxplot wouldn't stretch the whole height of picture but maybe there is a better way to do that. The boxplots are supposed to appear more like axes, so that upper right whitespace shouldn't really be there.
I'm not certain you'll get a plot that makes inferential sense in the margins, but you can change the widths and heights of the marginal plots in plot_layout using either the heights and widths arguments or the design argument. Note that plots in parenthesis are considered "subplots" themselves and is given their own "area" when using plot_layout. As such I've added all plots together rather than arranging them using other patchwork operators.
xbox + plot_spacer() + plot1 + ybox + plot_layout(design = c(area(t = 1, b = 1, l = 1, r = 6), # <== top column
area(t = 1, b = 1, l = 7, r = 7), # <== top right
area(t = 2, b = 7, l = 1, r = 6), # <== bottom left
area(t = 2, b = 7, l = 7, r = 7) # <== bottom right
))
# Alternative:
xbox + plot_spacer() + plot1 + ybox + plot_layout(ncol = 2, nrow = 2,
heights = c(1, 6),
widths = c(6, 1))
Related
I am including marginal distribution plots on a scatterplot of a continuous and integer variable. However, in the integer variable maringal distribution plot (y-axis) there is this zig-zag pattern that shows up because the y-values are all integers. Is there any way to increase the "width" (not sure that's the right term) of the bins/values the function calculates the distribution density over?
The goal is to get rid of that zig-zag pattern that develops because the y-values are integers.
library(GlmSimulatoR)
library(ggplot2)
library(patchwork)
### Create right-skewed dataset that has one continous variable and one integer variable
set.seed(123)
df1 <- data.frame(matrix(ncol = 2, nrow = 1000))
x <- c("int","cont")
colnames(df1) <- x
df1$int <- round(rgamma(1000, shape = 1, scale = 1),0)
df1$cont <- round(rgamma(1000, shape = 1, scale = 1),1)
p1 <- ggplot(data = df1, aes(x = cont, y = int)) +
geom_point(shape = 21, size = 2, color = "black", fill = "black", stroke = 1, alpha = 0.4) +
xlab("Continuous Value") +
ylab("Integer Value") +
theme_bw() +
theme(panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"))
dens1 <- ggplot(df1, aes(x = cont)) +
geom_density(alpha = 0.4) +
theme_void() +
theme(legend.position = "none")
dens2 <- ggplot(df1, aes(x = int)) +
geom_density(alpha = 0.4) +
theme_void() +
theme(legend.position = "none") +
coord_flip()
dens1 + plot_spacer() + p1 + dens2 +
plot_layout(ncol = 2, nrow = 2, widths = c(6,1), heights = c(1,6))
From ?geom_density:
adjust: A multiplicate [sic] bandwidth adjustment. This makes it possible
to adjust the bandwidth while still using the a bandwidth
estimator. For example, ‘adjust = 1/2’ means use half of the
default bandwidth.
So as a start try e.g. geom_density(..., adjust = 2) (bandwidth twice as wide as default) and go from there.
The following code using R/ggplot
set.seed(123)
require(ggplot2)
n <- 60
df <- data.frame(theta=sample(180,n,replace=TRUE),
gp=sample(c("A","B"),n,replace=TRUE ))
p <- ggplot(df)
p <- p + geom_histogram(aes(x=theta,fill=gp),
binwidth=5)
p <- p + scale_x_continuous(breaks=seq(0,360,30),
limits=c(0,360))
p <- p + coord_polar(theta="x", start=3*pi/2, direction=-1)
p <- p + theme_bw()
print(p)
generates the figure below
I just want to display the angular range [0,180] and exclude entirely the range (180,360), so the figure would basically be the upper semi-circle rather than a full circle.
Changing the limits in scale_x_continuous does not do this.
Is there a way?
Thanks.
EDIT
There's a similar problem but with a different package here
Creating half a polar plot (rose diagram) with circular package
It is some kind of a hack, but based on this answer here and adding some code to your ggplot call as well as to the grid, I was able to come close to a solution. Please give it a try. Depending on your desired output format / resolution you might need to adjust the x, y, height and width arguments in the last line which basically recreates the black border around the plot which I deleted from the bw theme. Maybe someone with more profound knowledge of grobs can come up with something better.
library(ggplot2)
library(reshape2)
library(grid)
set.seed(123)
require(ggplot2)
n <- 60
df <- data.frame(theta=sample(180,n,replace=TRUE),
gp=sample(c("A","B"),n,replace=TRUE ))
p <- ggplot(df) + geom_histogram(aes(x = theta, fill = gp),
binwidth = 5) +
scale_x_continuous(
expand = c(0, 0),
breaks = seq(180, 0, -30),
limits = c(0, 360)
) +
coord_polar(theta = "x",
start = 3 * pi / 2,
direction = -1) +
theme_bw() +
theme(
panel.border = element_blank(),
axis.title.y = element_text(hjust = 0.75, vjust = 3),
legend.position = "top"
)
g = ggplotGrob(p)
grid.newpage()
pushViewport(viewport(height = 1, width = 1, clip="on"))
grid.draw(g)
grid.rect(x = 0, y = -0.05, height = 1, width = 2, gp = gpar(col="white"))
grid.rect(x = .5, y = .7, width = .6, height = .55, gp = gpar(lwd = 1, col = "black", fill = NA))
My data :
dat <- data.frame(x = c(1,2,3,4,5,6), y = c(2,3,4,6,2,3))
Breaks and labels of my plot :
breaks <- c(3,5)
labels <- c(paste(3,"(0.3)"), paste(5,"(0.5)"))
And my plot :
library(ggplot2)
ggplot() +
geom_point(data = dat, aes(x = x, y = y)) +
scale_y_continuous(breaks = breaks, labels = labels)
I wish to colour the same labels differently. For instance, I wish to colour the "3" with a different colour than the one of "(0.3)".
Here's a way to stick 2 plots together with patchwork, which is a package similar to cowplot but with a little more flexibility. I split the labels into 2 vectors, one with the integers and one with the decimals in parentheses. Then make 2 plots, one for the outer labels with no other markings, and one for the main plot.
After doing one round of trying to build this, I started adjusting the margins in each theme, realizing I needed to set the top and bottom margins the same, but making no margin on the right side of the left plot and the left side of the right plot, so there's very little space between them. There's definitely still ways to tweak this, but I'd start with some of the spacing.
library(tidyverse)
library(patchwork)
lbl_int <- str_extract(labels, "^\\d+")
lbl_frac <- str_extract(labels, "\\(.+\\)")
The main plot is fairly straightforward, just removing elements from the left side in the theme.
(main_plot <- ggplot(dat, aes(x = x, y = y)) +
geom_point() +
scale_y_continuous(breaks = breaks, labels = lbl_frac) +
theme(axis.text.y.left = element_text(color = "gray"),
axis.title.y.left = element_blank(),
plot.margin = margin(1, 1, 1, 0, "mm")))
The plot for the outer labels has most theme elements removed, but has the y-axis title and labels.
(int_plot <- ggplot(dat, aes(x = 0, y = y)) +
scale_y_continuous(breaks = breaks, labels = lbl_int) +
theme(axis.text.y.left = element_text(color = "black"),
axis.title.y.left = element_text(color = "black"),
axis.title.x = element_blank(),
panel.grid = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank(),
panel.background = element_blank(),
plot.margin = margin(1, 0, 1, 1, "mm")))
Then patchwork makes it easy to just add plots together—literally with +—and then set the widths. Again, here's something you can adjust as you need, but I made the left plot very very skinny compared to the right one.
int_plot + main_plot +
plot_layout(ncol = 2, widths = c(1e-3, 1))
Created on 2018-12-21 by the reprex package (v0.2.1)
This is something to get you going (credit to this answer which I adapted).
We use annotate to plot your labels, on two different x-axis coords, this will function as our labels (so we need to shut off the actual labelling in the theme).
First we create two vectors of the exact labels that we want in different colors.
dat <- data.frame(x = c(1,2,3,4,5,6), y = c(2,3,4,6,2,3))
breaks <- c(3,5)
labels_new1 <- c(NA, NA, 3, NA, 5, NA) # NA in order to skip that annotation
labels_new2 <- c(NA, NA, "(0.3)", NA, "(0.5)", NA)
Important parts:
coord_cartesian(xlim = c(0, 6), expand = FALSE) + # this will cut our plot properly
plot.margin = unit(c(1, 1, 1, 5), "lines") # this will give us some space on the left
Note that in coord_cartesian defined like that we are actually cutting off the two annotations (notice that the two x values you see in the next part (-1, -0.5) are outside the xlim range).
Plot object:
g1 <- ggplot() +
geom_point(data = dat, aes(x = x, y = y)) +
annotate(geom = "text", y = seq_len(nrow(dat)), x = -1, label = labels_new1, size = 4) +
#first the number add color = "blue" for example
annotate(geom = "text", y = seq_len(nrow(dat)), x = -0.5, label = labels_new2, size = 4, color = "red") +
#second the parenthesis (colored in red)
coord_cartesian(xlim = c(0, 6), expand = FALSE) +
scale_y_continuous(breaks = breaks) +
#now lets shut off the labels and give us some left space in the plot
theme(plot.margin = unit(c(1, 1, 1, 5), "lines"),
axis.title.y = element_blank(),
axis.text.y = element_blank())
Finally:
g2 <- ggplot_gtable(ggplot_build(g1)) # convert to grob
g2$layout$clip[g2$layout$name == "panel"] <- "off" # clipping of the axes
# this will show the two annotations that we left off before
grid::grid.draw(g2)
Remarks:
You can play around with x=-1 and x=-0.5 to move the two annotations, and with the last value in c(1, 1, 1, 5) to give you more space on the left side.
labels_new1 and labels_new2 are very important, they are doing all the heavy work of what and where you want to show something.
I'm using the code below:
# Libs
require(ggplot2); require(gridExtra); require(grid)
# Generate separate charts
chrts_list_scts <- list()
# Data
data("mtcars")
# A
chrts_list_scts$a <- ggplot(mtcars) +
geom_point(size = 2, aes(x = mpg, y = disp,
colour = as.factor(cyl))) +
geom_smooth(aes(x = mpg, y = disp),
method = "auto") +
xlab("MPG") +
ylab("Disp") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none")
# B
chrts_list_scts$b <- ggplot(mtcars) +
geom_point(size = 2, aes(x = mpg, y = drat,
colour = as.factor(cyl))) +
geom_smooth(aes(x = mpg, y = drat),
method = "auto") +
xlab("MPG") +
ylab("Drat") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none")
# C
chrts_list_scts$c <- ggplot(mtcars) +
geom_point(size = 2, aes(x = mpg, y = qsec,
colour = as.factor(cyl))) +
geom_smooth(aes(x = mpg, y = qsec),
method = "auto") +
xlab("MPG") +
ylab("QSEC") +
guides(colour = guide_legend(title = "cyl")) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "bottom",
legend.key = element_rect(colour = NA))
# Arrange grid
png(filename = "chrts.PNG", width = 6,
height = 10, units = 'in', res = 300)
title_text <- c("mtcars")
chrts_list_scts$all_scts <- grid.arrange(chrts_list_scts$a,
chrts_list_scts$b,
chrts_list_scts$c,
top =
textGrob(label = title_text,
gp = gpar(
fontsize = 14,
font = 2)))
dev.off()
rm(title_text)
To generate the following chart:
I'm interested in adding border around that chart, as in the picture below:
Attempts
I tried to address this request via adding polygonGrob in the code:
chrts_list_scts$all_scts <- grid.arrange(chrts_list_scts$dep_work,
chrts_list_scts$chld_work,
chrts_list_scts$pens,
polygonGrob(x = c(0,0.5,1.05),
y = c(0,0.5,1.05)
),
top =
textGrob(label = title_text,
gp = gpar(
fontsize = 14,
font = 2)))
but this generates a pointless chart with one line across in the bottom. I had a look at the seeming similar discussion on SO but it wasn't clear to me how to arrive at a working solution.
Side requirements
In addition to generating the border, I would like to:
Be able to exercise some control over the border aesthetics, like changing size and colour of the border.
Ideally, I would like to encapsulate this solution within the arrange.grid call. So at the object chrts_list_scts$all_scts has all elements including charts and neat border around all of them.
I will be happy to accept solutions that address the major requirements with respect to the border only, if there is a suggested solution that matches the remaining two points it will be even nicer.
1) Using the iris example (but further simplified) from the link provided in the question just add the last line. Modify the gpar(...) components (and possibly the width and height) to get different aesthetics. (This is not encapsulated in the grid.arrange call.)
library(ggplot2)
library(grid)
library(gridExtra)
g <- ggplot(iris, aes(Sepal.Width, Sepal.Length)) + geom_point()
grid.arrange(g, g, ncol=2)
# next line adds border
grid.rect(width = .98, height = .98, gp = gpar(lwd = 2, col = "blue", fill = NA))
(continued after plot)
2) This is a variation of solution (1) in which on the plus side encapsulates both the graphics and border in the gt gTree by creating grobs to hold each. On the other hand it does involve some additional complexity:
grid.newpage()
ga <- arrangeGrob(g, g, ncol = 2)
gb <- rectGrob(height = .98, width = .98, gp = gpar(lwd = 2, col = "blue", fill = NA)) # border, no fill
gt <- gTree(children = gList(ga, gb))
grid.draw(gt)
you can add a rectGrob to the gtable
grid.draw(gtable::gtable_add_grob(arrangeGrob(g, g, ncol=2),
rectGrob(gp=gpar(lwd=5, fill=NA)), 1, 1, 1, 2))
NOTE: fill=NA or fill='transparent' is required otherwise the rectangle can mask the objects below it.
Please check my reproducible example and the result chart.
X = t(USArrests)
plot_color_clust = function(X,N=N,
cols=c("red","blue", "orange", "darkgreen","green","yellow","grey","black","white")
){
library(ggplot2)
library(gridExtra)
library(gtable)
library(scales)
library(ggdendro)
library(grid)
library(plyr)
if(N>length(cols)) stop("N too big. Not enough colors in cols.")
if(N>ncol(X)) stop("N too big. Not enough columns in data.")
fit = ClustOfVar::hclustvar(X.quanti = X)
dd.row = as.dendrogram(fit)
ddata_x <- dendro_data(dd.row)
temp = cutree(fit,k=N)
lab <- ggdendro::label(ddata_x)
x=c()
for(i in 1:nrow(lab)){
x[i]= paste( "clust", as.vector(temp[ lab$label[i]==names(temp) ]) ,sep="")
}
lab$group <- x
p1 <- ggplot(segment(ddata_x)) +
geom_segment(aes(x=x, y=y, xend=xend, yend=yend))+coord_flip()+
geom_text(data=lab,
aes(label=label, x=x, y=0, colour=group),hjust=1) +
theme(legend.position="none",
axis.title.y=element_blank(),
axis.title.x=element_blank(),
axis.text.x = element_text(angle = 0, hjust = 0),
axis.title.x = element_text(angle = 0, hjust = 0))+
theme(axis.text = element_blank(), axis.title = element_blank(),
axis.ticks = element_blank(), axis.ticks.margin = unit(0, "lines"),
axis.ticks.length = unit(0, "cm"))+
scale_colour_manual(values=cols)+coord_flip()+
scale_y_continuous(limits = c(-0.1, 2.1))
df2<-data.frame(cluster=cutree(fit,N),states=factor(fit$labels,levels=fit$labels[fit$order]))
df3<-ddply(df2,.(cluster),summarise,pos=mean(as.numeric(states)))
p2 = ggplot(df2,aes(states,y=1,fill=factor(cluster)))+geom_tile()+
scale_y_continuous(expand=c(0,0))+
theme(axis.title=element_blank(),
axis.ticks=element_blank(),
axis.text=element_blank(),
legend.position="none")+coord_flip()+
geom_text(data=df3,aes(x=pos,label=cluster))+
scale_fill_manual(name = "This is my title", values = cols)
gp1<-ggplotGrob(p1)
gp2<-ggplotGrob(p2)
maxHeight = grid::unit.pmax(gp1$heights[2:5], gp2$heights[2:5])
gp1$heights[2:5] <- as.list(maxHeight)
gp2$heights[2:5] <- as.list(maxHeight)
#grid.arrange(gp2, gp1, ncol=2,widths=c(1/6,5/6))
R = arrangeGrob(gp2,gp1,ncol=2,widths=c(1/6,5/6))
R
}
plot_color_clust(X,6)
Questions:
These two parts (left colors tiles and right clustering tree) has inconsistent heights. How do we adjust their heights for them to match each other's?
How can we make the tree on the right side shorter so states names (clustered subjects) can have more space to be fully displayed?
Is there a way make the white space between those two parts smaller?
Your tweaking of the code is appreciated. Thanks.
One major change: Rather than matching heights of the two charts, I extract the plot panel from gp2, then insert it into column 2 of gp1. There are no margins surrounding the resultant gp2, and thus, partly takes care of your point 3.
With respect to point 2: expand the limits of the axis to make room of the labels. (See point 2. in the code below). The parameters for points 2 and 3 were set by trial-and-error. Adjusting one parameter means the other needs to be adjusted.
With respect to point 1: expand the axis using the additive component of exapnd to add half a unit to each end of the axis (See point 1. in the code below).
Minor edit: updating to ggplot2 2.2.0 and R 3.3.2
axis.ticks.margin is deprecated
X = t(USArrests)
plot_color_clust = function(X, N = N,
# cols=c("red","blue", "orange", "darkgreen","green","yellow","grey","black","white")
cols = rainbow(N) # Easier to pick colours
){
library(ggplot2)
library(gtable)
library(grid)
library(ggdendro)
library(plyr)
if(N > length(cols)) stop("N too big. Not enough colors in cols.")
if(N > ncol(X)) stop("N too big. Not enough columns in data.")
fit = ClustOfVar::hclustvar(X.quanti = X)
dd.row = as.dendrogram(fit)
ddata_x <- dendro_data(dd.row)
temp = cutree(fit, k = N)
lab <- ggdendro::label(ddata_x)
x = c()
for(i in 1:nrow(lab)){
x[i] = paste("clust", as.vector(temp[lab$label[i] == names(temp)]), sep = "")
}
lab$group <- x
p1 <- ggplot(segment(ddata_x)) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
geom_text(data = lab, aes(label = label, x = x, y = -.05, colour = group), # y = -.05 adds a little space between label and tree
size = 4, hjust = 1) +
scale_x_continuous(expand = c(0, .5)) + # 1. Add half a unit to each end of the vertical axis
expand_limits(y = -0.4) + # 2. Make room for labels
theme_classic() +
scale_colour_manual(values = cols) +
coord_flip() +
theme(legend.position = "none", axis.line = element_blank(),
axis.text = element_blank(), axis.title = element_blank(),
axis.ticks = element_blank(),
axis.ticks.length = unit(0, "cm"))
df2 <- data.frame(cluster = cutree(fit, N),
states = factor(fit$labels, levels = fit$labels[fit$order]))
df3 <- ddply(df2, .(cluster),summarise,pos=mean(as.numeric(states)))
p2 <- ggplot(df2, aes(states, y = 1,
fill = factor(as.character(cluster)))) + # 'as.character' - so that colours match with 10 or more clusters
geom_tile() +
scale_y_continuous(expand = c(0, 0)) +
scale_x_discrete(expand = c(0, 0)) +
coord_flip() +
geom_text(data = df3,aes(x = pos, label = cluster, size = 12)) +
scale_fill_manual(values = cols)
gp1 <- ggplotGrob(p1) # Get ggplot grobs
gp2 <- ggplotGrob(p2)
gp2 <- gp2[6, 4] # 3. Grab plot panel only from tiles plot (thus, no margins)
gp1 <- gtable_add_grob(gp1, gp2, t = 6, l = 2, name = "tiles") # 3. Insert it into dendrogram plot
gp1$widths[2] = unit(1, "cm") # 3. Set width of column containing tiles
grid.newpage()
grid.draw(gp1)
}
plot_color_clust(X, 6)