R heatmap with circles - r

I would like to generate, in R, a heatmap visualization of a matrix using circles, in order to have both the color and diameter of the circles be informative. Something looking like this:
This sort of plotting is called "bubblegum plot" in certain computational biology labs, but I could not find an R function/package to do it.
Any ideas? Thanks!

Not sure whether there is a package which offers this out-of-the-box but using just ggplot2 this could be achieved like so:
library(ggplot2)
set.seed(42)
d <- data.frame(
x = rep(paste("Team", LETTERS[1:8]), 4),
y = rep(paste("Task", 1:4), each = 8),
value = runif(32)
)
ggplot(d, aes(x, forcats::fct_rev(y), fill = value, size = value)) +
geom_point(shape = 21, stroke = 0) +
geom_hline(yintercept = seq(.5, 4.5, 1), size = .2) +
scale_x_discrete(position = "top") +
scale_radius(range = c(1, 15)) +
scale_fill_gradient(low = "orange", high = "blue", breaks = c(0, .5, 1), labels = c("Great", "OK", "Bad"), limits = c(0, 1)) +
theme_minimal() +
theme(legend.position = "bottom",
panel.grid.major = element_blank(),
legend.text = element_text(size = 8),
legend.title = element_text(size = 8)) +
guides(size = guide_legend(override.aes = list(fill = NA, color = "black", stroke = .25),
label.position = "bottom",
title.position = "right",
order = 1),
fill = guide_colorbar(ticks.colour = NA, title.position = "top", order = 2)) +
labs(size = "Area = Time Spent", fill = "Score:", x = NULL, y = NULL)

I wrote an alternative function to perform the plotting, without ggplot and tidyverse. I will soon upload it to the CRAN corto package. Enjoy!
Usage
inputp<-matrix(runif(1000),nrow=50)
inputn<-matrix(rnorm(1000),nrow=50)
colnames(inputp)<-colnames(inputn)<-paste0("Score",1:ncol(inputp))
rownames(inputp)<-rownames(inputn)<-paste0("Car",1:nrow(inputp))
par(las=2,mar=c(0,6,6,10))
bubblegum(inputp,inputn)
BUBBLEGUM function
require(gplots)
require(plotrix)
bubblegum<-function(
inputp,
inputn,
pcr=0.1,
grid=FALSE,
reorder=FALSE,
legend=TRUE,
matrix2col=TRUE
) {
if(nrow(inputp)!=nrow(inputn)|ncol(inputp)!=ncol(inputn)){
warning("inputp and inpute have different sizes!")
}
### Initialize
rownumber<-nrow(inputp)
colnumber<-ncol(inputp)
### Trasform the NESs into colors
if(matrix2col){
colconversion<-matrix2col(inputn,nbreaks=20)
nescolors<-colconversion$colormatrix
} else {
nescolors<-inputn
}
#pradii<-0.3*(-log(inputp)/max(-log(inputp)))
pradii<-inputp
pradii[inputp>0.1]<-pcr*0
pradii[inputp<=0.1]<-pcr*1
pradii[inputp<0.05]<-pcr*2
pradii[inputp<1E-5]<-pcr*3
pradii[inputp<1E-10]<-pcr*4
pradii[inputp<1E-20]<-pcr*5
### Order by sum NES
sumnes<-apply(inputn,1,function(x){sum(abs(x))})
if(reorder){
neworder<-order(sumnes)
pradii<-pradii[neworder,]
nescolors<-nescolors[neworder,]
} else {
pradii<-pradii[nrow(pradii):1,]
nescolors<-nescolors[nrow(nescolors):1,]
}
### Plot
#par(las=2,mar=c(0,20,6,0))
plot(0,ylim=c(0,rownumber+1),xlim=c(0,colnumber+1),xaxt="n",yaxt="n",type="n",frame.plot=FALSE,xlab="",ylab="")#,xaxs="i",yaxs="i")
if(grid){
abline(h=1:rownumber,lty=2)
abline(v=1:colnumber,lty=2)
}
for (i in (1:rownumber)) {
for(j in 1:colnumber) {
radius<-pradii[i,j]
color<-nescolors[i,j]
draw.circle(j,i,radius=radius,col=color,lwd=0.2)
}
}
axis(3,at=1:colnumber,labels=colnames(pradii))
axis(2,at=1:rownumber,labels=rownames(pradii),cex.axis=0.7)
### Enable things to be drawn outside the plot region
par(xpd=TRUE)
### Title
### Legend
if(legend){
#legend(-colnumber,rownumber,c("group A", "group B"), pch = c(1,2), lty = c(1,2))
legend("topright",inset=c(-0.1,0),legend=c(
"<0.1","0.05","<1e-5","<1e-10","<1e-20"
), pch=c(21), title="FDR",pt.bg="white",horiz=FALSE,pt.cex=c(1,1.5,2,2.5,3))
}
if(matrix2col){
extreme<-round(max(abs(inputn)),1)
legend("bottomright", inset=c(-0.1,0),legend=c(
-extreme,-extreme/2,0,extreme/2,extreme
), pch=c(21), title="Score",
pt.bg=colconversion$col[c(1,5,10,15,19)],
horiz=FALSE,pt.cex=3)
}
}
###########################
matrix2col<-function(z,col1="navy",col2="white",col3="red3",nbreaks=100,center=TRUE){
if(center){
extreme=max(abs(z))+0.001
breaks <- seq(-extreme, extreme, length = nbreaks)
}else {
breaks <- seq(min(z), max(z), length = nbreaks)
}
ncol <- length(breaks) - 1
col <- colorpanel(ncol,col1,col2,col3)
CUT <- cut(z, breaks=breaks)
colorlevels <- col[match(CUT, levels(CUT))] # assign colors to heights for each point
names(colorlevels)<-rownames(z)
colormatrix<-matrix(colorlevels,ncol=ncol(z),nrow=nrow(z))
dimnames(colormatrix)<-dimnames(z)
return(list(colormatrix=colormatrix,col=col))
}

Related

Modify the size of each legend icon in ggplot2

I am using ggplot/usmap libararies to plot highly skewed data onto a map.
Because the data is so skewed, I created uneven interval brackets. See below;
My Code:
library(dplyr)
library(tidyverse)
library(usmap)
library(ggplot2)
library(readxl)
library(rgdal)
plot_usmap(regions = "states",
# fill = 'orange',
labels = TRUE) +
geom_point(data = grant_sh,
size = 5,
aes(x = x,
y = y,
color = funding_cat)) +
theme(
legend.title = element_text(size = 16),
#change legend title font size
legend.text = element_text(size = 14),
#change legend text font size
legend.position = 'left',
plot.title = element_text(size = 22),
plot.subtitle = element_text(size = 16)
) + #+
scale_color_manual(
values = c('#D4148C', # pink muesaum
'#049CFC', #library,blue
'#1C8474',
'#7703fC'),
name = "Map Key",
labels = c(
'$1,500 - $4,000 (n = 7)',
'$4,001 - $6,000 (n = 12)',
'$6,001 - $20,000 (n = 6)',
'$20,001 - $40,000 (n = 25)'
)
) +
guides(colour = guide_legend(override.aes = list(size = 3)))
Current Output
Desired Output
I would like to adjust the legend key to reflect the size of each interval. So, for example 1500-400 would be the smallest icon, and 20,001-40,000 would be the largest.
I want to do this so that the viewer immediately knows that the intervals are not even. Any solution to achieve this outcome is greatly appreciated!
See how the sign/oval next to each interval represents the range of the interval in my example below.
One option to create this kind of legend would be to make it as a second plot and glue it to the main plot using e.g. patchwork.
Note: Especially with a map as the main plot and the export size if any, this approach requires some fiddling to position the legend, e.g. in my code below a added a helper row to the patchwork design to shift the legend upwards.
UPDATE: Update the code to include the counts in the labels. Added a second approach to make the legend using geom_col and a separate dataframe.
library(dplyr, warn = FALSE)
library(usmap)
library(ggplot2)
library(patchwork)
# Make example data
set.seed(123)
cat1 <- c(1500, 4001, 6001, 20001)
cat2 <- c(4000, 6000, 2000, 40000)
n = c(7, 12, 6, 25)
funding_cat <- paste0("$", cat1, " - $", cat2, " (n=", n, ")")
funding_cat <- factor(funding_cat, levels = rev(funding_cat))
grant_sh <- utils::read.csv(system.file("extdata", "us_states_centroids.csv", package = "usmapdata"))
grant_sh$funding_cat = sample(funding_cat, 51, replace = TRUE, prob = n / sum(n))
# Make legend plot
grant_sh_legend <- data.frame(
funding_cat = funding_cat,
n = c(7, 12, 6, 25)
)
legend <- ggplot(grant_sh, aes(y = funding_cat, fill = funding_cat)) +
geom_bar(width = .6) +
scale_y_discrete(position = "right") +
scale_fill_manual(
values = c('#D4148C',
'#049CFC',
'#1C8474',
'#7703fC')
) +
theme_void() +
theme(axis.text.y = element_text(hjust = 0),
plot.title = element_text(size = rel(1))) +
guides(fill = "none") +
labs(title = "Map Key")
map <- plot_usmap(regions = "states",
labels = TRUE) +
geom_point(data = grant_sh,
size = 5,
aes(x = x,
y = y,
color = funding_cat)) +
theme(
legend.position = 'none',
plot.title = element_text(size = 22),
plot.subtitle = element_text(size = 16)
) + #+
scale_color_manual(
values = c('#D4148C', # pink muesaum
'#049CFC', #library,blue
'#1C8474',
'#7703fC'),
name = "Map Key",
labels = c(
'$1,500 - $4,000 (n = 7)',
'$4,001 - $6,000 (n = 12)',
'$6,001 - $20,000 (n = 6)',
'$20,001 - $40,000 (n = 25)'
)
) +
guides(colour = guide_legend(override.aes = list(size = 3)))
# Glue together
design <- "
#B
AB
#B
"
legend + map + plot_layout(design = design, heights = c(5, 1, 1), widths = c(1, 10))
Using geom_bar the counts are computed from your dataset grant_sh. A second option would be to compute the counts manually or use a manually created dataframe and then use geom_col for the legend plot:
grant_sh_legend <- data.frame(
funding_cat = funding_cat,
n = c(7, 12, 6, 25)
)
legend <- ggplot(grant_sh, aes(y = funding_cat, n = n, fill = funding_cat)) +
geom_col(width = .6) +
scale_y_discrete(position = "right") +
scale_fill_manual(
values = c('#D4148C',
'#049CFC',
'#1C8474',
'#7703fC')
) +
theme_void() +
theme(axis.text.y = element_text(hjust = 0),
plot.title = element_text(size = rel(1))) +
guides(fill = "none") +
labs(title = "Map Key")

How to plot 'outside' of plotting area using ggplot in R?

I recently asked this question. However, I am asking a separate question now as the scope of my new question falls outside the range of the last question.
I am trying to create a heatmap in ggplot... however, outside of the axis I am trying to plot geom_tile. The issue is I cannot find a consistent way to get it to work. For example, the code I am using to plot is:
library(colorspace)
library(ggplot2)
library(ggnewscale)
library(tidyverse)
asd <- expand_grid(paste0("a", 1:9), paste0("b", 1:9))
df <- data.frame(
a = asd$`paste0("a", 1:9)`,
b = asd$`paste0("b", 1:9)`,
c = sample(20, 81, replace = T)
)
# From discrete to continuous
df$a <- match(df$a, sort(unique(df$a)))
df$b <- match(df$b, sort(unique(df$b)))
z <- sample(10, 18, T)
# set color palettes
pal <- rev(diverging_hcl(palette = "Blue-Red", n = 11))
palEdge <- rev(sequential_hcl(palette = "Plasma", n = 11))
# plot
ggplot(df, aes(a, b)) +
geom_tile(aes(fill = c)) +
scale_fill_gradientn(
colors = pal,
guide = guide_colorbar(
frame.colour = "black",
ticks.colour = "black"
),
name = "C"
) +
theme_classic() +
labs(x = "A axis", y = "B axis") +
new_scale_fill() +
geom_tile(data = tibble(a = 1:9,
z = z[1:9]),
aes(x = a, y = 0, fill = z, height = 0.3)) +
geom_tile(data = tibble(b = 1:9,
z = z[10:18]),
aes(x = 0, y = b, fill = z, width = 0.3)) +
scale_fill_gradientn(
colors = palEdge,
guide = guide_colorbar(
frame.colour = "black",
ticks.colour = "black"
),
name = "Z"
)+
coord_cartesian(clip = "off", xlim = c(0.5, NA), ylim = c(0.5, NA)) +
theme(aspect.ratio = 1,
plot.margin = margin(10, 15.5, 25, 25, "pt")
)
This produces something like this:
However, I am trying to find a consistent way to plot something more like this (which I quickly made in photoshop):
The main issue im having is being able to manipulate the coordinates of the new scale 'outside' of the plotting area. Is there a way to move the tiles that are outside so I can position them in an area that makes sense?
There are always the two classic options when plotting outside the plot area:
annotate/ plot with coord_...(clip = "off")
make different plots and combine them.
The latter option usually gives much more flexibility and way less headaches, in my humble opinion.
library(colorspace)
library(tidyverse)
library(patchwork)
asd <- expand_grid(paste0("a", 1:9), paste0("b", 1:9))
df <- data.frame(
a = asd$`paste0("a", 1:9)`,
b = asd$`paste0("b", 1:9)`,
c = sample(20, 81, replace = T)
)
# From discrete to continuous
df$a <- match(df$a, sort(unique(df$a)))
df$b <- match(df$b, sort(unique(df$b)))
z <- sample(10, 18, T)
# set color palettes
pal <- rev(diverging_hcl(palette = "Blue-Red", n = 11))
palEdge <- rev(sequential_hcl(palette = "Plasma", n = 11))
# plot
p_main <- ggplot(df, aes(a, b)) +
geom_tile(aes(fill = c)) +
scale_fill_gradientn("C",colors = pal,
guide = guide_colorbar(frame.colour = "black",
ticks.colour = "black")) +
theme_classic() +
labs(x = "A axis", y = "B axis")
p_bottom <- ggplot() +
geom_tile(data = tibble(a = 1:9, z = z[1:9]),
aes(x = a, y = 0, fill = z, height = 0.3)) +
theme_void() +
scale_fill_gradientn("Z",limits = c(0,10),
colors = palEdge,
guide = guide_colorbar(
frame.colour = "black", ticks.colour = "black"))
p_left <- ggplot() +
theme_void()+
geom_tile(data = tibble(b = 1:9, z = z[10:18]),
aes(x = 0, y = b, fill = z, width = 0.3)) +
scale_fill_gradientn("Z",limits = c(0,10),
colors = palEdge,
guide = guide_colorbar( frame.colour = "black", ticks.colour = "black"))
p_left + p_main +plot_spacer()+ p_bottom +
plot_layout(guides = "collect",
heights = c(1, .1),
widths = c(.1, 1))
Created on 2021-02-21 by the reprex package (v1.0.0)

Produce an inset in each facet of an R ggplot while preserving colours of the original facet content

I would like to produce a graphic combining four facets of a graph with insets in each facet showing a detail of the respective plot. This is one of the things I tried:
#create data frame
n_replicates <- c(rep(1:10,15),rep(seq(10,100,10),15),rep(seq(100,1000,100),15),rep(seq(1000,10000,1000),15))
sim_years <- rep(sort(rep((1:15),10)),4)
sd_data <- rep (NA,600)
for (i in 1:600) {
sd_data[i]<-rnorm(1,mean=exp(0.1 * sim_years[i]), sd= 1/n_replicates[i])
}
max_rep <- sort(rep(c(10,100,1000,10000),150))
data_frame <- cbind.data.frame(n_replicates,sim_years,sd_data,max_rep)
#do first basic plot
library(ggplot2)
plot1<-ggplot(data=data_frame, aes(x=sim_years,y=sd_data,group =n_replicates, col=n_replicates)) +
geom_line() + theme_bw() +
labs(title ="", x = "year", y = "sd")
plot1
#make four facets
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(
`10` = "2, 3, ..., 10 replicates",
`100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates",
`10000` = "1000, 2000, ..., 10000 replicates"
)
plot2 <- plot1 +
facet_wrap( ~ max_rep, ncol=2, labeller = as_labeller(facet_names)) +
scale_colour_gradientn(name = "number of replicates", trans = "log",
breaks = my_breaks, labels = my_breaks, colours = rainbow(20))
plot2
#extract inlays (this is where it goes wrong I think)
library(ggpmisc)
library(tibble)
library(dplyr)
inset <- tibble(x = 0.01, y = 10.01,
plot = list(plot2 +
facet_wrap( ~ max_rep, ncol=2, labeller = as_labeller(facet_names)) +
coord_cartesian(xlim = c(13, 15),
ylim = c(3, 5)) +
labs(x = NULL, y = NULL, color = NULL) +
scale_colour_gradient(guide = FALSE) +
theme_bw(10)))
plot3 <- plot2 +
expand_limits(x = 0, y = 0) +
geom_plot_npc(data = inset, aes(npcx = x, npcy = y, label = plot)) +
annotate(geom = "rect",
xmin = 13, xmax = 15, ymin = 3, ymax = 5,
linetype = "dotted", fill = NA, colour = "black")
plot3
That leads to the following graphic:
As you can see, the colours in the insets are wrong, and all four of them appear in each of the facets even though I only want the corresponding inset of course. I read through a lot of questions here (to even get me this far) and also some examples in the ggpmisc user guide but unfortunately I am still a bit lost on how to achieve what I want. Except maybe to do it by hand extracting four insets and then combining them with plot2. But I hope there will be a better way to do this. Thank you for your help!
Edit: better graphic now thanks to this answer, but problem remains partially unsolved:
The following code does good insets, but unfortunately the colours are not preserved. As in the above version each inset does its own rainbow colours anew instead of inheriting the partial rainbow scale from the facet it belongs to. Does anyone know why and how I could change this? In comments I put another (bad) attempt at solving this, it preserves the colors but has the problem of putting all four insets in each facet.
library(ggpmisc)
library(tibble)
library(dplyr)
# #extract inlays: good colours, but produces four insets.
# fourinsets <- tibble(#x = 0.01, y = 10.01,
# x = c(rep(0.01, 4)),
# y = c(rep(10.01, 4)),
# plot = list(plot2 +
# facet_wrap( ~ max_rep, ncol=2) +
# coord_cartesian(xlim = c(13, 15),
# ylim = c(3, 5)) +
# labs(x = NULL, y = NULL, color = NULL) +
# scale_colour_gradientn(name = "number of replicates", trans = "log", guide = FALSE,
# colours = rainbow(20)) +
# theme(
# strip.background = element_blank(),
# strip.text.x = element_blank()
# )
# ))
# fourinsets$plot
library(purrr)
pp <- map(unique(data_frame$max_rep), function(x) {
plot2$data <- plot2$data %>% filter(max_rep == x)
plot2 +
coord_cartesian(xlim = c(12, 14),
ylim = c(3, 4)) +
labs(x = NULL, y = NULL) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
legend.position = "none",
axis.text=element_blank(),
axis.ticks=element_blank()
)
})
#pp[[2]]
inset_new <- tibble(x = c(rep(0.01, 4)),
y = c(rep(10.01, 4)),
plot = pp,
max_rep = unique(data_frame$max_rep))
final_plot <- plot2 +
geom_plot_npc(data = inset_new, aes(npcx = x, npcy = y, label = plot, vp.width = 0.3, vp.height =0.6)) +
annotate(geom = "rect",
xmin = 12, xmax = 14, ymin = 3, ymax = 4,
linetype = "dotted", fill = NA, colour = "black")
#final_plot
final_plot then looks like this:
I hope this clarifies the problem a bit. Any ideas are very welcome :)
Modifying off #user63230's excellent answer:
pp <- map(unique(data_frame$max_rep), function(x) {
plot2 +
aes(alpha = ifelse(max_rep == x, 1, 0)) +
coord_cartesian(xlim = c(12, 14),
ylim = c(3, 4)) +
labs(x = NULL, y = NULL) +
scale_alpha_identity() +
facet_null() +
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
legend.position = "none",
axis.text=element_blank(),
axis.ticks=element_blank()
)
})
Explanation:
Instead of filtering the data passed into plot2 (which affects the mapping of colours), we impose a new aesthetic alpha, where lines belonging to the other replicate numbers are assigned 0 for transparency;
Use scale_alpha_identity() to tell ggplot that the alpha mapping is to be used as-is: i.e. 1 for 100%, 0 for 0%.
Add facet_null() to override plot2's existing facet_wrap, which removes the facet for the inset.
Everything else is unchanged from the code in the question.
I think this will get you started although its tricky to get the size of the inset plot right (when you include a legend).
#set up data
library(ggpmisc)
library(tibble)
library(dplyr)
library(ggplot2)
# create data frame
n_replicates <- c(rep(1:10, 15), rep(seq(10, 100, 10), 15), rep(seq(100,
1000, 100), 15), rep(seq(1000, 10000, 1000), 15))
sim_years <- rep(sort(rep((1:15), 10)), 4)
sd_data <- rep(NA, 600)
for (i in 1:600) {
sd_data[i] <- rnorm(1, mean = exp(0.1 * sim_years[i]), sd = 1/n_replicates[i])
}
max_rep <- sort(rep(c(10, 100, 1000, 10000), 150))
data_frame <- cbind.data.frame(n_replicates, sim_years, sd_data, max_rep)
# make four facets
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(`10` = "2, 3, ..., 10 replicates", `100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates", `10000` = "1000, 2000, ..., 10000 replicates")
Get overall plot:
# overall facet plot
overall_plot <- ggplot(data = data_frame, aes(x = sim_years, y = sd_data, group = n_replicates, col = n_replicates)) +
geom_line() +
theme_bw() +
labs(title = "", x = "year", y = "sd") +
facet_wrap(~max_rep, ncol = 2, labeller = as_labeller(facet_names)) +
scale_colour_gradientn(name = "number of replicates", trans = "log", breaks = my_breaks, labels = my_breaks, colours = rainbow(20))
#plot
overall_plot
which gives:
Then from the overall plot you want to extract each plot, see here. We can map over the list to extract one at a time:
pp <- map(unique(data_frame$max_rep), function(x) {
overall_plot$data <- overall_plot$data %>% filter(max_rep == x)
overall_plot + # coord_cartesian(xlim = c(13, 15), ylim = c(3, 5)) +
labs(x = NULL, y = NULL) +
theme_bw(10) +
theme(legend.position = "none")
})
If we look at one of these (I've removed the legend) e.g.
pp[[1]]
#pp[[2]]
#pp[[3]]
#pp[[4]]
Gives:
Then we want to add these inset plots into a dataframe so that each plot has its own row:
inset <- tibble(x = c(rep(0.01, 4)),
y = c(rep(10.01, 4)),
plot = pp,
max_rep = unique(data_frame$max_rep))
Then merge this into the overall plot:
overall_plot +
expand_limits(x = 0, y = 0) +
geom_plot_npc(data = inset, aes(npcx = x, npcy = y, label = plot, vp.width = 0.8, vp.height = 0.8))
Gives:
Here is a solution based on Z. Lin's answer, but using ggforce::facet_wrap_paginate() to do the filtering and keeping colourscales consistent.
First, we can make the 'root' plot containing all the data with no facetting.
library(ggpmisc)
library(tibble)
library(dplyr)
n_replicates <- c(rep(1:10,15),rep(seq(10,100,10),15),rep(seq(100,1000,100),15),rep(seq(1000,10000,1000),15))
sim_years <- rep(sort(rep((1:15),10)),4)
sd_data <- rep (NA,600)
for (i in 1:600) {
sd_data[i]<-rnorm(1,mean=exp(0.1 * sim_years[i]), sd= 1/n_replicates[i])
}
max_rep <- sort(rep(c(10,100,1000,10000),150))
data_frame <- cbind.data.frame(n_replicates,sim_years,sd_data,max_rep)
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(
`10` = "2, 3, ..., 10 replicates",
`100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates",
`10000` = "1000, 2000, ..., 10000 replicates"
)
base <- ggplot(data=data_frame,
aes(x=sim_years,y=sd_data,group =n_replicates, col=n_replicates)) +
geom_line() +
theme_bw() +
scale_colour_gradientn(
name = "number of replicates",
trans = "log10", breaks = my_breaks,
labels = my_breaks, colours = rainbow(20)
) +
labs(title ="", x = "year", y = "sd")
Next, the main plot will be just the root plot with facet_wrap().
main <- base + facet_wrap(~ max_rep, ncol = 2, labeller = as_labeller(facet_names))
Then the new part is to use facet_wrap_paginate with nrow = 1 and ncol = 1 for every max_rep, which we'll use as insets. The nice thing is that this does the filtering and it keeps colour scales consistent with the root plot.
nmax_rep <- length(unique(data_frame$max_rep))
insets <- lapply(seq_len(nmax_rep), function(i) {
base + ggforce::facet_wrap_paginate(~ max_rep, nrow = 1, ncol = 1, page = i) +
coord_cartesian(xlim = c(12, 14), ylim = c(3, 4)) +
guides(colour = "none", x = "none", y = "none") +
theme(strip.background = element_blank(),
strip.text = element_blank(),
axis.title = element_blank(),
plot.background = element_blank())
})
insets <- tibble(x = rep(0.01, nmax_rep),
y = rep(10.01, nmax_rep),
plot = insets,
max_rep = unique(data_frame$max_rep))
main +
geom_plot_npc(data = insets,
aes(npcx = x, npcy = y, label = plot,
vp.width = 0.3, vp.height = 0.6)) +
annotate(geom = "rect",
xmin = 12, xmax = 14, ymin = 3, ymax = 4,
linetype = "dotted", fill = NA, colour = "black")
Created on 2020-12-15 by the reprex package (v0.3.0)

R: ggplot2: plot initiating at the axes

I'm trying to respond to a reviewer that wants some changes to a figure... I am using ggplot2 to generate Kaplan-Meier curves, and the reviewer wants the X-axis to start at 0. The default in ggkmTable adds some space between 0 and the Y-axis. I can't figure out what to change to make it look right.
Here is my code:
ggkmTable <- function(sfit, table=TRUE,returns = FALSE,
xlabs = "Time in Years", ylabs = "Survival Probability",
ystratalabs = NULL, ystrataname = NULL,
timeby = 100, main = "Kaplan-Meier Plot",
pval = TRUE, ...) {
require(plyr)
require(ggplot2)
require(survival)
require(gridExtra)
if(is.null(ystratalabs)) {
ystratalabs <- as.character(levels(summary(sfit)$strata))
}
m <- max(nchar(ystratalabs))
if(is.null(ystrataname)) ystrataname <- "Strata"
times <- seq(0, max(sfit$time), by = timeby)
.df <- data.frame(time = sfit$time, n.risk = sfit$n.risk,
n.event = sfit$n.event, surv = sfit$surv, strata = summary(sfit, censored = T)$strata,
upper = sfit$upper, lower = sfit$lower)
levels(.df$strata) <- ystratalabs
zeros <- data.frame(time = 0, surv = 1, strata = factor(ystratalabs, levels=levels(.df$strata)),
upper = 1, lower = 1)
.df <- rbind.fill(zeros, .df)
d <- length(levels(.df$strata))
p <- ggplot(.df, aes(time, surv, group = strata)) +
geom_step(aes(linetype = strata), size = 0.7) +
theme_bw() +
theme(axis.title.x = element_text(vjust = 0.5)) +
scale_x_continuous(xlabs, breaks = times, limits = c(0, max(sfit$time))) +
scale_y_continuous(ylabs, limits = c(0, 1)) +
theme(panel.grid.minor = element_blank()) +
theme(legend.position = "bottom") +
theme(legend.key = element_rect(colour = NA)) +
labs(linetype = ystrataname) +
theme(plot.margin = unit(c(0, 1, .5, ifelse(m < 10, 1.5, 2.5)), "lines")) +
ggtitle(main)
if(pval) {
sdiff <- survdiff(eval(sfit$call$formula), data = eval(sfit$call$data))
pval <- pchisq(sdiff$chisq, length(sdiff$n)-1, lower.tail = FALSE)
pvaltxt <- paste("p =", signif(pval, 3))
p <- p + annotate("text", x = 0.6 * max(sfit$time), y = 0.1, label = pvaltxt)
}
## Create a blank plot for place-holding
## .df <- data.frame()
blank.pic <- ggplot(.df, aes(time, surv)) +
geom_blank() +
theme_bw() +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),
axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.ticks = element_blank(), panel.grid.major = element_blank(),
panel.border = element_blank())
if(table) {
## Create table graphic to include at-risk numbers
risk.data <- data.frame(strata = summary(sfit, times = times, extend = TRUE)$strata,
time = summary(sfit, times = times, extend = TRUE)$time,
n.risk = summary(sfit, times = times, extend = TRUE)$n.risk)
data.table <- ggplot(risk.data, aes(x = time, y = strata, label = format(n.risk, nsmall = 0))) +
#, color = strata)) +
geom_text(size = 3.5) +
theme_bw() +
scale_y_discrete(breaks = as.character(levels(risk.data$strata)), labels = ystratalabs) +
# scale_y_discrete(#format1ter = abbreviate,
# breaks = 1:3,
# labels = ystratalabs) +
scale_x_continuous("Numbers at risk", limits = c(0, max(sfit$time))) +
theme(axis.title.x = element_text(size = 10, vjust = 1), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.text.x = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_text(face = "bold", hjust = 1))
data.table <- data.table + theme(legend.position = "none") +
xlab(NULL) + ylab(NULL)
data.table <- data.table +
theme(plot.margin = unit(c(-1.5, 1, 0.1, ifelse(m < 10, 2.5, 3.5)-0.28 * m), "lines"))
## Plotting the graphs
## p <- ggplotGrob(p)
## p <- addGrob(p, textGrob(x = unit(.8, "npc"), y = unit(.25, "npc"), label = pvaltxt,
## gp = gpar(fontsize = 12)))
grid.arrange(p, blank.pic, data.table,
clip = FALSE, nrow = 3, ncol = 1,
heights = unit(c(2, .1, .25),c("null", "null", "null")))
if(returns) {
a <- arrangeGrob(p, blank.pic, data.table, clip = FALSE,
nrow = 3, ncol = 1, heights = unit(c(2, .1, .25),c("null", "null", "null")))
return(a)
}
}
else {
## p <- ggplotGrob(p)
## p <- addGrob(p, textGrob(x = unit(0.5, "npc"), y = unit(0.23, "npc"),
## label = pvaltxt, gp = gpar(fontsize = 12)))
print(p)
if(returns) return(p)
}
}
The answer first, then an explanation.
Add the line:
coord_cartesian(xlim=c(0,max(sfit$time)))
to your ggplot object.
A simple example:
df <- data.frame(c(runif(10,0,1)),runif(10,0,1))
names(df) <- c("x","y")
p <- ggplot(df, aes(x,y)) +
geom_point() +
scale_x_continuous(breaks=c(0,0.25,0.5,0.75,1.0),
labels=c("0","0.25","0.5","0.75","1.0"))
p
gives you
whereas, if you add to the above code
p <- p + coord_cartesian(xlim=c(0,1))
p
you get
coord_cartesian() is a friend of yours (and anyone who uses your code after you) IFF you are absolutely certain any [visually important] data will never fall beyond the bounds you set within that very function. This is well documented; see Hadley's doc on this useful creature
To apply it to your code above and visualize it myself... I need to know what "sfit" is (as per user Pascal's insightful inquiry), likely among other idiosyncratic things. But the gist is:
scale_x_continuous (and its sisters scale_y_continuous et al.) don't hard-cut bounds to the EXACT parameters you specify. They do as the name implies, yes, "scale" axes according to a function (e.g. log10). But they always leave a cute little buffer around the limits, for assumed aesthetically preferential defaults.
'coord_cartesian`, on the other hand, DOES set axes limits EXACTLY as you specify, cutting out all space and data falling outside those bounds without altering the analyses pertaining to that entire data field. But don't take my word for it: read Hadley's apt description:
"The Cartesian coordinate system is the most familiar, and common, type of coordinate system. Setting limits on the coordinate system will zoom the plot (like you're looking at it with a magnifying glass), and will not change the underlying data like setting limits on a scale will."
To be clear, you can use both scale_x_continuous (and y) and coord_cartesian in the same ggplot object, because they do different things. The former sets breaks and labels for those breaks, the latter the frame (i.e. visual bounds) of the plot.
The other (dirty, dirty) solution is... photoshop. :(
Use:
scale_x_continuous("Numbers at risk", limits = c(0, max(sfit$time)), expand = c(0, 0))
The expand can also be used on your y-axis if needed.

Alignment of arrangeGrob-Created Objects

I have seen many solutions for aligning the plotting regions of ggplot2 plots. However, I have a couple of plots that have been made by a function that outputs a result from arrangeGrob. I would like to plot them in a column, with both the beginning and end of the x-axis aligned. Here is a runnable example of what happens.
To run the example,
library(ggplot2)
library(gridExtra)
topPlot <- f(TRUE, TRUE, TRUE, "Dataset 1")
bottomPlot <- f(FALSE, FALSE, FALSE, "Dataset 2")
grid.draw(arrangeGrob(topPlot, bottomPlot, nrow = 2))
The definition of f :
f <- function(x,y, z, t)
{
showLegends = x
knownClasses <- rep(c("Healthy", "Sick"), each = 5)
plotData <- data.frame(name = LETTERS[1:10],
type = rep(c("Method 1", "Method 2"), each = 10),
class = rep(c("Healthy", "Sick"), each = 5),
Error = runif(20))
classesPlot <- ggplot(data.frame(Class = knownClasses), aes(1:length(knownClasses), factor(1)), environment = environment()) +
geom_tile(aes(fill = Class, height = 10)) +
scale_x_discrete(expand = c(0, 0), breaks = NULL, limits = c(1, length(knownClasses))) +
scale_y_discrete(expand = c(0, 0), breaks = NULL) +
labs(x = '', y = '')+ theme(legend.position = ifelse(showLegends, "right", "none"))
errorPlot <- ggplot(plotData, aes(name, type)) + geom_tile(aes(fill = Error)) + theme_bw() +
theme(legend.position = ifelse(showLegends, "right", "none"),
axis.text.y = if(y) element_text(size = 8) else element_blank()) + ylab(if(z) "Label" else NULL)
classGrob <- ggplot_gtable(ggplot_build(classesPlot))
errorGrob <- ggplot_gtable(ggplot_build(errorPlot))
commonWidth <- unit.pmax(classGrob[["widths"]], errorGrob[["widths"]])
classGrob[["widths"]] <- commonWidth
errorGrob[["widths"]] <- commonWidth
arrangeGrob(classGrob, errorGrob, nrow = 2, heights = c(1, 3), main = t)
}
In a real scenario, the two datasets will have different samples in different proportions of classes, so getting rid of the class colour scale above each error plot is not an option, unless multiple colour scales per plot were allowed in ggplot2.
How can this code be modified to align the plot areas ?

Resources