Related
I'm looking to add some annotations (ideally a text and an arrow) to a faceted ggplot outside the plot area.
What's that, you say? Hasn't someone asked something similar here, here and here? Well yes. But none of them were trying to do this below an x-axis with a log scale.
With the exception of this amazing answer by #Z.Lin — but that involved a specific package and I'm looking for a more generic solution.
At first glance this would appear to be a very niche question, but for those of you familiar with forest plots this may tweak some interest.
Firstly, some context... I'm interested in presenting the results of a coxph model using a forest plot in a publication. My goal here is to take the results of a model (literally a standalone coxph object) and use it to produce output that is customisable (gotta match the style guide) and helps translate the findings for an audience that might not be au fait with the technical details of hazard ratios. Hence the annotations and directional arrows.
Before you start dropping links to r packages/functions that could help do this... here are those that I've tried so far:
ggforestplot — this package produces lovely customisable forest plots (if you are using odds ratios), but it hard codes a geom_vline at zero which doesn't help for HR's
ggforest — this package is a nerd paradise of detail, but good luck a) editing the variable names and b) trying to theme it (I mentioned earlier that I'm working with a coxph object, what I didn't mention was that the varnames are ugly — they need to be changed for a punter to understand what we're trying to communicate)
finalfit offers a great workflow and its hr_plot kicks out some informative output, but it doesn't play nice if you've already got a coxph object and you just want to plot it
So... backstory out of the way. I've created my own framework for a forest plot below to which I'd love to add — in the space below the x-axis labels and the x-axis title — two annotations that help interpret the result. My current code struggles with:
repeating the code under each facet (this is something I'm trying to avoid)
mirroring the annotations of either side of the geom_vline with a log scale
Any advice anyone might have would be much appreciated... I've added a reproducible example below.
## LOAD REQUIRED PACKAGES
library(tidyverse)
library(survival)
library(broom)
library(ggforce)
library(ggplot2)
## PREP DATA
model_data <- lung %>%
mutate(inst_cat = case_when(
inst %% 2 == 0 ~ 2,
TRUE ~ 1)) %>%
mutate(pat.karno_cat = case_when(
pat.karno < 75 ~ 2,
TRUE ~ 1)) %>%
mutate(ph.karno_cat = case_when(
ph.karno < 75 ~ 2,
TRUE ~ 1)) %>%
mutate(wt.loss_cat = case_when(
wt.loss > 15 ~ 2,
TRUE ~ 1)) %>%
mutate(meal.cal_cat = case_when(
meal.cal > 900 ~ 2,
TRUE ~ 1))
coxph_model <- coxph(
Surv(time, status) ~
sex +
inst_cat +
wt.loss_cat +
meal.cal_cat +
pat.karno_cat +
ph.karno_cat,
data = model_data)
## PREP DATA
plot_data <- coxph_model %>%
broom::tidy(
exponentiate = TRUE,
conf.int = TRUE,
conf.level = 0.95) %>%
mutate(stat_sig = case_when(
p.value < 0.05 ~ "p < 0.05",
TRUE ~ "N.S.")) %>%
mutate(group = case_when(
term == "sex" ~ "gender",
term == "inst_cat" ~ "site",
term == "pat.karno_cat" ~ "outcomes",
term == "ph.karno_cat" ~ "outcomes",
term == "meal.cal_cat" ~ "outcomes",
term == "wt.loss_cat" ~ "outcomes"))
## PLOT FOREST PLOT
forest_plot <- plot_data %>%
ggplot() +
aes(
x = estimate,
y = term,
colour = stat_sig) +
geom_vline(
aes(xintercept = 1),
linetype = 2
) +
geom_point(
shape = 15,
size = 4
) +
geom_linerange(
xmin = (plot_data$conf.low),
xmax = (plot_data$conf.high)
) +
scale_colour_manual(
values = c(
"N.S." = "black",
"p < 0.05" = "red")
) +
annotate(
"text",
x = 0.45,
y = -0.2,
col="red",
label = "indicates y",
) +
annotate(
"text",
x = 1.5,
y = -0.2,
col="red",
label = "indicates y",
) +
labs(
y = "",
x = "Hazard ratio") +
coord_trans(x = "log10") +
scale_x_continuous(
breaks = scales::log_breaks(n = 7),
limits = c(0.1,10)) +
ggforce::facet_col(
facets = ~group,
scales = "free_y",
space = "free"
) +
theme(
legend.position = "bottom",
legend.title = element_blank(),
strip.text = element_text(hjust = 0),
axis.title.x = element_text(margin = margin(t = 25, r = 0, b = 0, l = 0))
)
Created on 2022-05-10 by the reprex package (v2.0.1)
I think I would use annotation_custom here. This requires standard coord_cartesian with clip = 'off', but it should be easy to re-jig your x axis to use scale_x_log10
plot_data %>%
ggplot() +
aes(
x = estimate,
y = term,
colour = stat_sig) +
geom_vline(
aes(xintercept = 1),
linetype = 2
) +
geom_point(
shape = 15,
size = 4
) +
geom_linerange(
xmin = (log10(plot_data$conf.low)),
xmax = (log10(plot_data$conf.high))
) +
scale_colour_manual(
values = c(
"N.S." = "black",
"p < 0.05" = "red")
) +
annotation_custom(
grid::textGrob(
x = unit(0.4, 'npc'),
y = unit(-7.5, 'mm'),
label = "indicates yada",
gp = grid::gpar(col = 'red', vjust = 0.5, hjust = 0.5))
) +
annotation_custom(
grid::textGrob(
x = unit(0.6, 'npc'),
y = unit(-7.5, 'mm'),
label = "indicates bada",
gp = grid::gpar(col = 'blue', vjust = 0.5, hjust = 0.5))
) +
annotation_custom(
grid::linesGrob(
x = unit(c(0.49, 0.25), 'npc'),
y = unit(c(-10, -10), 'mm'),
arrow = arrow(length = unit(3, 'mm')),
gp = grid::gpar(col = 'red'))
) +
annotation_custom(
grid::linesGrob(
x = unit(c(0.51, 0.75), 'npc'),
y = unit(c(-10, -10), 'mm'),
arrow = arrow(length = unit(3, 'mm')),
gp = grid::gpar(col = 'blue'))
) +
labs(
y = "",
x = "Hazard ratio") +
scale_x_log10(
breaks = c(0.1, 0.3, 1, 3, 10),
limits = c(0.1,10)) +
ggforce::facet_col(
facets = ~group,
scales = "free_y",
space = "free"
) +
coord_cartesian(clip = 'off') +
theme(
legend.position = "bottom",
legend.title = element_blank(),
strip.text = element_text(hjust = 0),
axis.title.x = element_text(margin = margin(t = 25, r = 0, b = 0, l = 0)),
panel.spacing.y = (unit(15, 'mm'))
)
I'm looking for a solution since too much time without finding it, so it's time to ask for some help...
I would like to add pValue to boxplots organized with facet_wrap (ggplot2). Similar to what you obtain with the script I add to this post (the first part of the script is the exemple of what I want and it's working well for 1 plot, the second part is related to facet and doesn't work).
I would like to add pvalue between all "dose" values of "OJ", same for "VC", but also between, for exemple "dose"=1 of OJ and VC (as in the plot). It's working well for 1 plot, but not in facet_wrap. The error message is:
Error: Assigned data value must be compatible with existing data.
x Existing data has 6 rows.
x Assigned data has 60 rows.
ℹ Only vectors of size 1 are recycled.
Thanks for your help (if only...)
The script:
################# DATAFRAME
data("ToothGrowth")
df <- ToothGrowth
vec <- c("A","B")
df$dose <- as.character(df$dose)
df$facet <- rep(sample(vec, 2),replace=T, nrow(df)/2)
view(df)
################### STAT
df_pval <- df %>%
rstatix::group_by(dose) %>%
rstatix::wilcox_test(len ~ supp) %>%
rstatix::add_xy_position()
df_pval2 <- df %>%
rstatix::group_by(supp) %>%
rstatix::wilcox_test(len ~ dose) %>%
rstatix::add_xy_position(x = "supp", dodge = 0.8)
################### PLOT
plotx <- ggplot(df, aes(x = supp, y = len)) +
geom_boxplot(aes(fill = dose)) +
stat_pvalue_manual(df_pval,
label = "{p}",
color = "dose",
fontface = "bold",
step.group.by = "dose",
step.increase = 0.1,
tip.length = 0,
bracket.colour = "black",
show.legend = FALSE) +
stat_pvalue_manual(df_pval2,
label = "{p}",
color = "black",
fontface = "bold",
step.group.by = "supp",
step.increase = 0.1,
tip.length = 0,
bracket.colour = "black",
show.legend = FALSE)
plot(plotx)
################### STAT FACET
df_pval3 <- df %>%
rstatix::group_by(dose, facet) %>%
rstatix::wilcox_test(len ~ supp) %>%
rstatix::add_xy_position()
df_pval4 <- df %>%
rstatix::group_by(supp, facet) %>%
rstatix::wilcox_test(len ~ dose) %>%
rstatix::add_xy_position(x = "supp", dodge = 0.8)
print(df_pval)
print(df_pval2)
###################### PLOT FACET
ploty <- ggplot(df, aes(x = supp, y = len)) +
geom_boxplot(aes(fill = dose)) +
facet_wrap(~df[,4]) + stat_pvalue_manual(df_pval3,
label = "{p}",
color = "dose",
fontface = "bold",
step.group.by = "dose",
step.increase = 0.1,
tip.length = 0,
bracket.colour = "black",
show.legend = FALSE) +
stat_pvalue_manual(df_pval4,
label = "{p}",
color = "black",
fontface = "bold",
step.group.by = "supp",
step.increase = 0.1,
tip.length = 0,
bracket.colour = "black",
show.legend = FALSE)
plot(ploty)
I'm trying draw multiple density plots in one plot for comparison porpuses. I wanted them to have their confidence interval of 95% like in the following figure. I'm working with ggplot2 and my df is a long df of observations for a certain location that I would like to compare for different time intervals.
I've done some experimentation following this example but I don't have the coding knowledge to achieve what I want.
What i managed to do so far:
library(magrittr)
library(ggplot2)
library(dplyr)
build_object <- ggplot_build(
ggplot(data=ex_long, aes(x=val)) + geom_density())
plot_credible_interval <- function(
gg_density, # ggplot object that has geom_density
bound_left,
bound_right
) {
build_object <- ggplot_build(gg_density)
x_dens <- build_object$data[[1]]$x
y_dens <- build_object$data[[1]]$y
index_left <- min(which(x_dens >= bound_left))
index_right <- max(which(x_dens <= bound_right))
gg_density + geom_area(
data=data.frame(
x=x_dens[index_left:index_right],
y=y_dens[index_left:index_right]),
aes(x=x,y=y),
fill="grey",
alpha=0.6)
}
gg_density <- ggplot(data=ex_long, aes(x=val)) +
geom_density()
gg_density %>% plot_credible_interval(tab$q2.5[[40]], tab$q97.5[[40]])
Help would be much apreaciated.
This is obviously on a different set of data, but this is roughly that plot with data from 2 t distributions. I've included the data generation in case it is of use.
library(tidyverse)
x1 <- seq(-5, 5, by = 0.1)
t_dist1 <- data.frame(x = x1,
y = dt(x1, df = 3),
dist = "dist1")
x2 <- seq(-5, 5, by = 0.1)
t_dist2 <- data.frame(x = x2,
y = dt(x2, df = 3),
dist = "dist2")
t_data = rbind(t_dist1, t_dist2) %>%
mutate(x = case_when(
dist == "dist2" ~ x + 1,
TRUE ~ x
))
p <- ggplot(data = t_data,
aes(x = x,
y = y )) +
geom_line(aes(color = dist))
plot_data <- as.data.frame(ggplot_build(p)$data)
bottom <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_head(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
top <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_tail(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
segments <- t_data %>%
group_by(dist) %>%
summarise(x = mean(x),
y = max(y))
p + geom_area(data = bottom,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_area(data = top,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_segment(data = segments,
aes(x = x,
y = 0,
xend = x,
yend = y,
color = dist,
linetype = dist)) +
scale_color_manual(values = c("red", "blue")) +
scale_linetype_manual(values = c("dashed", "dashed"),
labels = NULL) +
ylab("Density") +
xlab("\U03B2 for AQIv") +
guides(color = guide_legend(title = "p.d.f \U03B2",
title.position = "right",
labels = NULL),
linetype = guide_legend(title = "Mean \U03B2",
title.position = "right",
labels = NULL,
override.aes = list(color = c("red", "blue"))),
fill = guide_legend(title = "Rej. area \U03B1 = 0.05",
title.position = "right",
labels = NULL)) +
annotate(geom = "text",
x = c(-4.75, -4),
y = 0.35,
label = c("RK", "OK")) +
theme(panel.background = element_blank(),
panel.border = element_rect(fill = NA,
color = "black"),
legend.position = c(0.2, 0.7),
legend.key = element_blank(),
legend.direction = "horizontal",
legend.text = element_blank(),
legend.title = element_text(size = 8))
I am using the ggerrorplot () function of the ggpubr package to create the graph below. My question is whether there is any way to change the colors of the dots without changing the color of the point that represents the mean and standard deviation? Observe the image:
My code:
# loading packages
library(ggpubr)
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
# Plot
ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
Can you accomplish this by simply passing in color to add.params?
# loading packages
library(ggpubr)
#> Loading required package: ggplot2
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
# Plot
ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2, color = "red"),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
Created on 2021-03-10 by the reprex package (v0.3.0)
Another potential workaround - replicate the plot using ggplot() and geom_linerange(), e.g.
library(ggpubr)
library(ggsci)
library(cowplot)
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
data_summary <- function(data, varname, groupnames){
require(plyr)
summary_func <- function(x, col){
c(mean = mean(x[[col]], na.rm=TRUE),
sd = sd(x[[col]], na.rm=TRUE))
}
data_sum<-ddply(data, groupnames, .fun=summary_func,
varname)
data_sum <- rename(data_sum, c("mean" = varname))
return(data_sum)
}
df2 <- data_summary(df, varname = "VALUE", groupnames = c("TEST", "GROUP"))
# Plot
p1 <- ggplot(df, aes(x = factor(GROUP, levels = c('Group1','Group2','Group1 and Group2')),
y = VALUE, color = TEST)) +
geom_jitter(shape = 21, fill = "black", stroke = 0,
position = position_jitterdodge(jitter.width = 0.2)) +
geom_linerange(data = df2, aes(ymin=VALUE-sd, ymax=VALUE+sd),
position=position_dodge(width = .75)) +
geom_point(data = df2, aes(y = VALUE), size = 3,
position = position_dodge(width = 0.75)) +
scale_color_jco() +
labs(x = '', y = 'Values\n') +
theme_classic(base_size = 14) +
theme(legend.title = element_blank(),
legend.position = "top")
p2 <- ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2, labels = "AUTO")
When you plot them side-by-side you can see that they aren't exactly the same, but this might work for you nonetheless.
Edit
An advantage of this approach is that you can adjust the 'fill' scale separately if you don't want all the dots to be the same colour, but you do want them to be different to the lines, e.g.
p1 <- ggplot(df, aes(x = factor(GROUP, levels = c('Group1','Group2','Group1 and Group2')),
y = VALUE, color = TEST)) +
geom_jitter(aes(fill = TEST), shape = 21, stroke = 0,
position = position_jitterdodge(jitter.width = 0.2)) +
geom_linerange(data = df2, aes(ymin=VALUE-sd, ymax=VALUE+sd),
position=position_dodge(width = .75)) +
geom_point(data = df2, aes(y = VALUE), size = 3,
position = position_dodge(width = 0.75)) +
scale_color_jco() +
scale_fill_npg() +
labs(x = '', y = 'Values\n') +
theme_classic(base_size = 14) +
theme(legend.title = element_blank(),
legend.position = "top")
p2 <- ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2, labels = "AUTO")
I'm trying to add an interactive heatmap to my Shiny app, but I also have interactive graphs using ggiraph. I'm currently using the d3heatmap package, but the heatmaps don't render in the app. I've created a toy example to illustrate this:
library(shiny)
library(ggiraph)
library(d3heatmap)
ui <- fluidPage(
d3heatmapOutput('d3'),
ggiraphOutput('gg')
)
server <- function(input, output, session) {
# Create heatmap
output$d3 <- renderD3heatmap({
d3heatmap(matrix(1:100, nrow = 100, ncol = 100))
})
# Create ggiraph
output$gg <- renderggiraph({
p <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Width,
color = Species, tooltip = iris$Species) ) +
geom_point_interactive()
ggiraph(code = {print(p)})
})
}
shinyApp(ui = ui, server = server)
Together, only the ggiraph renders, but the heatmap does not. However, if you comment out the ggiraph code, the heatmap renders. I tried switching the order of loading the packages, but that still didn't work.
I'm currently running on R 3.2.2 (I have to use this version because the company servers only run on this version, and neither my manager nor I have the authority to update it). I tried downloading the shinyheatmap, heatmaply, and heatmap.2 packages, but because of versioning issues, the installations were unsuccessful.
So right now, I've just used pheatmap to create the heatmaps, but they aren't interactive (i.e., I can't get values when I hover over individual cells, and I can't zoom in). Is there any workaround for this, or are there other interactive heatmap packages out there that would work? I'd like to avoid changing all of my ggiraph graphs to plotly graphs since there are a lot of them in my code.
Please let me know if there's any other information you need. Any suggestions would be much appreciated!
(just to let you know I am the author of ggiraph)
There is a conflict between ggiraph and d3heatmap because ggiraph is using d3.js version 4 and d3heatmap is using D3.js version 3. I don't think there is a solution to solve that conflict.
However, building an interactive heatmap with ggplot2/ggiraph is not that difficult. See below:
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggiraph)
library(ggdendro)
# mydata <- cor(mtcars)
mydata <- matrix(runif(2500, min = -2, max = 2), ncol = 50)
row.names(mydata) <- paste0("row_", seq_len(nrow(mydata)))
colnames(mydata) <- paste0("col_", seq_len(ncol(mydata)))
# dendrogram for rows
hc <- hclust(dist(mydata), "ave")
dhr <- as.dendrogram(hc)
order_r <- rownames(mydata)[hc$order]
# dendrogram for columns
hc <- hclust(dist(t(mydata)), "ave")
dhc <- as.dendrogram(hc)
order_c <- colnames(mydata)[hc$order]
# the data
expr_set <- bind_cols(
data_frame(rowvar = rownames(mydata)),
as.data.frame(mydata)
)
expr_set <- gather(expr_set, colvar, measure, -rowvar)
expr_set$rowvar <- factor( expr_set$rowvar, levels = order_r )
expr_set$colvar <- factor( expr_set$colvar, levels = order_c )
expr_set <- arrange(expr_set, rowvar, colvar)
# get data for dendrograms - IMHO, ggdendro is the hero here...
data_c <- dendro_data(dhc, type = "rectangle")
data_c <- segment(data_c) %>% mutate(
y = y + length(order_r) + .5,
yend = yend + length(order_r) + .5
)
data_r <- dendro_data(dhr, type = "rectangle")
data_r <- segment(data_r)
data_r <- data_r %>%
mutate( x_ = y + length(order_c) + .5,
xend_ = yend + length(order_c) + .5,
y_ = x,
yend_ = xend )
expr_set <- expr_set %>%
mutate(
tooltip = sprintf("Row: %s<br/>Col: %s<br/>measure: %.02f",
rowvar, colvar, measure) ,
data_id = sprintf("%s_%s", rowvar, colvar)
)
# all data are tidy and can be now used with ggplot
p <- ggplot(data = expr_set, aes(x = colvar, y = rowvar) ) +
geom_tile_interactive(aes(fill = measure, tooltip = tooltip, data_id = data_id), colour = "white") +
scale_fill_gradient(low = "white", high = "#BC120A") +
geom_segment(
data = data_c,
mapping = aes(x = x, y = yend, xend = xend, yend = y),
colour = "gray20", size = .2) +
geom_segment(
data = data_r,
mapping = aes(x = x_, y = y_, xend = xend_, yend = yend_),
colour = "gray20", size = .2) +
coord_equal()
# cosmetics
p <- p + theme_minimal() +
theme(
legend.position = "right",
panel.grid.minor = element_line(color = "transparent"),
panel.grid.major = element_line(color = "transparent"),
axis.ticks.length = unit(2, units = "mm"),
plot.title = element_text(face = "bold", hjust = 0.5, size = 12),
axis.title = element_text(size = 9, colour = "gray30"),
axis.text.y = element_text(hjust = 1, size = 5, colour = "gray40"),
axis.text.x = element_text(angle = 90, hjust = 1, size = 5, colour = "gray40"),
legend.title=element_text(face = "bold", hjust = 0.5, size=8),
legend.text=element_text(size=6)
)
ggiraph(ggobj = p)
Hope it helps
I know that this question is answered some time ago but I've ran into the same problem and i was not able to use ggplot2 because it was just to slow to work with my Shiny application. The heatmaply package is allot faster and easier to implement. I performed a mini-benchmark (n= 20).
with ggplot2 took an average time of 64 seconds. With heatmaply it took only 2 seconds. both methods use the 'ave' method of hclust.I hope this is helpfull.
mini-benchmark n= 20 of ggplot vs heatmaply
here is the code i used:
library(tidyr)
library(ggplot2)
library(ggiraph)
library(ggdendro)
library(heatmaply)
# mydata <- cor(mtcars)
create_data <- function(){
df <- matrix(runif(2500, min = -2, max = 2), ncol = 50)
row.names(df) <- paste0("row_", seq_len(nrow(df)))
colnames(df) <- paste0("col_", seq_len(ncol(df)))
return(df)
}
gg2heat <- function(mydata){
# dendrogram for rows
hc <- hclust(dist(mydata), "ave")
dhr <- as.dendrogram(hc)
order_r <- rownames(mydata)[hc$order]
# dendrogram for columns
hc <- hclust(dist(t(mydata)), "ave")
dhc <- as.dendrogram(hc)
order_c <- colnames(mydata)[hc$order]
# the data
expr_set <- bind_cols(
data_frame(rowvar = rownames(mydata)),
as.data.frame(mydata)
)
expr_set <- gather(expr_set, colvar, measure, -rowvar)
expr_set$rowvar <- factor( expr_set$rowvar, levels = order_r )
expr_set$colvar <- factor( expr_set$colvar, levels = order_c )
expr_set <- arrange(expr_set, rowvar, colvar)
# get data for dendrograms - IMHO, ggdendro is the hero here...
data_c <- dendro_data(dhc, type = "rectangle")
data_c <- segment(data_c) %>% mutate(
y = y + length(order_r) + .5,
yend = yend + length(order_r) + .5
)
data_r <- dendro_data(dhr, type = "rectangle")
data_r <- segment(data_r)
data_r <- data_r %>%
mutate( x_ = y + length(order_c) + .5,
xend_ = yend + length(order_c) + .5,
y_ = x,
yend_ = xend )
expr_set <- expr_set %>%
mutate(
tooltip = sprintf("Row: %s<br/>Col: %s<br/>measure: %.02f",
rowvar, colvar, measure) ,
data_id = sprintf("%s_%s", rowvar, colvar)
)
# all data are tidy and can be now used with ggplot
p <- ggplot(data = expr_set, aes(x = colvar, y = rowvar) ) +
geom_tile_interactive(aes(fill = measure, tooltip = tooltip, data_id = data_id), colour = "white") +
scale_fill_gradient(low = "white", high = "#BC120A") +
geom_segment(
data = data_c,
mapping = aes(x = x, y = yend, xend = xend, yend = y),
colour = "gray20", size = .2) +
geom_segment(
data = data_r,
mapping = aes(x = x_, y = y_, xend = xend_, yend = yend_),
colour = "gray20", size = .2) +
coord_equal()
# cosmetics
p <- p + theme_minimal() +
theme(
legend.position = "right",
panel.grid.minor = element_line(color = "transparent"),
panel.grid.major = element_line(color = "transparent"),
axis.ticks.length = unit(2, units = "mm"),
plot.title = element_text(face = "bold", hjust = 0.5, size = 12),
axis.title = element_text(size = 9, colour = "gray30"),
axis.text.y = element_text(hjust = 1, size = 5, colour = "gray40"),
axis.text.x = element_text(angle = 90, hjust = 1, size = 5, colour = "gray40"),
legend.title=element_text(face = "bold", hjust = 0.5, size=8),
legend.text=element_text(size=6)
)
ggiraph(ggobj = p)
}
htmp_gg <- c()
htmp_maply <-c()
for (i in 1:20){
df <- create_data()
time_gg <- (system.time(gg2heat(df)))[3]
htmp_gg<- append(htmp_gg, values = time_gg)
time_heatmaply <- (system.time(heatmaply::heatmaply(df, hclust_method = 'ave')))[3]
htmp_maply<- append(htmp_maply, values = time_heatmaply)
rm(df)
}
score <- data.frame(htmp_gg, htmp_maply)%>% gather(key = 'method', value = 'time')
p <- ggplot(score, aes(x = method, y = time, fill = method))+geom_violin()+ stat_summary(fun.y=median, geom="point", size=2, color="black")
print(p)