Cluster annotations using pheatmap - r

So, I am trying to create a pretty heatmap with pheatmap function in R.
I want to have coloured bars where the dendrogram stops and the graph starts in order to annotate the different clusters. Like a horizontal thick line running through columns and changes colours when it pass to another cluster. It's pretty common I think. But I can't figure out the 'annotations' in pheatmap. Any ideas?
structure<-matrix(rnorm(10000),ncol=100,nrow=100)
dist_structue<-dist(structure)
clustering<-hclust(dist)
cols2 = colorRampPalette(c('green','white','red'))(20)
annotation <- data.frame(Var1 = factor(cutree(clustering, k = 4))
heat_chem <- pheatmap(as.matrix(dist_structure),
clustering_distance_rows=dist_structure,
cellwidth = 4, cellheight = 4, cluster_rows=T,cluster_cols=T, legend = T,
clustering_distance_cols = dist_structure, show_rownames = TRUE,
fontsize_row = 5, annotation_row = annotation,
show_colnames = FALSE, cutree_cols = 4, cutree_rows=4,
color = cols2,
main = 'Heatmap')

The rownames of annotation are not matching the rownames of as.matrix(dist_structure)

Related

GGplot is refusing to change the colors of my row annotation

There are some issues with my ggplot chart that I can't seem to fix.
# as you may geuss from the file name I have provided this matrix below
vis.matrix <- read.csv("csvfileprovidedbelow.csv")
# setting up annotation_row
cell_df <- data.frame ("Cells" = c(rep("Putative Engram Cell", 10), rep("Random Cell",10))
)
rownames(cell_df) <- rownames(vis.matrix)
cell_df$Cells <- as.factor(cell_df$Cells)
#setting up colors
newCols <- colorRampPalette(grDevices::rainbow(length(unique(cell_df$Cells))))
annoCol <- c("2AFE00", "ACACAC") # green and grey
names(annoCol) <- levels(cell_df$Cells)
annoCol <- list(category = annoCol)
color=colorRampPalette(c("navy", "white", "red"))(50)
#plotting
pheatmap(vis.matrix,cluster_rows = F, cluster_cols=F, annotation_row = cell_df,
annotation_names_col = F, scale = "column", color = color,
annotation_row_colors = annoCol,
show_rownames = F)
Result
For some reason the Cells are not the colors I selected, you can search those colors here: https://www.color-hex.com/
Don't know why ggplot is ignoring the input I'm giving it. Also would like to remove the word "Cells" beside the color bars on the graph, it's unescessary the legend already explains what it is.
Variables as csv's for reproduction(copy and paste!)
vis.matrix is here:
"","LINGO1","ARC","INHBA","BDNF","MAPK4","ADGRL3","PTGS2","CHGB","BRINP1","KCNK1"
"P57_CATCGGGCATGTCGAT",-0.368245729284319,3.47987283505039,2.94634318740768,5.57309275653906,1.28904872906168,5.3650511213102,-0.368245729284319,2.25850383984707,4.60363764575367,-0.368245729284319
"P57_GAAGCAGGTAAAGGAG",-0.384074162377759,4.36118508997518,3.70326968156081,4.89874111968957,1.65959775959153,4.36118508997518,-0.384074162377759,-0.384074162377759,4.89874111968957,2.85506919772029
"P57_TGACTTTTCTTTACAC",-0.357194851773428,2.40812492004642,3.13225019258772,5.67855340720666,-0.357194851773428,3.13225019258772,-0.357194851773428,4.87697271476829,1.38752767040715,-0.357194851773428
"P57_CTAGAGTGTCCGACGT",1.50110424640379,3.34315724311024,2.57863617381809,6.67240079339861,3.34315724311024,3.93616585502151,-0.340948750302666,1.50110424640379,5.77821885172796,3.34315724311024
"P57_CCTTACGTCCAAGTAC",-0.381478022176755,4.73256922534426,2.17554560158375,6.70465771162764,1.23182426263886,3.36449387848259,-0.381478022176755,2.17554560158375,4.45842883227008,3.36449387848259
"P57_ATCCGAAGTGTGACCC",2.60172319423431,1.50562420175544,-0.36816940232616,5.57161579079479,1.50562420175544,3.37941780583703,-0.36816940232616,3.37941780583703,4.47551679831591,3.98264461101114
"P57_TCCACACAGCTCCTCT",-0.364903374339472,2.59101007342497,2.59101007342497,5.23001785519025,-0.364903374339472,3.36504411201368,-0.364903374339472,1.5000703688371,1.5000703688371,-0.364903374339472
"P57_CTGAAGTGTGCTTCTC",-0.384690873645543,3.35025193111807,2.83241374986762,4.71429931551947,3.35025193111807,3.35025193111807,-0.384690873645543,3.35025193111807,2.16480422093696,2.16480422093696
"P57_CTGATAGAGAATCTCC",1.6886646742164,2.87694996247181,-0.342722443403036,7.39148929746973,1.6886646742164,5.75143890945527,-0.342722443403036,5.75143890945527,4.37401237658979,-0.342722443403036
"P57_GGAGCAACATACAGCT",-0.351186802480077,1.4651606822983,1.4651606822983,5.40649850082577,-0.351186802480077,4.34400333395122,-0.351186802480077,1.4651606822983,5.09785565185506,1.4651606822983
"A57_CGTCTACCAGACGCAA",-0.229651158962319,-0.229651158962319,-0.229651158962319,-0.229651158962319,-0.229651158962319,3.72717582194343,-0.229651158962319,-0.229651158962319,-0.229651158962319,-0.229651158962319
"P57_GTTCGGGCAATGGACG",-0.269219507178484,-0.269219507178484,-0.269219507178484,-0.269219507178484,-0.269219507178484,4.26241026631276,-0.269219507178484,-0.269219507178484,-0.269219507178484,-0.269219507178484
"P56_GGTATTGTCATGTCTT",-0.294887130864939,-0.294887130864939,-0.294887130864939,-0.294887130864939,-0.294887130864939,5.06808977241301,-0.294887130864939,-0.294887130864939,-0.294887130864939,-0.294887130864939
"A67_AAATGCCAGATAGTCA",4.03836820795661,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977
"P76_CCCTGATAGAGGACTC",-0.507269585219581,-0.507269585219581,-0.507269585219581,1.90264065061749,-0.507269585219581,4.86614536666517,-0.507269585219581,1.40253909173334,-0.507269585219581,0.697685532698955
"P56_GATCGATTCCGTCAAA",2.00727896845415,-0.313514850319463,-0.313514850319463,2.00727896845415,-0.313514850319463,3.36485632434217,-0.313514850319463,-0.313514850319463,-0.313514850319463,-0.313514850319463
"P57_GCTGCAGCATAGGATA",2.32839123926114,-0.289105834618761,-0.289105834618761,-0.289105834618761,-0.289105834618761,2.32839123926114,-0.289105834618761,-0.289105834618761,-0.289105834618761,4.94588831314104
"P82_AGGATAACATAGGTTC",1.39699437520094,-0.501641808549684,0.696264250985952,1.39699437520094,-0.501641808549684,4.49353661848721,-0.501641808549684,-0.501641808549684,1.89417031052159,-0.501641808549684
"P82_CCAAGCGTCCGGCTTT",-0.328980171926236,-0.328980171926236,-0.328980171926236,4.08682708745919,-0.328980171926236,1.87892345776647,-0.328980171926236,-0.328980171926236,4.08682708745919,-0.328980171926236
"P57_CAGCGACCATGTCCTC",-0.316475979591103,-0.316475979591103,-0.316475979591103,2.18079240270816,-0.316475979591103,6.13886914288907,-0.316475979591103,2.18079240270816,-0.316475979591103,4.67806078500742
pheatmap is not ggplot. It is drawn using grid graphics.
Anyway, you would pass the color specification as follows:
pheatmap(vis.matrix, cluster_rows = F, cluster_cols=F, annotation_row = cell_df,
annotation_names_col = F, scale = "column", color = color,
annotation_colors = list(Cells = c("Putative Engram Cell" = "#2AFE00",
"Random Cell" = "#ACACAC")),
show_rownames = F)

Is there a way to specify number of kmeans clusters to return in heatmaply

I would like to return a specific number of clusters for my interactive heatmap from heatmaply like I can do with pheatmap and the kmeans_k = argument. Is there a way to do this with heatmaply?
If I have a large matrix and do not define the number of clusters to return with heatmaply, it takes too long to calculate the heatmap or I will get the error: 'vector memory exhausted(limit reached?)'.
library(pheatmap)
data(mtcars)
mat <- as.matrix(mtcars)
pheatmap(
mtcars,
border_color = "grey20",
main = "",
show_rownames = TRUE,
show_colnames = TRUE,
kmeans_k = 30,
cluster_rows = F,
cluster_cols = F
)
You want to use the k_col, and or k_row arguments.
You can see examples in the vignette, but just a simple example:
library("heatmaply")
heatmaply(mtcars, k_col = 2, k_row = 4)
Output:

How to zoom in on/extract a subsection of/extract colours from a heatmap.2 in R?

I'm creating heatmaps in R using heatmap.2 (I think it needs to be heatmap.2 because I'm using 1 dataset to generate the colours of the heatmap and a second dataset to overlay numerical data).
Here is a sample of my code so far. The actual data set is 30 columns and 1000 rows.
heatmap_all_data <-
data.frame(name = c("John", "Mark", "Luke", "Jack", "Will", "Jim", "Clive", "Steve"),
trait_1 = c(1, 2, 5, 8, 5, 3, 7, 8),
trait_2 = c(5, 7, 3, 4, 6, 3, 2, 1)) %>%
column_to_rownames(var="name")
heatmap_colour <- colorRampPalette(brewer.pal(11, "RdYlBu"))(1000)
heatmap.2(as.matrix(heatmap_all_data),
scale = "column",
key = FALSE,
dendrogram = "none",
Rowv = FALSE,
Colv = FALSE,
trace = "none",
col = rev(heatmap_colour),
labRow = row.names(heatmap_all_data))
Which generates the following heatmap: https://i.stack.imgur.com/lK8Sc.png
NOW, the problem is I only want a subsection of this data, e.g I want the following heatmap:
heatmap_part_data <-
data.frame(name = c("John", "Mark", "Luke"),
trait_1 = c(1, 2, 5),
trait_2 = c(5, 7, 3)) %>%
column_to_rownames(var="name")
heatmap_colour <- colorRampPalette(brewer.pal(11, "RdYlBu"))(1000)
heatmap.2(as.matrix(heatmap_part_data),
scale = "column",
key = FALSE,
dendrogram = "none",
Rowv = FALSE,
Colv = FALSE,
trace = "none",
col = rev(heatmap_colour),
labRow = row.names(heatmap_part_data))
https://i.stack.imgur.com/j33Ic.png
BUT, I want each cell to keep the same colours as the original. I.e. I want the colours in my subsetted heatmap to be relative to the total data and not just the subsetted data. (In the real example I want to show 10 out of 1000 entries).
So, I need to either "zoom in" and rescale the top section of the heatmap and then crop the image, extract the top section of the heatmap into a new object while maintaining the same colours, or extract information about the colours in the full heatmap and overwrite the default colours in the subsetted heatmap.
The goal is basically to output an image of the subsetted data heatmap with each colour in each cell the same as in the all_data heatmap.
I hope this is clear - please advise if you need any clarification!
Many thanks for taking the time to read and I hope someone can help.
Best,
Ryan
Found the solution!
So I switched from heatmap.2 to heatmaply - same functionality but with interactivity. With heatmaply you can drag an area over the heatmap and zoom into that area which gives the desired result but I wanted to consistently zoom to a specific area.
From this website (https://plotly.com/r/axes/) I found out about the Layout function of the wider plotly library (that heatmaply is part of).
So to the existing code you can add:
%>% layout(yaxis = list(range = c(10.5, 0.5)))
(Need to add 0.5 to centre the rows properly)
Et voila! The heatmap colours are generated relative to the wider dataset but only a subset is shown.

Add a gap in heatmap with pheatmap package

I made the heatmap using the code below:
library(pheatmap)
library(dplyr)
data = data.frame(matrix(runif(10*10), ncol=10))
data$sample = rep(c("tumour", "normal"), 5)
data$subject.ID = paste('Subject', 1:10)
data = data %>% arrange(sample)
# for row annotation
my_sample_col = data %>% select(sample)
rownames(my_sample_col) = data$subject.ID
# data matrix
mat = as.matrix(data %>% select(-sample, -subject.ID))
rownames(mat) = data$subject.ID
pheatmap(mat,
scale='row',
annotation_row = my_sample_col,
annotation_names_row=F,
cluster_rows = FALSE,
cluster_cols = FALSE,
show_colnames = FALSE,
show_rownames = FALSE)
I want to put a gap between row 5 and row 6, to separate the heatmap according to my row annotation.
In pheatmap function, the argument gaps_row seems to do the job.
vector of row indices that show shere to put gaps into heatmap. Used only if the rows are not clustered.
I'm not sure how to implement that. Can someone help me with this? Thanks a lot.
I would recommend using ComplexHeatmap package (website; Gu et al, 2016). You can install it with devtools::install_github("jokergoo/ComplexHeatmap").
It has more functionalities, but you also have to invest more time (eg., row annotation and matrix scaling).
library(ComplexHeatmap)
# Create annotation for rows
my_sample_col_ano <- rowAnnotation(sample = my_sample_col$sample,
show_annotation_name = FALSE)
# Scale original matrix row-wise
matS <- t(apply(mat, 1, scale))
# Plot heatmap
Heatmap(matS,
# Remove name from fill legend
name = "",
# Keep original row/col order
row_order = rownames(matS), column_order = colnames(matS),
# Add left annotation (legend with tumor/normal)
left_annotation = my_sample_col_ano,
# ACTUAL SPLIT by sample group
row_split = my_sample_col$sample,
show_row_names = FALSE, show_column_names = FALSE,
show_row_dend = FALSE, show_column_dend = FALSE,
row_title = NULL)
If you want to use original pheatmap pass argument to gaps_row which is equal to the size of your group (ie, normal):
pheatmap(mat,
scale='row',
gaps_row = 5,
annotation_row = my_sample_col,
annotation_names_row=F,
cluster_rows = FALSE,
cluster_cols = FALSE,
show_colnames = FALSE,
show_rownames = FALSE)
If you can more groups than two instead of hardcoding numeric value to gaps_row (ie, gaps_row = 5) you can pass this snippet (head(as.numeric(cumsum(table(my_sample_col$sample))), -1)).

turn off grid lines for R xyplot timeseries

I am plotting a time series with the timePlot function of the open air package of R. The graph has grey grid lines in the background that I would like to turn off but I do not find a way to do it. I would expect something simple such as grid = FALSE, but that is not the case. It appears to be rather complex, requiring the use of extra arguments which are passed to xyplot of the library lattice. I believe the answer lies some where in the par.settings function but all attempts have failed. Does anyone have any suggestions to this issue?
Here is by script:
timeozone <- import(i, date="date", date.format = "%m/%d/%Y", header=TRUE, na.strings="")
ROMO = timePlot(timeozone, pollutant = c("C7", "C9", "C10"), group = TRUE, stack = FALSE,y.relation = "same", date.breaks = 9, lty = c(1,2,3), lwd = c(2, 3, 3), fontsize = 15, cols = c("black", "black"), ylab = "Ozone (ppbv)")
panel = function(x, y) {
panel.grid(h = 0, v = 0)
panel.xyplot(x,y)
}

Resources