There are some issues with my ggplot chart that I can't seem to fix.
# As the file name suggests, the contents of this CSV are listed further down.
# NOTE(review): read.csv() is called without `row.names`, so the first CSV
# column (the cell barcodes) presumably becomes an ordinary column rather than
# the row names - confirm.
vis.matrix <- read.csv("csvfileprovidedbelow.csv")
# Set up the data frame passed as annotation_row: 10 rows labelled
# "Putative Engram Cell" followed by 10 rows labelled "Random Cell".
cell_df <- data.frame ("Cells" = c(rep("Putative Engram Cell", 10), rep("Random Cell",10))
)
# Give the annotation the same row names as the matrix.
rownames(cell_df) <- rownames(vis.matrix)
cell_df$Cells <- as.factor(cell_df$Cells)
# Set up colours.
# NOTE(review): newCols is not used by any later line in this snippet.
newCols <- colorRampPalette(grDevices::rainbow(length(unique(cell_df$Cells))))
# NOTE(review): these hex codes have no leading "#"; R colour specifications
# of this kind are normally written "#RRGGBB" - confirm.
annoCol <- c("2AFE00", "ACACAC") # green and grey
names(annoCol) <- levels(cell_df$Cells)
# NOTE(review): the list element is named "category", whereas the annotation
# column in cell_df is named "Cells".
annoCol <- list(category = annoCol)
# 50-step navy -> white -> red palette for the heatmap cells.
color=colorRampPalette(c("navy", "white", "red"))(50)
# Plotting.
# NOTE(review): `annotation_row_colors` does not appear among pheatmap's
# documented arguments (the documented one is `annotation_colors`) - confirm.
pheatmap(vis.matrix,cluster_rows = F, cluster_cols=F, annotation_row = cell_df,
annotation_names_col = F, scale = "column", color = color,
annotation_row_colors = annoCol,
show_rownames = F)
Result
For some reason the Cells are not the colors I selected, you can search those colors here: https://www.color-hex.com/
I don't know why ggplot is ignoring the input I'm giving it. I would also like to remove the word "Cells" beside the color bars on the graph; it's unnecessary, since the legend already explains what it is.
Variables as csv's for reproduction(copy and paste!)
vis.matrix is here:
"","LINGO1","ARC","INHBA","BDNF","MAPK4","ADGRL3","PTGS2","CHGB","BRINP1","KCNK1"
"P57_CATCGGGCATGTCGAT",-0.368245729284319,3.47987283505039,2.94634318740768,5.57309275653906,1.28904872906168,5.3650511213102,-0.368245729284319,2.25850383984707,4.60363764575367,-0.368245729284319
"P57_GAAGCAGGTAAAGGAG",-0.384074162377759,4.36118508997518,3.70326968156081,4.89874111968957,1.65959775959153,4.36118508997518,-0.384074162377759,-0.384074162377759,4.89874111968957,2.85506919772029
"P57_TGACTTTTCTTTACAC",-0.357194851773428,2.40812492004642,3.13225019258772,5.67855340720666,-0.357194851773428,3.13225019258772,-0.357194851773428,4.87697271476829,1.38752767040715,-0.357194851773428
"P57_CTAGAGTGTCCGACGT",1.50110424640379,3.34315724311024,2.57863617381809,6.67240079339861,3.34315724311024,3.93616585502151,-0.340948750302666,1.50110424640379,5.77821885172796,3.34315724311024
"P57_CCTTACGTCCAAGTAC",-0.381478022176755,4.73256922534426,2.17554560158375,6.70465771162764,1.23182426263886,3.36449387848259,-0.381478022176755,2.17554560158375,4.45842883227008,3.36449387848259
"P57_ATCCGAAGTGTGACCC",2.60172319423431,1.50562420175544,-0.36816940232616,5.57161579079479,1.50562420175544,3.37941780583703,-0.36816940232616,3.37941780583703,4.47551679831591,3.98264461101114
"P57_TCCACACAGCTCCTCT",-0.364903374339472,2.59101007342497,2.59101007342497,5.23001785519025,-0.364903374339472,3.36504411201368,-0.364903374339472,1.5000703688371,1.5000703688371,-0.364903374339472
"P57_CTGAAGTGTGCTTCTC",-0.384690873645543,3.35025193111807,2.83241374986762,4.71429931551947,3.35025193111807,3.35025193111807,-0.384690873645543,3.35025193111807,2.16480422093696,2.16480422093696
"P57_CTGATAGAGAATCTCC",1.6886646742164,2.87694996247181,-0.342722443403036,7.39148929746973,1.6886646742164,5.75143890945527,-0.342722443403036,5.75143890945527,4.37401237658979,-0.342722443403036
"P57_GGAGCAACATACAGCT",-0.351186802480077,1.4651606822983,1.4651606822983,5.40649850082577,-0.351186802480077,4.34400333395122,-0.351186802480077,1.4651606822983,5.09785565185506,1.4651606822983
"A57_CGTCTACCAGACGCAA",-0.229651158962319,-0.229651158962319,-0.229651158962319,-0.229651158962319,-0.229651158962319,3.72717582194343,-0.229651158962319,-0.229651158962319,-0.229651158962319,-0.229651158962319
"P57_GTTCGGGCAATGGACG",-0.269219507178484,-0.269219507178484,-0.269219507178484,-0.269219507178484,-0.269219507178484,4.26241026631276,-0.269219507178484,-0.269219507178484,-0.269219507178484,-0.269219507178484
"P56_GGTATTGTCATGTCTT",-0.294887130864939,-0.294887130864939,-0.294887130864939,-0.294887130864939,-0.294887130864939,5.06808977241301,-0.294887130864939,-0.294887130864939,-0.294887130864939,-0.294887130864939
"A67_AAATGCCAGATAGTCA",4.03836820795661,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977,-0.211281061058977
"P76_CCCTGATAGAGGACTC",-0.507269585219581,-0.507269585219581,-0.507269585219581,1.90264065061749,-0.507269585219581,4.86614536666517,-0.507269585219581,1.40253909173334,-0.507269585219581,0.697685532698955
"P56_GATCGATTCCGTCAAA",2.00727896845415,-0.313514850319463,-0.313514850319463,2.00727896845415,-0.313514850319463,3.36485632434217,-0.313514850319463,-0.313514850319463,-0.313514850319463,-0.313514850319463
"P57_GCTGCAGCATAGGATA",2.32839123926114,-0.289105834618761,-0.289105834618761,-0.289105834618761,-0.289105834618761,2.32839123926114,-0.289105834618761,-0.289105834618761,-0.289105834618761,4.94588831314104
"P82_AGGATAACATAGGTTC",1.39699437520094,-0.501641808549684,0.696264250985952,1.39699437520094,-0.501641808549684,4.49353661848721,-0.501641808549684,-0.501641808549684,1.89417031052159,-0.501641808549684
"P82_CCAAGCGTCCGGCTTT",-0.328980171926236,-0.328980171926236,-0.328980171926236,4.08682708745919,-0.328980171926236,1.87892345776647,-0.328980171926236,-0.328980171926236,4.08682708745919,-0.328980171926236
"P57_CAGCGACCATGTCCTC",-0.316475979591103,-0.316475979591103,-0.316475979591103,2.18079240270816,-0.316475979591103,6.13886914288907,-0.316475979591103,2.18079240270816,-0.316475979591103,4.67806078500742
pheatmap is not ggplot. It is drawn using grid graphics.
Anyway, you would pass the color specification as follows:
# Annotation colours are supplied through `annotation_colors`: a named list
# with one element per annotation column (here "Cells"), each a named vector
# mapping factor levels to colours. Hex colours need the leading "#".
pheatmap(
  vis.matrix,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  annotation_row = cell_df,
  # The label beside the row annotation bar ("Cells") is controlled by
  # annotation_names_row, not annotation_names_col.
  annotation_names_row = FALSE,
  scale = "column",
  color = color,
  annotation_colors = list(
    Cells = c(
      "Putative Engram Cell" = "#2AFE00",
      "Random Cell" = "#ACACAC"
    )
  ),
  show_rownames = FALSE
)
I am trying to generate a heatmap as the following figure. I have already tried pheatmap and the code is as follows:
# Breakpoints from 0 to 2 in steps of 0.1, passed to pheatmap as `breaks`.
breaks_2 <- seq(from = 0, to = 2, by = 0.1)
# Column-scaled heatmap without clustering, using a 20-colour inferno palette.
pheatmap::pheatmap(
  mat = data,
  scale = "column",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = inferno(20),
  breaks = breaks_2,
  border_color = "white",
  show_rownames = FALSE,
  show_colnames = TRUE
)
But this does not seem to work. My understanding so far is that I am either defining the breaks incorrectly or need to use a package other than pheatmap. Any suggestions would be really helpful.
The color scale in pheatmap adjusts to the range of the input data. If you want anything above a certain value to be coloured daffodil, then simply send pheatmap a copy of your data with the highest values rounded to 2.
Suppose you have a data frame like this, with values anywhere between 0 and 3:
# Reproducible 8 x 8 example: 64 uniform draws between 0 and 3, with the
# columns named A to H.
set.seed(1)
data <- setNames(
  as.data.frame(matrix(runif(64, min = 0, max = 3), nrow = 8)),
  LETTERS[1:8]
)
data
#> A B C D E F G H
#> 1 0.7965260 1.8873421 2.1528555 0.801662 1.4806239 2.46283888 2.1969412 0.9488151
#> 2 1.1163717 0.1853588 2.9757183 1.158342 0.5586528 1.94118058 2.0781947 1.5559028
#> 3 1.7185601 0.6179237 1.1401055 0.040171 2.4821200 2.34879829 1.4328589 1.9860152
#> 4 2.7246234 0.5296703 2.3323357 1.147164 2.0054002 1.65910893 2.5836284 1.2204906
#> 5 0.6050458 2.0610685 2.8041157 2.609073 2.3827196 1.58915874 1.3142913 2.7386278
#> 6 2.6951691 1.1523112 0.6364276 1.021047 0.3238309 2.36806870 0.7343918 0.8808101
#> 7 2.8340258 2.3095243 1.9550213 1.446240 2.1711328 0.06999361 0.2120371 1.3771972
#> 8 1.9823934 1.4930977 0.3766653 1.798697 1.2338233 1.43169020 0.2983985 0.9971840
Some of the values are greater than two. We want all of these to appear the same colour on our heatmap, so we create a copy of our data for plotting, and round down all of the values that were greater than 2 to be exactly 2:
# Plotting copy of `data` in which every value above 2 is set to exactly 2;
# assigning into data_2[] keeps the data frame structure and column names.
data_2 <- data
data_2[] <- lapply(data_2, function(column) replace(column, column > 2, 2))
So now if we run pheatmap on data_2, we see that all the values that were greater than 2 in our original data frame are coloured daffodil.
library(viridis)
library(pheatmap)
# Values from 0 to 2 in steps of 0.1, passed to pheatmap as `legend_breaks`.
breaks_2 <- seq(from = 0, to = 2, by = 0.1)
# Draw the capped copy of the data without scaling or clustering.
pheatmap(
  mat = data_2,
  scale = "none",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = inferno(22),
  legend_breaks = breaks_2,
  border_color = "white",
  show_rownames = FALSE,
  show_colnames = TRUE
)
I have data that look like this:
Gene
HBEC-KT-01
HBEC-KT-02
HBEC-KT-03
HBEC-KT-04
HBEC-KT-05
Primarycells-02
Primarycells-03
Primarycells-04
Primarycells-05
BPIFB1
15726000000
15294000000
15294000000
14741000000
22427000000
87308000000
2.00E+11
1.04E+11
1.51E+11
LCN2
18040000000
26444000000
28869000000
30337000000
10966000000
62388000000
54007000000
56797000000
38414000000
C3
2.52E+11
2.26E+11
1.80E+11
1.80E+11
1.78E+11
46480000000
1.16E+11
69398000000
78766000000
MUC5AC
15647000
8353200
12617000
12221000
29908000
40893000000
79830000000
28130000000
69147000000
MUC5B
965190000
693910000
779970000
716110000
1479700000
38979000000
90175000000
41764000000
50535000000
ANXA2
14705000000
18721000000
21592000000
18904000000
22657000000
28163000000
24282000000
21708000000
16528000000
I want to make a heatmap like the following using R. I am following a paper and they quoted "Heat maps were generated with the ‘pheatmap’ package76, where correlation clustering distance row was applied". Here is their heatmap.
I want a heatmap like this one and am trying to make it in R by following tutorials, but I am new to the R language and know very little about it.
Here is my code.
# Read the tab-delimited table, using the "Gene" column as row names, and
# convert it to a numeric matrix for pheatmap.
df <- read.delim("R.txt", header = TRUE, row.names = "Gene")
df_matrix <- data.matrix(df)
# Heatmap with a reversed 10-colour RdYlBu palette; columns are left
# unclustered and row names are hidden.
# The final argument has no trailing comma, so no empty argument is forwarded
# through pheatmap's `...`.
pheatmap(
  df_matrix,
  main = "Heatmap of Extracellular Genes",
  color = colorRampPalette(rev(brewer.pal(n = 10, name = "RdYlBu")))(10),
  cluster_cols = FALSE,
  show_rownames = FALSE,
  fontsize_col = 10,
  cellwidth = 40
)
This is what I get.
When I try using clustering, I got the error.
# Attempt with row scaling and complete-linkage clustering; this is the call
# reported to fail with the hclust error.
# NOTE(review): `cluster_column` does not appear among pheatmap's documented
# arguments (the documented name is `cluster_cols`) - confirm.
pheatmap(
mat = df_matrix,
scale = "row",
cluster_column = F,
show_rownames = TRUE,
drop_levels = TRUE,
fontsize = 5,
clustering_method = "complete",
main = "Hierachical Cluster Analysis"
)
Error in hclust(d, method = method) :
NA/NaN/Inf in foreign function call (arg 10)
Can someone help me with the code?
You can normalize the data using scale to achieve a more uniform coloring. Here, the mean expression is set to 0 for each sample. Genes expressed at a lower level than average have a negative z-score:
library(tidyverse)
library(pheatmap)
# Expression table entered row-wise: one row per gene, one column per sample.
# NOTE(review): the question's table also lists a Primarycells-02 sample,
# which has no column here - confirm whether the omission is intentional.
data <- tribble(
~Gene, ~`HBEC-KT-01`, ~`HBEC-KT-02`, ~`HBEC-KT-03`, ~`HBEC-KT-04`, ~`HBEC-KT-05`, ~`Primarycells-03`, ~`Primarycells-04`, ~`Primarycells-05`,
"BPIFB1", 1.5726e+10, 1.5294e+10, 1.5294e+10, 1.4741e+10, 2.2427e+10, 2e+11, 1.04e+11, 1.51e+11,
"LCN2", 1.804e+10, 2.6444e+10, 2.8869e+10, 3.0337e+10, 1.0966e+10, 5.4007e+10, 5.6797e+10, 3.8414e+10,
"C3", 2.52e+11, 2.26e+11, 1.8e+11, 1.8e+11, 1.78e+11, 1.16e+11, 6.9398e+10, 7.8766e+10,
"MUC5AC", 15647000, 8353200, 12617000, 12221000, 29908000, 7.983e+10, 2.813e+10, 6.9147e+10,
"MUC5B", 965190000, 693910000, 779970000, 716110000, 1479700000, 9.0175e+10, 4.1764e+10, 5.0535e+10,
"ANXA2", 1.4705e+10, 1.8721e+10, 2.1592e+10, 1.8904e+10, 2.2657e+10, 2.4282e+10, 2.1708e+10, 1.6528e+10
)
# Standardise every numeric (sample) column, move the gene names into the
# row names, then draw a row-scaled heatmap with complete-linkage clustering
# of the rows.
data %>%
  mutate(across(where(is.numeric), scale)) %>%
  column_to_rownames("Gene") %>%
  pheatmap(
    scale = "row",
    # pheatmap's argument is `cluster_cols`; a misspelt name is not matched
    # to it and column clustering would stay switched on.
    cluster_cols = FALSE,
    show_rownames = FALSE,
    show_colnames = TRUE,
    treeheight_col = 0,
    drop_levels = TRUE,
    fontsize = 5,
    clustering_method = "complete",
    main = "Hierachical Cluster Analysis (z-score)"
  )
Created on 2021-09-26 by the reprex package (v2.0.1)
So, I am trying to create a pretty heatmap with pheatmap function in R.
I want to have coloured bars where the dendrogram stops and the graph starts in order to annotate the different clusters: a thick horizontal line running across the columns that changes colour when it passes into another cluster. It's pretty common, I think, but I can't figure out the 'annotations' in pheatmap. Any ideas?
# Simulated 100 x 100 matrix, its pairwise distances, and a hierarchical
# clustering of those distances.
structure <- matrix(rnorm(10000), ncol = 100, nrow = 100)
dist_structure <- dist(structure)
# hclust() needs the distance object itself, not the function `dist`.
clustering <- hclust(dist_structure)
cols2 <- colorRampPalette(c("green", "white", "red"))(20)
# One-column annotation holding each row's cluster (tree cut into 4 groups).
# pheatmap matches annotation_row to the matrix rows by row name, so the two
# sets of row names must agree.
annotation <- data.frame(Var1 = factor(cutree(clustering, k = 4)))
heat_chem <- pheatmap(
  as.matrix(dist_structure),
  clustering_distance_rows = dist_structure,
  clustering_distance_cols = dist_structure,
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  cutree_rows = 4,
  cutree_cols = 4,
  annotation_row = annotation,
  cellwidth = 4,
  cellheight = 4,
  legend = TRUE,
  show_rownames = TRUE,
  show_colnames = FALSE,
  fontsize_row = 5,
  color = cols2,
  main = "Heatmap"
)
The rownames of annotation are not matching the rownames of as.matrix(dist_structure)