plotting with specific values for heatmap in pheatmap - r

I have a data frame like this:
gene s1 s2 s3
1 -3.83 -8.17 -8.59
2 0.33 -4.51 -7.27
3 0.15 -5.26 -6.2
4 -0.08 -6.13 -5.95
5 -1.15 -4.82 -5.75
6 -0.99 -4.11 -4.85
7 0.42 -4.18 -4.54
8 -0.32 -3.43 -4.4
9 -0.72 -3.37 -4.39
I want to make a heatmap using pheatmap where if anything is below -4 it should be green and anything over +4 should be red and everything in between should red/green shades. I also don't want to scale my data and no clustering. I have this code so far in R:
d <- read.table("test.txt", header = TRUE, sep = "\t", row.names = 1, quote = "")
pheatmap(as.matrix(d), # matrix
scale = "none", # z score scaling applied to rows
cluster_cols=FALSE, # do not cluster columns
cluster_rows = FALSE,
treeheight_row=0, # do not show row dendrogram
show_rownames=FALSE, # do not show row names i.e gene names
main = "test.txt",
color = colorRampPalette(c("#0016DB","#FFFFFF","#FFFF00"))(50),
)
How can I plot this with the color scheme I mentioned above.
Thanks

d <-read.table(text="gene s1 s2 s3
1 -3.83 -8.17 -8.59
2 0.33 -4.51 -7.27
3 0.15 -5.26 -6.20
4 -0.08 -6.13 -5.95
5 -1.15 -4.82 -5.75
6 -0.99 -4.11 -4.85
7 0.42 -4.18 -4.54
8 -0.32 -3.43 -4.40
9 -0.72 -3.37 -4.39", header=T)
library(pheatmap)
my_colors <- c(min(d),seq(-4,4,by=0.01),max(d))
my_palette <- c("green",colorRampPalette(colors = c("green", "red"))
(n = length(my_colors)-2), "red")
pheatmap(as.matrix(d),
scale = "none",
cluster_cols=FALSE,
cluster_rows = FALSE,
treeheight_row=0,
show_rownames=FALSE,
main = "test.txt",
color = my_palette,
breaks = my_colors)
Created on 2019-05-29 by the reprex package (v0.3.0)

Related

Single heatmap on two symetric matrices with different colours and scales R

I want to achieve the same end goal as this question: Create a single heatmap based on two symmetric matrices in R but to take it further than the answer currently provided.
The answer given does not explain how one would go about having different colours for the upper and lower sections of the matrix and different scales?
Here is the example dataset:
library(Matrix)
set.seed(123)
s1<-forceSymmetric(matrix(round(rnorm(25),2),5))
colnames(s1)<-LETTERS[1:5]
rownames(s1)<-LETTERS[6:10]
diag(s1)<-1
s2<-forceSymmetric(matrix(round(rbinom(25,25,0.3),2),5))
colnames(s2)<-LETTERS[1:5]
rownames(s2)<-LETTERS[6:10]
diag(s2)<-1
s1
# 5 x 5 Matrix of class "dsyMatrix"
# A B C D E
# F 1.00 1.72 1.22 1.79 -1.07
# G 1.72 1.00 0.36 0.50 -0.22
# H 1.22 0.36 1.00 -1.97 -1.03
# I 1.79 0.50 -1.97 1.00 -0.73
# J -1.07 -0.22 -1.03 -0.73 1.00
s2
# 5 x 5 Matrix of class "dsyMatrix"
# A B C D E
# F 1 6 8 7 9
# G 6 1 5 9 8
# H 8 5 1 10 9
# I 7 9 10 1 1
# J 9 8 9 1 1
The suggested answer suggests to add the two matrices together as so:
#Get upper diagonal
reverse = s1[,ncol(s1):1]
diag(reverse) = 0
reverse[lower.tri(reverse, diag = FALSE)] <- 0
upper = reverse[,ncol(reverse):1]
# Get lower diagonal
reverse1 = s2[,ncol(s2):1]
diag(reverse1) = 0
reverse1[upper.tri(reverse1, diag = FALSE)] <- 0
upper1 = reverse1[,ncol(reverse1):1]
# Add them together
merged = as.matrix(upper+upper1)
merged
A B C D E
F 1.00 1.72 1.22 1.79 0
G 1.72 1.00 0.36 0.00 8
H 1.22 0.36 0.00 10.00 9
I 1.79 0.00 10.00 1.00 1
J 0.00 8.00 9.00 1.00 1
It then suggests using heatmap(merged) - however, how would you go about having different colours and scales for the upper and lower part of the matrix?
I am happy with using any package including ggplot2 to make this work.
Thanks in advance!
You can subset the relevant parts of the matrix in the data argument of a layer, and use {ggnewscale} to assign different fill scales to different layers. The trick is to declare a fill scale before adding new_scale_fill(), otherwise the order of operations goes wrong (which usually doesn't matter a lot, but here they do).
You can then tweak every individual scale. In the example below I just tweaked the palettes, but you can also adjust limits, breaks, labels etc.
# Assuming code from question has been executed and we have a 'merged' in memory
library(ggplot2)
library(ggnewscale)
# Wide matrix to long dataframe
# Later, we'll be relying on the notion that the dimnames have been
# converted to factor variables to separate out the upper from the lower
# matrix.
df <- reshape2::melt(merged)
ggplot(df, aes(Var1, Var2)) +
# The first layer, with its own fill scale
geom_raster(
data = ~ subset(.x, as.numeric(Var1) > as.numeric(Var2)),
aes(fill = value)
) +
scale_fill_distiller(palette = "Blues") +
# Declare new fill scale for the second layer
new_scale_fill() +
geom_raster(
data = ~ subset(.x, as.numeric(Var1) < as.numeric(Var2)),
aes(fill = value)
) +
scale_fill_distiller(palette = "Reds") +
# I'm not sure what to do with the diagonal. Make it grey?
new_scale_fill() +
geom_raster(
data = ~ subset(.x, as.numeric(Var1) == as.numeric(Var2)),
aes(fill = value)
) +
scale_fill_distiller(palette = "Greys", guide = "none")
In my opinion #teunbrand's answer is what you're looking for, but another potential option is to use the ComplexHeatmap package, e.g. based on one of the examples in the docs:
library(Matrix)
set.seed(123)
s1<-forceSymmetric(matrix(round(rnorm(25),2),5))
colnames(s1)<-LETTERS[1:5]
rownames(s1)<-LETTERS[6:10]
diag(s1)<-1
s2<-forceSymmetric(matrix(round(rbinom(25,25,0.3),2),5))
colnames(s2)<-LETTERS[1:5]
rownames(s2)<-LETTERS[6:10]
diag(s2)<-1
#Get upper diagonal
reverse = s1[,ncol(s1):1]
diag(reverse) = 0
reverse[lower.tri(reverse, diag = FALSE)] <- 0
upper = reverse[,ncol(reverse):1]
# Get lower diagonal
reverse1 = s2[,ncol(s2):1]
diag(reverse1) = 0
reverse1[upper.tri(reverse1, diag = FALSE)] <- 0
upper1 = reverse1[,ncol(reverse1):1]
# Add them together
m = as.matrix(upper+upper1)
m
#> A B C D E
#> F 1.00 1.72 1.22 1.79 0
#> G 1.72 1.00 0.36 0.00 8
#> H 1.22 0.36 0.00 10.00 9
#> I 1.79 0.00 10.00 1.00 1
#> J 0.00 8.00 9.00 1.00 1
library(ComplexHeatmap)
#> Loading required package: grid
#> ========================================
#> ComplexHeatmap version 2.8.0
#> Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
#> Github page: https://github.com/jokergoo/ComplexHeatmap
#> Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
#>
#> If you use it in published research, please cite:
#> Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
#> genomic data. Bioinformatics 2016.
#>
#> The new InteractiveComplexHeatmap package can directly export static
#> complex heatmaps into an interactive Shiny app with zero effort. Have a try!
#>
#> This message can be suppressed by:
#> suppressPackageStartupMessages(library(ComplexHeatmap))
#> ========================================
library(circlize)
#> ========================================
#> circlize version 0.4.13
#> CRAN page: https://cran.r-project.org/package=circlize
#> Github page: https://github.com/jokergoo/circlize
#> Documentation: https://jokergoo.github.io/circlize_book/book/
#>
#> If you use it in published research, please cite:
#> Gu, Z. circlize implements and enhances circular visualization
#> in R. Bioinformatics 2014.
#>
#> This message can be suppressed by:
#> suppressPackageStartupMessages(library(circlize))
#> ========================================
col1 = colorRamp2(c(-1, 10), c("white", "red"))
col2 = colorRamp2(c(-1, 10), c("white", "blue3"))
# here reordering the symmetric matrix is necessary
od = hclust(dist(m))$order
m = m[od, od]
ht = Heatmap(m, rect_gp = gpar(type = "none"), show_heatmap_legend = FALSE,
cluster_rows = FALSE, cluster_columns = FALSE,
layer_fun = function(j, i, x, y, w, h, fill) {
l = i > j
grid.rect(x[l], y[l], w[l], h[l],
gp = gpar(fill = col1(pindex(m, i[l], j[l])), col = NA))
l = i < j
grid.rect(x[l], y[l], w[l], h[l],
gp = gpar(fill = col2(pindex(m, i[l], j[l])), col = NA))
})
draw(ht, heatmap_legend_list = list(
Legend(title = "Group_A", col_fun = col1),
Legend(title = "Group_B", col_fun = col2)
))
Created on 2022-03-07 by the reprex package (v2.0.1)

Values on the x-axis of my R plot are random rather than the dates from the data frame

I loaded in a dataset to R that looks like this in the header:
date a b c
1 2017-01-01 -0.98 -1.35 -2.81
2 2017-02-01 -1.63 -2.18 -1.79
3 2017-03-01 -0.92 0.80 -3.33
4 2017-04-01 0.44 0.48 -2.11
5 2017-05-01 1.46 -3.11 -3.67
6 2017-06-01 -0.32 2.46 1.45
The full dataset includes 4 years of data with a total of 48 obs (from Jan-2017 to Dec-2020).
After loading in the dataset I change the format of the date variable to YYYYMM by using the code:
df$date <- format(as.Date(df$date), "%Y%m")
This results in the dates looking like this:
date a b c
1 201701 -0.98 -1.35 -2.81
2 201702 -1.63 -2.18 -1.79
3 201703 -0.92 0.80 -3.33
4 201704 0.44 0.48 -2.11
5 201705 1.46 -3.11 -3.67
6 201706 -0.32 2.46 1.45
After doing this I plot the data with this code:
plot(df$a, type="l", col="darkgreen", lwd=1, xlab="date", ylab="$", xaxs="i")
lines(df$b, col="red", lwd=1, xaxs="i")
lines(df$c, col="blue", lwd=1, xaxs="i")
legend("bottomleft", inset= 0.04, legend=c("a", "b", "c"),
col=c("darkgreen", "red", "blue"), lwd=3, cex=0.8)
Which results in the plot below:
However, the values of the x-axis do not show me the years so that I can measure the performance of a, b and c over time. How do I replace the values of the x-axis with the years in my dataset. And also, how do I make sure that only the years will be included on my x-axis and not my months as well?
The answers to this question I've seen so far has been to format the date etc. This is done and seems to work fine. Can anyone please tell me what to do about this issue?
Here are two solutions base R and ggplot2.
1. Base R
To plot multiple lines use either matplot or matlines.
colrs <- c("darkgreen", "red", "blue")
matplot(df[[1]], df[-1],
type = "l", lty = "solid", lwd = 1,
col = colrs,
xlab = "date", ylab = "$", xaxs = "i")
legend("bottomleft", inset = 0.04, legend = c("a", "b", "c"),
col = colrs, lwd = 3, cex = 0.8)
2. ggplot2
The data is in the wide format and one line per column vector is to be plotted against an x axis vector, in this case the date vector. This sort of problem is usually a data reformating problem. See reshaping data.frame from wide to long format.
library(ggplot2)
library(dplyr)
library(tidyr)
df %>%
pivot_longer(-date) %>%
ggplot(aes(date, value, colour = name)) +
geom_line() +
scale_colour_manual(breaks = c("a", "b", "c"), values = c("darkgreen", "red", "blue")) +
scale_x_date(date_labels = "%Y-%m") +
theme_classic() +
theme(axis.text.x = element_text(angle = 60, vjust = 0.5, hjust = 0.5))
Data
df <- read.table(text = "
date a b c
1 2017-01-01 -0.98 -1.35 -2.81
2 2017-02-01 -1.63 -2.18 -1.79
3 2017-03-01 -0.92 0.80 -3.33
4 2017-04-01 0.44 0.48 -2.11
5 2017-05-01 1.46 -3.11 -3.67
6 2017-06-01 -0.32 2.46 1.45
", header = TRUE)
df$date <- as.Date(df$date)
You replace the values of the x axis with the years in your dataset by adding xaxt="n" to your plot() command (which removes the current x axis) and calling
axis(1,at=1:nrow(df),labels=format(as.Date(df$date),"%Y"))
afterwards (which creates the desired x axis).

pheatmap : extracting row z score values

I am a little confused about row scaling in pheatmap. This is my data frame
gene s1 s2 s3
1 -3.83 -8.17 -8.59
2 0.33 -4.51 -7.27
3 0.15 -5.26 -6.2
4 -0.08 -6.13 -5.95
5 -1.15 -4.82 -5.75
6 -0.99 -4.11 -4.85
7 0.42 -4.18 -4.54
8 -0.32 -3.43 -4.4
9 -0.72 -3.37 -4.39
I need to extract those values of the data frame after pheatmap generates the graph with row z score
library(pheatmap)
my_colors <- c(min(d),seq(-4,4,by=0.01),max(d))
my_palette <- c("green",colorRampPalette(colors = c("green", "red"))
(n = length(my_colors)-2), "red")
pheatmap(as.matrix(d),
scale = "row",
cluster_cols=FALSE,
cluster_rows = FALSE,
treeheight_row=0,
show_rownames=FALSE,
main = "test.txt",
color = my_palette,
breaks = my_colors)
How can I get a mew matrix which pheatmap uses to make the heatmap?

Can't add legend panel to a certain scatter plot with multiple data sets

I simply can't find a way to plot legends panel in this specific ggplot with ggplot2 on R. Just want to make it appear.
For context, I'm plotting chemical abundances of sample versus the atomic number of the elements.
For background, I tried many things that are described here:
Reasons that ggplot2 legend does not appear
including links therein, however could not find a solution for my specific data set.
I know the problem could be within the structure of the data set, since I've been able to do that with other data, but I can't solve it. I also know that the problem should have to do with the theme() described in the code below, because when I use default ggplot configuration legends actually appear. I use this personalized theme for consistency trough out my work.
This is what I have so far removing cosmetics:
ggplot(atomic, aes(x=atomic$Z, y = atomic$avg, group=1), fill = atomic$Z) +
plot dots for average of values
geom_point(data=atomic, aes(x=atomic$Z, y=atomic$avg, group=1, color="black"), size=0.5, alpha=1, shape=16 ) +
connect dots for average of values
geom_line(data=atomic, aes(x=atomic$Z, y=atomic$avg, group=1), color="black", linetype= "dashed") +
plot dots for actual values from the samples
geom_point(data=atomic, aes(x=atomic$Z, y=atomic$SDSS, group=1, color="#00ba38"), size=5, alpha=1, shape=16, color="#00ba38") +
geom_point(data=atomic, aes(x=atomic$Z, y=atomic$HE22, group=1, color="#619cff"), size=5, alpha=1, shape=16, color="#619cff") +
geom_point(data=atomic, aes(x=atomic$Z, y=atomic$HE12, group=1, color="#F8766D"), size=5, alpha=1, shape=16, color="#F8766D") +
EDIT: the Definition of base_breaks (used below)
base_breaks_x <- function(x){
b <- pretty(x)
d <- data.frame(y=-Inf, yend=-Inf, x=min(b), xend=max(b))
list(geom_segment(data=d, aes(x=x, y=y, xend=xend, yend=yend), inherit.aes=FALSE),
scale_x_continuous(breaks=b))
}
base_breaks_y <- function(x){
b <- pretty(x)
d <- data.frame(x=-Inf, xend=-Inf, y=min(b), yend=max(b))
list(geom_segment(data=d, aes(x=x, y=y, xend=xend, yend=yend), inherit.aes=FALSE),
scale_y_continuous(breaks=b))
}
the problem might be here
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
text = element_text(size=20),
legend.position="bottom",
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
base_breaks_x(atomic$Z) +
base_breaks_y(atomic$HE22)
The data set is the following
Z Name HE22 SDSS HE12 avg
1 3 Li NA 1.00 NA 1.00
2 6 C 6.16 5.50 6.06 5.91
3 7 N NA NA 6.49 6.49
4 11 Na NA NA 3.53 3.53
5 12 Mg 5.32 4.43 4.99 4.91
6 13 Al 2.90 NA 3.08 2.99
7 14 Si NA 4.90 4.89 4.90
8 20 Ca 4.07 3.37 3.56 3.67
9 21 Sc 0.72 -0.07 0.24 0.30
10 22 Ti 2.74 1.79 2.47 2.33
11 23 V NA NA 1.18 1.18
12 24 Cr 2.88 2.14 2.67 2.56
13 25 Mn 2.34 1.59 2.44 2.12
14 26 Fe 4.92 4.14 4.59 4.55
15 27 Co 2.57 1.72 2.36 2.22
16 28 Ni 3.63 2.96 3.51 3.37
17 29 Cu NA NA 0.31 0.31
18 30 Zn 2.29 NA 2.44 2.37
19 38 Sr 0.62 0.29 0.41 0.44
20 39 Y -0.22 -0.44 -0.33 -0.33
21 40 Zr 0.60 NA 0.30 0.45
22 56 Ba 0.13 -0.10 0.12 0.05
23 57 La -0.77 -0.49 -0.77 -0.68
24 58 Ce NA NA -0.39 -0.39
25 59 Pr NA NA -0.78 -0.78
26 60 Nd -0.47 NA -0.37 -0.42
27 62 Sm NA NA -0.57 -0.57
28 63 Eu -1.02 -0.92 -0.85 -0.93
29 64 Gd NA NA -0.39 -0.39
30 66 Dy NA NA -0.16 -0.16
31 68 Er NA -0.40 NA -0.40
32 70 Yb NA -0.60 NA -0.60
33 90 Th NA -0.60 NA -0.60
as Z = atomic number, Name = element, HE12/HE22/SDSS = samples, avg = average of the samples.
I would like to know how I can add legend panel coherent with the colors of my scatter plots.
Thank you so much! Hope I could describe the problem properly.
This is personally what I would do.
I converted the data from wide format to long format since it's easier to manipulate colors that way (Sorry I just used generic "key" and "value" since I'm not sure what you would want your columns to be named). Hopefully this will get you at least part of the way to where you want to go. Let me know if you have questions!
library(ggplot2)
library(tidyr)
p <- atomic %>%
gather(key = "key", value = "value", SDSS, HE22, HE12) %>%
ggplot(aes(Z, value, color = key))+
geom_point() +
geom_text(aes(x = Z, y = avg, label = Name), # EDITED
color = "black")
scale_color_manual(values = c("#00ba38", "#619cff", "#F8766D"))
p +
geom_line(data=atomic, aes(x=atomic$Z, y=atomic$avg, group=1), color="black",
linetype= "dashed") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
text = element_text(size=20),
legend.position="bottom",
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
base_breaks_x(atomic$Z) +
base_breaks_y(atomic$HE22)
EDITED
I added the geom_text() command so labels show up. You can adjust the arguments so the labels look better. I've also heard geom_text_repel() in the ggrepel package is helpful for creating nice labels: https://cran.r-project.org/web/packages/ggrepel/vignettes/ggrepel.html#examples

Multidimensional scaling plot in R

I have a dataset ("data") that looks like this:
PatientID Visit Var1 Var2 Var3 Var4 Var5
1 ID1 0 44.28 4.57 23.56 4.36 8.87
2 ID1 1 58.60 5.34 4.74 3.76 6.96
3 ID1 2 72.44 11.18 21.22 2.15 8.34
4 ID2 0 65.98 6.91 8.57 1.19 7.39
5 ID2 1 10.33 38.27 0.48 14.41 NA
6 ID2 2 69.45 11.18 20.69 2.15 8.34
7 ID3 0 69.16 6.17 10.98 1.91 6.12
8 ID3 1 86.02 3.28 16.29 4.28 5.74
9 ID3 2 69.45 NA 20.69 2.15 8.34
10 ID4 0 98.55 26.75 2.89 3.92 2.19
11 ID4 1 32.66 14.38 4.96 1.13 4.78
12 ID4 2 70.45 11.42 21.78 2.15 8.34
I need to to generate an MDS plot with all datapoints. I also need the visit-points to be linked by a line and coloured as green for visit 1, red for visit 2 and black for visit3 (consistent colours for all individuals).
My code looks like this (quite lenghty, but it doesn't work):
data.cor <- cor(t(data[,3:7]), use = "pairwise.complete.obs", method = "spearman")
dim(data.cor)
dim(data)
rownames(data.cor) <- paste0(data$PatientID, "V", data$Visit)
colnames(data.cor) <- paste0(data$PatientID, "V", data$Visit)
c <- dist(data.cor)
fit <- cmdscale(c,eig=TRUE, k=2)
ff <- fit$points
ff <- as.data.frame(ff)
ff$pair <- paste0(substr(rownames(ff),1,6))
ff$pair <- factor(ff$pair)
pc.pair.distances <- matrix(nrow = nlevels(ff$pair), ncol = 1)
for(i in 1:nlevels(ff$pair)){
pair2 <- ff[ff$pair %in% levels(ff$pair)[i] , ]
pc.pair.distances[i,1] <- sqrt(
((pair2[1,1] - pair2[2,1]) * (pair2[1,1] - pair2[2,1]))
+ ((pair2[1,2] - pair2[2,2]) * (pair2[1,2] - pair2[2,2]))
)
rm(pair2)
}
plot(ff[,1], ff[,2], xlab="Principal 1", ylab="Principal 2", type = "n", las = 1)
for(i in 1:nlevels(ff$pair)){
lines(ff[ff$pair == levels(ff$pair)[i],1], ff[ff$pair == levels(ff$pair)[i],2], col = "grey")
}
points(ff[,1], ff[,2], xlab="Coordinate 1", ylab="Coordinate 2", type = "p",
pch = ifelse(grepl(x = substr(rownames(ff), 7,8), "V1"), 20, 18),
cex = 1.3)
)
I would really appreciate your help.
I suggest you to modify your data.frame in order to add a column for visit number and for indiv id with the function sapply.
ff$visit <- sapply(ff$pair,function(x){substr(x,5,5)})
ff$indiv <- sapply(ff$pair,function(x){substr(x,3,3)})
And then the library ggplot2 is very usefull to plot data. First, you draw the points :
g <- ggplot(ff,aes(V1,V2))+geom_point(aes(color=visit))
And then add lines for each individual :
for (i in unique(ff$indiv)){
g <- g+geom_line(data=ff[ff$indiv==i,],aes(V1,V2))
}

Resources