ggplot showing names of selected ids - r

I want to plot a scatterplot using ggplot instead of plot and
produce this scatterplot where some IDs have different colors
and labeled:
Some asked for the dput of the data, so I added the dput in the end.
The problem with adding dput, the quesiton won't go through because
it shows too much coding compared to the question content, so that's
why I avoid it, and the head of dput is not helpful in reproducing the
real data.
I want to be able to plot all the values then show the name
for a selected IDs, not all of them.
This what I tried:
library(ggplot2)
library(ggrepel)
fig5ctrial<-read.csv(url("https://raw.githubusercontent.com/learnseq/learning/main/fig5mintab.txt"),sep = '\t',header = TRUE)
a_select<- c("RPL31", "HSPB1", "MAFB", "ALPL1", "VGF","PCSK1N", "BSG", "CALY", "B2M", "SCG5", "TM4SF4")
selalpha <- fig5ctrial[match(rev(a_select), fig5ctrial$geneIDs), ]
alphanames<-fig5ctrial$geneIDs
attach(fig5ctrial)
z1 <-plot(a.donor, a_cells, main="Scatterplot Example", xlab="Spearsman p all cells ", ylab="Spearsman p alpha cells ", pch=19)
text(a_cells~a.donor, labels=alphanames,data=fig5ctrial, font=2)
z01 <-
#Plot
ggplot()+
#assign alpha cell
geom_point(fig5ctrial,aes(a.donor, a_cells))+
#assign all cells
geom_point(fig5ctrial,aes(all_donors, all.cells))+
#assign IDs of interest
geom_point(fig5ctrial,aes(all_donors, all.cells, color = factor(selalpha)))+
#Add labels
geom_text_repel(data=subset(fig5ctrial,
geneIDs %in% a_select),
aes(label=geneIDs),show.legend = F)

The solution from #r2evans is tremendously complete (upvoted for the very well explained details) and better than this. I used the same ggrepel strategy, but with a simulated variable over your data:
library(ggplot2)
library(ggrepel)
#Data
fig5ctrial <- read.csv('https://raw.githubusercontent.com/learnseq/learning/main/alphacell.csv',stringsAsFactors = F)
#Group
fig5ctrial$allcellstypes <- sample(1:2,nrow(fig5ctrial),replace = T)
fig5ctrial$geneIDs <- trimws(fig5ctrial$geneIDs,whitespace = '\'')
#Plot
ggplot(fig5ctrial,aes(X...donor., a.cells, color = factor(allcellstypes)))+
geom_point()+
#Add labels
geom_text_repel(data=subset(fig5ctrial,
geneIDs %in% a_select),
aes(label=geneIDs),show.legend = F)+
labs(color='allcellstypes')
Output:
Update: After playing with data from OP, here a possible sketch to solve the issue (Remember in the last attempt we merged all data):
library(xlsx)
library(ggplot2)
library(ggrepel)
#Data
fig5cwithoutdesc <- read.xlsx('fig5cwithoutdesc.xlsx',1,colIndex = 1:4)
ids <- c("RPL31", "HSPB1", "MAFB", "ALPL1", "VGF","PCSK1N", "BSG", "CALY", "B2M", "SCG5", "TM4SF4")
#Clean gen id
fig5cwithoutdesc$geneIDs <- trimws(gsub('[[:punct:] ]+',' ',fig5cwithoutdesc$geneIDs))
#Plot
ggplot(fig5cwithoutdesc,aes(a_donor, a_cell,color=(geneIDs %in% ids)))+
geom_point()+
scale_color_manual(values = c('gray','blue'))+
geom_text_repel(data=subset(fig5cwithoutdesc,geneIDs %in% ids),
aes(label=geneIDs),force=19)+
theme_bw()+
theme(legend.position = 'none')
Output:

Here's a sample that uses a little dplyr (not essential) and ggrepel (essential).
Sample data:
set.seed(42)
dat <- data.frame(id = c(outer(letters, letters, paste0)), x = runif(26*26))
dat$y <- dat$x + rnorm(26*26, 0.2, 0.2)
dat[1:3,]
# id x y
# 1 aa 0.9148060 0.9611270
# 2 ba 0.9370754 1.0316538
# 3 ca 0.2861395 0.4818541
Code for the plot:
library(ggplot2)
library(ggrepel)
library(dplyr)
# dots we want to highlight
interesting <- c("mg", "qx", "zz")
dat %>%
mutate(id = replace(id, !id %in% interesting, "")) %>%
ggplot(., aes(x, y)) +
geom_point(aes(color = (id %in% interesting))) +
scale_color_manual(guide = FALSE, values = c("FALSE" = "black", "TRUE" = "red")) +
ggrepel::geom_text_repel(
aes(label = id), color = "red",
nudge_x = 0.5, direction = "x", hjust = 0)
Notes:
dplyr can easily be removed here, replaced with transform and perhaps a temp-variable;
the id=replace(...) portion is to remove the label (id) for any uninteresting variables, so that geom_text_repel will only label the interesting ones;
there are other techniques for highlighting specific dots, including adding another geom_point(..., data=~subset(., id %in% interesting)), but that adds more points ... and in some graphic formats (pdf, svg) this produces extra objects and therefore might have unintended consequences. Coloring the points in this way will be more difficult if you are already using aes(color=.) elsewhere.

Related

Show other data points when using ggiraph in R?

I am using ggiraph to make an interactive plot in R. My data is grouped and what I'm hoping to do is plot just the mean value of the group but when I hover over that point in the plot, the other points appear. Hopefully, my example below will explain what I mean.
To begin I create some data and make a basic plot:
library(ggplot2)
library(ggiraph)
# create some data
dat1 <- data.frame(X=rnorm(21),
Y=rnorm(21),
groupID=rep(1,21))
dat2 <- data.frame(X=rnorm(21,5),
Y=rnorm(21,5),
groupID=rep(2,21))
dat3 <- data.frame(X=rnorm(21,10),
Y=rnorm(21,10),
groupID=rep(3,21))
ggdat <- rbind(dat1,dat2,dat3)
ggdat$groupID <- as.factor(ggdat$groupID)
# create a plot
ggplot(ggdat, aes(X,Y)) +
geom_point(aes(color = groupID)) +
theme(legend.position = 'none')
We can see the 3 different groups in the above plot.
Then, I'm finding the mean value of each group and plot that. In the example plot below, I'm also plotting all the points with a low alpha value and the mean point in black.
library(dplyr)
# create mean data frame
dfMean <- ggdat %>%
group_by(groupID) %>%
dplyr::summarize(mX = mean(X), mY = mean(Y))
gg_scatter <- ggplot(dfMean, aes(mX, mY, tooltip = groupID, data_id = groupID)) +
geom_point(data = ggdat, aes(X,Y), alpha = 0.1, color = ggdat$groupID) +
theme(legend.position = 'none') +
geom_point_interactive()
gg_scatter
What I'm hoping to do is when I hover over one of the black points, it changes the alpha value for that group to, say, alpha = 1 and shows all the points for that group.
Naively I just tried:
girafe(ggobj = gg_scatter,
options = list(
opts_hover_inv(css = "opacity:0.5;"),
opts_hover(css = "fill:red;")
) )
but this will just highlight the mean point that I'm hovering over and changes the other mean values points alpha.
Is there a way to hover over the mean value point, which changes the alpha for that particular group?
I am not sure if I answer correctly, but I hope it could help:
In your code, you did not use geom_point_interactive()when plotting the first points, so they can not be interactive.
library(ggplot2)
library(ggiraph)
# create some data
dat1 <- data.frame(X=rnorm(21),
Y=rnorm(21),
groupID=rep(1,21))
dat2 <- data.frame(X=rnorm(21,5),
Y=rnorm(21,5),
groupID=rep(2,21))
dat3 <- data.frame(X=rnorm(21,10),
Y=rnorm(21,10),
groupID=rep(3,21))
ggdat <- rbind(dat1,dat2,dat3)
ggdat$groupID <- as.factor(ggdat$groupID)
library(dplyr)
# create mean data frame
dfMean <- ggdat %>%
group_by(groupID) %>%
dplyr::summarize(mX = mean(X), mY = mean(Y))
gg_scatter <- ggplot(dfMean, aes(mX, mY, tooltip = groupID, data_id = groupID)) +
geom_point_interactive(data = ggdat, aes(X,Y, color = groupID), alpha = 0.9) +
theme(legend.position = 'none') +
geom_point_interactive()
gg_scatter
girafe(ggobj = gg_scatter,
options = list(
opts_hover_inv(css = "opacity:0.1;"),
opts_hover(css = "fill:red;")
) )

How do you compare similarities between variables in an R data frame, based on two categorical variables and one numeric variables

I have a dataframe with three variables of interest: LGA(Location), Offence Category and Total (numeric)
What I am hoping to do, is compare the distance/similarity between each LGA, based on the Total value, in order to create a heat map or similar structure. Is this possible? And if so, what would the process be?
Here is a snippet of the data frame:
I don't really understand your question, but here is an example of a heatmap and a clustered heatmap for 'similar' data:
# Load libraries
library(tidyverse)
library(readxl)
library(httr)
# Find some data
url1 <- "https://www.bocsar.nsw.gov.au/Documents/lga/NewSouthWales.xlsx"
# Get the data and remove missing data points (NA's)
GET(url1, write_disk(tf <- tempfile(fileext = ".xlsx")))
df <- read_excel(path = tf, 2L, skip = 5) %>%
na.omit()
df2 <- df %>%
# format the data to "long format" for plotting
pivot_longer(cols = -c(`Premises type`)) %>%
# Change "Premises type" and "name" to factors
mutate(`Premises type` = factor(
`Premises type`, levels = unique(`Premises type`))
) %>%
mutate(name = factor(
name, levels = unique(name))
) %>%
# Remove the "Total" counts
filter(`Premises type` != "Total")
# Define colours for text (white for dark fill, black for light fill)
hcl <- farver::decode_colour(viridisLite::inferno(length(df2$value)), "rgb", "hcl")
label_col <- ifelse(hcl[, "l"] > 50, "black", "white")
# Plot the data (log scale for fill)
ggplot(df2, aes(y = fct_rev(`Premises type`),
x = name, fill = log(value))) +
geom_tile() +
geom_text(aes(label = value, color = factor(value)),
show.legend = FALSE, size = 2.5) +
theme(axis.text.x = element_text(angle = 45, hjust = 1.05),
axis.title = element_blank()) +
scale_color_manual(values = label_col) +
scale_fill_viridis_c(option = "inferno", na.value = "black")
And a clustered heatmap (similar Premises Type / Crime types cluster together):
# Load the raw data and format for pheatmap (expects a matrix)
dm <- read_excel(path = tf, 2L, skip = 5) %>%
na.omit() %>%
column_to_rownames(var = "Premises type")
# Plot the data
pheatmap::pheatmap(as.matrix(dm), scale = "row")
Edit
I haven't used it before, so I don't know if the output is correct, but based on this SO post you can use cluster::daisy() to get the gower dissimilarity for "Premises Type" then plot using pheatmap, e.g.
library(cluster)
pheatmap::pheatmap(as.matrix(daisy(dm)))
Edit 2
You only need two variables for this heatmap (i.e. "Local government Area" (Character) and "Total" (Numeric) should be fine):
# Load libraries
library(tidyverse)
library(readxl)
library(httr)
library(cluster)
library(pheatmap)
# Find some data
url1 <- "https://www.bocsar.nsw.gov.au/Documents/lga/NewSouthWales.xlsx"
# Get the data and remove missing data points (NA's)
GET(url1, write_disk(tf <- tempfile(fileext = ".xlsx")))
df <- read_excel(path = tf, 2L, skip = 5) %>%
na.omit()
# Select two variables, then set the Premises type as the rownames
df3 <- df %>%
select(`Premises type`, Robbery) %>%
column_to_rownames(var = "Premises type")
# (in your case, use "column_to_rownames(`Local government Area`)"
# Then plot the heatmap
pheatmap(daisy(as.matrix(df3)),
labels_row = rownames(df3),
labels_col = rownames(df3))

color ggplot line based on increasing or decreasing

I am trying to change the color of a line segment based on whether or not it is greater than the point before it. What am I doing wrong here?
Example:
from [1,4] to [2,5] would be green because the y value is increasing.
from [1,4] to [2,1] would be red because the y value is decreasing.
My code:
set.seed(42)
df<-NULL
df$x<-rnorm(10,0,10)
df$y<-rnorm(10,0,10)
df$colors<-cbind(lapply(1:length(df$x),function(i){
ifelse(df$x[i]>df$x[i-1],"#CC6666","#9999CC")
}))
df<-data.frame(df)
ggplot()+
geom_line(aes(x=df$x,y=df$y),size=1.5,colour=df$color)+
scale_color_manual(values=df$color)
Would something like this work for you, I re-arranged your example data a bit but we can use geom_segment() and dplyr::lead() to get the colors matched correctly and a little ggplot hack to make the labels nice again and ditch the NA:
set.seed(42)
df <- data.frame( x = rnorm(10,0,10),
y = rnorm(10,0,10) )
# base R
df <- df[order(df$x),]
df$color <- sapply(1:nrow(df), function(i) df$y[i] > df$y[i+1])
df$group <- "1"
library(tidyverse)
df <- arrange(df, x) %>%
mutate(color = y > lead(y),
group = "1") # group everything togther or else we get two lines
ggplot()+
geom_path(data = df,
aes(x=x, y=y, color = color, group = group),size=1.5) +
scale_color_manual(values = c("#CC6666","#9999CC"), na.value = "white",
labels = c("increase", "decrease", ""))

ggplot2 boxplots - How to avoid extra vertical space when there are no significant comparisons?

After many questions on how to make boxplots with facets and significance levels, particularly this and this, I still have one more little problem.
I managed to produce the plot shown below, which is exactly what I want.
The problem I am facing now is when I have very few, or no significant comparisons; in those cases, the whole space dedicated to the brackets showing the significance levels is still preserved, but I want to get rid of it.
Please check this MWE with the iris dataset:
library(reshape2)
library(ggplot2)
data(iris)
iris$treatment <- rep(c("A","B"), length(iris$Species)/2)
mydf <- melt(iris, measure.vars=names(iris)[1:4])
mydf$treatment <- as.factor(mydf$treatment)
mydf$variable <- factor(mydf$variable, levels=sort(levels(mydf$variable)))
mydf$both <- factor(paste(mydf$treatment, mydf$variable), levels=(unique(paste(mydf$treatment, mydf$variable))))
a <- combn(levels(mydf$both), 2, simplify = FALSE)#this 6 times, for each lipid class
b <- levels(mydf$Species)
CNb <- relist(
paste(unlist(a), rep(b, each=sum(lengths(a)))),
rep.int(a, length(b))
)
CNb
CNb2 <- data.frame(matrix(unlist(CNb), ncol=2, byrow=T))
CNb2
#new p.values
pv.df <- data.frame()
for (gr in unique(mydf$Species)){
for (i in 1:length(a)){
tis <- a[[i]] #variable pair to test
as <- subset(mydf, Species==gr & both %in% tis)
pv <- wilcox.test(value ~ both, data=as)$p.value
ddd <- data.table(as)
asm <- as.data.frame(ddd[, list(value=mean(value)), by=list(both=both)])
asm2 <- dcast(asm, .~both, value.var="value")[,-1]
pf <- data.frame(group1=paste(tis[1], gr), group2=paste(tis[2], gr), mean.group1=asm2[,1], mean.group2=asm2[,2], log.FC.1over2=log2(asm2[,1]/asm2[,2]), p.value=pv)
pv.df <- rbind(pv.df, pf)
}
}
pv.df$p.adjust <- p.adjust(pv.df$p.value, method="BH")
colnames(CNb2) <- colnames(pv.df)[1:2]
# merge with the CN list
pv.final <- merge(CNb2, pv.df, by.x = c("group1", "group2"), by.y = c("group1", "group2"))
# fix ordering
pv.final <- pv.final[match(paste(CNb2$group1, CNb2$group2), paste(pv.final$group1, pv.final$group2)),]
# set signif level
pv.final$map.signif <- ifelse(pv.final$p.adjust > 0.05, "", ifelse(pv.final$p.adjust > 0.01,"*", "**"))
# subset
G <- pv.final$p.adjust <= 0.05
CNb[G]
P <- ggplot(mydf,aes(x=both, y=value)) +
geom_boxplot(aes(fill=Species)) +
facet_grid(~Species, scales="free", space="free_x") +
theme(axis.text.x = element_text(angle=45, hjust=1)) +
geom_signif(test="wilcox.test", comparisons = combn(levels(mydf$both),2, simplify = F),
map_signif_level = F,
vjust=0.5,
textsize=4,
size=0.5,
step_increase = 0.06)
P2 <- ggplot_build(P)
#pv.final$map.signif <- "" #UNCOMMENT THIS LINE TO MOCK A CASE WHERE THERE ARE NO SIGNIFICANT COMPARISONS
#pv.final$map.signif[c(1:42,44:80,82:84)] <- "" #UNCOMMENT THIS LINE TO MOCK A CASE WHERE THERE ARE JUST A COUPLE OF SIGNIFICANT COMPARISONS
P2$data[[2]]$annotation <- rep(pv.final$map.signif, each=3)
# remove non significants
P2$data[[2]] <- P2$data[[2]][P2$data[[2]]$annotation != "",]
# and the final plot
png(filename="test.png", height=800, width=800)
plot(ggplot_gtable(P2))
dev.off()
Which produces this plot:
The plot above is exactly what I want... But I am facing cases where there are no significant comparisons, or very few. In these cases, a lot of vertical space is left empty.
To exemplify those scenarios, we can uncomment the line:
pv.final$map.signif <- "" #UNCOMMENT THIS LINE TO MOCK A CASE WHERE THERE ARE NO SIGNIFICANT COMPARISONS
So when there are no significant comparisons I get this plot:
If we uncomment this other line instead:
pv.final$map.signif[c(1:42,44:80,82:84)] <- "" #UNCOMMENT THIS LINE TO MOCK A CASE WHERE THERE ARE JUST A COUPLE OF SIGNIFICANT COMPARISONS
We are in a case where there are only a couple of significant comparisons, and obtain this plot:
So my question here is:
How to adjust the vertical space to the number of significant comparisons, so no vertical space is left there?
There might be something I could change in step_increase or in y_position inside geom_signif(), so I only leave space for the significant comparisons in CNb[G]...
One option is to pre-calculate the p-values for each combination of both levels and then select only the significant ones for plotting. Since we then know up front how many are significant, we can adjust the y-ranges of the plots to account for that. However, it doesn't look like geom_signif is capable of doing only within-facet calculations for the p-value annotations (see the help for the manual argument). Thus, instead of using ggplot's faceting, we instead use lapply to create a separate plot for each Species and then use grid.arrange from the gridExtra package to lay out the individual plots as if they were faceted.
(To respond to the comments, I want to emphasize that the plots are all still created with ggplot2, but we create what would have been the three facet panels of a single plot as three separate plots and then lay them out together as if they had been faceted.)
The function below is hard-coded for the data frame and column names in the OP, but can of course be generalized to take any data frame and column names.
library(gridExtra)
library(tidyverse)
# Change data to reduce number of statistically significant differences
set.seed(2)
df = mydf %>% mutate(value=rnorm(nrow(mydf)))
# Function to generate and lay out the plots
signif_plot = function(signif.cutoff=0.05, height.factor=0.23) {
# Get full range of y-values
y_rng = range(df$value)
# Generate a list of three plots, one for each Species (these are the facets)
plot_list = lapply(split(df, df$Species), function(d) {
# Get pairs of x-values for current facet
pairs = combn(sort(as.character(unique(d$both))), 2, simplify=FALSE)
# Run wilcox test on every pair
w.tst = pairs %>%
map_df(function(lv) {
p.value = wilcox.test(d$value[d$both==lv[1]], d$value[d$both==lv[2]])$p.value
data.frame(levs=paste(lv, collapse=" "), p.value)
})
# Record number of significant p.values. We'll use this later to adjust the top of the
# y-range of the plots
num_signif = sum(w.tst$p.value <= signif.cutoff)
# Plot significance levels only for combinations with p <= signif.cutoff
p = ggplot(d, aes(x=both, y=value)) +
geom_boxplot() +
facet_grid(~Species, scales="free", space="free_x") +
geom_signif(test="wilcox.test", comparisons = pairs[which(w.tst$p.value <= signif.cutoff)],
map_signif_level = F,
vjust=0,
textsize=3,
size=0.5,
step_increase = 0.08) +
theme_bw() +
theme(axis.title=element_blank(),
axis.text.x = element_text(angle=45, hjust=1))
# Return the plot and the number of significant p-values
return(list(num_signif, p))
})
# Get the highest number of significant p-values across all three "facets"
max_signif = max(sapply(plot_list, function(x) x[[1]]))
# Lay out the three plots as facets (one for each Species), but adjust so that y-range is same
# for each facet. Top of y-range is adjusted using max_signif.
grid.arrange(grobs=lapply(plot_list, function(x) x[[2]] +
scale_y_continuous(limits=c(y_rng[1], y_rng[2] + height.factor*max_signif))),
ncol=3, left="Value")
}
Now run the function with four different significance cutoffs:
signif_plot(0.05)
signif_plot(0.01)
signif_plot(0.9)
signif_plot(0.0015)
You can try. Although the answer is similar to my answer here, I added now a function.
library(tidyverse)
library(ggsignif)
# 1. your data
set.seed(2)
df <- as.tbl(iris) %>%
mutate(treatment=rep(c("A","B"), length(iris$Species)/2)) %>%
gather(key, value, -Species, -treatment) %>%
mutate(value=rnorm(n())) %>%
mutate(key=factor(key, levels=unique(key))) %>%
mutate(both=interaction(treatment, key, sep = " "))
# 2. pairwise.wilcox.test for 1) validation and 2) to calculate the ylim
Wilcox <- df %>%
split(., .$Species) %>%
map(~tidy(pairwise.wilcox.test(.$value, .$both, p.adjust.method = "none"))) %>%
map(~filter(.,.$p.value < 0.05)) %>%
bind_rows(.id="Species") %>%
mutate(padjust=p.adjust(p.value, method = "BH"))
# 3. calculate y range
Ylim <- df %>%
summarise(Min=round(min(value)),
Max=round(max(value))) %>%
mutate(Max=Max+0.5*group_by(Wilcox, Species) %>% count() %>% with(.,max(n)))
%>% c()
# 4. the plot function
foo <- function(df, Ylim, Signif=0.05){
P <- df %>%
ggplot(aes(x=both, y=value)) +
geom_boxplot(aes(fill=Species)) +
facet_grid(~Species) +
ylim(Ylim$Min, Ylim$Max)+
theme(axis.text.x = element_text(angle=45, hjust=1)) +
geom_signif(comparisons = combn(levels(df$both),2,simplify = F),
map_signif_level = F, test = "wilcox.test" ) +
stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
xlab("")
# 5. remove not significant values and add step increase
P_new <- ggplot_build(P)
P_new$data[[2]] <- P_new$data[[2]] %>%
filter(as.numeric(as.character(annotation)) < 0.05) %>%
group_by(PANEL) %>%
mutate(index=(as.numeric(group[drop=T])-1)*0.5) %>%
mutate(y=y+index,
yend=yend+index) %>%
select(-index) %>%
as.data.frame()
# the final plot
plot(ggplot_gtable(P_new))
}
foo(df, Ylim)
trying other data
set.seed(12345)
df <- as.tbl(iris) %>%
mutate(treatment=rep(c("A","B"), length(iris$Species)/2)) %>%
gather(key, value, -Species, -treatment) %>%
mutate(value=rnorm(n())) %>%
mutate(key=factor(key, levels=unique(key))) %>%
mutate(both=interaction(treatment, key, sep = " "))
foo(df, list(Min=-3,Max=5))
Ofcourse you can add the Ylim calculation to the function as well. In addition you can change or add ggtitel(), ylab() and change the color.

Create heatmap with values from matrix in ggplot2

I've seen heatmaps with values made in various R graphics systems including lattice and base like this:
I tend to use ggplot2 a bit and would like to be able to make a heatmap with the corresponding cell values plotted. Here's the heat map and an attempt using geom_text:
library(reshape2, ggplot2)
dat <- matrix(rnorm(100, 3, 1), ncol=10)
names(dat) <- paste("X", 1:10)
dat2 <- melt(dat, id.var = "X1")
p1 <- ggplot(dat2, aes(as.factor(Var1), Var2, group=Var2)) +
geom_tile(aes(fill = value)) +
scale_fill_gradient(low = "white", high = "red")
p1
#attempt
labs <- c(apply(round(dat[, -2], 1), 2, as.character))
p1 + geom_text(aes(label=labs), size=1)
Normally I can figure out the x and y values to pass but I don't know in this case since this info isn't stored in the data set. How can I place the text on the heatmap?
Key is to add a row identifier to the data and shape it "longer".
edit Dec 2022 to make code reproducible with R 4.2.2 / ggplot2 3.4.0 and reflect changes in tidyverse semantics
library(ggplot2)
library(tidyverse)
dat <- matrix(rnorm(100, 3, 1), ncol = 10)
## the matrix needs names
names(dat) <- paste("X", 1:10)
## convert to tibble, add row identifier, and shape "long"
dat2 <-
dat %>%
as_tibble() %>%
rownames_to_column("Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "value") %>%
mutate(
Var1 = factor(Var1, levels = 1:10),
Var2 = factor(gsub("V", "", Var2), levels = 1:10)
)
#> Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
#> `.name_repair` is omitted as of tibble 2.0.0.
#> ℹ Using compatibility `.name_repair`.
ggplot(dat2, aes(Var1, Var2)) +
geom_tile(aes(fill = value)) +
geom_text(aes(label = round(value, 1))) +
scale_fill_gradient(low = "white", high = "red")
Created on 2022-12-31 with reprex v2.0.2
There is another simpler way to make heatmaps with values. You can use pheatmap to do this.
dat <- matrix(rnorm(100, 3, 1), ncol=10)
names(dat) <- paste("X", 1:10)
install.packages('pheatmap') # if not installed already
library(pheatmap)
pheatmap(dat, display_numbers = T)
This will give you a plot like this
If you want to remove clustering and use your color scheme you can do
pheatmap(dat, display_numbers = T, color = colorRampPalette(c('white','red'))(100), cluster_rows = F, cluster_cols = F, fontsize_number = 15)
You can also change the fontsize, format, and color of the displayed numbers.

Resources