R: Making common legend and colour scheme for multiple shapefiles - r

A sample data first:
library(raster)
# download a sample shape file
shape.file <- getData('GADM', country='FRA', level=2)
ID_2 <- rep(shape.file#data$ID_2,times=2)
group.id <- rep(c(100,200),each=96)
set.seed(1)
cat1<- runif(192,min=0,max=100)
set.seed(2)
cat2<- runif(192,min=0,max=100)
set.seed(3)
cat3<- runif(192,min=0,max=100)
dat <- as.data.frame(cbind(ID_2,group.id,cat1,cat2,cat3))
# extract the data for group.id = 100 and group.id = 200 in two seprate df
group.id.100 <- dat[dat$group.id==100,]
group.id.200 <- dat[dat$group.id==200,]
# merge with shape file
merge.shp.100 <- merge(shape.file,group.id.100, by="ID_2")
merge.shp.200 <- merge(shape.file,group.id.200, by="ID_2")
# plot them together
par(mfrow=c(3,2))
plot(merge.shp.100,col=merge.shp$cat1,main="Group.id.100,cat1")
plot(merge.shp.100,col=merge.shp$cat2,main="Group.id.100,cat2")
plot(merge.shp.100,col=merge.shp$cat3,main="Group.id.100,cat3")
plot(merge.shp.200,col=merge.shp$cat1,main="Group.id.200,cat1")
plot(merge.shp.200,col=merge.shp$cat2,main="Group.id.200,cat2")
plot(merge.shp.200,col=merge.shp$cat3,main="Group.id.200,cat3")
I want to insert a common legend and colour scheme for all the 6 figures.
For example, the legend should go from minimum = min(c(cat1,cat2,cat3)) to max(c(cat1,cat2,cat3))
for all the six figures. Similarly, a value of 50 should have the same colour in all the 6 figures.
Thank you

You can use spplot
Example data:
library(raster)
s <- getData('GADM', country='FRA', level=2)[, c('GID_1', 'NAME_1', 'GID_2', 'NAME_2')]
set.seed(1)
m <- matrix(runif(96*6, 0, 100), nrow=96, ncol=6)
vars <- paste0("cat", 1:6)
colnames(m) <- vars
d <- data.frame(GID_2 = s$GID_2, m)
sd <- merge(s, d)
Use spplot:
spplot(sd, vars)
With base plot, you could do something like
brks <- seq(0,100,20)
par(mfrow=c(2,3), mai=c(0,0,0.5,0))
cols = rainbow(length(brks))
for (v in vars) {
cuts <- cut(data.frame(sd)[, v], brks)
plot(s, col=cols[cuts], main=v)
}

Related

Dendrogram plot remove tree labels at end of the branches

Using the example located here https://www.datacamp.com/community/tutorials/hierarchical-clustering-R and the data located https://archive.ics.uci.edu/ml/datasets/seeds# i am trying to remove the labels at the bottom of the dendrogram when using the color_branches
when plot(hclust_avg, labels=FALSE) it works but not later when using color_branches. is there a way to remove them?
`set.seed(786)
seeds_df <- read.csv("seeds_dataset.txt",sep = '\t',header = FALSE)
feature_name <- c('area','perimeter','compactness','length.of.kernel','width.of.kernal','asymmetry.coefficient','length.of.kernel.groove','type.of.seed')
colnames(seeds_df) <- feature_name
seeds_df<- seeds_df[complete.cases(seeds_df), ]
seeds_label <- seeds_df$type.of.seed
seeds_df$type.of.seed <- NULL
seeds_df_sc <- as.data.frame(scale(seeds_df))
dist_mat <- dist(seeds_df_sc, method = 'euclidean')
hclust_avg <- hclust(dist_mat, method = 'average')
cut_avg <- cutree(hclust_avg, k = 3)
suppressPackageStartupMessages(library(dendextend))
avg_dend_obj <- as.dendrogram(hclust_avg)
avg_col_dend <- color_branches(avg_dend_obj, h = 3)
plot(avg_col_dend)`
Figured this out by colouring the the labels white to the background
avg_dend_obj <- as.dendrogram(hclust_avg)
labels_colors(avg_dend_obj) <- "white"
plot(avg_dend_obj)

Plot a re-leveled pairwise distance matrix in ggplot2

Loading libraries and creating a reproducible example
#Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)
#Creating a fake pairwise matrix
locs <- 5
tmp <- matrix(runif(n = locs*locs),nrow = locs,ncol = locs)
tmp[upper.tri(tmp,diag = T)] <- NA
colnames(tmp) <- LETTERS[1:locs]
rownames(tmp) <- LETTERS[1:locs]
tmp
#Converting into a data frame
tmp1 <- as.data.frame(cbind(rownames(tmp),as.data.frame(tmp)))
names(tmp1)[1] <- "locA"
rownames(tmp1) <- NULL
head(tmp1)
#Changing it to long form and getting rid of NAs
tmp1 <- gather(tmp1, key = "locB",value = "value",-locA)
tmp1 <- tmp1[!is.na(tmp1$value),]
tmp1
#Making a tiled plot based on default levels
ggplot(tmp1, aes(x = locA, y=locB, fill=value, label=round(value,3)))+
geom_tile(color="black")+
geom_text(size=5,color="white")
But for reasons that make more biological sense, I want to change the order in which those comparisons are ordered
#biological order
my.order <- c("A","C","D","B","E")
my.order
#re-leveling
tmp1$locA <- factor(tmp1$locA, levels = my.order,ordered = T)
tmp1$locB <- factor(tmp1$locB, levels = my.order,ordered = T)
tmp1
#the releveled plot
ggplot(tmp1, aes(x = locA, y=locB, fill=value, label=round(value,3)))+
geom_tile(color="black")+
geom_text(size=5,color="white")
I am trying to find a way to get the "B-C" & "B-D" comparisons to be represented in the lower diagonal.
I tried to find a solution with a full matrix and lower.tri(), but have failed so far
#here is the full matrix
x <- tmp
x[is.na(x)] <- 0
y <- t(tmp)
y[is.na(y)] <- 0
full.matrix <- x+y
full.matrix
#the function lower.tri might be useful in this context
lower.tri(full.matrix)
Starting from after tmp and full.matrix are created, if you run:
reordered_mat <- full.matrix[match(my.order, rownames(full.matrix)),
match(my.order, colnames(full.matrix))]
lt_reordered_mat <- replace(reordered_mat, !lower.tri(reordered_mat), NA)
tmp1 <- as.data.frame(cbind(rownames(lt_reordered_mat),as.data.frame(lt_reordered_mat)))
And then rerun all your tmp1 creation code and reordering, then you should get you desired result:
Full reproducible code:
#Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)
#Creating a fake pairwise matrix
locs <- 5
tmp <- matrix(runif(n = locs*locs),nrow = locs,ncol = locs)
tmp[upper.tri(tmp,diag = T)] <- NA
colnames(tmp) <- LETTERS[1:locs]
rownames(tmp) <- LETTERS[1:locs]
x <- tmp
x[is.na(x)] <- 0
y <- t(tmp)
y[is.na(y)] <- 0
full.matrix <- x+y
my.order <- c("A","C","D","B","E")
reordered_mat <- full.matrix[match(my.order, rownames(full.matrix)),
match(my.order, colnames(full.matrix))]
lt_reordered_mat <- replace(reordered_mat, !lower.tri(reordered_mat), NA)
tmp1 <- as.data.frame(cbind(rownames(lt_reordered_mat),as.data.frame(lt_reordered_mat)))
names(tmp1)[1] <- "locA"
rownames(tmp1) <- NULL
#Changing it to long form and getting rid of NAs
tmp1 <- gather(tmp1, key = "locB",value = "value",-locA)
tmp1 <- tmp1[!is.na(tmp1$value),]
#re-leveling
tmp1$locA <- factor(tmp1$locA, levels = my.order,ordered = T)
tmp1$locB <- factor(tmp1$locB, levels = my.order,ordered = T)
#the releveled plot
ggplot(tmp1, aes(x = locA, y=locB, fill=value, label=round(value,3)))+
geom_tile(color="black")+
geom_text(size=5,color="white")
As Mike H. was providing his answer, I created a slightly different solution. I think his answer is better because it's more succinct and doesn't use a for loop.
#Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)
#Creating a fake pairwise matrix
locs <- 5
tmp <- matrix(runif(n = locs*locs),nrow = locs,ncol = locs)
tmp[upper.tri(tmp,diag = T)] <- NA
colnames(tmp) <- LETTERS[1:locs]
rownames(tmp) <- LETTERS[1:locs]
tmp
#Converting into a data frame
tmp1 <- as.data.frame(cbind(rownames(tmp),as.data.frame(tmp)))
names(tmp1)[1] <- "locA"
rownames(tmp1) <- NULL
head(tmp1)
#Changing it to long form and getting rid of NAs
tmp1 <- gather(tmp1, key = "locB",value = "value",-locA)
tmp1 <- tmp1[!is.na(tmp1$value),]
tmp1
#Making a tiled plot based on default levels
ggplot(tmp1, aes(x = locA, y=locB, fill=value, label=round(value,3)))+
geom_tile(color="black")+
geom_text(size=5,color="white")
#biological order
my.order <- c("A","C","D","B","E")
my.order
#re-leveling
tmp1$locA <- factor(tmp1$locA, levels = my.order,ordered = T)
tmp1$locB <- factor(tmp1$locB, levels = my.order,ordered = T)
tmp1
#the releveled plot
ggplot(tmp1, aes(x = locA, y=locB, fill=value, label=round(value,3)))+
geom_tile(color="black")+
geom_text(size=5,color="white")
#reordering tmp by my.order and replacing NAs with zero
x <- tmp
x<- x[my.order,my.order]
x[is.na(x)] <- 0
x
#identifying which values switch from the lower matrix to the upper matrix
y <- x
y[y !=0] <- 1
#figuring out which side of the matrix that needs to be switched to switch locA and locB
if(sum(y[lower.tri(y)]) > sum(y[upper.tri(y)])){ y[lower.tri(y)] <- 0 }
if(sum(y[lower.tri(y)]) == sum(y[upper.tri(y)])){ y[lower.tri(y)] <- 0 }
if(sum(y[lower.tri(y)]) < sum(y[upper.tri(y)])){ y[upper.tri(y)] <- 0 }
#Converting t into a long form data frame
fm <- as.data.frame(cbind(rownames(y),as.data.frame(y)))
names(fm)[1] <- "locA"
rownames(fm) <- NULL
fm <- gather(fm, key = "locB",value = "value",-locA)
#identifying which need to be switched and created an identifer to merge with
fm$action <- ifelse(fm$value == 1,"switch","keep")
fm$both <- paste0(fm$locA,fm$locB)
fm
#creating the same identifer in tmp1
tmp1$both <- paste0(tmp1$locA,tmp1$locB)
head(tmp1)
#merging the fm and tmp1 together
tmp2 <- merge(x = fm[,4:5],y = tmp1,by = "both")
tmp2
#using a for loop to make the necessary switches
i <- NULL
for(i in 1:nrow(tmp2)){
if(tmp2$action[i] == "switch"){
A <- as.character(tmp2$locA[i])
B <- as.character(tmp2$locB[i])
tmp2$locA[i] <- B
tmp2$locB[i] <- A
}
}
tmp2
#re-leveling to my order
tmp2$locA <- factor(tmp2$locA, levels = my.order,ordered = T)
tmp2$locB <- factor(tmp2$locB, levels = my.order,ordered = T)
tmp2
#now the graphic
ggplot(tmp2, aes(x = locA, y=locB, fill=value, label=round(value,3)))+
geom_tile(color="black")+
geom_text(size=5,color="white")

Faceting a plotly heatmap

I'd like to be able to facet an R plotly heatmap.
Here's what I mean:
I have a hierarchically-clustered gene expression dataset:
require(permute)
set.seed(1)
mat <- rbind(cbind(matrix(rnorm(2500,2,1),nrow=25,ncol=500),matrix(rnorm(2500,-2,1),nrow=25,ncol=500)),
cbind(matrix(rnorm(2500,-2,1),nrow=25,ncol=500),matrix(rnorm(2500,2,1),nrow=25,ncol=500)))
rownames(mat) <- paste("g",1:50,sep=".")
colnames(mat) <- paste("s",1:1000,sep=".")
hc.col <- hclust(dist(t(mat)))
dd.col <- as.dendrogram(hc.col)
col.order <- order.dendrogram(dd.col)
hc.row <- hclust(dist(mat))
dd.row <- as.dendrogram(hc.row)
row.order <- order.dendrogram(dd.row)
mat <- mat[row.order,col.order]
I then discretize it to specific expression ranges because that happens to help the resolution of colors for my case. I'm also creating other structures to help me plot the colorbar the way I want it to:
require(RColorBrewer)
mat.intervals <- cut(mat,breaks=6)
interval.mat <- matrix(mat.intervals,nrow=50,ncol=1000,dimnames=list(rownames(mat),colnames(mat)))
interval.cols <- brewer.pal(6,"Set2")
names(interval.cols) <- levels(mat.intervals)
require(reshape2)
interval.df <- reshape2::melt(interval.mat,varnames=c("gene","sample"),value.name="expr")
interval.cols2 <- rep(interval.cols, each=ncol(mat))
color.df <- data.frame(range=c(0:(2*length(interval.cols)-1)),colors=c(0:(2*length(interval.cols)-1)))
color.df <- setNames(data.frame(color.df$range,color.df$colors),NULL)
for (i in 1:(2*length(interval.cols))) {
color.df[[2]][[i]] <- interval.cols[[(i + 1) / 2]]
color.df[[1]][[i]] <- i/(2*length(interval.cols))-(i %% 2)/(2*length(interval.cols))
}
They way I generated the data I know that samples 1-500 are one cluster and samples 501:1000 are the other, so I label them:
interval.df$cluster <- NA
interval.df$cluster[which(interval.df$sample %in% paste("s",1:500,sep="."))] <- "A"
interval.df$cluster[which(interval.df$sample %in% paste("s",501:1000,sep="."))] <- "B"
I thought that adding a sample with not color and interval will create a white column in the heatmap plot that will look like a facet border:
divider.df <- data.frame(gene=unique(interval.df$gene),sample=NA,expr=NA,cluster=NA)
interval.df <- rbind(dplyr::filter(interval.df,cluster == "A"),divider.df,dplyr::filter(interval.df,cluster == "B"))
And now I try plotting:
#add ticks for each cluster
tick.vals <- c("s.158","s.617")
tick.text <- c("A","B")
require(plotly)
plot_ly(z=c(interval.df$expr),x=interval.df$sample,y=interval.df$gene,colors=interval.cols2,type="heatmap",colorscale=color.df,
colorbar=list(title="score",tickmode="array",tickvals=c(1:6),ticktext=names(interval.cols),len=0.2,outlinecolor="white",bordercolor="white",borderwidth=5,bgcolor="white")) %>%
layout(xaxis = list(title = 'Cluster',tickmode = 'array',tickvals = tick.vals,ticktext = tick.text))
But I don't see any separation between the clusters:
Any idea how to achieve such a facet border between the two clusters?
Your example is quite involved so I have reduced it down to a minimal example to focus on the gap you are looking for in the quadrants of your heatmap.
Modified from the examples on the plotly site, here.
library(plotly)
m <- matrix(rnorm(9), nrow = 3, ncol = 3)
p <- plot_ly(
x = c("a", "b", "c"), y = c("d", "e", "f"),
z = m, type = "heatmap"
)
subplot(p, p, p, p, shareX = TRUE, shareY = TRUE, nrows = 2)
If you create a plotly object for each of the quadrants and then use subplot, you will get a result looking similar to this:
N.B. I have cropped out the legend because it was duplicated for the facets, you could merge these into one.

R plotly heatmap tick labels being cutoff

Trying to produce a gene-expression heatmap using R's plotly. The gene names are quite long and the dimensions are quite big:
require(permute)
require(plotly)
set.seed(1)
mat <- matrix(shuffle(c(rnorm(5000,2,1),rnorm(5000,-2,1))),nrow=2500,ncol=4)
rownames(mat) <- paste("very_long_gene_name",1:2500,sep=".")
colnames(mat) <- paste("s",1:4,sep=".")
Clustering:
hc.col <- hclust(dist(t(mat)))
dd.col <- as.dendrogram(hc.col)
col.order <- order.dendrogram(dd.col)
hc.row <- hclust(dist(mat))
dd.row <- as.dendrogram(hc.row)
row.order <- order.dendrogram(dd.row)
mat <- mat[row.order,col.order]
Producing the plot and saving the html file:
heatmap.plotly <- plot_ly(x=colnames(mat),y=rownames(mat),z=mat,type="heatmap",colors=colorRamp(c("darkblue","white","darkred")))
htmlwidgets::saveWidget(heatmap.plotly,"heatmap.plotly.html")
The figure I get has the gene names cut off, and I'm not sure it's presenting all the data:
Any idea how to fix these two issues?
Increase your margins
m <- list(
l = 200,
r = 10,
b = 50,
t = 10,
pad = 2
)
heatmap.plotly <- plot_ly(x=colnames(mat),y=rownames(mat),z=mat,type="heatmap",
colors=colorRamp(c("darkblue","white","darkred"))) %>%
layout(margin = m)
heatmap.plotly

Change raster panel titles using levelplot

I'm using RasterVis and levelplot to make a trellis plot of some rasters. I am currently ok for most things but I would like to change the header for each panel from the filename to a chosen string (the filename is convoluted and long, i want to use just a year, for example '2004').
Looking at the levelplot page, it would indicate that levelplot goes looking for some settings as per the argument 'useRaster', either it goes to panel.levelplot or panel.levelplot.raster, but im struggling to use these latter functions.
Any help much appreciated, here's some sample code;
require(rasterVis)
layers <- c(1:4)
s2 <- stack()
for (i in layers) {
r <- raster(nrows=100, ncols=100,ext)
r[] <- sample(seq(from = 1, to = 6, by = 1), size = 10000, replace = TRUE)
rasc <- ratify(r)
rat <- levels(rasc)[[1]]
rat$legend <- c("A","B","C","D","E","F")
levels(rasc) <- rat
s2 <- stack(s2, rasc)
}
levelplot(s2, col.regions=rev(terrain.colors(6)),main = "example")
In the above e.g., I would like "layer.1.1" to be "2004", and so on through to 2007
require(rasterVis)
layers <- c(1:4)
s2 <- stack()
for (i in layers) {
r <- raster(nrows=100, ncols=100)
r[] <- sample(seq(from = 1, to = 6, by = 1), size = 10000, replace = TRUE)
rasc <- ratify(r)
rat <- levels(rasc)[[1]]
rat$legend <- c("A","B","C","D","E","F")
levels(rasc) <- rat
s2 <- stack(s2, rasc)
}
levelplot(s2, col.regions=rev(terrain.colors(6)),main = "example", names.attr=2004:2007)
p.strip <- list(cex=1.5, lines=1, col="blue", fontfamily='Serif')
levelplot(s2, col.regions=rev(terrain.colors(6)), main = "example",
names.attr=2004:2007, par.strip.text=p.strip)

Resources