Zipf_plot() : How to compare two objects in one graph? - r

I'm trying to use the Zipf_plot function from the tm package to compare two different document-term matrices, and I'm not an R expert.
Could you tell me whether there is a way to pass both of them to this function?
Zipf_plot(x, type = "l", ... )
I know, there's a possibility to get both (or more) of them in one window:
par(mfrow=c())
but I'd really appreciate a solution with two or more dtms in one graph.
Thanks in advance! :-)

You could try par(new=TRUE), or adapt the function to your needs, e.g.:
# Overlay the Zipf curves of two corpora by drawing the second plot on top
# of the first one.
library(tm)
data("acq")
data("crude")
m1 <- DocumentTermMatrix(acq)
m2 <- DocumentTermMatrix(crude)
# par(new = TRUE) makes the next high-level plot set up its own coordinate
# system. Both calls therefore get the same axis limits; otherwise the two
# curves and their overprinted tick labels would be on different scales.
xlim <- c(0, 7)
ylim <- c(0, 6)
Zipf_plot(m1, col = "red", xlim = xlim, ylim = ylim)
par(new = TRUE)
Zipf_plot(m2, col = "blue", xlim = xlim, ylim = ylim)
# Draw Zipf curves (log frequency against log rank) for several term matrices
# in one plot, each with a dotted least-squares line.
#
# xx    list of DocumentTermMatrix / TermDocumentMatrix objects
# type  plot type, applied to every curve
# cols  one colour per element of xx
# ...   further arguments for plot(); xlab and ylab get defaults if absent
#
# Prints the coefficients of every fitted line and invisibly returns them as
# a list with one element per matrix.
Zipf_plot_multi <- function(xx, type = "l", cols = rainbow(length(xx)), ...) {
  stopifnot(is.list(xx), length(xx) == length(cols))

  # The plot() arguments do not depend on the series, so build them once.
  dots <- list(...)
  if (is.null(dots$xlab)) {
    dots$xlab <- "log(rank)"
  }
  if (is.null(dots$ylab)) {
    dots$ylab <- "log(frequency)"
  }

  fits <- vector("list", length(xx))
  for (idx in seq_along(xx)) {
    x <- xx[[idx]]
    if (inherits(x, "TermDocumentMatrix")) {
      x <- t(x)
    }
    y <- log(sort(slam::col_sums(x), decreasing = TRUE))
    x <- log(seq_along(y))
    m <- lm(y ~ x)
    if (idx == 1) {
      # The first series sets up the axes.
      do.call(plot, c(list(x, y, type = type, col = cols[idx]), dots))
    } else {
      # Later series are added with the same `type` as the first one.
      lines(x, y, type = type, col = cols[idx])
    }
    abline(m, col = cols[idx], lty = "dotted")
    print(coef(m))
    fits[[idx]] <- coef(m)
  }
  invisible(fits)
}
Zipf_plot_multi(list(m1, m2), xlim=c(0, 7), ylim=c(0,6))

Related

How to increase coef label size in plot.glmnet - cex does not work

Using: the package "glmnet"
Problem: I use the plot function to draw a lasso path, but I feel the coefficient labels are too small. I tried to change them with cex, but it has no effect. According to the "glmnet" documentation, its plot function behaves like the normal plot. Any ideas?
My code:
plot(f1, xvar='lambda', label=TRUE, cex=1.5)
Other: I also tried cex.lab=3. It worked, but only for the x- and y-axis labels.
What you are actually using is the generic plot which dispatches a method depending on the class of the object. In this case,
class(fit1)
# [1] "elnet" "glmnet"
glmnet:::plot.glmnet will be used, which internally uses glmnet:::plotCoef. And here lies the problem: in glmnet:::plotCoef the respective cex parameter is hard-coded to 0.5. We need a hack:
# Replacement plot method for glmnet fits: resolves the requested x variable
# and hands the fit's components to plotCoef2() instead of the packaged
# plotting helper. Everything in `...` is forwarded to plotCoef2().
plot.glmnet <- function(x, xvar = c("norm", "lambda", "dev"),
                        label = FALSE, ...) {
  axis_choice <- match.arg(xvar)
  plotCoef2(
    x$beta,
    lambda = x$lambda,
    df = x$df,
    dev = x$dev.ratio,
    label = label,
    xvar = axis_choice,
    ...
  )
}
# Plot coefficient paths, optionally labelling each path with its variable
# index.
#
# beta     coefficient matrix (variables in rows, one column per fit)
# norm     optional x values for xvar = "norm"; computed from beta if missing
# lambda   lambda sequence (x axis is log(lambda) for xvar = "lambda")
# df       values shown on the top axis
# dev      x values for xvar = "dev"
# label    if TRUE, write the variable index next to each path
# xvar     which quantity goes on the x axis
# xlab     x-axis label; the default is the name chosen for xvar
# ylab     y-axis label
# lab.cex  size of the path labels
# xadj     horizontal shift applied to the path labels
# ...      passed on to matplot()
plotCoef2 <- function(beta, norm, lambda, df, dev, label = FALSE,
                      xvar = c("norm", "lambda", "dev"), xlab = iname,
                      ylab = "Coefficients", lab.cex = 0.5, xadj = 0, ...) {
  which <- glmnet:::nonzeroCoef(beta)
  nwhich <- length(which)
  if (nwhich == 0) {
    warning("No plot produced since all coefficients zero")
    return()
  }
  if (nwhich == 1) {
    warning("1 or less nonzero coefficients; glmnet plot is not meaningful")
  }
  beta <- as.matrix(beta[which, , drop = FALSE])

  # `iname` must be set before the plot call: it is the lazy default of xlab.
  xvar <- match.arg(xvar)
  if (xvar == "norm") {
    index <- if (missing(norm)) apply(abs(beta), 2, sum) else norm
    iname <- "L1 Norm"
    approx.f <- 1
  } else if (xvar == "lambda") {
    index <- log(lambda)
    iname <- "Log Lambda"
    approx.f <- 0
  } else {
    index <- dev
    iname <- "Fraction Deviance Explained"
    approx.f <- 1
  }

  # Draw lines unless the caller supplied a plot type of their own.
  extras <- list(...)
  if (is.null(extras$type)) {
    matplot(index, t(beta), lty = 1, xlab = xlab, ylab = ylab, type = "l", ...)
  } else {
    matplot(index, t(beta), lty = 1, xlab = xlab, ylab = ylab, ...)
  }

  # Top axis: df values looked up at the pretty tick positions.
  atdf <- pretty(index)
  prettydf <- approx(
    x = index, y = df, xout = atdf,
    rule = 2, method = "constant", f = approx.f
  )$y
  axis(3, at = atdf, labels = prettydf, tcl = NA)

  if (label) {
    # Labels sit at the left edge for xvar = "lambda", else at the right edge.
    at_left <- xvar == "lambda"
    anchor <- if (at_left) min(index) else max(index)
    side <- if (at_left) 2 else 4
    text(rep(anchor + xadj, nwhich), beta[, ncol(beta)], paste(which),
         cex = lab.cex, pos = side)
  }
}
If we load the two hacked functions, we can now adjust the coefficient labels. The new parameters are lab.cex for the label size and xadj to shift the x position of the numbers.
plot(fit1, xvar='lambda', label=TRUE, lab.cex=.8, xadj=.085)
Data:
# Reproducible example fit used in the plot call above.
library(glmnet)  # provides glmnet(); without it the last line fails
set.seed(122873)
x <- matrix(rnorm(100 * 10), 100, 10)
y <- rnorm(100)
fit1 <- glmnet(x, y)

R Programming other alternatives for plot

I wonder how you can simplify these two :
plot (payroll,wins)
id = identify(payroll, wins,labels = code, n = 5)
plot (payroll,wins)
with(data, text(payroll, wins, labels = code, pos = 1, cex=0.5))
using other alternatives - pch() and as.numeric()?
Not sure it's easier, but you can change pch during identification as shown below (taken from R-help). Every time you click, the empty point changes to a filled-in dot.
# data simulation
data <- data.frame(payroll = rnorm(10), wins = rnorm(10), code = letters[1:10])
# Let the user pick up to n points one click at a time, redrawing every
# picked point with symbol `pch`.
#
# x, y  coordinates in any form accepted by xy.coords()
# n     maximum number of points to pick
# plot  passed to identify()
# pch   symbol used to mark a picked point
# ...   further arguments for identify()
#
# Returns the indices of the picked points.
identifyPch <- function(x, y = NULL, n = length(x), plot = FALSE, pch = 19, ...)
{
  coords <- xy.coords(x, y)
  # Overwrite x before `n` is first used: its lazy default, length(x), is
  # then the number of points.
  x <- coords$x
  y <- coords$y
  picked <- rep(FALSE, length(x))
  repeat {
    if (sum(picked) >= n) {
      break
    }
    remaining <- which(!picked)
    hit <- identify(x[remaining], y[remaining], labels = remaining,
                    n = 1, plot = plot, ...)
    if (!length(hit)) {
      break
    }
    chosen <- remaining[hit]
    points(x[chosen], y[chosen], pch = pch)
    picked[chosen] <- TRUE
  }
  # indices of the selected points
  which(picked)
}
if(dev.interactive()) { ## use it
with(data, plot(payroll,wins))
id = with(data, identifyPch(payroll, wins))
}

label ylab in timeSeries::plot, type = 'o'

How do I label the y-axis with Greek letters when using timeSeries::plot, i.e. change SB, SP, etc. to \alpha, \beta, etc.? I am aware that I need expression() in some way. However, I can't even get to the labels (I normally use ggplot2). Code below.
# install.packages("xtable", dependencies = TRUE)
library("timeSeries")
## Load Swiss Pension Fund Benchmark Data -
LPP <- LPP2005REC[1:12, 1:4]
colnames(LPP) <- abbreviate(colnames(LPP), 2)
finCenter(LPP) <- "GMT"
timeSeries::plot(LPP, type = "o")
It has been pointed out that the object structure, as shown by str(), is quite different for LPP compared with, say, this object z
z <- ts(matrix(rnorm(300), 100, 3), start = c(1961, 1), frequency = 12)
plot(z)
If anyone has an answer to either or both, I would appreciate it. I realize I can convert the data and plot it with ggplot2 (I have seen that here on SO), but I am interested in doing it directly on the timeSeries object LPP and on the stats time-series object z
[ REVISION & Edited ]
When plot.type is "multiple", we can't define ylab directly. Both plot(ts.obj) (S3 method) and plot(timeSeries.obj) (S4 method) take colnames(obj) as ylab, and I don't know of any way to use Greek letters as column names. (The difference in structure mainly comes from the difference between S3 and S4; colnames(timeSeries.obj) is equivalent to timeSeries.obj@units; the defaults are Series i and TS.i.)
We can step in and set ylab through the argument panel (it expects a function; the default is lines). It is called inside for(i in 1:ncol(data)). I couldn't pass a suitable "i" to the panel function (I guess it is possible somehow, but I couldn't think of a way), so I recover "i" by checking which column the data matches.
for timeSeries
# Greek y labels for the panels of a multi-panel timeSeries plot.
ylabs <- expression(alpha, beta, gamma, delta)
# First observation of every series; used to work out which column a panel
# is drawing.
row1 <- LPP[1, ]

# Panel function: draw the series, then write the label whose column starts
# with the same value as the data handed to this panel.
timeSeries.panel.f <- function(x, y, ...) {
  lines(x, y, ...)
  panel_idx <- which(row1 %in% y[1])
  mtext(ylabs[panel_idx], side = 2, line = 3)
}

# ann = FALSE suppresses the default annotation; title and x label are added
# by hand.
plot(LPP, panel = timeSeries.panel.f, type = "o", ann = FALSE)
title("Title")
mtext("Time", side = 1, line = 3)
## If you aren't so concerned about warnings, here is a more general version.
## (Many functions read `...` and they return warnings.)
##
## Panel function that receives the labels (`ylabs`) and the first row of the
## data (`row1`) as arguments instead of reading them from the workspace.
## Both must be supplied by the caller: the former defaults `ylabs = ylabs`
## and `row1 = row1` referred to themselves and failed with "promise already
## under evaluation" whenever they were actually used.
timeSeries.panel.f2 <- function(x, y, ..., ylabs, row1) {
  lines(x, y, ...)
  # Pick the label of the column whose first value matches this panel's data.
  mtext(ylabs[which(row1 %in% y[1])], 2, line = 3)
}
plot(LPP, panel = timeSeries.panel.f2, type = "o", ann = FALSE,
     ylabs = expression(alpha, beta, gamma, delta), row1 = LPP[1, ])
title("Title")
mtext("Time", 1, line = 3)
for ts
# Greek y labels for the panels of a multi-panel ts plot.
ylabs <- expression(alpha, beta, gamma)
# First observation of every series; identifies the column behind a panel.
row1 <- z[1, ]

# Panel function: draw the series, then write the label whose column starts
# with the same value as the series handed to this panel.
ts.panel.f <- function(y, ...) {
  lines(y, ...)
  panel_idx <- which(row1 %in% y[1])
  mtext(ylabs[panel_idx], side = 2, line = 3)
}

plot(z, panel = ts.panel.f, ann = FALSE)
title("Title")
mtext("Time", side = 1, line = 3)
Of course, you can also achieve it with new functions derived from the originals (mostly identical to them). I show only the modified parts.
modified plot(ts.obj) (made from plot.ts)
my.plot.ts <- function(~~~, my.ylab = NULL) {
:
nm <- my.ylab # before: nm <- colnames(x)
:
}
# use
my.plot.ts(z, my.ylab = expression(alpha, beta, gamma), type = "o")
modified plot(timeSeries.obj)
# made from `.plot.timeSeries`
my.plot.timeSeries <- function(~~~, my.ylab = NULL) {
:
my.plotTimeSeries(~~~, my.ylab = my.ylab)
}
# made from `timeSeries:::.plotTimeSeries`
my.plotTimeSeries <- function(~~~, my.ylab) {
:
nm <- my.ylab # before: nm <- colnames(x)
:
}
#use
my.plot.timeSeries(LPP, my.ylab = expression(alpha, beta, gamma, delta), type="o")

how to create a heatmap with a fixed external hierarchical cluster

I have a matrix data, and want to visualize it with heatmap. The rows are species, so I want visualize the phylogenetic tree aside the rows and reorder the rows of the heatmap according the tree. I know the heatmap function in R can create the hierarchical clustering heatmap, but how can I use my phylogenetic clustering instead of the default created distance clustering in the plot?
First you need to use package ape to read in your data as a phylo object.
library(ape)
dat <- read.tree(file="your/newick/file")
#or
dat <- read.tree(text="((A:4.2,B:4.2):3.1,C:7.3);")
The following only works if your tree is ultrametric.
The next step is to transform your phylogenetic tree into class dendrogram.
Here is an example:
# bird.orders ships with ape and is already a phylo object.
data(bird.orders)
# as.dendrogram() has no method for phylo objects, so go through hclust.
hc <- as.hclust(bird.orders)
dend <- as.dendrogram(hc)
plot(dend, horiz = TRUE)
# Random 23 x 23 matrix whose row and column names are the tip labels in
# shuffled order.
values <- rnorm(23 * 23)
row_names <- sample(bird.orders$tip.label, 23)
col_names <- sample(bird.orders$tip.label, 23)
mat <- matrix(values, nrow = 23, dimnames = list(row_names, col_names))
First we need to order the matrix according to the order in the phylogenetic tree:
ord.mat <- mat[bird.orders$tip,bird.orders$tip]
Then input it to heatmap:
heatmap(ord.mat, Rowv=dend, Colv=dend)
Edit: Here is a function to deal with ultrametric and non-ultrametric trees.
# Heatmap with a phylogenetic tree along the rows (left) and along the
# columns (top); tip labels are written to the right and below.
#
# x     numeric matrix whose row/column names contain the tip labels
# Rowp  phylogenetic tree (class phylo) to be used in rows
# Colp  phylogenetic tree (class phylo) to be used in columns
# ...   additional arguments to be passed to image()
heatmap.phylo <- function(x, Rowp, Colp, ...) {
  # Reorder the matrix to follow the tip order of both trees.
  x <- x[Rowp$tip, Colp$tip, drop = FALSE]
  xl <- c(0.5, ncol(x) + 0.5)
  yl <- c(0.5, nrow(x) + 0.5)
  # 3 x 3 grid: 1 = column tree, 2 = row tree, 3 = image,
  # 4 = row labels, 5 = column labels; the corners stay empty.
  layout(matrix(c(0, 1, 0, 2, 3, 4, 0, 5, 0), nrow = 3, byrow = TRUE),
         widths = c(1, 3, 1), heights = c(1, 3, 1))
  par(mar = rep(0, 4))
  # No xlab/ylab here: the later revision of this function drops them from
  # the tree plots, so they are dropped here as well.
  plot(Colp, direction = "downwards", show.tip.label = FALSE,
       xaxs = "i", x.lim = xl)
  par(mar = rep(0, 4))
  plot(Rowp, direction = "rightwards", show.tip.label = FALSE,
       yaxs = "i", y.lim = yl)
  par(mar = rep(0, 4), xpd = TRUE)
  # image() draws the rows of its matrix along the horizontal axis. Transpose
  # so that matrix columns run left to right under the column tree and matrix
  # rows run bottom to top next to the row tree.
  image(seq_len(ncol(x)) - 0.5, seq_len(nrow(x)) - 0.5, t(x),
        xaxs = "i", yaxs = "i", axes = FALSE, xlab = "", ylab = "", ...)
  par(mar = rep(0, 4))
  plot(NA, axes = FALSE, ylab = "", xlab = "", yaxs = "i",
       xlim = c(0, 2), ylim = yl)
  text(rep(0, nrow(x)), seq_len(nrow(x)), Rowp$tip, pos = 4)
  par(mar = rep(0, 4))
  plot(NA, axes = FALSE, ylab = "", xlab = "", xaxs = "i",
       ylim = c(0, 2), xlim = xl)
  text(seq_len(ncol(x)), rep(2, ncol(x)), Colp$tip, srt = 90, pos = 2)
}
Here is with the previous (ultrametric) example:
heatmap.phylo(mat, bird.orders, bird.orders)
And with a non-ultrametric:
cat("owls(((Strix_aluco:4.2,Asio_otus:4.2):3.1,Athene_noctua:7.3):6.3,Tyto_alba:13.5);",
file = "ex.tre", sep = "\n")
tree.owls <- read.tree("ex.tre")
mat2 <- matrix(rnorm(4*4),nrow=4,
dimnames=list(sample(tree.owls$tip,4),sample(tree.owls$tip,4)))
is.ultrametric(tree.owls)
[1] FALSE
heatmap.phylo(mat2,tree.owls,tree.owls)
First, I create a reproducible example. Without data we can only guess what you want, so please try to do better next time (especially as you are an established user). For example, you can create your tree in Newick format like this:
tree.text='(((XXX:4.2,ZZZ:4.2):3.1,HHH:7.3):6.3,AAA:13.6);'
Like @plannapus, I am using ape to convert this tree to the hclust class. Unfortunately, it looks as if the conversion is only possible for an ultrametric tree, i.e. one where the distance from the root to each tip is the same.
library(ape)
tree <- read.tree(text='(((XXX:4.2,ZZZ:4.2):3.1,HHH:7.3):6.3,AAA:13.6);')
is.ultrametric(tree)
hc <- as.hclust.phylo(tree)
Then I am using dendrogramGrob from latticeExtra to plot my tree. and levelplot from lattice to draw the heatmap.
library(latticeExtra)
# Dendrogram of the tree and the leaf order it implies.
dd.col <- as.dendrogram(hc)
col.ord <- order.dendrogram(dd.col)
# Random 4 x 4 matrix named after the tips.
mat <- matrix(rnorm(4 * 4), nrow = 4)
dimnames(mat) <- list(tree$tip.label, tree$tip.label)

# Legend slot on the right-hand side that draws the dendrogram.
dendro_legend <- list(
  right = list(
    fun = dendrogramGrob,
    args = list(x = dd.col, side = "right", size = 10)
  )
)

# Panel: shaded background with one large diamond per cell.
shaded_points <- function(...) {
  panel.fill('black', alpha = 0.2)
  panel.levelplot.points(..., cex = 12, pch = 23)
}

levelplot(
  mat[tree$tip, tree$tip],
  type = c('g', 'p'),
  aspect = "fill",
  colorkey = list(space = "left"),
  legend = dendro_legend,
  panel = shaded_points
)
I adapted plannapus' answer to deal with more than one tree (also cutting out some options I didn't need in the process):
library(ape)
# Heatmap flanked by phylogenetic trees, with a colour key in the top-left
# corner. Each margin takes either a single tree or a list of trees that are
# drawn next to each other.
#
# x        numeric matrix whose row/column names contain the tip labels
# Rowp     tree (class phylo) or list of trees to be used in rows
# Colp     tree (class phylo) or list of trees to be used in columns
# breaks   break points of the colour scale (used by image() and hist())
# col      colours of the scale
# denscol  colour of the histogram trace drawn into the colour key
# respect  if TRUE, shrink the layout so its width and height are equal
# ...      additional arguments to be passed to image()
heatmap.phylo <- function(x, Rowp, Colp, breaks, col, denscol = "cyan",
                          respect = FALSE, ...) {
  scale01 <- function(x, low = min(x), high = max(x)) {
    (x - low) / (high - low)
  }

  # `$tip` is NULL when Colp is not a single tree; the argument is then
  # treated as a list of trees and the per-tree quantities are collected.
  col.tip <- Colp$tip
  n.col <- 1
  if (is.null(col.tip)) {
    n.col <- length(Colp)
    col.tip <- unlist(lapply(Colp, function(t) t$tip))
    col.lengths <- unlist(lapply(Colp, function(t) length(t$tip)))
    col.fraction <- col.lengths / sum(col.lengths)
    col.heights <- unlist(lapply(Colp, function(t) max(node.depth.edgelength(t))))
    col.max_height <- max(col.heights)
  }
  row.tip <- Rowp$tip
  n.row <- 1
  if (is.null(row.tip)) {
    n.row <- length(Rowp)
    row.tip <- unlist(lapply(Rowp, function(t) t$tip))
    row.lengths <- unlist(lapply(Rowp, function(t) length(t$tip)))
    row.fraction <- row.lengths / sum(row.lengths)
    row.heights <- unlist(lapply(Rowp, function(t) max(node.depth.edgelength(t))))
    row.max_height <- max(row.heights)
  }
  # NOTE(review): n.row / n.col count trees, not labels, so both sizes stay
  # at 1 unless there are more than 17 trees - confirm this is intended.
  cexRow <- min(1, 0.2 + 1 / log10(n.row))
  cexCol <- min(1, 0.2 + 1 / log10(n.col))

  x <- x[row.tip, col.tip, drop = FALSE]
  xl <- c(0.5, ncol(x) + 0.5)
  yl <- c(0.5, nrow(x) + 0.5)

  # Screens (left, right, bottom, top): 1 = colour key, 2 = column trees,
  # 3 = row trees, 4 = image, 5 = column labels, 6 = row labels.
  screen_matrix <- matrix(c(
    0, 1, 4, 5,
    1, 4, 4, 5,
    0, 1, 1, 4,
    1, 4, 1, 4,
    1, 4, 0, 1,
    4, 5, 1, 4
  ) / 5, byrow = TRUE, ncol = 4)
  if (respect) {
    r <- grconvertX(1, from = "inches", to = "ndc") /
      grconvertY(1, from = "inches", to = "ndc")
    if (r < 1) {
      screen_matrix <- screen_matrix *
        matrix(c(r, r, 1, 1), nrow = 6, ncol = 4, byrow = TRUE)
    } else {
      screen_matrix <- screen_matrix *
        matrix(c(1, 1, 1 / r, 1 / r), nrow = 6, ncol = 4, byrow = TRUE)
    }
  }
  split.screen(screen_matrix)

  # Column tree(s) above the image.
  screen(2)
  par(mar = rep(0, 4))
  if (n.col == 1) {
    plot(Colp, direction = "downwards", show.tip.label = FALSE,
         xaxs = "i", x.lim = xl)
  } else {
    screens <- split.screen(as.matrix(data.frame(
      left = cumsum(col.fraction) - col.fraction,
      right = cumsum(col.fraction),
      bottom = 0, top = 1
    )))
    for (i in seq_len(n.col)) {
      screen(screens[i])
      plot(Colp[[i]], direction = "downwards", show.tip.label = FALSE,
           xaxs = "i", x.lim = c(0.5, 0.5 + col.lengths[i]),
           y.lim = -col.max_height + col.heights[i] + c(0, col.max_height))
    }
  }

  # Row tree(s) left of the image. This branch depends on the number of ROW
  # trees; it previously tested and looped over n.col.
  screen(3)
  par(mar = rep(0, 4))
  if (n.row == 1) {
    plot(Rowp, direction = "rightwards", show.tip.label = FALSE,
         yaxs = "i", y.lim = yl)
  } else {
    screens <- split.screen(as.matrix(data.frame(
      left = 0, right = 1,
      bottom = cumsum(row.fraction) - row.fraction,
      top = cumsum(row.fraction)
    )))
    for (i in seq_len(n.row)) {
      screen(screens[i])
      plot(Rowp[[i]], direction = "rightwards", show.tip.label = FALSE,
           yaxs = "i", x.lim = c(0, row.max_height),
           y.lim = c(0.5, 0.5 + row.lengths[i]))
    }
  }

  # image() draws the rows of its matrix along the horizontal axis. Transpose
  # so that matrix columns sit under the column trees and matrix rows sit
  # next to the row trees.
  screen(4)
  par(mar = rep(0, 4), xpd = TRUE)
  image(seq_len(ncol(x)) - 0.5, seq_len(nrow(x)) - 0.5, t(x),
        xaxs = "i", yaxs = "i", axes = FALSE, xlab = "", ylab = "",
        breaks = breaks, col = col, ...)

  # Row labels use cexRow and column labels cexCol (they were swapped).
  screen(6)
  par(mar = rep(0, 4))
  plot(NA, axes = FALSE, ylab = "", xlab = "", yaxs = "i",
       xlim = c(0, 2), ylim = yl)
  text(rep(0, nrow(x)), seq_len(nrow(x)), row.tip, pos = 4, cex = cexRow)
  screen(5)
  par(mar = rep(0, 4))
  plot(NA, axes = FALSE, ylab = "", xlab = "", xaxs = "i",
       ylim = c(0, 2), xlim = xl)
  text(seq_len(ncol(x)), rep(2, ncol(x)), col.tip, srt = 90,
       adj = c(1, 0.5), cex = cexCol)

  # Colour key with a histogram of the matrix values.
  screen(1)
  par(mar = c(2, 2, 1, 1), cex = 0.75)
  symkey <- TRUE
  tmpbreaks <- breaks
  if (symkey) {
    max.raw <- max(abs(c(x, breaks)), na.rm = TRUE)
    min.raw <- -max.raw
    tmpbreaks[1] <- -max(abs(x), na.rm = TRUE)
    tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE)
  } else {
    min.raw <- min(x, na.rm = TRUE)
    max.raw <- max(x, na.rm = TRUE)
  }
  z <- seq(min.raw, max.raw, length = length(col))
  image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks,
        xaxt = "n", yaxt = "n")
  par(usr = c(0, 1, 0, 1))
  lv <- pretty(breaks)
  xv <- scale01(as.numeric(lv), min.raw, max.raw)
  axis(1, at = xv, labels = lv)
  h <- hist(x, plot = FALSE, breaks = breaks)
  hx <- scale01(breaks, min.raw, max.raw)
  hy <- c(h$counts, h$counts[length(h$counts)])
  lines(hx, hy / max(hy) * 0.95, lwd = 1, type = "s", col = denscol)
  axis(2, at = pretty(hy) / max(hy) * 0.95, pretty(hy))
  par(cex = 0.5)
  mtext(side = 2, "Count", line = 2)
  close.screen(all.screens = TRUE)
}
# Example: three trees on both margins of a random correlation matrix.
library(gplots)  # provides bluered(), the palette used below
tree <- read.tree(text = "(A:1,B:1);((C:1,D:2):2,E:1);((F:1,G:1,H:2):5,((I:1,J:2):2,K:1):1);", comment.char = "")
# Total number of tips over all trees.
N <- sum(unlist(lapply(tree, function(t) length(t$tip))))
set.seed(42)
m <- cor(matrix(rnorm(N * N), nrow = N))
rownames(m) <- colnames(m) <- LETTERS[1:N]
heatmap.phylo(m, tree, tree, col = bluered(10),
              breaks = seq(-1, 1, length.out = 11), respect = TRUE)
This exact application of a heatmap is already implemented in the plot_heatmap function (based on ggplot2) in the phyloseq package, which is openly/freely developed on GitHub. Examples with complete code and results are included here:
http://joey711.github.io/phyloseq/plot_heatmap-examples
One caveat, and not what you are explicitly asking for here, but phyloseq::plot_heatmap does not overlay a hierarchical tree for either axis. There is a good reason not to base your axis ordering on hierarchical clustering -- and this is because of the way indices at the end of long branches can still be next to each other arbitrarily depending on how branches are rotated at the nodes. This point, and an alternative based on non-metric multidimensional scaling is explained further in an article about the NeatMap package, which is also written for R and uses ggplot2. This dimension-reduction (ordination) approach to ordering the indices in a heatmap is adapted for phylogenetic abundance data in phyloseq::plot_heatmap.
While my suggestion of phyloseq::plot_heatmap would get you part of the way there, the powerful "ggtree" package can do this, and more, if representing data on trees is really what you are going for.
Some examples are shown on the top of the following ggtree documentation page:
http://www.bioconductor.org/packages/3.7/bioc/vignettes/ggtree/inst/doc/advanceTreeAnnotation.html
Note that I am not affiliated with ggtree dev at all. Just a fan of the project and what it can already do.
After communicating with @plannapus, I've modified the code above slightly to remove the extra xlab="" and ylab="" arguments.
Here is the code. The commented-out lines show the old calls with the extra arguments; the lines right after them are the new calls without them.
I hope this helps new users like me! :)
# Heatmap with a phylogenetic tree along the rows (left) and along the
# columns (top); tip labels are written to the right and below.
#
# x     numeric matrix whose row/column names contain the tip labels
# Rowp  phylogenetic tree (class phylo) to be used in rows
# Colp  phylogenetic tree (class phylo) to be used in columns
# ...   additional arguments to be passed to image()
heatmap.phylo <- function(x, Rowp, Colp, ...) {
  # Reorder the matrix to follow the tip order of both trees.
  x <- x[Rowp$tip, Colp$tip, drop = FALSE]
  xl <- c(0.5, ncol(x) + 0.5)
  yl <- c(0.5, nrow(x) + 0.5)
  # 3 x 3 grid: 1 = column tree, 2 = row tree, 3 = image,
  # 4 = row labels, 5 = column labels; the corners stay empty.
  layout(matrix(c(0, 1, 0, 2, 3, 4, 0, 5, 0), nrow = 3, byrow = TRUE),
         widths = c(1, 3, 1), heights = c(1, 3, 1))
  par(mar = rep(0, 4))
  # plot(Colp, direction = "downwards", show.tip.label = FALSE,
  #      xlab = "", ylab = "", xaxs = "i", x.lim = xl)
  plot(Colp, direction = "downwards", show.tip.label = FALSE,
       xaxs = "i", x.lim = xl)
  par(mar = rep(0, 4))
  # plot(Rowp, direction = "rightwards", show.tip.label = FALSE,
  #      xlab = "", ylab = "", yaxs = "i", y.lim = yl)
  plot(Rowp, direction = "rightwards", show.tip.label = FALSE,
       yaxs = "i", y.lim = yl)
  par(mar = rep(0, 4), xpd = TRUE)
  # image() draws the rows of its matrix along the horizontal axis. Transpose
  # so that matrix columns run left to right under the column tree and matrix
  # rows run bottom to top next to the row tree.
  # (The earlier version additionally passed xlab = "", ylab = "".)
  image(seq_len(ncol(x)) - 0.5, seq_len(nrow(x)) - 0.5, t(x),
        xaxs = "i", yaxs = "i", axes = FALSE, ...)
  par(mar = rep(0, 4))
  plot(NA, axes = FALSE, ylab = "", xlab = "", yaxs = "i",
       xlim = c(0, 2), ylim = yl)
  text(rep(0, nrow(x)), seq_len(nrow(x)), Rowp$tip, pos = 4)
  par(mar = rep(0, 4))
  plot(NA, axes = FALSE, ylab = "", xlab = "", xaxs = "i",
       ylim = c(0, 2), xlim = xl)
  text(seq_len(ncol(x)), rep(2, ncol(x)), Colp$tip, srt = 90, pos = 2)
}

Change Dendrogram leaves

I want to modify the properties of the leaves in a dendrogram produced by plotting an hclust object. At a minimum, I want to change the colors, but any help you can provide will be appreciated.
I did try to google the answer, but every solution I saw seemed a lot harder than I would have guessed.
A while ago, Joris Meys kindly provided me with this snippet of code that changes the color of the leaves. Modify it to reflect your attributes.
# `Clustering` and `clusMember` must already exist in the workspace:
#   clusMember  - a vector designating leaf grouping, named by leaf label
#   labelColors - a vector of colors for the above grouping
clusDendro <- as.dendrogram(Clustering)
labelColors <- c("red", "blue", "darkgreen", "darkgrey", "purple")

## Node visitor for dendrapply(): colour a leaf label by its group and leave
## inner nodes untouched.
colLab <- function(n) {
  if (!is.leaf(n)) {
    return(n)
  }
  a <- attributes(n)
  member_idx <- which(names(clusMember) == a$label)
  labCol <- labelColors[clusMember[member_idx]]
  attr(n, "nodePar") <- c(a$nodePar, lab.col = labCol)
  n
}

## Graph
clusDendro <- dendrapply(clusDendro, colLab)
# Widen the right margin for the plot, then restore the old settings.
op <- par(mar = par("mar") + c(0, 0, 0, 2))
plot(
  clusDendro,
  main = "Major title",
  horiz = TRUE,
  type = "triangle",
  center = TRUE
)
par(op)
Here is a solution for this question using a new package called "dendextend", built exactly for this sort of thing.
You can see many examples in the presentations and vignettes of the package, in the "usage" section in the following URL: https://github.com/talgalili/dendextend
Here is the solution for this question:
# define dendrogram object to play with:
# Small dendrogram to experiment with: average linkage on the first three
# rows of USArrests.
arrest_dist <- dist(USArrests[1:3, ])
dend <- as.dendrogram(hclust(arrest_dist, "ave"))
# Install (the package is on CRAN) and load dendextend.
install.packages("dendextend")
library(dendextend)
# Add some color: assign the label colours, print them, then plot.
labels_colors(dend) <- 2:4
labels_colors(dend)
plot(dend)
It is not clear what you want to use it for, but I often need to identify a branch in a dendrogram. I've hacked the rect.hclust function to add density and labels arguments.
You would call it like this:
k <- 3 # number of branches to identify
labels.to.identify <- c('1', '2', '3')
required.density <- 10 # the density of shading lines, in lines per inch
# Pass the variable defined above; the call used the undefined name
# `density.required`.
rect.hclust.nice(tree, k, labels = labels.to.identify, density = required.density)
Here is the function
# Variant of rect.hclust(): draws coloured rectangles around the clusters of
# an already plotted dendrogram and can write a label into each of them.
#
# tree     object with `height` and `order` components, as produced by hclust()
# k, h     cut the tree into k clusters or at height h (exactly one of them)
# which,x  clusters to mark, numbered from left to right (`which`) or given
#          by horizontal position (`x`); the default marks all k clusters
# border   colour(s) for the rectangle borders and for their fill / shading
# cluster  optional result of cutree(tree, k = k)
# density  density of the shading lines, passed to rect(); with the default
#          NULL rect() fills the rectangle with the border colour
# labels   optional labels, one per marked cluster (recycled); NULL = none
# ...      further arguments for rect()
#
# Invisibly returns a list with the member indices of every marked cluster.
rect.hclust.nice <- function(tree, k = NULL, which = NULL, x = NULL, h = NULL,
                             border = 2, cluster = NULL, density = NULL,
                             labels = NULL, ...) {
  if (length(h) > 1L || length(k) > 1L) {
    stop("'k' and 'h' must be a scalar")
  }
  if (!is.null(h)) {
    if (!is.null(k)) {
      stop("specify exactly one of 'k' and 'h'")
    }
    k <- min(which(rev(tree$height) < h))
    k <- max(k, 2)
  } else if (is.null(k)) {
    stop("specify exactly one of 'k' and 'h'")
  }
  if (k < 2 || k > length(tree$height)) {
    stop(gettextf("k must be between 2 and %d", length(tree$height)),
         domain = NA)
  }
  if (is.null(cluster)) {
    cluster <- cutree(tree, k = k)
  }
  # Cluster sizes in left-to-right plotting order and their cumulative
  # boundaries along the x axis.
  clustab <- table(cluster)[unique(cluster[tree$order])]
  m <- c(0, cumsum(clustab))
  if (!is.null(x)) {
    if (!is.null(which)) {
      stop("specify exactly one of 'which' and 'x'")
    }
    which <- x
    for (n in seq_along(x)) {
      which[n] <- max(which(m < x[n]))
    }
  } else if (is.null(which)) {
    which <- seq_len(k)
  }
  if (any(which > k)) {
    stop(gettextf("all elements of 'which' must be between 1 and %d", k),
         domain = NA)
  }
  border <- rep(border, length.out = length(which))
  # Labels are optional: text() rejects zero-length labels, so the default
  # NULL must skip the text() call instead of being passed on.
  has_labels <- !is.null(labels)
  if (has_labels) {
    labels <- rep(labels, length.out = length(which))
  }

  bottom <- par("usr")[3L]
  top <- mean(rev(tree$height)[(k - 1):k])
  # Labels sit slightly above the bottom edge (0.02 in device-relative units).
  label_y <- grconvertY(grconvertY(bottom, "user", "ndc") + 0.02, "ndc", "user")

  retval <- vector("list", length(which))
  for (n in seq_along(which)) {
    left <- m[which[n]]
    right <- m[which[n] + 1]
    rect(left + 0.66, bottom, right + 0.33, top,
         border = border[n], col = border[n], density = density, ...)
    if (has_labels) {
      text((left + right + 1) / 2, label_y, labels[n])
    }
    retval[[n]] <- which(cluster == as.integer(names(clustab)[which[n]]))
  }
  invisible(retval)
}

Resources