Circular tree with heatmap - r

This question is quite trivial but I cannot be handled nicely with.
I'm trying to plot a circular tree with a side heatmap.
I'm using ggtree but any approach ggplo2 based is welcome.
The problems that I'm not understanding well the gheatmap function.
I want:
1- names AFTER the heatmap
2- 2 text columns after heatmap (for while may have the same value, but I need to know how to add it )
3- heatmap columns name nicely handled, should we remove the columns name and use different colors scales for each? wherever the solution falls might better than the way it is now
library(tidyverse)
library(ggtree)
library(treeio)
library(tidytree)
beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
beast_tree <- read.beast(beast_file)
genotype_file <- system.file("examples/Genotype.txt", package="ggtree")
genotype <- read.table(genotype_file, sep="\t", stringsAsFactor=F)
colnames(genotype) <- sub("\\.$", "", colnames(genotype))
p <- ggtree(beast_tree, mrsd="2013-01-01",layout = "fan", open.angle = -270) +
geom_treescale(x=2008, y=1, offset=2) +
geom_tiplab(size=2)
gheatmap(p, genotype, offset=5, width=0.5, font.size=3,
colnames_angle=-45, hjust=0) +
scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
values=c("steelblue", "firebrick", "darkgreen"), name="genotype")
Thanks in advance
UPDATE:
I found a better way to plot the name of heatmap columns.
Also, I found that the simplification of the data was useful to
clean up a little the tip labels.
Now, I just need to add two text columns after heatmap.
p <- ggtree(beast_tree)
gheatmap(
p, genotype, colnames=TRUE,
colnames_angle=90,
colnames_offset_y = 5,
colnames_position = "top",
) +
scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
values=c("steelblue", "firebrick", "darkgreen"), name="genotype")
UPDATE 2:
A very bad improvement
I just used ggplot to create the label and merge with patchwork
library(patchwork)
p$data %>%
ggplot(aes(1, y= y, label = label )) +
geom_text(size=2) +
xlim(NA, 1) +
theme_classic() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()) -> adText
pp + adText

The answer according #xiangpin at GitHub.
Big offset value to geom_tiplabel:
p <- ggtree(beast_tree)
p1 <- gheatmap(
p, genotype, colnames=TRUE,
colnames_angle=-45,
colnames_offset_y = 5,
colnames_position = "bottom",
width=0.3,
hjust=0, font.size=2) +
scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
values=c("steelblue", "firebrick", "darkgreen"), name="genotype") +
geom_tiplab(align = TRUE, linesize=0, offset = 7, size=2) +
xlim_tree(xlim=c(0, 36)) +
scale_y_continuous(limits = c(-1, NA))
p1
Using ggtreeExtra:
library(ggtreeExtra)
library(ggtree)
library(treeio)
library(ggplot2)
beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
genotype_file <- system.file("examples/Genotype.txt", package="ggtree")
tree <- read.beast(beast_file)
genotype <- read.table(genotype_file, sep="\t")
colnames(genotype) <- sub("\\.$", "", colnames(genotype))
genotype$ID <- row.names(genotype)
dat <- reshape2::melt(genotype, id.vars="ID", variable.name = "type", value.name="genotype", factorsAsStrings=FALSE)
dat$genotype <- unlist(lapply(as.vector(dat$genotype),function(x)ifelse(nchar(x)==0,NA,x)))
p <- ggtree(tree) + geom_treescale()
p2 <- p + geom_fruit(data=dat,
geom=geom_tile,
mapping=aes(y=ID, x=type, fill=genotype),
color="white") +
scale_fill_manual(values=c("steelblue", "firebrick", "darkgreen"),
na.translate=FALSE) +
geom_axis_text(angle=-45, hjust=0, size=1.5) +
geom_tiplab(align = TRUE, linesize=0, offset = 6, size=2) +
xlim_tree(xlim=c(0, 36)) +
scale_y_continuous(limits = c(-1, NA))
p2

Related

How to hide (or remove) dots in the boxplot graph?

I have a question about how to hide(or remove) dots in the boxplot graph.
This is code what I implemented.
install.packages("randomForestSRC")
install.packages("ggRandomForests")
library(randomForestSRC)
library(ggRandomForests)
data(pbc, package="randomForestSRC")
pbc.na <- na.omit(pbc)
set.seed(123)
rsf <- rfsrc(Surv(days,status)~., data=pbc.na, ntree=500, importance=T)
gg_v <- gg_variable(rsf, time = c(2000, 4000),
time.labels = c("2000 days", "4000 days"))
gg_v$stage <- as.factor(gg_v$stage)
plot(gg_v, xvar="stage", panel=T, points=F)+
ggplot2::theme_bw() +
ggplot2::geom_boxplot(outlier.shape=NA)+
ggplot2::labs(y="Survival (%)")+
ggplot2::coord_cartesian(ylim=c(-.01, 1.02))
So I would like to hide(or remove) all of the event's dots (both of False and True).
However, I have no information about what I want.
Please let me know how to do it.
Thanks always.
I am not familiar how ggRandomForests work. But using the data frame gg_v, we can directly do the plotting in ggplot2.
ggplot(gg_v, aes(stage, yhat, group = stage)) +
geom_boxplot(outlier.shape = NA) +
facet_wrap(~time, nrow = 2, strip.position = "right") +
ylab("Survival (%)") +
theme_bw()
You can also use the function "geom_boxplot2" from github ("Ipaper")
# devtools::install_github('kongdd/Ipaper')
library(Ipaper)
library(ggplot2)
ggplot(gg_v, aes(stage, yhat, group = stage)) +
geom_boxplot2(width = 0.8, width.errorbar = 0.5)+
facet_wrap(~time, nrow = 2, strip.position = "right") +
ylab("Survival (%)") +
theme_bw()

Overlaying points and controlling size with ggplot2

I want to plot some point estimates with a couple of interval estimates around them, and then to superimpose the true point values using a different color and size, with a legend for the color.
I've tried lots of things. If I just use a new call to geom_point, I can't figure out how to add a legend. Therefore, my current approach resorts to stacking the data on top of itself, which is clumsy. Even then, the graph comes out wrong with big blue points for the True values, with the desired orange points on top of them.
I'd appreciate any help I can get.
nms <- c("2.5%","25%","50%","75%","97.5%","dose","truep")
a <- c(9.00614679684893e- 44,0.000123271800672435,0.0339603711049475,0.187721170170911,0.67452033450121,5,0.040752445325937)
b <- c(1.59502878028266e-25,0.00328588588499889,0.0738203422543555,0.25210200886225,0.714843425007051,10,0.0885844107052267)
cc <- c(1.41975723605948e-14,0.0184599181547097,0.118284929584256,0.311068595276067,0.74339745948793,15,0.141941915501108)
d <- c(0.0311851190805834,0.154722028150561,0.299318020818234,0.50887634580605,0.838779816278485,25,0.359181624981881)
e <- c(0.0529617924263383,0.289588386297245,0.566777817134668,0.883959271416755,0.999999999999317,40,0.680133380561602)
f <- c(0.0598904847882839,0.327655201251564,0.640100529843672,0.950060245074853,1,50,0.768120635812406)
g <- c(0.0641613025760661,0.355626055560067,0.686504841650593,0.978023943968809,1,60,0.823805809980712)
p <- as.data.frame(t(data.frame(a, b, cc, d, e, f, g)))
names(p) <- nms
# Faff duplicating data
p$truep <- 1.2 * p$truep
p2 <- p
p2[, 1:5] <- p$truep # truep is known, so there are no intervals
p3 <- rbind(p2, p)
p3$wh <- rep((c(2, 3)), each=nrow(p))
p3$col <- rep(c("orange", "blue"), each=nrow(p))
ggplot(p3, aes(dose, `50%`)) +
geom_point(aes(size=wh, color=col)) +
scale_size(range=c(5, 7), guide="none") +
scale_color_manual(name="", labels=c("Prior", "True"), values=c("blue", "orange")) +
geom_pointrange(aes(ymin=`2.5%`, ymax=`97.5%`, x=dose), color="blue") +
geom_pointrange(aes(ymin=`25%`, ymax=`75%`, x=dose), color="blue", size=2) +
geom_point(aes(dose, truep), color="orange") +
theme(axis.text.x=element_text(size=12), axis.title.x=element_text(size=14),
axis.text.y=element_text(size=12), axis.title.y=element_text(size=14),
legend.text=element_text(size=12))
R 3.3.1, ggplot2_2.1.1
Thanks,
Harry
I found a solution by splitting the dataset in two parts:
library(dplyr)
priors <- p%>%
mutate(datatype = 'Prior')
truevals <- p%>%
select(dose, truep)%>%
mutate(datatype = 'True')
ggplot(truevals, aes(x = dose, y = truep, colour = datatype))+
geom_pointrange(data = priors, aes(ymin=`25%`, ymax=`75%`, y = `50%`), size=1.5) +
geom_pointrange(data = priors, aes(ymin=`2.5%`, ymax=`97.5%`, y = `50%`))+
geom_point()+
scale_color_manual(name="", values=c("Prior" = "blue", "True" = "orange")) +
theme(axis.text.x=element_text(size=12), axis.title.x=element_text(size=14),
axis.text.y=element_text(size=12), axis.title.y=element_text(size=14),
legend.text=element_text(size=12))
First we plot the two pointranges based on the dataset with priors. Then the actual values. By adding a row with the datatype to both datasets we can add the legend. The result is this graph:
For the method ggplot2::geom_point() there is a show.legend attribute which is NA by default so setting this to TRUE should help.
You can add a legend using the labels attribute as follows:
ggplot2::scale_fill_manual(values = c("red", "black",
labels = c("Number of people",
"Number of birds"))
You are already doing this with labels=c("Prior", "True")
You can also change the look of the legend with:
ggplot2::theme(legend.position = "bottom",
legend.text = ggplot2::element_text(size = 22),
legend.box = "horizontal",
legend.key = ggplot2::element_blank())

Moving x or y axis together with tick labels to the middle of a single ggplot (no facets)

I made the following plot in Excel:
But then I thought I would make it prettier by using ggplot. I got this far:
If you're curious, the data is based on my answer here, although it doesn't really matter. The plot is a standard ggplot2 construct with some prettification, and the thick line for the x-axis through the middle is achieved with p + geom_hline(aes(yintercept=0)) (p is the ggplot object).
I feel that the axis configuration in the Excel plot is better. It emphasizes the 0 line (important when the data is money) and finding intercepts is much easier since you don't have to follow lines from all the way at the bottom. This is also how people draw axes when plotting on paper or boards.
Can the axis be moved like this in ggplot as well? I want not just the line, but the tick labels as well moved. If yes, how? If no, is the reason technical or by design? If by design, why was the decision made?
try this,
shift_axis <- function(p, y=0){
g <- ggplotGrob(p)
dummy <- data.frame(y=y)
ax <- g[["grobs"]][g$layout$name == "axis-b"][[1]]
p + annotation_custom(grid::grobTree(ax, vp = grid::viewport(y=1, height=sum(ax$height))),
ymax=y, ymin=y) +
geom_hline(aes(yintercept=y), data = dummy) +
theme(axis.text.x = element_blank(),
axis.ticks.x=element_blank())
}
p <- qplot(1:10, 1:10) + theme_bw()
shift_axis(p, 5)
I tried to change the theme's axis.text.x,but only can change hjust.
So I think you can delete axis.text.x,then use geom_text() to add.
For example:
test <- data.frame(x=seq(1,5), y=seq(-1,3))
ggplot(data=test, aes(x,y)) +
geom_line() +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
geom_text(data=data.frame(x=seq(1,5), y=rep(0,5)), label=seq(1,5), vjust=1.5)
Maybe these codes are useful.
just to complete baptiste's excellent answer with the equivalent for moving the y axis:
shift_axis_x <- function(p, x=0){
g <- ggplotGrob(p)
dummy <- data.frame(x=x)
ax <- g[["grobs"]][g$layout$name == "axis-l"][[1]]
p + annotation_custom(grid::grobTree(ax, vp = grid::viewport(x=1, width = sum(ax$height))),
xmax=x, xmin=x) +
geom_vline(aes(xintercept=x), data = dummy) +
theme(axis.text.y = element_blank(),
axis.ticks.y=element_blank())
}
As alistaire commented it can be done using geom_hline and geom_text as shown below.
df <- data.frame(YearMonth = c(200606,200606,200608,200701,200703,200605),
person1 = c('Alice','Bob','Alice','Alice','Bob','Alice'),
person2 = c('Bob','Alice','Bob','Bob','Alice','Bob'),
Event = c('event1','event2','event3','event3','event2','event4')
)
df$YM <- as.Date(paste0("01",df$YearMonth), format="%d%Y%m")
rangeYM <- range(df$YM)
ggplot()+geom_blank(aes(x= rangeYM, y = c(-1,1))) + labs(x = "", y = "") +
theme(axis.ticks = element_blank()) +
geom_hline(yintercept = 0, col = 'maroon') +
scale_x_date(date_labels = '%b-%y', date_breaks = "month", minor_breaks = NULL) +
scale_y_continuous(minor_breaks = NULL) +
geom_text(aes(x = df$YM, y = 0, label = paste(format(df$YM, "%b-%y")), vjust = 1.5), colour = "#5B7FA3", size = 3.5, fontface = "bold")

Ordering ggplot legend by the final value in a data frame

I would like to re-order the elements in a legend, as they appear top to bottom in an R ggplot. That is: I'd like the order dictated by comparing the Y value at the right most point X axis point. In the following data, I'd like the legend to read from the top: bush, foo, baz, bar.
Update: following #alexwhan comments, I have added the data to the script.
Update 2: this is now exactly what I was hoping for, thanks to #thomas-kern on #R (bosie) irc.freenode. The trick was to add both, i.e.
scale_linetype_discrete(breaks = ord$Variant) + scale_shape_discrete(breaks = ord$Variant)
Here's my R:
library(plyr)
library(ggplot2)
require(grid)
args <- commandArgs(trailingOnly = TRUE)
lines <- "
X,Variant,Y
1,foo,123
1,bar,134
1,baz,135
1,bush,136
2,foo,221
2,bar,104
2,baz,155
2,bush,336
"
con <- textConnection(lines)
DF <- read.csv(con, header=TRUE)
close(con)
cdata <- ddply(DF, .(Variant,X), summarise, N = length(Y), mean=round(mean(Y),2), sd=round(sd(Y),2), se=round(sd(Y)/sqrt(length(Y)),2))
ord <- cdata[cdata$X == max(cdata$X),]
ord <- ord[order(ord$Variant, decreasing=T),]
pdf("out.pdf")
none <- element_blank()
bp <- ggplot(cdata, aes(x=X, y=mean, group=Variant)) + xlab("X label") + geom_line(aes(linetype=Variant)) + geom_point(aes(shape=Variant)) + ylab("Y Value") + labs(title = "mytitle") + scale_linetype_discrete(breaks = ord$Variant) + scale_shape_discrete(breaks = ord$Variant)
print(bp + theme(legend.justification=c(1,0), legend.position=c(1,0), legend.key.width=unit(3,"line"), legend.title=element_blank(), text = element_text(size=18)) + theme(panel.background = element_rect(fill='white', colour='black')) + theme(panel.grid.major = none, panel.grid.minor = none))
dev.off()
This generates exactly what I'm after:
It really helps if you provide the data your plot is made with. Here's an example of how to approach with some data I made up:
dat <- data.frame(x = c(1,2), y = rnorm(8), group = rep(c("bar", "baz", "bush", "foo"), each = 2))
ord <- dat[dat$x == max(dat$x),]
ord <- ord[order(ord$y, decreasing=T),]
ggplot(dat, aes(x, y)) + geom_point(aes(shape = group)) + geom_line(aes(group = group)) +
scale_shape_discrete(breaks = ord$group)

Different font faces and sizes within label text entries in ggplot2

I am building charts that have two lines in the axis text. The first line contains the group name, the second line contains that group population. I build my axis labels as a single character string with the format "LINE1 \n LINE2". Is it possible to assign different font faces and sizes to LINE1 and LINE2, even though they are contained within a single character string? I would like LINE1 to be large and bolded, and LINE2 to be small and unbolded.
Here's some sample code:
Treatment <- rep(c('T','C'),each=2)
Gender <- rep(c('Male','Female'),2)
Response <- sample(1:100,4)
test_df <- data.frame(Treatment, Gender, Response)
xbreaks <- levels(test_df$Gender)
xlabels <- paste(xbreaks,'\n',c('POP1','POP2'))
hist <- ggplot(test_df, aes(x=Gender, y=Response, fill=Treatment, stat="identity"))
hist + geom_bar(position = "dodge") + scale_y_continuous(limits = c(0,
100), name = "") + scale_x_discrete(labels=xlabels, breaks = xbreaks) +
opts(
axis.text.x = theme_text(face='bold',size=12)
)
I tried this, but the result was one large, bolded entry, and one small, unbolded entry:
hist + geom_bar(position = "dodge") + scale_y_continuous(limits = c(0,
100), name = "") + scale_x_discrete(labels=xlabels, breaks = xbreaks) +
opts(
axis.text.x = theme_text(face=c('bold','plain'),size=c('15','10'))
)
Another possible solution is to create separate chart elements, but I don't think that ggplot2 has a 'sub-axis label' element available...
Any help would be very much appreciated.
Cheers,
Aaron
I also think that I could not to make the graph by only using ggplot2 features.
I would use grid.text and grid.gedit.
require(ggplot2)
Treatment <- rep(c('T','C'), each=2)
Gender <- rep(c('Male','Female'), 2)
Response <- sample(1:100, 4)
test_df <- data.frame(Treatment, Gender, Response)
xbreaks <- levels(test_df$Gender)
xlabels <- paste(xbreaks,'\n',c('',''))
hist <- ggplot(test_df, aes(x=Gender, y=Response, fill=Treatment,
stat="identity"))
hist + geom_bar(position = "dodge") +
scale_y_continuous(limits = c(0, 100), name = "") +
scale_x_discrete(labels=xlabels, breaks = xbreaks) +
opts(axis.text.x = theme_text(face='bold', size=12))
grid.text(label="POP1", x = 0.29, y = 0.06)
grid.text(label="POP2", x = 0.645, y = 0.06)
grid.gedit("GRID.text", gp=gpar(fontsize=8))
Please try to tune a code upon according to your environment (e.g. the position of sub-axis labels and the fontsize).
I found another simple solution below:
require(ggplot2)
Treatment <- rep(c('T','C'),each=2)
Gender <- rep(c('Male','Female'),2)
Response <- sample(1:100,4)
test_df <- data.frame(Treatment, Gender, Response)
xbreaks <- levels(test_df$Gender)
xlabels[1] <- expression(atop(bold(Female), scriptstyle("POP1")))
xlabels[2] <- expression(atop(bold(Male), scriptstyle("POP2")))
hist <- ggplot(test_df, aes(x=Gender, y=Response, fill=Treatment,
stat="identity"))
hist +
geom_bar(position = "dodge") +
scale_y_continuous(limits = c(0, 100), name = "") +
scale_x_discrete(label = xlabels, breaks = xbreaks) +
opts(
axis.text.x = theme_text(size = 12)
)
All,
Using Triad's cheat this is the closest I was able to get to solution on this one. Let me know if you have any questions:
library(ggplot2)
spacing <- 0 #We can adjust how much blank space we have beneath the chart here
labels1= paste('Group',c('A','B','C','D'))
labels2 = rep(paste(rep('\n',spacing),collapse=''),length(labels1))
labels <- paste(labels1,labels2)
qplot(1:4,1:4, geom="blank") +
scale_x_continuous(breaks=1:length(labels), labels=labels) + xlab("")+
opts(plot.margin = unit(c(1, 1, 3, 0.5), "lines"),
axis.text.x = theme_text(face='bold', size=14))
xseq <- seq(0.15,0.9,length.out=length(labels)) #Assume for now that 0.15 and 0.9 are constant plot boundaries
sample_df <- data.frame(group=rep(labels1,each=2),subgroup=rep(c('a','b'),4),pop=sample(1:10,8))
popLabs <- by(sample_df,sample_df$group,function(subData){
paste(paste(subData$subgroup,' [n = ', subData$pop,']',sep=''),collapse='\n')
})
gridText <- paste("grid.text(label='\n",popLabs,"',x=",xseq,',y=0.1)',sep='')
sapply(gridText, function(x){ #Evaluate parsed character string for each element of gridText
eval(parse(text=x))
})
grid.gedit("GRID.text", gp=gpar(fontsize=12))
Cheers,
Aaron

Resources