Related
This question is quite trivial but I cannot be handled nicely with.
I'm trying to plot a circular tree with a side heatmap.
I'm using ggtree but any approach ggplo2 based is welcome.
The problems that I'm not understanding well the gheatmap function.
I want:
1- names AFTER the heatmap
2- 2 text columns after heatmap (for while may have the same value, but I need to know how to add it )
3- heatmap columns name nicely handled, should we remove the columns name and use different colors scales for each? wherever the solution falls might better than the way it is now
library(tidyverse)
library(ggtree)
library(treeio)
library(tidytree)
beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
beast_tree <- read.beast(beast_file)
genotype_file <- system.file("examples/Genotype.txt", package="ggtree")
genotype <- read.table(genotype_file, sep="\t", stringsAsFactor=F)
colnames(genotype) <- sub("\\.$", "", colnames(genotype))
p <- ggtree(beast_tree, mrsd="2013-01-01",layout = "fan", open.angle = -270) +
geom_treescale(x=2008, y=1, offset=2) +
geom_tiplab(size=2)
gheatmap(p, genotype, offset=5, width=0.5, font.size=3,
colnames_angle=-45, hjust=0) +
scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
values=c("steelblue", "firebrick", "darkgreen"), name="genotype")
Thanks in advance
UPDATE:
I found a better way to plot the name of heatmap columns.
Also, I found that the simplification of the data was useful to
clean up a little the tip labels.
Now, I just need to add two text columns after heatmap.
p <- ggtree(beast_tree)
gheatmap(
p, genotype, colnames=TRUE,
colnames_angle=90,
colnames_offset_y = 5,
colnames_position = "top",
) +
scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
values=c("steelblue", "firebrick", "darkgreen"), name="genotype")
UPDATE 2:
A very bad improvement
I just used ggplot to create the label and merge with patchwork
library(patchwork)
p$data %>%
ggplot(aes(1, y= y, label = label )) +
geom_text(size=2) +
xlim(NA, 1) +
theme_classic() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()) -> adText
pp + adText
The answer according #xiangpin at GitHub.
Big offset value to geom_tiplabel:
p <- ggtree(beast_tree)
p1 <- gheatmap(
p, genotype, colnames=TRUE,
colnames_angle=-45,
colnames_offset_y = 5,
colnames_position = "bottom",
width=0.3,
hjust=0, font.size=2) +
scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
values=c("steelblue", "firebrick", "darkgreen"), name="genotype") +
geom_tiplab(align = TRUE, linesize=0, offset = 7, size=2) +
xlim_tree(xlim=c(0, 36)) +
scale_y_continuous(limits = c(-1, NA))
p1
Using ggtreeExtra:
library(ggtreeExtra)
library(ggtree)
library(treeio)
library(ggplot2)
beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
genotype_file <- system.file("examples/Genotype.txt", package="ggtree")
tree <- read.beast(beast_file)
genotype <- read.table(genotype_file, sep="\t")
colnames(genotype) <- sub("\\.$", "", colnames(genotype))
genotype$ID <- row.names(genotype)
dat <- reshape2::melt(genotype, id.vars="ID", variable.name = "type", value.name="genotype", factorsAsStrings=FALSE)
dat$genotype <- unlist(lapply(as.vector(dat$genotype),function(x)ifelse(nchar(x)==0,NA,x)))
p <- ggtree(tree) + geom_treescale()
p2 <- p + geom_fruit(data=dat,
geom=geom_tile,
mapping=aes(y=ID, x=type, fill=genotype),
color="white") +
scale_fill_manual(values=c("steelblue", "firebrick", "darkgreen"),
na.translate=FALSE) +
geom_axis_text(angle=-45, hjust=0, size=1.5) +
geom_tiplab(align = TRUE, linesize=0, offset = 6, size=2) +
xlim_tree(xlim=c(0, 36)) +
scale_y_continuous(limits = c(-1, NA))
p2
I am trying to show a legend to accompany a plot created using ggplot and geom_col - my legend isn't showing up. I originally plotted these data using geom_bar (with a visible legend but some other problems), but after more research it seems more appropriate to use geom_col.
I know this topic has a ton of google-able questions and answers, but after many searches and code variations I still have no success.
How can I make my legend visible?
My reproducible code is below.
site <- c(0.700, 0.854)
site <- lapply(site, function(x) round((x*100),1))
site <- unlist(site, use.names=FALSE)
state <- c(0.726, 0.808)
state <- lapply(state, function(x) round((x*100),1))
state <- unlist(state, use.names=FALSE)
measure <- c("Individual", "Coalition")
measure <- unlist(measure, use.names=FALSE)
df1 <- data.frame(site,state,measure)
df2 <- melt(df1, id.vars='measure')
df2$variable <- factor(df2$variable,
levels = c('state','site'),ordered = TRUE)
fillcolors <- c("#7189A6", "#817BB0", "#7189A6", "#817BB0")
myplot <-
ggplot(df2, aes(measure, value, group = variable)) +
geom_col(width=.75, position=position_dodge(width=0.80), fill=fillcolors) +
labs(title = paste0("Knowledge & Skills Gained", paste0(rep("", 0), collapse = " ")),
x = "", y = "", fill="") +
scale_y_continuous(limits=c(0,100), labels = function(x){ paste0(x, "%") }) +
coord_flip() +
geom_text(aes(label=(paste0(value,"%"))), size=3,
colour = "#57585a",
position=position_dodge(width=0.75), vjust=.3, hjust=-.1) +
theme(legend.position="right", title = element_text(colour = "#57585a"),
legend.text = element_text(colour="#57585a", size = 9))
myplot
This results in the following plot: myplot
Thank you in advance!
The main issue is that you should use fill=, not group=:
ggplot(df2, aes(measure, value, fill = variable)) +
geom_col(width=.75, position=position_dodge(width=0.80)) +
labs(title = paste0("Knowledge & Skills Gained", paste0(rep("", 0), collapse = " ")),
x = "", y = "", fill="") +
scale_y_continuous(limits=c(0,100), labels = function(x){ paste0(x, "%") }) +
coord_flip() +
geom_text(aes(label=(paste0(value,"%"))), size=3,
colour = "#57585a",
position=position_dodge(width=0.75), vjust=.3, hjust=-.1) +
scale_fill_manual(values = c("#817BB0", "#7189A6")) +
guides(fill = guide_legend(reverse=T))
(There are a lot of other ways in which your code could be cleaned up; I focused only on getting the legend to appear.)
I'm trying to produce a scatter plot with geom_point where the points are circumscribed by a smoothed polygon, with geom_polygon.
Here's my point data:
set.seed(1)
df <- data.frame(x=c(rnorm(30,-0.1,0.1),rnorm(30,0,0.1),rnorm(30,0.1,0.1)),y=c(rnorm(30,-1,0.1),rnorm(30,0,0.1),rnorm(30,1,0.1)),val=rnorm(90),cluster=c(rep(1,30),rep(2,30),rep(3,30)),stringsAsFactors=F)
I color each point according the an interval that df$val is in. Here's the interval data:
intervals.df <- data.frame(interval=c("(-3,-2]","(-2,-0.999]","(-0.999,0]","(0,1.96]","(1.96,3.91]","(3.91,5.87]","not expressed"),
start=c(-3,-2,-0.999,0,1.96,3.91,NA),end=c(-2,-0.999,0,1.96,3.91,5.87,NA),
col=c("#2f3b61","#436CE8","#E0E0FF","#7d4343","#C74747","#EBCCD6","#D3D3D3"),stringsAsFactors=F)
Assigning colors and intervals to the points:
df <- cbind(df,do.call(rbind,lapply(df$val,function(x){
if(is.na(x)){
return(data.frame(col=intervals.df$col[nrow(intervals.df)],interval=intervals.df$interval[nrow(intervals.df)],stringsAsFactors=F))
} else{
idx <- which(intervals.df$start <= x & intervals.df$end >= x)
return(data.frame(col=intervals.df$col[idx],interval=intervals.df$interval[idx],stringsAsFactors=F))
}
})))
Preparing the colors for the leged which will show each interval:
df$interval <- factor(df$interval,levels=intervals.df$interval)
colors <- intervals.df$col
names(colors) <- intervals.df$interval
Here's where I constructed the smoothed polygons (using a function courtesy of this link):
clusters <- sort(unique(df$cluster))
cluster.cols <- c("#ff00ff","#088163","#ccbfa5")
splinePolygon <- function(xy,vertices,k=3, ...)
{
# Assert: xy is an n by 2 matrix with n >= k.
# Wrap k vertices around each end.
n <- dim(xy)[1]
if (k >= 1) {
data <- rbind(xy[(n-k+1):n,], xy, xy[1:k, ])
} else {
data <- xy
}
# Spline the x and y coordinates.
data.spline <- spline(1:(n+2*k), data[,1], n=vertices, ...)
x <- data.spline$x
x1 <- data.spline$y
x2 <- spline(1:(n+2*k), data[,2], n=vertices, ...)$y
# Retain only the middle part.
cbind(x1, x2)[k < x & x <= n+k, ]
}
library(data.table)
hulls.df <- do.call(rbind,lapply(1:length(clusters),function(l){
dt <- data.table(df[which(df$cluster==clusters[l]),])
hull <- dt[, .SD[chull(x,y)]]
spline.hull <- splinePolygon(cbind(hull$x,hull$y),100)
return(data.frame(x=spline.hull[,1],y=spline.hull[,2],val=NA,cluster=clusters[l],col=cluster.cols[l],interval=NA,stringsAsFactors=F))
}))
hulls.df$cluster <- factor(hulls.df$cluster,levels=clusters)
And here's my ggplot command:
library(ggplot2)
p <- ggplot(df,aes(x=x,y=y,colour=interval))+geom_point(cex=2,shape=1,stroke=1)+labs(x="X", y="Y")+theme_bw()+theme(legend.key=element_blank(),panel.border=element_blank(),strip.background=element_blank())+scale_color_manual(drop=FALSE,values=colors,name="DE")
p <- p+geom_polygon(data=hulls.df,aes(x=x,y=y,group=cluster),color=hulls.df$col,fill=NA)
which produces:
My question is how do I add a legend for the polygon under the legend for the points? I want it to a legend with 3 lines colored according to the cluster colors and the corresponding cluster number beside each line?
Slightly different output, only changing the last line of your code, it may solve your purpose:
p+geom_polygon(data=hulls.df,aes(x=x,y=y,group=cluster, fill=cluster),alpha=0.1)
Say, you want to add a legend of the_factor. My basic idea is,
(1) put the_factor into mapping by using unused aes arguments; aes(xx = the_factor)
(2) if (1) affects something, delete the effect by using scale_xx_manual()
(3) modify the legend by using guides(xx = guide_legend(override.aes = list()))
In your case, aes(fill) and aes(alpha) are unused. The former is better to do it because of no effect. So I used aes(fill=as.factor(cluster)).
p <- ggplot(df,aes(x=x,y=y,colour=interval, fill=as.factor(cluster))) + # add aes(fill=...)
geom_point(cex=2, shape=1, stroke=1) +
labs(x="X", y="Y",fill="cluster") + # add fill="cluster"
theme_bw() + theme(legend.key=element_blank(),panel.border=element_blank(),strip.background=element_blank()) + scale_color_manual(drop=FALSE,values=colors,name="DE") +
guides(fill = guide_legend(override.aes = list(colour = cluster.cols, pch=0))) # add
p <- p+geom_polygon(data=hulls.df,aes(x=x,y=y,group=cluster), color=hulls.df$col,fill=NA)
Of course, you can make the same graph by using aes(alpha = the_factor)). Because it has influence, you need to control it by using scale_alpha_manual().
g <- ggplot(df, aes(x=x,y=y,colour=interval)) +
geom_point(cex=2, shape=1, stroke=1, aes(alpha=as.factor(cluster))) + # add aes(alpha)
labs(x="X", y="Y",alpha="cluster") + # add alpha="cluster"
theme_bw() + theme(legend.key=element_blank(),panel.border=element_blank(),strip.background=element_blank()) + scale_color_manual(drop=FALSE,values=colors,name="DE") +
scale_alpha_manual(values=c(1,1,1)) + # add
guides(alpha = guide_legend(override.aes = list(colour = cluster.cols, pch=0))) # add
g <- p+geom_polygon(data=hulls.df,aes(x=x,y=y,group=cluster), color=hulls.df$col,fill=NA)
What you are asking for is two colour scales. My understanding is that this is not possible. But you can give the impression of having two colour scales with a bit of a cheat and using the filled symbols (shapes 21 to 25).
p <- ggplot(df, aes(x = x, y = y, fill = interval)) +
geom_point(cex = 2, shape = 21, stroke = 1, colour = NA)+
labs(x = "X", y = "Y") +
theme_bw() +
theme(legend.key = element_blank(), panel.border = element_blank(), strip.background = element_blank()) +
scale_fill_manual(drop=FALSE, values=colors, name="DE") +
geom_polygon(data = hulls.df, aes(x = x, y = y, colour = cluster), fill = NA) +
scale_colour_manual(values = cluster.cols)
p
Alternatively, use a filled polygon with a low alpha
p <- ggplot(df,aes(x=x,y=y,colour=interval))+
geom_point(cex=2,shape=1,stroke=1)+
labs(x="X", y="Y")+
theme_bw() +
theme(legend.key = element_blank(),panel.border=element_blank(), strip.background=element_blank()) +
scale_color_manual(drop=FALSE,values=colors,name="DE", guide = guide_legend(override.aes = list(fill = NA))) +
geom_polygon(data=hulls.df,aes(x=x,y=y,group=cluster, fill = cluster), alpha = 0.2, show.legend = TRUE) +
scale_fill_manual(values = cluster.cols)
p
But this might make the point colours difficult to see.
I have this chart - I would like to add to each label the text N=xx to denote the number of observations. I know how to do this and I have done that on charts with no facets.
When I tried that on the faceted chart it did not work, (I got the same N on the open tick on all 3 charts, the same N on the Restricted, etc.)
I hope someone can point the way to a solution, how do I control the elements on a given facet?
library(ggplot2)
library(scales)
stat_sum_single <- function(fun, geom="point", ...) {
stat_summary(fun.y=fun, fill="red", geom=geom, size = 5, shape=24)
}
set.seed(1)
data1 <- data.frame(Physicians_In=sample(1:3,100,replace=T),Physicians_Out=sample(1:3,100,replace=T),share=runif(100,0,1))
data1$Physicians_In <- factor(data1$Physicians_In,levels=c(1,2,3),labels=c("Open","Restricted","Closed"))
data1$Physicians_Out <- factor(data1$Physicians_Out,levels=c(1,2,3),labels=c("Open","Restricted","Closed"))
access_ch3 <- ggplot(data1,aes(x=Physicians_In,y=share,fill=Physicians_In))+geom_boxplot()+stat_sum_single(mean)
access_ch3 <- access_ch3 +geom_jitter(position = position_jitter(width = .2),color="blue")+theme_bw()
access_ch3 <- access_ch3 + theme(legend.position="none") +scale_y_continuous("Gammagard Share",labels=percent)
gpo_labs5 <- paste(gsub("/","-\n",names(table(data1$Physicians_Out)),fixed=T),"\n(N=",table(data1$Physicians_Out),")",sep="")
access_ch3 <- access_ch3 + scale_x_discrete("Physician Access (In Hospital)",labels=gpo_labs5)
access_ch3 <- access_ch3 +facet_grid(.~Physicians_Out,labeller=label_both)
access_ch3
I tried creating the 9 labels and passing that vector to the scale_x_discrete element, that just recycled the first 3, so it also did not solve the issue.
With the same data I followed a four step approach.
First: subsetting the data
open <- subset(data1, Physicians_Out == "Open")
restr <- subset(data1, Physicians_Out == "Restricted")
closed <- subset(data1, Physicians_Out == "Closed")
Second: creating the labels for the different subsets
labs.open <- paste(gsub("/","-\n",names(table(open$Physicians_In)),fixed=T),
"\n(N=",table(open$Physicians_In),")",sep="")
labs.restr <- paste(gsub("/","-\n",names(table(restr$Physicians_In)),fixed=T),
"\n(N=",table(restr$Physicians_In),")",sep="")
labs.closed <- paste(gsub("/","-\n",names(table(closed$Physicians_In)),fixed=T),
"\n(N=",table(closed$Physicians_In),")",sep="")
Third: creating a theme for removing the y-axis labels & text for the 2nd & 3rd sub-graphs
mytheme <- theme(
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)
Finally: creating the graph
p1 <- ggplot(open,aes(x=Physicians_In,y=share,fill=Physicians_In)) +
geom_boxplot() + stat_sum_single(mean) +
geom_jitter(position = position_jitter(width = .2),color="blue") +
guides(fill=FALSE) +
ggtitle(paste("Physician Access (Out): Open\nN = (", nrow(open), ")\n")) +
scale_y_continuous("Gammagard Share",labels=percent) +
scale_x_discrete("\nPhysician Access (In Hospital)",labels=labs.open) +
theme_bw()
p2 <- ggplot(restr,aes(x=Physicians_In,y=share,fill=Physicians_In)) +
geom_boxplot() + stat_sum_single(mean) +
geom_jitter(position = position_jitter(width = .2),color="blue") +
guides(fill=FALSE) +
ggtitle(paste("Physician Access (Out): Restricted\nN = (", nrow(restr), ")\n")) +
scale_x_discrete("\nPhysician Access (In Hospital)",labels=labs.restr) +
theme_bw() + mytheme
p3 <- ggplot(closed,aes(x=Physicians_In,y=share,fill=Physicians_In)) +
geom_boxplot() + stat_sum_single(mean) +
geom_jitter(position = position_jitter(width = .2),color="blue") +
guides(fill=FALSE) +
ggtitle(paste("Physician Access (Out): Closed\nN = (", nrow(closed), ")\n")) +
scale_x_discrete("\nPhysician Access (In Hospital)",labels=labs.closed) +
theme_bw() + mytheme
library(gridExtra)
grid.arrange(p1, p2, p3, ncol=3)
Which gives the following result:
It is not exactly what you want to do , but I think this can be helpful ( at least a good start)
library(ggplot2)
library(plyr)
data1 <- ddply(data1,.(Physicians_Out,Physicians_In),transform,label = length(share))
ggplot(data1,aes(x=Physicians_In,y=share,fill=Physicians_In))+
geom_boxplot() +
stat_sum_single(mean) +
facet_grid(.~Physicians_Out,labeller=label_both,scales='free_x') +
stat_summary(fun.y=min,aes(label=paste0('N=',label)),geom='text',col='blue',cex=5)
I am building charts that have two lines in the axis text. The first line contains the group name, the second line contains that group population. I build my axis labels as a single character string with the format "LINE1 \n LINE2". Is it possible to assign different font faces and sizes to LINE1 and LINE2, even though they are contained within a single character string? I would like LINE1 to be large and bolded, and LINE2 to be small and unbolded.
Here's some sample code:
Treatment <- rep(c('T','C'),each=2)
Gender <- rep(c('Male','Female'),2)
Response <- sample(1:100,4)
test_df <- data.frame(Treatment, Gender, Response)
xbreaks <- levels(test_df$Gender)
xlabels <- paste(xbreaks,'\n',c('POP1','POP2'))
hist <- ggplot(test_df, aes(x=Gender, y=Response, fill=Treatment, stat="identity"))
hist + geom_bar(position = "dodge") + scale_y_continuous(limits = c(0,
100), name = "") + scale_x_discrete(labels=xlabels, breaks = xbreaks) +
opts(
axis.text.x = theme_text(face='bold',size=12)
)
I tried this, but the result was one large, bolded entry, and one small, unbolded entry:
hist + geom_bar(position = "dodge") + scale_y_continuous(limits = c(0,
100), name = "") + scale_x_discrete(labels=xlabels, breaks = xbreaks) +
opts(
axis.text.x = theme_text(face=c('bold','plain'),size=c('15','10'))
)
Another possible solution is to create separate chart elements, but I don't think that ggplot2 has a 'sub-axis label' element available...
Any help would be very much appreciated.
Cheers,
Aaron
I also think that I could not to make the graph by only using ggplot2 features.
I would use grid.text and grid.gedit.
require(ggplot2)
Treatment <- rep(c('T','C'), each=2)
Gender <- rep(c('Male','Female'), 2)
Response <- sample(1:100, 4)
test_df <- data.frame(Treatment, Gender, Response)
xbreaks <- levels(test_df$Gender)
xlabels <- paste(xbreaks,'\n',c('',''))
hist <- ggplot(test_df, aes(x=Gender, y=Response, fill=Treatment,
stat="identity"))
hist + geom_bar(position = "dodge") +
scale_y_continuous(limits = c(0, 100), name = "") +
scale_x_discrete(labels=xlabels, breaks = xbreaks) +
opts(axis.text.x = theme_text(face='bold', size=12))
grid.text(label="POP1", x = 0.29, y = 0.06)
grid.text(label="POP2", x = 0.645, y = 0.06)
grid.gedit("GRID.text", gp=gpar(fontsize=8))
Please try to tune a code upon according to your environment (e.g. the position of sub-axis labels and the fontsize).
I found another simple solution below:
require(ggplot2)
Treatment <- rep(c('T','C'),each=2)
Gender <- rep(c('Male','Female'),2)
Response <- sample(1:100,4)
test_df <- data.frame(Treatment, Gender, Response)
xbreaks <- levels(test_df$Gender)
xlabels[1] <- expression(atop(bold(Female), scriptstyle("POP1")))
xlabels[2] <- expression(atop(bold(Male), scriptstyle("POP2")))
hist <- ggplot(test_df, aes(x=Gender, y=Response, fill=Treatment,
stat="identity"))
hist +
geom_bar(position = "dodge") +
scale_y_continuous(limits = c(0, 100), name = "") +
scale_x_discrete(label = xlabels, breaks = xbreaks) +
opts(
axis.text.x = theme_text(size = 12)
)
All,
Using Triad's cheat this is the closest I was able to get to solution on this one. Let me know if you have any questions:
library(ggplot2)
spacing <- 0 #We can adjust how much blank space we have beneath the chart here
labels1= paste('Group',c('A','B','C','D'))
labels2 = rep(paste(rep('\n',spacing),collapse=''),length(labels1))
labels <- paste(labels1,labels2)
qplot(1:4,1:4, geom="blank") +
scale_x_continuous(breaks=1:length(labels), labels=labels) + xlab("")+
opts(plot.margin = unit(c(1, 1, 3, 0.5), "lines"),
axis.text.x = theme_text(face='bold', size=14))
xseq <- seq(0.15,0.9,length.out=length(labels)) #Assume for now that 0.15 and 0.9 are constant plot boundaries
sample_df <- data.frame(group=rep(labels1,each=2),subgroup=rep(c('a','b'),4),pop=sample(1:10,8))
popLabs <- by(sample_df,sample_df$group,function(subData){
paste(paste(subData$subgroup,' [n = ', subData$pop,']',sep=''),collapse='\n')
})
gridText <- paste("grid.text(label='\n",popLabs,"',x=",xseq,',y=0.1)',sep='')
sapply(gridText, function(x){ #Evaluate parsed character string for each element of gridText
eval(parse(text=x))
})
grid.gedit("GRID.text", gp=gpar(fontsize=12))
Cheers,
Aaron