Didzis Elferts showed how to plot a dendrogram using ggplot2 and ggdendro:
horizontal dendrogram in R with labels
here is the code:
library(ggplot2)
library(ggdendro)

# Relabel the 50 states as sta_1 ... sta_50 and cluster them.
labs <- paste0("sta_", 1:50)
rownames(USArrests) <- labs
hc <- hclust(dist(USArrests), "ave")

# Convert the cluster object into the data frames ggplot needs.
dendr <- dendro_data(hc, type = "rectangle")

# Segments draw the tree; your own labels go through geom_text(label = label).
ggplot() +
  geom_segment(
    data = segment(dendr),
    mapping = aes(x = x, y = y, xend = xend, yend = yend)
  ) +
  geom_text(
    data = label(dendr),
    mapping = aes(x = x, y = y, label = label),
    hjust = 0, size = 3
  ) +
  coord_flip() +
  scale_y_reverse(expand = c(0.2, 0)) +
  theme(
    panel.grid = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  )
Does anyone know, how to colorize the different clusters? For example, you want to have 2 Clusters (k=2) colorized?
This approach is very similar to @DidzisElferts', just a little simpler.
library(ggplot2)
library(ggdendro)

# Work on a copy: really bad idea to muck up internal datasets.
df <- USArrests
labs <- paste0("sta_", 1:50)          # new labels
rownames(df) <- labs                  # set new row names

hc <- hclust(dist(df), "ave")         # hierarchical clustering
dendr <- dendro_data(hc, type = "rectangle")  # convert for ggplot

# Cut the tree into 2 clusters and keep the membership as a factor.
clust <- cutree(hc, k = 2)
clust.df <- data.frame(label = names(clust), cluster = factor(clust))

# dendr$labels holds the leaf labels; attach each leaf's cluster by label.
dendr[["labels"]] <- merge(x = dendr[["labels"]], y = clust.df, by = "label")

# Plot the dendrogram; note color = cluster in geom_text(...).
ggplot() +
  geom_segment(
    data = segment(dendr),
    mapping = aes(x = x, y = y, xend = xend, yend = yend)
  ) +
  geom_text(
    data = label(dendr),
    mapping = aes(x, y, label = label, color = cluster),
    hjust = 0, size = 3
  ) +
  coord_flip() +
  scale_y_reverse(expand = c(0.2, 0)) +
  theme(
    panel.grid = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  )
A workaround would be to plot the cluster object with plot() and then use the function rect.hclust() to draw borders around the clusters (the number of clusters is set with the argument k=). If the result of rect.hclust() is saved as an object, it is a list in which each element contains the observations belonging to one cluster.
# Draw the base-graphics dendrogram, then box the k = 2 clusters;
# the value of rect.hclust() is kept in gg for later use.
plot(x = hc)
gg <- rect.hclust(tree = hc, k = 2)
Now this list can be converted to a data frame in which the column clust contains the cluster names (two groups in this example); each name is repeated according to the length of the corresponding list element.
# Flatten the per-cluster list into one row per observation. Cluster names
# are generated from the number of list elements, so this works for any k
# passed to rect.hclust(), not just k = 2.
clust.gr <- data.frame(
  num = unlist(gg),
  clust = rep(paste0("Clust", seq_along(gg)), times = lengths(gg))
)
head(clust.gr)
num clust
sta_1 1 Clust1
sta_2 2 Clust1
sta_3 3 Clust1
sta_5 5 Clust1
sta_8 8 Clust1
sta_9 9 Clust1
New data frame is merged with label() information of dendr object (dendro_data() result).
# Join leaf label positions to cluster membership: the label column on the
# left is matched against the row names of clust.gr on the right.
text.df <- merge(
  x = label(dendr),
  y = clust.gr,
  by.x = "label",
  by.y = "row.names"
)
head(text.df)
label x y num clust
1 sta_1 8 0 1 Clust1
2 sta_10 28 0 10 Clust2
3 sta_11 41 0 11 Clust2
4 sta_12 31 0 12 Clust2
5 sta_13 10 0 13 Clust1
6 sta_14 37 0 14 Clust2
When plotting dendrogram use text.df to add labels with geom_text() and use column clust for colors.
# Tree from the dendro_data segments, leaf labels coloured by text.df$clust.
ggplot() +
  geom_segment(
    data = segment(dendr),
    mapping = aes(x = x, y = y, xend = xend, yend = yend)
  ) +
  geom_text(
    data = text.df,
    mapping = aes(x = x, y = y, label = label, color = clust),
    hjust = 0, size = 3
  ) +
  coord_flip() +
  scale_y_reverse(expand = c(0.2, 0)) +
  theme(
    panel.grid = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  )
Adding to @DidzisElferts' and @jlhoward's code, the dendrogram itself can be coloured.
library(ggplot2)
library(ggdendro)
library(plyr)
library(zoo)
# Prepare the data for a dendrogram whose branches are coloured by cluster:
# segments above the cut stay grey (cluster 1), segments below it get the
# number of the cluster they belong to (2, 3, ...).
df <- USArrests # really bad idea to muck up internal datasets
labs <- paste("sta_", 1:50, sep = "") # new labels
rownames(df) <- labs # set new row names
cut <- 4 # Number of clusters
hc <- hclust(dist(df), "ave") # hierarchical clustering
dendr <- dendro_data(hc, type = "rectangle")
clust <- cutree(hc, k = cut) # find 'cut' clusters
clust.df <- data.frame(label = names(clust), cluster = clust)
# Split dendrogram into upper grey section and lower coloured section
# 'height' is the distinct segment y values, largest first
height <- unique(dendr$segments$y)[order(unique(dendr$segments$y), decreasing = TRUE)]
# Cut halfway between the cut-th and (cut-1)-th largest heights
cut.height <- mean(c(height[cut], height[cut-1]))
# line = 1 for segments with y == yend that lie above the cut, 2 otherwise ...
dendr$segments$line <- ifelse(dendr$segments$y == dendr$segments$yend &
dendr$segments$y > cut.height, 1, 2)
# ... and also 1 for any segment whose far end (yend) is above the cut
dendr$segments$line <- ifelse(dendr$segments$yend > cut.height, 1, dendr$segments$line)
# Number the clusters
# diff() is +1 exactly where 'line' steps from 1 to 2 between consecutive rows
dendr$segments$cluster <- c(-1, diff(dendr$segments$line))
change <- which(dendr$segments$cluster == 1)
# Give the i-th such row the id i + 1 (id 1 is reserved for line == 1 rows)
# NOTE(review): assumes 'change' has at least 'cut' entries - confirm
for (i in 1:cut) dendr$segments$cluster[change[i]] = i + 1
# line == 1 rows become cluster 1; remaining zeros become NA so that
# na.locf() can carry the last assigned id forward over them
dendr$segments$cluster <- ifelse(dendr$segments$line == 1, 1,
ifelse(dendr$segments$cluster == 0, NA, dendr$segments$cluster))
dendr$segments$cluster <- na.locf(dendr$segments$cluster)
# Consistent numbering between segment$cluster and label$cluster
# NOTE(review): levels() is NULL unless dendr$labels$label is a factor -
# confirm against the installed ggdendro / R version
clust.df$label <- factor(clust.df$label, levels = levels(dendr$labels$label))
clust.df <- arrange(clust.df, label)
# Relabel the cutree ids, in order of first appearance after sorting, as 2..cut+1
clust.df$cluster <- factor((clust.df$cluster), levels = unique(clust.df$cluster), labels = (1:cut) + 1)
dendr[["labels"]] <- merge(dendr[["labels"]], clust.df, by = "label")
# Positions for cluster labels
# Take the segment row just after every other run of equal cluster ids
n.rle <- rle(dendr$segments$cluster)
N <- cumsum(n.rle$lengths)
N <- N[seq(1, length(N), 2)] + 1
N.df <- dendr$segments[N, ]
# Shift ids down by one so the printed cluster labels start at 1
N.df$cluster <- N.df$cluster - 1
# Plot the dendrogram
# Plot the dendrogram: thin grey segments above the cut, thick coloured
# segments below it, a number beside each cluster and coloured leaf labels.
ggplot() +
  geom_segment(
    data = segment(dendr),
    mapping = aes(
      x = x, y = y, xend = xend, yend = yend,
      size = factor(line), colour = factor(cluster)
    ),
    lineend = "square",
    show.legend = FALSE
  ) +
  scale_colour_manual(values = c("grey60", rainbow(cut))) +
  scale_size_manual(values = c(.1, 1)) +
  geom_text(
    data = N.df,
    mapping = aes(x = x, y = y, label = factor(cluster), colour = factor(cluster + 1)),
    hjust = 1.5,
    show.legend = FALSE
  ) +
  geom_text(
    data = label(dendr),
    mapping = aes(x, y, label = label, colour = factor(cluster)),
    hjust = -0.2, size = 3,
    show.legend = FALSE
  ) +
  scale_y_reverse(expand = c(0.2, 0)) +
  labs(x = NULL, y = NULL) +
  coord_flip() +
  theme(
    panel.grid = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  )
The 2-cluster and 4-cluster solutions:
A short way to achieve a similar result is to use the package dendextend, taken from this overview.
# Coloured dendrogram via dendextend: style a dendrogram object with set()
# and hand it to ggplot through as.ggdend().
df <- USArrests                   # really bad idea to muck up internal datasets
labs <- paste0("sta_", 1:50)      # new labels
rownames(df) <- labs              # set new row names
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
library(magrittr)
library(ggplot2)
library(dendextend)
dend <- df %>%
  dist %>%
  hclust %>%
  as.dendrogram %>%
  set("branches_k_color", k = 4) %>%
  set("branches_lwd", 0.7) %>%
  set("labels_cex", 0.6) %>%
  set("labels_colors", k = 4) %>%
  set("leaves_pch", 19) %>%
  set("leaves_cex", 0.5)
ggd1 <- as.ggdend(dend)
ggplot(ggd1, horiz = TRUE)
Note: The order of the states is slightly different compared to those above - not really changing interpretation though.
For those that are still looking for a convenient way to do this, you can use my package ggdendroplot (https://github.com/NicolasH2/ggdendroplot).
If you have the data from the posted example:
# Same setup as the posted example: relabel the states, then cluster them.
labs <- paste0("sta_", 1:50)          # new labels
rownames(USArrests) <- labs           # set new row names
hc <- hclust(d = dist(USArrests), "ave")
...you can use ggdendroplot with ggplot to get a colored dendrogram:
# Install ggdendroplot from GitHub only when it is not already available, so
# re-running the script does not reinstall the package every time.
if (!requireNamespace("ggdendroplot", quietly = TRUE)) {
  devtools::install_github("NicolasH2/ggdendroplot")
}
library(ggdendroplot)
library(ggplot2)
# geom_dendro() is an ordinary ggplot layer built from the hclust object
ggplot() + geom_dendro(hc, dendrocut = 30)
You can turn it sideways and on its head etc. Its basically just a ggplot layer, so you can modify the graph further as you wish and add it to other ggplots. Check out the github page to see what you can do with ggdendroplot.
Related
Let's say I did four measurements/experiments (M1..M4) producing intensity values which vary across 5 locations (loc_1...5). I observed various classes of elements (n=7). Now I would like to summarize the results in a single diagram using facet_grid and geom_raster from ggplot2. I end up with the diagram below (see image).
Now the question is whether there is a simple solution to force rows to fill the space in each facet (i.e to drop unused rows in each panel).
Thank you
# NOTE: the original began with rm(list = ls()); it is dropped here because
# wiping the caller's workspace is a side effect a shared script should not have.
library(ggplot2)
library(reshape2)
set.seed(123)
# let's create a fake dataset
nb.mesure <- 4
n.row <- 200
n.col <- 5
nb.class <- 7
d <- matrix(round(runif(n.row * n.col), 2), ncol = n.col)
colnames(d) <- sprintf("Loc_%02d", seq_len(n.col))
# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row / 2, 1:100))
# let's create a data.frame d
# (the random draws stay in the original order, so set.seed(123) still
# reproduces the same data)
d <- data.frame(
  d,
  mesure = rep(sprintf("M%d", seq_len(nb.mesure)), each = n.row / nb.mesure),
  elements = elements,
  class = sample(
    nb.class,
    length(elements),
    replace = TRUE,
    prob = seq(0.01, 0.25, length.out = nb.class)
  )
)
# Melt to long format: one row per (mesure, elements, class, location)
dm <- melt(d, id.vars = c("mesure", "elements", "class"))
colnames(dm) <- c("mesure", "elements", "class", "pos", "intensity")
# Plotting
p <- ggplot(dm, aes(x = pos, y = elements, fill = intensity))
p <- p + geom_raster()
p <- p + facet_grid(mesure ~ class, scales = "free", space = "free_y")
p <- p + theme_bw()
p <- p + theme(text = element_text(size = 8))
p <- p + theme(text = element_text(family = "mono", face = "bold"))
p <- p + theme(
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  axis.text.x = element_text(colour = "grey20", size = 6, angle = 45, vjust = 0.3)
)
print(p)
If your goal is to introduce NA values (or something else) for all missing combinations of mesure, elements, class and pos you can use the complete function from the tidyr package like so:
library(tidyr)
# Add a row for every combination of the four key columns; intensity is NA
# for the combinations that were not present in dm.
dm <- complete(
  data = dm,
  mesure, elements, class, pos,
  fill = list(intensity = NA)
)
UPDATE
In case you want to have the non-NA values expand to fill each facet you have to move away from facet_grid and switch to facet_wrap.
# Switch to wrapped facets: four rows of panels, y scale free per panel.
p <- p + facet_wrap(mesure ~ class, nrow = 4, scales = "free_y")
I'm trying to produce a scatter plot with geom_point where the points are circumscribed by a smoothed polygon, with geom_polygon.
Here's my point data:
# Three groups of 30 points each, with group means at (-0.1, -1), (0, 0) and
# (0.1, 1) and sd 0.1 in both coordinates, plus a standard-normal value per point.
set.seed(1)
df <- data.frame(
  x = rnorm(90, mean = rep(c(-0.1, 0, 0.1), each = 30), sd = 0.1),
  y = rnorm(90, mean = rep(c(-1, 0, 1), each = 30), sd = 0.1),
  val = rnorm(90),
  cluster = rep(c(1, 2, 3), each = 30),
  stringsAsFactors = FALSE
)
I color each point according to the interval that df$val falls in. Here's the interval data:
# Lookup table: one row per value interval (label, bounds, colour). The last
# row, with NA bounds, is the "not expressed" category.
interval.breaks <- c(-3, -2, -0.999, 0, 1.96, 3.91, 5.87)
intervals.df <- data.frame(
  interval = c(
    "(-3,-2]", "(-2,-0.999]", "(-0.999,0]", "(0,1.96]",
    "(1.96,3.91]", "(3.91,5.87]", "not expressed"
  ),
  start = c(interval.breaks[-length(interval.breaks)], NA),
  end = c(interval.breaks[-1], NA),
  col = c(
    "#2f3b61", "#436CE8", "#E0E0FF", "#7d4343",
    "#C74747", "#EBCCD6", "#D3D3D3"
  ),
  stringsAsFactors = FALSE
)
Assigning colors and intervals to the points:
# Attach the colour and interval label of the matching intervals.df row to
# every point. Compared with building one data frame per value:
#  - NA values still map to the last row of intervals.df;
#  - a value on a shared boundary takes the first matching row, i.e. the
#    interval it closes, in agreement with the "(a,b]" labels;
#  - a value outside every interval stops with a clear message instead of
#    silently producing a row-count mismatch in cbind().
interval.idx <- vapply(df$val, function(v) {
  if (is.na(v)) {
    return(nrow(intervals.df))
  }
  hit <- which(intervals.df$start <= v & intervals.df$end >= v)
  if (length(hit) == 0L) {
    stop("value ", v, " does not fall in any interval of intervals.df",
         call. = FALSE)
  }
  hit[1L]
}, integer(1))
df <- cbind(
  df,
  data.frame(
    col = intervals.df$col[interval.idx],
    interval = intervals.df$interval[interval.idx],
    stringsAsFactors = FALSE
  )
)
Preparing the colors for the legend, which will show each interval:
# Named colour vector (names = interval labels) for scale_color_manual(),
# and interval as a factor whose levels follow the row order of intervals.df.
colors <- setNames(intervals.df$col, intervals.df$interval)
df$interval <- factor(df$interval, levels = intervals.df$interval)
Here's where I constructed the smoothed polygons (using a function courtesy of this link):
# One outline colour per cluster, matched by position to the sorted cluster ids.
cluster.cols <- c("#ff00ff", "#088163", "#ccbfa5")
clusters <- sort(unique(df$cluster))
# Smooth a closed polygon by splining its vertices.
#
# xy       n-by-2 matrix (or data frame) of polygon vertices, in order.
# vertices number of points at which the spline is evaluated over the
#          wrapped vertex sequence; only those falling in the middle
#          section are returned.
# k        number of vertices wrapped around each end so that the curve
#          closes smoothly; requires 0 <= k <= n.
# ...      further arguments passed on to spline().
#
# Returns a two-column matrix (columns x1, x2) of smoothed coordinates.
splinePolygon <- function(xy, vertices, k = 3, ...) {
  xy <- as.matrix(xy)
  n <- nrow(xy)
  # Enforce what the original only stated in a comment: n-by-2 with n >= k.
  stopifnot(ncol(xy) == 2L, k >= 0, n >= k)
  # Wrap k vertices around each end.
  if (k >= 1) {
    data <- rbind(
      xy[(n - k + 1):n, , drop = FALSE],
      xy,
      xy[1:k, , drop = FALSE]
    )
  } else {
    data <- xy
  }
  # Spline the x and y coordinates against the vertex index.
  idx <- 1:(n + 2 * k)
  data.spline <- spline(idx, data[, 1], n = vertices, ...)
  x <- data.spline$x
  x1 <- data.spline$y
  x2 <- spline(idx, data[, 2], n = vertices, ...)$y
  # Retain only the middle part; drop = FALSE keeps a matrix even when a
  # single point survives.
  cbind(x1, x2)[k < x & x <= n + k, , drop = FALSE]
}
library(data.table)
# For each cluster: take its points, keep the convex-hull vertices (chull()
# returns their row indices), smooth the hull with splinePolygon() and
# collect the outline with the same columns as df. Plain data-frame indexing
# replaces the data.table round trip, and seq_along() keeps the loop empty
# when there are no clusters.
hulls.df <- do.call(rbind, lapply(seq_along(clusters), function(l) {
  pts <- df[which(df$cluster == clusters[l]), ]
  hull <- pts[chull(pts$x, pts$y), ]
  spline.hull <- splinePolygon(cbind(hull$x, hull$y), 100)
  data.frame(
    x = spline.hull[, 1],
    y = spline.hull[, 2],
    val = NA,
    cluster = clusters[l],
    col = cluster.cols[l],
    interval = NA,
    stringsAsFactors = FALSE
  )
}))
hulls.df$cluster <- factor(hulls.df$cluster, levels = clusters)
And here's my ggplot command:
library(ggplot2)
# Open-circle points coloured by interval, with a manual colour scale that
# keeps unused intervals in the legend (drop = FALSE).
p <- ggplot(df, aes(x = x, y = y, colour = interval)) +
  geom_point(cex = 2, shape = 1, stroke = 1) +
  labs(x = "X", y = "Y") +
  theme_bw() +
  theme(
    legend.key = element_blank(),
    panel.border = element_blank(),
    strip.background = element_blank()
  ) +
  scale_color_manual(drop = FALSE, values = colors, name = "DE")
# Cluster outlines: the colour is set directly from hulls.df$col (not
# mapped), so the outlines do not appear in the legend.
p <- p + geom_polygon(
  data = hulls.df,
  aes(x = x, y = y, group = cluster),
  color = hulls.df$col,
  fill = NA
)
which produces:
My question is: how do I add a legend for the polygons under the legend for the points? I want a legend with 3 lines colored according to the cluster colors, with the corresponding cluster number beside each line.
Slightly different output, only changing the last line of your code, it may solve your purpose:
# Map fill to cluster so the polygons get their own legend; a low alpha
# keeps the points visible through the fill.
p + geom_polygon(
  data = hulls.df,
  mapping = aes(x = x, y = y, group = cluster, fill = cluster),
  alpha = 0.1
)
Say, you want to add a legend of the_factor. My basic idea is,
(1) put the_factor into mapping by using unused aes arguments; aes(xx = the_factor)
(2) if (1) affects something, delete the effect by using scale_xx_manual()
(3) modify the legend by using guides(xx = guide_legend(override.aes = list()))
In your case, aes(fill) and aes(alpha) are unused. The former is better to do it because of no effect. So I used aes(fill=as.factor(cluster)).
# Map the otherwise unused fill aesthetic to cluster so that a "cluster"
# legend is created, then restyle its keys with the outline colours.
p <- ggplot(df, aes(x = x, y = y, colour = interval, fill = as.factor(cluster))) + # add aes(fill=...)
  geom_point(cex = 2, shape = 1, stroke = 1) +
  labs(x = "X", y = "Y", fill = "cluster") + # add fill="cluster"
  theme_bw() +
  theme(
    legend.key = element_blank(),
    panel.border = element_blank(),
    strip.background = element_blank()
  ) +
  scale_color_manual(drop = FALSE, values = colors, name = "DE") +
  guides(fill = guide_legend(override.aes = list(colour = cluster.cols, pch = 0))) # add
p <- p + geom_polygon(
  data = hulls.df,
  aes(x = x, y = y, group = cluster),
  color = hulls.df$col,
  fill = NA
)
Of course, you can make the same graph by using aes(alpha = the_factor). Because alpha does affect the plot, you need to neutralise it with scale_alpha_manual().
# Same idea using alpha: map alpha to cluster to get a "cluster" legend,
# force every alpha to 1 so the points look unchanged, and restyle the
# legend keys with the outline colours.
g <- ggplot(df, aes(x = x, y = y, colour = interval)) +
  geom_point(aes(alpha = as.factor(cluster)), cex = 2, shape = 1, stroke = 1) + # add aes(alpha)
  labs(x = "X", y = "Y", alpha = "cluster") + # add alpha="cluster"
  theme_bw() +
  theme(
    legend.key = element_blank(),
    panel.border = element_blank(),
    strip.background = element_blank()
  ) +
  scale_color_manual(drop = FALSE, values = colors, name = "DE") +
  scale_alpha_manual(values = c(1, 1, 1)) + # add
  guides(alpha = guide_legend(override.aes = list(colour = cluster.cols, pch = 0))) # add
# Add the outlines to g itself; the original added them to p, which threw
# away the alpha-based plot built above.
g <- g + geom_polygon(
  data = hulls.df,
  aes(x = x, y = y, group = cluster),
  color = hulls.df$col,
  fill = NA
)
What you are asking for is two colour scales. My understanding is that this is not possible. But you can give the impression of having two colour scales with a bit of a cheat and using the filled symbols (shapes 21 to 25).
# Two "colour" legends by using both scales: points use a filled shape (21)
# driven by the fill scale, polygon outlines use the colour scale.
p <- ggplot(data = df, mapping = aes(x = x, y = y, fill = interval)) +
  geom_point(shape = 21, cex = 2, stroke = 1, colour = NA) +
  labs(x = "X", y = "Y") +
  theme_bw() +
  theme(
    strip.background = element_blank(),
    panel.border = element_blank(),
    legend.key = element_blank()
  ) +
  scale_fill_manual(name = "DE", values = colors, drop = FALSE) +
  geom_polygon(
    data = hulls.df,
    mapping = aes(x = x, y = y, colour = cluster),
    fill = NA
  ) +
  scale_colour_manual(values = cluster.cols)
p
Alternatively, use a filled polygon with a low alpha
# Points keep the colour scale; polygons are filled per cluster at low
# alpha. override.aes(fill = NA) stops the polygon fill from showing in
# the keys of the point legend.
p <- ggplot(data = df, mapping = aes(x = x, y = y, colour = interval)) +
  geom_point(cex = 2, shape = 1, stroke = 1) +
  labs(x = "X", y = "Y") +
  theme_bw() +
  theme(
    strip.background = element_blank(),
    panel.border = element_blank(),
    legend.key = element_blank()
  ) +
  scale_color_manual(
    name = "DE",
    values = colors,
    drop = FALSE,
    guide = guide_legend(override.aes = list(fill = NA))
  ) +
  geom_polygon(
    data = hulls.df,
    mapping = aes(x = x, y = y, group = cluster, fill = cluster),
    alpha = 0.2,
    show.legend = TRUE
  ) +
  scale_fill_manual(values = cluster.cols)
p
But this might make the point colours difficult to see.
Does somebody know an alternative method for ordering the stacks of a ggplot2 bar graph?
I used to use for example
library(ggplot2)
library(plyr)
# Example data: five groups (a-e) of different sizes, a random count per
# row and a cycling "example" label. The data frame is built directly with
# the intended column types; the original went through character matrices,
# where as.numeric() on a factor column (R < 4.0) returns level codes rather
# than the counts, and it also overwrote the name `c`.
group.sizes <- c(a = 5, b = 7, c = 3, d = 10, e = 15)
dat <- do.call(rbind, Map(function(group, n) {
  data.frame(
    x = rep(group, n),
    count = as.numeric(sample(1:100, n)),
    example = rep_len(c("1", "2", "3"), n),
    stringsAsFactors = FALSE
  )
}, names(group.sizes), group.sizes))
rownames(dat) <- NULL
# Horizontal bars, groups ordered by total count. The `order` aesthetic is
# what used to sort the stacks within each bar (see the question text).
GP <- ggplot(dat, aes(x = reorder(x, count, sum), y = count, fill = example, order = desc(count))) +
  geom_bar(stat = "identity", fill = "grey", colour = "black", size = 1) +
  coord_flip() +
  scale_y_continuous() +
  scale_x_discrete('') +
  #scale_fill_brewer()+
  labs(y = "") +
  theme_bw() +
  theme(
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.text.x = element_text(size = 10, face = "bold"),
    axis.title.x = element_text(size = 16, face = "bold"),
    axis.title.y = element_text(size = 16, face = "bold"),
    plot.title = element_text(size = 16, face = "bold"),
    strip.text.x = element_text(size = 10, face = "bold"),
    strip.background = element_blank()
  )
print(GP)
to create graphs like
However, in version 2.0.0 of ggplot2 the order aesthetic has been removed, and now the graph looks like this:
Does anybody know an alternative?
Thanks
I have two pieces of data that I want to overlay onto the same plot. I've looked at several ggplot articles and I don't think it's possible within ggplot. So I have been using barplot. I have 5 tiers and I'm plotting total dollars by tier as a solid bar.
Then I have another piece of data that represents the number of tasks within those tiers by two different types of workers. I have this as a stacked bar plot. But I want to show them on the same graph with the total dollar amount as one bar and then the corresponding stacked bar next to it.
Here are the plots:
The data for the first graph looks like this (it's a table):
1 2 3 4 5
0 9 340 97 812 4271
1 1 417 156 3163 11314
The data for the second graph looks like this (this is a dataset):
Tier variable value
1 1 Opp_Amt 16200.00
2 2 Opp_Amt 116067.50
3 3 Opp_Amt 35284.12
4 4 Opp_Amt 278107.10
5 5 Opp_Amt 694820.29
I want to put the graphs on top of each other but the bars keep overlapping and I want them to appear side by side by tier.
Code for what I have so far.
# Widen all four margins to make room for axis labels on both sides
par(mar = c(4.5, 6, 6, 6))
## First data set: total amount per tier as solid bars, axis on the left
barplot(
  data1$value,
  main = "Work Score", col = "black", space = -10,
  ylim = c(0, 700000), xlab = "", ylab = "", axes = FALSE
)
axis(side = 2, ylim = c(0, 700000), col = "black", las = 1) ## las=1 makes horizontal labels
mtext("Total Opportunity Amount", side = 2, line = 3.5)
box()
## Draw the next plot on top of the current one instead of starting a new page
par(new = TRUE)
## Second data set: hatched stacked bars, with the axis scale on the right
m <- barplot(
  counts,
  col = c("red", "darkblue"), density = 20, space = 3, width = 0.5,
  ylim = c(0, 16000), xlab = "", ylab = "", axes = FALSE
)
## a little farther out (line=3.5) to make room for labels
mtext("Task Ratio: Outbound to AE", side = 4, col = "red", line = 3.5)
axis(side = 4, ylim = c(0, 16000), col = "red", col.axis = "black", las = 1)
And it gives me this
Using ggplot, I would do something like one of these. They plot the two sets of data separately. The first arranges the data into one dataframe, then uses facet_wrap() to position the plots side-by-side. The second generates the two plot objects separately, then combines the two plots and the legend into a combined plot.
But if you really need the "dual y-axis" approach, then with some fiddling, and using the plots' layouts and gtable functions, it can be done (using code borrowed from here).
Like this:
library(ggplot2)
library(gtable)
library(plyr)
# Task counts per tier and worker type (gp = 0/1), transcribed from the
# table in the question. Tier 4 / gp 1 is 3163 there; the original
# transcription had 3063.
df1 <- data.frame(Tier = rep(1:5, each = 2),
                  y = c(9, 1, 340, 417, 97, 156, 812, 3163, 4271, 11314),
                  gp = rep(0:1, 5))
# Total opportunity amount per tier
df2 <- read.table(text = "
Tier variable value
1 Opp_Amt 16200.00
2 Opp_Amt 116067.50
3 Opp_Amt 35284.12
4 Opp_Amt 278107.10
5 Opp_Amt 694820.29", header = TRUE)
# Bring both data sets to the same four columns (Tier, Value, gp, var) so
# they can be stacked and faceted by var.
dfA <- df1
dfB <- df2
names(dfA) <- c("Tier", "Value", "gp")
dfA$var <- "Task Ratio"
dfB <- dfB[, c(1, 3)]          # keep Tier and value
dfB$gp <- 3                    # a third group id for the solid bars
dfB$var <- "Total Opportunity Amount"
names(dfB) <- names(dfA)
df <- rbind(dfA, dfB)
# Reverse the factor levels so the amount panel comes first
df$var <- factor(df$var)
df$var <- factor(df$var, levels = rev(levels(df$var)))
# One panel per data set, each with its own y scale; only groups 0 and 1
# are listed in the legend.
ggplot(df, aes(Tier, Value, fill = factor(gp))) +
  geom_bar(position = "stack", stat = "identity") +
  facet_wrap(~ var, scales = "free_y") +
  scale_fill_manual("Group", breaks = c("0", "1"),
                    values = c("#F8766D", "#00BFC4", "black")) +
  theme_bw() +
  theme(panel.spacing = unit(2, "lines"),
        panel.grid = element_blank())
Or this:
# Stacked task counts per tier. The upper limit is 10% above the tallest
# stack, computed from the per-tier totals only; the original took max()
# over the whole ddply() result, Tier column included.
p1 <- ggplot(df1, aes(factor(Tier), y, fill = factor(gp))) +
  geom_bar(position = "stack", stat = "identity") +
  #guides(fill = FALSE) +
  scale_y_continuous("Task Ratio",
                     limits = c(0, 1.1 * max(tapply(df1$y, df1$Tier, sum))),
                     expand = c(0, 0)) +
  scale_x_discrete("Tier") +
  theme_bw() +
  theme(panel.grid = element_blank())
# Total opportunity amount per tier
p2 <- ggplot(df2, aes(factor(Tier), value)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Total Opportunity Amount",
                     limits = c(0, 1.1 * max(df2$value)),
                     expand = c(0, 0)) +
  scale_x_discrete("Tier") +
  theme_bw() +
  theme(panel.grid = element_blank())
# Get the ggplot grobs,
# and get the legend from p1, then drop the legend's column from g1
g1 <- ggplotGrob(p1)
leg <- gtable_filter(g1, "guide-box")
legColumn <- g1$layout[which(g1$layout$name == "guide-box"), "l"]
g1 <- g1[, -legColumn]
g2 <- ggplotGrob(p2)
# Make sure the widths are the same in g1 and g2
library(grid)
maxWidth <- unit.pmax(g1$widths, g2$widths)
g1$widths <- as.list(maxWidth)
g2$widths <- as.list(maxWidth)
# Combine g1, g2 and the legend: the two plots share the space left after
# the legend's own width
library(gridExtra)
grid.arrange(arrangeGrob(g2, g1, nrow = 1), leg,
             widths = unit.c(unit(1, "npc") - leg$width, leg$width), nrow = 1)
Or the dual y-axis approach (but not recommended, for the reasons given in @Phil's post):
# Dual y-axis plot assembled by hand: build the two bar charts separately,
# then move the bars and the y axis of p2 into the gtable of p1 so both
# share one panel (p1's axis goes to the right, p2's to the left).
width1 = 0.6 # width of bars in p1
width2 = 0.2 # width of bars in p2
pos = .5*width1 + .5*width2 # positioning bars in p2
# p1: stacked task counts, no legend, red y-axis text and ticks
p1 <- ggplot(df1, aes(factor(Tier), y, fill = factor(gp))) +
geom_bar(position = "stack", stat = "identity", width = width1) +
guides(fill = FALSE) +
scale_y_continuous("",
limit = c(0, 1.1*max(ddply(df1, .(Tier), summarise, sum = sum(y)))),
expand = c(0,0)) +
scale_x_discrete("Tier") +
theme_bw() +
theme(panel.grid = element_blank(),
axis.text.y = element_text(colour = "red", hjust = 0, margin = margin(l = 2, unit = "pt")),
axis.ticks.y = element_line(colour = "red"))
# p2: amount bars shifted left by 'pos'; geom_blank() uses the
# factor(Tier) x mapping from ggplot()
p2 <- ggplot(df2, aes(factor(Tier), value)) +
geom_blank() +
geom_bar(aes(x = Tier - pos), stat = "identity", width = width2) +
scale_y_continuous("", limit = c(0, 1.1*max(df2$value)), expand = c(0,0)) +
theme_bw() +
theme(panel.grid = element_blank())
# Get ggplot grobs
g1 <- ggplotGrob(p1)
g2 <- ggplotGrob(p2)
# Get locations of the panels in g1
pp1 <- c(subset(g1$layout, name == "panel", se = t:r))
## Get bars from g2 and insert them into the panel in g1
# NOTE(review): [[4]][[4]] presumably reaches the bar layer inside p2's panel
# grob; this depends on ggplot2's internal grob structure - verify
g <- gtable_add_grob(g1, g2$grobs[[which(g2$layout$name == "panel")]][[4]][[4]], pp1$t, pp1$l)
# Grab axis from g1, reverse elements, and put it on the right
index <- which(g1$layout$name == "axis-l")
grob <- g1$grobs[[index]]
axis <- grob$children[[2]]
axis$widths <- rev(axis$widths)
axis$grobs <- rev(axis$grobs)
# Shift the first grob of the reversed axis by -1npc + 3pt
axis$grobs[[1]]$x <- axis$grobs[[1]]$x - unit(1, "npc") + unit(3, "pt")
# New column after the panel, as wide as g1's left-axis column, holds the axis
g <- gtable_add_cols(g, g1$widths[g1$layout[index, ]$l], pp1$r)
g <- gtable_add_grob(g, axis, pp1$t, pp1$l+1)
# Grab axis from g2, and put it on the left
index <- which(g2$layout$name == "axis-l")
grob <- g2$grobs[[index]]
axis <- grob$children[[2]]
# White rectangle over the old left-axis cell before adding the new axis
g <- gtable_add_grob(g, rectGrob(gp = gpar(col = NA, fill = "white")), pp1$t-1, pp1$l-1, pp1$b+1)
g <- gtable_add_grob(g, axis, pp1$t, pp1$l-1)
# Add axis titles
# right axis title
RightAxisText = textGrob("Task Ratio", rot = 90, gp = gpar(col = "red"))
g <- gtable_add_cols(g, unit.c(unit(1, "grobwidth", RightAxisText) + unit(1, "line")), 5)
g <- gtable_add_grob(g, RightAxisText, pp1$t, pp1$r+2)
# left axis title
LeftAxisText = textGrob("Total Opportunity Amount", rot = 90)
g <- gtable_add_grob(g, LeftAxisText, pp1$t, pp1$l-2)
g$widths[2] <- unit.c(unit(1, "grobwidth", LeftAxisText) + unit(1, "line"))
# Draw it
grid.newpage()
grid.draw(g)
It appears you are trying to plot two variables with two different y scales on one chart. I recommend against this; it is considered bad practice. See, for example, @hadley's (the author of ggplot2) answer about a similar issue: https://stackoverflow.com/a/3101876/3022126
It is possible to plot two variables on one y axis if they have comparable scales, but the range of your two datasets do not greatly overlap.
Consider other visualisations, perhaps using two separate charts.
Try looking at the add parameter for barplot.
## Function to create alpha colors for illustration.
# Add an alpha channel to one or more colours.
#
# col    colour specification(s) accepted by col2rgb(): names, hex strings
#        or palette indices; may be a vector.
# alpha  opacity in [0, 1]; recycled across the colours.
#
# Returns a character vector of "#RRGGBBAA" strings, one per colour. The
# original used only the first colour when given a vector.
col2alpha <- function(col, alpha = 0.5) {
  channels <- col2rgb(col) / 255   # 3 x n matrix with rows red, green, blue
  rgb(channels["red", ], channels["green", ], channels["blue", ], alpha)
}
## Some fake data
# Two small series with opposite trends
dat1 <- data.frame(id = 1:4, val = seq(10, 4, by = -2))
dat2 <- data.frame(id = 1:4, val = seq(4, 10, by = 2))
# Draw the first series, then overlay the second on the same axes
# (add = TRUE); the semi-transparent colours let both remain visible.
barplot(height = dat1$val, col = col2alpha("blue"))
barplot(height = dat2$val, col = col2alpha("red"), add = TRUE)
Map Data: InputSpatialData
Yield Data: InputYieldData
Results_using viewport():
EDIT: Results using the "multiplot" function as suggested by @rawr (see comment below). I do love the new results, especially that the map is bigger. Nonetheless, the boxplot still seems misaligned with the map plot. Is there a more systematic way to control centering and placement?
My Question: Is there a way to control for the size of the boxplot plot to make it close in size and centered with the map plot above it?
FullCode:
## Loading packages
library(rgdal)
library(plyr)
library(maps)
library(maptools)
library(mapdata)
library(ggplot2)
library(RColorBrewer)
library(foreign)
library(sp)
library(ggsubplot)
library(reshape)
library(gridExtra)
## get.centroids: function to extract polygon ID and centroid from shapefile
# Return the ID and label point of the x-th polygon of the global `wmap`.
#
# x  index into wmap@polygons.
#
# Returns a named vector c(id, long, lat). Because the ID is a string,
# c() makes the whole result character; callers convert long/lat back to
# numeric.
#
# Slot access uses `@`; the `#` that had replaced it commented out the rest
# of each line.
get.centroids = function(x){
  poly = wmap@polygons[[x]]
  ID = poly@ID
  centroid = as.numeric(poly@labpt)
  return(c(id=ID, long=centroid[1], lat=centroid[2]))
}
## read input files (shapefile and .csv file)
## read input files (shapefile and .csv file)
wmap <- readOGR(dsn=".", layer="ne_110m_admin_0_countries")
wyield <- read.csv(file = "F:/Purdue University/RA_Position/PhD_ResearchandDissert/PhD_Draft/GTAP-CGE/GTAP_Sims&Rests/NewFiles/RMaps_GTAP/AllWorldCountries_CCShocksGTAP.csv", header=TRUE, sep=",", na.strings="NA", dec=".", strip.white=TRUE)
wyield$ID_1 <- substr(wyield$ID_1,3,10) # keep characters 3-10 of ID_1 (drops the first two)
## attach the polygon row names as an id column
## NOTE(review): this matches by position, so it assumes the rows of wyield
## are in the same order as the polygons in wmap - confirm
wyield <- cbind(id=rownames(wmap@data),wyield)
## Build table of labels for annotation (legend).
## NOTE(review): 1:17 hard-codes the number of polygons used - confirm
labs <- do.call(rbind,lapply(1:17,get.centroids)) # polygon ID and centroid from shapefile
labs <- merge(labs,wyield[,c("id","ID_1","name_long")],by="id") # add ID_1 and name_long for each polygon id
# long/lat arrive as text (possibly factors); convert back to numbers
labs[,2:3] <- sapply(labs[,2:3],function(x){as.numeric(as.character(x))})
labs$sort <- as.numeric(as.character(labs$ID_1))
# name_code = "<ID_1> <name_long>" (columns 4 and 5 after the merge)
labs <- cbind(labs, name_code = paste(as.character(labs[,4]), as.character(labs[,5])))
labs <- labs[order(labs$sort),]
## Dataframe for boxplot plot: one row per region and scenario
boxplot.df <- wyield[c("ID_1","name_long","A1B","A1BLow","A1F","A1T","A2","B1","B1Low","B2")]
boxplot.df[1] <- sapply(boxplot.df[1], as.numeric)
boxplot.df <- boxplot.df[order(boxplot.df$ID_1),]
boxplot.df <- cbind(boxplot.df, name_code = paste(as.character(boxplot.df[,1]), as.character(boxplot.df[,2])))
boxplot.df <- melt(boxplot.df, id=c("ID_1","name_long","name_code"))
# Fix the factor levels in their current order so the ordering by ID_1 is kept
boxplot.df <- transform(boxplot.df,name_code=factor(name_code,levels=unique(name_code)))
## Define new theme for map
## I have found this function on the website
# Map theme: theme_bw() with the elements listed below replaced wholesale.
#
# base_size    base font size passed to theme_bw().
# base_family  base font family passed to theme_bw().
#
# Returns a ggplot2 theme object.
theme_map <- function(base_size = 14, base_family = "serif") {
  # Axis text and axis titles share the same grey, slightly enlarged style
  grey.text <- element_text(size = rel(1.2), color = "grey")
  overrides <- theme(
    # axes
    axis.line = element_blank(),
    axis.text.x = grey.text,
    axis.text.y = grey.text,
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0.3, "lines"),
    axis.ticks.margin = unit(0.5, "lines"),
    axis.title.x = grey.text,
    axis.title.y = grey.text,
    # legend
    legend.background = element_rect(fill = "white", colour = NA),
    legend.key = element_rect(colour = "white"),
    legend.key.size = unit(1.3, "lines"),
    legend.position = "right",
    legend.text = element_text(size = rel(1.3)),
    legend.title = element_text(size = rel(1.4), face = "bold", hjust = 0),
    # panel and plot
    panel.border = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(size = rel(1.8), face = "bold", hjust = 0.5, vjust = 2),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "lines")
  )
  # Start from the predefined theme and replace the listed elements
  theme_bw(base_size = base_size, base_family = base_family) %+replace% overrides
}
## Transform shapefile to dataframe and merge with yield data
## Transform shapefile to dataframe and merge with yield data
wmap_df <- fortify(wmap)
wmap_df <- merge(wmap_df, wyield, by = "id") # merge the spatial data and the yield data
## Plot map: polygons filled by AVG, white borders, region IDs as labels
mapy <- ggplot(wmap_df, aes(long, lat, group = group))
mapy <- mapy + geom_polygon(aes(fill = AVG))
# Borders reuse the plot's data and mapping; the original also mapped a
# fill aesthetic here, which geom_path does not use.
mapy <- mapy + geom_path(color = "white", size = 0.4)
mapy <- mapy +
  labs(title = "Average yield impacts (in %) across SRES scenarios ") +
  scale_fill_gradient2(name = "%Change in yield", low = "red3", mid = "snow2", high = "darkgreen")
mapy <- mapy + coord_equal() + theme_map()
mapy <- mapy + geom_text(data = labs, aes(x = long, y = lat, label = ID_1, group = ID_1), size = 6, family = "serif")
mapy
## Plot boxplot
## Plot boxplot: one horizontal box per region, blue outliers and a red
## circle at each region's mean
boxploty <- ggplot(data = boxplot.df, mapping = aes(factor(name_code), value)) +
  geom_boxplot(
    stat = "boxplot", position = "dodge", fill = "grey",
    outlier.colour = "blue", outlier.shape = 16, outlier.size = 4
  ) +
  labs(title = "Distribution of yield impacts (in %) by GTAP region", y = "Yield (% Change)") +
  theme_bw() +
  coord_flip() +
  stat_summary(fun.y = "mean", geom = "point", shape = 21, size = 4, color = "red") +
  # All text styling gathered in a single theme() call
  theme(
    plot.title = element_text(
      size = 26, hjust = 0.5, vjust = 1, face = 'bold', family = "serif"
    ),
    axis.text.x = element_text(
      colour = 'black', size = 18, hjust = 0.5, vjust = 1, family = "serif"
    ),
    axis.title.x = element_text(
      size = 14, hjust = 0.5, vjust = 0, face = 'bold', family = "serif"
    ),
    axis.text.y = element_text(
      colour = 'black', size = 18, hjust = 0, vjust = 0.5, family = "serif"
    ),
    axis.title.y = element_blank()
  )
boxploty
## I found this code on the website, and tried to tweak it to achieve my desired
## result, but failed
## (the second line of this comment was previously bare text, which does not parse)
# Plot objects using widths and height and respect to fix aspect ratios.
# grid functions are called through grid:: so the layout also works when
# grid is not attached.
grid::grid.newpage()
# Two rows, one column: map on top, boxplot below
grid::pushViewport( grid::viewport( layout = grid::grid.layout( 2 , 1 , widths = grid::unit( c( 1 ) , "npc" ) ,
                                                        heights = grid::unit( c( 0.45 ) , "npc" ) ,
                                                        respect = matrix(rep(2,1),2) ) ) )
print( mapy, vp = grid::viewport( layout.pos.row = 1, layout.pos.col = 1 ) )
print( boxploty, vp = grid::viewport( layout.pos.row = 2, layout.pos.col = 1 ) )
grid::upViewport(0)
# Viewport reserved for a legend; the draw call is commented out, so nothing
# is drawn in it.
vp3 <- grid::viewport( width = grid::unit(0.5,"npc") , x = 0.9 , y = 0.5)
grid::pushViewport(vp3)
#grid.draw( legend )
grid::popViewport()
Is this close to what you had in mind?
Code:
library(rgdal)
library(ggplot2)
library(RColorBrewer)
library(reshape)
library(gridExtra)
setwd("<directory with all your files...>")
# Return the ID and label point of the x-th polygon of the global `wmap`.
#
# x  index into wmap@polygons.
#
# Returns a named vector c(id, c.long, c.lat). Because the ID is a string,
# c() makes the whole result character; the caller converts the coordinates
# back to numeric.
#
# Slot access uses `@`; the `#` that had replaced it commented out the rest
# of each line.
get.centroids = function(x){ # extract centroids from polygon with given ID
  poly = wmap@polygons[[x]]
  ID = poly@ID
  centroid = as.numeric(poly@labpt)
  return(c(id=ID, c.long=centroid[1], c.lat=centroid[2]))
}
wmap <- readOGR(dsn = ".", layer = "ne_110m_admin_0_countries")
wyield <- read.csv(file = "AllWorldCountries_CCShocksGTAP.csv", header = TRUE)
wyield <- transform(wyield, ID_1 = substr(ID_1, 3, 10)) # strip leading "TR"
# wmap@data and wyield have common, unique field: name.
# Pair each polygon id with its name, then join the yield data by name
# rather than relying on row order.
wdata <- data.frame(id = rownames(wmap@data), name = wmap@data$name)
wdata <- merge(wdata, wyield, by = "name")
# NOTE(review): 1:17 hard-codes the number of polygons used - confirm
labs <- do.call(rbind, lapply(1:17, get.centroids)) # extract polygon IDs and centroids from shapefile
wdata <- merge(wdata, labs, by = "id")
# Centroid coordinates arrive as text (possibly factors); convert to numeric.
# lapply() keeps one list element per column whatever the number of rows.
centroid.cols <- c("c.lat", "c.long")
wdata[centroid.cols] <- lapply(wdata[centroid.cols], function(x) as.numeric(as.character(x)))
wmap.df <- fortify(wmap)                        # data frame for world map
wmap.df <- merge(wmap.df, wdata, by = "id")     # merge data to fill polygons
# ColorBrewer.org "Spectral" palette, 11 colours (applied in reverse below)
palette <- brewer.pal(11, "Spectral")
# Choropleth: polygons filled by AVG, thin grey borders, region IDs at the
# centroids, legend on the left.
ggmap <- ggplot(wmap.df, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = AVG)) +
  geom_path(colour = "grey50", size = .1) +
  geom_text(aes(x = c.long, y = c.lat, label = ID_1), size = 3) +
  scale_fill_gradientn(name = "% Change", colours = rev(palette)) +
  theme(plot.title = element_text(face = "bold"), legend.position = "left") +
  coord_fixed() +
  labs(x = "", y = "", title = "Average Yield Impacts across SRES Scenarios (% Change)") +
  # margins are given as fractions of the plot (npc units)
  theme(plot.margin = unit(c(0, 0.03, 0, 0.05), units = "npc"))
ggmap
# Long-format data for the boxplot: one row per region label and scenario.
box.df <- wdata[order(as.numeric(wdata$ID_1)), ]                  # order by ID_1
box.df$label <- with(box.df, paste0(name_long, " [", ID_1, "]"))  # create labels for boxplot
scenario.cols <- c("A1B", "A1BLow", "A1F", "A1T", "A2", "B1", "B1Low", "B2")
box.df <- melt(box.df, id.vars = "label", measure.vars = scenario.cols)
# Fix the level order so the ordering is maintained in ggplot
box.df$label <- factor(box.df$label, levels = unique(box.df$label))
# Horizontal boxplots with blue outliers and a red circle at each mean
ggbox <- ggplot(box.df, aes(x = label, y = value)) +
  geom_boxplot(fill = "grey", outlier.colour = "blue", outlier.shape = 16, outlier.size = 4) +
  stat_summary(fun.y = mean, geom = "point", shape = 21, size = 4, color = "red") +
  coord_flip() +
  labs(x = "", y = "% Change", title = "Distribution of Yield Impacts by GTAP region") +
  theme(plot.title = element_text(face = "bold"), axis.text = element_text(color = "black")) +
  theme(plot.margin = unit(c(0, 0.03, 0, 0.0), units = "npc"))
ggbox
# Stack the two plots in a 2-row, 1-column layout: the map gets 40% of the
# height and the boxplot 60%. grid functions are called through grid:: so
# the layout also works when grid is not attached.
grid::grid.newpage()
grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 1, heights = c(0.40, 0.60))))
print(ggmap, vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1))
print(ggbox, vp = grid::viewport(layout.pos.row = 2, layout.pos.col = 1))
Explanation:
The last 4 lines of code do most of the work in arranging the layout. I create a viewport layout with 2 viewports arranged as 2 rows in 1 column. The upper viewport is 40% of the height of the grid, the lower viewport is 60% of the height. Then, in the ggplot calls I create a right margin of 3% of the plot width for both the map and the boxplot, and a left margin for the map so that the map and the boxplot are aligned on the left. There's a fair amount of tweaking to get everything lined up, but these are the parameters to play with. You should also know that, since we use coord_fixed() in the map, if you change the overall size of the plot (by resizing the plot window, for example), the map's width will change.
Finally, your code to create the choropleth map is a little dicey...
## re-order the shapefile
# (quoted from the question) prepends the polygon row names as an id column
wyield <- cbind(id=rownames(wmap@data),wyield)
This does not reorder the shapefile. All you are doing here is prepending the wmap@data rownames to your wyield data. This works if the rows in wyield are in the same order as the polygons in wmap - a very dangerous assumption. If they are not, then you will still get a map, but the coloring will be incorrect, and unless you study the output very carefully the error is likely to be missed. So the code above creates an association between polygon ID and region name, merges the wyield data based on name, and then merges that into wmap.df based on polygon id.
# Pair each polygon id with its region name, then join the yield data by name
wdata <- data.frame(id=rownames(wmap@data),name=wmap@data$name)
wdata <- merge(wdata,wyield, by="name")
...
wmap.df <- fortify(wmap) # data frame for world map
wmap.df <- merge(wmap.df,wdata,by="id") # merge data to fill polygons