Here data and hypithesis:
set.seed(1234)
myd <- data.frame (X = rnorm (100), Y = rnorm (100, 10, 3))
just catorizing X and Y, sometime this may be different variable than X and Y
and is category itself
myd$xcat <- cut (myd$X, 10)
myd$ycat <- cut (myd$Y, 10)
I want to make nice plot like the following, where the catories are plotting as strip of heatmap plots
require(ggplot2)
ggplot(myd, aes(x=X, y=Y)) + geom_point(shape=1) + theme_bw()
Is this possible ggplot2 or other packages or need a specialized solution?
One way to achieve this is to make three separate plots with ggplot2 and then use viewport() and grid.layout() to arrange them together.
First plot contains just middle part (scatter plot). px and py are heatmaps (made with geom_tile()) for the x and y axis. Most important part is to use the same theme() settings in plots (just change x to y). Used color="white" for some elements to ensure that there is a place for that element (to have correct dimensions) but they are not visible on plot.
#Scatter plot without axis titles
p<-ggplot(myd, aes(x=X, y=Y)) + geom_point(shape=1) +
theme_bw() + theme(axis.title=element_blank())
#tile plot for the x axis
px<-ggplot(myd,aes(x=xcat,y=1,fill=xcat))+geom_tile()+
scale_x_discrete(expand=c(0,0))+
scale_fill_hue(h=c(0,180))+
scale_y_continuous(expand=c(0,0),breaks=1,labels="10")+
theme(legend.position="none",
axis.title=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_text(color="white"),
axis.ticks.y=element_line(color="white"))
#tile plot for the y axis
py<-ggplot(myd,aes(x=1,y=ycat,fill=ycat))+geom_tile()+
scale_y_discrete(expand=c(0,0))+
scale_x_continuous(expand=c(0,0),breaks=1,labels="1")+
scale_fill_hue(h=c(181,360))+
theme(legend.position="none",
axis.title=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.x=element_text(color="white"),
axis.ticks.x=element_line(color="white"))
#Define layout for the plots (2 rows, 2 columns)
layt<-grid.layout(nrow=2,ncol=2,heights=c(7/8,1/8),widths=c(1/8,7/8),default.units=c('null','null'))
#View the layout of plots
grid.show.layout(layt)
#Draw plots one by one in their positions
grid.newpage()
pushViewport(viewport(layout=layt))
print(py,vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(p,vp=viewport(layout.pos.row=1,layout.pos.col=2))
print(px,vp=viewport(layout.pos.row=2,layout.pos.col=2))
A solution in base plot:
brX <- seq(min(myd$X),max(myd$X),length=11)
brY <- seq(min(myd$Y),max(myd$Y),length=11)
layout(matrix(c(1,0,2,3),nrow=2),width=c(2,8),height=c(8,2))
par(mar=c(0,3,5,0))
plot(NA,ylim=range(myd$Y),xlim=c(0,1),axes=F,ann=F,xaxs="i")
rect(0,brY[-length(brY)],1,brY[-1],
col=colorRampPalette(c("red","yellow","green"))(length(brY)-1))
par(mar=c(0,0,5,5))
plot(NA,xlim=range(myd$X),ylim=range(myd$Y),ann=F,xaxt="n",yaxt="n")
abline(h=pretty(myd$Y),v=pretty(myd$X), col="grey95")
points(myd$X,myd$Y,pch=21)
axis(3)
axis(4)
par(mar=c(3,0,0,5))
plot(NA,xlim=range(myd$X),ylim=c(0,1),axes=F,ann=F,yaxs="i")
rect(brX[-length(brX)],0,brX[-1],1,
col=colorRampPalette(c("blue","white","red"))(length(brX)-1))
Related
I am creating dendrograms using ggdendro and coloring them according to cutpoints in the branches. I'm using the approach provided by #jlhoward in this question (Colorize Clusters in Dendogram with ggplot2) but I run into problems when my leaf labels are very long.
Here is some example code:
df <- USArrests
labs <- paste("veryverylongtitlename",1:50,sep="")
rownames(df) <- labs
library(ggplot2)
library(ggdendro)
hc <- hclust(dist(df), "ave") # heirarchal clustering
dendr <- dendro_data(hc, type="rectangle") # convert for ggplot
clust <- cutree(hc,k=2) # find 2 clusters
clust.df <- data.frame(label=names(clust), cluster=factor(clust))
# dendr[["labels"]] has the labels, merge with clust.df based on
label column
dendr[["labels"]] <- merge(dendr[["labels"]],clust.df, by="label")
# plot the dendrogram; note use of color=cluster in geom_text(...)
ggplot() +
geom_segment(data=segment(dendr), aes(x=x, y=y, xend=xend,
yend=yend)) +
geom_text(data=label(dendr), aes(x, y, label=label, hjust=0, color=cluster),
size=3) +
coord_flip() + scale_y_reverse(expand=c(0.2, 0)) +
theme(axis.line.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.y=element_blank(),
axis.title.y=element_blank(),
panel.background=element_rect(fill="white"),
panel.grid=element_blank())
As you can see, the labels here get cut off. I found this answer (decrease size of dendogram (or y-axis) ggplot), but I don't want to use it because I very much like the ability to use cutree to define my clusters. How can I manipulate the above code to fit the long labels? Many thanks!
I have a barplot that I have overlaid with a scatterplot using stripchart.
Barplot(data1means)
stripchart(data1, add=TRUE, vertical = TRUE)
However, the points on the scatterplot are misaligned with the bars on the barplot, as shown here:
.
So how do I alter the spacing of the scatter plot so they match up? As I understand it, stripchart doesn't have a space or width variable like barplot does.
With base graphics, you can plot points on top of the bar plot using the points function. We get the x-positions of the bars from the bar plot itself. I've also included an alternative approach where the means are plotted with point markers rather than bars:
# Fake data
set.seed(1)
dat = data.frame(group=LETTERS[1:5], y=rnorm(25,20,2))
# Assign the barplot to x so that x will contain the bar positions.
x = barplot(tapply(dat$y, dat$group, FUN=mean), ylim=c(0,1.05*max(dat$y)), col=hcl(240,100, 70))
points(rep(x, table(dat$group)), dat$y[order(dat$group)], pch=21, bg="red")
plot(rep(1:length(unique(dat$group)), table(dat$group)),
dat$y[order(dat$group)], pch=21, bg="blue",
ylim=c(0,1.05*max(dat$y)), xlim=c(0.5,5.5), xaxt="n")
points(1:length(unique(dat$group)),
tapply(dat$y, dat$group, FUN=mean),
pch="\U2013", cex=3, col="red")
axis(side=1, at=1:5, labels=LETTERS[1:5])
Here's a version of the same two plots using ggplot2.
library(ggplot2)
ggplot(dat, aes(group, y)) +
stat_summary(fun.y=mean, geom="bar", fill=hcl(240,100,50)) +
geom_point() +
theme_minimal()
ggplot(dat, aes(group, y)) +
geom_point() +
stat_summary(fun.y=mean, geom="point", pch="\U2013",
size=8, colour="red") +
scale_y_continuous(limits=c(0, max(dat$y))) +
theme_bw()
I'm trying to construct a 5 x 6 matrix of plots in R using ggplot2 and gridExtra. For simplicity, I can show my issue with a 2 x 2 matrix and some fake data.
#Load libraries
library(ggplot2); library(gridExtra)
#Data
data = rbind(data.frame(x=rnorm(100,0,1),ALP='A',NUM=1),data.frame(x=rnorm(100,20000,1000),ALP='A',NUM=2),data.frame(x=rnorm(100,100,10),ALP='B',NUM=1),data.frame(x=rnorm(5000,1000),ALP='B',NUM=2))
#Ggplot2 facet_grid
ggplot(data,aes(x=x,y=..scaled..,fill='red')) + geom_density() + facet_grid(ALP~NUM,scales='free') + guides(fill=FALSE)
The result doesn't look good, as the x-scale is so different across the faceting labels. I tried to do it manually with gridExtra.
#Assemble grobs
plt1 = ggplot(subset(data,ALP=='A'&NUM==1),aes(x=x,y=..scaled..,fill=ALP)) + geom_density() + facet_grid(.~NUM,scales='free') + guides(fill=FALSE) + theme(axis.title.x=element_blank(),axis.title.y=element_blank())
plt2 = ggplot(subset(data,ALP=='A'&NUM==2),aes(x=x,y=..scaled..,fill=ALP)) + geom_density() + facet_grid(ALP~NUM,scales='free') + guides(fill=FALSE) + theme(axis.text.y=element_blank(),axis.ticks.y=element_blank(),axis.title.y=element_blank(),axis.title.x=element_blank())
plt3 = ggplot(subset(data,ALP=='B'&NUM==1),aes(x=x,y=..scaled..,fill=ALP)) + geom_density() + guides(fill=FALSE) + theme(axis.title.x=element_blank(),axis.title.y=element_blank())
plt4 = ggplot(subset(data,ALP=='B'&NUM==2),aes(x=x,y=..scaled..,fill=ALP)) + geom_density() + facet_grid(ALP~.,scales='free') + guides(fill=FALSE) + theme(axis.text.y=element_blank(),axis.ticks.y=element_blank(),axis.title.y=element_blank(),axis.title.x=element_blank())
#Plot it out
grid.arrange(plt1,plt2,plt3,plt4,nrow=2,ncol=2,left=textGrob("scaled",rot=90,vjust=1),bottom=textGrob("x"))
I'm almost there, unfortunately the plotting panel (x,y) in the upper, right-hand corner is smaller than all the rest. Similarly, the plotting panel (x,y) in the lower, left-hand corner is bigger than all the rest. I would like all of the plotting panels (x,y) to be the same height/width. I found some code using gtable, but it only seems to work consistently when the grobs don't have facet labels. The effect is even more exaggerated when the number of rows/columns increases.
as an alternative to facetting, you could work with gtable,
plt <- lapply(list(plt1,plt2, plt3,plt4), ggplotGrob)
left <- rbind(plt[[1]], plt[[3]])
right <- rbind(plt[[2]], plt[[4]])
all <- cbind(left, right)
grid.newpage()
grid.draw(all)
the panel sizes should all be equal (1null) with this layout.
See plot here:
(from here)
How do I reproduce both the upper and lower portion of the barplot using ggplot2?
For example, I can produce the upper portion with
ggplot(data.frame(x=rnorm(1000, 5)), aes(x=x)) + geom_bar() + scale_y_reverse()
However now if I add any other geom_, such as another geom_bar() the scale for y is reversed. Is it possible to apply the scale_y_reverse() to only a specific geom_?
Another option is to make two separate plots and combine them with arrangeGrob from the gridExtra package. After playing with the plot margins, you can arrive at something that looks decent.
library(gridExtra)
library(ggplot2)
set.seed(100)
p2 <- ggplot(data.frame(x=rnorm(1000, 5)), aes(x=x)) + geom_bar() + theme(plot.margin=unit(c(0,0,0,0), 'lines'))
p1 <- p2 + scale_y_reverse() +
theme(plot.margin=unit(c(0, 0, -.8, 0), 'lines'), axis.title.x=element_blank(),
axis.text.x=element_blank(), axis.ticks.x=element_blank())
p <- arrangeGrob(p1, p2)
print(p)
ggplot only like to have one y-axis scale. The easiest thing would be to basically reshape your data yourself. Here we can use geom_rect to draw the data where ever we like and we can condition it on group time. Here's an example
#sample data
dd<-data.frame(
year=rep(2000:2014, 2),
group=rep(letters[1:2], each=15),
count=rpois(30, 20)
)
And now we can plot it. But first, let's define the offset to the top bars by finding the maxima height at a year and adding a bit of space
height <- ceiling(max(tapply(dd$count, dd$year, sum))*1.10)
And here's how we plot
ggplot(dd) +
geom_rect(aes(xmin=year-.4, xmax=year+.4,
ymin=ifelse(group=="a", 0, height-count),
ymax=ifelse(group=="a", count, height), fill=group)) +
scale_y_continuous(expand=c(0,0))
And that will give us
I have data that plots over time with four different variables. I would like to combine them in one plot using facet_grid, where each variable gets its own sub-plot. The following code resembles my data and the way I'm presenting it:
require(ggplot2)
require(reshape2)
subm <- melt(economics, id='date', c('psavert','uempmed','unemploy'))
mcsm <- melt(data.frame(date=economics$date, q=quarters(economics$date)), id='date')
mcsm$value <- factor(mcsm$value)
ggplot(subm, aes(date, value, col=variable, group=1)) + geom_line() +
facet_grid(variable~., scale='free_y') +
geom_step(data=mcsm, aes(date, value)) +
scale_y_discrete(breaks=levels(mcsm$value))
If I leave out scale_y_discrete, R complains that I'm trying to combine discrete value with continuous scale. If I include scale_y_discreate my continuous series miss their scale.
Is there any neat way of solving this issue ie. getting all scales correct ? I also see that the legend is alphabetically sorted, can I change that so the legend is ordered in the same order as the sub-plots ?
Problem with your data is that that for data frame subm value is numeric (continuous) but for the mcsm value is factor (discrete). You can't use the same scale for numeric and continuous values and you get y values only for the last facet (discrete). Also it is not possible to use two scale_y...() functions in one plot.
My approach would be to make mcsm value as numeric (saved as value2) and then use them - it will plot quarters as 1,2,3 and 4. To solve the problem with legend, use scale_color_discrete() and provide breaks= in order you need.
mcsm$value2<-as.numeric(mcsm$value)
ggplot(subm, aes(date, value, col=variable, group=1)) + geom_line()+
facet_grid(variable~., scale='free_y') + geom_step(data=mcsm, aes(date, value2)) +
scale_color_discrete(breaks=c('psavert','uempmed','unemploy','q'))
UPDATE - solution using grobs
Another approach is to use grobs and library gridExtra to plot your data as separate plots.
First, save plot with all legends and data (code as above) as object p. Then with functions ggplot_build() and ggplot_gtable() save plot as grob object gp. Extract from gp only part that plots legend (saved as object gp.leg) - in this case is list element number 17.
library(gridExtra)
p<-ggplot(subm, aes(date, value, col=variable, group=1)) + geom_line()+
facet_grid(variable~., scale='free_y') + geom_step(data=mcsm, aes(date, value2)) +
scale_color_discrete(breaks=c('psavert','uempmed','unemploy','q'))
gp<-ggplot_gtable(ggplot_build(p))
gp.leg<-gp$grobs[[17]]
Make two new plot p1 and p2 - first plots data of subm and second only data of mcsm. Use scale_color_manual() to set colors the same as used for plot p. For the first plot remove x axis title, texts and ticks and with plot.margin= set lower margin to negative number. For the second plot change upper margin to negative number. faced_grid() should be used for both plots to get faceted look.
p1 <- ggplot(subm, aes(date, value, col=variable, group=1)) + geom_line()+
facet_grid(variable~., scale='free_y')+
theme(plot.margin = unit(c(0.5,0.5,-0.25,0.5), "lines"),
axis.text.x=element_blank(),
axis.title.x=element_blank(),
axis.ticks.x=element_blank())+
scale_color_manual(values=c("#F8766D","#00BFC4","#C77CFF"),guide="none")
p2 <- ggplot(data=mcsm, aes(date, value,group=1,col=variable)) + geom_step() +
facet_grid(variable~., scale='free_y')+
theme(plot.margin = unit(c(-0.25,0.5,0.5,0.5), "lines"))+ylab("")+
scale_color_manual(values="#7CAE00",guide="none")
Save both plots p1 and p2 as grob objects and then set for both plots the same widths.
gp1 <- ggplot_gtable(ggplot_build(p1))
gp2 <- ggplot_gtable(ggplot_build(p2))
maxWidth = grid::unit.pmax(gp1$widths[2:3],gp2$widths[2:3])
gp1$widths[2:3] <- as.list(maxWidth)
gp2$widths[2:3] <- as.list(maxWidth)
With functions grid.arrange() and arrangeGrob() arrange both plots and legend in one plot.
grid.arrange(arrangeGrob(arrangeGrob(gp1,gp2,heights=c(3/4,1/4),ncol=1),
gp.leg,widths=c(7/8,1/8),ncol=2))