ggplot: (manually) create legend when referencing different data.frames - r

Ok, lets take some sample data.
A <- sample(1:100, 25)
B <- sample(1:25, 25)
df.1 <- data.frame(A,B)
C <- sample(1:80, 15)
D <- sample(1:15, 15)
df.2 <- data.frame(C,D)
Then we plot the data using ggplot
library(ggplot2)
(plot2 <- ggplot(NULL) +
geom_point(data=df.1, aes(x=A, y=B),
color='black', cex=1, pch=16 ) +
geom_smooth(data=df.1, aes(x=A, y=B), method="lm", size=1,
se=FALSE, colour="black", linetype=2)+
geom_point(data=df.2, aes(x=C, y=D),
color='black', cex=1, pch=15 ) +
geom_smooth(data=df.2, aes(x=C, y=D), method="lm", size=1,
se=FALSE, colour="black", linetype=1)+
scale_y_continuous("Y scale") +
ggtitle("Plot") +
theme_bw()+
theme(plot.title = element_text(face="bold", size=20),
axis.title.x = element_text(vjust=-0.25),
axis.title.y = element_text(vjust=1),
axis.title = element_text(face="bold", size=15)
)
)
So we have created and modified the title, axis, etc.
But I want to create a legend which shows the linetype's from the geom_smooth() function of df.1 and df.2. It should be in the top right of the graph.
(so for df.1 we want a solid line and df.2 a dashed line)
The example here walks you through an example, but the data comes from within the same data set

Here you go:
#combine and create x and y (as mappings follow
#same pattern)
df.1$group <- "df.1"
df.1$x <- df.1$A
df.1$y <- df.1$B
df.2$group <- "df.2"
df.2$x <- df.2$C
df.2$y <- df.2$D
library(plyr) #for rbind.fill
df.all <- rbind.fill(df.1,df.2)
plot3 <- ggplot(df.all, aes(x=x,y=y,group=group)) +
geom_point(color='black', cex=1, pch=16 ) +
geom_smooth(aes(linetype=group),method="lm", size=1,
se=FALSE, colour="black") +
scale_y_continuous("Y scale") +
ggtitle("Plot") +
theme_bw()+
theme(plot.title = element_text(face="bold", size=20),
axis.title.x = element_text(vjust=-0.25),
axis.title.y = element_text(vjust=1),
axis.title = element_text(face="bold", size=15)
) +
#add custom linetypes (not necessary now, as default mapping to 1 and 2)
plot3 + scale_linetype_manual(values=c("df.1"=1,"df.2"=2))

Related

Add a specific color to each tile and a legend in geom_tile

I want to add two things to the following figure:
add a legend (zp or df$legend) (which is similar to each block) to the geom_tile figure
assign a specific color to each tile (which repeats for every block) instead of scale_fill_continuous but keep the "white" color for NA values.
library(ggplot2)
library(RColorBrewer)
xp <- c('Disease_1','Disease_2','Disease_3','Disease_4')
yp <- c('B Cell','T Cell')
zp <- c('feature1','feature2','feature3','feature4','feature5','feature6','feature7','feature8')
xp1 <- xp[1:2]
df <- list(x=1:2,y=1:4,xp1,yp)
df <- expand.grid(df)
df$z <- c(1.804344554,1.158037086,1.686173307,0.500280283,1.710806067,0.857513435,0.66474755,1.164780941,1.769090931,2.058400169,3.114233859,1.436684123,1.770306398,0.995507604,2.538556363,2.264486118,1.424789875,1.816608927,2.773082903,1.197434618,0.829416784,1.622892741,2.035117094,1.650363345,1.927235048,1.546477438,2.308773122,1.041881013,1.216029616,0.478353441,0.834348006,1.240448774)
df$legend <- rep(c('feature1','feature2','feature3','feature4','feature5','feature6','feature7','feature8'))
cols <- rev(brewer.pal(11, 'RdYlBu'))
p <- ggplot(df)
p <- p + geom_tile(aes(x, y, fill = ifelse(z > 1, z, NA)),
colour = "black", width=0.85, height=0.85, size=1) +
scale_fill_continuous(name = "z Scale", na.value = 'white') +
labs() +
facet_grid(Var3~Var4) +
theme(strip.background =element_blank(), #remove bg color of facet_grid texts
strip.text.x = element_text(size=12, color="black", face="bold"),
strip.text.y = element_text(size=10, color="black", face="bold"))
print(p)
p + coord_fixed() +
theme(
axis.title = element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank(),
plot.background=element_blank(),
panel.border=element_blank(),
panel.background = element_blank())
Thanks in advance.

Truncated axis lines in ggplot2

I would like to use ggplot() to produce a graph with truncated axis lines similar to what is seen in the lower left corner in the plot below (created with base R graphics).
I guess that I need to set the axis.line argument in my ggplot theme to something other than element_line(), but I have no idea to what. Here's the code I currently have
library("ggplot2")
library("ggrepel")
tab <- data.frame(
Average=rnorm(40, mean=12, sd=3),
SD =rnorm(40, mean=12, sd=3),
names = LETTERS[1:40],
class_num = sample(1:4, size=40, replace=TRUE)
)
ggplot(data=tab,
aes(x=Average, y=SD, label=rownames(tab))) +
stat_smooth(method="lm", size=1, se=FALSE, col="black") +
geom_point(aes(col=factor(class_num), shape=factor(class_num)), size=2) +
geom_text_repel(size=3) +
xlab(expression("Seasonal average" ~ widehat(ETI)[m])) +
ylab(expression("Seasonal SD of" ~ widehat(ETI)[m])) +
scale_x_continuous(breaks =seq(9.2, 11.8, length.out=5)) +
scale_shape_manual(values=c(15, 16, 17, 18)) +
scale_color_manual(values=c("#CC0000", "darkgreen", "#0000CC", "#000000")) +
theme_classic() +
theme(legend.position="none",
axis.ticks.length = unit(.25, "cm"),
axis.line = element_line())
This produces the following plot where the x-axis and y axis lines are connected in the lower left-hand corner.
How about this, it makes use of the coord_capped_cart() function from the lemon package. There's a nice discussion here.
library("ggplot2")
library("ggrepel")
library(lemon)
tab <- data.frame(
Average=rnorm(40, mean=12, sd=3),
SD =rnorm(40, mean=12, sd=3),
names = LETTERS[1:40],
class_num = sample(1:4, size=40, replace=TRUE)
)
ggplot(data=tab,
aes(x=Average, y=SD, label=rownames(tab))) +
stat_smooth(method="lm", size=1, se=FALSE, col="black") +
geom_point(aes(col=factor(class_num), shape=factor(class_num)), size=2) +
geom_text_repel(size=3) +
xlab(expression("Seasonal average" ~ widehat(ETI)[m])) +
ylab(expression("Seasonal SD of" ~ widehat(ETI)[m])) +
scale_x_continuous(breaks =seq(7, 17, length.out=5)) +
scale_y_continuous(breaks=seq(5,18, length.out=5)) +
scale_shape_manual(values=c(15, 16, 17, 18)) +
scale_color_manual(values=c("#CC0000", "darkgreen", "#0000CC", "#000000")) +
theme_classic() +
theme(legend.position="none",
axis.ticks.length = unit(.25, "cm"),
axis.line = element_line()) +
coord_capped_cart(bottom=capped_horizontal(),
left=capped_vertical(capped="both"))

Displaying a subset of features in a facet of a multi-layer plot

I'm trying to generate a multi-layered plot where the points in one layer gets displayed only in a fraction of the facets created using data from another layer. In the code below, the points in red are either x1 or x2 (just like the row labels of the facet).
library(ggplot2)
set.seed(1000)
#generate first df
df1 = data.frame(x=rep(rep(seq(2,8,2),4),4),
y=rep(rep(seq(2,8,2),each=4),4),
v1=rep(c("x1","x2"),each=32),
v2=rep(rep(c("t1","t2"),each=16),2),
v3=rbinom(64,1,0.5))
# generate second df
df2 = data.frame(x=runif(20)*10,
y=runif(20)*10,
v4=sample(c("x1","x2"),20,T))
# create theme
t1=theme(panel.grid.major = element_blank(), text = element_text(size=18),
panel.grid.minor = element_blank(), strip.background= element_blank(),
axis.title.x = element_blank(), axis.title.y = element_blank())
# plot
ggplot() +
geom_point(data=df1, aes(x=x, y=y, colour = factor(v3)), shape=15, size=5) +
scale_colour_manual(values = c(NA,"black")) + facet_grid(v1~v2) +
geom_point(data=df2, aes(x=x,y=y, shape=v4), colour="red", size=4) +
coord_equal(ratio=1) + xlim(0, 10) + ylim(0, 10) + t1
EDIT: The black squares are generated by manually setting the colour of df1$v3 = 1 to black and df1$v3 = 0 to NA. /EDIT
But what I actually want is to display only those points from df2 with df2$v4 = x1 in the first row of facets, and df2$v4 = x2 in the second row of facets (corresponding to the values of df1$v1 and the row labels of the facet).
I've done this by generating two separate graphs...
ggplot() +
geom_point(data=df1[df1$v1=="x1",], shape=15, size=5,
aes(x=x, y=y, colour = factor(v3)), ) +
scale_colour_manual(values = c(NA,"black")) + facet_grid(~v2) +
geom_point(data=df2[df2$v4=="x1",], aes(x=x,y=y), colour="red", size=4) +
coord_equal(ratio=1) + xlim(0, 10) + ylim(0, 10) + t1
ggplot() +
geom_point(data=df1[df1$v1=="x2",], shape=15, size=5,
aes(x=x, y=y, colour = factor(v3)), ) +
scale_colour_manual(values = c(NA,"black")) + facet_grid(~v2) +
geom_point(data=df2[df2$v4=="x2",], aes(x=x,y=y), colour="red", size=4) +
coord_equal(ratio=1) + xlim(0, 10) + ylim(0, 10) + t1
... but I'm curious if a single plot can be generated because with my actual data set I have several x's and it is time consuming to piece the graphs together.
does it help if we just rename df2$v4 or make a new column called df2$v1, for faceting purposes:
df2 <- dplyr::rename(df2, v1 = v4)
df2$v1 <- df2$v4
# either works
then ggplot will distribute the data points as you would like, with this:
ggplot() +
geom_point(data=df1, aes(x=x, y=y, colour = factor(v3)), shape=15, size=5) +
scale_colour_manual(values = c(NA,"black")) +
facet_grid(v1~v2) +
geom_point(data=df2, aes(x=x,y=y), colour="red", size=4) +
coord_equal(ratio=1) + xlim(0, 10) + ylim(0, 10) +
t1
not 100% sure I grasp your problem...

Perfect fit of ggplot2 plot in plot

I want to plot a restricted cubic spline as main plot and add a box-and-whisker plot to show the variation of the X variable. However, the lower hinge (x=42), the median (x=51), and the upper hinge(x=61) did not fit perfectly with the corresponding grid line of the main plot.
library(Hmisc)
library(rms)
library(ggplot2)
library(gridExtra)
data(pbc)
d <- pbc
rm(pbc)
d$status <- ifelse(d$status != 0, 1, 0)
dd = datadist(d)
options(datadist='dd')
f <- cph(Surv(time, status) ~ rcs(age, 4), data=d)
p <- Predict(f, fun=exp)
df <- data.frame(age=p$age, yhat=p$yhat, lower=p$lower, upper=p$upper)
### 1st PLOT: main plot
(g <- ggplot(data=df, aes(x=age, y=yhat)) + geom_line(size=1))
# CI
(g <- g + geom_ribbon(data=df, aes(ymin=lower, ymax=upper), alpha=0.5, linetype=0, fill='#FFC000'))
# white background
(g <- g + theme_bw())
# X-axis
(breaks <- round(boxplot.stats(p[,"age"])$stats))
(g <- g + scale_x_continuous(breaks=breaks, limits=range(p[,"age"]), labels=round(breaks)))
(g <- g + xlab("Age"))
# Y-Achse
(g <- g + ylab("Hazard Ratio"))
# size and color of axis
(g <- g + theme(axis.line = element_line(color='black', size=1)))
(g <- g + theme(axis.ticks = element_line(color='black', size=1)))
(g <- g + theme( plot.background = element_blank() ))
#(g <- g + theme( panel.grid.major = element_blank() ))
(g <- g + theme( panel.grid.minor = element_blank() ))
(g <- g + theme( panel.border = element_blank() ))
### 2nd PLOT: box whisker plot
describe(df$age, digits=0)
round(range(df$age))
(gg <- ggplot(data=df, aes(x=1, y=age)) + geom_boxplot(outlier.shape=NA, size=1) + coord_flip())
(gg <- gg + theme( axis.line=element_blank() )) #
(gg <- gg + theme( axis.text.x=element_blank() ))
(gg <- gg + theme( axis.text.y=element_blank() ))
(gg <- gg + theme( axis.ticks=element_blank() ))
(gg <- gg + theme( axis.title.x=element_blank() ))
(gg <- gg + theme( axis.title.y=element_blank() ))
(gg <- gg + theme( panel.background=element_blank() ))
(gg <- gg + theme( panel.border=element_blank() )) #
(gg <- gg + theme( legend.position="none" )) #
(gg <- gg + theme( panel.grid.major=element_blank() )) #
(gg <- gg + theme( panel.grid.minor=element_blank() ))
(gg <- gg + theme( plot.background=element_blank() ))
(gg <- gg + theme( plot.margin = unit( c(0,0,0,0), "in" ) ))
(gg <- gg + scale_x_continuous(breaks=c(70,77,84), expand=c(0,0)) )
### FINAL PLOT: put box whisker plot in main plot
(final.gg <- g + annotation_custom(ggplotGrob(gg), ymin=2.4, ymax=2.6))
What do I have to change for the perfect fit?
Is there a better for a automatized alignment of the y-position of the
box-and-whisker?
UPDATE #1
Thanks for your answer! Below you can see my example with your code. However, as you can see, the lower hinge, the median, and the upper hinge still do not fit. What is going wrong?
library(Hmisc)
library(rms)
library(ggplot2)
library(gridExtra)
data(pbc)
d <- pbc
rm(pbc, pbcseq)
d$status <- ifelse(d$status != 0, 1, 0)
dd = datadist(d)
options(datadist='dd')
f <- cph(Surv(time, status) ~ rcs(age, 4), data=d)
p <- Predict(f, fun=exp)
df <- data.frame(age=p$age, yhat=p$yhat, lower=p$lower, upper=p$upper)
### 1st PLOT: main plot
(breaks <- boxplot.stats(p[,"age"])$stats)
g <- ggplot(data=df, aes(x=age, y=yhat)) + geom_line(size=1) +
geom_ribbon(data=df, aes(ymin=lower, ymax=upper), alpha=0.5, linetype=0, fill='#FFC000') +
theme_bw() +
scale_x_continuous(breaks=breaks) +
xlab("Age") +
ylab("Hazard Ratio") +
theme(axis.line = element_line(color='black', size=1),
axis.ticks = element_line(color='black', size=1),
plot.background = element_blank(),
# panel.border = element_blank(),
panel.grid.minor = element_blank())
### 2nd PLOT: box whisker plot
gg <- ggplot(data=df, aes(x=1, y=age)) +
geom_boxplot(outlier.shape=NA, size=1) +
scale_y_continuous(breaks=breaks) +
ylab(NULL) +
coord_flip() +
# theme_bw() +
theme(axis.line=element_blank(),
# axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title=element_blank(),
# panel.background=element_blank(),
panel.border=element_blank(),
# panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
# plot.background=element_blank(),
plot.margin = unit( c(0,0,0,0), "in" ),
axis.ticks.margin = unit(0, "lines"),
axis.ticks.length = unit(0, "cm"))
### FINAL PLOT: put box whisker plot in main plot
(final.gg <- g + annotation_custom(ggplotGrob(gg), ymin=2.4, ymax=2.6))
Minor edit: Updating to ggplot2 2.0.0
axis.ticks.margin is deprecated
In the boxplot, even though you have set various elements to element_blank and margins to zero, default spaces remain resulting in the misalignment. These spaces belong to:
axis.ticks.length
xlab
In the code below, I've re-arranged your code somewhat (I hope that's okay), and commented out some lines of code so that it can be seen that the two plots do align. I've also set the breaks in the two plot to un-rounded breaks (min and max values, hinges, and the median).
# X-axis
(breaks <- boxplot.stats(p[,"age"])$stats)
### 1st PLOT: main plot
g <- ggplot(data=df, aes(x=age, y=yhat)) + geom_line(size=1) +
geom_ribbon(data=df, aes(ymin=lower, ymax=upper), alpha=0.5, linetype=0, fill='#FFC000') +
theme_bw() +
scale_x_continuous(breaks=breaks) +
xlab("Age") +
ylab("Hazard Ratio") +
theme(axis.line = element_line(color='black', size=1),
axis.ticks = element_line(color='black', size=1),
plot.background = element_blank(),
# panel.border = element_blank(),
panel.grid.minor = element_blank())
### 2nd PLOT: box whisker plot
gg <- ggplot(data=df, aes(x=1, y=age)) +
geom_boxplot(outlier.shape=NA, size=1) +
scale_y_continuous(breaks=breaks) +
xlab(NULL) +
coord_flip() +
# theme_bw() +
theme(axis.line=element_blank(),
# axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title=element_blank(),
# panel.background=element_blank(),
panel.border=element_blank(),
# panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
# plot.background=element_blank(),
plot.margin = unit( c(0,0,0,0), "in" ),
# axis.ticks.margin = unit(0, "lines"),
axis.ticks.length = unit(0, "cm"))
### FINAL PLOT: put box whisker plot in main plot
(final.gg <- g + annotation_custom(ggplotGrob(gg), ymin=2.4, ymax=2.6))
You should note that ggplot's method for calculating the hinges differs slightly from the method used by boxplot.stats.
# ggplot's hinges
bp = ggplot(data=df, aes(x=1, y=age)) +
geom_boxplot(outlier.shape=NA, size=1)
bpData = ggplot_build(bp)
bpData$data[[1]][1:5]
Alternative answer using gtable functions to position the boxplot outside the main plot. The height of the box plot can be adjusted using the "h" parameter
library(rms)
# Data
data(pbc)
d <- pbc
rm(pbc)
d$status <- ifelse(d$status != 0, 1, 0)
dd = datadist(d)
options(datadist='dd')
f <- cph(Surv(time, status) ~ rcs(age, 4), data=d)
p <- Predict(f, fun=exp)
df <- data.frame(age=p$age, yhat=p$yhat, lower=p$lower, upper=p$upper)
# X-axis
breaks <- boxplot.stats(p[,"age"])$stats
# Main plot
MP <- ggplot(data=df, aes(x=age, y=yhat)) + geom_line(size=1) +
geom_ribbon(data=df, aes(ymin=lower, ymax=upper), alpha=0.5, linetype=0, fill='#FFC000') +
theme_bw() +
scale_x_continuous(breaks=breaks) +
xlab("Age") +
ylab("Hazard Ratio") +
theme(axis.line = element_line(color='black', size=1),
axis.ticks = element_line(color='black', size=1),
panel.grid.minor = element_blank())
# Boxplot
BP <- ggplot(data=df, aes(x=factor(1), y=age)) +
geom_boxplot(width = 1, outlier.shape=NA, size=1) +
geom_jitter(position = position_jitter(width = .3), size = 1) +
scale_y_continuous(breaks=breaks) +
coord_flip() +
theme_bw() +
theme(panel.border=element_blank(),
panel.grid=element_blank())
#### Set up the grobs and gtables here
library(gtable)
library(grid)
h = 1/15 # height of boxplot panel relative to main plot panel
# Get ggplot grobs
gMP = ggplotGrob(MP)
BPg = ggplotGrob(BP)
BPg = gtable_filter(BPg, "panel") # from the boxplot, extract the panel only
# In the main plot, get position of panel in the layout
pos = gMP$layout[gMP$layout$name == "panel", c('t', 'l')]
# In main plot, set height for boxplot
gMP$heights[pos$t-2] = unit(h, "null")
# Add boxplot to main plot
gMP = gtable_add_grob(gMP, BPg, t=pos$t-2, l=pos$l)
# Add small space
gMP$heights[pos$t-1] = unit(5, "pt")
# Draw it
grid.newpage()
grid.draw(gMP)

fill points based on date ranges

I am trying to expand a previous answer on filling a histogram based on date cuts to coloring points based on the same cuts.
library(ggplot2)
library(lubridate)
library(scales)
# random dates
# https://stackoverflow.com/questions/14720983/efficiently-generate-a-random-sample-of-times-and-dates-between-two-dates
randdate <- function(N, st="2012/01/01", et="2012/12/31") {
st <- as.POSIXct(as.Date(st))
et <- as.POSIXct(as.Date(et))
dt <- as.numeric(difftime(et,st,unit="sec"))
ev <- sort(runif(N, 0, dt))
rt <- st + ev
}
set.seed(42)
dat <- data.frame(y=sample(c(0:50), 1000, replace=TRUE),
date=randdate(1000))
dat$date <- ymd(substr(dat$date, 1, 10))
ggplot(dat,
aes(x=date, y=y,
fill=cut(..x..,
breaks=c(min(..x..), as.POSIXct("2012-03-01"),
as.POSIXct("2012-04-28"), max(..x..)),
labels=c("before","during","after"),
include.lowest=TRUE))) +
geom_point() +
scale_x_datetime(labels=date_format("%m-%Y"),
breaks=date_breaks("1 year")) +
scale_fill_manual(values=c("#E69F00", "#56B4E9", "#009E73"))
Also an attempt based on this answer, but with fewer "cuts".
ggplot(dat) +
geom_point(aes(x=date, y=y,
colour=x > as.POSIXct("2012-03-01"))) +
geom_line(colour="#999999", size=1) +
scale_colour_manual(values=c("#56B4E9", "#009E73")) +
ylab("Count") +
theme_bw() +
theme(axis.title.x = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(color = 'black'),
title=element_text(size=9, face="bold"),
legend.position="none")
First, geom_point uses the color aesthetic, not fill. Next, I find that it's makes more sense (to me) to assign the factored colors to the data frame vs try to make the ggplot calls more complex:
dat$col <- "during"
dat$col <- ifelse(dat$date < as.POSIXct("2012-03-01"), "before", dat$col)
dat$col <- ifelse(dat$date > as.POSIXct("2012-04-28"), "after", dat$col)
dat$col <- factor(dat$col, c("before", "during", "after"), ordered=TRUE)
ggplot(dat, aes(x=date, y=y, color=col)) +
geom_point() +
scale_x_datetime(labels=date_format("%m-%Y"),
breaks=date_breaks("1 year")) +
scale_color_manual(values=c("#E69F00", "#56B4E9", "#009E73"))

Resources