Graphing a large number of plots - r

I'm fitting a dose-response curve to many data sets that I want to plot to a single file.
Here's what one data set looks like:
# Example data set: the same 8 doses measured in three replicate series,
# with one viability reading per dose and replicate.
df <- data.frame(
  dose = rep(
    c(10, 0.625, 2.5, 0.156, 0.0391, 0.00244, 0.00977, 0.00061),
    times = 3
  ),
  viability = c(
    6.12, 105, 57.9, 81.9, 86.5, 98.3, 96.4, 81.8,
    27.3, 85.2, 80.8, 92, 82.5, 110, 90.2, 76.6,
    11.9, 89, 35.4, 79, 95.8, 117, 82.1, 95.1
  ),
  # Spelled out: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
Here's the dose-response fit:
library(drc)
# Fit viability as a function of dose with drm(); LL.4 is given display names
# for its four parameters (slope, lower limit, upper limit, ED50).
fit <- drm(viability~dose,data=df,fct=LL.4(names=c("Slope","Lower Limit","Upper Limit","ED50")))
Now I'm predicting values in order to plot the curve:
# Dose grid for drawing the fitted curve: 100 points evenly spaced on the log
# scale, running from the largest observed dose down to the smallest.
# `length.out` is spelled out instead of relying on partial matching of `length`.
pred.df <- expand.grid(
  dose = exp(seq(log(max(df$dose)), log(min(df$dose)), length.out = 100))
)
# Predictions with a confidence interval; the result is used by column
# position: 1 = predicted viability, 2 = lower bound, 3 = upper bound.
pred <- predict(fit, newdata = pred.df, interval = "confidence")
pred.df$viability <- pred[, 1]
pred.df$viability.low <- pred[, 2]
pred.df$viability.high <- pred[, 3]
And this is what a single plot looks like:
library(ggplot2)
# Observed doses, used both as x-axis tick positions and (formatted) labels.
dose.breaks <- sort(unique(df$dose))
# Points = raw data; ribbon = confidence band; line = fitted curve.
# The x axis is log-transformed at the coordinate level.
p <- ggplot(df, aes(x = dose, y = viability)) +
  geom_point() +
  geom_ribbon(
    data = pred.df,
    aes(x = dose, y = viability, ymin = viability.low, ymax = viability.high),
    alpha = 0.2
  ) +
  labs(y = "viability") +
  geom_line(data = pred.df, aes(x = dose, y = viability)) +
  coord_trans(x = "log") +
  theme_bw() +
  scale_x_continuous(
    name = "dose",
    breaks = dose.breaks,
    labels = format(signif(dose.breaks, 3), scientific = TRUE)
  ) +
  ggtitle(label = "all doses")
Adding a few parameter estimates to the plot:
# Take column 1 of the coefficient table (presumably the estimates), drop the
# first row (the slope), and round to 3 significant digits.
# `$coefficients` is spelled out instead of relying on partial matching.
params <- signif(summary(fit)$coefficients[-1, 1], 3)
names(params) <- c("lower", "upper", "ed50")
# One "name=value" entry per line; paste0() is vectorised, so no loop is needed.
param.label <- paste0(names(params), "=", params, collapse = "\n")
p <- p + annotate(
  "text",
  size = 3, hjust = 0, x = 2.4e-3, y = 5,
  label = param.label,
  colour = "black"
)
Which gives:
Now suppose I have 20 of these that I want to cram in a single figure file.
I thought that a reasonable solution would be to use grid.arrange:
As an example I'll loop 20 times on this example data set:
# Build 20 copies of the example plot (stand-ins for 20 real data sets).
# lapply() returns the list directly, so no preallocation or index loop is needed.
plot.list <- lapply(seq_len(20), function(i) {
  # Tick positions/labels and the annotation text are computed once per plot.
  dose.breaks <- sort(unique(df$dose))
  param.label <- paste0(names(params), "=", params, collapse = "\n")
  ggplot(df, aes(x = dose, y = viability)) +
    geom_point() +
    geom_ribbon(
      data = pred.df,
      aes(x = dose, y = viability, ymin = viability.low, ymax = viability.high),
      alpha = 0.2
    ) +
    labs(y = "viability") +
    geom_line(data = pred.df, aes(x = dose, y = viability)) +
    coord_trans(x = "log") +
    theme_bw() +
    scale_x_continuous(
      name = "dose",
      breaks = dose.breaks,
      labels = format(signif(dose.breaks, 3), scientific = TRUE)
    ) +
    ggtitle(label = "all doses") +
    annotate(
      "text",
      size = 3, hjust = 0, x = 2.4e-3, y = 5,
      label = param.label,
      colour = "black"
    )
})
And then plot using:
library(grid)
library(gridExtra)
# Three plots per row, with as many rows as needed to place every plot.
n.col <- 3
n.row <- ceiling(length(plot.list) / n.col)
grid.arrange(grobs = plot.list, ncol = n.col, nrow = n.row)
Which is obviously poorly scaled. So my question is how to create this figure with better scaling - meaning that all objects are compressed proportionally in a way that produces a figure that is still visually interpretable.

You should set the device size so that the plots remain readable, e.g.
# 11 placeholder plots, kept as a list (simplify = FALSE)
pl <- replicate(11, qplot(1, 1), simplify = FALSE)
# Arrange them in 3 columns without drawing, so the layout can be saved
g <- arrangeGrob(grobs = pl, ncol = 3)
# Choose a device size large enough for every panel to stay readable
ggsave("plots.pdf", g, width = 15, height = 20)

Related

Combine quantile and continuous models in plot (cph, rms)?

I would like to combine continuous and quantile models in the same plot to compare and contrast the two approaches (xtile is a function that returns the quantile as factor):
# Continuous model: exposure enters through rcs(exposure, 3).
q.s <- cph(inc ~ rcs(exposure,3), data=data)
# Quantile model: exposure enters as the factor returned by xtile(exposure, 3).
q.q <- cph(inc ~ xtile(exposure,3), data=data)
# Predictions over exposure for each model, transformed with exp().
p.s <- Predict(q.s, exposure, fun=exp)
p.q <- Predict(q.q, exposure, fun=exp)
ggplot.Predict gives a nice plot of either model - but I would like to combine both. Is this possible?
I have added an example - which I hope might illustrate what I would like to generate.
enter image description here
Try this where plot1/2 are your quantile and continuous plots (can change heights, widths, number of columns, number of rows):
# p1 / p2: the two plots to combine. The arguments below are placeholders and
# must be replaced with real ggplot.Predict() arguments before running.
p1 <- ggplot.Predict(..plot1 options here...)
p2 <- ggplot.Predict(..plot2 options here...)
library(gridExtra)
# Place the two plots in one row with two equal-width columns.
grid.arrange(p1, p2,
ncol=2, nrow=1, widths=c(2,2), heights=c(2))
For more information, check out the gridExtra package.

How to use ggplot to plot T-SNE clustering

Here is the t-SNE code using IRIS data:
library(Rtsne)
# Drop duplicated rows, then keep the first four columns as a matrix
iris_unique <- unique(iris)
iris_matrix <- as.matrix(iris_unique[, 1:4])
# Fix the seed so the results are reproducible
set.seed(42)
# Run t-SNE
tsne_out <- Rtsne(iris_matrix)
# Show the objects in the 2D t-SNE representation, coloured by species
plot(tsne_out$Y, col = iris_unique$Species)
Which produces this plot:
How can I use GGPLOT to make that figure?
I think the easiest/cleanest ggplot way would be to store all the info you need in a data.frame and then plot it. From your code pasted above, this should work:
library(ggplot2)
# Collect the two embedding coordinates and the species label in one data frame
tsne_plot <- data.frame(
  x = tsne_out$Y[, 1],
  y = tsne_out$Y[, 2],
  col = iris_unique$Species
)
# One point per row, coloured by species
ggplot(tsne_plot) +
  geom_point(aes(x = x, y = y, color = col))
My plot using the regular plot function is:
plot(tsne_out$Y,col=iris_unique$Species)

loop lm predictions for plotting multiple lines

I want to plot linear-model-lines for each ID.
How can I create predictions for multiple lms (or glms) using sequences of different length? I tried:
# Some fake data: 60 rows; IDs A, B, C occur in a 1:2:3 ratio.
res<-runif(60,1,20)
var<-runif(60,10,50)
ID<-rep(c("A","B","B","C","C","C"),10)
data<- data.frame(ID,res,var)
# One lm(res ~ var) fit per ID, using the keyed data.table to select rows.
library(data.table)
dt <- data.table(data,key="ID")
fits <- lapply(unique(data$ID),
function(z)lm(res~var, data=dt[J(z),], y=T))
# Per-ID sequence of var values from that ID's minimum to its maximum, step 1
# (so the sequences generally have different lengths).
mins<-matrix(with(data, tapply(var,ID,min)))
mins1<-mins[,1]
maxs<-matrix(with(data,tapply(var,ID,max)))
maxs1<-maxs[,1]
my_var<-list()
for(i in 1:3){
my_var[[i]]<- seq(from=mins1[[i]],to=maxs1[[i]],by=1)
}
# Predict on the sequences.
# NOTE(review): this is the part the question reports as failing. Visible issues:
#  - a for loop evaluates to NULL, so nothing is stored in predslist;
#  - the value returned by predict() inside the loop is discarded;
#  - my_var is the whole list, not my_var[[i]], so data.frame() receives
#    sequences of different lengths -- likely the reported error;
#  - type= and se= are arguments of data.frame() here, not of predict(),
#    because of where the closing parenthesis sits.
predslist<- list()
predslist[[i]] <- for(i in 1:3){
dat<-fits[[i]]
predict(dat,newdata= data.frame("var"= my_var,type= "response", se=TRUE))
}
The predict() call results in an error.
Plotting lm lines only for var[i] ranges works in ggplot:
library(ggplot2)
# create ID, x, y as coded by Matt
p <- qplot(x, y)
# One least-squares line per ID, drawn without the confidence band.
# `FALSE` is spelled out: `F` is an ordinary variable that can be reassigned.
p + geom_smooth(aes(group = ID), method = "lm", size = 1, se = FALSE)
Is something like this what you're after?
# Generating some fake data: four IDs with 10 points each; the slope of y on x
# grows with the ID's position (1 to 4).
ID <- rep(letters[1:4], each = 10)
x <- rnorm(40, mean = 5, sd = 10)
y <- as.numeric(as.factor(ID)) * x + rnorm(40)
# Plotting in base R, one colour per ID
plot(x, y, col = as.factor(ID), pch = 16)
# Calling lm() once per ID (in sorted ID order) and adding one line per fit.
# seq_along() is used so an empty list would simply draw nothing.
lmlist <- lapply(sort(unique(ID)), function(i) lm(y[ID == i] ~ x[ID == i]))
for (i in seq_along(lmlist)) {
  abline(lmlist[[i]], col = i)
}
Don't know if the plotting part is where you're stuck, but the abline() function will draw a least-squares line if you pass in an object returned from lm().
If you want the least-squares lines to begin & end with the min & max x values, here's a workaround. It's not pretty, but seems to work.
# Redraw the points, then draw each fit only over its own group's x values,
# using the fitted values returned by predict().
plot(x, y, col = as.factor(ID), pch = 16)
# Numeric group index; it matches the sorted-ID order used to build lmlist.
IDnum <- as.numeric(as.factor(ID))
for (i in seq_along(lmlist)) {
  lines(x[IDnum == i], predict(lmlist[[i]]), col = i)
}

Changing ggplot objects generated by ggiNEXT()

This is the basic example given in the iNEXT package:
library(iNEXT)
data(spider)
# Multiple abundance-based data sets, evaluated at orders q = 0, 1 and 2
z <- iNEXT(spider, q = c(0, 1, 2), datatype = "abundance")
# Facet by site, colour by order
p1 <- ggiNEXT(z, facet.var = "site", color.var = "order")
In my dataset, I have more samples and the faceting does not work so well:
So I want to change the ncol/nrow arguments in the facet_wrap/facet_grid call inside the object "p1". p1 is a ggplot object, so it can be altered (e.g. p1 + xlab("") removes the x-axis title).
In general, it would be nice to know how ggiNEXT() can be decomposed into single lines, and what objects are used in the data arguments, so I can change the order of the samples and reduce the number of samples used per plot. Somehow, I wasn't able to find that out by looking at the function itself; I also get "Error: ggplot2 doesn't know how to deal with data of class iNEXT" when I try to follow ggiNEXT() step by step.
You could use facet_wrap(~site, ncol=3) to tune your plot. Take a simple example as follows:
library(iNEXT)
library(ggplot2)
set.seed(123)
# Weights proportional to 1, 1/2, ..., 1/k for a random k between 1 and 50
# (`:` binds tighter than `/`, so the parentheses only make that explicit),
# normalised to sum to 1.
p <- 1 / (1:sample(1:50, 1))
p <- p / sum(p)
# 9 multinomial draws of size 200, one per column
dat <- as.data.frame(rmultinom(9, 200, p))
z <- iNEXT(dat, q = c(0, 1, 2))
p1 <- ggiNEXT(z, facet.var = "site", color.var = "order")
# Add a 3-column wrapped facet layout by site
p1 + facet_wrap(~site, ncol = 3)

multiple histograms on top of eachother without bins

Let's say I've got this data frame with 2 levels, LC and HC.
Now I want to get 2 plots like the one below on top of each other.
# Five LC wells followed by five HC wells, with one value per well
data <- data.frame(
  welltype = rep(c("LC", "HC"), each = 5),
  value = c(1, 2, 1, 2, 1, 5, 4, 5, 4, 5)
)
The code to get following plot =
x <- rnorm(1000)
# Only the binning is needed; plot = FALSE stops hist() from first drawing its
# own bar chart, which the plot() call below would immediately replace.
y <- hist(x, plot = FALSE)
# Step outline of the histogram: there is one more break than there are counts,
# so a trailing 0 is appended to close the last bin.
plot(y$breaks,
     c(y$counts, 0),
     type = "s", col = "blue")
(with thanks to Joris Meys)
So, how do I even start on this? Since I'm used to Java I was thinking of a for loop, but I've been told not to do it this way.
In addition to the method provided by Aaron, there's a ggplot solution as well (see below),
but I would strongly advise you to use densities, as they give nicer plots and are a whole lot easier to construct:
# Make data: 100 normal draws per well type, with means 0, 2 and 3
# (drawn in that order, one well type after the other)
wells <- c("LC", "HC", "BC")
Data <- data.frame(
  welltype = rep(wells, each = 100),
  value = unlist(lapply(c(0, 2, 3), function(m) rnorm(100, mean = m)))
)
# Overlapping, semi-transparent density curves, one per well type
ggplot(Data, aes(x = value, fill = welltype)) +
  geom_density(alpha = 0.2)
gives :
For the plot you requested :
# Make the hists data frame: one row per histogram break, per well type.
# split() names each piece after its well type, and the label is attached
# while that piece is processed. The labels therefore cannot get out of step
# with the data, which happens when labels are repeated in a hand-written
# order that differs from the sorted grouping order.
groups <- split(Data$value, Data$welltype)
hists <- do.call(rbind, lapply(names(groups), function(g) {
  # plot = FALSE: only the binning is needed, not a drawn histogram
  h <- hist(groups[[g]], plot = FALSE)
  # There is one more break than counts, so pad the counts with a trailing 0
  data.frame(
    br = h$breaks,
    co = c(h$counts, 0),
    fac = g,
    stringsAsFactors = FALSE
  )
}))
# Make the plot: one step outline per well type.
# library() fails loudly if ggplot2 is missing; require() would only return FALSE.
library(ggplot2)
ggplot(hists, aes(x = br, y = co, colour = fac)) +
  geom_step()
You can use the same code except with points instead of plot for adding additional lines to the plot.
Making up some data
set.seed(5)
# Group 1 is drawn first and shifted by +3; group 2 is drawn second, unshifted
shifted <- rnorm(1000) + 3
unshifted <- rnorm(1000)
d <- data.frame(
  x = c(shifted, unshifted),
  g = rep(1:2, each = 1000)
)
And doing it in a fairly straightforward way:
# Separate the two groups and bin each one without drawing anything
x1 <- d$x[d$g == 1]
x2 <- d$x[d$g == 2]
y1 <- hist(x1, plot = FALSE)
y2 <- hist(x2, plot = FALSE)
# The axis limits must cover both histograms, and the y axis starts at 0
x.lim <- range(c(y1$breaks, y2$breaks))
y.lim <- range(c(0, y1$counts, y2$counts))
# First outline sets up the plot; the second is added with points()
plot(y1$breaks, c(y1$counts, 0), type = "s", col = "blue", xlim = x.lim, ylim = y.lim)
points(y2$breaks, c(y2$counts, 0), type = "s", col = "red")
Or in a more R-ish way:
# One colour per group, in group order
col <- c("blue", "red")
# Values split by group, then binned without drawing
ds <- split(d$x, d$g)
hs <- lapply(ds, hist, plot = FALSE)
# Pool counts and breaks across groups to get common axis limits
all.counts <- unlist(lapply(hs, function(x) x$counts))
all.breaks <- unlist(lapply(hs, function(x) x$breaks))
# Empty canvas with the right limits; each outline is then added on top
plot(0, 0, type = "n", ylim = range(c(0, all.counts)), xlim = range(all.breaks))
for (i in seq_along(hs)) {
  h <- hs[[i]]
  points(h$breaks, c(h$counts, 0), type = "s", col = col[i])
}
EDIT: Inspired by Joris's answer, I'll note that lattice can also easily do overlapping density plots.
library(lattice)
# Overlapping density curves, one per value of g.
# `groups` is spelled out instead of relying on partial matching of `group`.
densityplot(~x, groups = g, data = d)

Resources