Loop functions with multiple variables for ggplot2 - r

I want to build several plots from one large database, so that I have one plot for each Text (factor) and for each Measure (the many resulting measures of an eye tracking study). The following is a much simpler example of what I am trying to to:
Let's say this is my dataset
Text <- c(1,1,1,1,2,2,2,2,1,1,1,1,2,2,2,2)
Position <- c(1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4)
Modified <- c(1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)
Line_on_page <- c(1, 1, 1, 1, 2,2,2,2 ,1 ,1,1,1,2,2,2,2)
IA_FIXATION_DURATION <- c(250.3, 70.82, 400, 120.12, 270, 120.5, 100.54, 212.43, 250.3, 70.82, 320.29, 123.12, 260, 121.5, 100.54, 272.43)
IA_FIXATION_COUNT <- c(1,0,1,1,3,2,0, 1, 1,0,1,2,3,2,0, 2)
IA_LABEL <- c("she", "did", "not", "know", "what", "to", "say", "to", "she", "did", "not", "know", "what", "to", "do", "to")
testDF <- data.frame(Text , Position , Line_on_page, Modified, IA_FIXATION_DURATION, IA_FIXATION_COUNT, IA_LABEL)
so I want a heatmap (or another graph) for each Text (1/2/3), and for each measure (IA_FIXATION_DURATION/IA_FIXATION_COUNT)
# so first i create my vectors
library(stringr)
library(reshape2)
library(ggplot2)
library(ggthemes)
library(tidyverse)
Text_list <- unique(testDF$Text)
Measure_list <- testDF %>% dplyr::select_if(is.numeric) %>% colnames() %>% as.vector()
# create graphing function
Heatmap_FN <- function(testDF, na.rm = TRUE, ...){
# create for loop to produce ggplot2 graphs
for (i in seq_along(Text_list)) {
for (j in seq_along(Measure_list)) {
# create plot for each text in dataset
plots <- ggplot(subset(testDF, testDF$Text==Text_list[i])) +
geom_tile(aes(x=Position,
y=Line_on_page,
fill = Measure_list[j])) +
geom_text(aes(x=Position,
y=Line_on_page,
label=IA_LABEL),
color = "white", size = 2, family = "sans") +
scale_fill_viridis_c(option = "C", na.value = "black") +
scale_y_reverse() +
facet_grid(Page ~ Modified)+
theme(legend.position = "bottom") +
ggtitle(paste(Text_list[i],j, 'Text \n'))
ggsave(plots, file=paste(Measure_list[j], "_T", Text_list[i], ".pdf", sep = ""), height = 8.27, width = 11.69, units = c("in"))
}
}
}
Heatmap_FN(testDF)
now, I am pretty sure that the problem lies in the geom_tile "fill" part, where I would like to indicate to the function that I want to use the results variables one by one to produce the plot.
Any ideas on how to fix that?
Thanks

Related

ggplot2: heatmap customize legend

I am trying to plot a heatmap (colored by odds ratios) using ggplot2. The odds ratio values range from 0-200. I would like my heatmap legend to show markings corresponding to certain values (0.1, 1, 10, 50, 100, 200). This is the code I am using but my legend does not label all the values (see below)
Code below:
map is a sample data frame with columns: segments, OR, tissue type
segments <- c("TssA", "TssBiv", "BivFlnk", "EnhBiv","ReprPC", "ReprPCWk", "Quies", "TssAFlnk", "TxFlnk", "Tx", "TxWk", "EnhG", "Enh", "ZNF/Rpts", "Het")
OR <- c(1.4787622, 46.99886002, 11.74417278, 4.49223136, 204.975818, 1.85228517, 0.85762414, 0.67926846, 0.33696213, 0.06532777, 0.10478027, 0.07462983, 0.06501252, 1.32922162, 0.32638438)
df <- data.frame(segments, OR)
map <- df %>% mutate(tissue = 'colon')
ggplot(map, aes(tissue,segments, fill = OR))+ geom_tile(colour="gray80")+
theme_bw()+coord_equal()+
scale_fill_gradientn(colours=c("lightskyblue1", "white","navajowhite","lightsalmon", "orangered2", "indianred1"),
values=rescale(c(0.1, 1, 10, 50, 100, 200)), guide="colorbar", breaks=c(0.1, 1, 10, 50, 150, 200))
I am looking for my legend to look something similar to this (using the values I specified):
With your map data, first rescale OR to log(OR).
Also, you might want to assign white to OR = 1. If that's the case, your approach would be able to achieve that. You may want to try different limits values to achieve that with real data.
map_1 <-map %>% mutate(OR = log(OR))
OR_max <- max(map$OR, na.rm = TRUE)
log_list <- c(0.2, 1, 10, 50, 200) %>% log
ggplot(map_1, aes(tissue,segments, fill = OR))+ geom_tile(colour="gray80")+
theme_bw()+coord_equal()+
scale_fill_gradientn(
colours = c("red3", "white", "navy"),
values=rescale(log_list),
guide="colorbar",
breaks=log_list,
limits = c(1/OR_max, OR_max) %>% log,
labels = c("0.1", "1", "10", "50", "200")
)

Interactively identify 3D object in rgl plot

I want to identify 3d cylinders in an rgl plot to obtain one attribute of the nearest / selected cylinder. I tried using labels to simply spell out the attribute, but I work on data with more than 10.000 cylinders. Therefore, it gets so crowded that the labels are unreadable and it takes ages to render.
I tried to understand the documentation of rgl and I guess the solution to my issue is selecting the cylinder in the plot manually. I believe the function selectpoints3d() is probably the way to go. I believe it returns all vertices within the drawn rectangle, but I don't know how to go back to the cylinder data? I could calculate which cylinder is closest to the mean of the selected vertices, but this seems like a "quick & dirty" way to do the job.
Is there a better way to go? I noticed the argument value=FALSE to get the indices only, but I don't know how to go back to the cylinders.
Here is some dummy data and my code:
# dummy data
cylinder <- data.frame(
start_X = rep(1:3, 2)*2,
start_Y = rep(1:2, each = 3)*2,
start_Z = 0,
end_X = rep(1:3, 2)*2 + round(runif(6, -1, 1), 2),
end_Y = rep(1:2, each = 3)*2 + round(runif(6, -1, 1), 2),
end_Z = 0.5,
radius = 0.25,
attribute = sample(letters[1:6], 6)
)
# calculate centers
cylinder$center_X <- rowMeans(cylinder[,c("start_X", "end_X")])
cylinder$center_Y <- rowMeans(cylinder[,c("start_Y", "end_Y")])
cylinder$center_Z <- rowMeans(cylinder[,c("start_Z", "end_Z")])
# create cylinders
cylinder_list <- list()
for (i in 1:nrow(cylinder)) {
cylinder_list[[i]] <- cylinder3d(
center = cbind(
c(cylinder$start_X[i], cylinder$end_X[i]),
c(cylinder$start_Y[i], cylinder$end_Y[i]),
c(cylinder$start_Z[i], cylinder$end_Z[i])),
radius = cylinder$radius[i],
closed = -2)
}
# plot cylinders
open3d()
par3d()
shade3d(shapelist3d(cylinder_list, plot = FALSE), col = "blue")
text3d(cylinder$center_X+0.5, cylinder$center_Y+0.5, cylinder$center_Z+0.5, cylinder$attribute, color="red")
# get attribute
nearby <- selectpoints3d(value=TRUE, button = "right")
nearby <- colMeans(nearby)
cylinder$dist <- sqrt(
(nearby["x"]-cylinder$center_X)**2 +
(nearby["y"]-cylinder$center_Y)**2 +
(nearby["z"]-cylinder$center_Z)**2)
cylinder$attribute[which.min(cylinder$dist)]
If you call selectpoints3d(value = FALSE), you get two columns. The first column is the id of the object that was found. Your cylinders get two ids each. One way to mark the cylinders is to use "tags". For example, this modification of your code:
# dummy data
cylinder <- data.frame(
start_X = rep(1:3, 2)*2,
start_Y = rep(1:2, each = 3)*2,
start_Z = 0,
end_X = rep(1:3, 2)*2 + round(runif(6, -1, 1), 2),
end_Y = rep(1:2, each = 3)*2 + round(runif(6, -1, 1), 2),
end_Z = 0.5,
radius = 0.25,
attribute = sample(letters[1:6], 6)
)
# calculate centers
cylinder$center_X <- rowMeans(cylinder[,c("start_X", "end_X")])
cylinder$center_Y <- rowMeans(cylinder[,c("start_Y", "end_Y")])
cylinder$center_Z <- rowMeans(cylinder[,c("start_Z", "end_Z")])
# create cylinders
cylinder_list <- list()
for (i in 1:nrow(cylinder)) {
cylinder_list[[i]] <- cylinder3d(
center = cbind(
c(cylinder$start_X[i], cylinder$end_X[i]),
c(cylinder$start_Y[i], cylinder$end_Y[i]),
c(cylinder$start_Z[i], cylinder$end_Z[i])),
radius = cylinder$radius[i],
closed = -2)
# Add tag here:
cylinder_list[[i]]$material$tag <- cylinder$attribute[i]
}
# plot cylinders
open3d()
par3d()
shade3d(shapelist3d(cylinder_list, plot = FALSE), col = "blue")
text3d(cylinder$center_X+0.5, cylinder$center_Y+0.5, cylinder$center_Z+0.5, cylinder$attribute, color="red")
# Don't get values, get the ids
nearby <- selectpoints3d(value=FALSE, button = "right", closest = FALSE)
ids <- nearby[, "id"]
# Convert them to tags. If you select one of the labels, you'll get
# a blank in the list of tags, because we didn't tag the text.
unique(tagged3d(id = ids))
When I was trying this, I found that using closest = TRUE in selectpoints3d seemed to get too many ids; there may be a bug there.

Error "not numeric" when using crosstalk package with EnhancedVolcano

I'd like to implement a cross-talk functionality between a table and plot in both directions:
select the row in the table which will be reflected in the plot
select a dot in the plot which will be reflected in the table. Same idea as here.
I've managed to implement a script, which works beautifully if I make scatter plot with ggplot() and table (both objects cross-talk!). However, when used EnhancedVolcano() and table I got the following error:
Error in EnhancedVolcano(toptable = data_shared, lab = "disp", x = "qsec", :
qsec is not numeric!
If I replace data_shared variable with df_orig, no error is raised, but there is no cross-talking between objects :(
Does this mean that SharedData$new() doesn't recognize numeric values as numeric? How to fix this error?
Any help is highly appreciated.
Thank you
Toy example:
library(plotly) # '4.9.1'
library(DT) # '0.11'
library(crosstalk) # ‘1.0.0’
library(EnhancedVolcano) # ‘1.4.0’
# Input
data1 = mtcars #dim(data1) # 32 11
data_shared = SharedData$new(data1) #, key = c("qsec", "hp"))
# df_orig = data_shared$origData()
# V-Plot
vp =EnhancedVolcano( toptable = data_shared,
lab = 'disp',
x = 'qsec',
y = 'hp',
xlab ='testX',
ylab = 'testY')
bscols(
ggplotly(vp + aes(x= qsec, y= -log10(hp/1000))),
datatable(data_shared, style="bootstrap", class="compact", width="100%",
options=list(deferRender=FALSE, dom='t')))
Same script, which works with ggplot():
data1 = mtcars #dim(data1) # 32 11
data_shared = SharedData$new(data1)
vp = ggplot(data = data_shared, mapping = aes(qsec, hp)) +
geom_point()
bscols(
ggplotly(vp) ,
datatable(data_shared, style="bootstrap", class="compact", width="100%",
options=list(deferRender=FALSE, dom='t')))
Note: Related (same) question was posted at BioStars, and the package author posted an answer, with author's permission copying an answer here:
Hi,
Thanks - that's very useful code and I may add it to the main package vignette, eventually.
I tried it here on my computer and I was able to get it working in my browser, but some components of the original plot seem to have been lost. I think that you just need to convert your column, 'qsec', to numerical values.
Re-using an example from my Vignette, here is a perfectly reproducible example:
library("pasilla")
pasCts <- system.file("extdata", "pasilla_gene_counts.tsv",
package="pasilla", mustWork=TRUE)
pasAnno <- system.file("extdata", "pasilla_sample_annotation.csv",
package="pasilla", mustWork=TRUE)
cts <- as.matrix(read.csv(pasCts,sep="\t",row.names="gene_id"))
coldata <- read.csv(pasAnno, row.names=1)
coldata <- coldata[,c("condition","type")]
rownames(coldata) <- sub("fb", "", rownames(coldata))
cts <- cts[, rownames(coldata)]
library("DESeq2")
dds <- DESeqDataSetFromMatrix(countData = cts,
colData = coldata,
design = ~ condition)
featureData <- data.frame(gene=rownames(cts))
mcols(dds) <- DataFrame(mcols(dds), featureData)
dds <- DESeq(dds)
res <- results(dds)
library(EnhancedVolcano)
p1 <- EnhancedVolcano(res,
lab = rownames(res),
x = "log2FoldChange",
y = "pvalue",
pCutoff = 10e-4,
FCcutoff = 2,
xlim = c(-5.5, 5.5),
ylim = c(0, -log10(10e-12)),
pointSize = c(ifelse(res$log2FoldChange>2, 8, 1)),
labSize = 4.0,
shape = c(6, 6, 19, 16),
title = "DESeq2 results",
subtitle = "Differential expression",
caption = "FC cutoff, 1.333; p-value cutoff, 10e-4",
legendPosition = "right",
legendLabSize = 14,
col = c("grey30", "forestgreen", "royalblue", "red2"),
colAlpha = 0.9,
drawConnectors = TRUE,
hline = c(10e-8),
widthConnectors = 0.5)
p1 <- p1 +
ggplot2::coord_cartesian(xlim=c(-6, 6)) +
ggplot2::scale_x_continuous(
breaks=seq(-6,6, 1))
library(plotly)
library(DT)
library(crosstalk)
bscols(
ggplotly(p1 + aes(x= log2FoldChange, y= -log10(pvalue))),
datatable(
data.frame(res),
style="bootstrap",
class="compact", width="100%",
options=list(deferRender=FALSE, dom='t')))
Unfortunately, plotly and/or bscols don't like the use of bquote(), so, one cannot have the fancy axes names that I use in EnhancedVolcano:
... + xlab(bquote(~Log[2] ~ "fold change")) + ylab(bquote(~-Log[10] ~ italic(P)))
When i try to add these, it throws an error.
Kevin
tried to modify few things in volcano function, got following error:
Error in as.data.frame.default(toptable) :
cannot coerce class ‘c("SharedData", "R6")’ to a data.frame
not sure yet, how to fix it.

Saving multiply pdf plots r

I have made a loop for making multiply plots, however i have no way of saving them, my code looks like this:
#----------------------------------------------------------------------------------------#
# RING data: Mikkel
#----------------------------------------------------------------------------------------#
# Set working directory
setwd()
#### Read data & Converting factors ####
dat <- read.table("Complete RING.txt", header =TRUE)
str(dat)
dat$Vial <- as.factor(dat$Vial)
dat$Line <- as.factor(dat$Line)
dat$Fly <- as.factor(dat$Fly)
dat$Temp <- as.factor(dat$Temp)
str(dat)
datSUM <- summaryBy(X0.5_sec+X1_sec+X1.5_sec+X2_sec+X2.5_sec+X3_sec~Vial_nr+Concentration+Sex+Line+Vial+Temp,data=dat, FUN=sum)
fl<-levels(datSUM$Line)
colors = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3")
meltet <- melt(datSUM, id=c("Concentration","Sex","Line","Vial", "Temp", "Vial_nr"))
levels(meltet$variable) <- c('0,5 sec', '1 sec', '1,5 sec', '2 sec', '2,5 sec', '3 sec')
meltet20 <- subset(meltet, Line=="20")
meltet20$variable <- as.factor(meltet20$variable)
AllConcentrations <- levels(meltet20$Concentration)
for (i in AllConcentrations) {
meltet.i <- meltet20[meltet20$Concentration ==i,]
quartz()
print(dotplot(value~variable|Temp, group=Sex, data = meltet.i ,xlab="Time", ylab="Total height pr vial [mm above buttom]", main=paste('Line 20 concentration ', meltet.i$Concentration[1]),
key = list(points = list(col = colors[1:2], pch = c(1, 2)),
text = list(c("Female", "Male")),
space = "top"), col = colors, pch =c(1, 2))) }
I have tried with the quartz.save function, but that just overwrites the files. Im using a mac if that makes any difference.
When I want to save multiple plots in a loop I tend to do something like...
for(i in AllConcentrations){
meltet.i <- meltet20[meltet20$Concentration ==i,]
pdf(paste("my_filename", i, ".pdf", sep = ""))
dotplot(value~variable|Temp, group=Sex, data = meltet.i ,xlab="Time", ylab="Total height pr vial [mm above buttom]", main=paste('Line 20 concentration ', meltet.i$Concentration[1]),
key = list(points = list(col = colors[1:2], pch = c(1, 2)),
text = list(c("Female", "Male")),
space = "top"), col = colors, pch =c(1, 2))
dev.off()
}
This will create a pdf file for every level in AllConcentrations and save it in your working directory. It will paste together my_filename, the number of the iteration i, and then .pdf together to make each file unique. Of course, you will want to adjust height and width in the pdf function.

box plot using column of different length

I want to do some box plots, but I have data with a different number of rows for each column.
My data looks like:
OT1 OT2 OT3 OT4 OT5 OT6
22,6130653 16,6666667 20,259481 9,7431602 0,2777778 16,0678643
21,1122919 32,2946176 11,396648 10,9458023 4,7128509 10,8938547
23,5119048 19,5360195 23,9327541 39,5634921 0,6715507 12,2591613
16,9880885 39,5365943 7,7568134 22,7453205 3,6410445 11,7610063
32,768937 25,2897351 9,6288027 4,1629535 3,7251656
40,7819933 15,6320021 5,9171598
23,7961828 14,3728125 2,1887585
I'd like to have a box plot for each column (OT1, OT2…), but with the first three and the last three grouped together.
I tried:
>mydata <- read.csv('L5.txt', header = T, sep = "\t")
>mydata_t <- t(mydata)
>boxplot(mydata_t, ylab = "OTU abundance (%)",las=2, at=c(1,2,3 5,6,7))
But it didn't work…
How can I do?
Thanks!
Combining both answers and extenting Henrik's answer, you can also group the OT's together in boxplot() as well:
dat <- read.table(text='OT1 OT2 OT3 OT4 OT5 OT6
22,6130653 16,6666667 20,259481 9,7431602 0,2777778 16,0678643
21,1122919 32,2946176 11,396648 10,9458023 4,7128509 10,8938547
23,5119048 19,5360195 23,9327541 39,5634921 0,6715507 12,2591613
16,9880885 39,5365943 7,7568134 22,7453205 3,6410445 11,7610063
32,768937 25,2897351 9,6288027 4,1629535 3,7251656
40,7819933 15,6320021 5,9171598
23,7961828 14,3728125 2,1887585',header=TRUE,fill=TRUE)
dat <- sapply(dat,function(x)as.numeric(gsub(',','.',x)))
dat.m <- melt(dat)
dat.m <- transform(dat.m,group=ifelse(grepl('1|2|3','4|5|6'),
'group1','group2'))
as.factor(dat.m$X2)
boxplot(dat.m$value~dat.m$X2,data=dat.m,
axes = FALSE,
at = 1:6 + c(0.2, 0, -0.2),
col = rainbow(6))
axis(side = 1, at = c(2, 5), labels = c("Group_1", "Group_2"))
axis(side = 2, at = seq(0, 40, by = 10))
legend("topright", legend = c("OT1", "OT2", "OT3", "OT4", "OT5", "OT6"), fill = rainbow(6))
abline(v = 3.5, col = "grey")
box()
Not easy to group boxplots using R basic plots, better to use ggplot2 here. Whatever the difficulty here is how to reformat your data and reshape them in the long format.
dat <- read.table(text='OT1 OT2 OT3 OT4 OT5 OT6
22,6130653 16,6666667 20,259481 9,7431602 0,2777778 16,0678643
21,1122919 32,2946176 11,396648 10,9458023 4,7128509 10,8938547
23,5119048 19,5360195 23,9327541 39,5634921 0,6715507 12,2591613
16,9880885 39,5365943 7,7568134 22,7453205 3,6410445 11,7610063
32,768937 25,2897351 9,6288027 4,1629535 3,7251656
40,7819933 15,6320021 5,9171598
23,7961828 14,3728125 2,1887585',header=TRUE,fill=TRUE)
dat = sapply(dat,function(x)as.numeric(gsub(',','.',x)))
dat.m <- melt(dat)
dat.m <- transform(dat.m,group=ifelse(grepl('1|2|3',Var2),
'group1','group2'))
ggplot(dat.m)+
geom_boxplot(aes(x=group,y=value,fill=Var2))
Or with boxplot, using #agstudy's 'dat':
df <- melt(dat)
boxplot(value ~ Var2, data = df, at = 1:6 + c(0.2, 0, -0.2))

Resources