Plotting data in a list in R - r

I have a bunch of .csv files that I want to read into a list, then create plots.
I've tried the code below, but I get an error when trying to cbind. Below is the dput output from 2 example files. Each file represents weather data from a separate station. Ideally I would plot the prcp column from each file in one plot window. I don't have much experience working with data in a list.
file1 <- structure(list(mxtmp = c(18.974, 20.767, 21.326, 19.669, 18.609,
21.322), mntmp = c(4.026, 5.935, 8.671, 6.785, 3.493, 6.647),
prcp = c(0.009, 0.046, 0.193, 0.345, 0.113, 0.187)), .Names = c("mxtmp",
"mntmp", "prcp"), row.names = c(NA, 6L), class = "data.frame")
.
file2 <- structure(list(mxtmp = c(18.974, 20.767, 21.326, 19.669, 18.609,
21.322), mntmp = c(4.026, 5.935, 8.671, 6.785, 3.493, 6.647),
prcp = c(0.009, 0.046, 0.193, 0.345, 0.113, 0.187)), .Names = c("mxtmp",
"mntmp", "prcp"), row.names = c(NA, 6L), class = "data.frame")
I read these files from a directory into a list:
# List the CSV files in the working directory. `pattern` is a regular
# expression, so the dot is escaped and the match anchored to the end of the
# file name; otherwise names such as "mycsvnotes.txt" would match too.
myFiles <- list.files(full.names = FALSE, pattern = "\\.csv$")
# Read every file into one list element (one data frame per file).
my.data <- lapply(myFiles, read_csv)
my.data
# Name each element after its file, dropping the ".csv" extension. Replacing
# with "" (not " ") avoids a trailing blank in every name.
names(my.data) <- sub("\\.csv$", "", myFiles)
I get an error on the line below:
my.data <- lapply(my.data, function(x) cbind(x = seq_along(x), y = x))
Error in data.frame(..., check.names = FALSE) :
arguments imply differing number of rows: 3, 34333
list.names <- names(my.data)
lns <- sapply(my.data, nrow)
my.data <- as.data.frame(do.call("cbind", my.data))
my.data$group <- rep(list.names, lns)
My plot code:
library(ggplot2)
ggplot(my.data, aes(x = x, y = y, colour = group)) +
theme_bw() +
geom_line(linetype = "dotted")

If you don't need to keep the data frames around for anything else, then you can just read and plot all at once. The column names in your plot code don't match the column names in your data frames. So here's a general approach that you'll need to tailor to your actual data. The code below reads each data frame and creates a plot from it and then returns a list containing the plots:
# Build one ggplot per input file; the result is a list of plot objects in
# the same order as myFiles. The aesthetics (x, y, group) are placeholders and
# must be replaced with column names that exist in the data.
plot.list <- lapply(myFiles, function(path) {
  station_data <- read_csv(path)
  base_plot <- ggplot(station_data, aes(x = x, y = y, colour = group))
  base_plot + theme_bw() + geom_line(linetype = "dotted")
})
# Lay out all the plots together
library(gridExtra)
do.call(grid.arrange, plot.list)

Related

How to plot multiple xts objects inside list to have n amount of dygraphs printed in an output folder?

I have sample file with csv that describe some stock exchange indexes. I have already managed to:
Create for loop statement to tidy the data in a way that I have those data as DFs in a list.
In each index I calculated using loop OLSSlope, Log values, Percent Change, STDSlope etc.
Create XTS objects that are stored inside list.
I want to write such for loop/ lapply code that will take each xts object inside list, create plot using (dygraph) library and then save this graph in an output file. So that the goal is to have graph for each index that is inside this list of xts objects. I don't have problem with creating one graph for one object but to make it universal inside loop. Code for graph that I want is:
wig20tr_d_xts <- xts(x = wig20tr_d$Zamkniecie,
order.by = wig20tr_d$Date)
wig20tr_d_ols <- xts(x = wig20tr_d$OLSSlope,
order.by = wig20tr_d$Date)
wig20tr_d_stdup <- xts(x = wig20tr_d$OneSTDup,
order.by = wig20tr_d$Date)
wig20tr_d_stduptwo <- xts(x = wig20tr_d$TwoSTDup,
order.by = wig20tr_d$Date)
wig20tr_d_stddown <- xts(x = wig20tr_d$OneSTDdown,
order.by = wig20tr_d$Date)
wig20tr_d_stddowntwo <- xts(x = wig20tr_d$TwoSTDdown,
order.by = wig20tr_d$Date)
wig20 <- cbind(wig20tr_d_xts, wig20tr_d_ols, wig20tr_d_stdup, wig20tr_d_stduptwo, wig20tr_d_stddown, wig20tr_d_stddowntwo)
wig20_graph <- dygraph(wig20, main = "WIG 20 TR", ylab = "Total return in zł") %>%
dySeries("wig20tr_d_xts", color = "black") %>%
dySeries("wig20tr_d_ols", strokeWidth = 2, strokePattern = "dashed", color = "blue") %>%
dySeries("wig20tr_d_stdup", color = "green") %>%
dySeries("wig20tr_d_stduptwo", color = "green") %>%
dySeries("wig20tr_d_stddown", color = "red") %>%
dySeries("wig20tr_d_stddowntwo", color = "red") %>%
dyRangeSelector() %>%
dyUnzoom() %>%
dyOptions(axisLineColor = "navy",
gridLineColor = "lightblue") %>%
dyCrosshair(direction = "vertical")
wig20_graph
htmltools::save_html(wig20_graph, file = "C:/DATA_output/wig20_graph.html")
As you can see I use this addition to the graph:
# Attach the "Crosshair" plugin shipped with the dygraphs package to a graph.
#   dygraph   - the dygraph object to extend
#   direction - one of "both" (default), "horizontal" or "vertical"
# Returns the value of dyPlugin().
dyCrosshair <- function(dygraph,
                        direction = c("both", "horizontal", "vertical")) {
  # Resolve the choice up front; an invalid value errors here.
  direction <- match.arg(direction)
  plugin_path <- system.file("plugins/crosshair.js", package = "dygraphs")
  dyPlugin(
    dygraph = dygraph,
    name = "Crosshair",
    path = plugin_path,
    options = list(direction = direction)
  )
}
Loop to create list of xts objects is like this:
# Replace every element of xts_list with a six-column xts object ordered by
# Date. Each element is expected to hold the 13 columns named below.
# seq_along() (rather than 1:length()) makes the loop a no-op for an empty list.
for (i in seq_along(xts_list)) {
  # [[i]] extracts the element itself instead of a length-one sub-list.
  df <- as.data.frame(xts_list[[i]])
  colnames(df) <- c("Date", "Zamkniecie", "Trend", "OLSSlope", "LogClose", "LogCloseOLS", "LogCloseOLSSlope", "PercentChange", "LogChange", "OneSTDup", "OneSTDdown", "TwoSTDup", "TwoSTDdown")
  time_series <- xts(x = df$Zamkniecie, order.by = df$Date)
  ols <- xts(x = df$OLSSlope, order.by = df$Date)
  stdup <- xts(x = df$OneSTDup, order.by = df$Date)
  stduptwo <- xts(x = df$TwoSTDup, order.by = df$Date)
  stddown <- xts(x = df$OneSTDdown, order.by = df$Date)
  stddowntwo <- xts(x = df$TwoSTDdown, order.by = df$Date)
  time_series_full <- cbind(time_series, ols, stdup, stduptwo, stddown, stddowntwo)
  xts_list[[i]] <- time_series_full
  print(i)  # progress indicator
}
I have problem with adding part with graph inside this last for loop. So that the HTML graph would be named after the index. In this example the index is wig20tr_d

How to add additional statistics on top of a combined ggplot2 graph that uses a multi-variable object or two separate objects

I have a ggplot2 graph which plots two separate violin plots onto one graph, given by this example (thanks to @jared_mamrot for providing it):
library(tidyverse)
data("Puromycin")
head(Puromycin)
dat1 <- Puromycin %>%
filter(state == "treated")
dat2 <- Puromycin %>%
filter(state == "untreated")
mycp <- ggplot() +
geom_violin(data = dat1, aes(x= state, y = conc, colour = "Puromycin (Treatment1)")) +
geom_violin(data = dat2, aes(x= state, y = conc, colour = "Puromycin (Treatment2)"))
mycp
I would like to add a boxplot or other summary statistics such as those in http://www.sthda.com/english/wiki/ggplot2-violin-plot-quick-start-guide-r-software-and-data-visualization and https://www.maths.usyd.edu.au/u/UG/SM/STAT3022/r/current/Misc/data-visualization-2.1.pdf, but trying the code suggested in those places does not change the original plot.
mycp + geom_boxplot()
Thanks for reading and hopefully this makes sense!
UPDATE ==========================================================================
So the above example does not reflect exactly my situation I realize now. Essentially, I want to apply statistics onto a combined ggplot2 graph that uses two separate objects as its variables (here TNBC_List1 and ER_List1) Here is an example that does (sorry for the longer example, I will admit I am having trouble creating a simpler reproducible example and I am very new to coding in general):
# Libraries -------------------------------------------------------------
library(BiocManager)
library(GEOquery)
library(plyr)
library(dplyr)
library(Matrix)
library(devtools)
library(Seurat)
library(ggplot2)
library(cowplot)
library(SAVER)
library(metap)
library(multtest)
# Loading Raw Data into RStudio ----------------------------------
filePaths = getGEOSuppFiles("GSE75688")
tarF <- list.files(path = "./GSE75688/", pattern = "*.tar", full.names = TRUE)
tarF
untar(tarF, exdir = "./GSE75688/")
gzipF <- list.files(path = "./GSE75688/", pattern = "*.gz", full.names = TRUE)
ldply(.data = gzipF, .fun = gunzip)
list.files(path = "./GSE75688/", full.names = TRUE)
list.files(path = "./GSE75688/", pattern = "\\.txt$",full.names = TRUE)
# full matrix ----------------------------------------------------------
fullmat <- read.table(file = './GSE75688//GSE75688_GEO_processed_Breast_Cancer_raw_TPM_matrix.txt',
sep = '\t', header = FALSE, stringsAsFactors = FALSE)
fullmat <- data.frame(fullmat[,-1], row.names=fullmat[,1])
colnames(fullmat) <- as.character(fullmat[1, ])
fullmat <- fullmat[-1,]
fullmat <- as.matrix(fullmat)
# BC01 ER+ matrix -----------------------------------------------------------
BC01mat <- grep(pattern =c("^BC01") , x = colnames(fullmat), value = TRUE)
BC01mat = fullmat[,grepl(c("^BC01"),colnames(fullmat))]
BC01mat = BC01mat[,!grepl("^BC01_Pooled",colnames(BC01mat))]
BC01mat = BC01mat[,!grepl("^BC01_Tumor",colnames(BC01mat))]
BC01pdat <- data.frame("samples" = colnames(BC01mat), "treatment" = "ER+")
# BC07 TNBC matrix -----------------------------------------------------------
BC07mat <- grep(pattern =c("^BC07") , x = colnames(fullmat), value = TRUE)
BC07mat <- fullmat[,grepl(c("^BC07"),colnames(fullmat))]
BC07mat <- BC07mat[,!grepl("^BC07_Pooled",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07_Tumor",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07LN_Pooled",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07LN",colnames(BC07mat))]
BC07pdat <- data.frame("samples" = colnames(BC07mat), "treatment" = "TNBC")
#merge samples together =========================================================================
joined <- cbind(BC01mat, BC07mat)
pdat_joined <- rbind(BC01pdat, BC07pdat)
#fdat ___________________________________________________________________________________
fdat <- grep(pattern =c("gene_name|gene_type") , x = colnames(fullmat), value = TRUE)
fdat <- fullmat[,grepl(c("gene_name|gene_type"),colnames(fullmat))]
fdat <- as.data.frame(fdat, stringsAsFactors = FALSE)
fdat <- setNames(cbind(rownames(fdat), fdat, row.names = NULL),
c("ensembl_id", "gene_short_name", "gene_type"))
rownames(pdat_joined) <- pdat_joined$samples
rownames(fdat) = make.names(fdat$gene_short_name, unique=TRUE)
rownames(joined) <- rownames(fdat)
# Create Seurat Object __________________________________________________________________
# Build the Seurat object from the joined expression matrix and attach the
# per-sample metadata.
joined <- as.data.frame(joined)
sobj_pre <- CreateSeuratObject(counts = joined)
sobj_pre <- AddMetaData(sobj_pre, metadata = pdat_joined)
# Slots are accessed with `@`; a `#` here would start a comment and leave the
# call unclosed.
head(sobj_pre@meta.data)
# gene name input
sobj_pre[["RNA"]]@meta.features <- fdat
head(sobj_pre[["RNA"]]@meta.features)
#Downstream analysis -------------------------------------------------------
sobj <- sobj_pre
sobj <- FindVariableFeatures(object = sobj, mean.function = ExpMean, dispersion.function = LogVMR, nfeatures = 2000)
sobj <- ScaleData(object = sobj, features = rownames(sobj), block.size = 2000)
sobj <- RunPCA(sobj, npcs = 100, ndims.print = 1:10, nfeatures.print = 5)
sobj <- FindNeighbors(sobj, reduction = "pca", dims = 1:4, nn.eps = 0.5)
sobj <- FindClusters(sobj, resolution = 1, n.start = 10)
umap.method = 'umap-learn'
metric = 'correlation'
sobj <- RunUMAP(object = sobj, reduction = "pca", dims = 1:4,min.dist = 0.5, seed.use = 123)
p0 <- DimPlot(sobj, reduction = "umap", pt.size = 0.1,label=TRUE) + ggtitle(label = "Title")
p0
# ER+ score computation -------------------
ERlist <- list(c("CPB1", "RP11-53O19.1", "TFF1", "MB", "ANKRD30B",
"LINC00173", "DSCAM-AS1", "IGHG1", "SERPINA5", "ESR1",
"ILRP2", "IGLC3", "CA12", "RP11-64B16.2", "SLC7A2",
"AFF3", "IGFBP4", "GSTM3", "ANKRD30A", "GSTT1", "GSTM1",
"AC026806.2", "C19ORF33", "STC2", "HSPB8", "RPL29P11",
"FBP1", "AGR3", "TCEAL1", "CYP4B1", "SYT1", "COX6C",
"MT1E", "SYTL2", "THSD4", "IFI6", "K1AA1467", "SLC39A6",
"ABCD3", "SERPINA3", "DEGS2", "ERLIN2", "HEBP1", "BCL2",
"TCEAL3", "PPT1", "SLC7A8", "RP11-96D1.10", "H4C8",
"PI15", "PLPP5", "PLAAT4", "GALNT6", "IL6ST", "MYC",
"BST2", "RP11-658F2.8", "MRPS30", "MAPT", "AMFR", "TCEAL4",
"MED13L", "ISG15", "NDUFC2", "TIMP3", "RP13-39P12.3", "PARD68"))
sobj <- AddModuleScore(object = sobj, features = ERlist, name = "ER_List")
#TNBC computation -------------------
tnbclist <- list(c("FABP7", "TSPAN8", "CYP4Z1", "HOXA10", "CLDN1",
"TMSB15A", "C10ORF10", "TRPV6", "HOXA9", "ATP13A4",
"GLYATL2", "RP11-48O20.4", "DYRK3", "MUCL1", "ID4", "FGFR2",
"SHOX2", "Z83851.1", "CD82", "COL6A1", "KRT23", "GCHFR",
"PRICKLE1", "GCNT2", "KHDRBS3", "SIPA1L2", "LMO4", "TFAP2B",
"SLC43A3", "FURIN", "ELF5", "C1ORF116", "ADD3", "EFNA3",
"EFCAB4A", "LTF", "LRRC31", "ARL4C", "GPNMB", "VIM",
"SDR16C5", "RHOV", "PXDC1", "MALL", "YAP1", "A2ML1",
"RP1-257A7.5", "RP11-353N4.6", "ZBTB18", "CTD-2314B22.3", "GALNT3",
"BCL11A", "CXADR", "SSFA2", "ADM", "GUCY1A3", "GSTP1",
"ADCK3", "SLC25A37", "SFRP1", "PRNP", "DEGS1", "RP11-110G21.2",
"AL589743.1", "ATF3", "SIVA1", "TACSTD2", "HEBP2"))
sobj <- AddModuleScore(object = sobj, features = tnbclist, name = "TNBC_List")
#ggplot2 issue ----------------------------------------------------------------------------
sobj[["ClusterName"]] <- Idents(object = sobj)
sobjlists <- FetchData(object = sobj, vars = c("ER_List1", "TNBC_List1", "ClusterName"))
library(reshape2)
melt(sobjlists, id.vars = c("ER_List1", "TNBC_List1", "ClusterName"))
p <- ggplot() + geom_violin(data = sobjlists, aes(x= ClusterName, y = ER_List1, fill = ER_List1, colour = "ER+ Signature"))+ geom_violin(data = sobjlists, aes(x= ClusterName, y = TNBC_List1, fill = TNBC_List1, colour="TNBC Signature"))
Extension ======================================================================
If you want to do this but with two objects (sobjlists1 and sobjlists2, for example) instead of what my example showed (two variables but one object), rbind the two and then do what @StupidWolf says:
library(reshape2)
sobjlists1= melt(sobjlists1, id.vars = "treatment")
sobjlists2= melt(sobjlists2, id.vars = "treatment")
combosobjlists <- rbind(sobjlists1, sobjlists2)
and then continue on with their code using combosobjlists:
ggplot(combosobjlists,aes(x= ClusterName, y = value)) +
geom_violin(aes(fill=variable)) +
geom_boxplot(aes(col=variable),
width = 0.2,position=position_dodge(0.9))
Hope this thread helps!
Try to include just the minimum code to show your problem. Like in your example, there's no need to start with the whole seurat processing. You can just provide the data.frame with dput() and we can see the issue with ggplot2 , see this post.
Create some example data:
library(Seurat)
library(ggplot2)
genes = c(unlist(c(ERlist,tnbclist)))
mat = matrix(rnbinom(500*length(genes),mu=500,size=1),ncol=500)
rownames(mat) = genes
colnames(mat) = paste0("cell",1:500)
sobj = CreateSeuratObject(mat)
sobj = NormalizeData(sobj)
Add some made-up cluster:
sobj$ClusterName = factor(sample(0:1,ncol(sobj),replace=TRUE))
Add your module score:
sobj = AddModuleScore(object = sobj, features = tnbclist,
name = "TNBC_List",ctrl=5)
sobj = AddModuleScore(object = sobj, features = ERlist,
name = "ER_List",ctrl=5)
We get the data; what you need to do is to pivot it long correctly. Plotting it twice with ggplot2 is going to cause all kinds of problems:
sobjlists = FetchData(object = sobj, vars = c("ER_List1", "TNBC_List1", "ClusterName"))
head(sobjlists)
ER_List1 TNBC_List1 ClusterName
cell1 -0.05391108 -0.008736057 1
cell2 0.07074816 -0.039064126 1
cell3 0.08688374 -0.066967324 1
cell4 -0.12503649 0.120665057 0
cell5 0.05356685 -0.072293651 0
cell6 -0.20053804 0.178977042 1
Should look like this:
library(reshape2)
sobjlists = melt(sobjlists, id.vars = "ClusterName")
ClusterName variable value
1 1 ER_List1 -0.05391108
2 1 ER_List1 0.07074816
3 1 ER_List1 0.08688374
4 0 ER_List1 -0.12503649
5 0 ER_List1 0.05356685
6 1 ER_List1 -0.20053804
Now we plot:
ggplot(sobjlists,aes(x= ClusterName, y = value)) +
geom_violin(aes(fill=variable)) +
geom_boxplot(aes(col=variable),
width = 0.2,position=position_dodge(0.9))
For you to be able to use the data within a layer without specifying it (as in geom_boxplot()), you need to put the data in the ggplot() function call. The layers added afterwards are then able to inherit it.
You also do not need an extra violin plot per color
library(tidyverse)
data("Puromycin")
head(Puromycin)
mycp <- ggplot(Puromycin,aes(x= state, y = conc, colour=state))+geom_violin()
mycp + geom_boxplot(width=0.1, color= "black") +
scale_color_discrete(
labels= c("Puromycin (Treatment1)","Puromycin (Treatment2)")
)
Result:

data of class numeric error when plotting vertical and horizontal lines in ggplot

iarray <- iv$iarray
varray <- iv$varray
n<-gsub("^\\{+(.+)\\}+$", '\\1', iarray)
n1 <- strsplit(n,",")
n1 <- unlist(n1)
n1 <- as.numeric(n1)
df <- as.data.frame(n1)
n<-gsub("^\\{+(.+)\\}+$", '\\1', varray)
n2 <- strsplit(n,",")
n2 <- unlist(n2)
n2 <- as.numeric(n2)
df <- cbind(df,n2)
vmpp <-iv$vmpp
impp <- iv$impp
print(impp)
print(vmpp)
})
output$ivcurve <- renderPlot({
ggplot(data3(), aes(x=n2, y= n1)) + geom_line(colour='blue')+ geom_vline(xintercept = vmpp)+ geom_hline(yintercept = impp) + scale_y_continuous(limits = c(-1, 11))
Basically I'm trying to draw an IV curve from the above code.
As seen in the photo I need a horizontal and a vertical line.
But after I added the geom_vline function it gives me the Error : ggplot2 doesn't know how to deal with data of class numeric
iv is a dataframe and iarray and varray basically looks like this.
iarray = "{9.467182035,9.252423958,9.179368178,9.142931845}"
varray = "{-1.025945126,-0.791203874,-0.506481774,-0.255416444}"
And vmpp and impp are basically numbers as 8.5 and 20
suggestions?
P.s :
dput(iv)
structure(list(id = 3L, seris_id = "SERTPTR0003", module_id = 2L,
isc = 9.1043, voc = 37.61426, impp = 8.524, vmpp = 30.0118,
pmpp = 255.8095, unique_halm_id = 4414L, iarray = "{9.471385758,9.251831868,9.174032904,9.135095327,9.109244512,9.087563112,9.081257993,9.079282455,9.078209387,9.077396672,9.076717653,9.076285598,9.075914058,9.075549594,9.075098675,9.074659768,9.074080201,9.073659578,9.073411255,9.073349331,9.073215686,9.073189667,9.073011759,9.072868405,9.072659064,9.072636165,9.072659725,9.072729724,9.072779321,9.072915415,9.072951718,9.072855259,9.072758863,9.072562734,9.072286497,9.072036161,9.071858009,9.07165223,9.071458902,9.071172024,9.070818323,9.070364851,9.069865071,9.069392026,9.069058847,9.068673155,9.068486996,9.0684006,9.068241175,9.067848351,9.067533806,9.066886103,9.066177782,9.0655086,9.065025577,9.064457111,9.064154995,9.063866251,9.063564149,9.063221961,9.06295813,9.062580288,9.062182005,9.06179715,9.061378517,9.060847632,9.06033015,9.059686156,9.058814993,9.057817299,9.056732355,9.055534236,9.054389596,9.05351149,9.052819766,9.052254696,9.051816304,9.051431465,9.051000987,9.050664797,9.050589584,9.050615635,9.050795719,9.051096084,9.05121704,9.050958132,9.050478383,9.049724325,9.048695951,9.047619756,9.046715916,9.04602525,9.045615278,9.045512729,9.045617691,9.045803509,9.045989974,9.046083526,9.045997615,9.045871618,9.045772357,9.045599926,9.045340971,9.045082036,9.04473025,9.044178732,9.043440888,9.042642632,9.04185002,9.041056695,9.040316091,9.039781509,9.039426971,9.039199774,9.039026035,9.038805897,9.038478843,9.037978051,9.037190302,9.036262611,9.035408047,9.034687132,9.03411323,9.033759457,9.033445779,9.033105372,9.032611665,9.031991392,9.031298017,9.030631384,9.029991493,9.02931152,9.028518372,9.027678053,9.026644378,9.025384369,9.023971135,9.022443918,9.020510444,9.018469233,9.015987042,9.013123551,9.009951782,9.006524239,9.002508657,8.99806541,8.993200713,8.987509287,8.980851319,8.97337198,8.964883202,8.955065215,8.944015742,8.931773812,8.91796823,8.902911552,8.886450605,8.868452754,8.848678419,8.827119435,8.80336248,8.777313996,8.748941051,8.718309497
,8.685225063,8.649388501,8.610785476,8.569040812,8.52363426,8.474699468,8.422382481,8.366516735,8.307103187,8.244481209,8.178090447,8.10779633,8.033345875,7.954744415,7.871665908,7.784296593,7.692116999,7.595199333,7.493377787,7.386704971,7.275055109,7.158981607,7.038484468,6.913650942,6.784728642,6.651977027,6.515069048,6.374111623,6.228897233,6.079031999,5.924669253,5.766323899,5.604063459,5.43841477,5.26939121,5.096619936,4.919752772,4.738936722,4.554312451,4.366039658,4.174017769,3.978461295,3.779470133,3.576724216,3.370764477,3.162238756,2.951119622,2.737359938,2.521133452,2.302407806,2.08132299,1.858467726,1.632539296,1.397202225,1.149523324,0.890812319,0.62251893,0.349040094,0.084409259,-0.164612445,-0.4001423,-0.625408177,-0.844927296,-1.067373925,-1.297998987,-1.536777099,-1.782558235,-2.033692207,-2.28906274,-2.54694712,-2.806836154,-3.068463186,-3.331653821,-3.596227332,-3.862303417,-4.129421924,-4.397321356,-4.666082505,-4.935632162,-5.206170796,-5.478105728,-5.751638617,-6.027203502,-6.304753878,-6.584235675,-6.865027697,-7.146774939,-7.428922534,-7.711971427,-7.995982555,-8.281623641,-8.569128828,-8.85847189,-9.14887768,-9.440152159,-9.731968139,-10.02382391,-10.315645796,-10.608918155,-10.906228043,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}",
varray = "{-1.055634971,-0.820094649,-0.530478984,-0.277519378,-0.049665975,0.168173928,0.369832037,0.557189853,0.73806136,0.918444007,1.100988955,1.285835111,1.471379381,1.656087228,1.83947039,2.021804885,2.204138782,2.387586314,2.572217234,2.757544476,2.943083961,3.127927125,3.311936258,3.49497066,3.677517995,3.860273388,4.043516446,4.227247167,4.411953813,4.597148124,4.781785019,4.965795342,5.149247651,5.331933288,5.514618924,5.698279889,5.882706331,6.067759345,6.253369179,6.43883951,6.623542572,6.807967224,6.991834459,7.175283184,7.359219574,7.543715171,7.727930567,7.91102934,8.092315166,8.270881273,8.44728269,8.622987785,8.800575578,8.981370755,9.16634984,9.355446065,9.546982405,9.738937256,9.930334092,10.119987137,10.30698723,10.492242934,10.676242509,10.859335313,11.042009008,11.224684494,11.40735998,11.589966311,11.77250289,11.955040067,12.138134659,12.321927365,12.506347836,12.691464628,12.877626501,13.06357852,13.248553416,13.432761044,13.616063692,13.798391012,13.981067688,14.165071441,14.350401073,14.536497974,14.722526917,14.907441627,15.090542195,15.272247132,15.453463208,15.634886746,15.817842429,16.00302897,16.188982779,16.375285347,16.561309505,16.745870074,16.92840904,17.110739948,17.293002899,17.47610287,17.660388619,17.84523298,18.029659429,18.213737119,18.397257992,18.580849811,18.765279846,18.950546303,19.136368979,19.322329963,19.507382979,19.691527431,19.874834264,20.057444179,20.239565832,20.422106592,20.605414621,20.789699174,20.974891096,21.161129891,21.347647095,21.533745789,21.718937711,21.903640773,22.087576567,22.271163603,22.454611733,22.638548722,22.822346805,23.006144888,23.189873219,23.373392294,23.556911967,23.740989056,23.925624756,24.110540657,24.296084919,24.481558832,24.666267268,24.850416495,25.034286715,25.217528572,25.400211222,25.582891485,25.765153238,25.947554494,26.130303912,26.31319522,26.495810505,26.678077627,26.859158373,27.03919344,27.2183891,27.396813913,27.574889968,27.752475972,27.92950277,28.106320312,28.283627
309,28.460794206,28.638101203,28.814990286,28.99097379,29.16472524,29.336732302,29.506996765,29.675866194,29.843757906,30.011789121,30.179820337,30.347501601,30.514694006,30.680631478,30.844614709,31.006505389,31.166790586,31.32526224,31.482546323,31.638644628,31.793695462,31.947491958,32.10059034,32.252713392,32.40427843,32.555147743,32.705110284,32.853817294,33.001129867,33.14656034,33.29010692,33.431979459,33.572526716,33.711957348,33.85055096,33.988306953,34.125295678,34.260890565,34.395089819,34.528311952,34.660559355,34.792040088,34.923102908,35.053748414,35.1835569,35.312946278,35.441359731,35.568936163,35.695816274,35.821859963,35.946717873,36.071016573,36.194685715,36.317446291,36.439648854,36.561153301,36.681402816,36.801235017,36.924136292,37.050595501,37.180333637,37.313071096,37.446575824,37.573873847,37.693009727,37.80489203,37.910636184,38.012195237,38.11396235,38.218102215,38.324333435,38.432308447,38.541817399,38.652023867,38.762158191,38.872501769,38.982844151,39.092768022,39.202552988,39.31199099,39.420452469,39.528287977,39.635566071,39.742146647,39.848308713,39.954401627,40.060004483,40.165396887,40.270857247,40.376317008,40.481775571,40.587234732,40.692485836,40.797320225,40.901805855,41.005874173,41.109873339,41.21366265,41.317242107,41.420542558,41.523772657,41.626762446,41.729471102,41.832232456,41.937530675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}"), .Names = c("id",
"seris_id", "module_id", "isc", "voc", "impp", "vmpp", "pmpp",
"unique_halm_id", "iarray", "varray"), row.names = 1L, class = "data.frame")
Solved it. Since ggplot requires data frames, I just added two more columns to the data frame and mapped them inside aes().
vmpp <- iv$vmpp
df <- cbind(df,vmpp)
impp <- iv$impp
df <- cbind(df,impp)
print(df)
})
output$ivcurve <- renderPlot({
ggplot(data3(), aes(x=n2, y= n1)) + geom_line(colour='blue')+ scale_y_continuous(limits = c(-1, 11))+ geom_vline(aes(xintercept = vmpp))+ geom_hline(aes(yintercept = impp))

Adding parameters to a ggplot produced plot in a function

Let's say I have a saved plot named my_plot, produced with ggplot. Also, let's say that the column in my_plot[[1]] data frame used for horizontal axis is named my_dates
Now, I want to add some vertical lines to the plot, which, of course, can be done by something like that:
my_plot +
geom_vline(aes(xintercept = my_dates[c(3, 8)]))
Since I perform this task quite on a regular basis, I want to write a function for that -- something like that:
ggplot.add_lines <- function(given_plot, given_points) {
finale <- given_plot +
geom_vline(aes(xintercept = given_plot[[1]]$my_dates[given_points]))
return(finale)
}
Which, as it's probably obvious to everyone, doesn't work:
> ggplot.add_lines(my_plot, c(3, 5))
Error in eval(expr, envir, enclos) : object 'given_plot' not found
So, my question would be what am I doing wrong, and how can it be fixed? Below is some data for a reproducible example:
> dput(my_plot)
structure(list(data = structure(list(my_dates = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10), my_points = c(-2.20176409422924, -1.12872396340683,
-0.259703895194354, 0.634233385649338, -0.678983982973015, -1.83157126614836,
1.33360095418957, -0.120455389285709, -0.969431974863616, -1.20451262626184
)), .Names = c("my_dates", "my_points"), row.names = c(NA, -10L
), class = "data.frame"), layers = list(<environment>), scales = <S4 object of class structure("Scales", package = "ggplot2")>,
mapping = structure(list(x = my_dates, y = my_points), .Names = c("x",
"y"), class = "uneval"), theme = list(), coordinates = structure(list(
limits = structure(list(x = NULL, y = NULL), .Names = c("x",
"y"))), .Names = "limits", class = c("cartesian", "coord"
)), facet = structure(list(shrink = TRUE), .Names = "shrink", class = c("null",
"facet")), plot_env = <environment>, labels = structure(list(
x = "my_dates", y = "my_points"), .Names = c("x", "y"
))), .Names = c("data", "layers", "scales", "mapping", "theme",
"coordinates", "facet", "plot_env", "labels"), class = c("gg",
"ggplot"))
According to this post, below is my solution to this problem. The environment issue in the **ply and ggplot is annoying.
ggplot.add_lines <- function(given_plot, given_points) {
finale <- eval(substitute( expr = {given_plot +
geom_vline(aes(xintercept = my_dates[given_points]))}, env = list(given_points = given_points)))
return(finale)
}
The following code runs well on my machine. (I cannot make your reproducible work on my machine...)
df <- data.frame(my_dates = 1:10, val = 1:10)
my_plot <- ggplot(df, aes(x = my_dates, y = val)) + geom_line()
my_plot <- ggplot.add_lines(my_plot, c(3, 5))
print(my_plot)
Update: The above solution fails when more than two points are used.
It seems that we can easily solve this problem by not including the aes (subsetting together with aes causes problems):
# Add vertical lines to an existing ggplot at selected x positions.
#   given_plot   - a ggplot object whose data has a `my_dates` column
#   given_points - indices into `my_dates` at which to draw the lines
# Returns the plot with a geom_vline() layer added.
ggplot.add_lines <- function(given_plot, given_points) {
  # Access the plot data by name instead of by position ([[1]]), which
  # depends on the internal layout of the ggplot object.
  line_positions <- given_plot$data$my_dates[given_points]
  # xintercept is set as a constant (outside aes()), so the values are
  # evaluated here in the function rather than looked up in the plot data.
  given_plot + geom_vline(xintercept = line_positions)
}
I would take the following approach: extract the data.frame of interest, and pass it to the new layer,
df <- data.frame(my_dates = 1:10, val = rnorm(10))
my_plot <- ggplot(df, aes(x = my_dates, y = val)) + geom_line()
# Add vertical lines at the `my_dates` values of selected rows of a plot's
# data.
#   p            - a ggplot object whose data has a `my_dates` column
#   given_points - row indices of the plot data at which to draw lines
#   ...          - forwarded to geom_vline() (e.g. lty = 2)
# Returns the plot with the extra layer added.
add_lines <- function(p, given_points = c(3, 5), ...) {
  # drop = FALSE keeps a data frame even when the data has a single column.
  d <- p[["data"]][given_points, , drop = FALSE]
  # aes_string() is deprecated; the column name is fixed, so plain aes() works.
  p + geom_vline(data = d, aes(xintercept = my_dates), ...)
}
add_lines(my_plot, c(3,5), lty=2)

box plot using column of different length

I want to do some box plots, but I have data with a different number of rows for each column.
My data looks like:
OT1 OT2 OT3 OT4 OT5 OT6
22,6130653 16,6666667 20,259481 9,7431602 0,2777778 16,0678643
21,1122919 32,2946176 11,396648 10,9458023 4,7128509 10,8938547
23,5119048 19,5360195 23,9327541 39,5634921 0,6715507 12,2591613
16,9880885 39,5365943 7,7568134 22,7453205 3,6410445 11,7610063
32,768937 25,2897351 9,6288027 4,1629535 3,7251656
40,7819933 15,6320021 5,9171598
23,7961828 14,3728125 2,1887585
I'd like to have a box plot for each column (OT1, OT2…), but with the first three and the last three grouped together.
I tried:
>mydata <- read.csv('L5.txt', header = T, sep = "\t")
>mydata_t <- t(mydata)
>boxplot(mydata_t, ylab = "OTU abundance (%)",las=2, at=c(1,2,3 5,6,7))
But it didn't work…
How can I do?
Thanks!
Combining both answers and extending Henrik's answer, you can also group the OT's together in boxplot() as well:
dat <- read.table(text='OT1 OT2 OT3 OT4 OT5 OT6
22,6130653 16,6666667 20,259481 9,7431602 0,2777778 16,0678643
21,1122919 32,2946176 11,396648 10,9458023 4,7128509 10,8938547
23,5119048 19,5360195 23,9327541 39,5634921 0,6715507 12,2591613
16,9880885 39,5365943 7,7568134 22,7453205 3,6410445 11,7610063
32,768937 25,2897351 9,6288027 4,1629535 3,7251656
40,7819933 15,6320021 5,9171598
23,7961828 14,3728125 2,1887585',header=TRUE,fill=TRUE)
dat <- sapply(dat,function(x)as.numeric(gsub(',','.',x)))
dat.m <- melt(dat)
# Label each row by the group its column belongs to: OT1-OT3 -> "group1",
# OT4-OT6 -> "group2". The pattern must be tested against the column-name
# variable (X2); testing it against a literal string is always FALSE and
# would put every row in "group2".
dat.m <- transform(dat.m,
                   group = ifelse(grepl("1|2|3", X2), "group1", "group2"))
# Store the factor conversion; a bare as.factor() call discards its result.
dat.m$X2 <- as.factor(dat.m$X2)
boxplot(dat.m$value~dat.m$X2,data=dat.m,
axes = FALSE,
at = 1:6 + c(0.2, 0, -0.2),
col = rainbow(6))
axis(side = 1, at = c(2, 5), labels = c("Group_1", "Group_2"))
axis(side = 2, at = seq(0, 40, by = 10))
legend("topright", legend = c("OT1", "OT2", "OT3", "OT4", "OT5", "OT6"), fill = rainbow(6))
abline(v = 3.5, col = "grey")
box()
It is not easy to group boxplots using base R plots; it is better to use ggplot2 here. Either way, the difficulty here is how to reformat your data and reshape it into the long format.
dat <- read.table(text='OT1 OT2 OT3 OT4 OT5 OT6
22,6130653 16,6666667 20,259481 9,7431602 0,2777778 16,0678643
21,1122919 32,2946176 11,396648 10,9458023 4,7128509 10,8938547
23,5119048 19,5360195 23,9327541 39,5634921 0,6715507 12,2591613
16,9880885 39,5365943 7,7568134 22,7453205 3,6410445 11,7610063
32,768937 25,2897351 9,6288027 4,1629535 3,7251656
40,7819933 15,6320021 5,9171598
23,7961828 14,3728125 2,1887585',header=TRUE,fill=TRUE)
dat = sapply(dat,function(x)as.numeric(gsub(',','.',x)))
dat.m <- melt(dat)
dat.m <- transform(dat.m,group=ifelse(grepl('1|2|3',Var2),
'group1','group2'))
ggplot(dat.m)+
geom_boxplot(aes(x=group,y=value,fill=Var2))
Or with boxplot, using @agstudy's 'dat':
df <- melt(dat)
boxplot(value ~ Var2, data = df, at = 1:6 + c(0.2, 0, -0.2))

Resources