plot density of multiple csv files of different size in R - r

I have multiple csv files, each with a single column.
I want to read them and plot their density distribution in a single plot.
can anyone help me?

There are answers elsewhere about
reading multiple csv files so I will mainly concentrate on the density plotting part. Since you did not provide any data, I will use the built-in iris data to create some example files. This first step is to make a reusable example. I am assuming that you already have the data on the disk and have a list of the file names.
## Create some test data
FileNames = paste(names(iris[,1:4]), ".csv", sep="")
for(i in 1:4) {
write.csv(iris[,i], FileNames[i], row.names=FALSE)
}
So, on to the density plots. There is one small sticky point. Each of the different density plots will cover a different range of x and y values. If you want them all in one plot, you will need to leave enough room in your plot to hold them all. The code below first computes that range, then makes the plots.
## Read in all of the data from csv
DataList = list()
for(i in seq_along(FileNames)) {
DataList[[i]] = read.csv(FileNames[i], header=T)[[1]]
}
## Find the range that we will need to include all plots
XRange = range(DataList[[1]])
YRange = c(0,0)
for(i in seq_along(DataList)) {
Rx = range(DataList[[i]])
XRange[1] = min(XRange[1], Rx[1])
XRange[2] = max(XRange[2], Rx[2])
YRange[2] = max(density(DataList[[i]], na.rm=T)$y, YRange[2])
}
## Now make all of the plots
plot(density(DataList[[1]], na.rm=T), xlim=XRange, ylim=YRange,
xlab=NA, ylab=NA, main="Density Plots")
for(i in seq_along(DataList)) {
lines(density(DataList[[i]], na.rm=T), col=i)
}
legend("topright", legend=FileNames, lty=1, col=1:4, bty='n')

Related

Plotting multiple .TIFF images together with individual titles in R

I would like to plot multiple .TIFF images in R and add individual titles to them. Without the titles, this piece of code gets the job done:
require(raster)
setwd("...")
files = list.files(pattern="*.tif")
tiff("balanded_1.tiff", units="in", width=21, height=26, res=300, compression = 'lzw') #for saving
par(mfrow=c(5,3))
for (i in 1:15) {
plotRGB(brick(files[i]))
}
dev.off() #save figure
However, if I try to add individual titles to the images using 'plotRGB()', it automatically adds axes to them (because 'axes=TRUE' becomes a requirement in the 'plotRGB()' function), and I get something like this:
plotRGB(brick(files[2]), axes=TRUE, main="TITLE", xlab="", ylab="")
I understand that 'plotRGB()' is probably not the right function for the job (since I am not plotting maps), but I wonder if there is a way to make it work? If not, is there an alternative I could use? Thank you in advance.
I managed to find a solution to this problem using a more appropriate package for image manipulation:
require(magick)
setwd("...")
files = list.files(pattern="*.tif")
tiff("balanded_1.tiff", units="in", width=21, height=26, res=300, compression = 'lzw')
par(mfrow=c(5,3))
for (i in 1:15) {
name <- files[i]
lag <- strsplit(name, "_")[[1]][2] #get the name right for the image
match <- strsplit(name, "_")[[1]][4]
lead <- strsplit(name, "_")[[1]][6]
lead <- strsplit(lead, ".tiff")[[1]][1]
name <- paste(" Lag=",lag,", Match=1:",match,", Lead=",lead, sep = "") #put the name together
img <- image_annotate(image_read(files[i]), name, font = 'Times', size = 120) #read the image and add the name
img <- image_crop(img, "1500x1050")
plot(img)
}
dev.off() #save figure

R get legend with same color as plot lines

Hi I am new to R and am trying to plot multiple files as lines in a scatter plot. I was able to get the plot but not when I try to add legend to the plot. I want the legend with name of the file in the same color as the color of the line made from that file. I tried using the following suggestion from a previous thread -
xlist<-list.files(pattern = NULL)
first=TRUE
cl <- rainbow(22)
for(i in xlist) {
table <- read.table((i),header=T,sep="\t")
table <- table[, 1:2]
if (first) {
plot(table,xlab='Distance from center',ylab='Coverage',ylim=c(0,70),col=1, type="n")
lines(table) #plot and add lines
legend("top", y=NULL, legend = i, col=1)
par(new=T)
first=FALSE
}
else {
lines(table,col=cl[i]) #To add another file to the plot as another line
par(new=F)
plotcol[i] <- cl[i]
legend("top", y=NULL, legend = i, col=plotcol)
}
}
The error is get is - Error in plotcol[i] <- cl[i] : object 'plotcol' not found. Please let me know what I am missing or if there is a better way to plot the lines with different colors and get legend with names of the files with the same color as the lines. Thank you.
I had to make some reproducible examples to get it to work, but the following script works to make the lengends and line colors the same:
#random data
test.df1=data.frame(runif(100)*0.2,runif(100)*0.2)
test.df2=data.frame(runif(100)*0.5,runif(100)*0.5)
test.df3=data.frame(runif(100),runif(100))
test.df4=data.frame(runif(100)*2,runif(100)*2)
test.list=list(test.df1=test.df1,test.df2=test.df2,test.df3=test.df3,test.df4=test.df4) # I used this instead of reading in files from read.table, you shouldn't need this
xlist=c('test.df1','test.df2','test.df3','test.df4') #the list of files
first=TRUE
cl <- rainbow(length(xlist)) #colors dedicated to your list
names(cl)=xlist #this names the vector elements so you can reference them
for(i in xlist) {
i.table <- test.list[[i]]
i.table <- i.table[,c(1:2)]
if (first) {
plot(i.table,xlab='Distance from center',ylab='Coverage',xlim=c(0,2),ylim=c(0,2),col=cl[i], type="n")
lines(i.table,col=cl[i]) #plot and add lines
par(new=T)
first=FALSE
}
else {
lines(i.table,col=cl[i]) #To add another file to the plot as another line
par(new=F)
plotcol <- c(plotcol,cl[i])# pulls colors from the cl vector
}
}
legend("top", y=NULL, legend =xlist, text.col=cl) #label colors should now match
Try ggplot2 package in R. You wouldn't have to code as much too!
https://www.rstudio.com/wp-content/uploads/2015/12/ggplot2-cheatsheet-2.0.pdf

R statistical Programing

I am trying to write R codes for the histogram plot and save each histogram separate file using the following command.
I have a data set "Dummy" and i want to plot each histogram by a column name and there will be 100 histogram plots in total...
I have the following R codes that draws the each Histogram...
library(ggplot2)
i<-1
for(i in 1:100)
{
jpeg(file="d:/R Data/hist.jpeg", sep=",")
hist(Dummy$colnames<-1, ylab= "Score",ylim=c(0,3),col=c("blue"));
dev.off()
i++
if(i>100)
break()
}
As a start, let's get your for loop into R a little better by taking out the lines trying to change i, your for loop will do that for you.
We'll also include a file= value that changes with each loop run.
for(i in 1:100)
{
jpeg(file = paste0("d:/R Data/hist", i, ".jpeg"))
hist(Dummy[[i]], ylab = "Score", ylim = c(0, 3), col = "blue")
dev.off()
}
Now we just need to decide what you want to plot. Will each plot be different? How will each plot extract the data it needs?
EDIT: I've taken a stab at what you're trying to do. Are you trying to take each of 100 columns from the Dummy dataset? If so, Dummy[[i]] should achieve that (or Dummy[,i] if Dummy is a matrix).

Save multiple ggplot2 plots as R object in list and re-displaying in grid

I would like to save multiple plots (with ggplot2) to a list during a large for-loop. And then subsequently display the images in a grid (with grid.arrange)
I have tried two solutions to this:
1 storing it in a list, like so:
pltlist[["qplot"]] <- qplot
however for some reason this does save the plot correctly.
So I resorted to a second strategy which is recordPlot()
This was able to save the plot correctly, but unable to
use it in a grid.
Reproducable Example:
require(ggplot2);require(grid);require(gridExtra)
df <- data.frame(x = rnorm(100),y = rnorm(100))
histoplot <- ggplot(df, aes(x=x)) + geom_histogram(aes(y=..density..),binwidth=.1,colour="black", fill="white")
qplot <- qplot(sample = df$y, stat="qq")
pltlist <- list()
pltlist[["qplot"]] <- qplot
pltlist[["histoplot"]] <- histoplot
grid.arrange(pltlist[["qplot"]],pltlist[["histoplot"]], ncol=2)
above code works but produces the wrong graph
in my actual code
Then I tried recordPlot()
print(histoplot)
c1 <- recordPlot()
print(qplot)
c2 <- recordPlot()
I am able to display all the plots individually
but grid.arrange produces an error:
grid.arrange(replayPlot(c1),replayPlot(c2), ncol=2) # = Error
Error in gList(list(wrapvp = list(x = 0.5, y = 0.5, width = 1, height = 1, :
only 'grobs' allowed in "gList"
In this thread Saving grid.arrange() plot to file
They dicuss a solution which utilizes arrangeGrob() instead
arrangeGrob(c1, c1, ncol=2) # Error
Error in vapply(x$grobs, as.character, character(1)) :
values must be length 1,
but FUN(X[[1]]) result is length 3
I am forced to use the recordPlot() instead of saving to a list since this does not produce the same graph when saved as when it is plotted immediately, which I unfortunately cannot replicate, sorry.
In my actual code I am doing a large for-loop, looping through several variables, making a correlation with each and making scatterplots, where I name the scatterplots dependent on their significans level. I then want to re-display the plots that were significant in a grid, in a dynamic knitr report.
I am aware that I could just re-plot the plots that were significant after the for-loop instead of saving them, (I can't save as png while doing knitr either). However I would like to find a way to dynammically save the plots as R-objects and then replot them in a grid afterwards.
Thanks for Reading
"R version 3.2.1"
Windows 7 64bit - RStudio - Version 0.99.652
attached base packages:
[1] grid grDevices datasets utils graphics stats methods base
other attached packages:
[1] gridExtra_2.0.0 ggplot2_1.0.1
I can think of two solutions.
1. If your goal is to just save the list of plots as R objects, I recommend:
saveRDS(object = pltlist, file = "file_path")
This way when you wish to reload in these graphs, you can just use readRDS(). You can then put them in cowplot or gridarrange. This command works for all lists and R Objects.
One caveat to this approach is if settings/labeling for ggplot2 is dependent upon things in the environment (not the data, but stuff like settings for point size, shape, or coloring) instead of the ggplot2 function used to make the graph), your graphs won't work until you restore your dependencies. One reason to save some dependencies is to modularize your scripts to make the graphs.
Another caveat is performance: From my experience, I found it is actually faster to read in the data and remake individual graphs than load in an RDS file of all the graphs when you have a large number of graphs (100+ graphs).
2. If your goal is to save an 'image' or 'picture' of each graph (single and/or multiplot as .png, .jpeg, etc.), and later adjust things in a grid manually outside of R such as powerpoint or photoshop, I recommend:
filenames <- c("Filename_1", "Filename_2") #actual file names you want...
lapply(seq_along(pltlist), function(i) {
ggsave(filename = filenames[i], plot = pltlist[[i]], ...) #use your settings here
})
Settings I like for single plots:
lapply(seq_along(pltlist), function(i) ggsave(
plot = pltlist[[i]],
filename = paste0("plot_", i, "_", ".tiff"), #you can even paste in pltlist[[i]]$labels$title
device = "tiff", width=180, height=180, units="mm", dpi=300, compression = "lzw", #compression for tiff
path = paste0("../Blabla") #must be an existing directory.
))
You may want to do the manual approach if you're really OCD about the grid arrangement and you don't have too many of them to make for publications. Otherwise, when you do grid.arrange you'll want to do all the specifications there (adjusting font, increasing axis label size, custom colors, etc.), then adjust the width and height accordingly.
Reviving this post to add multiplot here, as it fits exactly.
require(ggplot2)
mydd <- setNames( data.frame( matrix( rep(c("x","y","z"), each=10) ),
c(rnorm(10), rnorm(10), rnorm(10)) ), c("points", "data") )
# points data
# 1 x 0.733013658
# 2 x 0.218838717
# 3 x -0.008303382
# 4 x 2.225820069
# ...
p1 <- ggplot( mydd[mydd$point == "x",] ) + geom_line( aes( 1:10, data, col=points ) )
p2 <- ggplot( mydd[mydd$point == "y",] ) + geom_line( aes( 1:10, data, col=points ) )
p3 <- ggplot( mydd[mydd$point == "z",] ) + geom_line( aes( 1:10, data, col=points ) )
multiplot(p1,p2,p3, cols=1)
multiplot:
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
Result:

Graphic of binary variable in R

I would like to plot a simple graphic. I have a dat set with n rowns and k columns, in which each row has a a sequence of 0 and 1. I would like to plot exactly this sequence for all rows.
Actually I want to reproduce the figure 24.1, p. 516, of Gelman and Hill's book (Data aAnalysis Using Regression and Multilevel/Hierarchical Models). I suspect that he made the graphic in Latex, but it seems quite ridiculous that I'm not able to repplicate this simple graphic in R. The figue is something like this. As you can see from the link, the "ones" are replaced by "S" and "zeros" by ".". It's a simple graphic, but it shows each individual response by time.
I would go with a formatted text output using sprintf. Much cleaner and simpler. If you still want a plot, you could go with the following:
Given matrix tbl containing your data:
tbl <- matrix(data=rep(0:1,25), nrow=5)
You can generate a plot as:
plot(1, 1, xlim=c(1,dim(tbl)[2]+.5), ylim=c(0.5,dim(tbl)[1]), type="n")
lapply(1:dim(tbl)[1], function(x) {
text(x=c(1:dim(tbl)[2]), y=rep(x,dim(tbl)[2]), labels=tbl[x,])
})
Using this as a base you can play around with the text and plot args to stylize the plot the way you wish.
Here are two possible solutions, based on fake data generated with this helper function:
generate.data <- function(rate=.3, dim=c(25,25)) {
tmp <- rep(".", prod(dim))
tmp[sample(1:prod(dim), ceiling(prod(dim)*rate))] <- "S"
m <- matrix(tmp, nr=dim[1], nc=dim[2])
return(m)
}
Text-based output
x <- generate.data()
rownames(x) <- colnames(x) <- 1:25
capture.output(as.table(x), file="res.txt")
The file res.txt include a pretty-printed version of the console output; you can convert it to pdf using any txt to pdf converter (I use the one from PDFlib). Here is a screenshot of the text file:
Image-based output
First, here is the plotting function I used:
make.table <- function(x, labels=NULL) {
# x = matrix
# labels = list of labels for x and y
coord.xy <- expand.grid(x=1:nrow(x), y=1:ncol(x))
opar <- par(mar=rep(1,4), las=1)
plot.new()
plot.window(xlim=c(0, ncol(x)), ylim=c(0, nrow(x)))
text(coord.xy$x, coord.xy$y, c(x), adj=c(0,1))
if (!is.null(labels)) {
mtext(labels[[1]], side=3, line=-1, at=seq(1, ncol(x)), cex=.8)
mtext(labels[[2]], side=2, line=-1, at=seq(1, nrow(x)), cex=.8, padj=1)
}
par(opar)
}
Then I call it as
make.table(x, list(1:25, 1:25))
and here is the result (save it as png, pdf, jpg, or whatever).
As far as I can see, this is a text table. I am wondering why you want to make it a graph? Anyway, quick solutions are (either way)
make the text table (by programming or typing) and make its screenshot and embed the image into the plot.
make a blank plot and put the text on the plot by programming R with "text" function. For more info on "text", refer to http://cran.r-project.org/doc/contrib/Lemon-kickstart/kr_adtxt.html

Resources