Line plots for several data using ggplot2 and save as pdf

Line plots for several data using ggplot2 and save as pdf - r

With following three different data set,
mean=replicate(10,rnorm(10))
colnames(mean)=paste0(rep(c("x0","x1","x2","x3","x4"),2),"_c", rep(c(1:2), each=5))
meanpos=replicate(10,rnorm(10))+1.5
meanneg=replicate(10,rnorm(10))-1.5
hcol=c(0,0.5,0,0.75,1.0,
1.1,1.20,0,0.8,-0.025)#vector of size ncol(mean)
I can create the line plots using following for loop
par(mfrow=c(2,2))
for ( v in 1:ncol(mean)){
plot(mean[,v], type = "l",
ylim = c(min(meanpos[,v],mean[,v]),
max(meanpos[,v],mean[,v])),
xlab = "sl no", ylab = "",main = colnames(mean)[v])
abline(h=hcol[v], col="purple")
lines(meanpos[,v], col="blue")
lines(meanneg[,v], col="green")
}
One plot for each column and outlined as 2 by 2. Here are a few plots
How can I create a similar plot using the ggplot2 function with a legend for each line and save as pdf file.
Any help is appreciated

If you want to use ggplot2 you can format data as next. It is better if you save your data from vectors to dataframes and then you can bind all data to reshape and have the desired plot using facets instead of loops as you did. You can tune ncol argument from facet_wrap() in order to define a matrix structure. Here the code using the data you provided. I added also the steps to have dataframes and easily use ggplot2 functions:
library(tidyverse)
#Initial data
set.seed(123)
#Data
mean=replicate(10,rnorm(10))
colnames(mean)=paste0(rep(c("x0","x1","x2","x3","x4"),2),"_c", rep(c(1:2), each=5))
meanpos=replicate(10,rnorm(10))+1.5
meanneg=replicate(10,rnorm(10))-1.5
hcol=c(0,0.5,0,0.75,1.0,
1.1,1.20,0,0.8,-0.025)#vector of size ncol(mean)
We save data in dataframes and identify all values:
#Concatenate all in a dataframe
df1 <- as.data.frame(mean)
#Data for intercepts
hcol=c(0,0.5,0,0.75,1.0,
1.1,1.20,0,0.8,-0.025)
#Dataframe
dfh <- data.frame(name=names(df1),hcol,stringsAsFactors = F)
#Mean pos
df2 <- as.data.frame(meanpos)
names(df2) <- names(df1)
#Mean neg
df3 <- as.data.frame(meanneg)
names(df3) <- names(df1)
#Assign ids
df1$id <- 'mean'
df2$id <- 'mean pos'
df3$id <- 'mean neg'
#Rows
df1$id2 <- 1:dim(df1)[1]
df2$id2 <- 1:dim(df2)[1]
df3$id2 <- 1:dim(df3)[1]
#Bind
dfm <- rbind(df1,df2,df3)
With the entire data, we reshape it to use facets:
#Pivot
dfm %>% pivot_longer(cols = -c(id,id2)) -> dfm2
Now, the plot:
#Sketch for plot
G1 <- ggplot(dfm2,aes(x=id2,y=value,group=id,color=id))+
geom_line()+
geom_hline(data = dfh,aes(yintercept = hcol),color='purple')+
facet_wrap(.~name,scales='free')+
xlab("sl no")+ylab("")+
scale_color_manual(values = c('mean'='tomato','mean pos'='blue','mean neg'='green'))+
theme_bw()+
theme(legend.position = 'top')
You can save as .pdf with ggsave():
#Export
ggsave(filename = 'Plot.pdf',plot = G1,width = 35,height = 20,units = 'cm')
Output:

Related

add one legend with all variables for combined graphs

I'm trying to plot two graphs side-by-side with one common legend that incorporates all the variables between both graphs (some vars are different between the graphs).
Here's a mock example of what I've been attempting:
#make relative abundance values for n rows
makeData <- function(n){
n <- n
x <- runif(n, 0, 1)
y <- x / sum(x)
}
#make random matrices filled with relative abundance values
makeDF <- function(col, rw){
df <- matrix(ncol=col, nrow=rw)
for(i in 1:ncol(df)){
df[,i] <- makeData(nrow(df))
}
return(df)
}
#create df1 and assign col names
df1 <- makeDF(4, 5)
colSums(df1) #verify relative abundance values = 1
df1 <- as.data.frame(df1)
colnames(df1) <- c("taxa","s1", "s2", "s3")
df1$taxa <- c("ASV1", "ASV2", "ASV3", "ASV4", "ASV5")
#repeat for df2
df2 <- makeDF(4,5)
df2 <- as.data.frame(df2)
colnames(df2) <- c("taxa","s1", "s2", "s3")
df2$taxa <- c("ASV1", "ASV5", "ASV6", "ASV7", "ASV8")
# convert wide data format to long format -- for plotting
library(reshape2)
makeLong <- function(df){
df.long <- melt(df, id.vars="taxa",
measure.vars=grep("s\\d+", names(df), val=T),
variable.name="sample",
value.name="value")
return(df.long)
}
df1 <- makeLong(df1)
df2 <- makeLong(df2)
#generate distinct colours for each asv
taxas <- union(df1$taxa, df2$taxa)
library("RColorBrewer")
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
colpals <- qual_col_pals[c("Set1", "Dark2", "Set3"),] #select colour palettes
col_vector = unlist(mapply(brewer.pal, colpals$maxcolors, rownames(colpals)))
taxa.col=sample(col_vector, length(taxas))
names(taxa.col) <- taxas
# plot using ggplot
library(ggplot2)
plotdf2 <- ggplot(df2, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col)
plotdf1 <- ggplot(df1, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col)
#combine plots to one figure and merge legend
library(ggpubr)
ggpubr::ggarrange(plotdf1, plotdf2, ncol=2, nrow=1, common.legend = T, legend="bottom")
(if you have suggestions on how to generate better mock data, by all means!)
When I run my code, I am able to get the two graphs in one figure, but the legend does not incorporate all variables from both plots:
I ideally would like to avoid having repeat variables in the legend, such as:
From what I've searched online, the legend only works when the variables are the same between graphs, but in my case I have similar and different variables.
Thanks for any help!

Maybe this is what you are looking for:
Convert your taxa variables to factor with the levels equal to your taxas variable, i.e. to include all levels from both datasets.
Add argument drop=FALSE to both scale_fill_manual to prevent dropping of unused factor levels.
Note: I only added the relevant parts of the code and set the seed to 42 at the beginning of the script.
set.seed(42)
df1$taxa <- factor(df1$taxa, taxas)
df2$taxa <- factor(df2$taxa, taxas)
# plot using ggplot
library(ggplot2)
plotdf2 <- ggplot(df2, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity") +
scale_fill_manual("ASV", values = taxa.col, drop = FALSE)
plotdf1 <- ggplot(df1, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col, drop = FALSE)
#combine plots to one figure and merge legend
library(ggpubr)
ggpubr::ggarrange(plotdf1, plotdf2, ncol=2, nrow=1, common.legend = T, legend="bottom")

Changing aesthetics in ggplot generated by svars package in R

I'm using the svars package to generate some IRF plots. The plots are rendered using ggplot2, however I need some help with changing some of the aesthetics.
Is there any way I can change the fill and alpha of the shaded confidence bands, as well as the color of the solid line? I know in ggplot2 you can pass fill and alpha arguments to geom_ribbon (and col to geom_line), just unsure of how to do the same within the plot function of this package's source code.
# Load Dataset and packages
library(tidyverse)
library(svars)
data(USA)
# Create SVAR Model
var.model <- vars::VAR(USA, lag.max = 10, ic = "AIC" )
svar.model <- id.chol(var.model)
# Wild Bootstrap
cores <- parallel::detectCores() - 1
boot.svar <- wild.boot(svar.model, n.ahead = 30, nboot = 500, nc = cores)
# Plot the IRFs
plot(boot.svar)
I'm also looking at the command for a historical decomposition plot (see below). Is there any way I could omit the first two facets and plot only the bottom three lines on the same facet?
hist.decomp <- hd(svar.model, series = 1)
plot(hist.decomp)

Your first desired result is easily achieved by resetting the aes_params after calling plot. For your second goal. There is probably an approach to manipulate the ggplot object. Instead my approach below constructs the plot from scratch. Basically I copy and pasted the data wrangling code from vars:::plot.hd and filtered the prepared dataset for the desired series:
# Plot the IRFs
p <- plot(boot.svar)
p$layers[[1]]$aes_params$fill <- "pink"
p$layers[[1]]$aes_params$alpha <- .5
p$layers[[2]]$aes_params$colour <- "green"
p
# Helper to convert to long dataframe. Source: svars:::plot.hd
hd2PlotData <- function(x) {
PlotData <- as.data.frame(x$hidec)
if (inherits(x$hidec, "ts")) {
tsStructure = attr(x$hidec, which = "tsp")
PlotData$Index <- seq(from = tsStructure[1], to = tsStructure[2],
by = 1/tsStructure[3])
PlotData$Index <- as.Date(yearmon(PlotData$Index))
}
else {
PlotData$Index <- 1:nrow(PlotData)
PlotData$V1 <- NULL
}
dat <- reshape2::melt(PlotData, id = "Index")
dat
}
hist.decomp <- hd(svar.model, series = 1)
dat <- hd2PlotData(hist.decomp)
dat %>%
filter(grepl("^Cum", variable)) %>%
ggplot(aes(x = Index, y = value, color = variable)) +
geom_line() +
xlab("Time") +
theme_bw()
EDIT One approach to change the facet labels is via a custom labeller function. For a different approach which changes the facet labels via the data see here:
myvec <- LETTERS[1:9]
mylabel <- function(labels, multi_line = TRUE) {
data.frame(variable = labels)
}
p + facet_wrap(~variable, labeller = my_labeller(my_labels))

Generate a multi-page pdf file of a grid of ggplot plots [duplicate]

This question already has answers here:
Plot over multiple pages
(3 answers)
Closed 3 years ago.
I am trying to generate a multi-page pdf of a grid of ggplots from a list of ggplots. I have tried very many ways to do this and have not succeeded. Here is a reproducible equivalent to what I have been working with:
library(ggplot2)
# generate a data frame w same structure as the one I'm working with
time <- c(1:10)
veclist <- list()
veclist[[1]] <- time
for (i in 2:25){
veclist[[i]] <- as.vector(c(runif(10,-2,2)))
}
d <- as.data.frame(do.call(rbind, veclist))
d <- as.data.frame(t(d))
colnames(d)[1] <- "time"
for (i in 2:length(d)){
colnames(d)[i] <- paste("name",i,sep=" ")
}
# for a common axis
numericvalues <- d[,2:length(d)]
# generate plot(s)
name_list = paste("`",names(d),"`",sep="")
plot_list = list()
for (i in 2:length(d)) {
p = ggplot(d, aes_string(x=name_list[[1]], y=name_list[[i]])) +
geom_point() +
labs(x="time",title=paste(strwrap(names(d[i]), width = 30),collapse = "\n")) +
theme(plot.title = element_text(size=10,hjust = 0.5),axis.text.x=element_text(size=6)) +
coord_cartesian(ylim = c(min(numericvalues, na.rm = TRUE), max(numericvalues, na.rm = TRUE)))
plot_list[[i]] = p
}
What I am looking for would generate a multi-page pdf grid of the plots in plot_list (ideally with 3 columns, 4 rows of plots per page).
A few things I have tried:
pdf("test.pdf")
do.call("marrangeGrob",c(plot_list,ncol=3,nrow=2))
produces an unreadable pdf file.
pdf("test.pdf")
do.call("grid.arrange",c(plot_list))
returns only 'grobs' allowed in "gList" error.

This one produces a multipage layout:
library(gridExtra)
...
plot_list = list()
for (i in 2:length(d)) {
p = ggplot(...)
plot_list[[i]] = ggplotGrob(p)
}
class(plot_list) <- c("arrangelist", class(plot_list))
ggsave("multipage.pdf", plot_list)

your plot list seems to have a missing item. Here's a minimal example
library(ggplot2)
pl <- replicate(13, ggplot(), simplify = FALSE)
ggsave("mp.pdf", gridExtra::marrangeGrob(grobs = pl, nrow=3, ncol=2))
Notes:
you were missing dev.off() hence the invalid pdf
do.call is no longer necessary in (m)arrangeGrob, use the grobs argument

Here's a solution with gridExtra and tidyr
(i) transform the wide data to long-format, and split into a list of data.frame based on each name:
library(tidyr)
df <- d %>% gather(var, val, -time)
df_list <- split(df, df$var)
(ii) plot for each name with lapply function
plots <- lapply(names(df_list), function(x){
ggplot(df_list[[x]], aes(time, val)) +
geom_point() +
labs(x="time", title=x)
})
(iii) using gridExtra to print 12 plots each on two pages of the pdf:
library(gridExtra)
pdf("something.pdf")
do.call(grid.arrange, c(plots[1:12], nrow=4))
do.call(grid.arrange, c(plots[13:24], nrow=4))
dev.off()

How can I transform a matrix so that I can plot using ggplot

I have a matrix in R that I can plot using matplot however it is hard to customize the plot. I would like to plot using the R package ggplot however it will not work using a matrix. I am not sure what transformations are required of the matrix to allow the data to work with ggplot.
Thanks for any help.

you need to convert the matrix to a data frame
mat = cbind(index = seq(1:10), price=7+rnorm(10))
df = as.data.frame(mat)
library(ggplot2)
ggplot(df) + geom_line(aes(x = index, y = price))

You can use reshape2::melt as mentioned in the comment.
numbers <- sample(c(1:100), 100, replace = T)
data <- matrix( numbers, ncol=10)
rownames(data) <- paste0("row-", seq(1,10))
colnames(data) <- paste0("col-", seq(1,10))
data
d2.df <- reshape2::melt(data, c("x", "y"), value.name = "z")
head(d2.df)

Saving ggplot2 output in R to pdf file when using dplyr and mapply function

I'm still learning R (clearly), and cannot figure out where my problem might be when trying to save ggplot2 output into a pdf file. I have been able to create code using a loop to save ggplot output, but want to force myself to avoid loops and take advantage of R's ability to do so.
I have looked at other posts regarding saving pdf files, but none seemed to address my issue.
Here is a reproducible example:
# Create example data frame for reproducible example
amount <- c(rep(5, 25), rep(10, 50), rep(15, 25))
value <- c(rep(100, 20), rep(200, 30), rep(300, 50))
fund <- I(c(rep("FundA", 50), rep("FundB", 50)))
example_df <- data.frame(amount, value, fund)
#==============================================================
# Weighted histogram function for plotting
histogram_wt_fx <- function(my_df, xvar, my_weight,
chart_title = "title",
chart_xlabel = "x",
chart_ylabel = "y") {
library(ggplot2)
histogram <- ggplot(my_df, aes(x = xvar, weight = my_weight)) +
geom_histogram(binwidth=0.25, colour="black", fill="white")
# add another layer showing weighted avg amount
histogram <- histogram + geom_vline(aes(xintercept = sum (xvar*my_weight)),
color="red", linetype="dashed", size=1) +
labs(title = chart_title , x = chart_xlabel, y = chart_ylabel)
}
#===============================================================
# Function to weight data and plot histogram
# Note: fund_wtd_fx in turn calls histogram_wt_fx
fund_wtd_fx <- function(my_df, my_title) {
my_df <- my_df %>%
mutate(pct_amount = amount/sum(amount))
my_df %>%
histogram_wt_fx (xvar = my_df$value,
my_weight = my_df$pct_amount,
chart_title = my_title,
chart_xlabel = "Amount",
chart_ylabel = "Percent") %>%
plot() #%>%
#*** This is where the problem code is ****
#pdf() %>%
#plot()
}
#=====================================
# Extract fund lists from larger data set and run the functions on this list
fund_names <- unique(example_df$fund) # List of funds in the data frame
fund_dfs <- list() # Initialize list of data frames
# Create list of fund data frames
for (myfund in fund_names) {
myfund <- example_df %>%
filter(fund == myfund)
fund_dfs[[length(fund_dfs)+1]] <- myfund
}
rm(myfund)
names(fund_dfs) <- fund_names
# Assign list of fund names to the list of data frames
for (i in 1:length(fund_names)) {
assign(fund_names[[i]], fund_dfs[[i]])
}
# Run histogram function on each fund
my_title <- as.list(paste0("Some title for ", (names(fund_dfs))))
mapply(FUN = fund_wtd_fx, fund_dfs, my_title)
#dev.off()
My problem:
This code runs like I want it to, but if you uncomment lines 39, 41, 42, and 68 (assuming you pasted the code starting in line 1), then the plots do not get saved and a plot.window error is thrown.
I would have thought that pipe operator on uncommented line 39, would feed into the pdf function to save the plot output as the mapply function cycles through the data frames. Ultimately that is what I am trying to do--save the plots generated into a pdf file with this code.
Many thanks for any help or suggestions.

histogram_wt_fx() now returns the plot object to fund_wtd_fx() which now also returns the plot object.
Switched to purrr::map2() from mapply and did the plotting at the end.
Take a look, give it a go and let me know if I can/should explain a bit more.
library(dplyr)
library(ggplot2)
library(purrr)
amount <- c(rep(5, 25), rep(10, 50), rep(15, 25))
value <- c(rep(100, 20), rep(200, 30), rep(300, 50))
fund <- I(c(rep("FundA", 50), rep("FundB", 50)))
example_df <- data.frame(amount, value, fund)
histogram_wt_fx <- function(my_df, xvar, my_weight,
chart_title = "title",
chart_xlabel = "x",
chart_ylabel = "y") {
histogram <- ggplot(my_df, aes(x = xvar, weight = my_weight)) +
geom_histogram(binwidth=0.25, colour="black", fill="white")
histogram <- histogram + geom_vline(aes(xintercept = sum (xvar*my_weight)),
color="red", linetype="dashed", size=1) +
labs(title = chart_title , x = chart_xlabel, y = chart_ylabel)
histogram
}
fund_wtd_fx <- function(my_df, my_title) {
my_df <- my_df %>%
mutate(pct_amount = amount/sum(amount))
my_df %>%
histogram_wt_fx(xvar = my_df$value,
my_weight = my_df$pct_amount,
chart_title = my_title,
chart_xlabel = "Amount",
chart_ylabel = "Percent")
}
fund_names <- unique(example_df$fund) # List of funds in the data frame
fund_dfs <- list() # Initialize list of data frames
for (myfund in fund_names) {
myfund <- example_df %>%
filter(fund == myfund)
fund_dfs[[length(fund_dfs)+1]] <- myfund
}
rm(myfund)
names(fund_dfs) <- fund_names
for (i in 1:length(fund_names)) {
assign(fund_names[[i]], fund_dfs[[i]])
}
my_title <- as.list(paste0("Some title for ", (names(fund_dfs))))
plots <- map2(fund_dfs, my_title, fund_wtd_fx)
pdf()
walk(plots, print)
dev.off()

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Line plots for several data using ggplot2 and save as pdf - r

Related

add one legend with all variables for combined graphs

Changing aesthetics in ggplot generated by svars package in R

Generate a multi-page pdf file of a grid of ggplot plots [duplicate]

How can I transform a matrix so that I can plot using ggplot

Saving ggplot2 output in R to pdf file when using dplyr and mapply function

Categories

Resources