add one legend with all variables for combined graphs - r

I'm trying to plot two graphs side-by-side with one common legend that incorporates all the variables between both graphs (some vars are different between the graphs).
Here's a mock example of what I've been attempting:
#make relative abundance values for n rows
makeData <- function(n){
n <- n
x <- runif(n, 0, 1)
y <- x / sum(x)
}
#make random matrices filled with relative abundance values
makeDF <- function(col, rw){
df <- matrix(ncol=col, nrow=rw)
for(i in 1:ncol(df)){
df[,i] <- makeData(nrow(df))
}
return(df)
}
#create df1 and assign col names
df1 <- makeDF(4, 5)
colSums(df1) #verify relative abundance values = 1
df1 <- as.data.frame(df1)
colnames(df1) <- c("taxa","s1", "s2", "s3")
df1$taxa <- c("ASV1", "ASV2", "ASV3", "ASV4", "ASV5")
#repeat for df2
df2 <- makeDF(4,5)
df2 <- as.data.frame(df2)
colnames(df2) <- c("taxa","s1", "s2", "s3")
df2$taxa <- c("ASV1", "ASV5", "ASV6", "ASV7", "ASV8")
# convert wide data format to long format -- for plotting
library(reshape2)
makeLong <- function(df){
df.long <- melt(df, id.vars="taxa",
measure.vars=grep("s\\d+", names(df), val=T),
variable.name="sample",
value.name="value")
return(df.long)
}
df1 <- makeLong(df1)
df2 <- makeLong(df2)
#generate distinct colours for each asv
taxas <- union(df1$taxa, df2$taxa)
library("RColorBrewer")
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
colpals <- qual_col_pals[c("Set1", "Dark2", "Set3"),] #select colour palettes
col_vector = unlist(mapply(brewer.pal, colpals$maxcolors, rownames(colpals)))
taxa.col=sample(col_vector, length(taxas))
names(taxa.col) <- taxas
# plot using ggplot
library(ggplot2)
plotdf2 <- ggplot(df2, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col)
plotdf1 <- ggplot(df1, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col)
#combine plots to one figure and merge legend
library(ggpubr)
ggpubr::ggarrange(plotdf1, plotdf2, ncol=2, nrow=1, common.legend = T, legend="bottom")
(if you have suggestions on how to generate better mock data, by all means!)
When I run my code, I am able to get the two graphs in one figure, but the legend does not incorporate all variables from both plots:
I ideally would like to avoid having repeat variables in the legend, such as:
From what I've searched online, the legend only works when the variables are the same between graphs, but in my case I have similar and different variables.
Thanks for any help!

Maybe this is what you are looking for:
Convert your taxa variables to factor with the levels equal to your taxas variable, i.e. to include all levels from both datasets.
Add argument drop=FALSE to both scale_fill_manual to prevent dropping of unused factor levels.
Note: I only added the relevant parts of the code and set the seed to 42 at the beginning of the script.
set.seed(42)
df1$taxa <- factor(df1$taxa, taxas)
df2$taxa <- factor(df2$taxa, taxas)
# plot using ggplot
library(ggplot2)
plotdf2 <- ggplot(df2, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity") +
scale_fill_manual("ASV", values = taxa.col, drop = FALSE)
plotdf1 <- ggplot(df1, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col, drop = FALSE)
#combine plots to one figure and merge legend
library(ggpubr)
ggpubr::ggarrange(plotdf1, plotdf2, ncol=2, nrow=1, common.legend = T, legend="bottom")

Related

Line plots for several data using ggplot2 and save as pdf

With following three different data set,
mean=replicate(10,rnorm(10))
colnames(mean)=paste0(rep(c("x0","x1","x2","x3","x4"),2),"_c", rep(c(1:2), each=5))
meanpos=replicate(10,rnorm(10))+1.5
meanneg=replicate(10,rnorm(10))-1.5
hcol=c(0,0.5,0,0.75,1.0,
1.1,1.20,0,0.8,-0.025)#vector of size ncol(mean)
I can create the line plots using following for loop
par(mfrow=c(2,2))
for ( v in 1:ncol(mean)){
plot(mean[,v], type = "l",
ylim = c(min(meanpos[,v],mean[,v]),
max(meanpos[,v],mean[,v])),
xlab = "sl no", ylab = "",main = colnames(mean)[v])
abline(h=hcol[v], col="purple")
lines(meanpos[,v], col="blue")
lines(meanneg[,v], col="green")
}
One plot for each column and outlined as 2 by 2. Here are a few plots
How can I create a similar plot using the ggplot2 function with a legend for each line and save as pdf file.
Any help is appreciated
If you want to use ggplot2 you can format data as next. It is better if you save your data from vectors to dataframes and then you can bind all data to reshape and have the desired plot using facets instead of loops as you did. You can tune ncol argument from facet_wrap() in order to define a matrix structure. Here the code using the data you provided. I added also the steps to have dataframes and easily use ggplot2 functions:
library(tidyverse)
#Initial data
set.seed(123)
#Data
mean=replicate(10,rnorm(10))
colnames(mean)=paste0(rep(c("x0","x1","x2","x3","x4"),2),"_c", rep(c(1:2), each=5))
meanpos=replicate(10,rnorm(10))+1.5
meanneg=replicate(10,rnorm(10))-1.5
hcol=c(0,0.5,0,0.75,1.0,
1.1,1.20,0,0.8,-0.025)#vector of size ncol(mean)
We save data in dataframes and identify all values:
#Concatenate all in a dataframe
df1 <- as.data.frame(mean)
#Data for intercepts
hcol=c(0,0.5,0,0.75,1.0,
1.1,1.20,0,0.8,-0.025)
#Dataframe
dfh <- data.frame(name=names(df1),hcol,stringsAsFactors = F)
#Mean pos
df2 <- as.data.frame(meanpos)
names(df2) <- names(df1)
#Mean neg
df3 <- as.data.frame(meanneg)
names(df3) <- names(df1)
#Assign ids
df1$id <- 'mean'
df2$id <- 'mean pos'
df3$id <- 'mean neg'
#Rows
df1$id2 <- 1:dim(df1)[1]
df2$id2 <- 1:dim(df2)[1]
df3$id2 <- 1:dim(df3)[1]
#Bind
dfm <- rbind(df1,df2,df3)
With the entire data, we reshape it to use facets:
#Pivot
dfm %>% pivot_longer(cols = -c(id,id2)) -> dfm2
Now, the plot:
#Sketch for plot
G1 <- ggplot(dfm2,aes(x=id2,y=value,group=id,color=id))+
geom_line()+
geom_hline(data = dfh,aes(yintercept = hcol),color='purple')+
facet_wrap(.~name,scales='free')+
xlab("sl no")+ylab("")+
scale_color_manual(values = c('mean'='tomato','mean pos'='blue','mean neg'='green'))+
theme_bw()+
theme(legend.position = 'top')
You can save as .pdf with ggsave():
#Export
ggsave(filename = 'Plot.pdf',plot = G1,width = 35,height = 20,units = 'cm')
Output:

quarter circles ggplot-s with nonexisting points

I try to use ggplot to plot quarted circles to visualize contour plots but I get misconfigured plot using geom_area (following this tutorial on stacked area with ggplot2)
The code I tried reads
library(ggplot2)
library(dplyr)
N <- 1E2
r <- rev(c(1,2,4,7))
maxXY = max(r)+.25*max(r)
grupp <- c("0","0.25","0.5","0.75")
datalist = list()
plot(0,0,xlim=c(0,maxXY),ylim=c(0,maxXY))
for (i in 1:length(r)) {
quadX <- seq(from = 0,to = r[i],length.out = N) # calculate x coords
quadY <- sqrt(r[i]^2 - quadX^2) # calculate y coords
lines(quadX,quadY)
# data for ggplot
dat <- data.frame(X = quadX, Y = quadY)
dat$group <- grupp[i]
datalist[[i]] <- dat # add it to your list
}
DF = do.call(rbind, datalist)
# stacked area chart
p1 <- ggplot(DF, aes(x=X, y=Y, fill=group)) +
geom_area(alpha=0.6 , size=1, colour="black")
plot(p1)
and I get quarter circles plotted correctly with basic plot
but a weird one with geom_area
Any help would be very appreciated. MJS
EDIT: using Z.Lin's suggestions I get the correct plot, thanks!

How to set x and y limits to same values?

This is some data I made. I have two data frames with two variables each.
var1 <- (1:10)*(rnorm(10,2,0.1))
var2 <- (6:15)*(rnorm(10,1,0.1))
df1 <- as.data.frame(cbind(var1,var2))
var3 <- (1:10)*(rnorm(10,3,0.1))
var4 <- (6:15)*(rnorm(10,1.5,0.1))
df2 <- as.data.frame(cbind(var3,var4))
There is a loop for plotting the first variable of df1 and df2, and the second of df1 and df2 too.
plot_list = list()
for(i in 1:ncol(df1)){
p=ggplot(df1,
aes_string(x=df1[,i],
y=df2[,i]))+
geom_point()
plot_list[[i]] = p
}
library(gridExtra)
do.call("grid.arrange", c(plot_list[c(1:2)], ncol=1))
And this is the plot I got.
So far so good. But, I would like to x and y within each plot had the same limit based on max and min. For example, in the above plot both x and should go from ~5 to ~30. In the below plot both x and should go from ~6 to ~24. I could set the limits manually, but I need to do this for many plots.
Is there any way to set the x and y limits for each plot based on min and max observed in any of the axis?
Thanks for the help.
In general, I’d suggest that the data for each plot should be in its own data.frame. Having a single data.frame and using facets is an option, but facets make it difficult to specify different limits for each plot. I’ve therefore gone with a grid.arrange solution similar to yours.
library(ggplot2)
library(purrr)
var1 <- (1:10)*(rnorm(10,2,0.1))
var2 <- (6:15)*(rnorm(10,1,0.1))
var3 <- (1:10)*(rnorm(10,3,0.1))
var4 <- (6:15)*(rnorm(10,1.5,0.1))
df1 <- data.frame(x = var1, y = var3)
df2 <- data.frame(x = var2, y = var4)
plots <- map(
list(df1, df2),
function(data) {
ggplot(data, aes(x, y)) +
geom_point() +
coord_fixed(xlim = range(c(data$x, data$y)), ylim = range(c(data$x, data$y)))
})
gridExtra::grid.arrange(grobs = plots, nrow = 2)

"Dotplot" visualisation with factors

I am not sure how to approach this. I want to create a "dotpot" style plot in R from a data frame of categorical variables (factors) such that for each column of the df I plot a column of dots, each coloured according to the factors. For example,
my_df <- cbind(c('sheep','sheep','cow','cow','horse'),c('sheep','sheep','sheep','sheep',<NA>),c('sheep','cow','cow','cow','cow'))
I then want to end up with a 3 x 5 grid of dots, each coloured according to sheep/cow/horse (well, one missing because of the NA).
Do you mean something like this:
my_df <- cbind(c('sheep','sheep','cow','cow','horse'),
c('sheep','sheep','sheep','sheep',NA),
c('sheep','cow','cow','cow','cow'))
df <- data.frame(my_df) # make it as data.frame
df$id <- row.names(df) # add an id
library(reshape2)
melt_df <-melt(df,'id') # melt it
library(ggplot2) # now the plot
p <- ggplot(melt_df, aes(x = variable, fill = value))
p + geom_dotplot(stackgroups = TRUE, binwidth = 0.3, binpositions = "all")

Error in strsplit(filename, "\\.") : non-character argument in ggplot

I am trying to make the plot with horizontal lines where the data2 and data3 points should be within data1 range. This will give an overlapping lines in different colors but I am getting an error which says:
Error in strsplit(filename, "\\.") : non-character argument
Here is the data and code. Please give me some suggestion.
data1 <- data.frame(Start=c(10),End=c(19))
data2 <- data.frame(Start=c(5),End=c(15))
data3 <- data.frame(Start=c(6),End=c(18))
filter_data2 <- data2[data2$Start >= (data1$Start-(data1$Start/2)) & data2$End <= (data1$End+(data1$End/2)), ]
filter_data3 <- data3[data3$Start >= (data1$Start-(data1$Start/2)) & data3$End <= (data1$End+(data1$End/2)), ]
data1 <- data.frame(rep(1,nrow(data1)),data1)
colnames(data1) <- c("ID","start","end")
data2 <- data.frame(rep(2,nrow(filter_data2)),filter_data2)
colnames(data2) <- c("ID","start","end")
data3 <- data.frame(rep(3,nrow(filter_data3)),filter_data3)
colnames(data3) <- c("ID","start","end")
dat1 <- rbind(data1,data2,data3)
pdf("overlap.pdf")
p <- ggplot(dat1, aes(x=(max(start)-max(start)/2), y = ID, colour=ID))
p <- p + geom_segment(aes(xend =(max(end)+max(end)/2), ystart = ID, yend = ID))
p <- p + scale_colour_brewer(palette = "Set1")
ggsave(p)
There are two problems in your code. If you want to use scale_colour_brewer() then ID values should be set as factor
p <- ggplot(dat1, aes(x=(max(start)-max(start)/2), y = ID, colour=as.factor(ID)))
Next, to save the ggplot2 plot you have two possibilities.
Using ggsave() function you should provide file name and format. In this case function pdf() is unnecessary.
ggsave(plot=p,file="plot.pdf")
Using function pdf(), you should add print(p) and then dev.off(). In this case you don't need ggsave() function.
pdf("overlap.pdf")
print(p)
dev.off()

Resources