I am trying to build a function for bivariate plotting that taking 2 variables it is able to represent a marginal scatterplot and two lateral density plots.
The problem is that the density plot on the right does not align with the bottom axis.
Here is a sample data:
g1 = c(rnorm(200, mean=350, sd=100), rnorm(200, mean=700, sd=100))
g2 = c(rnorm(200, mean=350, sd=100), rnorm(200, mean=500, sd=100))
df_exp = data.frame(var1=log2(g1 + 1) , var2=log2(g2 + 1))
Here is the function:
bivariate_plot <- function(df, var1, var2, density = T, box = F) {
require(ggplot2)
require(cowplot)
scatter = ggplot(df, aes(eval(parse(text = var1)), eval(parse(text = var2)), color = "red")) +
geom_point(alpha=.8)
plot1 = ggplot(df, aes(eval(parse(text = var1)), fill = "red")) + geom_density(alpha=.5)
plot1 = plot1 + ylab("G1 density")
plot2 = ggplot(df, aes(eval(parse(text = var2)),fill = "red")) + geom_density(alpha=.5)
plot2 = plot2 + ylab("G2 density")
plot_grid(scatter, plot1, plot2, nrow=1, labels=c('A', 'B', 'C')) #Or labels="AUTO"
# Avoid displaying duplicated legend
plot1 = plot1 + theme(legend.position="none")
plot2 = plot2 + theme(legend.position="none")
# Homogenize scale of shared axes
min_exp = min(df[[var1]], df[[var2]]) - 0.01
max_exp = max(df[[var1]], df[[var2]]) + 0.01
scatter = scatter + ylim(min_exp, max_exp)
scatter = scatter + xlim(min_exp, max_exp)
plot1 = plot1 + xlim(min_exp, max_exp)
plot2 = plot2 + xlim(min_exp, max_exp)
plot1 = plot1 + ylim(0, 2)
plot2 = plot2 + ylim(0, 2)
first_row = plot_grid(scatter, labels = c('A'))
second_row = plot_grid(plot1, plot2, labels = c('B', 'C'), nrow = 1)
gg_all = plot_grid(first_row, second_row, labels=c('', ''), ncol=1)
# Display the legend
scatter = scatter + theme(legend.justification=c(0, 1), legend.position=c(0, 1))
# Flip axis of gg_dist_g2
plot2 = plot2 + coord_flip()
# Remove some duplicate axes
plot1 = plot1 + theme(axis.title.x=element_blank(),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank())
plot2 = plot2 + theme(axis.title.y=element_blank(),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank())
# Modify margin c(top, right, bottom, left) to reduce the distance between plots
#and align G1 density with the scatterplot
plot1 = plot1 + theme(plot.margin = unit(c(0.5, 0, 0, 0.7), "cm"))
scatter = scatter + theme(plot.margin = unit(c(0, 0, 0.5, 0.5), "cm"))
plot2 = plot2 + theme(plot.margin = unit(c(0, 0.5, 0.5, 0), "cm"))
# Combine all plots together and crush graph density with rel_heights
first_col = plot_grid(plot1, scatter, ncol = 1, rel_heights = c(1, 3))
second_col = plot_grid(NULL, plot2, ncol = 1, rel_heights = c(1, 3))
perfect = plot_grid(first_col, second_col, ncol = 2, rel_widths = c(3, 1),
axis = "lrbl", align = "hv")
print(perfect)
}
And here is the call for plotting:
bivariate_plot(df = df_exp, var1 = "var1", var2 = "var2")
It is important to point out that this alignment problem is always present even by changing the data.
And this is what happen with my real data:
This can be accomplished easily using the ggExtra package, rather than rolling your own solution.
library(ggExtra)
library(ggplot2)
g1 = c(rnorm(200, mean=350, sd=100), rnorm(200, mean=700, sd=100))
g2 = c(rnorm(200, mean=350, sd=100), rnorm(200, mean=500, sd=100))
df_exp = data.frame(var1=log2(g1 + 1) , var2=log2(g2 + 1))
g <- ggplot(df_exp, aes(x=var1, y=var2)) + geom_point()
ggMarginal(g)
Output:
There's so many bugs in your code that I don't quite know where to start. The code below fixes them, to the extent that I understand what the intended result is.
g1 = c(rnorm(200, mean=350, sd=100), rnorm(200, mean=700, sd=100))
g2 = c(rnorm(200, mean=350, sd=100), rnorm(200, mean=500, sd=100))
df_exp = data.frame(var1=log2(g1 + 1) , var2=log2(g2 + 1))
bivariate_plot <- function(df, var1, var2, density = T, box = F) {
require(ggplot2)
require(cowplot)
scatter = ggplot(df, aes_string(var1, var2)) +
geom_point(alpha=.8, color = "red")
plot1 = ggplot(df, aes_string(var1)) + geom_density(alpha=.5, fill = "red")
plot1 = plot1 + ylab("G1 density")
plot2 = ggplot(df, aes_string(var2)) + geom_density(alpha=.5, fill = "red")
plot2 = plot2 + ylab("G2 density")
# Avoid displaying duplicated legend
plot1 = plot1 + theme(legend.position="none")
plot2 = plot2 + theme(legend.position="none")
# Homogenize scale of shared axes
min_exp = min(df[[var1]], df[[var2]]) - 0.01
max_exp = max(df[[var1]], df[[var2]]) + 0.01
scatter = scatter + ylim(min_exp, max_exp)
scatter = scatter + xlim(min_exp, max_exp)
plot1 = plot1 + xlim(min_exp, max_exp)
plot2 = plot2 + xlim(min_exp, max_exp)
plot1 = plot1 + ylim(0, 2)
plot2 = plot2 + ylim(0, 2)
# Flip axis of gg_dist_g2
plot2 = plot2 + coord_flip()
# Remove some duplicate axes
plot1 = plot1 + theme(axis.title.x=element_blank(),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank())
plot2 = plot2 + theme(axis.title.y=element_blank(),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank())
# Modify margin c(top, right, bottom, left) to reduce the distance between plots
#and align G1 density with the scatterplot
plot1 = plot1 + theme(plot.margin = unit(c(0.5, 0, 0, 0.7), "cm"))
scatter = scatter + theme(plot.margin = unit(c(0, 0, 0.5, 0.5), "cm"))
plot2 = plot2 + theme(plot.margin = unit(c(0, 0.5, 0.5, 0), "cm"))
# Combine all plots together and crush graph density with rel_heights
perfect = plot_grid(plot1, NULL, scatter, plot2,
ncol = 2, rel_widths = c(3, 1), rel_heights = c(1, 3))
print(perfect)
}
bivariate_plot(df = df_exp, var1 = "var1", var2 = "var2")
I have created boxplots using ggplot2 with this code.
plotgraph <- function(x, y, colour, min, max)
{
plot1 <- ggplot(dims, aes(x = x, y = y, fill = Region)) +
geom_boxplot()
#plot1 <- plot1 + scale_x_discrete(name = "Blog Type")
plot1 <- plot1 + labs(color='Region') + geom_hline(yintercept = 0, alpha = 0.4)
plot1 <- plot1 + scale_y_continuous(breaks=c(seq(min,max,5)), limits = c(min, max))
plot1 <- plot1 + labs(x="Blog Type", y="Dimension Score") + scale_fill_grey(start = 0.3, end = 0.7) + theme_grey()
plot1 <- plot1 + theme(legend.justification = c(1, 1), legend.position = c(1, 1))
return(plot1)
}
plot1 <- plotgraph (Blog, Dim1, Region, -30, 25)
A part of data I use is reproduced here.
Blog,Region,Dim1,Dim2,Dim3,Dim4
BlogsInd.,PK,-4.75,13.47,8.47,-1.29
BlogsInd.,PK,-5.69,6.08,1.51,-1.65
BlogsInd.,PK,-0.27,6.09,0.03,1.65
BlogsInd.,PK,-2.76,7.35,5.62,3.13
BlogsInd.,PK,-8.24,12.75,3.71,3.78
BlogsInd.,PK,-12.51,9.95,2.01,0.21
BlogsInd.,PK,-1.28,7.46,7.56,2.16
BlogsInd.,PK,0.95,13.63,3.01,3.35
BlogsNews,PK,-5.96,12.3,6.5,1.49
BlogsNews,PK,-8.81,7.47,4.76,1.98
BlogsNews,PK,-8.46,8.24,-1.07,5.09
BlogsNews,PK,-6.15,0.9,-3.09,4.94
BlogsNews,PK,-13.98,10.6,4.75,1.26
BlogsNews,PK,-16.43,14.49,4.08,9.91
BlogsNews,PK,-4.09,9.88,-2.79,5.58
BlogsNews,PK,-11.06,16.21,4.27,8.66
BlogsNews,PK,-9.04,6.63,-0.18,5.95
BlogsNews,PK,-8.56,7.7,0.71,4.69
BlogsNews,PK,-8.13,7.26,-1.13,0.26
BlogsNews,PK,-14.46,-1.34,-1.17,14.57
BlogsNews,PK,-4.21,2.18,3.79,1.26
BlogsNews,PK,-4.96,-2.99,3.39,2.47
BlogsNews,PK,-5.48,0.65,5.31,6.08
BlogsNews,PK,-4.53,-2.95,-7.79,-0.81
BlogsNews,PK,6.31,-9.89,-5.78,-5.13
BlogsTech,PK,-11.16,8.72,-5.53,8.86
BlogsTech,PK,-1.27,5.56,-3.92,-2.72
BlogsTech,PK,-11.49,0.26,-1.48,7.09
BlogsTech,PK,-0.9,-1.2,-2.03,-7.02
BlogsTech,PK,-12.27,-0.07,5.04,8.8
BlogsTech,PK,6.85,1.27,-11.95,-10.79
BlogsTech,PK,-5.21,-0.89,-6,-2.4
BlogsTech,PK,-1.06,-4.8,-8.62,-2.42
BlogsTech,PK,-2.6,-4.58,-2.07,-3.25
BlogsTech,PK,-0.95,2,-2.2,-3.46
BlogsTech,PK,-0.82,7.94,-4.95,-5.63
BlogsTech,PK,-7.65,-5.59,-3.28,-0.54
BlogsTech,PK,0.64,-1.65,-2.36,-2.68
BlogsTech,PK,-2.25,-3,-3.92,-4.87
BlogsTech,PK,-1.58,-1.42,-0.38,-5.15
Columns,PK,-5.73,3.26,0.81,-0.55
Columns,PK,0.37,-0.37,-0.28,-1.56
Columns,PK,-5.46,-4.28,2.61,1.29
Columns,PK,-3.48,2.38,12.87,3.73
Columns,PK,0.88,-2.24,-1.74,3.65
Columns,PK,-2.11,4.51,8.95,2.47
Columns,PK,-10.13,10.73,9.47,-0.47
Columns,PK,-2.08,1.04,0.11,0.6
Columns,PK,-4.33,5.65,2,-0.77
Columns,PK,1.09,-0.24,-0.92,-0.17
Columns,PK,-4.23,-4.01,-2.32,6.26
Columns,PK,-1.46,-1.53,9.83,5.73
Columns,PK,9.37,-1.32,1.27,-4.12
Columns,PK,5.84,-2.42,-5.21,1.07
Columns,PK,8.21,-9.36,-5.87,-3.21
Columns,PK,7.34,-7.3,-2.94,-5.86
Columns,PK,1.83,-2.77,1.47,-4.02
BlogsInd.,PK,14.39,-0.55,-5.42,-4.7
BlogsInd.,US,22.02,-1.39,2.5,-3.12
BlogsInd.,US,4.83,-3.58,5.34,9.22
BlogsInd.,US,-3.24,2.83,-5.3,-2.07
BlogsInd.,US,-5.69,15.17,-14.27,-1.62
BlogsInd.,US,-22.92,4.1,5.79,-3.88
BlogsNews,US,0.41,-2.03,-6.5,2.81
BlogsNews,US,-4.42,8.49,-8.04,2.04
BlogsNews,US,-10.72,-4.3,3.75,11.74
BlogsNews,US,-11.29,2.01,0.67,8.9
BlogsNews,US,-2.89,0.08,-1.59,7.06
BlogsNews,US,-7.59,8.51,3.02,12.33
BlogsNews,US,-7.45,23.51,2.79,0.48
BlogsNews,US,-12.49,15.79,-9.86,18.29
BlogsTech,US,-11.59,6.38,11.79,-7.28
BlogsTech,US,-4.6,4.12,7.46,3.36
BlogsTech,US,-22.83,2.54,10.7,5.09
BlogsTech,US,-4.83,3.37,-8.12,-0.9
BlogsTech,US,-14.76,29.21,6.23,9.33
Columns,US,-15.93,12.85,19.47,-0.88
Columns,US,-2.78,-1.52,8.16,0.24
Columns,US,-16.39,13.08,11.07,7.56
Even though I have tried to add detailed scale on y-axis, it is hard for me to pinpoint exact median score for each boxplot. So I need to print median value within each boxplot. There was another answer available (for faceted boxplot) which does not work for me as the printed values are not within the boxes but jammed together in the middle. It will be great to be able to print them within (middle and above the median line of) boxplots.
Thanks for your help.
Edit: I make a grouped graph as below.
Add
library(dplyr)
dims=dims%>%
group_by(Blog,Region)%>%
mutate(med=median(Dim1))
plotgraph <- function(x, y, colour, min, max)
{
plot1 <- ggplot(dims, aes(x = x, y = y, fill = Region)) +
geom_boxplot()+
labs(color='Region') +
geom_hline(yintercept = 0, alpha = 0.4)+
scale_y_continuous(breaks=c(seq(min,max,5)), limits = c(min, max))+
labs(x="Blog Type", y="Dimension Score") + scale_fill_grey(start = 0.3, end = 0.7) +
theme_grey()+
theme(legend.justification = c(1, 1), legend.position = c(1, 1))+
geom_text(aes(y = med,x=x, label = round(med,2)),position=position_dodge(width = 0.8),size = 3, vjust = -0.5,colour="blue")
return(plot1)
}
plot1 <- plotgraph (Blog, Dim1, Region, -30, 25)
Which gives (the text colour can be tweaked to something less tacky):
Note: You should consider using non-standard evaluation in your function rather than having it require the use of attach()
Edit:
One liner, not as clean I wanted it to be since I ran into problems with dplyr not properly aggregating the data even though it says the grouping was performed.
This function assume the dataframe is always called dims
library(ggplot2)
library(reshape2)
plotgraph <- function(x, y, colour, min, max)
{
plot1 <- ggplot(dims, aes_string(x = x, y = y, fill = colour)) +
geom_boxplot()+
labs(color=colour) +
geom_hline(yintercept = 0, alpha = 0.4)+
scale_y_continuous(breaks=c(seq(min,max,5)), limits = c(min, max))+
labs(x="Blog Type", y="Dimension Score") +
scale_fill_grey(start = 0.3, end = 0.7) +
theme_grey()+
theme(legend.justification = c(1, 1), legend.position = c(1, 1))+
geom_text(data= melt(with(dims, tapply(eval(parse(text=y)),list(eval(parse(text=x)),eval(parse(text=colour))), median)),varnames=c("Blog","Region"),value.name="med"),
aes_string(y = "med",x=x, label = "med"),position=position_dodge(width = 0.8),size = 3, vjust = -0.5,colour="blue")
return(plot1)
}
plot1 <- plotgraph ("Blog", "Dim1", "Region", -30, 25)
Assuming that Blog is your dataframe, the following should work:
min <- -30
max <- 25
meds <- aggregate(Dim1~Region, Blog, median)
plot1 <- ggplot(Blog, aes(x = Region, y = Dim1, fill = Region)) +
geom_boxplot()
plot1 <- plot1 + labs(color='Region') + geom_hline(yintercept = 0, alpha = 0.4)
plot1 <- plot1 + scale_y_continuous(breaks=c(seq(min,max,5)), limits = c(min, max))
plot1 <- plot1 + labs(x="Blog Type", y="Dimension Score") + scale_fill_grey(start = 0.3, end = 0.7) + theme_grey()
plot1 + theme(legend.justification = c(1, 1), legend.position = c(1, 1)) +
geom_text(data = meds, aes(y = Dim1, label = round(Dim1,2)),size = 5, vjust = -0.5, color='white')