I have a dataframe of 10 variables and I want boxplot of each of the variables arranged in two columns. Range of each variable is different, so I want different x axis of each one them below the box plot. Currently I tried this but unable to place axis at right position. Any help will be appreciated!!
library(ggplot2)
library(tidyr)
df <- data.frame(matrix(rnorm(2000), ncol = 10))
plot.data <- gather(df, variable, value)
# plot.data$out <- as.numeric(rep(input_data, each = nrow(x_train)))
p <- ggplot(plot.data, aes(x = 0, y=value))
p <- p + geom_boxplot()
#p <- p + geom_point(aes(x = 0, y = test_data), color = "red")
p <- p + facet_wrap(~variable, scales = "free_x", strip.position = 'top', ncol = 2)
p <- p + coord_flip()
p <- p + xlab("") + ylab("")
p <- p + theme(legend.position="none") + theme_bw()
p <- p + theme(axis.text.x=element_blank(),
axis.text.y=element_text(angle=90),
axis.ticks.x=element_blank())
p
Related
I am pretty new to coding and have been teaching myself R. I tried writing some code to carry out multiple comparison statistics for facet_wrap plots with free scales but I am unable to figure out how to implement the for loop in the facet. Alternatively, I also tried to do the statistics using the rstatix package but I am unable to print the p-values from t-tests computed either using stat_compare_means function of ggpubr or using stat.test from rstatix as I have been struggling to figure out the y_position for facets with free scales. Here is the code I have written:
library(tidyverse)
library(dplyr)
library(ggplot2)
library(readxl)
library(scales)
library(stringr)
library(extrafont)
library(viridisLite)
#library(RColorBrewer)
library(ggpubr)
library(reshape2)
library(rstatix)
#library(plotly)
#library(ggpmisc)
LungExtracellularPanel <- read_excel("~/Box/Welm Labs Ubox/Pavitra Viswanath/Flow Cytometry/PVIMM-2 E0771 09-01-2020/Data Analysis/Lung 09-16-2020.xlsx")
df=as.data.frame(LungExtracellularPanel)
df$collection <- str_split_fixed(df$Sample, " ",2)[,1]
rdf <- melt(df,id.vars=c("Sample","collection"))
swr = function(string, nwrap=25) {
paste(strwrap(string, width=nwrap), collapse="\n")
}
swr = Vectorize(swr)
rdf$variable2=swr(rdf$variable)
stat.test <- rdf %>%
add_y_position(y="value", fun="max", scales="free", step.increase = 0.1)
group_by(variable2) %>%
t_test(value~collection)
stat.test
#stuff <- combn(unique(rdf$collection),2)
# a <- NULL
# my_comparisons = list()
# j = 0
# for(i in 1:dim(stuff)[2]){
# a <- as.character(stuff[,i])
# g1 = rdf[which(rdf$collection == a[1]),]$value
# g2 = rdf[which(rdf$collection == a[2]),]$value
# if(mean(g1, na.rm=T) != 0 || mean(g2,na.rm=T) != 0){
# j = j + 1
# my_comparisons[[j]] <- c(a[1],a[2])
# }
# }
p <- ggplot(data = rdf, mapping = aes(x = variable2, y = value, fill =collection))
p <- p + geom_violin(alpha=0.3, scale="width", position = position_dodge(width=1), size=0.5, facet.by="variable2", scales="free")
p <- p + geom_boxplot(alpha=0.7, outlier.shape=NA, position = position_dodge(1), scales="free")
p <- p + geom_point(position = position_jitterdodge(jitter.width=0.1,jitter.height=0, dodge.width = 1))
#p <- p + geom_jitter(position=position_jitter(width=0.1,height=0))
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_blank())
p <- p + scale_fill_viridis_d(option = "D")
p <- p + facet_wrap(~variable2, scales="free", ncol=6)
p <- p + scale_y_continuous(expand = expansion(mult = c(0.1, 0.14)))
p <- p + stat_compare_means(method = "anova", size=3, hjust=0.7, label.y.npc = 'top', family="Arial", fontface="bold")
#p <- p + stat_pvalue_manual(stat.test, label="p")
#p <- p + stat_compare_means(comparisons = my_comparisons, size=3, family= "Arial", face= "bold")
p <- p + labs(title="Spleen Extracellular Panel Analysis")
p <- p + ylab("% Live Cells") + xlab(" ")
p <- p + theme(text=element_text(family="Arial", face = "bold"))
p
This is how the plot looks:
I want to have a line that crosses the chart at 45 degrees no matter what the x and y scale is. In this example the intercept for the abline shoud be around x=-3 and y=-0.5.
Below a few line of code:
x <- seq(1,10,1)
y <- sample(1:100, 10, replace=T)
df <- data.frame(x,y)
ggplot(df, aes(x=x, y=y)) +
geom_point() +
geom_abline(slope = 45) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10), limits = c(-10,10)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10), limits = c(-2,10))
You would just add
ggplot2::annotation_custom(grid::linesGrob())
to your plot.
So you can do this:
x <- rnorm(100)
y <- rnorm(100)
df <- data.frame(x,y)
ggplot(df, aes(x=x, y=y)) +
geom_point() +
ggplot2::annotation_custom(grid::linesGrob())
or this
ggplot(df, aes(x=x)) +
geom_histogram() +
ggplot2::annotation_custom(grid::linesGrob())
If you want to change the look of the line you need to change the grob:
ggplot(df, aes(x=x, y=y)) +
geom_point() +
ggplot2::annotation_custom(grid::linesGrob(gp = grid::gpar(col = "red", lty = 2)))
I have a dataframe of 10 variables and I plotted it in two columns. But ggplot defines whiskers as 5th and 95th perecentile. I want whisker lengths as Q1 - 1.5*IQR / Q3 + 1.5*IQR for each of these plots and outliers as usual. A similar question has been posted in this link, but I couldn't make use of it. Any help will be appreciated!!
library(ggplot2)
library(tidyr)
df <- data.frame(matrix(rnorm(2000), ncol = 10))
plot.data <- gather(df, variable, value)
# plot.data$out <- as.numeric(rep(input_data, each = nrow(x_train)))
p <- ggplot(plot.data, aes(x = 0, y=value))
p <- p + geom_boxplot()
#p <- p + geom_point(aes(x = 0, y = test_data), color = "red")
p <- p + facet_wrap(~variable, scales = "free_x", strip.position = 'top', ncol = 2)
p <- p + coord_flip()
p <- p + xlab("") + ylab("")
p <- p + theme(legend.position="none") + theme_bw()
p <- p + theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank())
p
By default (notched=FALSE), the geom_boxplot() should give you the whisker you want (Q1 - 1.5*IQR / Q3 + 1.5*IQR). See a more current question link. Although, this is subjected to the quantile, IQR definition.
If you insist on setting them manually with stat_summary
# geom_boxplot parameters with stat summary
f <- function(x) {
r <- quantile(x, probs = c(0.25, 0.25, 0.5, 0.75, 0.75))
r[[1]]<-r[[1]]-1.5*IQR(x) #ymin lower whisker, as per geom_boxplot
r[[5]]<-r[[5]]+1.5*IQR(x) #ymax upper whisker
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
# To subset the outlying points for plotting,
o <- function(x) {
r <- quantile(x, probs = c(0.25, 0.75))
r[[1]]<-r[[1]]-1.5*IQR(x)
r[[2]]<-r[[2]]+1.5*IQR(x)
subset(x, x < r[[1]] | r[[2]] < x)
}
# added seed for consistency
set.seed(123)
df <- data.frame(matrix(rnorm(2000), ncol = 10))
plot.data <- gather(df, variable, value)
# plot.data$out <- as.numeric(rep(input_data, each = nrow(x_train)))
p <- ggplot(plot.data, aes(x = 0, y=value))
p <- p + stat_summary(fun.data = f, geom="boxplot")+
stat_summary(fun.y = o, geom="point")
#p <- p + geom_point(aes(x = 0, y = test_data), color = "red")
p <- p + facet_wrap(~variable, scales = "free_x", strip.position = 'top', ncol = 2)
p <- p + coord_flip()
p <- p + xlab("") + ylab("")
p <- p + theme(legend.position="none") + theme_bw()
p <- p + theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank())
I am using the ggplot function to plot this kind of graph
image
I want to add the specific value of the x-axis as shown in the picture
this is my code :
quantiles <- quantile(mat,prob = quant)
x <- as.vector(mat)
d <- as.data.frame(x=x)
p <- ggplot(data = d,aes(x=x)) + theme_bw() +
geom_histogram(aes(y = ..density..), binwidth=0.001,color="black",fill="white") +
geom_density(aes(x=x, y = ..density..),fill="blue", alpha=0.5, color = 'black')
x.dens <- density(x)
df.dens <- data.frame(x = x.dens$x, y = x.dens$y)
p <- p + geom_area(data = subset(df.dens, x <= quantiles), aes(x=x,y=y),
fill = 'green', alpha=0.6)
print(p)
This question already has answers here:
Left align two graph edges (ggplot)
(9 answers)
Closed 9 years ago.
I am a newbie using ggplot2 and I'm trying to plot a scatter plot above a heatmap. Both plots have the same discrete x-axis.
This is the code I'm trying:
library(ggplot2)
library(grid)
library(reshape2)
#data for the scatterplot
df = data.frame(id1 = letters[1:10], C = abs(rnorm(10)))
#scatter plot
p1 <- ggplot(df, aes(x= id1, y = C)) +
geom_point(pch = 19) + theme_bw() +
scale_x_discrete(expand = c(0, 0), breaks = letters[1:10]) +
theme(legend.position = "none") + theme(axis.title.y = element_blank()) + theme(axis.title.x = element_blank())
#data for the heatmap
X = data.frame(matrix(rnorm(100), nrow = 10))
names(X) = month.name[1:10]
X = melt(cbind(id1 = letters[1:10], X))
#heatmap
p2 <- ggplot(X,
aes(x = id1, y = variable, fill = value))
p2 <- p2 + geom_tile()
p2 <- p2 + scale_fill_gradientn(colours = c("blue", "white" , "red"))
p2 <- p2 + theme(legend.position = "none") + theme(axis.title.y = element_blank()) + theme(axis.title.x = element_blank())
p2 <- p2 + scale_x_discrete(expand = c(0, 0), breaks = letters[1:10])
p2 <- p2 + scale_y_discrete(expand = c(0, 0))
layt <- grid.layout(nrow=2,ncol=1,heights=c(2/8,6/8),default.units=c('null','null'))
vplayout <- function(x,y) {viewport(layout.pos.row = x, layout.pos.col = y)}
grid.newpage()
pushViewport(viewport(layout=layt))
print(p1,vp=vplayout(1,1))
print(p2,vp = vplayout(2,1))
The problem is that the axis are not situated one above the other.
https://mail.google.com/mail/u/0/?ui=2&ik=81975edabc&view=att&th=13ece12a06a3cea2&attid=0.1&disp=emb&realattid=ii_13ece128398baede&zw&atsh=1
Is there any solution? It is possible to reshape the data and make something like facets?
Another option:
grid.draw(gtable:::rbind.gtable(ggplotGrob(p1),
ggplotGrob(p2), size='last'))
(ideally one would want size=max, but it has a bug preventing it to work).
There are a couple of tricks here. The first is that the tick marks get treated differently, even though you have the same discrete axis. When you do expand = c(0,0), on the scatterplot the tick is now aligned with the y axis, while on the heatmap it is in the centre of the category. My method of getting around that is to manually assign the expand value for the scatterplot so that there is a gap of of 1/2 a categorical value. Because there are 10 categorical values, in this case it is 0.05 ((1/10)/2). The points will now align with the centre of each category.
The other side of the problem is because the y labels are different sizes they throw out the rest of the alignment. The solution comes from this question, using ggplot_gtable and grid.arrange from the gridExtra package.
library(gridExtra)
#data for the scatterplot
df = data.frame(id1 = letters[1:10], C = abs(rnorm(10)))
#scatter plot
p1 <- ggplot(df, aes(x= id1, y = C)) +
geom_point(pch = 19) + theme_bw() +
# Change the expand values
scale_x_discrete(expand = c(0.05, 0.05), breaks = letters[1:10]) +
#scale_y_discrete(breaks = NULL) +
theme(legend.position = "none") + theme(axis.title.y = element_blank()) + theme(axis.title.x = element_blank())
p1
#data for the heatmap
X = data.frame(matrix(rnorm(100), nrow = 10))
names(X) = month.name[1:10]
X = melt(cbind(id1 = letters[1:10], X))
#heatmap
p2 <- ggplot(X,
aes(x = id1, y = variable, fill = value))
p2 <- p2 + geom_tile()
p2 <- p2 + scale_fill_gradientn(colours = c("blue", "white" , "red"))
p2 <- p2 + theme(legend.position = "none") + theme(axis.title.y = element_blank()) + theme(axis.title.x = element_blank())
p2 <- p2 + scale_x_discrete(expand = c(0, 0), breaks = letters[1:10])
p2 <- p2 + scale_y_discrete(expand = c(0, 0))
#Here's the gtable magic
gp1<- ggplot_gtable(ggplot_build(p1))
gp2<- ggplot_gtable(ggplot_build(p2))
#This identifies the maximum width
maxWidth = unit.pmax(gp1$widths[2:3], gp2$widths[2:3])
#Set each to the maximum width
gp1$widths[2:3] <- maxWidth
gp2$widths[2:3] <- maxWidth
#Put them together
grid.arrange(gp1, gp2)
EDIT - See #baptiste's answer for a more elegant method of alignment of the y axis