Changing whisker length of multiple boxplot in R - r

I have a dataframe of 10 variables and I plotted it in two columns. But ggplot defines whiskers as the 5th and 95th percentiles. I want the whiskers to end at Q1 - 1.5*IQR and Q3 + 1.5*IQR for each of these plots, with outliers drawn as usual. A similar question has been posted in this link, but I couldn't make use of it. Any help will be appreciated!!
library(ggplot2)
library(tidyr)
# Simulate 200 observations for each of 10 variables and reshape them to long
# format: one row per (variable, value) pair.
df <- data.frame(matrix(rnorm(2000), ncol = 10))
plot.data <- gather(df, variable, value)
# plot.data$out <- as.numeric(rep(input_data, each = nrow(x_train)))

# One horizontal boxplot per variable, laid out in two columns of facets.
# Optional overlay, kept from the original draft:
# p <- p + geom_point(aes(x = 0, y = test_data), color = "red")
p <- ggplot(plot.data, aes(x = 0, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, scales = "free_x", strip.position = "top", ncol = 2) +
  coord_flip() +
  xlab("") +
  ylab("") +
  # theme_bw() is a complete theme and resets every theme element, so it must
  # come before the element-level tweaks; otherwise it discards them.
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
p

By default (notched=FALSE), geom_boxplot() should already give you the whiskers you want: they extend from the hinges to the most extreme data points that lie within Q1 - 1.5*IQR and Q3 + 1.5*IQR. See a more current question link. Note, however, that the exact values depend on how the quantiles and the IQR are defined.
If you insist on setting them manually with stat_summary:
# Five-number summary for stat_summary(geom = "boxplot"): the box spans the
# quartiles and the whisker ends sit exactly on the 1.5 * IQR fences.
#
# x: numeric vector of observations for one group.
# Returns a named numeric vector with elements ymin, lower, middle, upper and
# ymax, where ymin = Q1 - 1.5 * IQR and ymax = Q3 + 1.5 * IQR.
f <- function(x) {
  quartiles <- quantile(x, probs = c(0.25, 0.5, 0.75))
  reach <- 1.5 * IQR(x)
  c(
    ymin = quartiles[[1]] - reach,
    lower = quartiles[[1]],
    middle = quartiles[[2]],
    upper = quartiles[[3]],
    ymax = quartiles[[3]] + reach
  )
}
# Pick out the observations that fall outside the 1.5 * IQR fences so they
# can be drawn as individual outlier points.
#
# x: numeric vector of observations for one group.
# Returns the elements of x strictly below Q1 - 1.5 * IQR or strictly above
# Q3 + 1.5 * IQR (possibly none).
o <- function(x) {
  quartiles <- quantile(x, probs = c(0.25, 0.75))
  reach <- 1.5 * IQR(x)
  lower_fence <- quartiles[[1]] - reach
  upper_fence <- quartiles[[2]] + reach
  is_outlier <- x < lower_fence | x > upper_fence
  # which() keeps only the TRUE positions, mirroring subset()'s NA handling.
  x[which(is_outlier)]
}
# Fixed seed so the simulated data, and therefore the plot, is reproducible.
set.seed(123)
df <- data.frame(matrix(rnorm(2000), ncol = 10))
plot.data <- gather(df, variable, value)
# plot.data$out <- as.numeric(rep(input_data, each = nrow(x_train)))

# Optional overlay, kept from the original draft:
# p <- p + geom_point(aes(x = 0, y = test_data), color = "red")
p <- ggplot(plot.data, aes(x = 0, y = value)) +
  # Box and whiskers come from f(); o() supplies the points beyond them.
  stat_summary(fun.data = f, geom = "boxplot") +
  # `fun` replaces the `fun.y` argument, which ggplot2 deprecated in 3.3.0.
  stat_summary(fun = o, geom = "point") +
  facet_wrap(~variable, scales = "free_x", strip.position = "top", ncol = 2) +
  coord_flip() +
  xlab("") +
  ylab("") +
  # theme_bw() is a complete theme and resets every theme element, so it must
  # come before the element-level tweaks; otherwise it discards them.
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
# Display the plot; building `p` alone draws nothing.
p

Related

ggplot within function does not fully work

I am trying to create Bland-Altman plots between 2 sets of percentages with a custom function that uses ggplot within it to generate the plot.
# Example data: two sets of 100 percentages.
Perc1 <- sample(1:100, 100)
Perc2 <- sample(1:100, 100)
d <- data.frame(Perc1, Perc2)

# Draw a Bland-Altman plot comparing two measurement columns of a data frame.
#
# dat: data frame holding the measurements.
# x, y: the two columns to compare, each given as a single column name
#   (string) or column position -- not as the measurement vectors themselves.
# Prints the plot and returns the ggplot object invisibly.
bland <- function(dat, x, y) {
  # A whole vector passed as x or y would be treated as a set of column
  # selectors, so reject anything that is not a single selector up front.
  stopifnot(is.data.frame(dat), length(x) == 1, length(y) == 1)
  pair <- dat[, c(x, y)]
  pair$avg <- rowMeans(pair)
  pair$diff <- pair[[1]] - pair[[2]]
  mean_diff <- mean(pair$diff)
  # Limits of agreement: mean difference +/- 1.96 standard deviations.
  half_width <- 1.96 * sd(pair$diff)
  lower <- mean_diff - half_width
  upper <- mean_diff + half_width
  p <- ggplot(pair, aes(x = avg, y = diff)) +
    geom_point(size = 2) +
    geom_hline(yintercept = mean_diff) +
    geom_hline(yintercept = lower, color = "red", linetype = "dashed") +
    geom_hline(yintercept = upper, color = "red", linetype = "dashed") +
    ggtitle("Bland-Altman Plot") +
    ylab("Difference Between Measurements") +
    xlab("Average Measurement")
  print(p)
  invisible(p)
}
# Columns are selected by name; the bare symbols Perc1/Perc2 would pass the
# global vectors instead of column selectors.
bland(d, "Perc1", "Perc2")
However, when I run the function none of the lines are produced with the graph, but the title and x/y labels are. If anyone can explain why this is that would be great, thanks in advance.
Try this:
(Note also, the p <- and plot(p) are not needed as the function anyway returns the last object.)
library(tidyverse)
# Two random permutations of 1..100 to compare.
Perc1 <- sample(1:100, 100)
Perc2 <- sample(1:100, 100)

# Bland-Altman plot for two numeric vectors.
#
# x, y: the measurement vectors to compare.
# The plot is drawn as a side effect of the final plot() call.
bland <- function(x, y) {
  measurements <- data.frame(x, y)
  measurements$avg <- rowMeans(measurements)
  measurements$diff <- measurements[[1]] - measurements[[2]]

  centre <- mean(measurements$diff)
  # Limits of agreement sit 1.96 standard deviations either side of the mean.
  half_width <- 1.96 * sd(measurements$diff)

  chart <- ggplot(measurements, aes(x = avg, y = diff)) +
    geom_point(size = 2) +
    geom_hline(yintercept = centre) +
    geom_hline(
      yintercept = centre - half_width,
      color = "red", linetype = "dashed"
    ) +
    geom_hline(
      yintercept = centre + half_width,
      color = "red", linetype = "dashed"
    ) +
    ggtitle("Bland-Altman Plot") +
    ylab("Difference Between Measurements") +
    xlab("Average Measurement")
  plot(chart)
}

bland(Perc1, Perc2)
Created on 2022-05-17 by the reprex package (v2.0.1)

How do I apply for loop for multiple comparisons statistics using ggpubr or rstatix in facet_wrap with free scales

I am pretty new to coding and have been teaching myself R. I tried writing some code to carry out multiple comparison statistics for facet_wrap plots with free scales but I am unable to figure out how to implement the for loop in the facet. Alternatively, I also tried to do the statistics using the rstatix package but I am unable to print the p-values from t-tests computed either using stat_compare_means function of ggpubr or using stat.test from rstatix as I have been struggling to figure out the y_position for facets with free scales. Here is the code I have written:
library(tidyverse)
library(dplyr)
library(ggplot2)
library(readxl)
library(scales)
library(stringr)
library(extrafont)
library(viridisLite)
#library(RColorBrewer)
library(ggpubr)
library(reshape2)
library(rstatix)
#library(plotly)
#library(ggpmisc)
# Load the Excel sheet and work on a plain data frame copy of it.
LungExtracellularPanel <- read_excel(
  "~/Box/Welm Labs Ubox/Pavitra Viswanath/Flow Cytometry/PVIMM-2 E0771 09-01-2020/Data Analysis/Lung 09-16-2020.xlsx"
)
df <- as.data.frame(LungExtracellularPanel)
# The text before the first space of each Sample label becomes the collection.
df$collection <- str_split_fixed(string = df$Sample, pattern = " ", n = 2)[, 1]
# Long format: one row per (Sample, collection, variable) with its value.
rdf <- melt(df, id.vars = c("Sample", "collection"))
# Wrap a label onto several lines joined by "\n".
#
# string: text to wrap.
# nwrap: line width handed to strwrap() (default 25).
# Vectorize() makes the helper operate element-wise over its arguments.
swr <- Vectorize(function(string, nwrap = 25) {
  wrapped_lines <- strwrap(string, width = nwrap)
  paste(wrapped_lines, collapse = "\n")
})
# Wrapped copy of each variable name, used as the facet label.
rdf$variable2 <- swr(rdf$variable)

# Pairwise t-tests between collections, computed separately for each facet.
# The test has to be computed first: add_y_position() operates on the test
# result, and with scales = "free" it positions the brackets relative to each
# facet's own range. (add_y_position() has no `y` argument.)
stat.test <- rdf %>%
  group_by(variable2) %>%
  t_test(value ~ collection) %>%
  add_y_position(fun = "max", scales = "free", step.increase = 0.1)
stat.test

#stuff <- combn(unique(rdf$collection),2)
# a <- NULL
# my_comparisons = list()
# j = 0
# for(i in 1:dim(stuff)[2]){
# a <- as.character(stuff[,i])
# g1 = rdf[which(rdf$collection == a[1]),]$value
# g2 = rdf[which(rdf$collection == a[2]),]$value
# if(mean(g1, na.rm=T) != 0 || mean(g2,na.rm=T) != 0){
# j = j + 1
# my_comparisons[[j]] <- c(a[1],a[2])
# }
# }

# Violin + boxplot + jittered points per collection, one facet per variable.
p <- ggplot(
  data = rdf,
  mapping = aes(x = variable2, y = value, fill = collection)
)
# `facet.by` and `scales` are not layer parameters (ggplot2 ignores them with
# a warning); faceting and free scales are handled by facet_wrap() below.
p <- p + geom_violin(
  alpha = 0.3, scale = "width",
  position = position_dodge(width = 1), size = 0.5
)
p <- p + geom_boxplot(
  alpha = 0.7, outlier.shape = NA, position = position_dodge(1)
)
p <- p + geom_point(
  position = position_jitterdodge(
    jitter.width = 0.1, jitter.height = 0, dodge.width = 1
  )
)
#p <- p + geom_jitter(position=position_jitter(width=0.1,height=0))
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_blank())
p <- p + scale_fill_viridis_d(option = "D")
p <- p + facet_wrap(~variable2, scales = "free", ncol = 6)
# Extra headroom at the top of each panel leaves space for the p-value label.
p <- p + scale_y_continuous(expand = expansion(mult = c(0.1, 0.14)))
p <- p + stat_compare_means(
  method = "anova", size = 3, hjust = 0.7, label.y.npc = "top",
  family = "Arial", fontface = "bold"
)
#p <- p + stat_pvalue_manual(stat.test, label="p")
#p <- p + stat_compare_means(comparisons = my_comparisons, size=3, family= "Arial", face= "bold")
p <- p + labs(title = "Spleen Extracellular Panel Analysis")
p <- p + ylab("% Live Cells") + xlab(" ")
p <- p + theme(text = element_text(family = "Arial", face = "bold"))
p
This is how the plot looks:

Arranging boxplot in ggplot

I have a dataframe of 10 variables and I want a boxplot of each variable, arranged in two columns. The range of each variable is different, so I want each boxplot to have its own x axis drawn below it. This is what I have tried so far, but I am unable to place the axes at the right position. Any help will be appreciated!!
library(ggplot2)
library(tidyr)
# Simulate 200 observations for each of 10 variables and reshape them to long
# format: one row per (variable, value) pair.
df <- data.frame(matrix(rnorm(2000), ncol = 10))
plot.data <- gather(df, variable, value)
# plot.data$out <- as.numeric(rep(input_data, each = nrow(x_train)))

# One horizontal boxplot per variable, laid out in two columns of facets.
# Optional overlay, kept from the original draft:
# p <- p + geom_point(aes(x = 0, y = test_data), color = "red")
p <- ggplot(plot.data, aes(x = 0, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, scales = "free_x", strip.position = "top", ncol = 2) +
  coord_flip() +
  xlab("") +
  ylab("") +
  # theme_bw() is a complete theme and resets every theme element, so it must
  # come before the element-level tweaks; otherwise it discards them.
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_text(angle = 90),
    axis.ticks.x = element_blank()
  )
p

Add a specific value of x-axis on ggplot

I am using the ggplot function to plot this kind of graph
image
I want to add the specific value of the x-axis as shown in the picture
this is my code :
# `mat` (the numeric data) and `quant` (the quantile probability) are defined
# outside this snippet.
# Spell out `probs`; `prob` only worked through partial argument matching.
quantiles <- quantile(mat, probs = quant)
x <- as.vector(mat)
# Name the column explicitly instead of relying on the implicit name.
d <- data.frame(x = x)

# Histogram on the density scale with a kernel density curve on top.
p <- ggplot(data = d, aes(x = x)) +
  theme_bw() +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 0.001, color = "black", fill = "white"
  ) +
  geom_density(
    aes(x = x, y = ..density..),
    fill = "blue", alpha = 0.5, color = "black"
  )

# Shade the area under the density curve up to the quantile.
x.dens <- density(x)
df.dens <- data.frame(x = x.dens$x, y = x.dens$y)
# NOTE(review): the comparison below assumes `quantiles` has length one (a
# single probability in `quant`); a longer vector would be recycled -- confirm.
p <- p + geom_area(
  data = subset(df.dens, x <= quantiles),
  aes(x = x, y = y),
  fill = "green", alpha = 0.6
)
print(p)

adding percentile lines to a density plot [duplicate]

This question already has answers here:
Shading a kernel density plot between two points.
(5 answers)
Closed 10 years ago.
I have some data dt = data.table(x=c(1:200),y=rnorm(200)) and I start with a density plot using ggplot2:
# Density curve of the y column of dt.
plot <- ggplot(data = dt, mapping = aes(x = y)) +
  geom_density(mapping = aes(y = ..density..))
Is there a way I can add percentile lines similar to this?
If further I could shade the segments of the graph (created by the percentile lines) similar to this, then that would be great!
Here is a possibility heavily inspired by this answer :
# NOTE(review): assumes data.table and ggplot2 are already attached -- confirm.
dt <- data.table(x = c(1:200), y = rnorm(200))

# Kernel density estimate of y on a regular grid.
dens <- density(dt$y)
df <- data.frame(x = dens$x, y = dens$y)

probs <- c(0.1, 0.25, 0.5, 0.75, 0.9)
# Spell out `probs`; `prob` only worked through partial argument matching.
quantiles <- quantile(dt$y, probs = probs)
# Index of the quantile band each grid point falls into (0 = below the first
# cut point, 5 = at or above the last one).
df$quant <- factor(findInterval(df$x, quantiles))

# Density line with one ribbon colour per band and axis breaks at the cuts.
ggplot(df, aes(x, y)) +
  geom_line() +
  geom_ribbon(aes(ymin = 0, ymax = y, fill = quant)) +
  scale_x_continuous(breaks = quantiles) +
  scale_fill_brewer(guide = "none")
# Simulated sample; only xvar feeds the density estimate below.
myd <- data.frame(xvar = rnorm(2000), yvar = rnorm(2000))
# Kernel density estimate of xvar as a two-column (x, y) data frame.
xd <- with(density(myd$xvar), data.frame(x = x, y = y))

# Shade both tails of the density curve: a lighter colour beyond |x| = 1 and
# a stronger one beyond |x| = 1.96, with the curve itself drawn on top.
p <- ggplot(xd, aes(x, y)) +
  geom_area(data = xd[xd$x < -1, ], fill = "pink") +
  geom_area(data = xd[xd$x < -1.96, ], fill = "red") +
  geom_area(data = xd[xd$x > 1, ], fill = "lightgreen") +
  geom_area(data = xd[xd$x > 1.96, ], fill = "green") +
  geom_line()
p

Resources