Barplot Indicating the statistically significant difference - r

I need to draw a bar plot for significant SNP codes (categorical) against the corresponding phenotype, similar to these plots:
I tried many ways in R and got some results but I field to got my favorite result. Here are the codes and results:
### DATA
SNP_code <- as.factor(c("GG","GA","AA","GA","GA","GG","GG","GG","GG","GA","GA","AA","GA","GA","GA","GG","GG","GG","GG","AA","GG","GG","GG","GG","AA","GG","GG","GA","GG","AA","GA","GG","GG","GG","GG","GG","GG","AA","GG","GA","GG","GG","GA","GG","GG","GA","GG","GG","GA","GA","GG","GA","GG","GA","GA","GA","GA","GA","GA","GG","GG","GG","AA","GA","GA","GA","GA","GG","GA","GG","GG","GG","GA","GA","GA","GG","GG","GA","GG","AA","GG","GG","GG","AA"))
EBV <- c(0.06663,-0.03031,-0.122,-0.02021,-0.1157,-0.08131,-0.02034,-0.06324,0.06699,-0.062,0.02736,-0.1201,-0.04846,-0.06934,-0.06023,-0.009244,-0.05648,-0.01908,0.06728,-0.06517,0.08534,0.07618,-0.0814,0.06113,-0.0795,0.1055,0.08305,0.1209,-0.05314,-0.09431,0.05185,0.1347,0.1591,0.08777,0.08326,0.1612,0.09528,-0.1002,0.1561,-0.09327,0.09474,0.1356,0.06384,0.1585,0.03235,0.1081,0.1462,-0.04082,-0.05042,0.01793,-0.1157,-0.1165,-0.009399,-0.02311,-0.108,-0.1143,0.07219,0.01376,-0.05059,-0.052,0.08494,-0.0388,-0.06346,0.07789,0.02961,-0.1126,0.1102,0.133,-0.09317,-0.1181,0.1584,0.122,0.1019,-0.04074,-0.01178,0.09523,-0.03266,-0.01258,-0.0231,-0.08259,0.05823,-0.02894,-0.008242,0.07981)
LS <- c(2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,1,2,1,1,2,1,2,2,2,1,1,1,2,2,2,2,2,1,1,2,1,2,2,2,1,2,2,2,1,1,2,1,1,1,1,1,1,1,1,1,1,2,1,1,2,1,1,2,2,1,1,2,1,2,1,1,2,1,1,1,1,1,1,1,2)
IDs <- c(1033,1081,1106,1107,1120,1194,1199,1326,1334,1340,1345,1358,1398,1404,1405,1421,1457,1459,1464,1509,1529,1542,1550,2025,2030,2095,2099,2128,2141,2153,2167,2224,2232,2238,2244,2266,2271,2280,2283,2323,2326,2337,2369,2390,2391,2396,851012,851016,851021,851055,851063,851084,851105,851109,851146,851169,851176,851198,851205,851246,851266,851292,851332,851345,851488,851489,851509,851528,851531,851547,851562,851573,851574,851578,851584,851588,851592,851622,851651,851670,851672,851684,851690,861086)
sig_snp <- data.frame(IDs, SNP_code, EBV, LS)
### Variance analysis and Mean comparison
library(dplyr)
### for LS
group_by(sig_snp, SNP_code) %>%
summarise(
count = n(),
mean = mean(LS, na.rm = TRUE),
sd = sd(LS, na.rm = TRUE))
### for EBV
group_by(sig_snp, SNP_code) %>%
summarise(
count = n(),
mean = mean(EBV, na.rm = TRUE),
sd = sd(EBV, na.rm = TRUE))
# Compute the analysis of variance
Anova.fit <- aov(EBV ~ SNP_code, data = sig_snp)
summary(Anova.fit)
# Tukey multiple pairwise-comparisons
TukeyHSD(Anova.fit)
# or
library(multcomp)
summary(glht(Anova.fit, linfct = mcp(SNP_code = "Tukey")))
### Box plot for EBV (actually I need Barplot for LS and EBV)
library(ggplot2)
library(ggpval)
plot <- ggplot(sig_snp, aes(SNP_code, EBV)) +
geom_boxplot(fill=c("red","blue", "green"), color="black", width=.7); plot
add_pval(plot, pairs = list(c(1, 3)), test='wilcox.test')
add_pval(plot, pairs = list(c(2, 3)), test='wilcox.test')
add_pval(plot, pairs = list(c(1, 2)), test='wilcox.test')
"add_pval" only use "wilcox.test" and "t.test", but I perfer Tukey.
Any help is appreciated.

There is definitely room for improvement of the code that I posted below, but at least it gives you one example of the workflow you can use for getting your "favorite" barplot:
Part A: Barchart
1) We re-organise sig_snp in order to get a dataframe with the mean of each SNP in function of EBV or LS.
library(tidyverse)
DF1 <- sig_snp %>%
pivot_longer(., cols = c(EBV,LS), names_to = "Variable", values_to = "Values") %>%
group_by(SNP_code, Variable) %>%
summarise(Mean = mean(Values),
SEM = sd(Values) / sqrt(n()),
Nb = n()) %>%
rowwise() %>%
mutate(Labels = as.character(SNP_code)) %>%
mutate(Labels = paste(unlist(strsplit(Labels,"")),collapse = "/")) %>%
mutate(Labels = paste0(Labels,"\nn = ",Nb))
# A tibble: 6 x 6
SNP_code Variable Mean SEM Nb Labels
<fct> <chr> <dbl> <dbl> <int> <chr>
1 AA EBV -0.0719 0.0202 9 "A/A\nn = 9"
2 AA LS 1.11 0.111 9 "A/A\nn = 9"
3 GA EBV -0.0141 0.0134 31 "G/A\nn = 31"
4 GA LS 1.23 0.0763 31 "G/A\nn = 31"
5 GG EBV 0.0422 0.0126 44 "G/G\nn = 44"
6 GG LS 1.48 0.0762 44 "G/G\nn = 44"
The labels column will be re-used later for the labeling of x-axis.
2) Then, we are going to calculate the total mean (that will hep to draw the "Mean" bar) by doing:
library(tidyverse)
DF2 <- sig_snp %>%
pivot_longer(., cols = c(EBV,LS), names_to = "Variable", values_to = "Values") %>%
group_by(Variable) %>%
summarise(Mean = mean(Values),
SEM = sd(Values) / sqrt(n()),
Nb = n()) %>%
mutate(SNP_code = "All") %>%
select(SNP_code, Variable, Mean, SEM, Nb) %>%
rowwise() %>%
mutate(Labels = paste0("Mean\nn = ",Nb))
# A tibble: 2 x 6
SNP_code Variable Mean SEM Nb Labels
<chr> <chr> <dbl> <dbl> <int> <chr>
1 All EBV 0.00918 0.00944 84 "Mean\nn = 84"
2 All LS 1.35 0.0522 84 "Mean\nn = 84"
3) we are binding both DF1 and DF2 and we re-organize the levels of SNP_code in order to get the correct plotting order:
library(tidyverse)
DF <- bind_rows(DF1, DF2)
DF$Labels = factor(DF$Labels,levels= c("Mean\nn = 84",
"A/A\nn = 9",
"G/A\nn = 31",
"G/G\nn = 44" ))
4) Now, we can plot it:
library(ggplot2)
ggplot(DF, aes(x = SNP_code, y = Mean, fill = SNP_code))+
geom_bar(stat = "identity", show.legend = FALSE)+
geom_errorbar(aes(ymin = Mean-SEM, ymax = Mean+SEM), width = 0.2)+
facet_wrap(.~Variable, scales = "free")+
scale_x_discrete(name = "",labels = levels(DF$Labels))
Part B: Adding statistic on the barchart
For adding statistic, you can have the use of geom_signif function from ggsignif package that allow to add statistics from an external output.
1) First create the dataframe for the output of Tukey test on EBV:
Anova.fit <- aov(EBV ~ SNP_code, data = sig_snp)
t <- TukeyHSD(Anova.fit)
stat <- t$SNP_code
Stat_EBV <- stat %>% as.data.frame() %>%
mutate(Variable = "EBV") %>%
mutate(Group = rownames(stat)) %>%
rowwise() %>%
mutate(Group1 = unlist(strsplit(Group,"-"))[1]) %>%
mutate(Group2 = unlist(strsplit(Group,"-"))[2]) %>%
mutate(labels = round(`p adj`,4))
Stat_EBV$y_pos <- c(0.06,0.08,0.1)
2) same thing for the Tukey test of LS:
Anova.fit <- aov(LS ~ SNP_code, data = sig_snp)
t <- TukeyHSD(Anova.fit)
stat <- t$SNP_code
Stat_LS <- stat %>% as.data.frame() %>%
mutate(Variable = "LS") %>%
mutate(Group = rownames(stat)) %>%
rowwise() %>%
mutate(Group1 = unlist(strsplit(Group,"-"))[1]) %>%
mutate(Group2 = unlist(strsplit(Group,"-"))[2]) %>%
mutate(labels = round(`p adj`,4))
Stat_LS$y_pos = c(1.7,1.9,2.1)
3) Binding of both stats dataframes:
library(tidyverse)
STAT <- bind_rows(Stat_EBV,Stat_LS)
# A tibble: 6 x 10
diff lwr upr `p adj` Variable Group Group1 Group2 labels y_pos
<dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 0.0578 -0.0130 0.129 0.132 EBV GA-AA GA AA 0.132 0.06
2 0.114 0.0457 0.183 0.000431 EBV GG-AA GG AA 0.0004 0.08
3 0.0563 0.0125 0.100 0.00821 EBV GG-GA GG GA 0.0082 0.1
4 0.115 -0.303 0.532 0.790 LS GA-AA GA AA 0.790 1.7
5 0.366 -0.0373 0.770 0.0832 LS GG-AA GG AA 0.0832 1.9
6 0.251 -0.00716 0.510 0.0585 LS GG-GA GG GA 0.0585 2.1
4) Get the barchart and add the statistic results:
library(ggplot2)
library(ggsignif)
ggplot(DF, aes(x = SNP_code, y = Mean, fill = SNP_code))+
geom_bar(stat = "identity", show.legend = FALSE)+
geom_errorbar(aes(ymin = Mean-SEM, ymax = Mean+SEM), width = 0.2)+
geom_signif(inherit.aes = FALSE, data = STAT,
aes(xmin=Group1, xmax=Group2, annotations=labels, y_position=y_pos),
manual = TRUE)+
facet_wrap(.~Variable, scales = "free")+
scale_x_discrete(name = "",labels = levels(DF$Labels))
I hope it looks what you are expecting.

Related

barplot with different factor order for each x-axis tick

I was answering this question where #Léo wanted a barplot with stat = "identity" and position = "identity". This causes the bars (one for every value of the fill aesthetic) to get on top of eachother, making some to get hidden:
His solution was to set alpha = 0.5, but he didn't liked the result as the colors mixed in different ways in each x-axis tick. Thus, i figured that the solution would be to have a different color ordering for each x-axis tick, but i don't know how to do it in ggplot.
What I've tried:
Dummy data:
library(tidyverse)
set.seed(7)
df = tibble(
categories = rep(c("a", "b", "c"), each = 3) %>% factor(),
xaxis = rep(1:3, 3) %>% factor(),
yaxis = runif(9))
What plotted the "original" graph, shown above:
ggplot() +
geom_bar(aes(xaxis, yaxis, fill = categories), df,
stat = "identity", position = "identity")
My attempt: changing the categories levels order and creating a different geom_bar for each x-axis value with a for loop:
g = ggplot()
for(x in unique(df$xaxis)){
df.x = df %>% filter(xaxis == x) %>% mutate(categories = fct_reorder(categories, yaxis))
g = g + geom_bar(aes(xaxis, yaxis, fill = categories), df.x,
stat = "identity", position = "identity")}
plot(g)
The levels on df.x actually change to the correct order for every iteration, but the same graph as before gets produced.
I draw a traditional overlapping plot and (if i understood correctly) your desired plot below to compare results:
library(tidyverse)
set.seed(7)
df = tibble(
categories = rep(c("a", "b", "c"), each = 3) %>% factor(),
xaxis = rep(1:3, 3) %>% factor(),
yaxis = runif(9))
ggplot() +
geom_bar(aes(xaxis, yaxis, fill = categories, group=categories), df, alpha=0.8,
stat = "identity", position = position_dodge(width=0.3,preserve = "single"))
df<-df %>% group_by(xaxis) %>% mutate(rank=rank(-yaxis)) %>%
pivot_wider(values_from=yaxis, names_from = rank, values_fill = 0,
names_sort = T, names_prefix = "rank")
print(df)
#> # A tibble: 9 × 5
#> # Groups: xaxis [3]
#> categories xaxis rank1 rank2 rank3
#> <fct> <fct> <dbl> <dbl> <dbl>
#> 1 a 1 0.989 0 0
#> 2 a 2 0 0.398 0
#> 3 a 3 0 0 0.116
#> 4 b 1 0 0 0.0697
#> 5 b 2 0 0 0.244
#> 6 b 3 0.792 0 0
#> 7 c 1 0 0.340 0
#> 8 c 2 0.972 0 0
#> 9 c 3 0 0.166 0
g <- reduce(
map(paste0("rank",1:3),
~geom_bar(aes(xaxis, .data[[.x]], fill=categories), stat="identity", position="identity")),
`+`, .init = ggplot(df) )
g
Created on 2022-11-02 with reprex v2.0.2
EDIT
It is easier, thanks to Park and this post
set.seed(7)
df = tibble(
categories = rep(c("a", "b", "c"), each = 3) %>% factor(),
xaxis = rep(1:3, 3) %>% factor(),
yaxis = runif(9))
df %>% group_by(xaxis) %>% arrange(rank(-yaxis)) %>%
ggplot() + geom_bar(aes(xaxis, yaxis, fill=categories), stat="identity", position="identity")
How about this?
df %>%
arrange(xaxis, yaxis) %>%
group_by(xaxis) %>%
mutate(yaxis = yaxis - lag(yaxis, default = 0)) %>%
ggplot() +
geom_bar(aes(xaxis, yaxis, fill = categories),
stat = "identity", position = "stack")

Add means to histograms by group in ggplot2

I am following this source to do histograms by group in ggplot2.
The sample data looks like this:
set.seed(3)
x1 <- rnorm(500)
x2 <- rnorm(500, mean = 3)
x <- c(x1, x2)
group <- c(rep("G1", 500), rep("G2", 500))
df <- data.frame(x, group = group)
And the code:
# install.packages("ggplot2")
library(ggplot2)
# Histogram by group in ggplot2
ggplot(df, aes(x = x, fill = group, colour = group)) +
geom_histogram(alpha = 0.5, position = "identity")
I know that adding a line like:
+geom_vline(aes(xintercept=mean(group),color=group,fill=group), col = "red")
Should allow me to get what I am looking for, but I am obtaining just an histogram with one mean, not a mean by group:
Do you have any suggestions?
I would compute the mean into the dataframe:
library(ggplot2)
library(dplyr)
df %>%
group_by(group) %>%
mutate(mean_x = mean(x))
output is:
# A tibble: 1,000 × 3
# Groups: group [2]
x group mean_x
<dbl> <chr> <dbl>
1 -0.962 G1 0.0525
2 -0.293 G1 0.0525
3 0.259 G1 0.0525
4 -1.15 G1 0.0525
5 0.196 G1 0.0525
6 0.0301 G1 0.0525
7 0.0854 G1 0.0525
8 1.12 G1 0.0525
9 -1.22 G1 0.0525
10 1.27 G1 0.0525
# … with 990 more rows
So do:
library(ggplot2)
library(dplyr)
df %>%
group_by(group) %>%
mutate(mean_x = mean(x)) %>%
ggplot(aes(x, fill = group, colour = group)) +
geom_histogram(alpha = 0.5, position = "identity") +
geom_vline(aes(xintercept = mean_x), col = "red")
Output is:
In addition to the previous suggestion, you can also use separately stored group means, i. e. two instead of nrow=1000 highly redundant values:
## a 'tidy' (of several valid ways for groupwise calculation):
group_means <- df %>%
group_by(group) %>%
summarise(group_means = mean(x, na.rm = TRUE)) %>%
pull(group_means)
## ... ggplot code ... +
geom_vline(xintercept = group_means)
A straightforward method without precomputation would be:
ggplot(df, aes(x = x, fill = group, colour = group)) +
geom_histogram(alpha = 0.5, position = "identity") +
geom_vline(xintercept = tapply(df$x, df$group, mean), col = "red")

How to select specific two groups from a list of data for t-test and bar plot?

How to select two groups from data for the t-test and bar plot?
I have data made of several groups/substrate (i.e., O1, O2, O3, S_O4, S_O5 etc.), and I need to draw a bar plot using specific two group/substrate (e.g., O1 and S_O5) and then run t-test. I need to help. Would you please check the attached link for Excel data?
library(ggpubr)
library(rstatix)
library(xlsx)
library(patchwork)
df<-read.xlsx("umar_02.xlsx", header = T, 1)
# Statistical test
######## Figure 1 ########
stat.test <- df %>%
t_test(Moisture_content ~ substrate) %>%
add_significance()
stat.test
# Box plots with p-values
bxp1 <- ggboxplot(df, x = "substrate", y = "Moisture_content", fill = "substrate",
palette = c("#00AFBB", "#E7B800"), width = 0.35)
stat.test <- stat.test %>% add_xy_position(x = "substrate")
bxp1 +
stat_pvalue_manual(stat.test, label = "p = {p}") +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1)))+
theme(
aspect.ratio = 3
)
Excel data: enter link description here
df <- rio::import("https://docs.google.com/spreadsheets/d/1YZQRihyc5aTWehda8Wr24xwm23a87jv7/edit#gid=1007932109")
library(ggpubr)
library(rstatix)
stat.test <- df %>%
filter(Ortamlar %in% c("O1", "S_O5")) %>%
t_test(Moisture_content ~ Ortamlar) %>%
add_significance()
stat.test
stat.test
# # A tibble: 1 x 9
# .y. group1 group2 n1 n2 statistic df p p.signif
# * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
# 1 Moisture_content O1 S_O5 3 3 -2.20 2.50 0.132 ns
# Box plots with p-values
bxp1 <- df %>%
filter(Ortamlar %in% c("O1", "S_O5")) %>%
ggboxplot( x = "Ortamlar", y = "Moisture_content", fill = "Ortamlar",
palette = c("#00AFBB", "#E7B800"), width = 0.35, xlab = "Substrate")
stat.test %>% add_xy_position(x = "Ortamlar")
stat.test <- stat.test %>% mutate(y.position=5.15)
bxp1 +
stat_pvalue_manual(stat.test, label = "p = {p}") +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1)))+
theme(
aspect.ratio = 3
)
We may need to filter with %in%
library(dplyr)
stat.test <- df %>%
filter(Ortamlar %in% c("O1", "S_O5")) %>%
rename(substrate = Ortamlar) %>%
t_test(Moisture_content ~ substrate) %>%
add_significance()
-output
stat.test
# A tibble: 1 x 9
.y. group1 group2 n1 n2 statistic df p p.signif
* <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
1 Moisture_content O1 S_O5 3 3 -2.20 2.50 0.132 ns

How to visualize multiple bar plots in one (or splitted) pdf

I'm using the tidyverse-ggplot2 combination to plot multiple bar plots. In one of my comparisons i would like to have even up to 300 single plots. I was wondering if there is a possibility to make sure that the plots will be visible in the pdf file and not look like the attached example
If possible I would prefer to have all the plots in one single pdf file, but if not, also multiple pages will be ok.
The command to plot the bar charts is
common %>%
as_tibble(rownames="gene") %>%
left_join(x= ., y = up[,1:2], by = c("gene" = "ensembl_gene_id") ) %>%
pivot_longer(starts_with("S"), names_to="sample", values_to="counts") %>%
left_join(groups, by="sample") %>%
group_by(mgi_symbol, group, cond, time) %>%
summarize(mean_count=mean(counts)) %>%
ggplot( aes(x = time, y = mean_count, fill=cond)) +
geom_bar(stat = "identity", position = position_dodge(width=0.9) ) +
scale_fill_manual(values=c("darkblue", "lightblue", "black")) +
facet_wrap(~mgi_symbol, scales = "free", ncol = 5) +
theme_bw()
I forgot to add the group table
groups <- tibble(
sample= colnames(normCounts),
group = rep(seq(1, ncol(normCounts)/3), each=3),
cond = rep(c("WT", "GCN2-KO", "GCN1-KO"), each = 12),
time = rep(rep(c("0h", "1h", "4h", "8h"), each=3), times = 3 )
)
thanks
Adding the command with the group_map was as such
common %>%
as_tibble(rownames="gene") %>%
left_join(x= ., y = up[,1:2], by = c("gene" = "ensembl_gene_id") ) %>%
pivot_longer(starts_with("S"), names_to="sample", values_to="counts") %>%
left_join(groups, by="sample") %>%
group_by(mgi_symbol, group, cond, time) %>%
summarize(mean_count=mean(counts)) %>%
group_map(function(g, ...)
ggplot(g, aes(x = time, y = mean_count, fill=cond)) +
geom_bar(stat = "identity", position = position_dodge(width=0.9) ) +
scale_fill_manual(values=c("darkblue", "lightblue", "black")) +
facet_wrap(~mgi_symbol, scales = "free", ncol = 5) +
theme_bw()
)
EDIT
This is how the data looks like in the input table (after summarizing the means)
df <-
common %>%
as_tibble(rownames="gene") %>%
left_join(x= ., y = up[,1:2], by = c("gene" = "ensembl_gene_id") ) %>%
pivot_longer(starts_with("S"), names_to="sample", values_to="counts") %>%
left_join(groups, by="sample") %>%
group_by(mgi_symbol, group, cond, time) %>%
summarize(mean_count=mean(counts)) %>%
ungroup()
df
#>`summarise()` regrouping output by 'mgi_symbol', 'group', 'cond' (override with `.groups` argument)
#> # A tibble: 1,212 x 5
#> mgi_symbol group cond time mean_count
#> <chr> <int> <chr> <chr> <dbl>
#> 1 0610031O16Rik 1 WT 0h 14.4
#> 2 0610031O16Rik 2 WT 1h 30.9
#> 3 0610031O16Rik 3 WT 4h 45.5
#> 4 0610031O16Rik 4 WT 8h 56.0
#> 5 0610031O16Rik 5 GCN2-KO 0h 18.9
#> 6 0610031O16Rik 6 GCN2-KO 1h 39.4
#> 7 0610031O16Rik 7 GCN2-KO 4h 13.9
#> 8 0610031O16Rik 8 GCN2-KO 8h 13.3
#> 9 0610031O16Rik 9 GCN1-KO 0h 12.3
#> 10 0610031O16Rik 10 GCN1-KO 1h 25.3
#> # … with 1,202 more rows
Start with some dummy data. This is the data after you've finished running left_join, pivot_longer, group_by, summarize.
library(tidyverse)
df <- tibble(
time = 1:5,
mean_count = 1:5,
cond = "x"
) %>%
expand_grid(mgi_symbol = c(letters, LETTERS))
Create a column group which represents what page the mgi_symbol belongs on.
plots_per_page <- 20
df <-
df %>%
mutate(group = (dense_rank(mgi_symbol) - 1) %/% plots_per_page)
Create all the plots with group_map.
plots <-
df %>%
group_by(group) %>%
group_map(function(g, ...) {
ggplot(g, aes(x = time, y = mean_count, fill=cond)) +
geom_bar(stat = "identity", position = position_dodge(width=0.9) ) +
scale_fill_manual(values=c("darkblue", "lightblue", "black")) +
facet_wrap(~mgi_symbol, scales = "free", ncol = 5) +
theme_bw()
})
Save as multiple pages using ggpubr
ggpubr::ggexport(
ggpubr::ggarrange(plotlist = plots, nrow = 1, ncol = 1),
filename = "plots.pdf"
)

Dygraphs in R: Plot Ribbon and mean line of different groups

I recently started working dygraphs in R, and wanted to achieve a ribbon line plot with it.
Currently, I have the below ggplot which displays a ribbon (for data from multiple batches over time) and its median for two groups. Below is the code for it.
ggplot(df,
aes(x=variable, y=A, color=`[category]`, fill = `[category]`)) +
stat_summary(geom = "ribbon", alpha = 0.35) +
stat_summary(geom = "line", size = 0.9) +
theme_minimal()+ labs(x="TimeStamp")
I could add the median solid line on the dygraph, but I'm unable to add the ribbon to it. Below is the dygraph and my code for it.
df_Medians<- df%>%
group_by(variable,`[category]`) %>%
summarise(A = median(A[!is.na(A)]))
median <- cbind(as.ts(df_Medians$A))
dygraph(median) %>%
dyRangeSelector()
Is there anyway to plot something similar to the above ggplot on dygraphs? Thanks in advance.
See if the following serves your purpose:
ggplot code (for mean, replace median_se with mean_se in the stat_summary layers):
library(ggplot2)
ggplot(df,
aes(x=variable, y=A, color=category, fill = category)) +
stat_summary(geom = "ribbon", alpha = 0.35, fun.data = median_se) +
stat_summary(geom = "line", size = 0.9, fun.data = median_se) +
theme_minimal()
dygraph code (for mean, replace median_se with mean_se in the summarise step):
library(dplyr)
library(dygraph)
# calculate summary statistics for each category, & spread results out such that each row
# corresponds to one position on the x-axis
df_dygraph <- df %>%
group_by(variable, category) %>%
summarise(data = list(median_se(A))) %>%
ungroup() %>%
tidyr::unnest(data) %>%
mutate(category = as.integer(factor(category))) %>% # optional: standardizes the column
# names for summary stats
tidyr::pivot_wider(id_cols = variable, names_from = category,
values_from = c(ymin, y, ymax))
> head(df_dygraph)
# A tibble: 6 x 7
variable ymin_1 ymin_2 y_1 y_2 ymax_1 ymax_2
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 3817. 2712. 4560. 2918. 5304. 3125.
2 2 3848. 2712. 4564. 2918. 5279. 3125.
3 3 3847. 2826. 4564 2961 5281. 3096.
4 4 3722. 2827. 4331 2962. 4940. 3098.
5 5 3833. 2831. 4570. 2963 5306. 3095.
6 6 3835. 2831. 4572 2964 5309. 3097.
dygraph(df_dygraph, main = "Dygraph title") %>%
dySeries(c("ymin_1", "y_1", "ymax_1"), label = "Category 1") %>%
dySeries(c("ymin_2", "y_2", "ymax_2"), label = "Category 2") %>%
dyRangeSelector()
Code for median counterpart of mean_se:
median_se <- function(x) {
x <- na.omit(x)
se <- sqrt(var(x) / length(x))
med <- median(x)
ggplot2:::new_data_frame(list(y = med,
ymin = med - se,
ymax = med + se),
n = 1)
}
Sample data:
df <- diamonds %>%
select(price, cut) %>%
filter(cut %in% c("Fair", "Ideal")) %>%
group_by(cut) %>%
slice(1:1000) %>%
mutate(variable = rep(seq(1, 50), times = 20)) %>%
ungroup() %>%
rename(A = price, category = cut)

Resources