I'm trying to add significance annotations to an errorbar plot with a factor x-axis and dodged groups within each level of the x-axis. It is a similar but NOT identical use case to this
My base errorbar plot is:
library(ggplot2)
library(dplyr)
pres_prob_pd = structure(list(x = structure(c(1, 1, 1, 2, 2, 2, 3, 3, 3), labels = c(`1` = 1,
`2` = 2, `3` = 3)), predicted = c(0.571584427222816, 0.712630712634987,
0.156061969566517, 0.0162388386564817, 0.0371877245103279, 0.0165022541901018,
0.131528946944238, 0.35927812866896, 0.0708662221985375), std.error = c(0.355802875027348,
0.471253661425626, 0.457109887762665, 0.352871728451576, 0.442646879181155,
0.425913568532558, 0.376552208691762, 0.48178172708116, 0.451758041335245
), conf.low = c(0.399141779923204, 0.496138837620712, 0.0701919316506831,
0.00819832576725402, 0.0159620304815404, 0.00722904089045731,
0.0675129352870401, 0.17905347369819, 0.030504893442457), conf.high = c(0.728233665534388,
0.861980236164486, 0.311759350126477, 0.031911364587827, 0.0842227723261319,
0.0372248587668487, 0.240584344249407, 0.590437963881823, 0.156035177669385
), group = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain",
"neutral", "uncertain"), class = "factor"), group_col = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain", "neutral",
"uncertain"), class = "factor"), language = structure(c(2L, 2L,
2L, 1L, 1L, 1L, 3L, 3L, 3L), .Label = c("english", "dutch", "german"
), class = "factor"), top = c(0.861980236164486, 0.861980236164486,
0.861980236164486, 0.0842227723261319, 0.0842227723261319, 0.0842227723261319,
0.590437963881823, 0.590437963881823, 0.590437963881823)), row.names = c(NA,
-9L), groups = structure(list(language = structure(1:3, .Label = c("english",
"dutch", "german"), class = "factor"), .rows = structure(list(
4:6, 1:3, 7:9), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
#dodge
pd = position_dodge(.75)
#plot
p = ggplot(pres_prob_pd,aes(x=language,y=predicted,color=group,shape=group)) +
geom_point(position=pd,size=2) +
geom_errorbar(aes(ymax=conf.high,ymin=conf.low),width=.125,position=pd)
p
What I want to do is annotate the plot such that the contrasts between group within each level of language are annotated for significance. I've plotted points representing the relevant contrasts and (toy) sig. annotations as follows:
#bump function
f = function(x){
v = c()
bump=0.025
constant = 0
for(i in x){
v = c(v,i+constant+bump)
bump = bump + 0.075
}
v
}
#create contrasts
combs = data.frame(gtools::combinations(3, 2, v=c("certain", "neutral", "uncertain"), set=F, repeats.allowed=F)) %>%
mutate(contrast=c("cont_1","cont_2","cont_3"))
combs = rbind(combs %>% mutate(language = 'english'),
combs %>% mutate(language='dutch'),
combs %>% mutate(language = "german")) %>%
left_join(select(pres_prob_pd,language:top)%>%distinct(),by='language') %>%
group_by(language)
#long transform and calc y_pos
combs_long = mutate(combs,y_pos=f(top)) %>% gather(long, probability, X1:X2, factor_key=TRUE) %>% mutate(language=factor(language,levels=c("english","dutch","german"))) %>%
arrange(language,contrast)
#back to wide
combs_wide =combs_long %>% spread(long,probability)
combs_wide$p = rep(c('***',"*","ns"),3)
#plot
p +
geom_point(data=combs_long,
aes(x = language,
color=probability,
shape=probability,
y=y_pos),
inherit.aes = T,
position=pd,
size=2) +
geom_text(data=combs_wide,
aes(x=language,
label=p,
y=y_pos+.025,
group=X1),
color='black',
position=position_dodge(.75),
inherit.aes = F)
What I am failing to achieve is plotting a line connecting each of the contrasts of group within each level of language, as is standard when annotating significant group-wise differences. Any help much appreciated!
Related
This question already has answers here:
Set the order of a stacked bar chart by the value of one of the variables
(2 answers)
Closed 9 months ago.
Using the code below, I have created the below chart. To make it easier for people to see the pattern, I'd like to order states from left to right according to the y values (Dx) by age 65.
Thanks,
NM
Here is my data:
structure(list(Age = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("30", "50", "65"), class = "factor"), Dx = c(3.057, 7.847, 17.157, 2.851, 8.861, 21.885, 2.521, 7.889, 21.328), PopName = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("AK", "AL", "AR"), class = "factor")), row.names = c(NA, -9L), class = c("tbl_df", "tbl", "data.frame"))
library(tidyverse)
library(tidyverse)
CAPS_2019 %>%
group_by(Age, PopName) %>%
mutate(PopName1 = sum(Dx)) %>%
ungroup() %>%
ggplot(aes(x = fct_reorder(PopName, PopName1), y = Dx, fill = factor(as.character(Age)))) +
geom_col(position = position_stack(reverse = TRUE)) +
theme_classic()+
coord_flip()+
labs(x = "State", y = "Deaths (%)", caption = (""), face = "bold", fill = "Age")
Update 2 Try this in your new dataset Age and Popname are already factors. So maybe this should work as expected:
CAPS_2019_data %>%
group_by(Age, PopName) %>%
mutate(PopName1 = sum(Dx)) %>%
ungroup() %>%
ggplot(aes(x = reorder(PopName, PopName1), y = Dx, fill = Age)) +
geom_col(position = position_stack(reverse = TRUE)) +
theme_classic()+
coord_flip()+
labs(x = "State", y = "Deaths (%)", caption = (""), face = "bold", fill = "Age")
Update:
data:
CAPS_2019 <- structure(list(Age = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L), .Label = c("30", "50", "65"), class = "factor"), Dx = c(3.057,
7.847, 17.157, 2.851, 8.861, 21.885, 2.521, 7.889, 21.328), PopName = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("AK", "AL", "AR"), class = "factor")), row.names = c(NA,
-9L), class = c("tbl_df", "tbl", "data.frame"))
To get the stacks ordered use position = position_stack(reverse = TRUE)
To order y axis do some preprocessing with group_by and sum and use fct_reorder from forcats package (it is in tidyverse)
library(tidyverse)
CAPS_2019 %>%
group_by(Age, PopName) %>%
mutate(PopName1 = sum(Dx)) %>%
ungroup() %>%
ggplot(aes(x = fct_reorder(PopName, PopName1), y = Dx, fill = factor(as.character(Age)))) +
geom_col(position = position_stack(reverse = TRUE)) +
theme_classic()+
coord_flip()+
labs(x = "State", y = "Deaths (%)", caption = (""), face = "bold", fill = "Age")
I am working on generating a hierarchical edge plot where the edge's color/transparency/thickness varies by the column (pvalue) in my connect dataframe, however the color/transparency/thickness of the edges in the plot I generated don't always map to the values in column (pvalue). For example, subgroup1 and subgroup4 should have the strongest thickest connection (pvalue is E-280), when in fact they don't, rather the connection between subgroup3 and subgroup4 looks to be strongest.
This data generates a reproducible example:
> dput(vertices)
structure(list(name = structure(c(3L, 1L, 2L, 4L, 5L, 6L, 7L), .Label = c("gp1",
"gp2", "origin", "subgroup1", "subgroup2", "subgroup3", "subgroup4"
), class = "factor"), id = c(NA, NA, NA, 1L, 2L, 3L, 4L), angle = c(NA,
NA, NA, 0, -90, 0, -90), hjust = c(NA, NA, NA, 1, 1, 1, 1)), row.names = c(NA,
-7L), class = "data.frame")
> dput(hierarchy)
structure(list(from = structure(c(3L, 3L, 1L, 1L, 2L, 2L), .Label = c("gp1",
"gp2", "origin"), class = "factor"), to = structure(1:6, .Label = c("gp1",
"gp2", "subgroup1", "subgroup2", "subgroup3", "subgroup4"), class = "factor")), class = "data.frame", row.names = c(NA,
-6L))
> dput(connect)
structure(list(from = structure(c(1L, 1L, 2L, 3L, 1L, 2L, 3L,
1L), .Label = c("subgroup1", "subgroup2", "subgroup3"), class = "factor"),
to = structure(c(1L, 2L, 2L, 1L, 3L, 3L, 3L, 3L), .Label = c("subgroup2",
"subgroup3", "subgroup4"), class = "factor"), pvalue = c(1.68e-204,
1.59e-121, 9.32e-73, 9.32e-73, 1.59e-21, 9.32e-50, 9.32e-40,
9.32e-280)), class = "data.frame", row.names = c(NA, -8L))
and this is the code I used to make this example plot:
from <- match( connect$from, vertices$name)
to <- match( connect$to, vertices$name)
col <- connect$pvalue
#Let's add information concerning the label we are going to add: angle, horizontal adjustement and potential flip
#calculate the ANGLE of the labels
vertices$id <- NA
myleaves <- which(is.na( match(vertices$name, hierarchy$from) ))
nleaves <- length(myleaves)
vertices$id[ myleaves ] <- seq(1:nleaves)
vertices$angle <- 90 - 360 * vertices$id / nleaves
# calculate the alignment of labels: right or left
# If I am on the left part of the plot, my labels have currently an angle < -90
vertices$hjust <- ifelse( vertices$id < 41, 1, 0)
# flip angle BY to make them readable
vertices$angle <- ifelse(vertices$angle < -90, vertices$angle+180, vertices$angle)
mygraph <- graph_from_data_frame( hierarchy, vertices=vertices )
ggraph(mygraph, layout = 'dendrogram', circular = TRUE) +
geom_node_point(aes(filter = leaf, x = x*1.05, y=y*1.05), size = 2, alpha = 0.8) +
geom_conn_bundle(data = get_con(from = from, to = to, col = col), aes(colour=col, alpha = col, width = col)) +
geom_node_text(aes(x = x*1.1, y=y*1.1, filter = leaf, label=name, angle = angle, hjust=hjust), size=3.5, alpha=0.6) +scale_edge_color_continuous(trans = "log",low="red", high="yellow")+ scale_edge_alpha_continuous(trans = "log",range = c(1, 0.1)) +scale_edge_width_continuous(trans = "log", range = c(4, 1))+
theme_void()
I think there is wrong mapping somewhere but I can't figure out where. Thank you so much for your input!
I believe there is a bug in this library. Rearranging the input data by the column of choice (pvalue in my case) in an ascending order helped but did not solve the issue.
connect_new <- arrange(connect, pvalue)
and I found the solution in a github issue submitted by another user. The subgroups within each group need to be ordered alphabetically in the hierarchy and vertices file. In addition, in the connect dataframe, the subgroups need to be ordered following the same order in the hierarchy and vertices file. Thanks to zhuxr11
I have problem ploting credibility interval like this:
My data structure is following,L1,L2,M,U1,U2 stand for 0.025quant,0.25quant,0.5quant,0.75quant,0.975quant,respectively.
`
structure(list(approach = structure(c(1L, 2L, 1L, 2L, 1L, 2L), class = "factor", .Label = c("INLA",
"rjags")), param = structure(c(1L, 2L, 3L, 1L, 2L, 3L), class = "factor", .Label = c("alpha",
"beta", "sig2")), L1 = c(0.0844546867936143, 1.79242348175439,
0.163143886545317, 0.0754165380733685, 1.79067991488052, 3.66675821267498
), L2 = c(0.60090835904286, 1.95337968870806, 0.898159977552433,
0.606017177641373, 1.95260448314298, 4.07080184844179), M = c(0.870204161297956,
2.03768437879748, 2.20651061559405, 0.87408237273113, 2.03725552264872,
4.32531027636171), U2 = c(1.13905085248391, 2.12210930874551,
4.26836270504725, 1.66260576926063, 2.28900567640091, 5.10063756831338
), U1 = c(1.65214011950274, 2.28396345192398, 4.9109804477583,
1.1450384685802, 2.12117799328209, 4.55657971279654), AP = structure(c(1L,
4L, 5L, 2L, 3L, 6L), .Label = c("INLA.alpha", "rjags.alpha",
"INLA.beta", "rjags.beta", "INLA.sig2", "rjags.sig2"), class = "factor")), .Names = c("approach",
"param", "L1", "L2", "M", "U2", "U1", "AP"), row.names = c(NA,
-6L), class = "data.frame")`
I referenced this answerenter link description here,but 'fill' seems only work for boxplot case.the code I tried so far is:
CI$AP=interaction(CI$approach,CI$param)
p=ggplot(CI,aes(y=AP))+geom_point(aes(x=M))
p=p+geom_segment(aes(x=L1,xend=U1,y=AP,yend=AP))
p=p+geom_segment(aes(x=L2,xend=U2,y=AP,yend=AP),size=1.5)
It is far away from what I want.
Many thanks!
How about the following:
ggplot(df, aes(x = param, y = M, colour = approach)) +
geom_point(position = position_dodge2(width = 0.3), size = 3) +
geom_linerange(
aes(ymin = L2, ymax = U2, x = param),
position = position_dodge2(width = 0.3),
size = 2) +
geom_linerange(
aes(ymin = L1, ymax = U1, x = param),
position = position_dodge2(width = 0.3),
size = 1) +
coord_flip() +
labs(x = "Parameter", y = "Estimate")
Sample data
df <- structure(list(approach = structure(c(1L, 2L, 1L, 2L, 1L, 2L), class = "factor", .Label = c("INLA",
"rjags")), param = structure(c(1L, 2L, 3L, 1L, 2L, 3L), class = "factor", .Label = c("alpha",
"beta", "sig2")), L1 = c(0.0844546867936143, 1.79242348175439,
0.163143886545317, 0.0754165380733685, 1.79067991488052, 3.66675821267498
), L2 = c(0.60090835904286, 1.95337968870806, 0.898159977552433,
0.606017177641373, 1.95260448314298, 4.07080184844179), M = c(0.870204161297956,
2.03768437879748, 2.20651061559405, 0.87408237273113, 2.03725552264872,
4.32531027636171), U2 = c(1.13905085248391, 2.12210930874551,
4.26836270504725, 1.66260576926063, 2.28900567640091, 5.10063756831338
), U1 = c(1.65214011950274, 2.28396345192398, 4.9109804477583,
1.1450384685802, 2.12117799328209, 4.55657971279654), AP = structure(c(1L,
4L, 5L, 2L, 3L, 6L), .Label = c("INLA.alpha", "rjags.alpha",
"INLA.beta", "rjags.beta", "INLA.sig2", "rjags.sig2"), class = "factor")), .Names = c("approach",
"param", "L1", "L2", "M", "U2", "U1", "AP"), row.names = c(NA,
-6L), class = "data.frame")
I have the following data frame:
structure(list(StepsGroup = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L), .Label = c("(-Inf,3e+03]", "(3e+03,1.2e+04]", "(1.2e+04, Inf]"
), class = "factor"), GlucoseGroup = structure(c(1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L), .Label = c("<100", "100-180", ">180"
), class = "factor"), n = c(396L, 1600L, 229L, 787L, 4182L, 375L,
110L, 534L, 55L), freq = c(0.177977528089888, 0.719101123595506,
0.102921348314607, 0.147267964071856, 0.782559880239521, 0.0701721556886228,
0.157367668097282, 0.763948497854077, 0.0786838340486409)), class =
c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -9L), vars = "StepsGroup",
labels = structure(list(
StepsGroup = structure(1:3, .Label = c("(-Inf,3e+03]", "(3e+03,1.2e+04]",
"(1.2e+04, Inf]"), class = "factor")), class = "data.frame", row.names =
c(NA, -3L), vars = "StepsGroup", drop = TRUE), indices = list(0:2,
3:5, 6:8), drop = TRUE, group_sizes = c(3L, 3L, 3L), biggest_group_size =
3L)
I would like to create a stacked bar plot, and add a summary of each StepsGroup on top of each bar. So the first group will have 2225, the second 5344 and the third 699.
I am using the following script:
ggplot(d_stepsFastingSummary , aes(y = freq, x = StepsGroup, fill =
GlucoseGroup)) + geom_bar(stat = "identity") +
geom_text(aes(label = sum(n()), vjust = 0))
The part until before the geom_text works, but for the last bit I get the following error:
Error: This function should not be called directly
Any idea how to add the aggregated quantity?
We could create a new dataframe stacked_df which would have sum for each StepsGroup
stacked_df <- df %>% group_by(StepsGroup) %>% summarise(nsum = sum(n))
ggplot(df) +
geom_bar(aes(y = freq, x = StepsGroup, fill= GlucoseGroup),stat = "identity") +
geom_text(data = stacked_df, aes(label = nsum, StepsGroup,y = 1.1))
I have the following data.
> dput(testdat)
structure(list(Type = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Saline",
"Compound1"), class = "factor"), Treatment = structure(c(1L,
2L, 3L, 4L, 6L, 5L), .Label = c(".0032uM", ".016uM", ".08uM",
".4uM", "2uM", "10uM"), class = "factor"), Peak = c(1071.28430020209,
1458.23366806524, 2714.49856342393, 3438.83453920159, 3938.86391759534,
2980.10159109856), Area1 = c(3312.99749863082, 4798.35142770291,
9044.21362002965, 11241.1497514069, 11575.3444645068, 9521.69011119236
), SS1 = c(781.759834505516, 1191.6273298958, 2180.02082601411,
2601.33855989239, 2492.11886600804, 2185.39715502702), Conc = c(0.0032,
0.016, 0.08, 0.4, 10, 2), logconc = c(-2.49485002168009, -1.79588001734408,
-1.09691001300806, -0.397940008672038, 1, 0.301029995663981),
Conc_nm = c(3.2, 16, 80, 400, 10000, 2000), logconc_nm = c(0.505149978319906,
1.20411998265592, 1.90308998699194, 2.60205999132796, 4,
3.30102999566398)), .Names = c("Type", "Treatment", "Peak",
"Area1", "SS1", "Conc", "logconc", "Conc_nm", "logconc_nm"), row.names = 2:7, class = "data.frame")
I've fitted the data (Peak) with a nls regression using the following code:
fit = nls(Peak ~ SSlogis(logconc_nm,Asym,xmid,scal),data=testdat)
This gives me a nice fit and I'm happy with it so I plot the dose response as follows:
m <- coef(fit)
vallog <- as.numeric(format((m[3]),dig=4))
val =round(10^val,2)
ggplot(data = testdat,aes(logconc_nm,Peak))+
geom_point()+
scale_x_log10(breaks=round(testdat$logconc_nm,2))+
geom_smooth(method = 'nls',
formula = y ~ SSfpl(x,A,B,xmid,scal),se=FALSE)+
geom_vline(color='red',xintercept = vallog,alpha=.5)+
geom_text(aes(x=vallog,y=max(Peak),label = paste0('EC50',val,'nM')),color='red')#,angle=90)
My Question is:
How can I add a big ol' red point on the blue line where the blue and red line meet. I'd like to replace the need for the red line with the red dot. I know i have to use geom_point but because it's a fitted line, i can't just say x=vallog can i?