ggplot add aggregated summaries to a bar plot - r

I have the following data frame:
structure(list(StepsGroup = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L), .Label = c("(-Inf,3e+03]", "(3e+03,1.2e+04]", "(1.2e+04, Inf]"
), class = "factor"), GlucoseGroup = structure(c(1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L), .Label = c("<100", "100-180", ">180"
), class = "factor"), n = c(396L, 1600L, 229L, 787L, 4182L, 375L,
110L, 534L, 55L), freq = c(0.177977528089888, 0.719101123595506,
0.102921348314607, 0.147267964071856, 0.782559880239521, 0.0701721556886228,
0.157367668097282, 0.763948497854077, 0.0786838340486409)), class =
c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -9L), vars = "StepsGroup",
labels = structure(list(
StepsGroup = structure(1:3, .Label = c("(-Inf,3e+03]", "(3e+03,1.2e+04]",
"(1.2e+04, Inf]"), class = "factor")), class = "data.frame", row.names =
c(NA, -3L), vars = "StepsGroup", drop = TRUE), indices = list(0:2,
3:5, 6:8), drop = TRUE, group_sizes = c(3L, 3L, 3L), biggest_group_size =
3L)
I would like to create a stacked bar plot, and add a summary of each StepsGroup on top of each bar. So the first group will have 2225, the second 5344 and the third 699.
I am using the following script:
ggplot(d_stepsFastingSummary , aes(y = freq, x = StepsGroup, fill =
GlucoseGroup)) + geom_bar(stat = "identity") +
geom_text(aes(label = sum(n()), vjust = 0))
The part until before the geom_text works, but for the last bit I get the following error:
Error: This function should not be called directly
Any idea how to add the aggregated quantity?

We could create a new dataframe stacked_df which would have sum for each StepsGroup
stacked_df <- df %>% group_by(StepsGroup) %>% summarise(nsum = sum(n))
ggplot(df) +
geom_bar(aes(y = freq, x = StepsGroup, fill= GlucoseGroup),stat = "identity") +
geom_text(data = stacked_df, aes(label = nsum, StepsGroup,y = 1.1))

Related

Ordering bars according to the values of y [duplicate]

This question already has answers here:
Set the order of a stacked bar chart by the value of one of the variables
(2 answers)
Closed 9 months ago.
Using the code below, I have created the below chart. To make it easier for people to see the pattern, I'd like to order states from left to right according to the y values (Dx) by age 65.
Thanks,
NM
Here is my data:
structure(list(Age = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("30", "50", "65"), class = "factor"), Dx = c(3.057, 7.847, 17.157, 2.851, 8.861, 21.885, 2.521, 7.889, 21.328), PopName = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("AK", "AL", "AR"), class = "factor")), row.names = c(NA, -9L), class = c("tbl_df", "tbl", "data.frame"))
library(tidyverse)
library(tidyverse)
CAPS_2019 %>%
group_by(Age, PopName) %>%
mutate(PopName1 = sum(Dx)) %>%
ungroup() %>%
ggplot(aes(x = fct_reorder(PopName, PopName1), y = Dx, fill = factor(as.character(Age)))) +
geom_col(position = position_stack(reverse = TRUE)) +
theme_classic()+
coord_flip()+
labs(x = "State", y = "Deaths (%)", caption = (""), face = "bold", fill = "Age")
Update 2 Try this in your new dataset Age and Popname are already factors. So maybe this should work as expected:
CAPS_2019_data %>%
group_by(Age, PopName) %>%
mutate(PopName1 = sum(Dx)) %>%
ungroup() %>%
ggplot(aes(x = reorder(PopName, PopName1), y = Dx, fill = Age)) +
geom_col(position = position_stack(reverse = TRUE)) +
theme_classic()+
coord_flip()+
labs(x = "State", y = "Deaths (%)", caption = (""), face = "bold", fill = "Age")
Update:
data:
CAPS_2019 <- structure(list(Age = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L), .Label = c("30", "50", "65"), class = "factor"), Dx = c(3.057,
7.847, 17.157, 2.851, 8.861, 21.885, 2.521, 7.889, 21.328), PopName = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("AK", "AL", "AR"), class = "factor")), row.names = c(NA,
-9L), class = c("tbl_df", "tbl", "data.frame"))
To get the stacks ordered use position = position_stack(reverse = TRUE)
To order y axis do some preprocessing with group_by and sum and use fct_reorder from forcats package (it is in tidyverse)
library(tidyverse)
CAPS_2019 %>%
group_by(Age, PopName) %>%
mutate(PopName1 = sum(Dx)) %>%
ungroup() %>%
ggplot(aes(x = fct_reorder(PopName, PopName1), y = Dx, fill = factor(as.character(Age)))) +
geom_col(position = position_stack(reverse = TRUE)) +
theme_classic()+
coord_flip()+
labs(x = "State", y = "Deaths (%)", caption = (""), face = "bold", fill = "Age")

connect points within position_dodged factor x-axis in ggplot2

I'm trying to add significance annotations to an errorbar plot with a factor x-axis and dodged groups within each level of the x-axis. It is a similar but NOT identical use case to this
My base errorbar plot is:
library(ggplot2)
library(dplyr)
pres_prob_pd = structure(list(x = structure(c(1, 1, 1, 2, 2, 2, 3, 3, 3), labels = c(`1` = 1,
`2` = 2, `3` = 3)), predicted = c(0.571584427222816, 0.712630712634987,
0.156061969566517, 0.0162388386564817, 0.0371877245103279, 0.0165022541901018,
0.131528946944238, 0.35927812866896, 0.0708662221985375), std.error = c(0.355802875027348,
0.471253661425626, 0.457109887762665, 0.352871728451576, 0.442646879181155,
0.425913568532558, 0.376552208691762, 0.48178172708116, 0.451758041335245
), conf.low = c(0.399141779923204, 0.496138837620712, 0.0701919316506831,
0.00819832576725402, 0.0159620304815404, 0.00722904089045731,
0.0675129352870401, 0.17905347369819, 0.030504893442457), conf.high = c(0.728233665534388,
0.861980236164486, 0.311759350126477, 0.031911364587827, 0.0842227723261319,
0.0372248587668487, 0.240584344249407, 0.590437963881823, 0.156035177669385
), group = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain",
"neutral", "uncertain"), class = "factor"), group_col = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain", "neutral",
"uncertain"), class = "factor"), language = structure(c(2L, 2L,
2L, 1L, 1L, 1L, 3L, 3L, 3L), .Label = c("english", "dutch", "german"
), class = "factor"), top = c(0.861980236164486, 0.861980236164486,
0.861980236164486, 0.0842227723261319, 0.0842227723261319, 0.0842227723261319,
0.590437963881823, 0.590437963881823, 0.590437963881823)), row.names = c(NA,
-9L), groups = structure(list(language = structure(1:3, .Label = c("english",
"dutch", "german"), class = "factor"), .rows = structure(list(
4:6, 1:3, 7:9), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
#dodge
pd = position_dodge(.75)
#plot
p = ggplot(pres_prob_pd,aes(x=language,y=predicted,color=group,shape=group)) +
geom_point(position=pd,size=2) +
geom_errorbar(aes(ymax=conf.high,ymin=conf.low),width=.125,position=pd)
p
What I want to do is annotate the plot such that the contrasts between group within each level of language are annotated for significance. I've plotted points representing the relevant contrasts and (toy) sig. annotations as follows:
#bump function
f = function(x){
v = c()
bump=0.025
constant = 0
for(i in x){
v = c(v,i+constant+bump)
bump = bump + 0.075
}
v
}
#create contrasts
combs = data.frame(gtools::combinations(3, 2, v=c("certain", "neutral", "uncertain"), set=F, repeats.allowed=F)) %>%
mutate(contrast=c("cont_1","cont_2","cont_3"))
combs = rbind(combs %>% mutate(language = 'english'),
combs %>% mutate(language='dutch'),
combs %>% mutate(language = "german")) %>%
left_join(select(pres_prob_pd,language:top)%>%distinct(),by='language') %>%
group_by(language)
#long transform and calc y_pos
combs_long = mutate(combs,y_pos=f(top)) %>% gather(long, probability, X1:X2, factor_key=TRUE) %>% mutate(language=factor(language,levels=c("english","dutch","german"))) %>%
arrange(language,contrast)
#back to wide
combs_wide =combs_long %>% spread(long,probability)
combs_wide$p = rep(c('***',"*","ns"),3)
#plot
p +
geom_point(data=combs_long,
aes(x = language,
color=probability,
shape=probability,
y=y_pos),
inherit.aes = T,
position=pd,
size=2) +
geom_text(data=combs_wide,
aes(x=language,
label=p,
y=y_pos+.025,
group=X1),
color='black',
position=position_dodge(.75),
inherit.aes = F)
What I am failing to achieve is plotting a line connecting each of the contrasts of group within each level of language, as is standard when annotating significant group-wise differences. Any help much appreciated!

Credibility interval with respect two factors using ggplot2 in r

I have problem ploting credibility interval like this:
My data structure is following,L1,L2,M,U1,U2 stand for 0.025quant,0.25quant,0.5quant,0.75quant,0.975quant,respectively.
`
structure(list(approach = structure(c(1L, 2L, 1L, 2L, 1L, 2L), class = "factor", .Label = c("INLA",
"rjags")), param = structure(c(1L, 2L, 3L, 1L, 2L, 3L), class = "factor", .Label = c("alpha",
"beta", "sig2")), L1 = c(0.0844546867936143, 1.79242348175439,
0.163143886545317, 0.0754165380733685, 1.79067991488052, 3.66675821267498
), L2 = c(0.60090835904286, 1.95337968870806, 0.898159977552433,
0.606017177641373, 1.95260448314298, 4.07080184844179), M = c(0.870204161297956,
2.03768437879748, 2.20651061559405, 0.87408237273113, 2.03725552264872,
4.32531027636171), U2 = c(1.13905085248391, 2.12210930874551,
4.26836270504725, 1.66260576926063, 2.28900567640091, 5.10063756831338
), U1 = c(1.65214011950274, 2.28396345192398, 4.9109804477583,
1.1450384685802, 2.12117799328209, 4.55657971279654), AP = structure(c(1L,
4L, 5L, 2L, 3L, 6L), .Label = c("INLA.alpha", "rjags.alpha",
"INLA.beta", "rjags.beta", "INLA.sig2", "rjags.sig2"), class = "factor")), .Names = c("approach",
"param", "L1", "L2", "M", "U2", "U1", "AP"), row.names = c(NA,
-6L), class = "data.frame")`
I referenced this answerenter link description here,but 'fill' seems only work for boxplot case.the code I tried so far is:
CI$AP=interaction(CI$approach,CI$param)
p=ggplot(CI,aes(y=AP))+geom_point(aes(x=M))
p=p+geom_segment(aes(x=L1,xend=U1,y=AP,yend=AP))
p=p+geom_segment(aes(x=L2,xend=U2,y=AP,yend=AP),size=1.5)
It is far away from what I want.
Many thanks!
How about the following:
ggplot(df, aes(x = param, y = M, colour = approach)) +
geom_point(position = position_dodge2(width = 0.3), size = 3) +
geom_linerange(
aes(ymin = L2, ymax = U2, x = param),
position = position_dodge2(width = 0.3),
size = 2) +
geom_linerange(
aes(ymin = L1, ymax = U1, x = param),
position = position_dodge2(width = 0.3),
size = 1) +
coord_flip() +
labs(x = "Parameter", y = "Estimate")
Sample data
df <- structure(list(approach = structure(c(1L, 2L, 1L, 2L, 1L, 2L), class = "factor", .Label = c("INLA",
"rjags")), param = structure(c(1L, 2L, 3L, 1L, 2L, 3L), class = "factor", .Label = c("alpha",
"beta", "sig2")), L1 = c(0.0844546867936143, 1.79242348175439,
0.163143886545317, 0.0754165380733685, 1.79067991488052, 3.66675821267498
), L2 = c(0.60090835904286, 1.95337968870806, 0.898159977552433,
0.606017177641373, 1.95260448314298, 4.07080184844179), M = c(0.870204161297956,
2.03768437879748, 2.20651061559405, 0.87408237273113, 2.03725552264872,
4.32531027636171), U2 = c(1.13905085248391, 2.12210930874551,
4.26836270504725, 1.66260576926063, 2.28900567640091, 5.10063756831338
), U1 = c(1.65214011950274, 2.28396345192398, 4.9109804477583,
1.1450384685802, 2.12117799328209, 4.55657971279654), AP = structure(c(1L,
4L, 5L, 2L, 3L, 6L), .Label = c("INLA.alpha", "rjags.alpha",
"INLA.beta", "rjags.beta", "INLA.sig2", "rjags.sig2"), class = "factor")), .Names = c("approach",
"param", "L1", "L2", "M", "U2", "U1", "AP"), row.names = c(NA,
-6L), class = "data.frame")

asterix R bar ggplot [duplicate]

I tried to make the title self-explanatory, but here goes - data first:
dtf <- structure(list(variable = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L), .Label = c("vma", "vla", "ia", "fma", "fla"), class = "factor"),
ustanova = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("srednja škola", "fakultet"), class = "factor"),
`(all)` = c(42.9542857142857, 38.7803203661327, 37.8996138996139,
33.7672811059908, 29.591439688716, 26.1890660592255, 27.9557692307692,
23.9426605504587, 33.2200772200772, 26.9493087557604)), .Names = c("variable",
"ustanova", "(all)"), row.names = c(NA, 10L), class = c("cast_df",
"data.frame"), idvars = c("variable", "ustanova"), rdimnames = list(
structure(list(variable = structure(c(1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L, 5L, 5L), .Label = c("vma", "vla", "ia", "fma",
"fla"), class = "factor"), ustanova = structure(c(1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("srednja škola",
"fakultet"), class = "factor")), .Names = c("variable", "ustanova"
), row.names = c("vma_srednja škola", "vma_fakultet", "vla_srednja škola",
"vla_fakultet", "ia_srednja škola", "ia_fakultet", "fma_srednja škola",
"fma_fakultet", "fla_srednja škola", "fla_fakultet"), class = "data.frame"),
structure(list(value = structure(1L, .Label = "(all)", class = "factor")), .Names = "value", row.names = "(all)", class = "data.frame")))
And I'd like to create a dodged barplot, do the coord_flip and put some text labels inside the bars:
ggplot(bar) + geom_bar(aes(variable, `(all)`, fill = ustanova), position = "dodge") +
geom_text(aes(variable, `(all)`, label = sprintf("%2.1f", `(all)`)), position = "dodge") +
coord_flip()
you can see output here.
I reckon I'm asking for something trivial. I want the text labels to "follow" stacked bars. Labels are placed correctly on the y-axis, but how to position them correctly on x-axis?
Is this what you want?
library(ggplot2)
ggplot(bar) +
geom_col(aes(variable, `(all)`, fill = ustanova), position = "dodge") +
geom_text(aes(variable, `(all)`, label = sprintf("%2.1f", `(all)`), group = ustanova),
position = position_dodge(width = .9)) +
coord_flip()
The key is to position = position_dodge(width = .9) (where .9 is the default width of the bars) instead of position = "dodge", which is just a shortcut without any parameter. Additionally you have to set the group=ustanova aesthetic in geom_text to dodge the labels by ustanova (A second option would be to make fill = ustanova a global aesthetic via ggplot(bar, aes(fill = ustanova))
In ggplot2_2.0.0 you find several examples in ?geom_text on how to position geom_text on dodged or stacked bars (the code chunk named "# Aligning labels and bars"). The Q&A What is the width argument in position_dodge? provides a more thorough description of the topic.

Position geom_text on dodged barplot

I tried to make the title self-explanatory, but here goes - data first:
dtf <- structure(list(variable = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L), .Label = c("vma", "vla", "ia", "fma", "fla"), class = "factor"),
ustanova = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("srednja škola", "fakultet"), class = "factor"),
`(all)` = c(42.9542857142857, 38.7803203661327, 37.8996138996139,
33.7672811059908, 29.591439688716, 26.1890660592255, 27.9557692307692,
23.9426605504587, 33.2200772200772, 26.9493087557604)), .Names = c("variable",
"ustanova", "(all)"), row.names = c(NA, 10L), class = c("cast_df",
"data.frame"), idvars = c("variable", "ustanova"), rdimnames = list(
structure(list(variable = structure(c(1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L, 5L, 5L), .Label = c("vma", "vla", "ia", "fma",
"fla"), class = "factor"), ustanova = structure(c(1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("srednja škola",
"fakultet"), class = "factor")), .Names = c("variable", "ustanova"
), row.names = c("vma_srednja škola", "vma_fakultet", "vla_srednja škola",
"vla_fakultet", "ia_srednja škola", "ia_fakultet", "fma_srednja škola",
"fma_fakultet", "fla_srednja škola", "fla_fakultet"), class = "data.frame"),
structure(list(value = structure(1L, .Label = "(all)", class = "factor")), .Names = "value", row.names = "(all)", class = "data.frame")))
And I'd like to create a dodged barplot, do the coord_flip and put some text labels inside the bars:
ggplot(bar) + geom_bar(aes(variable, `(all)`, fill = ustanova), position = "dodge") +
geom_text(aes(variable, `(all)`, label = sprintf("%2.1f", `(all)`)), position = "dodge") +
coord_flip()
you can see output here.
I reckon I'm asking for something trivial. I want the text labels to "follow" stacked bars. Labels are placed correctly on the y-axis, but how to position them correctly on x-axis?
Is this what you want?
library(ggplot2)
ggplot(bar) +
geom_col(aes(variable, `(all)`, fill = ustanova), position = "dodge") +
geom_text(aes(variable, `(all)`, label = sprintf("%2.1f", `(all)`), group = ustanova),
position = position_dodge(width = .9)) +
coord_flip()
The key is to position = position_dodge(width = .9) (where .9 is the default width of the bars) instead of position = "dodge", which is just a shortcut without any parameter. Additionally you have to set the group=ustanova aesthetic in geom_text to dodge the labels by ustanova (A second option would be to make fill = ustanova a global aesthetic via ggplot(bar, aes(fill = ustanova))
In ggplot2_2.0.0 you find several examples in ?geom_text on how to position geom_text on dodged or stacked bars (the code chunk named "# Aligning labels and bars"). The Q&A What is the width argument in position_dodge? provides a more thorough description of the topic.

Resources