Adding p value on top of grouped bar plot - r

This is my data which I'm trying to plot
dput(results)
structure(list(ontology = c("CC", "BP", "MF", "CC", "BP", "MF",
"CC", "BP", "MF"), breadth = structure(c(3L, 3L, 3L, 2L, 2L,
2L, 1L, 1L, 1L), .Label = c("10", "30", "100"), class = "factor"),
enrichment = c(4.09685904270847, 8.04193317540539, 5.5801230522415,
4.52127958016442, 8.9221766387218, 5.68189764335457, 4.25046722366786,
9.49038239297713, 6.75423163834793), p = c(0, 0, 0, 0, 0,
0, 2.09057402562873e-221, 0, 0)), class = "data.frame", row.names = c(NA,
-9L))
My code
# Read the enrichment table and convert `breadth` to a factor.
results <- read.delim("data/GO/LC-GO-enrichment_new.txt") %>%
  mutate(breadth = factor(breadth))

# Dodged bar chart of enrichment by breadth, filled and outlined per ontology.
# NOTE: `ryb8`, `darken()` and `sci_theme` are defined outside this snippet.
p <- ggplot(
  results,
  aes(x = breadth, y = enrichment, fill = ontology, color = ontology)
) +
  geom_col(position = 'dodge', width = 0.8) +
  labs(x = "Breadth", y = "Odds ratio") +
  scale_fill_manual(values = ryb8[c(1, 5, 8)], name = "Ontology") +
  scale_color_manual(
    values = darken(ryb8[c(1, 5, 8)], 1.3),
    name = "Ontology"
  ) +
  scale_y_log10(expand = c(0.01, 0)) +
  sci_theme

# Display the plot.
p
I get something like this.
Is there a way the p-value can be added, similar to this?
Or is it done manually after making the figure?
Any help or suggestion would be really helpful.

You could simply add the p values as a text layer. Note though, that in your data, each bar has a p value, so it's not clear where the groupwise p values are coming from.
library(ggplot2)

# Dodged bar chart with one p-value label drawn above every bar.
ggplot(results, aes(x = breadth, y = enrichment, fill = ontology)) +
  # The outline colour is derived from the final fill colour of each bar.
  geom_col(
    aes(color = after_scale(colorspace::darken(fill, 1.3))),
    position = 'dodge',
    width = 0.8
  ) +
  # Grouping by ontology and dodging by the bar width (0.8) places each label
  # over its own bar.
  geom_text(
    aes(label = paste("p", scales::pvalue(p)), group = ontology),
    position = position_dodge(width = 0.8),
    vjust = -1
  ) +
  scale_fill_manual(values = c("#d63228", "#dff2f8", "#4575b5")) +
  scale_y_log10(expand = c(0.05, 0)) +
  labs(x = "Breadth", y = "Odds ratio", fill = "Ontology") +
  theme_classic(base_size = 16) +
  theme(legend.position = "top")

Related

How can I center geom_label_repel labels so that they are in the middle of each bar?

I'm using geom_label_repel to place percentages in a faceted horizontal bar chart, which mostly works great, except that I can't seem to find a way to shift the labels to the middle (or as close as possible) of each bar. Instead, they are justified right, which visually is a bit confusing as some bars are close to one another.
I've tried using hjust and vjust, but these just center the text within its box rather than the box within the plot. If I drop position_identity() in favour of nudge_x, the bars all drop out, so that doesn't get me there. I assume there is some simple way to do this, but I'm not seeing it!
Here's the function I've made to do the plot:
# Build a faceted, fill-stacked horizontal bar chart of `response` with a
# percentage label per segment.
#
# x: data frame with a `response` column; every other column is pivoted into
#    a (bin_name, b) pair.
#
# Reads `x_limits`, `facet_names`, `title` and `caption` from the enclosing
# environment. Returns the ggplot object.
plot_single_result_with_facets <- function(x) {
  # Long format, then counts per response within each bin variable / bin level.
  # Rows with a missing response are dropped; percentages are computed within
  # each (bin_name, b) group.
  label_data <- x %>%
    pivot_longer(!response, names_to = "bin_name", values_to = "b") %>%
    count(response, bin_name, b) %>%
    filter(!is.na(response)) %>%
    group_by(bin_name, b) %>%
    mutate(perc = paste0(round(n * 100 / sum(n), 0), "%"))

  ggplot(label_data, aes(x = n, y = "", fill = response, label = perc)) +
    # Response order is reversed with forcats::fct_rev(); the label layer
    # below must use the same reversal.
    geom_col(aes(fill = forcats::fct_rev(response)), position = position_fill()) +
    coord_cartesian(clip = "off") +
    geom_vline(xintercept = x_limits, linetype = 3) +
    geom_label_repel(
      # Label grouping must match the bar grouping, otherwise the labels come
      # out in the opposite order to the segments.
      aes(group = forcats::fct_rev(response), label = perc),
      # Same position adjustment as the bars.
      position = position_fill(),
      # Text justification inside the label box: 0 = left, 0.5 = center, 1 = right.
      hjust = 0.5,
      vjust = 0.5,
      direction = "y",
      force = 1.5,
      # Allow labels to overlap.
      max.overlaps = Inf,
      # Label appearance: white box, small font, minimal padding.
      fill = "white",
      size = 1.5,
      box.padding = 0.001,
      # Thin segments, no points.
      segment.size = 0.2,
      point.size = NA
    ) +
    scale_fill_brewer(palette = "YlOrBr", direction = -1) +
    scale_x_continuous(
      labels = scales::percent_format(),
      expand = c(0.05, 0.05)
    ) +
    facet_grid(vars(b), vars(bin_name), labeller = as_labeller(facet_names)) +
    labs(title = title, caption = caption, x = "", y = "") +
    guides(fill = guide_legend(title = NULL)) +
    theme_classic()
}
And the code that uses it:
# Driver script: prepares the inputs and globals used by
# plot_single_result_with_facets(), draws the figure and saves it.

# No caption; plot_single_result_with_facets() reads `caption` as a global.
caption <- NULL
# Keep the four binned predictors plus item Q4 (`data` is defined outside this snippet).
df <- select(data, Q51_bin, Q52_bin, Q57_bin, Q53_bin, Q4)
# presumably haven::as_factor(), converting labelled columns to factors -- TODO confirm
df <- as_factor(df)
# Rename Q4 to `response`, the column name the plotting function expects.
names(df) <- c("Q51_bin", "Q52_bin", "Q57_bin", "Q53_bin", "response")
# Lookup of facet strip labels: bin variable names and bin levels -> display text.
facet_names <- c(`Q51_bin` = "Nature Relatedness", `Q52_bin` = "Spirituality", `Q57_bin` = "Religiosity", `Q53_bin` = "Politics L/R", `low`="low", `medium`="medium", `high`="high")
# NOTE(review): facet_labeller is not referenced in the visible code; the plot
# function uses as_labeller(facet_names) instead.
facet_labeller <- function(variable,value){return(facet_names[value])}
# Passed to geom_vline(xintercept = ...) inside the plotting function.
x_limits <- c(.50, NA)
# NOTE(review): the result of this call is neither assigned nor added to a
# plot, so it does not change the facet order of the figure below -- confirm intent.
facet_grid(~fct_relevel(df,'Nature Relatedness','Spirituality','Religiosity','Politics L/R'))
plot_single_result_with_facets(df)
# No `plot` argument is given, so ggsave() falls back to its default plot
# (presumably the one just drawn).
ggsave("figures/q5_faceted.png", width = 20, height = 10, units = "cm")
Here's the plot as it currently stands:
And a bit of data to make it reproducible:
df <- structure(list(Q51_bin = structure(c(2L, 2L, 2L, 2L), levels = c("low", "medium", "high"), class = "factor"), Q52_bin = structure(c(3L, 2L, 2L, 2L), levels = c("low", "medium", "high"), class = "factor"), Q57_bin = structure(c(2L, 2L, 2L, 2L), levels = c("low", "medium", "high"), class = "factor"), Q53_bin = structure(c(2L, 3L, 2L, 2L), levels = c("low", "medium", "high"), class = "factor"), Q4 = structure(c(2, 3, 3, 5), label = "How much have you thought about climate change before today?", format.spss = "F40.0", display_width = 5L, labels = c(`Not at all` = 1, `A little` = 2, Some = 3, `A lot` = 4, `A great deal` = 5, `Don't know` = 99), class = c("haven_labelled", "vctrs_vctr", "double"))), class = c("rowwise_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L), groups = structure(list(.rows = structure(list(1L, 2L, 3L, 4L), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame")))
You need to put vjust = 0.5 inside position_fill:
x %>%
  # Counts per response within each bin variable / bin level, with
  # percentages computed inside each (bin_name, b) group.
  pivot_longer(!response, names_to = "bin_name", values_to = "b") %>%
  count(response, bin_name, b) %>%
  filter(!is.na(response)) %>%
  group_by(bin_name, b) %>%
  mutate(perc = paste0(round(n * 100 / sum(n), 0), "%")) %>%
  ggplot(aes(x = n, y = "", fill = response, label = perc)) +
  geom_col(aes(fill = forcats::fct_rev(response)), position = position_fill()) +
  coord_cartesian(clip = "off") +
  geom_vline(xintercept = x_limits, linetype = 3) +
  geom_label_repel(
    aes(group = forcats::fct_rev(response), label = perc),
    # vjust = 0.5 inside position_fill() is the change that centers each
    # label within its bar segment.
    position = position_fill(vjust = 0.5),
    hjust = 0.5,
    vjust = 0.5,
    direction = "y",
    force = 1.5,
    max.overlaps = Inf,
    fill = "white",
    size = 1.5,
    box.padding = 0.001,
    segment.size = 0.2,
    point.size = NA
  ) +
  scale_fill_brewer(palette = "YlOrBr", direction = -1) +
  scale_x_continuous(
    labels = scales::percent_format(),
    expand = c(0.05, 0.05)
  ) +
  facet_grid(vars(b), vars(bin_name), labeller = as_labeller(facet_names)) +
  labs(title = 'title', caption = caption, x = "", y = "") +
  guides(fill = guide_legend(title = NULL)) +
  theme_classic()

geom_text - make text black for plot grouped by color

I have a graph grouped by color with values in text above each point. However, I'd like the writing in black as it's difficult to read.
Could I get some help changing the color of the text from geom_text() to black without losing the positioning?
Adding color = "black" to geom_text() messes up the position of the text, but I'm not sure why...
My data:
structure(list(type = c("full", "full", "full", "noadiposity",
"noadiposity", "noadiposity", "nocv", "nocv", "nocv", "nocv2",
"nocv2", "nocv2", "noenergy", "noenergy", "noenergy", "noenergy2",
"noenergy2", "noenergy2"), fi.cat = structure(c(1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("Non-frail",
"Pre-frail", "Frail"), class = "factor"), mean = c(0.0566154812663495,
0.150817937965167, 0.285714285714286, 0.0459153181095795, 0.148380746409361,
0.292192760942761, 0.0550705669171458, 0.147270820014587, 0.288461538461538,
0.0530093023576546, 0.145279762712841, 0.292717236467236, 0.0531040684693624,
0.146793227463497, 0.292499719195777, 0.054311319499867, 0.14824350913368,
0.283745781777278), sd = c(0.0289882935363143, 0.0342654979144937,
0.0393662413936823, 0.0298601819635622, 0.0345078387756546, 0.0422635938212309,
0.0285280200524055, 0.0338893364029561, 0.0430877768970245, 0.0275365612798787,
0.0358119253511248, 0.0415426999110631, 0.0270394224053038, 0.0374836297491701,
0.0384867847822804, 0.0280882098015465, 0.0353023978795509, 0.039235018559239
)), row.names = c(NA, -18L), groups = structure(list(type = c("full",
"noadiposity", "nocv", "nocv2", "noenergy", "noenergy2"), .rows = structure(list(
1:3, 4:6, 7:9, 10:12, 13:15, 16:18), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
The code I used:
library(ggplot2)

# Mean +/- sd per FI category, one colour per type. All three layers share
# the same dodge width (0.9).
ggplot(grouped_mean, aes(x = fi.cat, y = mean, color = type)) +
  geom_point(size = 2, position = position_dodge(0.9)) +
  geom_errorbar(
    aes(ymin = mean - sd, ymax = mean + sd),
    width = 0.2,
    size = 1,
    position = position_dodge(0.9)
  ) +
  # Rounded mean printed above each point, coloured like the point.
  geom_text(
    aes(label = round(mean, 2)),
    size = 3,
    vjust = -5.5,
    position = position_dodge(0.9)
  ) +
  scale_color_brewer(palette = "Blues") +
  labs(x = "FI category", y = "Mean FI score", color = "FI type") +
  theme_minimal()
Using the colour for the points and error bars automatically splits them into groups. If you're assigning the colour manually, you need to specify the groups in the geom_text() i.e:
# Mean +/- sd per FI category with black value labels.
ggplot(grouped_mean, aes(x = fi.cat, y = mean, color = type)) +
  geom_point(size = 2, position = position_dodge(0.9)) +
  geom_errorbar(
    aes(ymin = mean - sd, ymax = mean + sd),
    width = 0.2,
    size = 1,
    position = position_dodge(0.9)
  ) +
  # A constant colour replaces the colour mapping in this layer, so the
  # grouping is given explicitly with `group = type` to keep the dodging.
  geom_text(
    aes(label = round(mean, 2), group = type),
    colour = "black",
    size = 3,
    vjust = -5.5,
    position = position_dodge(0.9)
  ) +
  scale_color_brewer(palette = "Blues") +
  labs(x = "FI category", y = "Mean FI score", color = "FI type") +
  theme_minimal()
Here is an approach: Not the best one, but it seems to work!
This approach uses fill aesthetics and assigns the color aesthetics separately to geom_text aesthetics:
library(ggplot2)

# Variant that maps `type` to fill globally and to colour only in the
# layers that should be coloured.
ggplot(grouped_mean, aes(x = fi.cat, y = mean, fill = type)) +
  # First pass: points and error bars without a colour mapping.
  geom_point(size = 2, position = position_dodge(0.9)) +
  geom_errorbar(
    aes(ymin = mean - sd, ymax = mean + sd),
    width = 0.2,
    size = 1,
    position = position_dodge(0.9)
  ) +
  # Value labels: no colour mapping here either.
  geom_text(
    aes(label = round(mean, 2)),
    size = 3,
    vjust = -11.5,
    position = position_dodge(0.9)
  ) +
  labs(x = "FI category", y = "Mean FI score", color = "FI type") +
  # Second pass: the same points and error bars, coloured by type.
  geom_point(aes(color = type), size = 2, position = position_dodge(0.9)) +
  geom_errorbar(
    aes(color = type, ymin = mean - sd, ymax = mean + sd),
    width = 0.2,
    size = 1,
    position = position_dodge(0.9)
  ) +
  scale_color_brewer(palette = "Blues") +
  theme_minimal() +
  # Hide the legend for fill.
  guides(fill = "none")

adjusting position of text above an error bar in ggplot

I have the following data frame:
df <- structure(list(Gender = c("M", "M", "M", "M", "F", "F", "F",
"F"), HGGroup = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label =
c("Low: \n F: <11.5, M: <12.5",
"Medium: \n F: > 11.5 & < 13, M: >12.5 & < 14.5", "High: \n F: >= 13, M >=
14.5", "No data"), class = "factor"), MeanBlood = c(0.240740740740741,
1.20689655172414, 0.38150289017341, 0.265957446808511, 0.272727272727273,
1.07821229050279, 0.257309941520468, 0.288776796973518), SEBlood =
c(0.0694516553311722, 0.154646785911315, 0.0687932999815165,
0.0383529942166715, 0.0406072582435844, 0.0971802933392401,
0.0327856332532931, 0.0289636037703526),
N = c(108L, 116L, 173L, 376L, 319L, 179L, 342L, 793L)), row.names = c(NA,
-8L), class = c("tbl_df", "tbl", "data.frame"))
I have the following command for plotting the means and confidence intervals for each group:
# Mean with a normal-approximation interval (SE * qnorm(0.975)) per gender,
# one facet per HGGroup, with N printed at a fixed offset above the mean.
ggplot(df, aes(x = Gender, y = MeanBlood, colour = Gender)) +
  geom_errorbar(
    aes(
      ymin = MeanBlood - SEBlood * qnorm(0.975),
      ymax = MeanBlood + SEBlood * qnorm(0.975)
    ),
    width = 0.3,
    stat = "identity"
  ) +
  geom_point(size = 3) +
  facet_grid(~HGGroup) +
  theme(legend.position = "none") +
  geom_text(aes(label = N, x = Gender), vjust = -5)
I am trying to get the text exactly on top of the error bar, but it needs to be in a different location for each group and currently comes out weird.
I think the problem originates from the fact that the confidence interval has a different length for each group, so that a constant justification would not work - it has to be relative to the lower quartile.
Any suggestions?
This seems to work, the y of your label, as you want it, is not the y set in the aes of ggplot, but is ymax:
# Same plot, with the N label anchored at the upper end of each error bar.
ggplot(df, aes(x = Gender, y = MeanBlood, colour = Gender)) +
  geom_errorbar(
    aes(
      ymin = MeanBlood - SEBlood * qnorm(0.975),
      ymax = MeanBlood + SEBlood * qnorm(0.975)
    ),
    width = 0.3,
    stat = "identity"
  ) +
  geom_point(size = 3) +
  facet_grid(~HGGroup) +
  theme(legend.position = "none") +
  # The label's y is the same expression as the error bar's ymax.
  geom_text(
    aes(y = MeanBlood + SEBlood * qnorm(0.975), label = N, x = Gender),
    vjust = -1
  )
If you move ymax to the ggplot call other layers will be able to access it so no need to redefine it:
# Mean with a normal-approximation interval (SE * qnorm(0.975)) per gender,
# one facet per HGGroup, with N printed just above each error bar.
# ymin/ymax are mapped once in ggplot() so every layer inherits them.
ggplot(
  df,
  aes(
    x = Gender, y = MeanBlood, colour = Gender,
    ymin = MeanBlood - SEBlood * qnorm(0.975),
    ymax = MeanBlood + SEBlood * qnorm(0.975)
  )
) +
  # width is a constant setting, so it is passed as a parameter instead of
  # being mapped inside aes().
  geom_errorbar(width = 0.3, stat = "identity") +
  geom_point(size = 3) +
  facet_grid(~HGGroup) +
  theme(legend.position = "none") +
  # after_stat() replaces the deprecated stat(): the label's y is taken from
  # the inherited ymax column.
  geom_text(aes(y = after_stat(ymax), label = N, x = Gender), vjust = -1)

How to change the text in the colour legend of ggplot2

I have this code:
# Two point series over the same x; the constant strings mapped to colour
# become the legend entries.
ggplot(databoth, aes(withxstep)) +
  geom_point(aes(y = withnassoc, colour = "withnassoc"), size = 2.8) +
  geom_point(aes(y = withoutnassoc, colour = "withoutnassoc"), size = 1) +
  # Legend title and axis titles.
  labs(colour = "Legend", x = "Time", y = "N associations")
How do I modify the withnassoc and the withoutnassoc? I would like it to be "With Activities" and "Without activities".
This should answer your question:
# Same plot with the legend entries relabelled through the colour scale.
ggplot(databoth, aes(withxstep)) +
  geom_point(aes(y = withnassoc, colour = "withnassoc"), size = 2.8) +
  geom_point(aes(y = withoutnassoc, colour = "withoutnassoc"), size = 1) +
  # `values` and `labels` are given in the same order.
  scale_color_manual(
    labels = c("With Activities", "Without activities"),
    values = c("red", "blue")
  ) +
  labs(x = "Time", y = "N associations", colour = "Legend")
For this example data-set:
exampledata <- structure(list(withxstep = structure(c(4L, 3L, 2L, 1L),
.Label = c("2017-06-27", "2017-06-28", "2017-06-29", "2017-06-30"), class = "factor"),
withnassoc = c(1, 2, 3, 4), withoutnassoc = c(5, 6, 7, 8)), .Names = c("withxstep",
"withnassoc", "withoutnassoc"), class = "data.frame", row.names = c(NA,-4L))
This would be the plot:

ggplot jitter geom_errorbar?

My data looks something like this:
df1 <-
structure(
list(
y = c(-0.19, 0.3,-0.05, 0.15,-0.05, 0.15),
lb = c(-0.61,
0.1,-0.19,-0.06,-0.19,-0.06),
ub = c(0.22, 0.51, 0.09, 0.36,
0.09, 0.36),
x = structure(
c(1L, 2L, 1L, 2L, 1L, 2L),
.Label = c("X1",
"X2"),
class = "factor"
),
Group = c("A", "A", "B", "B", "C",
"C")
),
.Names = c("y", "lb", "ub", "x", "Group"),
row.names = c(NA,-6L),
class = "data.frame"
)
I want to use ggplot2 to plot the points x,y colored by group with error bars lb, ub. Because x is discrete, I want to jitter so the points and bars don't overlap. Right now, I can jitter the points but not the lines. Additionally, I would like to have the order of the points to be A,B,C.
# Points are jittered, but the error bars have no position adjustment.
ggplot(data = df1, aes(x, y, color = Group)) +
  geom_point(position = "jitter", size = 4) +
  geom_errorbar(
    aes(ymin = lb, ymax = ub),
    linetype = "dotted",
    width = 0.1
  ) +
  # Dashed reference line at y = 0.
  geom_hline(aes(yintercept = 0), linetype = "dashed") +
  theme_bw()
You can use position_dodge to achieve both the desired order and the error bars being drawn at the location of the points
# Points and error bars use the same dodge width (0.5), so each bar is drawn
# at the position of its point.
ggplot(data = df1, aes(x, y, color = Group)) +
  geom_point(position = position_dodge(width = 0.5), size = 4) +
  geom_errorbar(
    aes(ymin = lb, ymax = ub),
    position = position_dodge(width = 0.5),
    linetype = "dotted",
    width = 0.1
  ) +
  # Dashed reference line at y = 0.
  geom_hline(aes(yintercept = 0), linetype = "dashed") +
  theme_bw()
If you do want jitter, this is how I do it:
# Jittered point-ranges: the point and its interval are one geom, so they
# move together.
# height = 0 restricts the jitter to the x direction; left at its default,
# position_jitter() also adds random vertical noise, which would shift the
# plotted estimates away from their true values.
ggplot(data = df1, aes(x, y, color = Group)) +
  geom_pointrange(
    aes(ymin = lb, ymax = ub),
    position = position_jitter(width = 0.5, height = 0),
    linetype = 'dotted'
  ) +
  theme_bw()

Resources