How to add labels to a stacked bar graph - r

I am looking to add numerical values to the middle of each stack in the stacked bar graph (code below). Most of the examples I am finding are orientated towards information in one column and whenever I try to modify it, I run into errors about length requirements.
DA <- data.frame(
Imp=c("2015","2019"),
"mismatch"=c(220,209),
"match"=c(3465,3347),
"NA"=c(501,630),
check.names = FALSE)
DA %>%
pivot_longer(-Imp) %>%
ggplot(aes(x = Imp, y = value, fill = name)) + geom_col(position = "stack") +
scale_fill_manual(name=" ", values=c("aquamarine4", "orange", "coral")) +
theme_light() +
theme(legend.position = "bottom") +
scale_y_continuous(expand = c(0,0)) +
geom_text(aes(x=1, y=4300, label="Stretch it"), vjust=-1) +
labs(y="Count", x="Imputed Genotypes") +
geom_bar(stat = "identity", color="white", width = 1)

Like this?
library(tidyverse)
DA <- data.frame(
Imp=c("2015","2019"),
"mismatch"=c(220,209),
"match"=c(3465,3347),
"NA"=c(501,630),
check.names = FALSE)
DA %>%
pivot_longer(-Imp) %>%
ggplot(aes(x = Imp, y = value, fill = name)) +
geom_col(color = "white", lwd = 1,
position = "stack", width = 0.75) +
scale_fill_manual(name="", values=c("aquamarine4", "orange", "coral")) +
scale_y_continuous(expand = c(0,0),
limits = c(0, 4200)) +
labs(y="Imputed Genotypes (Count)") +
geom_text(aes(label = value), color = "white", size = 5,
position = position_stack(vjust = 0.5),
show.legend = FALSE) +
theme_light(base_size = 18) +
theme(legend.position = "right",
axis.title.x = element_blank())
Created on 2021-12-19 by the reprex package (v2.0.1)

Related

How to make different patterns and colors in different category of data using ggplot in R?

I would like to make my data have different colors for species and different patterns for sex. However, I can only set to make it different colors according to the sex. Here is my data,
data
This is how I run my script,
#making bar plot
library(readr)
library(ggplot2)
# loading and checking the data
data_summary <- read_csv("trial.csv")
print(data_summary)
# coloured barplot
ggplot(data_summary, aes(x = factor(species), y = mean, fill = sex)) +
geom_bar(stat = "identity", position = "dodge", show.legend = FALSE) +
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), position = position_dodge(0.9), width = 0.2, show.legend = FALSE) +
labs(x="", y="") + theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.position = c(0.1, 0.75)) + ylim(0, 80) +
scale_fill_manual(values=c("#870A30","#D3D3D3"))
This can be done using fill = interaction(..,..):
library(ggplot2)
ggplot(data_summary, aes(x = factor(species), y = mean, fill = interaction(species,sex))) +
geom_bar(stat = "identity", position = "dodge", show.legend = FALSE) +
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), position = position_dodge(0.9), width = 0.2, show.legend = FALSE) +
labs(x="", y="") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.position = c(0.1, 0.75)) + ylim(0, 80) +
scale_fill_manual(values= c("#870A30", '#009E73', '#CC79A7', "#D3D3D3"))
An option could be using ggplot_build and add a vector of four colors (you change this to what you want) to the fill column of the bars layer like this:
library(ggplot2)
p <- ggplot(data_summary, aes(x = factor(species), y = mean, fill = sex)) +
geom_bar(stat = "identity", position = "dodge", show.legend = FALSE) +
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), position = position_dodge(0.9), width = 0.2, show.legend = FALSE) +
labs(x="", y="") + theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.position = c(0.1, 0.75)) + ylim(0, 80) +
scale_fill_manual(values=c("#870A30","#D3D3D3"))
q <- ggplot_build(p)
q$data[[1]]$fill <- c("#870A30","#D3D3D3", '#009E73', '#CC79A7')
q <- ggplot_gtable(q)
plot(q)
Created on 2023-01-02 with reprex v2.0.2
You can use ggpattern to get different patterns per sex and different colors per species:
library(ggplot2)
library(ggpattern)
ggplot(data_summary, aes(x = species, y = mean, fill = species, group = sex)) +
geom_col_pattern(position = "dodge", aes(pattern = sex),
pattern_fill = "white", pattern_color = "white",
pattern_angle = 45, show.legend = FALSE) +
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), position = position_dodge(0.9),
width = 0.2, show.legend = FALSE) +
labs(x = NULL, y = NULL) +
theme_classic() +
theme(panel.border = element_rect(linewidth = 0.5, fill = NA)) +
ylim(0, 80) +
scale_fill_manual(values = c("#870A30" ,"#D3D3D3"))
There is a nice package called ggpattern which offers hatching for geoms. Unfortunately it is not available for the R version I am using.
But I would like to offer different alpha values for the fill color.
The alpha itself can defined like scale_alpha_manual(values = c(.5,1)).
library(ggplot2)
data_summary <- read.table(text = "
species,sex,mean,sd,tukey
species_a,female,67,4.17,a
species_b,male,62.2,4.8,a
species_b,female,61.3,6.43,a
species_a,male,49.7,16.2,a
", header = T, sep = ','
)
# coloured barplot
ggplot(data_summary, aes(x = factor(species), y = mean, fill = sex, alpha = species)) +
geom_bar(stat = "identity", position = "dodge", show.legend = FALSE) +
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), position = position_dodge(0.9), width = 0.2, show.legend = FALSE) +
labs(x="", y="") + theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.position = c(0.1, 0.75)) + ylim(0, 80) +
scale_fill_manual(values=c("#870A30","#D3D3D3")) +
scale_alpha_manual(values = c(.5,1))

Replace x axis labels with colored bars to get a gradient effect in R

I'm building a barplot with RNA reads % in ggplot, I did this:
ggplot(tipos_exo,aes(x = reorder(sample, -value),y = value,fill = variable)) +
geom_bar( stat = "identity")
I need to replace the x axis labels with colored bars, each sample belongs to a specific batch and I looking for this effect:
Any thoughts?
One option to achieve your desired result would be to create your axis colorbar as a second plot and glue it to the main plot via the patchwork package.
For the colorbar I use geom_tile and remove all non-data ink using theme_void. As a first step I reorder your sample column by value and get rid of the duplicated sample categories using dplyr::distinct.
Using some fake random example data:
set.seed(123)
tipos_exo <- data.frame(
sample = rep(letters, each = 2),
variable = c("tablaq_readsPerc", "tablaq_shortReadsPerc"),
value = runif(52, 0, 100),
batch = rep(LETTERS, each = 2)
)
library(ggplot2)
library(patchwork)
library(dplyr, warn = FALSE)
p1 <- ggplot(tipos_exo,aes(x = reorder(sample, -value),y = value,fill = variable)) +
geom_bar( stat = "identity") +
scale_y_continuous(expand = c(0, 0)) +
labs(x = NULL) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.length.x = unit(0, "pt"))
tipos_exo1 <- tipos_exo |>
mutate(sample = reorder(sample, -value)) |>
distinct(sample, batch)
p_axis <- ggplot(tipos_exo1, aes(x = sample, y = factor(1), fill = batch)) +
geom_tile(width = .9) +
geom_text(aes(label = sample)) +
theme_void() +
theme(axis.title.x = element_text()) +
labs(x = "Batch Annotation") +
guides(fill = "none")
p1 / p_axis + plot_layout(heights = c(8, 1))
UPDATE Adapting my answer on this post Reorder Bars of a Stacked Barchart in R you could reorder your sample column by a helper value "column", e.g. if you want to reorder by "tablaq_readsPerc" you set the values for the other categories to zero and use FUN=sum. Note that I also reversed the order of the stack so that the "tablaq_readsPerc" bars are placed at the bottom.
tipos_exo <- tipos_exo |>
mutate(sample1 = reorder(sample, -ifelse(!variable %in% "tablaq_readsPerc", 0, value), FUN = sum))
p1 <- ggplot(tipos_exo,aes(x = sample1, y = value, fill = variable)) +
geom_bar( stat = "identity", position = position_stack(reverse = TRUE)) +
scale_y_continuous(expand = c(0, 0)) +
labs(x = NULL) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.length.x = unit(0, "pt"))
tipos_exo1 <- tipos_exo |>
distinct(sample, sample1, batch)
p_axis <- ggplot(tipos_exo1, aes(x = sample1, y = factor(1), fill = batch)) +
geom_tile(width = .9) +
geom_text(aes(label = sample)) +
theme_void() +
theme(axis.title.x = element_text()) +
labs(x = "Batch Annotation") +
guides(fill = "none")
p1 / p_axis + plot_layout(heights = c(8, 1))

Creating ggplot geom_point() with position dodge 's-shape'

I am trying to create a plot like the one below. I'd like the order the points in each category in such a way that they form an s-shape. Is it possible to do this in ggplot?
Similar data available here
What I have so far:
somatic.variants <- read.delim("data/Lawrence.S2.txt", stringsAsFactors=T)
cancer_rates <- tapply(somatic.variants$logn_coding_mutations, somatic.variants$tumor_type, median)
cancer_rates <- cancer_rates[order(cancer_rates, decreasing=F)]
somatic.variants$tumor_type <- factor(somatic.variants$tumor_type, levels = names(cancer_rates))
library(ggplot2)
library(GGally)
ggplot(data = somatic.variants,
mapping = aes(x = tumor_type,
y = log10(n_coding_mutations))) +
geom_point(position = position_dodge2()) +
scale_x_discrete(position = "top") +
scale_y_continuous(labels = c(0,10,100,1000,10000), expand = c(0,0)) +
geom_stripped_cols() +
theme_bw() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 315, hjust = 1, size = 12),
panel.grid = element_blank()) +
labs(y = "Coding mutations count") +
stat_summary(fun = median,
geom="crossbar",
size = 0.25,
width = 0.9,
group = 1,
show.legend = FALSE,
color = "#FF0000")
This could be achieved by
grouping the data by x-axis categories
arranging by the y-axis value
which ensures that the points are plotted in ascending order of the values for each category.
somatic.variants <- read.delim("https://gist.githubusercontent.com/wudustan/57deecdaefa035c1ecabf930afde295a/raw/1594d51a1e3b52f674ff746caace3231fd31910a/Lawrence.S2.txt", stringsAsFactors=T)
cancer_rates <- tapply(somatic.variants$logn_coding_mutations, somatic.variants$tumor_type, median)
cancer_rates <- cancer_rates[order(cancer_rates, decreasing=F)]
somatic.variants$tumor_type <- factor(somatic.variants$tumor_type, levels = names(cancer_rates))
library(ggplot2)
library(GGally)
library(dplyr)
somatic.variants <- somatic.variants %>%
group_by(tumor_type) %>%
arrange(n_coding_mutations)
ggplot(data = somatic.variants,
mapping = aes(x = tumor_type,
y = log10(n_coding_mutations))) +
geom_point(position = position_dodge2(.9), size = .25) +
scale_x_discrete(position = "top") +
scale_y_continuous(labels = c(0,10,100,1000,10000), expand = c(0,0)) +
geom_stripped_cols() +
theme_bw() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 315, hjust = 1, size = 12),
panel.grid = element_blank()) +
labs(y = "Coding mutations count") +
stat_summary(fun = median,
geom="crossbar",
size = 0.25,
width = 0.9,
group = 1,
show.legend = FALSE,
color = "#FF0000")
#> Warning: Removed 29 rows containing non-finite values (stat_summary).

Overlapping text on top of geom_bar in ggplot2

I have made a barplot similar to the one below using ggplot2.
I cannot get the percentages on top of the bars to be centered and not overlapping of other bars and numbers. Sample code is below.
library(tidyverse)
cat1=c("cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1",
"cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2",
"cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3",
"cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4")
cat2=c("c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12",
"c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12",
"c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12",
"c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12")
count1=round(rnorm(48,10))
fakeperc=rnorm(48,9)
df1=cbind(count1,fakeperc)
df2=cbind(cat1,cat2)
finaldf=as.data.frame(cbind(df1,df2))
finaldf$cat1=as.factor(finaldf$cat1)
finaldf$fakeperc=as.numeric(finaldf$fakeperc)
#finaldf$cat1=factor(finaldf$cat1,levels = c("cat1","cat2","cat3","cat4"))
finaldf$cat2 = factor(finaldf$cat2,
levels = c("c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12"))
a=ggplot(data=finaldf,aes(x=cat1, y=count1,
fill=cat2,group=cat2)) +
geom_bar(stat='identity',color='black',width=.65,position=position_dodge(width=.9))+
scale_y_discrete(limits=0:50,breaks=c(0,10,20,30,40,50))+
scale_fill_brewer(palette="Set3") +
theme_classic() +
geom_text(data = finaldf,
aes(x=cat1,y=count1,group=cat2,
label=format(paste(round(fakeperc),"%",sep = ""))),inherit.aes = F,
color='black',position=position_dodge(.9),vjust=-.5,size=3)
a
When trying to add either nudge_y or nudge_x to the geom_text call, nothing happens. I suspect this is because there is already a position_dodge call. I am open any and all solutions to make these percentages non-overlapping and legible.
What do you think of this?
# I think you meant count1 to be numeric
finaldf$count1 <- as.numeric(finaldf$count1)
ggplot(data = finaldf,
aes(x = cat1,
y = count1,
fill = cat2,
group = cat2)) +
geom_col(color = 'black',
width = 0.65,
position = position_dodge(width = 0.9)) +
geom_text(data = finaldf,
aes(x = cat1,
y = count1,
group = cat2,
label = scales::percent(fakeperc/100, accuracy = 0.01)),
inherit.aes = FALSE,
color = 'black',
position = position_dodge(0.9),
hjust = -0.1,
size = 3) +
scale_y_continuous(limits = c(0,50), breaks = c(0,10,20,30,40,50)) +
scale_fill_brewer(palette = "Set3") +
theme_classic() +
coord_flip()
I cleaned up a bit the code (according to my taste)
I changed scale_y_numeric to scale_y_continuous (since count1 should be numeric)
I used coord_flip() to make it more readable
I used scales::percent to write percentage numbers
(don't know why you set up limits from 0 to 50 but I left them as I suppposed they were intended)
If you don't want to use coor_flip:
finaldf$count1 <- as.numeric(finaldf$count1)
ggplot(data = finaldf,
aes(x = cat1,
y = count1,
fill = cat2,
group = cat2)) +
geom_col(color = 'black',
width = 0.65,
position = position_dodge(width = 0.9)) +
geom_text(data = finaldf,
aes(x = cat1,
y = count1,
group = cat2,
label = scales::percent(fakeperc/100, accuracy = 0.01)),
inherit.aes = FALSE,
color = 'black',
position = position_dodge(0.9),
hjust = -0.1,
angle = 90,
size = 3) +
scale_y_continuous(limits = c(0,50), breaks = c(0,10,20,30,40,50)) +
scale_fill_brewer(palette = "Set3") +
theme_classic()
Is this what you are looking for:
library(ggplot2)
#Code
ggplot(data=finaldf,aes(x=cat2, y=count1,
fill=cat2,group=cat2)) +
geom_bar(stat='identity',color='black',
position=position_dodge(width=1))+
scale_fill_brewer(palette="Set3") +
theme_bw() +
geom_text(aes(x=cat2,y=count1,group=cat2,
label=format(paste(round(fakeperc),"%",sep = ""))),inherit.aes = F,
color='black',position=position_dodge(1),
size=3,vjust=-0.5)+
facet_wrap(.~cat1,scales = 'free_x',nrow = 1,strip.position = 'bottom')+
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = 'top',
strip.background = element_blank(),
panel.spacing = unit(2, "lines"),
panel.grid = element_blank())+
guides(fill = guide_legend(nrow = 1))
Output:

Avoid applying alpha aesthetic to geom_text in ggplot2

I have the following ggplot2 chart. I don't want transparency on the value labels.
Code:
ggplot(test, aes(x = reorder(org, -as.numeric(level)), y = obsAvg, fill = level, alpha = round)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = c("#E69F00", "#56B4E9", "#009E73")) +
scale_alpha_manual(values = c(.5, .75, 1), guide = FALSE) +
labs(title = "Average Observation Score by Round", y = "", x = "", fill = "Group") +
theme_bw() +
geom_text(aes(label = round(obsAvg,1)), vjust = -.5, size = 4, fontface="bold", position = position_dodge(width = .9)) +
scale_y_continuous(limits = c(0,4), expand = c(0,0)) +
theme(legend.position="bottom")
Data:
set.seed(1)
test <- data.frame(
org = rep(c("Mammals", "Cats", "Tigers", "Lions", "Cheetahs"), 3),
level = rep(c("Animals", "Family", rep("Species", 3)), 3),
group = rep("Cats",15),
round = rep(c("Round1", "Round2", "Round3"),5),
obsAvg = runif(15, 1, 4)
)
I have tried moving the alpha = round to be an aesthetic of geom_bar() but then I lose the dodge of the labels:
How can I replicate the top chart but not apply the transparency aesthetic to my labels?
I would move the aes(alpha=) to geom_bar and then add an aes(group=) to geom_text to recover the dodging.
ggplot(test, aes(x = reorder(org, -as.numeric(level)), y = obsAvg, fill = level)) +
geom_bar(aes(alpha=round), stat = "identity", position = "dodge") +
scale_fill_manual(values = c("#E69F00", "#56B4E9", "#009E73")) +
scale_alpha_manual(values = c(.5, .75, 1), guide = FALSE) +
labs(title = "Average Observation Score by Round", y = "", x = "", fill = "Group") +
theme_bw() +
geom_text(aes(label = round(obsAvg,1), group=round), vjust = -.5, size = 4, fontface="bold", position = position_dodge(width = .9)) +
scale_y_continuous(limits = c(0,4), expand = c(0,0)) +
theme(legend.position="bottom")
That's a pretty plot.

Resources