Labeling stack bar chart with percentage - r

I am trying to label a stacked bar chart with percentages, but I ended up labeling it with proportions instead. Here is my code:
# Stacked bar chart of `percent` by Grade, filled by STEMFlag, with one text label per segment.
# The label is sprintf("%.02f", percent): the raw value with two decimals, not multiplied
# by 100 and without a "%" sign, so the labels read as proportions while the y axis is
# formatted as percent.
ggplot(data = GradeSTEM_data, aes(x = Grade, y = percent, fill = STEMFlag, label = sprintf("%.02f", percent))) +
geom_bar(position = "fill", stat = "identity") +
scale_y_continuous(labels = scales::label_percent(accuracy = 1)) +
geom_text(position = position_stack(vjust = 0.5), size = 2)

Here is a potential solution:
# Attach the tidyverse (brings in ggplot2)
library(tidyverse)

# Minimal reproducible data: one row per Grade x STEMflag combination,
# with the STEM rows first and the NONSTEM rows second
stem_data <- data.frame(
  Grade = rep(c("A", "B", "C", "P", "NP"), times = 2),
  STEMflag = factor(
    rep(c("STEM", "NONSTEM"), each = 5),
    levels = c("STEM", "NONSTEM")
  ),
  percent = c(
    0.95, 0.93, 0.90, 0.67, 0.86,
    0.05, 0.07, 0.10, 0.33, 0.14
  )
)
head(stem_data)
#> Grade STEMflag percent
#> 1 A STEM 0.95
#> 2 B STEM 0.93
#> 3 C STEM 0.90
#> 4 P STEM 0.67
#> 5 NP STEM 0.86
#> 6 A NONSTEM 0.05

# Stacked bars with each segment labelled as "<percent * 100>%"
ggplot(
  data = stem_data,
  mapping = aes(
    x = Grade,
    y = percent,
    fill = STEMflag,
    label = paste0(percent * 100, "%")
  )
) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::label_percent(accuracy = 1)) +
  geom_text(position = position_stack(vjust = 0.5), size = 4)

jared's answer can be further improved:
scales::label_percent(accuracy = 1) can be used to format both kinds of labels, in geom_text() and in scale_y_continuous(), consistently and without repeating code
geom_bar(stat = "identity") can be abbreviated by geom_col()
library(ggplot2)

# One percent formatter, reused for the y-axis labels and the bar labels
lp <- scales::label_percent(accuracy = 1)

ggplot(
  GradeSTEM_data,
  aes(x = Grade, y = percent, fill = STEMflag, label = lp(percent))
) +
  geom_col(position = "fill") +
  geom_text(position = position_stack(vjust = 0.5)) +
  scale_y_continuous(labels = lp)
Data
# Example data: share of STEM / NONSTEM per grade (five STEM rows, then five NONSTEM rows).
# Grade is a factor so the bars keep the order A, B, C, P, NP.
GradeSTEM_data <- data.frame(
  Grade = factor(
    rep(c("A", "B", "C", "P", "NP"), times = 2),
    levels = c("A", "B", "C", "P", "NP")
  ),
  STEMflag = factor(
    rep(c("STEM", "NONSTEM"), each = 5),
    levels = c("STEM", "NONSTEM")
  ),
  percent = c(
    0.95, 0.93, 0.90, 0.67, 0.86,
    0.05, 0.07, 0.10, 0.33, 0.14
  )
)

Related

ggpubr::ggarrange not arranging plot made with ggpattern

I am using ggarrange to produce a figure with three plots. One of these plots includes fill and a pattern created using ggpattern. I am able to create a combined figure when I combine one standard ggplot and the ggpattern plot, but I get an error (Error in seq.default(from, to, by) : invalid '(to - from)/by') when I try to combine all 3. I've included a simplified example below.
# Fake data: 10 rows with a numeric Level, a Group letter, a yes/no Condition,
# a 0/1 Hx flag and a Type ("T"/"F") that is used for faceting below.
# NOTE(review): no library() calls are shown; the functions used here presumably come
# from ggplot2, ggpattern and ggpubr — confirm.
data <- structure(list(Level= c(0.2, 0.3, 0.25, 0.35, 0.4, 0.5, 0.5, 0.6, 0.15, 0.35),
Group= c("A", "A", "B", "B", "C", "C", "D", "D", "E", "E"),
Condition = c("no", "yes", "no", "yes", "no", "yes", "no", "yes", "no", "yes"),
Hx = c(0,1,1,1,0,1,0,0,0,0),
Type = c("T", "T", "T", "T", "T", "F", "F", "F", "F", "F")),
row.names = c(NA, -10L),
class = c("tbl_df", "tbl", "data.frame"))
# Create plots
# Plot 1: dodged bars per Group, faceted by Type; the pattern encodes Condition
# and the fill colour encodes Hx.
pattern_plot <-
ggplot(data = data, aes(x = Group, y = Level)) +
facet_grid(cols=vars(Type)) +
geom_bar_pattern(aes(pattern = Condition, fill = as.factor(Hx)),
stat = "identity",
position = "dodge",
color = "black",
pattern_fill = "black",
pattern_angle = 45,
pattern_density = 0.1,
pattern_spacing = 0.025,
pattern_key_scale_factor = 0.6) +
scale_pattern_manual(values = c("none", "stripe")) +
labs(x = "", y = "", pattern = "Condition", fill = "History",
subtitle = "Percentage of people with condition") +
# Legend keys: white fill in the pattern legend, no pattern in the fill legend.
guides(pattern = guide_legend(override.aes = list(fill = "white")),
fill = guide_legend(override.aes = list(pattern = "none"))) +
theme_bw() +
theme(legend.position="left") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1))
# Plot 2: plain dodged bars (no pattern, no facets); fill encodes Condition.
no_pattern_plot <-
ggplot(data = data, aes(x = Group, y = Level, fill = Condition)) +
geom_bar(position = "dodge", stat = "identity") +
theme_bw() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme(legend.position="left")
# Plot 3: plain dodged bars (no pattern, no facets); fill encodes Hx.
no_pattern_plot2 <- ggplot(data = data, aes(x = Group, y = Level, fill = as.factor(Hx))) +
geom_bar(position = "dodge", stat = "identity") +
theme_bw() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme(legend.position="left")
This works:
ggarrange(pattern_plot, no_pattern_plot2, nrow = 2)
To create this
plot
However ggarrange(pattern_plot, no_pattern_plot, no_pattern_plot2, nrow = 3)
produces Error in seq.default(from, to, by) : invalid '(to - from)/by'
Any ideas?

How to plot a chart with dual Y, both are bar plot with ggplot2?

I would like to plot a chart with dual Y axes, where both series are bar plots drawn with ggplot2 and each bar has its own error bar and label. The following code fails: the bars overlap, and the labels are not displayed.
Thanks a lot.
# Example data: three dose levels, each with two measurements (mean1/sd1 and mean2/sd2)
# and a text label per measurement (columns `label` and `label2`).
df<- structure(list(dose = structure(1:3, .Label = c("0.5", "1", "2"
), class = "factor"), mean1 = c(13.23, 22.7, 26.06), sd1 = c(0.1,
0.2, 0.3), label = c("a", "b", "c"), mean2 = c(7.98, 16.77, 26.14
), sd2 = c(0.01, 0.2, 0.3), label2 = c("a", "b", "c")), row.names = c(NA,
-3L), class = "data.frame")
# Attempt: draw mean1 and mean2/5 as two bar layers on the same x (dose), with a
# secondary axis that multiplies the primary axis by 5.
# Both bar layers use the same x value and there is no grouping variable to dodge by.
ggplot(df,aes(x = dose, fill = dose))+
geom_bar(aes(y = mean1), position = 'dodge', stat="identity", width=.4) +
geom_bar(aes(y = mean2/5), position = 'dodge', stat="identity", width=.4)+
scale_y_continuous(sec.axis = sec_axis(~. *5, name = "mean2"))+
geom_errorbar(aes(ymin = mean1, ymax = mean1 + sd1), width=.07,
position=position_dodge(0.4)) +
# NOTE(review): these error bars use mean2 and sd2 unscaled, whereas the mean2 bars
# above are drawn at mean2/5.
geom_errorbar(aes(ymin = mean2, ymax = mean2 + sd2), width=.07,
position=position_dodge(0.4))
# NOTE(review): the line above does not end with `+`, so the plot expression ends there;
# the two geom_text() calls below are separate expressions and are not added to the plot.
# NOTE(review): `label1` is not a column of df (the label columns are `label` and `label2`).
geom_text(aes(y =mean1 + sd1, label = label1),vjust = -0.5, position=position_dodge(0.4))
geom_text(aes(y =mean2 + sd2, label = label2,),vjust = -0.5, position=position_dodge(0.4))
Is this what you are looking for? You just need to restructure your data so that it can be dodged by group.
library(dplyr)
library(ggplot2)

# Reshape to long format: one row per dose and measurement, with `group`
# telling the two measurements apart so that position_dodge() can place them
# side by side.
bind_rows(
  df |>
    select(dose, label, mean = mean1, sd = sd1) |>
    mutate(group = 1),
  df |>
    # The second measurement carries its own label column.
    select(dose, label = label2, mean = mean2, sd = sd2) |>
    mutate(
      group = 2,
      # Bring the second measurement onto the primary axis scale. The error
      # bar length must be scaled by the same factor as the bar height,
      # otherwise the error bars are drawn five times too long.
      mean = mean / 5,
      sd = sd / 5
    )
) |>
  ggplot(aes(x = dose, fill = dose, group = group)) +
  geom_col(aes(y = mean), position = "dodge", width = .4) +
  geom_errorbar(
    aes(ymin = mean, ymax = mean + sd),
    width = .07,
    position = position_dodge(0.4)
  ) +
  geom_text(
    aes(y = mean + sd, label = label),
    vjust = -0.5,
    position = position_dodge(0.4)
  ) +
  # The secondary axis undoes the division by 5 applied to group 2.
  scale_y_continuous(sec.axis = sec_axis(~ . * 5, name = "mean2"))

changing legend of faceted boxplot in ggplot2 to have groups with similar names inside

This question builds off of enter link description here but is in the context of faceted boxplots.
So, I have the following code:
# Simulated data: two data frames with the same layout (Method, Pattern, X1, Complexity,
# nsim) that differ only in n (10 vs 5); they are stacked with rbind() below.
set.seed(20210714)
dd <- data.frame(Method = rep(c("A", "B", "C"), each = 60), Pattern = rep(c("X", "Y", "Z"), times = 30), X1 = runif(180), Complexity = rep(c("High", "Low"), times = 90), nsim = rep(rep(1:10, times = 9), each = 2), n = 10)
dd1 <- data.frame(Method = rep(c("A", "B", "C"), each = 60), Pattern = rep(c("X", "Y", "Z"), times = 30), X1 = runif(180), Complexity = rep(c("High", "Low"), times = 90), nsim = rep(rep(1:10, times = 9), each = 2), n = 5)
dd <- rbind(dd, dd1)
library(ggplot2)
# create dummy dataframe.
# Two extra rows are appended with Method "ZZZ" (one per Complexity) and the first two
# Pattern values; their X1 is not set. Presumably they act as spacers between the
# Pattern groups on the x axis — confirm.
dummy.df <- dd
dummy.df[nrow(dd) + 1:2,"Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2,"Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2,"Complexity"] <- c("High","Low")
# x position = Method x Pattern combination
dummy.df$dummy <- interaction(dummy.df$Method,dummy.df$Pattern)
# First plot: boxplots per Method x Pattern, faceted by Complexity, with thin grey
# lines and points grouped by Pattern and nsim.
ggplot(dummy.df, aes(x = dummy, y = X1, fill = Method)) +
geom_boxplot(aes(fill = Method)) +
facet_grid(~Complexity) +
theme_light() +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
geom_line(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.35, colour = I("#525252")) +
geom_point(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.25, colour = I("#525252")) +
scale_x_discrete(labels = c("","X", "", "", "", "Y", "", "", "", "Z","","")) +
xlab("Pattern") +
scale_fill_brewer(breaks=c("A", "B", "C"), type="qual", palette="Paired")
# Second version: the setup above is repeated, then the fill becomes Method x n and the
# x position becomes (Method x n) x Pattern.
dummy.df <- dd
dummy.df[nrow(dd) + 1:2,"Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2,"Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2,"Complexity"] <- c("High","Low")
dummy.df$dummy <- interaction(dummy.df$Method,dummy.df$Pattern)
dummy.df$fill <- interaction(dummy.df$Method, dummy.df$n)
dummy.df$dummy <- interaction(dummy.df$fill, dummy.df$Pattern)
# Drop the levels at positions 4, 12, 20 and 24 of `dummy` (position-based, so this
# depends on the level order produced by interaction()).
dummy.df$dummy <- factor(dummy.df$dummy, levels = levels(dummy.df$dummy)[-c(4, 12, 20, 24)])
dummy.df$dummy[361:362] <- "A.10.Z" ## dummy variables to get rid of NAs
theme_set(theme_bw(base_size = 14))
ggplot(dummy.df, aes(x = dummy, y = X1, fill = fill)) +
geom_boxplot(aes(fill = fill),lwd=0.1,outlier.size = 0.01) +
facet_grid(~Complexity) +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
geom_line(aes(x = dummy,
group=interaction(Pattern,nsim,n)),
size = 0.35, alpha = 0.35, colour = I("#525252")) +
geom_point(aes(x = dummy,
group=interaction(Pattern,nsim,n)),
size = 0.35, alpha = 0.25, colour = I("#525252")) +
# Only the "A.10.<Pattern>" positions get an axis label (X, Y, Z).
scale_x_discrete(labels = c("X", "Y", "Z"), breaks = paste("A.10.", c("X", "Y", "Z"), sep = ""),drop=FALSE) +
xlab("Pattern") +
# Legend entries: every fill level except those at positions 4 and 8.
scale_fill_brewer(breaks= levels(dummy.df$fill)[-c(4,8)], type="qual", palette="Paired")
This yields the following plot.
All is well, except with the legend. I would like the following: the dark colors to be in the First group titled "n=5" on the left, with "A", "B", "C" for the three dark colors, and the light colors to be to the right, in a Second group titled "n=10" on the right, with "A", "B", "C" for the three light colors. Sort of like in the link enter link description here above.
What I can not figure out is how to call the boxplot twice to mimic the solution there.
Is there a way to do this? Please feel free to let me know if the question is not clear.
Thanks again, in advance, for any help!
Adapting my answer on your former question this could be achieved like so:
# Builds two separate fill legends ("n = 5" and "n = 10") by drawing the boxplot layer
# twice, with ggnewscale::new_scale_fill() in between. Relies on `dummy.df` from the
# question's code.
library(ggplot2)
# Fill levels to show in the legends: all levels except positions 4 and 8, sorted.
fill <- levels(dummy.df$fill)[-c(4,8)]
fill <- sort(fill)
# Legend labels: the fill level with the ".<digits>" part removed, named by fill level.
labels <- gsub("\\.\\d+", "", fill)
labels <- setNames(labels, fill)
# Six colours from the "Paired" palette, named by fill level so both scales share them.
colors <- scales::brewer_pal(type="qual", palette="Paired")(6)
colors <- setNames(colors, fill)
library(ggnewscale)
ggplot(dummy.df, aes(x = dummy, y = X1, fill = fill)) +
# First boxplot layer + first fill scale: legend limited to the levels ending in "5".
geom_boxplot(aes(fill = fill), lwd=0.1,outlier.size = 0.01) +
scale_fill_manual(name = "n = 5", breaks= fill[grepl("5$", fill)], labels = labels[grepl("5$", fill)], values = colors,
guide = guide_legend(title.position = "left", order = 1)) +
# Start a second fill scale; the layer and scale that follow belong to it.
new_scale_fill() +
# Second boxplot layer + second fill scale: legend limited to the levels ending in "10".
geom_boxplot(aes(fill = fill), lwd=0.1,outlier.size = 0.01) +
scale_fill_manual(name = "n = 10", breaks = fill[grepl("10$", fill)], labels = labels[grepl("10$", fill)], values = colors,
guide = guide_legend(title.position = "left", order = 2)) +
facet_grid(~Complexity) +
theme(legend.position = 'bottom') +
# NOTE(review): this comes after new_scale_fill(); check that it does not replace the
# guide settings (title.position, order) given in the scale above.
guides(fill = guide_legend(nrow=1)) +
geom_line(aes(x = dummy,
group=interaction(Pattern,nsim,n)),
size = 0.35, alpha = 0.35, colour = I("#525252")) +
geom_point(aes(x = dummy,
group=interaction(Pattern,nsim,n)),
size = 0.35, alpha = 0.25, colour = I("#525252")) +
scale_x_discrete(labels = c("X", "Y", "Z"), breaks = paste("A.10.", c("X", "Y", "Z"), sep = ""),drop=FALSE) +
xlab("Pattern")
#> Warning: Removed 2 rows containing non-finite values (new_stat_boxplot).

ggplot: filling color based on condition

I want to plot two categorical variables (group, condition) and one numeric variable (value). In addition, I want to base the filling color on the significance of the values (significant bars should be grey, the rest white). With the following code, however, only some significant bars are colored in grey.
# Dodged horizontal bars of `value` per group and condition, with a line range from
# ci_lower to ci_upper on each bar.
plot <- ggplot(dat, aes(group, value))+
geom_col(aes(fill = condition), position = position_dodge(0.8), width = .7, color= "black") +
# NOTE(review): fill is mapped to `condition`, but `values` is a per-row vector derived
# from `significance`. The colours are therefore assigned to the condition levels, not
# to individual bars — presumably why only some significant bars come out grey.
scale_fill_manual(values = ifelse(dat$significance > .05, "white", "grey")) +
geom_linerange(aes(group = condition, ymin = ci_lower, ymax= ci_upper), position = position_dodge(0.8)) +
coord_flip(ylim =c(-.2,1))
plot
here is my data:
# Example data: one row per group x condition, with the estimate (`value`), its
# `significance` value and the interval bounds.
# NOTE(review): in every row ci_lower is larger than ci_upper (e.g. 0.52 vs -0.03), so
# the two columns look swapped — confirm with the data source.
dat <- structure(list(group = c("friends", "parent", "esm", "friends", "parent", "esm"),
value = c(0.25, 0.44, 0.33, 0.47, 0.25, 0.32),
significance = c(0.08, 0, 0, 0, 0.01, 0),
condition = c("S1", "S1", "S1", "S2", "S2", "S2"),
trait = c("E", "E", "E", "E", "E", "E"),
ci_lower = c(0.52, 0.74, 0.53, 0.67, 0.44, 0.49),
ci_upper = c(-0.03, 0.14, 0.14, 0.27, 0.06, 0.15)),
row.names = c(1L,2L, 3L, 16L, 17L, 18L), class = "data.frame")
You can add an inline mutate to create a column to specify the color group based on significance. The key here is to use the group aesthetic so the bars can still be dodged and positioned correctly based on the condition variable.
library(dplyr)
library(ggplot2)

dat %>%
  # sig is TRUE for bars that are significant at the 5% level.
  mutate(sig = significance < .05) %>%
  # group = condition keeps the bars dodged by condition even though the fill
  # is driven by significance.
  ggplot(aes(group, value, group = condition)) +
  geom_col(
    aes(fill = sig),
    position = position_dodge(0.8),
    color = "black",
    width = .7
  ) +
  # Name the colours explicitly: unnamed values are assigned by level position,
  # so a data set in which every bar (or no bar) is significant would get the
  # wrong colour.
  scale_fill_manual(values = c(`FALSE` = "white", `TRUE` = "grey")) +
  geom_linerange(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = position_dodge(0.8)
  ) +
  coord_flip(ylim = c(-.2, 1))
Gives this plot:
However, I think you need another aesthetic to distinguish condition in addition to significance. Color is one option, but this is a nice place to use ggpattern which will be more obvious than the outline color and keep the B&W look.
Here's an example:
library(dplyr)
library(ggplot2)
library(ggpattern)

dat %>%
  # sig is TRUE for bars that are significant at the 5% level, so the legend
  # entry "TRUE" really means "significant".
  mutate(sig = significance < .05) %>%
  ggplot(aes(group, value, group = condition)) +
  # Fill shows significance; the stripe angle distinguishes the conditions.
  geom_col_pattern(
    aes(fill = sig, pattern_angle = condition),
    position = position_dodge(0.8),
    pattern_fill = "black",
    pattern_spacing = 0.025,
    pattern = "stripe",
    width = .7,
    color = "black"
  ) +
  scale_pattern_angle_discrete(range = c(45, 135)) +
  # Named values tie each colour to its meaning instead of to level position.
  scale_fill_manual(values = c(`TRUE` = "grey", `FALSE` = "white")) +
  geom_linerange(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = position_dodge(0.8)
  ) +
  coord_flip(ylim = c(-.2, 1))
Which gives this plot:
Finally, it's worth noting that the color of a bar is not usually used to denote significance of a statistical metric; a much more common convention would be to use asterisk to indicate relevant p value thresholds (e.g. ** p < 0.01) or letters to indicate membership in a grouped analysis such as an ANOVA. These can be easily implemented using the ggpubr package. That would leave fill color free to indicate the grouping by condition.
It can also be useful:
library(ggplot2)

# Fill by the combination of condition and "not significant" (significance > .05).
# The interaction has the levels S1.FALSE, S2.FALSE, S1.TRUE and S2.TRUE, where
# FALSE means significant (grey) and TRUE means not significant (white).
ggplot(dat, aes(group, value)) +
  geom_col(
    aes(fill = interaction(condition, significance > .05)),
    position = position_dodge(0.8), width = .7, color = "black"
  ) +
  # The colours are named by level. With unnamed values and explicit breaks,
  # ggplot2 pairs the values with the breaks, which would colour the
  # non-significant S1.TRUE bar grey and leave S1.FALSE without a colour.
  scale_fill_manual(
    values = c(
      S1.FALSE = "grey", S2.FALSE = "grey",
      S1.TRUE = "white", S2.TRUE = "white"
    ),
    breaks = c('S2.FALSE', 'S1.TRUE'),
    labels = c('S2', 'S1')
  ) +
  geom_linerange(
    aes(group = condition, ymin = ci_lower, ymax = ci_upper),
    position = position_dodge(0.8)
  ) +
  coord_flip(ylim = c(-.2, 1)) +
  labs(fill = 'Var')
Output:

Reorder panels in ridgeline plot

I have a dataframe like this:
# Simulated data: 1000 normal draws for each of the methods A, B and C
set.seed(3467)
df <- data.frame(
  method = rep(c("A", "B", "C"), each = 1000),
  beta = c(
    rnorm(1000, mean = 0, sd = 1),
    rnorm(1000, mean = 2, sd = 1.4),
    rnorm(1000, mean = 0, sd = 0.5)
  )
)
I wish to create a ridgeline plot similar to this:
library(ggplot2)
library(ggridges)

# Ridgeline plot of beta per method, drawn over a grey reference band, with
# quantile lines at 2.5%, 50% and 97.5%.
ggplot() +
  # Grey band over the full plot height between x = -0.391 and x = 0.549.
  geom_rect(
    data = data.frame(x = 1),
    xmin = -0.391, xmax = 0.549, ymin = -Inf, ymax = Inf,
    alpha = 0.5, fill = "gray"
  ) +
  geom_density_ridges(
    data = df,
    aes(x = beta, y = method, color = method, fill = method),
    size = 0.75
  ) +
  xlim(-5, 5) +
  scale_fill_manual(values = c("#483d8b50", "#0072B250", "#228b2250")) +
  scale_color_manual(values = c("#483d8b", "#0072B2", "#228b22"), guide = "none") +
  stat_density_ridges(
    data = df,
    aes(x = beta, y = method, color = method, fill = method),
    quantile_lines = TRUE, quantiles = c(0.025, 0.5, 0.975),
    alpha = 0.6, size = 0.75
  ) +
  # expansion() replaces expand_scale(), which is deprecated in ggplot2.
  scale_y_discrete(expand = expansion(add = c(0.1, 0.9)))
However, I wish to order the y-axis in the order of method= "B", "C", "A" not method= "A", "B", "C"
I have tried the following method, without success, to reorder the density plots:
# Attempt to reorder the ridges: relevel `method` in a pipe and plot the result.
# NOTE(review): fct_relevel() is a forcats function and forcats is not attached in this
# snippet — confirm it is loaded elsewhere.
library(dplyr)
df %>%
mutate(method = fct_relevel(method,
"B", "C", "A"))%>%
ggplot() +
geom_rect(data = data.frame(x = 1),
xmin = -0.391, xmax = 0.549, ymin = -Inf, ymax = Inf,
alpha = 0.5, fill = "gray") +
# NOTE(review): this layer passes data = df, i.e. the original data frame, not the
# releveled data coming through the pipe.
geom_density_ridges(data = df, aes(x = beta, y = method, color = method, fill = method),
size=0.75)+
xlim(-5,5)+
scale_fill_manual(values = c("#483d8b50", "#0072B250","#228b2250")) +
scale_color_manual(values = c("#483d8b", "#0072B2", "#228b22"), guide = "none") +
# NOTE(review): same here — data = df is the original, un-releveled data frame.
stat_density_ridges(data = df, aes(x = beta, y = method, color = method, fill = method),
quantile_lines = TRUE, quantiles = c(0.025, 0.5, 0.975), alpha = 0.6, size=0.75)+
scale_y_discrete(expand = expand_scale(add = c(0.1, 0.9)))
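You were nearly there. The releveling itself works, but every layer in your plot passes `data = df`, which replaces the releveled data coming through the pipe with the original data frame. Save the releveled data back into `df` (or drop `data = df` from the layers) and the new order is used. Note that `fct_relevel()` has no `levels` argument: the level names are passed through `...`, so naming the first one is not necessary.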
library(tidyverse)
library(ggridges)

set.seed(3467)
df <- data.frame(
  method = c(rep("A", 1000), rep("B", 1000), rep("C", 1000)),
  beta = c(
    rnorm(1000, mean = 0, sd = 1),
    rnorm(1000, mean = 2, sd = 1.4),
    rnorm(1000, mean = 0, sd = 0.5)
  )
)

# Here is the main change: the releveled factor is stored back into `df`, so
# every layer that reads `df` sees the new order. fct_relevel() takes the
# level names through `...`; it has no `levels` argument, so none is named.
df <- df %>%
  mutate(method = fct_relevel(method, "B", "C", "A"))

# Data and aesthetics are set once in ggplot() and inherited by the layer.
ggplot(df, aes(x = beta, y = method, color = method, fill = method)) +
  geom_density_ridges()
#> Picking joint bandwidth of 0.225
Created on 2020-02-17 by the reprex package (v0.3.0)
If you change the method column to ordered factor, it should work:
# Levels are listed in the requested order B, C, A; a discrete axis follows
# the level order of the factor.
df$method <- factor(df$method, levels = c("B", "C", "A"), ordered = TRUE)

Resources