How does one plot "filled" bars with counts labels using ggplot2?
I'm able to do this for "stacked" bars. But I'm very confused otherwise.
Here is a reproducible example using dplyr and the mpg dataset
library(ggplot)
library(dplyr)
mpg_summ <- mpg %>%
group_by(class, drv) %>%
summarise(freq = n()) %>%
ungroup() %>%
mutate(total = sum(freq),
prop = freq/total)
g <- ggplot(mpg_summ, aes(x = class, y = prop, group = drv))
g + geom_col(aes(fill = drv)) +
geom_text(aes(label = freq), position = position_stack(vjust = .5))
But if I try to plot counts for filled bars it does not work
g <- ggplot(mpg_summ, aes(x=class, fill=drv))
g + stat_count(aes(y = (..count..)/sum(..count..)), geom="bar", position="fill") +
scale_y_continuous(labels = percent_format())
Further, if I try:
g <- ggplot(mpg_summ, aes(x=class, fill=drv))
g + geom_bar(aes(y = freq), position="fill") +
geom_text(aes(label = freq), position = "fill") +
scale_y_continuous(labels = percent_format())
I get:
Error: stat_count() must not be used with a y aesthetic.
I missed the fill portion from the last question. This should get you there:
library(ggplot2)
library(dplyr)
mpg_summ <- mpg %>%
group_by(class, drv) %>%
summarise(freq = n()) %>%
ungroup() %>%
mutate(total = sum(freq),
prop = freq/total)
g <- ggplot(mpg_summ, aes(x = class, y = prop, group = drv))
g + geom_col(aes(fill = drv), position = 'fill') +
geom_text(aes(label = freq), position = position_fill(vjust = .5))
Related
I am plotting a geom_point for several groups (Loc) and want in addition a line that indicates the mean of the points for each group. The groups should be ordered based on the mean of the Size for each group. I am trying to do this by reorder(Loc, Size.Mean) but it does not reorder.
ggplot(data,aes(Loc,Size,color=Loc)) +
geom_point() +
geom_point(data %>%
group_by(Loc) %>%
summarise(Size.Mean = mean(Size)),
mapping = aes(y = Size.Mean, x = reorder(Loc, Size.Mean)),
color = "black", shape = '-') +
theme_pubr(base_size=8) +
scale_y_continuous(trans="log10") +
theme(axis.text.x = element_text(angle = 90,hjust = 1)) +
theme(legend.position = "none")
ggplot orders discrete x ticks according to their level if the variable is a factor:
library(tidyverse)
iris_means <-
iris %>%
group_by(Species) %>%
summarise(mean = mean(Sepal.Length)) %>%
arrange(-mean)
iris %>%
mutate(Species = Species %>% factor(levels = iris_means$Species)) %>%
ggplot(aes(Species, Sepal.Length)) +
geom_point() +
geom_crossbar(data = iris_means, mapping = aes(y = mean, ymin = mean, ymax = mean), color = "red")
Created on 2021-09-10 by the reprex package (v2.0.1)
I have a stacked bar chart of proportions, so all bars total 100%. I would like to add a label to the end of each bar (i.e. on the far right-hand side of each bar, not within the bar itself) to show the total number of observations in each bar.
Something like this gets close-ish...
library(dplyr)
library(ggplot2)
data("mtcars")
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
facet_wrap(~am, ncol = 1) +
theme_minimal() +
scale_x_continuous(labels = scales::percent) +
# add labels to show total n for each bar
geom_text(aes(label = paste0("n = ", stat(y)), group = cyl),
stat = 'summary',
fun = sum)
...but (i) the values for my n labels clearly aren't the sums for each bar that I was expecting, and (ii) I can't figure out how to position the labels at the end of each bar. I thought I could specify a location on the x-axis within the geom_text aes, like this...
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
facet_wrap(~am, ncol = 1) +
theme_minimal() +
scale_x_continuous(labels = scales::percent) +
# add labels to show total n for each bar
geom_text(aes(label = paste0("n = ", stat(y)), group = cyl, x = 1),
stat = 'summary',
fun = sum)
...but I can't work out why that throws the x-axis scale out, and doesn't position all the labels at the same location on the scale.
Thanks in advance for any suggestions!
Try this:
library(dplyr)
library(ggplot2)
data("mtcars")
#Code
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
geom_text(aes(x=1.05,label = paste0("n = ", stat(y)), group = cyl),
hjust=0.5
)+
facet_wrap(~am, ncol = 1,scales = 'free')+
theme_minimal() +
scale_x_continuous(labels = scales::percent)
Output:
This is a modified version to add both proportions and numbers
library(dplyr)
library(ggplot2)
library(scales)
data("mtcars")
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill", alpha = 0.8) +
theme_minimal() +
scale_x_continuous(labels = scales::percent) +
# add labels to show total n for each bar
geom_text(aes(x = 1.1, , group = cyl,
label = paste0("n = ", stat(y))),
hjust = 0.5) +
geom_text(aes(x = prop, y = cyl, group = gear,
label = paste0('p =',round(stat(x),2))),
hjust = 0.5, angle = 0,
position = position_fill(vjust = .5)) +
facet_wrap(~am, ncol = 1, scales = 'free')
It's not the most elegant solution, but I got there in the end by expanding on #Duck's answer for the positioning of labels (thanks!), and calculating the totals to be used as labels outside of ggplot.
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
group_by(cyl, am) %>%
mutate(prop = n / sum(n)) %>%
mutate(column_total = sum(n)) %>%
ungroup() %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
geom_text(aes(x = 1.05, label = paste0("n = ", column_total))) +
facet_wrap(~am, ncol = 1, scales = 'free')+
theme_minimal() +
scale_x_continuous(labels = scales::percent)
I have a dataframe for which I want to visualize different things. In every barplot the same characteristic of the variable/attribute should be have the same color. For example:
data_mtcars <- mtcars
data <- data_mtcars %>%
group_by(am, gear) %>%
summarise(Freq = sum(mpg)) %>%
group_by(am) %>%
mutate(Prop = Freq / sum(Freq)) %>%
arrange(desc(Prop))
First plot with three characteristics of the variable "gear".
ggplot(data) +
aes(x = am, y = Prop, fill = reorder(gear, Prop), width=0.5) +
geom_col() + scale_y_continuous(labels = function(x) paste0(eval(x*100), "%")) +
geom_text(aes(label = if_else(Prop>0.05, scales::percent(Prop),NULL)), position = position_stack(0.4))+
theme_minimal() +
theme(legend.title = element_blank()) + ylab("") + xlab("") +
scale_fill_brewer(palette = "Set3")
which gives me purple color for characteristic "3" of variable gear. If I change the number of characteristics the color should not be changed.
df <- data[data$gear!=4,]
ggplot(df) +
aes(x = am, y = Prop, fill = reorder(gear, Prop), width=0.5) +
geom_col() + scale_y_continuous(labels = function(x) paste0(eval(x*100), "%")) +
# scale_x_date(breaks = unique(df_sum_EAD$Stichtag) , date_labels = "%d.%m.%Y") +
geom_text(aes(label = if_else(Prop>0.05, scales::percent(Prop),NULL)), position = position_stack(0.4)) + theme_minimal() +
theme(legend.title = element_blank()) + ylab("") + xlab("")+ scale_fill_brewer(palette = "Set3")
Now same characteristics have different colors (characteristic "3" is yellow). How to fix this problem. I have tried to fix the levels
of the factor variable but I dont know how to include in an appropriate argument in plot.
data_mtcars$gear <- factor(data_mtcars$gear, levels=levels(as.factor(data_mtcars$gear)), ordered=T)
To get consistent colors for categories you can make use of a named color vector which can then be used via scale_color/fill_manual to always set the same color for each category:
library(dplyr)
library(ggplot2)
data_mtcars <- mtcars
data <- data_mtcars %>%
group_by(am, gear) %>%
summarise(Freq = sum(mpg)) %>%
group_by(am) %>%
mutate(Prop = Freq / sum(Freq)) %>%
arrange(desc(Prop))
#> `summarise()` regrouping output by 'am' (override with `.groups` argument)
data <- mutate(data, gear = reorder(gear, Prop))
# Named vector of colors
colors_gear <- scales::brewer_pal(palette = "Set2")(length(levels(data$gear)))
colors_gear <- setNames(colors_gear, levels(data$gear))
make_plot <- function(d) {
ggplot(d) +
aes(x = am, y = Prop, fill = reorder(gear, Prop), width=0.5) +
geom_col() + scale_y_continuous(labels = function(x) paste0(eval(x*100), "%")) +
geom_text(aes(label = if_else(Prop>0.05, scales::percent(Prop),NULL)), position = position_stack(0.4)) + theme_minimal() +
theme(legend.title = element_blank()) + ylab("") + xlab("")+
scale_fill_manual(values = colors_gear)
}
make_plot(data)
make_plot(data[data$gear!=4,])
I am trying to create faceted geom_bar graphs with the following charactaristics:
The proportion of each answer per question is shown
Each bar is colored according to the response
The plot is faceted by question
I seem to be able to do any two of the conditions, but not all 3.
Question:
Is there a way to facet and calculate proportions using one variable, but colour/fill based on another variable?
Code:
df <- data.frame(
Question = rep(c('A', 'B', 'C'), each = 5),
Resp = sample(c('Yes', 'No', 'Unsure', NA), 15, T, c(0.3,0.3,0.3,0.1)),
stringsAsFactors = F
)
# Plot 1: grouping by question to get the right proportions, but has no colour
ggplot(df, aes(x = Resp, fill = Resp)) +
stat_count(aes(y = ..prop.., group = Question)) +
scale_y_continuous(labels = scales::percent_format()) +
facet_wrap(~ Question)
# Plot 2: grouping by response to get colour, but has wrong proportions
ggplot(df, aes(x = Resp, fill = Resp)) +
stat_count(aes(y = ..prop.., group = Resp)) +
scale_y_continuous(labels = scales::percent_format()) +
facet_wrap(~ Question)
Outputs:
This is a "ggplot2-only" option:
ggplot(df, aes(x = Resp)) +
geom_bar(aes(y = ..prop.., group = Question, fill = factor(..x..)), position = "dodge") +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_discrete(name = "Response", labels = c("No", "Unsure", "Yes", "NA")) +
facet_wrap(~ Question)
One way could be to calculate the proportions and then plot.
library(dplyr)
library(ggplot2)
df %>%
count(Question, Resp) %>%
group_by(Question) %>%
mutate(n = n/sum(n) * 100) %>%
ggplot() + aes(Resp, n, fill = Resp) +
geom_col() +
facet_wrap(~Question)
Plot without facet
df$n <- 1
df <- df %>% group_by(Question, Resp) %>% summarise(n = sum(n))
ggplot(df, aes(x=factor(Question), y=n, fill=Resp)) + geom_col()
Plot with facet
df <- df %>% group_by(Question, Resp) %>% summarise(n = sum(n)) %>% mutate(prop = n/5)
ggplot(df, aes(x=factor(Resp), y=prop, fill=Resp)) + geom_col() + facet_wrap(~Question)
I am having trouble drawing "dodges" line on "dodged" stacked bars.
dt = mtcars %>% group_by(am, cyl) %>% summarise(m = mean(disp))
dt0 = dt[dt$am == 0, ]
dt1 = dt[dt$am == 1, ]
dt0 %>% ggplot(aes(factor(cyl), m, fill = factor(cyl))) + geom_bar(stat = 'identity', position = 'dodge') +
geom_point(data = dt1, aes(factor(cyl), m, colour = factor(cyl)), position=position_dodge(width=0.9), colour = 'black')
What I would like is to draw a line from the top of the stacked bar to the black points of each cyl.
dt0 %>% ggplot(aes(factor(cyl), m, fill = factor(cyl))) + geom_bar(stat = 'identity', position = 'dodge') +
geom_point(data = dt1, aes(factor(cyl), m, colour = factor(cyl)), position=position_dodge(width=0.9), colour = 'black') +
geom_line(data = dt1, aes(factor(cyl), m, colour = factor(cyl), group = 1), position=position_dodge(width=0.9), colour = 'black')
However, the position=position_dodge(width=0.9) dodge doesn't work here.
Any idea ?
This is much easier to accomplish if you reshape your summary data:
dt <- mtcars %>%
group_by(am, cyl) %>%
summarise(m = mean(disp)) %>%
spread(am, m)
cyl 0 1
* <dbl> <dbl> <dbl>
1 4 135.8667 93.6125
2 6 204.5500 155.0000
3 8 357.6167 326.0000
While "0" and "1" are poor column names, they can still be used in aes() if you quote them in backticks. The calls to position_dodge() also become unnecessary:
dt %>% ggplot(aes(x = factor(cyl), y = `0`, fill = factor(cyl))) +
geom_bar(stat = 'identity') +
geom_point(aes(x = factor(cyl), y = `1`), colour = 'black') +
geom_segment(aes(x = factor(cyl), xend = factor(cyl), y = `0`, yend = `1`))