fill and group bar graphs by different variables - r

I am trying to create faceted geom_bar graphs with the following charactaristics:
The proportion of each answer per question is shown
Each bar is colored according to the response
The plot is faceted by question
I seem to be able to do any two of the conditions, but not all 3.
Question:
Is there a way to facet and calculate proportions using one variable, but colour/fill based on another variable?
Code:
df <- data.frame(
Question = rep(c('A', 'B', 'C'), each = 5),
Resp = sample(c('Yes', 'No', 'Unsure', NA), 15, T, c(0.3,0.3,0.3,0.1)),
stringsAsFactors = F
)
# Plot 1: grouping by question to get the right proportions, but has no colour
ggplot(df, aes(x = Resp, fill = Resp)) +
stat_count(aes(y = ..prop.., group = Question)) +
scale_y_continuous(labels = scales::percent_format()) +
facet_wrap(~ Question)
# Plot 2: grouping by response to get colour, but has wrong proportions
ggplot(df, aes(x = Resp, fill = Resp)) +
stat_count(aes(y = ..prop.., group = Resp)) +
scale_y_continuous(labels = scales::percent_format()) +
facet_wrap(~ Question)
Outputs:

This is a "ggplot2-only" option:
ggplot(df, aes(x = Resp)) +
geom_bar(aes(y = ..prop.., group = Question, fill = factor(..x..)), position = "dodge") +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_discrete(name = "Response", labels = c("No", "Unsure", "Yes", "NA")) +
facet_wrap(~ Question)

One way could be to calculate the proportions and then plot.
library(dplyr)
library(ggplot2)
df %>%
count(Question, Resp) %>%
group_by(Question) %>%
mutate(n = n/sum(n) * 100) %>%
ggplot() + aes(Resp, n, fill = Resp) +
geom_col() +
facet_wrap(~Question)

Plot without facet
df$n <- 1
df <- df %>% group_by(Question, Resp) %>% summarise(n = sum(n))
ggplot(df, aes(x=factor(Question), y=n, fill=Resp)) + geom_col()
Plot with facet
df <- df %>% group_by(Question, Resp) %>% summarise(n = sum(n)) %>% mutate(prop = n/5)
ggplot(df, aes(x=factor(Resp), y=prop, fill=Resp)) + geom_col() + facet_wrap(~Question)

Related

Placing data labels for stacked bar chart at top of bar

I have been attempting to add a label on top of each bar to represent the proportion that each ethnic group makes up in referrals.
For some reason I cannot get the labels to be placed at the top of each bar. How do I fix this?
My code below
freq <- df %>%
group_by(ethnicity) %>%
summarise(n = n()) %>%
mutate(f = round((n/sum(n)*100, 1))
df %>%
group_by(pathway) %>%
count(ethnicity) %>%
ggplot(aes(x = ethnicity, y = n , fill = pathway)) +
geom_bar(stat = "identity", position = "stack") +
geom_text(data = freq,
aes(x= ethnicity, y = f, label = f),
inherit.aes = FALSE) +
theme(legend.position = "bottom") +
scale_fill_manual(name = "",
values = c("light blue", "deepskyblue4"),
labels = "a", "b") +
xlab("") +
ylab("Number of Referrals") +
scale_y_continuous(breaks = seq(0, 2250, 250), expand = c(0,0)
Here is what it currently looks like
Since you are using the count as your y-axis position in geom_bar, you need to use the same thing in your geom_text to get the labels in the right place. Below is an example using mtcars dataset. Using vjust = -1 I put a little bit of space between the label and the bars to make it more legible and aesthetically pleasing.
library(tidyverse)
mtcars %>%
group_by(carb) %>%
summarise(n = n()) %>%
mutate(f = round(proportions(n) * 100, 1)) -> frq
mtcars %>%
group_by(gear) %>%
count(carb) -> df
df %>%
ggplot(aes(x = carb, y = n, fill = gear)) +
geom_bar(stat = "identity", position = "stack") +
geom_text(data = frq,
vjust = -1,
aes(x= carb, y = n, label = f),
inherit.aes = FALSE)
Created on 2022-10-31 by the reprex package (v2.0.1)

Order grouped scatterplot by mean

I am plotting a geom_point for several groups (Loc) and want in addition a line that indicates the mean of the points for each group. The groups should be ordered based on the mean of the Size for each group. I am trying to do this by reorder(Loc, Size.Mean) but it does not reorder.
ggplot(data,aes(Loc,Size,color=Loc)) +
geom_point() +
geom_point(data %>%
group_by(Loc) %>%
summarise(Size.Mean = mean(Size)),
mapping = aes(y = Size.Mean, x = reorder(Loc, Size.Mean)),
color = "black", shape = '-') +
theme_pubr(base_size=8) +
scale_y_continuous(trans="log10") +
theme(axis.text.x = element_text(angle = 90,hjust = 1)) +
theme(legend.position = "none")
ggplot orders discrete x ticks according to their level if the variable is a factor:
library(tidyverse)
iris_means <-
iris %>%
group_by(Species) %>%
summarise(mean = mean(Sepal.Length)) %>%
arrange(-mean)
iris %>%
mutate(Species = Species %>% factor(levels = iris_means$Species)) %>%
ggplot(aes(Species, Sepal.Length)) +
geom_point() +
geom_crossbar(data = iris_means, mapping = aes(y = mean, ymin = mean, ymax = mean), color = "red")
Created on 2021-09-10 by the reprex package (v2.0.1)

Label grouped bar plot in R

I'm tryng to add label to a grouped bar plot in r.
However I'm using percentege in the y axis, and I want the label to be count.
I've tried to use the geom_text() function, but I don't how exacly the parameters i need to use.
newdf3 %>%
dplyr::count(key, value) %>%
dplyr::group_by(key) %>%
dplyr::mutate(p = n / sum(n)) %>%
ggplot() +
geom_bar(
mapping = aes(x = key, y = p, fill = value),
stat = "identity",
position = position_dodge()
) +
scale_y_continuous(labels = scales::percent_format(),limits=c(0,1))+
labs(x = "", y = "%",title="")+
scale_fill_manual(values = c('Before' = "deepskyblue", 'During' = "indianred1", 'After' = "green2", '?'= "mediumorchid3"),
drop = FALSE, name="")
Here is an exemple of how I need it:
here's a sample of data I'm using:
key value
A Before
A After
A During
B Before
B Before
C After
D During
...
I also wanted to keep the bars with no value (label = 0).
Can someone help me with this?
Here is MWE of how to add count labels to a simple bar chart. See below for the case when these are grouped.
library(datasets)
library(tidyverse)
data <- chickwts %>%
group_by(feed) %>%
count %>%
ungroup %>%
mutate(p = n / sum(n))
ggplot(data, aes(x = feed, y = p, fill = feed)) +
geom_bar(stat = "identity") +
geom_text(stat = "identity",
aes(label = n), vjust = -1)
You should be able to do the same thing on your data.
EDIT: StupidWolf points out in the comments that the original example has grouped data. Adding position = position_dodge(0.9) in geom_text deals with this.
Again, no access to the original data, but here's a different MWE using mtcars showing this:
library(datasets)
library(tidyverse)
data <- mtcars %>%
as_tibble %>%
transmute(gear = as_factor(gear),
carb = as_factor(carb),
cyl = cyl) %>%
group_by(gear, carb) %>%
count
ggplot(data, aes(x = gear, y = n, fill = carb)) +
geom_bar(stat = "identity",
position = "dodge") +
geom_text(aes(label = n),
stat = "identity",
vjust = -1,
position = position_dodge(0.9))

Means barplot with confidence intervals?

I have a large dataset, where I have a variable Q1with 7 response/value options, and two groups (One and Two).
Q1<- c(6,4,2,4,7,1,4,7,4,5,4,4,2,6,1)
Group<- c(One, Two, One, Two,Two, Two, One, One, One, One, Two, One, One, Two, Two)
I'm trying to convert a simple frequency plot (number of observations in each response category by group) and instead plot the means with confidence intervals (as in the image below).
df1<- filter(df, Q1!="-99",df$Group=="One"|df$Group=="Two")
ggplot(data = df1, aes(x = Q1)) +
geom_bar(aes(fill = df1$Group), position = "dodge", stat="summary", fun.y="mean") + labs(title="Graph Title")
When i run this, I get the following error:
Error: stat_summary requires the following missing aesthetics: y
Any ideas are appreciated!
Here is an example. You need to pre-compute CIs yourself:
library(dplyr)
library(ggplot2)
set.seed(123)
df <- data.frame(g = c(rep("A",10),rep("B",10),rep("C",10)),
val = c(rnorm(10,100,5), rnorm(10,200,10), rnorm(10,300,50)))
df <- df %>% group_by(g) %>% summarise(m = mean(val),
stdv = sd(val))
ggplot(df, aes(g,m,fill=g)) +
geom_bar(stat="identity", color="black",
position=position_dodge()) +
geom_errorbar(aes(ymin=m-stdv, ymax=m+stdv), width=.2,
position=position_dodge(.9))
Output
UPDATE
df <- data.frame(
Q1 = c(6,4,2,4,7,1,4,7,4,5,4,4,2,6,1),
Group = sample(c("One","Two"), 15, TRUE),
stringsAsFactors = FALSE)
df <- df %>% group_by(Group) %>% summarise(m = mean(Q1),
stdv = sd(Q1))
ggplot(df, aes(Group,m,fill=Group)) +
geom_bar(stat="identity", color="black",
position=position_dodge()) +
geom_errorbar(aes(ymin=m-stdv, ymax=m+stdv), width=.2,
position=position_dodge(.9))
what about something like this
`ggplot(df.df, aes(x=category, color=group)) +
stat_summary(aes(y = value),
fun.y = mean, na.rm = TRUE,
geom = "bar",
size = 3) +
stat_summary(aes(y = value),
fun.data = mean_se, na.rm = TRUE,
geom = "errorbar",
width = 0.2) `

Plotting labels on bar plots with position = "fill" in R ggplot2

How does one plot "filled" bars with counts labels using ggplot2?
I'm able to do this for "stacked" bars. But I'm very confused otherwise.
Here is a reproducible example using dplyr and the mpg dataset
library(ggplot)
library(dplyr)
mpg_summ <- mpg %>%
group_by(class, drv) %>%
summarise(freq = n()) %>%
ungroup() %>%
mutate(total = sum(freq),
prop = freq/total)
g <- ggplot(mpg_summ, aes(x = class, y = prop, group = drv))
g + geom_col(aes(fill = drv)) +
geom_text(aes(label = freq), position = position_stack(vjust = .5))
But if I try to plot counts for filled bars it does not work
g <- ggplot(mpg_summ, aes(x=class, fill=drv))
g + stat_count(aes(y = (..count..)/sum(..count..)), geom="bar", position="fill") +
scale_y_continuous(labels = percent_format())
Further, if I try:
g <- ggplot(mpg_summ, aes(x=class, fill=drv))
g + geom_bar(aes(y = freq), position="fill") +
geom_text(aes(label = freq), position = "fill") +
scale_y_continuous(labels = percent_format())
I get:
Error: stat_count() must not be used with a y aesthetic.
I missed the fill portion from the last question. This should get you there:
library(ggplot2)
library(dplyr)
mpg_summ <- mpg %>%
group_by(class, drv) %>%
summarise(freq = n()) %>%
ungroup() %>%
mutate(total = sum(freq),
prop = freq/total)
g <- ggplot(mpg_summ, aes(x = class, y = prop, group = drv))
g + geom_col(aes(fill = drv), position = 'fill') +
geom_text(aes(label = freq), position = position_fill(vjust = .5))

Resources