Order the stacked barplot by proportion or percent in R - r

I can order my plot based on count but not by proportion. I want the bars on the x-axis to be arranged by the proportion of "c". Here is my code:
# Example data in long format: one row per Name/variable pair; NA marks a
# missing measurement.
long <- data.frame(
  Name = c("abc", "abc", "abc", "gif", "gif", "gif", "xyz", "xyz", "xyz"),
  variable = c("a", "b", "c", "a", "b", "c", "c", "b", "a"),
  value = c(4, 6, NA, 2, 8, 1, 6, NA, NA)
)

# Per-Name totals (ignoring NA), used as the text label for each bar.
# TRUE is spelled out because the shorthand T can be reassigned.
long_totals <- long %>%
  group_by(Name) %>%
  summarise(Total = sum(value, na.rm = TRUE))

# Make "c", "b", "a" the level order of `variable`, sort the rows by it, and
# freeze the Name levels in the order in which they then appear.
p <- long %>%
  mutate(variable = fct_relevel(variable, c("c", "b", "a"))) %>%
  arrange(variable) %>%
  mutate(Name = fct_inorder(Name))

# Bars filled to 100% per Name, with the totals drawn as text labels.
p %>%
  ggplot() +
  aes(x = Name, y = value, fill = variable) +
  geom_bar(position = "fill", stat = "summary") +
  geom_text(
    data = long_totals,
    aes(y = 100, x = Name, label = Total),
    size = 7,
    position = position_fill(vjust = 1.02)
  ) +
  scale_y_continuous(labels = scales::percent_format())
Also, I am plotting total numbers using geom_text

Add the proportion c by group like this, when generating p:
# `...` stands for the pipeline that builds `p`; append these steps to it.
# prop_c is the share of each Name's total that belongs to variable "c".
... %>%
  group_by(Name) %>%
  mutate(
    prop_c = sum(value[variable == "c"], na.rm = TRUE) /
      sum(value, na.rm = TRUE)
  ) %>%
  # Drop the grouping so later steps do not silently operate per Name.
  ungroup()
Then plot, using reorder:
# Order the bars by decreasing proportion of "c". Negating prop_c gives a
# descending order on every R version; the `decreasing` argument of reorder()
# exists only in recent R releases (4.3.0 onward, per the R NEWS) and is
# silently ignored by older ones.
ggplot() +
  geom_col(
    data = p,
    aes(x = reorder(Name, -prop_c), y = value, fill = variable),
    position = "fill"
  ) +
  geom_text(
    data = long_totals,
    aes(y = 100, x = Name, label = Total),
    size = 7,
    position = position_fill(vjust = 1.02)
  ) +
  scale_y_continuous(labels = scales::percent_format())

Related

Placing data labels for stacked bar chart at top of bar

I have been attempting to add a label on top of each bar to represent the proportion that each ethnic group makes up in referrals.
For some reason I cannot get the labels to be placed at the top of each bar. How do I fix this?
My code below
# Percentage of all referrals contributed by each ethnicity, to one decimal.
freq <- df %>%
  group_by(ethnicity) %>%
  summarise(n = n()) %>%
  mutate(f = round(n / sum(n) * 100, 1))

# Stacked counts per ethnicity, split by pathway, with the percentage labels.
df %>%
  group_by(pathway) %>%
  count(ethnicity) %>%
  ggplot(aes(x = ethnicity, y = n, fill = pathway)) +
  geom_bar(stat = "identity", position = "stack") +
  # NOTE: y = f places each label at its percentage value rather than at the
  # bar height; this is the behaviour the question is asking about.
  geom_text(
    data = freq,
    aes(x = ethnicity, y = f, label = f),
    inherit.aes = FALSE
  ) +
  theme(legend.position = "bottom") +
  scale_fill_manual(
    name = "",
    values = c("light blue", "deepskyblue4"),
    labels = c("a", "b")
  ) +
  xlab("") +
  ylab("Number of Referrals") +
  scale_y_continuous(breaks = seq(0, 2250, 250), expand = c(0, 0))
Here is what it currently looks like
Since you are using the count as your y-axis position in geom_bar, you need to use the same thing in your geom_text to get the labels in the right place. Below is an example using mtcars dataset. Using vjust = -1 I put a little bit of space between the label and the bars to make it more legible and aesthetically pleasing.
library(tidyverse)
# Count of cars per carb value plus its percentage of all cars (one decimal).
frq <- mtcars %>%
  group_by(carb) %>%
  summarise(n = n()) %>%
  mutate(f = round(proportions(n) * 100, 1))

# Count of cars per gear/carb combination; these become the stacked segments.
df <- mtcars %>%
  group_by(gear) %>%
  count(carb)

# The labels use the per-carb count n as their y position, i.e. the same
# quantity the stacked bars add up to; vjust = -1 lifts them above the bar.
df %>%
  ggplot(aes(x = carb, y = n, fill = gear)) +
  geom_bar(stat = "identity", position = "stack") +
  geom_text(
    data = frq,
    aes(x = carb, y = n, label = f),
    vjust = -1,
    inherit.aes = FALSE
  )
Created on 2022-10-31 by the reprex package (v2.0.1)

Staggering labels or adding only selected labels on ggplot stacked bar chart

I am plotting a series of Likert scale questions as stacked bar charts. They need to be labelled, but some of the segments are too small and the labels overlap, as in the example below (sample code follows). Is there a clever way either to label only the sections that are big enough to fit the label, or to stagger how the labels are presented?
# Two Likert-style answer vectors with deliberately small categories.
q1a <- rep(c("A", "B", "C", "D"), times = c(2, 5, 45, 45))
q1b <- rep(c("A", "B", "C", "D"), times = c(45, 45, 2, 5))
data <- data.frame(q1a, q1b)

# Share of each answer within each question, drawn as horizontal filled bars
# with a percentage label per segment.
data %>%
  select(q1a:q1b) %>%
  pivot_longer(cols = everything(), names_to = "Question") %>%
  filter(!is.na(value)) %>%
  count(Question, value) %>%
  group_by(Question) %>%
  mutate(Pct = n / sum(n)) %>%
  ggplot(aes(fill = value, x = Pct, y = fct_rev(Question))) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = paste0(sprintf("%1.0f", Pct * 100), "%")),
    position = position_stack(vjust = 0.5),
    size = 3
  ) +
  scale_fill_brewer(palette = "Blues") +
  scale_x_continuous(labels = scales::percent) +
  labs(title = "Question 1", y = "Question", x = "Percentage") +
  theme_bw() +
  theme(legend.title = element_blank())
If I were you, I would only display labels for Pct greater than 5%, using if_else() inside geom_text(aes()). If Pct is 5% or less, nothing is displayed.
Also, since your geom_bar position is fill, you should also use position = position_fill() in geom_text to align the position.
library(tidyverse)
# Same chart as in the question, but a segment is labelled only when its share
# exceeds 5%, and the labels use position_fill() to match the filled bars.
data %>%
  select(q1a:q1b) %>%
  pivot_longer(cols = everything(), names_to = "Question") %>%
  filter(!is.na(value)) %>%
  dplyr::count(Question, value) %>%
  group_by(Question) %>%
  mutate(Pct = n / sum(n)) %>%
  ggplot(aes(fill = value, x = Pct, y = fct_rev(Question))) +
  geom_bar(position = "fill", stat = "identity") +
  geom_text(
    # NA_character_ (not NULL) is the "no label" value: if_else() needs a
    # vector of the same type as the labels, and geom_text() drops NA labels.
    aes(label = if_else(
      Pct > 0.05,
      paste0(sprintf("%1.0f", Pct * 100), "%"),
      NA_character_
    )),
    position = position_fill(vjust = 0.5),
    size = 3
  ) +
  scale_fill_brewer(palette = "Blues") +
  theme_bw() +
  scale_x_continuous(labels = scales::percent) +
  labs(title = "Question 1", y = "Question", x = "Percentage") +
  theme(legend.title = element_blank())

R ggplot: Combine a barplot and a line chart from a long dataset

I have a wide dataset that records the blood glucose values from 10 subjects.
library(dplyr)
# Simulated wide data: one row per subject, one glucose column per time point.
# Values are drawn at random, so they differ between runs.
df_wide <- data.frame(
  ID = seq(1, 10),
  gender = sample(0:1, 10, replace = TRUE),
  glucose_0 = sample(100:125, 10, replace = TRUE),
  glucose_60 = sample(180:200, 10, replace = TRUE),
  glucose_120 = sample(130:160, 10, replace = TRUE),
  glucose_180 = sample(100:125, 10, replace = TRUE)
)
I then transformed it into a long dataset using gather:
# Reshape to long format: one row per subject and time point, with the source
# column name in `Time` and the measurement in `glucose`. pivot_longer()
# replaces the superseded gather(); it is namespaced because only dplyr is
# attached above.
df_long <- df_wide %>%
  tidyr::pivot_longer(
    cols = c("glucose_0", "glucose_60", "glucose_120", "glucose_180"),
    names_to = "Time",
    values_to = "glucose"
  ) %>%
  arrange(ID)
To show how the glucose values changed from 0 min to 180 min, I then made the following line chart:
# One line per subject across the four time points; line type encodes gender.
df_long %>%
  ggplot(aes(x = Time, y = glucose, group = ID)) +
  geom_line(aes(linetype = as.factor(gender))) +
  geom_point() +
  theme_classic() +
  # Fix the time order explicitly and show only the minutes as tick labels.
  scale_x_discrete(
    limits = c("glucose_0", "glucose_60", "glucose_120", "glucose_180"),
    labels = c("0", "60", "120", "180")
  ) +
  theme(legend.position = "bottom") +
  labs(
    x = "Time",
    y = "Glucose",
    # Gender is mapped to linetype, so that is the legend being titled.
    linetype = "Gender"
  )
Finally, to show the glucose at each time point, I also made a barplot:
# Glucose per time point as dodged bars, filled by gender.
df_long %>%
  ggplot(aes(x = Time, y = glucose, fill = as.factor(gender))) +
  geom_col(position = position_dodge()) +
  theme_classic() +
  scale_x_discrete(
    limits = c("glucose_0", "glucose_60", "glucose_120", "glucose_180")
  )
My question is: How to combine the line chart and the barplot into one figure that looks like this?
To plot the mean glucose level per gender and time point as both bars and lines:
# Collapse to one mean per gender and time point, then draw that mean both as
# a dodged bar and as a line. summarise() (instead of mutate()) leaves a single
# row per bar, so identical bars and lines are not drawn on top of each other,
# and .groups = "drop" returns ungrouped data.
df_long %>%
  group_by(gender, Time) %>%
  summarise(glucose = mean(glucose), .groups = "drop") %>%
  ggplot(aes(x = Time, y = glucose, fill = as.factor(gender))) +
  geom_col(position = position_dodge()) +
  geom_line(aes(linetype = as.factor(gender), group = gender)) +
  theme_classic() +
  scale_x_discrete(
    limits = c("glucose_0", "glucose_60", "glucose_120", "glucose_180")
  )
Are you looking for such a solution?
library(tidyverse)
# Reshape the glucose columns to long format (pivot_longer's default output
# columns are `name` and `value`), then draw dodged bars per gender and
# overlay the per-gender mean as a line with points.
df_wide %>%
  pivot_longer(cols = starts_with("glucose")) %>%
  mutate(gender = fct_inorder(factor(gender))) %>%
  arrange(ID) %>%
  ggplot(aes(x = name, y = value)) +
  geom_col(
    aes(fill = gender, group = gender),
    width = 0.5,
    position = position_dodge()
  ) +
  stat_summary(
    aes(group = gender),
    fun = mean, geom = "line", size = 1, alpha = 0.9
  ) +
  stat_summary(
    aes(group = gender),
    fun = mean, geom = "point", size = 2, alpha = 0.9
  ) +
  labs(x = "Time", y = "Glucose", fill = "Gender") +
  scale_x_discrete(
    limits = c("glucose_0", "glucose_60", "glucose_120", "glucose_180"),
    labels = c("0", "60", "120", "180")
  ) +
  theme_classic() +
  theme(legend.position = "bottom")

Re-positioning axis labels in facet_wrapped plots with free y axis in ggplot2

I have a facet_wrapped plot with a free y axis in ggplot2. I was wondering whether it is possible to reposition some of the flipped x-axis labels as indicated by the red arrows in the picture below.
library(tidyverse)
# Survey data; columns 2 to 5 are renamed to the four question topics.
data <- read_csv('https://raw.githubusercontent.com/rnorouzian/e/master/surv.csv')
names(data)[2:5] <- c("Representation", "Solidification", "Application", "Confidence")

# One flipped bar chart of answer counts per topic, each with its own axis.
# Answers are wrapped at 20 characters so the labels fit.
data %>%
  pivot_longer(cols = -id) %>%
  mutate(value = str_wrap(value, 20)) %>%
  ggplot() +
  geom_bar(aes(value, fill = name), show.legend = FALSE) +
  facet_wrap(~name, scales = "free_y") +
  coord_flip() +
  labs(y = "Students", x = "") +
  theme(axis.text.y = element_text(size = 8))
We can reorder after creating the frequency with count
library(dplyr)
library(tidyr)
library(ggplot2)
# Count each answer per topic first, then order the answers by that count so
# the bars within each facet appear sorted.
data %>%
  pivot_longer(cols = -id) %>%
  mutate(
    name = name,
    # Namespaced: stringr is not among the packages attached above.
    value = stringr::str_wrap(value, 20)
  ) %>%
  count(name, value) %>%
  ggplot(aes(x = reorder(value, n), y = n, fill = name)) +
  geom_bar(show.legend = FALSE, stat = "identity") +
  facet_wrap(. ~ name, scales = "free_y") +
  coord_flip() +
  labs(y = "Students", x = "") +
  theme(axis.text.y = element_text(size = 8))
-output
Or if it is a custom order, then have to create the custom order vector and use that to change the 'value' to a factor with levels specified in that order
# Long-format survey answers, wrapped at 20 characters.
data1 <- data %>%
  pivot_longer(cols = -id) %>%
  mutate(value = str_wrap(value, 20))

# Hand-picked display order: positions index the distinct answers in their
# order of first appearance.
lvls <- unique(data1$value)[c(3, 10, 1, 5, 9, 4, 8, 7, 6, 2)]

# Turning `value` into a factor with these levels is the only change compared
# with the original plot.
data1 %>%
  mutate(value = factor(value, levels = lvls)) %>%
  ggplot() +
  geom_bar(aes(value, fill = name), show.legend = FALSE) +
  facet_wrap(~name, scales = "free_y") +
  coord_flip() +
  labs(y = "Students", x = "") +
  theme(axis.text.y = element_text(size = 8))
This code should get the exact picture you want, by reordering the factor after you have pivoted longer
# Reorder the answer factor after pivoting: "Agree" is moved to just after the
# first level, and four assignment statements are moved to the end in the
# order listed.
data %>%
  pivot_longer(cols = -id) %>%
  mutate(
    value = factor(str_wrap(value, 20)),
    value = forcats::fct_relevel(value, "Agree", after = 1),
    value = forcats::fct_relevel(
      value,
      "The assignment\nhelped me solidify\nthe key concepts",
      "The assignment\nreflected the class\ninstructions",
      "The assignment\nhighly reflected the\nclass instructions",
      "The assignment\ngave me a great\nopportunity to apply\nwhat I learned",
      after = Inf
    )
  ) %>%
  ggplot() +
  geom_bar(aes(value, fill = name), show.legend = FALSE) +
  facet_wrap(~name, scales = "free_y") +
  coord_flip() +
  labs(y = "Students", x = "") +
  theme(axis.text.y = element_text(size = 8))

Arranging factors in increasing order

I am trying to create two plots which should display frequency in a decreasing order.
# Prepare data that resembles the real data: one row per Hair/Eye/Sex cell,
# a combined Hair-Eye factor, and each cell's share of its Sex total.
test <- data.frame(HairEyeColor) %>%
  mutate(combi = factor(paste(Hair, Eye))) %>%
  group_by(Sex) %>%
  mutate(prop = Freq / sum(Freq)) %>%
  ungroup()

# Sort rows by frequency.
# NOTE(review): setorder() is presumably data.table::setorder -- confirm.
freq_test_count <- test %>%
  setorder(Freq)

# Flipped bar chart of proportions per Sex, labelled with the raw counts.
freq_test_plot <- freq_test_count %>%
  ggplot(aes(x = reorder(combi, prop), y = prop, label = Freq)) +
  geom_col(show.legend = FALSE) +
  geom_text(check_overlap = TRUE, nudge_y = 0.005, size = 3) +
  facet_wrap(~Sex, scales = "free") +
  labs(y = "Proportion", x = NULL) +
  coord_flip()
When I plot freq_test_plot, it shows the plot, but the output is not in decreasing order.
I am not sure what should I do so that I can see terms in decreasing order of frequency.
A workaround is to create two different plots and arrange them in grid. But you should be cautious because, like Gregor mentioned, it could definitely be misleading.
library(grid)
# Build the flipped proportion chart for one sex only, so reorder() sorts the
# bars using that sex's proportions alone.
plot_one_sex <- function(sex) {
  freq_test_count[freq_test_count$Sex == sex, ] %>%
    ggplot(aes(x = reorder(combi, prop), y = prop, label = Freq)) +
    geom_col(show.legend = FALSE) +
    geom_text(check_overlap = TRUE, nudge_y = 0.005, size = 3) +
    facet_wrap(~Sex, scales = "free") +
    labs(y = "Proportion", x = NULL) +
    coord_flip()
}

p1 <- plot_one_sex("Male")
p2 <- plot_one_sex("Female")

# Close any open graphics devices, start a fresh page, and draw both plots
# side by side.
# NOTE(review): ggarrange() is not part of grid; it presumably comes from a
# package such as egg or ggpubr -- confirm which one is intended.
graphics.off()
grid.newpage()
grid.draw(ggarrange(p1, p2, ncol = 2))
Another work-around is to make male and female specific levels for the factor. Here I add a space " " to the front of the Male Hair/Eye labels. This lets you define an ordering that takes sex into account:
# Build Sex-specific labels so each facet can have its own bar order: the
# prefix passed to paste() differs between Male and Female rows, which makes
# the same Hair/Eye combination a distinct factor level per sex.
test <- data.frame(HairEyeColor) %>%
  mutate(combi = paste(Hair, Eye)) %>%
  group_by(Sex) %>%
  mutate(prop = Freq / sum(Freq)) %>%
  ungroup() %>%
  mutate(
    # Use the pipeline's own column; referring to test$combi here would read a
    # previously defined `test` (or fail if none exists yet).
    combi = factor(combi),
    sex_combi = factor(paste(ifelse(Sex == "Male", " ", ""), Hair, Eye)),
    sex_combi = reorder(sex_combi, prop)
  )

# Flipped bar chart of proportions per Sex, labelled with the raw counts.
ggplot(test, aes(x = sex_combi, y = prop, label = Freq)) +
  geom_col(show.legend = FALSE) +
  geom_text(check_overlap = TRUE, nudge_y = 0.005, size = 3) +
  facet_wrap(~Sex, scales = "free") +
  labs(y = "Proportion", x = NULL) +
  coord_flip()
But as I mentioned in the comments, I think this is a misleading plot.
Are you wanting the values to be sorted on male or female?
library(tidyverse)
# Prepare data that resembles the real data: one row per Hair/Eye/Sex cell,
# with each cell's share of its Sex total.
test <- data.frame(HairEyeColor) %>%
  mutate(combi = paste(Hair, Eye)) %>%
  group_by(Sex) %>%
  mutate(prop = Freq / sum(Freq)) %>%
  ungroup()

# Order the Hair/Eye levels by the Male frequencies. The ordering is computed
# from the Male rows only, so the level vector holds each combination exactly
# once; the same order is then applied to both facets.
male <- test[test$Sex == "Male", ]
test$combi <- factor(test$combi, levels = male$combi[order(male$Freq)])

# Flipped bar chart of proportions per Sex, labelled with the raw counts.
ggplot(test, aes(x = combi, y = prop, label = Freq)) +
  geom_col(show.legend = FALSE) +
  geom_text(check_overlap = TRUE, nudge_y = 0.005, size = 3) +
  facet_wrap(~Sex, scales = "free") +
  labs(y = "Proportion", x = NULL) +
  coord_flip()
updated to include full code from question.

Resources