Order grouped scatterplot by mean - r

I am plotting a geom_point for several groups (Loc) and want in addition a line that indicates the mean of the points for each group. The groups should be ordered based on the mean of the Size for each group. I am trying to do this by reorder(Loc, Size.Mean) but it does not reorder.
ggplot(data,aes(Loc,Size,color=Loc)) +
geom_point() +
geom_point(data %>%
group_by(Loc) %>%
summarise(Size.Mean = mean(Size)),
mapping = aes(y = Size.Mean, x = reorder(Loc, Size.Mean)),
color = "black", shape = '-') +
theme_pubr(base_size=8) +
scale_y_continuous(trans="log10") +
theme(axis.text.x = element_text(angle = 90,hjust = 1)) +
theme(legend.position = "none")

ggplot orders discrete x ticks according to their level if the variable is a factor:
library(tidyverse)
iris_means <-
iris %>%
group_by(Species) %>%
summarise(mean = mean(Sepal.Length)) %>%
arrange(-mean)
iris %>%
mutate(Species = Species %>% factor(levels = iris_means$Species)) %>%
ggplot(aes(Species, Sepal.Length)) +
geom_point() +
geom_crossbar(data = iris_means, mapping = aes(y = mean, ymin = mean, ymax = mean), color = "red")
Created on 2021-09-10 by the reprex package (v2.0.1)

Related

Placing data labels for stacked bar chart at top of bar

I have been attempting to add a label on top of each bar to represent the proportion that each ethnic group makes up in referrals.
For some reason I cannot get the labels to be placed at the top of each bar. How do I fix this?
My code below
freq <- df %>%
group_by(ethnicity) %>%
summarise(n = n()) %>%
mutate(f = round((n/sum(n)*100, 1))
df %>%
group_by(pathway) %>%
count(ethnicity) %>%
ggplot(aes(x = ethnicity, y = n , fill = pathway)) +
geom_bar(stat = "identity", position = "stack") +
geom_text(data = freq,
aes(x= ethnicity, y = f, label = f),
inherit.aes = FALSE) +
theme(legend.position = "bottom") +
scale_fill_manual(name = "",
values = c("light blue", "deepskyblue4"),
labels = "a", "b") +
xlab("") +
ylab("Number of Referrals") +
scale_y_continuous(breaks = seq(0, 2250, 250), expand = c(0,0)
Here is what it currently looks like
Since you are using the count as your y-axis position in geom_bar, you need to use the same thing in your geom_text to get the labels in the right place. Below is an example using mtcars dataset. Using vjust = -1 I put a little bit of space between the label and the bars to make it more legible and aesthetically pleasing.
library(tidyverse)
mtcars %>%
group_by(carb) %>%
summarise(n = n()) %>%
mutate(f = round(proportions(n) * 100, 1)) -> frq
mtcars %>%
group_by(gear) %>%
count(carb) -> df
df %>%
ggplot(aes(x = carb, y = n, fill = gear)) +
geom_bar(stat = "identity", position = "stack") +
geom_text(data = frq,
vjust = -1,
aes(x= carb, y = n, label = f),
inherit.aes = FALSE)
Created on 2022-10-31 by the reprex package (v2.0.1)

How to add text label to show total n in each bar of stacked proportion bars in ggplot?

I have a stacked bar chart of proportions, so all bars total 100%. I would like to add a label to the end of each bar (i.e. on the far right-hand side of each bar, not within the bar itself) to show the total number of observations in each bar.
Something like this gets close-ish...
library(dplyr)
library(ggplot2)
data("mtcars")
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
facet_wrap(~am, ncol = 1) +
theme_minimal() +
scale_x_continuous(labels = scales::percent) +
# add labels to show total n for each bar
geom_text(aes(label = paste0("n = ", stat(y)), group = cyl),
stat = 'summary',
fun = sum)
...but (i) the values for my n labels clearly aren't the sums for each bar that I was expecting, and (ii) I can't figure out how to position the labels at the end of each bar. I thought I could specify a location on the x-axis within the geom_text aes, like this...
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
facet_wrap(~am, ncol = 1) +
theme_minimal() +
scale_x_continuous(labels = scales::percent) +
# add labels to show total n for each bar
geom_text(aes(label = paste0("n = ", stat(y)), group = cyl, x = 1),
stat = 'summary',
fun = sum)
...but I can't work out why that throws the x-axis scale out, and doesn't position all the labels at the same location on the scale.
Thanks in advance for any suggestions!
Try this:
library(dplyr)
library(ggplot2)
data("mtcars")
#Code
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
geom_text(aes(x=1.05,label = paste0("n = ", stat(y)), group = cyl),
hjust=0.5
)+
facet_wrap(~am, ncol = 1,scales = 'free')+
theme_minimal() +
scale_x_continuous(labels = scales::percent)
Output:
This is a modified version to add both proportions and numbers
library(dplyr)
library(ggplot2)
library(scales)
data("mtcars")
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill", alpha = 0.8) +
theme_minimal() +
scale_x_continuous(labels = scales::percent) +
# add labels to show total n for each bar
geom_text(aes(x = 1.1, , group = cyl,
label = paste0("n = ", stat(y))),
hjust = 0.5) +
geom_text(aes(x = prop, y = cyl, group = gear,
label = paste0('p =',round(stat(x),2))),
hjust = 0.5, angle = 0,
position = position_fill(vjust = .5)) +
facet_wrap(~am, ncol = 1, scales = 'free')
It's not the most elegant solution, but I got there in the end by expanding on #Duck's answer for the positioning of labels (thanks!), and calculating the totals to be used as labels outside of ggplot.
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
group_by(cyl, am) %>%
mutate(prop = n / sum(n)) %>%
mutate(column_total = sum(n)) %>%
ungroup() %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
geom_text(aes(x = 1.05, label = paste0("n = ", column_total))) +
facet_wrap(~am, ncol = 1, scales = 'free')+
theme_minimal() +
scale_x_continuous(labels = scales::percent)

How to create scaled and faceted clustered bargraphs of a summarized dataframe in ggplot2?

I am trying to create a grid of bargraphs that show the average for different species. I am using the iris dataset for this question.
I summarised the data, melted it into long form long, and tried to use facet_wrap.
iris %>%
group_by(Species) %>%
summarise(M.Sepal.Length=mean(Sepal.Length),
M.Sepal.Width=mean(Sepal.Width),
M.Petal.Length= mean(Petal.Length),
M.Petal.Width=mean(Petal.Width)) %>%
gather(key = Part, value = Value, M.Sepal.Length:M.Petal.Width) %>%
ggplot(., aes(Part, Value, group = Species, fill=Species)) +
geom_col(position = "dodge") +
facet_grid(cols=vars(Part)) +
facet_grid(cols = vars(Part))
However, the graph I am getting has x.axis labels that are strung across each facet grid. Additionally the clustered graphs are not centered within each facet box. Instead they appear at the location of their respective x-axis label. I'd like to get rid of the x-axis labels, center the graphs, and scale the graphs within each facet.
Here is an image of the resulting graph marked up with my expected output:
Perhaps this is what you're looking for?
The key changes are:
Remove Part as the variable mapped to x, that way the data is plotted in the same location in every facet
Switch to facet_wrap so you can use scales = "free_y"
Use labs to manually add the x title
Add theme to get rid of the x-axis ticks and tick labels.
library(ggplot2)
library(dplyr) # Version >= 1.0.0
iris %>%
group_by(Species) %>%
summarise(across(1:4, mean, .names = "M.{col}")) %>%
gather(key = Part, value = Value, M.Sepal.Length:M.Petal.Width) %>%
ggplot(., aes(x = 1, y = Value, group = Species, fill=Species)) +
geom_col(position = "dodge") +
facet_wrap(.~Part, nrow = 1, scales = "free_y") +
labs(x = "Part") +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank())
I also took the liberty of switching out your manual call to summarise with the new across functionality.
Here's how you might also calculate error bars:
library(tidyr)
iris %>%
group_by(Species) %>%
summarise(across(1:4, list(M = mean, SE = ~ sd(.)/sqrt(length(.))),
.names = "{fn}_{col}")) %>%
pivot_longer(-Species, names_to = c(".value","Part"),
names_pattern = "([SEM]+)_(.+)") %>%
ggplot(., aes(x = 1, y = M, group = Species, fill=Species)) +
geom_col(position = "dodge") +
geom_errorbar(aes(ymin = M - SE, ymax = M + SE), width = 0.5,
position = position_dodge(0.9)) +
facet_wrap(.~Part, nrow = 1, scales = "free_y") +
labs(x = "Part", y = "Value") +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank())

R, ggplot2, limit rows in a faceted barplot

Sup,
Consider the following lines:
data
df=data.frame(
prod=sample(1:30, 1000, replace=TRUE),
mat=sample(c('yes', 'no'), 1000, replace=TRUE),
fj=sample(c(1,2), 1000, replace = TRUE)
)
plot
df %>%
group_by(mat, prod, fj) %>%
summarise(n = n()) %>%
arrange(desc(n)) %>%
slice(1:5) %>%
ggplot(aes(x = reorder(prod, n), y = n)) +
geom_col(fill = RColorBrewer::brewer.pal(3, 'Dark2')[2], colour = "grey", alpha = 0.8) +
labs(x = "Prod", y = "Qnt") +
scale_y_continuous(labels = scales::comma) +
coord_flip() +
facet_wrap(fj ~ mat, scale="free") +
theme_minimal()
which gives me
Now, if i drop fj variable, as in
df %>%
group_by(mat, prod) %>%
summarise(n = n()) %>%
arrange(desc(n)) %>%
slice(1:5) %>%
ggplot(aes(x = reorder(prod, n), y = n)) +
geom_col(fill = RColorBrewer::brewer.pal(3, 'Dark2')[2], colour = "grey", alpha = 0.8) +
labs(x = "Prod", y = "Qnt") +
scale_y_continuous(labels = scales::comma) +
coord_flip() +
facet_wrap(~ mat, scale="free") +
theme_minimal()
slice(1:5) does it's job and i've got:
Question
why slice and reorder doesn't seems to work properly when there's 3+ variables and what should i do to limit the first plot to 5 lines each?
When you call summarize you loose one level of grouping. In this case, you lost fj, so when you slice it's not included in the group divisions.
If you first ungroup then group_by mat and fj, I think you'll end up with what you are looking for.
df %>%
group_by(mat, prod, fj) %>%
summarise(n = n()) %>%
ungroup()%>%
group_by(mat, fj) %>%
arrange(desc(n)) %>%
slice(1:5) %>%
ggplot(aes(x = reorder(prod, n), y = n)) +
geom_col(fill = RColorBrewer::brewer.pal(3, 'Dark2')[2], colour = "grey", alpha = 0.8) +
labs(x = "Prod", y = "Qnt") +
scale_y_continuous(labels = scales::comma) +
coord_flip() +
facet_wrap(fj ~ mat, scale="free") +
theme_minimal()
This leaves the problem of reordering the prod variable within each facet. It doesn't work in the example above because you are ordering by the entire data frame, and some of the values of Prod are repeated in several of the facets. As discussed in this blog post by #drsimonj you need to create an order variable and plot based on that. This follows/blatently copies the method outlined in the blog post.
df %>%
group_by(mat, prod, fj) %>%
summarise(n = n()) %>%
group_by(mat, fj) %>%
arrange(desc(n)) %>%
slice(1:5) %>%
ungroup() %>%
arrange(fj,mat, n) %>% # arrange the entire table by the facets first, then by the n value
mutate(row.order = row_number()) %>% # create dummy variable
ggplot(aes(x = row.order, y = n)) + # plot by the dummy variable
geom_col(fill = RColorBrewer::brewer.pal(3, 'Dark2')[2], colour = "grey", alpha = 0.8, position = "dodge") +
labs(x = "Prod", y = "Qnt") +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous( # add back in the Prod values
breaks = df2$row.order,
labels = df2$prod
)+
coord_flip() +
facet_wrap(fj ~ mat, scales = "free") +
theme_minimal()

Plotting labels on bar plots with position = "fill" in R ggplot2

How does one plot "filled" bars with counts labels using ggplot2?
I'm able to do this for "stacked" bars. But I'm very confused otherwise.
Here is a reproducible example using dplyr and the mpg dataset
library(ggplot)
library(dplyr)
mpg_summ <- mpg %>%
group_by(class, drv) %>%
summarise(freq = n()) %>%
ungroup() %>%
mutate(total = sum(freq),
prop = freq/total)
g <- ggplot(mpg_summ, aes(x = class, y = prop, group = drv))
g + geom_col(aes(fill = drv)) +
geom_text(aes(label = freq), position = position_stack(vjust = .5))
But if I try to plot counts for filled bars it does not work
g <- ggplot(mpg_summ, aes(x=class, fill=drv))
g + stat_count(aes(y = (..count..)/sum(..count..)), geom="bar", position="fill") +
scale_y_continuous(labels = percent_format())
Further, if I try:
g <- ggplot(mpg_summ, aes(x=class, fill=drv))
g + geom_bar(aes(y = freq), position="fill") +
geom_text(aes(label = freq), position = "fill") +
scale_y_continuous(labels = percent_format())
I get:
Error: stat_count() must not be used with a y aesthetic.
I missed the fill portion from the last question. This should get you there:
library(ggplot2)
library(dplyr)
mpg_summ <- mpg %>%
group_by(class, drv) %>%
summarise(freq = n()) %>%
ungroup() %>%
mutate(total = sum(freq),
prop = freq/total)
g <- ggplot(mpg_summ, aes(x = class, y = prop, group = drv))
g + geom_col(aes(fill = drv), position = 'fill') +
geom_text(aes(label = freq), position = position_fill(vjust = .5))

Resources