I would like to create a ggplot facet grid where the labels sit on the x axis of the grid as a whole (not of each plot), rather than being squished into each chartlet.
Example:
mtcars %>%
group_by(mpg) %>%
mutate(cnt = n()) %>%
ggplot(aes(x = cyl, y = cnt)) +
geom_bar(stat = "identity") +
facet_grid(vs ~ cyl)
Looks like:
Rather than having 3 through 9 on each individual chart, I would like the horizontal part of the grid to be cyl as opposed to each individual chart.
In other words, each bar chart should be a single column bar chart only.
How can I do this?
Use a constant for the x value, while still faceting by cyl:
library(tidyverse)
mtcars %>%
group_by(mpg) %>%
mutate(cnt = n()) %>%
ggplot(aes(x = 1, y = cnt)) +
geom_bar(stat = "identity") +
facet_grid(vs ~ cyl) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank()) +
labs(x = "cyl")
Like this?
mtcars %>%
group_by(mpg) %>%
mutate(cnt = n()) %>%
ggplot(aes(x = cyl, y = cnt)) +
geom_bar(stat = "identity") +
facet_grid(vs ~ cyl)+
theme(axis.text.x = element_blank(),axis.ticks = element_blank())
Or like this?
mtcars %>%
group_by(mpg) %>%
mutate(cnt = n()) %>%
ggplot(aes(x = cyl, y = cnt)) +
geom_bar(stat = "identity") +
facet_grid(vs ~ cyl, scales="free")+
theme(axis.text.x = element_blank(),axis.ticks = element_blank())
Related
I would like to reverse the order of the legend for a horizontal bar chart. When adding guides(fill = guide_legend(reverse = TRUE)) to the ggplot it works fine (see second plot). However, after applying ggplotly() the legend is again in the default order.
How to reverse the order of the plotly legend without changing the order of the bars?
library(ggplot2)
library(dplyr)
data(mtcars)
p1 <- mtcars %>%
count(cyl, am) %>%
mutate(cyl = factor(cyl), am = factor(am)) %>%
ggplot(aes(cyl, n, fill = am)) +
geom_col(position = "dodge") +
coord_flip()
p1
p2 <- p1 + guides(fill = guide_legend(reverse = TRUE))
p2
plotly::ggplotly(p2)
Adding to the great answer of @Zac Garland, here is a solution that works with legends of arbitrary length:
library(ggplot2)
library(dplyr)
# Reverse the order of the traces stored in a plotly object.
#
# plotly_plot: a list-like object (e.g. the result of plotly::ggplotly())
#   whose traces are stored in plotly_plot$x$data.
# Returns plotly_plot with the elements of x$data in reverse order.
#
# rev() is used instead of swapping via 1:n / n:1 so that an empty trace
# list is returned unchanged (1:0 counts down and would insert a NULL entry).
reverse_legend_labels <- function(plotly_plot) {
  plotly_plot$x$data <- rev(plotly_plot$x$data)
  plotly_plot
}
p1 <- mtcars %>%
count(cyl, am) %>%
mutate(cyl = factor(cyl), am = factor(am)) %>%
ggplot(aes(cyl, n, fill = am)) +
geom_col(position = "dodge") +
coord_flip()
p2 <- mtcars %>%
count(am, cyl) %>%
mutate(cyl = factor(cyl), am = factor(am)) %>%
ggplot(aes(am, n, fill = cyl)) +
geom_col(position = "dodge") +
coord_flip()
p1 %>%
plotly::ggplotly() %>%
reverse_legend_labels()
p2 %>%
plotly::ggplotly() %>%
reverse_legend_labels()
When you call ggplotly, it's really just creating a list and a function call on that list.
So if you save that intermediate step, you can modify the list directly and, as such, modify the plot output.
library(ggplot2)
library(dplyr)
data(mtcars)
# Horizontal dodged bar chart of counts per cyl, filled by am.
p1 <- mtcars %>%
  count(cyl, am) %>%
  mutate(cyl = factor(cyl), am = factor(am)) %>%
  ggplot(aes(cyl, n, fill = am)) +
  geom_col(position = "dodge") +
  coord_flip()
# plotly is not attached in this snippet, so ggplotly() is called through
# its namespace; an unqualified call would fail with "could not find function".
html_plot <- plotly::ggplotly(p1)
# Swap the first two entries of the trace list held in html_plot$x$data.
replace_1 <- html_plot[["x"]][["data"]][[2]]
replace_2 <- html_plot[["x"]][["data"]][[1]]
html_plot[["x"]][["data"]][[1]] <- replace_1
html_plot[["x"]][["data"]][[2]] <- replace_2
html_plot
plot output
A simple solution is to define the order of the levels of the factor variable am:
library(ggplot2)
library(dplyr)
data(mtcars)
df <- mtcars %>%
count(cyl, am) %>%
mutate(cyl = factor(cyl), am = factor(as.character(am), levels = c("1", "0")))
head(df)
p1 <- df %>%
ggplot(aes(cyl, n, fill = am)) +
geom_col(position = "dodge") +
coord_flip()
p1
plotly::ggplotly(p1)
I have a stacked bar chart of proportions, so all bars total 100%. I would like to add a label to the end of each bar (i.e. on the far right-hand side of each bar, not within the bar itself) to show the total number of observations in each bar.
Something like this gets close-ish...
library(dplyr)
library(ggplot2)
data("mtcars")
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
facet_wrap(~am, ncol = 1) +
theme_minimal() +
scale_x_continuous(labels = scales::percent) +
# add labels to show total n for each bar
geom_text(aes(label = paste0("n = ", stat(y)), group = cyl),
stat = 'summary',
fun = sum)
...but (i) the values for my n labels clearly aren't the sums for each bar that I was expecting, and (ii) I can't figure out how to position the labels at the end of each bar. I thought I could specify a location on the x-axis within the geom_text aes, like this...
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
facet_wrap(~am, ncol = 1) +
theme_minimal() +
scale_x_continuous(labels = scales::percent) +
# add labels to show total n for each bar
geom_text(aes(label = paste0("n = ", stat(y)), group = cyl, x = 1),
stat = 'summary',
fun = sum)
...but I can't work out why that throws the x-axis scale out, and doesn't position all the labels at the same location on the scale.
Thanks in advance for any suggestions!
Try this:
library(dplyr)
library(ggplot2)
data("mtcars")
#Code
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
geom_text(aes(x=1.05,label = paste0("n = ", stat(y)), group = cyl),
hjust=0.5
)+
facet_wrap(~am, ncol = 1,scales = 'free')+
theme_minimal() +
scale_x_continuous(labels = scales::percent)
Output:
This is a modified version to add both proportions and numbers
library(dplyr)
library(ggplot2)
library(scales)
data("mtcars")
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
mutate(prop = n / sum(n)) %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill", alpha = 0.8) +
theme_minimal() +
scale_x_continuous(labels = scales::percent) +
# add labels to show total n for each bar
geom_text(aes(x = 1.1, , group = cyl,
label = paste0("n = ", stat(y))),
hjust = 0.5) +
geom_text(aes(x = prop, y = cyl, group = gear,
label = paste0('p =',round(stat(x),2))),
hjust = 0.5, angle = 0,
position = position_fill(vjust = .5)) +
facet_wrap(~am, ncol = 1, scales = 'free')
It's not the most elegant solution, but I got there in the end by expanding on @Duck's answer for the positioning of labels (thanks!), and calculating the totals to be used as labels outside of ggplot.
mtcars %>%
# prep data
mutate(across(where(is.numeric), as.factor)) %>%
count(am, cyl, gear) %>%
group_by(cyl, am) %>%
mutate(prop = n / sum(n)) %>%
mutate(column_total = sum(n)) %>%
ungroup() %>%
# plot
ggplot(aes(x = prop, y = cyl)) +
geom_col(aes(fill = gear),
position = "fill",
alpha = 0.8) +
geom_text(aes(x = 1.05, label = paste0("n = ", column_total))) +
facet_wrap(~am, ncol = 1, scales = 'free')+
theme_minimal() +
scale_x_continuous(labels = scales::percent)
I have a dataframe for which I want to visualize different things. In every barplot the same characteristic of the variable/attribute should have the same color. For example:
data_mtcars <- mtcars
data <- data_mtcars %>%
group_by(am, gear) %>%
summarise(Freq = sum(mpg)) %>%
group_by(am) %>%
mutate(Prop = Freq / sum(Freq)) %>%
arrange(desc(Prop))
First plot with three characteristics of the variable "gear".
ggplot(data) +
aes(x = am, y = Prop, fill = reorder(gear, Prop), width=0.5) +
geom_col() + scale_y_continuous(labels = function(x) paste0(eval(x*100), "%")) +
geom_text(aes(label = if_else(Prop>0.05, scales::percent(Prop),NULL)), position = position_stack(0.4))+
theme_minimal() +
theme(legend.title = element_blank()) + ylab("") + xlab("") +
scale_fill_brewer(palette = "Set3")
which gives me purple color for characteristic "3" of variable gear. If I change the number of characteristics the color should not be changed.
df <- data[data$gear!=4,]
ggplot(df) +
aes(x = am, y = Prop, fill = reorder(gear, Prop), width=0.5) +
geom_col() + scale_y_continuous(labels = function(x) paste0(eval(x*100), "%")) +
# scale_x_date(breaks = unique(df_sum_EAD$Stichtag) , date_labels = "%d.%m.%Y") +
geom_text(aes(label = if_else(Prop>0.05, scales::percent(Prop),NULL)), position = position_stack(0.4)) + theme_minimal() +
theme(legend.title = element_blank()) + ylab("") + xlab("")+ scale_fill_brewer(palette = "Set3")
Now the same characteristics have different colors (characteristic "3" is yellow). How can I fix this problem? I have tried to fix the levels
of the factor variable, but I don't know how to pass them to an appropriate argument in the plot.
# Turn gear into an ordered factor whose levels are its existing values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
data_mtcars$gear <- factor(
  data_mtcars$gear,
  levels = levels(as.factor(data_mtcars$gear)),
  ordered = TRUE
)
To get consistent colors for categories you can make use of a named color vector which can then be used via scale_color/fill_manual to always set the same color for each category:
library(dplyr)
library(ggplot2)
data_mtcars <- mtcars
data <- data_mtcars %>%
group_by(am, gear) %>%
summarise(Freq = sum(mpg)) %>%
group_by(am) %>%
mutate(Prop = Freq / sum(Freq)) %>%
arrange(desc(Prop))
#> `summarise()` regrouping output by 'am' (override with `.groups` argument)
data <- mutate(data, gear = reorder(gear, Prop))
# Named vector of colors
colors_gear <- scales::brewer_pal(palette = "Set2")(length(levels(data$gear)))
colors_gear <- setNames(colors_gear, levels(data$gear))
# Draw the stacked proportion bar chart for the data frame `d`.
#
# d: data frame with the columns am, gear and Prop.
# Returns a ggplot object.
#
# Fill colours are taken from the named vector colors_gear via
# scale_fill_manual(), so each gear level is matched to a colour by name.
make_plot <- function(d) {
  ggplot(d) +
    aes(x = am, y = Prop, fill = reorder(gear, Prop), width = 0.5) +
    geom_col() +
    scale_y_continuous(labels = function(x) paste0(x * 100, "%")) +
    geom_text(
      # Segments of 5% or less get a missing label. NA_character_ is used
      # because if_else() needs a `false` value of the same type as `true`;
      # NULL is not a valid vector for it.
      aes(label = if_else(Prop > 0.05, scales::percent(Prop), NA_character_)),
      position = position_stack(0.4)
    ) +
    theme_minimal() +
    theme(legend.title = element_blank()) +
    ylab("") +
    xlab("") +
    scale_fill_manual(values = colors_gear)
}
make_plot(data)
make_plot(data[data$gear!=4,])
Sup,
Consider the following lines:
data
df=data.frame(
prod=sample(1:30, 1000, replace=TRUE),
mat=sample(c('yes', 'no'), 1000, replace=TRUE),
fj=sample(c(1,2), 1000, replace = TRUE)
)
plot
df %>%
group_by(mat, prod, fj) %>%
summarise(n = n()) %>%
arrange(desc(n)) %>%
slice(1:5) %>%
ggplot(aes(x = reorder(prod, n), y = n)) +
geom_col(fill = RColorBrewer::brewer.pal(3, 'Dark2')[2], colour = "grey", alpha = 0.8) +
labs(x = "Prod", y = "Qnt") +
scale_y_continuous(labels = scales::comma) +
coord_flip() +
facet_wrap(fj ~ mat, scale="free") +
theme_minimal()
which gives me
Now, if i drop fj variable, as in
df %>%
group_by(mat, prod) %>%
summarise(n = n()) %>%
arrange(desc(n)) %>%
slice(1:5) %>%
ggplot(aes(x = reorder(prod, n), y = n)) +
geom_col(fill = RColorBrewer::brewer.pal(3, 'Dark2')[2], colour = "grey", alpha = 0.8) +
labs(x = "Prod", y = "Qnt") +
scale_y_continuous(labels = scales::comma) +
coord_flip() +
facet_wrap(~ mat, scale="free") +
theme_minimal()
slice(1:5) does its job and I get:
Question
Why don't slice and reorder seem to work properly when there are 3+ variables, and what should I do to limit the first plot to 5 lines each?
When you call summarize you lose one level of grouping. In this case, you lost fj, so when you slice it's not included in the group divisions.
If you first ungroup then group_by mat and fj, I think you'll end up with what you are looking for.
df %>%
group_by(mat, prod, fj) %>%
summarise(n = n()) %>%
ungroup()%>%
group_by(mat, fj) %>%
arrange(desc(n)) %>%
slice(1:5) %>%
ggplot(aes(x = reorder(prod, n), y = n)) +
geom_col(fill = RColorBrewer::brewer.pal(3, 'Dark2')[2], colour = "grey", alpha = 0.8) +
labs(x = "Prod", y = "Qnt") +
scale_y_continuous(labels = scales::comma) +
coord_flip() +
facet_wrap(fj ~ mat, scale="free") +
theme_minimal()
This leaves the problem of reordering the prod variable within each facet. It doesn't work in the example above because you are ordering by the entire data frame, and some of the values of prod are repeated in several of the facets. As discussed in this blog post by @drsimonj, you need to create an order variable and plot based on that. This follows/blatantly copies the method outlined in the blog post.
# Keep the first five rows per (mat, fj) group after sorting by descending
# count, then number the rows so each bar gets its own x position.
# The result is stored as df2 because the x scale below reads its
# row.order and prod columns to put the prod values back on the axis.
df2 <- df %>%
  group_by(mat, prod, fj) %>%
  summarise(n = n()) %>%
  group_by(mat, fj) %>%
  arrange(desc(n)) %>%
  slice(1:5) %>%
  ungroup() %>%
  arrange(fj, mat, n) %>% # arrange the entire table by the facets first, then by the n value
  mutate(row.order = row_number()) # create dummy variable

ggplot(df2, aes(x = row.order, y = n)) + # plot by the dummy variable
  geom_col(
    fill = RColorBrewer::brewer.pal(3, 'Dark2')[2],
    colour = "grey",
    alpha = 0.8,
    position = "dodge"
  ) +
  labs(x = "Prod", y = "Qnt") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous( # add back in the Prod values
    breaks = df2$row.order,
    labels = df2$prod
  ) +
  coord_flip() +
  facet_wrap(fj ~ mat, scales = "free") +
  theme_minimal()
How does one plot "filled" bars with counts labels using ggplot2?
I'm able to do this for "stacked" bars. But I'm very confused otherwise.
Here is a reproducible example using dplyr and the mpg dataset
# The package is named ggplot2; library(ggplot) fails because no such
# package exists.
library(ggplot2)
library(dplyr)
# Count rows per class/drv combination and express each count as a share
# of all rows.
mpg_summ <- mpg %>%
  group_by(class, drv) %>%
  summarise(freq = n()) %>%
  ungroup() %>%
  mutate(total = sum(freq),
         prop = freq/total)
g <- ggplot(mpg_summ, aes(x = class, y = prop, group = drv))
# Stacked bars with each count label placed at the middle of its segment.
g + geom_col(aes(fill = drv)) +
  geom_text(aes(label = freq), position = position_stack(vjust = .5))
But if I try to plot counts for filled bars it does not work
g <- ggplot(mpg_summ, aes(x=class, fill=drv))
g + stat_count(aes(y = (..count..)/sum(..count..)), geom="bar", position="fill") +
scale_y_continuous(labels = percent_format())
Further, if I try:
g <- ggplot(mpg_summ, aes(x=class, fill=drv))
g + geom_bar(aes(y = freq), position="fill") +
geom_text(aes(label = freq), position = "fill") +
scale_y_continuous(labels = percent_format())
I get:
Error: stat_count() must not be used with a y aesthetic.
I missed the fill portion from the last question. This should get you there:
library(ggplot2)
library(dplyr)
mpg_summ <- mpg %>%
group_by(class, drv) %>%
summarise(freq = n()) %>%
ungroup() %>%
mutate(total = sum(freq),
prop = freq/total)
g <- ggplot(mpg_summ, aes(x = class, y = prop, group = drv))
g + geom_col(aes(fill = drv), position = 'fill') +
geom_text(aes(label = freq), position = position_fill(vjust = .5))