R ggplot with percentages - r

I'm trying to make a bar plot with percentages instead of counts, and I've tried this:
# Question code: stat = "count" with y mapped to the computed ..prop..;
# group = 1 places every row in a single group, and the y axis is
# labelled with percent formatting.
ggplot(data = newdf3) +
geom_bar(mapping = aes(x = key, y = ..prop..,fill=value,group = 1), stat = "count",position=position_dodge()) +
scale_y_continuous(labels = scales::percent_format())
but apparently "group=1" is not working because this is what it returns:
and if I don't use "group=1" it returns:
here's a sample of data I'm using:
key value
1 Before
1 After
1 During
1 Before
2 Before
2 After
3 During
...
Can someone help me with this?

Consider using geom_col() instead of geom_bar().
However, you should be able to get around your problem with stat="identity".
library(ggplot2)

# Sample data: two pre-computed proportions per group.
df <- data.frame(
  group = c("A", "A", "B", "B", "C", "C"),
  value = c(0.1, 0.5, 0.3, 0.1, 0.2, 0.6)
)
head(df)

# Count plot: geom_bar() defaults to stat = "count", so each bar's height is
# the number of rows in that group. Plain function calls are used instead of
# %>% because only ggplot2 is attached here and it does not export the pipe.
ggplot(df, aes(x = group)) +
  geom_bar()

# Value plot: stat = "identity" takes the bar height from `value` directly,
# and the y axis is formatted as percentages.
ggplot(df, aes(x = group, y = value)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::percent_format())

One solution would be to calculate the relative frequencies from your input data and pass the results directly to ggplot, using the stat = "identity" parameter in geom_bar (see this post):
library(tidyverse)

# Sample data mirroring the question: one row per observation.
df <- tibble::tribble(
  ~key, ~value,
  1, "Before",
  1, "After",
  1, "During",
  1, "Before",
  2, "Before",
  2, "After",
  3, "During"
)

# Count each key/value pair, convert the counts to a share within each key,
# and feed the result straight into the plot.
df %>%
  dplyr::count(key, value) %>%
  dplyr::group_by(key) %>%
  dplyr::mutate(p = n / sum(n)) %>%
  ggplot(mapping = aes(x = key, y = p, fill = value)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_y_continuous(labels = scales::percent_format())
Created on 2019-10-28 by the reprex package (v0.3.0)

Related

Error filling and distributing values on the x-axis of the bar graph

I'm trying to create a bar graph in ggplot, considering a "type" variable in the filling of each bar.
However, the maximum values of the bars are excessively high (above 100, when in fact they should be close to 40). My goal is to place the overlay padding.
I appreciate any help.
# Example data (dput output): 12 rows with columns Count (character),
# Type (numeric 1-3) and Hours (numeric); three Type rows per Count value.
df <- structure(list(Count = c("Beu", "Beu", "Beu", "Abe", "Abe", "Abe",
"Pre", "Pre", "Pre", "Bra", "Bra", "Bra"), Type = c(1, 2, 3,
1, 2, 3, 1, 2, 3, 1, 2, 3), Hours = c(40.17775, 42.1492178098676,
42.1910353866317, 38.3701812919564, 39.9185282522996, 38.8002722361139,
41.6389448017412, 41.7041742286751, 41.9545826200271, 41.1375910844406,
41.0602923264312, 40.6300999927013)), row.names = c(NA, 12L), class = "data.frame")
Here's the code I'm trying to run:
# Question code: Type is converted to a factor and used as the fill.
# No position is passed to geom_bar(), so its default applies
# (NOTE(review): the default is stacking, which matches the >100 totals
# described in the question).
df %>%
mutate(Type = as.factor(Type)) %>%
ggplot(mapping = aes(x = Count, y = Hours, fill = Type)) +
geom_bar(stat = 'identity') +
coord_flip() +
theme_classic()
To overlay the bars you could add position = 'identity' in your geom_bar with a lower transparency alpha like this:
library(ggplot2)
library(dplyr)

# Overlay instead of stack: position = "identity" draws every bar from zero,
# and a low alpha keeps the bars underneath visible.
df %>%
  mutate(Type = as.factor(Type)) %>%
  ggplot(mapping = aes(x = Count, y = Hours, fill = Type)) +
  geom_bar(
    stat = "identity",
    position = "identity",
    alpha = 0.2
  ) +
  coord_flip() +
  theme_classic()
Another option is to overlay the bars using position_identity(); to make sure the highest values are drawn behind the others, you can arrange the rows by descending Hours like this:
# Overlay the bars of each Count at the same position. Rows are sorted by
# descending Hours beforehand; per the accompanying explanation this is meant
# to keep the tallest bars behind the shorter ones.
df %>%
  mutate(Type = as.factor(Type)) %>%
  group_by(Count) %>%
  arrange(desc(Hours)) %>%
  ggplot(mapping = aes(x = Count, y = Hours, fill = Type)) +
  geom_bar(
    stat = "identity",
    position = position_identity()
  ) +
  coord_flip() +
  theme_classic()
Created on 2022-11-02 with reprex v2.0.2
If Quinten did not answer as you expected, I would use a dodged method to show each category in a bar to itself as such:
# Dodged variant: one bar per Type, side by side within each Count.
# Type is converted to a factor inside aes() so it is treated as a discrete
# fill. The former no-op mutate(Type = Type) and the stray second
# theme_classic() call (a separate statement that was not part of the plot)
# have been removed.
df %>%
  ggplot(mapping = aes(x = Count, y = Hours, fill = as.factor(Type))) +
  geom_bar(stat = "identity", position = "dodge2") +
  coord_flip() +
  theme_classic()
This would give you this chart:

Placing data labels for stacked bar chart at top of bar

I have been attempting to add a label on top of each bar to represent the proportion that each ethnic group makes up in referrals.
For some reason I cannot get the labels to be placed at the top of each bar. How do I fix this?
My code below
# Percentage of referrals per ethnicity, rounded to one decimal.
freq <- df %>%
  group_by(ethnicity) %>%
  summarise(n = n()) %>%
  mutate(f = round(n / sum(n) * 100, 1))

# Stacked referral counts per ethnicity, filled by pathway.
df %>%
  group_by(pathway) %>%
  count(ethnicity) %>%
  ggplot(aes(x = ethnicity, y = n, fill = pathway)) +
  geom_bar(stat = "identity", position = "stack") +
  # As in the question: the label's y position is the percentage `f`, while
  # the bars are drawn on the count scale `n`. This mismatch is what the
  # question asks about and is intentionally left unchanged here.
  geom_text(
    data = freq,
    aes(x = ethnicity, y = f, label = f),
    inherit.aes = FALSE
  ) +
  theme(legend.position = "bottom") +
  scale_fill_manual(
    name = "",
    values = c("light blue", "deepskyblue4"),
    labels = c("a", "b")
  ) +
  xlab("") +
  ylab("Number of Referrals") +
  scale_y_continuous(breaks = seq(0, 2250, 250), expand = c(0, 0))
Here is what it currently looks like
Since you are using the count as your y-axis position in geom_bar, you need to use the same thing in your geom_text to get the labels in the right place. Below is an example using mtcars dataset. Using vjust = -1 I put a little bit of space between the label and the bars to make it more legible and aesthetically pleasing.
library(tidyverse)

# Per-carb row count `n` and its share of all cars in percent (one decimal).
frq <- mtcars %>%
  group_by(carb) %>%
  summarise(n = n()) %>%
  mutate(f = round(proportions(n) * 100, 1))

# Row counts for every gear/carb combination; these are the stacked segments.
df <- mtcars %>%
  group_by(gear) %>%
  count(carb)

# The label layer uses the per-carb total `n` as its y position, i.e. the same
# scale as the bars, and vjust = -1 lifts the text slightly above that point.
ggplot(df, aes(x = carb, y = n, fill = gear)) +
  geom_bar(stat = "identity", position = "stack") +
  geom_text(
    data = frq,
    mapping = aes(x = carb, y = n, label = f),
    vjust = -1,
    inherit.aes = FALSE
  )
Created on 2022-10-31 by the reprex package (v2.0.1)

How to position labels on grouped bar plot columns in ggplot2

I am having trouble positioning percentage&count labels on a grouped barplot.
The labels are currently stacked together:
I think this is because I have been referring to example code for a stacked barplot. I have tried adding position=position_dodge(width=1) to geom_text to unstack the labels, but I got the following warning and error:
Warning: Ignoring unknown aesthetics: position
Don't know how to automatically pick scale for object of type PositionDodge/Position/ggproto/gg. Defaulting to continuous.
Error: Aesthetics must be valid data columns. Problematic aesthetic(s): position = position_dodge(width = 1).
Did you mistype the name of a data column or forget to add stat()?
Here is the code I have using the Titanic dataset:
# Question code: load the Titanic training data and preview the first rows.
data("titanic_train")
head(titanic_train, 6)
library(dplyr)
library(ggplot2)
# Survived becomes a factor so it can be used as a discrete fill.
titanic_train$Survived <- as.factor(titanic_train$Survived)
# Counts per Survived/Sex combination plus a proportion `pct` derived from them.
summary = titanic_train %>% group_by(Survived, Sex) %>% tally %>% mutate(pct = n/sum(n))
# Dodged bars; geom_text() is given no position here, so the labels are not dodged with the bars.
ggplot(summary, aes(x=Sex, y=n, fill=Survived)) + geom_bar(stat="identity", position="dodge") + geom_text(aes(label=paste0(sprintf("%1.1f", pct*100),"%\n", n)), colour="black")
How can I resolve this?
You can just add position = position_dodge(width = 1) to your geom_text call, but outside of aes. Your error was caused by trying to put position... inside aes.
library(dplyr)
library(ggplot2)
library(titanic)

# `summary` is the count/percentage table built in the question.
# position is passed to geom_text() as a layer argument, outside aes(), so the
# labels are dodged instead of being treated as a mapped aesthetic.
ggplot(summary, aes(x = Sex, y = n, fill = Survived)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    mapping = aes(label = paste0(sprintf("%1.1f", pct * 100), "%\n", n)),
    position = position_dodge(width = 1),
    colour = "black"
  ) +
  coord_cartesian(ylim = c(0, 550))
I would like to share an example which you could replicate the same by using your data
data
# Toy data: two x levels, each holding one value for group "a" and one for "b".
df <- data.frame(
  x = factor(rep(c(1, 2), each = 2)),
  y = c(1, 3, 2, 1),
  grp = rep(c("a", "b"), times = 2)
)
plot
# Dodged columns with a value label above each one. The text layer is dodged
# by 0.9 and its y position is nudged up by 0.05 so it sits above the column.
ggplot(df, mapping = aes(x = x, y = y, group = grp)) +
  geom_col(mapping = aes(fill = grp), position = "dodge") +
  geom_text(
    mapping = aes(y = y + 0.05, label = y),
    vjust = 0,
    position = position_dodge(0.9)
  )

Label grouped bar plot in R

I'm trying to add labels to a grouped bar plot in R.
However, I'm using percentages on the y axis, and I want the labels to show counts.
I've tried to use the geom_text() function, but I don't know exactly which parameters I need to use.
# Share of each value within its key, drawn as dodged bars on a 0-100% axis
# with a fixed colour per value; drop = FALSE is passed to the fill scale.
newdf3 %>%
  dplyr::count(key, value) %>%
  dplyr::group_by(key) %>%
  dplyr::mutate(p = n / sum(n)) %>%
  ggplot(mapping = aes(x = key, y = p, fill = value)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_y_continuous(
    labels = scales::percent_format(),
    limits = c(0, 1)
  ) +
  labs(x = "", y = "%", title = "") +
  scale_fill_manual(
    name = "",
    values = c(
      "Before" = "deepskyblue",
      "During" = "indianred1",
      "After" = "green2",
      "?" = "mediumorchid3"
    ),
    drop = FALSE
  )
Here is an example of what I need:
here's a sample of data I'm using:
key value
A Before
A After
A During
B Before
B Before
C After
D During
...
I also wanted to keep the bars with no value (label = 0).
Can someone help me with this?
Here is an MWE showing how to add count labels to a simple bar chart. See below for the case where the bars are grouped.
library(datasets)
library(tidyverse)

# Rows per feed type (`n`) and each feed's share of all rows (`p`).
data <- chickwts %>%
  group_by(feed) %>%
  count() %>%
  ungroup() %>%
  mutate(p = n / sum(n))

# Bar height is the share `p`; the label printed above each bar is the count `n`.
ggplot(data, mapping = aes(x = feed, y = p, fill = feed)) +
  geom_bar(stat = "identity") +
  geom_text(
    mapping = aes(label = n),
    stat = "identity",
    vjust = -1
  )
You should be able to do the same thing on your data.
EDIT: StupidWolf points out in the comments that the original example has grouped data. Adding position = position_dodge(0.9) in geom_text deals with this.
Again, no access to the original data, but here's a different MWE using mtcars showing this:
library(datasets)
library(tidyverse)

# Number of cars for every gear/carb combination, with both stored as factors.
data <- mtcars %>%
  as_tibble() %>%
  transmute(
    gear = as_factor(gear),
    carb = as_factor(carb),
    cyl = cyl
  ) %>%
  group_by(gear, carb) %>%
  count()

# Dodged bars with the count above each bar; the text layer is dodged by 0.9
# so the labels spread out across the bars instead of piling up per gear.
ggplot(data, mapping = aes(x = gear, y = n, fill = carb)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    mapping = aes(label = n),
    stat = "identity",
    position = position_dodge(0.9),
    vjust = -1
  )

ggplot2: show relative % in a stacked barplot per group

I'm trying to plot a basic bar chart per group.
As values are pretty big, I want to show for each bar (i.e. group) the % of each group within the bar.
I managed to show percentage of the total, but this is not what I'm expecting : in each bar, I would like that the sum of % equal 100%.
Is there an easy way to do it without changing the dataframe ?
# Question code: 12 random values, four groups for each of three years.
(DF <- data.frame( year = rep(2015:2017, each = 4),
Grp = c("Grp1", "Grp2", "Grp3", "Grp4"),
Value = trunc(rnorm(12, 2000000, 100000))) )
# Stacked bars per year; the label divides each Value by sum(Value) inside
# aes(), which the question reports as giving the percentage of the total
# rather than of each year.
ggplot(DF) +
geom_bar(aes(x = year, y = Value, fill = Grp),
stat = "identity",
position = position_stack()) +
geom_text(aes(x = year, y = Value, group = Grp,
label = percent(Value/sum(Value))) ,
position = position_stack(vjust = .5))
You can create a new variable holding each value's percentage within its year:
library(dplyr)
library(ggplot2)
library(scales)

# ValuePer: each row's share of its own year's total.
DF <- DF %>%
  group_by(year) %>%
  mutate(ValuePer = (Value / sum(Value))) %>%
  ungroup()

# Bars and labels both use the "fill" position; the y axis is shown in percent.
ggplot(DF, mapping = aes(x = year, y = ValuePer, fill = Grp)) +
  geom_bar(stat = "identity", position = "fill") +
  geom_text(
    mapping = aes(label = percent(ValuePer)),
    position = position_fill()
  ) +
  scale_y_continuous(labels = percent_format())
Use position = "fill" to turn scale into proportions and scale_y_continuous(labels = percent_format()) to turn this scale into percent.
# Example data: 12 random values, four groups for each of three years.
DF <- data.frame(
  year = rep(2015:2017, each = 4),
  Grp = c("Grp1", "Grp2", "Grp3", "Grp4"),
  Value = trunc(rnorm(12, 2000000, 100000))
)

library(ggplot2)
library(scales)

# position = "fill" rescales every bar to 100%. The label has to use the same
# per-year denominator: sum(Value) inside aes() is the grand total over all
# years, so ave(Value, year, FUN = sum) is used to obtain each row's own
# year total without adding a column to DF.
ggplot(DF, aes(year, Value, fill = Grp)) +
  geom_bar(stat = "identity", position = "fill") +
  geom_text(
    aes(label = percent(Value / ave(Value, year, FUN = sum))),
    position = position_fill()
  ) +
  scale_y_continuous(labels = percent_format())
OK gathering all your tricks, I finally get this :
I need to adjust my DF, what I wanted to avoid, but it remains simple so it works
library(dplyr)
library(ggplot2)
library(scales)

# ValuePer: each row's share of its own year's total, used for the labels.
DF <- DF %>%
  group_by(year) %>%
  mutate(ValuePer = (Value / sum(Value))) %>%
  ungroup()

# Bars keep the real values on the y axis; labels show the per-year share.
ggplot(DF, aes(year, Value, fill = Grp)) +
  geom_bar(stat = "identity", position = "stack") +
  geom_text(
    aes(label = percent(ValuePer)),
    position = position_stack()
  ) +
  # unit_format() arguments are passed by name: its positional order is not
  # stable across scales releases (the first argument is now `accuracy`), so
  # unit_format("M", 1e-6) would no longer set the unit and the scale.
  scale_y_continuous(labels = unit_format(unit = "M", scale = 1e-6))
I would use a separate geom_text for each bar, filtering the data by year (one bar per year) with dplyr. Check whether this is what you need:
# Example data: 12 random values, four groups for each of three years
# (wrapped in parentheses so the data frame is also printed).
(DF <- data.frame(
  year = rep(2015:2017, each = 4),
  Grp = c("Grp1", "Grp2", "Grp3", "Grp4"),
  Value = trunc(rnorm(12, 2000000, 100000))
))

library(dplyr)
library(ggplot2) # needed for ggplot()/geom_*(); it was not attached before

# One text layer per year: each layer only sees that year's rows, so
# sum(Value) inside aes() is the year total and the label is the share within
# the bar. The layers are generated from the years present in DF instead of
# being written out once per hard-coded year.
year_labels <- lapply(sort(unique(DF$year)), function(yr) {
  geom_text(
    data = DF %>% filter(year == yr),
    aes(
      x = year, y = Value,
      label = scales::percent(Value / sum(Value))
    ),
    position = position_stack(vjust = .5)
  )
})

ggplot(DF) +
  geom_bar(
    aes(x = year, y = Value, fill = Grp),
    stat = "identity",
    position = position_stack()
  ) +
  year_labels
Argument group is not necessary here. There may be more elegant solutions but that is the one I could think about. Tell me if this is the output you were waiting for:
Maybe creating a new column doing the right computation. I could not figure out how the computation could be done right inside aes(), the way you did you just computed the overall %, the Value should be grouped by year instead.
At least you now have the actual values on the Y axis and the per-year percentages inside the bars. I would advise changing the axis labels by adding something like this:
# Axis fragment: breaks every million from 0 to 8 million, labelled
# "0", "1 M", ..., "8 M".
scale_y_continuous(
  breaks = seq(0, 8e6, by = 1e6),
  labels = c(0, paste(1:8, "M"))
)
Resulting this:
You can adapt to your context.

Resources