labels on a stacked density plot - r

I'm generating a stacked density plot:
ggplot(data=tydy_rawdata, aes(x=timepoint, y=tpm, group=fct_inorder(names),
fill=fct_inorder(names))) +
geom_density(position="fill",
stat="identity") +
scale_fill_manual(values = rev(mycolors))
plot :
I would like to add label on each curve (or at least the top 3 or 4) basing on the "names" displayed on the right.
I'm trying adding geom_text but the result is this :
gplot(data=tydy_rawdata, aes(x=timepoint, y=tpm, group=fct_inorder(names),
fill=fct_inorder(names))) +
geom_density(position="fill",
stat="identity") +
geom_text(aes(label=names)) +
scale_fill_manual(values = rev(mycolors))
plot :
Are there some way to do it?

First, your chart is a stacked area chart, i.e. geom_density with stat="identity" is equal to geom_area. Second, when adding labels via geom_text you have to take account of the position argument. As you use position="fill" for your density/area chart you also have to do the same for geom_text.
As you provided no example data I created my own to make your issue reproducible:
library(ggplot2)
library(forcats)
set.seed(123)
tydy_rawdata <- data.frame(
names = rep(LETTERS[1:10], each = 6),
timepoint = factor(seq(6)),
tpm = runif(6 * 10, 0, 80)
)
ggplot(data = tydy_rawdata, aes(
x = timepoint, y = tpm,
group = fct_inorder(names), fill = fct_inorder(names)
)) +
geom_area(
position = "fill",
color = "black"
) +
geom_text(aes(label = names), position = "fill")

Related

How to add percentages on top of an histogram when data is grouped

This is not my data (for confidentiality reasons), but I have tried to create a reproducible example using a dataset included in the ggplot2 library. I have an histogram summarizing the value of some variable by group (factor of 2 levels). First, I did not want the counts but proportions of the total, so I used that code:
library(ggplot2)
library(dplyr)
df_example <- diamonds %>% as.data.frame() %>% filter(cut=="Premium" | cut=="Ideal")
ggplot(df_example,aes(x=z,fill=cut)) +
geom_histogram(aes(y=after_stat(width*density)),binwidth=1,center=0.5,col="black") +
facet_wrap(~cut) +
scale_x_continuous(breaks=seq(0,9,by=1)) +
scale_y_continuous(labels=scales::percent_format(accuracy=2,suffix="")) +
scale_fill_manual(values=c("#CC79A7","#009E73")) +
labs(x="Depth (mm)",y="Count") +
theme_bw() + theme(legend.position="none")
It gave me this as a result.
enter image description here
The issue is that I would like to print the numeric percentages on top of the bins and haven't find a way to do so.
As I saw it done for printing counts elsewhere, I attempted to print them using stat_bin(), including the same y and label values as the y in geom_histogram, thinking it would print the right numbers:
ggplot(df_example,aes(x=z,fill=cut)) +
geom_histogram(aes(y=after_stat(width*density)),binwidth=1,center=0.5,col="black") +
stat_bin(aes(y=after_stat(width*density),label=after_stat(width*density*100)),geom="text",vjust=-.5) +
facet_wrap(~cut) +
scale_x_continuous(breaks=seq(0,9,by=1)) +
scale_y_continuous(labels=scales::percent_format(accuracy=2,suffix="")) +
scale_fill_manual(values=c("#CC79A7","#009E73")) +
labs(x="Depth (mm)",y="%") +
theme_bw() + theme(legend.position="none")
However, it does print way more values than there are bins, these values do not appear consistent with what is portrayed by the bar heights and they do not print in respect to vjust=-.5 which would make them appear slightly above the bars.
enter image description here
What am I missing here? I know that if there was no grouping variable/facet_wrap, I could use after_stat(count/sum(count)) instead of after_stat(width*density) and it seems that it would have fixed my issue. But I need the histograms for both groups to appear next to each other. Thanks in advance!
You have to use the same arguments in stat_bin as for the histogram when adding your labels to get same binning for both layers and to align the labels with the bars:
library(ggplot2)
library(dplyr)
df_example <- diamonds %>%
as.data.frame() %>%
filter(cut == "Premium" | cut == "Ideal")
ggplot(df_example, aes(x = z, fill = cut)) +
geom_histogram(aes(y = after_stat(width * density)),
binwidth = 1, center = 0.5, col = "black"
) +
stat_bin(
aes(
y = after_stat(width * density),
label = scales::number(after_stat(width * density), scale = 100, accuracy = 1)
),
geom = "text", binwidth = 1, center = 0.5, vjust = -.25
) +
facet_wrap(~cut) +
scale_x_continuous(breaks = seq(0, 9, by = 1)) +
scale_y_continuous(labels = scales::number_format(scale = 100)) +
scale_fill_manual(values = c("#CC79A7", "#009E73")) +
labs(x = "Depth (mm)", y = "%") +
theme_bw() +
theme(legend.position = "none")

Use free_y scale on first axis and fixed on second + facet_grid + ggplot2

Is there any method to set scale = 'free_y' on the left hand (first) axis in ggplot2 and use a fixed axis on the right hand (second) axis?
I have a dataset where I need to use free scales for one variable and fixed for another but represent both on the same plot. To do so I'm trying to add a second, fixed, y-axis to my data. The problem is I cannot find any method to set a fixed scale for the 2nd axis and have that reflected in the facet grid.
This is the code I have so far to create the graph -
#plot weekly seizure date
p <- ggplot(dfspw_all, aes(x=WkYr, y=Seizures, group = 1)) + geom_line() +
xlab("Week Under Observation") + ggtitle("Average Seizures per Week - To Date") +
geom_line(data = dfsl_all, aes(x =WkYr, y = Sleep), color = 'green') +
scale_y_continuous(
# Features of the first axis
name = "Seizures",
# Add a second axis and specify its features
sec.axis = sec_axis(~.[0:20], name="Sleep")
)
p + facet_grid(vars(Name), scales = "free_y") +
theme(axis.ticks.x=element_blank(),axis.text.x = element_blank())
This is what it is producing (some details omitted from code for simplicity) -
What I need is for the scale on the left to remain "free" and the scale on the right to range from 0-24.
Secondary axes are implemented in ggplot2 as a decoration that is a transformation of the primary axis, so I don't know an elegant way to do this, since it would require the secondary axis formula to be aware of different scaling factors for each facet.
Here's a hacky approach where I scale each secondary series to its respective primary series, and then add some manual annotations for the secondary series. Another way might be to make the plots separately for each facet like here and use patchwork to combine them.
Given some fake data where the facets have different ranges for the primary series but the same range for the secondary series:
library(tidyverse)
fake <- tibble(facet = rep(1:3, each = 10),
x = rep(1:10, times = 3),
y_prim = (1+sin(x))*facet/2,
y_sec = (1 + sin(x*3))/2)
ggplot(fake, aes(x, y_prim)) +
geom_line() +
geom_line(aes(y= y_sec), color = "green") +
facet_wrap(~facet, ncol = 1)
...we could scale each secondary series to its primary series, and add custom annotations for that secondary series:
fake2 <- fake %>%
group_by(facet) %>%
mutate(y_sec_scaled = y_sec/max(y_sec) * (max(y_prim))) %>%
ungroup()
fake2_labels <- fake %>%
group_by(facet) %>%
summarize(max_prim = max(y_prim), baseline = 0, x_val = 10.5)
ggplot(fake2, aes(x, y_prim)) +
geom_line() +
geom_line(aes(y= y_sec_scaled), color = "green") +
facet_wrap(~facet, ncol = 1, scales = "free_y") +
geom_text(data = fake2_labels, aes(x = x_val, y = max_prim, label = "100%"),
hjust = 0, color = "green") +
geom_text(data = fake2_labels, aes(x = x_val, y = baseline, label = "0%"),
hjust = 0, color = "green") +
coord_cartesian(xlim = c(0, 10), clip = "off") +
theme(plot.margin = unit(c(1,3,1,1), "lines"))

Plotting a bar chart with years grouped together

I am using the fivethirtyeight bechdel dataset, located here https://github.com/rudeboybert/fivethirtyeight, and am attempting to recreate the first plot shown in the article here https://fivethirtyeight.com/features/the-dollar-and-cents-case-against-hollywoods-exclusion-of-women/. I am having trouble getting the years to group together similarly to how they did in the article.
This is the current code I have:
ggplot(data = bechdel, aes(year)) +
geom_histogram(aes(fill = clean_test), binwidth = 5, position = "fill") +
scale_fill_manual(breaks = c("ok", "dubious", "men", "notalk", "nowomen"),
values=c("red", "salmon", "lightpink", "dodgerblue",
"blue")) +
theme_fivethirtyeight()
I see where you were going with using the histogram geom but this really looks more like a categorical bar chart. Once you take that approach it's easier, after a bit of ugly code to get the correct labels on the year columns.
The bars are stacked in the wrong order on this one, and there needs to be some formatting applied to look like the 538 chart, but I'll leave that for you.
library(fivethirtyeight)
library(tidyverse)
library(ggthemes)
library(scales)
# Create date range column
bechdel_summary <- bechdel %>%
mutate(date.range = ((year %/% 10)* 10) + ((year %% 10) %/% 5 * 5)) %>%
mutate(date.range = paste0(date.range," - '",substr(date.range + 5,3,5)))
ggplot(data = bechdel_summary, aes(x = date.range, fill = clean_test)) +
geom_bar(position = "fill", width = 0.95) +
scale_y_continuous(labels = percent) +
theme_fivethirtyeight()
ggplot

ggplot 2: how to show breaks

I can't get ggplot2 to show breaks in horizontal bars. The code is as follows:
dat <- data.frame(
result = c(replicate(50, 'ok'), replicate(17, 'error'), replicate(35, 'notrun')),
test = 'test',
count = 'count'
)
ggplot(data=dat, aes(x=test, y=count, fill=result)) +
geom_bar(stat="identity") +
scale_fill_manual(values = c(ok = '#00BA38', error='#F8766D', notrun='gray')) +
xlab("") + ylab("") +
scale_x_discrete(label="") +
scale_y_discrete(breaks = c(1, 7, 9)) +
coord_flip()
ggplot counts automatically if you leave stat=identity out. In this case you can leave your count variable out as well:
dat <- data.frame(
result = c( replicate(50, 'ok'), replicate(17, 'error'),
replicate(35, 'notrun') ),
test = 'test' )
ggplot(data=dat, aes(x=test, fill=result)) +
geom_bar() +
coord_flip() +
scale_fill_manual(values = c(ok = '#00BA38', error='#F8766D', notrun='gray'))
To clarify I cite the documentation on geom_bar():
"By default, geom_bar uses stat="bin". This makes the height of each bar equal to the number of cases in each group, and it is incompatible with mapping values to the y aesthetic. If you want the heights of the bars to represent values in the data, use stat="identity" and map a value to the y aesthetic."
So what you were doing was putting variable count on the y-axis, which had the same string value "count" for each row, and hence could not be represented on a numerical scale.

How to order bars in faceted ggplot2 bar chart

If I want to order the bars in a ggplot2 barchart from largest to smallest, then I'd usually update the factor levels of the bar category, like so
one_group <- data.frame(
height = runif(5),
category = gl(5, 1)
)
o <- order(one_group$height, decreasing = TRUE)
one_group$category <- factor(one_group$category, levels = one_group$category[o])
p_one_group <- ggplot(one_group, aes(category, height)) +
geom_bar(stat = "identity")
p_one_group
If have have several groups of barcharts that I'd like in different facets, with each facet having bars ordered from largest to smallest (and different x-axes) then the technique breaks down.
Given some sample data
two_groups <- data.frame(
height = runif(10),
category = gl(5, 2),
group = gl(2, 1, 10, labels = letters[1:2])
)
and the plotting code
p_two_groups <- ggplot(two_groups, aes(category, height)) +
geom_bar(stat = "identity") +
facet_grid(. ~ group, scales = "free_x")
p_two_groups
what do I need to do to get the bar ordering right?
If it helps, an equivalent problem to solve is: how do I update factor levels after I've done the faceting?
here is a hack:
two_groups <- transform(two_groups, category2 = factor(paste(group, category)))
two_groups <- transform(two_groups, category2 = reorder(category2, rank(height)))
ggplot(two_groups, aes(category2, height)) +
geom_bar(stat = "identity") +
facet_grid(. ~ group, scales = "free_x") +
scale_x_discrete(labels=two_groups$category, breaks=two_groups$category2)
make UNIQUE factor variable for all entries (category2)
reorder the variable based on the height
plot on the variable: aes(x=category2)
re-label the axis using original value (category) for the variable (category2) in scale_x_discrete.
Here is a hack to achieve what you want. I was unable to figure out how to get the category values below the tick marks. So if someone can help fix that, it would be wonderful. Let me know if this works
# add a height rank variable to the data frame
two_groups = ddply(two_groups, .(group), transform, hrank = rank(height));
# plot the graph
p_two_groups <- ggplot(two_groups, aes(-hrank, height)) +
geom_bar(stat = "identity") +
facet_grid(. ~ group, scales = "free_x") +
opts(axis.text.x = theme_blank()) +
geom_text(aes(y = 0, label = category, vjust = 1.5))

Resources