geom_dumbbell spacing, legends in different places, and multiple aesthetics (timelines) - r

I saw this interesting way of creating a publication timeline using geom_dumbbell, so I created my own by first loading the libraries:
library(tidyverse)
library(ggalt)
library(ggrepel)
Entering in some data:
# Submission log: one row per submission round of each paper.
df <- data.frame(
  paper = rep(c("Paper 1", "Paper 2", "Paper 3"), times = c(2, 2, 4)),
  round = c("first", "revision", "first", "revision", rep("first", 4)),
  submission_date = c(
    "2019-05-23", "2020-12-11", "2020-08-12", "2020-10-28",
    "2020-12-10", "2020-12-11", "2021-01-20", "2021-01-22"
  ),
  journal_type = rep(c("physics", "chemistry"), each = 4),
  journal = paste("journal", c(1, 1, 2, 2, 3, 4, 5, 6)),
  status = c(
    "Revise and Resubmit", "Waiting for Decision", "Revise and Resubmit",
    "Accepted", "Desk Reject", "Desk Reject", "Desk Reject",
    "Waiting for Decision"
  ),
  decision_date = c(
    "2019-09-29", "2021-01-24", "2020-08-27", "2020-10-29",
    "2020-12-10", "2021-01-05", "2021-01-22", "2021-01-24"
  ),
  step_complete = c("yes", "no", "yes", "yes", "yes", "yes", "yes", "no"),
  duration_days = c(129, 44, 15, 1, 0, 25, 2, 2)
)
# Parse the ISO date strings into Date objects (column order is unchanged).
df <- transform(
  df,
  decision_date = as.Date(decision_date),
  submission_date = as.Date(submission_date)
)
and, finally, creating my own basic timeline using this code:
# Dumbbell timeline: each segment runs from submission to decision date,
# coloured by status and labelled with the wait in days.
ggplot(
  data = df,
  mapping = aes(
    x = submission_date,
    xend = decision_date,
    y = paper,
    label = duration_days,
    color = status
  )
) +
  geom_dumbbell(size = 1, size_x = 1) +
  scale_color_manual(
    values = c("green", "red", "darkolivegreen4", "turquoise1")
  ) +
  labs(
    title = "Timeline of Journal Submissions",
    subtitle = "Start date, decision date, and wait time (in days) for my papers.",
    x = NULL,
    y = NULL,
    color = "Status:"
  ) +
  # theme_ipsum_tw() +
  ggrepel::geom_label_repel(nudge_y = -0.25, show.legend = FALSE) +
  theme(legend.position = "top")
As you can see from the above image, I can't see the x-axis. Additionally, I'd like to put another aesthetic and legend on the right side for the journal, perhaps putting a different shape on each line. Any other bells and whistles using the above data would be fun, too. Thanks!

Ok, I finally found some time to figure this out with help from this terrific post. To start, let's load the revised list of packages:
library(tidyverse)
library(ggalt)
library(ggrepel)
library(gridExtra)
library(gtable)
library(grid)
For comprehensiveness, let's reload the data:
# Submission log used by the answer; note the capitalised `Journal` column,
# which becomes the title of the shape legend.
df <- data.frame(
  paper = c(
    "Paper 1", "Paper 1", "Paper 2", "Paper 2",
    "Paper 3", "Paper 3", "Paper 3", "Paper 3"
  ),
  round = c(
    "first", "revision", "first", "revision",
    "first", "first", "first", "first"
  ),
  submission_date = c(
    "2019-05-23", "2020-12-11", "2020-08-12", "2020-10-28",
    "2020-12-10", "2020-12-11", "2021-01-20", "2021-01-22"
  ),
  journal_type = c(
    "physics", "physics", "physics", "physics",
    "chemistry", "chemistry", "chemistry", "chemistry"
  ),
  Journal = c(
    "journal 1", "journal 1", "journal 2", "journal 2",
    "journal 3", "journal 4", "journal 5", "journal 6"
  ),
  status = c(
    "Revise and Resubmit", "Waiting for Decision", "Revise and Resubmit",
    "Accepted", "Desk Reject", "Desk Reject", "Desk Reject",
    "Waiting for Decision"
  ),
  decision_date = c(
    "2019-09-29", "2021-01-24", "2020-08-27", "2020-10-29",
    "2020-12-10", "2021-01-05", "2021-01-22", "2021-01-24"
  ),
  step_complete = c("yes", "no", "yes", "yes", "yes", "yes", "yes", "no"),
  duration_days = c(129, 44, 15, 1, 0, 25, 2, 2)
)
# Turn the date strings into Date objects.
df[["decision_date"]] <- as.Date(df[["decision_date"]])
df[["submission_date"]] <- as.Date(df[["submission_date"]])
First, let's create the plot with the color legend and extract it. Because I want that legend to be on top, I make sure to indicate that as my legend position. Note that I specify my preferred colors using the scale_color_manual function:
# Plot that carries only the colour (status) legend, positioned on top.
p1 <- ggplot(
  data = df,
  mapping = aes(
    x = submission_date,
    xend = decision_date,
    y = paper,
    label = duration_days,
    color = status
  )
) +
  geom_dumbbell(size = 1, size_x = 1) +
  scale_color_manual(
    values = c("green", "red", "darkolivegreen4", "turquoise1")
  ) +
  labs(
    title = "Timeline of Journal Submissions",
    subtitle = "Start date, decision date, and wait time (in days) for my papers.",
    x = NULL,
    y = NULL,
    color = "Status:"
  ) +
  ggrepel::geom_label_repel(nudge_y = -0.25, show.legend = FALSE) +
  theme(legend.position = "top")

# leg1: the colour legend, pulled out of the rendered plot table by name.
leg1 <- gtable_filter(ggplotGrob(p1), "guide-box")
Second, let's make the plot with the shape legend and extract it. Because I want this legend to be positioned on the right side, I don't need to even specify the legend position here. Note that I specify my preferred shapes using the scale_shape_manual argument:
# Plot that carries only the shape (journal) legend; the legend position is
# left unset, so no theme() call is needed here.
p2 <- ggplot(
  data = df,
  mapping = aes(
    x = submission_date,
    xend = decision_date,
    y = paper,
    label = duration_days,
    shape = Journal
  )
) +
  geom_dumbbell(size = 1, size_x = 1) +
  scale_shape_manual(values = c(15, 16, 17, 18, 19, 25)) +
  labs(
    title = "Timeline of Journal Submissions",
    subtitle = "Start date, decision date, and wait time (in days) for my papers.",
    x = NULL,
    y = NULL,
    color = "Status:"
  ) +
  ggrepel::geom_label_repel(nudge_y = -0.25, show.legend = FALSE)

# leg2: the shape legend, pulled out of the rendered plot table by name.
leg2 <- gtable_filter(ggplotGrob(p2), "guide-box")
Third, let's make the full plot with no legend, specifying both the scale_color_manual and scale_shape_manual arguments as well as theme(legend.position = 'none'):
# Main panel: both aesthetics mapped, legends suppressed (they are added
# back as separate grobs when the pieces are arranged).
plot <- ggplot(
  data = df,
  mapping = aes(
    x = submission_date,
    xend = decision_date,
    y = paper,
    label = duration_days,
    color = status,
    shape = Journal
  )
) +
  geom_dumbbell(size = 1, size_x = 3) +
  scale_color_manual(
    values = c("green", "red", "darkolivegreen4", "turquoise1")
  ) +
  scale_shape_manual(values = c(15, 16, 17, 18, 19, 25)) +
  labs(
    title = "Timeline of Journal Submissions",
    subtitle = "Start date, decision date, and wait time (in days) for my papers.",
    x = NULL,
    y = NULL,
    color = "Status:"
  ) +
  ggrepel::geom_label_repel(
    nudge_y = -0.25,
    nudge_x = -5.25,
    show.legend = FALSE
  ) +
  theme(legend.position = "none")
Fourth, let's arrange everything according to our liking:
# Arrange the three components (plot, leg1, leg2).
# The two legends are positioned outside the plot:
# one at the top and the other to the side.
#
# A gtable stores its sizes in `heights` / `widths`. Spell the field names
# out in full (no reliance on partial `$` matching) and sum them, so a legend
# table with more than one row or column still gives a single size.
leg1_height <- sum(leg1$heights)
leg2_width <- sum(leg2$widths)

# Stack the colour legend above the plot; the plot takes the remaining height.
plotNew <- arrangeGrob(
  leg1, plot,
  heights = unit.c(leg1_height, unit(1, "npc") - leg1_height),
  ncol = 1
)
# Put the shape legend to the right; the stacked block takes the remaining
# width.
plotNew <- arrangeGrob(
  plotNew, leg2,
  widths = unit.c(unit(1, "npc") - leg2_width, leg2_width),
  nrow = 1
)
Finally, plot and enjoy the final product:
# Start a fresh page on the current graphics device ...
grid.newpage()
# ... and draw the combined layout (plot plus both legends) on it.
grid.draw(plotNew)
As everyone will no doubt recognize, I relied very heavily on this post. However, I did change a few things, I tried to be comprehensive with my explanation, and some others spent time trying to help, so I think it is still helpful to have this answer here.

Related

ggplot: labeling x axis in lineplot

For a long time I have been struggling to get the x-axis labels right in my plot (ggplot2).
The challenge is that I have two geom_paths, each fetching the data from a different dataframe - I'm sure this will become a bit clearer in the code:
# Base plot on `dx`, with two spline paths drawn from their own data frames
# (`ps`, `pg`); colour and linetype share the same two legend keys.
ggplot(data = dx, mapping = aes(x = year, y = en.x)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 2)) +
  geom_path(
    data = ps,
    mapping = aes(x, y, color = "Person 1", linetype = "Person 1"),
    size = 0.5
  ) +
  geom_path(
    data = pg,
    mapping = aes(x, y, color = "Person 2", linetype = "Person 2"),
    size = 0.5
  ) +
  scale_color_manual(
    "",
    labels = c(Nutzer1, Nutzer2),
    values = c("Person 1" = Nutzer1Farbe, "Person 2" = Nutzer2Farbe)
  ) +
  scale_linetype_manual(
    "",
    labels = c(Nutzer1, Nutzer2),
    values = c("Person 1" = Nutzer1Format, "Person 2" = Nutzer2Format)
  )
The goal is to label the x-axis with the years from the dataframe "dx", as shown in the aes parameter. And it works! But only if you disable the geom_paths, as shown below:
# Same plot with both path layers commented out: only the axes from `dx`
# and the (now unused) manual scales remain.
ggplot(data = dx, mapping = aes(x = year, y = en.x)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 2)) +
  # geom_path(data=ps, aes(x, y, color = "Person 1", linetype="Person 1"), size=0.5)+
  # geom_path(data=pg, aes(x , y, color = "Person 2", linetype="Person 2"), size=0.5)+
  scale_color_manual(
    "",
    labels = c(Nutzer1, Nutzer2),
    values = c("Person 1" = Nutzer1Farbe, "Person 2" = Nutzer2Farbe)
  ) +
  scale_linetype_manual(
    "",
    labels = c(Nutzer1, Nutzer2),
    values = c("Person 1" = Nutzer1Format, "Person 2" = Nutzer2Format)
  )
I can't really understand why the paths destroy the labeling like this - it must be the aes parameters.
If someone has a solution for this, I would be extremely grateful!
This could be achieved like so:
Convert your original month variable to a date time before calling xspline. This way the interpolated date values could be easily converted back to datetime via e.g. lubridate::as_datetime.
besides that you could row bind your datasets which makes plotting a bit easier
library(ggplot2)
library(tidyr)
library(dplyr)
# Convert to datetime so the interpolated x values can be turned back into
# date-times after the spline step.
datengesamt <- mutate(datengesamt, month = as.POSIXct(month))

# NOTE(review): the throwaway plot presumably exists only to open a graphics
# device for xspline() -- confirm before removing it.
plot(1, 1)
ps <- xspline(datengesamt[, 1], datengesamt[, 2], shape = 1, draw = FALSE)
pg <- xspline(datengesamt[, 1], datengesamt[, 3], shape = 1, draw = FALSE)

# Stack both splines into one long data frame keyed by `id`.
pp <- bind_rows(
  list("Person 1" = data.frame(ps), "Person 2" = data.frame(pg)),
  .id = "id"
)
pp <- mutate(pp, x = lubridate::as_datetime(x))

# One path layer now serves both persons; the x axis shows years only.
ggplot(data = pp, mapping = aes(x, y, color = id, linetype = id)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 2)) +
  geom_path(size = 0.5) +
  scale_x_datetime(date_labels = "%Y")

How to Clean up Donut Chart?

I need to clean up the donut chart below, by getting rid of the double data labels, and rounding the percents. I would like the final graph to have one data label with a % and no decimal place. Anyone know how I accomplish this?
# Share of respondents per group (values are used as slice sizes).
# Fixed: `cc(` was a typo for `c(`.
df <- data.frame(
  group = c(
    "DoDEA", "ERAC", "Evaluator", "Project Director",
    "Project Staff - Finance", "Project Staff - Other"
  ),
  value = c(3, 7, 10, 43, 12, 25)
)
head(df)
# NOTE(review): this call passes no value column -- confirm it runs as intended.
ggdonutchart(df, label = "group")
# Donut with white slice borders and one fill colour per group; geom_text()
# adds a percentage label in the middle of each slice.
# Fixed: a stray trailing backtick after the last call made the snippet
# unparsable.
ggdonutchart(
  df, "value",
  color = "white", fill = "group",
  palette = c(
    "yellow", "steelblue3", "slategray3", "dodgerblue4",
    "yellowgreen", "sienna2"
  )
) +
  labs(fill = "") + # removes the "group" title from the legend
  geom_text(
    aes(label = scales::percent(value / sum(value))),
    position = position_stack(vjust = 0.5)
  )
You can add a new column to your dataframe to create the labels. This code will put the % on a new line. Then add these labels in the ggdonutchart function using the label argument. The function then has additional arguments to adjust the label position.
# Respondent groups and their counts.
df <- data.frame(
  group = c(
    "DoDEA", "ERAC", "Evaluator", "Project Director",
    "Project Staff - Finance", "Project Staff - Other"
  ),
  value = c(3, 7, 10, 43, 12, 25)
)

# One label per slice: group name, line break, whole-number percentage.
pct_text <- scales::percent(df$value / sum(df$value), accuracy = 1)
df$label <- paste0(df$group, "\n", pct_text)

# The chart draws the labels itself, so the legend is dropped.
ggdonutchart(
  df, "value",
  label = "label",
  color = "white",
  fill = "group",
  palette = c(
    "yellow", "steelblue3", "slategray3", "dodgerblue4",
    "yellowgreen", "sienna2"
  )
) +
  theme(legend.position = "none")

ggplot: re-order categorical y-axis (Gantt chart)

I am trying to produce a Gantt chart out of a table with different task (each having a start date and end date).
library(tidyverse)
# Sample data: one row per task with its start/end date and owning group.
df1 <- data.frame(
  from = c(
    "2020-01-01", "2020-02-02", "2020-05-04",
    "2020-02-01", "2020-01-20", "2020-02-10"
  ),
  to = c(
    "2020-03-30", "2020-03-15", "2020-05-20",
    "2020-04-05", "2020-03-05", "2020-04-13"
  ),
  task = paste("Task", 1:6),
  group = rep(c("Finance", "Research", "Other"), each = 2)
)

# Gantt chart: reshape to one row per endpoint so each task is drawn as a
# line between its two dates; the x axis shows week numbers.
df1 %>%
  mutate(from = as.Date(from), to = as.Date(to)) %>%
  pivot_longer(cols = c(from, to), values_to = "date") %>%
  ggplot(mapping = aes(x = date, y = task, colour = group)) +
  geom_line(lwd = 3) +
  geom_point(aes(color = group), alpha = 0.5, pch = 18, size = 5) +
  scale_x_date(
    position = "bottom",
    date_breaks = "1 week",
    date_labels = "%U"
  ) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Milestones") +
  labs(y = "", x = "", colour = "Responsible")
So far so good, but now I have a major problem and a minor problem:
major problem:
How can I re-order the categories on the y-axis as they appear in the data (not alphabetically)? From top to bottom it should be: Finance, Research, Other. Additionally, within each category (finance, research, other) the lines should be ordered by starting date (i.e. the task starting first should be on top)
minor problem:
on the x-axis how can I plot a thicker line for each month and a thinner line for each week?
Thanks for help!
This could be achieved like so:
As #RuiBarrades mentioned in his comments to get the right order you have to convert to a factor and set the levels in the right order. First, set the levels for the groups. Second, to get the tasks in the desired order I rearrange the dataset by group and start date and make use of forcats::fct_inorder to set the levels of the tasks in the desired order.
If I got you right, you want different grid lines for week and month? This could be achieved by setting date_minor_breaks = "1 month" and styling the grid lines via theme and panel.grid.minor/major.x. Here I opted for a "black" color, but if you prefer different sizes you could do so via size.
library(tidyverse)
library(ggplot2)
# Sample data: one row per task with its start/end date and owning group.
df1 <- data.frame(
  from = c(
    "2020-01-01", "2020-02-02", "2020-05-04",
    "2020-02-01", "2020-01-20", "2020-02-10"
  ),
  to = c(
    "2020-03-30", "2020-03-15", "2020-05-20",
    "2020-04-05", "2020-03-05", "2020-04-13"
  ),
  task = paste("Task", 1:6),
  group = rep(c("Finance", "Research", "Other"), each = 2)
)

# Order the rows first, then freeze that order into the task factor.
# Sorting is descending because the first factor level is drawn at the
# bottom of the y axis.
gantt_data <- df1 %>%
  mutate(
    from = as.Date(from),
    to = as.Date(to),
    group = factor(group, levels = c("Finance", "Research", "Other"))
  ) %>%
  arrange(desc(group), desc(from)) %>%
  mutate(task = forcats::fct_inorder(task)) %>%
  pivot_longer(cols = c(from, to), values_to = "date")

# Gantt chart: weekly major breaks, monthly minor breaks drawn in black.
ggplot(gantt_data, aes(x = date, y = task, colour = group)) +
  geom_line(lwd = 3) +
  geom_point(aes(color = group), alpha = 0.5, pch = 18, size = 5) +
  scale_x_date(
    position = "bottom",
    date_breaks = "1 week",
    date_minor_breaks = "1 month",
    date_labels = "%U"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.grid.minor.x = element_line(color = "black")
  ) +
  ggtitle("Milestones") +
  labs(y = "", x = "", colour = "Responsible")

Manual fill geom_tile with multiple scales by group

I have the following current output:
And I am aiming for a colouring like this, but only filled until the maximum level (e.g the fill stops at the level present):
The data to create this, is:
# One row per survey question with the level (1-5) the respondent reached.
# Fixed: "Community data" had a trailing space here, so it did not match the
# corresponding entry in `levels` below.
df <- tribble(
  ~Question_Code, ~RespondentLevel,
  "Engagement - Inclusion", 5,
  "External engagement - policies", 2,
  "External engagement - technology", 5,
  "Community data", 5,
  "Internal engagement", 5,
  "Internal use of technology", 4,
  "Familiarity/Alignment", 5,
  "Environmental impacts", 5,
  "Innovation", 2,
  "Use of open-source technology", 2,
  "Regulation of hardware & software", 5,
  "In-house technical capacity", 5,
  "Infrastructure procurement", 5,
  "Algorithmic Error & Bias", 2,
  "Control: Privacy", 5,
  "Accountability in Governance Structures", 3,
  "Open procurement", 5,
  "Use in decision-making", 1,
  "Accountability", 1,
  "External Control", 4,
  "Internal Control", 2,
  "Open Data", 2
)

# The same question labels in reverse row order.
levels <- c(
  "Open Data", "Internal Control", "External Control", "Accountability",
  "Use in decision-making", "Open procurement",
  "Accountability in Governance Structures", "Control: Privacy",
  "Algorithmic Error & Bias", "Infrastructure procurement",
  "In-house technical capacity", "Regulation of hardware & software",
  "Use of open-source technology", "Innovation",
  "Environmental impacts", "Familiarity/Alignment",
  "Internal use of technology", "Internal engagement", "Community data",
  "External engagement - technology", "External engagement - policies",
  "Engagement - Inclusion"
)

# Assign consecutive runs of questions to the four domains (5, 4, 6, 7 rows).
df <- df %>%
  mutate(Domain = rep(paste("Domain", 1:4), times = c(5, 4, 6, 7)))
And for the ggplot:
# Plain tile plot: one tile per question at its respondent level, with the
# question order reversed on the y axis.
ggplot(
  data = df,
  mapping = aes(x = RespondentLevel, y = fct_rev(Question_Code))
) +
  geom_tile() +
  theme_minimal(base_size = 16)
The colours to fill, I'm using:
with each colour corresponding to a domain, and each shade to a level:
# Five-shade palettes: one palette per domain, one shade per level.
Greens <- c(
  "#edf8e9", "#bae4b3", "#74c476", "#31a354", "#006d2c"
)
Reds <- c(
  "#fee5d9", "#fcae91", "#fb6a4a", "#de2d26", "#a50f15"
)
Yellows <- c(
  "#ffffeb", "#ffff9d", "#ffff89", "#ffff4e", "#ffff14"
)
Blues <- c(
  "#eff3ff", "#bdd7e7", "#6baed6", "#3182bd", "#08519c"
)
EDIT: geom_bar also does the trick, but not broken down by gradient. Trying to use this function:
# Build one colour ramp per group, each with as many steps as that group has
# distinct subgroup values.
#
# df       Data frame containing both columns.
# group    Name of the grouping column (string).
# subgroup Name of the column whose distinct values are counted (string).
#
# Returns a character vector of colours: the per-group ramps concatenated in
# the row order of the aggregate() result.
ColourPalleteMulti <- function(df, group, subgroup) {
  # Number of distinct subgroup values within each group.
  n_by_group <- aggregate(
    as.formula(paste(subgroup, group, sep = "~")),
    df,
    function(x) length(unique(x))
  )
  n_groups <- nrow(n_by_group)

  # Same hues at two luminance settings: l = 100 starts each ramp,
  # l = 40 ends it.
  ramp_from <- scales::hue_pal(l = 100)(n_groups)
  ramp_to <- scales::hue_pal(l = 40)(n_groups)

  ramps <- lapply(seq_len(n_groups), function(i) {
    colorRampPalette(colors = c(ramp_from[i], ramp_to[i]))(n_by_group[i, 2])
  })
  unlist(ramps)
}
# Colour ramp values, one run of shades per domain.
colours <- ColourPalleteMulti(df, "Domain", "RespondentLevel")

# Horizontal bar chart of the respondent level per question, filled by domain.
ggplot(
  data = df,
  mapping = aes(x = fct_rev(Question_Code), y = RespondentLevel)
) +
  geom_bar(stat = "identity", aes(fill = Domain), alpha = 0.9) +
  coord_flip() +
  theme_minimal(16) +
  xlab(" ") +
  ggtitle("Baseline Report Card Sample Community") +
  scale_fill_manual("RespondentLevel", values = colours) +
  theme(
    legend.title = element_text(size = 14),
    legend.position = "none",
    legend.text = element_text(size = 14),
    plot.title = element_text(size = 18, hjust = 0.5),
    plot.caption = element_text(size = 12, hjust = 1),
    axis.text.y = element_text(hjust = 0),
    panel.grid = element_line(colour = "#F0F0F0"),
    plot.margin = unit(c(1, 1, 0.5, 1), "cm")
  )
Sorry for the long reprex, can adjust if possible
Here are a few options for tricks. First off, to get the full set of levels for each question so you don't have gaps in your data, I used tidyr::complete. That's the data frame I'll be working with.
library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
library(patchwork)
# Expand to every level for each (domain, question) pair so the tile grid has
# no gaps, then store the level as text.
df_full <- complete(df, nesting(Domain, Question_Code), RespondentLevel)
df_full <- mutate(df_full, RespondentLevel = as.character(RespondentLevel))
The easier option is to approximate the gradients with changing the alpha, and setting the hue (red, green, etc) based on domain. This forfeits the other colors you've chosen, and just uses the last, darkest color of each palette.
To do this, I made a list of all your palettes. In setting the fill, map_chr(palettes, 5) extracts the 5th element of each list, which is the darkest color of each. You'll probably want to adjust or remove one or both of the legends.
# All four palettes in one list.
palettes <- list(Greens, Reds, Yellows, Blues)

# Hue comes from the domain (5th colour of each palette); the level is shown
# through alpha instead of through separate shades.
ggplot(
  data = df_full,
  mapping = aes(
    x = RespondentLevel,
    y = Question_Code,
    fill = Domain,
    alpha = RespondentLevel
  )
) +
  geom_tile() +
  theme_minimal() +
  facet_grid(rows = vars(Domain), scales = "free", space = "free") +
  scale_fill_manual(values = map_chr(palettes, 5))
#> Warning: Using alpha for a discrete variable is not advised.
The more difficult way splits the data by domain and makes a list of plots, then puts them together with the patchwork package. The benefit is that you can keep the full color palettes, but the downside is that it's more difficult to control things like sizing that you get from facet_grid, which adjusts for the fact that there are more questions listed in some domains than in others. You could resize these by hand in plot_layout if you think this approach is worthwhile. You'll also need to adjust some theme elements to mimic what facet_grid would do.
# Draw the tiles of a single domain using that domain's own palette.
plot_domain <- function(domain_df, pal) {
  ggplot(
    data = domain_df,
    mapping = aes(
      x = RespondentLevel,
      y = Question_Code,
      fill = RespondentLevel
    )
  ) +
    geom_tile() +
    theme_minimal() +
    scale_fill_manual(values = pal) +
    theme(legend.position = "none") +
    labs(x = NULL, y = NULL)
}

# One plot per domain, each paired with the palette at the same position.
plot_list <- map2(split(df_full, df_full$Domain), palettes, plot_domain)

# Combine the plots with patchwork's `+` and stack them in a single column.
reduce(plot_list, `+`) +
  plot_layout(ncol = 1)
Note that normally, patchwork puts plots together like plot1 + plot2 to mimic ggplot layering. Since I had the plots in a list, I did this with purrr::reduce.

highlight points in ggplot2 stat_qq output

I am trying to highlight selected points based on their order statistics in a ggplot stat_qq output:
# Four samples of 100 standard-normal draws each.
ydata <- data.frame(
  sample = rep(paste("Sample", 1:4), each = 100),
  x = rnorm(400)
)
# Sort within each sample and record every value's rank (1..100).
ydata <- ydata[order(ydata$sample, ydata$x), ]
ydata$order <- rep(1:100, times = 4)

# QQ plot per sample; the text layer tries to label selected ranks.
ggplot(ydata, aes(sample = x)) +
  stat_qq() +
  facet_wrap(~sample) +
  scale_x_continuous(
    breaks = -2:2,
    labels = function(x) paste0(x, " \n [", 100 * signif(pnorm(-2:2, lower.tail = FALSE), 2), "%]")
  ) +
  theme_bw(base_size = 14, base_family = "sans") +
  labs(
    title = "Four Samples of 100 Observations From Normal Distribution",
    caption = "4 Samples of n = 100 from Normal Distribution \nNumbers indicate order of value",
    y = "Sample Value",
    x = "Standard Deviation\n[%exceeding]"
  ) +
  geom_text(
    data = ydata[ydata$order %in% c(2, 16, 50, 84, 98), ],
    mapping = aes(x = qnorm(pnorm(x)), y = x, label = order),
    nudge_y = 1
  )
Which produced this:
Obviously my text notation is not highlighting the right points (the 2nd, 16th, 50th, 84th, and 98th points). I wish I could also highlight the actual points in red. Would appreciate any suggestions.
You could calculate the qq values outside of ggplot and create a separate column to group the qq values into highlighted and not highlighted. Then you could plot them using geom_point with the grouping variable as a colour aesthetic. For example:
library(tidyverse)
# Generate data reproducibly
set.seed(2)
ydata <- data.frame(
  sample = c(
    rep("Sample 1", 100),
    rep("Sample 2", 100),
    rep("Sample 3", 100),
    rep("Sample 4", 100)
  ),
  x = rnorm(400)
)
# Sort within each sample; `order` is then the rank (1..100) inside a sample
# (the 100 values are recycled across the four samples).
ydata <- ydata[order(ydata$sample, ydata$x), ]
ydata$order <- 1:100

# Quantile indices to highlight
pts <- c(2, 16, 50, 84, 98)

# Add qq values and grouping column to data frame and pipe into ggplot
# Use split and map to calculate the qq values separately for each Sample
split(ydata, ydata$sample) %>%
  map_df(~ .x %>% mutate(
    xq = qqnorm(x, plot.it = FALSE)$x,
    group = ifelse(order %in% pts, "A", "B")
  )) %>%
  ggplot(aes(xq, x, colour = group)) +
  geom_point(size = 1) +
  # Only highlighted points get a (non-empty) label.
  geom_text(
    aes(label = ifelse(group == "A", order, "")),
    nudge_y = 1, size = 3
  ) +
  facet_wrap(~ sample) +
  theme_bw(base_size = 14, base_family = "sans") +
  # Group "A" (highlighted) is red, group "B" is black.
  scale_colour_manual(values = c("red", "black")) +
  # "none" replaces the deprecated `guides(colour = FALSE)` form.
  guides(colour = "none")
As an alternative, a quick hack would be to use ggplot_build to highlight specific points in your original plot (note though that something is not quite right with how you placed the labels relative to the highlighted points):
# Row indices of the points to recolour: the five ranks, shifted by
# 0/100/200/300 for the four blocks of 100 rows.
pts <- rep(seq(0, 300, by = 100), each = 5) + c(2, 16, 50, 84, 98)

# Assuming you've assigned your plot to the object p
pb <- ggplot_build(p)

# First layer: recolour only the selected rows.
pb$data[[1]][pts, "colour"] <- "red"

# Second layer: recolour every row.
pb$data[[2]][["colour"]] <- "red"

# Rebuild the drawable table from the edited data and show it.
p <- ggplot_gtable(pb)
plot(p)
You can apply stat="qq" to your geom_point and then use the colors assigned to new variable
# Four samples of 100 standard-normal draws each.
ydata <- data.frame(
  sample = rep(paste("Sample", 1:4), each = 100),
  x = rnorm(400)
)
# Sort within each sample and record every value's rank (1..100).
ydata <- ydata[order(ydata$sample, ydata$x), ]
ydata$order <- rep(1:100, times = 4)

# Per-row colour and label text: highlighted ranks are red and labelled,
# all others black with an empty label.
is_marked <- ydata$order %in% c(2, 16, 50, 84, 98)
ydata$highlight <- ifelse(is_marked, "#FF0000", "#000000")
ydata$order_txt <- ifelse(is_marked, ydata$order, "")

# Both layers use stat = "qq"; colours and labels are passed as plain
# vectors rather than mapped aesthetics.
ggplot(ydata, aes(sample = x)) +
  geom_point(color = ydata$highlight, stat = "qq") +
  geom_text(label = ydata$order_txt, stat = "qq", nudge_y = 1) +
  facet_wrap(~sample) +
  scale_x_continuous(
    breaks = -2:2,
    labels = function(x) paste0(x, " \n [", 100 * signif(pnorm(-2:2, lower.tail = FALSE), 2), "%]")
  ) +
  theme_bw(base_size = 14, base_family = "sans") +
  labs(
    title = "Four Samples of 100 Observations From Normal Distribution",
    caption = "4 Samples of n = 100 from Normal Distribution \nNumbers indicate order of value",
    y = "Sample Value",
    x = "Standard Deviation\n[%exceeding]"
  )

Resources