waterfall chart in ggplot2 doesnt seem to render right - r

I am trying to recreate the waterfall chart as shown in
https://vita.had.co.nz/papers/ggplot2-wires.pdf
I am reproducing the code from the link
balance <- data.frame(event = c("Starting\nCash", "Sales", "Refunds",
"Payouts", "Court\nLosses", "Court\nWins", "Contracts", "End\nCash"),
change = c(2000, 3400, -1100, -100, -6600, 3800, 1400, -2800))
balance$balance <- cumsum(c(0, balance$change[-nrow(balance)]))
balance$time <- 1:nrow(balance)
balance$flow <- factor(sign(balance$change))
ggplot(balance) +
geom_hline(yintercept = 0, colour = "white", size = 2) +
geom_rect(aes(fill= 'red'),xmin = time - 0.45, xmax = time + 0.45, ymin = balance, ymax = balance)
geom_text(aes(x = time, y = pmin(balance, balance + change) - 50, label = dollar(change)),
hjust = 0.5, vjust = 1, size = 3)
scale_x_continuous( breaks = balance$time, labels = balance$event) +
scale_y_continuous("Balance") +
scale_fill_manual(values = c("-1" = "red", "1" = "black"))
it throws an error :Error in scale_x_continuous(breaks = balance$time, labels = balance$event) + : non-numeric argument to binary operator
THe final output, per the pdf should look like the image below

ggplot(balance %>%
mutate(flow = factor(flow, labels = c("Negative", "Positive")))) +
geom_hline(yintercept = 0, colour = "white", size = 2) +
geom_rect(aes(fill= flow, xmin = time - 0.45, xmax = time+0.45, ymin = change, ymax = balance),
color = "black") +
geom_text(aes(x = time, y = pmin(balance, balance + change) - 50, label = change),
hjust = 0.5, vjust = 1, size = 3) +
scale_x_continuous(breaks = balance$time, labels = balance$event) +
scale_y_continuous("Balance") +
scale_fill_manual(values = c("Negative" = "red", "Positive" = "green"))

Related

How to apply slope plot R code to another data

I have dataframe which represents sales by model within 2 different years. 'change' column stands for absolute change by models from 2020 to 2021 while 'chng.percent' measures this change in percentages.
However, I am struggling to apply the given Code of slope plot to my data.
df <- data.frame (model = c("A", "A", "B","B"),
year = c(2020,2021,2020,2021),
sale =c(105,190,110,180),
chang = c(85,NA,70,NA),
chng.percent = c(80.9,NA, 63.6,NA))
Expected outcome (Like this)
Here's a way to do it all within ggplot using your existing data:
ggplot(df, aes(year, sale, color = model)) +
geom_line(arrow = arrow(type = "closed", angle = 20),
key_glyph = draw_key_point) +
geom_vline(aes(xintercept = year)) +
geom_text(aes(label = sale, hjust = ifelse(year == 2020, 1.3, -0.3)),
color = "black",
size = 6) +
geom_text(aes(x = min(df$year) + 0.25, y = 105,
label = paste0("+", chang[1], "; ", chng.percent[1], "%"),
color = "A"), size = 5) +
geom_text(aes(x = max(df$year) - 0.25, y = 150,
label = paste0("+", chang[3], "; ", chng.percent[3], "%"),
color = "B"), size = 5) +
theme_void(base_size = 16) +
coord_cartesian(clip = "off") +
scale_x_continuous(breaks = c(2020, 2021)) +
guides(color = guide_legend(override.aes = list(size = 5))) +
scale_color_brewer(palette = "Set1") +
theme(plot.margin = margin(30, 30, 30, 30),
aspect.ratio = 1.5,
axis.text.x = element_text(size = 20))
you can try something like this :
df <- data.frame(model = c("A", "B"),
sale_2020 =c(105,110),
sale_2021 =c(190,180),
chang = c(85,70),
chng.percent = c(80.9, 63.6))
df %>%
ggplot() +
geom_segment(aes(x = 1, xend = 2,
y = sale_2020,
yend = sale_2021,
group = model,
col = model),
size = 1.2) +
# set the colors
scale_color_manual(values = c("#468189", "#9DBEBB"), guide = "none") +
# remove all axis stuff
theme_classic() +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()) +
geom_text(aes(x = x, y = y, label = label),
data = data.frame(x = 1:2,
y = 10 + max(df$sale_2021),
label = c("2020", "2021")),
col = "grey30",
size = 6) +
# add vertical lines that act as axis for 2020
geom_segment(x = 1, xend = 1,
y = min(df$sale_2020) -10,
yend = max(df$sale_2020) + 81,
col = "grey70", size = 1.5) +
# add vertical lines that act as axis for 2021
geom_segment(x = 2, xend = 2,
y = min(df$sale_2021) - 80,
yend = max(df$sale_2021) + 1,
col = "grey70", size = 1.5) +
# add the success rate next to each point on 2021 axis
geom_text(aes(x = 2 + 0.08,
y = sale_2021,
label = paste0(round(sale_2021, 1))),
col = "grey30") +
# add the success rate next to each point on 2021 axis
geom_text(aes(x = 1 - 0.08,
y = sale_2020,
label = paste0(round(sale_2020, 1))),
col = "grey30") +
# add the success rate next to each point on 2020 axis
geom_text(aes(x = 2 - 0.5,
y = c(156, 135),
label = paste0(round(chng.percent, 1), "%")),
col = "grey30")

Rstudio Bland Altman grouped colours and shapes

I have a bland-altman plot of 16 measurements divided over 3 groups (Slice) which I want to colorcode and possibly have different shapes but somehow I cant get it working:
df <- data.frame("Slice" = c(1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3),
"Segments" = c(1:16),
"mean" = c(6,5,2,4,8,9,6,3,5,6,5,8,5,4,6,4),
"dif" = c(1,3,2,1,2,3,2,1,2,2,2,1,3,2,1,2))
#creat limits of agreement
LL = mean(df$dif)-1.96*(sd(df$dif))
UL = mean(df$dif)+1.96*(sd(df$dif))
#create BA plot
BAplot <- ggplot(df, aes(x=mean,y=dif))+
geom_jitter(alpha=1.0,size=18,shape="*", stroke = 1.5)+
geom_hline(yintercept=mean(df$dif),color= "blue",size=2)+
geom_text(aes(x = 12, y = mean(df$dif)+0.2, label = round(mean(df$dif), 1)), col = "blue", size = 7) +
geom_hline(yintercept=0,linetype=3,size=2) +
geom_hline(yintercept=c(UL,LL),color="black",linetype="dashed",size=2)+theme_bw()+
geom_text(aes(x = 12, y = UL+0.2, label = round(UL,1)), col = "black", size = 7) +
geom_text(aes(x = 12, y = LL+0.2, label = round(LL,1)), col = "black", size = 7) +
scale_x_continuous("mean",limits = c(-2,12))+
scale_y_continuous("diff", limits = c(-1, 5.5))
To code your points by color and to have different shapes you have to map your Slice column on the color and/or shape aesthetic inside geom_jitter. As Slice is a numeric I first converted it to a factor. If you want specific colors or shape you could set your desired values using scale_color_manual and scale_shape_manual:
library(ggplot2)
ggplot(df, aes(x = mean, y = dif)) +
geom_jitter(aes(color = factor(Slice), shape = factor(Slice)), alpha = 1.0, size = 2) +
geom_hline(yintercept = mean(df$dif), color = "blue", size = 2) +
geom_text(aes(x = 12, y = mean(dif) + 0.2, label = round(mean(dif), 1)), col = "blue", size = 7) +
geom_hline(yintercept = 0, linetype = 3, size = 2) +
geom_hline(yintercept = c(UL, LL), color = "black", linetype = "dashed", size = 2) +
theme_bw() +
geom_text(aes(x = 12, y = UL + 0.2, label = round(UL, 1)), col = "black", size = 7) +
geom_text(aes(x = 12, y = LL + 0.2, label = round(LL, 1)), col = "black", size = 7) +
scale_x_continuous("mean", limits = c(-2, 12)) +
scale_y_continuous("diff", limits = c(-1, 5.5))

using a grouping variable and geom_rect

I want to show covered ranges (including overlaps) and (after some failures with stacked bar plots) I chose geom_rect. The following code works well for one type.
library(tidyverse)
# create dummy data
foo <- tibble(start = c(1, 150, 140, 75, 300),
end = c(150, 180, 170, 160, 400))
ggplot() +
geom_rect(data = foo, aes(xmin = start, xmax = end, ymin = 0, ymax = 1), fill = "green", linetype = "blank", alpha = 0.3) +
geom_rect(data = foo, aes(xmin = 1, xmax = max(end), ymin = 0, ymax = 1), fill = NA, colour = "black") +
scale_y_continuous(name = "", breaks = NULL, limits = c(0, 1)) +
scale_x_continuous(name = "", breaks = NULL) +
theme_minimal() +
theme(panel.grid = element_blank())
If I add more data (only one more type, but in the original data I do have some more) like below, I can add the data "by hand", i.e. add two lines of code for each type, but I'm looking for a way to do this by grouping, but didn't succeed.
foo <- foo %>%
mutate(type = "A", .before = 1)
bar <- tibble(type = "B",
start = c(1, 30, 40, 100, 150, 200, 310),
end = c(20, 50, 100, 120, 200, 300, 380))
foo <- bind_rows(foo, bar)
ggplot() +
geom_rect(data = foo %>% filter(type == "A"), aes(xmin = start, xmax = end, ymin = 0, ymax = 1), fill = "green", linetype = "blank", alpha = 0.3) +
geom_rect(data = foo, aes(xmin = 1, xmax = max(end), ymin = 0, ymax = 1), fill = NA, colour = "black") +
geom_rect(data = foo %>% filter(type == "B"), aes(xmin = start, xmax = end, ymin = 2, ymax = 3), fill = "green", linetype = "blank", alpha = 0.3) +
geom_rect(data = foo, aes(xmin = 1, xmax = max(end), ymin = 2, ymax = 3), fill = NA, colour = "black") +
scale_y_continuous(name = "", breaks = NULL, limits = c(0, 3)) +
scale_x_continuous(name = "", breaks = NULL) +
geom_text(aes(x = c(0, 0), y = c(0.5, 2.5), label = c("A", "B")), size = 4, hjust = 2) +
theme_minimal() +
theme(panel.grid = element_blank())
So, the graph already looks the way I want, but I'd prefer to get here by using grouping (or any other non-manual way).
Maybe there's also a different geom or method to get this kind of graph?
You can write a small helper function that positions a categorical value in continuous space. Example below.
helper <- function(x) {(match(x, sort(unique(x))) - 1) * 2}
ggplot(foo) +
geom_rect(
aes(xmin = start, xmax = end,
ymin = helper(type),
ymax = helper(type) + 1),
fill = "green", linetype = "blank", alpha = 0.3
) +
geom_rect(
aes(xmin = min(start), xmax = max(end),
ymin = helper(type),
ymax = helper(type) + 1),
fill = NA, colour = "black"
) +
scale_y_continuous(name = "", breaks = NULL, limits = c(0, 3)) +
scale_x_continuous(name = "", breaks = NULL) +
annotate(
"text", x = c(0, 0), y = c(0.5, 2.5), label = c("A", "B"),
size = 4, hjust = 2
) +
theme_minimal() +
theme(panel.grid = element_blank())

Change colors to specific words in a text in ggplot (R)

I am trying to add different colors to a text in ggplot using the function annotate().
The results are quite good, but I have to define manually which are the right y values to correctly overlap the text.
Plot with annotations for
I would love to know if there is a better way to overlap text in annotations in R.
Thank you,
Btw, here is the code I am using:
ex_3_1 %>%
ggplot(aes(x = DATE)) +
# geometries
geom_line(aes(y = if_else(ORIGIN == "ACTUAL" |
(YEAR == 2019 & MONTH == "JUN"),
SALES, NULL)), size = 1) +
geom_line(aes(y = if_else(ORIGIN == "FORECAST", SALES, NULL)),
linetype = "dashed", size = 1) +
geom_point(aes(y = REL_SALES), size = 3) +
geom_point(aes(y = if_else(MONTH == "JUL" & YEAR == 2018, SALES, NULL)),
shape = 21, fill = "darkorange", size = 3) +
geom_point(aes(y = if_else(MONTH == "FEB" & YEAR == 2019, SALES, NULL)),
shape = 21, fill = col, size = 3) +
geom_text(aes(y = SALES, label = dollar(round(REL_SALES,1),
suffix = "B", accuracy = 0.1)),
vjust = -1.5, hjust = 0.2, size = 3) +
# annotations
# square text
annotate(geom = "rect", xmin = as_date("2019-05-20"),
xmax = as_date("2020-01-10"), ymin = 0, ymax = 2.6,
alpha = 0.1) +
annotate(geom = "text",
x = as_date("2020-01-01"), y = 1, hjust = 1, vjust = -1,
label = expression(bold("2019 FORECAST")),
col = "gray60", size = 3.25) +
annotate(geom = "text",
x = as_date("2020-01-01"), y = 1, hjust = 1, vjust = 1,
label = paste0("This is provided by ABC\n",
"consultants and based on\n",
"market data through June.\n",
"The forecast assumes no\n",
"major market changes.\n"),
col = "gray60", size = 3.5) +
# 2018 notes
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = paste0("2018: Jan-Jun was a period of stability, with\n",
"fairly steady growth (averaging +3% per\n",
"month). There was a nearly 20% decrease\n",
"in July, when Product X was recalled and\n",
"pulled from the market. Total sales remained\n",
"at reduced volume for the rest of the year."),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = expression(bold("2018:")),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.19, hjust = 0, vjust = 1,
label = expression(phantom("month). There was a ")*
"nearly 20% decrease"),
size = 3.5, col = "darkorange") +
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.03, hjust = 0, vjust = 1,
label = "in July",
size = 3.5, col = "darkorange") +
# 2019 notes
annotate(
geom = "text", x = as_date("2019-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = paste0("2019: The year started at less than $1.6B, but\n",
"Increased markedly in February, when a new\n",
"study was released. Total sales have increased\n",
"steadly since then and this projected to continue.\n",
"The latest forecast is for $2.4B in monthly sales by\n",
"the end of the year."),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2019-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = expression(bold("2019:")),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2019-01-10"), y = 3.35, hjust = 0, vjust = 1,
label = "Increased markedly in February",
size = 3.5, col = col) +
# scales
scale_x_date(date_labels = "%b'%y", date_breaks = "3 month") +
scale_y_continuous(labels = dollar, breaks = c(seq(0,3.5,0.5)),
limits = c(0, 3.5)) +
# titles
labs("Market size over time") +
ylab("SALES ($USD BILLIONS)") +
# themes
theme_void() +
theme(
axis.line.x = element_line(color = "gray58"),
axis.text.y = element_text(size = 11, color = "gray58"),
axis.title.y = element_text(hjust = 1, color = "gray58"),
axis.text.x = element_text(size = 9, color = "gray58")
)

Applying log scale to y-axis for visualizing proportions with ggplot2

I am attempting to recreate some plots from a research article in R and am running into an issue with applying a log scale to y axis. The visualization I'm attempting to recreate is this:
reference plot with y log scale
I currently have a working version without the logarithmic scale applied to the y-axis:
Proportion_Mean_Plot <- ggplot(proportions, aes(days2,
proportion_mean, group = observation)) +
geom_point(aes(shape = observation)) +
geom_line() +
scale_x_continuous(breaks = seq(0,335,20)) +
scale_y_continuous(breaks = seq(0,6,.5)) +
theme_tufte() +
geom_rangeframe() +
theme(legend.position="none") +
theme(axis.line.x = element_line(colour = "black", size = 0.5, linetype = 1),
axis.line.y = element_line(colour = "black", size = 0.5, linetype = 1)) +
labs(title = "Proportion of Baseline Mean",
subtitle = "Daily steps within each intervention phase",
x = "DAYS",
y = "PROPORTION OF BASELINE \n(MEAN)") +
geom_vline(xintercept = 164.5) +
geom_hline(yintercept = 1) +
annotate("text", x = c(82, 246), y = 5,
label = c("Intervention 1", "Intervention 2")) +
geom_segment(aes(x = 0, y = mean, xend = end, yend = mean),
data = proportion_intervention1_data) +
geom_segment(aes(x = start, y = mean, xend = end, yend = mean),
data = proportion_intervention2_data, linetype = 4)
This produces a decent representation of the original:
normally scaled y-axis plot
I would like to try to apply that logarithmic scaling to more closely match it. Any help is appreciated.
As per Richard's suggestion, here is a quick example how you can use scale_y_log10:
suppressPackageStartupMessages(library(tidyverse))
set.seed(123)
# generate some data
proportions <- tibble(interv_1 = pmax(0.4, rnorm(160, mean = 1.3, sd = 0.2)),
interv_2 = pmax(0.01, rnorm(160, mean = 1.6, sd = 0.5)))
proportions <- proportions %>%
gather(key = observation, value = proportion_mean) %>%
mutate(days2 = 1:320)
# create the plot
ggplot(proportions, aes(days2, proportion_mean, group = observation)) +
geom_point(aes(shape = observation)) +
geom_line() +
scale_x_continuous(breaks = seq(0,335,20), expand = c(0, 0)) +
scale_y_log10(breaks = c( 0.1, 0.5, 1, 2, 3, 4, 5), limits = c(0.1, 5)) +
# theme_tufte() +
# geom_rangeframe() +
theme(legend.position="none") +
theme(axis.line.x = element_line(colour = "black", size = 0.5, linetype = 1),
axis.line.y = element_line(colour = "black", size = 0.5, linetype = 1)) +
labs(title = "Proportion of Baseline Mean",
subtitle = "Daily steps within each intervention phase",
x = "DAYS",
y = "PROPORTION OF BASELINE \n(MEAN)") +
geom_vline(xintercept = 164.5) +
geom_hline(yintercept = 1) +
annotate("text", x = c(82, 246), y = 5,
label = c("Intervention 1", "Intervention 2")) +
# plugged the values for the means of the two distributions
geom_segment(aes(x = 0, y = 1.3, xend = 164.5, yend = 1.3)) +
geom_segment(aes(x = 164.5, y = 1.6, xend = 320, yend = 1.6), linetype = 4)

Resources