Related
I have 3 forecast plots that are combined together by plotly::subplot. The next step is draw a transparent box (or 3 separate boxes) with red dashed lines around the forecast line of each plot so that they stand out to the reader.
How can I do this ?
Desired Output:
Data (df):
structure(list(year = 1980:2021, AvgTMean = c(24.2700686838937,
23.8852956598276, 25.094446596092, 24.1561175050287, 24.157183605977,
24.3047482638362, 24.7899738481466, 24.5756232655603, 24.5833086228592,
24.7344695534483, 25.3094451071121, 25.2100615173707, 24.3651692293534,
24.5423890611494, 25.2492166633908, 24.7005097837931, 24.2491591827443,
25.0912281781322, 25.0779264303305, 24.403294248319, 24.4983991453592,
24.4292324356466, 24.8179824927011, 24.7243948463075, 24.5086534543966,
24.2818632071983, 24.4567195220259, 24.8402224356034, 24.6574465515086,
24.5440715673563, 23.482670620977, 24.9979594684914, 24.5452453980747,
24.9271462811494, 24.7443215819253, 25.8929839790805, 25.1801908261063,
25.2079308058908, 25.0722425561207, 25.4554644289799, 25.4548979078736,
25.0756772250287), AvgTMin = c(19.6018663372126, 18.9935718486724,
20.8351710187356, 19.7723002680316, 19.8097384811782, 19.7280847671034,
20.2907499842098, 20.1950373662931, 20.1812715311494, 20.1808865070833,
21.0320272801006, 21.1252427976293, 20.1712830368678, 20.407655174727,
21.5430646243391, 20.6760574525862, 20.0822658237356, 21.0735574619397,
21.0871494406322, 20.1311178414224, 20.3191250001149, 20.3474683732557,
20.668169553204, 20.3772270269296, 20.2330157893678, 19.9486551337931,
20.1114496908333, 20.5816350393966, 20.4033879191236, 20.1582514856897,
19.2288879223678, 20.8451063140805, 20.4878865041092, 21.0259712576437,
20.5510100674138, 22.0143793370977, 21.3529094881753, 21.1688506012213,
21.040550304569, 21.4923981385632, 21.6580430460057, 21.2433069288506
), AvgTMax = c(28.9392198638937, 28.778245693046, 29.3549223685201,
28.5411393752011, 28.5058118063649, 28.8825532046983, 29.2903534709195,
28.9574051835776, 28.9865201368247, 29.2891997662069, 29.5881379007328,
29.2960976760201, 28.5602557685057, 28.6782844806753, 28.9566034394684,
28.7262054694971, 28.4171896994397, 29.1100747038649, 29.0698836095546,
28.6766350461063, 28.6788764437787, 28.5122026355891, 28.9690143596839,
29.0727844759914, 28.7854971337931, 28.6163189712069, 28.8032270024138,
29.1000460207471, 28.9127356101149, 28.9310646744109, 27.7376810545833,
29.1520129070402, 28.6037845089512, 28.8295359311638, 28.9388276133764,
29.7726939654598, 29.0086407880029, 29.2482097613937, 29.1050890698132,
29.4187571974569, 29.2519238543247, 28.9081913630029)), class = "data.frame", row.names = c(NA,
-42L))
Code
library(tidyverse)
library(plotly)
AvgTMeanYearFP = ggplot(df, aes(year, AvgTMean)) +
geom_smooth(method = 'lm', fullrange = TRUE) +
annotate('rect', xmin = -Inf, xmax = 2021, ymin = -Inf, ymax = Inf,
fill = 'gray92') +
geom_vline(xintercept = seq(1980, 2020, 5), color = 'white') +
geom_hline(yintercept = seq(23.5, 25.5, 0.5), color = 'white') +
geom_line() +
scale_x_continuous(limits = c(1980, 2030)) +
labs(y = "Avg. Mean T (C)", x = "Year") +
geom_text(aes(x = 2000 , y = 25.5, label = "Historic Trend")) +
geom_text(aes(x = 2025 , y = 25.5, label = "Forecast Trend"))
AvgTMinYearFP = ggplot(df, aes(year, AvgTMin)) +
geom_smooth(method = 'lm', fullrange = TRUE) +
annotate('rect', xmin = -Inf, xmax = 2021, ymin = -Inf, ymax = Inf,
fill = 'gray92') +
geom_vline(xintercept = seq(1980, 2020, 5), color = 'white') +
geom_hline(yintercept = seq(23.5, 25.5, 0.5), color = 'white') +
geom_line() +
scale_x_continuous(limits = c(1980, 2030)) +
ylim(18, 23) +
labs(y = "Avg. Min. T (C)", x = "Year")
AvgTMaxYearFP = ggplot(df, aes(year, AvgTMax)) +
geom_smooth(method = 'lm', fullrange = TRUE) +
annotate('rect', xmin = -Inf, xmax = 2021, ymin = -Inf, ymax = Inf,
fill = 'gray92') +
geom_vline(xintercept = seq(1980, 2020, 5), color = 'white') +
geom_hline(yintercept = seq(23.5, 25.5, 0.5), color = 'white') +
geom_line() +
scale_x_continuous(limits = c(1980, 2030)) +
ylim(27, 30) +
labs(y = "Avg. Max. T (C)", x = "Year")
# Combine plots
subplot(AvgTMeanYearFP, AvgTMinYearFP, AvgTMaxYearFP, titleY = TRUE, shareX = TRUE, nrows = 3) %>%
layout(title ="Historic Average Temperature And Future Temperature Projection")
I actually like one box over all plots more aesthetically. Had a hard time doing this, because there seems to be a known issue with using ggplotly and the layout() function. That's why the shapes are put in p$x$layout$shapes like this.
# Combine plots
p <- subplot(AvgTMeanYearFP, AvgTMinYearFP, AvgTMaxYearFP, titleY = TRUE, shareX = TRUE, nrows = 3) %>%
layout(title ="Historic Average Temperature And Future Temperature Projection")
p$x$layout$shapes <- list(type = "rect",
line = list(color = "red",
dash = 'dash'),
x0 = 2021,
x1 = 2030,
xref = "x",
y0 = 0,
y1 = 1,
yref = "paper")
p
An alternative to a dashed box could be using the opacity.
list(type = "rect",
fillcolor = "red",
opacity = 0.1,
x0 = 2021,
x1 = 2030,
xref = "x",
y0 = 0,
y1 = 1,
yref = "paper")
I can also get you some of the way there - by making a red box in each figure, but putting a single box across the whole plot is going to be more challenging.
library(tidyverse)
library(plotly)
add_box <- function(p, start=2022, stop=NULL, prop_in=.05, ...){
pb <- ggplot_build(p)
rgy <- pb$layout$panel_params[[1]]$y.range
rgx <- pb$layout$panel_params[[1]]$x.range
px1 <- diff(rgx)*prop_in
py1 <- diff(rgy)*prop_in
rgx <- c(1,-1)*px1 + rgx
rgy <- c(1,-1)*py1 + rgy
rgx[1] <- start
if(!is.null(stop)){
rgx[2] <- stop
}
boxdf <- data.frame(x = rgx[c(1,2,2,1,1)],
y=rgy[c(1,1,2,2,1)])
p + geom_path(data=boxdf,
aes(x=x,
y=y),
col="red",
linetype=2)
}
AvgTMeanYearFP = ggplot(df, aes(year, AvgTMean)) +
geom_smooth(method = 'lm', fullrange = TRUE) +
annotate('rect', xmin = -Inf, xmax = 2021, ymin = -Inf, ymax = Inf,
fill = 'gray92') +
geom_vline(xintercept = seq(1980, 2020, 5), color = 'white') +
geom_hline(yintercept = seq(23.5, 25.5, 0.5), color = 'white') +
geom_line() +
scale_x_continuous(limits = c(1980, 2030)) +
labs(y = "Avg. Mean T (C)", x = "Year") +
geom_text(aes(x = 2000 , y = 25.5, label = "Historic Trend")) +
geom_text(aes(x = 2025 , y = 25.5, label = "Forecast Trend"))
AvgTMinYearFP = ggplot(df, aes(year, AvgTMin)) +
geom_smooth(method = 'lm', fullrange = TRUE) +
annotate('rect', xmin = -Inf, xmax = 2021, ymin = -Inf, ymax = Inf,
fill = 'gray92') +
geom_vline(xintercept = seq(1980, 2020, 5), color = 'white') +
geom_hline(yintercept = seq(23.5, 25.5, 0.5), color = 'white') +
geom_line() +
scale_x_continuous(limits = c(1980, 2030)) +
ylim(18, 23) +
labs(y = "Avg. Min. T (C)", x = "Year")
AvgTMaxYearFP = ggplot(df, aes(year, AvgTMax)) +
geom_smooth(method = 'lm', fullrange = TRUE) +
annotate('rect', xmin = -Inf, xmax = 2021, ymin = -Inf, ymax = Inf,
fill = 'gray92') +
geom_vline(xintercept = seq(1980, 2020, 5), color = 'white') +
geom_hline(yintercept = seq(23.5, 25.5, 0.5), color = 'white') +
geom_line() +
scale_x_continuous(limits = c(1980, 2030)) +
ylim(27, 30) +
labs(y = "Avg. Max. T (C)", x = "Year")
# Combine plots
subplot(AvgTMeanYearFP %>% add_box(stop=2030, prop_in=.05),
AvgTMinYearFP %>% add_box(stop=2030, prop_in=.05),
AvgTMaxYearFP %>% add_box(stop=2030, prop_in=.05),
titleY = TRUE, shareX = TRUE, nrows = 3) %>%
layout(title ="Historic Average Temperature And Future Temperature Projection")
The add_box() function does a few different things. First, it builds your plot so I can grab the ranges of the x and y axes. If you try to plot the box all the way to the end of the range, the top, bottom and right side lines don't print. So, I have it pull the those edges prop_in toward the interior of the plot. I found that .05 is about the smallest that worked. Then, I change the rgx and rgy objects accordingly. Then, I replace the first and optionally second value of rgx with the start and stop arguments from the function call. I take the range values and make them into a data frame that will be amenable to plot with geom_path() and then I add the appropriate geom_path() function to your existing plot.
I'm attempting to create a composite plot in r, the code for which is below:
#Adding initial data
ggp <- ggplot(NULL, aes(x = date, y = covid)) +
geom_spline(data = onsdf,
aes(x = date, y = covid, colour = "ONS Modelled Estimates"), nknots = 90, size = 1.3) +
geom_spline(data = gvtdf,
aes(x = date, y = covid, colour = "Gvt Reported Positive Tests"), nknots = 90, size = 1.3)
#Creating function to add stringency bars
barfunction <- function(date1, date2, alpha){
a <- annotate(geom = "rect",
xmin = as.Date(date1), xmax = as.Date(date2), ymin = 0, ymax = Inf, alpha = alpha, fill = "red")
return(a)
}
#Adding lockdown stringency bars
ggp <- ggp +
barfunction("2020-05-03", "2020-06-01", 0.5) +
barfunction("2020-06-01", "2020-06-15", 0.4) +
barfunction("2020-06-15", "2020-09-14", 0.3) +
barfunction("2020-09-14", "2020-11-05", 0.3) +
barfunction("2020-11-05", "2020-12-02", 0.5) +
barfunction("2020-12-02", "2021-01-06", 0.4) +
barfunction("2021-01-06", "2021-03-29", 0.5) +
barfunction("2021-03-29", "2021-04-12", 0.4) +
barfunction("2021-04-12", "2021-05-17", 0.3) +
barfunction("2021-05-17", "2021-07-19", 0.2) +
barfunction("2021-07-19", "2021-12-08", 0.1) +
barfunction("2021-12-08", "2022-02-24", 0.2) +
#Adding plot labels
ggp <- ggp + labs(title = "Estimated Total Covid-19 Cases vs Reported Positive Cases",
subtitle = "From ONS and HMGvt datasets",
x = "Date (year - month)", y = "Covid Levels") +
scale_y_continuous(labels = scales::comma) +
scale_x_date(limits = as.Date(c("2020-05-03", NA ))) +
scale_colour_manual(name = "Measurement Method",
values = c("ONS Modelled Estimates"="purple",
"Gvt Reported Positive Tests" = "blue"))
The output of this code looks like this:
Rendered graph
As you can see, I have a very repetitive function (barfunction) in this code that I would like to change. I thought the best way to do this was to convert the data barfunction() was applying to the graph into a dataframe, and then try to use a function on said data frame. Here is a head of the data frame (called strindf)
date1 date2 alpha
2020-05-03 2020-06-01 0.5
2020-06-01 2020-06-15 0.4
2020-06-15 2020-09-14 0.3
2020-09-14 2020-11-05 0.3
I initially tried to use apply() to add the strindf data to my plot, however I got an error message (Error in as.Date(date2) : argument "date2" is missing, with no default). Here is how I implemented it into the original code
ggptest <- ggplot(NULL, aes(x = date, y = covid)) +
geom_spline(data = onsdf,
aes(x = date, y = covid, colour = "ONS Modelled Estimates"), nknots = 90, size = 1.3) +
geom_spline(data = gvtdf,
aes(x = date, y = covid, colour = "Gvt Reported Positive Tests"), nknots = 90, size = 1.3) +
apply(strindf, MARGIN = 1 , barfunction) +
theme_minimal() +
scale_y_continuous(labels = scales::comma) +
scale_x_date(limits = as.Date(c("2020-05-03", NA ))) +
scale_colour_manual(name = "Legend",
I'm quite new to r so I'm a bit stumped, does anyone have any suggestions?
Thanks in advance!
Your idea was right. But you have chosen the wrong function from the apply family of functions. As you have a function of multiple arguments use mapply or as I do below purrr::pmap:
Using some fake random example data:
library(ggplot2)
library(ggformula)
barfunction <- function(date1, date2, alpha) {
annotate(geom = "rect", xmin = as.Date(date1), xmax = as.Date(date2), ymin = 0, ymax = Inf, alpha = alpha, fill = "red")
}
ggplot(NULL, aes(x = date, y = covid)) +
geom_spline(data = df, aes(colour = "ONS Modelled Estimates"), nknots = 90, size = 1.3) +
purrr::pmap(strindf, barfunction) +
theme_minimal() +
scale_y_continuous(labels = scales::comma) +
scale_x_date(limits = as.Date(c("2020-05-03", NA))) +
scale_colour_manual(
name = "Measurement Method",
values = c(
"ONS Modelled Estimates" = "purple",
"Gvt Reported Positive Tests" = "blue"
)
)
#> Warning: Removed 123 rows containing non-finite values (stat_spline).
DATA
set.seed(123)
df <- data.frame(
date = seq.Date(as.Date("2020-01-01"), as.Date("2020-12-31"), by = "day"),
covid = runif(366)
)
strindf <- structure(list(date1 = c(
"2020-05-03", "2020-06-01", "2020-06-15",
"2020-09-14"
), date2 = c(
"2020-06-01", "2020-06-15", "2020-09-14",
"2020-11-05"
), alpha = c(0.5, 0.4, 0.3, 0.3)), class = "data.frame", row.names = c(
NA,
-4L
))
I have a data.table that I want to plot
cbo = data.table(date = seq(as.Date("2000/4/2"), by = "week", length.out = 9), week = 1:9, x= sample(10000:50000, 9))
Week 6 to 9 forecasts. I want a dotted line from 2000-05-07 onward to signify the forecasts and shade the background in blue.
My ggplot2
ggplot(cbo) +
aes(x = date, y = x) +
geom_line(size = 0.5, colour = "#112446") +
theme_minimal()
Question: How do I dot the line from week == 6, date = 2000-05-07 and shade background in blue colour to distinguish the forecasted numbers?
I think this should accomplish what you're looking to do. I used geom_rect to shade the plot and an ifelse to adjust the linetype in the aes of geom_segment. If you just use geom_line you'll end up with a gap between the dates for week 5 and 6.
library(ggplot2)
library(data.table)
# make data
cbo <- data.table(date = seq(as.Date("2000/4/2"), by = "week", length.out = 9), week = 1:9, x= sample(10000:50000, 9))
# plot data
ggplot(cbo) +
geom_segment(aes(x = date, xend = lead(date), y = x, yend = lead(x),
linetype = ifelse(date > as.Date("2000-04-30"), "solid", "dashed")), # adjust linetype
size = 1) +
# plot a rectangle
geom_rect(aes(xmin = as.Date("2000-05-07"), xmax = as.Date("2000-05-30"), ymin = min(x), ymax = Inf),
fill = "blue", alpha = 0.1)+ # adjust alpha for shading
theme_minimal() +
theme(legend.position = 'none')
You can plot geom_line twice. Once with a solid line until week 6 and once with a dotted line for the whole line.
library(tidyverse)
library(data.table)
cbo = data.table(date = seq(as.Date("2000/4/2"), by = "week", length.out = 9), week = 1:9, x= sample(10000:50000, 9))
ggplot(cbo) +
aes(x = date, y = x) +
geom_rect(aes(xmin = as.Date("2000-05-07"), xmax = as.Date("2000-05-28"), ymin = -Inf, ymax = Inf),
fill = "#6BC6F5FF", alpha = 0.1) +
geom_line(size = 0.5, colour = "#112446", lty = 2) +
geom_line(data = cbo %>% filter(week <= 6), size = 0.7, color = "#112446", lty = 1) +
theme_minimal()
I am trying to create a plot in R using ggplot that shows the difference between my two bars in a nice way.
I found an example that did part of what I wanted, but I have two major problems:
It is based on comparing groups of bars, but I only have two, so I added one group with both of them.
I would like to draw the arrow in nicer shape. I attached an image.
Code:
transactions <- c(5000000, 1000000)
time <- c("Q1","Q2")
group <- c("A", "A")
data <- data.frame(transactions, time, group)
library(ggplot2)
fun.data <- function(x){
print(x)
return(data.frame(y = max(x) + 1,
label = paste0(round(diff(x), 2), "cm")))
}
ylab <- c(2.5, 5.0, 7.5, 10)
gg <- ggplot(data, aes(x = time, y = transactions, fill = colors_hc[1], label = round(transactions, 0))) +
geom_bar(stat = "identity", show.legend = FALSE) +
geom_text(position = position_dodge(width = 0.9),
vjust = 1.1) +
geom_line(aes(group = group), position = position_nudge(0.1),
arrow = arrow()) +
stat_summary(aes(x = group, y = transactions),
geom = "label",
fun.data = fun.data,
fontface = "bold", fill = "lightgrey",
inherit.aes = FALSE) +
expand_limits(x = c(0, NA), y = c(0, NA)) +
scale_y_continuous(labels = paste0(ylab, "M"),
breaks = 10 ^ 6 * ylab)
gg
The arrows I am aiming for:
Where I am (ignore the ugliness, didn't style it yet):
This works, but you still need to play around a bit with the axes (or rather beautify them)
library(dplyr)
library(ggplot2)
transactions <- c(5000000, 1000000)
time <- c("Q1","Q2")
group <- c("A", "A")
my_data <- data.frame(transactions, time, group)
fun.data <- function(x){
return(data.frame(y = max(x) + 1,
label = as.integer(diff(x))))
}
my_data %>%
ggplot(aes(x = group, y = transactions, fill = time)) +
geom_bar(stat = 'identity', position = 'dodge') +
geom_text(aes(label = as.integer(transactions)),
position = position_dodge(width = 0.9),
vjust = 1.5) +
geom_line(aes(group = group), position = position_nudge(0.1),
arrow = arrow()) +
stat_summary(aes(x = group, y = transactions),
geom = "label",
size = 5,
position = position_nudge(0.05),
fun.data = fun.data,
fontface = "bold", fill = "lightgrey",
inherit.aes = FALSE)
Edit2:
y_limit <- 6000000
my_data %>%
ggplot(aes(x = time, y = transactions)) +
geom_bar(stat = 'identity',
fill = 'steelblue') +
geom_text(aes(label = as.integer(transactions)),
vjust = 2) +
coord_cartesian(ylim = c(0, y_limit)) +
geom_segment(aes(x = 'Q1', y = max(my_data$transactions),
xend = 'Q1', yend = y_limit)) +
geom_segment(aes(x = 'Q2', y = y_limit,
xend = 'Q2', yend = min(my_data$transactions)),
arrow = arrow()) +
geom_segment(aes(x = 'Q1', y = y_limit,
xend = 'Q2', yend = y_limit)) +
geom_label(aes(x = 'Q2',
y = y_limit,
label = as.integer(min(my_data$transactions)- max(my_data$transactions))),
size = 10,
position = position_nudge(-0.5),
fontface = "bold", fill = "lightgrey")
I am trying to plot some data in a ggplotly plot.
The x-axis contains dates. Ggplotly doesn't work well with dates as when I hover over a point, the date is displayed as a number.
I solved this by setting a tooltip like below.
Some sample data:
x <- data.frame(Date = as.Date(seq(Sys.Date(), Sys.Date() + 29, by = "days")), Amount = seq(-10000, 19000, by = 1000),
stringsAsFactors = FALSE)
The plot:
ggplotly(ggplot(x, aes(x = Date, y = Amount, group = 1, text = paste("Date: ", Date, "<br>Amount: ", Amount))) + geom_line() + geom_point()
, tooltip = "text")
Now I want to use geom_rect() to get some background colors depending on the value of the y-axis. This gives me problems as the rectangles seem to be placed on top of the geom_line(). Also, the rectangles are labeled by ggplotly too, which I don't want either.
Here is the code I tried (the background coloring works fine when I am not using a custom tooltip, but then the problem with the dates in the labels occurs):
ggplotly(ggplot(x, aes(x = Date, y = Amount, group = 1, text = paste("Date: ", Date, "<br>Amount: ", Amount))) + geom_line() + geom_point()
+
geom_rect(aes(xmin = as.Date(Sys.Date()),
xmax = as.Date(Sys.Date() + 30),
ymin = 10000, ymax = max(max(x$Amount) + 1000, 11000), fill = "1")) +
geom_rect(aes(xmin = as.Date(Sys.Date()),
xmax = as.Date(Sys.Date() + 30),
ymin = 0, ymax = 10000, fill = "2")) +
geom_rect(aes(xmin = as.Date(Sys.Date()),
xmax = as.Date(Sys.Date() + 30),
ymin = min(min(x$Amount) - 1000, 0), ymax = 0, fill = "3"))
+
scale_fill_manual(values = alpha(c("green", "orange", "red"), 0.2))
, tooltip = "text")
Result
Any help would be appreciated, thanks!
EDIT:
The following code results in working geom_rect():
ggplotly(ggplot(x, aes(x = Date, y = Amount)) + geom_line() + geom_point()
+
geom_rect(aes(xmin = as.Date(Sys.Date()),
xmax = as.Date(Sys.Date() + 30),
ymin = 10000, ymax = max(max(x$Amount) + 1000, 11000), fill = "1")) +
geom_rect(aes(xmin = as.Date(Sys.Date()),
xmax = as.Date(Sys.Date() + 30),
ymin = 0, ymax = 10000, fill = "2")) +
geom_rect(aes(xmin = as.Date(Sys.Date()),
xmax = as.Date(Sys.Date() + 30),
ymin = min(min(x$Amount) - 1000, 0), ymax = 0, fill = "3"))
+
scale_fill_manual(values = alpha(c("green", "orange", "red"), 0.2)))
Result
You could try this:
ggplotly(ggplot() +
geom_rect(data = x, aes(xmin = as.Date(Sys.Date()),
xmax = as.Date(Sys.Date() + 30),
ymin = 10000, ymax = max(max(x$Amount) + 1000, 11000), fill = "1")) +
geom_rect(data = x, aes(xmin = as.Date(Sys.Date()),
xmax = as.Date(Sys.Date() + 30),
ymin = 0, ymax = 10000, fill = "2")) +
geom_rect(data = x, aes(xmin = as.Date(Sys.Date()),
xmax = as.Date(Sys.Date() + 30),
ymin = min(min(x$Amount) - 1000, 0), ymax = 0, fill = "3")) +
geom_line(data = x, aes(x = Date, y = Amount, group = 1, text = paste("Date: ", Date, "<br>Amount: ", Amount))) +
geom_point(data = x, aes(x = Date, y = Amount, text = paste("Date: ", Date, "<br>Amount: ", Amount))) +
scale_fill_manual(values = alpha(c("green", "orange", "red"), 0.2))
, tooltip = "text")