Shape annotations across facets in ggplotly() - r

I have a Shiny dashboard which includes a line graph which tracks number of visitors on mon-thur and fri-sun periods per month for three years:
I originally also had an annotation which shaded the parts of the graph which occur during the Covid pandemic in Australia, i.e. 2020-03-01 to present. When ggplotly is called on the ggplot, it strips the annotations out. What I want to do is add the shading from 2020-03-01 to present back in. I've tried adding
%>% layout(
shapes = list(
list(type = "rect",
fillcolor = "blue", line = list(color = "blue"), opacity = 0.9,
x0 = "2020-03-01", x1 = Inf,
y0 = 0, y1 = Inf
)
)
after the ggplotly() call, but it doesn't do anything.
I also tried following the code in this question, but the shading doesn't start at the correct date, and it's also only on the first facet.
Reproducible code example:
date <- c("2019-01-01","2019-01-01","2019-02-01","2019-02-01","2019-03-01","2019-03-01","2019-04-01",
"2019-04-01","2019-05-01","2019-05-01","2019-06-01","2019-06-01","2019-07-01","2019-07-01",
"2019-08-01","2019-08-01","2019-09-01","2019-09-01","2019-10-01","2019-10-01","2019-11-01",
"2019-11-01","2019-12-01","2019-12-01","2020-01-01","2020-01-01","2020-02-01","2020-02-01",
"2020-03-01","2020-03-01","2020-04-01","2020-04-01","2020-05-01","2020-05-01","2020-06-01",
"2020-06-01","2020-07-01","2020-07-01","2020-08-01","2020-08-01","2020-09-01","2020-09-01",
"2020-10-01","2020-10-01","2020-11-01","2020-11-01","2020-12-01","2020-12-01","2021-01-01",
"2021-01-01","2021-02-01","2021-02-01","2021-03-01","2021-03-01","2021-04-01","2021-04-01",
"2021-05-01","2021-05-01","2021-06-01","2021-06-01","2019-01-01","2019-01-01","2019-02-01",
"2019-02-01","2019-03-01","2019-03-01","2019-04-01","2019-04-01","2019-05-01","2019-05-01",
"2019-06-01","2019-06-01","2019-07-01","2019-07-01","2019-08-01","2019-08-01","2019-09-01",
"2019-09-01","2019-10-01","2019-10-01","2019-11-01","2019-11-01","2019-12-01","2019-12-01",
"2020-01-01","2020-01-01","2020-02-01","2020-02-01","2020-03-01","2020-03-01","2020-04-01",
"2020-04-01","2020-05-01","2020-05-01","2020-06-01","2020-06-01","2020-07-01","2020-07-01",
"2020-08-01","2020-08-01","2020-09-01","2020-09-01","2020-10-01","2020-10-01","2020-11-01",
"2020-11-01","2020-12-01","2020-12-01","2021-01-01","2021-01-01","2021-02-01","2021-02-01",
"2021-03-01","2021-03-01","2021-04-01","2021-04-01","2021-05-01","2021-05-01","2021-06-01",
"2021-06-01")
location <- rep(c("1001", "1002"), c(60, 60))
daytype <- rep(c("mon-thur", "fri-sat"), 60)
visitors <- c(5694,6829,3087,4247,2814,4187,5310,6408,5519,5934,2817,4080,6762,6595,5339,6669,
4863,6137,8607,11974,4909,9103,7986,9493,15431,13044,6176,5997,6458,7694,5990,5419,
5171,8149,6091,7971,10677,10468,7782,7627,7210,9526,8554,9844,8262,9218,9418,9038,
13031,13418,7408,10621,6908,8122,8851,8861,7940,9179,5992,7026,7939,6923,8209,7815,
8190,7085,9136,7905,9784,8454,9467,9092,9183,8436,9029,8927,8828,8323,7679,7112,
1885,3156,6932,5530,6077,4975,4922,4008,5549,4557,3932,3395,4865,4820,5090,4529,
5407,4262,4858,4200,5101,4761,5108,4413,5209,4116,5405,4445,4140,2985,5589,4684,
5322,4540,4898,4214,5266,4188,5184,4555)
total <- data.frame(location, date, daytype, visitors)
mon_year_vis <- total %>%
ggplot() +
(
mapping =
aes(
x = as.Date(date),
y = visitors,
group = daytype,
color = daytype
)
) +
geom_line() +
geom_point(show.legend = FALSE, size = 1) +
scale_y_continuous(labels = comma) +
facet_wrap( ~location, ncol = 1, scales = "free") +
scale_x_date(date_labels = "%b-%y",
breaks = "3 month",
limits = range)
ggplotly(mon_year_vis)

this task is a bit more complex than it appears to be, since you use the scales_free argument in the facet_wrap call. Because of this you need a little helper that holds none global limits of the shaded areas and work with ggplot2::geom_rect else you could use ggplot2::annotate (for completeness I will list this option also). It is important to bear in mind that plotly seems to have issues with INF as limitations for coordinates when using plotly::ggplotly at least. (I will omit the lines until the declaration of your total variable)
# libraries needed to make things work
library(dplyr)
library(ggplot2)
library(plotly)
library(scales)
ggplot2::geom_rect
# needed for coordinates of shadowed area
helper <- total %>%
dplyr::group_by(location) %>%
dplyr::summarise(mv = max(visitors) , md = max(as.Date(date))) %>%
dplyr::ungroup()
mon_year_vis <- total %>%
ggplot() +
(
mapping =
aes(
x = as.Date(date),
y = visitors,
group = daytype,
color = daytype
)
) +
# insert the geom_rect before the lines so that plotly gets the layer order right
geom_rect(data = helper, aes(xmin = as.Date("2020-03-01"), xmax = md, ymin = 0, ymax = mv), alpha = 0.3, fill="blue", inherit.aes = FALSE) +
geom_line() +
geom_point(show.legend = FALSE, size = 1) +
scale_y_continuous(labels = comma) +
facet_wrap( ~location, ncol = 1, scales = "free") +
scale_x_date(date_labels = "%b-%y",
breaks = "3 month",
limits = range)
ggplotly(mon_year_vis)
ggplot2::annotate
mon_year_vis2 <- total %>%
group_by(daytype) %>%
mutate(maxy = max(visitors)) %>%
ggplot() +
(
mapping =
aes(
x = as.Date(date),
y = visitors,
group = daytype,
color = daytype
)
) +
# insert the annotate before the lines so that plotly gets the layer order right
annotate("rect", xmin=as.Date("2020-03-01"), xmax=max(as.Date(date)), ymin=0, ymax=max(visitors), alpha=0.2, fill="blue") +
geom_line() +
geom_point(show.legend = FALSE, size = 1) +
scale_y_continuous(labels = comma) +
facet_wrap( ~location, ncol = 1, scales = "free") +
scale_x_date(date_labels = "%b-%y",
breaks = "3 month",
limits = range)
ggplotly(mon_year_vis2)
The two resources I have used are: 1 2

Related

geom_ribbon: Fill area between lines - spurious lines connecting groups

I'm trying to build a plot with two lines and fill the area between with geom_ribbon. I've managed to select a fill color (red/blue) depending on the sign of the difference between two lines. First I create two new columns in the dataset for ymax, ymin. It seems to work but some spurious lines appear joining red areas.
Is geom_ribbon appropriate to fill the areas? Is there any problem in the plot code?
This is the code used to create the plot
datos.2022 <- datos.2022 %>% mutate(y1 = SSTm-273.15, y2 = SST.mean.day-273.15)
datos.2022 %>% ggplot(aes(x=fecha)) +
geom_line(aes(y=SSTm-273.15), color = "red") +
geom_line(aes(y=SST.mean.day - 273.15), color = "black") +
geom_ribbon(aes(ymax=y1, ymin = y2, fill = as.factor(sign)), alpha = 0.6) +
scale_fill_manual(guide = "none", values=c("blue","red")) +
scale_y_continuous(limits = c(10,30)) +
scale_x_date(expand = c(0,0), breaks = "1 month", date_labels = "%b" ) +
theme_hc() +
labs(x="",y ="SST",title = "Temperature (2022)") +
theme(text = element_text(size=20,family = "Arial"))
And this is the output
Example data for the plot available at https://www.dropbox.com/s/mkk8w7py2ynuy1t/temperature.dat?dl=0
What if you made two different series to plot as ribbons - one for the positive values where there is no distance between ymin and ymax for the places where the difference is negative. And one for the negative values that works in a similar way.
library(dplyr)
library(ggplot2)
datos.2022 <- datos.2022 %>%
mutate(y1 = SSTm-273.15,
y2 = SST.mean.day-273.15) %>%
rowwise() %>%
mutate(high_pos = max(SST.mean.day - 273.15, y1),
low_neg = min(SSTm-273.15, y2))
datos.2022 %>% ggplot(aes(x=fecha)) +
geom_line(aes(y=SSTm-273.15), color = "red") +
geom_line(aes(y=SST.mean.day - 273.15), color = "black") +
geom_ribbon(aes(ymax=high_pos, ymin = SST.mean.day - 273.15, fill = "b"), alpha = 0.6, col="transparent", show.legend = FALSE) +
geom_ribbon(aes(ymax = SST.mean.day - 273.15, ymin = low_neg, fill = "a"), alpha = 0.6, col="transparent", show.legend = FALSE) +
scale_fill_manual(guide = "none", values=c("blue","red")) +
scale_y_continuous(limits = c(10,30)) +
scale_x_date(expand = c(0,0), breaks = "1 month", date_labels = "%b" ) +
#theme_hc() +
labs(x="",y ="SST",title = "Temperature (2022)") +
theme(text = element_text(size=20,family = "Arial"))

How to change color of moving averages in ggplot, plotting two series into one graph?

In order to highlight the moving average in my ggplot visualization, I want to give it a different color (in this case grey or black for both MA lines). When it comes to to a graph representing two time series, I struggle to find the best solution. Maybe I need to take a different approach.
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(tidyquant))
V = 365
data <- data.frame (var1 = c(rnorm(V)),
var2 = c(rnorm(V)+12),
date = c(dates <- ymd("2013-01-01")+ days(0:364))
)
data_melted <- reshape2::melt(data, id.var='date')
data_melted %>%
ggplot() +
geom_line(mapping = aes(x= date, y=value, col=variable)) +
scale_color_manual(values=c("#CC6666", "steelblue")) +
geom_ma(ma_fun = SMA, n = 30, mapping = aes(x= date, y=value, col=variable)) +
theme(axis.text.x = element_text(angle = 50, vjust = 0.5)) +
scale_x_date(date_breaks = "1 month")
I think you can get what you want by not mapping variable to color in aes() for the MA part. I instead include group = variable to tell ggplot2 that the two MA's should be separate series, but no difference in their color based on that.
data_melted %>%
ggplot() +
geom_line(mapping = aes(x= date, y=value, col=variable)) +
scale_color_manual(values=c("#CC6666", "steelblue")) +
tidyquant::geom_ma(ma_fun = SMA, n = 30, mapping = aes(x= date, y=value, group = variable), color = "black") +
theme(axis.text.x = element_text(angle = 50, vjust = 0.5)) +
scale_x_date(date_breaks = "1 month")
If you want different colors, the natural way to do this in ggplot would be to give the moving averages their own values to be mapped to color.
...
scale_color_manual(values=c("#CC6666", "#996666", "steelblue", "slateblue")) +
tidyquant::geom_ma(ma_fun = SMA, n = 30, mapping = aes(x= date, y=value, col=paste(variable, "MA"))) +
...
I would consider looking at the tsibble library for time series data.
library(tsibble)
data_melted <-as_tsibble(data_melted, key = 'variable', index = 'date')
data_melted <- data_melted %>%
mutate(
`5-MA` = slider::slide_dbl(value, mean,
.before = 2, .after = 2, .complete = TRUE)
)
data_melted %>%
filter(variable == "var1") %>%
autoplot(value) +
geom_line(aes(y = `5-MA`), colour = "#D55E00") +
labs(y = "y",
title = "title") +
guides(colour = guide_legend(title = "series"))

adding a label in geom_line in R

I have two very similar plots, which have two y-axis - a bar plot and a line plot:
code:
sec_plot <- ggplot(data, aes_string (x = year, group = 1)) +
geom_col(aes_string(y = frequency), fill = "orange", alpha = 0.5) +
geom_line(aes(y = severity))
However, there are no labels. I want to get a label for the barplot as well as a label for the line plot, something like:
How can I add the labels to the plot, if there is only pone single group? is there a way to specify this manually? Until know I have only found option where the labels can be added by specifying them in the aes
EXTENSION (added a posterior):
getSecPlot <- function(data, xvar, yvar, yvarsec, groupvar){
if ("agegroup" %in% xvar) xvar <- get("agegroup")
# data <- data[, startYear:= as.numeric(startYear)]
data <- data[!claims == 0][, ':=' (scaled = get(yvarsec) * max(get(yvar))/max(get(yvarsec)),
param = max(get(yvar))/max(get(yvarsec)))]
param <- data[1, param] # important, otherwise not found in ggplot
sec_plot <- ggplot(data, aes_string (x = xvar, group = groupvar)) +
geom_col(aes_string(y = yvar, fill = groupvar, alpha = 0.5), position = "dodge") +
geom_line(aes(y = scaled, color = gender)) +
scale_y_continuous(sec.axis = sec_axis(~./(param), name = paste0("average ", yvarsec),labels = function(x) format(x, big.mark = " ", scientific = FALSE))) +
labs(y = paste0("total ", yvar)) +
scale_alpha(guide = 'none') +
theme_pubclean() +
theme(legend.title=element_blank(), legend.background = element_rect(fill = "white"))
}
plot.ExposureYearly <- getSecPlot(freqSevDataAge, xvar = "agegroup", yvar = "exposure", yvarsec = "frequency", groupvar = "gender")
plot.ExposureYearly
How can the same be done on a plot where both the line plot as well as the bar plot are separated by gender?
Here is a possible solution. The method I used was to move the color and fill inside the aes and then use scale_*_identity to create and format the legends.
Also, I needed to add a scaling factor for severity axis since ggplot does not handle the secondary axis well.
data<-data.frame(year= 2000:2005, frequency=3:8, severity=as.integer(runif(6, 4000, 8000)))
library(ggplot2)
library(scales)
sec_plot <- ggplot(data, aes(x = year)) +
geom_col(aes(y = frequency, fill = "orange"), alpha = 0.6) +
geom_line(aes(y = severity/1000, color = "black")) +
scale_fill_identity(guide = "legend", label="Claim frequency (Number of paid claims per 100 Insured exposure)", name=NULL) +
scale_color_identity(guide = "legend", label="Claim Severity (Average insurance payment per claim)", name=NULL) +
theme(legend.position = "bottom") +
scale_y_continuous(sec.axis =sec_axis( ~ . *1, labels = label_dollar(scale=1000), name="Severity") ) + #formats the 2nd axis
guides(fill = guide_legend(order = 1), color = guide_legend(order = 2)) #control which scale plots first
sec_plot

How to separately label and scale double y-axis in ggplot2?

I have a test dataset like this:
df_test <- data.frame(
proj_manager = c('Emma','Emma','Emma','Emma','Emma','Alice','Alice'),
proj_ID = c(1, 2, 3, 4, 5, 6, 7),
stage = c('B','B','B','A','C','A','C'),
value = c(15,15,20,20,20,70,5)
)
Preparation for viz:
input <- select(df_test, proj_manager, proj_ID, stage, value) %>%
filter(proj_manager=='Emma') %>%
do({
proj_value_by_manager = sum(distinct(., proj_ID, value)$value);
mutate(., proj_value_by_manager = proj_value_by_manager)
}) %>%
group_by(stage) %>%
do({
sum_value_byStage = sum(distinct(.,proj_ID,value)$value);
mutate(.,sum_value_byStage= sum_value_byStage)
}) %>%
mutate(count_proj = length(unique(proj_ID)))
commapos <- function(x, ...) {
format(abs(x), big.mark = ",", trim = TRUE,
scientific = FALSE, ...) }
Visualization:
ggplot (input, aes(x=stage, y = count_proj)) +
geom_bar(stat = 'identity')+
geom_bar(aes(y=-proj_value_by_manager),
stat = "identity", fill = "Blue") +
scale_y_continuous(labels = commapos)+
coord_flip() +
ylab('') +
geom_text(aes(label= sum_value_byStage), hjust = 5) +
geom_text(aes(label= count_proj), hjust = -1) +
labs(title = "Emma: 4 projects| $90M Values \n \n Commitment|Projects") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_hline(yintercept = 0, linetype =1)
My questions are:
Why is the y-values not showing up right? e.g. C is labeled 20, but nearing hitting 100 on the scale.
How to adjust the position of labels so that it sits on the top of its bar?
How to re-scale the y axis so that both the very short bar of 'count of project' and long bar of 'Project value' can be well displayed?
Thank you all for the help!
I think your issues are coming from the fact that:
(1) Your dataset has duplicated values. This causes geom_bar to add all of them together. For example there are 3 obs for B where proj_value_by_manager = 90 which is why the blue bar extends to 270 for that group (they all get added).
(2) in your second geom_bar you use y = -proj_value_by_manager but in the geom_text to label this you use sum_value_byStage. That's why the blue bar for A is extending to 90 (since proj_value_by_manager is 90) but the label reads 20.
To get you what I believe the chart you want is you could do:
#Q1: No dupe dataset so it doesnt erroneous add columns
input2 <- input[!duplicated(input[,-c(2,4)]),]
ggplot (input2, aes(x=stage, y = count_proj)) +
geom_bar(stat = 'identity')+
geom_bar(aes(y=-sum_value_byStage), #Q1: changed so this y-value matches your label
stat = "identity", fill = "Blue") +
scale_y_continuous(labels = commapos)+
coord_flip() +
ylab('') +
geom_text(aes(label= sum_value_byStage, y = -sum_value_byStage), hjust = 1) + #Q2: Added in y-value for label and hjust so it will be on top
geom_text(aes(label= count_proj), hjust = -1) +
labs(title = "Emma: 4 projects| $90M Values \n \n Commitment|Projects") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_hline(yintercept = 0, linetype =1)
For your last question, there is no good way to display both of these. One option would be to rescale the small data and still label it with a 1 or 3. However, I didn't do this because once you scale down the blue bars the other bars look OK to me.

Placing tick marks between bars in ggplot2

Using the diamonds data set in the ggplot2 package, I can generate the following chart.
library(ggplot2)
library(dplyr)
diamond.summary <-
diamonds %>%
mutate(carat = ifelse(runif(nrow(.)) < 0.05, NA_real_, carat)) %>%
group_by(carat_quintile = ntile(carat, 5)) %>%
summarise(avg_price = mean(price))
diamond.summary %>%
filter(!is.na(carat_quintile)) %>%
ggplot(aes(carat_quintile, avg_price)) +
geom_bar(stat = "identity",
color = "black",
width = 1) +
scale_x_continuous("Carat percentile",
breaks = 1:6 - 0.5,
labels = seq(0,100, by = 20)) +
scale_y_continuous(expand = c(0,0),
limits = c(0, 1.1* max(diamond.summary$avg_price)))
So far, so easy. However, I would also like to display the average price of the missing entries alongside the chart. Similar to the following:
diamond.summary %>%
mutate(Facet = is.na(carat_quintile),
carat_quintile_noNA = ifelse(Facet, "Unknown", carat_quintile)) %>%
ggplot(aes(x = carat_quintile_noNA, y = avg_price, fill = Facet)) +
geom_bar(stat = "identity") +
facet_grid(~Facet, scales = "free_x", space = "free_x") +
scale_x_discrete(breaks = (0:6) - 0.5)
However, when I try to perform the same trick using scale_x_continuous, I get the error Discrete value supplied to continuous scale. When I try to use scale_x_discrete(breaks = c(0:6 + 0.5)) for example, the axis ticks and labels disappear.
My question is, how can I get the same faceted chart above with the tick marks in the first panel placed as in the first chart in this post? Advice about chart design could be an acceptable solution, but I don't think all problems like this are solvable with a redesign.
The trick is to convert your factor to a numeric, assigning a magic number to the unknown quantity. (ggplot2 will not plot bars with true NA values.) Then use scale_x_continuous
diamond.summary %>%
mutate(Facet = is.na(carat_quintile),
carat_quintile_noNA = ifelse(Facet, "Unknown", carat_quintile),
##
## 99 is a magic number. For our plot, it just has
## to be larger than 5. The value 6 would be a natural
## choice, but this means that the x tick marks would
## overflow ino the 'unknown' facet. You could choose
## choose 7 to avoid this, but any large number works.
## I used 99 to make it clear that it's magic.
numeric = ifelse(Facet, 99, carat_quintile)) %>%
ggplot(aes(x = numeric, y = avg_price, fill = Facet)) +
geom_bar(stat = "identity", width = 1) +
facet_grid(~Facet, scales = "free_x", space = "free_x") +
scale_x_continuous(breaks = c(0:5 + 0.5, 99),
labels = c(paste0(c(0:5) * 20, "%"), "Unknown"))
One solution is to approach a bit differently, and reposition the bars instead of the ticks, using position_nudge.
library(ggplot2)
library(dplyr)
diamond.summary <-
diamonds %>%
mutate(carat = ifelse(runif(nrow(.)) < 0.05, NA_real_, carat)) %>%
group_by(carat_quintile = ntile(carat, 5)) %>%
summarise(avg_price = mean(price))
# nudge bars to the left
diamond.summary %>%
filter(!is.na(carat_quintile)) %>%
ggplot(aes(carat_quintile, avg_price)) +
geom_bar(stat = "identity",
color = "black",
width = 1,
position=position_nudge((x=-1))) +
scale_x_continuous("Carat percentile",
breaks = 1:6 - 0.5,
labels = seq(0,100, by = 20)) +
scale_y_continuous(expand = c(0,0),
limits = c(0, 1.1* max(diamond.summary$avg_price)))
# nudge bars to the right
diamond.summary %>%
filter(!is.na(carat_quintile)) %>%
ggplot(aes(carat_quintile, avg_price)) +
geom_bar(stat = "identity",
color = "black",
width = 1,
position=position_nudge((x=1))) +
scale_x_continuous("Carat percentile",
breaks = 1:6 - 0.5,
labels = seq(0,100, by = 20)) +
scale_y_continuous(expand = c(0,0),
limits = c(0, 1.1* max(diamond.summary$avg_price)))

Resources