How to add vertical lines and text to time series plot? - r

I have some time series data and used autoplot function in R to plot my time series. I would like add vertical lines to the plot and text. For example, a line between 2003-2010 with text "train data", 2010-2015 "test" data and 2015- 2018 "predictions". I did it in ordinary R plot, but it is not fancy. I would like to do it with ggplot or autoplot. My code is as follows.
data <- runif(180,100,1000)
ts.data <- ts(data, frequency = 12, start = c(2003,1))
autoplot(ts.data, xlab="Year", ylab="Number of Tourists")
Thanks in advance

You can also use ggplot2 itself to get it done:
library(ggplot2)
data <- runif(180,100,1000)
ts.data <- ts(data, frequency = 12, start = c(2003,1))
autoplot(ts.data, xlab="Year", ylab="Number of Tourists") +
geom_vline(xintercept = as.Date(c("2003-01-01", "2010-01-01", "2015-01-01", "2018-01-01") ), color = "blue")+
geom_text(aes(x = as.Date("2007-01-01"), y = 1020, label="train data"))+
geom_text(aes(x = as.Date("2013-01-01"), y = 1020, label="test"))+
geom_text(aes(x = as.Date("2016-07-01"), y = 1020, label="predictions"))
I understand you asked about adding vertical lines, just wonder if it is better to use horizontal lines:
autoplot(ts.data, xlab="Year", ylab="Number of Tourists") +
geom_segment(aes(x = as.Date("2003-01-01") , y = 1000, xend = as.Date("2010-01-01"), yend = 1000), color = "red") +
geom_segment(aes(x = as.Date("2010-01-01") , y = 1000, xend = as.Date("2015-01-01"), yend = 1000), color = "blue") +
geom_segment(aes(x = as.Date("2015-01-01") , y = 1000, xend = as.Date("2018-01-01"), yend = 1000), color = "green") +
geom_text(aes(x = as.Date("2007-01-01"), y = 1020, label="train data"), color = "red")+
geom_text(aes(x = as.Date("2013-01-01"), y = 1020, label="test"), color = "blue")+
geom_text(aes(x = as.Date("2017-01-01"), y = 1020, label="predictions"), color="green")

You could use the geomtextpath package with geom_textvline to add vertical lines with labels to autoplot since it is a ggplot object. You have to make sure your xintercept is the right number because your data is in dates. Here is some reproducible code:
library(ggplot2)
library(ggfortify)
library(geomtextpath)
autoplot(ts.data, xlab="Year", ylab="Number of Tourists") +
geom_textvline(xintercept = as.numeric(as.Date(c("2005-01-01"))),
label = "train data", vjust = 1.3, col = "blue") +
geom_textvline(xintercept = as.numeric(as.Date(c("2012-01-01"))),
label = "test data", vjust = 1.3, col = "blue") +
geom_textvline(xintercept = as.numeric(as.Date(c("2017-01-01"))),
label = "Predictions", vjust = 1.3, col = "blue")
Created on 2023-02-18 with reprex v2.0.2

Related

How to overlay a box on an existing plot?

I am trying to draw a box on top of the plot on a specific x = Date and y = Price.
I have multiple Date entries stored in specificDates, but even though the code can be ran and doesn't output any errors, the box doesn't show on the plot.
dataDate <- as.Date(c("2015-01-01","2016-03-01","2018-06-01","2020-08-01"))
dataPrice <- c(170, 320, 7000,8000)
dummyData <- data.frame(dataDate, dataPrice)
specificDates <- as.Date(c("2016-07-15", "2020-05-20"))
plot_linPrice <- ggplot(data = dummyData,
mapping = aes(x = dataDate, y = dataPrice)) +
geom_line() +
scale_y_log10() +
geom_vline(xintercept = as.numeric(specificDates), color = "blue", lwd = .5) #+ #uncommenting + brings up error
geom_rect(aes(xmin = "2015-01-01", xmax = "2015-06-01", ymin = 5000, ymax = 8000), fill = "blue")
print(plot_linPrice)
Try with this:
library(ggplot2)
#Data
dataDate <- as.Date(c("2015-01-01","2016-03-01","2018-06-01","2020-08-01"))
dataPrice <- c(170, 320, 7000,8000)
dummyData <- data.frame(dataDate, dataPrice)
specificDates <- as.Date(c("2016-07-15", "2020-05-20"))
#Code
ggplot(data = dummyData,
mapping = aes(x = dataDate, y = dataPrice)) +
geom_line() +
scale_y_log10() +
geom_vline(xintercept = as.numeric(specificDates), color = "blue", lwd = .5)+
geom_rect(aes(xmin = as.Date("2015-01-01"), xmax = as.Date("2015-06-01"), ymin = 5000, ymax = 8000), fill = "blue")
Output:

How do I add labels to both extremities of my axes?

so imagine the following example
mtcars %>%
ggplot(aes(mpg,hp)) + geom_point()
How can I add labels to both extremities of my axes without changing the title of my x and y axes?
For instance:
bottom right: "MPG increases"
bottom left (for x axis) : "MPG decreases"
bottom left (for y axis) : "HP decreases"
top left : "HP increases"
Does anyone have any leads on how to do this?
Thank you!
You can try something like this:
library(dplyr)
library(ggplot2)
mtcars %>% ggplot(aes(mpg,hp)) +
geom_point() + annotate("text", x = 33, y = 10,
label = "MPG increases",
hjust=1.1, vjust=-1.1, col="black",
cex=5, fontface = "bold", alpha = 0.8)+
annotate("text", x = 17, y = 10,
label = "MPG decreases",
hjust=1.1, vjust=-1.1, col="black",
cex=5, fontface = "bold", alpha = 0.8)+
annotate("text", x = 17, y = 100,
label = "HP decreases",
hjust=1.1, vjust=-1.1, col="black",
cex=5, fontface = "bold", alpha = 0.8)+
annotate("text", x = 17, y = 310,
label = "HP increases",
hjust=1.1, vjust=-1.1, col="black",
cex=5, fontface = "bold", alpha = 0.8)
Here's one way to do this keeping within the maximum and minimum limits of the data. And picks up on #Tjebo suggestion to include the labelling text within a data frame.
Although not in the question have included arrows to see if this would help distinguish the HP and MPG decreases labels. The code for this can easily be disregarded.
If you want to use this presentation regularly it would pay to make a function.
library(ggplot2)
library(tibble)
mtcars %>%
ggplot(aes(mpg,hp)) +
geom_point()+
geom_text(data = df_lab, aes(x, y, label = lab), vjust = df_lab$vj, hjust = df_lab$hj)+
geom_segment(data = df_ar, aes(x = x, y = y, xend = xend, yend = yend),
lineend = "square", linejoin = "mitre", size = 1, arrow = arrow(length = unit(2, "mm")))
Data
# tibble for labels
df_lab <-
tibble(lab = c("HP increases", "HP decreases", "MPG decreases", "MPG increases"),
x = c(rep(min(mtcars$mpg), 3), max(mtcars$mpg)),
y = c(max(mtcars$hp), rep(min(mtcars$hp), 3)),
vj = rep(c("bottom", "top") , each = 2),
hj = c(rep("left", 3), "right"))
# postitioning helpers for arrows
x_len <- 2
y_len <- 40
y_off <- 15
x_off <- 0.5
# tibble for arrows
df_ar <-
tibble(xend = c(rep(min(mtcars$mpg) - x_off, 2), min(mtcars$mpg), max(mtcars$mpg)),
yend = c(max(mtcars$hp), min(mtcars$hp), rep(min(mtcars$hp) - y_off, 2)),
x = c(rep(min(mtcars$mpg) - x_off, 2), min(mtcars$mpg) + x_len, max(mtcars$mpg) - x_len),
y = c(max(mtcars$hp) - y_len, min(mtcars$hp) + y_len, rep(min(mtcars$hp) - y_off, 2)))
Created on 2020-05-12 by the reprex package (v0.3.0)

how to ggplot with upper and lower bound as shaded using facet_wrap in R?

I am trying to automate the process of plotting data using ggplot and the facet_wrap functionality. I want a single y-axis label instead individual plot Ob (i.e., A_Ob, B_ob etc) and also a single X-axis not all the plots having label for x-axis such as below. Below is my sample code using gridextra package. However, i would like to do it through facet_wrap as i have many other plots to draw which i think will save me sometime.
graphics.off()
rm(list = ls())
library(tidyverse)
library(gridExtra)
G1 = data.frame(A_Ob = runif(1000, 5, 50), A_Sim = runif(1000, 3,60), A_upper = runif(1000, 10,70), A_lower = runif(1000, 0, 45 ),
B_Ob = runif(1000, 5, 50), B_Sim = runif(1000, 3,60), B_upper = runif(1000, 10,70), B_lower = runif(1000, 0, 45 ),
C_Ob = runif(1000, 5, 50), C_Sim = runif(1000, 3,60), C_upper = runif(1000, 10,70), C_lower = runif(1000, 0, 45 ),
D_Ob = runif(1000, 5, 50), D_Sim = runif(1000, 3,60), D_upper = runif(1000, 10,70), D_lower = runif(1000, 0, 45 ),
Pos = 1:1000)
A1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = A_Ob), col = "black")+
geom_line(aes(y = A_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = A_upper, ymax = A_lower), fill = "grey70")
B1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = B_Ob), col = "black")+
geom_line(aes(y = B_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = B_upper, ymax = B_lower), fill = "grey70")
C1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = C_Ob), col = "black")+
geom_line(aes(y = C_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = C_upper, ymax = C_lower), fill = "grey70")
D1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = D_Ob), col = "black")+
geom_line(aes(y = D_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = D_upper, ymax = D_lower), fill = "grey70")
grid.arrange(A1,B1,C1,D1, nrow = 4)
Here is the result of the code
You need to reshape your dataframe into a longer format and separate values for Ob, Sim, upper and lower.
Using the function melt from data.table package can help you to achieve this:
library(data.table)
setDT(G1)
Ob_cols = grep("_Ob",colnames(G1),value = TRUE)
Sim_cols = grep("_Sim",colnames(G1),value = TRUE)
Upper_cols = grep("_upper",colnames(G1), value = TRUE)
Lower_cols = grep("_lower", colnames(G1), value = TRUE)
g.m <- melt(G1, measure = list(Ob_cols,Sim_cols,Upper_cols,Lower_cols), value.name = c("OBS","SIM","UP","LOW"))
levels(g.m$variable) <- c("A","B","C","D")
Pos variable OBS SIM UP LOW
1: 1 A 5.965488 29.167666 26.66783 29.97259
2: 2 A 23.855719 8.570245 43.75830 30.65616
3: 3 A 16.947887 51.201047 15.20758 39.76122
4: 4 A 49.883306 3.715319 34.38066 20.73177
5: 5 A 5.021938 3.102880 30.05036 32.05123
6: 6 A 19.887176 15.400853 53.67156 28.54982
and now, you can plot it:
library(ggplot2)
ggplot(g.m, aes(x = Pos))+
geom_line(aes(y = OBS), color = "black")+
geom_line(aes(y = SIM), color = "blue")+
geom_vline(xintercept = 750,color = "red", size = 1.5)+
geom_ribbon(aes(ymin = UP, ymax = LOW), fill = "grey70")+
facet_grid(variable~.)
EDIT: Adding annotations & renaming labels
To rename and replace facet labels, you can re-define levels of variable and use facet_wrap instead of facet_grid using ncol = 1 as argument.
To add multiple annotations on a single panel, you need to define a dataframe that you will use in geom_text.
Altogether, you have to do:
# renaming names of each facets:
levels(g.m$variable) <- c("M1","M2","M3","M4")
# Defining annotations to add:
df_text <- data.frame(label = c("Calibration", "Validation"),
x = c(740,760),
y = c(65,65),
hjust = c(1,0),
variable = factor("M1", levels = c("M1","M2","M3","M4")))
# Plotting
ggplot(g.m, aes(x = Pos))+
geom_line(aes(y = OBS), color = "black")+
geom_line(aes(y = SIM), color = "blue")+
geom_vline(xintercept = 750,color = "red", size = 1.5)+
geom_ribbon(aes(ymin = UP, ymax = LOW), fill = "grey70")+
facet_wrap(variable~., ncol = 1)+
theme(strip.text.x = element_text(hjust = 0),
strip.background = element_rect(fill = "white"))+
geom_text(data = df_text, aes(x = x, y = y, label = label, hjust = hjust), color = "red")
Does it look what you are expecting ?

Histogram with normal Distribution in R using ggplot2 for illustrations

I'm trying to plot a histogram with ggplot2.
I wrote a simple code for this in R
dnorm.count <- function(x, mean = 0, sd = 1, log = FALSE, n = 1, binwidth = 1){
n * binwidth * dnorm(x = x, mean = mean, sd = sd, log = log)
}
mtcars %>%
ggplot(aes(x = mpg)) +
geom_histogram(bins =60,color = "white", fill = "#9FE367",boundary = 0.5) +
geom_vline(aes(xintercept = mean(mpg)),
linetype="dashed",
size = 1.6,
color = "#FF0000")+
geom_text(aes(label = ..count..), stat= "count",vjust = -0.6)+
stat_function(fun = dnorm.count, color = "#6D67E3",
args = list(mean= mean(mtcars$mpg),
sd = sd(mtcars$mpg),
n = nrow(mtcars)),
lwd = 1.2) +
scale_y_continuous(labels = comma, name = "Frequency") +
scale_x_continuous(breaks=seq(0,max(mtcars$mpg)))+
geom_text(aes(label = paste0("mean = ", round(mean(mtcars$mpg), 2)),
x = mean(mtcars$mpg)*1.2,
y = mean(mtcars$mpg)/5))+
geom_vline(aes(xintercept = sd(mpg)), linetype="dashed",size = 1.6, color = "#FF0000")
What I got is this!
The question is how do I Plot the histogram similar to this
using ggplot2 and is it possible to convert the code to R function?
Edit: For the better explanation of what I'm trying to do:
I wanna create a Histogram exactly the same as the one attached for reference using ggplot2 and then I wanna create a function for the same to reduce the coding. Use any package+ggplot2 you like. The histograms should have lines depicting the standard deviation & mean like the one in reference. If possible depict the standard deviation in the plot as the reference image, that's what I'm trying to achieve.
If your question how to plot histograms like the one you attached in your last figure, this 9 lines of code produce a very similar result.
library(magrittr) ; library(ggplot2)
set.seed(42)
data <- rnorm(1e5)
p <- data %>%
as.data.frame() %>%
ggplot(., aes(x = data)) +
geom_histogram(fill = "white", col = "black", bins = 30 ) +
geom_density(aes( y = 0.3 *..count..)) +
labs(x = "Statistics", y = "Probability/Density") +
theme_bw() + theme(axis.text = element_blank())
You could use annotate() to add symbols or text and geom_segment to show the intervals on the plot like this:
p + annotate(x = sd(data)/2 , y = 8000, geom = "text", label = "σ", size = 10) +
annotate(x = sd(data) , y = 6000, geom = "text", label = "2σ", size = 10) +
annotate(x = sd(data)*1.5 , y = 4000, geom = "text", label = "3σ", size = 10) +
geom_segment(x = 0, xend = sd(data), y = 7500, yend = 7500) +
geom_segment(x = 0, xend = sd(data)*2, y = 5500, yend = 5500) +
geom_segment(x = 0, xend = sd(data)*3, y = 3500, yend = 3500)
This chunk of code would give you something like this:

How to show percent labels on histogram bars using ggplot2

I have seen lots of question regarding converting count on y axis into percent but must of them are in bar plot.
I want to do similar thing in histogram but not able to show the labels on the bar clearly. Please tell me where I am doing wrong.
x = runif(100, min = 0, max = 10)
data1 <- data.frame(x = x)
ggplot(aes(x = x), data = data1)+
geom_histogram(aes(y = (..count..)/sum(..count..)), bins = 10, breaks =
seq(0,10,1), fill = "blue", col = "black")+
geom_text(aes(y = ((..count..)/sum(..count..)),
label = scales::percent((..count..)/sum(..count..))),
stat = "count", vjust = -10)+
scale_y_continuous(labels = scales::percent)
Output:
Use scale_y_continous with breaks and labels will solve your problem.
data1 <- data.frame (x = runif(100, min = 0, max = 10))
ggplot(aes(x=x), data1) + stat_bin(aes(y = ..count..))
ggplot(data1, aes(x = x)) + geom_histogram(fill = "blue", col = "black")+ scale_y_continuous(breaks = seq(0,10,1),labels = paste(seq(0, 10, by = 1) / 100, "%", sep = ""))+geom_text(aes(y = (..count..),label = scales::percent((..count..)/sum(..count..))), stat="bin",colour="green",vjust=2)
or, you can specify where you would like to add the percentage like this:
geom_text(aes(y = (..count..)+0.5))
of course you can change the color as well. from,
stat="bin",colour="your prefer color "
Also you can change the width of the bins as follows:
geom_histogram(fill = "blue", col = "black", binwidth = 0.5)

Resources