ggplotly() does not display geom_vline / geom_hline when data is POSIXct - r

I am trying to make a graph with "time markers". These time markers are vertical lines for certain dates. Time data are POSIXct format. I would like to use the awesome interactive interface of Plotly and use my ggplot objects in it.
The problem is that these "time markers" don't show up after using ggplotly(). I have already tried plotly::add_segments(), but it does not work.
Here are two reproducible examples:
1. With non-POSIXct data it works fine
# dummy dataset
df2 <- data.frame(id = 1:10, measure = runif(10, 0, 20))
events2 <- data.frame(number = c(2, 3, 8))
# ggplot graph
# The marker positions are mapped by column name: `data = events2` already
# supplies the column, so `events2$number` inside aes() is unnecessary.
p2 <- ggplot() +
  geom_line(data = df2, aes(x = id, y = measure)) +
  geom_vline(data = events2, aes(xintercept = number), color = "red")
p2
# plotly graph that displays the geom_vline properly
ggplotly(p2)
2. With POSIXct data it doesn't display the correct result
# dummy dataset
df <- data.frame(
  date = seq(
    as.POSIXct("2017-07-01", tz = "UTC", format = "%Y-%m-%d"),
    as.POSIXct("2018-04-15", tz = "UTC", format = "%Y-%m-%d"),
    "1 month"
  ),
  measure = runif(10, 0, 20)
)
events <- data.frame(
  date_envents = as.POSIXct(
    c("2017-10-12", "2017-11-12", "2018-03-15"),
    tz = "UTC", format = "%Y-%m-%d"
  )
)
# ggplot graph
# The marker column is referred to by its full name: `events$date` only
# resolved to `date_envents` through `$` partial matching on the data frame.
p <- ggplot() +
  geom_line(data = df, aes(x = date, y = measure)) +
  geom_vline(data = events, aes(xintercept = date_envents), color = "red")
p
# plotly graph that does not display the geom_vline properly
ggplotly(p)
I have seen some workarounds (like this one: Add vertical line to ggplotly plot), but they are "complicated". Is there a simpler way to solve this problem?
I am using Windows 10 with R version 3.5.0, RStudio and the following packages :
library(tidyverse) and library(plotly)

A simple workaround is to convert the xintercept of the geom_vline to numeric with as.numeric().
sample data
# Monthly UTC timestamps from 2017-07-01 up to 2018-04-15, each paired with a
# random measure drawn from [0, 20].
df <- data.frame(
  date = seq(
    from = as.POSIXct("2017-07-01", tz = "UTC", format = "%Y-%m-%d"),
    to = as.POSIXct("2018-04-15", tz = "UTC", format = "%Y-%m-%d"),
    by = "1 month"
  ),
  measure = runif(10, 0, 20)
)
# The three time markers, stored as POSIXct in UTC.
events <- data.frame(
  date_envents = as.POSIXct(
    c("2017-10-12", "2017-11-12", "2018-03-15"),
    tz = "UTC",
    format = "%Y-%m-%d"
  )
)
code
# as.numeric() turns the POSIXct markers into plain numbers (seconds since
# 1970-01-01 UTC); this is the workaround that lets ggplotly() show the lines.
# The column is named in full: `events$date` only worked through `$` partial
# matching of `date_envents`.
p <- ggplot() +
  geom_line(data = df, aes(x = date, y = measure)) +
  geom_vline(
    data = events,
    aes(xintercept = as.numeric(date_envents)),
    color = "red"
  )
result
ggplotly(p)

Related

Draw arrow on ggplot with dates as variable, by specifying co-ordinates rather than using the units of the x- and y-variables

I am attempting to plot the blood test results for a patient in a time series. I have managed to do this and included a reference range between two shaded y-intercepts. My problem is that the annotate() or geom_segment() calls want me to specify the arrow co-ordinates in the units of my independent variable, which is, unhelpfully, a date (YYYY-MM-DD).
Is it possible to get R to ignore the units of the x- and y-axis and specify the arrow co-ordinates as if they were on a grid?
# Simulated data: 25 results in [2.0, 3.5] on dates sampled (with replacement)
# between start_date and end_date.
result <- runif(25, min = 2.0, max = 3.5)
start_date <- ymd("2021-08-16")
end_date <- ymd("2022-10-29")
date <- sample(seq(start_date, end_date, by = "days"), 25, replace = TRUE)
# NOTE(review): `numbers` is not defined anywhere in this snippet; `result`
# was presumably intended -- confirm.
q <- data.table(numbers, date)
# Line + points on a monthly date axis, a shaded band between y = 2.1 and
# y = 2.6, two vertical marker lines, and an arrow segment.
ggplot(q, aes(x = date, y = result)) +
geom_line() +
geom_point(aes(x = date, y = result), shape = 21, size = 3) +
scale_x_date(limits = c(min(q$date), max(q$date)),
breaks = date_breaks("1 month"),
labels = date_format("%b %Y")) +
ylab("Corrected calcium (mmol/L")+
xlab("Date of blood test") +
ylim(1,4)+
geom_ribbon(aes(ymin=2.1, ymax=2.6), fill="grey", alpha=0.2, colour="grey")+
geom_vline(xintercept=as.numeric(q$date[c(3, 2)]),
linetype=4, colour="black") +
theme(axis.text.x = element_text(angle = 45)) + theme_prism(base_size = 10) +
# NOTE(review): x = 1 and xend = 3 are plain numbers while the x axis is a
# date scale; this looks like the source of the date_trans error -- confirm.
annotate("segment", x = 1, y = 2, xend = 3, yend = 4, arrow = arrow(length = unit(0.15, "cm")))
The error produced is Error: Invalid input: date_trans works with objects of class Date only.
I can confirm that:
> class(q$date)
[1] "Date"
I've just gone with test co-ordinates (1,2,3,4) for the annotate("segment"...), ideally I want to be able to get the arrow to point to a specific data point on the plot to indicate when the patient went on treatment.
Many thanks,
Sandro
You don't need to convert to points or coordinates. Just use the actual values from your data frame. I am just subsetting within annotate using a hard coded index (you can also automate this of course), but you will need to "remind" R that you are dealing with dates - thus the added lubridate::as_date call.
library(ggplot2)
library(lubridate)

# Simulated blood test results on randomly drawn days
result <- runif(25, min = 2.0, max = 3.5)
start_date <- ymd("2021-08-16")
end_date <- ymd("2022-10-29")
date <- sample(seq(start_date, end_date, by = "days"), 25, replace = TRUE)

# Build the data frame already sorted by date, so row indices follow time
q <- dplyr::arrange(data.frame(result, date), date)

# The arrow starts at the date of row 2 at the lowest result and ends exactly
# on the data point stored in row 20.
ggplot(data = q, mapping = aes(x = date, y = result)) +
  geom_line() +
  annotate(
    "segment",
    x = as_date(q$date[2]), y = min(q$result),
    xend = as_date(q$date[20]), yend = q$result[20],
    arrow = arrow(),
    size = 2,
    color = "red"
  )

Plot multiple time-series in a single graph - Error: Invalid input: date_trans works with objects of class Date only

I am trying to plot multiple time series into a single graph like the attached image, but I am getting this error: Error: Invalid input: date_trans works with objects of class Date only. I tried to solve the issue by converting the column containing the months into a date format, either using the lubridate package or other methods I found on the internet. Here is the data:
structure(list(Month = c("2016-01-01", "2016-02-01", "2016-03-01",
"2016-04-01", "2016-05-01", "2016-10-01", "2016-11-01", "2016-12-01"
), Residential = c(36.22021, 40.31832, 37.83721, 39.75821, 34.7028,
42.97021, 37.61029, 41.58934), Residential.Informal = c(33.3279,
35.2452, 34.67857, 30.44121, 30.65073, 35.55425, 34.04282, 35.22392
), Non.Residential = c(79.75459, 80.79518, 96.62088, 95.32751,
94.49208, 91.24384, 115.131, 119.9929)), class = "data.frame", row.names = c(NA,
-8L))
here is the code, without all my try-and-error in it:
# NOTE(review): Month is never converted to Date in this snippet, so it reaches
# ggplot in whatever class read.csv() produced (the dput output shows character
# strings) -- confirm.
df <- read.csv("wd/ts_all_month.csv")
# Reshape to long format, keeping Month as the id column.
df_melt = melt(df, id.vars = 'Month')
#multiple ts plots
# Open a TIFF device, draw one facet per variable with its own y scale,
# then close the device.
tiff("ts_lu.tiff", units="in", width=14, height=8, res=300)
ggplot(df_melt, aes(x = Month, y = value)) +
geom_line() +
facet_wrap(~ variable, scales = 'free_y', ncol = 1)
dev.off()
With your sample data you can do:
# Month is stored as text ("2016-01-01", ...); convert it to Date first so that
# ggplot2 uses a continuous date axis and geom_line() can join the points
# within each facet.
df_melt$Month <- as.Date(df_melt$Month)
ggplot(df_melt, aes(x = Month, y = value)) +
  geom_line() +
  facet_wrap(~ variable, scales = "free_y", ncol = 1)

How can I work with stat_density and a timeseries (Posixct on x axis)?

Based on this example:
# example from https://ggplot2.tidyverse.org/reference/geom_tile.html
# Density of mpg drawn as raster cells, one row per cylinder count.
cars <- ggplot(data = mtcars, mapping = aes(x = mpg, y = factor(cyl)))
cars +
  stat_density(
    mapping = aes(fill = after_stat(density)),
    geom = "raster",
    position = "identity"
  )
I wanted to create a plot with the density plotted vertically per hour of my dataset. The original dataset is very long. I also want to display the single data points and a mean as a line.
Here is a simplified basic version of the code:
# reproducible example for the density plot
library(reshape2)
library(ggplot2)
library(scales)

startdate <- as.POSIXct("2020-01-01 01:00", tz = "UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz = "UTC")

# wide data: one row per hour with three measurement columns
df <- data.frame(
  x = seq.POSIXt(startdate, enddate, "hour"),
  y1 = c(1, 2, 3, 4, 5),
  y2 = c(2, 4, 6, 8, 10),
  y3 = c(3, 6, 9, 12, 15)
)
# hourly mean over the measurement columns (everything except x)
df$mean <- rowMeans(df[, -1])
# long data: column 1 is the id, columns 2 to 4 are stacked into `value`
df_melt <- melt(df, id.vars = 1, measure.vars = c(2, 3, 4))

# vertical density per hour (x treated as a factor) with the raw points on top
g1 <- ggplot(data = df_melt, mapping = aes(x = factor(x), y = value)) +
  stat_density(
    mapping = aes(fill = after_stat(ndensity)),
    geom = "raster",
    position = "identity",
    orientation = "y"
  ) +
  geom_point()
g1
This works, but the original dataset has so many hours that the labeling of the x axis is not nice. I also want to determine what the date format of the labels should look like and the limits of the plot. Before working with stat_density, I used to do that with scale_x_datetime. But for the density plot I have to use factor(x) instead of the original x, which is POSIXct. So the following scaling produces an error because x is a factor and not a date, obviously:
#scale x datetime (does not work)
# The plot maps factor(x) to the x aesthetic, so this datetime scale is being
# added to a discrete variable.
g1 <- g1 + scale_x_datetime(labels = date_format("%b/%d", tz="UTC"),
limits = c(startdate, enddate),
breaks = function(x)
# NOTE(review): startdate and enddate are four hours apart, so a "2 days" step
# yields only startdate -- confirm the intended step.
seq.POSIXt(from = startdate, to = enddate, by = "2 days"),
date_minor_breaks = "12 hours",
expand = c(0,0))
g1
I managed to scale_x_discrete but this makes it hard to determine the label format and limits with the bigger dataset:
#scale x discrete
# Limits and breaks are given as the character form of df$x; only the 2nd and
# 4th values are kept as breaks.
g1 <- g1 + scale_x_discrete(limits = c(as.character(df$x)),
breaks = as.character(df$x)[c(2,4)])
g1
The next problem with factors is then that I cannot add the mean of every hour as geom_line as every factor consists of 1 observation only.
#plot mean
# Overlay the hourly mean from the wide data frame `df`, first as red points,
# then as a red line. x is still factor(x) here, so each level holds a single
# observation of `mean`.
g1 + geom_point(data = df, aes(factor(x), mean), col = "red")
g1 + geom_line(data = df, aes(factor(x), mean), col = "red")
So, is there a way to produce the desired plot with density per hour, overplotted points and overplotted mean line? And I want to edit the x labels and limits as comfortably as possible. Maybe there is a way to use x instead of factor(x)...
I think the solution might be as simple as dropping the factor() and setting an explicit group in the density. Does the following work for your real case?
library(reshape2)
library(ggplot2)
library(scales)
#> Warning: package 'scales' was built under R version 4.0.3

startdate <- as.POSIXct("2020-01-01 01:00", tz = "UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz = "UTC")

# wide data: one row per hour with three measurement columns
df <- data.frame(
  x = seq.POSIXt(startdate, enddate, "hour"),
  y1 = c(1, 2, 3, 4, 5),
  y2 = c(2, 4, 6, 8, 10),
  y3 = c(3, 6, 9, 12, 15)
)
df$mean <- rowMeans(df[, -1])
# long data: column 1 is the id, columns 2 to 4 are stacked into `value`
df_melt <- melt(df, id.vars = 1, measure.vars = c(2, 3, 4))

# x stays POSIXct; `group = x` makes stat_density compute one density per hour
ggplot(data = df_melt, mapping = aes(x = x, y = value)) +
  stat_density(
    mapping = aes(fill = after_stat(ndensity), group = x),
    geom = "raster",
    position = "identity",
    orientation = "y"
  ) +
  geom_point()
Created on 2021-01-29 by the reprex package (v0.3.0)

Trying to map a value for geom_vline, but is not plotting in the correct place on the x axis with ggplot in R

I am currently trying to generate NOAA tide prediction charts (x = datetime, y = water level) with the dawn/sunrise/dusk/sunset times as vertical lines along the x axis timeline.
The rnoaa package calls the data and gives me the prediction date times in POSIXct. The suncalc library provides me a data frame with each date in the range's sunrise, sunset, etc. in POSIXct format as well.
library(rnoaa)
library(tidyverse)
library(ggplot2)
library(suncalc)
# Tide predictions for station 8551762, 2020-03-01 to 2020-03-31, datum "mtl".
march.tides <- as.data.frame(coops_search(station_name = 8551762,
begin_date = 20200301, end_date = 20200331,
datum = "mtl", product = "predictions"))
# Add a calendar-date column derived from the prediction timestamps.
# NOTE(review): as.Date() is called without a tz here, while the sunlight
# times below are requested with tz = "EST" -- confirm both agree.
march.tides <- march.tides %>%
mutate(yyyy.mm.dd = as.Date(predictions.t))
dates <- unique(march.tides$yyyy.mm.dd)
# Sunlight times for every day of March 2020 at the given latitude/longitude.
sunlight.times <- getSunlightTimes(date = seq.Date(as.Date("2020/3/1"), as.Date("2020/3/31"), by = 1),
lat = 39.5817, lon = -75.5883, tz = "EST")
I then have a loop that spits out separate plots for each calendar date - which works hunky dory. The vertical lines are drawing on the graph without an error, but are definitely in the wrong spot (sunrise is being drawn around 11am when it should be 06:30).
# Draw and print one tide plot per calendar date in `dates`.
for (i in seq_along(dates)) {
# Keep only the predictions whose date column equals the current date.
plot <- ggplot(subset(march.tides, march.tides$yyyy.mm.dd==dates[i])) +
aes(x = predictions.t, y = predictions.v) +
geom_line(size = 1L, colour = "#0c4c8a") +
theme_bw() +
# Each geom_vline() receives the whole sunlight.times column (every day in the
# range), not only the entry for dates[i]; sunrise/sunset are solid lines,
# dawn/dusk dotted.
geom_vline(xintercept = sunlight.times$sunrise) +
geom_vline(xintercept = sunlight.times$sunset) +
geom_vline(xintercept = sunlight.times$dawn, linetype="dotted") +
geom_vline(xintercept = sunlight.times$dusk, linetype="dotted") +
ggtitle(dates[i])
print(plot)
}
I could alternatively facet the separate dates instead of this looping approach. Even when I subset the data to a single date, the vertical lines still did not draw correctly.
I wondered if maybe the issue was a time zone one. If I try to stick a time zone argument onto the tide prediction data call, I get the error:
Error in if (!repeated && grepl("%[[:xdigit:]]{2}", URL, useBytes = TRUE)) return(URL) :
missing value where TRUE/FALSE needed
It looks like you want to use EST as your timezone, so you could include it in your conversion of predictions.t.
I would be explicit in what you want labeled on your xaxis in ggplot using scale_x_datetime, including the timezone.
library(rnoaa)
library(tidyverse)
library(ggplot2)
library(suncalc)
library(scales)

# Tide predictions for March 2020 (station 8551762, datum "mtl"), with a
# calendar-date column computed in EST.
march.tides <- coops_search(
  station_name = 8551762,
  begin_date = 20200301, end_date = 20200331,
  datum = "mtl", product = "predictions"
) %>%
  as.data.frame() %>%
  mutate(yyyy.mm.dd = as.Date(predictions.t, tz = "EST"))

dates <- unique(march.tides$yyyy.mm.dd)

# Sunlight times for every day of March 2020, reported in EST
sunlight.times <- getSunlightTimes(
  date = seq.Date(as.Date("2020/3/1"), as.Date("2020/3/31"), by = 1),
  lat = 39.5817,
  lon = -75.5883,
  tz = "EST"
)

# One printed plot per calendar date; the x axis labels are formatted in EST
for (i in seq_along(dates)) {
  tides_on_day <- subset(march.tides, march.tides$yyyy.mm.dd == dates[i])
  plot <- ggplot(tides_on_day) +
    aes(x = predictions.t, y = predictions.v) +
    geom_line(size = 1L, colour = "#0c4c8a") +
    theme_bw() +
    geom_vline(xintercept = sunlight.times$sunrise) +
    geom_vline(xintercept = sunlight.times$sunset) +
    geom_vline(xintercept = sunlight.times$dawn, linetype = "dotted") +
    geom_vline(xintercept = sunlight.times$dusk, linetype = "dotted") +
    ggtitle(dates[i]) +
    scale_x_datetime(labels = date_format("%b %d %H:%M", tz = "EST"))
  print(plot)
}
Plot

Plotting daily time series in R

I'm new to R. I have daily time series data on sap flux and want to plot a line graph in R with the x-axis formatted as dates. My data file looks like this:
Date G1T0 G1T1 G1T2 G1T3
19-Jul-14 0.271081377 0.342416929 0.216215197 0.414495265
20-Jul-14 0.849117059 0.778333568 0.555856888 0.375737302
21-Jul-14 0.742855108 0.756373483 0.536025029 0.255169809
22-Jul-14 0.728504928 0.627172734 0.506561041 0.244863511
23-Jul-14 0.730702865 0.558290192 0.452253842 0.223213402
24-Jul-14 0.62732916 0.461480279 0.377567279 0.180328992
25-Jul-14 0.751401513 0.5404663 0.517567416 0.204342317
Please help me by sample R script.
You can try this:
# install.packages("ggplot2")
# install.packages("scales")
library(ggplot2)
library(scales)

# Parse strings such as "19-Jul-14" into Date values.
# NOTE(review): %b matches month abbreviations in the current locale --
# confirm an English locale is in use.
data$Date <- as.Date(data$Date, format = "%d-%b-%y")

# Date is mapped once in ggplot() and inherited by every layer. Each
# geom_line() adds one series; the constant `col` label is what creates its
# legend entry.
ggplot(data = data, aes(x = Date)) +
  geom_line(aes(y = G1T0, col = "G1T0")) +
  geom_line(aes(y = G1T1, col = "G1T1")) +
  geom_line(aes(y = G1T2, col = "G1T2")) +
  geom_line(aes(y = G1T3, col = "G1T3")) +
  scale_colour_discrete(name = "Group") +
  labs(y = "Measurement", x = "Date")
This loads a couple of packages to do the plot (obviously, if you don't have those packages, install them), then formats your dates so R knows they're dates (the format argument I used matches your particular string pattern), then calls the ggplot function to map your data.
Does this work for you?

Resources