Analyse day-time of a date-time variable - r

This is how I solved the problem:
# Sample timestamps; note that the calendar date changes between the first and second entry.
datetimes <- c("2014-01-01 23:03:00", "2014-01-02 00:35:00", "2014-01-02 00:51:00")
# Is there any lubridate command for the following step?
# Drop the date: format each timestamp as "HH:MM:SS", then re-parse that string.
# With a time-only format, as.POSIXct() attaches the current date (see the printed result below).
time <- as.POSIXct(strftime(datetimes, format = "%H:%M:%S", tz = "UTC"), format = "%H:%M:%S")
time
[1] "2016-05-13 23:03:00 CEST" "2016-05-13 00:35:00 CEST" "2016-05-13 00:51:00 CEST"
Is there a reason that there is no such functionality in lubridate?
As I said - I am only interested in the time part - not the date.
lubridate::hms gives periods:
# Format the time of day in UTC, then let lubridate parse the "HH:MM:SS" strings.
ltime <- lubridate::hms(
  format(as.POSIXlt(datetimes, tz = "UTC"), format = "%H:%M:%S", usetz = FALSE)
)
ltime
[1] "23H 3M 0S" "35M 0S" "51M 0S"
class(ltime)
[1] "Period"
attr(,"package")
[1] "lubridate"

What about this?
library(lubridate)
library(ggplot2)

datetimes <- c("2014-01-01 23:03:00", "2014-01-02 00:35:00", "2014-01-02 00:51:00")

# Keep only the time of day; as.POSIXct() puts every value on the same (current) date.
dataset <- data.frame(
  time = as.POSIXct(strftime(datetimes, format = "%H:%M:%S", tz = "UTC"), format = "%H:%M:%S")
)

# Offset from midnight. The units are pinned to minutes because `-` on POSIXct
# chooses secs/mins/hours/days automatically depending on the data.
dataset$delta <- difftime(dataset$time, floor_date(dataset$time, unit = "day"), units = "mins")

# Re-anchor every time of day on one fixed reference day. Adding the difftime
# directly lets base R honour its units, whatever they are.
dataset$relative <- as.POSIXct("2001-01-01 0:0:0") + dataset$delta

ggplot(dataset, aes(x = relative)) +
  geom_histogram(binwidth = 3600) + # POSIXct is measured in seconds: one-hour bins
  scale_x_datetime(date_breaks = "3 hour", date_labels = "%H:%M")

Thanks to alistair: ymd_hms(paste(today(), format(ymd_hms(datetimes), '%T'))) will be my choice. This can be nicely combined with hist or ggplot2 (See above).

Related

How to convert a dataframe with datetimes to a daily time series with mean aggregation in R

I have a dataframe that looks like this:
# Example input: one row per customer arrival; every column is stored as character.
Arrival_DateTime <- c(
  "2009-01-01 08:35:00", "2009-01-01 10:00:00", "2009-01-01 10:25:00",
  "2009-01-02 07:45:00", "2009-01-02 15:32:00", "2009-01-02 11:15:00",
  "2009-01-02 12:35:00"
)
Cust_ID <- c("1214", "2643", "31231", "41244", "1214", "15317", "51591")
Wait_Time_Mins <- c("54", "43", "88", "94", "12", "130", "170")
df_have <- data.frame(Arrival_DateTime, Cust_ID, Wait_Time_Mins)
and want to convert it so I get the number of customer visits per day, and also their average wait time per day, so it looks something like this:
# Desired output: one row per day with the visit count and the mean wait time.
dates <- c("2009-01-01", "2009-01-02")
num_visits <- c("3", "4")
avg_wait_time <- c("61.7", "101.5")
df_want <- data.frame(dates, num_visits, avg_wait_time)
How would I go about doing this?
Similarly, is there a way to do monthly aggregations as well?
You can use -
library(dplyr)

df_have %>%
  mutate(
    # Parse the character timestamps, then derive the grouping key from them.
    Arrival_DateTime = lubridate::ymd_hms(Arrival_DateTime),
    Date = as.Date(Arrival_DateTime),
    # For monthly aggregation -
    # Date = format(Arrival_DateTime, '%Y-%m'),
    Wait_Time_Mins = as.numeric(Wait_Time_Mins)
  ) %>%
  group_by(Date) %>%
  summarise(
    # n() counts rows, i.e. visits; n_distinct(Cust_ID) would count a customer
    # who comes twice on the same day only once.
    num_visits = n(),
    avg_wait_time = mean(Wait_Time_Mins)
  )
# Date num_visits avg_wait_time
#1 2009-01-01 3 61.66667
#2 2009-01-02 4 101.50000
Using aggregate().
# Visit count and mean wait per day in a single aggregate() pass. The two
# statistics come back together in one matrix column, which the data.frame()
# call splits into separate columns before they are renamed.
aggregate(
  as.double(Wait_Time_Mins) ~ as.Date(Arrival_DateTime),
  data = df_have,
  FUN = function(waits) c(length(waits), mean(waits))
) |>
  do.call(what = data.frame) |>
  setNames(c("date", "num_visits", "avg_wait_time"))
# date num_visits avg_wait_time
# 1 2009-01-01 3 61.66667
# 2 2009-01-02 4 101.50000
Note: R >= 4.1 used.
Data:
# Reproducible copy of the question's data; all three columns are character.
df_have <- data.frame(
  Arrival_DateTime = c(
    "2009-01-01 08:35:00", "2009-01-01 10:00:00", "2009-01-01 10:25:00",
    "2009-01-02 07:45:00", "2009-01-02 15:32:00", "2009-01-02 11:15:00",
    "2009-01-02 12:35:00"
  ),
  Cust_ID = c("1214", "2643", "31231", "41244", "1214", "15317", "51591"),
  Wait_Time_Mins = c("54", "43", "88", "94", "12", "130", "170"),
  stringsAsFactors = FALSE
)

Why is my x-axis getting cut off before the end of my data, when I tried adding xlim() I got an error?

I am trying to make a scatterplot with salinity on the y-axis and Date/Time on x-axis. However, when I try to plot it my x-axis data is getting cut off before the end of my data. I've tried adding an xlim but I get an error. See below
# Code as posted in the question, kept verbatim: it raises the error quoted below.
# NOTE(review): `ylim=c(0,12)` and `xlim=c(...)` are written with `=` rather than as
# calls such as `ylim(0, 12)`, so a character vector ends up as an operand of `+`.
p_OY1 <- ggplot(data = OY1, aes(x=date_time2, y=Salinity)) + geom_point() + ylim=c(0,12) + xlim=c("0019-10-04 00:00:00", "0020-06-10 00:00:00")+ theme_classic()+
scale_x_discrete("Date", breaks=c("0019-10-04 10:30:00", "0019-11-01 00:00:00", "0019-12-01 00:00:00", "0020-01-01 00:00:00", "0020-02-01 00:00:00",
"0020-03-01 00:00:00","0020-04-01 00:00:00", "0020-05-01 00:00:00", "0020-06-01 00:00:00"),
labels=c("10/4/2019", "11/1/2019", "12/1/2019", "1/1/2020", "2/1/2020", "3/1/2020", "4/1/2020", "5/1/2020", "6/1/2020"))+ylab("Salinity")+ggtitle("OY1")
Error:
Error in c("0019-10-04 00:00:00", "0020-06-10 00:00:00") + scale_x_discrete("Date", :
non-numeric argument to binary operator
OY1 Data
> dput(head(OY1))
structure(list(Site = c("OY1", "OY1", "OY1", "OY1", "OY1", "OY1"
), Date = c("10/4/19", "10/4/19", "10/4/19", "10/4/19", "10/4/19",
"10/4/19"), Time = structure(c(37800, 39600, 41400, 43200, 45000,
46800), class = c("hms", "difftime"), units = "secs"), Salinity = c(0.891307674,
0.960962348, 1.015788098, 1.096679068, 1.191564305, 1.272517514
), Date_time = c("10/4/19 10:30:00", "10/4/19 11:00:00", "10/4/19 11:30:00",
"10/4/19 12:00:00", "10/4/19 12:30:00", "10/4/19 13:00:00")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"), problems = structure(list(
row = 4550L, col = "Salinity", expected = "a double", actual = "#VALUE!",
file = "'~/Documents/TAMUG_Thesis/Rollover_Pass_Data/Logger/RP_LoggerData_OY1.csv'"), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")))
Can anyone help? I've been messing with this for far too long. Thanks!
Your example data is helpful because it shows that the Date_time column has class character. That is why scale_x_datetime isn't working as expected.
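You can convert it to datetime format in several ways. One is to use the lubridate package. Your dates look as though they are month/day/year (your axis labels pair "0019-10-04" with "10/4/2019", and the breaks run monthly from October 2019 to June 2020), so you can convert date + time using the function mdy_hms; dmy_hms would silently read "10/4/19" as 10 April 2019.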
See if this gets you started. You'll need to experiment with date_breaks to get what you want.
library(lubridate)
library(dplyr)
library(ggplot2)

OY1 %>%
  # Date_time is character in month/day/year order: the question labels the
  # break "0019-10-04" as "10/4/2019", so "10/4/19" is 4 October 2019. Parse it
  # with mdy_hms(); dmy_hms() would silently read it as 10 April 2019.
  mutate(dt = lubridate::mdy_hms(Date_time)) %>%
  ggplot(aes(dt, Salinity)) +
  geom_point() +
  # Month-first labels, matching the label style used in the question.
  scale_x_datetime(date_labels = "%m/%d/%Y")
Result using your example data:

How to add a label to a dygraph mouseover in R?

I have been trying to plot a time series using dygraph in R. It seems that we can only pass in a dataframe with the dates and their values. All other columns will be automatically ignored. Here I have reproduced my data and the resulting graph:
library(dygraphs)
library(tidyverse)
library(plotly)
library(data.table)

# One timestamp every 8 hours between the two end points.
dates <- seq(as.POSIXct("2021-01-01 05:00:00"), as.POSIXct("2021-01-05 05:00:00"), by = 8 * 3600)

# percentage and team are drawn at random (no seed is set), so values differ between runs.
df <- data.table(
  date = dates,
  percentage = round(runif(length(dates), min = 0, max = 1), digits = 2),
  team = sample(c("A", "B", "C", "D", "E"), size = length(dates), replace = TRUE)
)

# TRUE is spelled out: T is an ordinary variable that can be reassigned.
dygraph(df) %>%
  dyOptions(drawPoints = TRUE, pointSize = 3) %>%
  dyAxis(name = "y", label = "percentage") %>%
  dyLegend(show = "follow")
And here is what the graph looks like:
As we can see, the team corresponding to each date is not shown in the legend. I would like to see the teams on mouseover. To be clear, I could manage to do this using ggplot and ggplotly; however, the plotly package is relatively heavy and I would still like to use dygraph for my Shiny application. Here is how it would look using ggplotly:
# Same data drawn with ggplot2 and handed to plotly for the interactive version.
p <- ggplot(df, aes(date, percentage)) +
  geom_line() +
  geom_point(aes(group = team))
ggplotly(p)
Is there any way I could add labels to a dygraph and achieve the same thing? I would appreciate any help
You could create a custom valueFormatter:
# Build the source of a JavaScript function that maps an x value (epoch
# milliseconds) to a "<date> - Team <team>" label.
#
# df: a table with a `date` column that as.numeric() turns into epoch seconds
#     (e.g. POSIXct) and a `team` column.
# Returns: a single string of the form
#     function(x){return x==<ms> ? "<label>":x==<ms> ? "<label>": "<last label>";}
#   The last row serves as the fallback branch of the ternary chain.
valueFormatter <- function(df) {
  n <- length(df$date)
  if (n == 0L) {
    # Nothing to label: still hand back a syntactically valid function.
    return('function(x){return "";}')
  }
  # Every label, including the fallback, uses the same "<date> - Team <team>" form.
  fallback <- paste0('"', tail(df$date, 1), " - Team ", tail(df$team, 1), '"')
  if (n == 1L) {
    # A single row has no comparisons to chain, only the fallback.
    return(paste0("function(x){return ", fallback, ";}"))
  }
  # JavaScript counts time in milliseconds; sprintf() keeps the value in plain
  # digits instead of letting paste0() switch to scientific notation.
  millis <- sprintf("%.0f", as.numeric(head(df$date, -1)) * 1000)
  branches <- paste0(
    "x==", millis, ' ? "', head(df$date, -1), " - Team ", head(df$team, -1), '"',
    collapse = ":"
  )
  paste0("function(x){return ", branches, ": ", fallback, ";}")
}
Dates are tricky as they are counted in milliseconds in JavaScript, hence the as.numeric(head(df$date,-1))*1000.
The formatter generates a JavaScript function using ifelse shorthand ternary operator:
cat(valueFormatter(df))
function(x){return x==1609473600000 ? "2021-01-01 05:00:00 - Team A":x==1609502400000 ? "2021-01-01 13:00:00 - Team C":x==1609531200000 ? "2021-01-01 21:00:00 - Team E":x==1.60956e+12 ? "2021-01-02 05:00:00 - Team E":x==1609588800000 ? "2021-01-02 13:00:00 - Team A":x==1609617600000 ? "2021-01-02 21:00:00 - Team C":x==1609646400000 ? "2021-01-03 05:00:00 - Team D":x==1609675200000 ? "2021-01-03 13:00:00 - Team C":x==1.609704e+12 ? "2021-01-03 21:00:00 - Team B":x==1609732800000 ? "2021-01-04 05:00:00 - Team A":x==1609761600000 ? "2021-01-04 13:00:00 - Team B":x==1609790400000 ? "2021-01-04 21:00:00 - Team C": "2021-01-05 05:00:00 Team C";}
This function can be used by dyGraphs via the valueFormatter argument :
# Only date and percentage are plotted; the team is supplied through the
# x-axis value formatter generated from the full table.
dygraph(df[, .(date, percentage)]) %>%
  dyOptions(drawPoints = TRUE, pointSize = 3) %>%
  dyAxis("x", valueFormatter = valueFormatter(df))

Weekly data plot in R

I have a big data frame (from 2007 to 2015), with data points about every 2 minutes. I want to plot a graph for every week (from 2007 to 2015), with each week automatically exported as a PNG file to a folder on my computer. Previously, I was able to produce working code for annual, monthly, and daily plots. E.g. for yearly data:
# One PNG per calendar year, written to ~/Documents/Yearly.
for (j in 2007:2015) {
  mypath <- file.path("~", "Documents", "Yearly", paste0("WAO_AIR_Data_", j, ".png"))
  png(filename = mypath, width = 963, height = 690)
  # Close the device even when plotting fails, so no PNG device is left open.
  tryCatch(
    timePlot(
      selectByDate(new_subdata, year = j),
      pollutant = c("CO2", "O2", "APO"),
      date.pad = TRUE,
      pch = c(19, 19, 19),
      cex = 0.2,
      xlab = paste("Month of year in", j),
      ylab = "CO2, O2, and APO concentrations",
      name.pol = c("CO2 (ppm)", "O2 (per meg)", "APO (per meg)")
    ),
    finally = dev.off()
  )
}
The data frame looks like this
tail(new_subdata)
date CO2 O2 APO
1052042 2015-12-31 23:48:45 409.636 -666.39 -353.27
1052043 2015-12-31 23:50:46 409.652 -669.62 -356.41
1052044 2015-12-31 23:52:44 409.679 -669.44 -356.09
1052045 2015-12-31 23:54:46 409.703 -667.07 -353.59
1052046 2015-12-31 23:56:44 409.719 -671.02 -357.46
1052047 2015-12-31 23:58:46 409.734 NA NA
But I don't know how to write the code for weekly plotting. Can anyone help me, please? Thank you so much!
Via ?strptime, you can get the week out of a Date or POSIXct with %U
%U
Week of the year as decimal number (00–53) using Sunday as the first day 1 of the week (and typically with the first Sunday of the year as day 1 of week 1). The US convention.
# %U applied to a date-time value ...
x <- Sys.time()
class(x)
format(x, "%U")
# [1] "POSIXct" "POSIXt"
# [1] "26"
# ... and to a plain date.
x <- Sys.Date()
class(x)
format(x, "%U")
# [1] "Date"
# [1] "26"
Using your example data with minor changes:
# Six sample rows, with dates adjusted so that several different weeks occur.
new_subdata <- read.table(header = TRUE, text = "date CO2 O2 APO
1052042 '2015-10-31 23:48:45' 409.636 -666.39 -353.27
1052043 '2015-10-31 23:50:46' 409.652 -669.62 -356.41
1052044 '2015-11-30 23:52:44' 409.679 -669.44 -356.09
1052045 '2015-11-30 23:54:46' 409.703 -667.07 -353.59
1052046 '2015-12-31 23:56:44' 409.719 -671.02 -357.46
1052047 '2015-12-31 23:58:46' 409.734 NA NA")

# Tag every row with "<year> <week>"; %U is the Sunday-based week of the year.
new_subdata$yr_wk <- format(as.Date(new_subdata$date), "%Y %U")

# The weeks that actually occur in the data, in order of first appearance.
jj <- unique(new_subdata$yr_wk)
# [1] "2015 43" "2015 48" "2015 52"

# Axis limits taken from the whole data set, so every weekly panel shares them.
xr <- range(new_subdata$O2, na.rm = TRUE)
yr <- range(new_subdata$CO2, na.rm = TRUE)

# Lay the weekly panels out on one device.
par(
  mfrow = n2mfrow(length(jj)), las = 1, mar = c(5, 6, 2, 2),
  tcl = .2, mgp = c(3, .25, 0)
)

for (j in jj) {
  mypath <- file.path("~", "Documents", "Yearly", sprintf("WAO_AIR_Data_%s.png", j))
  # The png()/dev.off() pair is left commented out, as in the original answer:
  # png(filename = mypath, width = 963, height = 690)
  plot(CO2 ~ O2, data = subset(new_subdata, yr_wk == j), xlim = xr, ylim = yr)
  # dev.off()
}

How to return condition codes in Rblpapi

I am trying to return condition codes in the getTicks function for Rblpapi. The following code works fine:
# library() stops with an error when the package is missing; require() would
# only return FALSE and let the script fail later.
library(Rblpapi)
blpConnect()

symbol <- "SBH6 Comdty"
# Start and end of the requested tick window.
times <- as.POSIXct(c("2016-01-07 09:30:00", "2016-01-08 10:00:00"), format = "%Y-%m-%d %H:%M:%S")
ticks <- getTicks(symbol, eventType = "TRADE", startTime = times[1], endTime = times[2], verbose = FALSE)
This does not:
# library() stops with an error when the package is missing; require() would
# only return FALSE and let the script fail later.
library(Rblpapi)
blpConnect()

symbol <- "SBH6 Comdty"
times <- as.POSIXct(c("2016-01-07 09:30:00", "2016-01-08 10:00:00"), format = "%Y-%m-%d %H:%M:%S")
# Same call as before plus setCondCodes = TRUE; this is the call that fails
# with the "unused argument" error shown below.
ticks <- getTicks(symbol, eventType = "TRADE", startTime = times[1], endTime = times[2], verbose = FALSE, setCondCodes = TRUE)
Error in getTicks(symbol, eventType = "TRADE", startTime = times[1], endTime = times[2], :
unused argument (setCondCodes = TRUE)
I have the latest package installed, 0.3.2. R version is 3.2.3. Is anyone able to get condition codes returned from Bloomberg via Rblpapi? I have also tried Rbbg but I don't think it is supported.
Thanks!
You can try like this
# Trade ticks for the window from 31 days ago to 28 days ago.
res4 <- getTicks(
  "ADANI IS Equity",
  eventType = "TRADE",
  startTime = Sys.time() - 31 * 24 * 60 * 60,
  endTime = Sys.time() - 28 * 24 * 60 * 60
)
or for multiple ticks
# Trade, bid and ask ticks for the window from 31 days ago to 25 days ago.
res5 <- getMultipleTicks(
  "ADANI IS Equity",
  c("TRADE", "BID", "ASK"),
  Sys.time() - 31 * 24 * 60 * 60,
  Sys.time() - 25 * 24 * 60 * 60
)

Resources