How to aggregate data in 10 minute-steps [duplicate] - r

This question already has answers here:
How to aggregate every 30 minutes in R
(2 answers)
Grouping every n minutes with dplyr
(4 answers)
Create a time interval of 15 minutes from minutely data in R?
(3 answers)
Closed 3 years ago.
I have a dataframe with varying time steps, which I want to convert into even time steps. Every 10 minutes a value should be written and if there is no new value, the previous one should be taken (see 2019-01-01 01:00:00 and 2019-01-01 02:30:00).
date ZUL_T
1 2019-01-01 00:04:00 23.3
2 2019-01-01 00:15:00 23.3
3 2019-01-01 00:26:00 19.9
4 2019-01-01 00:37:00 20.7
5 2019-01-01 00:48:00 21.9
6 2019-01-01 00:59:00 21.9
7 2019-01-01 01:10:00 18.8
8 2019-01-01 01:22:00 18.8
9 2019-01-01 01:33:00 20.7
10 2019-01-01 01:44:00 21.6
11 2019-01-01 01:55:00 19.2
12 2019-01-01 02:06:00 19.2
13 2019-01-01 02:17:00 19.6
14 2019-01-01 02:29:00 19.6
15 2019-01-01 02:40:00 20.5
This is my current code, but there are some time steps missing if there is no value in the DS.
library(lubridate)
lowtime <- min(DS$date)
hightime <- max(DS$date)
# Set the minute and second to the nearest 10 minute value
minute(lowtime) <- floor(minute(lowtime)/10) * 10
minute(hightime) <- ceiling(minute(hightime)/10) * 10
second(lowtime) <- 0
second(hightime) <- 0
# Set the breakpoints at 10 minute intervals
breakpoints <- seq.POSIXt(lowtime, hightime, by = 600)
ZUL_T <- aggregate(ZUL_T ~ cut(date, breaks = breakpoints), DS, mean)
> data
date ZUL_T
1 2019-01-01 00:00:00 23.3
2 2019-01-01 00:10:00 23.3
3 2019-01-01 00:20:00 19.9
4 2019-01-01 00:30:00 20.7
5 2019-01-01 00:40:00 21.9
6 2019-01-01 00:50:00 21.9
7 2019-01-01 01:10:00 18.8
8 2019-01-01 01:20:00 18.8
9 2019-01-01 01:30:00 20.7
10 2019-01-01 01:40:00 21.6
11 2019-01-01 01:50:00 19.2
12 2019-01-01 02:00:00 19.2
13 2019-01-01 02:10:00 19.6
14 2019-01-01 02:20:00 19.6
15 2019-01-01 02:40:00 20.5

We can use floor_date from lubridate package to cut time every 10 mins and take a lower bound, group by it and sum ZUL_T values.
library(dplyr)
library(lubridate)
library(tidyr)
df %>%
group_by(date = floor_date(ymd_hms(date), "10 mins")) %>%
summarise(ZUL_T = sum(ZUL_T))
# date ZUL_T
# <dttm> <dbl>
# 1 2019-01-01 00:00:00 23.3
# 2 2019-01-01 00:10:00 23.3
# 3 2019-01-01 00:20:00 19.9
# 4 2019-01-01 00:30:00 20.7
# 5 2019-01-01 00:40:00 21.9
# 6 2019-01-01 00:50:00 21.9
# 7 2019-01-01 01:10:00 18.8
# 8 2019-01-01 01:20:00 18.8
# 9 2019-01-01 01:30:00 20.7
#10 2019-01-01 01:40:00 21.6
#11 2019-01-01 01:50:00 19.2
#12 2019-01-01 02:00:00 19.2
#13 2019-01-01 02:10:00 19.6
#14 2019-01-01 02:20:00 19.6
#15 2019-01-01 02:40:00 20.5
and then use complete and fill to complete the missing combinations and fill the NA values with previous values.
df %>%
group_by(date = floor_date(ymd_hms(date), "10 mins")) %>%
summarise(ZUL_T = sum(ZUL_T))
complete(date = seq(min(date), max(date), "10 mins")) %>%
fill(ZUL_T)
# date ZUL_T
# <dttm> <dbl>
# 1 2019-01-01 00:00:00 23.3
# 2 2019-01-01 00:10:00 23.3
# 3 2019-01-01 00:20:00 19.9
# 4 2019-01-01 00:30:00 20.7
# 5 2019-01-01 00:40:00 21.9
# 6 2019-01-01 00:50:00 21.9
# 7 2019-01-01 01:00:00 21.9
# 8 2019-01-01 01:10:00 18.8
# 9 2019-01-01 01:20:00 18.8
#10 2019-01-01 01:30:00 20.7
#11 2019-01-01 01:40:00 21.6
#12 2019-01-01 01:50:00 19.2
#13 2019-01-01 02:00:00 19.2
#14 2019-01-01 02:10:00 19.6
#15 2019-01-01 02:20:00 19.6
#16 2019-01-01 02:30:00 19.6
#17 2019-01-01 02:40:00 20.5
data
df <- structure(list(date = structure(1:15, .Label = c("2019-01-01 00:04:00",
"2019-01-01 00:15:00", "2019-01-01 00:26:00", "2019-01-01 00:37:00",
"2019-01-01 00:48:00", "2019-01-01 00:59:00", "2019-01-01 01:10:00",
"2019-01-01 01:22:00", "2019-01-01 01:33:00", "2019-01-01 01:44:00",
"2019-01-01 01:55:00", "2019-01-01 02:06:00", "2019-01-01 02:17:00",
"2019-01-01 02:29:00", "2019-01-01 02:40:00"), class = "factor"),
ZUL_T = c(23.3, 23.3, 19.9, 20.7, 21.9, 21.9, 18.8, 18.8,
20.7, 21.6, 19.2, 19.2, 19.6, 19.6, 20.5)),
class = "data.frame", row.names = c(NA,-15L))

You could merge with the breakpoints as data frame.
# first, you probably need 10 min later in time
minute(hightime) <- ceiling((minute(max(DS$date)) + 10)/10) * 10
breakpoints <- seq.POSIXt(lowtime, hightime, by=600)
Use aggregate in classic list notation to get proper names.
ZUL_T <- aggregate(list(ZUL_T=DS$ZUL_T), list(date=cut(DS$date, breaks=breakpoints)), mean)
Now merge,
ZUL_T <- merge(transform(ZUL_T, date=as.character(date)),
data.frame(date=as.character(breakpoints[-length(breakpoints)]),
stringsAsFactors=F),
all=TRUE)
and replace NA values wit values - 1.
ZUL_T$ZUL_T[is.na(ZUL_T$ZUL_T)] <- ZUL_T$ZUL_T[which(is.na(ZUL_T$ZUL_T)) - 1]
ZUL_T
# date ZUL_T
# 1 2019-01-01 00:00:00 23.3
# 2 2019-01-01 00:10:00 23.3
# 3 2019-01-01 00:20:00 19.9
# 4 2019-01-01 00:30:00 20.7
# 5 2019-01-01 00:40:00 21.9
# 6 2019-01-01 00:50:00 21.9
# 7 2019-01-01 01:00:00 21.9
# 8 2019-01-01 01:10:00 18.8
# 9 2019-01-01 01:20:00 18.8
# 10 2019-01-01 01:30:00 20.7
# 11 2019-01-01 01:40:00 21.6
# 12 2019-01-01 01:50:00 19.2
# 13 2019-01-01 02:00:00 19.2
# 14 2019-01-01 02:10:00 19.6
# 15 2019-01-01 02:20:00 19.6
# 16 2019-01-01 02:30:00 19.6
# 17 2019-01-01 02:40:00 20.5

Related

How to create a tibble from list of tibbles with different number of rows?

I have a list of tibbles that look like this:
> head(temp)
$AT
# A tibble: 8,784 × 2
price_eur datetime
<dbl> <dttm>
1 50.9 2021-01-01 00:00:00
2 48.2 2021-01-01 01:00:00
3 44.7 2021-01-01 02:00:00
4 42.9 2021-01-01 03:00:00
5 40.4 2021-01-01 04:00:00
6 40.2 2021-01-01 05:00:00
7 39.6 2021-01-01 06:00:00
8 40.1 2021-01-01 07:00:00
9 41.3 2021-01-01 08:00:00
10 44.9 2021-01-01 09:00:00
# … with 8,774 more rows
$IE
# A tibble: 7,198 × 2
price_eur datetime
<dbl> <dttm>
1 54.0 2021-01-01 01:00:00
2 53 2021-01-01 02:00:00
3 51.2 2021-01-01 03:00:00
4 48.1 2021-01-01 04:00:00
5 47.3 2021-01-01 05:00:00
6 47.6 2021-01-01 06:00:00
7 45.4 2021-01-01 07:00:00
8 43.4 2021-01-01 08:00:00
9 47.8 2021-01-01 09:00:00
10 51.8 2021-01-01 10:00:00
# … with 7,188 more rows
$`IT-Calabria`
# A tibble: 8,736 × 2
price_eur datetime
<dbl> <dttm>
1 50.9 2021-01-01 00:00:00
2 48.2 2021-01-01 01:00:00
3 44.7 2021-01-01 02:00:00
4 42.9 2021-01-01 03:00:00
5 40.4 2021-01-01 04:00:00
6 40.2 2021-01-01 05:00:00
7 39.6 2021-01-01 06:00:00
8 40.1 2021-01-01 07:00:00
9 41.3 2021-01-01 08:00:00
10 41.7 2021-01-01 09:00:00
# … with 8,726 more rows
The number of rows is different because there are missing observations, usually one or several days.
Ideally I need a tibble with a single date time index and corresponding columns with NAs when there is missing data and I'm stuck here.
We can do a full join by 'datetime'
library(dplyr)
library(purrr)
reduce(temp, full_join, by = "datetime")
If we need to rename the column 'price_eur' before the join, loop over the list with imap, rename the 'price_eur' to the corresponding list name (.y) and do the join within reduce
imap(temp, ~ .x %>%
rename(!! .y := price_eur)) %>%
reduce(full_join, by = 'datetime')

Remove time periods from daily data

I have a dataset of hourly observations with the format %Y%m%d %H:%M that results like this 2020-03-01 01:00:00 for various days. How can filter filter out a certain time interval? My goal is to maintain the observations between 08:00 and 20:00.
You can extract the hour value from the column and keep the rows between 8 and 20 hours.
df$hour <- as.integer(format(df$datetime, '%H'))
result <- subset(df, hour >= 8 & hour <= 20)
result
# datetime hour
#9 2020-01-01 08:00:00 8
#10 2020-01-01 09:00:00 9
#11 2020-01-01 10:00:00 10
#12 2020-01-01 11:00:00 11
#13 2020-01-01 12:00:00 12
#14 2020-01-01 13:00:00 13
#15 2020-01-01 14:00:00 14
#16 2020-01-01 15:00:00 15
#17 2020-01-01 16:00:00 16
#18 2020-01-01 17:00:00 17
#19 2020-01-01 18:00:00 18
#20 2020-01-01 19:00:00 19
#21 2020-01-01 20:00:00 20
#33 2020-01-02 08:00:00 8
#34 2020-01-02 09:00:00 9
#35 2020-01-02 10:00:00 10
#...
#...
data
df <- data.frame(datetime = seq(as.POSIXct('2020-01-01 00:00:00', tz = 'UTC'),
as.POSIXct('2020-01-10 00:00:00', tz = 'UTC'), 'hour'))
between(hour( your_date_value ), 8, 19)

Mutate a column in a tsibble dataframe, applying a Box-Cox transformation

I am a big fan of Hyndman's packages, but stumbled with Box-Cox transformation.
I have a dataframe
class(chicago_sales)
[1] "tbl_ts" "tbl_df" "tbl" "data.frame"
I am trying to mutate an extra column, where the Mean_price variable will be transformed.
foo <- chicago_sales %>%
mutate(bc = BoxCox(x = chicago_sales$Median_price, lambda =
BoxCox.lambda(chicago_sales$Median_price)))
gives me some result (probably wrong too) and cannot apply autoplot.
I also tried to apply the code from Hyndman's book, but failed.
What am I doing wrong? Thanks!
UPDATED:
Issue, inside tsibbles, when using dplyr, you do not call chicago_sales$Median_price, but just Median_price. When using tsibbles I would advice using fable and fabletools, but if you are using forecast, it should work like this:
library(tsibble)
library(dplyr)
library(forecast)
pedestrian %>%
mutate(bc = BoxCox(Count, BoxCox.lambda(Count)))
# A tsibble: 66,037 x 6 [1h] <Australia/Melbourne>
# Key: Sensor [4]
Sensor Date_Time Date Time Count bc
<chr> <dttm> <date> <int> <int> <dbl>
1 Birrarung Marr 2015-01-01 00:00:00 2015-01-01 0 1630 11.3
2 Birrarung Marr 2015-01-01 01:00:00 2015-01-01 1 826 9.87
3 Birrarung Marr 2015-01-01 02:00:00 2015-01-01 2 567 9.10
4 Birrarung Marr 2015-01-01 03:00:00 2015-01-01 3 264 7.65
5 Birrarung Marr 2015-01-01 04:00:00 2015-01-01 4 139 6.52
6 Birrarung Marr 2015-01-01 05:00:00 2015-01-01 5 77 5.54
7 Birrarung Marr 2015-01-01 06:00:00 2015-01-01 6 44 4.67
8 Birrarung Marr 2015-01-01 07:00:00 2015-01-01 7 56 5.04
9 Birrarung Marr 2015-01-01 08:00:00 2015-01-01 8 113 6.17
10 Birrarung Marr 2015-01-01 09:00:00 2015-01-01 9 166 6.82
# ... with 66,027 more rows
I used a built in dataset from the tsibble package as you did not provide a dput of chicago_sales.

Mean over a certain time-period in R

I have hourly data of CO2 values and I would like to know what is the CO2 concentration during the night (e.g. 9pm-7am). A reproducible example:
library(tidyverse); library(lubridate)
times <- seq(ymd_hms("2020-01-01 08:00:00"),
ymd_hms("2020-01-04 08:00:00"), by = "1 hours")
values <- runif(length(times), 1, 15)
df <- tibble(times, values)
How to get mean nightime values (e.g. between 9pm and 7am)? Of course I can filter like this:
df <- df %>%
filter(!hour(times) %in% c(8:20))
And then give id to each observation during the night
df$ID <- rep(LETTERS[1:round(nrow(df)/11)],
times = 1, each = 11)
And finally group and summarise
df_grouped <- df %>%
group_by(., ID) %>%
summarise(value_mean =mean(values))
But this is not a good way I am sure. How to do this better? Especially the part where we give ID to the nighttime values
You can use data.table::frollmean to get the means for a certain window time. In your case you want the means for the last 10 hours, so we set the n argument of the function to 10:
> df$means <- data.table::frollmean(df$values, 10)
> df
> head(df, 20)
# A tibble: 20 x 3
times values means
<dttm> <dbl> <dbl>
1 2020-01-01 08:00:00 4.15 NA
2 2020-01-01 09:00:00 6.24 NA
3 2020-01-01 10:00:00 5.17 NA
4 2020-01-01 11:00:00 9.20 NA
5 2020-01-01 12:00:00 12.3 NA
6 2020-01-01 13:00:00 2.93 NA
7 2020-01-01 14:00:00 9.12 NA
8 2020-01-01 15:00:00 9.72 NA
9 2020-01-01 16:00:00 12.0 NA
10 2020-01-01 17:00:00 13.4 8.41
11 2020-01-01 18:00:00 10.2 9.01
12 2020-01-01 19:00:00 1.97 8.59
13 2020-01-01 20:00:00 11.9 9.26
14 2020-01-01 21:00:00 8.84 9.23
15 2020-01-01 22:00:00 10.1 9.01
16 2020-01-01 23:00:00 3.76 9.09
17 2020-01-02 00:00:00 9.98 9.18
18 2020-01-02 01:00:00 5.56 8.76
19 2020-01-02 02:00:00 5.22 8.09
20 2020-01-02 03:00:00 6.36 7.39
Each row in the mean column will be the mean of that same row value column with the 9 last rows of the value column. Of course there will be some NAs.
Maybe you should give some look to the tsibble package, built to manipulate time series.
You can parametrize the difference between the times you want, but they need to be evenly spaced in your data to use this solution:
n <- diff(which(grepl('20:00:00|08:00:00', df$times))) + 1
n <- unique(n)
df$means <- data.table::frollmean(df$values, n)
> head(df, 20)
# A tibble: 20 x 3
times values means
<dttm> <dbl> <dbl>
1 2020-01-01 08:00:00 11.4 NA
2 2020-01-01 09:00:00 7.03 NA
3 2020-01-01 10:00:00 7.15 NA
4 2020-01-01 11:00:00 6.91 NA
5 2020-01-01 12:00:00 8.18 NA
6 2020-01-01 13:00:00 4.70 NA
7 2020-01-01 14:00:00 13.8 NA
8 2020-01-01 15:00:00 5.16 NA
9 2020-01-01 16:00:00 12.3 NA
10 2020-01-01 17:00:00 3.81 NA
11 2020-01-01 18:00:00 3.09 NA
12 2020-01-01 19:00:00 9.89 NA
13 2020-01-01 20:00:00 1.24 7.28
14 2020-01-01 21:00:00 8.07 7.02
15 2020-01-01 22:00:00 5.59 6.91
16 2020-01-01 23:00:00 5.77 6.81
17 2020-01-02 00:00:00 10.7 7.10
18 2020-01-02 01:00:00 3.44 6.73
19 2020-01-02 02:00:00 10.3 7.16
20 2020-01-02 03:00:00 4.61 6.45

Cut a POSIXct by specific time for daily means

I am interested in calculating averages over specific time periods in a time series data set.
Given a time series like this:
dtm=as.POSIXct("2007-03-27 05:00", tz="GMT")+3600*(1:240)
Count<-c(1:240)
DF<-data.frame(dtm,Count)
In the past I have been able to calculate daily averages with
DF$Day<-cut(DF$dtm,breaks="day")
Day_Avg<-aggregate(DF$Count~Day,DF,mean)
But now I am trying to cut up the day into specific time periods and I'm not sure how to set my "breaks".
As opposed to a daily average from 0:00:24:00, How for example could I get a Noon to Noon average?
Or more fancy, how could I set up a Noon to Noon average excluding the night times of 7PM to 6AM (or conversely only including the daylight hours of 6AM- 7PM).
xts is perfect package for timeseries analysis
library(xts)
originalTZ <- Sys.getenv("TZ")
Sys.setenv(TZ = "GMT")
data.xts <- as.xts(1:240, as.POSIXct("2007-03-27 05:00", tz = "GMT") + 3600 * (1:240))
head(data.xts)
## [,1]
## 2007-03-27 06:00:00 1
## 2007-03-27 07:00:00 2
## 2007-03-27 08:00:00 3
## 2007-03-27 09:00:00 4
## 2007-03-27 10:00:00 5
## 2007-03-27 11:00:00 6
# You can filter data using ISO-style subsetting
data.xts.filterd <- data.xts["T06:00/T19:00"]
# You can use builtin functions to apply any function FUN on daily data.
apply.daily(data.xts.filtered, mean)
## [,1]
## 2007-03-27 18:00:00 7.5
## 2007-03-28 18:00:00 31.5
## 2007-03-29 18:00:00 55.5
## 2007-03-30 18:00:00 79.5
## 2007-03-31 18:00:00 103.5
## 2007-04-01 18:00:00 127.5
## 2007-04-02 18:00:00 151.5
## 2007-04-03 18:00:00 175.5
## 2007-04-04 18:00:00 199.5
## 2007-04-05 18:00:00 223.5
# OR
# now let's say you want to find noon to noon average.
period.apply(data.xts, c(0, which(.indexhour(data.xts) == 11)), FUN = mean)
## [,1]
## 2007-03-27 11:00:00 3.5
## 2007-03-28 11:00:00 18.5
## 2007-03-29 11:00:00 42.5
## 2007-03-30 11:00:00 66.5
## 2007-03-31 11:00:00 90.5
## 2007-04-01 11:00:00 114.5
## 2007-04-02 11:00:00 138.5
## 2007-04-03 11:00:00 162.5
## 2007-04-04 11:00:00 186.5
## 2007-04-05 11:00:00 210.5
# now if you want to exclude time from 7 PM to 6 AM
data.xts.filtered <- data.xts[!data.xts %in% data.xts["T20:00/T05:00"]]
head(data.xts.filtered, 20)
## [,1]
## 2007-03-27 06:00:00 1
## 2007-03-27 07:00:00 2
## 2007-03-27 08:00:00 3
## 2007-03-27 09:00:00 4
## 2007-03-27 10:00:00 5
## 2007-03-27 11:00:00 6
## 2007-03-27 12:00:00 7
## 2007-03-27 13:00:00 8
## 2007-03-27 14:00:00 9
## 2007-03-27 15:00:00 10
## 2007-03-27 16:00:00 11
## 2007-03-27 17:00:00 12
## 2007-03-27 18:00:00 13
## 2007-03-27 19:00:00 14
## 2007-03-28 06:00:00 25
## 2007-03-28 07:00:00 26
## 2007-03-28 08:00:00 27
## 2007-03-28 09:00:00 28
## 2007-03-28 10:00:00 29
## 2007-03-28 11:00:00 30
period.apply(data.xts.filtered, c(0, which(.indexhour(data.xts.filtered) == 11)), FUN = mean)
## [,1]
## 2007-03-27 11:00:00 3.50000
## 2007-03-28 11:00:00 17.78571
## 2007-03-29 11:00:00 41.78571
## 2007-03-30 11:00:00 65.78571
## 2007-03-31 11:00:00 89.78571
## 2007-04-01 11:00:00 113.78571
## 2007-04-02 11:00:00 137.78571
## 2007-04-03 11:00:00 161.78571
## 2007-04-04 11:00:00 185.78571
## 2007-04-05 11:00:00 209.78571
Sys.setenv(TZ = originalTZ)
Let me quickly repeat your code.
dtm <- as.POSIXct("2007-03-27 05:00", tz="GMT")+3600*(1:240)
Count <- c(1:240)
DF<-data.frame(dtm,Count)
DF$Day<-cut(DF$dtm,breaks="day")
Day_Avg<-aggregate(DF$Count~Day,DF,mean)
If you offset each time by 12 hours in the function call, you can still use cut with breaks on "day". I will save the day that the noon to noon starts on, so I will subtract 12 hours.
# Get twelve hours in seconds
timeOffset <- 60*60*12
# Subtract the offset to get the start day of the noon to noon
DF$Noon_Start_Day <- cut((DF$dtm - timeOffset), breaks="day")
# Get the mean
NtN_Avg <- aggregate(DF$Count ~ Noon_Start_Day, DF, mean)
One way to exclude certain hours is to convert the dates to POSIXlt. Then you can access hour among other things.
# Indicate which times are good (use whatever boolean test is needed here)
goodTimes <- !(as.POSIXlt(DF$dtm)$hour >= 19) & !(as.POSIXlt(DF$dtm)$hour <= 6)
new_NtN_Avg <- aggregate(Count ~ Noon_Start_Day, data=subset(DF, goodTimes), mean)
I found some help at this question on stackoverflow: r-calculate-means-for-subset-of-a-group
The noon-to-noon problem can easily be solved numerically. The key is that the start of a (GMT) day has a time_t value that is always divisible by 86400. This is specified by POSIX. For example, see: http://en.wikipedia.org/wiki/Unix_time
cuts <- unique(as.numeric(DF$dtm) %/% (86400/2)) * (86400/2) # half-days
cuts <- c(cuts, cuts[length(cuts)]+(86400/2)) # One more at the end
cuts <- as.POSIXct(cuts, tz="GMT", origin="1970-01-01") # Familiar format
DF$halfday <- cut(DF$dtm, cuts) # This is the cut you want.
Halfday_Avg <- aggregate(Count~halfday, data=DF, FUN=mean)
Halfday_Avg
## halfday Count
## 1 2007-03-27 00:00:00 3.5
## 2 2007-03-27 12:00:00 12.5
## 3 2007-03-28 00:00:00 24.5
## 4 2007-03-28 12:00:00 36.5
## 5 2007-03-29 00:00:00 48.5
## 6 2007-03-29 12:00:00 60.5
## 7 2007-03-30 00:00:00 72.5
## 8 2007-03-30 12:00:00 84.5
## 9 2007-03-31 00:00:00 96.5
## 10 2007-03-31 12:00:00 108.5
## 11 2007-04-01 00:00:00 120.5
## 12 2007-04-01 12:00:00 132.5
## 13 2007-04-02 00:00:00 144.5
## 14 2007-04-02 12:00:00 156.5
## 15 2007-04-03 00:00:00 168.5
## 16 2007-04-03 12:00:00 180.5
## 17 2007-04-04 00:00:00 192.5
## 18 2007-04-04 12:00:00 204.5
## 19 2007-04-05 00:00:00 216.5
## 20 2007-04-05 12:00:00 228.5
## 21 2007-04-06 00:00:00 237.5
Now to extend this to solve the rest of the problem. Given here is the 6AM-7PM time range.
intraday <- as.numeric(DF$dtm) %% 86400
# Subset DF by the chosen range
New_Avg <- aggregate(Count~halfday, data=DF[intraday >= 6*3600 & intraday <= 19*3600,], FUN=mean)
New_Avg
## halfday Count
## 1 2007-03-27 00:00:00 3.5
## 2 2007-03-27 12:00:00 10.5
## 3 2007-03-28 00:00:00 27.5
## 4 2007-03-28 12:00:00 34.5
## 5 2007-03-29 00:00:00 51.5
## 6 2007-03-29 12:00:00 58.5
## 7 2007-03-30 00:00:00 75.5
## 8 2007-03-30 12:00:00 82.5
## 9 2007-03-31 00:00:00 99.5
## 10 2007-03-31 12:00:00 106.5
## 11 2007-04-01 00:00:00 123.5
## 12 2007-04-01 12:00:00 130.5
## 13 2007-04-02 00:00:00 147.5
## 14 2007-04-02 12:00:00 154.5
## 15 2007-04-03 00:00:00 171.5
## 16 2007-04-03 12:00:00 178.5
## 17 2007-04-04 00:00:00 195.5
## 18 2007-04-04 12:00:00 202.5
## 19 2007-04-05 00:00:00 219.5
## 20 2007-04-05 12:00:00 226.5

Resources