Related
I have two fields in a dataframe that are of the class "times". Call it Time1 and Time2. I am trying to find the time difference between the two.
# difftime() converts both arguments with as.POSIXct(); the error shown below
# is raised by that conversion of Time1.
CombinedFrame2$Duration <- difftime(CombinedFrame2$Time1, CombinedFrame2$Time2)
Error in as.POSIXct.numeric(CombinedFrame2$Time1) :
'origin' must be supplied
How do I get the classes to cooperate to do the calculation?
Example:
Time1 Time2 Duration
5:30:00 6:24:00 0:54:00
$ Time1 : POSIXlt, format: "2019-07-10 16:07:00" "2019-07-10 22:05:00" "2019-07-10 22:20:00" "2019-07-10 22:43:00" ...
$ Time2 : POSIXlt, format: "2019-07-10 22:05:00" "2019-07-10 22:20:00" "2019-07-10 22:43:00" "2019-07-10 23:15:00" ...
> dput(head(CombinedFrame2[,c("Time1", "Time2")]))
structure(list(Time1 = structure(list(sec = c(0, 0, 0, 0,
0, 0), min = c(7L, 5L, 20L, 43L, 15L, 35L), hour = c(16L, 22L,
22L, 22L, 23L, 23L), mday = c(11L, 11L, 11L, 11L, 11L, 11L),
mon = c(6L, 6L, 6L, 6L, 6L, 6L), year = c(119L, 119L, 119L,
119L, 119L, 119L), wday = c(4L, 4L, 4L, 4L, 4L, 4L), yday = c(191L,
191L, 191L, 191L, 191L, 191L), isdst = c(1L, 1L, 1L, 1L,
1L, 1L), zone = c("EDT", "EDT", "EDT", "EDT", "EDT", "EDT"
), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_)), class = c("POSIXlt", "POSIXt"
)), Time2 = structure(list(sec = c(0, 0, 0, 0, 0, 0), min = c(5L,
20L, 43L, 15L, 35L, 55L), hour = c(22L, 22L, 22L, 23L, 23L, 23L
), mday = c(11L, 11L, 11L, 11L, 11L, 11L), mon = c(6L, 6L, 6L,
6L, 6L, 6L), year = c(119L, 119L, 119L, 119L, 119L, 119L), wday = c(4L,
4L, 4L, 4L, 4L, 4L), yday = c(191L, 191L, 191L, 191L, 191L, 191L
), isdst = c(1L, 1L, 1L, 1L, 1L, 1L), zone = c("EDT", "EDT",
"EDT", "EDT", "EDT", "EDT"), gmtoff = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), class = c("POSIXlt",
"POSIXt"))), row.names = c("1:1", "1:2", "1:3", "1:4", "1:5",
"1:6"), class = "data.frame")
You need to make sure that your time is formatted correctly. See the code below.
You can use strptime() to parse your time strings (hours, minutes, and seconds) into date-time objects, which difftime() can then subtract.
# Parse the clock times into date-time objects. strptime() fills in today's
# date when the format has no date part, so parse in UTC: in a local time zone
# the difference could be off by an hour on a daylight-saving change day.
time1 <- "5:30:00"
time2 <- "6:24:00"
time1a <- strptime(time1, format = "%H:%M:%S", tz = "UTC")
time2a <- strptime(time2, format = "%H:%M:%S", tz = "UTC")
# State the units explicitly; the default ("auto") chooses them from the size
# of the difference, so results would not be comparable from one call to the
# next.
duration <- difftime(time2a, time1a, units = "mins")
I am trying to use the aggregate function to get 100 Hz data into 1 minute averages. However, when I use this function the 1-min averages are incorrect. A sample of the data is below. I am using the following code to calculate the 1-min values. The code does not break but the calculations are incorrect.
# Average each gyroscope axis within 1-minute bins of df$Timestamp.
# NOTE(review): cut(..., "1 min") only forms minute bins when Timestamp is a
# date-time (POSIXt) object -- confirm it has been parsed from text first.
aggregate(list(X = df$`Gyroscope X`,
Y = df$`Gyroscope Y`,
Z = df$`Gyroscope Z`),
list(minofday = cut(df$Timestamp, "1 min")),mean)
Timestamp Gyroscope X Gyroscope Y Gyroscope Z
2018-07-10T10:25:00.0000000 41.381838 -21.667482 -118.896492
2018-07-10T10:25:00.0100000 48.046268 -12.399903 -110.917976
2018-07-10T10:25:00.0200000 49.102786 -7.36084 -106.485602
2018-07-10T10:25:00.0300000 44.338382 -9.215699 -102.296759
2018-07-10T10:25:00.0400000 34.724123 -11.308594 -96.108404
2018-07-10T10:25:00.0500000 19.622804 -15.225221 -88.122564
2018-07-10T10:25:00.0600000 13.240968 -26.539308 -85.274663
2018-07-10T10:25:00.0700000 13.397218 -31.933596 -80.127568
2018-07-10T10:25:00.0800000 16.333009 -29.663088 -73.027348
2018-07-10T10:25:00.0900000 17.384645 -29.745485 -67.694096
2018-07-10T10:25:00.1000000 16.546632 -30.08423 -67.565922
Assuming OP's data varies by the min (note the modified data), here is how to do it with base R and dplyr:
# Parse the ISO-8601 strings into POSIXct. "%OS" (rather than "%S") keeps the
# fractional seconds of the 100 Hz samples instead of silently dropping them.
df$Timestamp <- as.POSIXct(df$Timestamp, format = "%Y-%m-%dT%H:%M:%OS")
# Bin the rows into 1-minute intervals and average each gyroscope axis per bin.
aggregate(
  list(X = df$Gyroscope_X, Y = df$Gyroscope_Y, Z = df$Gyroscope_Z),
  by = list(minofday = cut(df$Timestamp, "1 min")),
  FUN = mean
)
or a more concise way:
# Formula interface: rename the three value columns to X/Y/Z, attach the
# 1-minute bin as `minofday`, then average every other column per bin.
aggregate(
  . ~ minofday,
  data = cbind(
    setNames(df[, -1], c("X", "Y", "Z")),
    minofday = cut(df$Timestamp, breaks = "1 min")
  ),
  FUN = mean
)
Result:
minofday X Y Z
1 2018-07-10 10:24:00 48.57453 -9.880371 -108.70179
2 2018-07-10 10:25:00 27.78422 -19.314983 -95.13774
3 2018-07-10 10:26:00 16.85883 -29.704286 -70.36072
4 2018-07-10 10:27:00 16.54663 -30.084230 -67.56592
With lubridate and summarize_all from dplyr:
library(dplyr)
library(lubridate)
# Parse the timestamps, bin them into 1-minute intervals, and average only the
# gyroscope columns per bin. Timestamp is dropped before summarising so no
# mean timestamp is computed and then thrown away; across() (dplyr >= 1.0.0)
# replaces the superseded summarize_all().
df %>%
  mutate(minofday = cut(ymd_hms(Timestamp), "1 min")) %>%
  select(-Timestamp) %>%
  group_by(minofday) %>%
  summarize(across(everything(), mean))
Result:
# A tibble: 4 x 4
minofday Gyroscope_X Gyroscope_Y Gyroscope_Z
<fct> <dbl> <dbl> <dbl>
1 2018-07-10 10:24:00 48.6 -9.88 -109.
2 2018-07-10 10:25:00 27.8 -19.3 -95.1
3 2018-07-10 10:26:00 16.9 -29.7 -70.4
4 2018-07-10 10:27:00 16.5 -30.1 -67.6
Data:
# Sample data: the question's 11 readings, with the minute part of some
# timestamps edited so the rows span 10:24-10:27 and produce several
# 1-minute bins.
df <- read.table(text = " Timestamp Gyroscope_X Gyroscope_Y Gyroscope_Z
2018-07-10T10:25:00.0000000 41.381838 -21.667482 -118.896492
2018-07-10T10:24:00.0100000 48.046268 -12.399903 -110.917976
2018-07-10T10:24:00.0200000 49.102786 -7.36084 -106.485602
2018-07-10T10:25:00.0300000 44.338382 -9.215699 -102.296759
2018-07-10T10:25:00.0400000 34.724123 -11.308594 -96.108404
2018-07-10T10:25:00.0500000 19.622804 -15.225221 -88.122564
2018-07-10T10:25:00.0600000 13.240968 -26.539308 -85.274663
2018-07-10T10:25:00.0700000 13.397218 -31.933596 -80.127568
2018-07-10T10:26:00.0800000 16.333009 -29.663088 -73.027348
2018-07-10T10:26:00.0900000 17.384645 -29.745485 -67.694096
2018-07-10T10:27:00.1000000 16.546632 -30.08423 -67.565922", header = TRUE)
Since you are dealing with timestamps the xts package has a lot of functions that can help you. For rolling up timestamps period.apply can help you out. The endpoints part can roll up the data from microseconds all the way up to years.
library(xts)
# Build the xts object from the value columns only; the timestamp column is
# supplied separately as the index through order.by.
df1_xts <- xts(df1[, -1], order.by = df1$Timestamp)
# Roll up to minutes: endpoints() locates the last row of each minute and
# period.apply() applies colMeans to the rows between consecutive endpoints.
period.apply(df1_xts, endpoints(df1_xts, on = "mins"), colMeans)
Gyroscope_X Gyroscope_Y Gyroscope_Z
2018-07-10 10:25:00 28.55624 -20.46759 -90.59249
If your timestamp column is not yet a date-time object, you can use this:
# Parse the ISO-8601 strings, keeping fractional seconds via "%OS". strptime()
# returns POSIXlt, a list-based class that is awkward as a data-frame column,
# so convert the result to POSIXct before storing it.
df1$Timestamp <- as.POSIXct(
  strptime(df1$Timestamp, format = "%Y-%m-%dT%H:%M:%OS")
)
data:
df1 <- structure(list(Timestamp = structure(list(sec = c(0, 0.01, 0.02,
0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1), min = c(25L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L), hour = c(10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), mday = c(10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), mon = c(6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), year = c(118L, 118L,
118L, 118L, 118L, 118L, 118L, 118L, 118L, 118L, 118L), wday = c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), yday = c(190L, 190L,
190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L), isdst = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), zone = c("CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST"), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_)), class = c("POSIXlt", "POSIXt")),
Gyroscope_X = c(41.381838, 48.046268, 49.102786, 44.338382,
34.724123, 19.622804, 13.240968, 13.397218, 16.333009, 17.384645,
16.546632), Gyroscope_Y = c(-21.667482, -12.399903, -7.36084,
-9.215699, -11.308594, -15.225221, -26.539308, -31.933596,
-29.663088, -29.745485, -30.08423), Gyroscope_Z = c(-118.896492,
-110.917976, -106.485602, -102.296759, -96.108404, -88.122564,
-85.274663, -80.127568, -73.027348, -67.694096, -67.565922
)), row.names = c(NA, -11L), class = "data.frame")
I have 9x3 dataframe DATA, where one column has timestamps (DateTime), one prices (Close), and one binary values (FOMCBinary).
I want to add column SignalBinary recording a 1 IF Close < an X value (1126 in this example) AND FOMCBinary > 0 in any of the two rows below, but only if SignalBinary = 0 in the row below (i.e. do not want consecutive 1s).
In the example here I need to record a 1 under SignalBinary only at 14:15:00. My coding attempt is instead recording a 1 at 14:15:00 and at 14:30:00. Should be fairly simple, don't understand why my code is not producing the desired result. How could I get this fixed?
DATA <- structure(list(DateTime = structure(list(sec = c(0, 0, 0, 0,0, 0, 0, 0, 0), min = c(30L, 15L, 0L, 45L, 30L, 15L, 0L, 45L,30L), hour = c(15L, 15L, 15L, 14L, 14L, 14L, 14L, 13L, 13L),mday = c(27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L), mon = c(0L,0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), year = c(116L, 116L, 116L,116L, 116L, 116L, 116L, 116L, 116L), wday = c(3L, 3L, 3L,3L, 3L, 3L, 3L, 3L, 3L), yday = c(26L, 26L, 26L, 26L, 26L,26L, 26L, 26L, 26L), isdst = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,0L, 0L), zone = c("EST", "EST", "EST", "EST", "EST", "EST","EST", "EST", "EST"), gmtoff = c(NA_integer_, NA_integer_,NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour","mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), Close = c(1127.2, 1127.5,1126.9, 1128.3, 1125.4, 1122.7, 1122.8, 1117.3, 1116), FOMCBinary = c(0,0, 0, 0, 0, 0, 1, 0, 0)), .Names = c("DateTime", "Close", "FOMCBinary"), row.names = 2131:2139, class = "data.frame")
Xvalue = 1126
# For comparing lagged or forward rows.
#
# Shift an atomic vector by `shiftLen` positions: a positive value looks
# forward (each element takes the value `shiftLen` places after it), a
# negative value looks backward. Positions that fall outside `x` become NA,
# so the result has the same length as `x`.
rowShift <- function(x, shiftLen = 1L) {
  # seq_along() keeps an empty `x` empty; (1 + s):(length(x) + s) would count
  # backwards and return two NAs for it.
  r <- seq_along(x) + shiftLen
  # Indices below 1 are off the front of the vector; make them NA so `[`
  # yields NA there instead of dropping (0) or rejecting (negative) them.
  r[r < 1] <- NA
  x[r]
}
# Flag a row with 1 when Close is below Xvalue and FOMCBinary is positive
# either one row below, or two rows below while the row directly below is 0.
# The test reads only Close and FOMCBinary -- it never consults SignalBinary
# of the row below -- so two adjacent rows can both receive a 1.
DATA$SignalBinary <- ifelse(
DATA$Close < Xvalue & (
rowShift(DATA$FOMCBinary, +1) > 0 |
(rowShift(DATA$FOMCBinary, +2) > 0 & rowShift(DATA$FOMCBinary, +1) == 0))
, 1, 0)
##Note rowShift(DATA$FOMCBinary, +1) is equivalent to DATA$FOMCBinary[seq(nrow(DATA))+1]##
Output for DATA after calculations:
DateTime Close FOMCBinary SignalBinary
2131 2016-01-27 15:30:00 1127.2 0 0
2132 2016-01-27 15:15:00 1127.5 0 0
2133 2016-01-27 15:00:00 1126.9 0 0
2134 2016-01-27 14:45:00 1128.3 0 0
2135 2016-01-27 14:30:00 1125.4 0 1 => UNWANTED 1
2136 2016-01-27 14:15:00 1122.7 0 1
2137 2016-01-27 14:00:00 1122.8 1 0
2138 2016-01-27 13:45:00 1117.3 0 NA
2139 2016-01-27 13:30:00 1116.0 0 NA
Thank you very much.
I had a closer look. You wanted to remove the first of the consecutive 1s in SignalBinary, but you didn't sweep through SignalBinary itself. Here is some rough code:
DATA <- structure(list(DateTime = structure(list(sec = c(0, 0, 0, 0,0, 0, 0, 0, 0), min = c(30L, 15L, 0L, 45L, 30L, 15L, 0L, 45L,30L), hour = c(15L, 15L, 15L, 14L, 14L, 14L, 14L, 13L, 13L),mday = c(27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L), mon = c(0L,0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), year = c(116L, 116L, 116L,116L, 116L, 116L, 116L, 116L, 116L), wday = c(3L, 3L, 3L,3L, 3L, 3L, 3L, 3L, 3L), yday = c(26L, 26L, 26L, 26L, 26L,26L, 26L, 26L, 26L), isdst = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,0L, 0L), zone = c("EST", "EST", "EST", "EST", "EST", "EST","EST", "EST", "EST"), gmtoff = c(NA_integer_, NA_integer_,NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour","mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), Close = c(1127.2, 1127.5,1126.9, 1128.3, 1125.4, 1122.7, 1122.8, 1117.3, 1116), FOMCBinary = c(0,0, 0, 0, 0, 0, 1, 0, 0)), .Names = c("DateTime", "Close", "FOMCBinary"), row.names = 2131:2139, class = "data.frame")
Xvalue = 1126
# For comparing lagged or forward rows.
#
# Shift an atomic vector by `shiftLen` positions: a positive value looks
# forward (each element takes the value `shiftLen` places after it), a
# negative value looks backward. Positions that fall outside `x` become NA,
# so the result has the same length as `x`.
rowShift <- function(x, shiftLen = 1) {
  # seq_along() keeps an empty `x` empty; (1 + s):(length(x) + s) would count
  # backwards and return two NAs for it.
  r <- seq_along(x) + shiftLen
  # Indices below 1 are off the front of the vector; make them NA so `[`
  # yields NA there instead of dropping (0) or rejecting (negative) them.
  r[r < 1] <- NA
  x[r]
}
# Raw signal: Close below the threshold and FOMCBinary positive one row below.
signal <- as.numeric(DATA$Close < Xvalue & rowShift(DATA$FOMCBinary, +1) > 0)
# Sweep: compare every row except the last with the raw signal of the row
# below it. Vectorised so it also works for 0- or 1-row data (where
# 1:(nrow(DATA) - 1) would count backwards) and always yields a numeric column.
below <- signal[-1L]
swept <- signal[-length(signal)]
swept[!is.na(below) & below != 0] <- 0  # row below fired: suppress this row
swept[is.na(below)] <- NA               # row below unknown: leave undecided
# The last row has nothing below it and keeps its raw value.
DATA$SignalBinary <- c(swept, tail(signal, 1L))
DATA
anotherDATA <- structure(list(DateTime = structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(30L, 15L, 0L, 45L, 30L, 15L, 0L, 45L,30L),
hour = c(15L, 15L, 15L, 14L, 14L, 14L, 14L, 13L, 13L),mday = c(27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L),
mon = c(0L,0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), year = c(116L, 116L, 116L,116L, 116L, 116L, 116L, 116L, 116L),
wday = c(3L, 3L, 3L,3L, 3L, 3L, 3L, 3L, 3L), yday = c(26L, 26L, 26L, 26L, 26L,26L, 26L, 26L, 26L),
isdst = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,0L, 0L), zone = c("EST", "EST", "EST", "EST", "EST", "EST","EST", "EST", "EST"),
gmtoff = c(NA_integer_, NA_integer_,NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,NA_integer_, NA_integer_)),
.Names = c("sec", "min", "hour","mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt")),
Close = c(1127.2, 1127.5,1126.9, 1128.3, 1125.4, 1122.7, 1122.8, 1117.3, 1116),
FOMCBinary = c(0,0, 0, 0, 0, 1, 1, 0, 0)), .Names = c("DateTime", "Close", "FOMCBinary"), row.names = 2131:2139, class = "data.frame")
Xvalue = 1126
# For comparing lagged or forward rows.
#
# Shift an atomic vector by `shiftLen` positions: a positive value looks
# forward (each element takes the value `shiftLen` places after it), a
# negative value looks backward. Positions that fall outside `x` become NA,
# so the result has the same length as `x`.
rowShift <- function(x, shiftLen = 1) {
  # seq_along() keeps an empty `x` empty; (1 + s):(length(x) + s) would count
  # backwards and return two NAs for it.
  r <- seq_along(x) + shiftLen
  # Indices below 1 are off the front of the vector; make them NA so `[`
  # yields NA there instead of dropping (0) or rejecting (negative) them.
  r[r < 1] <- NA
  x[r]
}
# Raw signal: Close below the threshold and FOMCBinary positive one row below.
signal <- as.numeric(
  anotherDATA$Close < Xvalue & rowShift(anotherDATA$FOMCBinary, +1) > 0
)
# Sweep: compare every row except the last with the raw signal of the row
# below it. Vectorised so it also works for 0- or 1-row data (where
# 1:(nrow(anotherDATA) - 1) would count backwards) and always yields a numeric
# column.
below <- signal[-1L]
swept <- signal[-length(signal)]
swept[!is.na(below) & below != 0] <- 0  # row below fired: suppress this row
swept[is.na(below)] <- NA               # row below unknown: leave undecided
# The last row has nothing below it and keeps its raw value.
anotherDATA$SignalBinary <- c(swept, tail(signal, 1L))
anotherDATA
A variation of this question -- I can't quite get the dimensions right in the data structure to make a boxplot with the right values.
what I'm looking to do: hours would be on the x-axis, region would be on the y-axis, and for every region there will be a boxplot showing the distribution of income by hour.
The closest I can get is the following, but it's not right. How do I create the boxplot with two factors (one a time series) as axes, populated by the value distribution?
data:
regions <- structure(list(location = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("east",
"north", "west"), class = "factor"), hour = structure(list(sec = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 0L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L), mday = c(13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L), mon = c(7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L
), year = c(115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L,
115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L,
115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L,
115L, 115L), wday = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), yday = c(224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L), isdst = c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
zone = c("CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT",
"CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT",
"CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT",
"CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT"),
gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst",
"zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), hour_income = c(67L,
98L, 89L, 75L, 75L, 89L, 70L, 97L, 52L, 94L, 80L, 84L, 52L, 82L,
81L, 93L, 85L, 94L, 64L, 90L, 54L, 60L, 97L, 100L, 57L, 63L,
90L, 58L, 86L, 68L, 52L, 78L, 61L)), .Names = c("location", "hour",
"hour_income"), row.names = c(NA, -33L), class = "data.frame")
And the boxplot
# Attempted plot: hour on x, hour_income on y.
# NOTE(review): group=location asks ggplot for one box per location rather
# than one per hour -- this looks like why the result is not as intended.
ggplot(regions) +
geom_boxplot(aes(x=hour, y=hour_income, group=location))
First we convert datetimes to character format, then create the boxplots.
# Replace the date-times with "HH:MM:SS" text labels, then draw one box per
# hour label.
regions[["hour"]] <- strftime(regions[["hour"]], "%H:%M:%S")
ggplot(regions, aes(hour, hour_income)) +
  geom_boxplot()
But because you only have one observation for each region and hour, when you try to create boxplots to visualise the regions you only obtain lines instead of boxes, which are not very meaningful:
# Same plot, with the boxes filled (and thereby split) by location.
ggplot(regions, aes(hour, hour_income, fill = location)) +
  geom_boxplot()
Plenty of material on stackoverflow regarding calculating time differences between rows/entries/observations. However, I'm stumped why I'm getting NA's in unusual positions.
I have 3 columns: DATETIME, which is POSIXlt; GRP800, which is the group (factor); and TIME800, which is supposed to represent the time elapsed between each observation for each group. My particular code was derived from "Calculate differences between rows faster than a for loop?".
# For each GRP800 group, compute the gaps between consecutive DATETIME values
# (NA for the group's first row) and flatten the per-group results into one
# column.
# NOTE(review): unlist(by(...)) returns values ordered by group, so this
# presumably relies on df already being sorted by GRP800 -- confirm. Also
# check the units: diff() on date-times picks them automatically on each call,
# and c(NA, ...) appears to drop the difftime class, so different groups may
# end up in different units.
df$TIME800<-unlist(by(df$DATETIME,df$GRP800,function(x)c(NA,diff(x))))
It does appear to function properly for the first group but then I am getting NA's in the middle of the 2nd group. I've tried several approaches using diff and it's producing the identical output. I'm quite puzzled. Any advice would be greatly appreciated.
DATETIME GRP800 TIME800
1 2013-07-16 16:01:30 1 NA
2 2013-07-16 20:00:54 1 3.990000
3 2013-07-17 00:01:30 1 4.010000
4 2013-07-17 04:01:00 1 3.991667
5 2013-07-17 08:00:50 1 3.997222
6 2013-07-17 12:01:46 1 4.015556
7 2013-07-17 16:00:50 1 3.984444
8 2013-07-17 20:01:00 1 4.002778
9 2013-07-18 00:01:18 1 4.005000
10 2013-07-18 04:01:02 1 3.995556
11 2013-07-18 08:00:50 1 3.996667
12 2013-07-18 12:01:18 2 NA
13 2013-07-18 16:01:02 2 3.970833
14 2013-07-18 20:00:59 2 4.007500
15 2013-07-19 00:01:31 2 3.997222
16 2013-07-19 04:01:18 2 4.011111
17 2013-07-19 08:01:02 2 NA
18 2013-07-19 12:01:57 2 2.007500
19 2013-07-19 20:01:00 2 NA
20 2013-07-20 00:01:00 2 2.003333
> dput(df[1:20,])
structure(list(DATETIME = structure(list(sec = c(30, 54, 30,
0, 50, 46, 50, 0, 18, 2, 50, 18, 2, 59, 31, 18, 2, 57, 0, 0),
min = c(1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L), hour = c(16L, 20L, 0L, 4L, 8L,
12L, 16L, 20L, 0L, 4L, 8L, 12L, 16L, 20L, 0L, 4L, 8L, 12L,
20L, 0L), mday = c(16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L,
18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 20L
), mon = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), year = c(113L, 113L, 113L,
113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L,
113L, 113L, 113L, 113L, 113L, 113L, 113L), wday = c(2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 6L), yday = c(196L, 196L, 197L, 197L, 197L, 197L,
197L, 197L, 198L, 198L, 198L, 198L, 198L, 198L, 199L, 199L,
199L, 199L, 199L, 200L), isdst = c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
zone = c("MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT",
"MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT",
"MDT", "MDT", "MDT", "MDT"), gmtoff = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst",
"zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), GRP800 = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), TIME800 = c(NA, 3.99, 4.01, 3.991666667, 3.997222222,
4.015555556, 3.984444444, 4.002777778, 4.005, 3.995555556, 3.996666667,
NA, 3.970833333, 4.0075, 3.997222222, 4.011111111, NA, 2.0075,
NA, 2.003333333)), .Names = c("DATETIME", "GRP800", "TIME800"
), row.names = c(NA, 20L), class = "data.frame")