How to find the difference between two timestamps in R? - r

I have two fields in a dataframe that are of the class "times". Call it Time1 and Time2. I am trying to find the time difference between the two.
CombinedFrame2$Duration <- difftime(CombinedFrame2$Time1, CombinedFrame2$Time2)
Error in as.POSIXct.numeric(CombinedFrame2$Time1) :
'origin' must be supplied
How do I get the classes to cooperate to do the calculation?
Example:
Time1 Time2 Duration
5:30:00 6:24:00 0:54:00
$ Time1 : POSIXlt, format: "2019-07-10 16:07:00" "2019-07-10 22:05:00" "2019-07-10 22:20:00" "2019-07-10 22:43:00" ...
$ Time2 : POSIXlt, format: "2019-07-10 22:05:00" "2019-07-10 22:20:00" "2019-07-10 22:43:00" "2019-07-10 23:15:00" ...
> dput(head(CombinedFrame2[,c("Time1", "Time2")]))
structure(list(Time1 = structure(list(sec = c(0, 0, 0, 0,
0, 0), min = c(7L, 5L, 20L, 43L, 15L, 35L), hour = c(16L, 22L,
22L, 22L, 23L, 23L), mday = c(11L, 11L, 11L, 11L, 11L, 11L),
mon = c(6L, 6L, 6L, 6L, 6L, 6L), year = c(119L, 119L, 119L,
119L, 119L, 119L), wday = c(4L, 4L, 4L, 4L, 4L, 4L), yday = c(191L,
191L, 191L, 191L, 191L, 191L), isdst = c(1L, 1L, 1L, 1L,
1L, 1L), zone = c("EDT", "EDT", "EDT", "EDT", "EDT", "EDT"
), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_)), class = c("POSIXlt", "POSIXt"
)), Time2 = structure(list(sec = c(0, 0, 0, 0, 0, 0), min = c(5L,
20L, 43L, 15L, 35L, 55L), hour = c(22L, 22L, 22L, 23L, 23L, 23L
), mday = c(11L, 11L, 11L, 11L, 11L, 11L), mon = c(6L, 6L, 6L,
6L, 6L, 6L), year = c(119L, 119L, 119L, 119L, 119L, 119L), wday = c(4L,
4L, 4L, 4L, 4L, 4L), yday = c(191L, 191L, 191L, 191L, 191L, 191L
), isdst = c(1L, 1L, 1L, 1L, 1L, 1L), zone = c("EDT", "EDT",
"EDT", "EDT", "EDT", "EDT"), gmtoff = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), class = c("POSIXlt",
"POSIXt"))), row.names = c("1:1", "1:2", "1:3", "1:4", "1:5",
"1:6"), class = "data.frame")

You need to make sure that your time is formatted correctly. See the code below.
You can use strptime() to format your time into hours, minutes, and seconds.
time1 <- "5:30:00"
time2 <- "6:24:00"
time1a <- strptime(time1,format="%H:%M:%S")
time2a <- strptime(time2,format="%H:%M:%S")
duration <- difftime(time2a,time1a)

Related

Convert UTC Time Zone as per state column in R

I have a data frame with date column in UTC time zone, I want to create another column which convert my UTC time zone wrt to the states time zone.
I have 1000's of rows with different states of USA.
My proxy data frame is mentioned below
df<-structure(list(UTC_date = structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(14L, 21L, 17L, 38L, 45L, 25L, 44L, 11L,09L, 27L), hour = c(3L, 0L, 16L, 16L, 17L, 8L, 17L, 1L, 2L, 4L),
mday = c(2L, 2L, 15L, 12L, 19L, 18L, 25L, 17L, 07L, 17L),
mon = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L),
year = c(121L,121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L),
wday = c(1L,4L, 1L, 1L, 1L, 4L, 1L, 3L, 3L, 3L),
yday = c(297L, 300L,297L, 297L, 297L, 300L, 297L, 299L, 299L, 299L), isdst = c(1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
zone = c("CDT", "CDT","CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT"),
gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_)), class = c("POSIXlt", "POSIXt")), StoreState = c("TX","MS", "AZ", "SC", "WI", "MO", "TX", "TX", "MO", "TX")),
row.names = c(NA,10L), class = "data.frame")
str(df)
head(df)
What I am looking for is
below o/p as an example for the 1st row
Input Example
UTC_Date State
2021-10-02 03:14:00 TX
Output Example
UTC_Date State Local Time as per State
2021-10-02 03:14:00 TX 2021-10-02 21:14:00
You can use the force_tzs function to convert to the corresponding time zones(tz).
library(lubridate)
df %>%
mutate(tz = case_when(
StoreState=="AZ"~ "US/Mountain",
StoreState=="SC"~ "US/Eastern",
TRUE ~ "US/Central"
)) %>%
mutate('Local Time as per State'= force_tzs(UTC_date, tzones = tz)) %>%
select(UTC_date, State= StoreState, 'Local Time as per State')

Aggregate function not working properly in R

I am trying to use the aggregate function to get 100 Hz data into 1 minute averages. However, when I use this function the 1-min averages are incorrect. A sample of the data is below. I am using the following code to calculate the 1-min values. The code does not break but the calculations are incorrect.
aggregate(list(X = df$`Gyroscope X`,
Y = df$`Gyroscope Y`,
Z = df$`Gyroscope Z`),
list(minofday = cut(df$Timestamp, "1 min")),mean)
Timestamp Gyroscope X Gyroscope Y Gyroscope Z
2018-07-10T10:25:00.0000000 41.381838 -21.667482 -118.896492
2018-07-10T10:25:00.0100000 48.046268 -12.399903 -110.917976
2018-07-10T10:25:00.0200000 49.102786 -7.36084 -106.485602
2018-07-10T10:25:00.0300000 44.338382 -9.215699 -102.296759
2018-07-10T10:25:00.0400000 34.724123 -11.308594 -96.108404
2018-07-10T10:25:00.0500000 19.622804 -15.225221 -88.122564
2018-07-10T10:25:00.0600000 13.240968 -26.539308 -85.274663
2018-07-10T10:25:00.0700000 13.397218 -31.933596 -80.127568
2018-07-10T10:25:00.0800000 16.333009 -29.663088 -73.027348
2018-07-10T10:25:00.0900000 17.384645 -29.745485 -67.694096
2018-07-10T10:25:00.1000000 16.546632 -30.08423 -67.565922
Assuming OP's data varies by the min (note the modified data), here is how to do it with base R and dplyr:
df$Timestamp <- as.POSIXct(df$Timestamp, format = "%Y-%m-%dT%H:%M:%S")
aggregate(list(X = df$Gyroscope_X,
Y = df$Gyroscope_Y,
Z = df$Gyroscope_Z),
list(minofday = cut(df$Timestamp, "1 min")), mean)
or a more concise way:
aggregate(. ~ minofday, mean, data = cbind(setNames(df[,-1], c("X", "Y", "Z")),
minofday = cut(df$Timestamp, "1 min")))
Result:
minofday X Y Z
1 2018-07-10 10:24:00 48.57453 -9.880371 -108.70179
2 2018-07-10 10:25:00 27.78422 -19.314983 -95.13774
3 2018-07-10 10:26:00 16.85883 -29.704286 -70.36072
4 2018-07-10 10:27:00 16.54663 -30.084230 -67.56592
With lubridate and summarize_all from dplyr:
library(dplyr)
library(lubridate)
df %>%
mutate(Timestamp = ymd_hms(Timestamp)) %>%
group_by(minofday = cut(Timestamp, "1 min")) %>%
summarize_all(mean) %>%
select(-Timestamp)
Result:
# A tibble: 4 x 4
minofday Gyroscope_X Gyroscope_Y Gyroscope_Z
<fct> <dbl> <dbl> <dbl>
1 2018-07-10 10:24:00 48.6 -9.88 -109.
2 2018-07-10 10:25:00 27.8 -19.3 -95.1
3 2018-07-10 10:26:00 16.9 -29.7 -70.4
4 2018-07-10 10:27:00 16.5 -30.1 -67.6
Data:
df <- read.table(text = " Timestamp Gyroscope_X Gyroscope_Y Gyroscope_Z
2018-07-10T10:25:00.0000000 41.381838 -21.667482 -118.896492
2018-07-10T10:24:00.0100000 48.046268 -12.399903 -110.917976
2018-07-10T10:24:00.0200000 49.102786 -7.36084 -106.485602
2018-07-10T10:25:00.0300000 44.338382 -9.215699 -102.296759
2018-07-10T10:25:00.0400000 34.724123 -11.308594 -96.108404
2018-07-10T10:25:00.0500000 19.622804 -15.225221 -88.122564
2018-07-10T10:25:00.0600000 13.240968 -26.539308 -85.274663
2018-07-10T10:25:00.0700000 13.397218 -31.933596 -80.127568
2018-07-10T10:26:00.0800000 16.333009 -29.663088 -73.027348
2018-07-10T10:26:00.0900000 17.384645 -29.745485 -67.694096
2018-07-10T10:27:00.1000000 16.546632 -30.08423 -67.565922", header = TRUE)
Since you are dealing with timestamps the xts package has a lot of functions that can help you. For rolling up timestamps period.apply can help you out. The endpoints part can roll up the data from microseconds all the way up to years.
# don't load the timestamp column that one goes to the order.by part
df1_xts <- xts(df1[, -1], order.by = df1$Timestamp)
# roll up to seconds.
period.apply(df1_xts, endpoints(df1_xts, on = "mins"), colMeans)
Gyroscope_X Gyroscope_Y Gyroscope_Z
2018-07-10 10:25:00 28.55624 -20.46759 -90.59249
If you timestamp column is not yet a date time object you can use this:
df1$Timestamp <- strptime(df1$Timestamp, format = "%Y-%m-%dT%H:%M:%OS")
data:
df1 <- structure(list(Timestamp = structure(list(sec = c(0, 0.01, 0.02,
0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1), min = c(25L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L), hour = c(10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), mday = c(10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), mon = c(6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), year = c(118L, 118L,
118L, 118L, 118L, 118L, 118L, 118L, 118L, 118L, 118L), wday = c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), yday = c(190L, 190L,
190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L), isdst = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), zone = c("CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST"), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_)), class = c("POSIXlt", "POSIXt")),
Gyroscope_X = c(41.381838, 48.046268, 49.102786, 44.338382,
34.724123, 19.622804, 13.240968, 13.397218, 16.333009, 17.384645,
16.546632), Gyroscope_Y = c(-21.667482, -12.399903, -7.36084,
-9.215699, -11.308594, -15.225221, -26.539308, -31.933596,
-29.663088, -29.745485, -30.08423), Gyroscope_Z = c(-118.896492,
-110.917976, -106.485602, -102.296759, -96.108404, -88.122564,
-85.274663, -80.127568, -73.027348, -67.694096, -67.565922
)), row.names = c(NA, -11L), class = "data.frame")

time difference between rows producing odd results

Plenty of material on stackoverflow regarding calculating time differences between rows/entries/observations. However, I'm stumped why I'm getting NA's in unusual positions.
I have 3 columns, DATETIME which is posixlt, GRP800 which is the group (factor), and TIME800 which is supposed to represent the time elapsed between each observation for each group. My particular code was derived from Calculate differences between rows faster than a for loop?.
df$TIME800<-unlist(by(df$DATETIME,df$GRP800,function(x)c(NA,diff(x))))
It does appear to function properly for the first group but then I am getting NA's in the middle of the 2nd group. I've tried several approaches using diff and it's producing the identical output. I'm quite puzzled. Any advice would be greatly appreciated.
DATETIME GRP800 TIME800
1 2013-07-16 16:01:30 1 NA
2 2013-07-16 20:00:54 1 3.990000
3 2013-07-17 00:01:30 1 4.010000
4 2013-07-17 04:01:00 1 3.991667
5 2013-07-17 08:00:50 1 3.997222
6 2013-07-17 12:01:46 1 4.015556
7 2013-07-17 16:00:50 1 3.984444
8 2013-07-17 20:01:00 1 4.002778
9 2013-07-18 00:01:18 1 4.005000
10 2013-07-18 04:01:02 1 3.995556
11 2013-07-18 08:00:50 1 3.996667
12 2013-07-18 12:01:18 2 NA
13 2013-07-18 16:01:02 2 3.970833
14 2013-07-18 20:00:59 2 4.007500
15 2013-07-19 00:01:31 2 3.997222
16 2013-07-19 04:01:18 2 4.011111
17 2013-07-19 08:01:02 2 NA
18 2013-07-19 12:01:57 2 2.007500
19 2013-07-19 20:01:00 2 NA
20 2013-07-20 00:01:00 2 2.003333
> dput(df[1:20,])
structure(list(DATETIME = structure(list(sec = c(30, 54, 30,
0, 50, 46, 50, 0, 18, 2, 50, 18, 2, 59, 31, 18, 2, 57, 0, 0),
min = c(1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L), hour = c(16L, 20L, 0L, 4L, 8L,
12L, 16L, 20L, 0L, 4L, 8L, 12L, 16L, 20L, 0L, 4L, 8L, 12L,
20L, 0L), mday = c(16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L,
18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 20L
), mon = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), year = c(113L, 113L, 113L,
113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L,
113L, 113L, 113L, 113L, 113L, 113L, 113L), wday = c(2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 6L), yday = c(196L, 196L, 197L, 197L, 197L, 197L,
197L, 197L, 198L, 198L, 198L, 198L, 198L, 198L, 199L, 199L,
199L, 199L, 199L, 200L), isdst = c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
zone = c("MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT",
"MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT",
"MDT", "MDT", "MDT", "MDT"), gmtoff = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst",
"zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), GRP800 = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), TIME800 = c(NA, 3.99, 4.01, 3.991666667, 3.997222222,
4.015555556, 3.984444444, 4.002777778, 4.005, 3.995555556, 3.996666667,
NA, 3.970833333, 4.0075, 3.997222222, 4.011111111, NA, 2.0075,
NA, 2.003333333)), .Names = c("DATETIME", "GRP800", "TIME800"
), row.names = c(NA, 20L), class = "data.frame")

Merging two dataframes on a date range in R

In R I want to merge two dataframes on a range of dates, taking all rows from the second dataframe which fall on and between two columns of dates from the first dataframe. I couldn't find a strictly R function or version of the merge function that could do this, but I know there's a 'between' function in sql and I was thinking of trying the sqldf package (although I'm not well versed in sql). If there's a more R-ish way to do this, that would be preferable. Thank you in advance for your help!
df1 <- structure(list(ID = 1:2, PtID = structure(c(1L, 1L), .Label = c("T031", "T040", "T045", "T064", "T074", "T081", "T092", "T094", "T096", "T105", "T107", "T108", "T115", "T118", "T120", "T124", "T125", "T128", "T130", "T132", "T138", "T140", "T142", "T142_R1", "T146", "T158", "T159", "T160", "T164", "T166", "T169", "T171", "T173", "T197", "T208", "T214", "T221"), class = "factor"), StartDateTime = structure(list(sec = c(0, 0), min = c(11L, 35L), hour = c(17L, 17L), mday = c(23L, 23L), mon = c(9L, 9L), year = c(112L, 112L), wday = c(2L, 2L), yday = c(296L, 296L), isdst = c(1L, 1L)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt")), EndDateTime = structure(list(sec = c(0, 0), min = c(16L, 37L), hour = c(17L, 17L), mday = c(23L, 23L), mon = c(9L, 9L), year = c(112L, 112L), wday = c(2L, 2L), yday = c(296L, 296L), isdst = c(1L, 1L)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt"))), .Names = c("ID", "PtID", "StartDateTime", "EndDateTime"), row.names = 1:2, class = "data.frame")
df1
ID PtID StartDateTime EndDateTime
1 1 T031 2012-10-23 17:11:00 2012-10-23 17:16:00
2 2 T031 2012-10-23 17:35:00 2012-10-23 17:37:00
The second dataframe has several IDs (which match the first dataframe) and timestamps on the minute level.
df2
df2 <- structure(list(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), dateTime = structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = 2:44, hour = c(17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L), mday = c(23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L), mon = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L), year = c(112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L), wday = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), yday = c(296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L), isdst = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt")), lat = c(33.06621406, 33.06616621, 33.06617305, 33.06617624, 33.06617932, 33.06618161, 33.06618326, 33.06618604, 33.06615089, 33.06628004, 33.06618461, 33.06615113, 33.0661362, 33.06620301, 33.0662218, 33.06624283, 33.06622268, 33.06622425, 33.06622787, 33.06623042, 33.06623318, 33.06623654, 33.06623826, 33.06623919, 33.06623907, 33.06624009, 33.06623804, 33.06624255, 33.06624377, 33.06624446, 33.06624242, 33.06624254, 33.06624513, 33.06624582, 33.06615573, 33.06625534, 33.06618541, 33.06613825, 33.06613624, 33.06614027, 33.06614551, 33.06614844, 33.06615393), lon = c(-116.6105531, -116.6105651,-116.6105613, -116.6105553, -116.610551, -116.610549, -116.6105484, -116.6105512, -116.6105712, -116.6104996, -116.6104711, -116.6104854, -116.6105596, -116.6104509, -116.610524, -116.6105535, -116.6105461, -116.6105461, -116.6105477, -116.6105498, -116.6105478, -116.6105473, -116.6105473, -116.6105488, -116.6105497, -116.6105479, -116.610545, -116.6105461, -116.6105448, -116.610543, -116.6105409, -116.6105395, -116.6105367, -116.6105337, -116.6105344, -116.6104779, -116.6104953,-116.6105222, -116.610526, -116.6105255, -116.6105282, -116.6105265,-116.6105282)), .Names = c("ID", "dateTime", "lat", "lon"), row.names = 1023:1065, class = "data.frame")
So the desired output would look like this:
ID PtID DateTime lat lon
1 T031 2012-10-23 17:11:00 33.06628 -116.6105
1 T031 2012-10-23 17:12:00 33.06618 -116.6105
1 T031 2012-10-23 17:13:00 33.06615 -116.6105
1 T031 2012-10-23 17:14:00 33.06614 -116.6106
1 T031 2012-10-23 17:15:00 33.06620 -116.6105
1 T031 2012-10-23 17:16:00 33.06622 -116.6105
2 T031 2012-10-23 17:35:00 33.06625 -116.6105
2 T031 2012-10-23 17:36:00 33.06616 -116.6105
2 T031 2012-10-23 17:37:00 33.06626 -116.6105
So with sqldf maybe something like this?
sqldf("SELECT df2.ID, df2.lon, df2.lat, FROM df1
INNER JOIN df2 ON df1.ID = df2.ID
WHERE df2.DateTime BETWEEN df1.StartDateTime AND df1.EndDateTime")
In general, its not a good idea to use POSIXlt in data frames. Use POSIXct instead. Also your SQL statement is ok except the comma before FROM needs to be removed:
df1a <- transform(df1,
StartDateTime = as.POSIXct(StartDateTime),
EndDateTime = as.POSIXct(EndDateTime))
df2a <- transform(df2, dateTime = as.POSIXct(dateTime))
The SQL statement in the question has an extraneous commma before FROM.
Here is a slightly simplified statement. This one uses a left join instead to ensure that all ID's from df1a are included even if they have no matches in df2a.
sqldf("SELECT df1a.ID, PtID, dateTime, lat, lon
FROM df1a LEFT JOIN df2a
ON df1a.ID = df2a.ID AND dateTime BETWEEN StartDateTime AND EndDateTime")
You may want to look into defining your data as zoo objects. merge.zoo does something very close to what you ask. Refer to this question for more: R: merge two irregular time series

draw a vertical line based on hour minute on y-axis

x
structure(list(Date = structure(c(15358, 15359, 15362, 15363,
15364, 15365), class = "Date"), EndTime1 = structure(list(sec = c(0,
0, 0, 0, 0, 0), min = c(45L, 25L, 7L, 19L, 5L, 23L), hour = c(5L,
8L, 3L, 4L, 4L, 3L), mday = c(18L, 18L, 18L, 18L, 18L, 18L),
mon = c(0L, 0L, 0L, 0L, 0L, 0L), year = c(113L, 113L, 113L,
113L, 113L, 113L), wday = c(5L, 5L, 5L, 5L, 5L, 5L), yday = c(17L,
17L, 17L, 17L, 17L, 17L), isdst = c(0L, 0L, 0L, 0L, 0L, 0L
)), .Names = c("sec", "min", "hour", "mday", "mon", "year",
"wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt")), EndTime2 = structure(list(
sec = c(0, 0, 0, 0, 0, 0), min = c(45L, 41L, 11L, 27L, 19L,
34L), hour = c(7L, 15L, 5L, 7L, 8L, 5L), mday = c(18L, 18L,
18L, 18L, 18L, 18L), mon = c(0L, 0L, 0L, 0L, 0L, 0L), year = c(113L,
113L, 113L, 113L, 113L, 113L), wday = c(5L, 5L, 5L, 5L, 5L,
5L), yday = c(17L, 17L, 17L, 17L, 17L, 17L), isdst = c(0L,
0L, 0L, 0L, 0L, 0L)), .Names = c("sec", "min", "hour", "mday",
"mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt",
"POSIXt")), EndTime3 = structure(list(sec = c(0, 0, 0, 0, 0,
0), min = c(7L, 59L, 30L, 48L, 46L, 58L), hour = c(8L, 15L, 5L,
7L, 8L, 5L), mday = c(18L, 18L, 18L, 18L, 18L, 18L), mon = c(0L,
0L, 0L, 0L, 0L, 0L), year = c(113L, 113L, 113L, 113L, 113L, 113L
), wday = c(5L, 5L, 5L, 5L, 5L, 5L), yday = c(17L, 17L, 17L,
17L, 17L, 17L), isdst = c(0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"
), class = c("POSIXlt", "POSIXt"))), .Names = c("Date", "EndTime1",
"EndTime2", "EndTime3"), row.names = c(NA, 6L), class = "data.frame")
\n
y_limits = as.POSIXct(c(strptime("00:00", "%H:%M"), strptime("23:29", "%H:%M")))
y_breaks = seq(from=strptime("00:00", "%H:%M"),
to=strptime("23:29", "%H:%M"), by="2 hours")
y_labels = format(y_breaks, "%H:%M")
s<-as.POSIXlt("09:00", format="%H:%M")
ggplot(x, aes(Date, EndTime1, group=1, colour="Team1")) + geom_line() + scale_y_datetime(limits=y_limits, breaks=y_breaks, labels=y_labels) + geom_line(aes(Date, EndTime2, colour="Team2")) + geom_line(aes(Date, EndTime3, colour="Team3")) + geom_hline(yintercept=s, colour="red")
I like to draw a geom_line at s but not working. Getting error as:
Error : Invalid intercept type: should be a numeric vector, a function, or a name of a function
Error in if (nrow(layer_data) == 0) return() : argument is of length zero
Any ideas How I can draw a vertical line at a given s?
s<-as.POSIXct(c("09:00"), format="%H:%M")
s<-as.numeric(s)

Resources