Convert UTC Time Zone as per state column in R - r

I have a data frame with date column in UTC time zone, I want to create another column which convert my UTC time zone wrt to the states time zone.
I have 1000's of rows with different states of USA.
My proxy data frame is mentioned below
df<-structure(list(UTC_date = structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(14L, 21L, 17L, 38L, 45L, 25L, 44L, 11L,09L, 27L), hour = c(3L, 0L, 16L, 16L, 17L, 8L, 17L, 1L, 2L, 4L),
mday = c(2L, 2L, 15L, 12L, 19L, 18L, 25L, 17L, 07L, 17L),
mon = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L),
year = c(121L,121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L),
wday = c(1L,4L, 1L, 1L, 1L, 4L, 1L, 3L, 3L, 3L),
yday = c(297L, 300L,297L, 297L, 297L, 300L, 297L, 299L, 299L, 299L), isdst = c(1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
zone = c("CDT", "CDT","CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT"),
gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_)), class = c("POSIXlt", "POSIXt")), StoreState = c("TX","MS", "AZ", "SC", "WI", "MO", "TX", "TX", "MO", "TX")),
row.names = c(NA,10L), class = "data.frame")
str(df)
head(df)
What I am looking for is
below o/p as an example for the 1st row
Input Example
UTC_Date State
2021-10-02 03:14:00 TX
Output Example
UTC_Date State Local Time as per State
2021-10-02 03:14:00 TX 2021-10-02 21:14:00

You can use the force_tzs function to convert to the corresponding time zones(tz).
library(lubridate)
df %>%
mutate(tz = case_when(
StoreState=="AZ"~ "US/Mountain",
StoreState=="SC"~ "US/Eastern",
TRUE ~ "US/Central"
)) %>%
mutate('Local Time as per State'= force_tzs(UTC_date, tzones = tz)) %>%
select(UTC_date, State= StoreState, 'Local Time as per State')

Related

How to find the difference between two timestamps in R?

I have two fields in a dataframe that are of the class "times". Call it Time1 and Time2. I am trying to find the time difference between the two.
CombinedFrame2$Duration <- difftime(CombinedFrame2$Time1, CombinedFrame2$Time2)
Error in as.POSIXct.numeric(CombinedFrame2$Time1) :
'origin' must be supplied
How do I get the classes to cooperate to do the calculation?
Example:
Time1 Time2 Duration
5:30:00 6:24:00 0:54:00
$ Time1 : POSIXlt, format: "2019-07-10 16:07:00" "2019-07-10 22:05:00" "2019-07-10 22:20:00" "2019-07-10 22:43:00" ...
$ Time2 : POSIXlt, format: "2019-07-10 22:05:00" "2019-07-10 22:20:00" "2019-07-10 22:43:00" "2019-07-10 23:15:00" ...
> dput(head(CombinedFrame2[,c("Time1", "Time2")]))
structure(list(Time1 = structure(list(sec = c(0, 0, 0, 0,
0, 0), min = c(7L, 5L, 20L, 43L, 15L, 35L), hour = c(16L, 22L,
22L, 22L, 23L, 23L), mday = c(11L, 11L, 11L, 11L, 11L, 11L),
mon = c(6L, 6L, 6L, 6L, 6L, 6L), year = c(119L, 119L, 119L,
119L, 119L, 119L), wday = c(4L, 4L, 4L, 4L, 4L, 4L), yday = c(191L,
191L, 191L, 191L, 191L, 191L), isdst = c(1L, 1L, 1L, 1L,
1L, 1L), zone = c("EDT", "EDT", "EDT", "EDT", "EDT", "EDT"
), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_)), class = c("POSIXlt", "POSIXt"
)), Time2 = structure(list(sec = c(0, 0, 0, 0, 0, 0), min = c(5L,
20L, 43L, 15L, 35L, 55L), hour = c(22L, 22L, 22L, 23L, 23L, 23L
), mday = c(11L, 11L, 11L, 11L, 11L, 11L), mon = c(6L, 6L, 6L,
6L, 6L, 6L), year = c(119L, 119L, 119L, 119L, 119L, 119L), wday = c(4L,
4L, 4L, 4L, 4L, 4L), yday = c(191L, 191L, 191L, 191L, 191L, 191L
), isdst = c(1L, 1L, 1L, 1L, 1L, 1L), zone = c("EDT", "EDT",
"EDT", "EDT", "EDT", "EDT"), gmtoff = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), class = c("POSIXlt",
"POSIXt"))), row.names = c("1:1", "1:2", "1:3", "1:4", "1:5",
"1:6"), class = "data.frame")
You need to make sure that your time is formatted correctly. See the code below.
You can use strptime() to format your time into hours, minutes, and seconds.
time1 <- "5:30:00"
time2 <- "6:24:00"
time1a <- strptime(time1,format="%H:%M:%S")
time2a <- strptime(time2,format="%H:%M:%S")
duration <- difftime(time2a,time1a)

Aggregate function not working properly in R

I am trying to use the aggregate function to get 100 Hz data into 1 minute averages. However, when I use this function the 1-min averages are incorrect. A sample of the data is below. I am using the following code to calculate the 1-min values. The code does not break but the calculations are incorrect.
aggregate(list(X = df$`Gyroscope X`,
Y = df$`Gyroscope Y`,
Z = df$`Gyroscope Z`),
list(minofday = cut(df$Timestamp, "1 min")),mean)
Timestamp Gyroscope X Gyroscope Y Gyroscope Z
2018-07-10T10:25:00.0000000 41.381838 -21.667482 -118.896492
2018-07-10T10:25:00.0100000 48.046268 -12.399903 -110.917976
2018-07-10T10:25:00.0200000 49.102786 -7.36084 -106.485602
2018-07-10T10:25:00.0300000 44.338382 -9.215699 -102.296759
2018-07-10T10:25:00.0400000 34.724123 -11.308594 -96.108404
2018-07-10T10:25:00.0500000 19.622804 -15.225221 -88.122564
2018-07-10T10:25:00.0600000 13.240968 -26.539308 -85.274663
2018-07-10T10:25:00.0700000 13.397218 -31.933596 -80.127568
2018-07-10T10:25:00.0800000 16.333009 -29.663088 -73.027348
2018-07-10T10:25:00.0900000 17.384645 -29.745485 -67.694096
2018-07-10T10:25:00.1000000 16.546632 -30.08423 -67.565922
Assuming OP's data varies by the min (note the modified data), here is how to do it with base R and dplyr:
df$Timestamp <- as.POSIXct(df$Timestamp, format = "%Y-%m-%dT%H:%M:%S")
aggregate(list(X = df$Gyroscope_X,
Y = df$Gyroscope_Y,
Z = df$Gyroscope_Z),
list(minofday = cut(df$Timestamp, "1 min")), mean)
or a more concise way:
aggregate(. ~ minofday, mean, data = cbind(setNames(df[,-1], c("X", "Y", "Z")),
minofday = cut(df$Timestamp, "1 min")))
Result:
minofday X Y Z
1 2018-07-10 10:24:00 48.57453 -9.880371 -108.70179
2 2018-07-10 10:25:00 27.78422 -19.314983 -95.13774
3 2018-07-10 10:26:00 16.85883 -29.704286 -70.36072
4 2018-07-10 10:27:00 16.54663 -30.084230 -67.56592
With lubridate and summarize_all from dplyr:
library(dplyr)
library(lubridate)
df %>%
mutate(Timestamp = ymd_hms(Timestamp)) %>%
group_by(minofday = cut(Timestamp, "1 min")) %>%
summarize_all(mean) %>%
select(-Timestamp)
Result:
# A tibble: 4 x 4
minofday Gyroscope_X Gyroscope_Y Gyroscope_Z
<fct> <dbl> <dbl> <dbl>
1 2018-07-10 10:24:00 48.6 -9.88 -109.
2 2018-07-10 10:25:00 27.8 -19.3 -95.1
3 2018-07-10 10:26:00 16.9 -29.7 -70.4
4 2018-07-10 10:27:00 16.5 -30.1 -67.6
Data:
df <- read.table(text = " Timestamp Gyroscope_X Gyroscope_Y Gyroscope_Z
2018-07-10T10:25:00.0000000 41.381838 -21.667482 -118.896492
2018-07-10T10:24:00.0100000 48.046268 -12.399903 -110.917976
2018-07-10T10:24:00.0200000 49.102786 -7.36084 -106.485602
2018-07-10T10:25:00.0300000 44.338382 -9.215699 -102.296759
2018-07-10T10:25:00.0400000 34.724123 -11.308594 -96.108404
2018-07-10T10:25:00.0500000 19.622804 -15.225221 -88.122564
2018-07-10T10:25:00.0600000 13.240968 -26.539308 -85.274663
2018-07-10T10:25:00.0700000 13.397218 -31.933596 -80.127568
2018-07-10T10:26:00.0800000 16.333009 -29.663088 -73.027348
2018-07-10T10:26:00.0900000 17.384645 -29.745485 -67.694096
2018-07-10T10:27:00.1000000 16.546632 -30.08423 -67.565922", header = TRUE)
Since you are dealing with timestamps the xts package has a lot of functions that can help you. For rolling up timestamps period.apply can help you out. The endpoints part can roll up the data from microseconds all the way up to years.
# don't load the timestamp column that one goes to the order.by part
df1_xts <- xts(df1[, -1], order.by = df1$Timestamp)
# roll up to seconds.
period.apply(df1_xts, endpoints(df1_xts, on = "mins"), colMeans)
Gyroscope_X Gyroscope_Y Gyroscope_Z
2018-07-10 10:25:00 28.55624 -20.46759 -90.59249
If you timestamp column is not yet a date time object you can use this:
df1$Timestamp <- strptime(df1$Timestamp, format = "%Y-%m-%dT%H:%M:%OS")
data:
df1 <- structure(list(Timestamp = structure(list(sec = c(0, 0.01, 0.02,
0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1), min = c(25L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L), hour = c(10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), mday = c(10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), mon = c(6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), year = c(118L, 118L,
118L, 118L, 118L, 118L, 118L, 118L, 118L, 118L, 118L), wday = c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), yday = c(190L, 190L,
190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L, 190L), isdst = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), zone = c("CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST"), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_)), class = c("POSIXlt", "POSIXt")),
Gyroscope_X = c(41.381838, 48.046268, 49.102786, 44.338382,
34.724123, 19.622804, 13.240968, 13.397218, 16.333009, 17.384645,
16.546632), Gyroscope_Y = c(-21.667482, -12.399903, -7.36084,
-9.215699, -11.308594, -15.225221, -26.539308, -31.933596,
-29.663088, -29.745485, -30.08423), Gyroscope_Z = c(-118.896492,
-110.917976, -106.485602, -102.296759, -96.108404, -88.122564,
-85.274663, -80.127568, -73.027348, -67.694096, -67.565922
)), row.names = c(NA, -11L), class = "data.frame")

Record non-consecutive binary events (simple if clause issue)

I have 9x3 dataframe DATA, where one column has timestamps (DateTime), one prices (Close), and one binary values (FOMCBinary).
I want to add column SignalBinary recording a 1 IF Close < an X value (1126 in this example) AND FOMCBinary > 0 in any of the two rows below, but only if SignalBinary = 0 in the row below (i.e. do not want consecutive 1s).
In the example here I need to record a 1 under SignalBinary only at 14:15:00. My coding attempt is instead recording a 1 at 14:15:00 and at 14:30:00. Should be fairly simple, don't understand why my code is not producing the desired result. How could I get this fixed?
DATA <- structure(list(DateTime = structure(list(sec = c(0, 0, 0, 0,0, 0, 0, 0, 0), min = c(30L, 15L, 0L, 45L, 30L, 15L, 0L, 45L,30L), hour = c(15L, 15L, 15L, 14L, 14L, 14L, 14L, 13L, 13L),mday = c(27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L), mon = c(0L,0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), year = c(116L, 116L, 116L,116L, 116L, 116L, 116L, 116L, 116L), wday = c(3L, 3L, 3L,3L, 3L, 3L, 3L, 3L, 3L), yday = c(26L, 26L, 26L, 26L, 26L,26L, 26L, 26L, 26L), isdst = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,0L, 0L), zone = c("EST", "EST", "EST", "EST", "EST", "EST","EST", "EST", "EST"), gmtoff = c(NA_integer_, NA_integer_,NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour","mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), Close = c(1127.2, 1127.5,1126.9, 1128.3, 1125.4, 1122.7, 1122.8, 1117.3, 1116), FOMCBinary = c(0,0, 0, 0, 0, 0, 1, 0, 0)), .Names = c("DateTime", "Close", "FOMCBinary"), row.names = 2131:2139, class = "data.frame")
Xvalue = 1126
#For comparing lagged or forward rows
rowShift <- function(x, shiftLen = 1L) {
r <- (1L + shiftLen):(length(x) + shiftLen)
r[r<1] <- NA
return(x[r]) }
DATA$SignalBinary <- ifelse(
DATA$Close < Xvalue & (
rowShift(DATA$FOMCBinary, +1) > 0 |
(rowShift(DATA$FOMCBinary, +2) > 0 & rowShift(DATA$FOMCBinary, +1) == 0))
, 1, 0)
##Note rowShift(DATA$FOMCBinary, +1) is equivalent to DATA$FOMCBinary[seq(nrow(DATA))+1]##
Output for DATA after calculations:
DateTime Close FOMCBinary SignalBinary
2131 2016-01-27 15:30:00 1127.2 0 0
2132 2016-01-27 15:15:00 1127.5 0 0
2133 2016-01-27 15:00:00 1126.9 0 0
2134 2016-01-27 14:45:00 1128.3 0 0
2135 2016-01-27 14:30:00 1125.4 0 1 => UNWANTED 1
2136 2016-01-27 14:15:00 1122.7 0 1
2137 2016-01-27 14:00:00 1122.8 1 0
2138 2016-01-27 13:45:00 1117.3 0 NA
2139 2016-01-27 13:30:00 1116.0 0 NA
Thank you very much.
had a closer look. you wanted to remove the first consecutive 1 in SignalBinary but you didnt sweep through SignalBinary. Here is a rough code
DATA <- structure(list(DateTime = structure(list(sec = c(0, 0, 0, 0,0, 0, 0, 0, 0), min = c(30L, 15L, 0L, 45L, 30L, 15L, 0L, 45L,30L), hour = c(15L, 15L, 15L, 14L, 14L, 14L, 14L, 13L, 13L),mday = c(27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L), mon = c(0L,0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), year = c(116L, 116L, 116L,116L, 116L, 116L, 116L, 116L, 116L), wday = c(3L, 3L, 3L,3L, 3L, 3L, 3L, 3L, 3L), yday = c(26L, 26L, 26L, 26L, 26L,26L, 26L, 26L, 26L), isdst = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,0L, 0L), zone = c("EST", "EST", "EST", "EST", "EST", "EST","EST", "EST", "EST"), gmtoff = c(NA_integer_, NA_integer_,NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour","mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), Close = c(1127.2, 1127.5,1126.9, 1128.3, 1125.4, 1122.7, 1122.8, 1117.3, 1116), FOMCBinary = c(0,0, 0, 0, 0, 0, 1, 0, 0)), .Names = c("DateTime", "Close", "FOMCBinary"), row.names = 2131:2139, class = "data.frame")
Xvalue = 1126
#For comparing lagged or forward rows
rowShift <- function(x, shiftLen = 1) {
r <- (1 + shiftLen):(length(x) + shiftLen)
r[r<1] <- NA
return(x[r]) }
DATA$SignalBinary <- as.numeric(DATA$Close < Xvalue & rowShift(DATA$FOMCBinary, +1) > 0)
DATA$SignalBinary <- c(sapply(1:(nrow(DATA)-1), function(n) {
if (is.na(DATA$SignalBinary[n+1])) return(NA)
if (DATA$SignalBinary[n+1]) return(0)
DATA$SignalBinary[n]
}), tail(DATA$SignalBinary,1))
DATA
anotherDATA <- structure(list(DateTime = structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(30L, 15L, 0L, 45L, 30L, 15L, 0L, 45L,30L),
hour = c(15L, 15L, 15L, 14L, 14L, 14L, 14L, 13L, 13L),mday = c(27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L),
mon = c(0L,0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), year = c(116L, 116L, 116L,116L, 116L, 116L, 116L, 116L, 116L),
wday = c(3L, 3L, 3L,3L, 3L, 3L, 3L, 3L, 3L), yday = c(26L, 26L, 26L, 26L, 26L,26L, 26L, 26L, 26L),
isdst = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,0L, 0L), zone = c("EST", "EST", "EST", "EST", "EST", "EST","EST", "EST", "EST"),
gmtoff = c(NA_integer_, NA_integer_,NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,NA_integer_, NA_integer_)),
.Names = c("sec", "min", "hour","mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt")),
Close = c(1127.2, 1127.5,1126.9, 1128.3, 1125.4, 1122.7, 1122.8, 1117.3, 1116),
FOMCBinary = c(0,0, 0, 0, 0, 1, 1, 0, 0)), .Names = c("DateTime", "Close", "FOMCBinary"), row.names = 2131:2139, class = "data.frame")
Xvalue = 1126
#For comparing lagged or forward rows
rowShift <- function(x, shiftLen = 1) {
r <- (1 + shiftLen):(length(x) + shiftLen)
r[r<1] <- NA
return(x[r]) }
anotherDATA$SignalBinary <- as.numeric(anotherDATA$Close < Xvalue & rowShift(anotherDATA$FOMCBinary, +1) > 0)
anotherDATA$SignalBinary <- c(sapply(1:(nrow(anotherDATA)-1), function(n) {
if (is.na(anotherDATA$SignalBinary[n+1])) return(NA)
if (anotherDATA$SignalBinary[n+1]) return(0)
anotherDATA$SignalBinary[n]
}), tail(anotherDATA$SignalBinary,1))
anotherDATA

ggplot + boxplot with time series value by group

A variation of this question -- I can't quite get the dimensions right in the data structure to make a boxplot with the right values.
what I'm looking to do: hours would be on the x-axis, region would be on the y-axis, and for every region there will be a boxplot showing the distribution of income by hour.
The closest I can get is the following, but it's not right. How do I create the boxplot with two factors (one a time series) as axes, populated by the value distribution?
data:
regions <- structure(list(location = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("east",
"north", "west"), class = "factor"), hour = structure(list(sec = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 0L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L), mday = c(13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L), mon = c(7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L
), year = c(115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L,
115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L,
115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L,
115L, 115L), wday = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), yday = c(224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L), isdst = c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
zone = c("CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT",
"CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT",
"CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT",
"CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT"),
gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst",
"zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), hour_income = c(67L,
98L, 89L, 75L, 75L, 89L, 70L, 97L, 52L, 94L, 80L, 84L, 52L, 82L,
81L, 93L, 85L, 94L, 64L, 90L, 54L, 60L, 97L, 100L, 57L, 63L,
90L, 58L, 86L, 68L, 52L, 78L, 61L)), .Names = c("location", "hour",
"hour_income"), row.names = c(NA, -33L), class = "data.frame")
And the boxplot
ggplot(regions) +
geom_boxplot(aes(x=hour, y=hour_income, group=location))
First we convert datetimes to character format, then create the boxplots.
regions$hour <- strftime(regions$hour, format="%H:%M:%S")
ggplot(data = regions, aes(x = hour, y = hour_income)) + geom_boxplot()
But because you only have an observation for each region and hour when you try to create a boxplot to visualise the regions you can only obtain lines instead of boxplots, which are not very meaningful:
ggplot(data = regions, aes(x = hour, y = hour_income)) + geom_boxplot(aes(fill= location))

time difference between rows producing odd results

Plenty of material on stackoverflow regarding calculating time differences between rows/entries/observations. However, I'm stumped why I'm getting NA's in unusual positions.
I have 3 columns, DATETIME which is posixlt, GRP800 which is the group (factor), and TIME800 which is supposed to represent the time elapsed between each observation for each group. My particular code was derived from Calculate differences between rows faster than a for loop?.
df$TIME800<-unlist(by(df$DATETIME,df$GRP800,function(x)c(NA,diff(x))))
It does appear to function properly for the first group but then I am getting NA's in the middle of the 2nd group. I've tried several approaches using diff and it's producing the identical output. I'm quite puzzled. Any advice would be greatly appreciated.
DATETIME GRP800 TIME800
1 2013-07-16 16:01:30 1 NA
2 2013-07-16 20:00:54 1 3.990000
3 2013-07-17 00:01:30 1 4.010000
4 2013-07-17 04:01:00 1 3.991667
5 2013-07-17 08:00:50 1 3.997222
6 2013-07-17 12:01:46 1 4.015556
7 2013-07-17 16:00:50 1 3.984444
8 2013-07-17 20:01:00 1 4.002778
9 2013-07-18 00:01:18 1 4.005000
10 2013-07-18 04:01:02 1 3.995556
11 2013-07-18 08:00:50 1 3.996667
12 2013-07-18 12:01:18 2 NA
13 2013-07-18 16:01:02 2 3.970833
14 2013-07-18 20:00:59 2 4.007500
15 2013-07-19 00:01:31 2 3.997222
16 2013-07-19 04:01:18 2 4.011111
17 2013-07-19 08:01:02 2 NA
18 2013-07-19 12:01:57 2 2.007500
19 2013-07-19 20:01:00 2 NA
20 2013-07-20 00:01:00 2 2.003333
> dput(df[1:20,])
structure(list(DATETIME = structure(list(sec = c(30, 54, 30,
0, 50, 46, 50, 0, 18, 2, 50, 18, 2, 59, 31, 18, 2, 57, 0, 0),
min = c(1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L), hour = c(16L, 20L, 0L, 4L, 8L,
12L, 16L, 20L, 0L, 4L, 8L, 12L, 16L, 20L, 0L, 4L, 8L, 12L,
20L, 0L), mday = c(16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L,
18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 20L
), mon = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), year = c(113L, 113L, 113L,
113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L,
113L, 113L, 113L, 113L, 113L, 113L, 113L), wday = c(2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 6L), yday = c(196L, 196L, 197L, 197L, 197L, 197L,
197L, 197L, 198L, 198L, 198L, 198L, 198L, 198L, 199L, 199L,
199L, 199L, 199L, 200L), isdst = c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
zone = c("MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT",
"MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT",
"MDT", "MDT", "MDT", "MDT"), gmtoff = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst",
"zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), GRP800 = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), TIME800 = c(NA, 3.99, 4.01, 3.991666667, 3.997222222,
4.015555556, 3.984444444, 4.002777778, 4.005, 3.995555556, 3.996666667,
NA, 3.970833333, 4.0075, 3.997222222, 4.011111111, NA, 2.0075,
NA, 2.003333333)), .Names = c("DATETIME", "GRP800", "TIME800"
), row.names = c(NA, 20L), class = "data.frame")

Resources