Example Data:
structure(c(-0.0752423128397812, -0.00667756345500559, 0.127210629285125,
-0.139921096245914, 0.0652869973391721, -0.0426597532279215,
0.0900627738506856, 0.0181364458126518, 0.0655042896419282, 0.00433434751877004,
-0.0265985905707364, 0.0479551496911459), class = c("xts", "zoo"
), .indexCLASS = "Date", tclass = "Date", .indexTZ = "UTC", tzone = "UTC", index = structure(c(1451606400,
1454284800, 1456790400, 1459468800, 1462060800, 1464739200, 1467331200,
1470009600, 1472688000, 1475280000, 1477958400, 1480550400), tzone = "UTC", tclass = "Date"), .Dim = c(12L,
1L), .Dimnames = list(NULL, "AAPL.Returns"))
How do I convert the index of an object, in this case, the Date column, into a new column labelled Date?
Edit:
> head(Stock1_returns)
AAPL.Returns
2007-01-01 -0.006489744
2007-02-01 -0.013064271
2007-03-01 0.098097127
2007-04-01 0.074157809
2007-05-01 0.214328635
2007-06-01 0.007013805
To turn an xts object into a data frame with a date column, you can use the following code. Use index to get the date index of the xts object and coredata to get all the data contained in the xts object.
# my_xts is the xts object built from the original poster's example data.
# index() supplies the time index, stored here as the Date column;
# coredata() supplies the data values held by the xts object.
df1 <- data.frame(Date = index(my_xts), coredata(my_xts) )
# show resulting structure
str(df1)
'data.frame': 12 obs. of 2 variables:
$ Date : Date, format: "2016-01-01" "2016-02-01" "2016-03-01" "2016-04-01" ...
$ AAPL.Returns: num -0.07524 -0.00668 0.12721 -0.13992 0.06529 ...
# outcome
df1
Date AAPL.Returns
1 2016-01-01 -0.075242313
2 2016-02-01 -0.006677563
3 2016-03-01 0.127210629
4 2016-04-01 -0.139921096
5 2016-05-01 0.065286997
6 2016-06-01 -0.042659753
7 2016-07-01 0.090062774
8 2016-08-01 0.018136446
9 2016-09-01 0.065504290
10 2016-10-01 0.004334348
11 2016-11-01 -0.026598591
12 2016-12-01 0.047955150
Related
I have a data set and there are some date and hour attributes. Here is the sample, then I will explain what I want to do;
date1
hour1
date2
hour2
date3
hour3
2014-03-16 00:00:00
16:20:00
2014-03-16 00:00:00
20:20:03
2014-03-16 00:00:00
22:12:34
2014-04-22 00:00:00
10:20:00
2014-04-22 00:00:00
15:20:03
2014-04-22 00:00:00
20:12:34
2015-03-12 00:00:00
16:20:00
2015-03-12 00:00:00
20:20:03
2015-03-12 00:00:00
22:12:34
We know event1 happens before event2 (event1 -> event2 -> event3)
But as you can see, the time portion of the date attributes is not correct; however, we have an hour attribute for each date. I want to correct the dates using the hour attributes, then find the difference between two of these dates and create new attributes that give the time difference in hours.
Sample for above table;
event2_time
4
5
4
I tried to merge the hour into the date and create a new attribute like this, but it doesn't work. (My goal is actually to correct the date value and get rid of the hour attribute.)
trainTable <- trainTable %>%
mutate("newParam" = as.POSIXct(paste(alert_date, alert_hour), format="%Y-%m-%d %H:%M:%S")
I could use some help, thanks in advance.
Data
structure(list(alert_date = structure(c(1394928000, 1395014400,
1395014400, 1395187200, 1395273600, 1395014400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), alert_hour = c("16:15:00", "20:53:00", "12:55:00",
"14:22:00", "12:07:00", "17:48:00"), firstInterv_date = structure(c(1394928000,
1395014400, 1395014400, 1395187200, 1395273600, 1395014400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), firstInterv_hour = c("16:35:00", "21:05:00", "13:10:00",
"14:42:00", "12:07:00", "18:08:00"), extinction_date = structure(c(1394928000,
1395014400, 1395014400, 1395187200, 1395273600, 1395014400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), extinction_hour = c("17:47:00", "22:46:00", "15:30:00",
"15:25:00", "13:14:00", "21:10:00")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
Try this solution using mapply. It uses strsplit to split the date from the hours.
# Build one combined date-time column per (date, hour) column pair of `dat`.
dat <- as.data.frame( dat ) # plain data.frame, so dat[, x] below yields a vector instead of a one-column tibble
# grep() returns the positions of the "date" and "hour" columns; mapply walks
# both position vectors in parallel, so the pairing relies on the date and hour
# columns appearing in the same relative order.
dat_new <- data.frame( setNames( mapply( function(x,y){
# keep only the text before the first space, i.e. the calendar date
tmp <- sapply( strsplit( as.character(dat[,x]), " "), function(z) z[1] );
# paste date and hour together and parse the result; wrapping it in list()
# makes mapply return one list element per column pair
# NOTE(review): no tz is passed to as.POSIXct, so the session time zone is used
list( as.POSIXct( paste(tmp,dat[,y] ) ) ) },
grep("date", colnames(dat)), grep("hour", colnames(dat)) ),
# the resulting columns are named a, b and c
c("a","b","c") ) )
dat_new$b - dat_new$a
Time differences in secs
[1] 1200 720 900 1200 0 1200
# convert back to a tibble if you need one
as_tibble( dat_new )
# A tibble: 6 x 3
a b c
<dttm> <dttm> <dttm>
1 2014-03-16 16:15:00 2014-03-16 16:35:00 2014-03-16 17:47:00
2 2014-03-17 20:53:00 2014-03-17 21:05:00 2014-03-17 22:46:00
3 2014-03-17 12:55:00 2014-03-17 13:10:00 2014-03-17 15:30:00
4 2014-03-19 14:22:00 2014-03-19 14:42:00 2014-03-19 15:25:00
5 2014-03-20 12:07:00 2014-03-20 12:07:00 2014-03-20 13:14:00
6 2014-03-17 17:48:00 2014-03-17 18:08:00 2014-03-17 21:10:00
Data
dat <- structure(list(alert_date = structure(c(1394928000, 1395014400,
1395014400, 1395187200, 1395273600, 1395014400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), alert_hour = c("16:15:00", "20:53:00", "12:55:00",
"14:22:00", "12:07:00", "17:48:00"), firstInterv_date = structure(c(1394928000,
1395014400, 1395014400, 1395187200, 1395273600, 1395014400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), firstInterv_hour = c("16:35:00", "21:05:00", "13:10:00",
"14:42:00", "12:07:00", "18:08:00"), extinction_date = structure(c(1394928000,
1395014400, 1395014400, 1395187200, 1395273600, 1395014400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), extinction_hour = c("17:47:00", "22:46:00", "15:30:00",
"15:25:00", "13:14:00", "21:10:00")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
Error in seq.Date(as.Date(retail$Valid_from), as.Date(retail$Valid_to), :
'from' must be of length 1
I have tried both the methods as mentioned in the question :
How should I deal with 'from' must be of length 1 error?
I basically want to repeat the quantity for each day in a given date range :
HSD_RSP Valid_from Valid_to
70 1/1/2018 15/1/2018
80 1/16/2018 1/31/2018
.
.
.
Method 1 :
byDay = ddply(retail, .(HSD_RSP), transform,
day=seq(as.Date(retail$Valid_from), as.Date(retail$Valid_to), by="day"))
Method 2 :
dt <- data.table(retail)
dt <- dt[,seq(as.Date(Valid_from),as.Date(Valid_to),by="day"),
by=list(HSD_RSP)]
HSD_RSP final_date
70 1/1/2018
70 2/1/2018
70 3/1/2018
70 4/1/2018
.
.
.
output of
dput(head(retail))
structure(list(HSD_RSP = c(61.68, 62.96, 63.14, 60.51, 60.34,
61.63), Valid_from = structure(c(1483315200, 1484524800, 1487116800,
1491004800, 1491523200, 1492300800), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), Valid_to = structure(c(1484438400, 1487030400,
1490918400, 1491436800, 1492214400, 1493510400), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
Convert to date, create a sequence of dates between Valid_from and Valid_to and unnest
library(tidyverse)
# Expand each row into one row per day between Valid_from and Valid_to.
df %>%
# convert every column whose name starts with "Valid" to Date, read as month/day/year
mutate_at(vars(starts_with("Valid")), as.Date, "%m/%d/%Y") %>%
# for each row, build the daily sequence from Valid_from to Valid_to
mutate(Date = map2(Valid_from, Valid_to, seq, by = "1 day")) %>%
# spread each sequence out so every date gets its own row
unnest(Date) %>%
# drop the range endpoints, leaving the remaining columns and Date
select(-Valid_from, -Valid_to)
# HSD_RSP Date
# <int> <date>
# 1 70 2018-01-01
# 2 70 2018-01-02
# 3 70 2018-01-03
# 4 70 2018-01-04
# 5 70 2018-01-05
# 6 70 2018-01-06
# 7 70 2018-01-07
# 8 70 2018-01-08
# 9 70 2018-01-09
#10 70 2018-01-10
# … with 21 more rows
data
df <- structure(list(HSD_RSP = c(70L, 80L), Valid_from = structure(1:2,
.Label = c("1/1/2018", "1/16/2018"), class = "factor"), Valid_to =
structure(1:2, .Label = c("1/15/2018", "1/31/2018"), class = "factor")),
class = "data.frame", row.names = c(NA, -2L))
Using Ronak Shah's data structure, using data.table:
library(data.table)
# The example data for this answer is stored in `df`, so convert that object.
dt <- as.data.table(df)
# For each HSD_RSP, parse the month/day/year strings and emit one row per day
# from Valid_from through Valid_to.
dt[, .(final_date = seq(as.Date(Valid_from, "%m/%d/%Y"),
                        as.Date(Valid_to, "%m/%d/%Y"),
                        by = "day")),
   by = HSD_RSP]
HSD_RSP final_date
1: 70 2018-01-01
2: 70 2018-01-02
3: 70 2018-01-03
4: 70 2018-01-04
....
data:
df <- structure(list(HSD_RSP = c(70L, 80L), Valid_from = structure(1:2,
.Label = c("1/1/2018", "1/16/2018"), class = "factor"), Valid_to =
structure(1:2, .Label = c("1/15/2018", "1/31/2018"), class = "factor")),
class = "data.frame", row.names = c(NA, -2L))
I have a large dataset with multiple groups within the dataset of IDs with Start & Stop datetimes. What I'm trying to do is within each group identify where a subgroup occurred. A subgroup within a group would be when two ID's overlap with their START & END datetime columns. Below is script to create a sample dataset in R for one group. What I want to do is within each group create a column called, "Grp" that groups those subgroups with overlapping START & END datetimes.
What I have...
structure(list(ID = c(1,2,3,4), START = structure(c(1490904000, 1490918400,
1508363100, 1508379300), tzone = "UTC", class = c("POSIXct",
"POSIXt")), END = structure(c(1492050600, 1492247700,
1509062400, 1509031800), tzone = "UTC", class = c("POSIXct",
"POSIXt"))), class = "data.frame", row.names = c(NA, -4L), .Names = c("ID","START",
"END"))
What I want is...
structure(list(ID = c(1,2,3,4), START = structure(c(1490904000, 1508379300,
1508363100, 1490918400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), END = structure(c(1492050600, 1509031800,
1509062400, 1492247700), tzone = "UTC", class = c("POSIXct",
"POSIXt")), Grp = c(1,2,2,1)), class = "data.frame", row.names = c(NA, -4L), .Names = c("ID","START",
"END","Grp"))
I've tried using lubridate's interval, and finding an overlap that way, but no luck. Any help would be greatly appreciated.
After sorting by START, the condition for a new group is that the END of the previous row is less than the START of the next row:
head(df1$END, -1) < tail(df1$START,-1)
df1 <- structure(list(ID = c(1,2,3,4), START = structure(c(1490904000, 1490918400,
1508363100, 1508379300), tzone = "UTC", class = c("POSIXct",
"POSIXt")), END = structure(c(1492050600, 1492247700,
1509062400, 1509031800), tzone = "UTC", class = c("POSIXct",
"POSIXt"))), class = "data.frame", row.names = c(NA, -4L), .Names = c("ID","START",
"END"))
df1
ID START END
1 1 2017-03-30 20:00:00 2017-04-13 02:30:00
2 2 2017-03-31 00:00:00 2017-04-15 09:15:00
3 3 2017-10-18 21:45:00 2017-10-27 00:00:00
4 4 2017-10-19 02:15:00 2017-10-26 15:30:00
# Sort by START first; the adjacent-row comparison below is only meaningful on
# rows that are in start order.
df1a <- df1[order(df1$START), ]
# A new group begins wherever the previous sorted row's END is earlier than the
# current row's START. The comparison reads from the sorted copy (df1a), not
# from df1, so the flags line up with the rows even when df1 was not already
# ordered by START. Only adjacent rows are compared.
df1a$grp <- cumsum(c(1, head(df1a$END, -1) < tail(df1a$START, -1)))
df1a
#---------------
ID START END grp
1 1 2017-03-30 20:00:00 2017-04-13 02:30:00 1
2 2 2017-03-31 00:00:00 2017-04-15 09:15:00 1
3 3 2017-10-18 21:45:00 2017-10-27 00:00:00 2
4 4 2017-10-19 02:15:00 2017-10-26 15:30:00 2
Here's a function that answers the first part of my response to the comment below:
# Assign a running group id to intervals that overlap.
#
# Both vectors must describe the same intervals, already sorted by start time.
#   endings:    interval end times (numeric, Date or POSIXct).
#   beginnings: interval start times, same length and type as `endings`.
# Returns a numeric vector of group ids (1, 2, ...), one per interval. An
# interval opens a new group only when it starts after every earlier interval
# has ended. Empty input yields an empty vector.
grp_overlaps <- function(endings, beginnings) {
  stopifnot(length(endings) == length(beginnings))
  if (length(endings) == 0L) {
    return(numeric(0))
  }
  # Track the latest end seen so far, so a long early interval keeps absorbing
  # later ones even when a shorter interval ended in between. cummax() is not
  # defined for date-time classes, hence the comparison on the raw numbers.
  latest_end <- cummax(as.numeric(endings))
  starts <- as.numeric(beginnings)
  cumsum(c(1, head(latest_end, -1) < tail(starts, -1)))
}
I have a dataframe consisting of:
two columns with start and end timestamps (POSIXct class) of various
projects
another timestamp (POSIXct class) column showing events which
occurred within the start and end timeframe
a project id column
Projects have multiple events naturally.
Projid Event BEGIN_DT END_DT
1 04/12/2013 09:00:00 04/12/2013 08:12:00 04/14/2013 20:14:00
1 04/13/2013 15:16:24 04/12/2013 08:12:00 04/14/2013 20:14:00
2 06/06/2012 18:00:00 06/06/2012 13:54:32 08/06/2012 23:59:43
2 06/07/2012 22:54:32 06/06/2012 13:54:32 08/06/2012 23:59:43
I would like to add a field showing for each event the 60 min time bucket it belongs to (as in first hour or second hour or n-th hour of the project etc...). How could this be done?
How about the following using a floor on the difftime in hours:
# Your sample data
df <- structure(list(
  Projid = c(1L, 1L, 2L, 2L),
  Event = structure(c(1365721200, 1365830184, 1338969600, 1339073672), class = c("POSIXct", "POSIXt"), tzone = ""),
  BEGIN_DT = structure(c(1365718320, 1365718320, 1338954872, 1338954872), class = c("POSIXct", "POSIXt"), tzone = ""),
  END_DT = structure(c(1365934440, 1365934440, 1344261583, 1344261583), class = c("POSIXct", "POSIXt"), tzone = "")),
  .Names = c("Projid", "Event", "BEGIN_DT", "END_DT"), row.names = c(NA, -4L), class = "data.frame")
# Add hour bin: whole hours elapsed between BEGIN_DT and Event, plus one, so
# that an event in the first 60 minutes of a project falls into bin 1.
# `units` is spelled out in full rather than relying on partial argument
# matching; the result keeps its difftime class and prints as "<n> hours".
df$hourBin <- floor(difftime(df$Event, df$BEGIN_DT, units = "hours")) + 1
df
#Projid Event BEGIN_DT END_DT hourBin
#1 1 2013-04-12 09:00:00 2013-04-12 08:12:00 2013-04-14 20:14:00 1 hours
#2 1 2013-04-13 15:16:24 2013-04-12 08:12:00 2013-04-14 20:14:00 32 hours
#3 2 2012-06-06 18:00:00 2012-06-06 13:54:32 2012-08-06 23:59:43 5 hours
#4 2 2012-06-07 22:54:32 2012-06-06 13:54:32 2012-08-06 23:59:43 34 hours
I have a data frame which looks like this
df = data.frame (time = c("2013-12-23 00:00:00", "2013-12-23 00:13:00", "2013-12-23 00:14:00", "2013-12-23 00:14:01",
"2013-12-24 00:00:00", "2013-12-24 00:12:00", "2013-12-24 00:15:00", "2013-12-24 00:16:00"),
value = c(1, 2, 3, 4, 5, 6, 7, 8))
I transform this data frame to an xts object and use the POSIXct format for the index
df = as.xts(as.numeric(as.character(df[,"value"])), order.by = as.POSIXct(df[,"time"]))
What I now need is to change all the indices whose time is 00:00:00 to 22:00:00.
All other time indices must stay as they are.
The resulting object looks like this
>df
[,1]
2013-12-23 00:13:00 2
2013-12-23 00:14:00 3
2013-12-23 00:14:01 4
2013-12-23 22:00:00 1
2013-12-24 00:12:00 6
2013-12-24 00:15:00 7
2013-12-24 00:16:00 8
2013-12-24 22:00:00 5
Thanks for your help! Pat
We could use sub to replace '00:00:00' with '22:00:00' in the original dataset and then do the xts conversion
# Rewrite midnight timestamps as 22:00:00 in the raw strings, then parse them.
shifted_time <- sub('00:00:00', '22:00:00', df$time)
df$time <- as.POSIXct(shifted_time, format='%Y-%m-%d %H:%M:%S')
# Build the xts object from the values, indexed by the adjusted times.
library(xts)
xts(x = df$value, order.by = df$time)
# [,1]
#2013-12-23 00:13:00 2
#2013-12-23 00:14:00 3
#2013-12-23 00:14:01 4
#2013-12-23 22:00:00 1
#2013-12-24 00:12:00 6
#2013-12-24 00:15:00 7
#2013-12-24 00:16:00 8
#2013-12-24 22:00:00 5
Here's a function that will shift the zero-hour of an xts object by n seconds.
# Shift every midnight (00:00:00) index value of an xts object by n seconds.
#
#   x: an xts object; anything else fails the stopifnot() check.
#   n: number of seconds added to each midnight timestamp (default 1).
# Returns the result of as.xts() applied to the object with the adjusted index.
shiftZeroHour <- function(x, n=1) {
  stopifnot(is.xts(x))
  # find zero hour, evaluated in the object's own time zone
  # (tzone() is the supported replacement for the deprecated indexTZ())
  plt <- as.POSIXlt(index(x), tz=tzone(x))
  isZeroHour <- plt$hour == 0 & plt$min == 0 & plt$sec == 0
  # shift zero hour index values; .index() works on the raw numeric index
  .index(x)[isZeroHour] <- .index(x)[isZeroHour] + n
  # ensure index is ordered properly
  as.xts(x)
}
Here is how to use it with your sample data:
xdf <- structure(c(1, 2, 3, 4, 5, 6, 7, 8), .Dim = c(8L, 1L),
index = structure(c(1387778400, 1387779180, 1387779240, 1387779241,
1387864800, 1387865520, 1387865700, 1387865760), tzone = "",
tclass = c("POSIXct", "POSIXt")), class = c("xts", "zoo"),
.indexCLASS = c("POSIXct", "POSIXt"), tclass = c("POSIXct", "POSIXt"),
.indexTZ = "", tzone = "")
shiftZeroHour(xdf, 60*60*22)