merge by date not time in R

I have a large time-series file that I imported from my working directory and then turned into log returns with:
library(xts)
library(PerformanceAnalytics)  # for CalculateReturns()

ALL <- read.csv("/Volumes/3TB/ALLsince1996.csv", header = TRUE)
all <- xts(ALL[, 2:dim(ALL)[2]], order.by = as.POSIXct(ALL[, 1], format = "%m/%d/%y"))
RETS <- CalculateReturns(all, method = "log")
RETS <- na.locf(RETS)
RETS[is.na(RETS)] <- 0
I then download the 3-month Treasury from FRED with:
# 3-Mo Treasury
library(quantmod)  # for getSymbols()
data <- new.env()
FEDs <- c("DGS3MO")  # DGS3MO: 3-Mo Treasury constant maturity
getSymbols(FEDs, src = "FRED", env = data)
TB3 <- data$DGS3MO
TB3 <- TB3 / 100 / 365
TB3 <- na.locf(TB3["1996-01-01::"])
I then try to combine the log returns series with the 3-month treasury using cbind() and get the following:
both <- cbind(RETS[,1], TB3)
both:
row.names ZX.Adjusted DGS3MO
1 1995-12-31 16:00:00 NA NA
2 1996-01-01 00:00:00 0 NA
3 1996-01-01 16:00:00 NA 0.0001424658
4 1996-01-02 00:00:00 0 NA
5 1996-01-02 16:00:00 NA 0.0001424658
6 1996-01-03 00:00:00 0 NA
7 1996-01-03 16:00:00 NA 0.0001421918
8 1996-01-04 00:00:00 0 NA
9 1996-01-04 16:00:00 NA 0.0001421918
But this returns an object with two timestamps per day, such as 1996-01-01 00:00:00 and 1996-01-01 16:00:00. What I would like is to combine the two by date, not by time.
REPRODUCIBLE DATA:
# Pull data with getSymbols()
library(quantmod)
dataset <- xts()
symbols <- c("GLD", "IWM", "SPY", "GS")
system.time(
  for (i in 1:length(symbols)) {
    symbol <- symbols[i]
    tryit <- try(getSymbols(symbol, from = "1995-12-31", src = 'yahoo'))
    if (inherits(tryit, "try-error")) {
      i <- i + 1
    } else {
      data <- getSymbols(symbol, from = "1995-12-31", src = 'yahoo')
      dataset <- merge(dataset, Ad(get(symbols[i])))
      rm(symbol)
    }
  }
)
Because it was a large file, I saved dataset and index(dataset) in two separate files, as I could not save the index with the dataset:
write.csv(dataset, "dataset.csv")
write.csv(index(dataset), "index.csv")
I later opened the index.csv file in Excel, manually pasted the index into dataset.csv, and saved the file. I then tried to reopen the .csv into my workspace and calculate log returns:
read.csv("dataset.csv",header=T)-> ALL
all <- xts(ALL[,2:dim(ALL)[2]], order.by= as.POSIXct(ALL[,1], format="%m/%d/%y"))
RETS <- CalculateReturns(all, method= c("log"))
RETS<- na.locf(RETS)
RETS[is.na(RETS)] <- 0
Next, download the 3-Month T-Bill with the same code as above...
# 3-Mo Treasury
data <- new.env()
FEDs <- c("DGS3MO")  # DGS3MO: 3-Mo Treasury constant maturity
getSymbols(FEDs, src = "FRED", env = data)
TB3 <- data$DGS3MO
TB3 <- TB3 / 100 / 365
TB3 <- na.locf(TB3["1996-01-01::"])
Now try to combine RETS[,1] with TB3...
both <- cbind(RETS[,1], TB3)

Rime, to reformat the index without the time information, use the strptime function and then merge the series as suggested above.
index(dataset) <- strptime(index(dataset), "%Y-%m-%d")
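A minimal sketch of the full flow, assuming RETS and TB3 exist as built in the question; converting both indexes to plain dates (as.Date works here as well as strptime) and then merging gives one row per day:
# Sketch: strip the intraday time from both indexes, then merge on the date
index(RETS) <- as.Date(index(RETS))
index(TB3) <- as.Date(index(TB3))
both <- merge(RETS[, 1], TB3, join = "inner")  # one row per trading day
head(both)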
A much easier and more elegant way to accomplish this is to use the makeReturnFrame function from the qmao package (https://r-forge.r-project.org/R/?group_id=1113), which bundles many utility and helper functions for this kind of task.
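qmao is hosted on R-Forge rather than CRAN, so (as a sketch of the usual R-Forge install) it would typically be installed with:
install.packages("qmao", repos = "http://R-Forge.R-project.org")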
library(quantmod)
library(qmao)
symbols <- c( "GLD", "IWM", "SPY", "GS")
getSymbols(symbols, from="1995-12-31", src='yahoo')
rets <- makeReturnFrame(symbols,silent = TRUE)
FEDs <- c("DGS3MO")  # DGS3MO: 3-Mo Treasury constant maturity
data <- new.env()
getSymbols(FEDs, src = "FRED", env = data)
TB3 <- data$DGS3MO
TB3 <- TB3 / 100 / 365
TB3 <- na.locf(TB3["1996-01-01::"])
series.merged <- merge(rets,TB3,join = "inner")
> tail(series.merged)
GLD IWM SPY GS DGS3MO
2014-08-07 4.050035e-03 -0.004844797 -0.005429405 -0.0037775986 8.219178e-07
2014-08-08 7.924872e-05 0.009666235 0.011502456 0.0185147075 8.219178e-07
2014-08-11 -1.824311e-03 0.009485466 0.002893760 0.0011603622 1.095890e-06
2014-08-12 2.381425e-04 -0.006905738 -0.001394160 -0.0007540822 8.219178e-07
2014-08-13 1.665411e-03 0.007787650 0.006746170 0.0002320859 1.095890e-06
2014-08-14 8.712527e-04 0.001497468 0.004710710 0.0020863525 1.095890e-06

The time component has to be removed from the indexes of both datasets for a proper merge across the date index.
# Read stock returns data
require(quantmod)
data_agg <- xts()
symbols <- c("GLD", "IWM", "SPY", "GS")
for (i in 1:length(symbols)) {
  symbol <- symbols[i]  # assign
  tryit <- try(getSymbols(symbol, from = "1995-12-31", src = 'yahoo'))
  if (inherits(tryit, "try-error")) {
    i <- i + 1
  } else {
    data <- getSymbols(symbol, from = "1995-12-31", src = 'yahoo')
    data_agg <- merge(data_agg, Ad(get(symbols[i])))
    rm(symbol)
  }
}
head(data_agg)
# GLD.Adjusted IWM.Adjusted SPY.Adjusted GS.Adjusted
#1996-01-02 05:30:00 NA NA 44.91 NA
#1996-01-03 05:30:00 NA NA 45.03 NA
#1996-01-04 05:30:00 NA NA 44.60 NA
#1996-01-05 05:30:00 NA NA 44.51 NA
#1996-01-08 05:30:00 NA NA 44.68 NA
#1996-01-09 05:30:00 NA NA 43.91 NA
# Read interest rate data
data <- new.env()
FEDs <- c("DGS3MO")  # DGS3MO: 3-Mo Treasury constant maturity
getSymbols(FEDs, src = "FRED", env = data)
TB3 <- data$DGS3MO
TB3 <- TB3/100/365
TB3 <- na.locf(TB3["1996-01-01::"])
head(TB3)
# DGS3MO
#1996-01-01 NA
#1996-01-02 0.0001424658
#1996-01-03 0.0001424658
#1996-01-04 0.0001421918
#1996-01-05 0.0001421918
#1996-01-08 0.0001419178
cbind is not advisable for this merge, as it simply places the inputs side by side after coercing them to a common format.
merge.xts is the method for merging xts objects.
# The indexes of the datasets contain a time component, hence the doubled rows here
head(merge.xts(data_agg,TB3))
# GLD.Adjusted IWM.Adjusted SPY.Adjusted GS.Adjusted DGS3MO
#1996-01-01 NA NA NA NA NA
#1996-01-01 NA NA 44.91 NA NA
#1996-01-02 NA NA NA NA 0.0001424658
#1996-01-02 NA NA 45.03 NA NA
#1996-01-03 NA NA NA NA 0.0001424658
#1996-01-03 NA NA 44.60 NA NA
head(as.Date(index(data_agg)))
#[1] "1996-01-02" "1996-01-03" "1996-01-04" "1996-01-05" "1996-01-08" "1996-01-09"
#Only one observation per day, since there are no duplicate dates
#We can safely strip the time component from dates
any(duplicated(as.Date(index(data_agg))))
#[1] FALSE
any(duplicated(as.Date(index(TB3))))
#[1] FALSE
#Keep only the date component across both datasets
index(data_agg)<-as.Date(index(data_agg))
index(TB3)<-as.Date(index(TB3))
#Properly merged datasets across date index
head(merge.xts(data_agg,TB3))
# GLD.Adjusted IWM.Adjusted SPY.Adjusted GS.Adjusted DGS3MO
#1995-12-31 NA NA 44.91 NA NA
#1996-01-01 NA NA 45.03 NA 0.0001424658
#1996-01-02 NA NA 44.60 NA 0.0001424658
#1996-01-03 NA NA 44.51 NA 0.0001421918
#1996-01-04 NA NA NA NA 0.0001421918
#1996-01-06 NA NA 44.68 NA NA
You can use complete.cases to keep non-missing returns only.
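A minimal sketch of that last step, assuming the merged object from above:
merged <- merge.xts(data_agg, TB3)
merged <- merged[complete.cases(merged), ]  # drop rows with any NA
head(merged)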

Related

format input to ts object in R

I have some data with a time column expressed as week.year and a corresponding value that was measured in that week.
Week-Year Units
01.2020 39.12727273
02.2020 33.34545455
03.2020 118.7181818
04.2020 83.71818182
05.2020 58.56985
. .
52.2020 89.54651534
I have to create a ts object which takes these Week-Year values as input.
The reason for requiring this step is that values are sometimes missing for certain weeks, so using an auto-generated time scale (start=, end=, frequency=) will misalign the readings.
Is there any way of achieving this, or any way to accommodate such a situation?
R novice here, would really appreciate some guidance. :)
Assuming the input is the data frame DF shown reproducibly in the Note at the end, convert it to a zoo object and then use as.ts to create a ts series with frequency 52.
library(zoo)
week <- as.integer(DF[[1]])                  # digits before the dot = week number
year <- as.numeric(sub("...", "", DF[[1]]))  # drop the first three characters, leaving the year
z <- zoo(DF[[2]], year + (week - 1) / 52)    # fractional-year index, one slot per week
tt <- as.ts(z)                               # regular ts; missing weeks become NA
tt
## Time Series:
## Start = c(2020, 1)
## End = c(2020, 52)
## Frequency = 52
## [1] 39.12727 33.34545 118.71818 83.71818 58.56985 NA NA
## [8] NA NA NA NA NA NA NA
## [15] NA NA NA NA NA NA NA
## [22] NA NA NA NA NA NA NA
## [29] NA NA NA NA NA NA NA
## [36] NA NA NA NA NA NA NA
## [43] NA NA NA NA NA NA NA
## [50] NA NA 89.54652
frequency(tt)
## [1] 52
class(tt)
## [1] "ts"
Note
Lines <- " Week-Year Units
01.2020 39.12727273
02.2020 33.34545455
03.2020 118.7181818
04.2020 83.71818182
05.2020 58.56985
52.2020 89.54651534"
DF <- read.table(text = Lines, header = TRUE, colClasses = c("character", NA))

R: creating xts changes dataset, losing data

When creating an xts object from a data.frame I seem to lose some data (approximately 3,000 observations out of 33,000).
My dataset is as follows (the time is day-month-year, EU format):
> head(mesdonnees)
time value
1 05-03-2006 04:07 NA
2 05-03-2006 04:17 NA
3 05-03-2006 04:27 NA
4 05-03-2006 04:37 NA
5 05-03-2006 04:47 NA
6 05-03-2006 04:57 NA
Due to the format I had to extract the different parts of the date (at least I couldn't get as.POSIXct to work with this format).
Here is how I did it:
# Extract characters and define as S....
Syear <- substr(mesdonnees$time, 7,10)
Smonth <- substr(mesdonnees$time, 4,5)
Sday <- substr(mesdonnees$time, 1, 2)
#Gather all parts and use "-" as sep
datetext <- paste(Syear, Smonth, Sday, sep="-")
#define format of each part of the string
formatdate<-as.POSIXct(datetext, format="%Y-%m-%d", tz = "GMT")
I then try to create my xts with...
xtsdata <- xts(mesdonnees$value, order.by = formatdate, tz = "GMT")
... but when doing this I get some quite weird results: the first value is in 1900
> head(xtsdata)
[,1]
1900-01-04 NA
2006-03-05 NA
2006-03-05 NA
2006-03-05 NA
2006-03-05 NA
2006-03-05 NA
and many (3000) dates are not kept:
> xtsdata[30225:30233,]
[,1]
2006-12-31 0
2006-12-31 0
2006-12-31 0
2006-12-31 0
<NA> NA
<NA> NA
<NA> NA
<NA> NA
<NA> NA
When looking at what should be the same line in both my data.frame and my xts, I can see that the lines are offset (the date format changed in the xts object creation):
> mesdonnees[25617,]
time value
25617 08-11-2006 23:51 0
> xtsdata[25617,]
[,1]
2006-11-25 0.27
How is it that my data are offset? I tried changing the tz but it doesn't affect it. I removed all duplicates using the dplyr package; it doesn't affect the xts results either. Thank you for your help!
After changing my xts code to the one suggested by Joshua:
xtsdata <- xts(mesdonnees$value, order.by = as.POSIXct(mesdonnees$time, tz = "GMT", format = "%d-%m-%Y %H:%M"))
... my data show properly for the "last" part, but I now have a different problem. The first 2300 observations show the following results (using xtsdata[1500,], or any row < 2300, displays the same):
> view(xtsdata):
0206-06-30 23:08:00 NA
0206-06-30 23:18:00 NA
0206-06-30 23:28:00 NA
1900-01-04 12:00:00 NA
2006-03-05 04:07:00 NA
2006-03-05 04:17:00 NA
I noticed this error before and thought it was due to the date format; maybe it is not? Also, when looking at the xtsdata I do not get the same results for the same row (the last rows are correct though):
> mesdonnees[2360,]
time value
2360 23-03-2006 03:09 NA
> xtsdata[2360,]
[,1]
2006-03-05 09:07:00 NA
As requested:
> str(mesdonnees)
'data.frame': 32556 obs. of 2 variables:
$ time : chr "05-03-2006 04:07" "05-03-2006 04:17" "05-03-2006 04:27" "05-03-2006 04:37" ...
$ value: num NA NA NA NA NA NA NA NA NA NA ...
And if needed:
An ‘xts’ object on 0206-06-01 00:09:00/2006-12-31 23:29:00 containing:
Data: num [1:32556, 1] NA NA NA NA NA NA NA NA NA NA ...
Indexed by objects of class: [POSIXct,POSIXt] TZ: GMT
xts Attributes:
NULL
The problem is that you only include the date portion of the timestamp in datetext and formatdate, but your data have dates and times.
You also do not need to do all the string subsetting. You can achieve the same result by specifying the format argument in your as.POSIXct call:
xtsdata <- xts(mesdonnees$value,
               as.POSIXct(mesdonnees$time, tz = "GMT", format = "%d-%m-%Y %H:%M"))
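As a quick sanity check (a sketch, not part of the original answer), you can count how many timestamps fail to parse with that format; any NAs here would explain rows ending up with a missing or odd index:
parsed <- as.POSIXct(mesdonnees$time, tz = "GMT", format = "%d-%m-%Y %H:%M")
sum(is.na(parsed))  # number of rows whose timestamp could not be parsed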

1 period TS lag return (ROC) to mimic stock momentum

I am relatively new to R and have read as much as I have been able to on the topic but can't seem to find what I am looking for on the other questions.
I am looking to calculate the rate of change (momentum) using TTR and ROC for 12 periods, using monthly data, but I would like to ignore the most recent month. In other words, I am looking to find the ROC for t-2 to t-12 (the January 2016 12-month momentum of a stock excludes January 2016). This is the norm for calculating momentum in the portfolio-construction literature.
My data is all stocks that have been listed on the South African stock exchange (JSE). The date header is the 1st column (i.e. the date is the variable in the rows) and the stocks are listed in the subsequent columns.
I know my code below is pretty straightforward; however, I have tried a few things and they have given errors. As I have about 250 stocks (columns) over 20 years, it's not advisable to create a new lagged variable for each observation.
library(TTR)
x <- Prices.df
x$DATE <- as.Date(x$DATE, format = "%Y/%m/%d")
y <- xts(x[, -1], order.by = x$DATE)
roc <- ROC(y, n = 12, type = "discrete")
Any help would be much appreciated.
Just use lag of roc. The following code works with simulated data (18 periods and 2 assets):
set.seed(123) # to reproduce the same results
x <- data.frame(matrix(rnorm(18*2,100,2),ncol=2))
x$DATE <- seq.Date(as.Date("2000/01/01"),length.out = 18,by="1 month")
x <- x[,c(3,1,2)]
library(TTR)
library(xts)
y <- xts(x[,-1], order.by = x$DATE)
roc <- ROC(y, n = 12, type = "discrete")
cbind(y,lag(roc))
X1 X2 X1.1 X2.1
1999-12-31 98.87905 101.40271 NA NA
2000-01-31 99.53965 99.05442 NA NA
2000-02-29 103.11742 97.86435 NA NA
2000-03-31 100.14102 99.56405 NA NA
2000-04-30 100.25858 97.94799 NA NA
2000-05-31 103.43013 98.54222 NA NA
2000-06-30 100.92183 98.74992 NA NA
2000-07-31 97.46988 96.62661 NA NA
2000-08-31 98.62629 101.67557 NA NA
2000-09-30 99.10868 100.30675 NA NA
2000-10-31 102.44816 97.72373 NA NA
2000-11-30 100.71963 102.50763 NA NA
2000-12-31 100.80154 100.85293 NA NA
2001-01-31 100.22137 99.40986 0.019442887 -0.005421782
2001-02-28 98.88832 101.79025 0.006848733 0.003588329
2001-03-31 103.57383 101.75627 -0.041012460 0.040115718
2001-04-30 100.99570 101.64316 0.034279755 0.022018156
2001-05-31 96.06677 101.37728 0.007352244 0.037725848
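Applied to the question's own objects (a sketch assuming y is built from Prices.df as in the question), the same idea is just a lag of the 12-period ROC, so the value reported for month t only uses prices up to month t-1:
library(TTR)
library(xts)
roc12 <- ROC(y, n = 12, type = "discrete")  # 12-period rate of change
mom <- lag(roc12, k = 1)                    # shift forward one month so the latest month is excluded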

How to fetch a 3-year historical price series from Oanda with R?

I would like to process Bitcoin prices in R but I'm unable to download the time series from Yahoo and Google.
From Yahoo the BTCUSD historical time series is missing, and Google doesn't recognize the URL formatted by getSymbols when the symbol is "CURRENCY:EURUSD". I know R expects the ":" to be a list, so I applied a workaround I found on Stack Overflow to turn CURRENCY:EURUSD into CURRENCY.EURUSD, but Google still cannot process the request.
Downloading from Oanda works like a charm, but a request cannot exceed 500 days. I tried the workaround below to bypass the limitation, but it fails to correctly populate the prices object in which I have other symbols:
For some reason BTCUSD prices are missing for 2012 and part of 2013.
Also, some symbols from the symbols list get NA values with the workaround.
tail(prices) (with the loop below)
UUP FXB FXE FXF FXY SLV GLD BTC
2014-08-31 NA NA NA NA NA NA NA 506.809
2014-09-30 22.87 159.33 124.48 102.26 88.80 16.35 116.21 375.386
2014-10-31 23.09 157.20 123.49 101.45 86.65 15.50 112.66 341.852
2014-11-30 NA NA NA NA NA NA NA 378.690
2014-12-31 23.97 153.06 119.14 98.16 81.21 15.06 113.58 312.642
2015-01-24 NA NA NA NA NA NA NA 229.813
Extract of print(prices) (with the loop below)
2013-06-28 22.56 150.17 128.93 103.92 98.63 18.97 119.11 NA
2013-07-31 22.09 150.12 131.74 105.99 99.93 19.14 127.96 NA
2013-08-30 22.19 152.93 130.84 105.45 99.63 22.60 134.62 NA
2013-09-30 21.63 159.70 133.85 108.44 99.47 20.90 128.18 133.794
2013-10-31 21.63 158.10 134.29 108.03 99.38 21.10 127.74 203.849
2013-11-30 NA NA NA NA NA NA NA 1084.800
2013-12-31 21.52 163.30 135.99 109.82 92.76 18.71 116.12 758.526
2014-01-31 21.83 161.95 133.29 108.00 95.58 18.45 120.09 812.097
tail(prices) (without the loop below)
UUP FXB FXE FXF FXY SLV GLD
2014-08-29 22.02 163.23 129.54 106.42 93.61 18.71 123.86
2014-09-30 22.87 159.33 124.48 102.26 88.80 16.35 116.21
2014-10-31 23.09 157.20 123.49 101.45 86.65 15.50 112.66
2014-11-28 23.47 153.46 122.46 101.00 82.01 14.83 112.11
2014-12-31 23.97 153.06 119.14 98.16 81.21 15.06 113.58
2015-01-23 25.21 147.23 110.33 110.95 82.57 17.51 124.23
What is wrong with this code? Thanks!
require(quantmod)
require(PerformanceAnalytics)
symbols <- c(
"UUP",
"FXB",
"FXE",
"FXF",
"FXY",
"SLV",
"GLD"
)
getSymbols(symbols, from="2004-01-01")
prices <- list()
for (i in 1:length(symbols)) {
  prices[[i]] <- Cl(get(symbols[i]))
}
BTC <- list()
for (i in 1:2) {
  BTC[[1]] <- getFX("BTC/USD",
                    from = Sys.Date() - 499 * (i + 1),
                    to = Sys.Date() - 499 * i,
                    env = parent.frame(),
                    auto.assign = FALSE)
}
BTC[[1]] <- getFX("BTC/USD",
                  from = Sys.Date() - 499,
                  to = Sys.Date(),
                  env = parent.frame(),
                  auto.assign = FALSE)
prices[[length(symbols) + 1]] <- BTC[[1]]
prices <- do.call(cbind, prices)
colnames(prices) <- gsub("\\.[A-z]*", "", colnames(prices))
ep <- endpoints(prices, "months")
prices <- prices[ep,]
prices <- prices["1997-03::"]
Your for loop isn't using i to index the list, and after the for loop you're overwriting the results anyway (the list was of length 1 because BTC[[1]] was hardcoded).
Try this:
btc <- do.call(rbind, lapply(0:2, function(i) {
  getFX("BTC/USD",
        from = Sys.Date() - 499 * (i + 1),
        to = Sys.Date() - 499 * i,
        env = NULL)
}))
prices <- do.call(cbind, c(prices, list(btc)))
Edit: Here's a more complete example
library(quantmod)
# Use tryCatch() in case we try to get data too far in the past that
# Oanda doesn't provide. Return NULL if there is an error, and Filter
# to only include data that has at least 1 row.
btc <- do.call(rbind, Filter(NROW, lapply(0:5, function(i) {
  tryCatch(getFX("BTC/USD",
                 from = Sys.Date() - 499 * (i + 1),
                 to = Sys.Date() - 499 * i,
                 env = NULL), error = function(e) NULL)
})))
symbols <- c(
"UUP",
"FXB",
"FXE",
"FXF",
"FXY",
"SLV",
"GLD"
)
e <- new.env()
getSymbols(symbols, from=start(btc), env=e)
prices <- do.call(cbind, c(eapply(e, Cl)[symbols], list(btc)))
colnames(prices) <- gsub("\\.[A-z]*", "", colnames(prices))
head(na.locf(prices)[endpoints(prices, "months")])
# UUP FXB FXE FXF FXY SLV GLD BTC
#2010-07-31 23.74 156.15 129.88 95.38 114.60 17.58 115.49 0.06386
#2010-08-31 24.12 152.60 126.25 97.80 117.83 18.93 122.08 0.06441
#2010-09-30 22.84 156.33 135.81 101.00 118.57 21.31 127.91 0.06194
#2010-10-31 22.37 159.45 138.69 100.81 122.93 24.17 132.62 0.18530
#2010-11-30 23.50 154.72 129.30 98.87 118.16 27.44 135.42 0.27380
#2010-12-31 22.71 155.77 133.09 106.25 121.75 30.18 138.72 0.29190

xts assignment changes column class

I have a data.frame earlyCloses defined as follows:
earlyCloses <- read.table('EarlyCloses.txt', header=T, colClasses= c(rep("character", 3)))
earlyCloses
StartDate EndDate EarlyClose
1 2012-12-24 2012-12-24 13:00
I define an xts object pricesXts as follows:
prices <- read.table('sample.txt', header=T, colClasses=c("character", "numeric"))
pricesXts = xts(prices$Close, as.POSIXct(prices$Date, tz='America/New_York'))
colnames(pricesXts) = c("Close")
pricesXts$CloseTime = NA
pricesXts
Close CloseTime
2012-12-21 13190.84 NA
2012-12-24 13139.08 NA
2012-12-26 13114.59 NA
2012-12-27 13096.31 NA
2012-12-28 12938.11 NA
Now I execute a for loop over the rows of earlyCloses and set the CloseTime of pricesXts.
for (i in 1:nrow(earlyCloses)) {
  pricesXts[paste(earlyCloses[i, "StartDate"], earlyCloses[i, "EndDate"], sep = '/'), 2] <- earlyCloses[i, "EarlyClose"]
}
pricesXts
Close CloseTime
2012-12-21 "13190.84" NA
2012-12-24 "13139.08" "13:00"
2012-12-26 "13114.59" NA
2012-12-27 "13096.31" NA
2012-12-28 "12938.11" NA
Why has the class of the Close column in the xts object changed from numeric to character? Is this because an xts object is represented internally as a matrix? Is there a way to avoid this conversion?
xts is encoded internally as a matrix (for better performance). Since you just want to store the early close, you can convert it to a numeric value, for example:
strptime(earlyCloses$EarlyClose,'%H:%M')$hour
Then
for (i in 1:nrow(earlyCloses))
  pricesXts[paste(earlyCloses[i, "StartDate"],
                  earlyCloses[i, "EndDate"],
                  sep = '/'), 2] <- strptime(earlyCloses[i, "EarlyClose"], '%H:%M')$hour
Close CloseTime
2012-12-21 13191 NA
2012-12-24 13139 13
2012-12-26 13115 NA
2012-12-27 13096 NA
2012-12-28 12938 NA
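If the hour alone loses too much information, one alternative (a sketch, not from the original answer) is to store the early close as minutes after midnight, which also keeps the xts numeric and is easy to convert back:
# Encode "13:00" as minutes after midnight (13 * 60 = 780)
ec <- strptime(earlyCloses$EarlyClose, '%H:%M')
earlyCloses$CloseMinutes <- ec$hour * 60 + ec$min
for (i in 1:nrow(earlyCloses))
  pricesXts[paste(earlyCloses[i, "StartDate"], earlyCloses[i, "EndDate"], sep = '/'), 2] <- earlyCloses[i, "CloseMinutes"]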
