I'm trying to perform the Jarque-Bera test on hourly and daily return series in R. It worked fine for my daily return series; however, it doesn't work for the high-frequency data.
This is what I have done so far:
# Daily, hourly, and minute prices of Tether in USD
df.ohlc.daily_usdt <- get_ohlc(usdt, periods = 86400, after = "2014-01-01", datetime=TRUE)
df.ohlc.hourly_usdt <- get_ohlc(usdt, periods = 3600, after = "2014-01-01", datetime = TRUE)
df.ohlc.min_usdt <- get_ohlc(usdt, periods = 60, after = "2014-01-01", datetime = TRUE)
index_daily_usdt <- df.ohlc.daily_usdt$CloseTime
data_daily_usdt <- data.frame(df.ohlc.daily_usdt[,2:6])
df.ohlc.daily_usdt_xts <- xts(data_daily_usdt, index_daily_usdt)
usdt_daily_return <- dailyReturn(df.ohlc.daily_usdt_xts, log=TRUE)
index_hour_usdt <- df.ohlc.hourly_usdt$CloseTime
data_hour_usdt <- data.frame(df.ohlc.hourly_usdt[,2:6])
df.ohlc.hourly_usdt_xts <- xts(data_hour_usdt, index_hour_usdt)
usdt_hourly_return <- diff(log(Cl(df.ohlc.hourly_usdt_xts)), lag=1)
#Descriptive statistics Tether hourly log returns
usdt_mean_hourly = mean(usdt_hourly_return, na.rm = TRUE)
usdt_sd_hourly = sd(usdt_hourly_return, na.rm = TRUE)
usdt_min_hourly = min(usdt_hourly_return, na.rm = TRUE)
usdt_max_hourly = max(usdt_hourly_return, na.rm = TRUE)
usdt_JB_hourly = jarque.bera.test(usdt_hourly_return)
The last line throws: Error in jarque.bera.test(usdt_hourly_return) : NAs in x
The JB test using DescTools does not work for me. Can someone tell me what other options I have to remove the NAs so that I can perform the JB test with the tseries package?
Does
DescTools::JarqueBeraTest(usdt_hourly_return, na.rm = TRUE)
not work?
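If you would rather stay with tseries::jarque.bera.test, one option (a sketch, assuming the xts objects built in the question) is to drop the NAs first; the leading NA comes from diff():
library(tseries)
# diff(log(...)) leaves a leading NA in the hourly return series;
# removing it avoids the "NAs in x" error.
usdt_hourly_clean <- na.omit(usdt_hourly_return)
jarque.bera.test(as.numeric(usdt_hourly_clean))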
Sequential similarity of similar scale
library(quantmod)
#input symbol, start and end dates:
my_symbol<-readline("What symbol would you like analyzed?")
start_date <- readline("When do you want the series to start?")
end_date <- readline("When do you want the series to end?")
#Scaled series begins here
start_date2 <- readline("When do you want the scaled series to start?")
end_date2 <- readline("When do you want the scaled series to end?")
s_min<-readline("What is the minimum scaling factor you would like to use?")
s_max<-readline("What is the maximum scaling factor you would like to use?")
data1 <- as.data.frame(getSymbols(my_symbol, from = start_date, to = end_date, env = NULL))
data2<- getSymbols(my_symbol, from = start_date2, to = end_date2, env = NULL)
data3<- as.ts(data2[,4])
section<-length(data3)/3
#divide subset into 3 sections
subdata1<-data3[1:section]
subdata2<-data3[section:(section*2)]
subdata3<-data3[(section*2):(section*3)]
#Dataframe function goes here!
data_frames <- function(subdata, df) {
  section2 <- length(subdata)
  testdata1 <- data1[, 4]
  testdata2 <- testdata1[1:section2]
  co <- readline("What is the minimum correlation acceptable for the current part?")
  corr <- as.numeric(co)
  steps <- readline("What is the minimum step desired per iteration?")
  steps_numeric <- as.numeric(steps)
  # Iterate through testdata
  len2 <- length(subdata)
  x <- as.numeric(s_min)
  while (x < as.numeric(s_max)) {
    resize <- approx(seq_along(subdata), subdata, n = len2 * x)$y
    len <- length(resize)
    for (i in seq(1, length(testdata1), by = steps_numeric)) {
      testdata2 <- testdata1[(1 + i):(len + i)]
      corl <- cor(testdata2, resize)
      corl <- na.omit(corl)
      if (corl > corr) {
        maxes <- c(corl, (len / len2), i)
        write.table(maxes, "a.txt", append = TRUE, quote = FALSE,
                    row.names = FALSE, col.names = FALSE)
      }
    }
    x <- (x + 0.01)
  }
  df <- read.table("a.txt")
  file.remove("a.txt")
  df <- as.matrix(df)
  df <- matrix(df, ncol = 3, byrow = TRUE)
  df <- as.data.frame(df)
}
df<-data_frames(subdata1,df)
df<-df[ order(-df[,1], df[,1]), ]
df2<-data_frames(subdata2, df2)
df2<-df2[ order(-df2[,1], df2[,1]), ]
df3<-data_frames(subdata3, df3)
df3<-df3[ order(-df3[,1], df3[,1]), ]
colnames(df)<-c("Cor", "Scale", "Step")
colnames(df2)<-c("Cor", "Scale", "Step")
colnames(df3)<-c("Cor", "Scale", "Step")
I get an error when running the if statement: Error in if (corl[i] > corr) { : missing value where TRUE/FALSE needed
How can this be resolved?
Thanks in advance
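One likely cause (an assumption based on the loop above, not something I can verify without your data): when (1 + i):(len + i) runs past the end of testdata1, the subset is padded with NAs, cor() then returns NA, and if() cannot evaluate NA > corr. A minimal sketch of a guard that skips those iterations:
corl <- cor(testdata2, resize)  # NA whenever testdata2 contains NAs
if (!is.na(corl) && corl > corr) {
  maxes <- c(corl, (len / len2), i)
  write.table(maxes, "a.txt", append = TRUE, quote = FALSE,
              row.names = FALSE, col.names = FALSE)
}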
How can I subset a spacetime::STFDF (spatio-temporal data with a full space-time grid) by time?
So far, I have tried:
library("maps")
library("maptools")
library("spacetime")
library("plm")
states.m <- map("state", plot = FALSE, fill = TRUE)
IDs <- sapply(strsplit(states.m$names, ":"), function(x) x[1])
states <- map2SpatialPolygons(states.m, IDs = IDs)
yrs <- 1970:1986
time <- as.POSIXct(paste(yrs, "-01-01", sep = ""), tz = "GMT")
data("Produc")
Produc.st <- STFDF(states[-8], time, Produc[order(Produc[2], Produc[1]),])
Produc.st@time[c(1,5,17)]
Produc.st[Produc.st@time[c(1,5,17)]]
But that gives me the error: ncol(i) == 2 is not TRUE.
Any ideas?
Please try
Produc.st[, index(Produc.st@time[c(1,5,17)])]
i.e., time selection is done after the comma, and don't select with an xts object (which is what Produc.st@time[c(1,5,17)] is), but with a time (POSIXct) vector.
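For example (a sketch following the same pattern): index() turns the xts time slot into a POSIXct vector, which is what the temporal selector after the comma expects.
sel_times <- index(Produc.st@time[c(1, 5, 17)])  # a POSIXct vector
Produc.sub <- Produc.st[, sel_times]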
I am trying to run a correlation test, but instead I am getting the following error:
Error in cor.test.default(Indicator, Revpolu) : not enough finite observations
What does this mean?
The script stops at table_corr <- setDT(table)[, list(Revpolu, Indicator), by = list(Club, Date)]
The table_corr variable is storing 1161 observations of 4 variables.
library(data.table)
dat <- read.csv("corrtest.csv", header = TRUE)
dat$Date <- as.Date(paste0(format(strptime(as.character(dat$Date), "%m/%d/%y"),
                                  "%Y/%m"), "/1"))
table <- (setDT(dat)[, list(Revenue = sum(Revenues),
Hours = sum(Hours),
Indicator = mean(Indicator)),
by = list(Club, Date)])
table$Hours[table$Hours == 0 ] <-NA
table <- table[complete.cases(table), ]
table[, Revpolu := Revenue / Hours]
table_corr <- setDT(table)[, list(Revpolu, Indicator),
by = list(Club, Date)]
testing <- table_corr[, list(Correlation = cor.test(Indicator, Revpolu)[["estimate"]]),
by = Club]
testing <- testing[complete.cases(testing), ]
correl <- testing[, round(Correlation, digits = 2)]
done <- round(mean(correl), digits = 2)
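For what it's worth, cor.test() raises "not enough finite observations" when a group has fewer than three complete (finite) pairs for the default Pearson test. A possible guard (a sketch built on the table_corr object above) is to compute the correlation only for Clubs with enough complete pairs:
testing <- table_corr[, {
  ok <- is.finite(Indicator) & is.finite(Revpolu)
  if (sum(ok) >= 3) {
    list(Correlation = cor.test(Indicator[ok], Revpolu[ok])[["estimate"]])
  } else {
    list(Correlation = NA_real_)  # too few complete pairs for this Club
  }
}, by = Club]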
I'm trying to aggregate a data frame to obtain a table with weekly averages of a variable. I found that the TSAgg package provides a nice solution, and I've been using it to aggregate data yearly and monthly. However, the function to aggregate data weekly simply does not work as described. Does anyone have an idea how I can fix this?
For instance, following the manual:
require(TSAgg)
#Load the data:
data(foo)
##Format the data using the timeSeries function.
foo.ts<-timeSeries(foo[,1], "%d/%m/%Y %H:%M",foo[,3])
##Aggregate the data into 6-month blocks using the mean
(mean.month <- monthsAgg(foo.ts,mean,6))
#Aggregate the data into weeks, using 7 days and mean:
(foo.week<-daysAgg(foo.ts,mean,7) )
The last command doesn't work. The function is the following:
daysAgg <- function(data, process, multiple = NULL, na.rm = FALSE) {
  if (is.null(multiple)) {
    multiple = 1
  }
  if (multiple == 1) {
    day <- aggregate(data[, 8:length(data)], list(day = data$day,
      month = data$month, year = data$year), process, na.rm = na.rm)
    days <- ymd(paste(day$year, day$month, day$day))
    data2 <- data.frame(date = days, data = day[, 4:length(day)])
    names(data2) <- c("Date", names(data[8:length(data)]))
    return(data2)
  }
  temp <- data
  day <- aggregate(list(data[, 8:length(data)], count = 1),
    list(day = data$day, month = data$month, year = data$year),
    process, na.rm = na.rm)
  days <- ymd(paste(day$year, day$month, day$day))
  data <- data.frame(date = days, day[, 5:length(day) - 1],
    count = day[length(day)])
  days = paste(multiple, "days")
  all.dates <- seq.Date(as.Date(data$date[1]),
    as.Date(data$date[length(data[, 1])]), by = "day")
  dates <- data.frame(date = all.dates)
  aggreGated <- merge(dates, data, by = "date", all.x = TRUE)
  aggreGated$date <- rep(seq.Date(as.Date(data$date[1]),
    as.Date(data$date[length(data[, 1])]), by = days),
    each = multiple, length = length(all.dates))
  results <- aggregate(list(aggreGated[2:length(aggreGated)]),
    list(date = aggreGated$date), process, na.rm = TRUE)
  results <- subset(results, results$count != 0)
  results <- results[, -length(results)]
  names(results) <- c("Date", names(temp[8:length(temp)]))
  return(results)
}
The problem in the code stems from its use of the function ymd, which attaches " UTC" to the end of all dates it outputs. You can mask that function by defining ymd again with
ymd <- function(x) {
  as.Date(x, "%Y %m %d")
}
before you call daysAgg.
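With the mask in place, the weekly aggregation from the question should then run (a sketch, reusing the manual's foo data):
foo.week <- daysAgg(foo.ts, mean, 7)
head(foo.week)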
My R learning curve has gotten the best of me today. I have a list of multi-series zoo objects, and I'm trying to rename the columns in each to the same values. I attempt this in the last line; it runs without error, but the names aren't changed. Any ideas would be great.
require("zoo")
Get monthly data of stocks.
symbs = c('AAPL', 'HOV', 'NVDA')
importData <- lapply(symbs, function(symb) get.hist.quote(instrument= symb,
start = "2000-01-01", end = "2013-07-15", quote="AdjClose", provider = "yahoo",
origin="1970-01-01", compression = "m", retclass="zoo"))
names(importData) <- symbs
#Calculate monthly pct chgs of stocks.
monthlyPctChgs = lapply(importData, function(x) diff(x, lag = 1) / lag(x, k = -1))
names(monthlyPctChgs) <- symbs
#Merge the pct chgs and the monthly closing prices
pricingAndPerfsMerged = mapply(merge, importData, lag(monthlyPctChgs, k = -1),
SIMPLIFY = FALSE)
#Rename the columns in each zoo.
lapply(pricingAndPerfsMerged, function(x) colnames(x) = c('AdjClose', 'MonthlyPerf'))
You're renaming columns of a copy. This would be a good place to use a for loop instead:
for (i in seq_along(pricingAndPerfsMerged)) {
  colnames(pricingAndPerfsMerged[[i]]) <- c('AdjClose', 'MonthlyPerf')
}
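Alternatively, if you prefer lapply(), return the modified object and reassign the result (a sketch):
pricingAndPerfsMerged <- lapply(pricingAndPerfsMerged, function(x) {
  colnames(x) <- c('AdjClose', 'MonthlyPerf')
  x  # return the renamed zoo object so it replaces the list element
})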