I am trying to apply this simple technical analysis indicator to an xts dataframe called prices. But I can't manage to create the loop for the signal. Do you have some suggestions?
library(TTR)
library(Hmisc)
library(xts)
prices = structure(c(70.27, 70.29, 70.31, 70.67, 70.41, 70.53, 70.56,
69.61, 70.32, 69.97, 70.13, 68.88, 68.97, 70.75, 71.32, 71.32,
71.32, 72.02, 72.48, 73.33, 73.59, 73.93, 73.47, 72.13, 72.17,
73.18, 72.59, 73.34, 73.43, 72.78, 72.43, 72.3, 71.27, 71.51,
71.94, 71.1, 69.77, 70.02, 70.26, 69.6, 70.13, 70.13, 71.27,
70.58, 69.52, 69.58, 69.46, 69.62, 69.07, 69.98, 44.245, 44.125,
44.09, 44.155, 43.93, 44.305, 44.065, 43.37, 43.685, 43.285,
43.355, 42.305, 42.65, 43.64, 43.885, 43.885, 43.885, 44.12,
44.385, 44.78, 44.985, 44.985, 44.865, 44.38, 44.05, 44.65, 44.065,
44.62, 44.73, 44.32, 44.275, 44.145, 43.615, 43.975, 44.52, 44.335,
43.585, 43.715, 43.83, 43.735, 44.09, 44.005, 44.775, 44.325,
43.555, 43.535, 43.325, 43.425, 43.04, 43.45, 166.09, 166.44,
165.04, 167.69, 168.08, 169.17, 168.67, 167.19, 167.19, 164.39,
163.26, 159.64, 160.33, 162.83, 163.4, 163.4, 163.4, 164.79,
166.23, 168.3, 168.29, 169.34, 168.56, 166.81, 165.39, 165.98,
162.64, 163.78, 164.91, 164, 162.1, 162.25, 161.45, 162.08, 162.37,
160.09, 157.96, 158.45, 159.95, 159.75, 160.58, 160.51, 164.09,
161.96, 160.84, 161.41, 159.48, 159.45, 158.09, 158.49, 66, 66.19,
66.31, 67.17, 66.84, 67.32, 67.26, 66.19, 66.46, 65.62, 65.61,
63.87, 64.09, 64.73, 65.72, 65.72, 65.72, 66.11, 66.96, 67.53,
67.57, 67.53, 67.25, 65.98, 65.52, 66.19, 65.23, 66.2, 66.4,
65.53, 65.52, 65.37, 64.54, 64.57, 64.85, 64, 62.94, 63.18, 63.87,
63.3, 63.9, 63.83, 64.76, 64, 63.62, 63.92, 63.02, 63.27, 62.33,
62.65), .indexCLASS = "Date", tclass = "Date", .indexTZ = "UTC", tzone = "UTC", format = "%Y-%m-%d", class = c("xts",
"zoo"), index = structure(c(1301616000, 1301875200, 1301961600,
1302048000, 1302134400, 1302220800, 1302480000, 1302566400, 1302652800,
1302739200, 1302825600, 1303084800, 1303171200, 1303257600, 1303344000,
1303430400, 1303689600, 1303776000, 1303862400, 1303948800, 1304035200,
1304294400, 1304380800, 1304467200, 1304553600, 1304640000, 1304899200,
1304985600, 1305072000, 1305158400, 1305244800, 1305504000, 1305590400,
1305676800, 1305763200, 1305849600, 1306108800, 1306195200, 1306281600,
1306368000, 1306454400, 1306713600, 1306800000, 1306886400, 1306972800,
1307059200, 1307318400, 1307404800, 1307491200, 1307577600), tzone = "UTC", tclass = "Date"), .Dim = c(50L,
4L), .Dimnames = list(NULL, c("A", "B", "C", "D")))
#I apply the EMA indicator to the prices xts dataframe
EMA20fn <- function(x) EMA(x, n=20)
EMA20prices <- xts(apply(prices, 2, EMA20fn), order.by=index(EMA20fn(prices[,1])))
#I know how to create the signals (in EMA20prices) for a single asset, but I don't know
#what kind of loop it's required to apply the signal to every asset in the dataframe
#prices
ema20tr <- Lag(ifelse(Lag(prices[,1])<Lag(EMA20prices[,1])& prices[,1]>EMA20prices[,1],1,
ifelse(Lag(prices[,1])>Lag(EMA20prices[,1])& prices[,1]<EMA20prices[,1],-1,0)))
ema20tr[is.na(ema20tr)] <- 0
ema20sig <- ifelse(ema20tr>1,0,0)
for(i in 2:length(prices[,1])){ema20sig[i] <- ifelse(ema20tr[i]==1,1,
ifelse(ema20tr[i]==-1,0,ema20sig[i-1]))}
ema20sig[is.na(ema20sig)] <- 1
Thank you in advance for the answers!
The following changes to your code will do what you want on all four columns (with the prices data structure as per the question)
library(TTR)
library(Hmisc)
library(xts)
#I apply the EMA indicator to the prices xts dataframe
EMA20fn <- function(x) EMA(x, n=20)
EMA20prices <- xts(apply(prices, 2, EMA20fn), order.by=index(EMA20fn(prices[,1])))
#I know how to create the signals (in EMA20prices) for a single asset, but I don't know
#what kind of loop it's required to apply the signal to every asset in the dataframe
#prices
ema20tr = NULL
for (j in 1:ncol(prices)) {
ema20tr <- cbind(ema20tr,Lag(ifelse(Lag(prices[,j])<Lag(EMA20prices[,j])& prices[,j]>EMA20prices[,j],1,
ifelse(Lag(prices[,j])>Lag(EMA20prices[,j])& prices[,j]<EMA20prices[,j],-1,0))))
}
ema20tr[is.na(ema20tr)] <- 0
ema20sig <- ifelse(ema20tr>1,0,0)
for (j in 1:ncol(prices)) {
for(i in 2:length(prices[,j])) {ema20sig[i,j] <- ifelse(ema20tr[i,j]==1,1,
ifelse(ema20tr[i,j]==-1,0,ema20sig[i-1,j]))}
}
ema20sig[is.na(ema20sig)] <- 1
Related
I am trying to create a For Loop in R to fill a Vector with Forecasted values, generated via the auto.arima function.
I am new to R, so I am not sure if this is done correctly.
The code I am using is the following:
library(dplyr)
library(forecast)
dfts <- ts(df$Price_REG1)
fc=c()
for (i in 0:7) {
modArima <- auto.arima(dfts[0+(i*24):168+(i*24)])
forecast <- forecast(modArima, h=24)
forecast_values <- forecast$mean
fc <- append(fc, forecast_values)
}
I use longer sets in reality, but made it smaller here to make it more understandable.
What I am trying to achieve is to use the first week of data (168 hours in one week) to estimate the coefficients for the model. Then I want to put the generated predictions for the first 24 hours after the training set in the Vector fc.
I then want to move the window one day, reestimate the coefficients and generate the forecasts for the following day and saving them into the Vector.
I am a bit unsure on the dfts[0+(i*24):168+(i*24)] part, since df <- df[0:168], doesn't work, but needs the df <- df[0:168,]. But if I put dfts[0+(i*24):168+(i*24)] I get
Error in [.default(dfts, 0 + (i * 24):874 + (i * 24), ) : incorrect
number of dimensions
EDIT :
Sample of Data:
structure(c(28.78, 28.45, 27.9, 27.52, 27.54, 26.55, 25.83, 25.07,
25.65, 26.15, 26.77, 27.4, 28.08, 28.69, 29.37, 29.97, 30.46,
30.39, 30.06, 29.38, 27.65, 27.33, 25.88, 24.81, 12.07, 13.13,
19.07, 21.12, 24.29, 26.27, 27.74, 28.39, 29.37, 29.95, 29.91,
29.96, 29.94, 29.94, 30.18, 30.96, 31.2, 30.98, 30.35, 29.27,
28.17, 28.02, 27.69, 24.39, 18.93, 9.98, 1.53, 0.14, 0.85, 9.92,
24.48, 26.68, 28.12, 28.58, 28.16, 28.78, 28.31, 28.44, 28.96,
29.86, 30.15, 30.07, 29.54, 29.11, 27.91, 27.03, 25.7, 22.04,
21.73, 15.95, 16.23, 6.45, 3.83, 4.03, 4.04, 19.07, 17.49, 24.18,
24.94, 25.11, 24.94, 24.95, 25.25, 26.33, 27.36, 28.88, 29.58,
29.42, 27.71, 27.4, 27.37, 25.77, 26.65, 27.13, 27.11, 27.42), tsp = c(1,
5.125, 24), class = "ts")
Here is an example with built-in data set AirPassengers on how to run a rolling forecast with package forecast.
The code below makes use of time series functions
window to subset objects of class "ts";
frequency and start to get those attributes.
The output vector is created beforehand, not extended in the loop with append.
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
data("AirPassengers", package = "datasets")
fc <- ts(
data = rep(NA, length(AirPassengers)),
start = start(AirPassengers),
frequency = frequency(AirPassengers)
)
start <- start(AirPassengers)[1]
freq <- frequency(AirPassengers)
i_fc <- seq_len(freq)
fc[i_fc] <- AirPassengers[i_fc]
for(i in 1:11) {
w <- window(AirPassengers, start = start + i - 1L, end = c(start + i - 1L, freq))
modArima <- auto.arima(w)
y <- forecast(modArima, h = freq)$mean
i_fc <- i_fc + freq
fc[i_fc] <- y
}
plot(cbind(AirPassengers, fc))
Created on 2022-12-20 with reprex v2.0.2
Edit
I believe that the code below forecasts the next day given a certain initial number of days.
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
fill_first_periods <- function(x, weeks = 1L, week_days) {
if(missing(week_days)) week_days <- 7L
fc <- ts(
data = rep(NA, length(x)),
start = start(x),
frequency = frequency(x)
)
i_fc <- seq_len(frequency(x) * week_days * weeks)
fc[i_fc] <- x[i_fc]
fc
}
# not enough data to run an example for 1 week
# three days only
weeks <- 1L
week_days <- 3L
fc <- fill_first_periods(dfts, weeks = weeks, week_days)
n <- length(fc)
i_last <- length(fc[!is.na(fc)])
h <- frequency(fc)
curr_start <- start(fc)
curr_end <- c(curr_start[1] + weeks*week_days - 1L, frequency(fc))
for(i in 2:(end(fc)[1] - 1L)) {
if(n - i_last < h) {
h <- end(fc)[2]
i_fc <- tail(seq_len(n), h)
} else {
i_fc <- (i_last + 1L):(i_last + h)
i_last <- i_last + h
}
w <- window(dfts, start = curr_start, end = curr_end)
modArima <- auto.arima(w)
fc[i_fc] <- forecast(modArima, h = h)$mean
#
curr_start[1] <- curr_start[1L] + 1L
curr_end <- c(curr_end[1L] + 1L, h)
}
plot(cbind(dfts, fc))
Created on 2022-12-21 with reprex v2.0.2
I am having trouble calculating the correlation coefficient between electricity prices of different countries on monthly/ weekly level. The dataset (https://github.com/Argiro1983/prices_df.git) looks like this:
prices_df<-structure(list(DATETIME = structure(c(1609459200, 1609462800,
1609466400, 1609470000, 1609473600, 1609477200, 1609480800, 1609484400,
1609488000, 1609491600), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
GR = c(50.87, 48.19, 44.68, 42.92, 40.39, 20.96, 39.63, 40.1,
20, 40.74), IT = c(50.87, 48.19, 44.68, 42.92, 40.39, 40.2,
39.63, 40.09, 41.27, 41.67), BG = c(49.95, 48.05, 49.62,
46.73, 45.39, 44.25, 36.34, 19.97, 20, 20.43), HU = c(45.54,
41.59, 40.05, 36.9, 34.47, 32.82, 27.7, 15, 8.43, 20.77),
TR = c(26.31, 24.06, 24.21, 23.2, 23.2, 26.31, 24.98, 26.31,
24.04, 26.31), SR = c(38.89, 34.86, 33.62, 28.25, 29.03,
29.22, 29.71, 1.08, 1.1, 36.07)), row.names = c(NA, 10L), class = "data.frame")
I have tried converting it to xts and using apply.monthly (or apply.weekly) as follows, but it does not work.
library(xts)
SEE_prices <- xts(x = prices_df, order.by = DATETIME)
storage.mode(SEE_prices) <- "numeric"
SEE_prices <- na.locf(SEE_prices)
library(tidyverse)
library(tidyquant)
apply.monthly(SEE_prices, cor(SEE_prices$GR, SEE_prices$SR))
Another way I tried to get correlation on weekly level was to use the dplyr package, but it also did not work:
library(lubridate)
library(magrittr)
library(dplyr)
prices_df %<>% mutate( DATETIME = ymd_hms(DATETIME) )
table1<- prices_df %>% group_by( year( DATETIME ), isoweek( DATETIME ) ) %>%
summarise( DateCount = n_distinct(date(DATETIME)), correlation = cor(prices_df$GR, prices_df$SR))
Does anybody have an idea on how to calculate weekly/monthly correlation on a dataset?
Thank you in advance.
Don't use $ in dplyr pipes. To calculate correlation try -
library(dplyr)
library(lubridate)
prices_df %>%
mutate(DATETIME = ymd_hms(DATETIME),
year = year(DATETIME), week = isoweek(DATETIME)) %>%
group_by(year, week) %>%
summarise(DateCount = n_distinct(date(DATETIME)),
correlation = cor(GR, SR), .groups = 'drop')
Trying to write a function to combine multiple steps that are used regularly on an R dataframe. At the moment I stack individual lines, which is most inefficient. An Example each step I take at the moment
library(scores)
MscoreIndex <- 3
labMedians <- mapply(median, df[-1], na.rm = T) #calculate the median for each column except 1st
LabGrandMedian <- median(mapply(median, df[-1], na.rm = T),na.rm = T)
labMscore <- as.vector(round(abs(scores_na(labMedians, "mad")), digits = 2)) #calculate mscore by lab
labMscoreIndex <- which(labMscore > MscoreMax) #get the position in the vector that exceeds Mscoremax
df[-1][labMscoreIndex] <- NA # discharge values above threshold by making NA
An example my df below
structure(list(Determination_No = 1:6, `2` = c(55.94, 55.7, 56.59,
56.5, 55.98, 55.93), `3` = c(56.83, 56.54, 56.18, 56.5, 56.51,
56.34), `4` = c(56.39, 56.43, 56.53, 56.31, 56.47, 56.35), `5` = c(56.32,
56.29, 56.31, 56.32, 56.39, 56.32), `7` = c(56.48, 56.4, 56.54,
56.43, 56.73, 56.62), `8` = c(56.382, 56.258, 56.442, 56.258,
56.532, 56.264), `10` = c(56.3, 56.5, 56.2, 56.5, 56.7, 56.5),
`12` = c(56.11, 56.46, 56.1, 56.35, 56.36, 56.37)), class = "data.frame", row.names = c(NA,
-6L))
I started by trying to get the indivdual lab medians and the grandmedian with the following but got errors
I tried.
mediansFunction <- function(x){
analytemedians <- mapply(median(x[,-1]))
grandmedian <- median(x[,-1])
list(analytemedians,grandmedian)
}
mediansFunction(df)
But I get "Error in median.default(x[, -1]) : need numeric data"
Try :
mediansFunction <- function(x){
analytemedians <- sapply(x[-1], median)
median_of_median <- median(analytemedians)
grand_median <- median(as.matrix(x[-1]))
list(analytemedians = analytemedians,
median_of_median = median_of_median,
grand_median = grand_median)
}
mediansFunction(df)
#$analytemedians
# 2 3 4 5 7 8 10 12
#55.960 56.505 56.410 56.320 56.510 56.323 56.500 56.355
#$median_of_median
#[1] 56.3825
#$grand_median
#[1] 56.386
My goal is to adjust data seasonally and save only seasonal factors.
My data consists of 60+ time series. As you see, one of my problems is that there
is a lot of time series that I need to adjust at once. Here’s my try to do this in R:
library("timeDate")
library("timeSeries")
library("seasonal")
mDxts <- structure(c(35.8, 41.6, 35.9, 36.9, 42.43, 36.067,28.67, 29.53, 32.83, 29.867,23.9, 20.8, 21.167, NA, NA, NA, NA, NA, NA, NA, 149.67,108.89, 89.067, 83.33, 77.2,64.91, 50.2, 48, 62.13, 52.93,43.2, 38.8, 37.9, 19, 18, 17, 16.5, 16, 15.5, 15), class = c("xts","zoo"), .indexCLASS = "yearqtr", tclass = "yearqtr", .indexTZ = "", tzone = "", index = structure(c(946684800,954547200, 962409600, 970358400, 978307200, 986083200, 993945600,1001894400, 1009843200, 1017619200, 1025481600, 1033430400, 1041379200,1491004800, 1498867200, 1506816000, 1514764800, 1522540800, 1530403200,1538352000), tzone = "", tclass = "yearqtr"), .Dim = c(20L, 2L), .Dimnames = list(NULL, c("depall", "ref")))
ll<-lapply(mDxts, function(e) ts(e,start=c(2000,1),frequency=4))
#I. Seasonal adjustment and saving only d10 component:
sf<-sapply(ll,function(e) try(seas(e,x11="",na.action=na.exclude, transform.function = "none",x11.mode="logadd",arima.model="(0 1 1) (0 1 0)",regression.aictest = NULL, outlier=NULL,x11.save="d10")))
As I understand, my main problem is that performing sf<-sapply(st,function(e) try(seas(e, x11.save="d10")) I obtain a list where each element is a list with all d10, d11 and so on (take a look by SF.df<-as.data.frame(SF)).
Well, time passed, and now I'm able to give an answer to the questions.
I needed to adjust seasonally a lot a data simultaneously and then to recalculate them at one time.
Firstly, one problem was that the result of seasonal adjustment was saved as list. The answer is in using fuction final from seasonal:
sa<-sapply(st,function(e) try(final(seas(e,x11="",na.action=na.exclude,
transform.function = "none",x11.mode="logadd",arima.model="(0 1 1) (0 1 0)",
regression.aictest = NULL, outlier=NULL))))
To save the seasonal factor a function series from the same package was needed:
sf<-sapply(st,function(e) try(series(seas(e,x11="",na.action=na.exclude,
transform.function = "none",x11.mode="logadd",arima.model="(0 1 1) (0 1 0)",
regression.aictest = NULL, outlier=NULL),"d10")))
I would like to convert a list object to zoo and then apply rollapply on the zoo object. Short example reproduced below (I have 90,000 such files to process, using UNIX:)). Assume my list has two dataframes.
1) I would like to convert the date in each of the dataframes to this format:
dates <- as.Date(paste0(mylist$year, "-", mylist$month, "-", mylist$day), format="%Y-%m-%d")
z <- zoo(mylist, order.by=mylist[,1])
I understand lapply can do this but I tried without success.
Once I get my zoo object, I would like to use rollapply:
library(hydroTSM)#for daily2annual function but aggregate can do
x.3max <- rollapply(data=zooobject, width=3, FUN=sum, fill=NA, partial= TRUE,
align="center")
# Maximum value per year of 3-day total rainfall for each one of the simulations
z.3max.annual <- daily2annual(z.3max, FUN=max,na.rm=TRUE)#dates=1
What the code above does is it centers a 3-day window on each column of the dataframes in zooobject and sums the values. The, the max per year of the 3-day total is extracted.
mylist<- list(a,a)
mylist<-lapply(mylist, function(x) x[x[["Month"]] %in% c(12,1,2),])# extract data for DJF for individual sites
library(zoo)
a= structure(list(Year = c(1975L, 1975L, 1975L, 1975L, 1975L, 1975L
), Month = c(1L, 1L, 1L, 1L, 1L, 1L), Site = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "G100", class = "factor"), Day = 1:6,
sim01 = c(28.49, 29.04, 27.62, 28.43, 28.69, 29.16), sim02 = c(29.49,
30.04, 28.62, 29.43, 29.69, 30.16), sim03 = c(30.49, 31.04,
29.62, 30.43, 30.69, 31.16), sim04 = c(31.49, 32.04, 30.62,
31.43, 31.69, 32.16), sim05 = c(32.49, 33.04, 31.62, 32.43,
32.69, 33.16), sim06 = c(33.49, 34.04, 32.62, 33.43, 33.69,
34.16), sim07 = c(34.49, 35.04, 33.62, 34.43, 34.69, 35.16
), sim08 = c(35.49, 36.04, 34.62, 35.43, 35.69, 36.16), sim09 = c(36.49,
37.04, 35.62, 36.43, 36.69, 37.16), sim10 = c(37.49, 38.04,
36.62, 37.43, 37.69, 38.16), sim11 = c(38.49, 39.04, 37.62,
38.43, 38.69, 39.16), sim12 = c(39.49, 40.04, 38.62, 39.43,
39.69, 40.16), sim13 = c(40.49, 41.04, 39.62, 40.43, 40.69,
41.16), sim14 = c(41.49, 42.04, 40.62, 41.43, 41.69, 42.16
), sim15 = c(42.49, 43.04, 41.62, 42.43, 42.69, 43.16), sim16 = c(43.49,
44.04, 42.62, 43.43, 43.69, 44.16), sim17 = c(44.49, 45.04,
43.62, 44.43, 44.69, 45.16), sim18 = c(45.49, 46.04, 44.62,
45.43, 45.69, 46.16), sim19 = c(46.49, 47.04, 45.62, 46.43,
46.69, 47.16), sim20 = c(47.49, 48.04, 46.62, 47.43, 47.69,
48.16)), .Names = c("Year", "Month", "Site", "Day", "sim01",
"sim02", "sim03", "sim04", "sim05", "sim06", "sim07", "sim08",
"sim09", "sim10", "sim11", "sim12", "sim13", "sim14", "sim15",
"sim16", "sim17", "sim18", "sim19", "sim20"), row.names = c(NA,
6L), class = "data.frame")
Output should be similar to:
Year Site Sim01...
1975 G100 ...
1976 G100 ...
1977 G100 ...
Only the values in the months c(12,1,2) are needed.
This produces a list of zoo objects, Lz, and then performs rollapply on each component of the list giving L2. Finally L3 aggregates over year taking the max of each column.
library(zoo)
mylist <- list(a, a) # a is given at bottom of question
Lz <- lapply(mylist, read.zoo, index = 1:3, format = "%Y %m %d")
L2 <- lapply(Lz, rollapply, 3, sum, partial = TRUE)
L3 <- lapply(L2, function(z) aggregate(z, as.numeric(format(time(z), "%Y")), max))
giving:
> L3
[[1]]
sim01 sim02 sim03 sim04 sim05 sim06 sim07 sim08 sim09 sim10 sim11
1975 86.28 89.28 92.28 95.28 98.28 101.28 104.28 107.28 110.28 113.28 116.28
sim12 sim13 sim14 sim15 sim16 sim17 sim18 sim19 sim20
1975 119.28 122.28 125.28 128.28 131.28 134.28 137.28 140.28 143.28
[[2]]
sim01 sim02 sim03 sim04 sim05 sim06 sim07 sim08 sim09 sim10 sim11
1975 86.28 89.28 92.28 95.28 98.28 101.28 104.28 107.28 110.28 113.28 116.28
sim12 sim13 sim14 sim15 sim16 sim17 sim18 sim19 sim20
1975 119.28 122.28 125.28 128.28 131.28 134.28 137.28 140.28 143.28
Solved
lst1 <- lapply(list.files(pattern=".csv"),function(x) read.table(x,header=TRUE,sep="")) # read all files and data and replace -999.9 with NA
lst2<-lapply(lst1, function(x) x[x[["Month"]] %in% c(6,7,8),])#c(6,7,8) extract data for DJF for individual sites
names(lst2)<-list.files(pattern=".csv")
lapply(lst2,tail,4)
lst3<-lapply(lst2, function(x) x[!(names(x) %in% c("Site"))])
Lz <- lapply(lst3, read.zoo, index = 1:3, format = "%Y %m %d")
L2 <- lapply(Lz, rollapply, 3, sum, partial = TRUE)
L3 <- lapply(L2, function(z) aggregate(z, as.numeric(format(time(z), "%Y")), max))
mapply(
write.table,
x=L3, file=paste(names(L3), "csv", sep="."),
MoreArgs=list(row.names=FALSE, sep=",")
) # write files to folder keeping the list names as file names