Rolling window with Copulas - r

I would like to apply a rolling window to fit a Student's t copula and then produce a forecast based on the results of the fitting process. I already tried it with a for loop, but it always throws errors from the fitCopula command.
# Student's t copula
windowsSize <- 4000   # training data size
testsize <- 351       # number of observations to forecast
for (k in 0:33)       # run 34 experiments
{
  A <- k*testsize + 1
  B <- A + windowsSize - 1
  start_obs <- A
  end_obs <- B
  lgYen_roll <- lgYenUSD[A:B]
  lgEuro_roll <- lgEuroUSD[A:B]
  ElgYen_roll <- ElgYenUSD[A:B]
  ElgEuro_roll <- ElgEuroUSD[A:B]
  StdlgYen_roll <- StdlgYenUSD[A:B]
  StdlgEuro_roll <- StdlgEuroUSD[A:B]
  CopYenEuro_roll <- pobs(as.matrix(cbind(lgYen_roll, lgEuro_roll)))
  YenEuro_fit_t_roll <- fitCopula(t.cop, CopYenEuro_roll, method = "ml",
                                  posDef = is(t.cop, "ellipCopula"),
                                  start = NULL, lower = NULL, upper = NULL,
                                  optim.method = optimMeth(t.cop, method, dim = d),
                                  optim.control = list(maxit = 1000),
                                  estimate.variance = NA, hideWarnings = FALSE)
Here the first error already occurs:
Error in if (any(u < 0) || any(u > 1)) stop("'u' must be in [0,1] -- probably rather use pobs(.)") :
  missing value where TRUE/FALSE needed
  CO_YenEuro_roll_rho <- coef(YenEuro_fit_t_roll)[1]
  CO_YenEuro_roll_df <- coef(YenEuro_fit_t_roll)[2]
  YenEurocopula_dist_t_roll <- mvdc(copula = tCopula(param = CO_YenEuro_roll_rho, dim = 2),
                                    margins = c("norm", "norm"),
                                    paramMargins = list(list(mean = ElgYen_roll, sd = StdlgYen_roll),
                                                        list(mean = ElgEuro_roll, sd = StdlgEuro_roll)),
                                    check = TRUE, fixupNames = TRUE)
  YenEurocopula_random_t_roll.dist <- rMvdc(351, YenEurocopula_dist_t_roll)
  # Prediction
  A <- B + 1
  B <- B + testsize
  lgYen_roll <- lgYenUSD[A:B]
  lgEuro_roll <- lgEuroUSD[A:B]
  ElgYen_roll <- ElgYenUSD[A:B]
  ElgEuro_roll <- ElgEuroUSD[A:B]
  StdlgYen_roll <- StdlgYenUSD[A:B]
  StdlgEuro_roll <- StdlgEuroUSD[A:B]
  predict_EXT <- matrix(0, testsize, 1)
  for (i in 1:testsize)   # do the forecast based on the copula fit results
  {
    predict_EXT[i] <- fitCopula(t.cop, CopYenEuro_rolling[i], method = "ml",
                                posDef = is(t.cop, "ellipCopula"),
                                start = NULL, lower = NULL, upper = NULL,
                                optim.method = optimMeth(t.cop, method, dim = d),
                                optim.control = list(maxit = 1000),
                                estimate.variance = NA, hideWarnings = TRUE)
    YenEurocopula_dist_t_roll <- mvdc(copula = tCopula(param = CO_YenEuro_roll_rho[i], dim = 2),
                                      margins = c("norm", "norm"),
                                      paramMargins = list(list(mean = ElgYen_roll[i], sd = StdlgYen_roll[i]),
                                                          list(mean = ElgEuro_roll[i], sd = StdlgEuro_roll[i])),
                                      check = TRUE, fixupNames = TRUE)
    YenEurocopula_random_t_roll.dist <- rMvdc(351, YenEurocopula_dist_t_roll[i])
  }
}
Does anyone have a solution to this problem?
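A minimal diagnostic sketch, not a tested fix: the "missing value where TRUE/FALSE needed" part of the error usually means the pseudo-observations contain NAs, for example because A:B runs past the end of lgYenUSD or lgEuroUSD (with windowsSize = 4000 and testsize = 351 the last training window ends at row 15583). Checking each window before calling fitCopula makes the failing iteration easy to spot; everything after the check is assumed to stay as in the question.
library(copula)
for (k in 0:33) {
  A <- k*testsize + 1
  B <- A + windowsSize - 1
  lgYen_roll <- lgYenUSD[A:B]
  lgEuro_roll <- lgEuroUSD[A:B]
  # Stop with an informative message if the window contains missing values
  if (anyNA(lgYen_roll) || anyNA(lgEuro_roll)) {
    stop(paste0("window ", k, " (rows ", A, ":", B, ") contains NAs -- check the series length"))
  }
  CopYenEuro_roll <- pobs(as.matrix(cbind(lgYen_roll, lgEuro_roll)))
  # ... fitCopula() and the rest of the loop as in the question ...
}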

Related

issue with disag_model() function from disaggregation R package

I was trying to use the disaggregation package to evaluate whether it could be used on the dataset I have. My original data are disaggregated, so I've aggregated them in order to use the disag_model function from the disaggregation package and compare the "fitted values" with the actual values.
However, when I run the function, the R session aborts.
I tried to execute the disag_model function step by step and saw that the problem is due to the use of nlminb() to optimize the a posteriori density function, but I cannot understand why this happens or how to solve it.
Thanks for your help.
You can find the data I used at this link: https://www.dropbox.com/sh/au7l0e11trzfo19/AACpfRSUpd4gRCveUsh5JX6Ea?dl=0
Please download the folder to run the code.
This is the code I used:
library(tidyverse)
library(raster)
library(disaggregation)
library(sp)
library(boot)   # for boot(), used further below
path <- "yourPath/Data"
load(file.path(path, "myRS"))
load(file.path(path, "RAST"))
Data <- read.csv(file = paste(path, "/sim_data.csv", sep = ""))
Data$HasRes <- ifelse(Data$PN50 > runif(nrow(Data)), 1, 0)
for (i in 1:nlayers(myRS)) {
  myRS@layers[[i]]@file@name <- file.path(path, "predStackl10")
}
DFCov <- as.data.frame(raster::extract(myRS, Data[c("XCoord", "YCoord")]))
Data <- cbind(Data, DFCov)
# Remove NA
NAs <- which(is.na(rowSums(Data[names(myRS)])))
Data <- Data[-NAs, ]
Data$ISO3 <- as.factor(Data$ISO3)
world_shape <- shapefile(file.path(path, "World.shp"))
lmic_shape <- world_shape[(world_shape@data$ISO3 %in% levels(Data$ISO3)), ]
plot(lmic_shape)
# I would like to convert Data in a SpatialPointsDataFrame object
PN50 <- Data
coordinates(PN50) <- c("XCoord", "YCoord")
is.projected(PN50) # see if a projection is defined
proj4string(PN50) <- CRS("+proj=longlat +datum=WGS84")
# compute the mean P50 within each state
PN50_mean <- aggregate(x = PN50,
by = list(Data$ISO3),
FUN = mean)
# compute the centroid of the observations coordinates for each state
PN50_centroid <-
Data %>% group_by(ISO3) %>% summarise(meanX = mean(XCoord), meanY = mean(YCoord))
# assign to each mean the centroid coordinates
PN50_agg <-
as.data.frame(
cbind(
PN50_mean = PN50_mean@data$PN50,
XCoord = PN50_centroid$meanX,
YCoord = PN50_centroid$meanY
)
)
PN50_agg$XCoord <- as.numeric(PN50_agg$XCoord)
PN50_agg$YCoord <- as.numeric(PN50_agg$YCoord)
PN50_agg$ISO3 <- as.character(PN50_centroid$ISO3)
samsiz <-
Data %>% group_by(ISO3) %>% summarise(sz = sum(SampleSize))
PN50_agg$sample_size <- as.numeric(samsiz$sz)
PN50_agg$case <- round(PN50_agg$PN50_mean * PN50_agg$sample_size)
# I would like having data in a SpatialPolygonsDataFrame format to use the disaggrgation package
library(sp)
coordinates(PN50_agg) <- c("XCoord", "YCoord")
proj4string(PN50_agg) <- CRS("+proj=longlat +datum=WGS84")
PN50_polyg <- lmic_shape
PN50_polyg@data <- full_join(PN50_polyg@data, PN50_agg@data, by = "ISO3")
# covariates raster
covariate_stack <-
getCovariateRasters(path, shape = raster(x = paste0(path, '/multi.tif')))
names(covariate_stack)
covariate_stack2 <- dropLayer(covariate_stack, nlayers(covariate_stack))
names(covariate_stack2)
plot(covariate_stack2)
covariate_stack2 <- raster::stack(covariate_stack2)
covariate_stack2<-brick(covariate_stack2)
# population raster
extracted <- raster::extract(raster(x = paste0(path, '/multi.tif')), PN50_polyg)
n_cells <- sapply(extracted, length)
PN50_polyg@data$pop_per_cell <- PN50_polyg@data$sample_size / n_cells
population_raster <-
rasterize(PN50_polyg, covariate_stack2, field = 'pop_per_cell')
# prepare data for disag_model()
dis_data <- prepare_data(
polygon_shapefile = PN50_polyg,
covariate_rasters = covariate_stack2,
aggregation_raster = population_raster,
mesh.args = list(
max.edge = c(5, 40),
cut = 0.0005,
offset = 1
),
id_var = "ISO3",
response_var = "case",
sample_size_var = "sample_size",
na.action = TRUE,
ncores = 8
)
# Rho and p(Rho<Rho_min)
dist <- pointDistance(PN50_agg@coords, lonlat = F, allpairs = T)
rownames(dist) <- PN50_agg$ISO3
colnames(dist) <- PN50_agg$ISO3
flattenDist <- function(dist) {
up <- upper.tri(dist)
flat <- data_frame(row = rownames(dist)[row(dist)[up]],
column = rownames(dist)[col(dist)[up]],
dist = dist[up])
return(flat)
}
pair_dist <- flattenDist(dist)
d <- pair_dist$dist
k <- 0.036
CorMatern <- k * d * besselK(k * d, 1)
limits <- sp::bbox(PN50_polyg)
hypotenuse <- sqrt((limits[1, 2] - limits[1, 1])^2 + (limits[2, 2] - limits[2, 1])^2)
prior_rho <- hypotenuse / 3
p_rho <- sum(d[CorMatern <= 0.1] < prior_rho) / length(d[CorMatern <= 0.1])
# sigma and p(sigma>sigma_max)
sigma_boost <- function(data, i) {
sd(data[i] / mean(data[i]))
}
sigma <-
boot(data = dis_data$polygon_data$response,
statistic = sigma_boost,
10000)
prior_sigma <- sigma$t0
p_sigma <- sum(sigma$t >= sigma$t0) / length(sigma$t)
default_priors <-
list(
priormean_intercept = 0,
priorsd_intercept = 4,
priormean_slope = 0,
priorsd_slope = 2,
prior_rho_min = prior_rho,
prior_rho_prob = p_rho,
prior_sigma_max = prior_sigma,
prior_sigma_prob = p_sigma,
prior_iideffect_sd_max = 0.1,
prior_iideffect_sd_prob = 0.01
)
fitted_model <- disag_model(
data = dis_data,
iterations = 1000,
family = "binomial",
link = "logit",
# priors = default_priors,
field = TRUE,
iid = TRUE,
silent = TRUE
)
I was able to run the disag_model function using your dis_data object. There were no errors or crashes. I ran the following lines.
fitted_model <- disag_model(
data = dis_data,
iterations = 1000,
family = "binomial",
link = "logit",
field = TRUE,
iid = TRUE,
silent = TRUE
)
I am running on a Windows machine with 64GB RAM and 8 cores. It took over an hour and used all of my RAM for a while and up to 50% of my CPU, which is not surprising as you are fitting 5.5M pixels over the whole world. Therefore, I suspect it is related to your computer running out of resources. I suggest you try a smaller example to test it out first. Try fewer polygons and fewer pixels in each polygon.
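A rough sketch of that smaller test, assuming the objects built in the question (PN50_polyg, covariate_stack2, population_raster) are still in the workspace: coarsening the rasters by a factor of 10 cuts the pixel count per polygon by roughly a hundredfold before prepare_data() and disag_model() are run again with far fewer iterations.
# Coarsen covariates (mean) and population counts (sum) to reduce the number of pixels
covariate_small  <- raster::aggregate(covariate_stack2, fact = 10, fun = mean)
population_small <- raster::aggregate(population_raster, fact = 10, fun = sum)
dis_data_small <- prepare_data(
  polygon_shapefile = PN50_polyg,
  covariate_rasters = covariate_small,
  aggregation_raster = population_small,
  id_var = "ISO3",
  response_var = "case",
  sample_size_var = "sample_size",
  na.action = TRUE,
  ncores = 8
)
small_fit <- disag_model(data = dis_data_small, iterations = 100,
                         family = "binomial", link = "logit",
                         field = TRUE, iid = TRUE, silent = TRUE)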

Error in VAR: Different row size of y and exogen

I am attempting to fit a VAR model with an exogenous variable in R:
vndata <- read.csv("vndata.txt", sep="")
names(vndata)
da <- data.frame(vndata[2:dim(vndata),])
# STOCK PRICE MODEL
y <- da[, c("irate", "stockp", "mrate", "frate")]
x <- data.frame(da[, c("cdi")])
library("vars")
VARselect(y, lag.max = 8,exogen = x)
var1 <- restrict(VAR(y, p = 2,exogen = x),method = c("ser"),thresh = 1.56)
Then, I want to plot the impulse response function:
plot(irf(var1, impulse = c("irate"), response = c("frate"), boot = T,
cumulative = FALSE,n.ahead = 20))
However, it produces the error:
Error in VAR(y = ysampled, p = 2, exogen = x) :
Different row size of y and exogen.
I cannot figure out what happened. I have used dim() to make sure that y and x have the same number of rows.
Try this, it worked for me:
.GlobalEnv$exogen <- x
VARselect(y, lag.max = 8,exogen = .GlobalEnv$exogen)
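This appears to work because irf() with boot = TRUE refits the VAR on resampled data (the error message shows the internal call VAR(y = ysampled, p = 2, exogen = x)), and the fix suggests the exogen object is re-evaluated by name, so it has to be visible from wherever the bootstrap runs. A hedged usage sketch of the workaround applied to the original model:
# Keep the exogenous regressors as a plain, globally visible matrix with a stable name
.GlobalEnv$exogen <- as.matrix(da[, "cdi", drop = FALSE])
var1 <- restrict(VAR(y, p = 2, exogen = .GlobalEnv$exogen),
                 method = "ser", thresh = 1.56)
plot(irf(var1, impulse = "irate", response = "frate",
         boot = TRUE, cumulative = FALSE, n.ahead = 20))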

Error in R script: error in abs(alpha): non-numeric argument to mathematical function

I am trying to reproduce some results from the book "Financial Risk Modelling and Portfolio Optimisation with R" and I get an error that I can't seem to get my head around.
I get the following error in the COPPosterior function:
error in abs(alpha) : non-numeric argument to mathematical function
Is anyone able to see why I get the error?
The error is from the following script:
library(urca)
library(vars)
library(fMultivar)
## Loading data set and converting to zoo
data(EuStockMarkets)
Assets <- as.zoo(EuStockMarkets)
## Aggregating as month-end series
AssetsM <- aggregate(Assets, as.yearmon, tail, 1)
head(AssetsM)
## Applying unit root tests for sub-sample
AssetsMsub <- window(AssetsM, start = start(AssetsM),
end = "Jun 1996")
## Levels
ADF <- lapply(AssetsMsub, ur.df, type = "drift",
selectlags = "AIC")
ERS <- lapply(AssetsMsub, ur.ers)
## Differences
DADF <- lapply(diff(AssetsMsub), ur.df, selectlags = "AIC")
DERS <- lapply(diff(AssetsMsub), ur.ers)
## VECM
VEC <- ca.jo(AssetsMsub, ecdet = "none", spec = "transitory")
summary(VEC)
## Index of time stamps in back test (extending window)
idx <- index(AssetsM)[-c(1:60)]
ANames <- colnames(AssetsM)
NAssets <- ncol(AssetsM)
## Function for return expectations
f1 <- function(x, ci, percent = TRUE){
data <- window(AssetsM, start = start(AssetsM), end = x)
Lobs <- t(tail(data, 1))
vec <- ca.jo(data, ecdet = "none", spec = "transitory")
m <- vec2var(vec, r = 1)
fcst <- predict(m, n.ahead = 1, ci = ci)
LU <- matrix(unlist(fcst$fcst),
ncol = 4, byrow = TRUE)[, c(2, 3)]
RE <- rep(0, NAssets)
PView <- LU[, 1] > Lobs
NView <- LU[, 2] < Lobs
RE[PView] <- (LU[PView, 1] / Lobs[PView, 1] - 1)
RE[NView] <- (LU[NView, 1] / Lobs[NView, 1] - 1)
names(RE) <- ANames
if(percent) RE <- RE * 100
return(RE)
}
ReturnEst <- lapply(idx, f1, ci = 0.5)
qv <- zoo(matrix(unlist(ReturnEst),
ncol = NAssets, byrow = TRUE), idx)
colnames(qv) <- ANames
tail(qv)
library(BLCOP)
library(fPortfolio)
## Computing returns and EW-benchmark returns
R <- (AssetsM / lag(AssetsM, k = -1) -1.0) * 100
## Prior distribution
## Fitting of skewed Student's t distribution
MSTfit <- mvFit(R, method = "st")
mu <- c(MSTfit@fit[["beta"]])
S <- MSTfit@fit[["Omega"]]
skew <- c(MSTfit@fit[["alpha"]])
df <- MSTfit@fit[["df"]]
CopPrior <- mvdistribution("mvst", dim = NAssets, mu = mu,
Omega = S, alpha = skew, df = df)
## Pick matrix and view distributions for last forecast
RetEstCop <- ReturnEst[[27]]
RetEstCop
PCop <- matrix(0, ncol = NAssets, nrow = 3)
colnames(PCop) <- ANames
PCop[1, ANames[1]] <- 1
PCop[2, ANames[2]] <- 1
PCop[3, ANames[4]] <- 1
Sds <- apply(R, 2, sd)
RetViews <- list(distribution("norm", mean = RetEstCop[1],
sd = Sds[1]),
distribution("norm", mean = RetEstCop[2],
sd = Sds[2]),
distribution("norm", mean = RetEstCop[4],
sd = Sds[4])
)
CopViews <- COPViews(pick = PCop, viewDist = RetViews,
confidences = rep(0.5, 3),
assetNames = ANames)
## Simulation of posterior
NumSim <- 10000
CopPost <- COPPosterior(CopPrior, CopViews,
numSimulations = NumSim)
print(CopPrior)
print(CopViews)
slotNames(CopPost)
Look at the structure of MSTfit:
str(MSTfit)
You can see that if you want the estimated alpha value, you need to access it via:
MSTfit@fit$estimated[['alpha']]
rather than
MSTfit@fit[['alpha']]
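A minimal follow-up sketch, assuming str(MSTfit) really does show the estimates under @fit$estimated (the slot layout can differ between fMultivar versions): extract all four parameters the same way before rebuilding the prior.
est <- MSTfit@fit$estimated
mu <- c(est[["beta"]])
S <- est[["Omega"]]
skew <- c(est[["alpha"]])
df <- est[["df"]]
CopPrior <- mvdistribution("mvst", dim = NAssets, mu = mu,
                           Omega = S, alpha = skew, df = df)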

arima method in mtsdi

I have a large data set (more than 2000 rows and 2000 variables) with lots of missing values. I am using the mnimput function of the mtsdi package in R to impute all missing values. This is my code:
formula = data
imput_out <- mnimput(formula, data, by = NULL, log = FALSE, log.offset = 1,
                     eps = 1e-3, maxit = 1e2, ts = TRUE, method = "arima",
                     ar.control = list(order = c(1, 1, 1), period = 4,
                                       f.eps = 1e-6, f.maxit = 1e3, ga.bf.eps = 1e-6,
                                       verbose = TRUE, digits = getOption("digits")))
But I am getting an error:
Error in o[1:3, j] : incorrect number of dimensions
Please help me out.
You have to dig fairly deep into the package source to uncover what is going on here.
The order element of ar.control is placed into a variable o that is iterated over by j, the number of columns you put into your formula. So if your formula looks like ~c31+c32+c33, your order argument needs to be 3 columns of (p, d, q) values.
I assigned it outside of the ar.control parameter for ease of editing:
arcontrol <- list(order = cbind(c(1, 0, 0), c(0, 0, 1), c(1, 0, 0)), period = NULL)
mnimput(formula, data, eps = 1e-3, ts = TRUE, method = "arima", ar.control = arcontrol)
Here is the relevant package source if you are interested:
function (xn, o, s, eps, maxit)
{
  rows <- dim(xn)[1]
  cols <- dim(xn)[2]
  models <- as.list(rep(NA, cols))
  ar.pred <- matrix(NA, nrow = rows, ncol = cols)
  for (j in 1:cols) {
    if (is.null(s)) {
      order <- o[1:3, j]
      seasonal <- list(order = c(0, 0, 0), period = NA)
    }
    else {
      order <- o[1:3, j]
      seasonal <- list(order = o[4:6, j], period = s)
    }
    models[[j]] <- arima(xn[, j], order = order, seasonal = seasonal,
                         xreg = NULL, optim.control = list(maxit = maxit,
                                                           reltol = eps))
    ar.pred[, j] <- xn[, j] - residuals(models[[j]])
  }
  retval <- list(ar.pred = ar.pred, models = models)
  return(retval)
}
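Since the data set in the question has more than 2000 variables, typing out one (p, d, q) column per variable is impractical. A hedged sketch, assuming data is the data frame being imputed (as in the question) and that every series should use the same ARIMA(1,1,1) order as the original call, builds the order matrix programmatically:
n_vars <- ncol(data)   # one (p, d, q) column per variable in the formula
arcontrol <- list(order = matrix(c(1, 1, 1), nrow = 3, ncol = n_vars),
                  period = NULL)
imput_out <- mnimput(formula, data, eps = 1e-3, ts = TRUE,
                     method = "arima", ar.control = arcontrol)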

Explaining the methodology behind this weighted ARIMA code

I have some code that was given to me which fits an ARIMA model while putting more weight on more recent errors. It gives excellent results, much better than a simple ARIMA, but I do not understand the methodology behind it. If you can work out what is going on, and why and how it works, I would really appreciate it :)
The part of the code I would particularly like explained starts at the #----- Weighting ----- section.
suppressMessages(library(lmtest))
suppressMessages(library(tseries))
suppressMessages(library(forecast))
suppressMessages(library(TTR))
#-------------------------------------------------------------------------------
Input.data <- matrix(c("8Q1","8Q2","8Q3","8Q4","9Q1","9Q2","9Q3","9Q4","10Q1","10Q2","10Q3","10Q4","11Q1","11Q2","11Q3","11Q4","12Q1","12Q2","12Q3","12Q4","13Q1","13Q2","13Q3","13Q4","14Q1","14Q2","14Q3",5403.675741,6773.504993,7231.117289,7835.55156,5236.709983,5526.619467,6555.781711,11464.72728,7210.068674,7501.610403,8670.903486,10872.93518,8209.022658,8153.393088,10196.44775,13244.50201,8356.732878,10188.44157,10601.32205,12617.82102,11786.52641,10044.98676,11006.0051,15101.9456,10992.27282,11421.18922,10731.31198),ncol=2,byrow=FALSE)
#-------------------------------------------------------------------------------
# Maximum seasonal differences allowed. For typical series, 0 is recommended.
max.sdiff <- 2
#-------------------------------------------------------------------------------
# Force seasonality
arima.force.seasonality <- "y"
#-------------------------------------------------------------------------------
# The frequency of the data. 1/4 for QUARTERLY, 1/12 for MONTHLY
Frequency <- 1/4
#-------------------------------------------------------------------------------
# How many quarters/months to forecast
Forecast.horizon <- 4
#-------------------------------------------------------------------------------
# The first date in the series. Use c(8, 1) to denote 2008 q1
Start.date <- c(8, 1)
#-------------------------------------------------------------------------------
# The dates of the forecasts
Forecast.dates <- c("14Q4", "15Q1", "15Q2", "15Q3")
#-------------------------------------------------------------------------------
# Set if the data should be logged. Takes value "s" (lets script choose logging)
#"level" (forces levels) or "log" (forces logs)
force.log <- "s"
#-------------------------------------------------------------------------------
# Selects the data column from Input.data
Data.col <- as.numeric(Input.data[, length(Input.data[1, ])])
#-------------------------------------------------------------------------------
# Turns the Data.col into a time-series
Data.col.ts <- ts(Data.col, deltat=Frequency, start = Start.date)
#-------------------------------------------------------------------------------
# A character vector of the dates from Input.data
Dates.col <- as.character(Input.data[,1])
#-------------------------------------------------------------------------------
# Starts the testing to see if the data should be logged
transform.method <- round(BoxCox.lambda(Data.col.ts, method = "loglik"), 5)
log.values <- seq(0, 0.24999, by = 0.00001)
sqrt.values <- seq(0.25, 0.74999, by = 0.00001)
which.transform.log <- transform.method %in% log.values
which.transform.sqrt <- transform.method %in% sqrt.values
if (which.transform.log == "TRUE"){
  as.log <- "log"
  Data.new <- log(Data.col.ts)
} else {
  if (which.transform.sqrt == "TRUE"){
    as.log <- "sqrt"
    Data.new <- sqrt(Data.col.ts)
  } else {
    as.log <- "no"
    Data.new <- Data.col.ts
  }
}
#----- Weighting ---------------------------------------------------------------
fweight <- function(x){
  PatX <- 0.5 + x
  return(PatX)
}
integ1 <- integrate(fweight, lower = 0.00, upper = 1)
valinteg <- 2*integ1$value
# Split the integral to several intervals, and pick the weights accordingly
integvals <- rep(0, length.out = length(Data.new))
for (i in 1:length(Data.new)){
  integi <- integrate(fweight, lower = (i-1)/length(Data.new), upper = i/length(Data.new))
  integvals[i] <- 2*integi$value
}
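# Each integvals[i] is twice the integral of 0.5 + x over the i-th sub-interval
# of [0, 1], so the weights rise roughly linearly from about 1/n to about 3/n
# (with n = length(Data.new)) and sum to 2. Later, i.e. more recent,
# observations therefore carry more weight in the fitting criterion defined
# further below.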
suppressWarnings(kpssW <- kpss.test(Data.new, null = "Level"))
suppressWarnings(ppW <- tryCatch({
  ppW <- pp.test(Data.new, alternative = "stationary")},
  error = function(ppW){
    ppW <- list(error = "TRUE", p.value = 0.99)
  }))
suppressWarnings(adfW <- adf.test(Data.new, alternative = "stationary",
                                  k = trunc((length(Data.new) - 1)^(1/3))))
suppressWarnings(if (kpssW$p.value < 0.05 |
                     ppW$p.value > 0.05 |
                     adfW$p.value > 0.05){
  ndiffsW = 1
} else {
  ndiffsW = 0
})
aaw <- auto.arima(Data.new,
                  max.D = max.sdiff,
                  d = ndiffsW,
                  seasonal = TRUE,
                  allowdrift = FALSE,
                  stepwise = FALSE,
                  trace = FALSE,
                  seasonal.test = "ch")
order.arima <- c(aaw$arma[1], aaw$arma[6], aaw$arma[2])
order.seasonal.arima <- c(aaw$arma[3], aaw$arma[7], aaw$arma[4])
if (sum(aaw$arma[1:2]) == 0){
  order.arima[1] <- 1
} else {
  NULL
}
if (arima.force.seasonality == "y"){
  if (sum(aaw$arma[3:4]) == 0){
    order.seasonal.arima[1] <- 1
  } else {
    NULL
  }
} else {
  NULL
}
#----- ARIMA -------------------------------------------------------------------
# Fits an ARIMA model with the orders set
stAW <- Arima(Data.new,
              order = order.arima,
              seasonal = list(order = order.seasonal.arima),
              method = "ML")
parSW <- stAW$coef
WMAEOPT <- function(parSW){
  ArimaW <- Arima(Data.new,
                  order = order.arima,
                  seasonal = list(order = order.seasonal.arima),
                  include.drift = FALSE,
                  method = "ML",
                  fixed = c(parSW))
  errAR <- c(abs(resid(ArimaW)))
  WMAE <- t(errAR) %*% integvals
  return(WMAE)
}
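# The objective above is a weighted mean absolute error: t(errAR) %*% integvals
# multiplies each absolute in-sample residual by its weight, so optim() with
# the "SANN" (simulated annealing) method below searches for ARIMA coefficients
# that, in effect, trade a slightly worse fit on old observations for a better
# fit on recent ones, starting from the maximum-likelihood estimates in parSW.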
OPTWMAE <- optim(parSW,
                 WMAEOPT,
                 method = "SANN",
                 set.seed(2),
                 control = list(fnscale = 1, maxit = 5000))
parS3 <- OPTWMAE$par
Arima.Data.new <- Arima(Data.new, order = order.arima,
                        seasonal = list(order = order.seasonal.arima),
                        include.drift = FALSE, method = "ML", fixed = c(parS3))
