I am trying to modify the Nelson-Siegel example from the YieldCurve documentation (https://cran.r-project.org/web/packages/YieldCurve/YieldCurve.pdf) so that it uses data from the ustyc package.
The original code is:
# Reference example: fit a Nelson-Siegel curve to the first months of the
# FedYieldCurve data set and compute fitted rates for one of the fitted rows.
library(YieldCurve)
### Nelson.Siegel function and Fed data-set ###
# Load the FedYieldCurve data set into the workspace.
data(FedYieldCurve)
# Keep only the leading '5 month' window of the series.
rate.Fed = first(FedYieldCurve,'5 month')
# One maturity per rate column (presumably expressed in years: 3/12 = 3 months
# -- confirm against the YieldCurve documentation).
maturity.Fed <- c(3/12, 0.5, 1,2,3,5,7,10)
# Estimate the Nelson-Siegel parameters for the selected rates.
NSParameters <- Nelson.Siegel( rate= rate.Fed, maturity=maturity.Fed )
# Rates implied by the fifth row of the fitted parameters at the same maturities.
y <- NSrates(NSParameters[5,], maturity.Fed)
My modified code is below
# Modified example: same Nelson-Siegel fit, but on a single day's curve taken
# from the data returned by ustyc::getYieldCurve().
library(ustyc)
library(YieldCurve)
# Fetch the yield-curve data (slow; the author measured about 2.5 minutes).
xlist = getYieldCurve() # 2.5 mins
# NOTE(review): `yields` is taken from xlist$df without any conversion. The
# fix reported for this thread wraps it in as.xts(); Nelson.Siegel() raised
# the "invalid time series parameters" error on the unconverted object.
yields <- xlist$df
# Eleven maturities, one per selected column (presumably in years -- confirm).
maturities <- c(1/12, 3/12, 6/12, 1, 2, 3, 5, 7, 10, 20, 30)
# Row labelled "2018-05-21", first eleven columns.
curve <- yields["2018-05-21",1:11]
NSParameters <- Nelson.Siegel(curve,maturities)
# Only one date was fitted, so the first (only) parameter row is used.
y <- NSrates(NSParameters[1,],maturities)
However, I am getting the error:
Error in attr(x, "tsp") <- c(1, NROW(x), 1) :
invalid time series parameters specified
What am I doing wrong? Thanks in advance
Solved it: 'yields' is in the wrong format. The fourth line needs to be:
# Convert the downloaded table to an xts object before it is subset and
# passed to Nelson.Siegel().
yields <- as.xts(xlist$df)
Related
I was just getting started with some practice with decision trees when I encountered this error about varying lengths between columns. After checking each column's length, however, I saw that they were all the same size. I ran this code in RStudio.
library(tree)
library(tidyverse)
# Read the steel-industry CSV, keep columns 2-8, give the columns shorter
# names, split the rows 75/25 into train/test and fit a tree on the training
# rows.
df <- subset(read.csv("Steel_industry_data.csv"), select = c(2, 3, 4, 5, 6, 7, 8))
# rename(new = "old"): shorter names for five of the selected columns.
df <- rename(df, CO2_Usage = "CO2.tCO2.",
LeadingPowerFactor = "Leading_Current_Power_Factor",
LaggingPowerFactor = "Lagging_Current_Power_Factor",
LeadingReactivePower = "Leading_Current_Reactive_Power_kVarh",
LaggingReactivePower = "Lagging_Current_Reactive.Power_kVarh")
# Fixed seed so the train/test split is repeatable.
set.seed(1234)
# Row indices of the training set: 75% of the rows, drawn without replacement.
i <- sample(1:nrow(df), nrow(df)*0.75, replace = FALSE)
train <- df[i,]
test <- df[-i,]
# The error is raised on the next line; the author checked column lengths with
# length(df$<variable>) and found them all equal.
# NOTE(review): the response is written as df$CO2_Usage (every row of df) while
# the predictors come from `train` (75% of the rows), so the two sides of the
# formula have different lengths -- likely the cause of the reported
# "varying lengths" error; a bare CO2_Usage would be looked up in `train`.
tree1 <- tree(df$CO2_Usage~., data=train)
Link to the dataset I used can be found here.
https://archive.ics.uci.edu/ml/datasets/Steel+Industry+Energy+Consumption+Dataset
I am trying to take one dataset and run a forecast on it based on different cutoffs. The for loop starts running, but it ends with the error "replacement has 76 rows, data has 0".
Here is the replicable example:
library(tidyverse)
library(forecast)
library(scales)
library(growthcurver)
# Build a small daily-cases table, then, for a growing cutoff of the data,
# fit an ARIMA model and a growth curve and try to collect the growth-curve
# projections of every cutoff in `estimates`.
options(scipen = 12) # Scientific Notation
options(digits = 6) # Specify Digits
noup<-3 #Days without update
# One column of daily case counts (25 values).
claims <- tribble(~perdaycases, 3,1,1,0,0,0,
1,8, 7, 2,
8, 8, 12,
13, 15,
21, 27,
47, 65,
47, 30,
62, 74,
23, 38)
# Cumulative cases and a running row id used for the cutoffs below.
claims$cases <- cumsum(claims$perdaycases)
claims$id<-1:nrow(claims)
# Daily dates from 2020-03-11 up to `noup` days before today (not used again
# in the code shown here).
inds <- seq(as.Date("2020-03-11"), as.Date(Sys.Date()-noup), by = "day")
set.seed(1)
## Forecast length
h0 = 30
#Here, I create the empty dataset
# Zero-row data frame with the two target columns.
estimates<-data.frame(Simulation=numeric(),Forecast=numeric()) #Empty Dataset
for(i in 1:length(claims$id)) {
# Rows with id < 14 + i, i.e. the first 14 rows at i = 1, one more per pass.
cap<-subset(claims,id<14+i) #First cutoff then it runs again
cts <- ts(cap$cases, start = 1,frequency = 365) #Time Series
# ARIMA forecast; `cfore` is not used further in the code shown here.
cfore <- forecast(auto.arima(cts), h= h0, level = c(80)) #Do the Arima
gc_fit <- SummarizeGrowth(seq(1,nrow(cap)),cap$cases) #Fit the Growth curve
# Time points to project: from the day after the cutoff up to day 90.
tt <- seq(from=nrow(cap)+1,to=90,by=1)
forelog <- predict(gc_fit$model,newdata=list(t=tt)) #Prediction
forecast<-forelog #Create the item with mean projection
len<-as.numeric(length(forecast)) #Length of each forecast
# NOTE(review): `estimates` has zero rows, so assigning a column of length
# `len` (76 at i = 1: days 15..90) with `$<-` fails with
# "replacement has 76 rows, data has 0"; the rows need to be appended instead.
estimates$Simulation<-as.numeric(rep(i,len)) #id each iteration
estimates$Forecast<-forecast #Here I try to export the forecast
}
The Error I get is ...
Error in $<-.data.frame(*tmp*, "Simulation", value = c(1, 1, 1, 1, :
replacement has 76 rows, data has 0
I am guessing it has to do with the last 2 lines but after 4 hours of struggling I have decided to ask for help.
SOS.
Thanks,
Try to provide a minimal example that reproduces your error. Your problem boils down to this simple problem:
# Minimal reproduction: create a data frame with two numeric columns and zero
# rows, then try to assign a length-3 vector to one of its columns.
test <- data.frame(
a=numeric(),
b=numeric()
)
# Fails on purpose: the replacement has 3 values but the data frame has 0 rows.
test$a <- c(1,2,3)
# Error in `$<-.data.frame`(`*tmp*`, a, value = c(1, 2, 3)) :
# replacement has 3 rows, data has 0
Which doesn't work. You could do something like this:
# Start from a zero-row frame with the target columns, then append one
# three-row chunk per iteration instead of assigning columns in place.
test <- data.frame(a = numeric(), b = numeric())
for (i in 1:3) {
  # Chunk for this pass: `a` repeats i, `b` repeats i - 1, three rows each.
  df <- data.frame(a = rep(i, 3), b = rep(i - 1, 3))
  # Row-bind the chunk below whatever has been collected so far.
  test <- bind_rows(test, df)
}
And for your specific example, you could replace the following lines:
# Inside the loop, replace the two failing column assignments (kept here,
# commented out, for reference) with a per-iteration data frame that is
# appended to `estimates`.
# estimates$Simulation <-as.numeric(rep(i,len)) #id each iteration
# estimates$Forecast<-forecast #Here I try to export the forecast
# The column names must match those of the empty `estimates` frame
# (Simulation, Forecast): bind_rows() matches columns by name, so a
# differently named column would be added as a third column and leave
# `Forecast` filled with NA.
df <- data.frame(
  Simulation = as.numeric(rep(i, len)),
  Forecast = forecast
)
estimates <- bind_rows(estimates, df)
I have a for-loop that returns 4 different answers, which are correct, but when I try to store these values in my data.frame I get "Error in [<-.data.frame(*tmp*, p, 1, value = 29.1520685791182) :
missing values are not allowed in subscripted assignments of data frames"
Goal: I'm trying to get the printed values (29, 485, -14, 12) into a data.frame.
library("xts")
library("quantmod")
library("fredr")
# Download five years of prices for four symbols, merge their closing prices
# into one data frame, and compute the percentage change between the first and
# last close of each column.
# NOTE(review): the objects used below are TSLA, AMZN, EQNR and FTSE, not the
# names assigned here -- presumably getSymbols() also creates those objects in
# the workspace; confirm against the quantmod documentation.
Tesla <- getSymbols("TSLA", from=as.Date("2014-11-03"),to=as.Date("2019-11-03"))
Amazon <- getSymbols("AMZN", from=as.Date("2014-11-03"),to=as.Date("2019-11-03"))
Equinor <- getSymbols("EQNR",from="2014-11-03",to="2019-11-03")
FTSE100 <- getSymbols("^FTSE",from="2014-11-03",to="2019-11-03")
# One column of closing prices per symbol.
dftest <- data.frame(merge(TSLA$TSLA.Close, AMZN$AMZN.Close, EQNR$EQNR.Close,FTSE$FTSE.Close))
df <- data.frame(matrix(nrow = 1, ncol = 4)) #The data.frame where i want my returned values from print(pros) to be in.
# NOTE(review): `dfProsent` is not created anywhere in the code shown; the
# result frame above is called `df`.
colnames(dfProsent) <- c("TESLA", "AMAZON","EQUINOR","FTSE")
# Iterating over a data frame yields its columns, so `p` is a whole column of
# prices on each pass, not a column number.
for (p in dftest) {
# Percentage change from the first to the last value of the column.
pros <- ((last(as.numeric(p)))-(first(as.numeric(p))))/(first(as.numeric(p)))*100
print(pros) #this print out 29, 485,-14,12
# NOTE(review): `p` (the price values) is used as the row index here. If the
# merged column contains NA, that would match the reported "missing values are
# not allowed in subscripted assignments" error -- confirm. A column counter
# is probably what was intended as the index.
df[p,1] <- pros #the problem
}
Trying to use "for loop" in R. I have a vector of length 44 with 4401 observations read from data file "data.csv".
I am converting it to a matrix for working on each column as a time series data.
I want to extract each column, do forecasting and then make a matrix for that.
What is the easiest way to do that?
library(forecast)
# Read the series from data.csv, then for each column fit auto.arima, predict
# three steps ahead and collect observations plus predictions column by column
# in `x`.
data<-read.table(file="data.csv",sep=",",row.names=NULL,header=FALSE)
# Seed column 1..47; 47 matches 44 monthly points (2016-01 to 2019-08) plus
# the 3 predicted values appended below.
x <- matrix(1:47, ncol = 1, byrow = FALSE)
# The number of series is hard-coded as 4401.
for (i in 1:4401)
{
# Single-bracket indexing: column i as a one-column data frame.
y <- data[i]
y_ts <- ts(y, start=c(2016,1), end=c(2019,8), frequency=12)
AutoArimaModel=auto.arima(y_ts)
# Three-step-ahead prediction; the values are read from `$pred` below.
forecast=predict(AutoArimaModel, 3)
output <- matrix(forecast$pred, ncol = 1, byrow = FALSE)
ym = data.matrix(y)
# Stack the observed values on top of the predictions, then append the
# resulting column to `x`.
z = rbind(ym,output)
x = cbind(x,z)}
It is just running for i = 1 and giving me error as below:
Error in array(x, c(length(x), 1L), if (!is.null(names(x))) list(names(x), :
'data' must be of a vector type, was 'NULL'
So, your code needed a partial re-write!
If I understand correctly, you want 3 forecasts for each time series of 44 observations. I used the .xlsx data that you provided.
library(forecast)
library(readxl)
# Read the series (no header row) and transpose so that every COLUMN of
# `data` is one time series.
data <- read_excel("data.xlsx", col_names = FALSE)
data <- t(data)
forecast_horizon <- 3

# Result matrix: one column per series, one row per forecast step.
# Preallocated so the loop does not re-copy `z` on every iteration.
z <- matrix(NA_real_, nrow = forecast_horizon, ncol = ncol(data))
for (i in seq_len(ncol(data))) {
  # Monthly series from 2016-01 to 2019-08.
  y_ts <- ts(data[, i], start = c(2016, 1), end = c(2019, 8), frequency = 12)
  fit <- auto.arima(y_ts)
  # Some series fail when the predictions are computed; keep going, leave NA
  # in that column, and report which series failed instead of hiding it.
  pred <- tryCatch(
    predict(fit, n.ahead = forecast_horizon)$pred,
    error = function(e) {
      warning(
        "Forecast failed for series ", i, ": ", conditionMessage(e),
        call. = FALSE
      )
      rep(NA_real_, forecast_horizon)
    }
  )
  z[, i] <- as.numeric(pred)
}
Pay attention to the usage of tryCatch which is used because there is one time series that produces errors when accessing the predictions (you can investigate further why this is the case.)
Use the tibbletime package: https://www.business-science.io/code-tools/2017/09/07/tibbletime-0-0-1.html
Read the data with readr::read_csv such that it's a tibble. Turn it into a tibbletime with your date vector. Use tmap_* functions as described in the article to encapsulate your forecasting code and map them to the columns of the tibbletime.
The article should have all the info you need to implement this.
The problem seems to be your data source. This works:
# Synthetic stand-in for the CSV: n_col random series with n_rows points each.
n_col <- 5
n_rows <- 44
#generate data
data <- data.frame(replicate(n_col, rnorm(n_rows)))

# For column `i` of `data`: fit auto.arima on the monthly series, predict
# three steps ahead, and return the observed values stacked on top of the
# predictions as a single-column matrix.
extend_column <- function(i) {
  series <- data[i]
  series_ts <- ts(series, start = c(2016, 1), end = c(2019, 8), frequency = 12)
  fit <- auto.arima(series_ts)
  ahead <- predict(fit, 3)
  rbind(data.matrix(series), matrix(ahead$pred, ncol = 1, byrow = FALSE))
}

# Index column 1..47 followed by one extended column per series.
x <- do.call(
  cbind,
  c(
    list(matrix(1:47, ncol = 1, byrow = FALSE)),
    lapply(seq_len(n_col), extend_column)
  )
)
x
As an aside, I think I would approach it like this, especially if you have 4,401 fields to perform an auto.arima on:
# Parallel variant: build one multi-column monthly series from `data`, fit and
# forecast every series on background workers, and bind the extended series
# together column by column.
y_ts <- ts(data, start = c(2016, 1), end = c(2019, 8), frequency = 12)
library(future.apply)
# `multiprocess` is deprecated (and defunct in recent releases of the future
# package); `multisession` runs the work in background R sessions.
plan(multisession)
do.call(
  cbind,
  # NOTE(review): this relies on future_lapply() visiting `y_ts` one series
  # (column) at a time -- confirm on your data.
  future_lapply(
    y_ts,
    function(y_t) {
      fit <- auto.arima(y_t)
      ahead <- predict(fit, 3)
      output <- matrix(ahead$pred, ncol = 1, byrow = FALSE)
      ym <- data.matrix(y_t)
      # Observed values followed by the three predicted values.
      rbind(ym, output)
    }
  )
)
I'm trying to estimate a Markov-switching VAR in R using the command msvar. These are the first 10 entries of my two time series. I have 798. When I try to run this I get an Error message
# First ten observations of the two series (the full data has 798).
a <- c(1.998513, 1.995302, 2.030693, 2.122130, 2.236770, 2.314639, 2.365214, 2.455784, 2.530696, 2.596537)
b <- c(0.6421369, 0.6341437, 0.6494933, 0.6760939, 0.7113511, 0.7173038, 0.7250545, 0.7812490, 0.7874657, 0.8275209)
# Combine both series into a 10 x 2 matrix: column 1 = a, column 2 = b.
x <- matrix (NA,10,2)
x[,1] <- a
x[,2] <- b
# Bivariate time series with the default start and frequency.
time.seriesx <- ts(x)
# Model call with p = 2 and h = 2 (presumably lag order and number of regimes
# -- confirm against the MSBVAR documentation); this is the call that stops
# with "initial value in 'vmmin' is not finite".
markov.switchingx <- msvar(time.seriesx, p = 2, h = 2, niterblkopt = 10)
The error message I get is the following:
Error in optim(par = c(beta0.it), fn = llf.msar, Y = Yregmat, X =
Xregmat, : initial value in 'vmmin' is not finite
Anyone who could help me? Thanks
I think that you have to run the log-likelihood function first. I got the same error, but when I did this, it worked.
I'm not sure, but I hope this can help you (I used my own data, so don't pay attention to "M1euro"):
library(base)
data <- data.matrix(M1euro, rownames.force = NA)
library(stats)
ss1<-ts(data, frequency=12, start=c(2007,1), end=c(2016,4))
class(ss1)
length(ss1)
ss <- na.approx(ss1,na.rm=F,rule=2)
ss
class(ss)
library(MSBVAR)
require(graphics)
set.seed(1)