My data frame is a time series with different variables and looks like
Date prcpmm TmaxC TminC
1/1/1966 0 7.8 0
1/2/1966 0.8 8.3 -1.1
1/3/1966 0 2.8 -5
1/4/1966 0 4.4 -5.6
1/5/1966 0 5.6 -8.9
1/6/1966 0 2.2 -1.7
1/7/1966 0.5 0.6 -12.8
1/8/1966 0 -6.1 -15
1/9/1966 0 3.9 -8.3
1/10/1966 0 5 -8.9
1/11/1966 0 -5 -12.2
1/12/1966 1 -3.3 -11.1
1/13/1966 1.3 2.2 -6.1
The data run from 1966 to 2005. The input file is available here: https://drive.google.com/open?id=0B09Gj4L5FPQWeGt0Wl94OWVlelE
I tried the code below:
# Load the observed daily record and tag every row with its data source.
Adrian <- read.csv("~/Adrian.csv")
source_tag <- rep("ObservedAdrian", nrow(Adrian))
Adrian <- cbind(Adrian, weatherGen = source_tag)

# Parse the month/day/year strings once and derive the calendar parts from it.
dateObj <- as.POSIXct(Adrian$Date, format = "%m/%d/%Y")
Adrian$Date <- dateObj
Adrian$day <- day(dateObj)
Adrian$month <- month(dateObj)
Adrian$year <- year(dateObj)

# -99 is the missing-value sentinel in all three measurement columns.
for (measure in c("prcpmm", "TmaxC", "TminC")) {
  is_sentinel <- Adrian[[measure]] == -99.00
  Adrian[[measure]][is_sentinel] <- NA
}
# Derive the season and the "season year" for every row in one vectorised
# pass. Unlike a `1:length(...)` loop this also works on a zero-row data frame
# and avoids element-by-element assignment into the data frame.
# Months 3-5 are spring, 6-8 summer, 9-11 fall; every other value (including
# a missing month) is labelled winter, matching the original if/else chain.
season <- rep("winter", nrow(Adrian))
season[Adrian$month %in% 3:5] <- "spring"
season[Adrian$month %in% 6:8] <- "summer"
season[Adrian$month %in% 9:11] <- "fall"

# Ordered factor so seasons sort winter < spring < summer < fall.
Adrian$season <- ordered(season, levels = c("winter", "spring",
                                            "summer", "fall"))

# Winter rows are attributed to the following year; all others keep their year.
# NOTE(review): this shifts January and February as well as December, so
# Jan/Feb and Dec of the same calendar year share one yearNew -- confirm that
# this is intended rather than shifting December only.
Adrian$yearNew <- Adrian$year + (season == "winter")
It executed till this part very well without error.
# Mann-Kendall trend test on precipitation for each (yearNew, season) group.
# NOTE: the str() output of mk.test() shown further down lists `p.value` and
# `estimates` but no `pvalue` or `Sg` element, so `$pvalue[1]` and `$Sg[1]`
# both evaluate to NULL -- which is what summarise() complains about.
Observed_everyseason_pVal <- Adrian %>% group_by(yearNew, season) %>%
summarise(pvalMK = mk.test(ts(prcpmm))$pvalue[1], SMK =
mk.test(ts(prcpmm))$Sg[1] )
# Same test, but grouped by season only (all years pooled within a season).
Observed_everyseason_pVal1 <- Adrian %>% group_by(season) %>%
summarise(pvalMK = mk.test(ts(prcpmm))$pvalue[1], SMK =
mk.test(ts(prcpmm))$Sg[1])
From the two statements above, when I run only this part:
Observed_everyseason_pVal <- Adrian %>% group_by(yearNew, season)
It worked well without error but when I run with
Observed_everyseason_pVal <- Adrian %>% group_by(yearNew, season) %>%
summarise(pvalMK = mk.test(ts(prcpmm))$pvalue[1], SMK =
mk.test(ts(prcpmm))$Sg[1] )
It produces error:
Error in summarise_impl(.data, dots) : Column pvalMK is of
unsupported type NULL
if you do str(mk.test(ts(Adrian$prcpmm))), it gives you the following output:
List of 9
$ data.name : chr "ts(Adrian$prcpmm)"
$ p.value : num 0.259
$ statistic : Named num 1.13
..- attr(*, "names")= chr "z"
$ null.value : Named num 0
..- attr(*, "names")= chr "S"
$ parameter : Named int 13
..- attr(*, "names")= chr "n"
$ estimates : Named num [1:3] 16 176.67 0.28
..- attr(*, "names")= chr [1:3] "S" "varS" "tau"
$ alternative: chr "two.sided"
$ method : chr "Mann-Kendall trend test"
$ pvalg : num 0.259
- attr(*, "class")= chr "htest"
Therefore, the element you requested, namely mk.test(ts(prcpmm))$pvalue[1], doesn't exist; the p-value is stored as mk.test(ts(prcpmm))$p.value[1]. Likewise, mk.test(ts(prcpmm))$Sg[1] (...$Sg) is not an element of the result at all; the S statistic is available as mk.test(ts(prcpmm))$estimates["S"]. Extracting a non-existent list element returns NULL, which explains the error: Error in summarise_impl(.data, dots) : Column pvalMK is of unsupported type NULL.
Related
I have this code which works for list [[1]] and list of list [[200]]:
SHAP_Prep_data <- shap.prep(xgb_model = xgb.mod[[1]][[200]],
shap_contrib = shap_values_results[[1]][[200]]$shap_score %>% select(-BIAS0),
X_train = as.matrix(TrainTestData[[1]]$XTrain[[200]])
#top_n = 4
)
I can simply swap [[200]] for [[300]], [[400]], etc. and obtain a new data structure (the shap.prep function comes from the SHAPforxgboost package).
xgb.mod[[1]][[200]] is a single xgboost model
shap_values_results[[1]][[200]]$shap_score %>% select(-BIAS0) is a data frame with the following structure.
> str(shap_values_results[[1]][[200]]$shap_score %>% select(-BIAS0))
'data.frame': 2190 obs. of 29 variables:
$ holiday : num -0.276 -0.347 -0.284 -0.356 -0.197 ...
$ temp : num 0.35 0.25 0.144 0.227 0.16 ...
$ wind : num -0.116 -0.187 -0.25 -0.265 -0.135 ...
$ humidity : num -0.021 0.0125 -0.037 0.016 -0.0196 ...
$ barometer : num -0.0191742 -0.0000462 0.0444956 -0.0148842 -0.0551703 ...
$ weekday : num -0.00421 -0.00937 0.0012 -0.01194 -0.00931 ...
$ weekend : num 0 0 0 0 0 0 0 0 0 0 ...
$ workday_on_holiday : num -0.00949 -0.00949 -0.00885 -0.00949 -0.00885 ...
$ weekend_on_holiday : num 0 0 0 0 0 0 0 0 0 0 ...
$ protocol_active : num 0 0 0 0 0 0 0 0 0 0 ...
$ text_fog : num 0.00714 0.00714 0.00783 0.00783 0.00772 ...
$ text_light_rain : num -0.000364 -0.000364 -0.000364 -0.000364 -0.000364 ...
$ text_mostly_cloudy : num -0.0013 -0.0013 -0.0013 -0.0013 -0.0013 ...
$ text_passing_clouds : num 0.00135 0.00152 0.00363 0.00152 0.00345 ...
$ text_rain : num -0.0000682 -0.0000682 -0.0000682 -0.0000682 -0.0000682 ...
$ text_scattered_clouds: num -0.0941 -0.0832 -0.1497 -0.0813 -0.0965 ...
$ text_sunny : num 0.000635 0.007435 0.009286 0.007435 0.007009 ...
$ month_1 : num 0.045 0.0503 0.062 0.062 0.0484 ...
$ month_2 : num 0.0602 0.0529 0.0526 0.0529 0.1008 ...
$ month_3 : num 0.0467 0.0348 0.0333 0.0348 0.0467 ...
$ month_4 : num -0.03439 -0.03439 -0.00777 -0.03439 -0.00164 ...
$ month_5 : num -0.02191 -0.02191 -0.00836 -0.02026 -0.01533 ...
$ month_6 : num -0.05498 -0.00637 -0.04769 -0.05101 -0.05155 ...
$ month_7 : num -0.1302 -0.1126 -0.0878 -0.0963 -0.1535 ...
$ month_8 : num -0.0418 -0.051 -0.0727 -0.0437 -0.0957 ...
$ month_9 : num 0.164 0.185 0.141 0.193 0.122 ...
$ month_10 : num 0.206 0.251 0.243 0.251 0.211 ...
$ month_11 : num 0.0929 0.0744 0.0302 0.0568 0.0961 ...
$ month_12 : num 0.059 0.0608 0.0806 0.0608 0.0788 ...
Finally, as.matrix(TrainTestData[[1]]$XTrain[[200]]) is a dgCMatrix that I convert to a plain matrix using as.matrix(); the result has the following structure:
> str(as.matrix(TrainTestData[[1]]$XTrain[[200]]))
num [1:2190, 1:29] 0 0 0 0 0 0 0 0 0 0 ...
- attr(*, "dimnames")=List of 2
..$ : NULL
..$ : chr [1:29] "holiday" "temp" "wind" "humidity" ...
I have 3 pieces of data I would like to apply the shap.prep function to.
The desired output would be to have a list (or list of lists) where the shap.prep function has been applied. The function requires 3 inputs shap.prep(xgb_model = NULL, shap_contrib = NULL, X_train,
top_n = NULL) which is what I am providing.
How can I use imap correctly to pass all three objects to the shap_prep function and obtain lists as my output?
It's difficult for me to provide dput() data, since I am not sure whether it is possible to dput() a trained XGBoost model.
EDIT:
I am adding the closest thing I can get to a reproducible example.
data(iris)
df <- split(iris, iris$Species) # I just want to create some lists here
library(xgboost)
library(SHAPforxgboost)
# Build an xgb.DMatrix from one per-species slice of iris.
#
# i: a data frame containing the four measurement columns and `Species`.
# Returns the xgb.DMatrix visibly (the original assigned it to an unused local
# with `=`, so the value was only returned invisibly).
dtrainFunction <- function(i) {
  feature_cols <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
  xgb.DMatrix(data = data.matrix(i[, feature_cols]), label = i$Species)
}
dtrain <- map(df, dtrainFunction) # I just apply the dtrainFunction which just puts each list into an xgb.DMatrix
xgb.mod <- map(dtrain, ~xgboost(data = .x, nround = 20)) # Apply the xgboost model to each list
# could not get this part of the code to work but it's not important. I manually put the results into a list below.
# shap_values_function <- function(j){
# map2(
# .x = xgb.mod[[j]],
# .y = df[[j]][, c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")],
# ~shap.values(xgb_model = .x, X_train = as.matrix(.y))
# )
# }
#
# shap_values_results <- lapply(seq(1:3), shap_values_function)
# Here I manually put the results into a list which are lists of shap.values
shap_values_results <- list(
shap.values(xgb_model = xgb.mod[[1]], X_train = as.matrix(df[[1]][, c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")])),
shap.values(xgb_model = xgb.mod[[2]], X_train = as.matrix(df[[2]][, c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")])),
shap.values(xgb_model = xgb.mod[[3]], X_train = as.matrix(df[[3]][, c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")]))
)
# Something is wrong here which is something to do with shap_contrib and BIAS0
SHAP_Prep_data <- shap.prep(xgb_model = xgb.mod[[1]],
shap_contrib = shap_values_results[[1]]$shap_score[, c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")],
X_train = as.matrix(df[[1]])
#top_n = 4
)
shap.plot.summary(data_long = SHAP_Prep_data)
My actual code for the SHAP_Prep_data data is slightly different to the one above since I have list of lists.
EDIT 2:
I have tried the following which throws an error:
SHAP_Prep_data <- pmap(
list(
.x = xgb.model[[1]],
.y = shap_values_results[[1]],
.z = TrainTestData[[1]]$XTrain
), ~shap.prep(
xgb_model = .x,
shap_contrib = .y,
X_train = as.matrix(.z))
)
Error in as.matrix(.z) : object '.z' not found
EDIT 3:
When I apply the function on the iris data example:
SHAP_Prep_data <- pmap(
list(
.x = xgb.mod,
.y = shap_values_results,
.z = dtrain
), ~shap.prep(
xgb_model = .x,
shap_contrib = .y,
X_train = as.matrix(.z))
)
Error in as.matrix(.z) : object '.z' not found
EDIT 4:
I want to be able to access the $shap_score data which is created from the shap.values function used earlier (and also remove the column BIAS0 in the data from the following line).
shap_contrib = shap_values_results[[1]][[1300]]$shap_score %>% select(-BIAS0)
So would another map be needed here? or should I extract the shap_score data earlier in the function and remove the BIAS0 column there so that I can just call NEWDATA_shap_score[[1]][[1300]]?
The issue is that
str(as.matrix(df[[1]]))
#chr [1:50, 1:5] "5.1" "4.9" "4.7" "4.6" "5.0" "5.4" "4.6" "5.0" "4.4" "4.9" "5.4" "4.8" "4.8" "4.3" "5.8" "5.7" "5.4" "5.1" "5.7" "5.1" ...
# - attr(*, "dimnames")=List of 2
# ..$ : chr [1:50] "1" "2" "3" "4" ...
# ..$ : chr [1:5] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" ...
returns a character matrix because the last column (Species) is not numeric. Remove the last column and then do the conversion:
out <- shap.prep(xgb_model = xgb.mod[[1]],
shap_contrib = shap_values_results[[1]]$shap_score[,
c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")],
X_train = as.matrix(df[[1]][-5]) ###
#top_n = 4
)
Regarding using this with pmap
out2 <- pmap(list( xgb.model[[1]],
shap_values_results[[1]],
TrainTestData[[1]]$XTrain),
~shap.prep(
xgb_model = ..1,
shap_contrib = ..2$shap_score %>% select(-BIAS0),
X_train = as.matrix(..3)))
If we also want to apply this on the list of lists
# Outer pmap(): walk the three top-level lists in parallel.
# Inner pmap(): walk the models, SHAP results and training matrices that
# belong to one top-level element and call shap.prep() on each triple.
# The training matrices live in `$XTrain` (capital X), as used elsewhere in
# the question; `$xTrain` would silently return NULL.
pmap(list(xgb.model,
          shap_values_results,
          TrainTestData),
     function(models, shap_results, train_test) {
       pmap(list(models, shap_results, train_test$XTrain),
            function(model, shap_result, x_train) {
              shap.prep(xgb_model = model,
                        shap_contrib = shap_result$shap_score %>% select(-BIAS0),
                        X_train = as.matrix(x_train))
            })
     })
It's hard to say without a reproducible example, but it sounds like you want pmap rather than imap
a <- list(letters[1:3])
b <- list(letters[4:6])
c <- list(letters[7:9])
purrr::pmap(list(a,b,c), function (x,y,z) paste(x, y, z))
#> [[1]]
#> [1] "a d g" "b e h" "c f i"
Created on 2020-01-08 by the reprex package (v0.3.0)
I'm using BoxCoxTrans function from the caret package:
library(caret)
library(purrr)
model1 <- apply(X = my.df, 2, BoxCoxTrans)
model2 <- purrr::map2(model1, my.df, function(x,y) predict(x,y))
trans.df <- as.data.frame(do.call(cbind, model2))
library(rcompanion)
plotNormalHistogram(trans.df)
print(trans.df)
It is working correctly and transforming the data, but I have no way of knowing which lambda value is used for the transformation.
You can find these values in model1. I'll show you how to get them using the iris data.
library(caret)
fudge <- 0.2
out <- lapply(iris[1:2], BoxCoxTrans, fudge = fudge) # instead of apply(..., margin = 2, ...)
Now look at the structure of out
str(out, 2)
#List of 2
# $ Sepal.Length:List of 6
# ..$ lambda : num -0.1
# ..$ fudge : num 0.2
# ..$ n : int 150
# ..$ summary :Classes 'summaryDefault', 'table' Named num [1:6] 4.3 5.1 5.8 5.84 6.4 ...
# .. .. ..- attr(*, "names")= chr [1:6] "Min." "1st Qu." "Median" "Mean" ...
# ..$ ratio : num 1.84
# ..$ skewness: num 0.309
# ..- attr(*, "class")= chr "BoxCoxTrans"
# $ Sepal.Width :List of 6
# ..$ lambda : num 0.3
# ..$ fudge : num 0.2
# ..$ n : int 150
# ..$ summary :Classes 'summaryDefault', 'table' Named num [1:6] 2 2.8 3 3.06 3.3 ...
# .. .. ..- attr(*, "names")= chr [1:6] "Min." "1st Qu." "Median" "Mean" ...
# ..$ ratio : num 2.2
# ..$ skewness: num 0.313
# ..- attr(*, "class")= chr "BoxCoxTrans"
Using base R you can use sapply and `[[` now as follows
sapply(out, `[[`, "lambda")
#Sepal.Length Sepal.Width
# -0.1 0.3
Since you use purrr, you might consider map and pluck
map_dbl(out, pluck, "lambda")
#Sepal.Length Sepal.Width
# -0.1 0.3
Thanks to @missuse's mindful comments, we can get the lambda actually used for the transformation as follows:
library(dplyr)
# `lambda` was used below without ever being defined: collect the estimated
# lambda of every BoxCoxTrans fit in `out` as a named numeric vector first.
lambda <- vapply(out, function(fit) fit[["lambda"]], numeric(1))
# Apply the same coercion BoxCoxTrans performs: values within +/- fudge of 0
# become 0, values within +/- fudge of 1 become 1, everything else is kept.
real_lambda <- case_when(
  between(lambda, -fudge, fudge) ~ 0,
  between(lambda, 1 - fudge, 1 + fudge) ~ 1,
  TRUE ~ lambda
)
# Re-attach the variable names so the result is labelled like `lambda`.
real_lambda <- setNames(real_lambda, names(lambda))
real_lambda
#Sepal.Length Sepal.Width
# 0.0 0.3
This is necessary because the function BoxCoxTrans has the argument fudge which is
a tolerance value: lambda values within +/-fudge will be coerced to 0 and within 1+/-fudge will be coerced to 1.
Trying to use dplyr to group_by the stud_ID variable in the following data frame, as in this SO question:
> str(df)
'data.frame': 4136 obs. of 4 variables:
$ stud_ID : chr "ABB112292" "ABB112292" "ABB112292" "ABB112292" ...
$ behavioral_scale: num 3.5 4 3.5 3 3.5 2 NA NA 1 2 ...
$ cognitive_scale : num 3.5 3 3 3 3.5 2 NA NA 1 1 ...
$ affective_scale : num 2.5 3.5 3 3 2.5 2 NA NA 1 1.5 ...
I tried the following to obtain scale scores by student (rather than scale scores for observations across all students):
scaled_data <-
df %>%
group_by(stud_ID) %>%
mutate(behavioral_scale_ind = scale(behavioral_scale),
cognitive_scale_ind = scale(cognitive_scale),
affective_scale_ind = scale(affective_scale))
Here is the result:
> str(scaled_data)
Classes ‘grouped_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 4136 obs. of 7 variables:
$ stud_ID : chr "ABB112292" "ABB112292" "ABB112292" "ABB112292" ...
$ behavioral_scale : num 3.5 4 3.5 3 3.5 2 NA NA 1 2 ...
$ cognitive_scale : num 3.5 3 3 3 3.5 2 NA NA 1 1 ...
$ affective_scale : num 2.5 3.5 3 3 2.5 2 NA NA 1 1.5 ...
$ behavioral_scale_ind: num [1:12, 1] 0.64 1.174 0.64 0.107 0.64 ...
..- attr(*, "scaled:center")= num 2.9
..- attr(*, "scaled:scale")= num 0.937
$ cognitive_scale_ind : num [1:12, 1] 1.17 0.64 0.64 0.64 1.17 ...
..- attr(*, "scaled:center")= num 2.4
..- attr(*, "scaled:scale")= num 0.937
$ affective_scale_ind : num [1:12, 1] 0 1.28 0.64 0.64 0 ...
..- attr(*, "scaled:center")= num 2.5
..- attr(*, "scaled:scale")= num 0.782
The three scaled variables (behavioral_scale_ind, cognitive_scale_ind, and affective_scale_ind) have only 12 observations - the same as the number of observations for the first student, ABB112292.
What's going on here? How can I obtain scaled scores by individual?
The problem seems to be in the base scale() function, which expects a matrix. Try writing your own.
# Standardise a numeric vector: subtract its mean and divide by its standard
# deviation, ignoring NA values when computing both statistics.
# Returns a plain vector of the same length as `x` (NA inputs stay NA).
scale_this <- function(x) {
  centre <- mean(x, na.rm = TRUE)
  spread <- sd(x, na.rm = TRUE)
  centred <- x - centre
  centred / spread
}
Then this works:
library("dplyr")
# reproducible sample data
set.seed(123)
n = 1000
df <- data.frame(stud_ID = sample(LETTERS, size=n, replace=TRUE),
behavioral_scale = runif(n, 0, 10),
cognitive_scale = runif(n, 1, 20),
affective_scale = runif(n, 0, 1) )
scaled_data <-
df %>%
group_by(stud_ID) %>%
mutate(behavioral_scale_ind = scale_this(behavioral_scale),
cognitive_scale_ind = scale_this(cognitive_scale),
affective_scale_ind = scale_this(affective_scale))
Or, if you're open to a data.table solution:
library("data.table")
setDT(df)
cols_to_scale <- c("behavioral_scale","cognitive_scale","affective_scale")
df[, lapply(.SD, scale_this), .SDcols = cols_to_scale, keyby = factor(stud_ID)]
This was a known problem in dplyr, a fix has been merged to the development version, which you can install via
# install.packages("devtools")
devtools::install_github("hadley/dplyr")
In the stable version, the following should work, too:
# Wrap base scale() so it hands back a plain vector instead of a one-column
# matrix carrying "scaled:center" / "scaled:scale" attributes.
scale_this <- function(x) {
  scaled_matrix <- scale(x)
  as.vector(scaled_matrix)
}
# Standardise every numeric column. Predicate functions must be wrapped in
# where() inside across(); a bare `is.numeric` is deprecated in tidyselect.
# as.numeric() strips the matrix shape and attributes that scale() returns.
# NOTE(review): as written this scales over all rows; add group_by(stud_ID)
# before mutate() if per-student scaling is wanted -- confirm intent.
df <- df %>% mutate(across(where(is.numeric), ~ as.numeric(scale(.x))))
I tried to find the subset, but it shows the following error:
I am performing Data Envelopment Analysis using Benchmarking Package in R.
Although I saw that similar questions have been asked before, they didn't help me.
Update :Structure and Summary of Database
I am performing DEA for V6 and V7.
I guess you need
Large.Cap$V1[e_crs$eff > 0.85]
Using a reproducible example from ?dea
library(Benchmarking)
# Single-input / single-output example data for seven units.
x <- matrix(c(100, 200, 300, 500, 100, 200, 600), ncol = 1)
y <- matrix(c(75, 100, 300, 400, 25, 50, 400), ncol = 1)
# stringsAsFactors = TRUE keeps `v1` a factor on R >= 4.0 as well, which the
# droplevels() call and the "Levels:" output further down rely on.
Large.Cap <- data.frame(v1 = LETTERS[1:7], v2 = 1:7, stringsAsFactors = TRUE)
# Input-oriented efficiency under constant returns to scale.
e_crs <- dea(x, y, RTS = "crs", ORIENTATION = "in")
e_crs
#[1] 0.7500 0.5000 1.0000 0.8000 0.2500 0.2500 0.6667
The e_crs object is a list
str(e_crs)
#List of 12
# $ eff : num [1:7] 0.75 0.5 1 0.8 0.25 ...
# $ lambda : num [1:7, 1:7] 0 0 0 0 0 0 0 0 0 0 ...
# ..- attr(*, "dimnames")=List of 2
# .. ..$ : NULL
# .. ..$ : chr [1:7] "L1" "L2" "L3" "L4" ...
# $ objval : num [1:7] 0.75 0.5 1 0.8 0.25 ...
# $ RTS : chr "crs"
# $ primal : NULL
# $ dual : NULL
# $ ux : NULL
# $ vy : NULL
# $ gamma :function (x)
# $ ORIENTATION: chr "in"
# $ TRANSPOSE : logi FALSE
# $ param : NULL
# - attr(*, "class")= chr "Farrell"
We extract the 'eff' list element from 'e_crs' to subset the 'v1' column in 'Large.Cap' dataset.
droplevels(Large.Cap$v1[e_crs$eff > 0.85])
#[1] C
#Levels: C
This question is unlikely to help any future visitors; it is only relevant to a small geographic area, a specific moment in time, or an extraordinarily narrow situation that is not generally applicable to the worldwide audience of the internet. For help making this question more broadly applicable, visit the help center.
Closed 10 years ago.
My lack of understanding of R is causing me to grind to a halt in my work and seek your help. I'm looking to build a neural network from some time series data and then build a prediction using separate data and the model returned by the trained neural network.
I created an xts containing the dependent variable nxtCl (a one-day forward closing stock price) and the independent variables (a set of corresponding prices and technical indicators).
I split the xts in two, one set being training data and the other set for testing/prediction, these are miData.train and miData.test respectively. Subsequently I altered these two xts to be scaled data frames.
miData.train <- scale(as.data.frame(miData.train))
miDate.test <- scale(as.data.frame(miData.test))
Using the package nnet I am able to build a neural network from the training data:
nn <- nnet(nxtCl ~ .,data=miData.train,linout=T,size=10,decay=0.001,maxit=10000)
The str() output for this returned formula object is:
> str(nn)
List of 18
$ n : num [1:3] 11 10 1
$ nunits : int 23
$ nconn : num [1:24] 0 0 0 0 0 0 0 0 0 0 ...
$ conn : num [1:131] 0 1 2 3 4 5 6 7 8 9 ...
$ nsunits : num 22
$ decay : num 0.001
$ entropy : logi FALSE
$ softmax : logi FALSE
$ censored : logi FALSE
$ value : num 4.64
$ wts : num [1:131] 2.73 -1.64 1.1 2.41 1.36 ...
$ convergence : int 0
$ fitted.values: num [1:901, 1] -0.465 -0.501 -0.46 -0.431 -0.485 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:901] "2005-07-15" "2005-07-18" "2005-07-19" "2005-07-20" ...
.. ..$ : NULL
$ residuals : num [1:901, 1] -0.0265 0.0487 0.0326 -0.0384 0.0632 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:901] "2005-07-15" "2005-07-18" "2005-07-19" "2005-07-20" ...
.. ..$ : NULL
$ call : language nnet.formula(formula = nxtCl ~ ., data = miData.train, inout = T, size = 10, decay = 0.001, maxit = 10000)
$ terms : language nxtCl ~ Op + Hi + Lo + Cl + vul + smaten + smafif + smath + vol + rsi + dvi
$ coefnames : chr [1:11] "Op" "Hi" "Lo" "Cl" ...
$ xlevels : Named list()
- attr(*, "class")= chr [1:2] "nnet.formula" "nnet"
I then try to run the prediction function using this model nn and the data I kept separate miData.test using the following function:
preds <- predict(object=nn, miData.test)
and I get the following error:
Error in terms.default(object, data = data) :
no terms component nor attribute
Running terms.default on miData.test I see that my data frame does not have any attributes:
terms.default(miData.test)
Error in terms.default(miData.test) : no terms component nor attribute
but is this why the prediction will not run?
miData.test has names that match the terms of nn:
> nn$terms
nxtCl ~ Op + Hi + Lo + Cl + vul + smaten + smafif + smath + vol +
rsi + dvi
> names(miData.test)
[1] "Op" "Hi" "Lo" "Cl" "vul" "smaten" "smafif" "smath" "vol" "rsi" "dvi" "nxtCl"
And, in terms of structure, the data is exactly the same as that which was used to build nn in the first place. I tried adding my own named attributes to miData.test, matching the terms of nn but that did not work. The str() of miData.test returns:
> str(miData.test)
'data.frame': 400 obs. of 12 variables:
$ Op : num 82.2 83.5 80.2 79.8 79.8 ...
$ Hi : num 83.8 84.2 83 79.9 80.2 ...
$ Lo : num 81 82.7 79.2 78.3 78 ...
$ Cl : num 83.7 82.8 79.2 79 78.2 ...
$ vul : num 4.69e+08 2.94e+08 4.79e+08 3.63e+08 3.17e+08 ...
$ smaten: num 84.1 84.1 83.8 83.3 82.8 ...
$ smafif: num 86.9 86.8 86.7 86.6 86.4 ...
$ smath : num 111 111 111 110 110 ...
$ vol : num 0.335 0.341 0.401 0.402 0.382 ...
$ rsi : num 45.7 43.6 36.6 36.3 34.7 ...
$ dvi : num 0.00968 0.00306 -0.01575 -0.01189 -0.00623 ...
$ nxtCl : num 82.8 79.2 79 78.2 77.4 ...
Any help or insight in getting predict() to work in this instance would be greatly appreciated. Thanks.
Here's some reproducible code. In putting this together, I have 'removed' the error. Unfortunately, although it now works, I am none the wiser as to what was causing the problem before:
# library() fails loudly when a package is missing; require() only returns
# FALSE and lets the script die later with a less helpful error.
library(quantstrat)
library(PerformanceAnalytics)
library(nnet)

initDate <- "2004-09-30"
endDate <- "2010-09-30"
symbols <- c("SPY")
getSymbols(symbols, from = initDate, to = endDate,
           index.class = c("POSIXt", "POSIXct"))

# Predictors: RSI plus two simple moving averages of the close
# (default window and a 50-period window).
rsi <- RSI(Cl(SPY))
smaTen <- SMA(Cl(SPY))
smaFif <- SMA(Cl(SPY), n = 50)
# Response: the close shifted by -1, i.e. the one-day-forward closing price
# described in the question.
nxtCl <- lag(Cl(SPY), -1)

# Drop the fifth column twice, leaving the first four columns of SPY.
tmp <- SPY[, -5]
tmp <- tmp[, -5]
miData <- merge(tmp, rsi, smaTen, smaFif, nxtCl)
names(miData) <- c("Op", "Hi", "Lo", "Cl", "rsi", "smaTen", "smaFif", "nxtCl")
# Fixed row window chosen by the author (the 50-period SMA needs 50 rows).
miData <- miData[50:1512]

# Scale once, then split into training and test rows.
scaled.miData <- scale(miData)
miData.train <- as.data.frame(scaled.miData[1:1000])
miData.test <- as.data.frame(scaled.miData[1001:1463])

# TRUE instead of T: T is an ordinary variable that can be reassigned.
nn <- nnet(nxtCl ~ ., data = miData.train, linout = TRUE, size = 10,
           decay = 0.001, maxit = 10000)
preds <- predict(object = nn, miData.test)