Changing the reference value of a restricted cubic spline inside a function in R

I cannot get the restricted cubic spline prediction reference to change when the code is inside a function. I want age 65 to be the reference (yhat=1, lower=1, upper=1). It works perfectly when the code is outside the function "analysis". I suspect that the "update" doesn't take place when it is inside the function, even though dd$limits does change its reference to 65 (see output). Probably a different environment? I tried for hours to set the environment etc., unfortunately without success. Any help will be highly appreciated!
library(Hmisc)
library(survival)
library(rms)
library(cmprsk)
bk.tst <- function(analysis) {
  if (analysis == "test") {
    dt <- m
  }
  outcomes <- c("compos", "acs", "death", "vascdeath", "stroke", "majbleed")
  for (i in outcomes) {
    fvl <- dt$fvl
    age <- dt$age
    ndt <- data.frame(age, fvl)
    dd <- assign('dd', datadist(ndt), pos = 1)
    options(datadist = 'dd')
    tm <- paste("tt", i, sep = "")
    SurvObj <- with(dt, Surv(eval(parse(text = tm)), eval(parse(text = i)) == 1))
    f <- cph(SurvObj ~ fvl * rcs(age, c(60, 70, 80)), type = "Survival", method = "exact", x = T, y = T)
    print(dd$limits)
    dd$limits["Adjust to", "age"] <- 65
    print(dd$limits)
    g <- update(f)
    ano <- anova(f)
    age.intr.rcsplines <- Predict(g, age = 45:85, fvl, ref.zero = TRUE, fun = exp)
    print(age.intr.rcsplines[20:22, ])
  }
}
bk.tst("test")
#the output is:
age fvl
Low:effect 57.01027 0
Adjust to 64.62697 0
High:effect 71.69884 1
Low:prediction 39.48545 0
High:prediction 84.64122 1
Low 35.80287 0
High 92.15606 1
age fvl
Low:effect 57.01027 0
Adjust to 65.00000 0
High:effect 71.69884 1
Low:prediction 39.48545 0
High:prediction 84.64122 1
Low 35.80287 0
High 92.15606 1
age fvl yhat lower upper
20 64 0 0.986740 0.9599284 1.014300
21 65 0 1.008632 0.9929697 1.024541
22 66 0 1.034987 0.9799775 1.093084
# When the code is outside the function "analysis", the output is correct, with 65 as the reference. That is what I want to happen inside the function as well:
age fvl
Low:effect 57.01027 0
Adjust to 64.62697 0
High:effect 71.69884 1
Low:prediction 39.48545 0
High:prediction 84.64122 1
Low 35.80287 0
High 92.15606 1
age fvl
Low:effect 57.01027 0
Adjust to 65.00000 0
High:effect 71.69884 1
Low:prediction 39.48545 0
High:prediction 84.64122 1
Low 35.80287 0
High 92.15606 1
age fvl yhat lower upper
20 64 0 0.9782956 0.9369436 1.021473
21 65 0 1.0000000 1.0000000 1.000000
22 66 0 1.0261294 0.9868852 1.066934

Finally, defining the environment in the function bk.tst as shown below worked!
bk.tst <- function(analysis, env = parent.frame()) {
  if (analysis == "test") {
    dt <- m
  }
  outcomes <- c("compos", "acs", "death", "vascdeath", "stroke", "majbleed")
  for (i in outcomes) {
    fvl <- dt$fvl
    age <- dt$age
    env$ndt <- data.frame(age, fvl)
    env$dd <- datadist(env$ndt)
    options(datadist = 'dd')
    tm <- paste("tt", i, sep = "")
    SurvObj <- with(dt, Surv(eval(parse(text = tm)), eval(parse(text = i)) == 1))
    env$f <- cph(SurvObj ~ fvl * rcs(age, c(60, 70, 80)), type = "Survival", method = "exact", x = T, y = T)
    print(env$dd$limits)
    env$dd$limits["Adjust to", "age"] <- 65
    print(env$dd$limits)
    g <- update(env$f)
    ano <- anova(g)
    age.intr.rcsplines <- Predict(g, age = 45:85, fvl, ref.zero = TRUE, fun = exp)
    print(age.intr.rcsplines[20:22, ])
  }
}
bk.tst("test")


"variable lengths differ" error while running regressions in a loop

I am trying to run a regression loop based on code that I found in a previous answer (How to Loop/Repeat a Linear Regression in R), but I keep getting an error. My outcomes (dependent variables) are 940 variables (metabolites) and my predictors (independent variables) are "bmi", "Age", "sex", "lpa2c", and "smoking", where BMI and Age are continuous. BMI is the main exposure, and I am controlling for the others.
So I'm testing the effect of BMI on 940 metabolites.
Also, I would like to know how I can extract coefficient, p-value, standard error, and confidence interval for BMI only and when it is significant.
This is the code I have used:
y <- c(1653:2592) # response
x1 <- c("bmi", "Age", "sex", "lpa2c", "smoking") # predictors
for (i in x1){
  model <- lm(paste("y ~", i[[1]]), data = QBB_clean)
  print(summary(model))
}
And this is the error:
Error in model.frame.default(formula = paste("y ~", i[[1]]), data = QBB_clean, :
variable lengths differ (found for 'bmi').
A sample of the data:
y1 y2 y3 y4 bmi age sex lpa2c smoking
1 0.2875775201 0.59998896 0.238726027 0.784575267 24 18 1 0.470681834 1
2 0.7883051354 0.33282354 0.962358936 0.009429905 12 20 0 0.365845473 1
3 0.4089769218 0.48861303 0.601365726 0.779065883 18 15 0 0.121272054 0
4 0.8830174040 0.95447383 0.515029727 0.729390652 16 21 0 0.046993681 0
5 0.9404672843 0.48290240 0.402573342 0.630131853 18 28 1 0.262796304 1
6 0.0455564994 0.89035022 0.880246541 0.480910830 13 13 0 0.968641168 1
7 0.5281054880 0.91443819 0.364091865 0.156636851 11 12 0 0.488495482 1
8 0.8924190444 0.60873498 0.288239281 0.008215520 21 23 0 0.477822030 0
9 0.5514350145 0.41068978 0.170645235 0.452458394 18 17 1 0.748792881 0
10 0.4566147353 0.14709469 0.172171746 0.492293329 20 15 1 0.667640231 1
The error occurs because y is just the numeric vector 1653:2592 (940 column positions), not a column of QBB_clean, so its length doesn't match the number of rows (hence "variable lengths differ"). If you want to loop over responses you will want something like this:
respvars <- names(QBB_clean[1653:2592])
predvars <- c("bmi", "Age", "sex", "lpa2c", "smoking")
results <- list()
for (v in respvars) {
  form <- reformulate(predvars, response = v)
  results[[v]] <- lm(form, data = QBB_clean)
}
You can then print the results with something like lapply(results, summary), extract coefficients, etc. (I have a little trouble seeing how it's going to be useful to just print the results of 940 regressions ... are you really going to inspect them all?)
If you want coefficients etc. for BMI, I think this should work (not tested):
t(sapply(results, function(m) coef(summary(m))["bmi",]))
Or for the confidence intervals:
t(sapply(results, function(m) confint(m)["bmi",]))
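Building on that, here is a hedged sketch (assuming the row/column names produced by summary.lm() and confint()) that collects the BMI estimate, standard error, p-value, and confidence interval into one table and keeps only the significant rows:
bmi_tab <- data.frame(
  t(sapply(results, function(m) coef(summary(m))["bmi", ])),
  t(sapply(results, function(m) confint(m)["bmi", ])))
names(bmi_tab) <- c("estimate", "std_error", "t_value", "p_value", "ci_low", "ci_high")
subset(bmi_tab, p_value < 0.05)  # metabolites for which bmi is significant at the 5% level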

Implementing XGBoost on Methyl450k data set in R

I'm attempting to implement XGBoost on a Methyl450k data set. The data has approximately 480,000+ specific CpG sites with beta values between 0 and 1. Here is a look at the data (a sample of 10 columns with the response):
cg13869341 cg14008030 cg12045430 cg20826792 cg00381604 cg20253340 cg21870274 cg03130891 cg24335620 cg16162899 response
1 0.8612869 0.6958909 0.07918330 0.10816711 0.03484078 0.4875475 0.7475878 0.11051578 0.7120003 0.8453396 0
2 0.8337106 0.6276754 0.09811698 0.08934333 0.03348864 0.6300766 0.7753453 0.08652890 0.6465146 0.8137132 0
3 0.8516102 0.6575332 0.13310207 0.07990076 0.04195286 0.4325115 0.7257208 0.14334007 0.7384455 0.8054013 0
4 0.8970384 0.6955810 0.08134887 0.08950676 0.03578006 0.4711689 0.7214661 0.08299838 0.7718571 0.8151683 0
5 0.8562323 0.7204416 0.08078766 0.14902533 0.04274820 0.4769631 0.8034706 0.16473891 0.7143823 0.8475410 0
6 0.8613325 0.6527599 0.10158672 0.15459204 0.04839691 0.4805285 0.8004808 0.12598627 0.8218743 0.8222552 0
7 0.9168869 0.5963966 0.11457045 0.13245761 0.03720798 0.5067649 0.6806004 0.13601034 0.7063457 0.8509160 0
8 0.9002366 0.6898320 0.07029171 0.07158694 0.03875135 0.7065322 0.8167016 0.15394095 0.7226098 0.8310477 0
9 0.8876504 0.6172154 0.13511072 0.15276686 0.06149520 0.5642073 0.7177438 0.14752285 0.6846876 0.8360360 0
10 0.8992898 0.6361644 0.15423780 0.19111275 0.05700406 0.4941239 0.7819968 0.10109936 0.6680640 0.8504023 0
11 0.8997905 0.5906462 0.10411472 0.15006796 0.04157008 0.4931531 0.7857664 0.13430963 0.6946644 0.8326747 0
12 0.9009607 0.6721858 0.09081460 0.11057752 0.05824153 0.4683763 0.7655608 0.01755990 0.7113345 0.8346149 0
13 0.9036750 0.6313643 0.07477824 0.12089404 0.04738597 0.5502747 0.7520128 0.16332395 0.7036665 0.8564414 0
14 0.8420276 0.6265071 0.15351674 0.13647090 0.04901864 0.5037902 0.7446693 0.10534171 0.7727812 0.8317943 0
15 0.8995276 0.6515500 0.09214429 0.08973162 0.04231420 0.5071999 0.7484940 0.21822470 0.6859165 0.7775508 0
16 0.9071643 0.7945852 0.15809474 0.11264440 0.04793316 0.5256078 0.8425513 0.17150603 0.7581367 0.8271037 0
17 0.8691358 0.6206902 0.11868549 0.15944891 0.03523320 0.4581166 0.8058461 0.11557264 0.6960848 0.8579109 1
18 0.8330247 0.7030860 0.12832663 0.12936172 0.03534059 0.4687507 0.7630222 0.12176819 0.7179690 0.8775521 1
19 0.9015574 0.6592869 0.12693119 0.14671845 0.03819418 0.4395692 0.7420882 0.10293369 0.7047038 0.8435531 1
20 0.8568249 0.6762936 0.18220218 0.10123198 0.04963466 0.5781550 0.6324743 0.06676272 0.6805745 0.8291353 1
21 0.8799152 0.6736554 0.15056617 0.16070673 0.04944037 0.4015415 0.4587438 0.10392791 0.7467060 0.7396137 1
22 0.8730770 0.6663321 0.10802390 0.14481460 0.04448009 0.5177664 0.6682854 0.16747621 0.7161234 0.8309462 1
23 0.9359656 0.7401368 0.16730300 0.11842173 0.03388908 0.4906018 0.5730439 0.15970761 0.7904663 0.8136450 1
24 0.9320397 0.6978085 0.10474803 0.10607080 0.03268366 0.5362214 0.7832729 0.15564091 0.7171350 0.8511477 1
25 0.8444256 0.7516799 0.16767449 0.12025258 0.04426417 0.5040725 0.6950104 0.16010829 0.7026808 0.8800469 1
26 0.8692707 0.7016945 0.10123979 0.09430876 0.04037325 0.4877716 0.7053603 0.09539885 0.8316933 0.8165352 1
27 0.8738410 0.6230674 0.12793232 0.14837137 0.04878595 0.4335648 0.6547601 0.13714725 0.6944921 0.8788708 1
28 0.9041870 0.6201079 0.12490195 0.16227251 0.04812720 0.4845896 0.6619842 0.13093443 0.7415606 0.8479339 1
29 0.8618622 0.7060291 0.09453812 0.14068246 0.04799782 0.5474036 0.6088231 0.23338428 0.6772588 0.7795908 1
30 0.8776350 0.7132561 0.12100425 0.17367148 0.04399987 0.5661632 0.6905305 0.12971867 0.6788903 0.8198201 1
31 0.9134456 0.7249370 0.07144695 0.08759897 0.04864476 0.6682650 0.7445900 0.16374150 0.7322691 0.8071598 1
32 0.8706637 0.6743936 0.15291891 0.11422262 0.04284591 0.5268217 0.7207478 0.14296945 0.7574967 0.8609048 1
33 0.8821504 0.6845216 0.12004074 0.14009196 0.05527732 0.5677475 0.6379840 0.14122421 0.7090634 0.8386022 1
34 0.9061180 0.5989445 0.09160787 0.14325261 0.05142950 0.5399465 0.6718870 0.08454002 0.6709083 0.8264233 1
35 0.8453511 0.6759766 0.13345672 0.16310764 0.05107034 0.4666146 0.7343603 0.12733287 0.7062292 0.8471812 1
36 0.9004188 0.6114532 0.11837118 0.14667433 0.05050403 0.4975502 0.7258132 0.14894363 0.7195090 0.8382364 1
37 0.9051729 0.6652954 0.15153241 0.14571184 0.05026702 0.4855397 0.7226850 0.12179138 0.7430388 0.8342340 1
38 0.9112012 0.6314450 0.12681305 0.16328649 0.04076789 0.5382251 0.7404122 0.13971506 0.6607798 0.8657917 1
39 0.8407927 0.7148585 0.12792107 0.15447060 0.05287096 0.6798039 0.7182050 0.06549068 0.7433669 0.7948445 1
40 0.8554747 0.7356683 0.22698080 0.21692162 0.05365043 0.4496654 0.7353112 0.13341649 0.8032266 0.7883068 1
41 0.8535359 0.5729331 0.14392737 0.16612463 0.04651752 0.5228045 0.7397588 0.09967424 0.7906682 0.8384434 1
42 0.8059968 0.7148594 0.16774123 0.19006840 0.04990847 0.5929818 0.7011064 0.17921090 0.8121909 0.8481069 1
43 0.8856906 0.6987405 0.19262137 0.18327412 0.04816967 0.4340002 0.6569263 0.13724290 0.7600389 0.7788117 1
44 0.8888717 0.6760166 0.17025712 0.21906969 0.04812641 0.4173613 0.7927178 0.17458413 0.6806101 0.8297604 1
45 0.8691575 0.6682723 0.11932277 0.13669098 0.04014911 0.4680455 0.6186511 0.10002737 0.8012731 0.7177891 1
46 0.9148742 0.7797494 0.13313955 0.15166151 0.03934042 0.4818276 0.7484973 0.16354624 0.6979735 0.8164431 1
47 0.9226736 0.7211714 0.08036409 0.10395457 0.04063595 0.4014187 0.8026643 0.17762644 0.7194800 0.8156545 1
I've attempted to implement the algorithm in R but I'm continuing to get errors.
Attempt:
> train <- beta_values1_updated[training1, ]
> test <- beta_values1_updated[-training1, ]
> labels <- train$response
> ts_label <- test$response
> new_tr <- model.matrix(~.+0,data = train[,-c("response"),with=F])
Error in `[.data.frame`(train, , -c("response"), with = F) :
unused argument (with = F)
> new_ts <- model.matrix(~.+0,data = test[,-c("response"),with=F])
Error in `[.data.frame`(test, , -c("response"), with = F) :
unused argument (with = F)
I am attempting to follow the tutorial here:
https://www.hackerearth.com/practice/machine-learning/machine-learning-algorithms/beginners-tutorial-on-xgboost-parameter-tuning-r/tutorial/
Any insight as to how I could correctly implement the XGBoost algorithm would be greatly appreciated.
Edit:
I'm adding additional code to show the point in the tutorial where I get stuck:
train<-data.table(train)
test<-data.table(test)
new_tr <- model.matrix(~.+0,data = train[,-c("response"),with=F])
new_ts <- model.matrix(~.+0,data = test[,-c("response"),with=F])
#convert factor to numeric
labels <- as.numeric(labels)-1
ts_label <- as.numeric(ts_label)-1
#preparing matrix
dtrain <- xgb.DMatrix(data = new_tr,label = labels)
dtest <- xgb.DMatrix(data = new_ts,label=ts_label)
params <- list(booster = "gbtree", objective = "binary:logistic", eta=0.3, gamma=0, max_depth=6, min_child_weight=1, subsample=1, colsample_bytree=1)
xgbcv <- xgb.cv( params = params, data = dtrain, nrounds = 100, nfold = 5, showsd = T, stratified = T, print.every.n = 10, early.stop.round = 20, maximize = F)
[1] train-error:0.000000+0.000000 test-error:0.000000+0.000000
Multiple eval metrics are present. Will use test_error for early stopping.
Will train until test_error hasn't improved in 20 rounds.
[11] train-error:0.000000+0.000000 test-error:0.000000+0.000000
[21] train-error:0.000000+0.000000 test-error:0.000000+0.000000
Stopping. Best iteration:
[1] train-error:0.000000+0.000000 test-error:0.000000+0.000000
Warning messages:
1: 'print.every.n' is deprecated.
Use 'print_every_n' instead.
See help("Deprecated") and help("xgboost-deprecated").
2: 'early.stop.round' is deprecated.
Use 'early_stopping_rounds' instead.
See help("Deprecated") and help("xgboost-deprecated").
The author of the tutorial is using the data.table package. As you can read here, with = FALSE is the data.table way to select or drop columns by name, as in train[, -c("response"), with = FALSE]. The "unused argument (with = F)" error means train is still a plain data.frame, so [.data.frame does not know the with argument. Make sure you have installed and loaded data.table (and the other packages the tutorial uses), and convert your data set to a data.table object first.
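A minimal sketch of both options (assuming train is a plain data frame with a response column):
library(data.table)
train <- as.data.table(train)   # now the tutorial's data.table syntax works
new_tr <- model.matrix(~ . + 0, data = train[, -c("response"), with = FALSE])
# or stay with a plain data.frame and drop the column by name:
new_tr <- model.matrix(~ . + 0, data = train[, setdiff(names(train), "response")])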

modulus values (roots) in VECM model using R?

Thanks for reading my question. I am trying to fit a VECM for economic research, using the vars and urca packages in RStudio. Since my time series are not stationary and both are I(1) (each needs one difference), I need to use the VECM approach, but I cannot get all the tests I need.
For example:
First I load the libraries
library(vars)
library(urca)
and create my model
data("Canada")
df <- Canada
VARselect(df)
vecm <- urca::ca.jo(df,K = 3)
model <- vec2var(vecm)
The problem is, I cannot get the "modulus" values to prove stability. I know I can use the roots() function to get these values from a "varest" object, for example:
roots(VAR(df,3))
My question is:
how can I get the moduli from my vec2var object? roots() doesn't handle this kind of object. I know Gretl can do it (using the unit circle to prove stability), so is it possible to get these values from a VECM? How can I do it in R?
Starting with:
data("Canada")
dim(Canada) # 84 observations x 4 variables
VARselect(Canada) # since in small samples, AIC>BIC; VAR(3) is chosen.
Now, the range of the dataset Canada, 1980.1 - 2000.4 (20 years), is long enough for modeling. This 20-year period definitely includes lots of crises and interventions. Hence, structural breaks in the data MUST be searched for. This is necessary since, in structurally broken series, the existence of SBs changes the t values of nonstationarity tests (and thereby affects the decision on whether a series is stationary or not).
Since the Narayan-Popp 2010 nonstationarity test under multiple structural breaks is statistically much more powerful than its predecessors (Lee-Strazicich 2003, Zivot-Andrews 1992), since Joyeux 2007 (in Rao 2007) has shown the illogicality of those earlier tests, and since NP2013 has proven the superiority of NP2010's statistical power, one MUST use NP2010. Since the Gauss code for NP2010 seemed ugly to me, I converted it to R code, and with the help of ggplot2 the results are presented more nicely.
[Processing structural breaks is a MUST for the cointegration check as well, since Osterwald-Lenum 1992 CVs ignore SBs whereas Johansen-Mosconi-Nielsen 2000 CVs account for them.]
Canada <- as.data.frame(Canada)
head(Canada)
e prod rw U
1 929.6105 405.3665 386.1361 7.53
2 929.8040 404.6398 388.1358 7.70
...................................
# Assign lexicographic row names for the dates of observations
row.names(Canada) <- paste(sort(rep(seq(1980, 2000, 1), 4) ), rep(seq(1, 4, 1), 20), sep = ".")
# Insert a lexicographic "date" column into the dataframe. This is necessary for creating intervention dummies.
DCanada <- data.frame(date=row.names(Canada),Canada) # dataset with obs dates in a column
head(DCanada)
date e prod rw U
1980.1 1980.1 929.6105 405.3665 386.1361 7.53
1980.2 1980.2 929.8040 404.6398 388.1358 7.70
Perform the Narayan-Popp 2010 nonstationarity test on the series:
[H0: "(with 2 structural breaks) series is nonstationary";
H1: "(with 2 structural breaks) series is stationary";
"test stat > critical value" => "hold H0"; "test stat < critical value" => "hold H1"]
library(causfinder)
narayanpopp(DCanada[,2]) # for e
narayanpopp(DCanada[,3]) # for prod
narayanpopp(DCanada[,4]) # for rw
narayanpopp(DCanada[,5]) # for U
Narayan-Popp 2010 nonstationarity test results (with obs #s):
variable t stat lag SB1 SB2 Integration Order
e -4.164 2 37:946.86 43:948.03 I(1)
prod -3.325 1 24:406.77 44:405.43 I(1)
rw -5.087 0 36:436.15 44:446.96 I(0) <trend-stationary>
U -5.737 1 43:8.169 53:11.070 I(0) <stationary pattern> (M2 computationally singular; used M1 model)
(critical values (M2): (1%, 5%, 10%): -5.576 -4.937 -4.596)
(critical values (M1): (1%, 5%, 10%): -4.958 -4.316 -3.980)
Since in a VAR structure all variables are treated equally, continue the equal treatment when determining the structural breaks system-wise:
mean(c(37,24,36,43)) # 35; SB1 of system=1988.3
mean(c(43,44,44,53)) # 46; SB2 of system=1990.2
The following is to overcome the "In Ops.factor(left, right): >= not meaningful for factors" error. For some datasets, we need to do the following:
library(readxl); library(xlsx)  # write.xlsx() comes from the xlsx package; read_excel() from readxl
write.xlsx(Canada, file="data.xlsx", row.names=FALSE) # Take this to the folder below and add a "date" column with values 1980.1, ..., 2000.4
mydata <- read_excel("D://eKitap//RAO 2007 Cointegration for the applied economist 2E//JoyeuxCalisma//Canada//data.xlsx")
# arrange your path accordingly in the above line.
mydata <- as.data.frame(mydata)
library(lubridate); library(zoo)
row.names(mydata) <- as.yearqtr(seq(ymd('1980-01-01'), by = '1 quarter', length.out=(84)))
Dmydata <- mydata # Hold it in a variable
Define intervention dummy matrix with 2 SBs (35:1988.3 and 46: 1990.2) as follows:
library(data.table)
DataTable <- data.table(Dmydata, keep.rownames=FALSE)
Dt <- cbind("bir"=1, # intervention dummies matrix
"D2t" = as.numeric(ifelse( DataTable[,c("date"), with=FALSE] >= "1988.3" & DataTable[,c("date"), with=FALSE] <= "1990.1", 1 , 0)),
"D3t" = as.numeric(ifelse( DataTable[,c("date"), with=FALSE] >= "1990.2" & DataTable[,c("date"), with=FALSE] <= "2000.4", 1 , 0)))
On the fly indicator variables accompanying intervention dummies:
OnTheFlyIndicator <- cbind(
"I2t" = as.numeric(DataTable[, c("date"), with=FALSE] == "1988.3"),
"I3t" = as.numeric(DataTable[, c("date"), with=FALSE] == "1990.2"))
myTimeTrend <- as.matrix(cbind("TimeTrend" = as.numeric(1:nrow(Dt))))
zyDt <- Dt * as.vector(myTimeTrend) # time-trend behavior-change (regime) terms
colnames(zyDt) <- paste(colnames(myTimeTrend), colnames(Dt), sep="*")
mydata <- mydata[,-1]
Selection of VAR order:
library(vars)
# Lag order selection with the effects of intervention dummies
VARselect(mydata, lag.max=5, "both", exogen=cbind(zyDt[drop=FALSE], Dt[drop=FALSE], OnTheFlyIndicator)) # Take VAR(3)
Lag matrix helper for the Joyeux 2007 indexing technique:
lagmatrix <- function(x, maxlag){
  x <- as.matrix(x)
  if (is.null(colnames(x)) == TRUE) { colnames(x) <- "VarCol0" }
  DondurulenDizey <- embed(c(rep(NA, maxlag), x), maxlag + 1)  # DondurulenDizey: the returned matrix
  dimnames(DondurulenDizey)[[2]] <- c(colnames(x)[1, drop = FALSE], paste(colnames(x)[1, drop = FALSE], ".", 1:maxlag, "l", sep = ""))
  return(DondurulenDizey)
}
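A quick check of what the helper returns (a toy call; column k holds the k-lagged series, with NA padding):
lagmatrix(1:4, maxlag = 2)
#      VarCol0 VarCol0.1l VarCol0.2l
# [1,]       1         NA         NA
# [2,]       2          1         NA
# [3,]       3          2          1
# [4,]       4          3          2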
Assign VAR lag and no. of subsamples:
VARlag <- 3
Subsamples <- 3 # subsamples = no. of str breaks +1
Dummy matrix for 2 structural breaks:
dummymatrix2SB <- matrix(NA,DataTable[,.N], 10)
dummymatrix2SB <- cbind(myTimeTrend,
lagmatrix(zyDt[,c("TimeTrend*D2t"), drop=FALSE], maxlag=VARlag)[,1+VARlag, drop=FALSE],
lagmatrix(zyDt[,c("TimeTrend*D3t"), drop=FALSE], maxlag=VARlag)[,1+VARlag, drop=FALSE],
lagmatrix(Dt[,c("D2t"), drop=FALSE], maxlag=VARlag)[,1+VARlag, drop=FALSE],
lagmatrix(Dt[,c("D3t"), drop=FALSE], maxlag=VARlag)[,1+VARlag, drop=FALSE],
lagmatrix(OnTheFlyIndicator[,c("I2t"), drop=FALSE], maxlag=VARlag-1),
lagmatrix(OnTheFlyIndicator[,c("I3t"), drop=FALSE], maxlag=VARlag-1))
dummymatrix2SB[is.na(dummymatrix2SB)] <- 0 # replace NAs with 0
dummymatrix2SB # Print dummy matrix for 2 str breaks to make sure all are OK
TimeTrend TimeTrend.D2t.3l TimeTrend.D3t.3l D2t.3l D3t.3l I2t I2t.1l I2t.2l I3t I3t.1l I3t.2l
1 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0
...........................................
34 0 0 0 0 0 0 0 0 0 0
35 0 0 0 0 1 0 0 0 0 0
36 0 0 0 0 0 1 0 0 0 0
37 0 0 0 0 0 0 1 0 0 0
38 35 0 1 0 0 0 0 0 0 0
39 36 0 1 0 0 0 0 0 0 0
40 37 0 1 0 0 0 0 0 0 0
41 38 0 1 0 0 0 0 0 0 0
42 39 0 1 0 0 0 0 1 0 0
43 40 0 1 0 0 0 0 0 1 0
44 41 0 1 0 0 0 0 0 0 1
45 0 42 0 1 0 0 0 0 0 0
46 0 43 0 1 0 0 0 0 0 0
............................................
83 0 80 0 1 0 0 0 0 0 0
84 0 81 0 1 0 0 0 0 0 0
STABILITY of VAR:
Victor, theoretically you are wrong. Stability is checked from the VAR side even in the case of restricted (cointegrated) VAR models. See Joyeux 2007 for details. Also, the estimations from both sides are the same:
"unrestricted VAR = unrestricted VECM" and
"restricted VAR = restricted VECM".
Hence, checking the stability of the unrestricted VAR is equivalent to checking the stability of the unrestricted VECM, and vice versa. They are mathematically equal; they are just different representations.
Likewise, checking the stability of the restricted VAR is equivalent to checking the stability of the restricted VECM, and vice versa. But you do not need this check in restricted VECM cases, since you are surfing in the subspace of a feasible VAR. That is to say, if the original unrestricted VAR corresponding to the restricted VECM is stable, then all is OK.
If your series are cointegrated, you check stability from the VAR side even in that case! If you wonder whether you should check stability for the restricted VECM, the answer is NO, you should not: in the cointegrated case you are in the subspace of a feasible solution. That said, if you insist on checking the stability of the restricted (cointegrated) VECM, you can still do that via urca::ca.jo extensions and vars::vec2var extensions:
print(roots(VAR(mydata, p=3, "both", exogen=cbind(zyDt[drop=FALSE], Dt[drop=FALSE], OnTheFlyIndicator)), modulus=TRUE))
# [1] 0.96132524 0.77923543 0.68689517 0.68689517 0.67578368 0.67578368
# [7] 0.59065419 0.59065419 0.55983617 0.55983617 0.33700725 0.09363846
print(max(roots(VAR(mydata, p=3, "both", exogen=cbind(zyDt[drop=FALSE], Dt[drop=FALSE], OnTheFlyIndicator)), modulus=TRUE)))
#0.9613252
(optional) Check stability via OLS-CUSUM:
plot(stability(VAR(mydata, p=3, "both", exogen=cbind(zyDt[drop=FALSE], Dt[drop=FALSE], OnTheFlyIndicator)), type="OLS-CUSUM"))
NON-AUTOCORRELATION of VAR residuals test:
for (j in as.integer(1:5)){
print(paste("VAR's lag no:", j))
print(serial.test(VAR(mydata, p=j, "both", exogen=cbind(zyDt[drop=FALSE], Dt[drop=FALSE], OnTheFlyIndicator)), lags.bg=4, type= c("ES")))
# lags.bg: AR order of VAR residuals
}
NORMALITY of VAR residuals test:
print(normality.test(VAR(mydata, p=3, "both", exogen=cbind(zyDt[drop=FALSE], Dt[drop=FALSE], OnTheFlyIndicator)), multivariate=TRUE))
library(normtest)
for (i in as.integer(1:4)){ # there are 4 variables
print(skewness.norm.test(resid(VAR(mydata, p=3, "both", exogen=cbind(zyDt[drop=FALSE], Dt[drop=FALSE], OnTheFlyIndicator)))[,i]))
print(kurtosis.norm.test(resid(VAR(mydata, p=3, "both", exogen=cbind(zyDt[drop=FALSE], Dt[drop=FALSE], OnTheFlyIndicator)))[,i]))
print(jb.norm.test(resid(VAR(mydata, p=3, "both", exogen=cbind(zyDt[drop=FALSE], Dt[drop=FALSE], OnTheFlyIndicator)))[,i]))
}
HOMOSCEDASTICITY of VAR residuals test:
print(arch.test(VAR(mydata, p=3, "both", exogen=cbind(zyDt[drop=FALSE], Dt[drop=FALSE], OnTheFlyIndicator))), lags.multi=6, multivariate.only=TRUE)
Since the integration orders of the series are different, there is no way they are cointegrated. That said,
assume for a while that all are I(1) and perform a cointegration test with multiple structural breaks using the Johansen-Mosconi-Nielsen 2000 CVs:
(extend urca::ca.jo to causfinder::ykJohEsbInc; i.e., add the functionality to process 1 SB and 2 SBs)
summary(ykJohEsbInc(mydata, type="trace", ecdet="zamanda2yk", K=3, spec="longrun", dumvar=dummymatrix2SB[,c(-1,-2,-3)]))
# summary(ykJohEsbInc(mydata, type="trace", ecdet="zamanda2yk", K=3, spec="transitory", dumvar=dummymatrix2SB[,c(-1,-2,-3)])) gives exactly the same result.
Since there are 2 SBs in the system (1988.3, 1990.2), there are q = 2+1 = 3 subsamples.
1st SB ratio: v1 = (35-1)/84 = 0.4047619
2nd SB ratio: v2 = (46-1)/84 = 0.5357143
Hence, the JMN2000 CVs for the cointegration test with 2 SBs:
(The following is adapted from the original code on Ryan Godwin & David Giles' website.)
library(gplots)
# Code to compute the asymptotic p-values and critical values of the modified trace tests for cointegration analysis in the presence of structural breaks, found by Johansen et al. (2000)
# Ryan Godwin & David Giles (Dept. of Economics, University of Victoria, Canada), 29.06.2011
# The user must assign the following 4 values
#======================================
degiskensayisi <- 4 # p: number of variables in the system
q <- 3 # q: number of distinct periods in the data; q=1 means one period, i.e. no structural breaks, so the values of v1 and v2 are ignored
v1 <- 0.4047619 # (35-1)/84 # 1st SB moment = 34+1 = 35; Johansen et al. 2000 definition of v1
v2 <- 0.5357143 # (46-1)/84 # 2nd SB moment = 45+1 = 46.
#======================================
# to get p-values for one or both of the trace statistics, change one or both of the next 2 lines
izZ <- 15.09 # value of the Vz(r) statistic
izK <- 114.7 # value of the Vk(r) statistic
#=========================================
enbuyuk_p_r <- degiskensayisi # do not let "p-r > 10"; see Johansen et al. (2000)
# the values of "a" and "b" depend on the number of structural breaks (q-1)
# when q=1 (no structural breaks), set a=b=0
# when q=2 (1 structural break), set a=0 (Table 4 in Johansen et al. 2000) and b=min[V1, (1-V1)]
# when q=3 (2 structural breaks), set a=min[V1, (V2-V1), (1-V2)] and b=min[the remaining two V expressions]
a = c(0, 0, min(v1, v2-v1, 1-v2))[q]
b = c(0, min(v1, 1-v1), median(c(v1,v2-v1,1-v2)))[q]
# YanDagOrtLog: log of the mean of the asymptotic distribution
# YanDagDegLog: log of the variance of the asymptotic distribution
# Append z or k to the names to denote the V(trend) or V(intercept) tests.
# See Johansen et al. (2000), Table 4.
# Build the critical values first for the Vz(r) test, then for the Vk(r) test
pr<- c(1:enbuyuk_p_r)
# (note: each continuation line must follow a trailing operator; a leading "+" on a new line would end the sum early)
YanDagOrtLogZ <- 3.06+0.456*pr+1.47*a+0.993*b-0.0269*pr^2-0.0363*a*pr-0.0195*b*pr-4.21*a^2-2.35*b^2+0.000840*pr^3+6.01*a^3-1.33*a^2*b+2.04*b^3-2.05/pr-0.304*a/pr+1.06*b/pr+
  9.35*a^2/pr+3.82*a*b/pr+2.12*b^2/pr-22.8*a^3/pr-7.15*a*b^2/pr-4.95*b^3/pr+0.681/pr^2-0.828*b/pr^2-5.43*a^2/pr^2+13.1*a^3/pr^2+1.5*b^3/pr^2
YanDagDegLogZ <- 3.97+0.314*pr+1.79*a+0.256*b-0.00898*pr^2-0.0688*a*pr-4.08*a^2+4.75*a^3-0.587*b^3-2.47/pr+1.62*a/pr+3.13*b/pr-4.52*a^2/pr-1.21*a*b/pr-5.87*b^2/pr+4.89*b^3/pr+
  0.874/pr^2-0.865*b/pr^2
OrtalamaZ<- exp(YanDagOrtLogZ)-(3-q)*pr
DegismeZ<- exp(YanDagDegLogZ)-2*(3-q)*pr
# Use the asymptotic mean and variance to obtain the shape and scale parameters of the Gamma distribution
# that approximates the asymptotic distribution of the test statistic, and get the desired quantiles under H0:
# quantiles: cut points dividing the range of a probability distribution (or the observations in a sample) into contiguous intervals with equal probabilities.
tetaZ <- DegismeZ/OrtalamaZ
kZ <- OrtalamaZ^2/DegismeZ
YanDagOrtLogK <- 2.80+0.501*pr+1.43*a+0.399*b-0.0309*pr^2-0.0600*a*pr-5.72*a^2-1.12*a*b-1.70*b^2+0.000974*pr^3+0.168*a^2*pr+6.34*a^3+1.89*a*b^2+1.85*b^3-2.19/pr-0.438*a/pr+
  1.79*b/pr+6.03*a^2/pr+3.08*a*b/pr-1.97*b^2/pr-8.08*a^3/pr-5.79*a*b^2/pr+0.717/pr^2-1.29*b/pr^2-1.52*a^2/pr^2+2.87*b^2/pr^2-2.03*b^3/pr^2
YanDagDegLogK <- 3.78+0.346*pr+0.859*a-0.0106*pr^2-0.0339*a*pr-2.35*a^2+3.95*a^3-0.282*b^3-2.73/pr+0.874*a/pr+2.36*b/pr-2.88*a^2/pr-4.44*b^2/pr+4.31*b^3/pr+1.02/pr^2-0.807*b/pr^2
OrtalamaK <- exp(YanDagOrtLogK)-(3-q)*pr
DegismeK <- exp(YanDagDegLogK)-2*(3-q)*pr
# Same Gamma approximation as above, now for the Vk(r) test: use the asymptotic mean and variance
# to get the Gamma shape and scale parameters and the desired quantiles under H0:
tetaK <- DegismeK/OrtalamaK
kK <- OrtalamaK^2/DegismeK
# (if either izZ or izK is nonzero) tabulate the critical values and p-values:
windows(6,3.8)
KararDegerleri <- cbind(sapply(c(.90,.95,.99) , function(x) sprintf("%.2f",round(c(qgamma(x, shape=kZ,scale=tetaZ)),2))),
sapply(c(.9,.95,.99) , function(x) sprintf("%.2f",round(c(qgamma(x, shape=kK,scale=tetaK)),2))))
colnames(KararDegerleri) <- rep(c(0.90,0.95,0.99),2)
# rownames(KararDegerleri) <- pr
rownames(KararDegerleri) <- c(sapply((degiskensayisi -1):1, function(i) paste(degiskensayisi - i, " ","(r<=", i, ")",sep="")), paste(degiskensayisi, " ( r=0)", sep=""))
textplot(KararDegerleri, cex=1)
text(.064,.91,"p-r",font=2)
text(.345,1,expression(paste(plain(V)[z],"(r) test")),col=2)
text(.821,1,expression(paste(plain(V)[k],"(r) test")),col=4)
title("Yanasik Karar Degerleri \n (p:duzendeki degisken sayisi; r:esbutunlesim ranki)")
if(izZ!=0){
windows(4,3.8)
pDegerleri <- matrix(sprintf("%.3f",round(1 - pgamma(izZ, shape=kZ, scale = tetaZ),3)))
# rownames(pDegerleri) <- pr
rownames(pDegerleri) <- c(sapply((degiskensayisi -1):1, function(i) paste(degiskensayisi - i, " ","(r<=", i, ")",sep="")), paste(degiskensayisi, " ( r=0)", sep=""))
textplot(pDegerleri,cex=1,show.colnames=F)
text(.69,.96,substitute(paste("Pr(",plain(V)[z],">",nn,")"),list(nn=izZ)),col=2)
text(.45,.96,"p-r",font=2)
title("Yanasik p Degerleri \n (p:duzendeki degisken sayisi; \n r:esbutunlesim ranki)")
}
if(izK!=0){
windows(3,3.8)
pDegerleri <- matrix(sprintf("%.3f",round(1 - pgamma(izK, shape=kK, scale = tetaK),3)))
#rownames(pDegerleri) <- pr
rownames(pDegerleri) <- c(sapply((degiskensayisi -1):1, function(i) paste(degiskensayisi - i, " ","(r<=", i, ")",sep="")), paste(degiskensayisi, " ( r=0)", sep=""))
textplot(pDegerleri,cex=1,show.colnames=F)
text(.78,.96,substitute(paste("Pr(",plain(V)[k],">",nn,")"),list(nn=izK)),col=4)
text(.43,.96,"p-r",font=2)
title("Yanasik p Degerleri \n (p:duzendeki degisken sayisi; \n r:esbutunlesim ranki)")
}
Hence, according to the JMN2000 CVs, there is no cointegration either. So your usage of vec2var is meaningless, because vec2var is needed in cointegrated cases. Again, assume all series are cointegrated to make you happy (to create the need to use vec2var) and continue with the most difficult case (cointegration for series with multiple structural breaks); i.e., we are continuing with the "One who pee-pees ambitiously drills the wall" logic.
Extend vars::vec2var to causfinder::vec2var_ykJohEsbInc to handle the transformations in the "multiple structural breaks" case with the relevant intervention dummies. The JMN2000 application above showed that the cointegration rank r is not within the [1,4-1]=[1,3] range. Even so, assume for the sake of argument that the JMN2000 CVs had given r=1 above.
So, to transform restricted VECM to restricted VAR (under multiple=2 structural breaks), apply:
vec2var_ykJohEsbInc(ykJohEsbInc(mydata, type="trace", ecdet="zamanda2yk", K=3, spec="longrun", dumvar=dummymatrix2SB[,c(-1,-2,-3)]),r=1)
This results in:
Deterministic coefficients (detcoeffs):
e prod rw U
kesme 22.6612871 -0.215892151 32.0610121 -9.26649249 #(const)
zyonsemesi 0.2505164 -0.009900004 0.3503561 -0.10494714 #(trend)
zy*D2t_3 0.2238060 -0.008844454 0.3130007 -0.09375756
zy*D3t_3 -0.1234803 0.004879743 -0.1726916 0.05172878
$deterministic
kesme zyonsemesi zy*D2t_3 zy*D3t_3 D2t.3l D3t.3l
e 22.6612871 0.250516390 0.223806048 -0.123480327 -8.8012612 5.3052074
prod -0.2158922 -0.009900004 -0.008844454 0.004879743 -0.1157137 -0.3396206
rw 32.0610121 0.350356063 0.313000702 -0.172691620 -12.5838458 7.2201840
U -9.2664925 -0.104947142 -0.093757559 0.051728781 3.5836119 -2.2921099
I2t I2t.1l I2t.2l I3t I3t.1l I3t.2l
e -0.2584379 0.08470453 0.2102661 -0.51366831 -1.0110891 -2.08728944
prod 0.3013044 0.25103445 -0.8640467 0.08804425 -0.2362783 -0.05606892
rw -0.5838161 0.28400182 1.2073483 -0.67760848 -2.2650094 -0.70586316
U 0.1305258 0.03559119 0.1476985 0.14614290 0.6847273 1.27469940
$A
$A$A1
e.1g prod.1g rw.1g U.1g
e 1.4817704 0.1771082 -0.2274936 0.2332402
prod -0.1605790 1.1846699 0.0406294 -0.9398689
rw -0.8366449 -0.1910611 0.9774874 0.4667430
U -0.4245817 -0.1498295 0.1226085 0.7557885
$A$A2
e.2g prod.2g rw.2g U.2g
e -0.8441175 -0.04277845 0.01128282 -0.01896916
prod -0.3909984 -0.25960184 -0.20426749 0.79420691
rw 1.4181448 -0.03659278 -0.12240211 -0.06579174
U 0.4299422 0.09070905 0.04935195 -0.12691817
$A$A3
e.3g prod.3g rw.3g U.3g
e 0.40149641+0i -0.07067529+0i -0.008175418-0i 0.2286283+0i
prod 0.55003024+0i 0.07241639+0i 0.172505474-0i 0.1281593+0i
rw -0.52674826+0i 0.31667695+0i -0.168897398-0i 0.2184591+0i
U -0.02176108-0i 0.03245409-0i -0.077959841+0i 0.1855889-0i
So, now, check roots:
print(roots(vec2var_ykJohEsbInc(ykJohEsbInc(mydata, type="trace", ecdet="zamanda2yk", K=3, spec="longrun", dumvar=dummymatrix2SB[,c(-1,-2,-3)]),r=1), modulus=TRUE))
This result in "Please provide an object of class 'varest', generated by 'VAR()'." since vars::roots was not extended because: we do NOT need this extention! As I said before, even in the case of restricted VECM, stability is checked from VAR side. You must read Joyeux2007 line by line to see this.
I will supply the ouputs (print-screens) of above functions thouroughly for further clarification.
I will also write extention to vars::root as well just for pedagogical reasons.
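For reference, a minimal sketch of such an extension (hedged: it assumes the K, p, and A components that vars::vec2var returns, and builds the standard companion matrix of the level-VAR representation):
vec2var_roots <- function(model) {
  K <- model$K                  # number of endogenous variables
  p <- model$p                  # lag order of the level VAR
  A <- do.call(cbind, model$A)  # K x (K*p) block [A1 | A2 | ... | Ap]
  companion <- rbind(A, cbind(diag(K * (p - 1)), matrix(0, K * (p - 1), K)))
  sort(Mod(eigen(companion)$values), decreasing = TRUE)
}
vec2var_roots(model)  # "model" is the vec2var object from the question
For a cointegrated system, expect one or more of these moduli to sit at (numerically) 1.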

Plotting Logistic Equation Fit for many variables in ggplot2

I have a logistic model fit, say myfit, that I've saved. The data frame I'm using is in the following format (the first column is the outcome):
medical10 age female nonwhite bmi smoked condxs insuredd smi2d
1 0 60 0 1 29.97 0 0 1 0
2 0 42 0 1 25.85 1 3 1 1
3 0 62 1 0 25.06 0 1 1 0
4 0 62 0 0 36.27 0 2 0 0
5 0 32 0 0 33.36 0 0 1 0
6 0 41 0 0 21.70 1 0 0 0
...
What I would like to do is make a logistic plot (in this form: http://ww2.coastal.edu/kingw/statistics/R-tutorials/logistic.html) for each variable in turn.
Since there are 8 variables, there are 8 ways of putting one variable on the x-axis while holding the other 7 constant. Is there a way I can automate the plots using ggplot2?
For instance, if 'x' was age, I would take the mean of bmi and then pick 0 for female, 0 for nonwhite, 0 for smoked, 0 for condxs, 0 for insuredd, and 0 for smi2d. I would then do a prediction and make a ggplot of x vs y.
However, this is quite tedious, and I was hoping there was a better way.
I don't know of anything in particular in ggplot that will make this easy, but I did find a way (though it was more work than I was expecting; perhaps others can improve on it). Anyway, first let's define a more useful set of sample data:
N <- 100
set.seed(15)
invlogit <- function(x) exp(x)/(exp(x)+1)
dd <- transform(data.frame(
    age=runif(N, 30, 60),
    female=sample(0:1, N, replace=T),
    white=sample(c("Y","N"), N, replace=T),
    bmi=rnorm(N, 30, 2)),
  medical=as.numeric(invlogit((-60+2*age-1.5*bmi+3*female)/10) > runif(N)))
fit <- glm(medical ~ ., dd, family=binomial)
So now we have some data and a model. Next I'll define a helper function that predicts values for a single variable while holding the others at their mean value (or at a fixed level for factors).
predictone <- function(fit, var, xlim=NULL, fix=list(), n=101,
                       xname=var, type="response") {
  tt <- terms(fit)
  vv <- as.list(attr(tt, "variables"))[-c(1, attr(tt, "response") + 1)]
  vn <- sapply(vv, deparse)
  stopifnot(var %in% vn)
  others <- vn[vn != var]
  def <- lapply(others, function(x) {
    val <- fit$data[[x]]   # look up the variable's data once; both branches below need it
    if (x %in% names(fix)) {
      if (is.factor(val)) {
        stopifnot(fix[[x]] %in% levels(val))
        val[val == fix[[x]]][1]
      } else {
        fix[[x]]
      }
    } else {
      if (is.factor(val)) {
        val[val == names(sort(table(val))[1])][1]
      } else {
        mean(val)
      }
    }
  })
  if (is.factor(fit$data[[var]])) {
    newdata <- data.frame(def, unique(fit$data[[var]]))
  } else {
    if (is.null(xlim)) {
      xlim <- range(fit$data[[var]])
    }
    newdata <- data.frame(def, seq(min(xlim), max(xlim), length.out=n))
  }
  names(newdata) <- c(others, var)
  pp <- data.frame(newdata[[var]], predict(fit, newdata, type=type))
  names(pp) <- c(xname, type)
  attr(pp, "fixed") <- setNames(def, others)
  pp
}
Basically this function exists to calculate the averages of all the other variables and then do the actual prediction. We can use it with the test data to make a bunch of plots with
plots <- lapply(names(dd)[1:4], function(x) {
  if (is.factor(dd[[x]])) {
    ggplot(predictone(fit, x), aes_string(x=x, y="response")) + geom_point()
  } else {
    ggplot(predictone(fit, x), aes_string(x=x, y="response")) + geom_line()
  }
})
require(gridExtra)
do.call(grid.arrange, plots)
which will return a grid of the four prediction plots, one per predictor.
Note that factors are treated differently from regular numeric values. When you code categorical variables as 0/1, R can't tell they are categorical, so it doesn't do a good job of inferring values that make sense. I would encourage you to convert 0/1 values to proper factor variables, as in the sketch below.
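For example (a hedged sketch on the sample data above; pick labels that suit your coding):
dd <- transform(dd, female = factor(female, levels = 0:1, labels = c("male", "female")))
fit <- glm(medical ~ ., dd, family = binomial)  # refit so the factor coding takes effect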
An update to the R rms package, to be posted on CRAN around 2015-01-01, includes a new function ggplot.Predict (called by ggplot()) that provides a general way to generate such curves using ggplot2, handling multiple moving variables, interactions, etc. You can see some example usage at https://github.com/harrelfe/rms/blob/master/man/ggplot.Predict.Rd . You can do all of this with the current version of rms using lattice graphics and the plot.Predict function.
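A minimal sketch of that usage (hedged: assumes the rms version described above; dd is the sample data frame from the previous answer):
library(rms)
d <- datadist(dd); options(datadist = "d")   # non-moving variables are held at datadist defaults
f <- lrm(medical ~ age + female + white + bmi, data = dd)
ggplot(Predict(f, fun = plogis))             # one panel per predictor, on the probability scale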

prcomp : PCA residuals not zero

I have 3 variables on which I ran PCA using prcomp. I tried to reconstruct the variables using the loadings and factors, but the residuals are not zero. Statistically (I might be wrong here) I was expecting to be able to reconstruct the original data exactly. Am I missing something?
test = read.table(text='0.8728891 0.7403704 0.6655271
0.8697503 0.7447901 0.6629487
0.8569866 0.7321241 0.6493666
0.8824890 0.7405750 0.6505887
0.8912246 0.7334331 0.6508194
0.8930270 0.7381421 0.6448108
0.8721081 0.7173891 0.6355404
0.8649705 0.7326563 0.6493313
0.8976412 0.7249211 0.6437649
0.9233625 0.7406451 0.6454023',sep=' ')
pca = prcomp(test,center=T,scale=F)
pca$x %*% pca$rotation + matrix(1,nrow=nrow(test),ncol=1) %*% pca$center - test
V1 V2 V3
-0.0020186611 0.0071487188 -0.0240478838
-0.0004352159 -0.0005375912 -0.0262594828
0.0008042558 -0.0039840874 -0.0019352850
0.0009905100 -0.0053390749 -0.0067663626
-0.0008375576 0.0041104957 0.0016244986
0.0013586563 -0.0060476694 0.0036526104
0.0004278214 0.0009280342 0.0298641699
0.0005504918 -0.0026885505 -0.0009348334
-0.0011619165 0.0073130849 0.0185829183
0.0003216158 -0.0009033601 0.0062196504
I use the following function for reconstructing data from a prcomp object:
# This function reconstructs a data set using a defined set of principal components.
# Arguments: "pca" is the pca object from prcomp; "pcs" is a vector of principal components
# to be used for reconstruction (the default includes all PCs).
prcomp.recon <- function(pca, pcs=NULL){
  if (is.null(pcs)) pcs <- seq(pca$sdev)
  recon <- as.matrix(pca$x[, pcs]) %*% t(as.matrix(pca$rotation[, pcs]))
  if (pca$scale[1] != FALSE) {
    recon <- scale(recon, center=FALSE, scale=1/pca$scale)
  }
  if (pca$center[1] != FALSE) {
    recon <- scale(recon, center=-pca$center, scale=FALSE)
  }
  recon
}
The problem in your code is a missing transpose: prcomp computes the scores as pca$x = (centered data) %*% pca$rotation, so the reconstruction needs pca$x %*% t(pca$rotation), not pca$x %*% pca$rotation. The prcomp.recon function above handles this and gives the right result:
> prcomp.recon(pca) - test
V1 V2 V3
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
5 0 0 0
6 0 0 0
7 0 0 0
8 0 0 0
9 0 0 0
10 0 0 0
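For completeness, the original one-liner works once the rotation matrix is transposed (a minimal sketch of the same idea as prcomp.recon):
recon <- pca$x %*% t(pca$rotation)         # scores times transposed loadings
recon <- sweep(recon, 2, pca$center, "+")  # add back the column means
max(abs(recon - as.matrix(test)))          # effectively zero (~1e-16)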
