I noticed that SVM when fed with decision.values=T (plus sigmoid to get probabilities ) produces non-deterministic result when I permute data frame under analysis. Does anyone has any idea why? Please try the code yourself
install.packages("e1071")
library(e1071)
A <- cbind(rnorm(20,1,1),rnorm(20,1,1),rep(1,20))
B <- cbind(rnorm(20,9,1),rnorm(20,9,1),rep(0,20))
dataframe <- as.data.frame(rbind(A,B))
predc <- rep(0,length(dataframe[,1]))
K <- length(dataframe[1,])
permutator <- sample(nrow(dataframe))
dataframe$V3 <- factor(dataframe$V3)
dataframe <- dataframe[permutator, ]
for(i in 1:length(dataframe[,1])) {
frm <- as.formula(object=paste("V",as.character(K), " ~ .",sep=""))
r <- svm(formula=frm, data=(dataframe[-i,]))
predicted <- predict(r,newdata=dataframe[i,],decision.values=TRUE)
predc[i] <- sigmoid(attr(predicted,'decision.values')[1])
}
plot(sort(predc))
[edited: code]
Related
new here!
I want to find the determinant of the pooled sample covariance of the given matrix. Can someone give a leading clue? (i have searched everywhere)
I have tried many things, this isn't the right solution (i have tried many) such as:
det(cov(dfdata))
mvec <- colMeans(dfdata) #sample mean vector#`enter code here`
covM <- cov(dfdata) #sample covariance matrix#
corM <- cor(dfdata) #sample correlation matrix#
covMnum <- cov(dfdatanum)
The following code is what i have developed:
##uploading the data
data2 <- read.table("file.tsv")
data3 <- read.table("file2.tsv")
data4 <- read.table("file3.tsv")
data5 <- read.table("file4.tsv")
## have a first look at data###
head(dfBull)
n <- nrow(dfBull) #n#
p <- ncol(dfBull) #p#
summary(dfBull)
##removing the first rove as it isnt neccesary
a <- data2[-(1), ]
b <- data3[-(1), ]
c <- data4[-(1), ]
d <- data5[-(1), ]
## finding the covariance
cv1 <- cov(as.numeric(a$V1), as.numeric(a$V2))
cv2 <- cov(as.numeric(b$V1), as.numeric(b$V2))
cv3 <- cov(as.numeric(c$V1), as.numeric(c$V2))
cv4 <- cov(as.numeric(c$V1), as.numeric(c$V2))
##This is the function im trying to use:
mat <- matrix(c(cv1,0,0,0,0,cv2,0,0,0,0,cv3,0,0,0,0,cv4), nrow=4, ncol=4, byrow=TRUE)
det(mat)`
I would like to normalize the data this way:
(trainData - mean(trainData)) / sd(trainData)
(testData - mean(trainData)) / sd(trainData)
For the Train set I can use the function scale(). How can I do for the test set? I tried in different ways the lapply() function .. but I did not succeed.
Many thanks! An exemple of code:
Train <- data.frame(matrix(c(1:100),10,10))
Test <- data.frame(matrix(sample(1:100),10,10))
scaled.Train <- scale(Train)
ct <- ncol(Test)
rt <- nrow(Test)
ncol(Train)
sdmatrix <- data.frame(matrix(,rt,ct))
for (i in 1:ct){
sdmatrix[1,i] <- mean(Train[,i])
sdmatrix[2,i] <- sd(Train[,i])
}
Test <- rbind(Test, sdmatrix)
normTest <- function(x){
a <- x[rt-1]
b <- x[rt]
x <- (x-a)/b
}
Test <- lapply(Test[1:(rt-2),],normTest)
I have the following randomForest loop:
require(randomForest)
require(xts)
require(quantmod)
getSymbols("MSFT", from = '2017-01-01', to = '2017-09-13', src = "yahoo")
rets <- diff(log(Ad(MSFT))); rets[is.na(rets)]=0
rets1 <- ifelse(rets>0,"up","dn")
Lag1 <- lag(rets,1);Lag1[is.na(Lag1)]=0;Lag1 <- ifelse(Lag1>0,"up","dn")
Lag2 <- lag(rets,2);Lag2[is.na(Lag2)]=0;Lag2 <- ifelse(Lag2>0,"up","dn")
Lag3 <- lag(rets,3);Lag3[is.na(Lag3)]=0;Lag3 <- ifelse(Lag3>0,"up","dn")
preds <- rets* 0
win=20
for(i in win:(NROW(rets)-1)){
RFDataf <- data.frame(factor(Lag1),factor(Lag2),factor(Lag3))
fit <- try(randomForest(RFDataf[(i-win+1):i,], factor(as.character(rets1[(i-win+1):i])), ntree=1000, mtry=round(sqrt(NCOL(RFDataf)),0)))
if(class(fit) != "try-error"){
fit0 <- fit
preds[(i+1)] <- predict(fit, RFDataf[(i+1),])
}else{
if(exists("fit0")){
preds[(i+1)] <- predict(fit0, RFDataf[(i+1),])}else{next}
}
}
}
I read these carefully:
https://stats.stackexchange.com/questions/120446/different-results-from-several-passes-of-random-forest-on-same-dataset
https://github.com/mlr-org/mlr/issues/938
I understand that if in the R function is used a different language, set.seed() won't work.
I have the additional problem of a loop.
If I used a function with no foreign languages but r in a loop, how could I preserve the reproducibility?
Is there a way to make set.seed() work in a loop? Any alternative solution?
I would like to code a loop for cross-validation: computing MSE for a one- and a four-step forecast and store the results in a matrix. The problem I get is that the columns for the 1 to 3-step forecast get overwritten and I get just the 4-step forecast in all columns. Anybody can help?
k<-20
n<-length(xy)-1
h<-4
start <- tsp(xy) [1]+k
j <- n-k
mseQ1 <- matrix(NA,j,h)
colnames(mseQ1) <- paste0('h=',1:h)
for(i in 1:j)
{
xtrain <- window(xy, end=start+(i-1))
xvalid <- window(xy, start=start+i, end=start+i)
qualifiedETS <- ets(xtrain, alpha=NULL, beta=NULL, additive.only=TRUE, opt.crit="mse")
fcastHW <- forecast(qualifiedETS, h=h)
mseQ1[i,] <- ((fcastHW[['mean']]-xvalid)^2)
}
I´ve a question concerning the a regression analysis in r.
#Datei einlesen
residual <- read.csv2("E:***Input-R_Renditen.csv",header=TRUE,sep=";")
#Firmen
alist <- list()
for (a in 2:11){
#Länge Gesamtzeit
t <- 243
tx <- t-59
#Länge Regression
reglist <- list()
for (i in 1:tx){
j <- i+59
c <- i+54
#RegressionsVariable
r <- residual[i:j,a]
rm <- residual[i:j,12]
smb <- residual[i:j,13]
hml <- residual[i:j,14]
rf <- residual[i:j,15]
#Überschussrendite
er <- r-rf
erm <- rm-rf
#Regression
reg <- lm(er~erm+smb+hml)
reglist[[i]] <- coef(reg)
}
alist[[a]] <- reglist
}
I want to insert a dummy/categorical variable into the regression. Let me call the dummy "d", d should have the the value 1 for i to j-6 and the value 0 for j-5 to j.
I cannot include this in my table, which is read, because for every new regression i, the dummy differs. I tried it with ifelse, but I got the error, that the lengths of d differs from others for the regression.