Deep NN for multivariate regression - r

I implemented this simple NN but even when making it do all the interactions it fails to converge and the MSE remains very high
I tried to change the number of iterations and the learning rate but it doesn't work
rm(list=ls())
data <- read.csv("C:/Users/Mikele/Documents/Uni/IA AI & Machine Learning/R/11_23_2018/wine.csv",sep = ',',header = FALSE)
x <- data[,1:11]
y <- as.matrix(data[,12])
y_matrix <- matrix(rep(0,length(y)),nrow = length(y), ncol = 6)
k <-1
for (w in 1:length(y))
{
temp <- y[k] - 2
y_matrix[k,temp] <-1
k <- k + 1
}
hl <- c(40, 30, 20)
iter <- 1000
lr <- 0.1
## add in intercept
x_1 <- as.matrix(cbind(rep(1, nrow(x)),x))
## set error array
error <- rep(0, iter)
## set up weights
## the +1 is to add in the intercept/bias parameter
W1 <- matrix(runif(ncol(x_1)*hl[1], -1, 1), nrow = ncol(x_1))
W2 <- matrix(runif((hl[1]+1)*hl[2], -1, 1), nrow = hl[1]+1)
W3 <- matrix(runif((hl[2]+1)*hl[3], -1, 1), nrow = hl[2]+1)
W4 <- matrix(runif((hl[3]+1)*ncol(y), -1, 1), nrow = hl[3]+1)
for(k in 1:iter)
{
# calculate the hidden and output layers using X and hidden layer as inputs
# hidden layer 1 and 2 have a column of ones appended for the bias term
hidden1 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(x_1 %*% W1))
hidden2 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(hidden1 %*% W2))
hidden3 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(hidden2 %*% W3))
y_hat <- sigm(hidden3 %*% W4)
# calculate the gradient and back prop the errors
# see theory above
y_hat_del <- (y-y_hat)*(d.sigm(y_hat))
hidden3_del <- y_hat_del %*% t(W4)*d.sigm(hidden3)
hidden2_del <- hidden3_del[,-1] %*% t(W3)*d.sigm(hidden2)
hidden1_del <- hidden2_del[,-1] %*% t(W2)*d.sigm(hidden1)
# update the weights
W4 <- W4 + lr*t(hidden3) %*% y_hat_del
W3 <- W3 + lr*t(hidden2) %*% hidden3_del[,-1]
W2 <- W2 + lr*t(hidden1) %*% hidden2_del[,-1]
W1 <- W1 + lr*t(x_1) %*% hidden1_del[,-1]
error[k] <- 1/nrow(y)*sum((y-y_hat)^2)
if((k %% (10^4+1)) == 0) cat("mse:", error[k], "\n")
}
# plot loss
xvals <- seq(1, iter, length = 100)
print(qplot(xvals, error[xvals], geom = "line", main = "MSE", xlab = "Iteration"))
no error message but I can't understand how to make a deep NN for
Multivariate Linear Regression
in addition I divided the ys into a 6-column matrix (the maximum and minimum of the initial dataset) now there is someone who can help me understand why not cover and in any case the final results are all concentrated on column 4?

Related

Confidence interval in R for 1000 times

I have run the code below to obtain 1000 confidence intervals but it doesn't give an output for lambda_jk and beta_jk. And hence I cannot obtain the jack_lambda and jack_beta.
library(bootstrap)
library(maxLik)
est<-NULL
set.seed(20)
lambda <- 0.02
beta <- 0.5
alpha <- 0.10
n <- 40
N <- 1000
lambda_hat <- NULL
beta_hat <- NULL
lambda_jk<-NULL
beta_jk<-NULL
cp <- NULL
jack_lambda <- matrix(NA, nrow = N, ncol = 2)
jack_beta <- matrix(NA, nrow = N, ncol = 2)
for(i in 1:N){
u <- runif(n)
c_i <- rexp(n, 0.0001)
t_i <- (log(1 - (1 / lambda) * log(1 - u))) ^ (1 / beta)
s_i <- 1 * (t_i < c_i)
t <- pmin(t_i, c_i)
data<- data.frame(t,s_i)
LLF <- function(para,y) {
lambda <- para[1]
beta <- para[2]
e <- y[,2]*log(lambda*y[,1]^(beta-1)*beta*exp(y[,1]^beta)*exp(lambda*(1-exp(y[,1]^beta))))
r <- (1-y[,2])*log(exp(lambda*(1-exp(y[,1]^beta))))
f <- sum(e + r)
return(f)
}
mle <- maxLik(LLF, y=data,start = c(para = c(0.02, 0.5))) ### Obtain MLE based on the simulated data
lambda_hat[i] <- mle$estimate[1] #estimate for parameter 1
beta_hat[i] <- mle$estimate[2] #estimate for parameter 2
est<-rbind(est,mle$estimate)
### statistic function for jackknife()
jack<-matrix(0, nrow = n, ncol = 2)
for(i in 1:n){
fit.jack<-maxLik(logLik=LLF,y=data[-i,],method="NR",start=c(0.02, 0.5))
jack[i,]<-coef(fit.jack) #delete-one estimates
}
estjack<-rbind(jack)
meanlambda = mean(estjack[,1])
meanbeta = mean(estjack[,2])
lambda_jk[i] =lambda_hat[i]-(n-1)*(meanlambda-lambda_hat[i]) #jackknife estimate
beta_jk[i] = beta_hat[i]-(n-1)*(meanbeta-beta_hat[i])
SElambda<-sqrt(var(estjack[,1])/n-1) #std error
SEbeta<-sqrt(var(estjack[,2])/n-1)
#confidence interval
jack_lambda[i,] <- lambda_jk[i]+c(-1,1)*qt((1-alpha)/2,n-1)*SElambda
jack_beta[i,] <- beta_jk[i]+c(-1,1)*qt((1-alpha)/2,n-1)*SEbeta
}
(I am very appreciate with any ideas)

why random effect estiamator are not correct

I'm trying to simulate glmmLasso using a binomial data.
but random effect estiamator are not similar 5 that i given.
something wrong in my code?
if not, why random effect shown like that.
makedata <- function(I, J, p, sigmaB){
N <- I*J
# fixed effect generation
beta0 <- runif(1, 0, 1)
beta <- sort(runif(p, 0, 1))
# x generation
x <- matrix(runif(N*p, -1, 1), N, p)
# random effect generation
b0 <- rep(rnorm(I, 0, sigmaB), each=J)
# group
group <- as.factor(rep(1:I, each = J))
# y generation
k <- exp(-(beta0 + x %*% beta + b0))
y <- rbinom(n = length(k), size = 1, prob = (1/(1+k)))
#standardization
sx <- scale(x, center = TRUE, scale = TRUE)
simuldata <- data.frame(y = y, x = sx, group)
res <- list(simuldata=simuldata)
return(res)
}
# I : number of groups
I <- 20
# J : number of observation in group
J <- 10
# p : number of variables
p <- 20
# sigmaB : sd of random effect b0
sigmaB <- 5
set.seed(231233)
simdata <- makedata(I, J, p, sigmaB)
lam <- 10
xnam <- paste("x", 1:p, sep=".")
fmla <- as.formula(paste("y ~ ", paste(xnam, collapse= "+")))
glmm <- glmmLasso(fmla, rnd = list(group=~1), data = simdata, lambda = lam, control = list(scale = T, center = T))
summary(glmm)

Objective function in optim evaluates to length 3 not 1

I am new to R and trying to find the optimal values of 3 parameters via indirect inference from a simulated panel data set, but getting an error "objective function in optim evaluates to length 3 not 1". I tried to check past posts, but the one I found didn't address the problem I am facing.
The code works if I only try for one parameter instead of 3. Here is the code:
#Generating data
modelp <- function(Y,alpha,N,T){
Yt <- Y[,2:T]
Ylag <- Y[,1:(T-1)]
Alpha <- alpha[,2:T]
yt <- matrix(t(Yt), (T-1)*N, 1)
ylag <- matrix(t(Ylag), (T-1)*N, 1)
alph <- matrix(t(Alpha), (T-1)*N, 1)
rho.ind <- rep(NA,N)
sigma_u <- rep(NA,N)
sigma_a <- rep(NA,N)
for(n in 1:N){
sigma_u[n] <- sigma(lm(yt~alph+ylag))
sigma_a[n] <- lm(yt~alph+ylag)$coef[2] #
(diag(vcov((lm(yt~alph+ylag)$coef),complete=TRUE)))[2] #
rho.ind[n] <- lm(yt~alph+ylag)$coef[3]
}
param <- matrix(NA,1,3)
param[1]<- mean(sum(rho.ind))
param[2]<- mean(sum(sigma_u))
param[3]<- mean(sum(sigma_a))
return(param)
}
## Function to estimate parameters
H.theta <- function(param.s){
set.seed(tmp.seed) #set seed
param.s.tmp <- matrix(0,1,3)
for(s in 1:H){
eps.s <- matrix(rnorm(N*T), N, T) #white noise erros
eps0.s <- matrix(rnorm(N*T), N, 1) #error for initial condition
alph.s <- matrix(rnorm(N*T),N,T)
Y.s <- matrix( 0, N, T)
ys.lag <- eps0.s
for(t in 1:T){ #Simulating the AR(1) process data
ys <- alph.s[,t]+param.s[1] * ys.lag + eps.s[,t] # [n,1:t]
Y.s[,t] <- ys
ys.lag <- ys
}
param.s.tmp <- param.s.tmp + modelp(Y.s, alph.s,N, T)
param.s[2] <- param.s.tmp[2]
param.s[3] <- mean(var(alph.s)) #param.s.tmp[3]
}
return( (param.data - param.s.tmp/H)^2 )
#return(param.s[1])
}
#Results for T = 10 & H = 10, N=100
nrep <-10
rho <-0.9
sigma_u <- 1
sigma_a <- 1.5
param <- matrix(NA,1,3)
param[1] <- rho
param[2] <- sigma_u
param[3] <- sigma_u
s.mu <- 0 # Mean
s.ep <- 0.5 #White Noise -initial conditions
Box <- cbind(rep(100,1),c(20),rep(c(5),1))
r.simu.box <- matrix(0,nrep,nrow(Box))
r.data.box <- matrix(0,nrep,nrow(Box))
for(k in 1:nrow(Box)){
N <- Box[k,1] #Number of individuals in panel
T <- Box[k,2] #Length of Panel
H <- Box[k,3] # Number of simulation paths
p.data <-matrix(NA,nrep,3)
p.simu <-matrix(NA,nrep,3)
est <- matrix(NA,1,3)
for(i in 1:nrep){
mu <- matrix(rnorm(N )*s.mu, N, 1)
eps <- matrix(rnorm(N*T)*s.ep, N, T)
eps0 <- matrix(rnorm(N*T)*s.ep, N, 1)
alph <- matrix(rnorm(N ), N, T)
Y <- matrix( 0, N, T)
y.lag <- (1-param[1])*mu + eps0
for(t in 1:T){
y <- alph[,t]+param[1]*y.lag +eps[,t]
Y[,t] <- y
y.lag <- y
}
param.data <- modelp(Y,alph,N,T) #Actual data
p.data[i,1:3] <- param.data
tmp.seed <- 3864+i+100*(k-1) #Simulated data
x0 <- c(0.5, 0,0)
est[i] <- optim(x0, H.theta,method = "BFGS", hessian = TRUE)$par
p.simu[i,1:3] <- est[i]
if(i%%10==0) print(c("Finished the (",i,")-th replication"))
}
}
mean(p.data[,1])- mean(p.simu[,1])
mean(p.data[,2])- mean(p.simu[,2])
sqrt(mean((p.data[1]-p.simu[1])^2))
I expect to get three values. Any help or suggestion will be greatly appreciated.

Coverage probability for an unspecified CDF

I used the following r code to determine the coverage probability.
theta <- seq(0,1, length = 100)
CD_theta <- function(y, p, n){
1 - pbinom (y, size = n, prob = p) + 1/2*dbinom(y, size = n, prob = p)
}
y <- 5
n <- 100
phat <- y/n
mytheta <- CD_theta(5, theta, 100)
set.seed(650)
ci <- list()
n <- 100
B <- 1000
result = rep(NA, B)
all_confInt <- function(B) {
for (i in 1:B){
boot.sample <- sample(mytheta, replace = TRUE)
lower <- theta[which.min(abs(boot.sample - .025))]
upper <- theta[which.min(abs(boot.sample - .975))]
ci[[i]] <- data.frame(lowerCI = lower, upperCI = upper)
intervals <- unlist(ci)
}
return(intervals)
}
df <- data.frame(matrix(all_confInt(B), nrow=B, byrow=T))
colnames(df)[1] <- "Lower"
colnames(df)[2] <- "Upper"
names(df)
dim(df)
mean(df$Lower < phat & df$Upper > phat)*100
However, I obtained 6.4% which is too low. Why am I getting really lower percentage?. Is there any problem in the r function?

Performing t-Test Selection manually

I’m trying to write simulation code, that generates data and runs t-test selection (discarding those predictors whose t-test p-value exceeds 0.05, retaining the rest) on it. The simulation is largely an adaptation of Applied Econometrics with R by Kleiber and Zeileis (2008, pp. 183–189).
When running the code, it usually fails. Yet with certain seeds (e.g. 1534) it produces plausible output. If it does not produce output (e.g. 1911), it fails due to: "Error in x[, ii] : subscript out of bounds", which traces back to na.omit.data.frame(). So, for some reason, the way I attempt to handle the NAs seems to fail, but I'm unable to figure out in how so.
coef <- rep(coef[,3], length.out = pdim+1)
err <- as.vector(rnorm(nobs, sd = sd))
uX <- c(rep(1, times = nobs))
pX <- matrix(scale(rnorm(nobs)), byrow = TRUE, ncol = pdim, nrow = nobs)
X <- cbind(uX, pX)
y <- coef %*% t(X) + err
y <- matrix(y)
tTp <- (summary(lm(y ~ pX)))$coefficients[,4]
tTp <- tTp[2:length(tTp)]
TTT <- matrix(c(tTp, rep(.7, ncol(pX)-length(tTp))))
tX <- matrix(NA, ncol = ncol(pX), nrow = nrow(pX))
for(i in 1:ncol(pX)) {ifelse(TTT[i,] < ALPHA, tX[,i] <- pX[,i], NA)}
tX <- matrix(Filter(function(x)!all(is.na(x)), tX), nrow = nobs)
TTR <- lm(y ~ tX)
The first block is unlikely to the cause of the error. It merely generates the data and works well on its own and with other methods, like PCA, as well. The second block pulls the p-values from the regression output; removes the p-value of the intercept (beta_0); and fills the vector with as many 7s as necessary to have the same length as the number of variables, to ensure the same dimension for matrix calculations. Seven is arbitrary and could be any number larger than 0.05 to not pass the test of the loop. This becomes – I believe – necessary, if R discards predictors due to multicollinearity.
The final block creates an empty matrix of the original dimensions; inserts the original data, if the t-test p-value is lower than 0.05, else retains the NA; while the penultimate line removes all columns containing NAs ((exclusively NA or one NA is the same here) taken from mnel’s answer to Remove columns from dataframe where ALL values are NA); lastly, the modified data is again put in the shape of a linear regression.
Does anyone know what causes this behavior or how it would work as intended? I would expect it to either work or not, but not kind of both. Ideally, the former.
A working version of the code is:
set.seed(1534)
Sim_TTS <- function(nobs = c(1000, 15000), pdim = pdims, coef = coef100,
model = c("MLC", "MHC"), ...){
DGP_TTS <- function(nobs = 1000, model = c("MLC", "MHC"), coef = coef100,
sd = 1, pdim = pdims, ALPHA = 0.05)
{
model <- match.arg(model)
if(model == "MLC") {
coef <- rep(coef[,1], length.out = pdim+1)
err <- as.vector(rnorm(nobs, sd = sd))
uX <- c(rep(1, times = nobs))
pX <- matrix(scale(rnorm(nobs)), byrow = TRUE, ncol = pdim, nrow = nobs)
X <- cbind(uX, pX)
y <- coef %*% t(X) + err
y <- matrix(y)
tTp <- (summary(lm(y ~ pX)))$coefficients[,4]
tTp <- tTp[2:length(tTp)]
TTT <- matrix(c(tTp, rep(.7, ncol(pX)-length(tTp))))
tX <- matrix(NA, ncol = ncol(pX), nrow = nrow(pX))
for(i in 1:ncol(pX)) {ifelse(TTT[i,] < ALPHA, tX[,i] <- pX[,i], NA)}
tX <- matrix(Filter(function(x)!all(is.na(x)), tX), nrow = nobs)
TTR <- lm(y ~ tX)
} else {
coef <- rep(coef[,2], length.out = pdim+1)
err <- as.vector(rnorm(nobs, sd = sd))
uX <- c(rep(1, times = nobs))
pX <- matrix(scale(rnorm(nobs)), byrow = TRUE, ncol = pdim, nrow = nobs)
X <- cbind(uX, pX)
y <- coef %*% t(X) + err
y <- matrix(y)
tTp <- (summary(lm(y ~ pX)))$coefficients[,4]
tTp <- tTp[2:length(tTp)]
TTT <- matrix(c(tTp, rep(.7, ncol(pX)-length(tTp))))
tX <- matrix(NA, ncol = ncol(pX), nrow = nrow(pX))
for(i in 1:ncol(pX)) {ifelse(TTT[i,] < ALPHA, tX[,i] <- pX[,i], NA)}
tX <- matrix(Filter(function(x)!all(is.na(x)), tX), nrow = nobs)
TTR <- lm(y ~ tX)
}
return(TTR)
}
PG_TTS <- function(nrep = 1, ...)
{
rsq <- matrix(rep(NA, nrep), ncol = 1)
rsqad <- matrix(rep(NA, nrep), ncol = 1)
pastr <- matrix(rep(NA, nrep), ncol = 1)
vmat <- cbind(rsq, rsqad, pastr)
colnames(vmat) <- c("R sq.", "adj. R sq.", "p*")
for(i in 1:nrep) {
vmat[i,1] <- summary(DGP_TTS(...))$r.squared
vmat[i,2] <- summary(DGP_TTS(...))$adj.r.squared
vmat[i,3] <- length(DGP_TTS(...)$coefficients)-1
}
return(c(mean(vmat[,1]), mean(vmat[,2]), round(mean(vmat[,3]))))
}
SIM_TTS <- function(...)
{
prs <- expand.grid(pdim = pdim, nobs = nobs, model = model)
nprs <- nrow(prs)
pow <- matrix(rep(NA, 3 * nprs), ncol = 3)
for(i in 1:nprs) pow[i,] <- PG_TTS(pdim = prs[i,1],
nobs = prs[i,2], model = as.character(prs[i,3]), ...)
rval <- rbind(prs, prs, prs)
rval$stat <- factor(rep(1:3, c(nprs, nprs, nprs)),
labels = c("R sq.", "adj. R sq.", "p*"))
rval$power <- c(pow[,1], pow[,2], pow[,3])
rval$nobs <- factor(rval$nobs)
return(rval)
}
psim_TTS <- SIM_TTS()
tab_TTS <- xtabs(power ~ pdim + stat + model + nobs, data = psim_TTS)
ftable(tab_TTS, row.vars = c("model", "nobs", "stat"), col.vars = "pdim")}
FO_TTS <- Sim_TTS()
FO_TTS
}
Preceeded by:
pdims <- seq(12, 100, 4)
coefLC12 <- c(0, rep(0.2, 4), rep(0.1, 4), rep(0, 4))/1.3
rtL <- c(0.2, rep(0, 3))/1.3
coefLC100 <- c(coefLC12, rep(rtL, 22))
coefHC12 <- c(0, rep(0.8, 4), rep(0.4, 4), rep(0, 4))/1.1
rtH <- c(0.8, rep(0, 3))/1.1
coefHC100 <- c(coefHC12, rep(rtH, 22))
coef100 <- cbind(coefLC100, coefHC100)
I’m aware that model selection via the significance of individual predictors is not recommended, but that is the whole point – it is meant to be compared to more sophisticated methods.

Resources