Linear constraints on Z and D in MARSS package - r

Recently I needed to set up a MARSS model of the form:
y_t = c + beta * d1_t + alpha * x_t + v_t; x_t = x_(t-1) + w_t
When I try it in R, I run into this problem:
Error in optim(pars, negloglike, method = "BFGS") :
objective function in optim evaluates to length 0 not 1
My benchmark model:
y_t = c + beta * d1_t + alpha * d2_t + alpha * x_t + v_t; x_t = x_(t-1) + w_t
My code:
library(MARSS)
y = rnorm(10)
x = matrix(0, 3, 10)
x[1:2, ] = matrix(rnorm(20), 2, 10)
x[3, ] = 1
x0 = matrix(0.1, 1, 1)
V0 = matrix(0.01, 1, 1)
B = matrix(1, 1, 1)
U = A = 'zero'
# pars to be estimated
pars = c(beta = 0.5, alpha = 0.5, c = 0.5, q = 1)
# calculate the negative log-likelihood
negloglike = function(pars) {
  Q = matrix(list('q'), 1, 1)
  R = matrix(1, 1, 1)
  Z = matrix(list(pars['alpha']), 1, 1)
  D = matrix(list(pars['beta'], pars['alpha'], pars['c']), 1, 3)
  model.list = list(B = B, U = U, Q = Q, Z = Z, A = A, D = D, d = x, R = R, x0 = x0, V0 = V0)
  -1 * MARSS(y, model = model.list, control = list(
    maxit = 200, conv.test.slope.tol = 0.1, abstol = 0.1),
    method = 'kem', silent = TRUE)$loglik
}
optim(pars, negloglike, method = 'BFGS')
but it fails with:
Error in optim(pars, negloglike, method = "BFGS") :
objective function in optim evaluates to length 0 not 1
Any help would be appreciated. Thanks!
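A note on the error itself: in R, -1 * NULL evaluates to numeric(0), so if the inner MARSS() call fails (or returns an object without a loglik element), negloglike returns something of length 0, which is exactly what optim complains about. A minimal guard, sketched below assuming the setup above is otherwise unchanged (the penalty value 1e10 is an arbitrary illustrative choice), keeps the objective a single finite number so optim can at least proceed; the next step would be to inspect why the inner fit fails:
negloglike = function(pars) {
  Q = matrix(list('q'), 1, 1)
  R = matrix(1, 1, 1)
  Z = matrix(list(pars['alpha']), 1, 1)
  D = matrix(list(pars['beta'], pars['alpha'], pars['c']), 1, 3)
  model.list = list(B = B, U = U, Q = Q, Z = Z, A = A, D = D, d = x, R = R, x0 = x0, V0 = V0)
  # if MARSS errors out, keep going with a NULL fit instead of aborting optim
  fit = tryCatch(
    MARSS(y, model = model.list, control = list(
      maxit = 200, conv.test.slope.tol = 0.1, abstol = 0.1),
      method = 'kem', silent = TRUE),
    error = function(e) NULL)
  # return a large finite penalty when no log-likelihood is available
  if (is.null(fit) || is.null(fit$loglik) || !is.finite(fit$loglik)) return(1e10)
  -1 * fit$loglik
}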

Related

How to estimate the Kalman Filter with 'KFAS' R package, with an AR(1) transition equation and covariates?

I am using the 'KFAS' package in R to estimate a state-space model with the Kalman filter. My measurement and transition equations are:
y_t = b_0 + b_1 * xx_t + Z_t * x_t + eps_t (measurement)
x_t = T_t * x_{t-1} + R_t * eta_t (transition),
with eps_t ~ N(0, H_t) and eta_t ~ N(0, Q_t),
where xx_t are covariates. I have read this question and wrote the following code:
library(KFAS)
set.seed(100)
xx <- rnorm(200)
beta0 <- 0.1
beta1 <- 0.1
eps <- rt(200, 4, 1)
y <- as.matrix(beta0 + beta1 * xx +
                 arima.sim(n = 200, list(ar = 0.6), innov = rnorm(200) * sqrt(0.5)) + eps,
               ncol = 1)
Zt <- 1
Ht <- matrix(NA)
Tt <- matrix(NA)
Rt <- 1
Qt <- matrix(NA)
ss_model <- SSModel(y ~ xx + SSMcustom(Z = Zt, T = Tt, R = Rt, Q = Qt), H = Ht)
updatefn <- function(pars, model) {
  model$H[1] <- pars[1]
  model$T[1] <- pars[2]
  model$Q[1] <- pars[3]
  model
}
fit <- fitSSM(ss_model, c(1, 0.5, 1), updatefn, method = "L-BFGS-B",
              lower = c(0, -0.99, 0), upper = c(100, 0.99, 100))
I get the error
Error in is.SSModel(do.call(updatefn, args = c(list(inits, model), update_args)), :
System matrices (excluding Z) contain NA or infinite values, covariance matrices contain values larger than 1e+07
I have tried changing the initial vector to c(1, 0.5, 1, 1, 1), but it returns the same message. Does anyone know how I can do this?
Thanks!
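One likely cause, worth checking: with y ~ xx + SSMcustom(...), KFAS builds three states (the intercept, the xx coefficient, and the custom AR(1) state), so model$T is a 3 x 3 x 1 array. model$T[1] <- pars[2] then overwrites T[1, 1, 1], the intercept state's fixed transition entry, and leaves the NA sitting at the custom state's position, which matches the "contain NA" part of the error. A sketch of an update function that indexes the custom state explicitly (assuming it is the last of the three states, as in the ordering above):
updatefn <- function(pars, model) {
  model$H[1, 1, 1] <- pars[1]
  # the custom AR(1) state is the third state, after the intercept and the
  # xx coefficient, so set its transition entry directly
  model$T[3, 3, 1] <- pars[2]
  # only the custom state carries a disturbance here, so Q is 1 x 1 x 1
  model$Q[1, 1, 1] <- pars[3]
  model
}
fit <- fitSSM(ss_model, c(1, 0.5, 1), updatefn, method = "L-BFGS-B",
              lower = c(0, -0.99, 0), upper = c(100, 0.99, 100))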

Failing to optimise negative binomial model using optim

I am trying to manually optimise a negative binomial regression model using the optim function in R, predicting a count variable y from a matrix of factors X, with the following code:
library(MASS)  # for rnegbin()
# generating some fake data
n <- 1000
X <- matrix(NA, ncol = 5, nrow = n)
X[, 1] <- 1
X[, 2] <- sample(size = n, x = c(0, 1), replace = TRUE)
X[, 3] <- sample(size = n, x = c(0, 1), replace = TRUE)
X[, 4] <- sample(size = n, x = c(0, 1), replace = TRUE)
X[, 5] <- sample(size = n, x = c(0, 1), replace = TRUE)
beta0 <- 3
beta1 <- -2
beta2 <- -2
beta3 <- -4
beta4 <- -0.9
k <- 0.9
## draws from negative binomial distribution
mu <- exp(beta0 + beta1 * X[, 2] + beta2 * X[, 3] + beta3 * X[, 4] + beta4 * X[, 5])
theta <- mu + mu^2 / k
# dependent variable
y <- rnegbin(n, mu = mu, theta = theta)
# function to be optimised
negbin_ll <- function(y, X, theta) {
  beta <- theta[1:ncol(X)]
  alpha <- theta[ncol(X) + 1]
  logll <- y * log(alpha) + y * (beta %*% t(X)) -
    (y + (1 / alpha)) * log(1 + alpha * exp(beta %*% t(X))) +
    lgamma(y + (1 / alpha)) - lgamma(y + 1) - lgamma(1 / alpha)
  logll <- sum(logll)
  return(logll)
}
stval <- rep(0, ncol(X) + 1)
res <-
optim(
stval,
negbin_ll,
y = y,
X = X,
control = list(fnscale = -1),
hessian = TRUE,
method = "BFGS"
)
The code should produce point estimates from the optimisation process, but instead it fails when executing optim with the error: Error in optim(stval, negbin_ll, y = y, X = X, control = list(fnscale = -1), : initial value in 'vmmin' is not finite.
I have already tried changing log(gamma(...)) to lgamma(...) in the likelihood function, among many other things, but I cannot get estimates.
Changing the start values of optim does not help either.
Do you have any idea whether there is some particularity of the likelihood function that causes values to be treated oddly?
Help would be much appreciated.
optim tries several points on its way to the minimum, and in your case it hits some non-positive values in the arguments of the logs. One way to handle this is to discard parameter values that produce non-positive arguments to the problematic functions, by returning a large negative number (for this maximisation), like -length(y) * 10^6. Rewritten like this, the log-likelihood function works:
negbin_ll <- function(y, X, theta) {
  beta <- theta[1:ncol(X)]
  alpha <- theta[ncol(X) + 1]
  # bail out with a large negative value whenever a log() argument
  # would be non-positive
  if (any(alpha <= 0)) return(-length(y) * 10^6)
  if (any(1 + alpha * exp(beta %*% t(X)) <= 0)) return(-length(y) * 10^6)
  logll <- y * log(alpha) + y * (beta %*% t(X)) -
    (y + (1 / alpha)) * log(1 + alpha * exp(beta %*% t(X))) +
    lgamma(y + (1 / alpha)) - lgamma(y + 1) - lgamma(1 / alpha)
  logll <- sum(logll)
  return(logll)
}
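With the guards in place the start values no longer produce a non-finite objective, so the optim call from the question can be rerun as-is (a quick check, reusing the objects defined above; the first five entries of res$par should land near the simulated coefficients):
res <- optim(stval, negbin_ll, y = y, X = X,
             control = list(fnscale = -1), hessian = TRUE, method = "BFGS")
res$par  # first five entries: compare with beta0..beta4 = 3, -2, -2, -4, -0.9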

Plotting fitted values from regression

Hey, I have the following code in R:
S0 = 40
r = log(1 + 0.07)
sigma = 0.3
K = 45
n_steps_per_year = 4
dt = 1 / n_steps_per_year
T = 3
n_steps = n_steps_per_year * T
Q = 70
P = 72
n_paths = P * Q
R = n_paths
d = exp(-r * dt)
N = matrix(rnorm(n_paths * n_steps, mean = 0, sd = 1), n_paths, n_steps)
paths_S = matrix(nrow = n_paths, ncol = n_steps + 1, S0)
# simulate the stock-price paths
for (i in 1:n_paths) {
  for (j in 1:n_steps) {
    paths_S[i, j + 1] = paths_S[i, j] * exp((r - 0.5 * sigma^2) * dt + sigma * sqrt(dt) * N[i, j])
  }
}
I = apply(K - paths_S, c(1,2), max, 0)
V = matrix(nrow = n_paths, ncol = n_steps + 1)
V[, n_steps + 1] = I[, n_steps + 1]
dV = d * V[, n_steps + 1]
model = lm(dV ~ poly(paths_S[, n_steps], 10))
pred = predict(model, data.frame(x = paths_S[, n_steps]))
plot(paths_S[, n_steps], d * V[, n_steps + 1])
lines(paths_S[, n_steps], pred)
but when I run the last two lines I get a very strange plot (multiple lines instead of one line). What is going on?
lines() connects points in the order they are given, so when the x values are unsorted the line zigzags back and forth across the plot; that is the "multiple lines" effect you see. Your code uses n_paths = P * Q = 5040 paths; for a quick illustration I reran it with
n_paths = 7
set.seed(111)
The fix is to order everything by the x values before plotting:
o = order(paths_S[, n_steps])
plot(paths_S[o, n_steps], d * V[o, n_steps + 1], cex = 0.2, pch = 20)
lines(paths_S[o, n_steps], pred[o], col = "blue")

Fitting 1 - exp(x) giving higher weight to the first values

I want to fit a 1 - exp(x) function to a data set, giving higher weight to the first values. However, the following code does not work that way:
x <- sqrt((0.05)^2 + (0.05)^2) * seq(from = 1, to = 20, by = 1)
y <- c(11, 20, 27, 32, 35, 36, 36.5, 25, 16, 9, 4, 1, 7.87e-16, 2.07e-15, -9.36e-16,
       1.61e-15, -3.81e-16, 3.92e-16, 7.65e-16, -8.26e-16)
temp <- data.frame(cbind(x, y))
# weights decrease with position, so the first values count more
we <- 1 / log1p(seq_along(x))
# fit non-linear model
mod <- nls(y ~ 1 - exp(a + b * x), data = temp, start = list(a = 0, b = 0), weights = we)
# plot the data and add the fitted curve
plot(temp$x, temp$y)
lines(temp$x, predict(mod, list(x = temp$x)))
Here is the output (the resulting fit is poor):
Your specification of weights is correct. The bad fit you obtained is due to your faulty model assumption. You assumed:
y ~ 1 - exp(a + b * x)
Note that exp() gives strictly positive values, so y will be no larger than 1. However, y values in your data range up to 35.
My idea is not perfect, but it might give you a better starting point. Consider:
y ~ a * x * exp(b * x * x + c * x)
Using your data:
x <- c(0, sqrt((0.05)^2 + (0.05)^2) * seq(from = 1, to = 20, by = 1))
y <- c(0, 11, 20, 27, 32, 35, 36, 36.5, 25, 16, 9, 4, 1, 7.87e-16, 2.07e-15, -9.36e-16,
       1.61e-15, -3.81e-16, 3.92e-16, 7.65e-16, -8.26e-16)
fit <- nls(y ~ a * x * exp(b * x * x + c * x), start = list(a = 30, b = -1, c = -1))
plot(x, y)
lines(x, predict(fit, list(x)))
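Since the original goal was to give the first points more weight, the same weights idea carries over to this model; a sketch, noting that a weight has to be prepended for the added x = 0 point (the value 1 used here is an arbitrary choice):
we2 <- c(1, 1 / log1p(seq_along(x[-1])))  # weight for x = 0, then the original scheme
fit_w <- nls(y ~ a * x * exp(b * x * x + c * x),
             start = list(a = 30, b = -1, c = -1), weights = we2)
lines(x, predict(fit_w, list(x)), lty = 2)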

Reproduce Fisher linear discriminant figure

Many books illustrate the idea of Fisher linear discriminant analysis using the following figure (this particular one is from Pattern Recognition and Machine Learning, p. 188).
I wonder how to reproduce this figure in R (or in any other language). Pasted below is my initial effort in R. I simulate two groups of data and draw the linear discriminant using the abline() function. Any suggestions are welcome.
set.seed(2014)
library(MASS)
library(DiscriMiner) # For scatter matrices
# Simulate bivariate normal distribution with 2 classes
mu1 <- c(2, -4)
mu2 <- c(2, 6)
rho <- 0.8
s1 <- 1
s2 <- 3
Sigma <- matrix(c(s1^2, rho * s1 * s2, rho * s1 * s2, s2^2), byrow = TRUE, nrow = 2)
n <- 50
X1 <- mvrnorm(n, mu = mu1, Sigma = Sigma)
X2 <- mvrnorm(n, mu = mu2, Sigma = Sigma)
y <- rep(c(0, 1), each = n)
X <- rbind(x1 = X1, x2 = X2)
X <- scale(X)
# Scatter matrices
B <- betweenCov(variables = X, group = y)
W <- withinCov(variables = X, group = y)
# Eigenvectors
ev <- eigen(solve(W) %*% B)$vectors
slope <- - ev[1,1] / ev[2,1]
intercept <- ev[2,1]
par(pty = "s")
plot(X, col = y + 1, pch = 16)
abline(a = intercept, b = slope, lwd = 2, lty = 2)
MY (UNFINISHED) WORK
I pasted my current solution below. The main question is how to rotate (and move) the density plot according to the decision boundary. Any suggestions are still welcome.
require(ggplot2)
library(grid)
library(MASS)
# Simulation parameters
mu1 <- c(5, -9)
mu2 <- c(4, 9)
rho <- 0.5
s1 <- 1
s2 <- 3
Sigma <- matrix(c(s1^2, rho * s1 * s2, rho * s1 * s2, s2^2), byrow = TRUE, nrow = 2)
n <- 50
# Multivariate normal sampling
X1 <- mvrnorm(n, mu = mu1, Sigma = Sigma)
X2 <- mvrnorm(n, mu = mu2, Sigma = Sigma)
# Combine into data frame
y <- rep(c(0, 1), each = n)
X <- rbind(x1 = X1, x2 = X2)
X <- scale(X)
X <- data.frame(X, class = y)
# Apply lda()
m1 <- lda(class ~ X1 + X2, data = X)
m1.pred <- predict(m1)
# Compute intercept and slope for abline
gmean <- m1$prior %*% m1$means
const <- as.numeric(gmean %*% m1$scaling)
z <- as.matrix(X[, 1:2]) %*% m1$scaling - const
slope <- - m1$scaling[1] / m1$scaling[2]
intercept <- const / m1$scaling[2]
# Projected values
LD <- data.frame(predict(m1)$x, class = y)
# Scatterplot
p1 <- ggplot(X, aes(X1, X2, color=as.factor(class))) +
geom_point() +
theme_bw() +
theme(legend.position = "none") +
scale_x_continuous(limits=c(-5, 5)) +
scale_y_continuous(limits=c(-5, 5)) +
geom_abline(intercept = intercept, slope = slope)
# Density plot
p2 <- ggplot(LD, aes(x = LD1)) +
geom_density(aes(fill = as.factor(class), y = ..scaled..)) +
theme_bw() +
theme(legend.position = "none")
grid.newpage()
print(p1)
vp <- viewport(width = .7, height = 0.6, x = 0.5, y = 0.3, just = c("centre"))
pushViewport(vp)
print(p2, vp = vp)
Basically you need to project the data along the direction of the classifier, plot a histogram for each class, and then rotate the histogram so its x axis is parallel to the classifier. Some trial and error with scaling the histogram is needed to get a nice result. Here's an example of how to do it in Matlab, for the naive classifier (the difference of the class means). For the Fisher classifier it is of course similar; you just use a different classifier w. I changed the parameters from your code so the plot is more similar to the one you gave.
rng('default')
n = 1000;
mu1 = [1,3]';
mu2 = [4,1]';
rho = 0.3;
s1 = .8;
s2 = .5;
Sigma = [s1^2, rho*s1*s2; rho*s1*s2, s2^2];
X1 = mvnrnd(mu1,Sigma,n);
X2 = mvnrnd(mu2,Sigma,n);
X = [X1; X2];
Y = [zeros(n,1);ones(n,1)];
scatter(X1(:,1), X1(:,2), [], 'b' );
hold on
scatter(X2(:,1), X2(:,2), [], 'r' );
axis equal
m1 = mean(X(1:n,:))';
m2 = mean(X(n+1:end,:))';
plot(m1(1),m1(2),'bx','markersize',18)
plot(m2(1),m2(2),'rx','markersize',18)
plot([m1(1),m2(1)], [m1(2),m2(2)],'g')
%% classifier taking only means into account
w = m2 - m1;
w = w / norm(w);
% project data onto w
X1_projected = X1 * w;
X2_projected = X2 * w;
% plot histogram and rotate it
angle = 180/pi * atan(w(2)/w(1));
[hy1, hx1] = hist(X1_projected);
[hy2, hx2] = hist(X2_projected);
hy1 = hy1 / sum(hy1); % normalize
hy2 = hy2 / sum(hy2); % normalize
scale = 4; % set manually
h1 = bar(hx1, scale*hy1,'b');
h2 = bar(hx2, scale*hy2,'r');
set([h1, h2],'ShowBaseLine','off')
% rotate around the origin
rotate(get(h1,'children'),[0,0,1], angle, [0,0,0])
rotate(get(h2,'children'),[0,0,1], angle, [0,0,0])
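Since the question asked for R, here is a rough translation of the same idea into base R (a sketch, assuming the X1 and X2 matrices simulated in the question's first code block are in the workspace; as in the Matlab version, the histogram scale factor is set by eye):
# naive classifier: unit vector along the difference of the class means
w <- colMeans(X2) - colMeans(X1)
w <- w / sqrt(sum(w^2))
theta <- atan2(w[2], w[1])  # angle of w relative to the x axis
# rotate points (rows of xy) by theta around the origin
rotate_pts <- function(xy, theta) {
  Rm <- matrix(c(cos(theta), sin(theta), -sin(theta), cos(theta)), 2, 2)
  xy %*% t(Rm)
}
# draw one class's histogram as bars along the x axis, rotated onto w
draw_rotated_hist <- function(proj, theta, col, scale = 4) {
  h <- hist(proj, plot = FALSE)
  height <- h$counts / sum(h$counts) * scale  # normalize, then scale by eye
  for (i in seq_along(height)) {
    corners <- cbind(c(h$breaks[i], h$breaks[i + 1], h$breaks[i + 1], h$breaks[i]),
                     c(0, 0, height[i], height[i]))
    rc <- rotate_pts(corners, theta)
    polygon(rc[, 1], rc[, 2], col = col, border = NA)
  }
}
plot(rbind(X1, X2), col = rep(c("blue", "red"), each = nrow(X1)),
     pch = 16, asp = 1, xlab = "x1", ylab = "x2")
draw_rotated_hist(as.numeric(X1 %*% w), theta, adjustcolor("blue", 0.5))
draw_rotated_hist(as.numeric(X2 %*% w), theta, adjustcolor("red", 0.5))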
