Related
I am using 'KFAS' package from R to estimate a state-space model with the Kalman filter. My measurement and transition equations are:
y_t = b_0 + b_1 xx_t + Z_t x_t + \varepsilon_t (measurement)
x_t = T_t x_{t-1} + R_t \eta_t (transition),
with \varepsilon_t \sim N(0, H_t) and \eta_t \sim N(0, Q_t),
where xx_t are covariates. I have read this question and wrote the following code
# Simulate a regression with AR(1) + heavy-tailed noise and set up the
# corresponding state-space model for KFAS.
library(KFAS)
set.seed(100)
# Covariate and true regression coefficients.
xx <- rnorm(200)
beta0 <- 0.1
beta1 <- 0.1
# Measurement noise: rt(n, df, ncp), i.e. a non-central t with df = 4, ncp = 1
# (so it is neither Gaussian nor zero-mean).
eps <- rt(200, 4, 1)
# Response = regression part + AR(1) state (coefficient 0.6, innovation
# variance 0.5) + measurement noise.
# NOTE(review): `ncol=1` looks like it is swallowed by as.matrix()'s `...`
# and has no effect -- confirm.
y <- as.matrix(beta0 + beta1*xx + (arima.sim(n=200, list(ar=0.6), innov = rnorm(200)*sqrt(0.5)) + eps),
ncol=1)
# System matrices of the custom state; NA entries are the unknowns that the
# update function passed to fitSSM() is expected to fill in.
Zt <- 1
Ht <- matrix(NA)
Tt <- matrix(NA)
Rt <- 1
Qt <- matrix(NA)
# `xx` (and presumably an intercept -- confirm) enters through the formula,
# the latent AR(1) component through SSMcustom().
ss_model <- SSModel(y ~ xx + SSMcustom(Z = Zt, T = Tt, R = Rt,
Q = Qt), H = Ht)
# Map the optimiser's parameter vector onto the unknown (NA) entries of the
# state-space model; intended as the `updatefn` argument of fitSSM().
#
# pars:  numeric vector c(H, T, Q) = observation variance, AR coefficient of
#        the custom state, and variance of the custom state's disturbance.
# model: model object whose H, T and Q arrays (state x state x time) are
#        filled in.
# Returns the updated model.
updatefn <- function(pars, model) {
  model$H[1] <- pars[1]
  # The regression terms of the formula are states as well, so T is larger
  # than 1 x 1 and `model$T[1]` would overwrite the (1, 1) entry of a
  # regression state while the custom state's NA stays in place.
  # Assumes the SSMcustom() state is the last state -- confirm by printing
  # model$T.
  custom <- dim(model$T)[1]
  model$T[custom, custom, 1] <- pars[2]
  model$Q[1] <- pars[3]
  model
}
# Estimate (H, T, Q) by box-constrained maximum likelihood: both variances are
# kept in [0, 100] and the AR coefficient inside (-1, 1).
fit <- fitSSM(
  model = ss_model,
  inits = c(1, 0.5, 1),
  updatefn = updatefn,
  method = "L-BFGS-B",
  lower = c(0, -0.99, 0),
  upper = c(100, 0.99, 100)
)
I get the error
Error in is.SSModel(do.call(updatefn, args = c(list(inits, model), update_args)), :
System matrices (excluding Z) contain NA or infinite values, covariance matrices contain values larger than 1e+07
I have tried changing the initial vector to c(1, 0.5, 1, 1, 1), but it returns the same message. Does anyone know how I can do this?
Thanks!
When performing ridge regression manually, as it is defined
solve(t(X) %*% X + lbd*I) %*%t(X) %*% y
I get different results from those calculated by MASS::lm.ridge. Why? For ordinary linear regression the manual method (computing the pseudoinverse) works fine.
Here is my Minimal, Reproducible Example:
library(tidyverse)
# Closed-form ridge regression on the design matrix exactly as given (no
# centring or scaling of the columns).
#
# X:   numeric design matrix (n x p); include a column of ones for an
#      intercept, which is then penalised like every other coefficient.
# y:   numeric response of length n.
# lbd: ridge penalty; 0 gives ordinary least squares.
# Returns a 1 x p matrix holding (X'X + lbd * I)^{-1} X'y.
ridgeRegression <- function(X, y, lbd) {
  # Solve the penalised normal equations directly instead of forming the
  # explicit inverse: cheaper and numerically more stable.
  gram <- crossprod(X) + lbd * diag(ncol(X))
  t(solve(gram, crossprod(X, y)))
}
# Simulated data: two strongly correlated predictors plus a constant column.
set.seed(0)
tb1 <- tibble(
  x0 = 1,
  x1 = seq(-1, 1, by = 0.01),
  x2 = x1 + rnorm(length(x1), mean = 0, sd = 0.1),
  y = x1 + x2 + rnorm(length(x1), mean = 0, sd = 0.5)
)
X <- tb1 %>%
  select(x0, x1, x2) %>%
  as.matrix()
# Sanity check: with a zero penalty the manual estimate must agree with lm().
ridgeRegression(X, tb1$y, 0) - coef(lm(y ~ x1 + x2, data = tb1))
# looks the same: -2.94903e-17 1.487699e-14 -2.176037e-14
# Same comparison against MASS::lm.ridge with penalty 10.
ridgeRegression(X, tb1$y, 10) -
  coef(MASS::lm.ridge(y ~ x0 + x1 + x2 - 1, data = tb1, lambda = 10))
# noticeably different: -0.0001407148 0.003689412 -0.08905392
MASS::lm.ridge scales the data before modelling - this accounts for the difference in the coefficients.
You can confirm this by checking the function code by typing MASS::lm.ridge into the R console.
Here is the lm.ridge function with the scaling portion commented out:
# Body of MASS::lm.ridge re-run step by step on the raw design matrix, with
# the column-scaling step disabled and the penalty fixed at 10.
X <- as.matrix(select(tb1, x0, x1, x2))
n <- nrow(X)
p <- ncol(X)
# Scaling step of lm.ridge, intentionally left out:
#Xscale <- drop(rep(1/n, n) %*% X^2)^0.5
#X <- X/rep(Xscale, rep(n, p))
# Least-squares solution through the singular value decomposition.
Xs <- svd(X)
rhs <- t(Xs$u) %*% tb1$y
d <- Xs$d
lscoef <- Xs$v %*% (rhs / d)
lsfit <- X %*% lscoef
resid <- tb1$y - lsfit
s2 <- sum(resid^2) / (n - p)
# Penalty suggestions computed by lm.ridge; not used further below.
HKB <- (p - 2) * s2 / sum(lscoef^2)
LW <- (p - 2) * s2 * n / sum(lsfit^2)
# Ridge solution for a single penalty value (k = 1, lambda = 10).
k <- 1
dx <- length(d)
div <- d^2 + rep(10, times = rep(dx, k))
a <- drop(d * rhs) / div
dim(a) <- c(dx, k)
coef <- Xs$v %*% a
coef
# x0 x1 x2
#[1,] 0.01384984 0.8667353 0.9452382
I want to write a maximum likelihood estimator (MLE) for a finite mixture model in R, but the coefficients estimated by my model are not the same as the coefficients estimated by the flexmix package. Could you point out my mistakes?
My code is as follows:
# Prepare data: simulate a wage regression and derive optimiser start values.
slope1 <- -0.3
slope2 <- 0.3
slope3 <- 1.8
slope4 <- 0.5
intercept1 <- 1.5
# Predictors: 200 draws without replacement from 401-point grids.
age <- sample(seq(18, 60, length.out = 401), size = 200)
grade <- sample(seq(0, 100, length.out = 401), size = 200)
not_smsa <- sample(seq(-2, 2, length.out = 401), size = 200)
# Drawn but not used in the model below.
unemployment <- rnorm(200, mean = 0, sd = 1)
noise <- rnorm(length(age), mean = 0, sd = 0.15)
wage <- intercept1 + slope1 * age + slope2 * grade + slope3 * not_smsa + noise
rm(noise)
y <- wage
# Design matrix with an explicit constant column.
X <- cbind(1, age, grade, not_smsa)
mydata <- data.frame(X, y, check.names = FALSE)
# Start values: OLS coefficients followed by five small constants.
anso <- lm(wage ~ age + grade + not_smsa, data = mydata)
vi <- c(coef(anso), 0.01, 0.02, 0.03, 0.04, 0.1)
#function
# Negative log-likelihood of a two-component Gaussian mixture regression.
#
# beta: numeric vector of length 9 -- beta[1:4] and beta[5:8] are the
#       regression coefficients of components 1 and 2, beta[9] is the logit
#       of the mixing weight of component 1.
# Reads the design matrix `X` (4 columns) and response `y` from the calling
# environment. Both component standard deviations are fixed at 1 (dnorm's
# default); they are not estimated here.
# Returns a single number suitable for minimisation with optim().
fmm <- function(beta) {
  mu1 <- c(X %*% beta[1:4])
  mu2 <- c(X %*% beta[5:8])
  # Work on the log scale: far from both component means the densities
  # underflow to 0 and log(0) would make the objective infinite.
  log_c1 <- plogis(beta[9], log.p = TRUE) + dnorm(y, mu1, log = TRUE)
  log_c2 <- plogis(-beta[9], log.p = TRUE) + dnorm(y, mu2, log = TRUE)
  top <- pmax(log_c1, log_c2)
  log_lik <- top + log(exp(log_c1 - top) + exp(log_c2 - top))
  # Observations with an undefined contribution are dropped, as before.
  -sum(log_lik, na.rm = TRUE)
}
# Fit the hand-written mixture likelihood, starting from `vi`.
fit <- optim(vi, fmm, method = "BFGS", control = list(maxit = 50000),
  hessian = TRUE)
# A non-zero code means the estimates below are not a converged optimum.
if (fit$convergence != 0) {
  warning("optim() did not converge (code ", fit$convergence, ")",
    call. = FALSE)
}
fit$par
# Reference fit with flexmix for comparison.
library(flexmix)
flexfit <- flexmix(wage ~ age + grade + not_smsa, data = mydata, k = 2)
# flexmix returns an S4 object, so `flexfit$par` is not valid; the fitted
# coefficients and sigmas are read with parameters().
parameters(flexfit)
c1 <- parameters(flexfit, component = 1)
c2 <- parameters(flexfit, component = 2)
Are there any mistakes in my code?
I have fixed the mistakes in my code: the parameters of the main function needed some constraints added.
# Negative log-likelihood of a two-component Gaussian mixture regression with
# component-specific standard deviations.
#
# pars: numeric vector laid out as, with k = ncol(X):
#         pars[1:k]               coefficients of component 1
#         pars[k + 1]             unconstrained sd parameter of component 1
#         pars[(k + 2):(2k + 1)]  coefficients of component 2
#         pars[2k + 2]            unconstrained sd parameter of component 2
#         pars[2k + 3]            logit of the weight of component 1
#       Any further elements are ignored.
# Reads the design matrix `X` and response `y` from the calling environment.
# Returns a single number suitable for minimisation with optim().
fmm <- function(pars) {
  k <- ncol(X)
  stopifnot(length(pars) >= 2 * k + 3)
  beta1 <- pars[seq_len(k)]
  # softplus keeps the standard deviations positive; the slot after the
  # coefficients is used (pars[k] is still a regression coefficient).
  sigma1 <- log(1 + exp(pars[k + 1]))
  # Exactly k coefficients, so that X %*% beta2 is conformable.
  beta2 <- pars[k + 1 + seq_len(k)]
  sigma2 <- log(1 + exp(pars[2 * k + 2]))
  logit_p1 <- pars[2 * k + 3]
  mu1 <- c(X %*% beta1)
  mu2 <- c(X %*% beta2)
  # Log-scale mixture density, so tiny component densities do not turn into
  # log(0).
  log_c1 <- plogis(logit_p1, log.p = TRUE) + dnorm(y, mu1, sigma1, log = TRUE)
  log_c2 <- plogis(-logit_p1, log.p = TRUE) + dnorm(y, mu2, sigma2, log = TRUE)
  top <- pmax(log_c1, log_c2)
  log_lik <- top + log(exp(log_c1 - top) + exp(log_c2 - top))
  -sum(log_lik, na.rm = TRUE)
}
Many books illustrate the idea of Fisher linear discriminant analysis using the following figure (this particular one is from Pattern Recognition and Machine Learning, p. 188).
I wonder how to reproduce this figure in R (or in any other language). Pasted below is my initial effort in R. I simulate two groups of data and draw linear discriminant using abline() function. Any suggestions are welcome.
# Simulate two Gaussian classes and draw the Fisher discriminant boundary on
# top of the scatter plot.
set.seed(2014)
library(MASS)
library(DiscriMiner) # For scatter matrices
# Simulate bivariate normal distribution with 2 classes
mu1 <- c(2, -4)
mu2 <- c(2, 6)
rho <- 0.8
s1 <- 1
s2 <- 3
Sigma <- matrix(c(s1^2, rho * s1 * s2, rho * s1 * s2, s2^2), byrow = TRUE, nrow = 2)
n <- 50
X1 <- mvrnorm(n, mu = mu1, Sigma = Sigma)
X2 <- mvrnorm(n, mu = mu2, Sigma = Sigma)
y <- rep(c(0, 1), each = n)
X <- rbind(x1 = X1, x2 = X2)
X <- scale(X)
# Scatter matrices
B <- betweenCov(variables = X, group = y)
W <- withinCov(variables = X, group = y)
# Fisher direction: leading eigenvector of W^{-1} B
ev <- eigen(solve(W) %*% B)$vectors
w <- ev[, 1]
# The boundary is the line perpendicular to w through the midpoint of the two
# class means: w . (x - mid) = 0.
mid <- (colMeans(X[y == 0, , drop = FALSE]) +
  colMeans(X[y == 1, , drop = FALSE])) / 2
slope <- -w[1] / w[2]
intercept <- sum(w * mid) / w[2]
par(pty = "s")
plot(X, col = y + 1, pch = 16)
# abline() takes the intercept as `a` and the slope as `b`.
abline(a = intercept, b = slope, lwd = 2, lty = 2)
MY (UNFINISHED) WORK
I pasted my current solution below. The main question is how to rotate (and move) the density plot according to decision boundary. Any suggestions are still welcome.
# Scatter plot of two simulated classes with the LDA decision boundary, plus
# an inset showing the densities of the projected (LD1) scores.
library(ggplot2)
library(grid)
library(MASS)
# Simulation parameters
mu1 <- c(5, -9)
mu2 <- c(4, 9)
rho <- 0.5
s1 <- 1
s2 <- 3
Sigma <- matrix(c(s1^2, rho * s1 * s2, rho * s1 * s2, s2^2), byrow = TRUE, nrow = 2)
n <- 50
# Multivariate normal sampling
X1 <- mvrnorm(n, mu = mu1, Sigma = Sigma)
X2 <- mvrnorm(n, mu = mu2, Sigma = Sigma)
# Combine into data frame (columns end up named X1, X2, class)
y <- rep(c(0, 1), each = n)
X <- rbind(x1 = X1, x2 = X2)
X <- scale(X)
X <- data.frame(X, class = y)
# Apply lda()
m1 <- lda(class ~ X1 + X2, data = X)
m1.pred <- predict(m1)
# Compute intercept and slope for abline: the boundary is the set of points
# whose discriminant score equals that of the prior-weighted mean, i.e.
# scaling[1] * X1 + scaling[2] * X2 = const.
gmean <- m1$prior %*% m1$means
const <- as.numeric(gmean %*% m1$scaling)
z <- as.matrix(X[, 1:2]) %*% m1$scaling - const
slope <- -m1$scaling[1] / m1$scaling[2]
intercept <- const / m1$scaling[2]
# Projected values
LD <- data.frame(m1.pred$x, class = y)
# Scatterplot
p1 <- ggplot(X, aes(X1, X2, color = as.factor(class))) +
  geom_point() +
  theme_bw() +
  theme(legend.position = "none") +
  scale_x_continuous(limits = c(-5, 5)) +
  scale_y_continuous(limits = c(-5, 5)) +
  # The argument is `intercept`; a misspelt name is dropped and the line
  # would be drawn through the origin.
  geom_abline(intercept = intercept, slope = slope)
# Density plot
p2 <- ggplot(LD, aes(x = LD1)) +
  geom_density(aes(fill = as.factor(class), y = ..scaled..)) +
  theme_bw() +
  theme(legend.position = "none")
grid.newpage()
print(p1)
vp <- viewport(width = .7, height = 0.6, x = 0.5, y = 0.3, just = c("centre"))
# print(<ggplot>, vp = ) pushes and pops the viewport itself; pushing it by
# hand as well would nest it twice and shrink the inset.
print(p2, vp = vp)
Basically you need to project the data along the direction of the classifier, plot a histogram for each class, and then rotate the histogram so its x axis is parallel to the classifier. Some trial-and-error with scaling the histogram is needed in order to get a nice result. Here's an example of how to do it in Matlab, for the naive classifier (difference of class' means). For the Fisher classifier it is of course similar, you just use a different classifier w. I changed the parameters from your code so the plot is more similar to the one you gave.
% Simulate two Gaussian classes, project them onto the difference of the class
% means and draw the per-class histograms of the projections, rotated so that
% their x axis is parallel to the projection direction.
rng('default')
n = 1000;
mu1 = [1,3]';
mu2 = [4,1]';
rho = 0.3;
s1 = .8;
s2 = .5;
% Covariance of a pair with correlation rho is rho*s1*s2 (not rho*s1*s1).
Sigma = [s1^2,rho*s1*s2;rho*s1*s2, s2^2];
X1 = mvnrnd(mu1,Sigma,n);
X2 = mvnrnd(mu2,Sigma,n);
X = [X1; X2];
Y = [zeros(n,1);ones(n,1)];
scatter(X1(:,1), X1(:,2), [], 'b' );
hold on
scatter(X2(:,1), X2(:,2), [], 'r' );
axis equal
% Sample class means and the segment joining them.
m1 = mean(X(1:n,:))';
m2 = mean(X(n+1:end,:))';
plot(m1(1),m1(2),'bx','markersize',18)
plot(m2(1),m2(2),'rx','markersize',18)
plot([m1(1),m2(1)], [m1(2),m2(2)],'g')
%% classifier taking only means into account
w = m2 - m1;
w = w / norm(w);
% project data onto w
X1_projected = X1 * w;
X2_projected = X2 * w;
% plot histogram and rotate it
% atan2 keeps the quadrant of w, so the bars point along w even when
% w(1) <= 0.
angle = 180/pi * atan2(w(2), w(1));
[hy1, hx1] = hist(X1_projected);
[hy2, hx2] = hist(X2_projected);
hy1 = hy1 / sum(hy1); % normalize
hy2 = hy2 / sum(hy2); % normalize
scale = 4; % set manually
h1 = bar(hx1, scale*hy1,'b');
h2 = bar(hx2, scale*hy2,'r');
set([h1, h2],'ShowBaseLine','off')
% rotate around the origin
% NOTE(review): relies on bar objects exposing patch children, which may not
% hold on newer graphics systems -- confirm on the target MATLAB release.
rotate(get(h1,'children'),[0,0,1], angle, [0,0,0])
rotate(get(h2,'children'),[0,0,1], angle, [0,0,0])
I have three classes with mean
# Class means as 2 x 1 column matrices.
mu1 <- cbind(c(3, 1))
mu2 <- cbind(c(4, 3))
mu3 <- cbind(c(8, 2))
and covariance
# Common 2 x 2 covariance matrix, written row by row.
cov <- rbind(
  c(0.5, 0.3),
  c(0.3, 0.5)
)
I would like to simulate about 100 observations from each class and perform LDA.
First, I made three matrices with 100 observations each.
# Draw 100 points per class and attach the class label as a third column.
# The label is added as a data-frame column: putting numbers and labels into
# one matrix via c() coerces every coordinate to character, and lda() then
# treats the coordinates as categorical predictors.
# Columns are named X1, X2 (coordinates) and X3 (label).
x1 <- data.frame(rmvnorm(100, mean = c(mu1), sigma = cov), X3 = "x1")
x2 <- data.frame(rmvnorm(100, mean = c(mu2), sigma = cov), X3 = "x2")
x3 <- data.frame(rmvnorm(100, mean = c(mu3), sigma = cov), X3 = "x3")
and made those to data frame and bind it together.
# One data frame per class, then all classes stacked row-wise.
d1 <- data.frame(x1)
d2 <- data.frame(x2)
d3 <- data.frame(x3)
alld <- do.call(rbind, list(d1, d2, d3))
now I would like to perform lda with code of
# Refer to the columns by name (data.frame() names the columns of an unnamed
# matrix X1, X2, X3) so that `data = alld` is actually used and predict() can
# later be given new data.
lda.x1 <- lda(X3 ~ X1 + X2, data = alld)
Here I get a warning message and a strange result.
Please help me out.
Thank you
Your groups are on a line, which is tripping up lda (see plot(alld[, 1], alld[, 2], col = alld[, 3])). I've modified your code a bit and added some noise to the means.
# Simulate three Gaussian classes with randomly placed means, fit an LDA and
# plot the result.
set.seed(357)
# library() fails loudly when a package is missing; lda() lives in MASS,
# rmvnorm() in mvtnorm.
library(MASS)
library(mvtnorm)
# Random integer means, one pair per class.
mu1 <- sample(1:10, 2)
mu2 <- sample(1:10, 2)
mu3 <- sample(1:10, 2)
cov <- matrix(c(.5, .3, .3, .5), nrow = 2, ncol = 2)
x1 <- rmvnorm(100, mean = mu1, sigma = cov)
x2 <- rmvnorm(100, mean = mu2, sigma = cov)
x3 <- rmvnorm(100, mean = mu3, sigma = cov)
# Keep the coordinates numeric and store the class as a separate column.
alld <- data.frame(rbind(x1, x2, x3))
alld$col <- rep(1:3, each = 100)
names(alld) <- c("a", "b", "col")
plot(b ~ a, data = alld, col = alld$col)
mdl <- lda(col ~ a + b, data = alld)
plot(mdl)
# Overlay the discriminant scores of the training data.
points(predict(mdl)$x, cex = 0.5, pch = "+")