How to fit a finite mixture of Dirichlet distributions - r

I have a compositional sample and I would like to fit a finite mixture of Dirichlet distributions. To be more precise, consider the following example:
library(gtools)
set.seed(1)
PROB = c(0.25, 0.15, 0.60)
ALPHA = list(
c(1,1,1),
c(2,1,1),
c(1,1,20)
)
size = 500
N = sapply(1:3, function(i, z) sum(z == i),
sample(1:3, size, prob = PROB, replace = TRUE))
X = do.call('rbind',
sapply(1:3, function(i, N)
rdirichlet(N[i], ALPHA[[i]]), N))[sample(1:size),]
X contains sample generated from a mixture of Dirichlet distributions defined in the 3-part simplex. The first Dirichlet component of this mixture has parameter (1,1,1), the second component has parameter (2,1,1) and the third (1,1,20). The mixture probabilities are 0.25, 0.15, 0.60. I would like to retrieve these parameters from the sample.
How would you find this parameters?

Reparameterizing in terms of theta1=log(p1/p3), theta2=log(p2/p3) and logs of all 9 alpha parameters, and then maximising the log likelihood using optim() with method="BFGS" seems to work if using initial values sufficiently close to the parameter values used to simulate the data. At least, all eigenvalues of the Hessian are negative, and small changes in initial values leads to the the same optimum.
repar <- function(theta) {
p <- exp(theta[1])
p[2] <- exp(theta[2])
p[3] <- 1
p <- p/sum(p)
alpha <- matrix(exp(theta[3:11]),3,3,byrow=TRUE)
list(p=p,alpha=alpha)
}
logL <- function(theta,x) {
par <- repar(theta)
p <- par$p
alpha <- par$alpha
terms <- 0
for (i in 1:length(p)) {
terms <- terms + p[i]*ddirichlet(x,alpha[i,])
}
-sum(log(terms))
}
start <- c(log(c(.25,.15)/.6), log(c(1,1,1, 2,1,1, 1,1,20)))
fit <- optim(start,logL,x=X,hessian=TRUE,method="BFGS")
repar(fit$par)
eigen(fit$hessian)$val
fit2 <- optim(start+rnorm(11,sd=.2),logL,x=X,hessian=TRUE,method="BFGS")
repar(fit2$par)

Related

How do I calculate cronbach's alpha on multiply imputed data?

I have run a multiple imputation (m=45, 10 iterations) using the MICE package, and want to calculate the cronbach's alpha for a number of ordinal scales in the data. Is there a function in r that could assist me in calculating the alpha coefficient across the imputed datasets in a manner that would satisfy Rubin's rules for pooling estimates?
We may exploit pool.scalar from the mice package, which performs pooling of univariate estimates according to Rubin's rules.
Since you have not provided a reproducible example yourself, I will provide one.
set.seed(123)
# sample survey responses
df <- data.frame(
x1 = c(1,2,2,3,2,2,3,3,2,3,
1,2,2,3,2,2,3,3,2,3,
1,2,2,3,2,2,3,3,2,3),
x2 = c(1,1,1,2,3,3,2,3,3,3,
1,1,1,2,3,3,2,3,3,3,
1,2,2,3,2,2,3,3,2,3),
x3 = c(1,1,2,1,2,3,3,3,2,3,
1,1,2,1,2,3,3,3,2,3,
1,2,2,3,2,2,3,3,2,3)
)
# function to column-wise generate missing values (MCAR)
create_missings <- function(data, prob) {
x <- replicate(ncol(data),rbinom(nrow(data), 1, prob))
for(k in 1:ncol(data)) {
data[, k] <- ifelse(x[, k] == 1, NA, data[,k])
}
data
}
df <- create_missings(df, prob = 0.2)
# multiple imputation ----------------------------------
library(mice)
imp <- mice(df, m = 10, maxit = 20)
# extract the completed data in long format
implong <- complete(imp, 'long')
We need a function to compute cronbach's alpha and obtain an estimate of the standard error of alpha, which can be used in a call to pool.scalar() later on. Since there is no available formula with which we can analytically estimate the standard error of alpha, we also need to deploy a bootstrapping procedure to estimate this standard error.
The function cronbach_fun() takes the following arguments:
list_compl_data: a character string specifying the list of completed data from a mids object.
boot: a logical indicating whether a non-parametrical bootstrap should be conducted.
B: an integer specifying the number of bootstrap samples to be taken.
ci: a logical indicating whether a confidence interval around alpha should be estimated.
cronbach_fun <- function(list_compl_data, boot = TRUE, B = 1e4, ci = FALSE) {
n <- nrow(list_compl_data); p <- ncol(list_compl_data)
total_variance <- var(rowSums(list_compl_data))
item_variance <- sum(apply(list_compl_data, 2, sd)^2)
alpha <- (p/(p - 1)) * (1 - (item_variance/total_variance))
out <- list(alpha = alpha)
boot_alpha <- numeric(B)
if (boot) {
for (i in seq_len(B)) {
boot_dat <- list_compl_data[sample(seq_len(n), replace = TRUE), ]
total_variance <- var(rowSums(boot_dat))
item_variance <- sum(apply(boot_dat, 2, sd)^2)
boot_alpha[i] <- (p/(p - 1)) * (1 - (item_variance/total_variance))
}
out$var <- var(boot_alpha)
}
if (ci){
out$ci <- quantile(boot_alpha, c(.025,.975))
}
return(out)
}
Now that we have our function to do the 'heavy lifting', we can run it on all m completed data sets, after which we can obtain Q and U (which are required for the pooling of the estimates). Consult ?pool.scalar for more information.
m <- length(unique(implong$.imp))
boot_alpha <- rep(list(NA), m)
for (i in seq_len(m)) {
set.seed(i) # fix random number generator
sub <- implong[implong$.imp == i, -c(1,2)]
boot_alpha[[i]] <- cronbach_fun(sub)
}
# obtain Q and U (see ?pool.scalar)
Q <- sapply(boot_alpha, function(x) x$alpha)
U <- sapply(boot_alpha, function(x) x$var)
# pooled estimates
pool_estimates <- function(x) {
out <- c(
alpha = x$qbar,
lwr = x$qbar - qt(0.975, x$df) * sqrt(x$t),
upr = x$qbar + qt(0.975, x$df) * sqrt(x$t)
)
return(out)
}
Output
# Pooled estimate of alpha (95% CI)
> pool_estimates(pool.scalar(Q, U))
alpha lwr upr
0.7809977 0.5776041 0.9843913

nlm function fails with analytic Hessian

Some background: the nlm function in R is a general purpose optimization routine that uses Newton's method. To optimize a function, Newton's method requires the function, as well as the first and second derivatives of the function (the gradient vector and the Hessian matrix, respectively). In R the nlm function allows you to specify R functions that correspond to calculations of the gradient and Hessian, or one can leave these unspecified and numerical solutions are provided based on numerical derivatives (via the deriv function). More accurate solutions can be found by supplying functions to calculate the gradient and Hessian, so it's a useful feature.
My problem: the nlm function is slower and often fails to converge in a reasonable amount of time when the analytic Hessian is supplied. I'm guessing this is some sort of bug in the underlying code, but I'd be happy to be wrong. Is there a way to make nlm work better with an analytic Hessian matrix?
Example: my R code below demonstrates this problem using a logistic regression example, where
log(Pr(Y=1)/Pr(Y=0)) = b0 + Xb
where X is a multivariate normal of dimension N by p and b is a vector of coefficients of length p.
library(mvtnorm)
# example demonstrating a problem with NLM
expit <- function(mu) {1/(1+exp(-mu))}
mk.logit.data <- function(N,p){
set.seed(1232)
U = matrix(runif(p*p), nrow=p, ncol=p)
S = 0.5*(U+t(U)) + p*diag(rep(1,p))
X = rmvnorm(N, mean = runif(p, -1, 1), sigma = S)
Design = cbind(rep(1, N), X)
beta = sort(sample(c(rep(0,p), runif(1))))
y = rbinom(N, 1, expit(Design%*%beta))
list(X=X,y=as.numeric(y),N=N,p=p)
}
# function to calculate gradient vector at given coefficient values
logistic_gr <- function(beta, y, x, min=TRUE){
mu = beta[1] + x %*% beta[-1]
p = length(beta)
n = length(y)
D = cbind(rep(1,n), x)
gri = matrix(nrow=n, ncol=p)
for(j in 1:p){
gri[,j] = D[,j]*(exp(-mu)*y-1+y)/(1+exp(-mu))
}
gr = apply(gri, 2, sum)
if(min) gr = -gr
gr
}
# function to calculate Hessian matrix at given coefficient values
logistic_hess <- function(beta, y, x, min=TRUE){
# allow to fail with NA, NaN, Inf values
mu = beta[1] + x %*% beta[-1]
p = length(beta)
n = length(y)
D = cbind(rep(1,n), x)
h = matrix(nrow=p, ncol=p)
for(j in 1:p){
for(k in 1:p){
h[j,k] = -sum(D[,j]*D[,k]*(exp(-mu))/(1+exp(-mu))^2)
}
}
if(min) h = -h
h
}
# function to calculate likelihood (up to a constant) at given coefficient values
logistic_ll <- function(beta, y,x, gr=FALSE, he=FALSE, min=TRUE){
mu = beta[1] + x %*% beta[-1]
lli = log(expit(mu))*y + log(1-expit(mu))*(1-y)
ll = sum(lli)
if(is.na(ll) | is.infinite(ll)) ll = -1e16
if(min) ll=-ll
# the below specification is required for using analytic gradient/Hessian in nlm function
if(gr) attr(ll, "gradient") <- logistic_gr(beta, y=y, x=x, min=min)
if(he) attr(ll, "hessian") <- logistic_hess(beta, y=y, x=x, min=min)
ll
}
First example, with p=3:
dat = mk.logit.data(N=100, p=3)
The glm function estimates are for reference. nlm should give the same answer, allowing for small errors due to approximation.
(glm.sol <- glm(dat$y~dat$X, family=binomial()))$coefficients
> (Intercept) dat$X1 dat$X2 dat$X3
> 0.00981465 0.01068939 0.04417671 0.01625381
# works when correct analytic gradient is specified
(nlm.sol1 <- nlm(p=runif(dat$p+1), f=logistic_ll, gr=TRUE, y=dat$y, x=dat$X))$estimate
> [1] 0.009814547 0.010689396 0.044176627 0.016253966
# works, but less accurate when correct analytic hessian is specified (even though the routine notes convergence is probable)
(nlm.sol2 <- nlm(p=runif(dat$p+1), f=logistic_ll, gr=TRUE, he=TRUE, y=dat$y, x=dat$X, hessian = TRUE, check.analyticals=TRUE))$estimate
> [1] 0.009827701 0.010687278 0.044178416 0.016255630
But the problem becomes apparent when p is larger, here it is 10
dat = mk.logit.data(N=100, p=10)
Again, glm solution for reference. nlm should give the same answer, allowing for small errors due to approximation.
(glm.sol <- glm(dat$y~dat$X, family=binomial()))$coefficients
> (Intercept) dat$X1 dat$X2 dat$X3 dat$X4 dat$X5 dat$X6 dat$X7
> -0.07071882 -0.08670003 0.16436630 0.01130549 0.17302058 0.03821008 0.08836471 -0.16578959
> dat$X8 dat$X9 dat$X10
> -0.07515477 -0.08555075 0.29119963
# works when correct analytic gradient is specified
(nlm.sol1 <- nlm(p=runif(dat$p+1), f=logistic_ll, gr=TRUE, y=dat$y, x=dat$X))$estimate
> [1] -0.07071879 -0.08670005 0.16436632 0.01130550 0.17302057 0.03821009 0.08836472
> [8] -0.16578958 -0.07515478 -0.08555076 0.29119967
# fails to converge in 5000 iterations when correct analytic hessian is specified
(nlm.sol2 <- nlm(p=runif(dat$p+1), f=logistic_ll, gr=TRUE, he=TRUE, y=dat$y, x=dat$X, hessian = TRUE, iterlim=5000, check.analyticals=TRUE))$estimate
> [1] 0.31602065 -0.06185190 0.10775381 -0.16748897 0.05032156 0.34176104 0.02118631
> [8] -0.01833671 -0.20364929 0.63713991 0.18390489
Edit: I should also add that I have confirmed I have the correct Hessian matrix through multiple different approaches
I tried the code, but at first it seemed to be using a different rmvnorm than I can find on CRAN. I found one rmvnorm in dae package, then one in the mvtnorm package. The latter is the one to use.
nlm() was patched about the time of the above posting. I'm currently trying to verify the patches and it now seems to work OK. Note that I am author of a number of R's optimization codes, including 3/5 in optim().
nashjc at uottawa.ca
Code is below.
Revised code:
# example demonstrating a problem with NLM
expit <- function(mu) {1/(1+exp(-mu))}
mk.logit.data <- function(N,p){
set.seed(1232)
U = matrix(runif(p*p), nrow=p, ncol=p)
S = 0.5*(U+t(U)) + p*diag(rep(1,p))
X = rmvnorm(N, mean = runif(p, -1, 1), sigma = S)
Design = cbind(rep(1, N), X)
beta = sort(sample(c(rep(0,p), runif(1))))
y = rbinom(N, 1, expit(Design%*%beta))
list(X=X,y=as.numeric(y),N=N,p=p)
}
# function to calculate gradient vector at given coefficient values
logistic_gr <- function(beta, y, x, min=TRUE){
mu = beta[1] + x %*% beta[-1]
p = length(beta)
n = length(y)
D = cbind(rep(1,n), x)
gri = matrix(nrow=n, ncol=p)
for(j in 1:p){
gri[,j] = D[,j]*(exp(-mu)*y-1+y)/(1+exp(-mu))
}
gr = apply(gri, 2, sum)
if(min) gr = -gr
gr
}
# function to calculate Hessian matrix at given coefficient values
logistic_hess <- function(beta, y, x, min=TRUE){
# allow to fail with NA, NaN, Inf values
mu = beta[1] + x %*% beta[-1]
p = length(beta)
n = length(y)
D = cbind(rep(1,n), x)
h = matrix(nrow=p, ncol=p)
for(j in 1:p){
for(k in 1:p){
h[j,k] = -sum(D[,j]*D[,k]*(exp(-mu))/(1+exp(-mu))^2)
}
}
if(min) h = -h
h
}
# function to calculate likelihood (up to a constant) at given coefficient values
logistic_ll <- function(beta, y,x, gr=FALSE, he=FALSE, min=TRUE){
mu = beta[1] + x %*% beta[-1]
lli = log(expit(mu))*y + log(1-expit(mu))*(1-y)
ll = sum(lli)
if(is.na(ll) | is.infinite(ll)) ll = -1e16
if(min) ll=-ll
# the below specification is required for using analytic gradient/Hessian in nlm function
if(gr) attr(ll, "gradient") <- logistic_gr(beta, y=y, x=x, min=min)
if(he) attr(ll, "hessian") <- logistic_hess(beta, y=y, x=x, min=min)
ll
}
##!!!! NOTE: Must have this library loaded
library(mvtnorm)
dat = mk.logit.data(N=100, p=3)
(glm.sol <- glm(dat$y~dat$X, family=binomial()))$coefficients
# works when correct analytic gradient is specified
(nlm.sol1 <- nlm(p=runif(dat$p+1), f=logistic_ll, gr=TRUE, y=dat$y, x=dat$X))$estimate
# works, but less accurate when correct analytic hessian is specified (even though the routine notes convergence is probable)
(nlm.sol2 <- nlm(p=runif(dat$p+1), f=logistic_ll, gr=TRUE, he=TRUE, y=dat$y, x=dat$X, hessian = TRUE, check.analyticals=TRUE))$estimate
dat = mk.logit.data(N=100, p=10)
# Again, glm solution for reference. nlm should give the same answer, allowing for small errors due to approximation.
(glm.sol <- glm(dat$y~dat$X, family=binomial()))$coefficients
# works when correct analytic gradient is specified
(nlm.sol1 <- nlm(p=runif(dat$p+1), f=logistic_ll, gr=TRUE, y=dat$y, x=dat$X))$estimate
# fails to converge in 5000 iterations when correct analytic hessian is specified
(nlm.sol2 <- nlm(p=runif(dat$p+1), f=logistic_ll, gr=TRUE, he=TRUE, y=dat$y, x=dat$X, hessian = TRUE, iterlim=5000, check.analyticals=TRUE))$estimate

How to estimate the Kalman Filter with 'KFAS' R package, with an AR(1) transition equation?

I am using 'KFAS' package from R to estimate a state-space model with the Kalman filter. My measurement and transition equations are:
y_t = Z_t * x_t + \eps_t (measurement)
x_t = T_t * x_{t-1} + R_t * \eta_t (transition),
with \eps_t ~ N(0,H_t) and \eta_t ~ N(0,Q_t).
So, I want to estimate the variances H_t and Q_t, but also T_t, the AR(1) coefficient. My code is as follows:
library(KFAS)
set.seed(100)
eps <- rt(200, 4, 1)
meas <- as.matrix((arima.sim(n=200, list(ar=0.6), innov = rnorm(200)*sqrt(0.5)) + eps),
ncol=1)
Zt <- 1
Ht <- matrix(NA)
Tt <- matrix(NA)
Rt <- 1
Qt <- matrix(NA)
ss_model <- SSModel(meas ~ -1 + SSMcustom(Z = Zt, T = Tt, R = Rt,
Q = Qt), H = Ht)
fit <- fitSSM(ss_model, inits = c(0,0.6,0), method = 'L-BFGS-B')
But it returns: "Error in is.SSModel(do.call(updatefn, args = c(list(inits, model), update_args)),: System matrices (excluding Z) contain NA or infinite values, covariance matrices contain values larger than 1e+07"
The NA definitions for the variances works well, as documented in the package's paper. However, it seems this cannot be done for the AR coefficients. Does anyone know how can I do this?
Note that I am aware of the SSMarima function, which eases the definition of the transition equation as ARIMA models. Although I am able to estimate the AR(1) coef. and Q_t this way, I still cannot estimate the \eps_t variance (H_t). Moreover, I am migrating my Kalman filter codes from EViews to R, so I need to learn SSMcustom for other models that are more complicated.
Thanks!
It seems that you are missing something in your example, as your error message comes from the function fitSSM. If you want to use fitSSM for estimating general state space models, you need to provide your own model updating function. The default behaviour can only handle NA's in covariance matrices H and Q. The main goal of fitSSM is just to get started with simple stuff. For complex models and/or large data, I would recommend using your self-written objective function (with help of logLik method) and your favourite numerical optimization routines manually for maximum performance. Something like this:
library(KFAS)
set.seed(100)
eps <- rt(200, 4, 1)
meas <- as.matrix((arima.sim(n=200, list(ar=0.6), innov = rnorm(200)*sqrt(0.5)) + eps),
ncol=1)
Zt <- 1
Ht <- matrix(NA)
Tt <- matrix(NA)
Rt <- 1
Qt <- matrix(NA)
ss_model <- SSModel(meas ~ -1 + SSMcustom(Z = Zt, T = Tt, R = Rt,
Q = Qt), H = Ht)
objf <- function(pars, model, estimate = TRUE) {
model$H[1] <- pars[1]
model$T[1] <- pars[2]
model$Q[1] <- pars[3]
if (estimate) {
-logLik(model)
} else {
model
}
}
opt <- optim(c(1, 0.5, 1), objf, method = "L-BFGS-B",
lower = c(0, -0.99, 0), upper = c(100, 0.99, 100), model = ss_model)
ss_model_opt <- objf(opt$par, ss_model, estimate = FALSE)
Same with fitSSM:
updatefn <- function(pars, model) {
model$H[1] <- pars[1]
model$T[1] <- pars[2]
model$Q[1] <- pars[3]
model
}
fit <- fitSSM(ss_model, c(1, 0.5, 1), updatefn, method = "L-BFGS-B",
lower = c(0, -0.99, 0), upper = c(100, 0.99, 100))
identical(ss_model_opt, fit$model)

Bayesian simple linear regression Gibbs Sampling with gamma prior

Please help me out.
I am doing Metopolis_hasting within Gibbs to generate a Markov Chian with stationary distribution equal to the joint conditional distribution of (beta,phi) given observed y. Where the model for y is simple linear regression and phi is 1/sigma^2. The full conditional distribution for phi is gamma(shape=shape_0+n/2,rate=rate_0 + 0.5*sum((y$y-b[1]-b[1]*y$x)^2)) where shape_0 and rate_0 are prior distribution of phi (which follows a gamma)
Here is my code:
y <- read.table("...",header = T)
n <- 50
shape_0 <- 10
rate_0 <- 25
shape <- shape_0+n/2
mcmc <- function (n = 10){
X <- matrix(0,n,3)
b <- c(5,2)
phi <- 0.2
X[1,] <- c(b,phi)
count1 <- 0
count2 <- 0
for (i in 2:n){
phi_new <- rnorm(1,phi,1) #generate new phi candidate
rate <- rate_0 + 0.5*sum((y$y-b[1]-b[1]*y$x)^2)
prob1 <- min(dgamma(phi_new,shape = shape,
rate = rate)/dgamma(phi,shape = shape, rate = rate),1)
##here is where I run into trouble, dgamma(phi_new,shape = shape,
##rate = rate)
##and dgamma(phi,shape = shape, rate = rate) both gives 0
u <- runif(1)
if (prob1>u)
{X[i,3] <- phi_new; count1=count1+1}
else {X[i,3] <-phi}
phi <- X[i,3]
....}
I know I should use log transformation on the precision parameter, but I'm not exactly sure how to do it. log(dgamma(phi_new,shape = shape, rate = rate)) would return -inf.
Thank you so much for help.

Understanding different results of optim() and lm()

Given:
set.seed(1001)
outcome<-rnorm(1000,sd = 1)
covariate<-rnorm(1000,sd = 1)
log-likelihood of normal pdf:
loglike <- function(par, outcome, covariate){
cov <- as.matrix(cbind(1, covariate))
xb <- cov * par
(- 1/2* sum((outcome - xb)^2))
}
optimize:
opt.normal <- optim(par = 0.1,fn = loglike,outcome=outcome,cov=covariate, method = "BFGS", control = list(fnscale = -1),hessian = TRUE)
However I get different results when running an simple OLS. However maximizing log-likelihhod and minimizing OLS should bring me to a similar estimate. I suppose there is something wrong with my optimization.
summary(lm(outcome~covariate))
Umm several things... Here's a proper working likelihood function (with names x and y):
loglike =
function(par,x,y){cov = cbind(1,x); xb = cov %*% par;(-1/2)*sum((y-xb)^2)}
Note use of matrix multiplication operator.
You were also only running it with one par parameter, so it was not only broken because your loglike was doing element-element multiplication, it was only returning one value too.
Now compare optimiser parameters with lm coefficients:
opt.normal <- optim(par = c(0.1,0.1),fn = loglike,y=outcome,x=covariate, method = "BFGS", control = list(fnscale = -1),hessian = TRUE)
opt.normal$par
[1] 0.02148234 -0.09124299
summary(lm(outcome~covariate))$coeff
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02148235 0.03049535 0.7044466 0.481319029
covariate -0.09124299 0.03049819 -2.9917515 0.002842011
shazam.
Helpful hints: create data that you know the right answer for - eg x=1:10; y=rnorm(10)+(1:10) so you know the slope is 1 and the intercept 0. Then you can easily see which of your things are in the right ballpark. Also, run your loglike function on its own to see if it behaves as you expect.
Maybe you will find it usefull to see the difference between these two methods from my code. I programmed it the following way.
data.matrix <- as.matrix(hprice1[,c("assess","bdrms","lotsize","sqrft","colonial")])
loglik <- function(p,z){
beta <- p[1:5]
sigma <- p[6]
y <- log(data.matrix[,1])
eps <- (y - beta[1] - z[,2:5] %*% beta[2:5])
-nrow(z)*log(sigma)-0.5*sum((eps/sigma)^2)
}
p0 <- c(5,0,0,0,0,2)
m <- optim(p0,loglik,method="BFGS",control=list(fnscale=-1,trace=10),hessian=TRUE,z=data.matrix)
rbind(m$par,sqrt(diag(solve(-m$hessian))))
And for the lm() method I find this
m.ols <- lm(log(assess)~bdrms+lotsize+sqrft+colonial,data=hprice1)
summary(m.ols)
Also if you would like to estimate the elasticity of assessed value with respect to the lotsize or calculate a 95% confidence interval
for this parameter, you could use the following
elasticity.at.mean <- mean(hprice1$lotsize) * m$par[3]
var.coefficient <- solve(-m$hessian)[3,3]
var.elasticity <- mean(hprice1$lotsize)^2 * var.coefficient
# upper bound
elasticity.at.mean + qnorm(0.975)* sqrt(var.elasticity)
# lower bound
elasticity.at.mean + qnorm(0.025)* sqrt(var.elasticity)
A more simple example of the optim method is given below for a binomial distribution.
loglik1 <- function(p,n,n.f){
n.f*log(p) + (n-n.f)*log(1-p)
}
m <- optim(c(pi=0.5),loglik1,control=list(fnscale=-1),
n=73,n.f=18)
m
m <- optim(c(pi=0.5),loglik1,method="BFGS",hessian=TRUE,
control=list(fnscale=-1),n=73,n.f=18)
m
pi.hat <- m$par
numerical calculation of s.d
rbind(pi.hat=pi.hat,sd.pi.hat=sqrt(diag(solve(-m$hessian))))
analytical
rbind(pi.hat=18/73,sd.pi.hat=sqrt((pi.hat*(1-pi.hat))/73))
Or this code for the normal distribution.
loglik1 <- function(p,z){
mu <- p[1]
sigma <- p[2]
-(length(z)/2)*log(sigma^2) - sum(z^2)/(2*sigma^2) +
(mu*sum(z)/sigma^2) - (length(z)*mu^2)/(2*sigma^2)
}
m <- optim(c(mu=0,sigma2=0.1),loglik1,
control=list(fnscale=-1),z=aex)

Resources