I am trying to compare two distributions by using Likelihood ratio test. I used the maxLik function to obtain mles of both of null and alternative distributions. I want to use BFGSR method because it gives better estimates.
library("maxLik")
library("flexsurv")
n=20
den1=1000
mpar=3
omepar=5
spar=3
Logliknak1=function(param){
m=param[1]
o=param[2]
n*(log(2)+m*log(m)-lgamma(m)-m*log(o))+(2*m-1)*sum(log(y))-(m/o)*sum(y^(2))
}
Loglikgennak= function(param){
s <- param[1]
ome <- param[2]
m<-param[3]
(n*(log(2*s)+m*log(m)-lgamma(m)-m*log(ome))+(2*m*s-1)*sum(log(y))-(m/ome)*sum(y^(2*s)))
}
LR2=rep(0,den1)
ps=0; pome=0; pm=0;
for(i in 1:den1){
repeat
{
x=rgengamma.orig(n,shape=spar,scale=(omepar/mpar),k=mpar)
y=x^0.5
ot=mean(y^2)
mt=(mean(y)*mean(y^2))/(2*(mean(y^3)-(mean(y)*mean(y^2))))
mle2 <- maxLik(logLik=Logliknak1, start = c(m=mt, o=ot),method="BFGSR")
lnull=logLik(mle2)
mm=coef(mle2)[[1]]
mo=coef(mle2)[[2]]
mle3 <- maxLik(logLik=Loglikgennak, start = c(s=1.5,ome=omepar+1,m=mpar+1),method="BFGSR")
lalt=logLik(mle3)
ps=coef(mle3)[[1]]
pome=coef(mle3)[[2]]
pm=coef(mle3)[[3]]
if (lalt>lnull && ps>1 && pome>0 && pm>0)
{break}
}
LR2[i]=2*(lalt-lnull)
print(i)
print(LR2[i])
print(pm)
print(pome)
print(ps)
}
However I keep getting the following error message:
Error in if (all(y == 0)) { : missing value where TRUE/FALSE needed
How do I fix this?
Related
So I want to estimate 5 parameters consist of intercept, age, disease, mdu, and alfa from panel data. And I write the pdf, here's the code of multivariate negative binomial for panel data. I use optim() package to estimate the parameters. Please help me to fix what is wrong with my code..
library(pglm)
library(plm)
data("HealthIns")
dat<- pdata.frame(HealthIns,index = c("id","year"))
y<-data.matrix(dat$mdu)
y[is.na(y)]=0
Y<-matrix(data=y,nrow=5908,ncol=5)
dat$ageclass<-ifelse(dat$age >=30,1,0)
x1<-data.matrix(dat$ageclass)
x1[is.na(x1)]=0
X1<-matrix(data=x1,nrow=5908,ncol=5)
dat$gender <-ifelse(dat$sex=="male",1,0)
x2<-data.matrix(dat$gender)
x2[is.na(x2)]=0
X2<-matrix(data=x2,nrow=5908,ncol=5)
x3<-data.matrix(dat$disease)
x3[is.na(x3)]=0
X3<-matrix(data=x3,nrow=5908,ncol=5)
Function for optim package
po.gam=function(para){
#Lambda(i,t)
{for (i in (1:5908)){
for(t in (1:5)){
lambda<-matrix(para[1] + para[2]*X1 + para[3]*X2 +
para[4]*X3,nrow=5908,ncol=5)}}
}
#Sigma N(i,t) terhadap t
num.claims.of.t <-numeric(nrow(Y))
{for (i in seq(nrow(Y))){
num.claims.of.t[i] <-sum(Y[i,])}
}
#Sigma Lambda(i,t) terhadap t
num.lambda.of.t<-numeric(nrow(Y))
{for (i in seq(nrow(Y))){
num.lambda.of.t[i]<-sum(lambda[i,])}
}
#Produc Exponential Dist
prod.exp<-numeric(nrow(Y))
{for (i in seq(nrow(Y))){
prod.exp[i]<-prod(lambda[i,]^Y[i,]/factorial(Y[i,]))}
}
#JOINT PROBABILITY OF TIMESNYA...
joint.pdf.mvnb<-prod.exp*gamma(num.claims.of.t + (1/para[5]))/gamma(1/para[5])*((1/para[5])/(num.lambda.of.t + (1/para[5])))^(1/para[5])*(num.lambda.of.t + (1/para[5]))^(-num.claims.of.t)
#PRODUC NUMBER OF CLAIMS SEMUA INDIVIDU
-log(prod(joint.pdf.mvnb))
}
start.value <- c(beta0=1,beta1=1,beta2=1,beta3=1,alfa=1)
MLE_estimator<-optim(start.value,po.gam,hessian=TRUE)
MLE_estimator
And here is my result
> MLE_estimator<-optim(start.value,po.gam,hessian=TRUE)
Error in optim(start.value, po.gam, hessian = TRUE) :
function cannot be evaluated at initial parameters
library(mvtnorm)
set.seed(14)
n=10000
sigmatrue<-1
rhotrue<-0.3
b1=0.05
b0=0
y<-arima.sim(model=list(ar=c(0.3)),n=10000 ,sd=sigmatrue)#kataskevi
#xronoseiras
x=rep(0,n)
for(i in 1:n){
x[i]=i
}
for(t in 1:n)
{
y[t]=y[t]+b0+b1*x[t]
}
est=arima(y,order=c(1,0,0),xreg=x,include.mean=TRUE,method="ML",kappa=1e+06)
cens<-rep(0, n)
c=(9/10)*(n*b1+b0)
for (i in 1:n) {
if(y[i]>c){
y[i]<-c
cens[i]<-1
}
}
ll<-function(p){
sigma=matrix(c(p[2]^2/(1-p[3]^2), p[2]^2*p[3]/(1-p[3]^2),p[2]^2*p[3]/(1-p[3]^2),p[2]^2/(1-p[3]^2)),ncol=2,nrow=2,byrow=TRUE)
likelihood<-rep(0,n)
for(t in 2 :n){
if(cens[t]==0 & cens[t-1]==0){
likelihood[t]<-dnorm(((y[t]-(p[1]+p[4]*t)-p[3]*(y[t-1]-(p[1]+p[4]*(t-1)))/p[2]) )/p[2])
}
else if(cens[t]==0 & cens[t-1]==1){
likelihood[t]<-(1/(1-pnorm((c-(p[1]+p[4]*t)*sqrt(1-p[3]^2)/p[2]))*sqrt(1-p[3]^2)/p[2]*dnorm(((y[t]-(p[1]+p[4]*t)*sqrt(1-p[3]^2))/p[2])*(1-pnorm(((c-(p[1]+p[4]*(t))-p[3]*(y[t]-(p[1]+p[4]*(t-1)))/p[2])))))))
}
else if(cens[t]==1 & cens[t-1]==0){
likelihood[t]<-1-pnorm(((c-(p[1]+p[4]*t)-p[3]*(y[t-1]-(p[1]+p[4]*(t-1)))/p[2])))
}
else
{
likelihood[t]<-(((pmvnorm(lower=c, upper=Inf , mean=c(p[1]+p[4]*(t-1),p[1]+p[4]*t),sigma=sigma))/(1-pnorm((c-(p[1]+p[4]*(t-1))*sqrt(1-p[3]^2)/p[2])))))
}
}
f0=(sqrt(1-p[3])/p[2]*dnorm(((y[1]-p[1]-p[4])*sqrt(1-p[3]^2))/p[2]))
likelihood[1]=f0
#Ta prosthesa
if (any(likelihood==0)){
likelihood[likelihood==0] = 0.000001 #poly mikros arithmos
}
if (any(likelihood==Inf)){
likelihood[likelihood==Inf] = 1 #poly megalos h 1, an milame gia pi8anothta
}
if (any(is.nan(likelihood))){
likelihood[is.nan(likelihood)] = 0.000001
}
minusloglike=-sum(log(likelihood))
#l1=list(Minusloglike=minusloglike,Loglikelihood=log(likelihood))
return(minusloglike)
}
fit<-optim(c(0,1,0.3,0.05),ll,method="L-BFGS-B",lower=c(-Inf,0.001,-0.999,-Inf),upper = c(Inf,Inf,0.999,Inf),hessian=TRUE)
fisher.info<-solve(fit$hessian)
fisher.info
prop.sigma<-sqrt(diag(fisher.info))
sigmas<-diag(prop.sigma)
upper<-fit$par+1.96*sigmas
lower<-fit$par-1.96*sigmas
interval<-data.frame(value=fit$par, lower=diag(lower),upper=diag(upper))
interval
I run this code(it is for censored first order autogressive process with covariate , i have 4 cases for x(t) ,x(t-1) either is censored or non-censored and i dont want the likelihood to go near zero and inf).I get error
Error in if (any(likelihood == Inf)) { :
missing value where TRUE/FALSE needed
Called from: fn(par, ...)
The program is working for n=100 but when n is larger than 100 i have this error. I think this error causes bad estimattes of the four parameters(b1,rho,sigma,b0).Does anyone know what can i do?
Thank you for your help.
I am trying reproduce the Table 1 results from the page 12 using the equation given the page 13.To access the journal article please click https://arxiv.org/pdf/math/0605322.pdf. The corresponding equation is given below.
My r code is give below. Am I programmed correctly?
mytest=function(n,s,c1){
t = sum(s)
k=which.max(s[19:n]>=c1)
if(k==1 && s[19]<c1)
return(c(n,0))
else
return(c(k,1))
}
for (n in c(100,200,400)){
for (i in c(-0.5, -1.0)){
a1=0
c1 = 20
asn1=0
for (m in 1:1000){
g=c(dnorm(n,0,1))
f=c(dnorm(n,i,1))
s = log(g/f)
test=mytest(n,s,c1)
a1=a1+test[2]
asn1=asn1+test[1]
}
}
out <- list(power= a1/m, asn=asn1/m)
return(out)
}
But I am getting the following errors.
Error in if (k == 1 && s[19] < c1) return(c(n, 0)) else return(c(k, 1)) :
missing value where TRUE/FALSE needed
The first time you call mytest, you have n=100, i=-0.5 which yields s=NaN. Therefore, you get an error on line if(k==1 && s[19]<c1) given that s[19]=NaN.
Here's a workaround, but you need to make sure it does what you expect/wish :
mytest=function(n,s,c1){
if(is.na(s)) return(c(c1,1)) # skips if NaN
t = sum(s)
k=which.max(s[19:n]>=c1)
if(k==1 && s[19]<c1)
return(c(n,0))
else
return(c(k,1))
}
I'm trying to evaluate trees for a number of output parameters, in a loop. But sometimes the tree function aborts. How can the lines be surrounded by a try catch block?
I apologize for not having "real" code, but I don't have an example of a non working tree. Here's pseddo code to illustrate the current implementation
for (icol in seq(1,ncol)) {
cName <-colnames(dt)[icol]
tdata <- dt[,unique(c(1,2,icol)),with=F]
nTrues <- sum(rowSums(tdata[,cName,with=F]))
if (nTrues>0 ) {
print(paste('processing column',icol,'of',ncol,': ',cName))
nFac <- table(tdata[,cName,with=F])
print(nFac)
treeData <- merge(tdata, maint_data)
treeData[,c('identifiers'):=NULL]
fmla <- paste(cName,'~ .')
if (TRUE) {
# Recursive Partitioning and Regression Trees
cat('Recursive Partitioning and Regression Trees (rpart)','\n')
rtree <- rpart(fmla,data=treeData) # <-- NEED TRY CATCH HERE...
print(summary(rtree))
cat('Confusion matrix for rpart')
print(table(predict(rtree), treeData[[cName]]))
}
flush.console()
} else {
print(paste('skipping column',icol,'of',ncol(ci_ratio_before_larger),': ',cName))
}
}
Here's a correction that seems to work....
tryCatch({
# Recursive Partitioning and Regression Trees
cat('Recursive Partitioning and Regression Trees (rpart)','\n')
rtree <- rpart(fmla,data=treeData)
print(summary(rtree))
cat('Confusion matrix for rpart')
print(table(predict(rtree,type='vector'), treeData[[cName]]))
},
error = function (condition) {
print("RPART_ERROR:")
print(paste(" Message:",conditionMessage(condition)))
print(paste(" Call: ",conditionCall(condition)))
}
)
I cannot really test it, but can you try replacing your
if (TRUE)
condition with this:
tryCatch({
# Recursive Partitioning and Regression Trees
cat('Recursive Partitioning and Regression Trees (rpart)','\n')
rtree <- rpart(fmla,data=treeData) # <-- NEED TRY CATCH HERE...
print(summary(rtree))
cat('Confusion matrix for rpart')
print(table(predict(rtree), treeData[[cName]]))
},
error = function (condition) {
print("RPART_ERROR:")
print(paste(" Message:",conditionMessage(condition)))
print(paste(" Call: ",conditionCall(condition)))
},
finally= function() {
}
)
In my previous question:How do I put arena limits on a random walk? the community helped create a random walk function in a set arena. This function is designed to simulate a fish moving through an area, but now I need to make it decide when to stop when a certain condition is satisfied.
I thought it would be as simple as
{{if(z>P)break}} put in just before the loop function. What I want it to understand is "if this condition is satisfied then stop, otherwise keep going until you reach the maximum number of steps.
Instead it caused my random walk to become deterministic (I always get the same path and it never stops before step.max).
Main question: How do I tell the random walk to stop if z>P?
For reference:
step.max<-125
step.prob<-function(n.times=step.max){
draw=sample(0:100,1,replace=T)
CS<-sample(draw,size=1,replace=TRUE)
CS.max<-100
step.num<-15
SP<-((CS/CS.max)*(1-(step.num/step.max))+(step.num/step.max))*100
if(SP>P){stop('Settled at step number',P)}else{SP
}
}
z<-step.prob(1) #renaming the above function to be easier to reference later
P<-80 #preset cutoff point for value z, ranges from 0-100
walkE <- function(n.times=125,
xlim=c(524058,542800),
ylim=c(2799758,2818500),
start=c(525000,2810000),
stepsize=c(4000,4000)) {
plot(c(0,0),type="n",xlim=xlim,ylim=ylim,
xlab="Easting",ylab="Northing")
x <- start[1]
y <- start[2]
steps <- 1/c(1,2,4,8,12,16)
steps.y <- c(steps,-steps,0)
steps.x <- c(steps,-steps[c(1,5,6)],0)
points(x,y,pch=16,col="red",cex=1)
for (i in 1:n.times) {
repeat {
xi <- stepsize[1]*sample(steps.x,1)
yi <- stepsize[2]*sample(steps.y,1)
newx <- x+xi
newy <- y+yi
if (newx>xlim[1] && newx<xlim[2] &&
newy>ylim[1] && newy<ylim[2]) break
}
lines(c(x,newx),c(y,newy),col="blue")
x <- newx
y <- newy
if(z>P){stop(points(newx,newy,col="green",cex=1))}
#this is where I want it to stop if z>P
else
if(z<P){points(newx,newy,pch=1,col="blue",cex=1)}
else
if(step.max){points(newx,newy,pch=16,col="green",cex=1)}
set.seed(101)}
}
walkE(step.max) #run above random walk function walkE looped for the step.max number
Thanks in advance!!!
This is pretty easy and can be accomplished by inserting a stop(...) function in your user defined step.prob function.
step.prob<-function(n.times=step.max, p){
draw=sample(0:100,1,replace=T)
CS<-sample(draw,size=1,replace=TRUE)
CS.max<-100
CS.max
step.num<-15
SP<-((CS/CS.max)*(1-(step.num/step.max))+(step.num/step.max))*100
if(SP > p) {
stop('Your random walk exceeded ', p)
} else {
SP
}
}
If this doesn't do it for you look into the break command.
So, when the random walk value is > p:
step.prob(p=300000)
# Error in step.prob(p = 3) : Your random walk exceeded 3
And if you want to set the value returned by the function to p you can just add in SP <- p before the stop command.