Simulation for confidence interval in R

I have an R function that provides the 95% confidence interval for the ncp (non-centrality parameter) of a t distribution.
Via simulation in R, is it possible to show that, in the long run, the CIs from this function capture a given TRUE ncp (here 2, the same as the input t) 95% of the time?
(I would appreciate any ideas on how to do this.)
CI.ncp <- function(t, N){
  f <- function(ncp, alpha, q, df) {
    abs(suppressWarnings(pt(q = t, df = N - 1, ncp, lower.tail = FALSE)) - alpha)
  }
  sapply(c(0.025, 0.975),
         function(x) optim(1, f, alpha = x, q = t, df = N - 1,
                           control = list(reltol = (.Machine$double.eps)))[[1]])
}

# Example of use:
CI.ncp(t = 2, N = 20)  # gives: -0.08293755 4.03548862
# (in the long run, 95% of the time, "2" is contained within these
#  two numbers; how to show this in R?)
Here is what I have tried with no success:
fun <- function(t = 2, N = 20){
  ncp = rt(1, N - 1, t)
  CI.ncp(t = 2, N = 20)
  mean(ncp <= 2 & 2 <= ncp)
}
R <- 1000
sim <- t(replicate(R, fun()))
coverage <- mean(sim[,1] <= 2 & 2 <= sim[,2])

The problem is that we need to feed the random ncp obtained in fun into CI.ncp:
fun <- function(t = 2, N = 20){
  ncp = rt(1, N - 1, t)
  CI.ncp(t = ncp, N = 20)
}
R <- 1e4
sim <- t(replicate(R, fun()))
coverage <- mean(sim[,1] <= 2 & 2 <= sim[,2])

I would use package MBESS.
# install.packages("MBESS")
library(MBESS)

fun <- function(t = 2, N = 20, alpha = 0.95){
  x = rt(1, N - 1, t)
  conf.limits.nct(x, df = N, conf.level = alpha)[c(1, 3)]
}

set.seed(5221)
R <- 1000
sim <- t(replicate(R, fun()))
head(sim)
coverage <- mean(sim[,1] <= 2 & 2 <= sim[,2])
coverage
#> [1] 0.941
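For completeness, the same coverage check can also be run with the CI.ncp function from the question itself (a minimal sketch; the seed below is arbitrary, and the exact coverage will vary from run to run but should land near 0.95):
set.seed(123)  # arbitrary seed, for reproducibility only
R <- 1000
sim <- t(replicate(R, {
  x <- rt(1, df = 20 - 1, ncp = 2)   # simulate a t statistic whose true ncp is 2
  CI.ncp(t = x, N = 20)              # 95% CI for the ncp based on that statistic
}))
mean(sim[, 1] <= 2 & 2 <= sim[, 2])  # proportion of intervals that cover the true ncp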

Related

Binary Logistic Regression with BFGS using package maxLik

I tried binary logistic regression with BFGS using maxLik, following the syntax attached below, but I get output like this:
Maximum Likelihood estimation
BFGS maximization, 0 iterations
Return code 100: Initial value out of range.
https://docs.google.com/spreadsheets/d/1fVLeJznB9k29FQ_BdvdCF8ztkOwbdFpx/edit?usp=sharing&ouid=109040212946671424093&rtpof=true&sd=true (this is my data)
library(maxLik)
library(optimx)
library(readxl)

data = read_excel("Book2.xlsx")
data$JKLaki = ifelse(data$JK == 1, 1, 0)
data$Daerah_Samarinda <- ifelse(data$Daerah == 1, 1, 0)
data$Prodi2 = ifelse(data$Prodi == 2, 1, 0)
data$Prodi3 = ifelse(data$Prodi == 3, 1, 0)
data$Prodi4 = ifelse(data$Prodi == 4, 1, 0)
str(data)
attach(data)

ll <- function(param){
  mu <- param[1]
  beta <- param[-1]
  y <- as.vector(data$Y)
  x <- cbind(1, data$JKLaki, data$IPK, data$Daerah_Samarinda, data$Prodi2, data$Prodi3, data$Prodi4)
  xb <- x %*% beta
  pi <- exp(xb)
  val <- -sum(y * log(pi) + (1 - y) * log(1 - pi), log = TRUE)
  return(val)
}

gl <- function(param){
  mu <- param[1]
  beta <- param[-1]
  y <- as.vector(data$Y)
  x <- cbind(0, data$JKLaki, data$IPK, data$Daerah_Samarinda, data$Prodi2, data$Prodi3, data$Prodi4)
  sigma <- x * beta
  pi <- exp(sigma) / (1 + exp(sigma))
  v = y - pi
  vx = as.matrix(x) %*% as.vector(v)
  gg = colSums(vx)
  return(-gg)
}

mle <- maxLik(logLik = ll, grad = gl, hess = NULL,
              start = c(mu = 1, beta1 = 0, beta2 = 0, beta3 = 0, beta4 = 0,
                        beta5 = 0, beta6 = 0, beta7 = 0),
              method = "BFGS")
summary(mle)
Can anyone help? I have tried but could not find a solution.
I have been able to optimize the log-likelihood with the following code:
library(DEoptim)
library(readxl)

data <- read_excel("Book2.xlsx")
data$JKLaki <- ifelse(data$JK == 1, 1, 0)
data$Daerah_Samarinda <- ifelse(data$Daerah == 1, 1, 0)
data$Prodi2 <- ifelse(data$Prodi == 2, 1, 0)
data$Prodi3 <- ifelse(data$Prodi == 3, 1, 0)
data$Prodi4 <- ifelse(data$Prodi == 4, 1, 0)

ll <- function(param, data) {
  mu <- param[1]
  beta <- param[-1]
  y <- as.vector(data$Y)
  x <- cbind(1, data$JKLaki, data$IPK, data$Daerah_Samarinda, data$Prodi2, data$Prodi3, data$Prodi4)
  xb <- x %*% beta
  pi <- exp(mu + xb)
  val <- -sum(y * log(pi) + (1 - y) * log(1 - pi))
  if (is.nan(val)) {
    return(10 ^ 30)
  } else {
    return(val)
  }
}

lower <- rep(-500, 8)
upper <- rep(500, 8)
obj_DEoptim_Iter1 <- DEoptim(fn = ll, lower = lower, upper = upper,
                             control = list(itermax = 5000), data = data)
lower <- obj_DEoptim_Iter1$optim$bestmem - 0.25 * abs(obj_DEoptim_Iter1$optim$bestmem)
upper <- obj_DEoptim_Iter1$optim$bestmem + 0.25 * abs(obj_DEoptim_Iter1$optim$bestmem)
obj_DEoptim_Iter2 <- DEoptim(fn = ll, lower = lower, upper = upper,
                             control = list(itermax = 5000), data = data)
obj_Optim <- optim(par = obj_DEoptim_Iter2$optim$bestmem, fn = ll, data = data)
$par
         par1          par2          par3          par4          par5          par6          par7
-350.91045436  347.79576145    0.05337466    0.69032735   -0.01089112    0.47465162    0.38284804
         par8
   0.42125664

$value
[1] 95.08457

$counts
function gradient
     501       NA

$convergence
[1] 1

$message
NULL
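As a side note, the NaN guard in ll is only needed because pi = exp(mu + xb) is not restricted to (0, 1). A minimal sketch of the same binomial log-likelihood written with the usual inverse-logit link, using a single intercept column instead of a separate mu (an illustration, not the answer's code; it assumes the data object prepared above):
ll_logit <- function(param, data) {
  y <- data$Y
  x <- cbind(1, data$JKLaki, data$IPK, data$Daerah_Samarinda,
             data$Prodi2, data$Prodi3, data$Prodi4)
  p <- plogis(drop(x %*% param))        # inverse-logit keeps probabilities in (0, 1)
  p <- pmin(pmax(p, 1e-12), 1 - 1e-12)  # guard against log(0) at extreme linear predictors
  -sum(y * log(p) + (1 - y) * log(1 - p))
}
fit <- optim(par = rep(0, 7), fn = ll_logit, data = data, method = "BFGS")
fit$par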

MCMC for estimating negative binomial distribution

I want to estimate the parameters of a negative binomial distribution using the Metropolis-Hastings MCMC algorithm. In other words, I have a sample:
set.seed(42)
y <- rnbinom(20, size = 3, prob = 0.2)
and I want to write an algorithm that estimates the size parameter and the prob parameter.
My work so far
I defined the prior distribution of size as Poisson:
prior_r <- function(r) {
  return(dpois(r, lambda = 2, log = TRUE))
}
And the prior distribution of prob as uniform on [0, 1]:
prior_prob <- function(prob) {
  return(dunif(prob, min = 0, max = 1, log = TRUE))
}
Moreover, for simplicity, I defined the log-likelihood and joint probability functions:
loglikelihood <- function(data, r, prob) {
  loglikelihoodValue <- sum(dnorm(data, mean = r, sd = prob, log = TRUE))
  return(loglikelihoodValue)
}
joint <- function(r, prob) {
  data <- y
  return(loglikelihood(data, r, prob) + prior_r(r) + prior_prob(prob))
}
Finally, the whole algorithm:
run_mcmc <- function(startvalue, iterations) {
  chain <- array(dim = c(iterations + 1, 2))
  chain[1, ] <- startvalue
  for (i in 1:iterations) {
    proposal_r <- rpois(1, lambda = chain[i, 1])
    proposal_prob <- chain[i, 2] + runif(1, min = -0.2, max = 0.2)
    quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
    if (runif(1, 0, 1) < min(1, exp(quotient))) {
      chain[i + 1, ] <- c(proposal_r, proposal_prob)
    } else {
      chain[i + 1, ] <- chain[i, ]
    }
  }
  return(chain)
}
The problem
The problem I'm having is that when I run it, even with starting values very close to the correct ones:
iterations <- 2000
startvalue <- c(4, 0.25)
res <- run_mcmc(startvalue, iterations)
I obtain a posterior distribution that is obviously wrong. For example:
> colMeans(res)
[1] 11.963018 0.994533
As you can see, size ends up very close to 12, and prob ends up at 1.
Do you know what is causing this?
Change dnorm in loglikelihood to dnbinom and fix the proposal for prob so it doesn't go outside (0,1):
set.seed(42)
y <- rnbinom(20, size = 3, prob = 0.2)

prior_r <- function(r) {
  return(dpois(r, lambda = 2, log = TRUE))
}
prior_prob <- function(prob) {
  return(dunif(prob, min = 0, max = 1, log = TRUE))
}
loglikelihood <- function(data, r, prob) {
  loglikelihoodValue <- sum(dnbinom(data, size = r, prob = prob, log = TRUE))
  return(loglikelihoodValue)
}
joint <- function(r, prob) {
  return(loglikelihood(y, r, prob) + prior_r(r) + prior_prob(prob))
}
run_mcmc <- function(startvalue, iterations) {
  chain <- array(dim = c(iterations + 1, 2))
  chain[1, ] <- startvalue
  for (i in 1:iterations) {
    proposal_r <- rpois(1, lambda = chain[i, 1])
    proposal_prob <- chain[i, 2] + runif(1, min = max(-0.2, -chain[i, 2]),
                                         max = min(0.2, 1 - chain[i, 2]))
    quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
    if (runif(1, 0, 1) < min(1, exp(quotient))) {
      chain[i + 1, ] <- c(proposal_r, proposal_prob)
    } else {
      chain[i + 1, ] <- chain[i, ]
    }
  }
  return(chain)
}

iterations <- 2000
startvalue <- c(4, 0.25)
res <- run_mcmc(startvalue, iterations)
colMeans(res)
#> [1] 3.1009495 0.1988177
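As a quick follow-up (a minimal sketch using the res object from above), it is usually worth dropping a burn-in period and inspecting the traces before reading off posterior means:
burnin <- 500
keep <- res[-(1:burnin), ]
colMeans(keep)                              # posterior means after burn-in
par(mfrow = c(1, 2))
plot(keep[, 1], type = "l", ylab = "size")  # trace of the size chain
plot(keep[, 2], type = "l", ylab = "prob")  # trace of the prob chain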

Why are the random effect estimates not correct?

I'm trying to run a simulation with glmmLasso using binomial data, but the random effect estimates are not close to the 5 that I specified.
Is something wrong in my code?
If not, why do the random effects come out like that?
makedata <- function(I, J, p, sigmaB){
  N <- I * J
  # fixed effect generation
  beta0 <- runif(1, 0, 1)
  beta <- sort(runif(p, 0, 1))
  # x generation
  x <- matrix(runif(N * p, -1, 1), N, p)
  # random effect generation
  b0 <- rep(rnorm(I, 0, sigmaB), each = J)
  # group
  group <- as.factor(rep(1:I, each = J))
  # y generation
  k <- exp(-(beta0 + x %*% beta + b0))
  y <- rbinom(n = length(k), size = 1, prob = (1 / (1 + k)))
  # standardization
  sx <- scale(x, center = TRUE, scale = TRUE)
  simuldata <- data.frame(y = y, x = sx, group)
  res <- list(simuldata = simuldata)
  return(res)
}
# I : number of groups
I <- 20
# J : number of observations per group
J <- 10
# p : number of variables
p <- 20
# sigmaB : sd of random effect b0
sigmaB <- 5

set.seed(231233)
simdata <- makedata(I, J, p, sigmaB)
lam <- 10
xnam <- paste("x", 1:p, sep = ".")
fmla <- as.formula(paste("y ~ ", paste(xnam, collapse = "+")))

library(glmmLasso)
glmm <- glmmLasso(fmla, rnd = list(group = ~1), data = simdata$simuldata, lambda = lam,
                  control = list(scale = T, center = T))
summary(glmm)
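To see what the fit actually returns for the random intercept, one could compare it against the simulated sigmaB = 5. This is only a sketch: it assumes the fitted object exposes StdDev and ranef components, as described in the glmmLasso documentation.
glmm$StdDev     # estimated standard deviation of the random intercept (target: sigmaB = 5)
sd(glmm$ranef)  # empirical spread of the predicted group-level intercepts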

Maximum likelihood estimation using a step function

I would like to fit a step function (two parameters) to some data. The code below is not doing the job. I wonder if the round() call is the problem. I also tried rescaling the parameters so that small (e.g. 0.001) changes in them cause significant changes in the function, but that did not change the fit. Any idea how to properly fit this function to the data?
dat <- c(rbinom(100, 100, 0.95), rbinom(50, 100, 0.01), rbinom(100, 100, 0.95))
plot(dat/100)

stepFnc <- function(parms, t) {
  par <- as.list(parms)
  (c(rep(1 - (1e-5), par$t1), rep(1e-5, par$t2), rep(1 - (1e-5), t)))[1:t]
}
lines(stepFnc(c(t1 = 50, t2 = 50), length(dat)))

loglik <- function(t1 = 50, t2 = 50) {
  fit <- stepFnc(parms = list(t1 = round(t1, 0), t2 = round(t2, 0)), t = length(dat))
  lines(fit)
  -sum(dbinom(x = dat, size = 100, prob = fit, log = TRUE), na.rm = TRUE)
}
mle <- bbmle::mle2(loglik)
mle@coef
lines(stepFnc(mle@coef, length(dat)), lwd = 2, lty = 2, col = "orange")
With discrete x data I'd do a brute-force approach:
x <- seq_along(dat)

foo <- function(x, lwr, upr) {
  y <- x
  y[x <= lwr | x > upr] <- mean(dat[x <= lwr | x > upr])
  y[x > lwr & x <= upr] <- mean(dat[x > lwr & x <= upr])
  y
}
SSE <- function(lwr, upr) {
  sum((dat - foo(x, lwr, upr))^2)
}

limits <- expand.grid(lwr = x, upr = x)
limits <- limits[limits$lwr <= limits$upr, ]
nrow(limits)

SSEvals <- mapply(SSE, limits$lwr, limits$upr)
id <- which(SSEvals == min(SSEvals))
optlims <- limits[id, ]

meanouter <- mean(dat[x <= optlims$lwr | x > optlims$upr])
meaninner <- mean(dat[x > optlims$lwr & x <= optlims$upr])

bar <- function(x) {
  y <- x
  y[x <= optlims$lwr | x > optlims$upr] <- meanouter
  y[x > optlims$lwr & x <= optlims$upr] <- meaninner
  y
}

plot(dat/100)
curve(bar(x) / 100, add = TRUE)
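A short usage note: after the grid search, the fitted step can be read off directly from the objects created above:
optlims                        # estimated change points (lwr, upr)
c(meanouter, meaninner) / 100  # fitted probabilities outside and inside the step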

p-value from fisher.test() does not match phyper()

Fisher's exact test is related to the hypergeometric distribution, and I would expect these two commands to return identical p-values. Can anyone explain what I'm doing wrong, since they do not match?
# data (variable names chosen to match dhyper() argument names)
x = 14
m = 20
n = 41047
k = 40

# Fisher test, alternative = 'greater'
(fisher.test(matrix(c(x, m - x, k - x, n - (k - x)), 2, 2), alternative = 'greater'))$p.value
# returns 2.01804e-39

# hypergeometric distribution, lower.tail = F, i.e. P[X > x]
phyper(x, m, n, k, lower.tail = F, log.p = F)
# returns 5.115862e-43
In this case, the actual call to phyper that is relevant is phyper(x - 1, m, n, k, lower.tail = FALSE). Look at the source code for fisher.test relevant to your call of fisher.test(matrix(c(x, m-x, k-x, n-(k-x)),2,2), alternative='greater'). At line 138, PVAL is set to:
switch(alternative,
       less = pnhyper(x, or),
       greater = pnhyper(x, or, upper.tail = TRUE),
       two.sided = {
         if (or == 0) as.numeric(x == lo)
         else if (or == Inf) as.numeric(x == hi)
         else {
           relErr <- 1 + 10^(-7)
           d <- dnhyper(or)
           sum(d[d <= d[x - lo + 1] * relErr])
         }
       })
Since alternative = 'greater', PVAL is set to pnhyper(x, or, upper.tail = TRUE). You can see pnhyper defined on line 122. Here or = 1 (the null odds ratio), which is passed to ncp, so the call reduces to phyper(x - 1, m, n, k, lower.tail = FALSE).
With your values:
x = 14
m = 20
n = 41047
k = 40
phyper(x - 1, m, n, k, lower.tail = FALSE)
# [1] 2.01804e-39
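To make the off-by-one explicit: with alternative = 'greater', fisher.test computes P[X >= x], which can also be obtained by summing the hypergeometric pmf directly (a quick check with the values above):
# P[X >= 14]: sum the pmf from x up to the largest possible count, min(m, k) = 20
sum(dhyper(x:min(m, k), m, n, k))
# matches phyper(x - 1, m, n, k, lower.tail = FALSE) and the fisher.test p-value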
