Binary Logistic Regression with BFGS using package maxLik - r

I tried binary logistic regression with BFGS using maxlik, but i have included the feature as per the syntax i attached below, but the result is, but i get output like this
Maximum Likelihood estimation
BFGS maximization, 0 iterations
*Return code 100: Initial value out of range.
https://docs.google.com/spreadsheets/d/1fVLeJznB9k29FQ_BdvdCF8ztkOwbdFpx/edit?usp=sharing&ouid=109040212946671424093&rtpof=true&sd=true (this is my data)*
library(maxLik)
library(optimx)
data=read_excel("Book2.xlsx")
data$JKLaki = ifelse(data$JK==1,1,0)
data$Daerah_Samarinda<- ifelse(data$Daerah==1,1,0)
data$Prodi2 = ifelse(data$Prodi==2,1,0)
data$Prodi3 = ifelse(data$Prodi==3,1,0)
data$Prodi4 = ifelse(data$Prodi==4,1,0)
str(data)
attach(data)
ll<- function(param){
mu <- param[1]
beta <- param[-1]
y<- as.vector(data$Y)
x<- cbind(1, data$JKLaki, data$IPK, data$Daerah_Samarinda, data$Prodi2, data$Prodi3, data$Prodi4)
xb<- x%*%beta
pi<- exp(xb)
val <- -sum(y * log(pi) + (1 - y) * log(1 - pi),log=TRUE)
return(val)
}
gl<- funtion(param){
mu <- param[1]
beta <- param[-1]
y <- as.vector(data$Y)
x <- cbind(0, data$JKLaki,data$IPK,data$Daerah_Samarinda,data$Prodi2,data$Prodi3,data$Prodi4)
sigma <- x*beta
pi<- exp(sigma)/(1+exp(sigma))
v= y-pi
vx=as.matrix(x)%*%as.vector(v)
gg= colSums(vx)
return(-gg)}
mle<-maxLik(logLik=ll, grad=gl,hess=NULL,
start=c(mu=1, beta1=0, beta2=0, beta3=0, beta4=0, beta5=0, beta6=0,beta7=0), method="BFGS")
summary(mle)
can i get some help, i tired get this solution, please.

I have been able to optimize the log-likelihood with the following code :
library(DEoptim)
library(readxl)
data <- read_excel("Book2.xlsx")
data$JKLaki <- ifelse(data$JK == 1, 1, 0)
data$Daerah_Samarinda <- ifelse(data$Daerah == 1, 1, 0)
data$Prodi2 <- ifelse(data$Prodi == 2, 1, 0)
data$Prodi3 <- ifelse(data$Prodi == 3, 1, 0)
data$Prodi4 <- ifelse(data$Prodi == 4, 1, 0)
ll <- function(param, data)
{
mu <- param[1]
beta <- param[-1]
y <- as.vector(data$Y)
x <- cbind(1, data$JKLaki, data$IPK, data$Daerah_Samarinda, data$Prodi2, data$Prodi3, data$Prodi4)
xb <- x %*% beta
pi <- exp(mu + xb)
val <- -sum(y * log(pi) + (1 - y) * log(1 - pi))
if(is.nan(val) == TRUE)
{
return(10 ^ 30)
}else
{
return(val)
}
}
lower <- rep(-500, 8)
upper <- rep(500, 8)
obj_DEoptim_Iter1 <- DEoptim(fn = ll, lower = lower, upper = upper,
control = list(itermax = 5000), data = data)
lower <- obj_DEoptim_Iter1$optim$bestmem - 0.25 * abs(obj_DEoptim_Iter1$optim$bestmem)
upper <- obj_DEoptim_Iter1$optim$bestmem + 0.25 * abs(obj_DEoptim_Iter1$optim$bestmem)
obj_DEoptim_Iter2 <- DEoptim(fn = ll, lower = lower, upper = upper,
control = list(itermax = 5000), data = data)
obj_Optim <- optim(par = obj_DEoptim_Iter2$optim$bestmem, fn = ll, data = data)
$par
par1 par2 par3 par4 par5 par6 par7
-350.91045436 347.79576145 0.05337466 0.69032735 -0.01089112 0.47465162 0.38284804
par8
0.42125664
$value
[1] 95.08457
$counts
function gradient
501 NA
$convergence
[1] 1
$message
NULL

Related

Using `cor.test()` on ranked data

I would like to do a Spearman correlation test using rank data. How can I do this with cor.test()? I don't want the function to rerank the data.
Additionally, what form does the data need to be in? From the help, it seems to be the raw data as compared to a correlation matrix.
Consider this example
## Hollander & Wolfe (1973), p. 187f.
## Assessment of tuna quality. We compare the Hunter L measure of
## lightness to the averages of consumer panel scores (recoded as
## integer values from 1 to 6 and averaged over 80 such values) in
## 9 lots of canned tuna.
library(tidyverse)
A <- tibble(
x = c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1),
y = c( 2.6, 3.1, 2.5, 5.0, 3.6, 4.0, 5.2, 2.8, 3.8)
) %>%
mutate(rank_x = rank(x),
rank_y = rank(y)
)
Spearman's correlation coefficient is defined as Pearson's correlation between ranked variables
cor(A$x, A$y, method = "spearman")
#[1] 0.6
cor(A$rank_x, A$rank_y, method = "pearson")
#[1] 0.6
what about cor.test()? Can I use the rank data as its input?
x1 <- cor.test(A$x, A$y, method = "spearman")
x1
# Spearman's rank correlation rho
#
# data: A$x and A$y
# S = 48, p-value = 0.1
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
# rho
# 0.6
x2 <- cor.test(A$rank_x, A$rank_y, method = "pearson")
x2
# Pearson's product-moment correlation
# data: A$rank_x and A$rank_y
# t = 2, df = 7, p-value = 0.09
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
# -0.11 0.90
# sample estimates:
# cor
# 0.6
x3 <- cor.test(A$rank_x, A$rank_y, method = "spearman")
# Spearman's rank correlation rho
#
# data: A$rank_x and A$rank_y
# S = 48, p-value = 0.1
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
# rho
# 0.6
Yes, you should use method = Spearman for ranked or original data. If rank data is used, the data is not reranked in the function.
As the help file implies, using method=Pearson with rank data conducts a Pearson's correlation test on the ranks, which would follow a t-distribution. However, since the ranks are not continuous variables, this approach is not correct.
getAnywhere(cor.test.default)
A single object matching ‘cor.test.default’ was found
It was found in the following places
registered S3 method for cor.test from namespace stats
namespace:stats
with value
function (x, y, alternative = c("two.sided", "less",
"greater"), method = c("pearson", "kendall",
"spearman"), exact = NULL, conf.level = 0.95, continuity = FALSE,
...)
{
alternative <- match.arg(alternative)
method <- match.arg(method)
DNAME <- paste(deparse1(substitute(x)), "and", deparse1(substitute(y)))
if (!is.numeric(x))
stop("'x' must be a numeric vector")
if (!is.numeric(y))
stop("'y' must be a numeric vector")
if (length(x) != length(y))
stop("'x' and 'y' must have the same length")
OK <- complete.cases(x, y)
x <- x[OK]
y <- y[OK]
n <- length(x)
NVAL <- 0
conf.int <- FALSE
if (method == "pearson") {
if (n < 3L)
stop("not enough finite observations")
method <- "Pearson's product-moment correlation"
names(NVAL) <- "correlation"
r <- cor(x, y)
df <- n - 2L
ESTIMATE <- c(cor = r)
PARAMETER <- c(df = df)
STATISTIC <- c(t = sqrt(df) * r/sqrt(1 - r^2))
if (n > 3) {
if (!missing(conf.level) && (length(conf.level) !=
1 || !is.finite(conf.level) || conf.level < 0 ||
conf.level > 1))
stop("'conf.level' must be a single number between 0 and 1")
conf.int <- TRUE
z <- atanh(r)
sigma <- 1/sqrt(n - 3)
cint <- switch(alternative, less = c(-Inf, z + sigma *
qnorm(conf.level)), greater = c(z - sigma * qnorm(conf.level),
Inf), two.sided = z + c(-1, 1) * sigma * qnorm((1 +
conf.level)/2))
cint <- tanh(cint)
attr(cint, "conf.level") <- conf.level
}
PVAL <- switch(alternative, less = pt(STATISTIC, df),
greater = pt(STATISTIC, df, lower.tail = FALSE),
two.sided = 2 * min(pt(STATISTIC, df), pt(STATISTIC,
df, lower.tail = FALSE)))
}
else {
if (n < 2)
stop("not enough finite observations")
PARAMETER <- NULL
TIES <- (min(length(unique(x)), length(unique(y))) <
n)
if (method == "kendall") {
method <- "Kendall's rank correlation tau"
names(NVAL) <- "tau"
r <- cor(x, y, method = "kendall")
ESTIMATE <- c(tau = r)
if (!is.finite(ESTIMATE)) {
ESTIMATE[] <- NA
STATISTIC <- c(T = NA)
PVAL <- NA
}
else {
if (is.null(exact))
exact <- (n < 50)
if (exact && !TIES) {
q <- round((r + 1) * n * (n - 1)/4)
STATISTIC <- c(T = q)
pkendall <- function(q, n) .Call(C_pKendall,
q, n)
PVAL <- switch(alternative, two.sided = {
if (q > n * (n - 1)/4) p <- 1 - pkendall(q -
1, n) else p <- pkendall(q, n)
min(2 * p, 1)
}, greater = 1 - pkendall(q - 1, n), less = pkendall(q,
n))
}
else {
xties <- table(x[duplicated(x)]) + 1
yties <- table(y[duplicated(y)]) + 1
T0 <- n * (n - 1)/2
T1 <- sum(xties * (xties - 1))/2
T2 <- sum(yties * (yties - 1))/2
S <- r * sqrt((T0 - T1) * (T0 - T2))
v0 <- n * (n - 1) * (2 * n + 5)
vt <- sum(xties * (xties - 1) * (2 * xties +
5))
vu <- sum(yties * (yties - 1) * (2 * yties +
5))
v1 <- sum(xties * (xties - 1)) * sum(yties *
(yties - 1))
v2 <- sum(xties * (xties - 1) * (xties - 2)) *
sum(yties * (yties - 1) * (yties - 2))
var_S <- (v0 - vt - vu)/18 + v1/(2 * n * (n -
1)) + v2/(9 * n * (n - 1) * (n - 2))
if (exact && TIES)
warning("Cannot compute exact p-value with ties")
if (continuity)
S <- sign(S) * (abs(S) - 1)
STATISTIC <- c(z = S/sqrt(var_S))
PVAL <- switch(alternative, less = pnorm(STATISTIC),
greater = pnorm(STATISTIC, lower.tail = FALSE),
two.sided = 2 * min(pnorm(STATISTIC), pnorm(STATISTIC,
lower.tail = FALSE)))
}
}
}
else {
method <- "Spearman's rank correlation rho"
if (is.null(exact))
exact <- TRUE
names(NVAL) <- "rho"
r <- cor(rank(x), rank(y))
ESTIMATE <- c(rho = r)
if (!is.finite(ESTIMATE)) {
ESTIMATE[] <- NA
STATISTIC <- c(S = NA)
PVAL <- NA
}
else {
pspearman <- function(q, n, lower.tail = TRUE) {
if (n <= 1290 && exact)
.Call(C_pRho, round(q) + 2 * lower.tail,
n, lower.tail)
else {
den <- (n * (n^2 - 1))/6
if (continuity)
den <- den + 1
r <- 1 - q/den
pt(r/sqrt((1 - r^2)/(n - 2)), df = n - 2,
lower.tail = !lower.tail)
}
}
q <- (n^3 - n) * (1 - r)/6
STATISTIC <- c(S = q)
if (TIES && exact) {
exact <- FALSE
warning("Cannot compute exact p-value with ties")
}
PVAL <- switch(alternative, two.sided = {
p <- if (q > (n^3 - n)/6) pspearman(q, n, lower.tail = FALSE) else pspearman(q,
n, lower.tail = TRUE)
min(2 * p, 1)
}, greater = pspearman(q, n, lower.tail = TRUE),
less = pspearman(q, n, lower.tail = FALSE))
}
}
}
RVAL <- list(statistic = STATISTIC, parameter = PARAMETER,
p.value = as.numeric(PVAL), estimate = ESTIMATE, null.value = NVAL,
alternative = alternative, method = method, data.name = DNAME)
if (conf.int)
RVAL <- c(RVAL, list(conf.int = cint))
class(RVAL) <- "htest"
RVAL
}
<bytecode: 0x0000018603fa9418>
<environment: namespace:stats>

Subscript out of bounds in R language

genBi <- function(rho, mu1, mu2, s1, s2){
library(MASS)
mu <- c(mu1, mu2) #mean
sigma <- matrix(c(s1^2, s1*s2*rho, s1*s2*rho, s2^2),
2) #covariance matrix
bvn1 <- mvrnorm(4000, mu = mu, Sigma = sigma )
colnames(bvn1) <- c("x","y")
return(bvn1)
}
#get samples from Z with sample size 20,
getSlice <- function(data){
Z <- seq(0, 1, length.out = 200) #initialize Z
for(i in 0:199){
temp <- data[i * 20 + 1 : (i + 1) * 20, ]
R <- cor(temp[,1], temp[,2])
Z[i] <- 0.5 * log((1 + R)/(1 - R))
}
return(Z)
}
data <- genBi(0.6, 1, 1, 2, 2)
Z <- getSlice(data)
Return with error Error in temp[, 2] : subscript out of bounds. Please help identify the problem!

Coverage probability for an unspecified CDF

I used the following r code to determine the coverage probability.
theta <- seq(0,1, length = 100)
CD_theta <- function(y, p, n){
1 - pbinom (y, size = n, prob = p) + 1/2*dbinom(y, size = n, prob = p)
}
y <- 5
n <- 100
phat <- y/n
mytheta <- CD_theta(5, theta, 100)
set.seed(650)
ci <- list()
n <- 100
B <- 1000
result = rep(NA, B)
all_confInt <- function(B) {
for (i in 1:B){
boot.sample <- sample(mytheta, replace = TRUE)
lower <- theta[which.min(abs(boot.sample - .025))]
upper <- theta[which.min(abs(boot.sample - .975))]
ci[[i]] <- data.frame(lowerCI = lower, upperCI = upper)
intervals <- unlist(ci)
}
return(intervals)
}
df <- data.frame(matrix(all_confInt(B), nrow=B, byrow=T))
colnames(df)[1] <- "Lower"
colnames(df)[2] <- "Upper"
names(df)
dim(df)
mean(df$Lower < phat & df$Upper > phat)*100
However, I obtained 6.4% which is too low. Why am I getting really lower percentage?. Is there any problem in the r function?

Predicting binary response probabilities from gamlss R object

I want to predict binary class probabilities/class labels from gamlss R function, how can the predict function be used to get them?
I have the following sample code
library(gamlss)
X1 <- rnorm(500)
X2 <- sample(c("A","C","D","E"),500, replace = TRUE)
Y <- ifelse(X1>0.2& X2=="A",1,0)
n <- 500
training <- sample(1:n, 400)
testing <- (1:n)[-training]
fit <- gamlss(Y[training]~pcat(X2[training],Lp=1)+ri(X1[training],Lp=1),family=BI())
pred <- predict(fit,newdata = data.frame(X1,X2)[testing,],type = "response")
Error in predict.gamlss(fit, newdata = data.frame(X1, X2)[testing, ], :
define the original data using the option data
Any idea?
You need to define the original data using the data option of gamlss:
library(gamlss)
set.seed(1)
n <- 500
X1 <- rnorm(n)
X2 <- sample(c("A","C","D","E"), n, replace = TRUE)
Y <- ifelse(X1>0.2 & X2=="A", 1, 0)
dtset <- data.frame(X1, X2, Y)
training <- sample(1:n, 400)
XYtrain <- dtset[training,]
XYtest <- dtset[-training,]
fit <- gamlss(Y ~ pcat(X2, Lp=1) + ri(X1, Lp=1), family=BI(), data=XYtrain)
pred <- predict(fit, type="response", newdata=XYtest)
Unfortunately, predict now generates a new error message:
Error in if (p != ap) stop("the dimensions of the penalty matrix and
of the design matrix are incompatible") : argument is of length
zero
This problem can be solved modifying the gamlss.ri function used by predict.gamlss:
gamlss.ri <- function (x, y, w, xeval = NULL, ...)
{
regpen <- function(sm, D, P0, lambda) {
for (it in 1:iter) {
RD <- rbind(R, sqrt(lambda) * sqrt(omega.) * D)
svdRD <- svd(RD)
rank <- sum(svdRD$d > max(svdRD$d) * .Machine$double.eps^0.8)
np <- min(p, N)
U1 <- svdRD$u[1:np, 1:rank]
y1 <- t(U1) %*% Qy
beta <- svdRD$v[, 1:rank] %*% (y1/svdRD$d[1:rank])
dm <- max(abs(sm - beta))
sm <- beta
omega. <- c(1/(abs(sm)^(2 - Lp) + kappa^2))
if (dm < c.crit)
break
}
HH <- (svdRD$u)[1:p, 1:rank] %*% t(svdRD$u[1:p, 1:rank])
edf <- sum(diag(HH))
fv <- X %*% beta
row.names(beta) <- namesX
out <- list(fv = fv, beta = beta, edf = edf, omega = omega.)
}
fnGAIC <- function(lambda, k) {
fit <- regpen(sm, D, P0, lambda)
fv <- fit$fv
GAIC <- sum(w * (y - fv)^2) + k * fit$edf
GAIC
}
X <- if (is.null(xeval))
as.matrix(attr(x, "X"))
else as.matrix(attr(x, "X"))[seq(1, length(y)), , drop=FALSE] # Added drop=FALSE
namesX <- as.character(attr(x, "namesX"))
D <- as.matrix(attr(x, "D"))
order <- as.vector(attr(x, "order"))
lambda <- as.vector(attr(x, "lambda"))
df <- as.vector(attr(x, "df"))
Lp <- as.vector(attr(x, "Lp"))
kappa <- as.vector(attr(x, "kappa"))
iter <- as.vector(attr(x, "iter"))
k <- as.vector(attr(x, "k"))
c.crit <- as.vector(attr(x, "c.crit"))
method <- as.character(attr(x, "method"))
gamlss.env <- as.environment(attr(x, "gamlss.env"))
startLambdaName <- as.character(attr(x, "NameForLambda"))
N <- sum(w != 0)
n <- nrow(X)
p <- ncol(X)
aN <- nrow(D)
ap <- ncol(D)
qrX <- qr(sqrt(w) * X, tol = .Machine$double.eps^0.8)
R <- qr.R(qrX)
Q <- qr.Q(qrX)
Qy <- t(Q) %*% (sqrt(w) * y)
if (p != ap)
stop("the dimensions of the penalty matrix and of the design matrix are incompatible")
P0 <- diag(p) * 1e-06
sm <- rep(0, p)
omega. <- rep(1, p)
tau2 <- sig2 <- NULL
lambdaS <- get(startLambdaName, envir = gamlss.env)
if (lambdaS >= 1e+07)
lambda <- 1e+07
if (lambdaS <= 1e-07)
lambda <- 1e-07
if (is.null(df) && !is.null(lambda) || !is.null(df) && !is.null(lambda)) {
fit <- regpen(sm, D, P0, lambda)
fv <- fit$fv
}
else if (is.null(df) && is.null(lambda)) {
lambda <- lambdaS
switch(method, ML = {
for (it in 1:20) {
fit <- regpen(sm, D, P0, lambda)
gamma. <- D %*% as.vector(fit$beta) * sqrt(fit$omega)
fv <- X %*% fit$beta
sig2 <- sum(w * (y - fv)^2)/(N - fit$edf)
tau2 <- sum(gamma.^2)/(fit$edf - order)
lambda.old <- lambda
lambda <- sig2/tau2
if (abs(lambda - lambda.old) < 1e-04 || lambda >
1e+05) break
}
}, GAIC = {
lambda <- nlminb(lambda, fnGAIC, lower = 1e-07, upper = 1e+07,
k = k)$par
fit <- regpen(sm, D, P0, lambda)
fv <- fit$fv
assign(startLambdaName, lambda, envir = gamlss.env)
}, )
}
else {
edf1_df <- function(lambda) {
edf <- sum(1/(1 + lambda * UDU$values))
(edf - df)
}
Rinv <- solve(R)
S <- t(D) %*% D
UDU <- eigen(t(Rinv) %*% S %*% Rinv)
lambda <- if (sign(edf1_df(0)) == sign(edf1_df(1e+05)))
1e+05
else uniroot(edf1_df, c(0, 1e+05))$root
fit <- regpen(sm, D, P0, lambda)
fv <- fit$fv
}
waug <- as.vector(c(w, rep(1, nrow(D))))
xaug <- as.matrix(rbind(X, sqrt(lambda) * D))
lev <- hat(sqrt(waug) * xaug, intercept = FALSE)[1:n]
var <- lev/w
coefSmo <- list(coef = fit$beta, lambda = lambda, edf = fit$edf,
sigb2 = tau2, sige2 = sig2, sigb = if (is.null(tau2)) NA else sqrt(tau2),
sige = if (is.null(sig2)) NA else sqrt(sig2), fv = as.vector(fv),
se = sqrt(var), Lp = Lp)
class(coefSmo) <- "ri"
if (is.null(xeval)) {
list(fitted.values = as.vector(fv), residuals = y - fv,
var = var, nl.df = fit$edf - 1, lambda = lambda,
coefSmo = coefSmo)
}
else {
ll <- dim(as.matrix(attr(x, "X")))[1]
nx <- as.matrix(attr(x, "X"))[seq(length(y) + 1, ll),
]
pred <- drop(nx %*% fit$beta)
pred
}
}
# Replace "gamlss.ri" in the package "gamlss"
assignInNamespace("gamlss.ri", gamlss.ri, pos="package:gamlss")
pred <- predict(fit, type="response", newdata=XYtest)
print(head(pred))
# [1] 2.220446e-16 2.220446e-16 2.220446e-16 4.142198e-12 2.220446e-16 2.220446e-16

append values by dataframe row in a loop

I'm running models with various initial values, and I'm trying to append values (3 estimators) by rows to a dataframe in a loop. I assign values to estimators within the loop, but I can't recall them to produce a dataframe.
My code: f is the model for the estimation. Three parameters: alpha, rho, and lambda in the model. I want to output these 3 values.
library("maxLik")
f <- function(param) {
alpha <- param[1]
rho <- param[2]
lambda <- param[3]
u <- 0.5 * (dataset$v_50_1)^alpha - 0.5 * lambda * (dataset$v_50_2)^alpha
p <- 1/(1 + exp(-rho * u))
logl <- sum(dataset$gamble * log(p) + (1 - dataset$gamble) * log(1 - p))
}
df <- data.frame(alpha = numeric(), rho = numeric(), lambda = numeric())
for (j in 1:20) {
tryCatch({
ml <- maxLik(f, start = c(alpha = runif(1, 0, 2), rho = runif(1, 0, 4), lambda = runif(1,
0, 10)), method = "NM")
alpha[j] <- ml$estimate[1]
rho[j] <- ml$estimate[2]
lambda[j] <- ml$estimate[3]
}, error = function(e) {NA})
}
output <- data.frame(alpha, rho, lambda)
error occurs:
Error in data.frame(alpha, rho, lambda) : object 'alpha' not found
Expected output
alpha rho lambda
0.4 1 2 # estimators append by row.
0.6 1.1 3 # each row has estimators that are estimated
0.7 1.5 4 # by one set of initial values, there are 20
# rows, as the estimation loops for 20 times.
I am running an example, by changing the function f
library("maxLik")
t <- rexp(100, 2)
loglik <- function(theta) log(theta) - theta*t
df <- data.frame(alpha = numeric(), rho = numeric(), lambda = numeric())
for (j in 1:20){
tryCatch({
ml <- maxLik(loglik, start = c(alpha = runif(1, 0, 2), rho = runif(1, 0, 4),
lambda = runif(1, 0, 10)), method = "NM")
df <- rbind(df, data.frame(alpha = ml$estimate[1],
rho = ml$estimate[2],
lambda = ml$estimate[3]))
# I tried to append values for each column
}, error = function(e) {NA})}
> row.names(df) <- NULL
> head(df)
alpha rho lambda
1 2.368739 2.322220 2.007375
2 2.367607 2.322328 2.007093
3 2.368324 2.322105 2.007597
4 2.368515 2.322072 2.007334
5 2.368269 2.322071 2.007142
6 2.367998 2.322438 2.007391

Resources