Finding the roots of an equation system in R

I would like to use the multiroot function from the rootSolve package to find b and m.
The code is given below.
I tried different starting values, but the result was either NaN or the warning "NaNs produced".
n <- 23
model <- function(theta){
  b <- theta[1]
  m <- theta[2]
  power <- exp(-(x-m)*b)
  a <- -n/sum(log(1-power))
  betat <- apply(x, 1, function(x) (x-m)*power/(1-power))
  mut <- apply(x, 1, function(x) power/(1-power))
  F1 <- n/b - sum(x) + n*m + (a-1)*sum(betat)
  F2 <- n*b - b*(a-1)*sum(mut)
  c(F1 = F1, F2 = F2)
}
multiroot(f = model, start = c(.5, .5))
Can someone explain to me where the mistake is, please?

In the corrected version below, the apply() calls are replaced by direct vectorized sums over the data vector x, and the starting values are moved closer to the scale of the data:
library(rootSolve)
x <- c(17.88, 28.92, 33, 41.52, 42.12, 45.6, 48.40, 51.84, 51.96, 54.12, 55.56, 67.80,
       68.64, 68.64, 68.88, 84.12, 93.12, 98.64, 105.12, 105.84, 127.92, 128.04, 173.4)
n <- length(x)
model <- function(theta){
  b <- theta[1]
  m <- theta[2]
  power <- exp(-(x-m)*b)
  a <- -n/sum(log(1-power))
  F1 <- n/b - sum(x-m) + (a-1)*sum((x-m)*power/(1-power))
  F2 <- n*b - b*(a-1)*sum(power/(1-power))
  c(F1 = F1, F2 = F2)
}
# model(c(b = 0.031, m = 4.748))
multiroot(f = model, start = c(.03, 5))
The result is:
> multiroot(f = model, start = c(.03, 5))
$root
[1] 0.03140027 4.55976021
$f.root
F1 F2
-2.046363e-12 -6.217249e-15
$iter
[1] 5
$estim.precis
[1] 1.02629e-12
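Since the shape parameter a is profiled out inside model, it can be recovered at the estimated root. A short sketch, assuming x, n, and model from above are still in scope:
# Sketch (assuming x, n, and model from above): recover the profiled-out
# shape parameter a at the estimated root
fit <- multiroot(f = model, start = c(.03, 5))
b_hat <- fit$root[1]
m_hat <- fit$root[2]
a_hat <- -n / sum(log(1 - exp(-(x - m_hat) * b_hat)))
c(b = b_hat, m = m_hat, a = a_hat)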

Related

Cannot make sense of the error while using optimParallel in R

I'm trying to run the function below using optimParallel in R on a certain data set. The code is as follows:
install.packages("optimParallel")
install.packages('parallel')
library(parallel)
library(optimParallel)
library(doParallel)
library(data.table)
library(Rlab)
library(HDInterval)
library(mvtnorm)
library(matrixStats)
library(dplyr)
library(cold)
## Bolus data:
data("bolus")
d1 <- bolus
d1$group <- ifelse(d1$group == "2mg",1,0)
colnames(d1) <- c("index",'group',"time","y")
d2 <- d1 %>% select(index, y, group, time)
colnames(d2) <- c('index','y','x1','x2') ### Final data
## Modification of the objective function:
## Another approach:
dpd_poi <- function(x, fixed = c(rep(FALSE, 5))){
  params <- fixed
  dpd_1 <- function(p){
    params[!fixed] <- p
    alpha  <- params[1]
    beta_0 <- params[2]
    beta_1 <- params[3]
    beta_2 <- params[4]
    rho    <- params[5]
    add_pi <- function(d){
      k <- beta_0 + (d[3]*beta_1) + (d[4]*beta_2)
      k1 <- exp(k)   ## for Poisson regression
      d <- cbind(d, k1)
    }
    dat_split <- split(x, f = x$index)
    result <- lapply(dat_split, add_pi)
    result <- rbindlist(result)
    result <- as.data.frame(result)
    colnames(result) <- c('index','y','x1','x2','lamb')
    result_split <- split(result, f = result$index)
    expression <- function(d){
      bin <- as.data.frame(combn(d$y, 2))
      pr  <- as.data.frame(combn(d$lamb, 2))
      ## Evaluation of the probabilities:
      f_jk <- function(u, v){
        dummy_func <- function(x, y){
          ppois(x, lambda = y)
        }
        dummy_func_1 <- function(x, y){
          ppois(x - 1, lambda = y)
        }
        k   <- mapply(dummy_func, u, v)
        k_1 <- mapply(dummy_func_1, u, v)
        inv1 <- inverseCDF(as.matrix(k), pnorm)
        inv2 <- inverseCDF(as.matrix(k_1), pnorm)
        mean  <- rep(0, 2)
        lower <- inv2
        upper <- inv1
        corr <- diag(2)
        corr[lower.tri(corr)] <- rho
        corr[upper.tri(corr)] <- rho
        prob <- pmvnorm(lower = lower, upper = upper, mean = mean, corr = corr)
        prob <- (1 + (1/alpha)) * (prob^alpha)
        ## First expression: (changes for Poisson regression)
        lam <- as.vector(t(v))
        v1 <- rpois(1000, lambda = lam[1])
        v2 <- rpois(1000, lambda = lam[2])
        all_possib <- as.data.frame(rbind(v1, v2))
        new_func <- function(u){
          k   <- mapply(dummy_func, u, v)
          k_1 <- mapply(dummy_func_1, u, v)
          inv1_1 <- inverseCDF(as.matrix(k), pnorm)
          inv2_1 <- inverseCDF(as.matrix(k_1), pnorm)
          mean1  <- rep(0, 2)
          lower1 <- inv2_1
          upper1 <- inv1_1
          corr1 <- diag(2)
          corr1[lower.tri(corr1)] <- rho
          corr1[upper.tri(corr1)] <- rho
          prob1 <- pmvnorm(lower = lower1, upper = upper1, mean = mean1, corr = corr1)
          prob1 <- prob1^(alpha)
        }
        val <- apply(all_possib, 2, new_func)
        val_s <- mean(val)   ## approximation
        return(val_s - prob)
      }
      final_res <- mapply(f_jk, bin, pr)
      final_value <- sum(final_res)
    }
    u <- sapply(result_split, expression)
    return(sum(u))
  }
}
## run the objective function:
cl <- makeCluster(25)
setDefaultCluster(cl=cl)
clusterExport(cl,c('d2','val'))
clusterEvalQ(cl,c(library(data.table), library(Rlab),library(HDInterval),library(mvtnorm),library(matrixStats),library(dplyr),library(cold)))
val <- dpd_poi(d2, c(0.5,FALSE,FALSE,FALSE,FALSE))
optimParallel(par = c(beta_0 =1, beta_1 =0.1 ,beta_2 = 1,rho=0.2),fn = val ,method = "L-BFGS-B",lower = c(-10,-10,-10,0),upper = c(Inf,Inf,Inf,1))
stopCluster(cl)
After running for some time, it returns the following error:
checkForRemoteErrors(val)
9 nodes produced errors; first error: missing value where TRUE/FALSE needed
However, when I make a minor change in the objective function (drawing 2 random numbers from rpois instead of 1000) and run the same code with optim, it converges and gives me a proper result. This is a Monte Carlo simulation, though, and it does not make sense to draw so few Poisson variables. I have to use optimParallel; otherwise it takes far too long to converge. I can also run this code on simulated data.
I'm unable to figure out where the issue lies and would appreciate any help.
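One way to narrow down the failure is to wrap the objective handed to optimParallel so that errors or non-finite values produced on a worker are converted into a large penalty instead of crashing an if() comparison. This is only a hypothetical diagnostic sketch, not part of the original code:
# Diagnostic sketch (hypothetical wrapper): catch worker-side errors and
# non-finite objective values and return a large penalty instead of NA
safe_fn <- function(fn) {
  function(par) {
    out <- tryCatch(fn(par), error = function(e) NA_real_)
    if (!is.finite(out)) return(1e10)
    out
  }
}
# usage: optimParallel(par = ..., fn = safe_fn(val), method = "L-BFGS-B", ...)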

R optim with a hidden restriction

Background
I'm doing empirical likelihood maximization and hit an error. The statistics are a bit involved, so I describe the problem as follows.
Problem
Suppose I have a multivariate function f(x, y, z) defined on the domain x + y + z = 1 with x, y, z in [0, 1], and I want to minimize the function over this domain.
Here is the contour plot of my function; the optimum seems to be around x = y = 0.5 and z = 0.
if(!require(plotly)){
  install.packages('plotly')
  require(plotly)
}else{
  require(plotly)
}
myfun <- function(x, y){
  z <- 1 - x - y
  # want to minimize f
  f <- (x - 0.26)^2 + (y - 0.51)^2 + 100*(z + 0.6)^2
  return(f)   # return f, so that optim can minimize f
}
plot_ly(
  x = seq(0, 1, length.out = 101),
  y = seq(0, 1, length.out = 101),
  z = outer(seq(0, 1, length.out = 101), seq(0, 1, length.out = 101), FUN = myfun),
  type = "contour"
)
My failed attempt
Since z = 1 - x - y, I tried with 2 parameters (x, y) and bounds [0, 1] on each.
Code:
myobj <- function(para){
  x <- para[1]
  y <- para[2]
  z <- 1 - x - y
  # want to minimize f
  f <- (x - 0.26)^2 + (y - 0.51)^2 + 100*(z + 1.6)^2
  return(f)   # return f, so that optim can minimize f
}
# initialization x=y=0, lower bound (0,0), upper bound (1,1), without restriction on z
optim(par = c(0,0),fn = myobj,method = "L-BFGS-B",lower = c(0,0),upper = c(1,1))
Output:
$par
[1] 1 1
$value
[1] 36.7877
$counts
function gradient
2 2
$convergence
[1] 0
$message
[1] "CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL"
The output shows x = y = 1 as the result, but then z = 1 - 1 - 1 = -1, which is not in the domain.
I am wondering how I can impose my restriction on z and get the right result, using a function like optim?
Thanks a lot!
CVXR vs. constrOptim time comparison
library(microbenchmark)
library(CVXR)
method_const <- function(){
  myobj <- function(para){
    x <- para[1]
    y <- para[2]
    z <- 1 - x - y
    # want to minimize f
    f <- (x - 0.26)^2 + (y - 0.51)^2 + 100*(z + 0.6)^2
    return(f)   # return f, so that optim can minimize f
  }
  res <- constrOptim(c(0.01, 0.01), myobj, NULL,
                     ui = rbind(c(1,0), c(-1,0), c(0,1), c(0,-1), c(-1,-1)),
                     ci = c(0, -1, 0, -1, -1))
  return(round(c(res$par, res$value), 4))
}
method_CVXR <- function(){
  # declaration of the variable vector gg = (x, y, z)
  gg <- Variable(3)
  # setup constraints
  cons <- list(sum(gg) == 1, gg[1] >= 0, gg[2] >= 0, gg[3] >= 0)
  # formulate objective function
  obj <- Minimize((gg[1] - 0.26)^2 + (gg[2] - 0.51)^2 + 100*(gg[3] + 0.6)^2)
  problem <- Problem(obj, cons)
  # solve the optimization problem
  res <- solve(problem)
  return(round(c(res$getValue(gg), res$value), 4))
}
method_CVXR2 <- function(){
  # declaration of variables x, y, and z
  x <- Variable(1)
  y <- Variable(1)
  z <- Variable(1)
  # setup constraints
  cons <- list(x + y + z == 1, x >= 0, y >= 0, z >= 0)
  # formulate objective function
  obj <- Minimize((x - 0.26)^2 + (y - 0.51)^2 + 100*(z + 0.6)^2)
  problem <- Problem(obj, cons)
  # solve the optimization problem
  res <- solve(problem)
  return(round(c(res$getValue(x), res$getValue(y), res$getValue(z), res$value), 4))
}
time_res <- microbenchmark(method_const(), method_CVXR(), method_CVXR2())
print(time_res)
ggplot2::autoplot(time_res)
It is more natural to solve this as a quadratic programming problem. The quadprog package provides such a QP solver, although its problem formulation is a bit clumsy.
library(quadprog)
# objective function as a quadratic problem
D <- diag(c(1, 1, 100))
d <- c(0.26, 0.51, -60)
# equality and bound constraints
A <- rbind(c(1, 1, 1), diag(3))
b <- c(1, 0, 0, 0)
meq <- 1
s <- solve.QP(D, d, t(A), b, meq)
s$solution
## [1] 0.375 0.625 0.000
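As a quick sanity check (a sketch, assuming s from the solve.QP call above), the solution can be plugged back into the original objective and the equality constraint:
# Sanity-check sketch (assuming s from solve.QP above): evaluate the original
# objective and the equality constraint at the QP solution
v <- s$solution
(v[1] - 0.26)^2 + (v[2] - 0.51)^2 + 100*(v[3] + 0.6)^2   # about 36.026
sum(v)                                                   # 1, as required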
Time comparisons on my computer are as follows:
Unit: milliseconds
expr mean median
method_quadprog() 0.035 0.032
method_solnl() 1.696 1.037
method_fmincon() 1.677 1.092
method_constroptim() 2.130 1.644
method_CVXR() 113.590 97.924
REMARK: Please note that fmincon is just a wrapper for solnl.
The solver behind CVXR is one of the fastest available in R, but building the model takes some time. That is why CVXR is not efficient for very small problems such as this one, but it can be hundreds of times faster than other solvers for quite large problems.
Here is the performance comparison among several approaches:
> time_res
Unit: milliseconds
expr min lq mean median uq max neval
method_constroptim() 1.8112 1.86370 3.140725 1.97750 2.07470 11.8188 20
method_fmincon() 1.1804 1.22620 1.633585 1.37365 1.45635 7.0064 20
method_solnl() 1.0980 1.17495 2.165110 1.27700 1.40575 9.3543 20
method_CVXR() 111.4424 121.00940 155.573570 129.92280 149.25700 414.5042 20
The code for the benchmark is given below:
library(microbenchmark)
library(CVXR)
library(pracma)
library(NlcOptim)
# objective function for minimization
f <- function(v) {
  x <- v[1]
  y <- v[2]
  z <- 1 - x - y
  r <- (x - 0.26)^2 + (y - 0.51)^2 + 100*(z + 0.6)^2
}
# constrOptim()
method_constroptim <- function(){
  v0 <- c(0.5, 0.5)
  ui <- rbind(c(1,0), c(-1,0), c(0,1), c(0,-1), c(-1,-1))
  ci <- c(0, -1, 0, -1, -1)
  constrOptim(c(0.01, 0.01), f, NULL, ui = ui, ci = ci)
}
# fmincon() from the "pracma" package
method_fmincon <- function(){
  v0 <- c(0.5, 0.5, 0)
  Aeq <- t(matrix(c(1, 1, 1)))
  beq <- 1
  lb <- c(0, 0, 0)
  ub <- c(1, 1, 1)
  fmincon(v0, f, Aeq = Aeq, beq = beq, lb = lb, ub = ub)
}
# solnl() from the "NlcOptim" package
method_solnl <- function() {
  v0 <- c(0.5, 0.5, 0)
  Aeq <- t(matrix(c(1, 1, 1)))
  beq <- 1
  lb <- c(0, 0, 0)
  ub <- c(1, 1, 1)
  solnl(v0, f, Aeq = Aeq, Beq = beq, lb = lb, ub = ub)
}
# CVXR package
method_CVXR <- function(){
  x <- Variable(1)
  y <- Variable(1)
  cons <- list(x >= 0, y >= 0, x + y <= 1)
  obj <- Minimize((x - 0.26)^2 + (y - 0.51)^2 + 100*(1 - x - y + 0.6)^2)
  problem <- Problem(obj, cons)
  solve(problem)
}
time_res <- microbenchmark(method_constroptim(),
                           method_fmincon(),
                           method_solnl(),
                           method_CVXR(),
                           times = 20)

Error in Ordered Probit with Simulated Maximum Likelihood in R

I am trying to program an ordered probit model with random effects using simulated maximum likelihood in R.
I have adapted code by Chris Adolph (http://faculty.washington.edu/cadolph/?page=21):
set.seed(10234)
nobs <- 1000
x1 <- rnorm(nobs)*.15^.5
x2 <- rnorm(nobs)*.35^.5
z <- rnorm(nobs)*.25^.5
y <- round(runif(nobs, 1,5), 0)
x <- cbind(x1, x2)
#### Generate Halton Sequences
library("randtoolbox")
R <- 200
#a <- matrix(999, nrow=R, ncol=nobs)
a <- halton(n=nobs, dim=R, normal=T, init=T)
# Likelihood for 5 category ordered probit
llk.oprobit5 <- function(param, x, y) {
  # preliminaries
  x <- as.matrix(x)
  os <- rep(1, nrow(x))
  x <- cbind(os, x)
  b <- param[1:ncol(x)]
  t2 <- param[(ncol(x)+1)]
  t3 <- param[(ncol(x)+2)]
  t4 <- param[(ncol(x)+3)]
  sigma_a <- param[ncol(x)+4]
  # probabilities and penalty function
  xb <- x %*% b %*% rep(1, R)
  asigma <- a * sigma_a
  p1 <- pnorm(-xb - asigma)
  if (t2 <= 0) {
    p2 <- -(abs(t2) * 10000)    # penalty function to keep t2 > 0
  } else {
    p2 <- pnorm(t2 - xb - asigma) - pnorm(-xb - asigma)
  }
  if (t3 <= t2) {
    p3 <- -((t2 - t3) * 10000)  # penalty to keep t3 > t2
  } else {
    p3 <- pnorm(t3 - xb - asigma) - pnorm(t2 - xb - asigma)
  }
  if (t4 <= t3) {
    p4 <- -((t3 - t4) * 10000)
  } else {
    p4 <- pnorm(t4 - xb - asigma) - pnorm(t3 - xb - asigma)
  }
  p5 <- 1 - pnorm(t4 - xb - asigma)
  p1 <- log(apply(p1, MARGIN = 1, FUN = sum)/R)
  p2 <- log(apply(p2, MARGIN = 1, FUN = sum)/R)
  p3 <- log(apply(p3, MARGIN = 1, FUN = sum)/R)
  p4 <- log(apply(p4, MARGIN = 1, FUN = sum)/R)
  p5 <- log(apply(p5, MARGIN = 1, FUN = sum)/R)
  # -1 * log likelihood (optim is a minimizer)
  -sum(cbind(y==1, y==2, y==3, y==4, y==5) * cbind(p1, p2, p3, p4, p5))
}
# Use optim directly
ls.result <- lm(y~x) # use ls estimates as starting values
stval <- c(ls.result$coefficients,1,2,3,2) # initial guesses
oprobit.result <- optim(stval, llk.oprobit5, method="BFGS", x=x, y=y, hessian = T)
However, the code gave me the following error:
Error in apply(p3, MARGIN = 1, FUN = sum) :
dim(X) must have a positive length
Called from: apply(p3, MARGIN = 1, FUN = sum)
I have already used the debug() function; I can run all the functions separately and print the values at each step.
The problem is that the averaging over the Halton sequences should be performed only when the respective parameter values are in the allowed ranges; otherwise the penalty branch produces a scalar and apply() fails. Note that I moved the log(apply(...)) lines inside the respective else branches:
set.seed(10234)
nobs <- 1000
x1 <- rnorm(nobs)*.15^.5
x2 <- rnorm(nobs)*.35^.5
z <- rnorm(nobs)*.25^.5
y <- round(runif(nobs, 1,5), 0)
x <- cbind(x1, x2)
#### Generate Halton Sequences
library("randtoolbox")
R <- 200
a <- halton(n=nobs, dim=R, normal=T, init=T)
# Likelihood for 5 category ordered probit
llk.oprobit5 <- function(param, x, y) {
  # preliminaries
  x <- as.matrix(x)
  os <- rep(1, nrow(x))
  x <- cbind(os, x)
  b <- param[1:ncol(x)]
  t2 <- param[(ncol(x)+1)]
  t3 <- param[(ncol(x)+2)]
  t4 <- param[(ncol(x)+3)]
  sigma_a <- param[ncol(x)+4]
  # probabilities and penalty function
  xb <- x %*% b %*% rep(1, R)
  asigma <- a * sigma_a
  p1 <- pnorm(-xb - asigma)
  p1 <- log(apply(p1, MARGIN = 1, FUN = sum)/R)
  if (t2 <= 0) {
    p2 <- -(abs(t2) * 10000)    # penalty function to keep t2 > 0
  } else {
    p2 <- pnorm(t2 - xb - asigma) - pnorm(-xb - asigma)
    p2 <- log(apply(p2, MARGIN = 1, FUN = sum)/R)
  }
  if (t3 <= t2) {
    p3 <- -((t2 - t3) * 10000)  # penalty to keep t3 > t2
  } else {
    p3 <- pnorm(t3 - xb - asigma) - pnorm(t2 - xb - asigma)
    p3 <- log(apply(p3, MARGIN = 1, FUN = sum)/R)
  }
  if (t4 <= t3) {
    p4 <- -((t3 - t4) * 10000)
  } else {
    p4 <- pnorm(t4 - xb - asigma) - pnorm(t3 - xb - asigma)
    p4 <- log(apply(p4, MARGIN = 1, FUN = sum)/R)
  }
  p5 <- 1 - pnorm(t4 - xb - asigma)
  p5 <- log(apply(p5, MARGIN = 1, FUN = sum)/R)
  # -1 * log likelihood (optim is a minimizer)
  -sum(cbind(y==1, y==2, y==3, y==4, y==5) * cbind(p1, p2, p3, p4, p5))
}
# Use optim directly
ls.result <- lm(y~x) # use ls estimates as starting values
stval <- c(ls.result$coefficients,1,2,3,2) # initial guesses
oprobit.result <- optim(stval, llk.oprobit5, method="BFGS", x=x, y=y, hessian=T, control = list(trace = 10, REPORT = 1))
which then runs successfully:
...
iter 20 value 1567.966484
iter 21 value 1567.966434
iter 22 value 1567.966389
iter 23 value 1567.966350
iter 23 value 1567.966349
iter 23 value 1567.966345
final value 1567.966345
converged
producing the results:
pe <- oprobit.result$par # point estimates
vc <- solve(oprobit.result$hessian) # var-cov matrix
se <- sqrt(diag(vc)) # standard errors
ll <- -oprobit.result$value # likelihood at maximum
> pe
(Intercept) xx1 xx2
1.14039048 -0.05864677 0.04268965 0.77838577 1.43517145 2.23191376 0.02237956
> vc
(Intercept) xx1 xx2
(Intercept) 2.704159e-03 -7.945238e-05 4.507541e-07 1.520451e-03 1.970613e-03 2.215032e-03 9.274348e-04
xx1 -7.945238e-05 7.300705e-03 1.165960e-04 -9.066118e-06 -6.078438e-05 -1.046191e-04 -2.009612e-04
xx2 4.507541e-07 1.165960e-04 2.850668e-03 3.795273e-05 3.951004e-05 3.506606e-05 -2.686577e-04
1.520451e-03 -9.066118e-06 3.795273e-05 2.107875e-03 1.860727e-03 1.728905e-03 9.524469e-05
1.970613e-03 -6.078438e-05 3.951004e-05 1.860727e-03 2.955453e-03 2.576940e-03 1.960465e-04
2.215032e-03 -1.046191e-04 3.506606e-05 1.728905e-03 2.576940e-03 4.262996e-03 2.723117e-04
9.274348e-04 -2.009612e-04 -2.686577e-04 9.524469e-05 1.960465e-04 2.723117e-04 5.636931e-03
> se
(Intercept) xx1 xx2
0.05200153 0.08544417 0.05339165 0.04591160 0.05436408 0.06529162 0.07507950
> ll
[1] -1567.966
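A compact way to report these (a sketch, assuming pe and se from above) is a table of estimates, standard errors, and z-ratios:
# Summary sketch (assuming pe and se from above): estimates, standard errors,
# and z-ratios in a single table
round(cbind(estimate = pe, std.err = se, z = pe / se), 4)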

Writing my own MLE command in R is causing issues

I am just getting into writing MLE commands in R that work and look like native R functions. In this attempt I am trying to do a simple MLE for
y = b0 + x*b1 + u
with
u ~ N(0, sd = s0 + z*s1)
However, I am having difficulty coding even this simple command, although I have written a similar command in Stata in a handful of lines.
Here is the code I have written so far in R.
normalreg <- function(beta, sigma = NULL, data, beta0 = NULL, sigma0 = NULL,
                      con1 = T, con2 = T) {
  # If a formula for sigma is not specified
  # assume it is the same as the formula for the beta.
  if (is.null(sigma)) sigma <- beta
  # Grab the call expression
  mf <- match.call(expand.dots = FALSE)
  # Find the position of each argument
  m <- match(c("beta", "sigma", "data", "subset", "weights", "na.action",
               "offset"), names(mf), 0L)
  # Adjust names of mf
  mf <- mf[c(1L, m)]
  # Since I have two formulas I will call them both formula
  names(mf)[2:3] <- "formula"
  # Drop unused levels
  mf$drop.unused.levels <- TRUE
  # Divide mf into data1 and data2
  data1 <- data2 <- mf
  data1 <- mf[-3]
  data2 <- mf[-2]
  # Name the first element of each call model.frame so eval() builds a model frame
  data1[[1L]] <- data2[[1L]] <- as.name("model.frame")
  data1 <- as.matrix(eval(data1, parent.frame()))
  data2 <- as.matrix(eval(data2, parent.frame()))
  y <- data1[,1]
  data1 <- data1[,-1]
  if (con1) data1 <- cbind(data1, 1)
  data2 <- unlist(data2[,-1])
  if (con2) data2 <- cbind(data2, 1)
  data1 <- as.matrix(data1)   # Ensure our data is read as a matrix
  data2 <- as.matrix(data2)   # Ensure our data is read as a matrix
  if (!is.null(beta0)) if (length(beta0) != ncol(data1))
    stop("Length of beta0 must equal the number of ind. variables in the first equation")
  if (!is.null(sigma0)) if (length(sigma0) != ncol(data2))
    stop("Length of sigma0 must equal the number of ind. variables in the second equation")
  # Set initial parameter estimates
  if (is.null(beta0)) beta0 <- rep(1, ncol(data1))
  if (is.null(sigma0)) sigma0 <- rep(1, ncol(data2))
  # Define the maximization function
  normMLE <- function(est = c(beta0, sigma0), data1 = data1, data2 = data2, y = y) {
    data1est <- as.matrix(est[1:ncol(data1)], nrow = ncol(data1))
    data2est <- as.matrix(est[(ncol(data1)+1):(ncol(data1)+ncol(data2))],
                          nrow = ncol(data1))
    ps <- pnorm(y - data1 %*% data1est,
                sd = data2 %*% data2est)
    # Estimate a vector of log likelihoods based on coefficient estimates
    llk <- log(ps)
    -sum(llk)
  }
  results <- optim(c(beta0, sigma0), normMLE, hessian = T,
                   data1 = data1, data2 = data2, y = y)
  results
}
x <- rnorm(10000)
z <- x^2
y <- x*2 + rnorm(10000, sd = 2 + z*2) + 10
normalreg(y ~ x, y ~ z)
At this point the biggest issue is finding an optimization routine that does not fail when some of the values return NA because the standard deviation goes negative. Any suggestions? Sorry for the huge amount of code.
Francis
I include a check to see whether any of the standard deviations are less than or equal to 0, and return a likelihood of 0 (log(0) = -Inf on the log scale) if that is the case. It seems to work for me. You can figure out the details of wrapping it into your function.
# y = b0 + x*b1 + u
# u ~ N(0, sd = s0 + z*s1)
ll <- function(par, x, z, y){
  b0 <- par[1]
  b1 <- par[2]
  s0 <- par[3]
  s1 <- par[4]
  sds <- s0 + z*s1
  if(any(sds <= 0)){
    return(log(0))
  }
  preds <- b0 + x*b1
  sum(dnorm(y, preds, sds, log = TRUE))
}
n <- 100
b0 <- 10
b1 <- 2
s0 <- 2
s1 <- 2
x <- rnorm(n)
z <- x^2
y <- b0 + b1*x + rnorm(n, sd = s0 + s1*z)
optim(c(1,1,1,1), ll, x = x, z = z, y = y, control = list(fnscale = -1))
With that said, it probably wouldn't be a bad idea to parameterize the standard deviation in such a way that it cannot go negative, for example along the lines of the sketch below.
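A minimal sketch of that idea (assuming x, z, and y from above): estimate s0 and s1 on the log scale so the implied standard deviation stays strictly positive, then back-transform.
# Sketch (assuming x, z, y from above): positivity-safe parameterization of
# the standard deviation, sd = exp(par[3]) + exp(par[4])*z > 0 for z >= 0
ll2 <- function(par, x, z, y){
  b0 <- par[1]
  b1 <- par[2]
  s0 <- exp(par[3])   # forces s0 > 0
  s1 <- exp(par[4])   # forces s1 > 0 (fine here since z = x^2 >= 0)
  sds <- s0 + z*s1
  preds <- b0 + x*b1
  sum(dnorm(y, preds, sds, log = TRUE))
}
fit <- optim(c(1, 1, 0, 0), ll2, x = x, z = z, y = y,
             control = list(fnscale = -1))
c(fit$par[1:2], exp(fit$par[3:4]))   # back-transform the scale parameters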

Optimisation in R using the ucminf package

I am not able to apply the ucminf function to minimise my cost function in R.
Here is my cost function:
costfunction <- function(X, y, theta){
  m <- length(y)
  J <- 1/m * ((-t(y) %*% log(sigmoid(as.matrix(X) %*% as.matrix(theta)))) -
                ((1 - t(y)) %*% log(1 - sigmoid(as.matrix(X) %*% as.matrix(theta)))))
}
Here is my sigmoid function:
sigmoid <- function(t){
  g <- 1/(1 + exp(-t))
}
Here is my gradient function:
gradfunction <- function(X, y, theta){
  grad <- 1/m * t(X) %*% (sigmoid(as.matrix(X) %*% as.matrix(theta)) - y)
}
I am trying to do the following:
library("ucminf")
data <- read.csv("ex2data1.txt",header=FALSE)
X <<- data[,c(1,2)]
y <<- data[,3]
qplot(X[,1],X[,2],colour=factor(y))
m <- dim(X)[1]
n <- dim(X)[2]
X <- cbind(1,X)
initial_theta <<- matrix(0,nrow=n+1,ncol=1)
cost <- costfunction(X,y,initial_theta)
grad <- gradfunction(X,y,initial_theta)
This is where I want to call ucminf to find the minimum cost and values of theta. I am not sure how to do this.
It looks like you are trying to do the week 2 problem of the Coursera machine learning course.
There is no need to use the ucminf package here; you can simply use the base R function optim, and it works.
We will define the sigmoid and cost functions first.
sigmoid <- function(z)
  1 / (1 + exp(-z))
costFunction <- function(theta, X, y) {
  m <- length(y)
  J <- -(1 / m) * crossprod(c(y, 1 - y),
                            c(log(sigmoid(X %*% theta)), log(1 - sigmoid(X %*% theta))))
  grad <- (1 / m) * crossprod(X, sigmoid(X %*% theta) - y)
  list(J = J, grad = grad)
}
Let's load the data now. To make this code reproducible, I put the data in my Dropbox.
download.file("https://dl.dropboxusercontent.com/u/8750577/ex2data1.txt",
method = "curl", destfile = "/tmp/ex2data1.txt")
data <- matrix(scan('/tmp/ex2data1.txt', what = double(), sep = ","),
ncol = 3, byrow = TRUE)
X <- data[, 1:2]
y <- data[, 3, drop = FALSE]
m <- nrow(X)
n <- ncol(X)
X <- cbind(1, X)
initial_theta = matrix(0, nrow = n + 1)
We can then compute the result of the cost function at the initial theta like this:
cost <- costFunction(initial_theta, X, y)
(grad <- cost$grad)
## [,1]
## [1,] -0.100
## [2,] -12.009
## [3,] -11.263
(cost <- cost$J)
## [,1]
## [1,] 0.69315
Finally, we can use optim to get the optimal theta:
res <- optim(par = initial_theta,
fn = function(t) costFunction(t, X, y)$J,
gr = function(t) costFunction(t, X, y)$grad,
method = "BFGS", control = list(maxit = 400))
(theta <- res$par)
## [,1]
## [1,] -25.08949
## [2,] 0.20566
## [3,] 0.20089
(cost <- res$value)
## [1] 0.2035
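As a quick follow-up (a sketch, assuming X, y, sigmoid, and theta from above), the fitted coefficients can be used to compute predicted probabilities and the training-set classification accuracy:
# Sketch (assuming X, y, sigmoid, and theta from above): predicted
# probabilities and training-set accuracy of the fitted logistic model
p_hat <- sigmoid(X %*% theta)
mean((p_hat >= 0.5) == y)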
If you have a problem with the function download.file, the data can be downloaded here.
As you did not provide a reproducible example, it is hard to give you exactly the code you need, but the general idea is to hand the functions over to ucminf:
ucminf(start, costfunction, gradfunction, y = y, theta = initial_theta)
Note that start needs to be a vector of initial starting values which, when handed over as the first argument (X) to the two functions, must produce a result. Usually you would use random starting values (e.g., from runif).
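In this particular question, however, the quantity to optimise is theta rather than X, and ucminf (like optim) passes the parameter vector as the first argument of fn and gr. A minimal sketch of such a call, assuming the data and the costfunction/gradfunction defined in the question:
# Sketch (assuming X, y, initial_theta, costfunction, and gradfunction from
# the question): wrap the functions so the parameter vector comes first
library(ucminf)
fit <- ucminf(par = as.vector(initial_theta),
              fn = function(theta) as.numeric(costfunction(X, y, theta)),
              gr = function(theta) as.vector(gradfunction(X, y, theta)))
fit$par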
