I am trying to optimize a function in R using the nloptr package.
Here is the code:
library('nloptr')
hn <- function(x, n)
{
  hret <- 0
  if (n == 0)
  {
    hret <- 1
    return(hret)
  }
  else if (n == 1)
  {
    hret <- 2*x
    return(hret)
  }
  else
  {
    hn2 <- 1
    hn1 <- 2*x
    all_n <- seq(from = 2, to = n, by = 1)
    for (ni in all_n)
    {
      hn <- (2*x*hn1/sqrt(ni)) + (2*sqrt((ni-1)/ni)*hn2)
      #print(hn)
      hn2 <- hn1
      hn1 <- hn
    }
    hret <- hn
    return(hret)
  }
}
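# Note (illustrative addition, not part of the original post): hn() evaluates
# a scaled Hermite-style three-term recurrence starting from h_0 = 1 and
# h_1 = 2*x; it accepts complex arguments as well, e.g.:
# sapply(0:4, function(n) hn(0.3 + 0.2i, n))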
term <- function(alpha, r, theta, n)
{
  beta <- alpha*cosh(r) - Conj(alpha)*exp(1i*theta)*sinh(r)
  hnterm <- beta/sqrt(exp(1i*theta)*sinh(2*r))
  term4 <- hn(hnterm, n)
  logterm1 <- (1/2)*log(cosh(r))
  logterm2 <- -((1/2)*(abs(alpha)^2)) + ((1/2)*(Conj(alpha)^2))*exp(1i*theta)*tanh(r)
  logterm3 <- (n/2)*log((1/2)*exp(1i*theta)*tanh(r))
  logterm4 <- log(term4)
  logA <- logterm1 + logterm2 + logterm3 + logterm4
  A <- exp(logA)
  return(A)
}
PESQ <- function(x, alpha)
{
  p0 <- x[1]
  p1 <- x[2]
  beta <- x[3]
  r <- x[4]
  theta <- x[5]
  N <- 30
  NI <- seq(from = 0, to = N, by = 1)

  elements <- rep(0+1i*0, length(NI))
  elements_abs_sqr <- rep(0, length(NI))
  pr <- rep(0, length(NI))
  for (n in NI)
  {
    w <- term(2*alpha + beta, r, theta, n)
    elements[n+1] <- w
    elements_abs_sqr[n+1] <- abs(w)^2
  }
  total <- sum(elements_abs_sqr)
  for (n in NI)
  {
    pr[n+1] <- Re(elements[n+1]/sqrt(total))
    pr[n+1] <- pr[n+1]^2
  }
  p_off_given_on <- pr[1]

  elements <- rep(0+1i*0, length(NI))
  elements_abs_sqr <- rep(0, length(NI))
  pr <- rep(0, length(NI))
  for (n in NI)
  {
    w <- term(beta, r, theta, n)
    elements[n+1] <- w
    elements_abs_sqr[n+1] <- abs(w)^2
  }
  total <- sum(elements_abs_sqr)
  for (n in NI)
  {
    pr[n+1] <- Re(elements[n+1]/sqrt(total))
    pr[n+1] <- pr[n+1]^2
  }
  p_on_given_off <- 1 - pr[1]

  P_e <- p0*p_off_given_on + p1*p_on_given_off
  return(P_e)
}
eval_g_eq <- function(x)
{
  return(x[1] + x[2] - 1)
}
lb <- c(0, 0, -Inf, 0.001, -pi)
ub <- c(1, 1, Inf, Inf, pi)
local_opts <- list("algorithm" = "NLOPT_LD_MMA",
                   "xtol_rel" = 1.0e-18)
# Set optimization options.
opts <- list("algorithm" = "NLOPT_LN_AUGLAG",
             "xtol_rel" = 1.0e-18, "local_opts" = local_opts, "maxeval" = 10000)
x0 <- c(0.1,0.9, 0.1, 0.01, 0.7853982)
alpha <- 0.65
eval_g_ineq <- function(x)
{
  return(c(-x[1] - x[2],
           x[1] + x[2] - 1))
}
eval_f <- function(x)
{
  ret <- PESQ(x, alpha)
  return(ret)
}
res <- nloptr(x0 = x0,
              eval_f = eval_f,
              eval_g_eq = eval_g_eq,
              eval_g_ineq = eval_g_ineq,
              lb = lb,
              ub = ub,
              opts = opts)
print(res)
Upon running this code, I get the following error:
Error in nloptr(x0 = x0, eval_f = eval_f, eval_g_ineq = eval_g_ineq, eval_g_eq = eval_g_eq, :
STRING_ELT() can only be applied to a 'character vector', not a 'NULL'
Calls: ... withCallingHandlers -> withVisible -> eval -> eval -> nloptr
Execution halted
The weird thing is that if I use "algorithm" = "NLOPT_LN_COBYLA" in opts and remove the equality constraint eval_g_eq from the nloptr call, it runs fine and I get a solution. However, I need equality constraints for my work.
How should I fix the issue?
This is still a bit of a guess, but the only possibility I can come up with is that the problem is caused by using a derivative-based optimizer as your local optimizer at the same time as a derivative-free optimizer for the global solution (the NLopt docs clarify that the LN in NLOPT_LN_AUGLAG denotes "local, derivative-free", whereas LD would denote "local, derivative-based"). I got an answer (not sure if it's correct, though!) by using "NLOPT_LN_COBYLA" as the algorithm in local_opts; with everything else as in your code,
local_opts <- list("algorithm" = "NLOPT_LN_COBYLA",
                   "xtol_rel" = 1.0e-18)
# Set optimization options.
opts <- list("algorithm" = "NLOPT_LN_AUGLAG",
             "xtol_rel" = 1.0e-18, "local_opts" = local_opts, "maxeval" = 10000)
print(res <- nloptr(x0 = x0,
                    eval_f = eval_f,
                    eval_g_eq = eval_g_eq,
                    eval_g_ineq = eval_g_ineq,
                    lb = lb,
                    ub = ub,
                    opts = opts))
Returns
Call:
nloptr(x0 = x0, eval_f = eval_f, lb = lb, ub = ub, eval_g_ineq = eval_g_ineq,
eval_g_eq = eval_g_eq, opts = opts)
Minimization using NLopt version 2.4.2
NLopt solver status: 3 ( NLOPT_FTOL_REACHED: Optimization stopped because
ftol_rel or ftol_abs (above) was reached. )
Number of Iterations....: 102
Termination conditions: xtol_rel: 1e-18 maxeval: 10000
Number of inequality constraints: 2
Number of equality constraints: 1
Optimal value of objective function: 2.13836819774604e-05
Optimal value of controls: 0 1 -0.0003556752 0.006520304 2.037835
As far as I can see this has found a plausible solution that respects the constraints:
- the reason for stopping ("ftol_rel or ftol_abs ... was reached") is sensible;
- it used a reasonable number of iterations (102, and not maxeval) to get there;
- eval_g_eq(res$solution) does give 0 (which we can also see by inspection, as the condition is x[1]+x[2]-1==0);
- the inequality conditions are -x1-x2 and x1+x2-1, which nloptr treats as satisfied when they evaluate to <= 0, and both do here (although if x1+x2 is constrained to equal 1, I'm not sure the inequality constraints can ever do anything);
- eval_f(x0) is considerably larger than eval_f(res$solution) ...
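For completeness, these checks can be run directly in the same session (assuming res is the object returned above):
eval_g_eq(res$solution)    # ~0, so the equality constraint holds
eval_g_ineq(res$solution)  # both components should be <= 0
eval_f(x0)                 # compare with ...
eval_f(res$solution)       # ... the much smaller optimal value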
I am trying to implement multinomial regression in R with my own code and optim, i.e. without using packages such as mlogit or multinom.
rm(list= ls())
data = read.table("~/Desktop/R Code/textfiles/keane.csv", sep = ",",header = T)
data1 = data[,c("educ","exper", "expersq", "black", "status")]
data1 = na.omit(data1)
data2 = as.matrix(data1)
y_1 = rep(0, nrow(data2))
y_2 = rep(0, nrow(data2))
y_3 = rep(0, nrow(data2))
data2 = cbind(data2[,1:5], y_1, y_2, y_3)
data2[,6] = ifelse(data2[,5] == 1, 1, 0)
data2[,7] = ifelse(data2[,5] == 2, 1, 0)
data2[,8] = ifelse(data2[,5] == 3, 1, 0)
int = rep(1, nrow(data2)) #intercept
data2 = cbind(int, data2[,c(1:4,6,7,8)])
X = as.matrix(data2[, c(1:5)])
y_1 = as.matrix(data2[, 6]) #replace y values(status = 1)
y_2 = as.matrix(data2[, 7]) #replace y values(status = 2)
y_3 = as.matrix(data2[, 8]) #replace y values(status = 3)
Y = cbind(y_1, y_2, y_3)
##beta
beta = solve(t(X) %*% X) %*% t(X) %*% Y #LPM coefficient
logit.nll = function (beta, X, Y) {
  P = as.matrix(rowSums(exp(X %*% beta)))  # Sum_(h=1)^3 exp(X * Beta_(h))
  Pr_1 = exp(X %*% beta[,2])/(1 + P)       # P(y = 2 | X)
  Pr_2 = exp(X %*% beta[,3])/(1 + P)       # P(y = 3 | X)
  Pr_0 = 1/(1 + P)                         # P(y = 1 | X)
  (colSums(Y[,1] * log(Pr_0)) + colSums(Y[,2] * log(Pr_1)) + colSums(Y[,3] * log(Pr_2))) # log-likelihood
}
optim(beta, logit.nll, X = X, Y = Y, method = "BFGS")
When I run this code, it gives me the error "Error in X %*% beta : non-conformable arguments". My approach might be fundamentally wrong, or the implementation of the log-likelihood function might be wrong. Can I get some help fixing this code?
I'm not very familiar with your optimization problem or what you are trying to do, but the error comes from optim working with a vector: optim passes the parameters to your function as a plain vector, so you need to coerce it back into a matrix inside the function. Let's say your data is like this:
set.seed(111)
data = iris
X = model.matrix(~.,data=data[,1:4])
Y = model.matrix(~0+Species,data=data)
beta = solve(t(X) %*% X) %*% t(X) %*% Y
Now we add the matrix part. Also note that by default optim performs minimization (https://stat.ethz.ch/R-manual/R-devel/library/stats/html/optim.html), so you need to return the negative of the log-likelihood:
logit.nll = function (beta, X, Y) {
  beta = matrix(beta, ncol = 3)            # optim passes a vector; reshape it
  P = as.matrix(rowSums(exp(X %*% beta)))  # Sum_(h=1)^3 exp(X * Beta_(h))
  Pr_1 = exp(X %*% beta[,2])/(1 + P)       # P(y = 2 | X)
  Pr_2 = exp(X %*% beta[,3])/(1 + P)       # P(y = 3 | X)
  Pr_0 = 1/(1 + P)                         # P(y = 1 | X)
  LL = (colSums(Y[,1] * log(Pr_0)) + colSums(Y[,2] * log(Pr_1)) + colSums(Y[,3] * log(Pr_2))) # log-likelihood
  print(LL)
  return(-LL)
}
res = optim(beta, logit.nll, X = X, Y = Y, method = "BFGS")
res
$par
Speciessetosa Speciesversicolor Speciesvirginica
(Intercept) -2.085162 15.040679 -27.60634
Sepal.Length -4.649971 -8.971237 -11.43702
Sepal.Width -9.286757 -5.016616 -11.69764
Petal.Length 12.803070 17.125483 26.55641
Petal.Width 6.025760 3.342659 21.63200
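One possible sanity check (an addition beyond the answer above) is to compare against nnet::multinom on the same data; note that multinom uses the first factor level as the reference class, so its coefficient matrix is parameterized differently from the three-column beta above:
library(nnet)
fit <- multinom(Species ~ ., data = iris)
coef(fit)  # coefficients for versicolor and virginica relative to setosa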
Consider the following example of a nonlinear optimization problem. The procedure is too slow to apply in simulation studies: in my case it takes 2.5 hours for a single replication. How can I speed up the process to reduce the processing time?
library(mvtnorm)
library(alabama)
n = 200
X <- matrix(0, nrow = n, ncol = 2)
X[,1:2] <- rmvnorm(n = n, mean = c(0,0), sigma = matrix(c(1,1,1,4), ncol = 2))
x0 = matrix(c(X[1,1:2]), nrow = 1)
y0 = x0 - 0.5 * log(n) * (colMeans(X) - x0)
X = rbind(X, y0)
x01 = y0[1]
x02 = y0[2]
x1 = X[,1]
x2 = X[,2]
pInit = matrix(rep(0.1, n + 1), nrow = n + 1)
outopt = list(kkt2.check=FALSE, "trace" = FALSE)
f1 <- function(p) sum(sqrt(pmax(0, p)))/sqrt(n+1)
heq1 <- function(p) c(sum(x1 * p) - x01, sum(x2 * p) - x02, sum(p) - 1)
hin1 <- function(p) p - 1e-06
sol <- alabama::auglag(pInit, fn = function(p) -f1(p),
heq = heq1, hin = hin1,
control.outer = outopt)
-1 * sol$value
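One generic way to speed this up (a sketch under the assumption that the objective and constraints stay exactly as above; not benchmarked here) is to supply analytic gradients and Jacobians, so that auglag does not have to approximate derivatives of all n + 1 = 201 parameters by finite differences at every inner iteration:
gr1 <- function(p) as.numeric(-1/(2*sqrt(pmax(p, 1e-12))*sqrt(n + 1)))  # gradient of -f1
heq1.jac <- function(p) rbind(x1, x2, rep(1, n + 1))  # 3 x (n+1) Jacobian of heq1
hin1.jac <- function(p) diag(n + 1)                   # Jacobian of hin1 (identity)
sol <- alabama::auglag(pInit, fn = function(p) -f1(p), gr = gr1,
                       heq = heq1, heq.jac = heq1.jac,
                       hin = hin1, hin.jac = hin1.jac,
                       control.outer = outopt)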
I was using the nloptr package to solve an optimization problem with a 9-variable cost function, using the program below:
eval_f <- function(x) {
  return(list(
    "objective" = 0.0404*x[1]^2 + 4.4823*x[1] + 0.4762 +
                  0.024*x[2]^2  + 3.9767*x[2] + 0.3737 +
                  0.0246*x[3]^2 + 3.6992*x[3] + 0.9425 +
                  0.0214*x[4]^2 + 3.5896*x[4] + 0.7615 +
                  0.0266*x[5]^2 + 3.8197*x[5] + 0.2799 +
                  0.0262*x[6]^2 + 3.7884*x[6] + 0.307 +
                  0.0362*x[7]^2 + 4.4927*x[7] + 0.1549 +
                  0.0344*x[8]^2 + 4.4066*x[8] - 0.2472 +
                  0.0241*x[9]^2 + 4.227*x[9],
    "gradient" = c(2*0.0404*x[1] + 4.4823,
                   2*0.024*x[2]  + 3.9767,
                   2*0.0246*x[3] + 3.6992,
                   2*0.0214*x[4] + 3.5896,
                   2*0.0266*x[5] + 3.8197,
                   2*0.0262*x[6] + 3.7884,
                   2*0.0362*x[7] + 4.4927,
                   2*0.0344*x[8] + 4.4066,
                   2*0.0241*x[9] + 4.227)))
}
eval_g_eq <- function(x) {
  constr <- c(x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] - Balance)
  grad <- c(1, 1, 1, 1, 1, 1, 1, 1, 1)
  return(list("constraints" = constr, "jacobian" = grad))
}
lb <- c(0, 0, 0, 0, 0, 0, 0, 0, 0)
ub <- c(50, 50, 50, 50, 50, 50, 50, 50, 50)
x_0 <- c(25, 25, 25, 25, 25, 25, 25, 25, 25)
local_opts <- list( "algorithm" = "NLOPT_LD_MMA","xtol_rel" = 1.0e-9 )
opts <- list( "algorithm" = "NLOPT_LD_AUGLAG","xtol_rel" = 1.0e-9,"maxeval" = 10000, "local_opts" = local_opts )
res <- nloptr(x0=x_0, eval_f=eval_f,lb=lb,ub=ub,eval_g_eq=eval_g_eq,opts=opts)
The code works fine, but the problem is that I need to solve this optimization over a period of 168 hours, and at each time step the lower and upper bounds have to be different. Has anyone implemented this before?
BR
I highly suggest you use OSQP for that. You can download it from CRAN. You can find an example of updating the problem vectors in the manual; I have rewritten it here:
library(Matrix)
library(osqp)
# Define problem data in the form
# minimize (1/2) x' P x + q' x
# subject to l <= A x <= u
#
P <- Matrix(c(11., 0., 0., 0.), 2, 2, sparse = TRUE)
q <- c(3., 4.)
A <- Matrix(c(-1., 0., -1., 2., 3., 0., -1., -3., 5., 4.), 5, 2, sparse = TRUE)
u <- c(0., 0., -15., 100., 80)
l <- rep_len(-Inf, 5)
settings <- osqpSettings(verbose = FALSE)
model <- osqp(P, q, A, l, u, settings)
# Solve
res <- model$Solve()
# Get solution
x_opt <- res$x
# Define new vector
q_new <- c(10., 20.)
# Update model and solve again
model$Update(q = q_new)
res <- model$Solve()
# Get new solution
x_opt_new <- res$x
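The same Update() mechanism should cover the per-hour bounds in the question: besides q, it also accepts new l and u vectors (in OSQP, box constraints on x are encoded as identity rows of A), so one model can be reused across all 168 steps. A sketch, where lb_t and ub_t stand for hypothetical precomputed lists of per-hour bound vectors:
for (t in 1:168) {
  model$Update(l = lb_t[[t]], u = ub_t[[t]])  # swap in this hour's bounds
  res <- model$Solve()
  # store res$x for hour t ...
}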
Disclaimer: I am one of the OSQP authors.
The cvm.test() from the dgof package provides a way of doing the one-sample Cramer-von Mises test on discrete distributions; my goal is to develop a function that does the test for continuous distributions as well (like the Kolmogorov-Smirnov ks.test() from the stats package).
Note: this post is concerned only with a fully specified distribution-function null hypothesis, so please no bootstrapping or Monte Carlo simulation here.
> cvm.test
function (x, y, type = c("W2", "U2", "A2"), simulate.p.value = FALSE,
B = 2000, tol = 1e-08)
{
cvm.pval.disc <- function(STAT, lambda) {
x <- STAT
theta <- function(u) {
VAL <- 0
for (i in 1:length(lambda)) {
VAL <- VAL + 0.5 * atan(lambda[i] * u)
}
return(VAL - 0.5 * x * u)
}
rho <- function(u) {
VAL <- 0
for (i in 1:length(lambda)) {
VAL <- VAL + log(1 + lambda[i]^2 * u^2)
}
VAL <- exp(VAL * 0.25)
return(VAL)
}
fun <- function(u) return(sin(theta(u))/(u * rho(u)))
pval <- 0
try(pval <- 0.5 + integrate(fun, 0, Inf, subdivisions = 1e+06)$value/pi,
silent = TRUE)
if (pval > 0.001)
return(pval)
if (pval <= 0.001) {
df <- sum(lambda != 0)
est1 <- dchisq(STAT/max(lambda), df)
logf <- function(t) {
ans <- -t * STAT
ans <- ans - 0.5 * sum(log(1 - 2 * t * lambda))
return(ans)
}
est2 <- 1
try(est2 <- exp(nlm(logf, 1/(4 * max(lambda)))$minimum),
silent = TRUE)
return(min(est1, est2))
}
}
cvm.stat.disc <- function(x, y, type = c("W2", "U2", "A2")) {
type <- match.arg(type)
I <- knots(y)
N <- length(x)
e <- diff(c(0, N * y(I)))
obs <- rep(0, length(I))
for (j in 1:length(I)) {
obs[j] <- length(which(x == I[j]))
}
S <- cumsum(obs)
T <- cumsum(e)
H <- T/N
p <- e/N
t <- (p + p[c(2:length(p), 1)])/2
Z <- S - T
Zbar <- sum(Z * t)
S0 <- diag(p) - p %*% t(p)
A <- matrix(1, length(p), length(p))
A <- apply(row(A) >= col(A), 2, as.numeric)
E <- diag(t)
One <- rep(1, nrow(E))
K <- diag(0, length(H))
diag(K)[-length(H)] <- 1/(H[-length(H)] * (1 - H[-length(H)]))
Sy <- A %*% S0 %*% t(A)
M <- switch(type, W2 = E, U2 = (diag(1, nrow(E)) - E %*%
One %*% t(One)) %*% E %*% (diag(1, nrow(E)) - One %*%
t(One) %*% E), A2 = E %*% K)
lambda <- eigen(M %*% Sy)$values
STAT <- switch(type, W2 = sum(Z^2 * t)/N, U2 = sum((Z -
Zbar)^2 * t)/N, A2 = sum((Z^2 * t/(H * (1 - H)))[-length(I)])/N)
return(c(STAT, lambda))
}
cvm.pval.disc.sim <- function(STATISTIC, lambda, y, type,
tol, B) {
knots.y <- knots(y)
fknots.y <- y(knots.y)
u <- runif(B * length(x))
u <- sapply(u, function(a) return(knots.y[sum(a > fknots.y) +
1]))
dim(u) <- c(B, length(x))
s <- apply(u, 1, cvm.stat.disc, y, type)
s <- s[1, ]
return(sum(s >= STATISTIC - tol)/B)
}
type <- match.arg(type)
DNAME <- deparse(substitute(x))
if (is.stepfun(y)) {
if (length(setdiff(x, knots(y))) != 0) {
stop("Data are incompatable with null distribution; ",
"Note: This function is meant only for discrete distributions ",
"you may be receiving this error because y is continuous.")
}
tempout <- cvm.stat.disc(x, y, type = type)
STAT <- tempout[1]
lambda <- tempout[2:length(tempout)]
if (!simulate.p.value) {
PVAL <- cvm.pval.disc(STAT, lambda)
}
else {
PVAL <- cvm.pval.disc.sim(STAT, lambda, y, type,
tol, B)
}
METHOD <- paste("Cramer-von Mises -", type)
names(STAT) <- as.character(type)
RVAL <- list(statistic = STAT, p.value = PVAL, alternative = "Two.sided",
method = METHOD, data.name = DNAME)
}
else {
stop("Null distribution must be a discrete.")
}
class(RVAL) <- "htest"
return(RVAL)
}
<environment: namespace:dgof>
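For reference, a minimal illustration (not from the original post) of how the discrete version is called; the null must be supplied as a step function such as an ecdf:
library(dgof)
set.seed(1)
x <- sample(1:5, 100, replace = TRUE)
cvm.test(x, ecdf(1:5))  # one-sample W2 test against a discrete uniform null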
The Kolmogorov-Smirnov ks.test() for comparison; the source printed below is the dgof package's copy of the stats function (note that it does both the one-sample and two-sample tests):
> ks.test
function (x, y, ..., alternative = c("two.sided", "less", "greater"),
exact = NULL, tol = 1e-08, simulate.p.value = FALSE, B = 2000)
{
pkolmogorov1x <- function(x, n) {
if (x <= 0)
return(0)
if (x >= 1)
return(1)
j <- seq.int(from = 0, to = floor(n * (1 - x)))
1 - x * sum(exp(lchoose(n, j) + (n - j) * log(1 - x -
j/n) + (j - 1) * log(x + j/n)))
}
exact.pval <- function(alternative, STATISTIC, x, n, y, knots.y,
tol) {
ts.pval <- function(S, x, n, y, knots.y, tol) {
f_n <- ecdf(x)
eps <- min(tol, min(diff(knots.y)) * tol)
eps2 <- min(tol, min(diff(y(knots.y))) * tol)
a <- rep(0, n)
b <- a
f_a <- a
for (i in 1:n) {
a[i] <- min(c(knots.y[which(y(knots.y) + S >=
i/n + eps2)[1]], Inf), na.rm = TRUE)
b[i] <- min(c(knots.y[which(y(knots.y) - S >
(i - 1)/n - eps2)[1]], Inf), na.rm = TRUE)
f_a[i] <- ifelse(!(a[i] %in% knots.y), y(a[i]),
y(a[i] - eps))
}
f_b <- y(b)
p <- rep(1, n + 1)
for (i in 1:n) {
tmp <- 0
for (k in 0:(i - 1)) {
tmp <- tmp + choose(i, k) * (-1)^(i - k - 1) *
max(f_b[k + 1] - f_a[i], 0)^(i - k) * p[k +
1]
}
p[i + 1] <- tmp
}
p <- max(0, 1 - p[n + 1])
if (p > 1) {
warning("numerical instability in p-value calculation.")
p <- 1
}
return(p)
}
less.pval <- function(S, n, H, z, tol) {
m <- ceiling(n * (1 - S))
c <- S + (1:m - 1)/n
CDFVAL <- H(sort(z))
for (j in 1:length(c)) {
ifelse((min(abs(c[j] - CDFVAL)) < tol), c[j] <- 1 -
c[j], c[j] <- 1 - CDFVAL[which(order(c(c[j],
CDFVAL)) == 1)])
}
b <- rep(0, m)
b[1] <- 1
for (k in 1:(m - 1)) b[k + 1] <- 1 - sum(choose(k,
1:k - 1) * c[1:k]^(k - 1:k + 1) * b[1:k])
p <- sum(choose(n, 0:(m - 1)) * c^(n - 0:(m - 1)) *
b)
return(p)
}
greater.pval <- function(S, n, H, z, tol) {
m <- ceiling(n * (1 - S))
c <- 1 - (S + (1:m - 1)/n)
CDFVAL <- c(0, H(sort(z)))
for (j in 1:length(c)) {
if (!(min(abs(c[j] - CDFVAL)) < tol))
c[j] <- CDFVAL[which(order(c(c[j], CDFVAL)) ==
1) - 1]
}
b <- rep(0, m)
b[1] <- 1
for (k in 1:(m - 1)) b[k + 1] <- 1 - sum(choose(k,
1:k - 1) * c[1:k]^(k - 1:k + 1) * b[1:k])
p <- sum(choose(n, 0:(m - 1)) * c^(n - 0:(m - 1)) *
b)
return(p)
}
p <- switch(alternative, two.sided = ts.pval(STATISTIC,
x, n, y, knots.y, tol), less = less.pval(STATISTIC,
n, y, knots.y, tol), greater = greater.pval(STATISTIC,
n, y, knots.y, tol))
return(p)
}
sim.pval <- function(alternative, STATISTIC, x, n, y, knots.y,
tol, B) {
fknots.y <- y(knots.y)
u <- runif(B * length(x))
u <- sapply(u, function(a) return(knots.y[sum(a > fknots.y) +
1]))
dim(u) <- c(B, length(x))
getks <- function(a, knots.y, fknots.y) {
dev <- c(0, ecdf(a)(knots.y) - fknots.y)
STATISTIC <- switch(alternative, two.sided = max(abs(dev)),
greater = max(dev), less = max(-dev))
return(STATISTIC)
}
s <- apply(u, 1, getks, knots.y, fknots.y)
return(sum(s >= STATISTIC - tol)/B)
}
alternative <- match.arg(alternative)
DNAME <- deparse(substitute(x))
x <- x[!is.na(x)]
n <- length(x)
if (n < 1L)
stop("not enough 'x' data")
PVAL <- NULL
if (is.numeric(y)) {
DNAME <- paste(DNAME, "and", deparse(substitute(y)))
y <- y[!is.na(y)]
n.x <- as.double(n)
n.y <- length(y)
if (n.y < 1L)
stop("not enough 'y' data")
if (is.null(exact))
exact <- (n.x * n.y < 10000)
METHOD <- "Two-sample Kolmogorov-Smirnov test"
TIES <- FALSE
n <- n.x * n.y/(n.x + n.y)
w <- c(x, y)
z <- cumsum(ifelse(order(w) <= n.x, 1/n.x, -1/n.y))
if (length(unique(w)) < (n.x + n.y)) {
warning("cannot compute correct p-values with ties")
z <- z[c(which(diff(sort(w)) != 0), n.x + n.y)]
TIES <- TRUE
}
STATISTIC <- switch(alternative, two.sided = max(abs(z)),
greater = max(z), less = -min(z))
nm_alternative <- switch(alternative, two.sided = "two-sided",
less = "the CDF of x lies below that of y", greater = "the CDF of x lies above that of y")
if (exact && (alternative == "two.sided") && !TIES)
PVAL <- 1 - .C("psmirnov2x", p = as.double(STATISTIC),
as.integer(n.x), as.integer(n.y), PACKAGE = "dgof")$p
}
else if (is.stepfun(y)) {
z <- knots(y)
if (is.null(exact))
exact <- (n <= 30)
if (exact && n > 30) {
warning("numerical instability may affect p-value")
}
METHOD <- "One-sample Kolmogorov-Smirnov test"
dev <- c(0, ecdf(x)(z) - y(z))
STATISTIC <- switch(alternative, two.sided = max(abs(dev)),
greater = max(dev), less = max(-dev))
if (simulate.p.value) {
PVAL <- sim.pval(alternative, STATISTIC, x, n, y,
z, tol, B)
}
else {
PVAL <- switch(exact, `TRUE` = exact.pval(alternative,
STATISTIC, x, n, y, z, tol), `FALSE` = NULL)
}
nm_alternative <- switch(alternative, two.sided = "two-sided",
less = "the CDF of x lies below the null hypothesis",
greater = "the CDF of x lies above the null hypothesis")
}
else {
if (is.character(y))
y <- get(y, mode = "function")
if (mode(y) != "function")
stop("'y' must be numeric or a string naming a valid function")
if (is.null(exact))
exact <- (n < 100)
METHOD <- "One-sample Kolmogorov-Smirnov test"
TIES <- FALSE
if (length(unique(x)) < n) {
warning(paste("default ks.test() cannot compute correct p-values with ties;\n",
"see help page for one-sample Kolmogorov test for discrete distributions."))
TIES <- TRUE
}
x <- y(sort(x), ...) - (0:(n - 1))/n
STATISTIC <- switch(alternative, two.sided = max(c(x,
1/n - x)), greater = max(1/n - x), less = max(x))
if (exact && !TIES) {
PVAL <- if (alternative == "two.sided")
1 - .C("pkolmogorov2x", p = as.double(STATISTIC),
as.integer(n), PACKAGE = "dgof")$p
else 1 - pkolmogorov1x(STATISTIC, n)
}
nm_alternative <- switch(alternative, two.sided = "two-sided",
less = "the CDF of x lies below the null hypothesis",
greater = "the CDF of x lies above the null hypothesis")
}
names(STATISTIC) <- switch(alternative, two.sided = "D",
greater = "D^+", less = "D^-")
pkstwo <- function(x, tol = 1e-06) {
if (is.numeric(x))
x <- as.vector(x)
else stop("argument 'x' must be numeric")
p <- rep(0, length(x))
p[is.na(x)] <- NA
IND <- which(!is.na(x) & (x > 0))
if (length(IND)) {
p[IND] <- .C("pkstwo", as.integer(length(x[IND])),
p = as.double(x[IND]), as.double(tol), PACKAGE = "dgof")$p
}
return(p)
}
if (is.null(PVAL)) {
PVAL <- ifelse(alternative == "two.sided", 1 - pkstwo(sqrt(n) *
STATISTIC), exp(-2 * n * STATISTIC^2))
}
RVAL <- list(statistic = STATISTIC, p.value = PVAL, alternative = nm_alternative,
method = METHOD, data.name = DNAME)
class(RVAL) <- "htest"
return(RVAL)
}
<environment: namespace:dgof>
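And the continuous one-sample case that the sought-after CvM function would mirror looks like this (illustrative):
set.seed(1)
ks.test(rnorm(100), "pnorm")  # fully specified continuous null, no simulation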