I have a function defined below named algor, which is translated from MATLAB to R. In order to make the function faster, I am using the foreach construct for the first time. I have the complete function code below:
library("ramify")
library("foreach")
algor <- function (vc) {
# initialize A, ybar, and Ia
A <- 0
ybar <- 0
Ia <- 0
# x is the first column of vc
x <- vc[, 1, drop = FALSE]
# y is the second column of vc
y <- vc[, 2, drop = FALSE]
# n is the length of x
n <- length(x)
foreach(i = 1:(n-1), .combine = 'c', .export = c("A", "ybar", "Ia", "x", "y")) %do% {
A <- A + 0.5 * (x[i] - x[i+1]) * (y[i] + y[i+1])
ybar <- ybar + (1 / 6) * (x[i] - x[i+1]) * (y[i] ^ 2 + y[i] * y[i+1] + y[i+1] ^ 2)
Ia <- Ia + (1 / 12) * (x[i] - x[i+1]) * (y[i] ^ 3 + y[i] ^ 2 * y[i+1] + y[i] * y[i+1] ^ 2 + y[i+1] ^ 3)
}
props <- mat("A, Ia, ybar", eval = TRUE)
return(props)
}
inner <- mat("0, 300; 300, 300; 300, 695; 0, 695; 0, 300")
algor(inner)
Although I have exported A, ybar, Ia, x, and y I am getting an error that the object A is not found, which is below:
Error in eval(parse(text = paste0("c(", paste0(char_vals, collapse = ","), :
object 'A' not found
Called from: eval(parse(text = paste0("c(", paste0(char_vals, collapse = ","),
")")))
How do I get foreach to recognize the defined objects: A, ybar, Ia, x, and y?
Thank you.
Try defining the .GlobalEnv variables within the foreach loop in every call.
library("ramify")
library("foreach")
algor <- function (vc) {
# initialize A, ybar, and Ia
A <- 0
ybar <- 0
Ia <- 0
# x is the first column of vc
x <- vc[, 1, drop = FALSE]
# y is the second column of vc
y <- vc[, 2, drop = FALSE]
# n is the length of x
n <- length(x)
foreach(i = 1:(n-1), .combine = 'c', .export = c("A", "ybar", "Ia", "x", "y")) %do% {
.GlobalEnv$A <- A
.GlobalEnv$ybar <- ybar
.GlobalEnv$Ia <- Ia
A <- A + 0.5 * (x[i] - x[i+1]) * (y[i] + y[i+1])
ybar <- ybar + (1 / 6) * (x[i] - x[i+1]) * (y[i] ^ 2 + y[i] * y[i+1] + y[i+1] ^ 2)
Ia <- Ia + (1 / 12) * (x[i] - x[i+1]) * (y[i] ^ 3 + y[i] ^ 2 * y[i+1] + y[i] * y[i+1] ^ 2 + y[i+1] ^ 3)
}
props <- mat("A, Ia, ybar", eval = TRUE)
return(props)
}
inner <- mat("0, 300; 300, 300; 300, 695; 0, 695; 0, 300")
algor(inner)
This returns:
[,1] [,2] [,3]
[1,] 118500 30870237500 58953750
Related
mR <- readRDS("mData_retake-kopi.r")
LL <- function(theta, mR){
if (alpha + beta != 1 && 0 > alpha && alpha < 1 && v < 2) {
cat("error: Constraint doesn't hold")
return(NULL)
}
alpha <- theta[1]
beta <- theta[2]
mu <- theta[3]
v <- theta[4]
N <- dim(mR)[1]
sig <- matrix(0,N,2)
sig[1] <- var(mR)
ll <- matrix(0,N-1)
for (n in seq(2,N)){
sig[n,1] <- alpha * mR[n-1] + beta * sig[n-1]
ll[n-1] <- (gamma( (v+1) / 2) / sqrt(v * pi) * gamma( v / 2 )) * ( 1 + (e^2 / v))^(-(v+1)/2)
}
return( -mean(ll) )
}
theta <- c(0.90, 0.10, 0, 5)
LL(theta,mR)
It returns
Error in matrix(0, nrow = unlist(N), ncol = 2) : non-numeric matrix
extent
Having an issue with mR.
I believe I have defined it by reading my data, but it keeps returning non-numeric matrix extent in the "sig" part.
I need to find the negative average log-likelihood function.
I am using mult.chart for SPC in an Rmarkdown file for a proof of concept. I just want to print the chart and leave out all the decompositions, xmv, covariance and t2.
when I use
t <- mult.chart(na.omit(test.data), type = "t2", Xmv = Xmv, S = S, colm = colm)
the object has everything but the chart.
> str(t)
List of 5
$ : chr "Hotelling Control Chart"
$ ucl : num 13.8
$ t2 : num [1:154, 1] 6.1 1.11 3.13 0.66 2.26 2.13 2.02 3.45 4.17 2.41 ...
$ Xmv : num [1:4] 130.9 94.8 957.4 490.1
$ covariance: num [1:4, 1:4] 320 11 130 1000 11 0.87 4.9 32 130 4.9 ...
How can I extract the chart out of it?
I updated the code of the function to add a ggplot chart to the output. I am posting the code below for everyone's benefit.
mult.chart2 <- function (type = c("chi", "t2", "mewma", "mcusum",
"mcusum2"), x, Xmv, S, colm, alpha = 0.01, lambda = 0.1,
k = 0.5, h = 5.5, phase = 1, method = "sw", ...)
{
type <- match.arg(type)
p <- ncol(x)
m <- nrow(x)
if (class(x) == "matrix" || class(x) == "data.frame")
(x <- array(data.matrix(x), c(m, p, 1)))
n <- dim(x)[3]
if (!missing(Xmv))
(phase <- 2)
x.jk <- matrix(0, m, p)
t2 <- matrix(0, m, 1)
x.jk <- apply(x, 1:2, mean)
if (missing(Xmv))
(Xmv <- colMeans(x.jk))
if (missing(S))
(S <- covariance(x, method = method))
if (missing(colm))
(colm <- nrow(x))
if (type == "chi") {
name <- paste("Chi-squared Control Chart")
for (ii in 1:m) {
t2[ii, 1] <- n * t(x.jk[ii, ] - Xmv) %*% solve(S) %*%
(x.jk[ii, ] - Xmv)
}
ucl <- qchisq(1 - alpha, p)
if (any(t2 > ucl)) {
cat("The following(s) point(s) fall outside the control limits")
t3 <- which(t2 > ucl)
print(t3)
}
}
if (type == "t2") {
name <- paste("Hotelling Control Chart")
for (ii in 1:m) {
t2[ii, 1] <- n * t(x.jk[ii, ] - Xmv) %*% solve(S) %*%
(x.jk[ii, ] - Xmv)
}
ifelse(n == 1, ifelse(phase == 1, ucl <- ((colm - 1)^2)/colm *
qbeta(1 - alpha, p/2, ((colm - p - 1)/2)), ucl <- ((p *
(colm + 1) * (colm - 1))/((colm^2) - colm * p)) *
qf(1 - alpha, p, colm - p)), ifelse(phase == 1, ucl <- (p *
(colm - 1) * (n - 1))/(colm * n - colm - p + 1) *
qf(1 - alpha, p, colm * n - colm - p + 1), ucl <- (p *
(colm + 1) * (n - 1))/(colm * n - colm - p + 1) *
qf(1 - alpha, p, colm * n - colm - p + 1)))
if (any(t2 > ucl)) {
cat("The following(s) point(s) fall outside of the control limits")
t3 <- which(t2 > ucl)
print(t3)
for (ii in 1:length(t3)) {
v = 1
k = 0
for (i in 1:p) {
k <- k + factorial(p)/(factorial(i) * factorial(p -
i))
}
q <- matrix(0, k, p + 3)
for (i in 1:p) {
a <- t(combn(p, i))
for (l in 1:nrow(a)) {
for (j in 1:ncol(a)) {
q[v, j + 3] <- a[l, j]
}
v = v + 1
}
}
for (i in 1:nrow(q)) {
b <- subset(q[i, 4:ncol(q)], q[i, 4:ncol(q)] >
0)
di <- length(b)
if (length(b) > 1) {
q[i, 1] <- n * t(Xmv[b] - x.jk[t3[ii], ][b]) %*%
solve(S[b, b]) %*% (Xmv[b] - x.jk[t3[ii],
][b])
}
else (q[i, 1] <- n * (x.jk[t3[ii], ][b] - Xmv[b])^2/S[b,
b])
ifelse(n == 1, ifelse(phase == 1, q[i, 2] <- ((colm -
1)^2)/colm * qbeta(1 - alpha, di/2, (((2 *
(colm - 1)^2)/(3 * colm - 4) - di - 1)/2)),
q[i, 2] <- ((di * (colm + 1) * (colm - 1))/((colm^2) -
colm * di)) * qf(1 - alpha, di, colm -
di)), ifelse(phase == 1, q[i, 2] <- (di *
(colm - 1) * (n - 1))/(colm * n - colm -
di + 1) * qf(1 - alpha, di, colm * n - colm -
di + 1), q[i, 2] <- (di * (colm + 1) * (n -
1))/(colm * n - colm - di + 1) * qf(1 - alpha,
di, colm * n - colm - di + 1)))
q[i, 3] <- 1 - pf(q[i, 1], di, colm - 1)
}
colnames(q) <- c("t2 decomp", "ucl",
"p-value", 1:p)
print(list(`Decomposition of` = t3[ii]))
print(round(q, 4))
}
}
}
if (type == "mewma") {
h4 <- matrix(c(8.6336, 9.6476, 10.083, 10.3114, 10.4405,
10.5152, 10.5581, 10.5816, 10.5932, 10.814, 11.8961,
12.3505, 12.5845, 12.7143, 12.788, 12.8297, 12.8524,
12.8635, 12.7231, 13.8641, 14.3359, 14.576, 14.7077,
14.7818, 14.8234, 14.846, 14.857, 14.5363, 15.7293,
16.217, 16.4629, 16.5965, 16.6711, 16.7127, 16.7352,
16.7463, 16.2634, 17.5038, 18.0063, 18.2578, 18.3935,
18.4687, 18.5105, 18.5331, 18.5442, 17.9269, 19.2113,
19.7276, 19.9845, 20.1223, 20.1982, 20.2403, 20.2631,
20.2743, 19.541, 20.8665, 21.396, 21.6581, 21.798,
21.8747, 21.9171, 21.9401, 21.9515, 21.1152, 22.4796,
23.0217, 23.2887, 23.4307, 23.5082, 23.551, 23.5742,
23.5858, 22.6565, 24.0579, 24.6119, 24.8838, 25.0278,
25.1062, 25.1493, 25.1728, 25.1846), nrow = 9)
rownames(h4) <- c(seq(0.1, 0.9, by = 0.1))
colnames(h4) <- c(1:9)
z <- matrix(0, m, p)
m1 <- rownames(h4)
m2 <- colnames(h4)
l <- lambda * 10
ucl <- h4[m1[l], m2[p - 1]]
name <- paste("MEWMA Control Chart")
for (i in 1:m) {
if (i == 1) {
z[i, ] <- lambda * (x.jk[i, ] - Xmv)
}
else {
z[i, ] <- lambda * (x.jk[i, ] - Xmv) + (1 - lambda) *
z[i - 1, ]
}
weig <- S * (lambda * (1 - ((1 - lambda)^(2 * i)))/(2 -
lambda))
t2[i, 1] <- t(z[i, ]) %*% solve(weig) %*% z[i, ]
}
}
if (type == "mcusum") {
name <- paste("MCUSUM Control Chart by Crosier (1988)")
ucl <- h
dif <- sweep(x.jk, 2, Xmv)
s <- matrix(0, m, p)
ci <- matrix(0, m, 1)
ci[1] <- sqrt(dif[1, ] %*% solve((S/n)) %*% dif[1, ])
if (ci[1] > k) {
s[1, ] <- (s[1, ] + dif[1, ]) * (1 - k/ci[1])
}
else (s[1, ] = matrix(0, ncol = p))
for (i in 2:m) {
ci[i, ] = sqrt((s[i - 1, ] + dif[i, ]) %*% solve(S/n) %*%
(s[i - 1, ] + dif[i, ]))
if (ci[i] > k) {
s[i, ] = (s[i - 1, ] + dif[i, ]) * (1 - k/ci[i])
}
else {
s[i, ] = matrix(0, ncol = p)
}
}
for (i in 1:m) {
t2[i] = sqrt(s[i, ] %*% solve((S/n)) %*% (s[i, ]))
}
}
if (type == "mcusum2") {
name <- paste("MCUSUM Control Chart by Pignatiello (1990)")
ucl <- h
dif <- sweep(x.jk, 2, Xmv)
s <- matrix(0, m, p)
l <- matrix(0, m, 1)
for (i in 1:m) {
if (i == 1) {
l[i, 1] <- 1
}
if (i > 1) {
if (t2[i - 1, 1] > 0) {
l[i, 1] <- l[i - 1, 1] + 1
}
else {
l[i, 1] <- 1
}
}
if (i == ((i - l[i, 1] + 1))) {
s[i, ] <- dif[i, ]
}
else {
s[i, ] <- colSums(dif[(i - l[i, 1] + 1):i, ])
}
t2[i, 1] <- max(0, (t(s[i, ]) %*% solve(S/n) %*%
s[i, ])^0.5 - k * l[i, 1])
}
}
t3 <- which(t2 > ucl)
# par(mar = c(4, 5, 3, 5))
# plot(t2, ylim = c(0, 1.1 * max(max(t2), ucl)), main = name,
# xlab = "Sample", ylab = expression(T^2), type = "o",
# las = 1)
# points(t3, t2[t3], col = 2)
# segments(0, ucl, m, ucl, col = 2)
# mtext(paste(" UCL=", round(ucl, 2)), side = 4, at = ucl,
# las = 2)
t2df <- data.frame(t2)
t2df$oob <- ifelse(t2df$t2 > ucl, "bad", "good")
t2df$sample <- seq(1:nrow(t2df))
p2 <- ggplot(data = t2df) +
geom_point(aes(x = sample, y = t2, color = oob)) +
scale_color_manual(values = c( "Red", "Grey20")) +
geom_path(aes(x = sample, y = t2))+
geom_hline(yintercept = ucl, color = "red") +
theme(legend.position = "none")
outList = list(
name,
ucl = round(ucl, 2),
t2 = round(t2, 2),
Xmv = round(Xmv, 2),
covariance = signif(S, 2),
plot2 = p2
)
return(outList)
}
I have trouble in writing cyclic coordinate search algorithm.
ccsfinal <- function(x, alpha, e, Errortolarence, f) {
k <- 0
alpha[1] <- 2
repeat {
for (j in 1:10) {
alpha[j] <- which.min(f(x[k] + (alpha[j]*e[j])))
x[k + 1] <- x[k] + alpha[j]*e[k]
}
k <- k + 1
if (abs(x[k] - x[k - 1]) < Errortolerance) {
return(x[k])
}
}
}
I try to work that algorithm with the variables:
ccsfinal(c(5, 5), 2, c(2, 2), 1e-5, realf)
realf <- function(x) {
(5*x[1] - x[2])^4 + (x[1] - 2)^2 + x[1] - 2*x[2] + 12
}
Where is my mistake anyone can help me?
I've been running the following code which returns the correct coefficients. However, no matter where I put a plot call, I can't get any plot output.
I'm not sure if a reproducible example is needed here, as I think this can be solved by looking at my gradientDescent function below? It's my first attempt at running this algorithm in R:
gradientDescent <- function(x, y, learn_rate, conv_threshold, n, max_iter) {
m <- runif(1, 0, 1)
c <- runif(1, 0, 1)
yhat <- m * x + c
cost_error <- (1 / (n + 2)) * sum((y - yhat) ^ 2)
converged = F
iterations = 0
while(converged == F) {
m_new <- m - learn_rate * ((1 / n) * (sum((yhat - y) * x)))
c_new <- c - learn_rate * ((1 / n) * (sum(yhat - y)))
m <- m_new
c <- c_new
yhat <- m * x + c
cost_error_new <- (1 / (n + 2)) * sum((y - yhat) ^ 2)
if(cost_error - cost_error_new <= conv_threshold) {
converged = T
}
iterations = iterations + 1
if(iterations > max_iter) {
converged = T
return(paste("Optimal intercept:", c, "Optimal slope:", m))
}
}
}
It's unclear what you have been doing that was ineffective. The base graphics functions plot and abline should be able to produce output even when used inside functions. Lattice and ggplot2 graphics are based on grid-grpahics and would therefore need a print() wrapped around the function calls to create output (as described in the R-FAQ). So try this:
gradientDescent <- function(x, y, learn_rate, conv_threshold, n, max_iter)
{ ## plot.new() perhaps not needed
plot(x,y)
m <- runif(1, 0, 1)
c <- runif(1, 0, 1)
yhat <- m * x + c
cost_error <- (1 / (n + 2)) * sum((y - yhat) ^ 2)
converged = F
iterations = 0
while(converged == F) {
m_new <- m - learn_rate * ((1 / n) * (sum((yhat - y) * x)))
c_new <- c - learn_rate * ((1 / n) * (sum(yhat - y)))
m <- m_new
c <- c_new
yhat <- m * x + c
cost_error_new <- (1 / (n + 2)) * sum((y - yhat) ^ 2)
if(cost_error - cost_error_new <= conv_threshold) {
converged = T
}
iterations = iterations + 1
if(iterations > max_iter) { abline( c, m) #calculated
dev.off()
converged = T
return(paste("Optimal intercept:", c, "Optimal slope:", m))
}
}
}
The cvm.test() from dgof package provides a way of doing the one-sample Cramer-von Mises test on discrete distributions, my goal is to develop a function that does the test for continuous distributions as well (like the Kolmogorov-Smirnov ks.test() from the stats package).
Note:this post is concerned only with fully specified df null hypothesis, so please no bootstraping or Monte Carlo Simulation here
> cvm.test
function (x, y, type = c("W2", "U2", "A2"), simulate.p.value = FALSE,
B = 2000, tol = 1e-08)
{
cvm.pval.disc <- function(STAT, lambda) {
x <- STAT
theta <- function(u) {
VAL <- 0
for (i in 1:length(lambda)) {
VAL <- VAL + 0.5 * atan(lambda[i] * u)
}
return(VAL - 0.5 * x * u)
}
rho <- function(u) {
VAL <- 0
for (i in 1:length(lambda)) {
VAL <- VAL + log(1 + lambda[i]^2 * u^2)
}
VAL <- exp(VAL * 0.25)
return(VAL)
}
fun <- function(u) return(sin(theta(u))/(u * rho(u)))
pval <- 0
try(pval <- 0.5 + integrate(fun, 0, Inf, subdivisions = 1e+06)$value/pi,
silent = TRUE)
if (pval > 0.001)
return(pval)
if (pval <= 0.001) {
df <- sum(lambda != 0)
est1 <- dchisq(STAT/max(lambda), df)
logf <- function(t) {
ans <- -t * STAT
ans <- ans - 0.5 * sum(log(1 - 2 * t * lambda))
return(ans)
}
est2 <- 1
try(est2 <- exp(nlm(logf, 1/(4 * max(lambda)))$minimum),
silent = TRUE)
return(min(est1, est2))
}
}
cvm.stat.disc <- function(x, y, type = c("W2", "U2", "A2")) {
type <- match.arg(type)
I <- knots(y)
N <- length(x)
e <- diff(c(0, N * y(I)))
obs <- rep(0, length(I))
for (j in 1:length(I)) {
obs[j] <- length(which(x == I[j]))
}
S <- cumsum(obs)
T <- cumsum(e)
H <- T/N
p <- e/N
t <- (p + p[c(2:length(p), 1)])/2
Z <- S - T
Zbar <- sum(Z * t)
S0 <- diag(p) - p %*% t(p)
A <- matrix(1, length(p), length(p))
A <- apply(row(A) >= col(A), 2, as.numeric)
E <- diag(t)
One <- rep(1, nrow(E))
K <- diag(0, length(H))
diag(K)[-length(H)] <- 1/(H[-length(H)] * (1 - H[-length(H)]))
Sy <- A %*% S0 %*% t(A)
M <- switch(type, W2 = E, U2 = (diag(1, nrow(E)) - E %*%
One %*% t(One)) %*% E %*% (diag(1, nrow(E)) - One %*%
t(One) %*% E), A2 = E %*% K)
lambda <- eigen(M %*% Sy)$values
STAT <- switch(type, W2 = sum(Z^2 * t)/N, U2 = sum((Z -
Zbar)^2 * t)/N, A2 = sum((Z^2 * t/(H * (1 - H)))[-length(I)])/N)
return(c(STAT, lambda))
}
cvm.pval.disc.sim <- function(STATISTIC, lambda, y, type,
tol, B) {
knots.y <- knots(y)
fknots.y <- y(knots.y)
u <- runif(B * length(x))
u <- sapply(u, function(a) return(knots.y[sum(a > fknots.y) +
1]))
dim(u) <- c(B, length(x))
s <- apply(u, 1, cvm.stat.disc, y, type)
s <- s[1, ]
return(sum(s >= STATISTIC - tol)/B)
}
type <- match.arg(type)
DNAME <- deparse(substitute(x))
if (is.stepfun(y)) {
if (length(setdiff(x, knots(y))) != 0) {
stop("Data are incompatable with null distribution; ",
"Note: This function is meant only for discrete distributions ",
"you may be receiving this error because y is continuous.")
}
tempout <- cvm.stat.disc(x, y, type = type)
STAT <- tempout[1]
lambda <- tempout[2:length(tempout)]
if (!simulate.p.value) {
PVAL <- cvm.pval.disc(STAT, lambda)
}
else {
PVAL <- cvm.pval.disc.sim(STAT, lambda, y, type,
tol, B)
}
METHOD <- paste("Cramer-von Mises -", type)
names(STAT) <- as.character(type)
RVAL <- list(statistic = STAT, p.value = PVAL, alternative = "Two.sided",
method = METHOD, data.name = DNAME)
}
else {
stop("Null distribution must be a discrete.")
}
class(RVAL) <- "htest"
return(RVAL)
}
<environment: namespace:dgof>
Kolmogorov-Smirnov ks.test() from stats package for comparison (note that this function does both the one-sample and two-sample tests):
> ks.test
function (x, y, ..., alternative = c("two.sided", "less", "greater"),
exact = NULL, tol = 1e-08, simulate.p.value = FALSE, B = 2000)
{
pkolmogorov1x <- function(x, n) {
if (x <= 0)
return(0)
if (x >= 1)
return(1)
j <- seq.int(from = 0, to = floor(n * (1 - x)))
1 - x * sum(exp(lchoose(n, j) + (n - j) * log(1 - x -
j/n) + (j - 1) * log(x + j/n)))
}
exact.pval <- function(alternative, STATISTIC, x, n, y, knots.y,
tol) {
ts.pval <- function(S, x, n, y, knots.y, tol) {
f_n <- ecdf(x)
eps <- min(tol, min(diff(knots.y)) * tol)
eps2 <- min(tol, min(diff(y(knots.y))) * tol)
a <- rep(0, n)
b <- a
f_a <- a
for (i in 1:n) {
a[i] <- min(c(knots.y[which(y(knots.y) + S >=
i/n + eps2)[1]], Inf), na.rm = TRUE)
b[i] <- min(c(knots.y[which(y(knots.y) - S >
(i - 1)/n - eps2)[1]], Inf), na.rm = TRUE)
f_a[i] <- ifelse(!(a[i] %in% knots.y), y(a[i]),
y(a[i] - eps))
}
f_b <- y(b)
p <- rep(1, n + 1)
for (i in 1:n) {
tmp <- 0
for (k in 0:(i - 1)) {
tmp <- tmp + choose(i, k) * (-1)^(i - k - 1) *
max(f_b[k + 1] - f_a[i], 0)^(i - k) * p[k +
1]
}
p[i + 1] <- tmp
}
p <- max(0, 1 - p[n + 1])
if (p > 1) {
warning("numerical instability in p-value calculation.")
p <- 1
}
return(p)
}
less.pval <- function(S, n, H, z, tol) {
m <- ceiling(n * (1 - S))
c <- S + (1:m - 1)/n
CDFVAL <- H(sort(z))
for (j in 1:length(c)) {
ifelse((min(abs(c[j] - CDFVAL)) < tol), c[j] <- 1 -
c[j], c[j] <- 1 - CDFVAL[which(order(c(c[j],
CDFVAL)) == 1)])
}
b <- rep(0, m)
b[1] <- 1
for (k in 1:(m - 1)) b[k + 1] <- 1 - sum(choose(k,
1:k - 1) * c[1:k]^(k - 1:k + 1) * b[1:k])
p <- sum(choose(n, 0:(m - 1)) * c^(n - 0:(m - 1)) *
b)
return(p)
}
greater.pval <- function(S, n, H, z, tol) {
m <- ceiling(n * (1 - S))
c <- 1 - (S + (1:m - 1)/n)
CDFVAL <- c(0, H(sort(z)))
for (j in 1:length(c)) {
if (!(min(abs(c[j] - CDFVAL)) < tol))
c[j] <- CDFVAL[which(order(c(c[j], CDFVAL)) ==
1) - 1]
}
b <- rep(0, m)
b[1] <- 1
for (k in 1:(m - 1)) b[k + 1] <- 1 - sum(choose(k,
1:k - 1) * c[1:k]^(k - 1:k + 1) * b[1:k])
p <- sum(choose(n, 0:(m - 1)) * c^(n - 0:(m - 1)) *
b)
return(p)
}
p <- switch(alternative, two.sided = ts.pval(STATISTIC,
x, n, y, knots.y, tol), less = less.pval(STATISTIC,
n, y, knots.y, tol), greater = greater.pval(STATISTIC,
n, y, knots.y, tol))
return(p)
}
sim.pval <- function(alternative, STATISTIC, x, n, y, knots.y,
tol, B) {
fknots.y <- y(knots.y)
u <- runif(B * length(x))
u <- sapply(u, function(a) return(knots.y[sum(a > fknots.y) +
1]))
dim(u) <- c(B, length(x))
getks <- function(a, knots.y, fknots.y) {
dev <- c(0, ecdf(a)(knots.y) - fknots.y)
STATISTIC <- switch(alternative, two.sided = max(abs(dev)),
greater = max(dev), less = max(-dev))
return(STATISTIC)
}
s <- apply(u, 1, getks, knots.y, fknots.y)
return(sum(s >= STATISTIC - tol)/B)
}
alternative <- match.arg(alternative)
DNAME <- deparse(substitute(x))
x <- x[!is.na(x)]
n <- length(x)
if (n < 1L)
stop("not enough 'x' data")
PVAL <- NULL
if (is.numeric(y)) {
DNAME <- paste(DNAME, "and", deparse(substitute(y)))
y <- y[!is.na(y)]
n.x <- as.double(n)
n.y <- length(y)
if (n.y < 1L)
stop("not enough 'y' data")
if (is.null(exact))
exact <- (n.x * n.y < 10000)
METHOD <- "Two-sample Kolmogorov-Smirnov test"
TIES <- FALSE
n <- n.x * n.y/(n.x + n.y)
w <- c(x, y)
z <- cumsum(ifelse(order(w) <= n.x, 1/n.x, -1/n.y))
if (length(unique(w)) < (n.x + n.y)) {
warning("cannot compute correct p-values with ties")
z <- z[c(which(diff(sort(w)) != 0), n.x + n.y)]
TIES <- TRUE
}
STATISTIC <- switch(alternative, two.sided = max(abs(z)),
greater = max(z), less = -min(z))
nm_alternative <- switch(alternative, two.sided = "two-sided",
less = "the CDF of x lies below that of y", greater = "the CDF of x lies above that of y")
if (exact && (alternative == "two.sided") && !TIES)
PVAL <- 1 - .C("psmirnov2x", p = as.double(STATISTIC),
as.integer(n.x), as.integer(n.y), PACKAGE = "dgof")$p
}
else if (is.stepfun(y)) {
z <- knots(y)
if (is.null(exact))
exact <- (n <= 30)
if (exact && n > 30) {
warning("numerical instability may affect p-value")
}
METHOD <- "One-sample Kolmogorov-Smirnov test"
dev <- c(0, ecdf(x)(z) - y(z))
STATISTIC <- switch(alternative, two.sided = max(abs(dev)),
greater = max(dev), less = max(-dev))
if (simulate.p.value) {
PVAL <- sim.pval(alternative, STATISTIC, x, n, y,
z, tol, B)
}
else {
PVAL <- switch(exact, `TRUE` = exact.pval(alternative,
STATISTIC, x, n, y, z, tol), `FALSE` = NULL)
}
nm_alternative <- switch(alternative, two.sided = "two-sided",
less = "the CDF of x lies below the null hypothesis",
greater = "the CDF of x lies above the null hypothesis")
}
else {
if (is.character(y))
y <- get(y, mode = "function")
if (mode(y) != "function")
stop("'y' must be numeric or a string naming a valid function")
if (is.null(exact))
exact <- (n < 100)
METHOD <- "One-sample Kolmogorov-Smirnov test"
TIES <- FALSE
if (length(unique(x)) < n) {
warning(paste("default ks.test() cannot compute correct p-values with ties;\n",
"see help page for one-sample Kolmogorov test for discrete distributions."))
TIES <- TRUE
}
x <- y(sort(x), ...) - (0:(n - 1))/n
STATISTIC <- switch(alternative, two.sided = max(c(x,
1/n - x)), greater = max(1/n - x), less = max(x))
if (exact && !TIES) {
PVAL <- if (alternative == "two.sided")
1 - .C("pkolmogorov2x", p = as.double(STATISTIC),
as.integer(n), PACKAGE = "dgof")$p
else 1 - pkolmogorov1x(STATISTIC, n)
}
nm_alternative <- switch(alternative, two.sided = "two-sided",
less = "the CDF of x lies below the null hypothesis",
greater = "the CDF of x lies above the null hypothesis")
}
names(STATISTIC) <- switch(alternative, two.sided = "D",
greater = "D^+", less = "D^-")
pkstwo <- function(x, tol = 1e-06) {
if (is.numeric(x))
x <- as.vector(x)
else stop("argument 'x' must be numeric")
p <- rep(0, length(x))
p[is.na(x)] <- NA
IND <- which(!is.na(x) & (x > 0))
if (length(IND)) {
p[IND] <- .C("pkstwo", as.integer(length(x[IND])),
p = as.double(x[IND]), as.double(tol), PACKAGE = "dgof")$p
}
return(p)
}
if (is.null(PVAL)) {
PVAL <- ifelse(alternative == "two.sided", 1 - pkstwo(sqrt(n) *
STATISTIC), exp(-2 * n * STATISTIC^2))
}
RVAL <- list(statistic = STATISTIC, p.value = PVAL, alternative = nm_alternative,
method = METHOD, data.name = DNAME)
class(RVAL) <- "htest"
return(RVAL)
}
<environment: namespace:dgof>