Related
How do you compute this probability density function, with a triangular distribution of parameters (a,b,c)?
f(x)= 0 , x<a
2(x-a)/((b-a)(c-a)) , a <= x <= c
2(b-x)/((b-a)(b-c)) , c < x <=b
0 , x> b
Expanding on #StéphaneLaurent‘s comments, you can define a piecewise function using a series of ifelse() calls (or dplyr::case_when()).
f <- function(x, A, B, C) {
out <- ifelse(
x < A | B < x,
0,
ifelse(
A <= x & x <= C,
(2*(x-A))/((B-A)*(C-A)),
ifelse(
C < x & x <= B,
(2*(B-x))/((B-A)*(B-C)),
NA_real_
)))
if (any((is.na(out) | is.nan(out)) & (!is.na(x) & !is.nan(x)))) {
warning("f(x) undefined for some input values")
}
out
}
Taking it for a spin:
library(ggplot2)
dat <- expand.grid(
x = seq(-1.5, 1.5, by = 0.1),
A = -1:1,
B = -1:1,
C = -1:1
)
dat$y <- with(dat, f(x, A, B, C))
# Warning message:
# In f(x, A, B, C) : f(x) undefined for some input values
ggplot(dat, aes(x, y)) +
geom_line(aes(color = factor(C))) +
facet_grid(B ~ A, labeller = label_both)
Here is an option for a density function and a random function.
dtri <- function(x, A, B, C) {
n <- length(x)
i <- 1:n
if (length(A) == 1) A <- rep(A, n)
if (length(B) == 1) B <- rep(B, n)
if (length(C) == 1) C <- rep(C, n)
abc <- Rfast::rowSort(matrix(c(A, B, C), n, 3))
bln <- x < abc[,2]
p <- 2*abs(x - abc[i + 2*n*!bln])/(abc[,3] - abc[,1])/(abc[i + n*(2 - bln)] - abc[i + n*(1 - bln)])
p[x < abc[,1] | x > abc[,3]] <- 0
p
}
rtri <- function(n, a, b, c) {
if (a > b) {a <- (b - a) + (b <- a)}
if (b > c) {c <- (b - c) + (b <- c)}
fb <- (b - a)/(c - a)
U <- runif(n)
blna <- U < fb
r <-numeric(n)
r[blna] <- a + sqrt(U[blna]*(c - a)*(b - a))
r[!blna] <- c - sqrt((1 - U[!blna])*(c - a)*(c - b))
r
}
Using R's vectorized cabilities:
my_fun <- function(x, a, b, c) {
i1 <- x >= a & x <= c
i2 <- x > c & x <= b
f <- rep(NA, length(x))
f[x < a | x > b] <- 0
f[i1] <- 2 * (x[i1] - a) / ((b - a) * (c - a))
f[i2] <- 2 * (b - x[i2]) / ((b - a) * (b - c))
return(f)
}
Test:
set.seed(123)
x <- sort(rnorm(1000, 0, 5))
a <- runif(1, -10, -2)
c <- runif(1, -2, 2)
b <- runif(1, 2, 10)
f <- my_fun(x, a, b, c)
plot(x, f, type="l")
I've taken an algorithm from https://rosettacode.org/wiki/Bitmap/Flood_fill and test it with R language. Added diagonal neighnors matching (like Queeni
floodfill <- function(row, col, tcol, rcol) {
if (tcol == rcol) return()
if (M[row, col] != tcol) return()
Q <- matrix(c(row, col), 1, 2)
while (dim(Q)[1] > 0) {
n <- Q[1, , drop = FALSE]
west <- cbind(n[1] , n[2] - 1)
east <- cbind(n[1] , n[2] + 1)
north <- cbind(n[1] + 1, n[2] )
south <- cbind(n[1] - 1, n[2] )
nwest <- cbind(n[1] - 1, n[2] - 1)
neast <- cbind(n[1] - 1, n[2] + 1)
swest <- cbind(n[1] + 1, n[2] - 1)
seast <- cbind(n[1] + 1, n[2] + 1)
Q <- Q[-1, , drop = FALSE]
if (M[n] == tcol) {
M[n] <<- rcol
if (M[west] == tcol) Q <- rbind(Q, west)
if (M[east] == tcol) Q <- rbind(Q, east)
if (M[north] == tcol) Q <- rbind(Q, north)
if (M[south] == tcol) Q <- rbind(Q, south)
if (M[nwest] == tcol) Q <- rbind(Q, nwest)
if (M[neast] == tcol) Q <- rbind(Q, neast)
if (M[swest] == tcol) Q <- rbind(Q, swest)
if (M[seast] == tcol) Q <- rbind(Q, seast)
}
}
return("filling completed")
}
I'm not sure what doing wrong. I get error for such execution:
a matrix:
M = matrix(c(c(1,0,0,0,0,0,0,1),
c(0,1,0,0,0,1,1,0),
c(0,0,1,1,1,1,1,0),
c(0,0,0,0,1,1,0,0),
c(0,0,0,0,0,1,0,1),
c(0,0,0,1,0,1,0,1)), nrow = 6, byrow = T)
a call:
> floodfill(row=3, col=3, tcol=1, rcol=2)
Error in if (M[west] == tcol) Q <- rbind(Q, west) :
argument is of length zero
Starting from (3,3) it fills on the beginning but filling stops somewhere at column 6 and the rest is not replaced with 2s. Are my modifications wrong or Rosetta code?
The "solution" provided on the rosetta stone doens't do any bounds checking on the array. Here's an alternative that does
floodfill <- function(row, col, tcol, rcol) {
if (tcol == rcol) return()
if (M[row, col] != tcol) return()
Q <- matrix(c(row, col), 1, 2)
moves <- matrix(c(-1, 0, 1, -1, 1, -1, 0, 1,
-1, -1, -1, 0, 0, 1, 1, 1), ncol=2)
check <- function(p) {
if(p[,1] >= 1 & p[,1] <= nrow(M) & p[,2] >= 1 & p[,2] <= ncol(M)) {
if (M[p] == tcol)
Q <<- rbind(Q, p)
}
}
while (dim(Q)[1] > 0) {
n <- Q[1, , drop = FALSE]
dirs <- cbind(n[1] + moves[,1], n[2] + moves[,2])
Q <- Q[-1, , drop = FALSE]
if (M[n] == tcol) {
M[n] <<- rcol
for(i in seq.int(nrow(dirs)))
check(dirs[i, , drop=FALSE])
}
}
return("filling completed")
}
If you have a multilinestring:
multiline <- MULTILINESTRING ((12.573769564824442 55.67932019039465, 12.573664593749626 55.67929917900955, 12.572916898811318 55.679149506755245, 12.5722100725459 55.679011254788364, 12.572044628054563 55.678978898110536))
Is there a smart way to, split that into X lines of equal length?
lines_of_equal_length <- cool_function(multiline)
Or do you have to code such a function all the way from zero, e.g. by calculating the length of the line, and then dividing the length by X, and then running along the line and cleaving it ever y meters?
I found a nice blog post that goes over it in detail, Segmentation of spatial lines. Basically you convert your multilinestring into spatial lines, and then in the blog a lot of nice functions are described that do the magic.
Converting to spatial lines:
library(rgeos)
spatialLines <- readWKT(multiline)
Function 1: Creates a line segment
CreateSegment <- function(coords, from, to) {
distance <- 0
coordsOut <- c()
biggerThanFrom <- F
for (i in 1:(nrow(coords) - 1)) {
d <- sqrt((coords[i, 1] - coords[i + 1, 1])^2 + (coords[i, 2] - coords[i +
1, 2])^2)
distance <- distance + d
if (!biggerThanFrom && (distance > from)) {
w <- 1 - (distance - from)/d
x <- coords[i, 1] + w * (coords[i + 1, 1] - coords[i, 1])
y <- coords[i, 2] + w * (coords[i + 1, 2] - coords[i, 2])
coordsOut <- rbind(coordsOut, c(x, y))
biggerThanFrom <- T
}
if (biggerThanFrom) {
if (distance > to) {
w <- 1 - (distance - to)/d
x <- coords[i, 1] + w * (coords[i + 1, 1] - coords[i, 1])
y <- coords[i, 2] + w * (coords[i + 1, 2] - coords[i, 2])
coordsOut <- rbind(coordsOut, c(x, y))
break
}
coordsOut <- rbind(coordsOut, c(coords[i + 1, 1], coords[i + 1,
2]))
}
}
return(coordsOut)
}
Function 2: Creates line segments
CreateSegments <- function(coords, length = 0, n.parts = 0) {
stopifnot((length > 0 || n.parts > 0))
# calculate total length line
total_length <- 0
for (i in 1:(nrow(coords) - 1)) {
d <- sqrt((coords[i, 1] - coords[i + 1, 1])^2 + (coords[i, 2] - coords[i +
1, 2])^2)
total_length <- total_length + d
}
# calculate stationing of segments
if (length > 0) {
stationing <- c(seq(from = 0, to = total_length, by = length), total_length)
} else {
stationing <- c(seq(from = 0, to = total_length, length.out = n.parts),
total_length)
}
# calculate segments and store the in list
newlines <- list()
for (i in 1:(length(stationing) - 1)) {
newlines[[i]] <- CreateSegment(coords, stationing[i], stationing[i +
1])
}
return(newlines)
}
Function 3: Merges to last line segments
MergeLast <- function(lst) {
l <- length(lst)
lst[[l - 1]] <- rbind(lst[[l - 1]], lst[[l]])
lst <- lst[1:(l - 1)]
return(lst)
}
Function 4: Puts it all toghether and can cut a spatial lines into either n-1 lines or into lines of certain lengths
SegmentSpatialLines <- function(sl, length = 0, n.parts = 0, merge.last = FALSE) {
stopifnot((length > 0 || n.parts > 0))
id <- 0
newlines <- list()
sl <- as(sl, "SpatialLines")
for (lines in sl#lines) {
for (line in lines#Lines) {
crds <- line#coords
# create segments
segments <- CreateSegments(coords = crds, length, n.parts)
if (merge.last && length(segments) > 1) {
# in case there is only one segment, merging would result into error
segments <- MergeLast(segments)
}
# transform segments to lineslist for SpatialLines object
for (segment in segments) {
newlines <- c(newlines, Lines(list(Line(unlist(segment))), ID = as.character(id)))
id <- id + 1
}
}
}
return(SpatialLines(newlines))
}
Again, all credit goes to the creator of the functions: Creator
I am plotting data using a mosaic plot (with mosaicplot()) and am considering adding a numeric axis to one dimension to clarify the size of the different groups. But, I do not understand how the plot cells are aligned to the axis since it seems to range from approximately 0.2 to .98 (or something like that) on the graphics device. Here's a reproducible example:
mosaicplot(Titanic, main = "Survival on the Titanic", off = 0)
axis(1, seq(0, 1, by = 0.1))
Note how a 0-1 x-axis actually extends to the left and right of the plot. Is it possible to add a set of axis labels that is scaled correctly?
par(mfrow = c(2,1), mar = c(3,4,2,1))
mp(Titanic)
mp(Titanic, off = 0)
This one isn't difficult to fix, but there are a couple things going on:
obviously, the axis doesn't start at 0 nor does it end at something round which is what you get from pretty (used to calculate, draw, and label the ticks and labels). From these lines, we can see that the polygons are drawn from 50 to 950 along the x (depending on what is set for cex.axis):
x1 <- 30 + 20 * cex.axis/0.66
y1 <- 5
x2 <- 950
y2 <- 1000 - x1
Secondly, the plotting device is finished when the function exits which is why your attempt ranges from 0 to 1 instead of pretty(c(50, 950)), and I don't see any way to pass something through mosaicplot like new or add since
Warning message:
In mosaicplot.default(Titanic, new = TRUE) :
extra argument ‘new’ will be disregarded
So I don't think there is an easy fix without editing the source code (because seems like you would have to backtrace how far over your last plot has shifted the origin and how that translates to a new window which may not be the same for every plot and blah blah blah).
The only thing I changed was adding the final three lines.
## graphics:::mosaicplot.default
mp <- function (x, main = deparse(substitute(x)), sub = NULL, xlab = NULL,
ylab = NULL, sort = NULL, off = NULL, dir = NULL, color = NULL,
shade = FALSE, margin = NULL, cex.axis = 0.66, las = par("las"),
border = NULL, type = c("pearson", "deviance", "FT"), ...) {
mosaic.cell <- function(X, x1, y1, x2, y2, srt.x, srt.y,
adj.x, adj.y, off, dir, color, lablevx, lablevy, maxdim,
currlev, label) {
p <- ncol(X) - 2
if (dir[1L] == "v") {
xdim <- maxdim[1L]
XP <- rep.int(0, xdim)
for (i in seq_len(xdim)) XP[i] <- sum(X[X[, 1L] ==
i, p])/sum(X[, p])
if (anyNA(XP))
stop("missing values in contingency table")
white <- off[1L] * (x2 - x1)/max(1, xdim - 1)
x.l <- x1
x.r <- x1 + (1 - off[1L]) * XP[1L] * (x2 - x1)
if (xdim > 1L)
for (i in 2:xdim) {
x.l <- c(x.l, x.r[i - 1L] + white)
x.r <- c(x.r, x.r[i - 1L] + white + (1 - off[1L]) *
XP[i] * (x2 - x1))
}
if (lablevx > 0L) {
this.lab <- if (is.null(label[[1L]][1L])) {
paste(rep.int(as.character(currlev), length(currlev)),
as.character(seq_len(xdim)), sep = ".")
}
else label[[1L]]
text(x = x.l + (x.r - x.l)/2, y = 1000 - 35 *
cex.axis/0.66 + 22 * cex.axis/0.65 * (lablevx -
1), srt = srt.x, adj = adj.x, cex = cex.axis,
this.lab, xpd = NA)
}
if (p > 2L) {
for (i in seq_len(xdim)) {
if (XP[i] > 0) {
Recall(X[X[, 1L] == i, 2L:(p + 2L), drop = FALSE],
x.l[i], y1, x.r[i], y2, srt.x, srt.y, adj.x,
adj.y, off[-1L], dir[-1L], color, lablevx -
1, (i == 1L) * lablevy, maxdim[-1L],
currlev + 1, label[2:p])
}
else {
segments(rep.int(x.l[i], 3L), y1 + (y2 -
y1) * c(0, 2, 4)/5, rep.int(x.l[i], 3L),
y1 + (y2 - y1) * c(1, 3, 5)/5)
}
}
}
else {
for (i in seq_len(xdim)) {
if (XP[i] > 0) {
polygon(c(x.l[i], x.r[i], x.r[i], x.l[i]),
c(y1, y1, y2, y2), lty = if (extended)
X[i, p + 1L]
else 1L, col = color[if (extended)
X[i, p + 2L]
else i], border = border)
}
else {
segments(rep.int(x.l[i], 3L), y1 + (y2 -
y1) * c(0, 2, 4)/5, rep.int(x.l[i], 3L),
y1 + (y2 - y1) * c(1, 3, 5)/5)
}
}
}
}
else {
ydim <- maxdim[1L]
YP <- rep.int(0, ydim)
for (j in seq_len(ydim)) {
YP[j] <- sum(X[X[, 1L] == j, p])/sum(X[, p])
}
white <- off[1L] * (y2 - y1)/(max(1, ydim - 1))
y.b <- y2 - (1 - off[1L]) * YP[1L] * (y2 - y1)
y.t <- y2
if (ydim > 1L) {
for (j in 2:ydim) {
y.b <- c(y.b, y.b[j - 1] - white - (1 - off[1L]) *
YP[j] * (y2 - y1))
y.t <- c(y.t, y.b[j - 1] - white)
}
}
if (lablevy > 0L) {
this.lab <- if (is.null(label[[1L]][1L])) {
paste(rep.int(as.character(currlev), length(currlev)),
as.character(seq_len(ydim)), sep = ".")
}
else label[[1L]]
text(x = 35 * cex.axis/0.66 - 20 * cex.axis/0.66 *
(lablevy - 1), y = y.b + (y.t - y.b)/2, srt = srt.y,
adj = adj.y, cex = cex.axis, this.lab, xpd = NA)
}
if (p > 2L) {
for (j in seq_len(ydim)) {
if (YP[j] > 0) {
Recall(X[X[, 1L] == j, 2:(p + 2), drop = FALSE],
x1, y.b[j], x2, y.t[j], srt.x, srt.y, adj.x,
adj.y, off[-1L], dir[-1L], color, (j ==
1L) * lablevx, lablevy - 1, maxdim[-1L],
currlev + 1, label[2:p])
}
else {
segments(x1 + (x2 - x1) * c(0, 2, 4)/5, rep.int(y.b[j],
3L), x1 + (x2 - x1) * c(1, 3, 5)/5, rep.int(y.b[j],
3L))
}
}
}
else {
for (j in seq_len(ydim)) {
if (YP[j] > 0) {
polygon(c(x1, x2, x2, x1), c(y.b[j], y.b[j],
y.t[j], y.t[j]), lty = if (extended)
X[j, p + 1]
else 1, col = color[if (extended)
X[j, p + 2]
else j], border = border)
}
else {
segments(x1 + (x2 - x1) * c(0, 2, 4)/5, rep.int(y.b[j],
3L), x1 + (x2 - x1) * c(1, 3, 5)/5, rep.int(y.b[j],
3L))
}
}
}
}
}
srt.x <- if (las > 1)
90
else 0
srt.y <- if (las == 0 || las == 3)
90
else 0
if (is.null(dim(x)))
x <- as.array(x)
else if (is.data.frame(x))
x <- data.matrix(x)
dimd <- length(dx <- dim(x))
if (dimd == 0L || any(dx == 0L))
stop("'x' must not have 0 dimensionality")
if (!missing(...))
warning(sprintf(ngettext(length(list(...)), "extra argument %s will be disregarded",
"extra arguments %s will be disregarded"), paste(sQuote(names(list(...))),
collapse = ", ")), domain = NA)
Ind <- 1L:dx[1L]
if (dimd > 1L) {
Ind <- rep.int(Ind, prod(dx[2:dimd]))
for (i in 2:dimd) {
Ind <- cbind(Ind, c(matrix(1L:dx[i], byrow = TRUE,
nrow = prod(dx[1L:(i - 1)]), ncol = prod(dx[i:dimd]))))
}
}
Ind <- cbind(Ind, c(x))
if (is.logical(shade) && !shade) {
extended <- FALSE
Ind <- cbind(Ind, NA, NA)
}
else {
if (is.logical(shade))
shade <- c(2, 4)
else if (any(shade <= 0) || length(shade) > 5)
stop("invalid 'shade' specification")
extended <- TRUE
shade <- sort(shade)
breaks <- c(-Inf, -rev(shade), 0, shade, Inf)
color <- c(hsv(0, s = seq.int(1, to = 0, length.out = length(shade) +
1)), hsv(4/6, s = seq.int(0, to = 1, length.out = length(shade) +
1)))
if (is.null(margin))
margin <- as.list(1L:dimd)
E <- stats::loglin(x, margin, fit = TRUE, print = FALSE)$fit
type <- match.arg(type)
residuals <- switch(type, pearson = (x - E)/sqrt(E),
deviance = {
tmp <- 2 * (x * log(ifelse(x == 0, 1, x/E)) -
(x - E))
tmp <- sqrt(pmax(tmp, 0))
ifelse(x > E, tmp, -tmp)
}, FT = sqrt(x) + sqrt(x + 1) - sqrt(4 * E + 1))
Ind <- cbind(Ind, c(1 + (residuals < 0)), as.numeric(cut(residuals,
breaks)))
}
label <- dimnames(x)
if (is.null(off))
off <- if (dimd == 2)
2 * (dx - 1)
else rep.int(10, dimd)
if (length(off) != dimd)
off <- rep_len(off, dimd)
if (any(off > 50))
off <- off * 50/max(off)
if (is.null(dir) || length(dir) != dimd) {
dir <- rep_len(c("v", "h"), dimd)
}
if (!is.null(sort)) {
if (length(sort) != dimd)
stop("length of 'sort' does not conform to 'dim(x)'")
Ind[, seq_len(dimd)] <- Ind[, sort]
off <- off[sort]
dir <- dir[sort]
label <- label[sort]
}
nam.dn <- names(label)
if (is.null(xlab) && any(dir == "v"))
xlab <- nam.dn[min(which(dir == "v"))]
if (is.null(ylab) && any(dir == "h"))
ylab <- nam.dn[min(which(dir == "h"))]
ncolors <- length(tabulate(Ind[, dimd]))
if (!extended && ((is.null(color) || length(color) != ncolors))) {
color <- if (is.logical(color))
if (color[1L])
gray.colors(ncolors)
else rep.int(0, ncolors)
else if (is.null(color))
rep.int("grey", ncolors)
else rep_len(color, ncolors)
}
dev.hold()
on.exit(dev.flush())
plot.new()
if (!extended) {
opar <- par(usr = c(1, 1000, 1, 1000), mgp = c(1, 1,
0))
on.exit(par(opar), add = TRUE)
}
else {
pin <- par("pin")
rtxt <- "Standardized\nResiduals:"
rtxtCex <- min(1, pin[1L]/(strheight(rtxt, units = "inches") *
12), pin[2L]/(strwidth(rtxt, units = "inches")/4))
rtxtWidth <- 0.1
opar <- par(usr = c(1, 1000 * (1.1 + rtxtWidth), 1, 1000),
mgp = c(1, 1, 0))
on.exit(par(opar), add = TRUE)
rtxtHeight <- strwidth(rtxt, units = "i", cex = rtxtCex)/pin[2L]
text(1000 * (1.05 + 0.5 * rtxtWidth), 0, labels = rtxt,
adj = c(0, 0.25), srt = 90, cex = rtxtCex)
len <- length(shade) + 1
bh <- 0.95 * (0.95 - rtxtHeight)/(2 * len)
x.l <- 1000 * 1.05
x.r <- 1000 * (1.05 + 0.7 * rtxtWidth)
y.t <- 1000 * rev(seq.int(from = 0.95, by = -bh, length.out = 2 *
len))
y.b <- y.t - 1000 * 0.8 * bh
ltype <- c(rep.int(2, len), rep.int(1, len))
for (i in 1:(2 * len)) {
polygon(c(x.l, x.r, x.r, x.l), c(y.b[i], y.b[i],
y.t[i], y.t[i]), col = color[i], lty = ltype[i],
border = border)
}
brks <- round(breaks, 2)
y.m <- y.b + 1000 * 0.4 * bh
text(1000 * (1.05 + rtxtWidth), y.m, c(paste0("<", brks[2L]),
paste(brks[2:(2 * len - 1)], brks[3:(2 * len)], sep = ":"),
paste0(">", brks[2 * len])), srt = 90, cex = cex.axis,
xpd = NA)
}
if (!is.null(main) || !is.null(xlab) || !is.null(ylab) ||
!is.null(sub))
title(main, sub = sub, xlab = xlab, ylab = ylab)
adj.x <- adj.y <- 0.5
x1 <- 30 + 20 * cex.axis/0.66
y1 <- 5
x2 <- 950
y2 <- 1000 - x1
maxlen.xlabel <- maxlen.ylabel <- 35 * cex.axis/0.66
if (srt.x == 90) {
maxlen.xlabel <- max(strwidth(label[[dimd + 1L - match("v",
rev(dir))]], cex = cex.axis))
adj.x <- 1
y2 <- y2 - maxlen.xlabel
}
if (srt.y == 0) {
maxlen.ylabel <- max(strwidth(label[[match("h", dir)]],
cex = cex.axis))
adj.y <- 0
x1 <- x1 + maxlen.ylabel
}
mosaic.cell(Ind, x1 = x1, y1 = y1, x2 = x2, y2 = y2, srt.x = srt.x,
srt.y = srt.y, adj.x = adj.x, adj.y = adj.y, off = off/100,
dir = dir, color = color, lablevx = 2, lablevy = 2,
maxdim = apply(as.matrix(Ind[, 1L:dimd]), 2L, max),
currlev = 1, label = label)
## new stuff
at <- seq(x1, x2, length.out = 6)
axis(1, at, (at - min(at)) / diff(range(at)))
invisible()
}
The cvm.test() from dgof package provides a way of doing the one-sample Cramer-von Mises test on discrete distributions, my goal is to develop a function that does the test for continuous distributions as well (like the Kolmogorov-Smirnov ks.test() from the stats package).
Note:this post is concerned only with fully specified df null hypothesis, so please no bootstraping or Monte Carlo Simulation here
> cvm.test
function (x, y, type = c("W2", "U2", "A2"), simulate.p.value = FALSE,
B = 2000, tol = 1e-08)
{
cvm.pval.disc <- function(STAT, lambda) {
x <- STAT
theta <- function(u) {
VAL <- 0
for (i in 1:length(lambda)) {
VAL <- VAL + 0.5 * atan(lambda[i] * u)
}
return(VAL - 0.5 * x * u)
}
rho <- function(u) {
VAL <- 0
for (i in 1:length(lambda)) {
VAL <- VAL + log(1 + lambda[i]^2 * u^2)
}
VAL <- exp(VAL * 0.25)
return(VAL)
}
fun <- function(u) return(sin(theta(u))/(u * rho(u)))
pval <- 0
try(pval <- 0.5 + integrate(fun, 0, Inf, subdivisions = 1e+06)$value/pi,
silent = TRUE)
if (pval > 0.001)
return(pval)
if (pval <= 0.001) {
df <- sum(lambda != 0)
est1 <- dchisq(STAT/max(lambda), df)
logf <- function(t) {
ans <- -t * STAT
ans <- ans - 0.5 * sum(log(1 - 2 * t * lambda))
return(ans)
}
est2 <- 1
try(est2 <- exp(nlm(logf, 1/(4 * max(lambda)))$minimum),
silent = TRUE)
return(min(est1, est2))
}
}
cvm.stat.disc <- function(x, y, type = c("W2", "U2", "A2")) {
type <- match.arg(type)
I <- knots(y)
N <- length(x)
e <- diff(c(0, N * y(I)))
obs <- rep(0, length(I))
for (j in 1:length(I)) {
obs[j] <- length(which(x == I[j]))
}
S <- cumsum(obs)
T <- cumsum(e)
H <- T/N
p <- e/N
t <- (p + p[c(2:length(p), 1)])/2
Z <- S - T
Zbar <- sum(Z * t)
S0 <- diag(p) - p %*% t(p)
A <- matrix(1, length(p), length(p))
A <- apply(row(A) >= col(A), 2, as.numeric)
E <- diag(t)
One <- rep(1, nrow(E))
K <- diag(0, length(H))
diag(K)[-length(H)] <- 1/(H[-length(H)] * (1 - H[-length(H)]))
Sy <- A %*% S0 %*% t(A)
M <- switch(type, W2 = E, U2 = (diag(1, nrow(E)) - E %*%
One %*% t(One)) %*% E %*% (diag(1, nrow(E)) - One %*%
t(One) %*% E), A2 = E %*% K)
lambda <- eigen(M %*% Sy)$values
STAT <- switch(type, W2 = sum(Z^2 * t)/N, U2 = sum((Z -
Zbar)^2 * t)/N, A2 = sum((Z^2 * t/(H * (1 - H)))[-length(I)])/N)
return(c(STAT, lambda))
}
cvm.pval.disc.sim <- function(STATISTIC, lambda, y, type,
tol, B) {
knots.y <- knots(y)
fknots.y <- y(knots.y)
u <- runif(B * length(x))
u <- sapply(u, function(a) return(knots.y[sum(a > fknots.y) +
1]))
dim(u) <- c(B, length(x))
s <- apply(u, 1, cvm.stat.disc, y, type)
s <- s[1, ]
return(sum(s >= STATISTIC - tol)/B)
}
type <- match.arg(type)
DNAME <- deparse(substitute(x))
if (is.stepfun(y)) {
if (length(setdiff(x, knots(y))) != 0) {
stop("Data are incompatable with null distribution; ",
"Note: This function is meant only for discrete distributions ",
"you may be receiving this error because y is continuous.")
}
tempout <- cvm.stat.disc(x, y, type = type)
STAT <- tempout[1]
lambda <- tempout[2:length(tempout)]
if (!simulate.p.value) {
PVAL <- cvm.pval.disc(STAT, lambda)
}
else {
PVAL <- cvm.pval.disc.sim(STAT, lambda, y, type,
tol, B)
}
METHOD <- paste("Cramer-von Mises -", type)
names(STAT) <- as.character(type)
RVAL <- list(statistic = STAT, p.value = PVAL, alternative = "Two.sided",
method = METHOD, data.name = DNAME)
}
else {
stop("Null distribution must be a discrete.")
}
class(RVAL) <- "htest"
return(RVAL)
}
<environment: namespace:dgof>
Kolmogorov-Smirnov ks.test() from stats package for comparison (note that this function does both the one-sample and two-sample tests):
> ks.test
function (x, y, ..., alternative = c("two.sided", "less", "greater"),
exact = NULL, tol = 1e-08, simulate.p.value = FALSE, B = 2000)
{
pkolmogorov1x <- function(x, n) {
if (x <= 0)
return(0)
if (x >= 1)
return(1)
j <- seq.int(from = 0, to = floor(n * (1 - x)))
1 - x * sum(exp(lchoose(n, j) + (n - j) * log(1 - x -
j/n) + (j - 1) * log(x + j/n)))
}
exact.pval <- function(alternative, STATISTIC, x, n, y, knots.y,
tol) {
ts.pval <- function(S, x, n, y, knots.y, tol) {
f_n <- ecdf(x)
eps <- min(tol, min(diff(knots.y)) * tol)
eps2 <- min(tol, min(diff(y(knots.y))) * tol)
a <- rep(0, n)
b <- a
f_a <- a
for (i in 1:n) {
a[i] <- min(c(knots.y[which(y(knots.y) + S >=
i/n + eps2)[1]], Inf), na.rm = TRUE)
b[i] <- min(c(knots.y[which(y(knots.y) - S >
(i - 1)/n - eps2)[1]], Inf), na.rm = TRUE)
f_a[i] <- ifelse(!(a[i] %in% knots.y), y(a[i]),
y(a[i] - eps))
}
f_b <- y(b)
p <- rep(1, n + 1)
for (i in 1:n) {
tmp <- 0
for (k in 0:(i - 1)) {
tmp <- tmp + choose(i, k) * (-1)^(i - k - 1) *
max(f_b[k + 1] - f_a[i], 0)^(i - k) * p[k +
1]
}
p[i + 1] <- tmp
}
p <- max(0, 1 - p[n + 1])
if (p > 1) {
warning("numerical instability in p-value calculation.")
p <- 1
}
return(p)
}
less.pval <- function(S, n, H, z, tol) {
m <- ceiling(n * (1 - S))
c <- S + (1:m - 1)/n
CDFVAL <- H(sort(z))
for (j in 1:length(c)) {
ifelse((min(abs(c[j] - CDFVAL)) < tol), c[j] <- 1 -
c[j], c[j] <- 1 - CDFVAL[which(order(c(c[j],
CDFVAL)) == 1)])
}
b <- rep(0, m)
b[1] <- 1
for (k in 1:(m - 1)) b[k + 1] <- 1 - sum(choose(k,
1:k - 1) * c[1:k]^(k - 1:k + 1) * b[1:k])
p <- sum(choose(n, 0:(m - 1)) * c^(n - 0:(m - 1)) *
b)
return(p)
}
greater.pval <- function(S, n, H, z, tol) {
m <- ceiling(n * (1 - S))
c <- 1 - (S + (1:m - 1)/n)
CDFVAL <- c(0, H(sort(z)))
for (j in 1:length(c)) {
if (!(min(abs(c[j] - CDFVAL)) < tol))
c[j] <- CDFVAL[which(order(c(c[j], CDFVAL)) ==
1) - 1]
}
b <- rep(0, m)
b[1] <- 1
for (k in 1:(m - 1)) b[k + 1] <- 1 - sum(choose(k,
1:k - 1) * c[1:k]^(k - 1:k + 1) * b[1:k])
p <- sum(choose(n, 0:(m - 1)) * c^(n - 0:(m - 1)) *
b)
return(p)
}
p <- switch(alternative, two.sided = ts.pval(STATISTIC,
x, n, y, knots.y, tol), less = less.pval(STATISTIC,
n, y, knots.y, tol), greater = greater.pval(STATISTIC,
n, y, knots.y, tol))
return(p)
}
sim.pval <- function(alternative, STATISTIC, x, n, y, knots.y,
tol, B) {
fknots.y <- y(knots.y)
u <- runif(B * length(x))
u <- sapply(u, function(a) return(knots.y[sum(a > fknots.y) +
1]))
dim(u) <- c(B, length(x))
getks <- function(a, knots.y, fknots.y) {
dev <- c(0, ecdf(a)(knots.y) - fknots.y)
STATISTIC <- switch(alternative, two.sided = max(abs(dev)),
greater = max(dev), less = max(-dev))
return(STATISTIC)
}
s <- apply(u, 1, getks, knots.y, fknots.y)
return(sum(s >= STATISTIC - tol)/B)
}
alternative <- match.arg(alternative)
DNAME <- deparse(substitute(x))
x <- x[!is.na(x)]
n <- length(x)
if (n < 1L)
stop("not enough 'x' data")
PVAL <- NULL
if (is.numeric(y)) {
DNAME <- paste(DNAME, "and", deparse(substitute(y)))
y <- y[!is.na(y)]
n.x <- as.double(n)
n.y <- length(y)
if (n.y < 1L)
stop("not enough 'y' data")
if (is.null(exact))
exact <- (n.x * n.y < 10000)
METHOD <- "Two-sample Kolmogorov-Smirnov test"
TIES <- FALSE
n <- n.x * n.y/(n.x + n.y)
w <- c(x, y)
z <- cumsum(ifelse(order(w) <= n.x, 1/n.x, -1/n.y))
if (length(unique(w)) < (n.x + n.y)) {
warning("cannot compute correct p-values with ties")
z <- z[c(which(diff(sort(w)) != 0), n.x + n.y)]
TIES <- TRUE
}
STATISTIC <- switch(alternative, two.sided = max(abs(z)),
greater = max(z), less = -min(z))
nm_alternative <- switch(alternative, two.sided = "two-sided",
less = "the CDF of x lies below that of y", greater = "the CDF of x lies above that of y")
if (exact && (alternative == "two.sided") && !TIES)
PVAL <- 1 - .C("psmirnov2x", p = as.double(STATISTIC),
as.integer(n.x), as.integer(n.y), PACKAGE = "dgof")$p
}
else if (is.stepfun(y)) {
z <- knots(y)
if (is.null(exact))
exact <- (n <= 30)
if (exact && n > 30) {
warning("numerical instability may affect p-value")
}
METHOD <- "One-sample Kolmogorov-Smirnov test"
dev <- c(0, ecdf(x)(z) - y(z))
STATISTIC <- switch(alternative, two.sided = max(abs(dev)),
greater = max(dev), less = max(-dev))
if (simulate.p.value) {
PVAL <- sim.pval(alternative, STATISTIC, x, n, y,
z, tol, B)
}
else {
PVAL <- switch(exact, `TRUE` = exact.pval(alternative,
STATISTIC, x, n, y, z, tol), `FALSE` = NULL)
}
nm_alternative <- switch(alternative, two.sided = "two-sided",
less = "the CDF of x lies below the null hypothesis",
greater = "the CDF of x lies above the null hypothesis")
}
else {
if (is.character(y))
y <- get(y, mode = "function")
if (mode(y) != "function")
stop("'y' must be numeric or a string naming a valid function")
if (is.null(exact))
exact <- (n < 100)
METHOD <- "One-sample Kolmogorov-Smirnov test"
TIES <- FALSE
if (length(unique(x)) < n) {
warning(paste("default ks.test() cannot compute correct p-values with ties;\n",
"see help page for one-sample Kolmogorov test for discrete distributions."))
TIES <- TRUE
}
x <- y(sort(x), ...) - (0:(n - 1))/n
STATISTIC <- switch(alternative, two.sided = max(c(x,
1/n - x)), greater = max(1/n - x), less = max(x))
if (exact && !TIES) {
PVAL <- if (alternative == "two.sided")
1 - .C("pkolmogorov2x", p = as.double(STATISTIC),
as.integer(n), PACKAGE = "dgof")$p
else 1 - pkolmogorov1x(STATISTIC, n)
}
nm_alternative <- switch(alternative, two.sided = "two-sided",
less = "the CDF of x lies below the null hypothesis",
greater = "the CDF of x lies above the null hypothesis")
}
names(STATISTIC) <- switch(alternative, two.sided = "D",
greater = "D^+", less = "D^-")
pkstwo <- function(x, tol = 1e-06) {
if (is.numeric(x))
x <- as.vector(x)
else stop("argument 'x' must be numeric")
p <- rep(0, length(x))
p[is.na(x)] <- NA
IND <- which(!is.na(x) & (x > 0))
if (length(IND)) {
p[IND] <- .C("pkstwo", as.integer(length(x[IND])),
p = as.double(x[IND]), as.double(tol), PACKAGE = "dgof")$p
}
return(p)
}
if (is.null(PVAL)) {
PVAL <- ifelse(alternative == "two.sided", 1 - pkstwo(sqrt(n) *
STATISTIC), exp(-2 * n * STATISTIC^2))
}
RVAL <- list(statistic = STATISTIC, p.value = PVAL, alternative = nm_alternative,
method = METHOD, data.name = DNAME)
class(RVAL) <- "htest"
return(RVAL)
}
<environment: namespace:dgof>