R - exclamation mark before variable, but no subsequent =, == or similar

R - exclamation mark before variable, but no subsequent =, == or similar - r

The code for the Holt-Winters function in R contains the following if clause:
if (!is.null(gamma) && is.logical(gamma) && !gamma)
Obviously the first means "if NOT gamma is Null". I am a bit confused by the meaning of the third - it looks like this reads, "if NOT gamma", but there is nothing following like an equals sign or an is.null, etc.
Forgive me if this is a somewhat basic question, but I am very new to R.
Full code:
{
x <- as.ts(x)
seasonal <- match.arg(seasonal)
f <- frequency(x)
if (!is.null(alpha) && (alpha == 0))
stop("cannot fit models without level ('alpha' must not be 0 or FALSE)")
if (!all(is.null(c(alpha, beta, gamma))) && any(c(alpha,
beta, gamma) < 0 || c(alpha, beta, gamma) > 1))
stop("'alpha', 'beta' and 'gamma' must be within the unit interval")
if ((is.null(gamma) || gamma > 0)) {
if (seasonal == "multiplicative" && any(x == 0))
stop("data must be non-zero for multiplicative Holt-Winters")
if (start.periods < 2)
stop("need at least 2 periods to compute seasonal start values")
}
if (!is.null(gamma) && is.logical(gamma) && !gamma) {
expsmooth <- !is.null(beta) && is.logical(beta) && !beta
if (is.null(l.start))
l.start <- if (expsmooth)
x[1L]
else x[2L]
if (is.null(b.start))
if (is.null(beta) || !is.logical(beta) || beta)
b.start <- x[2L] - x[1L]
start.time <- 3 - expsmooth
s.start <- 0
}
else {
start.time <- f + 1
wind <- start.periods * f
st <- decompose(ts(x[1L:wind], start = start(x), frequency = f),
seasonal)
if (is.null(l.start) || is.null(b.start)) {
dat <- na.omit(st$trend)
cf <- coef(.lm.fit(x = cbind(1, seq_along(dat)),
y = dat))
if (is.null(l.start))
l.start <- cf[1L]
if (is.null(b.start))
b.start <- cf[2L]
}
if (is.null(s.start))
s.start <- st$figure
}
lenx <- as.integer(length(x))
if (is.na(lenx))
stop("invalid length(x)")
len <- lenx - start.time + 1
hw <- function(alpha, beta, gamma) .C(C_HoltWinters, as.double(x),
lenx, as.double(max(min(alpha, 1), 0)), as.double(max(min(beta,
1), 0)), as.double(max(min(gamma, 1), 0)), as.integer(start.time),
as.integer(!+(seasonal == "multiplicative")), as.integer(f),
as.integer(!is.logical(beta) || beta), as.integer(!is.logical(gamma) ||
gamma), a = as.double(l.start), b = as.double(b.start),
s = as.double(s.start), SSE = as.double(0), level = double(len +
1L), trend = double(len + 1L), seasonal = double(len +
f))
if (is.null(gamma)) {
if (is.null(alpha)) {
if (is.null(beta)) {
error <- function(p) hw(p[1L], p[2L], p[3L])$SSE
sol <- optim(optim.start, error, method = "L-BFGS-B",
lower = c(0, 0, 0), upper = c(1, 1, 1), control = optim.control)
if (sol$convergence || any(sol$par < 0 | sol$par >
1)) {
if (sol$convergence > 50) {
warning(gettextf("optimization difficulties: %s",
sol$message), domain = NA)
}
else stop("optimization failure")
}
alpha <- sol$par[1L]
beta <- sol$par[2L]
gamma <- sol$par[3L]
}
else {
error <- function(p) hw(p[1L], beta, p[2L])$SSE
sol <- optim(c(optim.start["alpha"], optim.start["gamma"]),
error, method = "L-BFGS-B", lower = c(0, 0),
upper = c(1, 1), control = optim.control)
if (sol$convergence || any(sol$par < 0 | sol$par >
1)) {
if (sol$convergence > 50) {
warning(gettextf("optimization difficulties: %s",
sol$message), domain = NA)
}
else stop("optimization failure")
}
alpha <- sol$par[1L]
gamma <- sol$par[2L]
}
}
else {
if (is.null(beta)) {
error <- function(p) hw(alpha, p[1L], p[2L])$SSE
sol <- optim(c(optim.start["beta"], optim.start["gamma"]),
error, method = "L-BFGS-B", lower = c(0, 0),
upper = c(1, 1), control = optim.control)
if (sol$convergence || any(sol$par < 0 | sol$par >
1)) {
if (sol$convergence > 50) {
warning(gettextf("optimization difficulties: %s",
sol$message), domain = NA)
}
else stop("optimization failure")
}
beta <- sol$par[1L]
gamma <- sol$par[2L]
}
else {
error <- function(p) hw(alpha, beta, p)$SSE
gamma <- optimize(error, lower = 0, upper = 1)$minimum
}
}
}
else {
if (is.null(alpha)) {
if (is.null(beta)) {
error <- function(p) hw(p[1L], p[2L], gamma)$SSE
sol <- optim(c(optim.start["alpha"], optim.start["beta"]),
error, method = "L-BFGS-B", lower = c(0, 0),
upper = c(1, 1), control = optim.control)
if (sol$convergence || any(sol$par < 0 | sol$par >
1)) {
if (sol$convergence > 50) {
warning(gettextf("optimization difficulties: %s",
sol$message), domain = NA)
}
else stop("optimization failure")
}
alpha <- sol$par[1L]
beta <- sol$par[2L]
}
else {
error <- function(p) hw(p, beta, gamma)$SSE
alpha <- optimize(error, lower = 0, upper = 1)$minimum
}
}
else {
if (is.null(beta)) {
error <- function(p) hw(alpha, p, gamma)$SSE
beta <- optimize(error, lower = 0, upper = 1)$minimum
}
}
}
final.fit <- hw(alpha, beta, gamma)
fitted <- ts(cbind(xhat = final.fit$level[-len - 1], level = final.fit$level[-len -
1], trend = if (!is.logical(beta) || beta)
final.fit$trend[-len - 1], season = if (!is.logical(gamma) ||
gamma)
final.fit$seasonal[1L:len]), start = start(lag(x, k = 1 -
start.time)), frequency = frequency(x))
if (!is.logical(beta) || beta)
fitted[, 1] <- fitted[, 1] + fitted[, "trend"]
if (!is.logical(gamma) || gamma)
fitted[, 1] <- if (seasonal == "multiplicative")
fitted[, 1] * fitted[, "season"]
else fitted[, 1] + fitted[, "season"]
structure(list(fitted = fitted, x = x, alpha = alpha, beta = beta,
gamma = gamma, coefficients = c(a = final.fit$level[len +
1], b = if (!is.logical(beta) || beta) final.fit$trend[len +
1], s = if (!is.logical(gamma) || gamma) final.fit$seasonal[len +
1L:f]), seasonal = seasonal, SSE = final.fit$SSE,
call = match.call()), class = "HoltWinters")
}

edit: I was confused about the context.
! is the logical-NOT operator in R.
As pointed out in the comments, R often allows users to pass arguments of different types. In this case ?HoltWinters says
gamma: gamma parameter used for the seasonal component. If set to
‘FALSE’, an non-seasonal model is fitted.
So gamma can be either a numeric value or a logical (FALSE) value.
Since this !gamma follows is.logical(gamma) && ..., it will only be evaluated if gamma is a logical (TRUE/FALSE) value. In this case, !gamma is equivalent to gamma==FALSE, but most programmers would shorten this to !gamma (so that FALSE becomes TRUE and TRUE becomes FALSE).
We wouldn't want to test gamma=FALSE without the is.logical() test first, because someone might have specified gamma=0, in which case R would evaluate 0==FALSE, which according to its coercion rules is TRUE.
This test could also have been written if (identical(gamma,FALSE)) - which would correctly evaluate both NULL and 0 as different from FALSE.
In contrast, if gamma were to be numeric, !gamma would be shorthand for gamma != 0.
According to R's rules for coercion from floating-point to logical, 0 gets converted to FALSE and any non-zero, non-NA value gets converted to TRUE (see this question for more detail).
Thus !gamma is equivalent to gamma!=0. Some old-school programmers use this for brevity; I don't think the brevity-clarity tradeoff is worth it, but that's just my opinion.

Related

My variogram code result different from variog() result

I am writing code for producing a variogram. For validating my result, I checked with geoR::variog() but both variograms are different.
I tried to understand the code of variog() to see what happens under the hood but there are so many things happening that I can't seem to understand it. I, in my code, am using the parameters X-coordinate, Y-coordiante, data value, number of lags, minimum lag value, lag interval, azimuth (angle in degrees; 90 corresponds to vertical direction), angle tolerance (in degrees) and maximum bandwidth.
variogram = function(xcor, ycor, data, nlag, minlag, laginv, azm, atol, maxbandw){
dl <- length(data)
lowangle <- azm - atol
upangle <- azm + atol
gamlag <- integer(nlag)
n <- integer(nlag)
dist <- pairdist(xcor, ycor)
maxd <- max(dist)
llag <- seq(minlag, minlag + (nlag-1) * laginv, by = laginv)
hlag <- llag + laginv
for(i in 1:dl){
for(j in i:dl){
if(i != j){
if(xcor[j]- xcor[i] == 0)
theta <- 90
else
theta <- 180/pi * atan((ycor[j] - ycor[i])/(xcor[j] - xcor[i]))
for(k in 1:nlag){
d <- dist[j, i]
b <- abs(d * sin(theta - azm))
if((llag[k] <= d & d < hlag[k]) & (lowangle <= theta & theta < upangle) & (b <= maxbandw)){
gamlag[k] <- gamlag[k] + (data[i] - data[j])^2;
n[k] <- n[k] + 1
}
}
}
}
}
gamlag <- ifelse(n == 0, NA, gamlag/(2*n))
tmp <- data.frame("lag" = llag, "gamma" = gamlag)
return(tmp)
}
function call for the above code
ideal_variogram_2 <- variogram(data3[,1], data3[,2], data3[,3], 18, 0, 0.025, 90, 45, 1000000)
ideal_variogram_2 <- na.omit(ideal_variogram_2)
plot(ideal_variogram_2$lag, ideal_variogram_2$gamma, main = "Using my code")
function call for variog()
geodata1 <- as.geodata(data3, coords.col = 1:2, data.col = 3)
ideal_variogram_1 <- variog(geodata1, coords = geodata1$coords, data = geodata1$data, option = "bin", uvec = seq(0, 0.45, by = 0.025), direction = pi/2, tolerance = pi/4)
df <- data.frame(u = ideal_variogram_1$u, v = ideal_variogram_1$v)
plot(df$u, df$v, main = "Using variog()")
The 2 variograms that I got are at the following link:
Variogram

How to insert a numeric gradient inside constrOptim

I was doing maximum likelihood estimation using optim() and it was quite easy. It's a generalized logistic distribution with 4 parameters and a couple of restrictions, all listed in the likelihood function:
genlogis.loglikelihood <- function(param = c(sqrt(2/pi),0.5, 2, 0), x){
if(length(param) < 3 | length(param) > 4 ){
stop('Incorrect number of parameters: param = c(a,b,p,location)')
}
if(length(param) == 3){
#warning('Location parameter is set to 0')
location = 0
}
if(length(param) == 4){
location = param[4]
}
a = param[1]
b = param[2]
p = param[3]
if(!missing(a)){
if(a < 0){
stop('The argument "a" must be positive.')
}
}
if(!missing(b)){
if(b < 0){
stop('The argument "b" must be positive.')
}
}
if(!missing(p)){
if(p < 0){
stop('The argument "p" must be positive.')
}
}
if(p == 0 && b > 0 && a > 0){
stop('If "p" equals to 0, "b" or "a" must be
0 otherwise there is identifiability problem.')
}
if(b == 0 && a == 0){
stop('The distribution is not defined for "a"
and "b" equal to 0 simultaneously.')
}
z <- sum(log((a+b*(1+p)*abs((x-location))^p ) * exp(-((x-location)*(a+b*abs((x-location))^p))))) -
sum(2*log(exp(-((x-location)*(a+b*abs((x-location))^p))) + 1))
if(!is.finite(z)){
z <- 1e+20
}
return(-z)
}
I made it's likelihood function and worked flawessly this way:
opt <- function(parameters, data){
optim(par = parameters, fn = genlogis.loglikelihood, x=data,
lower = c(0.00001,0.00001,0.00001, -Inf),
upper = c(Inf,Inf,Inf,Inf), method = 'L-BFGS-B')
}
opt(c(0.3, 1.01, 2.11, 3.5), faithful$eruptions)
Since this function does the gradient numerically I had not much problem.
Then I wanted to change to constrOptim() because the boundaries are actually 0 and not a small number on the first 3 parameters. But, the problem I face is that the argument grad has to be specified and I can't derive that function to give a gradient function, so I have to do it numerically as in optim(), it works if I put grad = NULL but I don't want Nelder-Mead method but BFGS.
I've tried this way but not of much sucess:
opt2 <- function(initial, data){
ui <- rbind(c(1, 0, 0, 0), c(0,1,0,0), c(0,0,1,0))
ci <- c(0,0,0)
constrOptim(theta = initial, f = genlogis.loglikelihood(param, x),
grad = numDeriv::grad(func = function(x, param) genlogis.loglikelihood(param, x), param = theta, x = data)
, x = data, ui = ui, ci = ci)
}

Your notation is a bit complicated, maybe that confused you.
opt2 <- function(parameters, data){
fn = function(p) genlogis.loglikelihood(p, x = data)
gr = function(p) numDeriv::grad(fn, p)
ui <- rbind(c(1, 0, 0, 0), c(0,1,0,0), c(0,0,1,0))
ci <- c(0,0,0)
constrOptim(theta = parameters, f = fn, grad = gr,
ui = ui, ci = ci, method="BFGS")
}
opt2(c(0.3, 1.01, 2.11, 3.5), faithful$eruptions)

Rpart equivalent to LM's se.fit

I have 2 regression models in R:
LM model in which I use the se.fit=true as follows:
predict(my_model, newdata=data, se.fit=T)
Recursive Portioning Tree (using rpart package)
Unfortunately I don't have the se.fit option in rpart, and I'd like to calculate these values manually.
I do understand what the standard error means for a group of estimations (basically sum of mean squares) but what does it mean per each estimate separately as generated by se.fit?
How can I do that? Thanks!

After digging into that, I found out LM's se.fit is calculated in a little odd way. here is the implementation:
function (object, newdata, se.fit = FALSE, scale = NULL, df = Inf,
interval = c("none", "confidence", "prediction"), level = 0.95,
type = c("response", "terms"), terms = NULL, na.action = na.pass,
pred.var = res.var/weights, weights = 1, ...)
{
tt <- terms(object)
if (!inherits(object, "lm"))
warning("calling predict.lm(<fake-lm-object>) ...")
if (missing(newdata) || is.null(newdata)) {
mm <- X <- model.matrix(object)
mmDone <- TRUE
offset <- object$offset
}
else {
Terms <- delete.response(tt)
m <- model.frame(Terms, newdata, na.action = na.action,
xlev = object$xlevels)
if (!is.null(cl <- attr(Terms, "dataClasses")))
.checkMFClasses(cl, m)
X <- model.matrix(Terms, m, contrasts.arg = object$contrasts)
offset <- rep(0, nrow(X))
if (!is.null(off.num <- attr(tt, "offset")))
for (i in off.num) offset <- offset + eval(attr(tt,
"variables")[[i + 1]], newdata)
if (!is.null(object$call$offset))
offset <- offset + eval(object$call$offset, newdata)
mmDone <- FALSE
}
n <- length(object$residuals)
p <- object$rank
p1 <- seq_len(p)
piv <- if (p)
qr.lm(object)$pivot[p1]
if (p < ncol(X) && !(missing(newdata) || is.null(newdata)))
warning("prediction from a rank-deficient fit may be misleading")
beta <- object$coefficients
predictor <- drop(X[, piv, drop = FALSE] %*% beta[piv])
if (!is.null(offset))
predictor <- predictor + offset
interval <- match.arg(interval)
if (interval == "prediction") {
if (missing(newdata))
warning("predictions on current data refer to _future_ responses\n")
if (missing(newdata) && missing(weights)) {
w <- weights.default(object)
if (!is.null(w)) {
weights <- w
warning("assuming prediction variance inversely proportional to weights used for fitting\n")
}
}
if (!missing(newdata) && missing(weights) && !is.null(object$weights) &&
missing(pred.var))
warning("Assuming constant prediction variance even though model fit is weighted\n")
if (inherits(weights, "formula")) {
if (length(weights) != 2L)
stop("'weights' as formula should be one-sided")
d <- if (missing(newdata) || is.null(newdata))
model.frame(object)
else newdata
weights <- eval(weights[[2L]], d, environment(weights))
}
}
type <- match.arg(type)
if (se.fit || interval != "none") {
w <- object$weights
res.var <- if (is.null(scale)) {
r <- object$residuals
rss <- sum(if (is.null(w)) r^2 else r^2 * w)
df <- object$df.residual
rss/df
}
else scale^2
if (type != "terms") {
if (p > 0) {
XRinv <- if (missing(newdata) && is.null(w))
qr.Q(qr.lm(object))[, p1, drop = FALSE]
else X[, piv] %*% qr.solve(qr.R(qr.lm(object))[p1,
p1])
ip <- drop(XRinv^2 %*% rep(res.var, p))
}
else ip <- rep(0, n)
}
}
if (type == "terms") {
if (!mmDone) {
mm <- model.matrix(object)
mmDone <- TRUE
}
aa <- attr(mm, "assign")
ll <- attr(tt, "term.labels")
hasintercept <- attr(tt, "intercept") > 0L
if (hasintercept)
ll <- c("(Intercept)", ll)
aaa <- factor(aa, labels = ll)
asgn <- split(order(aa), aaa)
if (hasintercept) {
asgn$"(Intercept)" <- NULL
if (!mmDone) {
mm <- model.matrix(object)
mmDone <- TRUE
}
avx <- colMeans(mm)
termsconst <- sum(avx[piv] * beta[piv])
}
nterms <- length(asgn)
if (nterms > 0) {
predictor <- matrix(ncol = nterms, nrow = NROW(X))
dimnames(predictor) <- list(rownames(X), names(asgn))
if (se.fit || interval != "none") {
ip <- matrix(ncol = nterms, nrow = NROW(X))
dimnames(ip) <- list(rownames(X), names(asgn))
Rinv <- qr.solve(qr.R(qr.lm(object))[p1, p1])
}
if (hasintercept)
X <- sweep(X, 2L, avx, check.margin = FALSE)
unpiv <- rep.int(0L, NCOL(X))
unpiv[piv] <- p1
for (i in seq.int(1L, nterms, length.out = nterms)) {
iipiv <- asgn[[i]]
ii <- unpiv[iipiv]
iipiv[ii == 0L] <- 0L
predictor[, i] <- if (any(iipiv > 0L))
X[, iipiv, drop = FALSE] %*% beta[iipiv]
else 0
if (se.fit || interval != "none")
ip[, i] <- if (any(iipiv > 0L))
as.matrix(X[, iipiv, drop = FALSE] %*% Rinv[ii,
, drop = FALSE])^2 %*% rep.int(res.var,
p)
else 0
}
if (!is.null(terms)) {
predictor <- predictor[, terms, drop = FALSE]
if (se.fit)
ip <- ip[, terms, drop = FALSE]
}
}
else {
predictor <- ip <- matrix(0, n, 0L)
}
attr(predictor, "constant") <- if (hasintercept)
termsconst
else 0
}
if (interval != "none") {
tfrac <- qt((1 - level)/2, df)
hwid <- tfrac * switch(interval, confidence = sqrt(ip),
prediction = sqrt(ip + pred.var))
if (type != "terms") {
predictor <- cbind(predictor, predictor + hwid %o%
c(1, -1))
colnames(predictor) <- c("fit", "lwr", "upr")
}
else {
if (!is.null(terms))
hwid <- hwid[, terms, drop = FALSE]
lwr <- predictor + hwid
upr <- predictor - hwid
}
}
if (se.fit || interval != "none") {
se <- sqrt(ip)
if (type == "terms" && !is.null(terms) && !se.fit)
se <- se[, terms, drop = FALSE]
}
if (missing(newdata) && !is.null(na.act <- object$na.action)) {
predictor <- napredict(na.act, predictor)
if (se.fit)
se <- napredict(na.act, se)
}
if (type == "terms" && interval != "none") {
if (missing(newdata) && !is.null(na.act)) {
lwr <- napredict(na.act, lwr)
upr <- napredict(na.act, upr)
}
list(fit = predictor, se.fit = se, lwr = lwr, upr = upr,
df = df, residual.scale = sqrt(res.var))
}
else if (se.fit)
list(fit = predictor, se.fit = se, df = df, residual.scale = sqrt(res.var))
else predictor
}

Is there a way to see the formula that R uses for the survfit confidence intervals? [duplicate]

This question already has answers here:
How can I view the source code for a function?
(13 answers)
Closed 7 years ago.
I want to be able to see how the summary of survfit calculates its confidence intervals. Is there a way that I can ask R to show me how it calculated these to show me the formula?
Thanks :)

You can find the source code like this. First look at survfit:
> getAnywhere("survfit")
A single object matching ‘survfit’ was found
It was found in the following places
package:survival
namespace:survival
with value
function (formula, ...)
{
UseMethod("survfit", formula)
}
<bytecode: 0x000000000edccc88>
<environment: namespace:survival>
>
This tells us we have to look at survfit.formula, which is what that UseMethod call is saying. So we do that and we get a lot of code:
> getAnywhere("survfit.formula")
A single object matching ‘survfit.formula’ was found
It was found in the following places
package:survival
registered S3 method for survfit from namespace survival
namespace:survival
with value
function (formula, data, weights, subset, na.action, etype, id,
istate, ...)
{
Call <- match.call()
Call[[1]] <- as.name("survfit")
mfnames <- c("formula", "data", "weights", "subset", "na.action",
"istate", "id", "etype")
temp <- Call[c(1, match(mfnames, names(Call), nomatch = 0))]
temp[[1]] <- as.name("model.frame")
if (is.R())
m <- eval.parent(temp)
else m <- eval(temp, sys.parent())
Terms <- terms(formula, c("strata", "cluster"))
ord <- attr(Terms, "order")
if (length(ord) & any(ord != 1))
stop("Interaction terms are not valid for this function")
n <- nrow(m)
Y <- model.extract(m, "response")
if (!is.Surv(Y))
stop("Response must be a survival object")
casewt <- model.extract(m, "weights")
if (is.null(casewt))
casewt <- rep(1, n)
if (!is.null(attr(Terms, "offset")))
warning("Offset term ignored")
id <- model.extract(m, "id")
istate <- model.extract(m, "istate")
temp <- untangle.specials(Terms, "cluster")
if (length(temp$vars) > 0) {
if (length(temp$vars) > 1)
stop("can not have two cluster terms")
if (!is.null(id))
stop("can not have both a cluster term and an id variable")
id <- m[[temp$vars]]
Terms <- Terms[-temp$terms]
}
ll <- attr(Terms, "term.labels")
if (length(ll) == 0)
X <- factor(rep(1, n))
else X <- strata(m[ll])
if (!is.Surv(Y))
stop("y must be a Surv object")
etype <- model.extract(m, "etype")
if (!is.null(etype)) {
if (attr(Y, "type") == "mcounting" || attr(Y, "type") ==
"mright")
stop("cannot use both the etype argument and mstate survival type")
if (length(istate))
stop("cannot use both the etype and istate arguments")
status <- Y[, ncol(Y)]
etype <- as.factor(etype)
temp <- table(etype, status == 0)
if (all(rowSums(temp == 0) == 1)) {
newlev <- levels(etype)[order(-temp[, 2])]
}
else newlev <- c(" ", levels(etype)[temp[, 1] > 0])
status <- factor(ifelse(status == 0, 0, as.numeric(etype)),
labels = newlev)
if (attr(Y, "type") == "right")
Y <- Surv(Y[, 1], status, type = "mstate")
else if (attr(Y, "type") == "counting")
Y <- Surv(Y[, 1], Y[, 2], status, type = "mstate")
else stop("etype argument incompatable with survival type")
}
if (attr(Y, "type") == "left" || attr(Y, "type") == "interval")
temp <- survfitTurnbull(X, Y, casewt, ...)
else if (attr(Y, "type") == "right" || attr(Y, "type") ==
"counting")
temp <- survfitKM(X, Y, casewt, ...)
else if (attr(Y, "type") == "mright" || attr(Y, "type") ==
"mcounting")
temp <- survfitCI(X, Y, weights = casewt, id = id, istate = istate,
...)
else {
stop("unrecognized survival type")
}
if (is.null(temp$states))
class(temp) <- "survfit"
else class(temp) <- c("survfitms", "survfit")
if (!is.null(attr(m, "na.action")))
temp$na.action <- attr(m, "na.action")
temp$call <- Call
temp
}
<bytecode: 0x000000003f6a8c28>
<environment: namespace:survival>
We scan this and eventually notice a call to survfitCI close to the end. Sounds like what we are looking for. So once again into the breech:
> getAnywhere("survfitCI")
A single object matching ‘survfitCI’ was found
It was found in the following places
package:survival
namespace:survival
with value
function (X, Y, weights, id, istate, type = c("kaplan-meier",
"fleming-harrington", "fh2"), se.fit = TRUE, conf.int = 0.95,
conf.type = c("log", "log-log", "plain", "none"), conf.lower = c("usual",
"peto", "modified"))
{
method <- match.arg(type)
conf.type <- match.arg(conf.type)
conf.lower <- match.arg(conf.lower)
if (is.logical(conf.int)) {
if (!conf.int)
conf.type <- "none"
conf.int <- 0.95
}
type <- attr(Y, "type")
if (type != "mright" && type != "mcounting" && type != "right" &&
type != "counting")
stop(paste("Cumulative incidence computation doesn't support \"",
type, "\" survival data", sep = ""))
n <- nrow(Y)
status <- Y[, ncol(Y)]
ncurve <- length(levels(X))
state.names <- attr(Y, "states")
if (missing(istate) || is.null(istate))
istate <- rep(0L, n)
else if (is.factor(istate) || is.character(istate)) {
temp <- as.factor(istate)
appear <- (levels(istate))[unique(as.numeric(istate))]
state.names <- unique(c(attr(Y, "states"), appear))
istate <- as.numeric(factor(as.character(istate), levels = state.names))
}
else if (!is.numeric(istate) || any(istate != floor(istate)))
stop("istate should be a vector of integers or a factor")
if (length(id) == 0)
id <- 1:n
if (length(istate) == 1)
istate <- rep(istate, n)
if (length(istate) != n)
stop("wrong length for istate")
states <- sort(unique(c(istate, 1:length(attr(Y, "states")))))
docurve2 <- function(entry, etime, status, istate, wt, states,
id, se.fit) {
ftime <- factor(c(entry, etime))
ltime <- levels(ftime)
ftime <- matrix(as.integer(ftime), ncol = 2)
timeset <- as.numeric(ltime[sort(unique(ftime[, 2]))])
nstate <- length(states)
uid <- sort(unique(id))
P <- as.vector(tapply(wt, factor(istate, levels = states),
sum)/sum(wt))
P <- ifelse(is.na(P), 0, P)
cstate <- istate[match(uid, id)]
storage.mode(wt) <- "double"
storage.mode(cstate) <- "integer"
storage.mode(status) <- "integer"
fit <- .Call(Csurvfitci, ftime, order(ftime[, 1]) - 1L,
order(ftime[, 2]) - 1L, length(timeset), status,
cstate - 1L, wt, match(id, uid) - 1L, P, as.integer(se.fit))
prev0 <- table(factor(cstate, levels = states), exclude = NA)/length(cstate)
if (se.fit)
list(time = timeset, pmat = t(fit$p), std = sqrt(t(fit$var)),
n.risk = colSums(fit$nrisk), n.event = fit$nevent,
n.censor = fit$ncensor, prev0 = prev0, cumhaz = array(fit$cumhaz,
dim = c(nstate, nstate, length(timeset))))
else list(time = timeset, pmat = t(fit$p), n.risk = colSums(fit$nrisk),
n.event = fit$nevent, n.censor = fit$ncensor, prev0 = prev0,
cumhaz = array(fit$cumhaz, dim = c(nstate, nstate,
length(timeset))))
}
if (any(states == 0)) {
state0 <- TRUE
states <- states + 1
istate <- istate + 1
status <- ifelse(status == 0, 0, status + 1)
}
else state0 <- FALSE
curves <- vector("list", ncurve)
names(curves) <- levels(X)
if (ncol(Y) == 2) {
indx <- which(status == istate & status != 0)
if (length(indx)) {
warning("an observation transitions to it's starting state, transition ignored")
status[indx] <- 0
}
if (length(id) && any(duplicated(id)))
stop("Cannot have duplicate id values with (time, status) data")
entry <- rep(min(-1, 2 * min(Y[, 1]) - 1), n)
for (i in levels(X)) {
indx <- which(X == i)
curves[[i]] <- docurve2(entry[indx], Y[indx, 1],
status[indx], istate[indx], weights[indx], states,
id[indx], se.fit)
}
}
else {
if (missing(id) || is.null(id))
stop("the id argument is required for start:stop data")
indx <- order(id, Y[, 2])
indx1 <- c(NA, indx)
indx2 <- c(indx, NA)
same <- (id[indx1] == id[indx2] & !is.na(indx1) & !is.na(indx2))
if (any(same & X[indx1] != X[indx2])) {
who <- 1 + min(which(same & X[indx1] != X[indx2]))
stop("subject is in two different groups, id ", (id[indx1])[who])
}
if (any(same & Y[indx1, 2] != Y[indx2, 1])) {
who <- 1 + min(which(same & Y[indx1, 2] != Y[indx2,
1]))
stop("gap in follow-up, id ", (id[indx1])[who])
}
if (any(Y[, 1] == Y[, 2]))
stop("cannot have start time == stop time")
if (any(same & Y[indx1, 3] == Y[indx2, 3] & Y[indx1,
3] != 0)) {
who <- 1 + min(which(same & Y[indx1, 1] != Y[indx2,
2]))
warning("subject changes to the same state, id ",
(id[indx1])[who])
}
if (any(same & weights[indx1] != weights[indx2])) {
who <- 1 + min(which(same & weights[indx1] != weights[indx2]))
stop("subject changes case weights, id ", (id[indx1])[who])
}
indx <- order(Y[, 2])
uid <- unique(id)
temp <- (istate[indx])[match(uid, id[indx])]
istate <- temp[match(id, uid)]
for (i in levels(X)) {
indx <- which(X == i)
curves[[i]] <- docurve2(Y[indx, 1], Y[indx, 2], status[indx],
istate[indx], weights[indx], states, id[indx],
se.fit)
}
}
grabit <- function(clist, element) {
temp <- (clist[[1]][[element]])
if (is.matrix(temp)) {
nc <- ncol(temp)
matrix(unlist(lapply(clist, function(x) t(x[[element]]))),
byrow = T, ncol = nc)
}
else {
xx <- as.vector(unlist(lapply(clist, function(x) x[element])))
if (class(temp) == "table")
matrix(xx, byrow = T, ncol = length(temp))
else xx
}
}
kfit <- list(n = as.vector(table(X)), time = grabit(curves,
"time"), n.risk = grabit(curves, "n.risk"), n.event = grabit(curves,
"n.event"), n.censor = grabit(curves, "n.censor"), prev = grabit(curves,
"pmat"), prev0 = grabit(curves, "prev0"))
nstate <- length(states)
kfit$cumhaz <- array(unlist(lapply(curves, function(x) x$cumhaz)),
dim = c(nstate, nstate, length(kfit$time)))
if (length(curves) > 1)
kfit$strata <- unlist(lapply(curves, function(x) length(x$time)))
if (se.fit)
kfit$std.err <- grabit(curves, "std")
if (state0) {
kfit$prev <- kfit$prev[, -1]
if (se.fit)
kfit$std.err <- kfit$std.err[, -1]
kfit$prev0 <- kfit$prev0[, -1]
}
if (se.fit) {
std.err <- kfit$std.err
zval <- qnorm(1 - (1 - conf.int)/2, 0, 1)
surv <- 1 - kfit$prev
if (conf.type == "plain") {
temp <- zval * std.err
kfit <- c(kfit, list(lower = pmax(kfit$prev - temp,
0), upper = pmin(kfit$prev + temp, 1), conf.type = "plain",
conf.int = conf.int))
}
if (conf.type == "log") {
xx <- ifelse(kfit$prev == 1, 1, 1 - kfit$prev)
temp1 <- ifelse(surv == 0, NA, exp(log(xx) + zval *
std.err/xx))
temp2 <- ifelse(surv == 0, NA, exp(log(xx) - zval *
std.err/xx))
kfit <- c(kfit, list(lower = pmax(1 - temp1, 0),
upper = 1 - temp2, conf.type = "log", conf.int = conf.int))
}
if (conf.type == "log-log") {
who <- (surv == 0 | surv == 1)
temp3 <- ifelse(surv == 0, NA, 1)
xx <- ifelse(who, 0.1, kfit$surv)
temp1 <- exp(-exp(log(-log(xx)) + zval * std.err/(xx *
log(xx))))
temp1 <- ifelse(who, temp3, temp1)
temp2 <- exp(-exp(log(-log(xx)) - zval * std.err/(xx *
log(xx))))
temp2 <- ifelse(who, temp3, temp2)
kfit <- c(kfit, list(lower = 1 - temp1, upper = 1 -
temp2, conf.type = "log-log", conf.int = conf.int))
}
}
kfit$states <- state.names
kfit$type <- attr(Y, "type")
kfit
}
<bytecode: 0x000000002ce81838>
<environment: namespace:survival>
Somewhere in there is your answer.

Plotting newton-raphson/fisher scoring iterations in R

Is there a package in R plotting newton-raphson/fisher scoring iterations when fitting a glm modelel (from the stats package)?

I answered a very similar question yesterday. In your case however, things are a little simpler.
Note that when you call glm, it eventually calls glm.fit (or any other method argument you specify to glm) which computes the solution path in the loop from lines 78 to 170. The current iteration's value of the coefficients is computed on line 97 using a .Call to a C function C_Cdqrls. As a hack, you can extract the current value of the coefficients to the global environment (fit$coefficients), within this loop, by modifying the glm.fit function like so:
glm.fit.new = function (x, y, weights = rep(1, nobs), start = NULL, etastart = NULL,
mustart = NULL, offset = rep(0, nobs), family = gaussian(),
control = list(), intercept = TRUE) {
control <- do.call("glm.control", control)
x <- as.matrix(x)
xnames <- dimnames(x)[[2L]]
ynames <- if (is.matrix(y))
rownames(y)
else names(y)
conv <- FALSE
nobs <- NROW(y)
nvars <- ncol(x)
EMPTY <- nvars == 0
if (is.null(weights))
weights <- rep.int(1, nobs)
if (is.null(offset))
offset <- rep.int(0, nobs)
variance <- family$variance
linkinv <- family$linkinv
if (!is.function(variance) || !is.function(linkinv))
stop("'family' argument seems not to be a valid family object",
call. = FALSE)
dev.resids <- family$dev.resids
aic <- family$aic
mu.eta <- family$mu.eta
unless.null <- function(x, if.null) if (is.null(x))
if.null
else x
valideta <- unless.null(family$valideta, function(eta) TRUE)
validmu <- unless.null(family$validmu, function(mu) TRUE)
if (is.null(mustart)) {
eval(family$initialize)
}
else {
mukeep <- mustart
eval(family$initialize)
mustart <- mukeep
}
if (EMPTY) {
eta <- rep.int(0, nobs) + offset
if (!valideta(eta))
stop("invalid linear predictor values in empty model",
call. = FALSE)
mu <- linkinv(eta)
if (!validmu(mu))
stop("invalid fitted means in empty model", call. = FALSE)
dev <- sum(dev.resids(y, mu, weights))
w <- ((weights * mu.eta(eta)^2)/variance(mu))^0.5
residuals <- (y - mu)/mu.eta(eta)
good <- rep_len(TRUE, length(residuals))
boundary <- conv <- TRUE
coef <- numeric()
iter <- 0L
}
else {
coefold <- NULL
eta <- if (!is.null(etastart))
etastart
else if (!is.null(start))
if (length(start) != nvars)
stop(gettextf("length of 'start' should equal %d and correspond to initial coefs for %s",
nvars, paste(deparse(xnames), collapse = ", ")),
domain = NA)
else {
coefold <- start
offset + as.vector(if (NCOL(x) == 1L)
x * start
else x %*% start)
}
else family$linkfun(mustart)
mu <- linkinv(eta)
if (!(validmu(mu) && valideta(eta)))
stop("cannot find valid starting values: please specify some",
call. = FALSE)
devold <- sum(dev.resids(y, mu, weights))
boundary <- conv <- FALSE
# EDIT: counter to create track of iterations
i <<- 1
for (iter in 1L:control$maxit) {
good <- weights > 0
varmu <- variance(mu)[good]
if (anyNA(varmu))
stop("NAs in V(mu)")
if (any(varmu == 0))
stop("0s in V(mu)")
mu.eta.val <- mu.eta(eta)
if (any(is.na(mu.eta.val[good])))
stop("NAs in d(mu)/d(eta)")
good <- (weights > 0) & (mu.eta.val != 0)
if (all(!good)) {
conv <- FALSE
warning(gettextf("no observations informative at iteration %d",
iter), domain = NA)
break
}
z <- (eta - offset)[good] + (y - mu)[good]/mu.eta.val[good]
w <- sqrt((weights[good] * mu.eta.val[good]^2)/variance(mu)[good])
fit <- .Call(stats:::C_Cdqrls, x[good, , drop = FALSE] *
w, z * w, min(1e-07, control$epsilon/1000), check = FALSE)
#======================================================
# EDIT: assign the coefficients to variables in the global namespace
#======================================================
assign(paste0("iteration_x_", i), fit$coefficients,
envir = .GlobalEnv)
i <<- i + 1 # increase the counter
if (any(!is.finite(fit$coefficients))) {
conv <- FALSE
warning(gettextf("non-finite coefficients at iteration %d",
iter), domain = NA)
break
}
if (nobs < fit$rank)
stop(sprintf(ngettext(nobs, "X matrix has rank %d, but only %d observation",
"X matrix has rank %d, but only %d observations"),
fit$rank, nobs), domain = NA)
start[fit$pivot] <- fit$coefficients
eta <- drop(x %*% start)
mu <- linkinv(eta <- eta + offset)
dev <- sum(dev.resids(y, mu, weights))
if (control$trace)
cat("Deviance = ", dev, " Iterations - ", iter,
"\n", sep = "")
boundary <- FALSE
if (!is.finite(dev)) {
if (is.null(coefold))
stop("no valid set of coefficients has been found: please supply starting values",
call. = FALSE)
warning("step size truncated due to divergence",
call. = FALSE)
ii <- 1
while (!is.finite(dev)) {
if (ii > control$maxit)
stop("inner loop 1; cannot correct step size",
call. = FALSE)
ii <- ii + 1
start <- (start + coefold)/2
eta <- drop(x %*% start)
mu <- linkinv(eta <- eta + offset)
dev <- sum(dev.resids(y, mu, weights))
}
boundary <- TRUE
if (control$trace)
cat("Step halved: new deviance = ", dev, "\n",
sep = "")
}
if (!(valideta(eta) && validmu(mu))) {
if (is.null(coefold))
stop("no valid set of coefficients has been found: please supply starting values",
call. = FALSE)
warning("step size truncated: out of bounds",
call. = FALSE)
ii <- 1
while (!(valideta(eta) && validmu(mu))) {
if (ii > control$maxit)
stop("inner loop 2; cannot correct step size",
call. = FALSE)
ii <- ii + 1
start <- (start + coefold)/2
eta <- drop(x %*% start)
mu <- linkinv(eta <- eta + offset)
}
boundary <- TRUE
dev <- sum(dev.resids(y, mu, weights))
if (control$trace)
cat("Step halved: new deviance = ", dev, "\n",
sep = "")
}
if (abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
conv <- TRUE
coef <- start
break
}
else {
devold <- dev
coef <- coefold <- start
}
}
if (!conv)
warning("glm.fit: algorithm did not converge", call. = FALSE)
if (boundary)
warning("glm.fit: algorithm stopped at boundary value",
call. = FALSE)
eps <- 10 * .Machine$double.eps
if (family$family == "binomial") {
if (any(mu > 1 - eps) || any(mu < eps))
warning("glm.fit: fitted probabilities numerically 0 or 1 occurred",
call. = FALSE)
}
if (family$family == "poisson") {
if (any(mu < eps))
warning("glm.fit: fitted rates numerically 0 occurred",
call. = FALSE)
}
if (fit$rank < nvars)
coef[fit$pivot][seq.int(fit$rank + 1, nvars)] <- NA
xxnames <- xnames[fit$pivot]
residuals <- (y - mu)/mu.eta(eta)
fit$qr <- as.matrix(fit$qr)
nr <- min(sum(good), nvars)
if (nr < nvars) {
Rmat <- diag(nvars)
Rmat[1L:nr, 1L:nvars] <- fit$qr[1L:nr, 1L:nvars]
}
else Rmat <- fit$qr[1L:nvars, 1L:nvars]
Rmat <- as.matrix(Rmat)
Rmat[row(Rmat) > col(Rmat)] <- 0
names(coef) <- xnames
colnames(fit$qr) <- xxnames
dimnames(Rmat) <- list(xxnames, xxnames)
}
names(residuals) <- ynames
names(mu) <- ynames
names(eta) <- ynames
wt <- rep.int(0, nobs)
wt[good] <- w^2
names(wt) <- ynames
names(weights) <- ynames
names(y) <- ynames
if (!EMPTY)
names(fit$effects) <- c(xxnames[seq_len(fit$rank)], rep.int("",
sum(good) - fit$rank))
wtdmu <- if (intercept)
sum(weights * y)/sum(weights)
else linkinv(offset)
nulldev <- sum(dev.resids(y, wtdmu, weights))
n.ok <- nobs - sum(weights == 0)
nulldf <- n.ok - as.integer(intercept)
rank <- if (EMPTY)
0
else fit$rank
resdf <- n.ok - rank
aic.model <- aic(y, n, mu, weights, dev) + 2 * rank
list(coefficients = coef, residuals = residuals, fitted.values = mu,
effects = if (!EMPTY) fit$effects, R = if (!EMPTY) Rmat,
rank = rank, qr = if (!EMPTY) structure(fit[c("qr", "rank",
"qraux", "pivot", "tol")], class = "qr"), family = family,
linear.predictors = eta, deviance = dev, aic = aic.model,
null.deviance = nulldev, iter = iter, weights = wt, prior.weights = weights,
df.residual = resdf, df.null = nulldf, y = y, converged = conv,
boundary = boundary)
}
Note that this is a hack for a couple of reasons:
1. The function C_Cdrqls is not exported by the package stats, and so we have to look for it within namespace:package:stats.
2. This pollutes your global environment with the iteration values via a side-effect of the call to glm.fit.new, creating one vector per iteration. Side-effects are generally frowned upon in functional languages like R. You can probably clean the multiple objects bit up by creating a matrix or a data.frame and assign within that.
However, once you have the iteration values extracted, you can do whatever you want with them, including plotting them.
Here is what a call to glm with the newly defined glm.fit.new method would look like:
counts = c(18,17,15,20,10,20,25,13,12)
outcome = gl(3,1,9)
treatment = gl(3,3)
print(d.AD = data.frame(treatment, outcome, counts))
glm.D93 = glm(counts ~ outcome + treatment, family = poisson(),
control = list(trace = TRUE, epsilon = 1e-16), method = "glm.fit.new")
You can check that the iteration parameter values have indeed been populated in the global environment:
> ls(pattern = "iteration_x_")
[1] "iteration_x_1" "iteration_x_10" "iteration_x_11" "iteration_x_2"
[5] "iteration_x_3" "iteration_x_4" "iteration_x_5" "iteration_x_6"
[9] "iteration_x_7" "iteration_x_8" "iteration_x_9"

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

R - exclamation mark before variable, but no subsequent =, == or similar - r

Related

My variogram code result different from variog() result

How to insert a numeric gradient inside constrOptim

Rpart equivalent to LM's se.fit

Is there a way to see the formula that R uses for the survfit confidence intervals? [duplicate]

Plotting newton-raphson/fisher scoring iterations in R

Categories

Resources