I'm using speedglm to estimate a logistic regression model on some data. I've created a reproducible example which generates the same error that I get using my original data.
library(speedglm)
n <- 10000
dtf <- data.frame( y = sample(c(0,1), n, 1),
x1 = as.factor(sample(c("a","b"), n, 1)),
x2 = rnorm(n, 30, 10))
m <- speedglm(y ~ x1 + x2, dtf, family=binomial())
summary(m)
The output is the following:
Generalized Linear Model of class 'speedglm':
Call: speedglm(formula = y ~ x1 + x2, data = dtf, family = binomial())
Coefficients:
------------------------------------------------------------------
Error in data.frame(..., check.names = FALSE) :
arguments imply differing number of rows: 3, 0
I've checked the source code of summary.speedglm by executing getS3method("summary", "speedglm") and found the code line which generates the error, but it didn't help to solve the problem.
PS: someone with 1500+ rep should create the speedglm tag.
UPDATE
Marco Enea, the maintainer of speedglm, asked to post the following temporary fix for summary.speedglm and print.summary.speedglm.
summary.speedglm <- function (object, correlation = FALSE, ...)
{
if (!inherits(object, "speedglm"))
stop("object is not of class speedglm")
z <- object
var_res <- as.numeric(z$RSS/z$df)
dispersion <- if (z$family$family %in% c("poisson", "binomial")) 1 else var_res
if (z$method == "qr") {
z$XTX <- z$XTX[z$ok, z$ok]
}
inv <- solve(z$XTX, tol = z$tol.solve)
covmat <- diag(inv)
se_coef <- rep(NA, length(z$coefficients))
se_coef[z$ok] <- sqrt(dispersion * covmat)
if (z$family$family %in% c("binomial", "poisson")) {
z1 <- z$coefficients/se_coef
p <- 2 * pnorm(abs(z1), lower.tail = FALSE)
} else {
t1 <- z$coefficients/se_coef
p <- 2 * pt(abs(t1), df = z$df, lower.tail = FALSE)
}
ip <- !is.na(p)
p[ip] <- as.numeric(format(p[ip], digits = 3))
dn <- c("Estimate", "Std. Error")
if (z$family$family %in% c("binomial", "poisson")) {
format.coef <- if (any(na.omit(abs(z$coef)) < 1e-04))
format(z$coefficients, scientific = TRUE, digits = 4) else
round(z$coefficients, digits = 7)
format.se <- if (any(na.omit(se_coef) < 1e-04))
format(se_coef, scientific = TRUE, digits = 4) else round(se_coef, digits = 7)
format.pv <- if (any(na.omit(p) < 1e-04))
format(p, scientific = TRUE, digits = 4) else round(p, digits = 4)
param <- data.frame(format.coef, format.se, round(z1,
digits = 4), format.pv)
dimnames(param) <- list(names(z$coefficients), c(dn,
"z value", "Pr(>|z|)"))
} else {
format.coef <- if (any(abs(na.omit(z$coefficients)) <
1e-04))
format(z$coefficients, scientific = TRUE, digits = 4) else
round(z$coefficients, digits = 7)
format.se <- if (any(na.omit(se_coef) < 1e-04))
format(se_coef, scientific = TRUE, digits = 4) else
round(se_coef, digits = 7)
format.pv <- if (any(na.omit(p) < 1e-04))
format(p, scientific = TRUE, digits = 4) else round(p, digits = 4)
param <- data.frame(format.coef, format.se, round(t1,
digits = 4), format.pv)
dimnames(param) <- list(names(z$coefficients), c(dn,
"t value", "Pr(>|t|)"))
}
eps <- 10 * .Machine$double.eps
if (z$family$family == "binomial") {
if (any(z$mu > 1 - eps) || any(z$mu < eps))
warning("fitted probabilities numerically 0 or 1 occurred")
}
if (z$family$family == "poisson") {
if (any(z$mu < eps))
warning("fitted rates numerically 0 occurred")
}
keep <- match(c("call", "terms", "family", "deviance", "aic",
"df", "nulldev", "nulldf", "iter", "tol", "n", "convergence",
"ngoodobs", "logLik", "RSS", "rank"), names(object),
0)
ans <- c(object[keep], list(coefficients = param, dispersion = dispersion,
correlation = correlation, cov.unscaled = inv, cov.scaled = inv *
var_res))
if (correlation) {
ans$correl <- (inv * var_res)/outer(na.omit(se_coef),
na.omit(se_coef))
}
class(ans) <- "summary.speedglm"
return(ans)
}
print.summary.speedglm <- function (x, digits = max(3, getOption("digits") - 3), ...)
{
cat("Generalized Linear Model of class 'speedglm':\n")
if (!is.null(x$call))
cat("\nCall: ", deparse(x$call), "\n\n")
if (length(x$coef)) {
cat("Coefficients:\n")
cat(" ------------------------------------------------------------------",
"\n")
sig <- function(z){
if (!is.na(z)){
if (z < 0.001)
"***"
else if (z < 0.01)
"** "
else if (z < 0.05)
"* "
else if (z < 0.1)
". "
else " "
} else " "
}
options(warn=-1)
sig.1 <- sapply(as.numeric(as.character(x$coefficients[,4])),
sig)
options(warn=0)
est.1 <- cbind(format(x$coefficients, digits = digits),
sig.1)
colnames(est.1)[ncol(est.1)] <- ""
print(est.1)
cat("\n")
cat("-------------------------------------------------------------------",
"\n")
cat("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1",
"\n")
cat("\n")
}
else cat("No coefficients\n")
cat("---\n")
cat("null df: ", x$nulldf, "; null deviance: ", round(x$nulldev,
digits = 2), ";\n", "residuals df: ", x$df, "; residuals deviance: ",
round(x$deviance, digits = 2), ";\n", "# obs.: ", x$n,
"; # non-zero weighted obs.: ", x$ngoodobs, ";\n", "AIC: ",
x$aic, "; log Likelihood: ", x$logLik, ";\n", "RSS: ",
round(x$RSS, digits = 1), "; dispersion: ", x$dispersion,
"; iterations: ", x$iter, ";\n", "rank: ", round(x$rank,
digits = 1), "; max tolerance: ", format(x$tol, scientific = TRUE,
digits = 3), "; convergence: ", x$convergence, ".\n",
sep = "")
invisible(x)
if (x$correlation) {
cat("---\n")
cat("Correlation of Coefficients:\n")
x$correl[upper.tri(x$correl, diag = TRUE)] <- NA
print(x$correl[-1, -nrow(x$correl)], na.print = "", digits = 2)
}
}
Following 42' suggestion, I would also add the following:
environment(summary.speedglm) <- environment(speedglm)
environment(print.summary.speedglm) <- environment(speedglm)
The print.summary.speedglm function has a tiny bug in it. If you change this line:
sig.1 <- cbind(sapply(as.numeric(as.character(x$coefficients$"Pr(>|t|)")), sig))
To this line:
sig.1 <- cbind(sapply(as.numeric(as.character(x$coefficients$"Pr(>|z|)")), sig))
And also run:
environment(print.summary.speedglm) <- environment(speedglm)
You will not see the error message anymore.
The proper way to report bugs is to contact the maintainer (I'll send him an email):
maintainer('speedglm')
[1] "Marco Enea <emarco76#libero.it>"
It appears that this is a bug; in speedglm:::print.summary.speedglm there is the line:
sig.1 <- sapply(as.numeric(as.character(x$coefficients$"Pr(>|t|)")),
sig)
but when you look at the object, you can see:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.0546397 0.0655713 -0.8333 0.405
x1b -0.0618225 0.0400126 -1.5451 0.122
x2 0.0020771 0.0019815 1.0483 0.295
which has a Pr(>|z|) instead of Pr(>|t|), so the sig stars fail.
Related
I am doing a GAMLSS model, this linear model could do iterations automatically until it could get a best combinations of explanatory variables. After I put some explanatory variables in the model, it was still good in iteration process at first several rounds, then I got a Error like this.
Model with term Spr_Tmean has failed
Model with term Spr_Psum has failed
Model with term Spr_sdmean has failed
Model with term Spr_Wsum has failed
Model with term Sum_Tmean has failed
Model with term Sum_Psum has failed
Model with term Sum_sdmean has failed
Model with term Sum_Wsum has failed
Error in ans[, 1] : incorrect number of dimensions
I also checked some questions related to Error in xxx[,1]: incorrect number of dimensions, but i think this is not what i want.
I also list the source function in here, you could search "ans[, 1]" to locate the problem. What "ans[, 1]" means in here? I am not professional to check this function, so any answer about the reason caused this Error, and how to solve this problem would be welcome. Thank you in advance.
> stepGAICAll.B
function (object, scope, direction = c("both", "backward",
"forward"), trace = T, keep = NULL, steps = 1000, scale = 0,
k = 2, parallel = c("no", "multicore", "snow"),
ncpus = 1L, cl = NULL, ...)
{
mydeviance <- function(x, ...) {
dev <- deviance(x)
if (!is.null(dev))
dev
else extractAIC(x, k = 0)[2]
}
cut.string <- function(string) {
if (length(string) > 1)
string[-1] <- paste("\n", string[-1], sep = "")
string
}
re.arrange <- function(keep) {
namr <- names(k1 <- keep[[1]])
namc <- names(keep)
nc <- length(keep)
nr <- length(k1)
array(unlist(keep, recursive = FALSE), c(nr, nc), list(namr,
namc))
}
step.results <- function(models, fit, object, usingCp = FALSE) {
change <- sapply(models, "[[", "change")
rd <- sapply(models, "[[", "deviance")
dd <- c(NA, abs(diff(rd)))
rdf <- sapply(models, "[[", "df.resid")
ddf <- c(NA, abs(diff(rdf)))
AIC <- sapply(models, "[[", "AIC")
heading <- c("Stepwise Model Path \nAnalysis of Deviance Table",
"\nInitial Model:", deparse(as.vector(formula(object))),
"\nFinal Model:", deparse(as.vector(formula(fit))),
"\n")
aod <- if (usingCp)
data.frame(Step = change, Df = ddf, Deviance = dd,
`Resid. Df` = rdf, `Resid. Dev` = rd,
Cp = AIC, check.names = FALSE)
else data.frame(Step = change, Df = ddf, Deviance = dd,
`Resid. Df` = rdf, `Resid. Dev` = rd,
AIC = AIC, check.names = FALSE)
attr(aod, "heading") <- heading
class(aod) <- c("Anova", "data.frame")
fit$anova <- aod
fit
}
droptermAllP <- function(object, scope, test = c("Chisq",
"none"), k = 2, sorted = FALSE, trace = FALSE,
parallel = c("no", "multicore", "snow"),
ncpus = 1L, cl = NULL, ...) {
drop1.scope <- function(terms1, terms2) {
terms1 <- terms(terms1, "mu")
f2 <- if (missing(terms2))
numeric(0)
else attr(terms(terms2, "mu"), "factor")
factor.scope(attr(terms1, "factor"), list(drop = f2))$drop
}
safe_pchisq <- function(q, df, ...) {
df[df <= 0] <- NA
pchisq(q = q, df = df, ...)
}
tl <- attr(terms(object, "mu"), "term.labels")
if (missing(scope)) {
scope <- drop1.scope(object)
}
else {
if (!is.character(scope))
scope <- attr(terms(update.formula(formula(object,
"mu"), scope), "mu"), "term.labels")
if (!all(match(scope, tl, FALSE)))
stop("scope is not a subset of term labels")
}
ns <- length(scope)
ans <- matrix(nrow = ns + 1, ncol = 2, dimnames = list(c("<none>",
scope), c("df", "AIC")))
ans[1, ] <- extractAIC(object, scale, k = k, ...)
fn <- function(term) {
if (trace)
cat("trying -", term, "\n")
nfit <- update(object, as.formula(paste("~ . -",
term)), what = "All", evaluate = FALSE,
trace = FALSE)
nfit <- try(eval.parent(nfit), silent = TRUE)
if (any(class(nfit) %in% "try-error")) {
cat("Model with term ", term, "has failed \n")
NA
}
else extractAIC(nfit, scale, k = k, ...)
}
ans[-1, ] <- if (ncpus > 1L && (have_mc || have_snow)) {
if (have_mc) {
matrix(unlist(parallel::mclapply(scope, fn, mc.cores = ncpus)),
ncol = 2, byrow = T)
}
else if (have_snow) {
list(...)
if (is.null(cl)) {
res <- t(parallel::parSapply(cl, scope, fn))
res
}
else t(parallel::parSapply(cl, scope, fn))
}
}
else t(sapply(scope, fn))
dfs <- ans[1, 1] - ans[, 1]
dfs[1] <- NA
aod <- data.frame(Df = dfs, AIC = ans[, 2])
o <- if (sorted)
order(aod$AIC)
else seq(along = aod$AIC)
test <- match.arg(test)
if (test == "Chisq") {
dev <- ans[, 2] - k * ans[, 1]
dev <- dev - dev[1]
dev[1] <- NA
nas <- !is.na(dev)
P <- dev
P[nas] <- safe_pchisq(dev[nas], dfs[nas], lower.tail = FALSE)
aod[, c("LRT", "Pr(Chi)")] <- list(dev,
P)
}
aod <- aod[o, ]
head <- c("Single term deletions", "\nModel:",
deparse(as.vector(formula(object))))
class(aod) <- c("anova", "data.frame")
attr(aod, "heading") <- head
aod
}
addtermAllP <- function(object, scope, test = c("Chisq",
"none"), k = 2, sorted = FALSE, trace = FALSE,
parallel = c("no", "multicore", "snow"),
ncpus = 1L, cl = NULL, ...) {
add.scope <- function(terms1, terms2) {
terms1 <- terms(terms1)
terms2 <- terms(terms2)
factor.scope(attr(terms1, "factor"), list(add = attr(terms2,
"factor")))$add
}
safe_pchisq <- function(q, df, ...) {
df[df <= 0] <- NA
pchisq(q = q, df = df, ...)
}
if (missing(scope) || is.null(scope))
stop("no terms in scope")
if (!is.character(scope))
scope <- add.scope(object, terms(update.formula(formula(object,
"mu"), scope)))
if (!length(scope))
stop("no terms in scope for adding to object")
ns <- length(scope)
ans <- matrix(nrow = ns + 1, ncol = 2, dimnames = list(c("<none>",
scope), c("df", "AIC")))
ans[1, ] <- extractAIC(object, scale, k = k, ...)
fn <- function(term) {
if (trace)
cat("trying -", term, "\n")
nfit <- update(object, as.formula(paste("~ . +",
term)), what = "All", trace = FALSE, evaluate = FALSE)
nfit <- try(eval.parent(nfit), silent = TRUE)
if (any(class(nfit) %in% "try-error")) {
cat("Model with term ", term, "has failed \n")
NA
}
else extractAIC(nfit, scale, k = k, ...)
}
ans[-1, ] <- if (ncpus > 1L && (have_mc || have_snow)) {
if (have_mc) {
matrix(unlist(parallel::mclapply(scope, fn, mc.cores = ncpus)),
ncol = 2, byrow = T)
}
else if (have_snow) {
list(...)
if (is.null(cl)) {
res <- t(parallel::parSapply(cl, scope, fn))
res
}
else t(parallel::parSapply(cl, scope, fn))
}
}
else t(sapply(scope, fn))
dfs <- ans[, 1] - ans[1, 1]
dfs[1] <- NA
aod <- data.frame(Df = dfs, AIC = ans[, 2])
o <- if (sorted)
order(aod$AIC)
else seq(along = aod$AIC)
test <- match.arg(test)
if (test == "Chisq") {
dev <- ans[, 2] - k * ans[, 1]
dev <- dev[1] - dev
dev[1] <- NA
nas <- !is.na(dev)
P <- dev
P[nas] <- safe_pchisq(dev[nas], dfs[nas], lower.tail = FALSE)
aod[, c("LRT", "Pr(Chi)")] <- list(dev,
P)
}
aod <- aod[o, ]
head <- c("Single term additions for", "\nModel:",
deparse(as.vector(formula(object))))
class(aod) <- c("anova", "data.frame")
attr(aod, "heading") <- head
aod
}
if (missing(parallel))
parallel <- "no"
parallel <- match.arg(parallel)
have_mc <- have_snow <- FALSE
if (parallel != "no" && ncpus > 1L) {
if (parallel == "multicore")
have_mc <- .Platform$OS.type != "windows"
else if (parallel == "snow")
have_snow <- TRUE
if (!have_mc && !have_snow)
ncpus <- 1L
loadNamespace("parallel")
}
if (have_snow) {
cl <- parallel::makeForkCluster(ncpus)
if (RNGkind()[1L] == "L'Ecuyer-CMRG")
parallel::clusterSetRNGStream(cl)
on.exit(parallel::stopCluster(cl))
}
Terms <- terms(object)
object$formula <- Terms
object$call$formula <- Terms
md <- missing(direction)
direction <- match.arg(direction)
backward <- direction == "both" | direction == "backward"
forward <- direction == "both" | direction == "forward"
if (missing(scope)) {
fdrop <- numeric(0)
fadd <- attr(Terms, "factors")
if (md)
forward <- FALSE
}
else {
if (is.list(scope)) {
fdrop <- if (!is.null(fdrop <- scope$lower))
attr(terms(update.formula(formula(object, what = "mu"),
fdrop), what = "mu"), "factors")
else numeric(0)
fadd <- if (!is.null(fadd <- scope$upper))
attr(terms(update.formula(formula(object, what = "mu"),
fadd), what = "mu"), "factors")
}
else {
fadd <- if (!is.null(fadd <- scope))
attr(terms(update.formula(formula(object, what = "mu"),
scope), what = "mu"), "factors")
fdrop <- numeric(0)
}
}
models <- vector("list", steps)
if (!is.null(keep))
keep.list <- vector("list", steps)
if (is.list(object) && (nmm <- match("nobs", names(object),
0)) > 0)
n <- object[[nmm]]
else n <- length(residuals(object))
fit <- object
bAIC <- extractAIC(fit, scale, k = k, ...)
edf <- bAIC[1]
bAIC <- bAIC[2]
if (is.na(bAIC))
stop("AIC is not defined for this model, so stepAIC cannot proceed")
nm <- 1
Terms <- terms(fit, "mu")
if (trace)
cat("Start: AIC=", format(round(bAIC, 2)), "\n",
cut.string(deparse(as.vector(formula(fit, what = "mu")))),
"\n\n")
models[[nm]] <- list(deviance = mydeviance(fit), df.resid = n -
edf, change = "", AIC = bAIC)
if (!is.null(keep))
keep.list[[nm]] <- keep(fit, bAIC)
usingCp <- FALSE
while (steps > 0) {
steps <- steps - 1
AIC <- bAIC
ffac <- attr(Terms, "factors")
if (!is.null(sp <- attr(Terms, "specials")) &&
!is.null(st <- sp$strata))
ffac <- ffac[-st, ]
scope <- factor.scope(ffac, list(add = fadd, drop = fdrop))
aod <- NULL
change <- NULL
if (backward && length(scope$drop)) {
aod <- droptermAllP(fit, scope$drop, trace = max(0,
trace - 1), k = k, test = "none", parallel = parallel,
ncpus = ncpus, cl = cl)
rn <- row.names(aod)
row.names(aod) <- c(rn[1], paste("-", rn[-1],
sep = " "))
if (any(aod$Df == 0, na.rm = TRUE)) {
zdf <- aod$Df == 0 & !is.na(aod$Df)
nc <- match(c("Cp", "AIC"), names(aod))
nc <- nc[!is.na(nc)][1]
ch <- abs(aod[zdf, nc] - aod[1, nc]) > 0.01
if (any(ch)) {
warning("0 df terms are changing AIC")
zdf <- zdf[!ch]
}
if (length(zdf) > 0)
change <- rev(rownames(aod)[zdf])[1]
}
}
if (is.null(change)) {
if (forward && length(scope$add)) {
aodf <- addtermAllP(fit, scope$add, trace = max(0,
trace - 1), k = k, test = "none", parallel = parallel,
ncpus = ncpus, cl = cl)
rn <- row.names(aodf)
row.names(aodf) <- c(rn[1], paste("+",
rn[-1], sep = " "))
aod <- if (is.null(aod))
aodf
else rbind(aod, aodf[-1, , drop = FALSE])
}
attr(aod, "heading") <- NULL
if (is.null(aod) || ncol(aod) == 0)
break
nzdf <- if (!is.null(aod$Df))
aod$Df != 0 | is.na(aod$Df)
aod <- aod[nzdf, ]
if (is.null(aod) || ncol(aod) == 0)
break
nc <- match(c("Cp", "AIC"), names(aod))
nc <- nc[!is.na(nc)][1]
o <- order(aod[, nc])
if (trace)
print(aod[o, ])
if (o[1] == 1)
break
change <- rownames(aod)[o[1]]
}
usingCp <- match("Cp", names(aod), 0) > 0
fit <- update(fit, paste("~ .", change), evaluate = FALSE,
what = "All", trace = FALSE)
fit <- eval.parent(fit)
if (is.list(fit) && (nmm <- match("nobs", names(fit),
0)) > 0)
nnew <- fit[[nmm]]
else nnew <- length(residuals(fit))
if (nnew != n)
stop("number of rows in use has changed: remove missing values?")
Terms <- terms(fit, "mu")
bAIC <- extractAIC(fit, scale, k = k, ...)
edf <- bAIC[1]
bAIC <- bAIC[2]
if (trace)
cat("\nStep: AIC=", format(round(bAIC, 2)),
"\n", cut.string(deparse(as.vector(formula(fit,
"mu")))), "\n\n")
if (bAIC >= AIC + 1e-07)
break
nm <- nm + 1
models[[nm]] <- list(deviance = mydeviance(fit), df.resid = n -
edf, change = change, AIC = bAIC)
if (!is.null(keep))
keep.list[[nm]] <- keep(fit, bAIC)
}
if (!is.null(keep))
fit$keep <- re.arrange(keep.list[seq(nm)])
step.results(models = models[seq(nm)], fit, object, usingCp)
}
<bytecode: 0x0000026ddc5c40e8>
<environment: namespace:gamlss>
Not sure about your problem, but I prefer using
stepGAICAll.A()
I used to use a tobit regression with the following code:
tobit56 <- tobit (months56 ~ g1v3 + gender + un30min, left=0, right=60, data=gym)
summary(tobit56)
That code always worked well for me and got me a summary of the tobit.
Since yesterday whenever I run this, I get an error after I do summary:
tobit56 <- tobit (months56 ~ g1v3 + gender + un30min, left=0, right=60, data=gym)
summary(tobit56)
Error: $ operator is invalid for atomic vectors
Somebody has any idea what can cause this error message?
It seems to be a bug in the summary.tobit function of AER package.
Solution 1:
class(tobit_model$y) <- "Surv"
summary(tobit_model)
Solution 2:
Commenting out the line if(!inherits(y, "Surv")) y <- y$y and the function works fine.
summary.tobit(tobit_model)
summary.tobit <- function(object, correlation = FALSE, symbolic.cor = FALSE, vcov. = NULL, ...)
{
## failure
if(!is.null(object$fail)) {
warning("tobit/survreg failed.", object$fail, " No summary provided\n")
return(invisible(object))
}
## rank
if(all(is.na(object$coefficients))) {
warning("This model has zero rank --- no summary is provided")
return(invisible(object))
}
## vcov
if(is.null(vcov.)) vcov. <- vcov(object)
else {
if(is.function(vcov.)) vcov. <- vcov.(object)
}
## coefmat
coef <- coeftest(object, vcov. = vcov., ...)
attr(coef, "method") <- NULL
## Wald test
nc <- length(coef(object))
has_intercept <- attr(terms(object), "intercept") > 0.5
wald <- if(nc <= has_intercept) NULL else linearHypothesis(object,
if(has_intercept) cbind(0, diag(nc-1)) else diag(nc),
vcov. = vcov.)[2,3]
## instead of: waldtest(object, vcov = vcov.)
## correlation
correlation <- if(correlation) cov2cor(vcov.) else NULL
## distribution
dist <- object$dist
if(is.character(dist)) sd <- survreg.distributions[[dist]]
else sd <- dist
if(length(object$parms)) pprint <- paste(sd$name, "distribution: parmameters =", object$parms)
else pprint <- paste(sd$name, "distribution")
## number of observations
## (incorporating "bug fix" change for $y in survival 2.42-7)
surv_table <- function(y) {
# if(!inherits(y, "Surv")) y <- y$y
type <- attr(y, "type")
if(is.null(type) || (type == "left" && any(y[, 2L] > 1))) type <- "old"
y <- switch(type,
"left" = 2 - y[, 2L],
"interval" = y[, 3L],
y[, 2L]
)
table(factor(y, levels = c(2, 1, 0, 3),
labels = c("Left-censored", "Uncensored", "Right-censored", "Interval-censored")))
}
nobs <- surv_table(object$y)
nobs <- c("Total" = sum(nobs), nobs[1:3])
rval <- object[match(c("call", "df", "loglik", "iter", "na.action", "idf", "scale"),
names(object), nomatch = 0)]
rval <- c(rval, list(coefficients = coef, correlation = correlation,
symbolic.cor = symbolic.cor, parms = pprint, n = nobs, wald = wald))
class(rval) <- "summary.tobit"
return(rval)
}
catTestfisher <-
function (tab)
{
st <- if (!is.matrix(tab) || nrow(tab) < 2 | ncol(tab) <
2)
list(p.value = NA, statistic = NA, parameter = NA)
else {
rowcounts <- tab %*% rep(1, ncol(tab))
tab <- tab[rowcounts > 0, ]
if (!is.matrix(tab))
list(p.value = NA, statistic = NA, parameter = NA)
else fisher.test(tab)
}
list(P = st$p.value, stat = "", df = "",
testname = "Fisher's Exact", statname = "", latexstat = "", namefun = "",
plotmathstat = "")
}
I wanted to use library(Hmisc)'s summaryM function but with Fisher's exact test, so I wrote a catTestfisher function and set catTest = catTestfisher in my own summaryM2 function, which is exactly the same as summaryM, except for catTest = catTestfisher
summaryM2 <-
function (formula, groups = NULL, data = NULL, subset, na.action = na.retain,
overall = FALSE, continuous = 10, na.include = FALSE, quant = c(0.025,
0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95,
0.975), nmin = 100, test = FALSE, conTest = conTestkw,
catTest = catTestfisher, ordTest = ordTestpo)
{
marg <- length(data) && ".marginal." %in% names(data)
if (marg)
formula <- update(formula, . ~ . + .marginal.)
formula <- Formula(formula)
Y <- if (!missing(subset) && length(subset))
model.frame(formula, data = data, subset = subset, na.action = na.action)
else model.frame(formula, data = data, na.action = na.action)
X <- model.part(formula, data = Y, rhs = 1)
Y <- model.part(formula, data = Y, lhs = 1)
getlab <- function(x, default) {
lab <- attr(x, "label")
if (!length(lab) || lab == "")
default
else lab
}
if (marg) {
xm <- X$.marginal.
X$.marginal. <- NULL
}
else xm <- rep("", nrow(X))
if (length(X)) {
xname <- names(X)
if (length(xname) == 1 && !length(groups))
groups <- xname
if (!length(groups) && length(xname) > 1) {
warnings("Must specify groups when > 1 right hand side variable is present.\ngroups taken as first right hand variable.")
groups <- xname[1]
}
svar <- if (length(xname) == 1)
factor(rep(".ALL.", nrow(X)))
else do.call("interaction", list(X[setdiff(xname, groups)],
sep = " "))
group <- X[[groups]]
glabel <- getlab(group, groups)
}
else {
svar <- factor(rep(".ALL.", nrow(Y)))
group <- rep("", nrow(Y))
groups <- group.freq <- NULL
glabel <- ""
}
quants <- unique(c(quant, 0.025, 0.05, 0.125, 0.25, 0.375,
0.5, 0.625, 0.75, 0.875, 0.95, 0.975))
nv <- ncol(Y)
nameY <- names(Y)
R <- list()
for (strat in levels(svar)) {
instrat <- svar == strat
n <- integer(nv)
type <- n
comp <- dat <- vector("list", nv)
names(comp) <- names(dat) <- nameY
labels <- Units <- vector("character", nv)
if (test) {
testresults <- vector("list", nv)
names(testresults) <- names(comp)
}
gr <- group[instrat]
xms <- xm[instrat]
if (all(xms != ""))
xms <- rep("", length(xms))
group.freq <- table(gr)
group.freq <- group.freq[group.freq > 0]
if (overall)
group.freq <- c(group.freq, Combined = sum(group.freq))
for (i in 1:nv) {
w <- Y[instrat, i]
if (length(attr(w, "label")))
labels[i] <- attr(w, "label")
if (length(attr(w, "units")))
Units[i] <- attr(w, "units")
if (!inherits(w, "mChoice")) {
if (!is.factor(w) && !is.logical(w) && length(unique(w[!is.na(w)])) <
continuous)
w <- as.factor(w)
s <- !is.na(w)
if (na.include && !all(s) && length(levels(w))) {
w <- na.include(w)
levels(w)[is.na(levels(w))] <- "NA"
s <- rep(TRUE, length(s))
}
n[i] <- sum(s & xms == "")
w <- w[s]
g <- gr[s, drop = TRUE]
if (is.factor(w) || is.logical(w)) {
tab <- table(w, g)
if (test) {
if (is.ordered(w))
testresults[[i]] <- ordTest(g, w)
else testresults[[i]] <- catTest(tab)
}
if (nrow(tab) == 1) {
b <- casefold(dimnames(tab)[[1]], upper = TRUE)
pres <- c("1", "Y", "YES", "PRESENT")
abse <- c("0", "N", "NO", "ABSENT")
jj <- match(b, pres, nomatch = 0)
if (jj > 0)
bc <- abse[jj]
else {
jj <- match(b, abse, nomatch = 0)
if (jj > 0)
bc <- pres[jj]
}
if (jj) {
tab <- rbind(tab, rep(0, ncol(tab)))
dimnames(tab)[[1]][2] <- bc
}
}
if (overall)
tab <- cbind(tab, Combined = apply(tab, 1,
sum))
comp[[i]] <- tab
type[i] <- 1
}
else {
sfn <- function(x, quant) {
o <- options(digits = 10)
on.exit(options(o))
c(quantile(x, quant), Mean = mean(x), SD = sqrt(var(x)),
N = sum(!is.na(x)))
}
qu <- tapply(w, g, sfn, simplify = TRUE, quants)
if (test)
testresults[[i]] <- conTest(g, w)
if (overall)
qu$Combined <- sfn(w, quants)
comp[[i]] <- matrix(unlist(qu), ncol = length(quants) +
3, byrow = TRUE, dimnames = list(names(qu),
c(format(quants), "Mean", "SD", "N")))
if (any(group.freq <= nmin))
dat[[i]] <- lapply(split(w, g), nmin = nmin,
function(x, nmin) if (length(x) <= nmin)
x
else NULL)
type[i] <- 2
}
}
else {
w <- as.numeric(w) == 1
n[i] <- sum(!is.na(apply(w, 1, sum)) & xms ==
"")
g <- as.factor(gr)
ncat <- ncol(w)
tab <- matrix(NA, nrow = ncat, ncol = length(levels(g)),
dimnames = list(dimnames(w)[[2]], levels(g)))
if (test) {
pval <- numeric(ncat)
names(pval) <- dimnames(w)[[2]]
d.f. <- stat <- pval
}
for (j in 1:ncat) {
tab[j, ] <- tapply(w[, j], g, sum, simplify = TRUE,
na.rm = TRUE)
if (test) {
tabj <- rbind(table(g) - tab[j, ], tab[j,
])
st <- catTest(tabj)
pval[j] <- st$P
stat[j] <- st$stat
d.f.[j] <- st$df
}
}
if (test)
testresults[[i]] <- list(P = pval, stat = stat,
df = d.f., testname = st$testname, statname = st$statname,
latexstat = st$latexstat, plotmathstat = st$plotmathstat)
if (overall)
tab <- cbind(tab, Combined = apply(tab, 1,
sum))
comp[[i]] <- tab
type[i] <- 3
}
}
labels <- ifelse(nchar(labels), labels, names(comp))
R[[strat]] <- list(stats = comp, type = type, group.freq = group.freq,
labels = labels, units = Units, quant = quant, data = dat,
N = sum(!is.na(gr) & xms == ""), n = n, testresults = if (test) testresults)
}
structure(list(results = R, group.name = groups, group.label = glabel,
call = call, formula = formula), class = "summaryM")
}
After trying to test it on the following data, I get a warning and an error:
library(Hmisc)
set.seed(173)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE))
> summaryM2(sex ~ treatment, test=TRUE, overall = TRUE)
Error in round(teststat, 2) :
non-numeric argument to mathematical function
I tried stepping through the summaryM2 function line by line, but could not figure out what's causing the problem.
In your catTestfisher function, the output variables stat (test statistic) and df (degrees of freedom) should be numeric variables not empty strings. In the programming stat is coverted to teststat for rounding before being outputted (hence the error message for round("", 2) is non-numeric argument to mathematical function). See lines 1718 to 1721 in the summary.formula code) .
You can set df = NULL but a value is required for stat (not NA or NULL) otherwise no output is returned. You can get around the problem by setting stat = 0 (or any other number), and then only displaying the p value using prtest = "P".
catTestfisher2 <- function (tab)
{
st <- fisher.test(tab)
list(P = st$p.value, stat = 0, df = NULL,
testname = st$method, statname = "", latexstat = "", namefun = "",
plotmathstat = "")
}
output <- summaryM(sex ~ treatment, test=TRUE, overall = TRUE, catTest = catTestfisher2)
print(output, prtest = "P")
Descriptive Statistics (N=500)
+-------+-----------+-----------+-----------+-------+
| |Drug |Placebo |Combined |P-value|
| |(N=257) |(N=243) |(N=500) | |
+-------+-----------+-----------+-----------+-------+
|sex : m|0.52 (133)|0.52 (126)|0.52 (259)| 1 |
+-------+-----------+-----------+-----------+-------+
Note there is no need to define your own summaryM2 function. Just use catTest = to pass in your function.
Is there a package in R plotting newton-raphson/fisher scoring iterations when fitting a glm modelel (from the stats package)?
I answered a very similar question yesterday. In your case however, things are a little simpler.
Note that when you call glm, it eventually calls glm.fit (or any other method argument you specify to glm) which computes the solution path in the loop from lines 78 to 170. The current iteration's value of the coefficients is computed on line 97 using a .Call to a C function C_Cdqrls. As a hack, you can extract the current value of the coefficients to the global environment (fit$coefficients), within this loop, by modifying the glm.fit function like so:
glm.fit.new = function (x, y, weights = rep(1, nobs), start = NULL, etastart = NULL,
mustart = NULL, offset = rep(0, nobs), family = gaussian(),
control = list(), intercept = TRUE) {
control <- do.call("glm.control", control)
x <- as.matrix(x)
xnames <- dimnames(x)[[2L]]
ynames <- if (is.matrix(y))
rownames(y)
else names(y)
conv <- FALSE
nobs <- NROW(y)
nvars <- ncol(x)
EMPTY <- nvars == 0
if (is.null(weights))
weights <- rep.int(1, nobs)
if (is.null(offset))
offset <- rep.int(0, nobs)
variance <- family$variance
linkinv <- family$linkinv
if (!is.function(variance) || !is.function(linkinv))
stop("'family' argument seems not to be a valid family object",
call. = FALSE)
dev.resids <- family$dev.resids
aic <- family$aic
mu.eta <- family$mu.eta
unless.null <- function(x, if.null) if (is.null(x))
if.null
else x
valideta <- unless.null(family$valideta, function(eta) TRUE)
validmu <- unless.null(family$validmu, function(mu) TRUE)
if (is.null(mustart)) {
eval(family$initialize)
}
else {
mukeep <- mustart
eval(family$initialize)
mustart <- mukeep
}
if (EMPTY) {
eta <- rep.int(0, nobs) + offset
if (!valideta(eta))
stop("invalid linear predictor values in empty model",
call. = FALSE)
mu <- linkinv(eta)
if (!validmu(mu))
stop("invalid fitted means in empty model", call. = FALSE)
dev <- sum(dev.resids(y, mu, weights))
w <- ((weights * mu.eta(eta)^2)/variance(mu))^0.5
residuals <- (y - mu)/mu.eta(eta)
good <- rep_len(TRUE, length(residuals))
boundary <- conv <- TRUE
coef <- numeric()
iter <- 0L
}
else {
coefold <- NULL
eta <- if (!is.null(etastart))
etastart
else if (!is.null(start))
if (length(start) != nvars)
stop(gettextf("length of 'start' should equal %d and correspond to initial coefs for %s",
nvars, paste(deparse(xnames), collapse = ", ")),
domain = NA)
else {
coefold <- start
offset + as.vector(if (NCOL(x) == 1L)
x * start
else x %*% start)
}
else family$linkfun(mustart)
mu <- linkinv(eta)
if (!(validmu(mu) && valideta(eta)))
stop("cannot find valid starting values: please specify some",
call. = FALSE)
devold <- sum(dev.resids(y, mu, weights))
boundary <- conv <- FALSE
# EDIT: counter to create track of iterations
i <<- 1
for (iter in 1L:control$maxit) {
good <- weights > 0
varmu <- variance(mu)[good]
if (anyNA(varmu))
stop("NAs in V(mu)")
if (any(varmu == 0))
stop("0s in V(mu)")
mu.eta.val <- mu.eta(eta)
if (any(is.na(mu.eta.val[good])))
stop("NAs in d(mu)/d(eta)")
good <- (weights > 0) & (mu.eta.val != 0)
if (all(!good)) {
conv <- FALSE
warning(gettextf("no observations informative at iteration %d",
iter), domain = NA)
break
}
z <- (eta - offset)[good] + (y - mu)[good]/mu.eta.val[good]
w <- sqrt((weights[good] * mu.eta.val[good]^2)/variance(mu)[good])
fit <- .Call(stats:::C_Cdqrls, x[good, , drop = FALSE] *
w, z * w, min(1e-07, control$epsilon/1000), check = FALSE)
#======================================================
# EDIT: assign the coefficients to variables in the global namespace
#======================================================
assign(paste0("iteration_x_", i), fit$coefficients,
envir = .GlobalEnv)
i <<- i + 1 # increase the counter
if (any(!is.finite(fit$coefficients))) {
conv <- FALSE
warning(gettextf("non-finite coefficients at iteration %d",
iter), domain = NA)
break
}
if (nobs < fit$rank)
stop(sprintf(ngettext(nobs, "X matrix has rank %d, but only %d observation",
"X matrix has rank %d, but only %d observations"),
fit$rank, nobs), domain = NA)
start[fit$pivot] <- fit$coefficients
eta <- drop(x %*% start)
mu <- linkinv(eta <- eta + offset)
dev <- sum(dev.resids(y, mu, weights))
if (control$trace)
cat("Deviance = ", dev, " Iterations - ", iter,
"\n", sep = "")
boundary <- FALSE
if (!is.finite(dev)) {
if (is.null(coefold))
stop("no valid set of coefficients has been found: please supply starting values",
call. = FALSE)
warning("step size truncated due to divergence",
call. = FALSE)
ii <- 1
while (!is.finite(dev)) {
if (ii > control$maxit)
stop("inner loop 1; cannot correct step size",
call. = FALSE)
ii <- ii + 1
start <- (start + coefold)/2
eta <- drop(x %*% start)
mu <- linkinv(eta <- eta + offset)
dev <- sum(dev.resids(y, mu, weights))
}
boundary <- TRUE
if (control$trace)
cat("Step halved: new deviance = ", dev, "\n",
sep = "")
}
if (!(valideta(eta) && validmu(mu))) {
if (is.null(coefold))
stop("no valid set of coefficients has been found: please supply starting values",
call. = FALSE)
warning("step size truncated: out of bounds",
call. = FALSE)
ii <- 1
while (!(valideta(eta) && validmu(mu))) {
if (ii > control$maxit)
stop("inner loop 2; cannot correct step size",
call. = FALSE)
ii <- ii + 1
start <- (start + coefold)/2
eta <- drop(x %*% start)
mu <- linkinv(eta <- eta + offset)
}
boundary <- TRUE
dev <- sum(dev.resids(y, mu, weights))
if (control$trace)
cat("Step halved: new deviance = ", dev, "\n",
sep = "")
}
if (abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
conv <- TRUE
coef <- start
break
}
else {
devold <- dev
coef <- coefold <- start
}
}
if (!conv)
warning("glm.fit: algorithm did not converge", call. = FALSE)
if (boundary)
warning("glm.fit: algorithm stopped at boundary value",
call. = FALSE)
eps <- 10 * .Machine$double.eps
if (family$family == "binomial") {
if (any(mu > 1 - eps) || any(mu < eps))
warning("glm.fit: fitted probabilities numerically 0 or 1 occurred",
call. = FALSE)
}
if (family$family == "poisson") {
if (any(mu < eps))
warning("glm.fit: fitted rates numerically 0 occurred",
call. = FALSE)
}
if (fit$rank < nvars)
coef[fit$pivot][seq.int(fit$rank + 1, nvars)] <- NA
xxnames <- xnames[fit$pivot]
residuals <- (y - mu)/mu.eta(eta)
fit$qr <- as.matrix(fit$qr)
nr <- min(sum(good), nvars)
if (nr < nvars) {
Rmat <- diag(nvars)
Rmat[1L:nr, 1L:nvars] <- fit$qr[1L:nr, 1L:nvars]
}
else Rmat <- fit$qr[1L:nvars, 1L:nvars]
Rmat <- as.matrix(Rmat)
Rmat[row(Rmat) > col(Rmat)] <- 0
names(coef) <- xnames
colnames(fit$qr) <- xxnames
dimnames(Rmat) <- list(xxnames, xxnames)
}
names(residuals) <- ynames
names(mu) <- ynames
names(eta) <- ynames
wt <- rep.int(0, nobs)
wt[good] <- w^2
names(wt) <- ynames
names(weights) <- ynames
names(y) <- ynames
if (!EMPTY)
names(fit$effects) <- c(xxnames[seq_len(fit$rank)], rep.int("",
sum(good) - fit$rank))
wtdmu <- if (intercept)
sum(weights * y)/sum(weights)
else linkinv(offset)
nulldev <- sum(dev.resids(y, wtdmu, weights))
n.ok <- nobs - sum(weights == 0)
nulldf <- n.ok - as.integer(intercept)
rank <- if (EMPTY)
0
else fit$rank
resdf <- n.ok - rank
aic.model <- aic(y, n, mu, weights, dev) + 2 * rank
list(coefficients = coef, residuals = residuals, fitted.values = mu,
effects = if (!EMPTY) fit$effects, R = if (!EMPTY) Rmat,
rank = rank, qr = if (!EMPTY) structure(fit[c("qr", "rank",
"qraux", "pivot", "tol")], class = "qr"), family = family,
linear.predictors = eta, deviance = dev, aic = aic.model,
null.deviance = nulldev, iter = iter, weights = wt, prior.weights = weights,
df.residual = resdf, df.null = nulldf, y = y, converged = conv,
boundary = boundary)
}
Note that this is a hack for a couple of reasons:
1. The function C_Cdrqls is not exported by the package stats, and so we have to look for it within namespace:package:stats.
2. This pollutes your global environment with the iteration values via a side-effect of the call to glm.fit.new, creating one vector per iteration. Side-effects are generally frowned upon in functional languages like R. You can probably clean the multiple objects bit up by creating a matrix or a data.frame and assign within that.
However, once you have the iteration values extracted, you can do whatever you want with them, including plotting them.
Here is what a call to glm with the newly defined glm.fit.new method would look like:
counts = c(18,17,15,20,10,20,25,13,12)
outcome = gl(3,1,9)
treatment = gl(3,3)
print(d.AD = data.frame(treatment, outcome, counts))
glm.D93 = glm(counts ~ outcome + treatment, family = poisson(),
control = list(trace = TRUE, epsilon = 1e-16), method = "glm.fit.new")
You can check that the iteration parameter values have indeed been populated in the global environment:
> ls(pattern = "iteration_x_")
[1] "iteration_x_1" "iteration_x_10" "iteration_x_11" "iteration_x_2"
[5] "iteration_x_3" "iteration_x_4" "iteration_x_5" "iteration_x_6"
[9] "iteration_x_7" "iteration_x_8" "iteration_x_9"
I am trying to fit a double glm in R using the dglm package. This is used in combination with the statmod package to use the tweedie model. A reproduction of the problem is:
library(dglm)
library(statmod)
p <- 1.5
y <- runif(10)
x <- runif(10)
dglm(y~x,~x,family=tweedie(link.power=0, var.power=p))
#doesnt work
dglm(y~x,~x,family=tweedie(link.power=0, var.power=1.5))
#works
var.power needs to be defined in a variable, since I want to use a loop where dglm runs on every entry of it
So, you can fix the problem by forcing dglm to evaluate the call where you input p. In the dglm function, on about line 73:
if (family$family == "Tweedie") {
tweedie.p <- call$family$var.power
}
should be:
if (family$family == "Tweedie") {
tweedie.p <- eval(call$family$var.power)
}
You can make your own function with the patch like this:
dglm.nograpes <- function (formula = formula(data), dformula = ~1, family = gaussian,
dlink = "log", data = sys.parent(), subset = NULL, weights = NULL,
contrasts = NULL, method = "ml", mustart = NULL, betastart = NULL,
etastart = NULL, phistart = NULL, control = dglm.control(...),
ykeep = TRUE, xkeep = FALSE, zkeep = FALSE, ...)
{
call <- match.call()
if (is.character(family))
family <- get(family, mode = "function", envir = parent.frame())
if (is.function(family))
family <- family()
if (is.null(family$family)) {
print(family)
stop("'family' not recognized")
}
mnames <- c("", "formula", "data", "weights", "subset")
cnames <- names(call)
cnames <- cnames[match(mnames, cnames, 0)]
mcall <- call[cnames]
mcall[[1]] <- as.name("model.frame")
mframe <<- eval(mcall, sys.parent())
mf <- match.call(expand.dots = FALSE)
y <- model.response(mframe, "numeric")
if (is.null(dim(y))) {
N <- length(y)
}
else {
N <- dim(y)[1]
}
nobs <- N
mterms <- attr(mframe, "terms")
X <- model.matrix(mterms, mframe, contrasts)
weights <- model.weights(mframe)
if (is.null(weights))
weights <- rep(1, N)
if (is.null(weights))
weights <- rep(1, N)
if (!is.null(weights) && any(weights < 0)) {
stop("negative weights not allowed")
}
offset <- model.offset(mframe)
if (is.null(offset))
offset <- rep(0, N)
if (!is.null(offset) && length(offset) != NROW(y)) {
stop(gettextf("number of offsets is %d should equal %d (number of observations)",
length(offset), NROW(y)), domain = NA)
}
mcall$formula <- formula
mcall$formula[3] <- switch(match(length(dformula), c(0, 2,
3)), 1, dformula[2], dformula[3])
mframe <- eval(mcall, sys.parent())
dterms <- attr(mframe, "terms")
Z <- model.matrix(dterms, mframe, contrasts)
doffset <- model.extract(mframe, offset)
if (is.null(doffset))
doffset <- rep(0, N)
name.dlink <- substitute(dlink)
if (is.name(name.dlink)) {
if (is.character(dlink)) {
name.dlink <- dlink
}
else {
dlink <- name.dlink <- as.character(name.dlink)
}
}
else {
if (is.call(name.dlink))
name.dlink <- deparse(name.dlink)
}
if (!is.null(name.dlink))
name.dlink <- name.dlink
if (family$family == "Tweedie") {
tweedie.p <- eval(call$family$var.power)
}
Digamma <- family$family == "Gamma" || (family$family ==
"Tweedie" && tweedie.p == 2)
if (Digamma) {
linkinv <- make.link(name.dlink)$linkinv
linkfun <- make.link(name.dlink)$linkfun
mu.eta <- make.link(name.dlink)$mu.eta
valid.eta <- make.link(name.dlink)$valid.eta
init <- expression({
if (any(y <= 0)) {
print(y)
print(any(y <= 0))
stop("non-positive values not allowed for the DM gamma family")
}
n <- rep.int(1, nobs)
mustart <- y
})
dfamily <- structure(list(family = "Digamma", variance = varfun.digamma,
dev.resids = function(y, mu, wt) {
wt * unitdeviance.digamma(y, mu)
}, aic = function(y, n, mu, wt, dev) NA, link = name.dlink,
linkfun = linkfun, linkinv = linkinv, mu.eta = mu.eta,
initialize = init, validmu = function(mu) {
all(mu > 0)
}, valideta = valid.eta))
}
else {
eval(substitute(dfamily <- Gamma(link = lk), list(lk = name.dlink)))
}
dlink <- as.character(dfamily$link)
logdlink <- dlink == "log"
if (!is.null(call$method)) {
name.method <- substitute(method)
if (!is.character(name.method))
name.method <- deparse(name.method)
list.methods <- c("ml", "reml", "ML", "REML", "Ml", "Reml")
i.method <- pmatch(method, list.methods, nomatch = 0)
if (!i.method)
stop("Method must be ml or reml")
method <- switch(i.method, "ml", "reml", "ml", "reml",
"ml", "reml")
}
reml <- method == "reml"
if (is.null(mustart)) {
etastart <- NULL
eval(family$initialize)
mu <- mustart
mustart <- NULL
}
if (!is.null(betastart)) {
eta <- X %*% betastart
mu <- family$linkinv(eta + offset)
}
else {
if (!is.null(mustart)) {
mu <- mustart
eta <- family$linkfun(mu) - offset
}
else {
eta <- lm.fit(X, family$linkfun(mu) - offset, singular.ok = TRUE)$fitted.values
mu <- family$linkinv(eta + offset)
}
}
d <- family$dev.resids(y, mu, weights)
if (!is.null(phistart)) {
phi <- phistart
deta <- dfamily$linkfun(phi) - doffset
}
else {
deta <- lm.fit(Z, dfamily$linkfun(d + (d == 0)/6) - doffset,
singular.ok = TRUE)$fitted.values
if (logdlink)
deta <- deta + 1.27036
phi <- dfamily$linkinv(deta + offset)
}
if (any(phi <= 0)) {
cat("Some values for phi are non-positive, suggesting an inappropriate model",
"Try a different link function.\n")
}
zm <- as.vector(eta + (y - mu)/family$mu.eta(eta))
wm <- as.vector(eval(family$variance(mu)) * weights/phi)
mfit <- lm.wfit(X, zm, wm, method = "qr", singular.ok = TRUE)
eta <- mfit$fitted.values
mu <- family$linkinv(eta + offset)
cat("family:", family$family, "\n")
if (family$family == "Tweedie") {
cat("p:", tweedie.p, "\n")
if ((tweedie.p > 0) & (any(mu < 0))) {
cat("Some values for mu are negative, suggesting an inappropriate model.",
"Try a different link function.\n")
}
}
d <- family$dev.resids(y, mu, weights)
const <- dglm.constant(y, family, weights)
if (Digamma) {
h <- 2 * (lgamma(weights/phi) + (1 + log(phi/weights)) *
weights/phi)
}
else {
h <- log(phi/weights)
}
m2loglik <- const + sum(h + d/phi)
if (reml)
m2loglik <- m2loglik + 2 * log(abs(prod(diag(mfit$R))))
m2loglikold <- m2loglik + 1
epsilon <- control$epsilon
maxit <- control$maxit
trace <- control$trace
iter <- 0
while (abs(m2loglikold - m2loglik)/(abs(m2loglikold) + 1) >
epsilon && iter < maxit) {
hdot <- 1/dfamily$mu.eta(deta)
if (Digamma) {
delta <- 2 * weights * (log(weights/phi) - digamma(weights/phi))
u <- 2 * weights^2 * (trigamma(weights/phi) - phi/weights)
fdot <- phi^2/u * hdot
}
else {
delta <- phi
u <- phi^2
fdot <- hdot
}
wd <- 1/(fdot^2 * u)
if (reml) {
h <- hat(mfit$qr)
delta <- delta - phi * h
wd <- wd - 2 * (h/hdot^2/phi^2) + h^2
}
if (any(wd < 0)) {
cat(" Some weights are negative; temporarily fixing. This may be a sign of an inappropriate model.\n")
wd[wd < 0] <- 0
}
if (any(is.infinite(wd))) {
cat(" Some weights are negative; temporarily fixing. This may be a sign of an inappropriate model.\n")
wd[is.infinite(wd)] <- 100
}
zd <- deta + (d - delta) * fdot
dfit <- lm.wfit(Z, zd, wd, method = "qr", singular.ok = TRUE)
deta <- dfit$fitted.values
phi <- dfamily$linkinv(deta + doffset)
if (any(is.infinite(phi))) {
cat("*** Some values for phi are infinite, suggesting an inappropriate model",
"Try a different link function. Making an attempt to continue...\n")
phi[is.infinite(phi)] <- 10
}
zm <- eta + (y - mu)/family$mu.eta(eta)
fam.wt <- expression(weights * family$variance(mu))
wm <- eval(fam.wt)/phi
mfit <- lm.wfit(X, zm, wm, method = "qr", singular.ok = TRUE)
eta <- mfit$fitted.values
mu <- family$linkinv(eta + offset)
if (family$family == "Tweedie") {
if ((tweedie.p > 0) & (any(mu < 0))) {
cat("*** Some values for mu are negative, suggesting an inappropriate model.",
"Try a different link function. Making an attempt to continue...\n")
mu[mu <= 0] <- 1
}
}
d <- family$dev.resids(y, mu, weights)
m2loglikold <- m2loglik
if (Digamma) {
h <- 2 * (lgamma(weights/phi) + (1 + log(phi/weights)) *
weights/phi)
}
else {
h <- log(phi/weights)
}
m2loglik <- const + sum(h + d/phi)
if (reml) {
m2loglik <- m2loglik + 2 * log(abs(prod(diag(mfit$R))))
}
iter <- iter + 1
if (trace)
cat("DGLM iteration ", iter, ": -2*log-likelihood = ",
format(round(m2loglik, 4)), " \n", sep = "")
}
mfit$formula <- call$formula
mfit$call <- call
mfit$family <- family
mfit$linear.predictors <- mfit$fitted.values + offset
mfit$fitted.values <- mu
mfit$prior.weights <- weights
mfit$terms <- mterms
mfit$contrasts <- attr(X, "contrasts")
intercept <- attr(mterms, "intercept")
mfit$df.null <- N - sum(weights == 0) - as.integer(intercept)
mfit$call <- call
mfit$deviance <- sum(d/phi)
mfit$aic <- NA
mfit$null.deviance <- glm.fit(x = X, y = y, weights = weights/phi,
offset = offset, family = family)
if (length(mfit$null.deviance) > 1)
mfit$null.deviance <- mfit$null.deviance$null.deviance
if (ykeep)
mfit$y <- y
if (xkeep)
mfit$x <- X
class(mfit) <- c("glm", "lm")
dfit$family <- dfamily
dfit$prior.weights <- rep(1, N)
dfit$linear.predictors <- dfit$fitted.values + doffset
dfit$fitted.values <- phi
dfit$terms <- dterms
dfit$aic <- NA
call$formula <- call$dformula
call$dformula <- NULL
call$family <- call(dfamily$family, link = name.dlink)
dfit$call <- call
dfit$residuals <- dfamily$dev.resid(d, phi, wt = rep(1/2,
N))
dfit$deviance <- sum(dfit$residuals)
dfit$null.deviance <- glm.fit(x = Z, y = d, weights = rep(1/2,
N), offset = doffset, family = dfamily)
if (length(dfit$null.deviance) > 1)
dfit$null.deviance <- dfit$null.deviance$null.deviance
if (ykeep)
dfit$y <- d
if (zkeep)
dfit$z <- Z
dfit$formula <- as.vector(attr(dterms, "formula"))
dfit$iter <- iter
class(dfit) <- c("glm", "lm")
out <- c(mfit, list(dispersion.fit = dfit, iter = iter, method = method,
m2loglik = m2loglik))
class(out) <- c("dglm", "glm", "lm")
out
}
And then run it like this:
dglm.nograpes(y~x,~x,family=tweedie(link.power=0, var.power=p))