Modify SPDEP package - insert new function - r

I am trying to modify the stsls function of the R package spdep.
The function compute a spatial autoregressive function using a two stage least square. For both stages, the function uses the same spatial matrix.
What I want is to create a new function, say stslsm, which uses two different spatial matrices, one for the first stage (inlistw), one for the second stage (listw).
For this reason, I took the stsls function, I added a new entry, inlistw, and modified accordingly the script. For those who are interested, the code is at the bottom of this post, but please consider that this is just a first trial.
Now the problem is that I don't know how to insert this new function in the spdep package.
I read different posts on this issue. The most recurrent suggestion is:
unlockBinding("spdep", loadNamespace("spdep"));
assignInNamespace("stslsm", stslsm, ns=asNamespace("spdep"), envir=loadNamespace("spdep"));
assign("stslsm", stslsm, envir=env);
lockBinding(stslsm, loadNamespace("spdep"));
But after the second line of code I get the following
Error in bindingIsLocked(x, ns) : no binding for "stslsm"
I got stuck here. Do you have any suggestion?
function (formula, data = list(), listw, inlistw,zero.policy = NULL,
na.action = na.fail, robust = FALSE, HC = NULL, legacy = FALSE,
W2X = TRUE) {
if (!inherits(listw, "listw"))
stop("No neighbourhood list")
if (is.null(zero.policy))
zero.policy <- get("zeroPolicy", envir = .spdepOptions)
stopifnot(is.logical(zero.policy))
if (class(formula) != "formula")
formula <- as.formula(formula)
mt <- terms(formula, data = data)
mf <- lm(formula, data, na.action = na.action, method = "model.frame")
na.act <- attr(mf, "na.action")
if (!is.null(na.act)) {
subset <- !(1:length(listw$neighbours) %in% na.act)
listw <- subset(listw, subset, zero.policy = zero.policy)
}
y <- model.extract(mf, "response")
if (any(is.na(y)))
stop("NAs in dependent variable")
X <- model.matrix(mt, mf)
if (any(is.na(X)))
stop("NAs in independent variable")
if (robust) {
if (is.null(HC))
HC <- "HC0"
if (!any(HC %in% c("HC0", "HC1")))
stop("HC must be one of HC0, HC1")
}
Wy <- lag.listw(listw, y, zero.policy = zero.policy)
dim(Wy) <- c(nrow(X), 1)
colnames(Wy) <- c("Rho")
n <- NROW(X)
m <- NCOL(X)
xcolnames <- colnames(X)
K <- ifelse(xcolnames[1] == "(Intercept)", 2, 1)
if (m > 1) {
WX <- matrix(nrow = n, ncol = (m - (K - 1)))
if (W2X)
WWX <- matrix(nrow = n, ncol = ncol(WX))
for (k in K:m) {
wx <- lag.listw(inlistw, X[, k], zero.policy = zero.policy)
if (W2X)
wwx <- lag.listw(inlistw, wx, zero.policy = zero.policy)
if (any(is.na(wx)))
stop("NAs in lagged independent variable")
WX[, (k - (K - 1))] <- wx
if (W2X)
WWX[, (k - (K - 1))] <- wwx
}
if (W2X)
inst <- cbind(WX, WWX)
else inst <- WX
}
if (K == 2 && listw$style != "W") {
wx1 <- as.double(rep(1, n))
wx <- lag.listw(inlistw, wx1, zero.policy = zero.policy)
if (W2X)
wwx <- lag.listw(inlistw, wx, zero.policy = zero.policy)
if (m > 1) {
inst <- cbind(wx, inst)
if (W2X)
inst <- cbind(wwx, inst)
}
else {
inst <- matrix(wx, nrow = n, ncol = 1)
if (W2X)
inst <- cbind(inst, wwx)
}
}
result <- tsls(y = y, yend = Wy, X = X, Zinst = inst, robust = robust,
HC = HC, legacy = legacy)
result$zero.policy <- zero.policy
result$robust <- robust
if (robust)
result$HC <- HC
result$legacy <- legacy
result$listw_style <- listw$style
result$call <- match.call()
class(result) <- "stsls"
result
}

Related

Creating a giant matrix for use in CARBayes without running out of memory

I am trying to do an areal unit analysis using the package CARBayes. As part of the analysis, I am using the below code. my issue comes when I try to create the neighbour matrix with nb2mat. My sp object has 170,000 odd polygons in it so it can't make the matrix with the memory I have.
library(spdep)
library(CARBayes)
W.nb <- poly2nb(sp)
W <- nb2mat(W.nb, style = "B", zero.policy = TRUE)
test <- S.CARbym(case ~ covariate1),
family = "poisson",
data = sp,
W = W,
burnin = 10000,
n.sample = 30000,
thin = 20)
I found the below code in another thread to make a bigmemory matrix but CARBayes won't recognise it as a matrix.
My question is, does anyone know a way to use bigmemory or spam /sparse matrix or something similar to create the matrix so that it can be used in the CARBayes package without throwing an error saying the W isn't a matrix.
my_listw2mat = function (listw)
{
require(bigmemory)
n <- length(listw$neighbours)
if (n < 1)
stop("non-positive number of entities")
cardnb <- card(listw$neighbours)
if (any(is.na(unlist(listw$weights))))
stop("NAs in general weights list")
#res <- matrix(0, nrow = n, ncol = n)
res <- big.matrix(n, n, type='double', init=NULL)
options(bigmemory.allow.dimnames=TRUE)
for (i in 1:n) if (cardnb[i] > 0)
res[i, listw$neighbours[[i]]] <- listw$weights[[i]]
if (!is.null(attr(listw, "region.id")))
row.names(res) <- attr(listw, "region.id")
res
}
my_nb2mat = function (neighbours, glist = NULL, style = "W", zero.policy = NULL)
{
if (is.null(zero.policy))
zero.policy <- get("zeroPolicy", envir = .spdepOptions)
stopifnot(is.logical(zero.policy))
if (!inherits(neighbours, "nb"))
stop("Not a neighbours list")
listw <- nb2listw(neighbours, glist = glist, style = style,
zero.policy = zero.policy)
res <- my_listw2mat(listw)
attr(res, "call") <- match.call()
res
}
W <- my_nb2mat(W.nb, style = "B", zero.policy = TRUE)
test <- S.CARbym(case ~ covariate1),
family = "poisson",
data = sp,
W = W,
burnin = 10000,
n.sample = 30000,
thin = 20)

R Factor Analysis with factanal() for huge amount of predictors results in a system that is computationally singular

I am trying to run Factor analysis for a dataset with around 150 variables but only have around around 80 observations.
I tried the factanal() function in R and R reported error:
Error in solve.default(cv) :
system is computationally singular: reciprocal condition number = 3.0804e-20
Any suggestions on alternative methods / packages?
A demonstration on a dummy dataset would be:
# This will work (dataset with 80 obs and 15 predictors)
set.seed(1234)
fake_df = as.data.frame(matrix(rnorm(80*15), nrow = 80))
factanal(fake_df, factors = 2, rotation = "varimax")
# This will not (dataset with 80 obs and 150 predictors)
set.seed(1234)
fake_df = as.data.frame(matrix(rnorm(80*150), nrow = 80))
factanal(fake_df, factors = 2, rotation = "varimax")
So far I've replaced the solve function in the factanal() source code with a numerical solving function one that I created below, but it did not resolve the issue:
solve_G = function(M){
library(matrixcalc)
if(!is.singular.matrix(M)){
return(solve(M))
} else{
s = svd(M)
U = s$u
V = s$v
D_Inv = diag(1/s$d)
Num_Inv = V %*% D_Inv %*% t(U)
cat("Singular Matrix! SVD Used.\n")
return(Num_Inv)
}
}
And after you replace "solve" with "solve_G", a new error occurred:
Error in factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt) :
could not find function "factanal.fit.mle"
P.S. Here is the new "factanal" function named my_factanal:
The error above occurred when running the line:
nfit <- factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt)
And to run this, Set x to be a 80* 150 numerical dataframe, set factors = 2, set scores = "regression", rotation = "varimax":
my_factanal = function (x, factors, data = NULL, covmat = NULL, n.obs = NA,
subset, na.action, start = NULL, scores = c("none", "regression",
"Bartlett"), rotation = "varimax", control = NULL, ...)
{
sortLoadings <- function(Lambda) {
cn <- colnames(Lambda)
Phi <- attr(Lambda, "covariance")
ssq <- apply(Lambda, 2L, function(x) -sum(x^2))
Lambda <- Lambda[, order(ssq), drop = FALSE]
colnames(Lambda) <- cn
neg <- colSums(Lambda) < 0
Lambda[, neg] <- -Lambda[, neg]
if (!is.null(Phi)) {
unit <- ifelse(neg, -1, 1)
attr(Lambda, "covariance") <- unit %*% Phi[order(ssq),
order(ssq)] %*% unit
}
Lambda
}
cl <- match.call()
na.act <- NULL
if (is.list(covmat)) {
if (any(is.na(match(c("cov", "n.obs"), names(covmat)))))
stop("'covmat' is not a valid covariance list")
cv <- covmat$cov
n.obs <- covmat$n.obs
have.x <- FALSE
}
else if (is.matrix(covmat)) {
cv <- covmat
have.x <- FALSE
}
else if (is.null(covmat)) {
if (missing(x))
stop("neither 'x' nor 'covmat' supplied")
have.x <- TRUE
if (inherits(x, "formula")) {
mt <- terms(x, data = data)
if (attr(mt, "response") > 0)
stop("response not allowed in formula")
attr(mt, "intercept") <- 0
mf <- match.call(expand.dots = FALSE)
names(mf)[names(mf) == "x"] <- "formula"
mf$factors <- mf$covmat <- mf$scores <- mf$start <- mf$rotation <- mf$control <- mf$... <- NULL
mf[[1L]] <- quote(stats::model.frame)
mf <- eval.parent(mf)
na.act <- attr(mf, "na.action")
if (.check_vars_numeric(mf))
stop("factor analysis applies only to numerical variables")
z <- model.matrix(mt, mf)
}
else {
z <- as.matrix(x)
if (!is.numeric(z))
stop("factor analysis applies only to numerical variables")
if (!missing(subset))
z <- z[subset, , drop = FALSE]
}
covmat <- cov.wt(z)
cv <- covmat$cov
n.obs <- covmat$n.obs
}
else stop("'covmat' is of unknown type")
scores <- match.arg(scores)
if (scores != "none" && !have.x)
stop("requested scores without an 'x' matrix")
p <- ncol(cv)
if (p < 3)
stop("factor analysis requires at least three variables")
dof <- 0.5 * ((p - factors)^2 - p - factors)
if (dof < 0)
stop(sprintf(ngettext(factors, "%d factor is too many for %d variables",
"%d factors are too many for %d variables"), factors,
p), domain = NA)
sds <- sqrt(diag(cv))
cv <- cv/(sds %o% sds)
cn <- list(nstart = 1, trace = FALSE, lower = 0.005)
cn[names(control)] <- control
more <- list(...)[c("nstart", "trace", "lower", "opt", "rotate")]
if (length(more))
cn[names(more)] <- more
if (is.null(start)) {
start <- (1 - 0.5 * factors/p)/diag(solve_G(cv))
if ((ns <- cn$nstart) > 1)
start <- cbind(start, matrix(runif(ns - 1), p, ns -
1, byrow = TRUE))
}
start <- as.matrix(start)
if (nrow(start) != p)
stop(sprintf(ngettext(p, "'start' must have %d row",
"'start' must have %d rows"), p), domain = NA)
nc <- ncol(start)
if (nc < 1)
stop("no starting values supplied")
best <- Inf
for (i in 1L:nc) {
nfit <- factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt)
if (cn$trace)
cat("start", i, "value:", format(nfit$criteria[1L]),
"uniqs:", format(as.vector(round(nfit$uniquenesses,
4))), "\\n")
if (nfit$converged && nfit$criteria[1L] < best) {
fit <- nfit
best <- fit$criteria[1L]
}
}
if (best == Inf)
stop(ngettext(nc, "unable to optimize from this starting value",
"unable to optimize from these starting values"),
domain = NA)
load <- fit$loadings
if (rotation != "none") {
rot <- do.call(rotation, c(list(load), cn$rotate))
load <- if (is.list(rot)) {
load <- rot$loadings
fit$rotmat <- if (inherits(rot, "GPArotation"))
t(solve_G(rot$Th))
else rot$rotmat
rot$loadings
}
else rot
}
fit$loadings <- sortLoadings(load)
class(fit$loadings) <- "loadings"
fit$na.action <- na.act
if (have.x && scores != "none") {
Lambda <- fit$loadings
zz <- scale(z, TRUE, TRUE)
switch(scores, regression = {
sc <- zz %*% solve(cv, Lambda)
if (!is.null(Phi <- attr(Lambda, "covariance"))) sc <- sc %*%
Phi
}, Bartlett = {
d <- 1/fit$uniquenesses
tmp <- t(Lambda * d)
sc <- t(solve(tmp %*% Lambda, tmp %*% t(zz)))
})
rownames(sc) <- rownames(z)
colnames(sc) <- colnames(Lambda)
if (!is.null(na.act))
sc <- napredict(na.act, sc)
fit$scores <- sc
}
if (!is.na(n.obs) && dof > 0) {
fit$STATISTIC <- (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3) *
fit$criteria["objective"]
fit$PVAL <- pchisq(fit$STATISTIC, dof, lower.tail = FALSE)
}
fit$n.obs <- n.obs
fit$call <- cl
fit
}

Plotting newton-raphson/fisher scoring iterations in R

Is there a package in R plotting newton-raphson/fisher scoring iterations when fitting a glm modelel (from the stats package)?
I answered a very similar question yesterday. In your case however, things are a little simpler.
Note that when you call glm, it eventually calls glm.fit (or any other method argument you specify to glm) which computes the solution path in the loop from lines 78 to 170. The current iteration's value of the coefficients is computed on line 97 using a .Call to a C function C_Cdqrls. As a hack, you can extract the current value of the coefficients to the global environment (fit$coefficients), within this loop, by modifying the glm.fit function like so:
glm.fit.new = function (x, y, weights = rep(1, nobs), start = NULL, etastart = NULL,
mustart = NULL, offset = rep(0, nobs), family = gaussian(),
control = list(), intercept = TRUE) {
control <- do.call("glm.control", control)
x <- as.matrix(x)
xnames <- dimnames(x)[[2L]]
ynames <- if (is.matrix(y))
rownames(y)
else names(y)
conv <- FALSE
nobs <- NROW(y)
nvars <- ncol(x)
EMPTY <- nvars == 0
if (is.null(weights))
weights <- rep.int(1, nobs)
if (is.null(offset))
offset <- rep.int(0, nobs)
variance <- family$variance
linkinv <- family$linkinv
if (!is.function(variance) || !is.function(linkinv))
stop("'family' argument seems not to be a valid family object",
call. = FALSE)
dev.resids <- family$dev.resids
aic <- family$aic
mu.eta <- family$mu.eta
unless.null <- function(x, if.null) if (is.null(x))
if.null
else x
valideta <- unless.null(family$valideta, function(eta) TRUE)
validmu <- unless.null(family$validmu, function(mu) TRUE)
if (is.null(mustart)) {
eval(family$initialize)
}
else {
mukeep <- mustart
eval(family$initialize)
mustart <- mukeep
}
if (EMPTY) {
eta <- rep.int(0, nobs) + offset
if (!valideta(eta))
stop("invalid linear predictor values in empty model",
call. = FALSE)
mu <- linkinv(eta)
if (!validmu(mu))
stop("invalid fitted means in empty model", call. = FALSE)
dev <- sum(dev.resids(y, mu, weights))
w <- ((weights * mu.eta(eta)^2)/variance(mu))^0.5
residuals <- (y - mu)/mu.eta(eta)
good <- rep_len(TRUE, length(residuals))
boundary <- conv <- TRUE
coef <- numeric()
iter <- 0L
}
else {
coefold <- NULL
eta <- if (!is.null(etastart))
etastart
else if (!is.null(start))
if (length(start) != nvars)
stop(gettextf("length of 'start' should equal %d and correspond to initial coefs for %s",
nvars, paste(deparse(xnames), collapse = ", ")),
domain = NA)
else {
coefold <- start
offset + as.vector(if (NCOL(x) == 1L)
x * start
else x %*% start)
}
else family$linkfun(mustart)
mu <- linkinv(eta)
if (!(validmu(mu) && valideta(eta)))
stop("cannot find valid starting values: please specify some",
call. = FALSE)
devold <- sum(dev.resids(y, mu, weights))
boundary <- conv <- FALSE
# EDIT: counter to create track of iterations
i <<- 1
for (iter in 1L:control$maxit) {
good <- weights > 0
varmu <- variance(mu)[good]
if (anyNA(varmu))
stop("NAs in V(mu)")
if (any(varmu == 0))
stop("0s in V(mu)")
mu.eta.val <- mu.eta(eta)
if (any(is.na(mu.eta.val[good])))
stop("NAs in d(mu)/d(eta)")
good <- (weights > 0) & (mu.eta.val != 0)
if (all(!good)) {
conv <- FALSE
warning(gettextf("no observations informative at iteration %d",
iter), domain = NA)
break
}
z <- (eta - offset)[good] + (y - mu)[good]/mu.eta.val[good]
w <- sqrt((weights[good] * mu.eta.val[good]^2)/variance(mu)[good])
fit <- .Call(stats:::C_Cdqrls, x[good, , drop = FALSE] *
w, z * w, min(1e-07, control$epsilon/1000), check = FALSE)
#======================================================
# EDIT: assign the coefficients to variables in the global namespace
#======================================================
assign(paste0("iteration_x_", i), fit$coefficients,
envir = .GlobalEnv)
i <<- i + 1 # increase the counter
if (any(!is.finite(fit$coefficients))) {
conv <- FALSE
warning(gettextf("non-finite coefficients at iteration %d",
iter), domain = NA)
break
}
if (nobs < fit$rank)
stop(sprintf(ngettext(nobs, "X matrix has rank %d, but only %d observation",
"X matrix has rank %d, but only %d observations"),
fit$rank, nobs), domain = NA)
start[fit$pivot] <- fit$coefficients
eta <- drop(x %*% start)
mu <- linkinv(eta <- eta + offset)
dev <- sum(dev.resids(y, mu, weights))
if (control$trace)
cat("Deviance = ", dev, " Iterations - ", iter,
"\n", sep = "")
boundary <- FALSE
if (!is.finite(dev)) {
if (is.null(coefold))
stop("no valid set of coefficients has been found: please supply starting values",
call. = FALSE)
warning("step size truncated due to divergence",
call. = FALSE)
ii <- 1
while (!is.finite(dev)) {
if (ii > control$maxit)
stop("inner loop 1; cannot correct step size",
call. = FALSE)
ii <- ii + 1
start <- (start + coefold)/2
eta <- drop(x %*% start)
mu <- linkinv(eta <- eta + offset)
dev <- sum(dev.resids(y, mu, weights))
}
boundary <- TRUE
if (control$trace)
cat("Step halved: new deviance = ", dev, "\n",
sep = "")
}
if (!(valideta(eta) && validmu(mu))) {
if (is.null(coefold))
stop("no valid set of coefficients has been found: please supply starting values",
call. = FALSE)
warning("step size truncated: out of bounds",
call. = FALSE)
ii <- 1
while (!(valideta(eta) && validmu(mu))) {
if (ii > control$maxit)
stop("inner loop 2; cannot correct step size",
call. = FALSE)
ii <- ii + 1
start <- (start + coefold)/2
eta <- drop(x %*% start)
mu <- linkinv(eta <- eta + offset)
}
boundary <- TRUE
dev <- sum(dev.resids(y, mu, weights))
if (control$trace)
cat("Step halved: new deviance = ", dev, "\n",
sep = "")
}
if (abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
conv <- TRUE
coef <- start
break
}
else {
devold <- dev
coef <- coefold <- start
}
}
if (!conv)
warning("glm.fit: algorithm did not converge", call. = FALSE)
if (boundary)
warning("glm.fit: algorithm stopped at boundary value",
call. = FALSE)
eps <- 10 * .Machine$double.eps
if (family$family == "binomial") {
if (any(mu > 1 - eps) || any(mu < eps))
warning("glm.fit: fitted probabilities numerically 0 or 1 occurred",
call. = FALSE)
}
if (family$family == "poisson") {
if (any(mu < eps))
warning("glm.fit: fitted rates numerically 0 occurred",
call. = FALSE)
}
if (fit$rank < nvars)
coef[fit$pivot][seq.int(fit$rank + 1, nvars)] <- NA
xxnames <- xnames[fit$pivot]
residuals <- (y - mu)/mu.eta(eta)
fit$qr <- as.matrix(fit$qr)
nr <- min(sum(good), nvars)
if (nr < nvars) {
Rmat <- diag(nvars)
Rmat[1L:nr, 1L:nvars] <- fit$qr[1L:nr, 1L:nvars]
}
else Rmat <- fit$qr[1L:nvars, 1L:nvars]
Rmat <- as.matrix(Rmat)
Rmat[row(Rmat) > col(Rmat)] <- 0
names(coef) <- xnames
colnames(fit$qr) <- xxnames
dimnames(Rmat) <- list(xxnames, xxnames)
}
names(residuals) <- ynames
names(mu) <- ynames
names(eta) <- ynames
wt <- rep.int(0, nobs)
wt[good] <- w^2
names(wt) <- ynames
names(weights) <- ynames
names(y) <- ynames
if (!EMPTY)
names(fit$effects) <- c(xxnames[seq_len(fit$rank)], rep.int("",
sum(good) - fit$rank))
wtdmu <- if (intercept)
sum(weights * y)/sum(weights)
else linkinv(offset)
nulldev <- sum(dev.resids(y, wtdmu, weights))
n.ok <- nobs - sum(weights == 0)
nulldf <- n.ok - as.integer(intercept)
rank <- if (EMPTY)
0
else fit$rank
resdf <- n.ok - rank
aic.model <- aic(y, n, mu, weights, dev) + 2 * rank
list(coefficients = coef, residuals = residuals, fitted.values = mu,
effects = if (!EMPTY) fit$effects, R = if (!EMPTY) Rmat,
rank = rank, qr = if (!EMPTY) structure(fit[c("qr", "rank",
"qraux", "pivot", "tol")], class = "qr"), family = family,
linear.predictors = eta, deviance = dev, aic = aic.model,
null.deviance = nulldev, iter = iter, weights = wt, prior.weights = weights,
df.residual = resdf, df.null = nulldf, y = y, converged = conv,
boundary = boundary)
}
Note that this is a hack for a couple of reasons:
1. The function C_Cdrqls is not exported by the package stats, and so we have to look for it within namespace:package:stats.
2. This pollutes your global environment with the iteration values via a side-effect of the call to glm.fit.new, creating one vector per iteration. Side-effects are generally frowned upon in functional languages like R. You can probably clean the multiple objects bit up by creating a matrix or a data.frame and assign within that.
However, once you have the iteration values extracted, you can do whatever you want with them, including plotting them.
Here is what a call to glm with the newly defined glm.fit.new method would look like:
counts = c(18,17,15,20,10,20,25,13,12)
outcome = gl(3,1,9)
treatment = gl(3,3)
print(d.AD = data.frame(treatment, outcome, counts))
glm.D93 = glm(counts ~ outcome + treatment, family = poisson(),
control = list(trace = TRUE, epsilon = 1e-16), method = "glm.fit.new")
You can check that the iteration parameter values have indeed been populated in the global environment:
> ls(pattern = "iteration_x_")
[1] "iteration_x_1" "iteration_x_10" "iteration_x_11" "iteration_x_2"
[5] "iteration_x_3" "iteration_x_4" "iteration_x_5" "iteration_x_6"
[9] "iteration_x_7" "iteration_x_8" "iteration_x_9"

How to deal with perfect fit linear model

The data I'm dealing with occasionally has a "perfectly fitting" linear model. For each regression I run, I need to extract the r.squared value which I've been doing with summary(mymodel)$r.squared but this fails in the case of a perfectly fitting model (see below).
df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1))
mymodel <- lm(y ~ x, data = df)
summary(mymodel)$r.squared #This raises a warning
0.5294
How can I handle these cases? Basically, I think I want to do something like
If(mymodel is a perfect fit)
rsquared = 1
else
rsquared = summary(mymodel)$r.squared
You can use tryCatch
df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1))
mymodel <- lm(y ~ x, data = df)
summary(mymodel)$r.squared #This raises a warning
tryCatch(summary(mymodel)$r.squared, warning = function(w) return(1))
# [1] 1
And with an added conditional to catch specific warnings
df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1))
mymodel <- lm(y ~ x, data = df)
summary(mymodel)$r.squared #This raises a warning
f <- function(expr) {
tryCatch(expr,
warning = function(w) {
if (grepl('perfect fit', w))
return(1)
else return(w)
})
}
f(TRUE)
# [1] TRUE
f(sum(1:5))
# [1] 15
f(summary(mymodel)$r.squared)
# [1] 1
f(warning('this is not a fit warning'))
# <simpleWarning in doTryCatch(return(expr), name, parentenv, handler): this is not a fit warning>
If you want to make sure that everything will be working perfect then you can just slightly modify the source code (type summary.lm to see the original code):
df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1))
mymodel <- lm(y ~ x, data = df)
This is how i modified it. All is the same as the original summary function apart from the bit at the bottom of the function.
summary2 <- function (object, correlation = FALSE, symbolic.cor = FALSE,
...)
{
z <- object
p <- z$rank
rdf <- z$df.residual
if (p == 0) {
r <- z$residuals
n <- length(r)
w <- z$weights
if (is.null(w)) {
rss <- sum(r^2)
}
else {
rss <- sum(w * r^2)
r <- sqrt(w) * r
}
resvar <- rss/rdf
ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")]
class(ans) <- "summary.lm"
ans$aliased <- is.na(coef(object))
ans$residuals <- r
ans$df <- c(0L, n, length(ans$aliased))
ans$coefficients <- matrix(NA, 0L, 4L)
dimnames(ans$coefficients) <- list(NULL, c("Estimate",
"Std. Error", "t value", "Pr(>|t|)"))
ans$sigma <- sqrt(resvar)
ans$r.squared <- ans$adj.r.squared <- 0
return(ans)
}
if (is.null(z$terms))
stop("invalid 'lm' object: no 'terms' component")
if (!inherits(object, "lm"))
warning("calling summary.lm(<fake-lm-object>) ...")
Qr <- qr(object)
n <- NROW(Qr$qr)
if (is.na(z$df.residual) || n - p != z$df.residual)
warning("residual degrees of freedom in object suggest this is not an \"lm\" fit")
r <- z$residuals
f <- z$fitted.values
w <- z$weights
if (is.null(w)) {
mss <- if (attr(z$terms, "intercept"))
sum((f - mean(f))^2)
else sum(f^2)
rss <- sum(r^2)
}
else {
mss <- if (attr(z$terms, "intercept")) {
m <- sum(w * f/sum(w))
sum(w * (f - m)^2)
}
else sum(w * f^2)
rss <- sum(w * r^2)
r <- sqrt(w) * r
}
resvar <- rss/rdf
p1 <- 1L:p
R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
se <- sqrt(diag(R) * resvar)
est <- z$coefficients[Qr$pivot[p1]]
tval <- est/se
ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")]
ans$residuals <- r
ans$coefficients <- cbind(est, se, tval, 2 * pt(abs(tval),
rdf, lower.tail = FALSE))
dimnames(ans$coefficients) <- list(names(z$coefficients)[Qr$pivot[p1]],
c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
ans$aliased <- is.na(coef(object))
ans$sigma <- sqrt(resvar)
ans$df <- c(p, rdf, NCOL(Qr$qr))
if (p != attr(z$terms, "intercept")) {
df.int <- if (attr(z$terms, "intercept"))
1L
else 0L
ans$r.squared <- mss/(mss + rss)
ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n -
df.int)/rdf)
ans$fstatistic <- c(value = (mss/(p - df.int))/resvar,
numdf = p - df.int, dendf = rdf)
}
else ans$r.squared <- ans$adj.r.squared <- 0
ans$cov.unscaled <- R
dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1,
1)]
#below is the only change to the code
#instead of ans$r.squared <- 1 the original code had a warning
if (is.finite(resvar) && resvar < (mean(f)^2 + var(f)) *
1e-30) {
ans$r.squared <- 1 #this is practically the only change in the source code. Originally it had the warning here
}
#moved the above lower in the order of the code so as not to affect the original code
#checked it and seems to be working properly
if (correlation) {
ans$correlation <- (R * resvar)/outer(se, se)
dimnames(ans$correlation) <- dimnames(ans$cov.unscaled)
ans$symbolic.cor <- symbolic.cor
}
if (!is.null(z$na.action))
ans$na.action <- z$na.action
class(ans) <- "summary.lm"
ans
}
Run the new formula and see that it works now without any warnings. No other if or else if conditions are required.
> summary2(mymodel)$r.squared
[1] 1
One option to catch a perfect fit is to determine the residuals: if it is a perfect fit, the sum of residuals will be zero.
x = 1:5
# generate 3 sets of y values, last set is random values
y = matrix(data = c(rep(1,5),1:5,rnorm(5)), nrow = 5)
tolerance = 0.0001
r.sq = array(NA,ncol(y))
# check fit for three sets
for (i in 1:ncol(y)){
fit = lm(y[,i]~x)
# determine sum of residuals
if (sum(abs(resid(fit))) < tolerance) {
# perfect fit case
r.sq[i] = 1 } else {
# non-perfect fit case
r.sq[i] = summary(fit)$r.squared
}
}
print(r.sq)
# [1] 1.0000000 1.0000000 0.7638879

Reproducing results from previous answer is not working due to using new version of lme4

I have tried to reproduce the results from the answers for this question “Estimating random effects and applying user defined correlation/covariance structure with R lme4 or nlme package “ https://stats.stackexchange.com/questions/18563/estimating-random-effects-and-applying-user-defined-correlation-covariance-struc
Aaron Rendahl's codes
library(pedigreemm)
relmatmm <- function (formula, data, family = NULL, REML = TRUE, relmat = list(),
control = list(), start = NULL, verbose = FALSE, subset,
weights, na.action, offset, contrasts = NULL, model = TRUE,
x = TRUE, ...)
{
mc <- match.call()
lmerc <- mc
lmerc[[1]] <- as.name("lmer")
lmerc$relmat <- NULL
if (!length(relmat))
return(eval.parent(lmerc))
stopifnot(is.list(relmat), length(names(relmat)) == length(relmat))
lmerc$doFit <- FALSE
lmf <- eval(lmerc, parent.frame())
relfac <- relmat
relnms <- names(relmat)
stopifnot(all(relnms %in% names(lmf$FL$fl)))
asgn <- attr(lmf$FL$fl, "assign")
for (i in seq_along(relmat)) {
tn <- which(match(relnms[i], names(lmf$FL$fl)) == asgn)
if (length(tn) > 1)
stop("a relationship matrix must be associated with only one random effects term")
Zt <- lmf$FL$trms[[tn]]$Zt
relmat[[i]] <- Matrix(relmat[[i]][rownames(Zt), rownames(Zt)],
sparse = TRUE)
relfac[[i]] <- chol(relmat[[i]])
lmf$FL$trms[[tn]]$Zt <- lmf$FL$trms[[tn]]$A <- relfac[[i]] %*% Zt
}
ans <- do.call(if (!is.null(lmf$glmFit))
lme4:::glmer_finalize
else lme4:::lmer_finalize, lmf)
ans <- new("pedigreemm", relfac = relfac, ans)
ans#call <- match.call()
ans
}
the original example
set.seed(1234)
mydata <- data.frame (gen = factor(rep(1:10, each = 10)),
repl = factor(rep(1:10, 10)),
yld = rnorm(10, 5, 0.5))
library(lme4)
covmat <- round(nearPD(matrix(runif(100, 0, 0.2), nrow = 10))$mat, 2)
diag(covmat) <- diag(covmat)/10+1
rownames(covmat) <- colnames(covmat) <- levels(mydata$gen)
m <- relmatmm(yld ~ (1|gen) + (1|repl), relmat=list(gen=covmat), data=mydata)
here is the error message
Error in lmf$FL : $ operator not defined for this S4 class
In addition: Warning message:
In checkArgs("lmer", doFit = FALSE) : extra argument(s) ‘doFit’ disregarded
I will appreciate any help ?
Thanks
This is a re-implementation of the previous code -- I have done some slight modifications, and I have not tested it in any way -- test yourself and/or use at your own risk.
First create a slightly more modularized function that constructs the deviance function and fits the model:
doFit <- function(lmod,lmm=TRUE) {
## see ?modular
if (lmm) {
devfun <- do.call(mkLmerDevfun, lmod)
opt <- optimizeLmer(devfun)
mkMerMod(environment(devfun), opt, lmod$reTrms, fr = lmod$fr)
} else {
devfun <- do.call(mkGlmerDevfun, lmod)
opt <- optimizeGlmer(devfun)
devfun <- updateGlmerDevfun(devfun, lmod$reTrms)
opt <- optimizeGlmer(devfun, stage=2)
mkMerMod(environment(devfun), opt, lmod$reTrms, fr = lmod$fr)
}
}
Now create a function to construct the object that doFit needs and modify it:
relmatmm <- function (formula, ..., lmm=TRUE, relmat = list()) {
ff <- if (lmm) lFormula(formula, ...) else glFormula(formula, ...)
stopifnot(is.list(relmat), length(names(relmat)) == length(relmat))
relnms <- names(relmat)
relfac <- relmat
flist <- ff$reTrms[["flist"]] ## list of factors
## random-effects design matrix components
Ztlist <- ff$reTrms[["Ztlist"]]
stopifnot(all(relnms %in% names(flist)))
asgn <- attr(flist, "assign")
for (i in seq_along(relmat)) {
tn <- which(match(relnms[i], names(flist)) == asgn)
if (length(tn) > 1)
stop("a relationship matrix must be",
" associated with only one random effects term")
zn <- rownames(Ztlist[[i]])
relmat[[i]] <- Matrix(relmat[[i]][zn,zn],sparse = TRUE)
relfac[[i]] <- chol(relmat[[i]])
Ztlist[[i]] <- relfac[[i]] %*% Ztlist[[i]]
}
ff$reTrms[["Ztlist"]] <- Ztlist
ff$reTrms[["Zt"]] <- do.call(rBind,Ztlist)
fit <- doFit(ff,lmm)
}
Example
set.seed(1234)
mydata <- data.frame (gen = factor(rep(1:10, each = 10)),
repl = factor(rep(1:10, 10)),
yld = rnorm(10, 5, 0.5))
library(lme4)
covmat <- round(nearPD(matrix(runif(100, 0, 0.2), nrow = 10))$mat, 2)
diag(covmat) <- diag(covmat)/10+1
rownames(covmat) <- colnames(covmat) <- levels(mydata$gen)
m <- relmatmm(yld ~ (1|gen) + (1|repl), relmat=list(gen=covmat),
data=mydata)
This runs -- I don't know if the output is correct. It also doesn't make the resulting object into a pedigreemm object ...

Resources