This works.
ok <- function(data)
{
lattice:::bwplot(size ~ mxPH, data=data)
}
> ok(algae)
## no error
I'd like to specify size and mxPH by its column numbers. However, the following didn't work.
1st attempt
aa <- function(data, n1, n2)
{
names <- names(data)
lattice:::bwplot(names[n1] ~ names[n2], data=data)
}
> aa(algae,2,4)
Warning message:
In function (x, y, box.ratio = 1, box.width = box.ratio/(1 + box.ratio), :
NAs introduced by coercion
2nd Attempt (small modification to the 1st one)
bb <- function(data, n1, n2)
{
names <- factor(names(data))
lattice:::bwplot(names[n1] ~ names[n2], data=data)
}
> bb(algae,2,4)
## no error
3rd Attempt (small modification to the 2nd one)
cc <- function(data, n1, n2)
{
names <- factor(names(data))
lattice:::bwplot(deparse(substitute(names[n1])) ~ deparse(substitute(names[n2])), data=data)
}
> cc(algae,2,4)
Warning message:
In function (x, y, box.ratio = 1, box.width = box.ratio/(1 + box.ratio), :
NAs introduced by coercion
Data
algae <- utils:::read.table('http://www.liaad.up.pt/~ltorgo/DataMiningWithR/DataSets/Analysis.txt',
header=F,
dec='.',
col.names=c('season','size','speed','mxPH','mnO2','Cl','NO3','NH4','oPO4','PO4','Chla','a1','a2','a3','a4','a5','a6','a7'),
na.strings=c('XXXXXXX'))
library(lattice)
bwplot.column <- function(data, n1, n2, xlab = NULL, ylab = NULL, ...)
{
if (is.null(ylab)) {
ylab <- names(data)[n1]
}
if (is.null(xlab)) {
xlab <- names(data)[n2]
}
bwplot(data[,n1] ~ data[,n2], xlab = xlab, ylab = ylab, data = data, ...)
}
You can refer to the specific columns as Michael demonstrates, or you can also paste the formula together manually:
fun <- function(dat,n1,n2){
nms <- colnames(dat)
f <- as.formula(paste(nms[n1],"~",nms[n2]))
bwplot(f,data = dat)
}
fun(algae,2,4)
Related
I am currently trying to solve a bug but believe the data I am working with may be too complex and cause errors that shouldn't normally occur. I've written a function, and was hoping to add a try or tryCatch statement to skip the error if it occurs. I currently have:
library(glmnet)
foo <- function(data, ols_ps = TRUE, index) {
# index is the bootstrap sample index
x <- data[index, -1]
y <- data[index, 1]
ridge <- cv.glmnet(x, y, alpha = 0)
## The intercept estimate should be dropped.
weights <- as.numeric(coef(ridge, s = ridge$lambda.min))[-1]
# alpha=1, lasso
alasso <- cv.glmnet(x, y, alpha = 1,
penalty.factor = 1 / abs(weights))
# Select nonzero coefficients
coef <- as.vector(coef(alasso, s = alasso$lambda.min,
exact = TRUE, x = x, y = y,
penalty.factor = 1 / abs(weights)))[-1]
if (ols_ps == TRUE) {
coef_nonzero <- coef != 0
new_x <- tryCatch(x[, coef_nonzero, drop = FALSE],
error=function(e) NA)
if (!any(is.na(new_x)) & ncol(new_x) > 0) {
ls.obj <- lm(y ~ new_x)
ls_coef <- (ls.obj$coefficients)[-1]
coef[coef_nonzero] <- ls_coef
} else {
coef <- coef
}
} else {
coef <- coef
}
return(coef)
}
which normally works and works on most datasets. I think the error may be coming from a complex dataset. Is it possible to skip OLS if I get the below error?
"Error in x[, coef_nonzero, drop = FALSE] : \n (subscript) logical subscript too long\n"
attr(,"class")
Here is a minimal working example per request.
set.seed(123)
matrix <- matrix(runif(1000), ncol=10)
boot(matrix,foo,R=50)
Thanks in advance.
Maybe like this?
foo <- function(data, index) {
# index is the bootstrap sample index
x <- data[index, -1]
y <- data[index, 1]
ridge <- cv.glmnet(x, y, alpha = 0)
## The intercept estimate should be dropped.
weights <- as.numeric(coef(ridge, s = ridge$lambda.min))[-1]
# alpha=1, lasso
alasso <- cv.glmnet(x, y, alpha = 1,
penalty.factor = 1 / abs(weights))
# Select nonzero coefficients
coef <- as.vector(coef(alasso, s = alasso$lambda.min,
exact = TRUE, x = x, y = y,
penalty.factor = 1 / abs(weights)))[-1]
coef_nonzero <- coef != 0
new_x <- tryCatch(x[, coef_nonzero, drop = FALSE],
error=function(e) NA)
if (!any(is.na(new_x))) {
ls.obj <- lm(y ~ new_x)
ls_coef <- (ls.obj$coefficients)[-1]
coef[coef_nonzero] <- ls_coef
}
return(coef)
}
The problem is that we have no case when it fails so far.
I used to use a tobit regression with the following code:
tobit56 <- tobit (months56 ~ g1v3 + gender + un30min, left=0, right=60, data=gym)
summary(tobit56)
That code always worked well for me and got me a summary of the tobit.
Since yesterday whenever I run this, I get an error after I do summary:
tobit56 <- tobit (months56 ~ g1v3 + gender + un30min, left=0, right=60, data=gym)
summary(tobit56)
Error: $ operator is invalid for atomic vectors
Somebody has any idea what can cause this error message?
It seems to be a bug in the summary.tobit function of AER package.
Solution 1:
class(tobit_model$y) <- "Surv"
summary(tobit_model)
Solution 2:
Commenting out the line if(!inherits(y, "Surv")) y <- y$y and the function works fine.
summary.tobit(tobit_model)
summary.tobit <- function(object, correlation = FALSE, symbolic.cor = FALSE, vcov. = NULL, ...)
{
## failure
if(!is.null(object$fail)) {
warning("tobit/survreg failed.", object$fail, " No summary provided\n")
return(invisible(object))
}
## rank
if(all(is.na(object$coefficients))) {
warning("This model has zero rank --- no summary is provided")
return(invisible(object))
}
## vcov
if(is.null(vcov.)) vcov. <- vcov(object)
else {
if(is.function(vcov.)) vcov. <- vcov.(object)
}
## coefmat
coef <- coeftest(object, vcov. = vcov., ...)
attr(coef, "method") <- NULL
## Wald test
nc <- length(coef(object))
has_intercept <- attr(terms(object), "intercept") > 0.5
wald <- if(nc <= has_intercept) NULL else linearHypothesis(object,
if(has_intercept) cbind(0, diag(nc-1)) else diag(nc),
vcov. = vcov.)[2,3]
## instead of: waldtest(object, vcov = vcov.)
## correlation
correlation <- if(correlation) cov2cor(vcov.) else NULL
## distribution
dist <- object$dist
if(is.character(dist)) sd <- survreg.distributions[[dist]]
else sd <- dist
if(length(object$parms)) pprint <- paste(sd$name, "distribution: parmameters =", object$parms)
else pprint <- paste(sd$name, "distribution")
## number of observations
## (incorporating "bug fix" change for $y in survival 2.42-7)
surv_table <- function(y) {
# if(!inherits(y, "Surv")) y <- y$y
type <- attr(y, "type")
if(is.null(type) || (type == "left" && any(y[, 2L] > 1))) type <- "old"
y <- switch(type,
"left" = 2 - y[, 2L],
"interval" = y[, 3L],
y[, 2L]
)
table(factor(y, levels = c(2, 1, 0, 3),
labels = c("Left-censored", "Uncensored", "Right-censored", "Interval-censored")))
}
nobs <- surv_table(object$y)
nobs <- c("Total" = sum(nobs), nobs[1:3])
rval <- object[match(c("call", "df", "loglik", "iter", "na.action", "idf", "scale"),
names(object), nomatch = 0)]
rval <- c(rval, list(coefficients = coef, correlation = correlation,
symbolic.cor = symbolic.cor, parms = pprint, n = nobs, wald = wald))
class(rval) <- "summary.tobit"
return(rval)
}
I wonder how you can simplify these two :
plot (payroll,wins)
id = identify(payroll, wins,labels = code, n = 5)
plot (payroll,wins)
with(data, text(payroll, wins, labels = code, pos = 1, cex=0.5))
using other alternatives - pch() dan as.numeric()?
Not sure it's easier but you change pch during identification as below (taken from the R-help). Every time you click empty point change to filled-in dot.
# data simulation
data <- data.frame(payroll = rnorm(10), wins = rnorm(10), code = letters[1:10])
identifyPch <- function(x, y = NULL, n = length(x), plot = FALSE, pch = 19, ...)
{
xy <- xy.coords(x, y)
x <- xy$x
y <- xy$y
sel <- rep(FALSE, length(x))
while (sum(sel) < n) {
ans <- identify(x[!sel], y[!sel], labels = which(!sel), n = 1, plot = plot, ...)
if(!length(ans)) {
break
}
ans <- which(!sel)[ans]
points(x[ans], y[ans], pch = pch)
sel[ans] <- TRUE
}
## return indices of selected points
which(sel)
}
if(dev.interactive()) { ## use it
with(data, plot(payroll,wins))
id = with(data, identifyPch(payroll, wins))
}
Might be a silly question: How can I increase the size of letters at the top of the plot.cld()? For instance, how can I increase the size of display letters 'b','a','a' in the following plot? Hopfully I am not the only guy facing this annoyance. Thanks in advance!
library(multcomp)
data(warpbreaks)
amod <- aov(breaks ~ tension, data = warpbreaks)
tuk <- glht(amod, linfct = mcp(tension = "Tukey"))
tuk.cld <- cld(tuk)
old.par <- par(mai=c(1,1,1.25,1), no.readonly=TRUE)
plot(tuk.cld)
par(old.par)
From getAnywhere("plot.cld") I do not see how you could specifically alter the font size of the top labels. So... one way to solve this is adapting the existing function according to your needs:
library(multcomp)
data(warpbreaks)
amod <- aov(breaks ~ tension, data = warpbreaks)
tuk <- glht(amod, linfct = mcp(tension = "Tukey"))
tuk.cld <- cld(tuk)
old.par <- par(mai=c(1, 2,1.25, 1), no.readonly=TRUE)
plot2.cld(tuk.cld, cex.top = 2)
par(old.par)
with
plot2.cld <- function (x, type = c("response", "lp"), ..., cex.top = 1)
{
mcletters <- x$mcletters
msletters <- mcletters$monospacedLetters
vletters <- sapply(msletters, function(x) paste(strsplit(x,
"")[[1]], "\n", collapse = ""))
vletters <- vletters[gsub(" ", "", levels(x$x))]
msletters <- msletters[gsub(" ", "", levels(x$x))]
type <- match.arg(type)
dat <- x[c("x", "y", "lp")]
if (is.null(x$weights)) {
dat$weights <- rep(1, NROW(x$y))
}
else {
dat$weights <- x$weights
}
dat <- as.data.frame(dat)
xn <- x$xname
yn <- x$yname
if (!is.null(list(...)$xlab))
xn <- list(...)$xlab
if (!is.null(list(...)$ylab))
yn <- list(...)$ylab
if (x$covar || type == "lp") {
boxplot(lp ~ x, data = dat, xlab = xn, ylab = "linear predictor",
...)
axis(3, at = 1:nlevels(dat$x), labels = vletters)
}
else {
if (is.integer(dat$y))
dat$y <- as.numeric(dat$y)
switch(class(dat$y), numeric = {
boxplot(y ~ x, data = dat, xlab = xn, ylab = yn,
...)
axis(3, at = 1:nlevels(dat$x), labels = vletters, cex.axis = cex.top)
}, factor = {
at <- xtabs(weights ~ x, data = dat)/sum(dat$weights)
at <- cumsum(at) - at/2
mosaicplot(xtabs(weights ~ x + y, data = dat), main = NULL,
xlab = xn, ylab = yn, ...)
axis(3, at = at, labels = vletters, tick = FALSE, cex.axis = cex.top)
}, Surv = {
plot(survfit(y ~ x, data = dat), lty = 1:nlevels(dat$x),
...)
nc <- nchar(levels(dat$x))
spaces <- unlist(lapply(max(nc) - nc, function(x) return(paste(rep(" ",
x), collapse = ""))))
legend("topright", lty = 1:nlevels(dat$x), legend = paste(levels(dat$x),
spaces, ": ", msletters, sep = ""), ...)
})
}
}
giving
I have tried to reproduce the results from the answers for this question “Estimating random effects and applying user defined correlation/covariance structure with R lme4 or nlme package “ https://stats.stackexchange.com/questions/18563/estimating-random-effects-and-applying-user-defined-correlation-covariance-struc
Aaron Rendahl's codes
library(pedigreemm)
relmatmm <- function (formula, data, family = NULL, REML = TRUE, relmat = list(),
control = list(), start = NULL, verbose = FALSE, subset,
weights, na.action, offset, contrasts = NULL, model = TRUE,
x = TRUE, ...)
{
mc <- match.call()
lmerc <- mc
lmerc[[1]] <- as.name("lmer")
lmerc$relmat <- NULL
if (!length(relmat))
return(eval.parent(lmerc))
stopifnot(is.list(relmat), length(names(relmat)) == length(relmat))
lmerc$doFit <- FALSE
lmf <- eval(lmerc, parent.frame())
relfac <- relmat
relnms <- names(relmat)
stopifnot(all(relnms %in% names(lmf$FL$fl)))
asgn <- attr(lmf$FL$fl, "assign")
for (i in seq_along(relmat)) {
tn <- which(match(relnms[i], names(lmf$FL$fl)) == asgn)
if (length(tn) > 1)
stop("a relationship matrix must be associated with only one random effects term")
Zt <- lmf$FL$trms[[tn]]$Zt
relmat[[i]] <- Matrix(relmat[[i]][rownames(Zt), rownames(Zt)],
sparse = TRUE)
relfac[[i]] <- chol(relmat[[i]])
lmf$FL$trms[[tn]]$Zt <- lmf$FL$trms[[tn]]$A <- relfac[[i]] %*% Zt
}
ans <- do.call(if (!is.null(lmf$glmFit))
lme4:::glmer_finalize
else lme4:::lmer_finalize, lmf)
ans <- new("pedigreemm", relfac = relfac, ans)
ans#call <- match.call()
ans
}
the original example
set.seed(1234)
mydata <- data.frame (gen = factor(rep(1:10, each = 10)),
repl = factor(rep(1:10, 10)),
yld = rnorm(10, 5, 0.5))
library(lme4)
covmat <- round(nearPD(matrix(runif(100, 0, 0.2), nrow = 10))$mat, 2)
diag(covmat) <- diag(covmat)/10+1
rownames(covmat) <- colnames(covmat) <- levels(mydata$gen)
m <- relmatmm(yld ~ (1|gen) + (1|repl), relmat=list(gen=covmat), data=mydata)
here is the error message
Error in lmf$FL : $ operator not defined for this S4 class
In addition: Warning message:
In checkArgs("lmer", doFit = FALSE) : extra argument(s) ‘doFit’ disregarded
I will appreciate any help ?
Thanks
This is a re-implementation of the previous code -- I have done some slight modifications, and I have not tested it in any way -- test yourself and/or use at your own risk.
First create a slightly more modularized function that constructs the deviance function and fits the model:
doFit <- function(lmod,lmm=TRUE) {
## see ?modular
if (lmm) {
devfun <- do.call(mkLmerDevfun, lmod)
opt <- optimizeLmer(devfun)
mkMerMod(environment(devfun), opt, lmod$reTrms, fr = lmod$fr)
} else {
devfun <- do.call(mkGlmerDevfun, lmod)
opt <- optimizeGlmer(devfun)
devfun <- updateGlmerDevfun(devfun, lmod$reTrms)
opt <- optimizeGlmer(devfun, stage=2)
mkMerMod(environment(devfun), opt, lmod$reTrms, fr = lmod$fr)
}
}
Now create a function to construct the object that doFit needs and modify it:
relmatmm <- function (formula, ..., lmm=TRUE, relmat = list()) {
ff <- if (lmm) lFormula(formula, ...) else glFormula(formula, ...)
stopifnot(is.list(relmat), length(names(relmat)) == length(relmat))
relnms <- names(relmat)
relfac <- relmat
flist <- ff$reTrms[["flist"]] ## list of factors
## random-effects design matrix components
Ztlist <- ff$reTrms[["Ztlist"]]
stopifnot(all(relnms %in% names(flist)))
asgn <- attr(flist, "assign")
for (i in seq_along(relmat)) {
tn <- which(match(relnms[i], names(flist)) == asgn)
if (length(tn) > 1)
stop("a relationship matrix must be",
" associated with only one random effects term")
zn <- rownames(Ztlist[[i]])
relmat[[i]] <- Matrix(relmat[[i]][zn,zn],sparse = TRUE)
relfac[[i]] <- chol(relmat[[i]])
Ztlist[[i]] <- relfac[[i]] %*% Ztlist[[i]]
}
ff$reTrms[["Ztlist"]] <- Ztlist
ff$reTrms[["Zt"]] <- do.call(rBind,Ztlist)
fit <- doFit(ff,lmm)
}
Example
set.seed(1234)
mydata <- data.frame (gen = factor(rep(1:10, each = 10)),
repl = factor(rep(1:10, 10)),
yld = rnorm(10, 5, 0.5))
library(lme4)
covmat <- round(nearPD(matrix(runif(100, 0, 0.2), nrow = 10))$mat, 2)
diag(covmat) <- diag(covmat)/10+1
rownames(covmat) <- colnames(covmat) <- levels(mydata$gen)
m <- relmatmm(yld ~ (1|gen) + (1|repl), relmat=list(gen=covmat),
data=mydata)
This runs -- I don't know if the output is correct. It also doesn't make the resulting object into a pedigreemm object ...