what does the "data.frame$.variable" mean in a function? - r

I have seen a function:
smotest <- list(name = "SMOTE with more neighbors!",
func = function (x, y) {
library(DMwR)
dat <- if (is.data.frame(x)) x else as.data.frame(x)
dat$.y <- y
dat <- SMOTE(.y ~ ., data = dat, k = 10)
list(x = dat[, !grepl(".y", colnames(dat), fixed = TRUE)],
y = dat$.y)
},
first = TRUE)
In this function, what does thedat$.ymean, eapecially the ".", and why it has a ".".

Related

Error in Psych::Mediate: Object Not Found

I'm running a mediation analysis on a dataset in r and can't figure out how to get psych::mediate to work--I've done the same on another dataset before and didn't change anything, but it's not working with this new data for some reason.
I tried:
1. Turning 'condition' into a condition.f factor
2. Explicitly naming DATA a "data.frame"
3. Specifying different parameters such as "z" or "mod" in the function
4. Checked capitalization on all the variable column names.
None of the above seem to work.
library(psych)
DATA = STEX_S1_FINALCLEAN
Mediation_RA = psych::mediate( y = "DV_See", x = "Share_T", m = "Seff", data = DATA)
print(Mediation_RA,short=F)
I'd expect a full output with mediation values, but have gotten:
Error in psych::mediate(y = "DV_See", x = "Share_T", m = "Seff", data = DATA) :
object 'ex' not found
I don't see and object 'ex' anywhere, and that's not a name of any columns in the DATA data frame.
Following the suggestion of #r2evans, you can use the following modified function:
mymediate <- function (y, x, m = NULL, data, mod = NULL, z = NULL, n.obs = NULL,
use = "pairwise", n.iter = 5000, alpha = 0.05, std = FALSE,
plot = TRUE, zero = TRUE, main = "Mediation")
{
cl <- match.call()
if (class(y) == "formula") {
ps <- fparse(y)
y <- ps$y
x <- ps$x
m <- ps$m
mod <- ps$prod
ex <- ps$ex
x <- x[!ps$x %in% ps$m]
z <- ps$z
print(str(ps))
} else {
ex = NULL
}
all.ab <- NULL
if (is.numeric(y))
y <- colnames(data)[y]
if (is.numeric(x))
x <- colnames(data)[x]
if (!is.null(m))
if (is.numeric(m))
m <- colnames(data)[m]
if (!is.null(mod)) {
if (is.numeric(mod)) {
nmod <- length(mod)
mod <- colnames(data)[mod]
}
}
if (is.null(mod)) {
nmod <- 0
}
else {
nmod <- length(mod)
}
var.names <- list(IV = x, DV = y, med = m, mod = mod, z = z,
ex = ex)
if (any(!(unlist(var.names) %in% colnames(data)))) {
stop("Variable names not specified correctly")
}
if (ncol(data) == nrow(data)) {
raw <- FALSE
if (nmod > 0) {
stop("Moderation Analysis requires the raw data")
}
else {
data <- data[c(y, x, m, z), c(y, x, m, z)]
}
}
else {
data <- data[, c(y, x, m, z, ex)]
}
if (nmod == 1) {
mod <- c(x, mod)
nmod <- length(mod)
}
if (!is.matrix(data))
data <- as.matrix(data)
if ((dim(data)[1] != dim(data)[2])) {
n.obs = dim(data)[1]
if (!is.null(mod))
if (zero)
data <- scale(data, scale = FALSE)
C <- cov(data, use = use)
raw <- TRUE
if (std) {
C <- cov2cor(C)
}
}
else {
raw <- FALSE
C <- data
nvar <- ncol(C)
if (is.null(n.obs)) {
n.obs <- 1000
message("The data matrix was a correlation matrix and the number of subjects was not specified. \n n.obs arbitrarily set to 1000")
}
if (!is.null(m)) {
message("The replication data matrices were simulated based upon the specified number of subjects and the observed correlation matrix.")
eX <- eigen(C)
data <- matrix(rnorm(nvar * n.obs), n.obs)
data <- t(eX$vectors %*% diag(sqrt(pmax(eX$values,
0)), nvar) %*% t(data))
colnames(data) <- c(y, x, m)
}
}
if ((nmod > 0) | (!is.null(ex))) {
if (!raw) {
stop("Moderation analysis requires the raw data")
}
else {
if (zero) {
data <- scale(data, scale = FALSE)
}
}
}
if (nmod > 0) {
prods <- matrix(NA, ncol = length(ps$prod), nrow = nrow(data))
colnames(prods) <- paste0("V", 1:length(ps$prod))
for (i in 1:length(ps$prod)) {
prods[, i] <- apply(data[, ps$prod[[i]]], 1, prod)
colnames(prods)[i] <- paste0(ps$prod[[i]], collapse = "*")
}
data <- cbind(data, prods)
x <- c(x, colnames(prods))
}
if (!is.null(ex)) {
quads <- matrix(NA, ncol = length(ex), nrow = nrow(data))
colnames(quads) <- ex
for (i in 1:length(ex)) {
quads[, i] <- data[, ex[i]] * data[, ex[i]]
colnames(quads)[i] <- paste0(ex[i], "^2")
}
data <- cbind(data, quads)
x <- c(x, colnames(quads))
}
if (raw) {
C <- cov(data, use = use)
}
if (std) {
C <- cov2cor(C)
}
xy <- c(x, y)
numx <- length(x)
numy <- length(y)
if (!is.null(m)) {
numm <- length(m)
nxy <- numx + numy
m.matrix <- C[c(x, m), c(x, m), drop = FALSE]
}
else {
numm <- 0
nxy <- numx
}
df <- n.obs - nxy - 1
xy.matrix <- C[c(x, m), y, drop = FALSE]
total.reg <- matReg(x, y, m = m, z = z, C = C, n.obs = n.obs)
direct <- total.reg$beta
if (!is.null(z)) {
colnames(direct) <- paste0(colnames(direct), "*")
rownames(direct) <- paste0(rownames(direct), "*")
}
if (numm > 0) {
a.reg <- matReg(x = x, y = m, C = C, z = z, n.obs = n.obs)
b.reg <- matReg(c(x, m), y, C = C, z = z, n.obs = n.obs)
cprime.reg <- matReg(c(x, m), y, C = C, n.obs = n.obs,
z = z)
a <- a.reg$beta
b <- b.reg$beta[-(1:numx), , drop = FALSE]
c <- total.reg$beta
cprime <- cprime.reg$beta
all.ab <- matrix(NA, ncol = numm, nrow = numx)
for (i in 1:numx) {
all.ab[i, ] <- a[i, ] * t(b[, 1])
}
colnames(all.ab) <- m
rownames(all.ab) <- x
ab <- a %*% b
indirect <- c - ab
if (is.null(n.obs)) {
message("Bootstrap is not meaningful unless raw data are provided or the number of subjects is specified.")
mean.boot <- sd.boot <- ci.quant <- boot <- se <- tvalue <- prob <- NA
}
else {
boot <- psych:::boot.mediate(data, x, y, m, z, n.iter = n.iter,
std = std, use = use)
mean.boot <- colMeans(boot)
sd.boot <- apply(boot, 2, sd)
ci.quant <- apply(boot, 2, function(x) quantile(x,
c(alpha/2, 1 - alpha/2), na.rm = TRUE))
mean.boot <- matrix(mean.boot, nrow = numx)
sd.boot <- matrix(sd.boot, nrow = numx)
ci.ab <- matrix(ci.quant, nrow = 2 * numx * numy)
boots <- list(mean = mean.boot, sd = sd.boot, ci = ci.quant,
ci.ab = ci.ab)
}
}
else {
a.reg <- b.reg <- reg <- NA
a <- b <- c <- ab <- cprime <- boot <- boots <- indirect <- cprime.reg <- NA
}
if (!is.null(z)) {
var.names$IV <- paste0(var.names$IV, "*")
var.names$DV <- paste0(var.names$DV, "*")
var.names$med <- paste0(var.names$med, "*")
colnames(C) <- rownames(C) <- paste0(colnames(C), "*")
}
result <- list(var.names = var.names, a = a, b = b, ab = ab,
all.ab = all.ab, c = c, direct = direct, indirect = indirect,
cprime = cprime, total.reg = total.reg, a.reg = a.reg,
b.reg = b.reg, cprime.reg = cprime.reg, boot = boots,
boot.values = boot, sdnames = colnames(data), data = data,
C = C, Call = cl)
class(result) <- c("psych", "mediate")
if (plot) {
if (is.null(m)) {
moderate.diagram(result)
}
else {
mediate.diagram(result, main = main)
}
}
return(result)
}
You can test the mymediate function using the following example:
library(psych)
mod.k2 <- mymediate(y="OccupAsp", x=c("Intelligence","Siblings","FatherEd","FatherOcc"),
m= c(5:6), data=R.kerch, n.obs=767, n.iter=50)
print(mod.k2)

R error - Check for Remote errors returning multiple node issues

I am currently trying to run goodness of fit tests for data in the unmarked package. To do this I am using code written in the associated google group. This relies on parboot to assess the goodness of fit of the model. It then produces a Chi squared P value and C-hat value.
Strangely when I only perform >90 simulations of the model do I get the following error:
Error in checkForRemoteErrors(val) : 3 nodes produced errors;
first error: could not find function "mb.chisq.RN"
Below this number of simulations, the error is not encountered and the statistic can be computed.
I first run; mb.chisq.RN
mb.chisq.RN <- function (mod, print.table = TRUE, maxK=50,
...){
y.raw <- mod#data#y
N.raw <- nrow(y.raw)
na.raw <- apply(X = y.raw, MARGIN = 1, FUN = function(i) all(is.na(i)))
y.data <- y.raw[!na.raw, ]
N <- N.raw - sum(na.raw)
T <- ncol(y.data)
K <- 0:maxK
det.hist <- apply(X = y.data, MARGIN = 1, FUN = function(i) paste(i,
collapse = ""))
preds.lam <- predict(mod, type = "state")$Predicted
preds.p <- matrix(data = predict(mod, type = "det")$Predicted,
ncol = T, byrow = TRUE)
out.hist <- data.frame(det.hist, preds.lam)
un.hist <- unique(det.hist)
n.un.hist <- length(un.hist)
na.vals <- length(grep(pattern = "NA", x = un.hist)) > 0
if (na.vals) {
id.na <- grep(pattern = "NA", x = un.hist)
id.det.hist.na <- grep(pattern = "NA", x = det.hist)
cohort.na <- sort(un.hist[id.na])
n.cohort.na <- length(cohort.na)
unique.na <- gsub(pattern = "NA", replacement = "N",
x = cohort.na)
na.visits <- sapply(strsplit(x = unique.na, split = ""),
FUN = function(i) paste(ifelse(i == "N", 1, 0), collapse = ""))
names(cohort.na) <- na.visits
n.hist.missing.cohorts <- table(na.visits)
n.missing.cohorts <- length(n.hist.missing.cohorts)
out.hist.na <- out.hist[id.det.hist.na, ]
out.hist.na$det.hist <- droplevels(out.hist.na$det.hist)
just.na <- sapply(X = out.hist.na$det.hist, FUN = function(i) gsub(pattern = "1",
replacement = "0", x = i))
out.hist.na$coh <- sapply(X = just.na, FUN = function(i) gsub(pattern = "NA",
replacement = "1", x = i))
freqs.missing.cohorts <- table(out.hist.na$coh)
na.freqs <- table(det.hist[id.det.hist.na])
preds.p.na <- preds.p[id.det.hist.na, ]
cohort.not.na <- sort(un.hist[-id.na])
out.hist.not.na <- out.hist[-id.det.hist.na, ]
out.hist.not.na$det.hist <- droplevels(out.hist.not.na$det.hist)
n.cohort.not.na <- length(cohort.not.na)
n.sites.not.na <- length(det.hist) - length(id.det.hist.na)
preds.p.not.na <- preds.p[-id.det.hist.na, ]
}
else {
cohort.not.na <- sort(un.hist)
out.hist.not.na <- out.hist
preds.p.not.na <- preds.p
n.cohort.not.na <- length(cohort.not.na)
n.sites.not.na <- length(det.hist)
}
if (n.cohort.not.na > 0) {
exp.freqs <- rep(NA, n.cohort.not.na)
names(exp.freqs) <- cohort.not.na
for (i in 1:n.cohort.not.na) {
eq.solved <- rep(NA, n.sites.not.na)
select.hist <- cohort.not.na[i]
strip.hist <- unlist(strsplit(select.hist, split = ""))
hist.mat <- new.hist.mat <- new.hist.mat1 <- new.hist.mat0 <- matrix(NA, nrow = n.sites.not.na, ncol = T)
for (j in 1:n.sites.not.na) {
if (n.sites.not.na == 1) {
hist.mat[j,] <- preds.p.not.na
} else {hist.mat[j,] <- preds.p.not.na[j,]}
#Pr(y.ij=1|K)
p.k.mat <- sapply(hist.mat[j,],function(r){1-(1-r)^K})
new.hist.mat1[j,] <- dpois(K,out.hist.not.na[j, "preds.lam"]) %*% p.k.mat
new.hist.mat0[j,] <- dpois(K,out.hist.not.na[j, "preds.lam"]) %*% (1-p.k.mat)
new.hist.mat[j,] <- ifelse(strip.hist == "1",
new.hist.mat1[j,], ifelse(strip.hist == "0",
new.hist.mat0[j,], 0))
combo.lam.p <- paste(new.hist.mat[j, ], collapse = "*")
eq.solved[j] <- eval(parse(text = as.expression(combo.lam.p)))
}
exp.freqs[i] <- sum(eq.solved, na.rm = TRUE)
}
freqs <- table(out.hist.not.na$det.hist)
out.freqs <- matrix(NA, nrow = n.cohort.not.na, ncol = 4)
colnames(out.freqs) <- c("Cohort", "Observed", "Expected",
"Chi-square")
rownames(out.freqs) <- names(freqs)
out.freqs[, 1] <- 0
out.freqs[, 2] <- freqs
out.freqs[, 3] <- exp.freqs
out.freqs[, 4] <- ((out.freqs[, "Observed"] - out.freqs[,
"Expected"])^2)/out.freqs[, "Expected"]
}
if (na.vals) {
missing.cohorts <- list()
if (!is.matrix(preds.p.na)) {
preds.p.na <- matrix(data = preds.p.na, nrow = 1)
}
for (m in 1:n.missing.cohorts) {
select.cohort <- out.hist.na[which(out.hist.na$coh ==
names(freqs.missing.cohorts)[m]), ]
select.preds.p.na <- preds.p.na[which(out.hist.na$coh ==
names(freqs.missing.cohorts)[m]), ]
if (!is.matrix(select.preds.p.na)) {
select.preds.p.na <- matrix(data = select.preds.p.na,
nrow = 1)
}
select.preds.p.na[, gregexpr(pattern = "N", text = gsub(pattern = "NA",
replacement = "N", x = select.cohort$det.hist[1]))[[1]]] <- 1
n.total.sites <- nrow(select.cohort)
freqs.na <- table(droplevels(select.cohort$det.hist))
cohort.na.un <- sort(unique(select.cohort$det.hist))
n.hist.na <- length(freqs.na)
exp.na <- rep(NA, n.hist.na)
names(exp.na) <- cohort.na.un
for (i in 1:n.hist.na) {
n.sites.hist <- freqs.na[i]
eq.solved <- rep(NA, n.total.sites)
select.hist <- gsub(pattern = "NA", replacement = "N",
x = cohort.na.un[i])
strip.hist <- unlist(strsplit(select.hist, split = ""))
hist.mat <- new.hist.mat <- new.hist.mat1 <-new.hist.mat0 <- matrix(NA, nrow = n.total.sites, ncol = T)
for (j in 1:n.total.sites) {
hist.mat[j, ] <- select.preds.p.na[j, ]
#Pr(y.ij=1|K)
p.k.mat <- sapply(hist.mat[j,],function(r){1-(1-r)^K})
new.hist.mat1[j,] <- dpois(K,select.cohort[j, "preds.lam"]) %*% p.k.mat
new.hist.mat0[j,] <- dpois(K,select.cohort[j, "preds.lam"]) %*% (1-p.k.mat)
new.hist.mat[j,] <- ifelse(strip.hist == "1",
new.hist.mat1[j,], ifelse(strip.hist == "0",
new.hist.mat0[j,], 1))
combo.lam.p <- paste(new.hist.mat[j, ], collapse = "*")
eq.solved[j] <- eval(parse(text = as.expression(combo.lam.p)))
}
exp.na[i] <- sum(eq.solved, na.rm = TRUE)
}
out.freqs.na <- matrix(NA, nrow = n.hist.na, ncol = 4)
colnames(out.freqs.na) <- c("Cohort", "Observed",
"Expected", "Chi-square")
rownames(out.freqs.na) <- cohort.na.un
out.freqs.na[, 1] <- m
out.freqs.na[, 2] <- freqs.na
out.freqs.na[, 3] <- exp.na
out.freqs.na[, 4] <- ((out.freqs.na[, "Observed"] -
out.freqs.na[, "Expected"])^2)/out.freqs.na[,
"Expected"]
missing.cohorts[[m]] <- list(out.freqs.na = out.freqs.na)
}
}
if (na.vals) {
chisq.missing <- do.call("rbind", lapply(missing.cohorts,
FUN = function(i) i$out.freqs.na))
if (n.cohort.not.na > 0) {
chisq.unobs.det <- N - sum(out.freqs[, "Expected"]) -
sum(chisq.missing[, "Expected"])
chisq.table <- rbind(out.freqs, chisq.missing)
}
else {
chisq.unobs.det <- N - sum(chisq.missing[, "Expected"])
chisq.table <- chisq.missing
}
}
else {
chisq.unobs.det <- N - sum(out.freqs[, "Expected"])
chisq.na <- 0
chisq.table <- out.freqs
}
chisq <- sum(chisq.table[, "Chi-square"]) + chisq.unobs.det
if (print.table) {
out <- list(chisq.table = chisq.table, chi.square = chisq,
model.type = "single-season")
}
else {
out <- list(chi.square = chisq, model.type = "single-season")
}
class(out) <- "mb.chisq"
return(out)
}
Which will successfuly compute a Chi squared value.
I then run the test.
mb.gof.test.RN <- function (mod, nsim = 100, plot.hist = TRUE, ...){
mod.table <- mb.chisq.RN(mod)
out <- parboot(mod, statistic = function(i) mb.chisq.RN(i)$chi.square,
nsim = nsim)
p.value <- sum(out#t.star >= out#t0)/nsim
if (p.value == 0) {
p.display <- paste("<", 1/nsim)
}
else {
p.display = paste("=", round(p.value, digits = 4))
}
if (plot.hist) {
hist(out#t.star, main = paste("Bootstrapped MacKenzie and Bailey fit statistic (",
nsim, " samples)", sep = ""), xlim = range(c(out#t.star,
out#t0)), xlab = paste("Simulated statistic ", "(observed = ",
round(out#t0, digits = 2), ")", sep = ""))
title(main = bquote(paste(italic(P), " ", .(p.display))),
line = 0.5)
abline(v = out#t0, lty = "dashed", col = "red")
}
c.hat.est <- out#t0/mean(out#t.star)
gof.out <- list(model.type = mod.table$model.type, chisq.table = mod.table$chisq.table,
chi.square = mod.table$chi.square, t.star = out#t.star,
p.value = p.value, c.hat.est = c.hat.est, nsim = nsim)
class(gof.out) <- "mb.chisq"
return(gof.out)
}
>mb.gof.test.RN(fm9)
which produces the following error:
Error in checkForRemoteErrors(val) : 3 nodes produced errors;
first error: could not find function "mb.chisq.RN"
I'm not entirely sure why this error only occurs above a certain number of simulations so any pointers would be greatly received.

nls boot error must have positive length

I am getting the error below with nlsBoot() any idea what is wrong?
Error in apply(tabboot, 1, quantile, c(0.5, 0.025, 0.975)) :
dim(X) must have a positive length
set.seed(1)
x = 1:100
y = x^2+rnorm(100,50,500)
plot(x,y)
d = data.frame(x =x, y=y)
mymodel = nls(y~x^b,start= list(b=1),data = d)
mymodel
library(nlstools)
nlsBoot(mymodel, niter = 999)
Try to define the formula before applying the nls function, like this:
formula <- as.formula(y ~ x^b)
mymodel <- nls(formula,start= list(b=1),data = d)
added
Well, I've modified the code and now it can handle one parameter fit.
# My suggestion is to erase all the environment first:
rm(list = ls())
# Then we start again:
set.seed(1)
x = 1:100
y = x^2+rnorm(100,50,500)
plot(x,y)
d = data.frame(x =x, y=y)
mymodel = nls(y~x^b,start= list(b=1),data = d)
Here is the function that you have to use:
nlsboot_onepar <- function (nls, niter = 999)
{
if (!inherits(nls, "nls"))
stop("Use only with 'nls' objects")
data2 <- eval(nls$data, sys.frame(0))
fitted1 <- fitted(nls)
resid1 <- resid(nls)
var1 <- all.vars(formula(nls)[[2]])
l1 <- lapply(1:niter, function(i) {
data2[, var1] <- fitted1 + sample(scale(resid1, scale = FALSE),
replace = TRUE)
nls2 <- try(update(nls, start = as.list(coef(nls)),
data = data2), silent = TRUE)
if (inherits(nls2, "nls"))
return(list(coef = coef(nls2), rse = summary(nls2)$sigma))
})
if (sum(sapply(l1, is.null)) > niter/2)
stop(paste("Procedure aborted: the fit only converged in",
round(sum(sapply(l1, is.null))/niter), "% during bootstrapping"))
tabboot <- sapply(l1[!sapply(l1, is.null)], function(z) z$coef,simplify =
FALSE)
tabboot <- as.matrix(t(as.numeric(tabboot)))
rownames(tabboot) <- "b"
rseboot <- sapply(l1[!sapply(l1, is.null)], function(z) z$rse)
recapboot <- t(apply(tabboot, 1, quantile, c(0.5, 0.025,
0.975)))
colnames(recapboot) <- c("Median", "2.5%", "97.5%")
estiboot <- t(apply(tabboot, 1, function(z) c(mean(z), sd(z))))
colnames(estiboot) <- c("Estimate", "Std. error")
serr <- sum(sapply(l1, is.null))
if (serr > 0)
warning(paste("The fit did not converge", serr, "times during
bootstrapping"))
listboot <- list(coefboot = t(tabboot), rse = rseboot, bootCI = recapboot,
estiboot = estiboot)
class(listboot) <- "nlsBoot"
return(listboot)
}
And then we use it:
result <- nlsboot_onepar(mymodel, niter = 999)
If you want to plot the parameter distribution, you can do this:
graphics.off()
plot(density(as.vector(result$coefboot)))
# or
hist(as.vector(result$coefboot))
I hope that helps you.

Custom column names as arguments in the functions of stability R package

I developed the stability R package which can be installed from CRAN.
install.packages("stability")
However, I have difficulty in making it to take custom column names as function arguments. Here is an example of add_anova function
library(stability)
data(ge_data)
YieldANOVA <-
add_anova(
.data = ge_data
, .y = Yield
, .rep = Rep
, .gen = Gen
, .env = Env
)
YieldANOVA
The above code works fine. However, when I change the column names of the data.frame, it doesn't work as below:
df1 <- ge_data
names(df1) <- c("G", "Institute", "R", "Block", "E", "Y")
fm1 <-
add_anova(
.data = df1
, .y = Y
, .rep = Rep
, .gen = G
, .env = E
)
Error in model.frame.default(formula = terms(.data$Y ~ .data$E + .data$Rep:.data$E + :
invalid type (NULL) for variable '.data$Rep'
Similarly another function stab_reg
fm1Reg <-
stab_reg(
.data = df1
, .y = Y
, .gen = G
, .env = E
)
Error in eval(predvars, data, env) : object 'Gen' not found
The codes of these functions can be accessed by
getAnywhere(add_anova.default)
function (.data, .y, .rep, .gen, .env)
{
Y <- enquo(.y)
Rep <- enquo(.rep)
G <- enquo(.gen)
E <- enquo(.env)
fm1 <- lm(formula = terms(.data$Y ~ .data$E + .data$Rep:.data$E +
.data$G + .data$G:.data$E, keep.order = TRUE), data = .data)
fm1ANOVA <- anova(fm1)
rownames(fm1ANOVA) <- c("Env", "Rep(Env)", "Gen", "Gen:Env",
"Residuals")
fm1ANOVA[1, 4] <- fm1ANOVA[1, 3]/fm1ANOVA[2, 3]
fm1ANOVA[2, 4] <- NA
fm1ANOVA[1, 5] <- 1 - pf(as.numeric(fm1ANOVA[1, 4]), fm1ANOVA[1,
1], fm1ANOVA[2, 1])
fm1ANOVA[2, 5] <- 1 - pf(as.numeric(fm1ANOVA[2, 4]), fm1ANOVA[2,
1], fm1ANOVA[5, 1])
class(fm1ANOVA) <- c("anova", "data.frame")
return(list(anova = fm1ANOVA))
}
<bytecode: 0xc327c28>
<environment: namespace:stability>
and
getAnywhere(stab_reg.default)
function (.data, .y, .rep, .gen, .env)
{
Y <- enquo(.y)
Rep <- enquo(.rep)
G <- enquo(.gen)
E <- enquo(.env)
g <- length(levels(.data$G))
e <- length(levels(.data$E))
r <- length(levels(.data$Rep))
g_means <- .data %>% dplyr::group_by(!!G) %>% dplyr::summarize(Mean = mean(!!Y))
names(g_means) <- c("G", "Mean")
DataNew <- .data %>% dplyr::group_by(!!G, !!E) %>% dplyr::summarize(GEMean = mean(!!Y)) %>%
dplyr::group_by(!!E) %>% dplyr::mutate(EnvMean = mean(GEMean))
IndvReg <- lme4::lmList(GEMean ~ EnvMean | Gen, data = DataNew)
IndvRegFit <- summary(IndvReg)
StabIndvReg <- tibble::as_tibble(data.frame(g_means, Slope = coef(IndvRegFit)[,
, 2][, 1], LCI = confint(IndvReg)[, , 2][, 1], UCI = confint(IndvReg)[,
, 2][, 2], R.Sqr = IndvRegFit$r.squared, RMSE = IndvRegFit$sigma,
SSE = IndvRegFit$sigma^2 * IndvRegFit$df[, 2], Delta = IndvRegFit$sigma^2 *
IndvRegFit$df[, 2]/r))
MeanSlopePlot <- ggplot(data = StabIndvReg, mapping = aes(x = Slope,
y = Mean)) + geom_point() + geom_text(aes(label = G),
size = 2.5, vjust = 1.25, colour = "black") + geom_vline(xintercept = 1,
linetype = "dotdash") + geom_hline(yintercept = mean(StabIndvReg$Mean),
linetype = "dotdash") + labs(x = "Slope", y = "Mean") +
scale_x_continuous(sec.axis = dup_axis(), labels = scales::comma) +
scale_y_continuous(sec.axis = dup_axis(), labels = scales::comma) +
theme_bw()
return(list(StabIndvReg = StabIndvReg, MeanSlopePlot = MeanSlopePlot))
}
<bytecode: 0xe431010>
<environment: namespace:stability>
One of the problems in the data 'df1' is the column name is 'R' instead of "Rep" which was passed into the function. Second, the terms passed into the formula are quosures. we could change it to string with quo_names and then construct formula with paste
add_anova1 <- function (.data, .y, .rep, .gen, .env) {
y1 <- quo_name(enquo(.y))
r1 <- quo_name(enquo(.rep))
g1 <- quo_name(enquo(.gen))
e1 <- quo_name(enquo(.env))
fm <- formula(paste0(y1, "~", paste(e1, paste(r1, e1, sep=":"),
g1, paste(g1, e1, sep=":"), sep="+")))
fm1 <- lm(terms(fm, keep.order = TRUE), data = .data)
fm1ANOVA <- anova(fm1)
rownames(fm1ANOVA) <- c("Env", "Rep(Env)", "Gen", "Gen:Env",
"Residuals")
fm1ANOVA[1, 4] <- fm1ANOVA[1, 3]/fm1ANOVA[2, 3]
fm1ANOVA[2, 4] <- NA
fm1ANOVA[1, 5] <- 1 - pf(as.numeric(fm1ANOVA[1, 4]), fm1ANOVA[1,
1], fm1ANOVA[2, 1])
fm1ANOVA[2, 5] <- 1 - pf(as.numeric(fm1ANOVA[2, 4]), fm1ANOVA[2,
1], fm1ANOVA[5, 1])
class(fm1ANOVA) <- c("anova", "data.frame")
return(list(anova = fm1ANOVA))
}
YieldANOVA2 <- add_anova1(
.data = df1
, .y = Y
, .rep = R
, .gen = G
, .env = E
)
-checking with the output generated using 'ge_data' without changing the column names
all.equal(YieldANOVA, YieldANOVA2, check.attributes = FALSE)
#[1] TRUE
Similarly stab_reg could be changed

R: incorporating fisher.test into Hmisc's summaryM leads to error

catTestfisher <-
function (tab)
{
st <- if (!is.matrix(tab) || nrow(tab) < 2 | ncol(tab) <
2)
list(p.value = NA, statistic = NA, parameter = NA)
else {
rowcounts <- tab %*% rep(1, ncol(tab))
tab <- tab[rowcounts > 0, ]
if (!is.matrix(tab))
list(p.value = NA, statistic = NA, parameter = NA)
else fisher.test(tab)
}
list(P = st$p.value, stat = "", df = "",
testname = "Fisher's Exact", statname = "", latexstat = "", namefun = "",
plotmathstat = "")
}
I wanted to use library(Hmisc)'s summaryM function but with Fisher's exact test, so I wrote a catTestfisher function and set catTest = catTestfisher in my own summaryM2 function, which is exactly the same as summaryM, except for catTest = catTestfisher
summaryM2 <-
function (formula, groups = NULL, data = NULL, subset, na.action = na.retain,
overall = FALSE, continuous = 10, na.include = FALSE, quant = c(0.025,
0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95,
0.975), nmin = 100, test = FALSE, conTest = conTestkw,
catTest = catTestfisher, ordTest = ordTestpo)
{
marg <- length(data) && ".marginal." %in% names(data)
if (marg)
formula <- update(formula, . ~ . + .marginal.)
formula <- Formula(formula)
Y <- if (!missing(subset) && length(subset))
model.frame(formula, data = data, subset = subset, na.action = na.action)
else model.frame(formula, data = data, na.action = na.action)
X <- model.part(formula, data = Y, rhs = 1)
Y <- model.part(formula, data = Y, lhs = 1)
getlab <- function(x, default) {
lab <- attr(x, "label")
if (!length(lab) || lab == "")
default
else lab
}
if (marg) {
xm <- X$.marginal.
X$.marginal. <- NULL
}
else xm <- rep("", nrow(X))
if (length(X)) {
xname <- names(X)
if (length(xname) == 1 && !length(groups))
groups <- xname
if (!length(groups) && length(xname) > 1) {
warnings("Must specify groups when > 1 right hand side variable is present.\ngroups taken as first right hand variable.")
groups <- xname[1]
}
svar <- if (length(xname) == 1)
factor(rep(".ALL.", nrow(X)))
else do.call("interaction", list(X[setdiff(xname, groups)],
sep = " "))
group <- X[[groups]]
glabel <- getlab(group, groups)
}
else {
svar <- factor(rep(".ALL.", nrow(Y)))
group <- rep("", nrow(Y))
groups <- group.freq <- NULL
glabel <- ""
}
quants <- unique(c(quant, 0.025, 0.05, 0.125, 0.25, 0.375,
0.5, 0.625, 0.75, 0.875, 0.95, 0.975))
nv <- ncol(Y)
nameY <- names(Y)
R <- list()
for (strat in levels(svar)) {
instrat <- svar == strat
n <- integer(nv)
type <- n
comp <- dat <- vector("list", nv)
names(comp) <- names(dat) <- nameY
labels <- Units <- vector("character", nv)
if (test) {
testresults <- vector("list", nv)
names(testresults) <- names(comp)
}
gr <- group[instrat]
xms <- xm[instrat]
if (all(xms != ""))
xms <- rep("", length(xms))
group.freq <- table(gr)
group.freq <- group.freq[group.freq > 0]
if (overall)
group.freq <- c(group.freq, Combined = sum(group.freq))
for (i in 1:nv) {
w <- Y[instrat, i]
if (length(attr(w, "label")))
labels[i] <- attr(w, "label")
if (length(attr(w, "units")))
Units[i] <- attr(w, "units")
if (!inherits(w, "mChoice")) {
if (!is.factor(w) && !is.logical(w) && length(unique(w[!is.na(w)])) <
continuous)
w <- as.factor(w)
s <- !is.na(w)
if (na.include && !all(s) && length(levels(w))) {
w <- na.include(w)
levels(w)[is.na(levels(w))] <- "NA"
s <- rep(TRUE, length(s))
}
n[i] <- sum(s & xms == "")
w <- w[s]
g <- gr[s, drop = TRUE]
if (is.factor(w) || is.logical(w)) {
tab <- table(w, g)
if (test) {
if (is.ordered(w))
testresults[[i]] <- ordTest(g, w)
else testresults[[i]] <- catTest(tab)
}
if (nrow(tab) == 1) {
b <- casefold(dimnames(tab)[[1]], upper = TRUE)
pres <- c("1", "Y", "YES", "PRESENT")
abse <- c("0", "N", "NO", "ABSENT")
jj <- match(b, pres, nomatch = 0)
if (jj > 0)
bc <- abse[jj]
else {
jj <- match(b, abse, nomatch = 0)
if (jj > 0)
bc <- pres[jj]
}
if (jj) {
tab <- rbind(tab, rep(0, ncol(tab)))
dimnames(tab)[[1]][2] <- bc
}
}
if (overall)
tab <- cbind(tab, Combined = apply(tab, 1,
sum))
comp[[i]] <- tab
type[i] <- 1
}
else {
sfn <- function(x, quant) {
o <- options(digits = 10)
on.exit(options(o))
c(quantile(x, quant), Mean = mean(x), SD = sqrt(var(x)),
N = sum(!is.na(x)))
}
qu <- tapply(w, g, sfn, simplify = TRUE, quants)
if (test)
testresults[[i]] <- conTest(g, w)
if (overall)
qu$Combined <- sfn(w, quants)
comp[[i]] <- matrix(unlist(qu), ncol = length(quants) +
3, byrow = TRUE, dimnames = list(names(qu),
c(format(quants), "Mean", "SD", "N")))
if (any(group.freq <= nmin))
dat[[i]] <- lapply(split(w, g), nmin = nmin,
function(x, nmin) if (length(x) <= nmin)
x
else NULL)
type[i] <- 2
}
}
else {
w <- as.numeric(w) == 1
n[i] <- sum(!is.na(apply(w, 1, sum)) & xms ==
"")
g <- as.factor(gr)
ncat <- ncol(w)
tab <- matrix(NA, nrow = ncat, ncol = length(levels(g)),
dimnames = list(dimnames(w)[[2]], levels(g)))
if (test) {
pval <- numeric(ncat)
names(pval) <- dimnames(w)[[2]]
d.f. <- stat <- pval
}
for (j in 1:ncat) {
tab[j, ] <- tapply(w[, j], g, sum, simplify = TRUE,
na.rm = TRUE)
if (test) {
tabj <- rbind(table(g) - tab[j, ], tab[j,
])
st <- catTest(tabj)
pval[j] <- st$P
stat[j] <- st$stat
d.f.[j] <- st$df
}
}
if (test)
testresults[[i]] <- list(P = pval, stat = stat,
df = d.f., testname = st$testname, statname = st$statname,
latexstat = st$latexstat, plotmathstat = st$plotmathstat)
if (overall)
tab <- cbind(tab, Combined = apply(tab, 1,
sum))
comp[[i]] <- tab
type[i] <- 3
}
}
labels <- ifelse(nchar(labels), labels, names(comp))
R[[strat]] <- list(stats = comp, type = type, group.freq = group.freq,
labels = labels, units = Units, quant = quant, data = dat,
N = sum(!is.na(gr) & xms == ""), n = n, testresults = if (test) testresults)
}
structure(list(results = R, group.name = groups, group.label = glabel,
call = call, formula = formula), class = "summaryM")
}
After trying to test it on the following data, I get a warning and an error:
library(Hmisc)
set.seed(173)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE))
> summaryM2(sex ~ treatment, test=TRUE, overall = TRUE)
Error in round(teststat, 2) :
non-numeric argument to mathematical function
I tried stepping through the summaryM2 function line by line, but could not figure out what's causing the problem.
In your catTestfisher function, the output variables stat (test statistic) and df (degrees of freedom) should be numeric variables not empty strings. In the programming stat is coverted to teststat for rounding before being outputted (hence the error message for round("", 2) is non-numeric argument to mathematical function). See lines 1718 to 1721 in the summary.formula code) .
You can set df = NULL but a value is required for stat (not NA or NULL) otherwise no output is returned. You can get around the problem by setting stat = 0 (or any other number), and then only displaying the p value using prtest = "P".
catTestfisher2 <- function (tab)
{
st <- fisher.test(tab)
list(P = st$p.value, stat = 0, df = NULL,
testname = st$method, statname = "", latexstat = "", namefun = "",
plotmathstat = "")
}
output <- summaryM(sex ~ treatment, test=TRUE, overall = TRUE, catTest = catTestfisher2)
print(output, prtest = "P")
Descriptive Statistics (N=500)
+-------+-----------+-----------+-----------+-------+
| |Drug |Placebo |Combined |P-value|
| |(N=257) |(N=243) |(N=500) | |
+-------+-----------+-----------+-----------+-------+
|sex : m|0.52 (133)|0.52 (126)|0.52 (259)| 1 |
+-------+-----------+-----------+-----------+-------+
Note there is no need to define your own summaryM2 function. Just use catTest = to pass in your function.

Resources