I am trying to run the code below in order to simulate a set of P-values using a generalised linear model
However, I keep getting the error: Lapack routine dgesv: system is exactly singular: U[6,6] = 0
Here is the code I am trying to run:
#which_p_value = "x1"
which_p_value = "groupcategory"
#which_p_value = "x1:groupcategory"
run_anova = FALSE
simulate_mixed_effect = TRUE
mixed_effect_sd = 3.094069
mixed_effect_sd_slope = 3.098661
library(tidyverse)
n_people <- c(2,5,10,15,20)
coef1 <- 1.61
coef2 <- -0.01
#coef3 <- 5
#coef4 <- 0
g1 = 0
g2 = 1
g3 = 2
distances <- c(60,90,135,202.5,303.75,455.625)/100
n_trials <- 35
oneto1000 <- 25
n_track_lengths <- length(distances)
groupcategory = c(rep(g1, n_track_lengths), rep(g2, n_track_lengths),rep(g3,n_track_lengths))
z = c(n_people)
emptydataframeforpowerplots = NULL
coef3s <- c(-5, -4, -3, -2,-1, 0, 1, 2, 3, 4, 5)
coef4s <- c(-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1)
Datarray <- array(dim=c(length(coef3s), length(coef4s),length(n_people)))
coef3_counter =1
for (coef3 in coef3s) {
coef4_counter =1
for (coef4 in coef4s) {
z1_g2 <- coef1 + coef2*distances + coef3*g2 + coef4*g2*distances
z1_g3 <- coef1 + coef2*distances + coef3*g3 + coef4*g3*distances
d = NULL
pr1 = 1/(1+exp(-z1_g2))
pr2 = 1/(1+exp(-z1_g3))
counter=1
for (i in n_people) {
for (j in 1:oneto1000){
df <- c()
for (k in 1:i){
# random effect from drawing a random intercept with sd = x
if (simulate_mixed_effect){
coef1_r = rnorm(1, mean=coef1, sd=mixed_effect_sd)
coef2_r = rnorm(1, mean=coef1, sd=mixed_effect_sd_slope)
} else {
coef1_r = coef1
coef2_r = coef2
}
z_g1 <- coef1_r + coef2*distances + coef3*g1 + coef4*g1*distances
pr = 1/(1+exp(-z_g1))
z1_g2 <- coef1_r + coef2*distances + coef3*g2 + coef4*g2*distances
pr1 = 1/(1+exp(-z1_g2))
if (run_anova) {
df <- rbind(df, data.frame(x1 = c(rep(distances, 3)),
y = c(rbinom(n_track_lengths,n_trials,pr), rbinom(n_track_lengths,n_trials,pr1),rbinom(n_track_lengths,n_trials,pr2)),
groupcategory = groupcategory, id = c(rep(k,18))))
} else { # this is for glmer data organisation
for (m in 1:n_trials) {
df <- rbind(df, data.frame(x1 = c(rep(distances, 3)),
y = c(rbinom(n_track_lengths,1,pr),rbinom(n_track_lengths,1,pr1),rbinom(n_track_lengths,1,pr2)),groupcategory = groupcategory,id = c(rep(k,18))))
}
}
}
if (run_anova) {
#df_aov <- aov(y~x1*groupcategory+Error(id/(x1*groupcategory)),data=df)
#df_aov_sum <- summary(df_aov)
#pvalue <- df_aov_sum[[5]][[1]][which_p_value,"Pr(>F)"]
df_aov <- aov(y~x1*groupcategory+Error(id),data=df)
df_aov_sum <- summary(df_aov)
pvalue <- df_aov_sum[[2]][[1]][which_p_value, "Pr(>F)"]
} else { # glmer
mod_group_glmer <- glmer(y ~ x1 + groupcategory + (1+x1|id), data = df, family = "binomial")
sum <- summary(mod_group_glmer)
pvalue <- sum$coefficients[which_p_value, "Pr(>|z|)"]
}
d = rbind(d,data.frame(pvalue))
}
count <- plyr::ldply(d,function(c) sum(c<=0.05))
Datarray[coef3_counter,coef4_counter,counter] <- count$V1/oneto1000
counter = counter +1
d = NULL
}
coef4_counter = coef4_counter + 1
}
coef3_counter = coef3_counter + 1
}
Below is the script from the debugger:
Lapack routine dgesv: system is exactly singular: U[6,6] = 0
8. stopifnot(length(value <- as.numeric(value)) == 1L)
7. nM$newf(fn(nM$xeval()))
6. (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, n), control = list()) { n <- length(par) ...
5. do.call(optfun, arglist)
4. withCallingHandlers(do.call(optfun, arglist), warning = function(w) { curWarnings <<- append(curWarnings, list(w$message)) })
3. optwrap(optimizer, devfun, start, rho$lower, control = control, adj = adj, verbose = verbose, ...)
2. optimizeGlmer(devfun, optimizer = control$optimizer[[2]], restart_edge = control$restart_edge, boundary.tol = control$boundary.tol, control = control$optCtrl, start = start, nAGQ = nAGQ, verbose = verbose, stage = 2, calc.derivs = control$calc.derivs, use.last.params = control$use.last.params)
1. glmer(y ~ x1 + groupcategory + (1 + x1 id), data = df, family = "binomial")
Would anybody be able to give a helping hand as to how I can proceed from here?
Related
I tried to run code from https://cran.r-project.org/web/packages/MultiBD/vignettes/SIR-MCMC.pdf on my R software - they are still running but no output until now but I can run the following code on an online R compiler. This is the codes:
library(MultiBD)
data(Eyam)
Eyam
loglik_sir <- function(param, data) {
alpha <- exp(param[1]) # Rates must be non-negative
beta <- exp(param[2])
# Set-up SIR model
drates1 <- function(a, b) { 0 }
brates2 <- function(a, b) { 0 }
drates2 <- function(a, b) { alpha * b }
trans12 <- function(a, b) { beta * a * b }
sum(sapply(1:(nrow(data) - 1), # Sum across all time steps k
function(k) {
log(
dbd_prob( # Compute the transition probability matrix
t = data$time[k + 1] - data$time[k], # Time increment
a0 = data$S[k], b0 = data$I[k], # From: S(t_k), I(t_k)
drates1, brates2, drates2, trans12,
a = data$S[k + 1], B = data$S[k] + data$I[k] - data$S[k + 1],
computeMode = 4, nblocks = 80 # Compute using 4 threads
)[1, data$I[k + 1] + 1] # To: S(t_(k+1)), I(t_(k+1))
)
}))
}
logprior <- function(param) {
log_alpha <- param[1]
log_beta <- param[2]
dnorm(log_alpha, mean = 0, sd = 100, log = TRUE) +
dnorm(log_beta, mean = 0, sd = 100, log = TRUE)
}
library(MCMCpack)
alpha0 <- 3.39
beta0 <- 0.0212
post_sample <- MCMCmetrop1R(fun = function(param) { loglik_sir(param, Eyam) + logprior(param)
}, theta.init = log(c(alpha0, beta0)), mcmc = 500, burnin = 100)
plot(as.vector(post_sample[,1]), type = "l", xlab = "Iteration", ylab =
expression(log(alpha)))
plot(as.vector(post_sample[,2]), type = "l", xlab = "Iteration", ylab = expression(log(beta)))
library(ggplot2)
x = as.vector(post_sample[,1])
y = as.vector(post_sample[,2])
df <- data.frame(x, y)
ggplot(df,aes(x = x,y = y)) +
stat_density2d(aes(fill = ..level..), geom = "polygon", h = 0.26) +
scale_fill_gradient(low = "grey85", high = "grey35", guide = FALSE) +
xlab(expression(log(alpha))) +
ylab(expression(log(beta)))
quantile(exp(post_sample[,1]), probs = c(0.025,0.975))
quantile(exp(post_sample[,2]), probs = c(0.025,0.975))
I realize that this is the part of the codes that took long time to run but produce no output :
post_sample <- MCMCmetrop1R(fun = function(param) { loglik_sir(param, Eyam) + logprior(param)},
theta.init = log(c(alpha0, beta0)), mcmc = 500, burnin = 100)
I think my R software is the problem but what is it?
Consider the following model for the evolution of an asset's price:
This what I have done (in R). I could not find a function that randomly outputs +1 or -1, so I decided to adapt the inbuilt rbinom function.
## This code is in R
rm(list = ls())
library(dplyr)
library(dint)
library(magrittr)
library(stats)
path =
function(T, mu, sigma, p, x0) {
x = rep(NA, T)
x[1] = x0
for(i in 2:T){
z = if_else(rbinom(1,1,p) == 0, -1, 1)
x[i] = x[i-1] * exp(mu + sigma*z)
}
return(x)
}
## Just some testing
x_sim = path(T = 4, mu = 0, sigma = 0.01, p = 0.5, x0 = 100)
## Actual answer
Np = 10000
mc = matrix(nrow = 17, ncol = Np)
for(j in 1:Np){
mc[,j] = path(T = 17, mu = 0, sigma = 0.01, p = 0.5, x0 = 100)
}
test = mc[2:nrow(mc), ] >= 100
sum_test = colSums(test)
comp = sum(sum_test >= 1)/length(sum_test)
prob = 1 - comp
Does this make sense? Any help/tips/advice would be much appreciated. Thanks!
Staying close to your code, I came up with this. Intuitively, if you think about it, the probability should be rather low due to the parameters and I get a probability of about 6.7% which is roughly what I get if I run your code with the parameters from the assignment.
simpath <- function(t, mu, sigma, p, x0, seed){
# set seed
if(!missing(seed)){
set.seed(seed)
}
# set up matrix for storing the results
res <- matrix(c(1:t, rep(NA, t*2)), ncol = 3)
colnames(res) <- c('t', 'z_t', 'x_t')
res[, 'z_t'] <- sample(c(1, -1), size = t, prob = c(p, 1-p), replace = TRUE)
res[1, 3] <- x0
for(i in 2:t){
res[i, 3] <- res[i-1, 3] * exp(mu+sigma*res[i, 2])
}
return(res)
}
x_sim <- simpath(t = 4, mu = 0, sigma = 0.01, p = 0.5, x0 = 100, seed = 123)
x_sim2 <- simpath(t = 36, mu = 0, sigma = 0.03, p = 0.5, x0 = 100, seed = 123)
## Actual answer
Np <- 100000
mc <- matrix(nrow = 36, ncol = Np)
for (j in 1:Np){
mc[, j] <- simpath(t = 36, mu = 0, sigma = 0.03, p = 0.5, x0 = 100)[, 3]
}
test <- mc > 100
sum_test <- colSums(test)
comp = sum(sum_test == 0)/length(sum_test)
prob = comp
> prob
[1] 0.06759
I have written this model but rjags gives dimension mismatch error; What's happening?
Error in jags.model(textConnection(model1), data = jags_data, n.chains = n_chains, :
RUNTIME ERROR:
Compilation error on line 8.
Dimension mismatch taking subset of y
library(rjags)
model1 <- "model {
C <- 10000
for (j in 1:nobs){
zeros[j] ~ dpois(phi[j])
phi[j] <- -log(L[j]) + C
L[j] <- add[j]*(lambda[j]^y[j])*(1-lambda[j])^(1-y[j])
add[j] = ifelse(lambda[j] == 0.5, 2, aux[j])
aux[j] = 2*arctanh(1 - 2*lambda[j] + 10^(-323))/(1 - 2*lambda[j] + 10^(-323))
logit(lambda[j]) <- inprod(X[j, ], beta)
}
beta[1] ~ dnorm(0,1)
beta[2] ~ dgamma(1,1)
}"
n_chains = 1
n_adapt = 5000
n_iter = 10000
n_thin = 1
n_burnin = 5000
# generate data
n = 100
Ffun = plogis
design_mat = cbind(1, matrix(seq(0,1,by = 0.2), ncol=1))
gen_data = function(n, beta) {
X = design_mat[sample(nrow(design_mat), size = n, replace = T), ]
lambda = Ffun(X %*% beta)
y = rcbern(n,lambda)
idx = is.nan(y)
y[idx] = runif(length(idx))
list(X = X, y = y)
}
rcbern = function(n,lam){
x = runif(n)
y = log((x*(2*lam-1) - (lam-1))/(1-lam))/log(lam/(1-lam))
return(y)
}
beta = as.matrix(c(-3, 5))
jags_data = gen_data(n, beta)
jags_data$nobs = n
jg_model <- jags.model(textConnection(model1),
data = jags_data,
n.chains = n_chains,
n.adapt = n_adapt)
update(jg_model, n.iter = n_burnin)
result <- coda.samples(jg_model,
variable.names = c("beta"),
n.iter = n_iter,
thin = n_thin,
n.chains = n_chains)
beta_est = list(apply(result[[1]],2,median))
As suggested by #user20650 the issue is that you are indexing y as vector and your functions are generating as a matrix. Try this code with a slight change in gen_data():
library(rjags)
model1 <- "model {
C <- 10000
for (j in 1:nobs){
zeros[j] ~ dpois(phi[j])
phi[j] <- -log(L[j]) + C
L[j] <- add[j]*(lambda[j]^y[j])*(1-lambda[j])^(1-y[j])
add[j] = ifelse(lambda[j] == 0.5, 2, aux[j])
aux[j] = 2*arctanh(1 - 2*lambda[j] + 10^(-323))/(1 - 2*lambda[j] + 10^(-323))
logit(lambda[j]) <- inprod(X[j, ], beta)
}
beta[1] ~ dnorm(0,1)
beta[2] ~ dgamma(1,1)
}"
n_chains = 1
n_adapt = 5000
n_iter = 10000
n_thin = 1
n_burnin = 5000
# generate data
n = 100
Ffun = plogis
design_mat = cbind(1, matrix(seq(0,1,by = 0.2), ncol=1))
gen_data = function(n, beta) {
X = design_mat[sample(nrow(design_mat), size = n, replace = T), ]
lambda = Ffun(X %*% beta)
y = rcbern(n,lambda)
y <- as.vector(y)
idx = is.nan(y)
y[idx] = runif(length(idx))
list(X = X, y = y)
}
rcbern = function(n,lam){
x = runif(n)
y = log((x*(2*lam-1) - (lam-1))/(1-lam))/log(lam/(1-lam))
return(y)
}
beta = as.matrix(c(-3, 5))
jags_data = gen_data(n, beta)
jags_data$nobs = n
jg_model <- jags.model(textConnection(model1),
data = jags_data,
n.chains = n_chains,
n.adapt = n_adapt)
update(jg_model, n.iter = n_burnin)
result <- coda.samples(jg_model,
variable.names = c("beta"),
n.iter = n_iter,
thin = n_thin,
n.chains = n_chains)
beta_est = list(apply(result[[1]],2,median))
Output:
beta_est
[[1]]
beta[1] beta[2]
-0.006031984 0.692007301
You can also try y <- y[,1,drop=T] in the same function instead of as.vector()
I am trying to run the following code. My computer keeps getting frozen when I try to run it. Therefore, I can see the correlation matrices, I am unable to view the results of the GLM/data arrays.
# running the assay
#which_p_value = "x1"
which_p_value = "groupcategory"
#which_p_value = "x1:groupcategory"
run_anova = FALSE
simulate_mixed_effect = TRUE
mixed_effect_sd = 3.094069
mixed_effect_sd_slope = 3.098661
library(tidyverse)
n_people <- c(2,5,10,15,20)
coef1 <- 1.61
coef2 <- -0.01
#coef3 <- 5
#coef4 <- 0
g1 = 0
g2 = 1
g3 = 2
distances <- c(60,90,135,202.5,303.75,455.625)/100
n_trials <- 35
oneto1000 <- 25
n_track_lengths <- length(distances)
groupcategory = c(rep(g1, n_track_lengths), rep(g2, n_track_lengths),rep(g3,n_track_lengths))
z = c(n_people)
emptydataframeforpowerplots = NULL
coef3s <- c(-5, -4, -3, -2,-1, 0, 1, 2, 3, 4, 5)
coef4s <- c(-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1)
Datarray <- array(dim=c(length(coef3s), length(coef4s),length(n_people)))
coef3_counter =1
for (coef3 in coef3s) {
coef4_counter =1
for (coef4 in coef4s) {
z1_g2 <- coef1 + coef2*distances + coef3*g2 + coef4*g2*distances
z1_g3 <- coef1 + coef2*distances + coef3*g3 + coef4*g3*distances
d = NULL
pr1 = 1/(1+exp(-z1_g2))
pr2 = 1/(1+exp(-z1_g3))
counter=1
for (i in n_people) {
for (j in 1:oneto1000){
df <- c()
for (k in 1:i){
# random effect from drawing a random intercept with sd = x
if (simulate_mixed_effect){
coef1_r = rnorm(1, mean=coef1, sd=mixed_effect_sd)
coef2_r = rnorm(1, mean=coef1, sd=mixed_effect_sd_slope)
} else {
coef1_r = coef1
coef2_r = coef2
}
z_g1 <- coef1_r + coef2*distances + coef3*g1 + coef4*g1*distances
pr = 1/(1+exp(-z_g1))
z1_g2 <- coef1_r + coef2*distances + coef3*g2 + coef4*g2*distances
pr1 = 1/(1+exp(-z1_g2))
if (run_anova) {
df <- rbind(df, data.frame(x1 = c(rep(distances, 3)),
y = c(rbinom(n_track_lengths,n_trials,pr), rbinom(n_track_lengths,n_trials,pr1),rbinom(n_track_lengths,n_trials,pr2)),
groupcategory = groupcategory, id = c(rep(k,18))))
} else { # this is for glmer data organisation
for (m in 1:n_trials) {
df <- rbind(df, data.frame(x1 = c(rep(distances, 3)),
y = c(rbinom(n_track_lengths,1,pr),rbinom(n_track_lengths,1,pr1),rbinom(n_track_lengths,1,pr2)),groupcategory = groupcategory,id = c(rep(k,18))))
}
}
}
if (run_anova) {
#df_aov <- aov(y~x1*groupcategory+Error(id/(x1*groupcategory)),data=df)
#df_aov_sum <- summary(df_aov)
#pvalue <- df_aov_sum[[5]][[1]][which_p_value,"Pr(>F)"]
df_aov <- aov(y~x1*groupcategory+Error(id),data=df)
df_aov_sum <- summary(df_aov)
pvalue <- df_aov_sum[[2]][[1]][which_p_value, "Pr(>F)"]
}
checkme <- df %>% group_by(groupcategory,id) %>% summarise(miny=min(y),maxy=max(y)) %>% mutate(expectfail = miny==maxy)
else {
mod_group_glmer <- glmer(y ~ x1 + groupcategory + (1+x1|id), data = df, family = "binomial")
sum <- summary(mod_group_glmer)
pvalue <- sum$coefficients[which_p_value, "Pr(>|z|)"]
}
d = rbind(d,data.frame(pvalue))
}
count <- plyr::ldply(d,function(c) sum(c<=0.05))
Datarray[coef3_counter,coef4_counter,counter] <- count$V1/oneto1000
counter = counter +1
d = NULL
}
coef4_counter = coef4_counter + 1
}
coef3_counter = coef3_counter + 1
}
Does anybody have any advice on how I can overcome this issue? I have tried different things such as lowering the range of sample sizes (n_people) but I have still been unsuccessful. My computer starts making a whirring noise and eventually I am forced to have to 'force quit' the program?
catTestfisher <-
function (tab)
{
st <- if (!is.matrix(tab) || nrow(tab) < 2 | ncol(tab) <
2)
list(p.value = NA, statistic = NA, parameter = NA)
else {
rowcounts <- tab %*% rep(1, ncol(tab))
tab <- tab[rowcounts > 0, ]
if (!is.matrix(tab))
list(p.value = NA, statistic = NA, parameter = NA)
else fisher.test(tab)
}
list(P = st$p.value, stat = "", df = "",
testname = "Fisher's Exact", statname = "", latexstat = "", namefun = "",
plotmathstat = "")
}
I wanted to use library(Hmisc)'s summaryM function but with Fisher's exact test, so I wrote a catTestfisher function and set catTest = catTestfisher in my own summaryM2 function, which is exactly the same as summaryM, except for catTest = catTestfisher
summaryM2 <-
function (formula, groups = NULL, data = NULL, subset, na.action = na.retain,
overall = FALSE, continuous = 10, na.include = FALSE, quant = c(0.025,
0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95,
0.975), nmin = 100, test = FALSE, conTest = conTestkw,
catTest = catTestfisher, ordTest = ordTestpo)
{
marg <- length(data) && ".marginal." %in% names(data)
if (marg)
formula <- update(formula, . ~ . + .marginal.)
formula <- Formula(formula)
Y <- if (!missing(subset) && length(subset))
model.frame(formula, data = data, subset = subset, na.action = na.action)
else model.frame(formula, data = data, na.action = na.action)
X <- model.part(formula, data = Y, rhs = 1)
Y <- model.part(formula, data = Y, lhs = 1)
getlab <- function(x, default) {
lab <- attr(x, "label")
if (!length(lab) || lab == "")
default
else lab
}
if (marg) {
xm <- X$.marginal.
X$.marginal. <- NULL
}
else xm <- rep("", nrow(X))
if (length(X)) {
xname <- names(X)
if (length(xname) == 1 && !length(groups))
groups <- xname
if (!length(groups) && length(xname) > 1) {
warnings("Must specify groups when > 1 right hand side variable is present.\ngroups taken as first right hand variable.")
groups <- xname[1]
}
svar <- if (length(xname) == 1)
factor(rep(".ALL.", nrow(X)))
else do.call("interaction", list(X[setdiff(xname, groups)],
sep = " "))
group <- X[[groups]]
glabel <- getlab(group, groups)
}
else {
svar <- factor(rep(".ALL.", nrow(Y)))
group <- rep("", nrow(Y))
groups <- group.freq <- NULL
glabel <- ""
}
quants <- unique(c(quant, 0.025, 0.05, 0.125, 0.25, 0.375,
0.5, 0.625, 0.75, 0.875, 0.95, 0.975))
nv <- ncol(Y)
nameY <- names(Y)
R <- list()
for (strat in levels(svar)) {
instrat <- svar == strat
n <- integer(nv)
type <- n
comp <- dat <- vector("list", nv)
names(comp) <- names(dat) <- nameY
labels <- Units <- vector("character", nv)
if (test) {
testresults <- vector("list", nv)
names(testresults) <- names(comp)
}
gr <- group[instrat]
xms <- xm[instrat]
if (all(xms != ""))
xms <- rep("", length(xms))
group.freq <- table(gr)
group.freq <- group.freq[group.freq > 0]
if (overall)
group.freq <- c(group.freq, Combined = sum(group.freq))
for (i in 1:nv) {
w <- Y[instrat, i]
if (length(attr(w, "label")))
labels[i] <- attr(w, "label")
if (length(attr(w, "units")))
Units[i] <- attr(w, "units")
if (!inherits(w, "mChoice")) {
if (!is.factor(w) && !is.logical(w) && length(unique(w[!is.na(w)])) <
continuous)
w <- as.factor(w)
s <- !is.na(w)
if (na.include && !all(s) && length(levels(w))) {
w <- na.include(w)
levels(w)[is.na(levels(w))] <- "NA"
s <- rep(TRUE, length(s))
}
n[i] <- sum(s & xms == "")
w <- w[s]
g <- gr[s, drop = TRUE]
if (is.factor(w) || is.logical(w)) {
tab <- table(w, g)
if (test) {
if (is.ordered(w))
testresults[[i]] <- ordTest(g, w)
else testresults[[i]] <- catTest(tab)
}
if (nrow(tab) == 1) {
b <- casefold(dimnames(tab)[[1]], upper = TRUE)
pres <- c("1", "Y", "YES", "PRESENT")
abse <- c("0", "N", "NO", "ABSENT")
jj <- match(b, pres, nomatch = 0)
if (jj > 0)
bc <- abse[jj]
else {
jj <- match(b, abse, nomatch = 0)
if (jj > 0)
bc <- pres[jj]
}
if (jj) {
tab <- rbind(tab, rep(0, ncol(tab)))
dimnames(tab)[[1]][2] <- bc
}
}
if (overall)
tab <- cbind(tab, Combined = apply(tab, 1,
sum))
comp[[i]] <- tab
type[i] <- 1
}
else {
sfn <- function(x, quant) {
o <- options(digits = 10)
on.exit(options(o))
c(quantile(x, quant), Mean = mean(x), SD = sqrt(var(x)),
N = sum(!is.na(x)))
}
qu <- tapply(w, g, sfn, simplify = TRUE, quants)
if (test)
testresults[[i]] <- conTest(g, w)
if (overall)
qu$Combined <- sfn(w, quants)
comp[[i]] <- matrix(unlist(qu), ncol = length(quants) +
3, byrow = TRUE, dimnames = list(names(qu),
c(format(quants), "Mean", "SD", "N")))
if (any(group.freq <= nmin))
dat[[i]] <- lapply(split(w, g), nmin = nmin,
function(x, nmin) if (length(x) <= nmin)
x
else NULL)
type[i] <- 2
}
}
else {
w <- as.numeric(w) == 1
n[i] <- sum(!is.na(apply(w, 1, sum)) & xms ==
"")
g <- as.factor(gr)
ncat <- ncol(w)
tab <- matrix(NA, nrow = ncat, ncol = length(levels(g)),
dimnames = list(dimnames(w)[[2]], levels(g)))
if (test) {
pval <- numeric(ncat)
names(pval) <- dimnames(w)[[2]]
d.f. <- stat <- pval
}
for (j in 1:ncat) {
tab[j, ] <- tapply(w[, j], g, sum, simplify = TRUE,
na.rm = TRUE)
if (test) {
tabj <- rbind(table(g) - tab[j, ], tab[j,
])
st <- catTest(tabj)
pval[j] <- st$P
stat[j] <- st$stat
d.f.[j] <- st$df
}
}
if (test)
testresults[[i]] <- list(P = pval, stat = stat,
df = d.f., testname = st$testname, statname = st$statname,
latexstat = st$latexstat, plotmathstat = st$plotmathstat)
if (overall)
tab <- cbind(tab, Combined = apply(tab, 1,
sum))
comp[[i]] <- tab
type[i] <- 3
}
}
labels <- ifelse(nchar(labels), labels, names(comp))
R[[strat]] <- list(stats = comp, type = type, group.freq = group.freq,
labels = labels, units = Units, quant = quant, data = dat,
N = sum(!is.na(gr) & xms == ""), n = n, testresults = if (test) testresults)
}
structure(list(results = R, group.name = groups, group.label = glabel,
call = call, formula = formula), class = "summaryM")
}
After trying to test it on the following data, I get a warning and an error:
library(Hmisc)
set.seed(173)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE))
> summaryM2(sex ~ treatment, test=TRUE, overall = TRUE)
Error in round(teststat, 2) :
non-numeric argument to mathematical function
I tried stepping through the summaryM2 function line by line, but could not figure out what's causing the problem.
In your catTestfisher function, the output variables stat (test statistic) and df (degrees of freedom) should be numeric variables not empty strings. In the programming stat is coverted to teststat for rounding before being outputted (hence the error message for round("", 2) is non-numeric argument to mathematical function). See lines 1718 to 1721 in the summary.formula code) .
You can set df = NULL but a value is required for stat (not NA or NULL) otherwise no output is returned. You can get around the problem by setting stat = 0 (or any other number), and then only displaying the p value using prtest = "P".
catTestfisher2 <- function (tab)
{
st <- fisher.test(tab)
list(P = st$p.value, stat = 0, df = NULL,
testname = st$method, statname = "", latexstat = "", namefun = "",
plotmathstat = "")
}
output <- summaryM(sex ~ treatment, test=TRUE, overall = TRUE, catTest = catTestfisher2)
print(output, prtest = "P")
Descriptive Statistics (N=500)
+-------+-----------+-----------+-----------+-------+
| |Drug |Placebo |Combined |P-value|
| |(N=257) |(N=243) |(N=500) | |
+-------+-----------+-----------+-----------+-------+
|sex : m|0.52 (133)|0.52 (126)|0.52 (259)| 1 |
+-------+-----------+-----------+-----------+-------+
Note there is no need to define your own summaryM2 function. Just use catTest = to pass in your function.