GLM code/logistic regression is taking too long to run - r

I am trying to run the following code. My computer keeps getting frozen when I try to run it. Therefore, I can see the correlation matrices, I am unable to view the results of the GLM/data arrays.
# running the assay
#which_p_value = "x1"
which_p_value = "groupcategory"
#which_p_value = "x1:groupcategory"
run_anova = FALSE
simulate_mixed_effect = TRUE
mixed_effect_sd = 3.094069
mixed_effect_sd_slope = 3.098661
library(tidyverse)
n_people <- c(2,5,10,15,20)
coef1 <- 1.61
coef2 <- -0.01
#coef3 <- 5
#coef4 <- 0
g1 = 0
g2 = 1
g3 = 2
distances <- c(60,90,135,202.5,303.75,455.625)/100
n_trials <- 35
oneto1000 <- 25
n_track_lengths <- length(distances)
groupcategory = c(rep(g1, n_track_lengths), rep(g2, n_track_lengths),rep(g3,n_track_lengths))
z = c(n_people)
emptydataframeforpowerplots = NULL
coef3s <- c(-5, -4, -3, -2,-1, 0, 1, 2, 3, 4, 5)
coef4s <- c(-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1)
Datarray <- array(dim=c(length(coef3s), length(coef4s),length(n_people)))
coef3_counter =1
for (coef3 in coef3s) {
coef4_counter =1
for (coef4 in coef4s) {
z1_g2 <- coef1 + coef2*distances + coef3*g2 + coef4*g2*distances
z1_g3 <- coef1 + coef2*distances + coef3*g3 + coef4*g3*distances
d = NULL
pr1 = 1/(1+exp(-z1_g2))
pr2 = 1/(1+exp(-z1_g3))
counter=1
for (i in n_people) {
for (j in 1:oneto1000){
df <- c()
for (k in 1:i){
# random effect from drawing a random intercept with sd = x
if (simulate_mixed_effect){
coef1_r = rnorm(1, mean=coef1, sd=mixed_effect_sd)
coef2_r = rnorm(1, mean=coef1, sd=mixed_effect_sd_slope)
} else {
coef1_r = coef1
coef2_r = coef2
}
z_g1 <- coef1_r + coef2*distances + coef3*g1 + coef4*g1*distances
pr = 1/(1+exp(-z_g1))
z1_g2 <- coef1_r + coef2*distances + coef3*g2 + coef4*g2*distances
pr1 = 1/(1+exp(-z1_g2))
if (run_anova) {
df <- rbind(df, data.frame(x1 = c(rep(distances, 3)),
y = c(rbinom(n_track_lengths,n_trials,pr), rbinom(n_track_lengths,n_trials,pr1),rbinom(n_track_lengths,n_trials,pr2)),
groupcategory = groupcategory, id = c(rep(k,18))))
} else { # this is for glmer data organisation
for (m in 1:n_trials) {
df <- rbind(df, data.frame(x1 = c(rep(distances, 3)),
y = c(rbinom(n_track_lengths,1,pr),rbinom(n_track_lengths,1,pr1),rbinom(n_track_lengths,1,pr2)),groupcategory = groupcategory,id = c(rep(k,18))))
}
}
}
if (run_anova) {
#df_aov <- aov(y~x1*groupcategory+Error(id/(x1*groupcategory)),data=df)
#df_aov_sum <- summary(df_aov)
#pvalue <- df_aov_sum[[5]][[1]][which_p_value,"Pr(>F)"]
df_aov <- aov(y~x1*groupcategory+Error(id),data=df)
df_aov_sum <- summary(df_aov)
pvalue <- df_aov_sum[[2]][[1]][which_p_value, "Pr(>F)"]
}
checkme <- df %>% group_by(groupcategory,id) %>% summarise(miny=min(y),maxy=max(y)) %>% mutate(expectfail = miny==maxy)
else {
mod_group_glmer <- glmer(y ~ x1 + groupcategory + (1+x1|id), data = df, family = "binomial")
sum <- summary(mod_group_glmer)
pvalue <- sum$coefficients[which_p_value, "Pr(>|z|)"]
}
d = rbind(d,data.frame(pvalue))
}
count <- plyr::ldply(d,function(c) sum(c<=0.05))
Datarray[coef3_counter,coef4_counter,counter] <- count$V1/oneto1000
counter = counter +1
d = NULL
}
coef4_counter = coef4_counter + 1
}
coef3_counter = coef3_counter + 1
}
Does anybody have any advice on how I can overcome this issue? I have tried different things such as lowering the range of sample sizes (n_people) but I have still been unsuccessful. My computer starts making a whirring noise and eventually I am forced to have to 'force quit' the program?

Related

R code for simulating stochastic asset price path

Consider the following model for the evolution of an asset's price:
This what I have done (in R). I could not find a function that randomly outputs +1 or -1, so I decided to adapt the inbuilt rbinom function.
## This code is in R
rm(list = ls())
library(dplyr)
library(dint)
library(magrittr)
library(stats)
path =
function(T, mu, sigma, p, x0) {
x = rep(NA, T)
x[1] = x0
for(i in 2:T){
z = if_else(rbinom(1,1,p) == 0, -1, 1)
x[i] = x[i-1] * exp(mu + sigma*z)
}
return(x)
}
## Just some testing
x_sim = path(T = 4, mu = 0, sigma = 0.01, p = 0.5, x0 = 100)
## Actual answer
Np = 10000
mc = matrix(nrow = 17, ncol = Np)
for(j in 1:Np){
mc[,j] = path(T = 17, mu = 0, sigma = 0.01, p = 0.5, x0 = 100)
}
test = mc[2:nrow(mc), ] >= 100
sum_test = colSums(test)
comp = sum(sum_test >= 1)/length(sum_test)
prob = 1 - comp
Does this make sense? Any help/tips/advice would be much appreciated. Thanks!
Staying close to your code, I came up with this. Intuitively, if you think about it, the probability should be rather low due to the parameters and I get a probability of about 6.7% which is roughly what I get if I run your code with the parameters from the assignment.
simpath <- function(t, mu, sigma, p, x0, seed){
# set seed
if(!missing(seed)){
set.seed(seed)
}
# set up matrix for storing the results
res <- matrix(c(1:t, rep(NA, t*2)), ncol = 3)
colnames(res) <- c('t', 'z_t', 'x_t')
res[, 'z_t'] <- sample(c(1, -1), size = t, prob = c(p, 1-p), replace = TRUE)
res[1, 3] <- x0
for(i in 2:t){
res[i, 3] <- res[i-1, 3] * exp(mu+sigma*res[i, 2])
}
return(res)
}
x_sim <- simpath(t = 4, mu = 0, sigma = 0.01, p = 0.5, x0 = 100, seed = 123)
x_sim2 <- simpath(t = 36, mu = 0, sigma = 0.03, p = 0.5, x0 = 100, seed = 123)
## Actual answer
Np <- 100000
mc <- matrix(nrow = 36, ncol = Np)
for (j in 1:Np){
mc[, j] <- simpath(t = 36, mu = 0, sigma = 0.03, p = 0.5, x0 = 100)[, 3]
}
test <- mc > 100
sum_test <- colSums(test)
comp = sum(sum_test == 0)/length(sum_test)
prob = comp
> prob
[1] 0.06759

Solving for an input value of an R function

In my attached R function, I was wondering how to solve for mdes (suppose it is unknown) which is currently one of the input values IF everything else is known?
Is it also possible to solve for mdes and power (both currently input values) IF everything else is known?
foo <- function(A = 200, As = 15, B = 100,Bs = 10,iccmax = 0.15,mdes = .25,SD = 1.2,power = 80)
{
tail <- 2
alpha <- 5
inv_d <- function(mdes) {
c(mean_dif = 1, Vmax = 2/mdes^2)
}
SDr <- 1/SD
pars <- inv_d(mdes)
mean_dif <- pars[[1]]
Vmax <- pars[[2]]
zbeta <- qnorm((power/100))
zalpha <- qnorm(1-(alpha/(100*tail)))
maxvarmean_difhat <- (mean_dif / (zbeta + zalpha))**2
ntreat <- sqrt((A/As)*((1-iccmax)/iccmax))
ncont <- sqrt((B/Bs)*((1-iccmax)/iccmax))
costpertreatcluster <- A + (As*ntreat)
costperconcluster <- B + (Bs*ncont)
gtreat <- (sqrt(A*iccmax) + sqrt(As*(1-iccmax)))**2
gcon <- (sqrt(B*iccmax) + sqrt(Bs*(1-iccmax)))**2
pratio <- sqrt(gtreat/gcon)
budgetratio <- 99999
budgetratio <- ifelse( ((pratio <= SD) & (pratio >= SDr)), pratio**2, ifelse((pratio > SD), pratio*SD, pratio*SDr))
fraction <- budgetratio/(1 + budgetratio)
mmvnumer <- 99999
mmvnumer <- ifelse( ((pratio <= SD) & (pratio >= SDr)),
gcon*Vmax*(1+(pratio**2)),
ifelse((pratio > SD),
gcon*Vmax*(((pratio*SD)+1)**2/((SD**2)+1)),
gcon*Vmax*(((pratio*SDr)+1)**2/((SDr**2) + 1))) )
budget <- mmvnumer/maxvarmean_difhat
treatbudget <- fraction*budget
conbudget <- (1-fraction)*budget
ktreat <- treatbudget/costpertreatcluster
kcont <- conbudget/costperconcluster
ktreatrup <- ceiling(ktreat)
kcontrup <- ceiling(kcont)
ktreatplus <- ifelse(pmin(ktreatrup,kcontrup) < 8, ktreatrup + 3, ktreatrup + 2)
kcontplus <- ifelse(pmin(ktreatrup,kcontrup) < 8, kcontrup + 3, kcontrup + 2)
budgetplus <- (ktreatplus*costpertreatcluster) + (kcontplus*costperconcluster)
return(c(ncont = ncont, kcont = kcontplus,
ntreat = ntreat, ktreat = ktreatplus, budget = budgetplus))
}
#--------------------------------------------------------------------------------
# EXAMPLE OF USE:
foo()
ncont kcont ntreat ktreat budget
7.527727 73.000000 8.692270 62.000000 33279.051347
Define a function of one variable as
p0 = foo()
fn1 = function(x) sum((foo(mdes=x) - p0)^2)
and find a minimum that should be 0, and which corresponds to your mdes = 0.25 input!
optimize(fn1, c(0.0, 1.0))
## $minimum
## [1] 0.2497695
## $objective
## [1] 0
For two variables, this is more difficult, as the function has many local minima and is ill-defined outside certain regions. Applying optim() you will need well-chosen starting points.

Lapack routine dgesv: system is exactly singular: U[6,6] = 0

I am trying to run the code below in order to simulate a set of P-values using a generalised linear model
However, I keep getting the error: Lapack routine dgesv: system is exactly singular: U[6,6] = 0
Here is the code I am trying to run:
#which_p_value = "x1"
which_p_value = "groupcategory"
#which_p_value = "x1:groupcategory"
run_anova = FALSE
simulate_mixed_effect = TRUE
mixed_effect_sd = 3.094069
mixed_effect_sd_slope = 3.098661
library(tidyverse)
n_people <- c(2,5,10,15,20)
coef1 <- 1.61
coef2 <- -0.01
#coef3 <- 5
#coef4 <- 0
g1 = 0
g2 = 1
g3 = 2
distances <- c(60,90,135,202.5,303.75,455.625)/100
n_trials <- 35
oneto1000 <- 25
n_track_lengths <- length(distances)
groupcategory = c(rep(g1, n_track_lengths), rep(g2, n_track_lengths),rep(g3,n_track_lengths))
z = c(n_people)
emptydataframeforpowerplots = NULL
coef3s <- c(-5, -4, -3, -2,-1, 0, 1, 2, 3, 4, 5)
coef4s <- c(-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1)
Datarray <- array(dim=c(length(coef3s), length(coef4s),length(n_people)))
coef3_counter =1
for (coef3 in coef3s) {
coef4_counter =1
for (coef4 in coef4s) {
z1_g2 <- coef1 + coef2*distances + coef3*g2 + coef4*g2*distances
z1_g3 <- coef1 + coef2*distances + coef3*g3 + coef4*g3*distances
d = NULL
pr1 = 1/(1+exp(-z1_g2))
pr2 = 1/(1+exp(-z1_g3))
counter=1
for (i in n_people) {
for (j in 1:oneto1000){
df <- c()
for (k in 1:i){
# random effect from drawing a random intercept with sd = x
if (simulate_mixed_effect){
coef1_r = rnorm(1, mean=coef1, sd=mixed_effect_sd)
coef2_r = rnorm(1, mean=coef1, sd=mixed_effect_sd_slope)
} else {
coef1_r = coef1
coef2_r = coef2
}
z_g1 <- coef1_r + coef2*distances + coef3*g1 + coef4*g1*distances
pr = 1/(1+exp(-z_g1))
z1_g2 <- coef1_r + coef2*distances + coef3*g2 + coef4*g2*distances
pr1 = 1/(1+exp(-z1_g2))
if (run_anova) {
df <- rbind(df, data.frame(x1 = c(rep(distances, 3)),
y = c(rbinom(n_track_lengths,n_trials,pr), rbinom(n_track_lengths,n_trials,pr1),rbinom(n_track_lengths,n_trials,pr2)),
groupcategory = groupcategory, id = c(rep(k,18))))
} else { # this is for glmer data organisation
for (m in 1:n_trials) {
df <- rbind(df, data.frame(x1 = c(rep(distances, 3)),
y = c(rbinom(n_track_lengths,1,pr),rbinom(n_track_lengths,1,pr1),rbinom(n_track_lengths,1,pr2)),groupcategory = groupcategory,id = c(rep(k,18))))
}
}
}
if (run_anova) {
#df_aov <- aov(y~x1*groupcategory+Error(id/(x1*groupcategory)),data=df)
#df_aov_sum <- summary(df_aov)
#pvalue <- df_aov_sum[[5]][[1]][which_p_value,"Pr(>F)"]
df_aov <- aov(y~x1*groupcategory+Error(id),data=df)
df_aov_sum <- summary(df_aov)
pvalue <- df_aov_sum[[2]][[1]][which_p_value, "Pr(>F)"]
} else { # glmer
mod_group_glmer <- glmer(y ~ x1 + groupcategory + (1+x1|id), data = df, family = "binomial")
sum <- summary(mod_group_glmer)
pvalue <- sum$coefficients[which_p_value, "Pr(>|z|)"]
}
d = rbind(d,data.frame(pvalue))
}
count <- plyr::ldply(d,function(c) sum(c<=0.05))
Datarray[coef3_counter,coef4_counter,counter] <- count$V1/oneto1000
counter = counter +1
d = NULL
}
coef4_counter = coef4_counter + 1
}
coef3_counter = coef3_counter + 1
}
Below is the script from the debugger:
Lapack routine dgesv: system is exactly singular: U[6,6] = 0
8. stopifnot(length(value <- as.numeric(value)) == 1L)
7. nM$newf(fn(nM$xeval()))
6. (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, n), control = list()) { n <- length(par) ...
5. do.call(optfun, arglist)
4. withCallingHandlers(do.call(optfun, arglist), warning = function(w) { curWarnings <<- append(curWarnings, list(w$message)) })
3. optwrap(optimizer, devfun, start, rho$lower, control = control, adj = adj, verbose = verbose, ...)
2. optimizeGlmer(devfun, optimizer = control$optimizer[[2]], restart_edge = control$restart_edge, boundary.tol = control$boundary.tol, control = control$optCtrl, start = start, nAGQ = nAGQ, verbose = verbose, stage = 2, calc.derivs = control$calc.derivs, use.last.params = control$use.last.params)
1. glmer(y ~ x1 + groupcategory + (1 + x1 id), data = df, family = "binomial")
Would anybody be able to give a helping hand as to how I can proceed from here?

R: incorporating fisher.test into Hmisc's summaryM leads to error

catTestfisher <-
function (tab)
{
st <- if (!is.matrix(tab) || nrow(tab) < 2 | ncol(tab) <
2)
list(p.value = NA, statistic = NA, parameter = NA)
else {
rowcounts <- tab %*% rep(1, ncol(tab))
tab <- tab[rowcounts > 0, ]
if (!is.matrix(tab))
list(p.value = NA, statistic = NA, parameter = NA)
else fisher.test(tab)
}
list(P = st$p.value, stat = "", df = "",
testname = "Fisher's Exact", statname = "", latexstat = "", namefun = "",
plotmathstat = "")
}
I wanted to use library(Hmisc)'s summaryM function but with Fisher's exact test, so I wrote a catTestfisher function and set catTest = catTestfisher in my own summaryM2 function, which is exactly the same as summaryM, except for catTest = catTestfisher
summaryM2 <-
function (formula, groups = NULL, data = NULL, subset, na.action = na.retain,
overall = FALSE, continuous = 10, na.include = FALSE, quant = c(0.025,
0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95,
0.975), nmin = 100, test = FALSE, conTest = conTestkw,
catTest = catTestfisher, ordTest = ordTestpo)
{
marg <- length(data) && ".marginal." %in% names(data)
if (marg)
formula <- update(formula, . ~ . + .marginal.)
formula <- Formula(formula)
Y <- if (!missing(subset) && length(subset))
model.frame(formula, data = data, subset = subset, na.action = na.action)
else model.frame(formula, data = data, na.action = na.action)
X <- model.part(formula, data = Y, rhs = 1)
Y <- model.part(formula, data = Y, lhs = 1)
getlab <- function(x, default) {
lab <- attr(x, "label")
if (!length(lab) || lab == "")
default
else lab
}
if (marg) {
xm <- X$.marginal.
X$.marginal. <- NULL
}
else xm <- rep("", nrow(X))
if (length(X)) {
xname <- names(X)
if (length(xname) == 1 && !length(groups))
groups <- xname
if (!length(groups) && length(xname) > 1) {
warnings("Must specify groups when > 1 right hand side variable is present.\ngroups taken as first right hand variable.")
groups <- xname[1]
}
svar <- if (length(xname) == 1)
factor(rep(".ALL.", nrow(X)))
else do.call("interaction", list(X[setdiff(xname, groups)],
sep = " "))
group <- X[[groups]]
glabel <- getlab(group, groups)
}
else {
svar <- factor(rep(".ALL.", nrow(Y)))
group <- rep("", nrow(Y))
groups <- group.freq <- NULL
glabel <- ""
}
quants <- unique(c(quant, 0.025, 0.05, 0.125, 0.25, 0.375,
0.5, 0.625, 0.75, 0.875, 0.95, 0.975))
nv <- ncol(Y)
nameY <- names(Y)
R <- list()
for (strat in levels(svar)) {
instrat <- svar == strat
n <- integer(nv)
type <- n
comp <- dat <- vector("list", nv)
names(comp) <- names(dat) <- nameY
labels <- Units <- vector("character", nv)
if (test) {
testresults <- vector("list", nv)
names(testresults) <- names(comp)
}
gr <- group[instrat]
xms <- xm[instrat]
if (all(xms != ""))
xms <- rep("", length(xms))
group.freq <- table(gr)
group.freq <- group.freq[group.freq > 0]
if (overall)
group.freq <- c(group.freq, Combined = sum(group.freq))
for (i in 1:nv) {
w <- Y[instrat, i]
if (length(attr(w, "label")))
labels[i] <- attr(w, "label")
if (length(attr(w, "units")))
Units[i] <- attr(w, "units")
if (!inherits(w, "mChoice")) {
if (!is.factor(w) && !is.logical(w) && length(unique(w[!is.na(w)])) <
continuous)
w <- as.factor(w)
s <- !is.na(w)
if (na.include && !all(s) && length(levels(w))) {
w <- na.include(w)
levels(w)[is.na(levels(w))] <- "NA"
s <- rep(TRUE, length(s))
}
n[i] <- sum(s & xms == "")
w <- w[s]
g <- gr[s, drop = TRUE]
if (is.factor(w) || is.logical(w)) {
tab <- table(w, g)
if (test) {
if (is.ordered(w))
testresults[[i]] <- ordTest(g, w)
else testresults[[i]] <- catTest(tab)
}
if (nrow(tab) == 1) {
b <- casefold(dimnames(tab)[[1]], upper = TRUE)
pres <- c("1", "Y", "YES", "PRESENT")
abse <- c("0", "N", "NO", "ABSENT")
jj <- match(b, pres, nomatch = 0)
if (jj > 0)
bc <- abse[jj]
else {
jj <- match(b, abse, nomatch = 0)
if (jj > 0)
bc <- pres[jj]
}
if (jj) {
tab <- rbind(tab, rep(0, ncol(tab)))
dimnames(tab)[[1]][2] <- bc
}
}
if (overall)
tab <- cbind(tab, Combined = apply(tab, 1,
sum))
comp[[i]] <- tab
type[i] <- 1
}
else {
sfn <- function(x, quant) {
o <- options(digits = 10)
on.exit(options(o))
c(quantile(x, quant), Mean = mean(x), SD = sqrt(var(x)),
N = sum(!is.na(x)))
}
qu <- tapply(w, g, sfn, simplify = TRUE, quants)
if (test)
testresults[[i]] <- conTest(g, w)
if (overall)
qu$Combined <- sfn(w, quants)
comp[[i]] <- matrix(unlist(qu), ncol = length(quants) +
3, byrow = TRUE, dimnames = list(names(qu),
c(format(quants), "Mean", "SD", "N")))
if (any(group.freq <= nmin))
dat[[i]] <- lapply(split(w, g), nmin = nmin,
function(x, nmin) if (length(x) <= nmin)
x
else NULL)
type[i] <- 2
}
}
else {
w <- as.numeric(w) == 1
n[i] <- sum(!is.na(apply(w, 1, sum)) & xms ==
"")
g <- as.factor(gr)
ncat <- ncol(w)
tab <- matrix(NA, nrow = ncat, ncol = length(levels(g)),
dimnames = list(dimnames(w)[[2]], levels(g)))
if (test) {
pval <- numeric(ncat)
names(pval) <- dimnames(w)[[2]]
d.f. <- stat <- pval
}
for (j in 1:ncat) {
tab[j, ] <- tapply(w[, j], g, sum, simplify = TRUE,
na.rm = TRUE)
if (test) {
tabj <- rbind(table(g) - tab[j, ], tab[j,
])
st <- catTest(tabj)
pval[j] <- st$P
stat[j] <- st$stat
d.f.[j] <- st$df
}
}
if (test)
testresults[[i]] <- list(P = pval, stat = stat,
df = d.f., testname = st$testname, statname = st$statname,
latexstat = st$latexstat, plotmathstat = st$plotmathstat)
if (overall)
tab <- cbind(tab, Combined = apply(tab, 1,
sum))
comp[[i]] <- tab
type[i] <- 3
}
}
labels <- ifelse(nchar(labels), labels, names(comp))
R[[strat]] <- list(stats = comp, type = type, group.freq = group.freq,
labels = labels, units = Units, quant = quant, data = dat,
N = sum(!is.na(gr) & xms == ""), n = n, testresults = if (test) testresults)
}
structure(list(results = R, group.name = groups, group.label = glabel,
call = call, formula = formula), class = "summaryM")
}
After trying to test it on the following data, I get a warning and an error:
library(Hmisc)
set.seed(173)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE))
> summaryM2(sex ~ treatment, test=TRUE, overall = TRUE)
Error in round(teststat, 2) :
non-numeric argument to mathematical function
I tried stepping through the summaryM2 function line by line, but could not figure out what's causing the problem.
In your catTestfisher function, the output variables stat (test statistic) and df (degrees of freedom) should be numeric variables not empty strings. In the programming stat is coverted to teststat for rounding before being outputted (hence the error message for round("", 2) is non-numeric argument to mathematical function). See lines 1718 to 1721 in the summary.formula code) .
You can set df = NULL but a value is required for stat (not NA or NULL) otherwise no output is returned. You can get around the problem by setting stat = 0 (or any other number), and then only displaying the p value using prtest = "P".
catTestfisher2 <- function (tab)
{
st <- fisher.test(tab)
list(P = st$p.value, stat = 0, df = NULL,
testname = st$method, statname = "", latexstat = "", namefun = "",
plotmathstat = "")
}
output <- summaryM(sex ~ treatment, test=TRUE, overall = TRUE, catTest = catTestfisher2)
print(output, prtest = "P")
Descriptive Statistics (N=500)
+-------+-----------+-----------+-----------+-------+
| |Drug |Placebo |Combined |P-value|
| |(N=257) |(N=243) |(N=500) | |
+-------+-----------+-----------+-----------+-------+
|sex : m|0.52 (133)|0.52 (126)|0.52 (259)| 1 |
+-------+-----------+-----------+-----------+-------+
Note there is no need to define your own summaryM2 function. Just use catTest = to pass in your function.

loop with certain values is not working

I just need help for the first loop! I would like to run the loop for each certain value of m (see first line in code) but its running only for 1:10? The outcome shoud be stored in the last rows msediff1 to msediff100! Also i need the graphics for each value of m!Thanks in advance!
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
n <- 150
x1 <- rnorm(n = n, mean = 10, sd = 4)
R <- 100 # Number of reps
results.true <- matrix(NA , ncol = 2, nrow = R)
colnames(results.true) <- c("beta0.hat", "beta1.hat")
results.diff <- matrix(NA, ncol = 2, nrow = R)
colnames(results.diff) <- c("beta0.hat", "betadiff.hat")
sigma <- 1.2
beta <- c(1.2)
X <- cbind(x1)
if (m==1){d0 <- .7071; d <- c(-.7071)}
if (m==2){d0 = .8090; d = c(-.5,-.309)}
if (m==3){d0 = .8582; d = c(-.3832,-.2809,-.1942) }
if (m==4){d0 = .8873; d = c(-.3090,-.2464,-.1901,-.1409)}
if (m==5){d0 <- .9064; d <- c(-.2600,-.2167,-.1774,-.1420,-.1103)}
if (m==6){d0 = .92; d = c(-.2238,-.1925,-.1635,-.1369,-.1126,-.0906)}
if (m==7){d0 = .9302; d = c(-.1965,-.1728,-.1506,-.1299,-.1107,-.093,-.0768)}
if (m==8){d0 = .9380; d = c(-.1751,-.1565,-.1389,-.1224,-.1069,-.0925,-.0791,-.0666)}
if (m==9){d0 = .9443; d = c(-.1578,-.1429,-.1287,-.1152,-.1025,-.0905,-.0792,-.0687,-.0538)}
if (m==10){d0 <- .9494;
d <- c(-.1437, -.1314, -.1197, -.1085, -.0978, -.0877, -.0782, -.0691, -.0606, -.0527)}
if (m==25){d0 <- 0.97873;
d <- c(-0.06128, -0.05915, -0.05705, -0.05500, -0.05298, -0.05100, -0.04906, -0.04715, -0.04528, -0.04345, -0.04166, -0.03990, -0.03818, -0.03650, -0.03486, -0.03325, -0.03168, -0.03015, -0.02865, -0.02719,
-0.02577, -0.02438, -0.02303, -0.02171, -0.02043) }
if (m==50) {d0 <- 0.98918;
d <- c(-0.03132, -0.03077, -0.03023, -0.02969, -0.02916, -0.02863, -0.02811, -0.02759, -0.02708, -0.02657, -0.02606, -0.02556, -0.02507, -0.02458, -0.02409, -0.02361, -0.02314, -0.02266, -0.02220, -0.02174, -0.02128, -0.02083, -0.02038, -0.01994, -0.01950, -0.01907, -0.01864, -0.01822, -0.01780, -0.01739,-0.01698,-0.01658,-0.01618,-0.01578,-0.01539,-0.01501,-0.01463,-0.01425,-0.01388,-0.01352,
-0.01316,-0.01280,-0.01245,-0.01210,-0.01176,-0.01142,-0.01108,-0.01075,-0.01043,-0.01011) }
if (m==100) { d0 <- 0.99454083;
d <- c(-0.01583636,-0.01569757,-0.01555936,-0.01542178,-0.01528478,-0.01514841,-0.01501262,-0.01487745,-0.01474289,-0.01460892,
-0.01447556,-0.01434282,-0.01421067,-0.01407914,-0.01394819,-0.01381786,-0.01368816,-0.01355903,-0.01343053,-0.01330264,
-0.01317535,-0.01304868,-0.01292260,-0.01279714,-0.01267228,-0.01254803,-0.01242439,-0.01230136,-0.01217894,-0.01205713,
-0.01193592,-0.01181533,-0.01169534,-0.01157596,-0.01145719,-0.01133903,-0.01122148,-0.01110453,-0.01098819,-0.01087247,
-0.01075735,-0.01064283,-0.01052892,-0.01041563,-0.01030293,-0.01019085,-0.01007937,-0.00996850,-0.00985823,-0.00974857,
-0.00963952,-0.00953107,-0.00942322,-0.00931598,-0.00920935,-0.00910332,-0.00899789,-0.00889306,-0.00878884,-0.00868522,
-0.00858220,-0.00847978,-0.00837797,-0.00827675,-0.00817614,-0.00807612,-0.00797670,-0.00787788,-0.00777966,-0.00768203,
-0.00758500,-0.00748857,-0.00739273,-0.00729749,-0.00720284,-0.00710878,-0.00701532,-0.00692245,-0.00683017,-0.00673848,
-0.00664738,-0.00655687,-0.00646694,-0.00637761,-0.00628886,-0.00620070,-0.00611312,-0.00602612,-0.00593971,-0.00585389,
-0.00576864,-0.00568397,-0.00559989,-0.00551638,-0.00543345,-0.00535110,-0.00526933,-0.00518813,-0.00510750,-0.00502745) }
for(r in 1:R){
u <- rnorm(n = n, mean = 0, sd = sigma)
y <- X%*%beta + u
yy = d0* y[(m+1):n]; Xd <- d0* x1[(m+1):n];
for (i in 1:m) { yy <- yy + d[i]* y[(m+1-i):(n-i) ]
Xd = Xd + d[i]* x1[(m+1-i):(n-i)] }
reg.true <- lm(y ~ x1)
reg.diff <- lm(yy ~ Xd)
results.true[r, ] <- coef(reg.true)
results.diff[r, ] <- coef(reg.diff)
}
results.true
results.diff
beta
apply(results.true, MARGIN = 2, FUN = mean)
apply(results.diff, MARGIN = 2, FUN = mean)
co <- 2
dens.true <- density(results.true[, co])
dens.diff <- density(results.diff[, co])
win.graph()
plot(dens.true,
xlim = range(c(results.true[, co], results.diff[, co])),
ylim = range(c(dens.true$y, dens.diff$yy)),
main = "beta estimation true vs. diff", lwd = 2,)
lines(density(results.diff[, co]), col = "red", lwd = 2)
abline(v = beta, col = "blue", lwd = 2)
legend(x=1.24,y=12,c("outcome true","outcome diff"),lty=c(1,1),col =c("black","red") )
legend(x=1.12,y=12,c("m=",m))
#Mean Squared Error
mse=mean(reg.true$residuals^2)
if (m==1) {msediff1=mean(reg.diff$residuals^2)}
if (m==2) {msediff2=mean(reg.diff$residuals^2)}
if (m==3) {msediff3=mean(reg.diff$residuals^2)}
if (m==4) {msediff4=mean(reg.diff$residuals^2)}
if (m==5) {msediff5=mean(reg.diff$residuals^2)}
if (m==6) {msediff6=mean(reg.diff$residuals^2)}
if (m==7) {msediff7=mean(reg.diff$residuals^2)}
if (m==8) {msediff8=mean(reg.diff$residuals^2)}
if (m==9) {msediff9=mean(reg.diff$residuals^2)}
if (m==10) {msediff10=mean(reg.diff$residuals^2)}
if (m==25) {msediff25=mean(reg.diff$residuals^2)}
if (m==50) {msediff50=mean(reg.diff$residuals^2)}
if (m==100) {msediff100=mean(reg.diff$residuals^2)}
}
I can see an error in the code.
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
As soon as the loop starts, m is changed. It's not what's in the first line anymore...
Try, for (ind in 1:length(unique(m))){ if that's not the intention.

Resources