Chi square matrix residuals - r

below is a function to extract p-values from multiple Chi-Square tests and display them as a matrix. I'm trying to do the same, but to extract residuals instead. Any help is appreciated.
Sample data:
df <- data.frame(first_column = c(rep("E1_C1",5), rep("E1_C2",3), rep("E2_C2",7),rep("E3_C3",5)),
second_column = c(rep("E1_C1",3), rep("E1_C2",10), rep("E2_C2",4),rep("E3_C3",3)),
third_column = c(rep("E1_C1",7), rep("E1_C2",4), rep("E2_C2",3),rep("E3_C3",6)),
fourth_column = c(rep("E1_C1",4), rep("E1_C2",6), rep("E2_C2",6),rep("E3_C3",4))
)
Chi-square matrix function for P-Values:
chisqmatrix <- function(x) {
names = colnames(x); num = length(names)
m = matrix(nrow=num,ncol=num,dimnames=list(names,names))
for (i in 1:(num-1)) {
for (j in (i+1):num) {
#browser()
m[j,i] = chisq.test(x[, i, drop = TRUE],x[, j, drop = TRUE])$p.value
}
}
return (m)
}
Generate Chi-Square p-value matrix
res <- chisqmatrix(df)
res[, -ncol(res)]

In your case, the returned residuals is a 4x4 matrix. Instead of using a matrix to take the results, the following solution uses a list instead. This way you can have matrices of different sizes.
With minimal changes from your original code:
chisqlist <- function(x) {
names = colnames(x); num = length(names)
m = list()
index = 1
for (i in 1:(num-1)) {
for (j in (i+1):num) {
#browser()
m[[index]] = chisq.test(x[, i, drop = TRUE],x[, j, drop = TRUE])$residuals
index=index+1
}
}
return (m)
}
Edit:
I do prefer # Onyambu's answer, which I didn't see. It would be faster than a nested for loop.

Simply change your function from requesting $p.value to requesting $residuals. This will provide (observed - expected) / sqrt(expected). If you desire standardized residuals request $stdres.
chisqmatrix <- function(x) {
names = colnames(x); num = length(names)
m = matrix(nrow=num,ncol=num,dimnames=list(names,names))
for (i in 1:(num-1)) {
for (j in (i+1):num) {
#browser()
m[j,i] = chisq.test(x[, i, drop = TRUE],x[, j, drop = TRUE])$residuals
}
}
return (m)
}

Related

R; replace nested for loop with apply() function

I am working on data replacement in a matrix. The replaced data will be calculated by calculating the sd of the (1+2k)X(1+2k) matrix centered on the value.
replace.loop = function(n, m, k, pad){
#search the value row by row, column by column
for (i in n) {
for (j in m) {
pad[i,j] = sd(as.vector(pad[(i-k):(i+k),(j-k):(j+k)]))
}
}
return(pad) #return the matrix that finishing calculation
}
Is there any way to rewrite this function with any apply() function? I am an R starter learner, so I am not sure which apply() function I should use.
for example:
X = matrix(c(.5,.5,.4,.4,.3,.5,.5,.4,.3,.3,.4,.4,.3,.2,.2,.4,.4,.3,.2,.1,.3,.3,.2,.2,.1), ncol=5)
k = 1
pad.X = matrix(0, dim(X)[1]+2*k, dim(X)[2]+2*k)
n = (k+1):(dim(X)[1]+k); m = (k+1):(dim(X)[2]+k)
pad.X[n, m] = X
Thanks!
I created a copy of pad called padOut. mapply will return the results as a matrix, which you can then assign to the relevant portion of padOut using matrix indexing:
replace.apply = function(n, m, k, pad){
kk <- -k:k
idx <- expand.grid(n, m)
padOut <- pad
padOut[as.matrix(idx)] <- mapply(function(i) sd(pad[idx[i,1] + kk, idx[i, 2] + kk]), 1:(length(m)*length(n)))
return(padOut)
}
With your example data:
X = matrix(c(.5,.5,.4,.4,.3,.5,.5,.4,.3,.3,.4,.4,.3,.2,.2,.4,.4,.3,.2,.1,.3,.3,.2,.2,.1), ncol=5)
k = 1
pad.X = matrix(0, dim(X)[1]+2*k, dim(X)[2]+2*k)
n = (k+1):(dim(X)[1]+k); m = (k+1):(dim(X)[2]+k)
pad.X[n, m] = X
replace.loop = function(n, m, k, pad){
#search the value row by row, column by column
padOut <- pad
for (i in n) {
for (j in m) {
padOut[i,j] = sd(as.vector(pad[(i-k):(i+k),(j-k):(j+k)]))
}
}
return(padOut) #return the matrix that finishing calculation
}
pad1 <- replace.loop(n, m, k, pad.X)
pad2 <- replace.apply(n, m, k, pad.X)
identical(pad1, pad2)
#> [1] TRUE

Repeated sampling until condition

I am looking to sample repeatedly from a distribution with a specific condition.
I am sampling 50 values for four iterations and saving the results. However I need each individual results from the iteration to be smaller than the last result at the same position.
mu.c <- c(7,6,5,3) # Means of control chains
chains.sim <- function(vector, N) {
all.list <- list()
for (i in 1:length(vector)) {
Y <- MASS::rnegbin(n = N, mu = vector[i], theta = 4)
name <- paste('position:',i, sep = '')
all.list[[name]] <- Y
}
all.list
}
chains.sim(mu.c, 50)
The sampling part works fine, but the Y individual results are of course not always smaller than the results from the previous iteration ("position").
Is there a way to repeat the sampling process until the result is smaller?
I would really appreciate your help!
I would add a while loop inside your for loop which samples data sets until the condition is met.
mu.c <- c(7,6,5,3) # Means of control chains
chain.sim <- function(vector, N) {
all.list <- list()
all.list[[1]] <- MASS::rnegbin(n = N, mu = vector[1], theta = 4)
for (i in 2:length(vector)) {
is_smaller <- FALSE
while(!is_smaller){
Y <- MASS::rnegbin(n = N, mu = vector[i], theta = 4)
if (all(all.list[[i-1]] >= Y)) is_smaller <- TRUE
}
all.list[[i]] <- Y
}
all.list
}
chain.sim(mu.c, 3)
Note that I changed the condition to >=, because if 0 is generated in any round, it will never find smaller values. Also, with 50 elements this code will never stop, because it is really unlikely to get two samples where each value is smaller, let alone 4 different samples.
Edit:
it can be much faster by sampling individually as you pointed out
chain.sim <- function(vector, N) {
all.list <- list()
all.list[[1]] <- MASS::rnegbin(n = N, mu = vector[1], theta = 4)
for (i in 2:length(vector)) {
Y <- numeric(N)
for (j in 1:N){
previous_value <- all.list[[i-1]][j]
if (previous_value == 0){
Y[j] = 0
next
}
is_smaller <- FALSE
while(!is_smaller){
val <- MASS::rnegbin(1, mu = vector[i], theta = 4)
if (val <= previous_value) is_smaller <- TRUE
Y[j] <- val
}
}
all.list[[i]] <- Y
}
all.list
}
chain.sim(mu.c, 50)
If 0 is encountered anywhere, no more simulation is necessary as we know the next value can only be 0. This makes the simulation much faster

R: can `assign` be used for models?

Can the assign function in R be used to assign object names to models (e.g., gls models)? I'm guessing not b/c I keep getting a warning:
> Warning messages:
In assign(paste0(deparse(substitute(mod)), "_", i, j), update(mod, :
only the first element is used as variable name
As a result the objects are not created
Is there a way to do this?
Here is my fucntion code if it helps:
#Choose best corARMA structure for model of choice:
corARMA.chooser <- function(mod,min = 0,max = 3 ) {
#This function creates 1. object for each combo of ARMA {0:3} 2. AIC table comparing all of these models
mod <- get('mod')
aic.arma <- AIC(mod)
ps <- 0
qs <- 0
for(i in min:max) {
js <- if(i == 0) c(1:max) else c(min:max)
for(j in js) {
arma <- corARMA(p = i, q = j)
assign(paste0(deparse(substitute(mod)),'_',i,j), update(mod, .~., correlation = arma), envir = .GlobalEnv)
aic.arma <- c(aic.arma, AIC(get(paste0(deparse(substitute(mod)),'_',i,j))))
ps <- c(ps, i)
qs <- c(qs, i)
}
aic.arma.out <- data.frame(ps, qs, aic.arma)
aic.arma.out
}
}
Update:
I tried using the list approach, but I get the error:
Error in names(mod.list) <- c(names(mod.list), paste0(deparse(substitute(mod)), :
'names' attribute [1275] must be the same length as the vector [1]
EDIT: what actually tears your variable name apart is this line mod <- get('mod') where you overwrite your named instance of mod why do you actually do this? If change your function to this it behaves as I'd expect it to:
corARMA.chooser <- function(modIn,min = 0,max = 3 ) {
#This function creates 1. object for each combo of ARMA {0:3} 2. AIC table comparing all of these models mod <- get('modIn') aic.arma <- AIC(modIn) ps <- 0 qs <- 0 for(i in min:max) {
js <- if(i == 0) c(1:max) else c(min:max)
for(j in js) {
arma <- corARMA(p = i, q = j)
browser()
assign(paste0(deparse(substitute(modIn)),'_',i,j), update(mod, .~., correlation = arma), envir = .GlobalEnv)
aic.arma <- c(aic.arma, AIC(get(paste0(deparse(substitute(mod)),'_',i,j))))
ps <- c(ps, i)
qs <- c(qs, i)
}
aic.arma.out <- data.frame(ps, qs, aic.arma)
aic.arma.out
}
}
hope this is what you were trying to achieve.
Still not sure why the code works alone but not in the function, but it is clear that the deparse(substitute(mod)) is for some reason pulls mod apart to all of its parts first in the function, vs. simply creating a name of the object itself.
Here is my new code that works:
corARMA.chooser <- function(mod,p = 1,q = 0 ) {
#This function creates 1. object for each combo of ARMA {0:3} 2. AIC table comparing all of these models
mod.list <- NULL
nms <- NULL
aic.arma <- AIC(mod)
ps <- 0
qs <- 0
for(i in c(p)) {
js <- if(i == 0) c(q[q>0]) else c(q)
for(j in c(js)) {
arma <- corARMA(p = i, q = j)
mod.list <- c(mod.list, list(update(mod, .~., correlation = arma)))
names(mod.list) <- c(names(mod.list), paste0(deparse(substitute(mod)),'_',i,j))
aic.arma <- c(aic.arma, AIC(eval(parse(text=(paste0('mod.list$',deparse(substitute(mod)),'_',i,j))))))
ps <- c(ps, i)
qs <- c(qs, j)
}
}
assign(paste0(deparse(substitute(mod)),'_','ARMA'),mod.list, envir = .GlobalEnv)
aic.arma.out <- data.frame(p = ps, q = qs, AIC = aic.arma)
aic.arma.out
}

How to extract the p.value and estimate from cor.test()?

I perform cor.test for a dataset in a for loop, but I don't know how to extract the information like estimate and tau from my test.
Before performing for loop in the dataset, The cor.test() function returns as follows:
cor.test(armpit$Corynebacterium.1, armpit$Staphylococcus.1, alterantive="two-sided", method="kendall", exact=FALSE, continuity=TRUE)
return result
Here is my code for performing for loop. Now I want to extract estimate and tau from my test.
for (i in 1:8) {
for (j in 1:8) {
if (j != i)
cor.test( as.numeric(unlist(armpit[i])),
as.numeric(unlist(armpit[j])), alterantive="two-sided",
method="kendall", exact=FALSE, continuity=TRUE)
}
}
I have check the similar question from
similar question
Then I change my code as:
estimates = numeric(50)
pvalues = numeric(50)
for (i in 1:8) {
for (j in 1:8) {
if (j != i)
cor.test( as.numeric(unlist(armpit[i])),
as.numeric(unlist(armpit[j])), alterantive="two-sided",
method="kendall", exact=FALSE, continuity=TRUE)
estimates[i] = cor.test$estimate
pvalues[i]= cor.test$p-value
}
}
But it returns:
Error in cor.test$estimate : object of type 'closure' is not subsettable
Could anyone offer me some help about how to extract estimate and tau value from cor.test() function in a for loop? Thanks in advance.
cor.test returns a list. You can create an object to capture this list:
cor_test <- cor.test( as.numeric(unlist(armpit[i])), as.numeric(unlist(armpit[j])), alterantive="two-sided", method="kendall", exact=FALSE, continuity=TRUE)
Then use cor_test afterwards with $ to access each element of the list:
estimates[i] = cor_test$estimate
pvalues[i]= cor_test$p.value # note the ., not the -
The original error is pretty arcane, so it's understandable you were confused about this. You wrote cor.test$estimate, which asks R to access the estimate component of the cor.test function, not the result of the test.
estimates = numeric(50)
pvalues = numeric(50)
for (i in 1:8) {
for (j in 1:8) {
if (j != i)
cor_test <-
cor.test( as.numeric(unlist(armpit[i])),
as.numeric(unlist(armpit[j])), alterantive="two-sided",
method="kendall", exact=FALSE, continuity=TRUE)
estimates[i] = cor_test$estimate
pvalues[i]= cor_test$p.value
}
}
Alright, found it, we should have seen it earlier. The if (j != i) statement needs to have brackets around everything that should be done if the statement is true. With the particular formatting you had, R was not parsing it correctly. I couldn't get your data, so I made some up (which will test random rows against random columns). This works:
M <- matrix(rnorm(8*8), ncol = 8) # made up test data
estimates = numeric(50)
pvalues = numeric(50)
for (i in 1:8) {
for (j in 1:8) {
if (j != i) { # need this bracket
cor_test <- cor.test(M[i,], M[,j],
alternative="two.sided",
method="kendall", exact=FALSE, continuity=TRUE)
estimates[i] = cor_test$estimate
pvalues[i]= cor_test$p.value
} # and this bracket
}
}
estimates
pvalues
EDIT: alternative version to store all results in a data frame.
M <- matrix(rnorm(8*8), ncol = 8) # made up test data
ans <- data.frame(i = rep(NA, 64), j = rep(NA, 64), estimate = rep(NA, 64), pvalue = rep(NA, 64))
cnt <- 1
for (i in 1:8) {
for (j in 1:8) {
if (j != i) {
cor_test <- cor.test(M[i,], M[,j], alternative="two.sided", method="kendall", exact=FALSE, continuity=TRUE)
ans[cnt,1] <- i
ans[cnt,2] <- j
ans[cnt,3] <- cor_test$estimate
ans[cnt,4] <- cor_test$p.value
cnt <- cnt + 1
}
}
}
ans <- na.omit(ans)

3 d matrix calculation using apply

Functions like f=tanh the derivative can be written as df=f*(delta(i,j)-f). When g is a vector or column matrix, this can programmed as follows using a loop, transpose, and apply to calculate df.
set.seed(5)
g1<-matrix(rnorm(3),ncol=1)
f1<-tanh(g1)
df1a = matrix(NA,nrow=nrow(f1),ncol=nrow(f1))
df1b = matrix(NA,nrow=nrow(f1),ncol=nrow(f1))
for (i in seq(1,nrow(f1))) {
for (j in seq(1,nrow(f1))) {
df1a[i,j] = -f1[j]*f1[i]
df1b[i,j] = -f1[j]*f1[i]
}
df1b[i,i] = df1b[i,i]+f1[i]
}
df2a <- -f1 %*% t(f1)
df2b <- diag(as.list(f1))+df2a
df3a <- (-1)*apply(f1,1,'*',f1)
df3b <- diag(as.list(f1))+df3a
does_m1_m2_match = all.equal(df1b, df2b, tolerance = 1e-5)
does_m1_m3_match = all.equal(df1b, df3b, tolerance = 1e-5)
does_m1_m2_match
does_m1_m3_match
How to extend this when g is a matrix, and should be treated as a collection of column vectors. Here is the loop implementation. How to do the apply implementation?
ddf2a = array(NA,c(nrow(f2),nrow(f2),ncol(f2)))
ddf2b = array(NA,c(nrow(f2),nrow(f2),ncol(f2)))
for (k in seq(1,ncol(f2))) {
for (i in seq(1,nrow(f2))) {
for (j in seq(1,nrow(f2))) {
ddf2a[i,j,k] = -f2[j]*f2[i]
ddf2b[i,j,k] = -f2[j,k]*f2[i,k]
}
ddf2b[i,i,k] = ddf2b[i,i,k]+f2[i,k]
}
}
for (k in seq(1,ncol(f2))) {
does_m1_md_match = all.equal(df1b, ddf2b[,,k], tolerance = 1e-5)
print(paste('k',k,does_m1_md_match))
}

Resources