QUESTIONS
(1) what is the classification formula from the fit model in example code below named 'model1'? (is it formula A, B or Neither?)
(2) how does 'model1' determine if class == 1 vs. 2?
Formula A:
class(Species{1:2}) = (-31.938998) + (-7.501714 * [PetalLength]) + (63.670583 * [PetalWidth])
Formula B:
class(Species{1:2}) = 1.346075e-14 + (5.521371e-04 * [PetalLength]) + (4.485211e+27 * [PetalWidth])
USE CASE
Use R to fit/train a binary classification model, then interpret the model for the purpose of manual calculating classifications in Excel, not R.
MODEL COEFFICIENTS
>coef(model1)
#(Intercept) PetalLength PetalWidth
#-31.938998 -7.501714 63.670583
>exp(coef(model1))
#(Intercept) PetalLength PetalWidth
#1.346075e-14 5.521371e-04 4.485211e+27
R CODE EXAMPLE
# Load data (using iris dataset from Google Drive because uci.edu link wasn't working for me today)
#iris <- read.csv(url("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"), header = FALSE)
iris <- read.csv(url("https://docs.google.com/spreadsheets/d/1ovz31Y6PrV5OwpqFI_wvNHlMTf9IiPfVy1c3fiQJMcg/pub?gid=811038462&single=true&output=csv"), header = FALSE)
dataSet <- iris
#assign column names
names(dataSet) <- c("SepalLength", "SepalWidth", "PetalLength", "PetalWidth", "Species")
#col names
dsColNames <- as.character(names(dataSet))
#num of columns and rows
dsColCount <- as.integer(ncol(dataSet))
dsRowCount <- as.integer(nrow(dataSet))
#class ordinality and name
classColumn <- 5
classColumnName <- dsColNames[classColumn]
y_col_pos <- classColumn
#features ordinality
x_col_start_pos <- 1
x_col_end_pos <- 4
# % of [dataset] reserved for training/test and validation
set.seed(10)
sampleAmt <- 0.25
mainSplit <- sample(2, dsRowCount, replace=TRUE, prob=c(sampleAmt, 1-sampleAmt))
#split [dataSet] into two sets
dsTrainingTest <- dataSet[mainSplit==1, 1:5]
dsValidation <- dataSet[mainSplit==2, 1:5]
nrow(dsTrainingTest);nrow(dsValidation);
# % of [dsTrainingTest] reserved for training
sampleAmt <- 0.5
secondarySplit <- sample(2, nrow(dsTrainingTest), replace=TRUE, prob=c(sampleAmt, 1-sampleAmt))
#split [dsTrainingTest] into two sets
dsTraining <- dsTrainingTest[secondarySplit==1, 1:5]
dsTest <- dsTrainingTest[secondarySplit==2, 1:5]
nrow(dsTraining);nrow(dsTest);
nrow(dataSet) == nrow(dsTrainingTest)+nrow(dsValidation)
nrow(dsTrainingTest) == nrow(dsTraining)+nrow(dsTest)
library(randomGLM)
dataSetEnum <- dsTraining[,1:5]
dataSetEnum[,5] <- as.character(dataSetEnum[,5])
dataSetEnum[,5][dataSetEnum[,5]=="Iris-setosa"] <- 1
dataSetEnum[,5][dataSetEnum[,5]=="Iris-versicolor"] <- 2
dataSetEnum[,5][dataSetEnum[,5]=="Iris-virginica"] <- 2
dataSetEnum[,5] <- as.integer(dataSetEnum[,5])
x <- as.matrix(dataSetEnum[,1:4])
y <- as.factor(dataSetEnum[,5:5])
# number of features
N <- ncol(x)
# define function misclassification.rate
if (exists("misclassification.rate") ) rm(misclassification.rate);
misclassification.rate<-function(tab){
num1<-sum(diag(tab))
denom1<-sum(tab)
signif(1-num1/denom1,3)
}
#Fit randomGLM model - Ensemble predictor comprised of individual generalized linear model predictors
RGLM <- randomGLM(x, y, classify=TRUE, keepModels=TRUE,randomSeed=1002)
RGLM$thresholdClassProb
tab1 <- table(y, RGLM$predictedOOB)
tab1
# y 1 2
# 1 2 0
# 2 0 12
# accuracy
1-misclassification.rate(tab1)
# variable importance measure
varImp = RGLM$timesSelectedByForwardRegression
sum(varImp>=0)
table(varImp)
# select most important features
impF = colnames(x)[varImp>=5]
impF
# build single GLM model with most important features
model1 = glm(y~., data=as.data.frame(x[, impF]), family = binomial(link='logit'))
coef(model1)
#(Intercept) PetalLength PetalWidth
#-31.938998 -7.501714 63.670583
exp(coef(model1))
#(Intercept) PetalLength PetalWidth
#1.346075e-14 5.521371e-04 4.485211e+27
confint.default(model1)
# 2.5 % 97.5 %
#(Intercept) -363922.5 363858.6
#PetalLength -360479.0 360464.0
#PetalWidth -916432.0 916559.4
Your model is defined as
model1 <- glm(y~., data=as.data.frame(x[, impF]), family=binomial(link='logit'))
The family=binomial(link='logit')) bit is saying that the response y is a series of Bernoulli trials, i.e. a variable that takes values 1 or 0 depending on a parameter p, and that p = exp(m) / (1 + exp(m)), where m is a function of the data, called the linear predictor.
The formula y~. means that m = a + b PetalLength + c PetalWidth, where a, b, c are the model coefficients.
Therefore the probability of y = 1 is
> m <- model.matrix(model1) %*% coef(model1)
> exp(m) / (1+exp(m))
[,1]
20 3.448852e-11
50 1.253983e-13
65 1.000000e+00
66 1.000000e+00
87 1.000000e+00
105 1.000000e+00
106 1.000000e+00
107 1.000000e+00
111 1.000000e+00
112 1.000000e+00
116 1.000000e+00
118 1.000000e+00
129 1.000000e+00
130 1.000000e+00
We can check that this is the same as the output of fitted.values
> fitted.values(model1)
20 50 65 66 87 105
3.448852e-11 1.253983e-13 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
106 107 111 112 116 118
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
129 130
1.000000e+00 1.000000e+00
Finally, the response can be classified in two categories depending on whether P(Y = 1) is above or below a certain threshold. For example,
> ifelse(fitted.values(model1) > 0.5, 1, 0)
20 50 65 66 87 105 106 107 111 112 116 118 129 130
0 0 1 1 1 1 1 1 1 1 1 1 1 1
A GLM model has a link function and a linear predictor. You have not specified your link function above.
Let Y = {0,1} and X be a n x p matrix. (using pseudo-LaTeX) This leads to \hat Y= \phi(X \hat B) = \eta
where
- \eta is the linear predictor
- \phi() is the link function
The linear predictor is just X %*% \hat B and the classification back to P(Y=1|X) = \phi^{-1}(\eta) -- ie the inverse link function. The inverse link function obviously depends on the choice of link. For a logit, you have the inverse logit P(Y=1|X) = exp(eta) / (1+ exp(eta))
Related
Consider the following dataset
Quantity <- c(25,39,45,57,70,85,89,100,110,124,137,150,177)
Sales <- c(1000,1250,2600,3000,3500,4500,5000,4700,4405,4000,3730,3400,3300)
df <- data.frame(Quantity,Sales)
df
Plotting the data, the distribution of observations is clearly non-linear, but presents a likely breaking-point around Quantity = 89 (I skip the plot here). Therefore, I built a joint piecewise linear model as follows
df$Xbar <- ifelse(df$Quantity>89,1,0)
df$diff <- df$Quantity - 89
reg <- lm(Sales ~ Quantity + I(Xbar * (Quantity - 89)), data = df)
summary(reg)
or simply
df$X <- df$diff*df$Xbar
reg <- lm(Sales ~ Quantity + X, data = df)
summary(reg)
However, according to this parametrization, the coefficient of X represents the change in the slope from the preceding interval.
How can I parametrize the relevant coefficient to rather represent the slope for the second interval?
I did some research but I was unable to find the desired specification, apart from some automatization in stata (see the voice 'marginal' here https://www.stata.com/manuals13/rmkspline.pdf).
Any help is much appreciated. Thank you!
Acknowledgement:
the workable example is retrieved from
https://towardsdatascience.com/unraveling-spline-regression-in-r-937626bc3d96
The key here is to use a logical variable is.right which is TRUE for the points to the right of 89 and FALSE otherwise.
From the the output shown 60.88 is the slope to the left of 89 and -19.97 is the slope to the right. The lines intersect at Quantity = 89, Sales = 4817.30.
is.right <- df$Quantity > 89
fm <- lm(Sales ~ diff : is.right, df)
fm
## Call:
## lm(formula = Sales ~ diff:is.right, data = df)
##
## Coefficients:
## (Intercept) diff:is.rightFALSE diff:is.rightTRUE
## 4817.30 60.88 -19.97
Alternatives
Alternately if you want to use Xbar from the question do it this way. It gives the same coefficients as fm.
fm2 <- lm(Sales ~ diff : factor(Xbar), df)
or
fm3 <- lm(Sales ~ I(Xbar * diff) + I((1 - Xbar) * diff), df)
Double check with nls
We can double check these using nls with the following formulation which makes use of the fact that if we extend both lines the one to use at any Quantity is the lower of the two.
st <- list(a = 0, b1 = 1, b2 = -1)
fm4 <- nls(Sales ~ a + pmin(b1 * (Quantity - 89), b2 * (Quantity - 89)), start = st)
fm4
## Nonlinear regression model
## model: Sales ~ a + pmin(b1 * (Quantity - 89), b2 * (Quantity - 89))
## data: parent.frame()
## a b1 b2
## 4817.30 60.88 -19.97
## residual sum-of-squares: 713120
##
## Number of iterations to convergence: 1
## Achieved convergence tolerance: 2.285e-09
This would also work:
fm5 <- nls(Sales ~ a + ifelse(Quantity > 89, b2, b1) * diff, df, start = st)
Plot
Here is a plot:
plot(Sales ~ Quantity, df)
lines(fitted(fm) ~ Quantity, df)
Model matrix
And here is the model matrix for the linear regression:
> model.matrix(fm)
(Intercept) diff:is.rightFALSE diff:is.rightTRUE
1 1 -64 0
2 1 -50 0
3 1 -44 0
4 1 -32 0
5 1 -19 0
6 1 -4 0
7 1 0 0
8 1 0 11
9 1 0 21
10 1 0 35
11 1 0 48
12 1 0 61
13 1 0 88
If you know the breakpoints, then you almost have the model, it should be:
fit=lm(Sales ~ Quantity + Xbar + Quantity:Xbar,data=df)
Because if you don't introduce a new intercept (Xbar), it will start from the intercept already in the model, which will not work. We can plot it:
plot(df$Quantity,df$Sales)
newdata = data.frame(Quantity=seq(40,200,by=5))
newdata$Xbar= ifelse(newdata$Quantity>89,1,0)
lines(newdata$Quantity,predict(fit,newdata))
The coefficients are:
summary(fit)
Call:
lm(formula = Sales ~ Quantity * Xbar, data = df)
Residuals:
Min 1Q Median 3Q Max
-527.9 -132.2 -15.1 148.1 464.7
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -545.435 327.977 -1.663 0.131
Quantity 59.572 5.746 10.367 2.65e-06 ***
Xbar 7227.288 585.933 12.335 6.09e-07 ***
Quantity:Xbar -80.133 6.856 -11.688 9.64e-07 ***
And the coefficient of the 2nd slope is 59.572+(-80.133) = -20.561
I'm probably just missing something silly here but I can't seem to manually replicate the predicted values from this model. I'm following this example
library('foreign')
library('nnet')
library('tidyverse')
ml <- read.dta("https://stats.idre.ucla.edu/stat/data/hsbdemo.dta")
ml = ml %>%
mutate(prog2 = fct_relevel(prog, "academic"))
# Fit a very basic model of the students choice of program
# as a function of their socioeconmic status and writing score:
test <- multinom(prog2 ~ ses + write, data = ml)
summary(test)
# If we wanted to calculate the probability of a high SES student
# with a median writing score picking a vocational program,
# we should be able to do this:
coef = summary(test)$coefficients[2, c(1, 3:4)]
log_odds = sum(coef * c(1, 1, median(ml$write)))
prob = exp(log_odds)/(1 + exp(log_odds))
prob
# from preditions:
ml %>%
bind_cols(as_tibble(predict(test, type = 'probs'))) %>%
filter(ses == 'high', write == median(write))
I'm getting 13.0% from my manual calculation and the predict function gives 10.8%. What did I miss?
The prediction of a multinomial logistic model on the link & response scale can be obtained as follows (key is that the inverse link function for multinomial is the softmax function, not the inverse logit) :
library('foreign')
library('nnet')
library('tidyverse')
ml <- read.dta("https://stats.idre.ucla.edu/stat/data/hsbdemo.dta")
ml = ml %>%
mutate(prog2 = fct_relevel(prog, "academic"))
# Fit a very basic model of the students choice of program
# as a function of their socioeconmic status and writing score:
fit <- multinom(prog2 ~ ses + write, data = ml)
summary(fit)
# model predictions on link scale
X <- as.matrix(data.frame('(Intercept)'=1, sesmiddle=0, seshigh=1, write=median(ml$write), check.names=F))
# or X <- model.matrix(fit) to use model matrix
betahat <- t(rbind(0, coef(fit))) # model coefficients, with expicit zero row added for reference category & transposed
preds_link <- X %*% betahat # predictions on the link scale (with explicit zero for reference level included here, sometimes this level is dropped)
colnames(preds_link) <- fit$lev
# model prediction on response / probability scale
softMax <- function(eta){ # softmax function = inverse link function for multinomial
exp_eta <- exp(eta)
return(sweep(exp_eta, 1, STATS=rowSums(exp_eta), FUN="/"))
}
preds_response <- softMax(preds_link)
preds_response
# academic general vocation
# [1,] 0.721014 0.1710377 0.1079482
# this matches
ml %>%
bind_cols(as_tibble(predict(fit, type = 'probs'))) %>%
filter(ses == 'high', write == median(write))
# id female ses schtyp prog read write math science socst honors awards cid prog2 academic
# 71 90 female high public academic 42 54 50 50 52 not enrolled 1 8 academic 0.721014
# 92 130 female high public general 43 54 55 55 46 not enrolled 1 10 general 0.721014
# 140 97 male high public academic 60 54 58 58 61 not enrolled 1 14 academic 0.721014
# 157 96 female high public academic 65 54 61 58 56 not enrolled 1 16 academic 0.721014
# general vocation
# 71 0.1710377 0.1079482
# 92 0.1710377 0.1079482
# 140 0.1710377 0.1079482
# 157 0.1710377 0.1079482
Consider the following table :
DB <- data.frame(
Y =rnorm(6),
X1=c(T, T, F, T, F, F),
X2=c(T, F, T, F, T, T)
)
Y X1 X2
1 1.8376852 TRUE TRUE
2 -2.1173739 TRUE FALSE
3 1.3054450 FALSE TRUE
4 -0.3476706 TRUE FALSE
5 1.3219099 FALSE TRUE
6 0.6781750 FALSE TRUE
I'd like to explain my quantitative variable Y by two binary variables (TRUE or FALSE) without intercept.
The argument of this choice is that, in my study, we can't observe X1=FALSE and X2=FALSE at the same time, so it doesn't make sense to have a mean, other than 0, for this level.
With intercept
m1 <- lm(Y~X1+X2, data=DB)
summary(m1)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.9684 1.0590 -1.859 0.1600
X1TRUE 0.7358 0.9032 0.815 0.4749
X2TRUE 3.0702 0.9579 3.205 0.0491 *
Without intercept
m0 <- lm(Y~0+X1+X2, data=DB)
summary(m0)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
X1FALSE -1.9684 1.0590 -1.859 0.1600
X1TRUE -1.2325 0.5531 -2.229 0.1122
X2TRUE 3.0702 0.9579 3.205 0.0491 *
I can't explain why two coefficients are estimated for the variable X1. It seems to be equivalent to the intercept coefficient in the model with intercept.
Same results
When we display the estimation for all the combinations of variables, the two models are the same.
DisplayLevel <- function(m){
R <- outer(
unique(DB$X1),
unique(DB$X2),
function(a, b) predict(m,data.frame(X1=a, X2=b))
)
colnames(R) <- paste0('X2:', unique(DB$X2))
rownames(R) <- paste0('X1:', unique(DB$X1))
return(R)
}
DisplayLevel(m1)
X2:TRUE X2:FALSE
X1:TRUE 1.837685 -1.232522
X1:FALSE 1.101843 -1.968364
DisplayLevel(m0)
X2:TRUE X2:FALSE
X1:TRUE 1.837685 -1.232522
X1:FALSE 1.101843 -1.968364
So the two models are equivalent.
Question
My question is : can we just estimate one coefficient for the first effect ? Can we force R to assign a 0 value to the combinations X1=FALSE and X2=FALSE ?
Yes, we can, by
DB <- as.data.frame(data.matrix(DB))
## or you can do:
## DB$X1 <- as.integer(DB$X1)
## DB$X2 <- as.integer(DB$X2)
# Y X1 X2
# 1 -0.5059575 1 1
# 2 1.3430388 1 0
# 3 -0.2145794 0 1
# 4 -0.1795565 1 0
# 5 -0.1001907 0 1
# 6 0.7126663 0 1
## a linear model without intercept
m0 <- lm(Y ~ 0 + X1 + X2, data = DB)
DisplayLevel(m0)
# X2:1 X2:0
# X1:1 0.15967744 0.2489237
# X1:0 -0.08924625 0.0000000
I have explicitly coerced your TRUE/FALSE binary into numeric 1/0, so that no contrast is handled by lm().
The data appeared in my answer are different to yours, because you did not use set.seed(?) before rnorm() for reproducibility. But this is not a issue here.
After setting up the model and training it with Gibbs Sampling, I got the result of all the prediction of hidden values with:
jags <- jags.model('example.bug',
data = data,
n.chains = 4,
n.adapt = 100)
update(jags, 1000)
samples <- jags.samples(jags,
c('r','alpha','alpha_i','alpha_u','u','i'),
1000)
Where r is a list of rating, and some of them are withheld for a prediction with the model. And suppose I can get them with r[test], where test is a list of integer indicating the index of the rating withheld. But when I tried to get them using this way:
summary(samples$r, mean)[test]
I just got this:
$drop.dims
iteration chain
1000 4
Could you please tell me how to get the expected value? Thank you in advance!
draw samples
Absent your data or model I'll demonstrate using the simple example here, modified so that jags monitors the predicted outcomes.
library(rjags)
# simulate some data
N <- 1000
x <- 1:N
epsilon <- rnorm(N, 0, 1)
y <- x + epsilon
# define a jags model
writeLines("
model {
for (i in 1:N){
y[i] ~ dnorm(y.hat[i], tau)
y.hat[i] <- a + b * x[i]
}
a ~ dnorm(0, .0001)
b ~ dnorm(0, .0001)
tau <- pow(sigma, -2)
sigma ~ dunif(0, 100)
}
", con = "example2_mod.jags")
# create a jags model object
jags <- jags.model("example2_mod.jags",
data = list('x' = x,
'y' = y,
'N' = N),
n.chains = 4,
n.adapt = 100)
# burn-in
update(jags, 1000)
# drawing samples gives mcarrays
samples <- jags.samples(jags, c('a', 'b'), 1000)
str(samples)
# List of 2
# $ a: mcarray [1, 1:1000, 1:4] -0.0616 -0.0927 -0.0528 -0.0844 -0.06 ...
# ..- attr(*, "varname")= chr "a"
# $ b: mcarray [1, 1:1000, 1:4] 1 1 1 1 1 ...
# ..- attr(*, "varname")= chr "b"
# NULL
extract predictions
Our result, samples, is a list of mcarray objects with dimensions 1 x iterations x chains. You'd really want to run diagnostics at this point, but we'll jump to summarizing the samples from the posterior for your predictions. One approach is taking the mean over chains and iterations.
# extract posterior means from the mcarray object by marginalizing over
# chains and iterations (alternative: posterior modes)
posterior_means <- apply(samples$y.hat, 1, mean)
head(posterior_means)
# [1] 0.9201342 1.9202996 2.9204649 3.9206302 4.9207956 5.9209609
# reasonable?
head(predict(lm(y ~ x)))
# 1 2 3 4 5 6
# 0.9242663 1.9244255 2.9245847 3.9247439 4.9249031 5.9250622
out-of-sample predictions
Alternatively, here's how you could make out-of-sample predictions. I'll just reuse our existing feature vector x, but this could be test data instead.
# extract posterior means from the mcarray object by marginalizing over chains and iterations (alternative: posterior modes)
posterior_means <- lapply(samples, apply, 1, "mean")
str(posterior_means)
# List of 3
# $ a : num -0.08
# $ b : num 1
# $ y.hat: num [1:1000] 0.92 1.92 2.92 3.92 4.92 ...
# NULL
# create a model matrix from x
X <- cbind(1, x)
head(X)
# x
# [1,] 1 1
# [2,] 1 2
# [3,] 1 3
# [4,] 1 4
# [5,] 1 5
# [6,] 1 6
# take our posterior means
B <- as.matrix(unlist(posterior_means[c("a", "b")]))
# [,1]
# a -0.07530888
# b 1.00015874
Given the model, the predicted outcome is a + b * x[i] as we wrote in jags.
# predicted outcomes are the product of our model matrix and estimates
y_hat <- X %*% B
head(y_hat)
# [,1]
# [1,] 0.9248499
# [2,] 1.9250086
# [3,] 2.9251673
# [4,] 3.9253261
# [5,] 4.9254848
# [6,] 5.9256436
I would like to run the dependent variable of a logistic regression (in my data set it's : dat$admit) with all available variables, pairs and trios(3 Independent vars), each regression with a different Independent variables vs dependent variable. The outcome that I would like to get back is a list of each regression summary in a row: coeff,p-value ,AUC,CI 95%. Using the data set submitted below there should be 7 regressions:
dat$admit vs dat$female
dat$admit vs dat$apcalc
dat$admit vs dat$num
dat$admit vs dat$female + dat$apcalc
dat$admit vs dat$female + dat$num
dat$admit vs dat$apcalc + dat$num
dat$admit vs dat$female + dat$apcalc + dat$num
Here is a sample data set (where dat$admit is the logistic regression dependent variable) :
dat <- read.table(text = " female apcalc admit num
0 0 0 7
0 0 1 1
0 1 0 3
0 1 1 7
1 0 0 5
1 0 1 1
1 1 0 0
1 1 1 6",header = TRUE)
Per #marek comment, the output should be like this (for female alone and from female & apcalc ):
# Intercept Estimate P-Value (Intercept) P-Value (Estimate) AUC
# female 0.000000e+00 0.000000e+00 1 1 0.5
female+apcalc 0.000000e+00 0.000000e+00 1 1 0.5
There is a good code that #David Arenburg wrote that produces the stats but with no models creations of pairs and trios so I would like to know how can add the models creations.
Here is David Arenburg's code?
library(caTools)
ResFunc <- function(x) {
temp <- glm(reformulate(x,response="admit"), data=dat,family=binomial)
c(summary(temp)$coefficients[,1],
summary(temp)$coefficients[,4],
colAUC(predict(temp, type = "response"), dat$admit))
}
temp <- as.data.frame(t(sapply(setdiff(names(dat),"admit"), ResFunc)))
colnames(temp) <- c("Intercept", "Estimate", "P-Value (Intercept)", "P-Value (Estimate)", "AUC")
temp
# Intercept Estimate P-Value (Intercept) P-Value (Estimate) AUC
# female 0.000000e+00 0.000000e+00 1 1 0.5
# apcalc 0.000000e+00 0.000000e+00 1 1 0.5
# num 5.177403e-16 -1.171295e-16 1 1 0.5
Any idea how to create this list? Thanks, Ron
Simple solution is to make the list of models by hand:
results <- list(
"female" = glm(admit~female , family=binomial, dat)
,"apcalc" = glm(admit~apcalc , family=binomial, dat)
,"num" = glm(admit~num , family=binomial, dat)
,"female + apcalc" = glm(admit~female + apcalc, family=binomial, dat)
,"female + num" = glm(admit~female + num , family=binomial, dat)
,"apcalc + num" = glm(admit~apcalc + num , family=binomial, dat)
,"all" = glm(admit~female + apcalc + num, family=binomial, dat)
)
Then you could check models by lapplying over the list of models:
lapply(results, summary)
Or more advanced (coefficient statistics):
require(plyr)
ldply(results, function(m) {
name_rows(as.data.frame(summary(m)$coefficients))
})
In similar way you could extract every information you want. Just write function to extract statistics you want, which takes glm model as argument:
get_everything_i_want <- function(model) {
#... do what i want ...
# eg:
list(AIC = AIC(model))
}
and then apply to each model:
lapply(results, get_everything_i_want)
# $female
# $female$AIC
# [1] 15.0904
# $apcalc
# $apcalc$AIC
# [1] 15.0904
# $num
# $num$AIC
# [1] 15.0904
# $`female + apcalc`
# $`female + apcalc`$AIC
# [1] 17.0904
# $`female + num`
# $`female + num`$AIC
# [1] 17.0904
# $`apcalc + num`
# $`apcalc + num`$AIC
# [1] 17.0904
# $all
# $all$AIC
# [1] 19.0904