Extract inclusion probabilities and positive probabilities from a BoomSpikeSlab model in R

The default plot function of BoomSpikeSlab models is a bar plot of each predictor's inclusion probability, colored by its probability of being positive:
library(BoomSpikeSlab)

set.seed(0)
simulate.lm.spike <- function(n = 100, p = 10, ngood = 3, niter = 1000, sigma = 1) {
  x <- matrix(rnorm(n * (p - 1)), nrow = n)
  beta <- c(rnorm(ngood), rep(0, p - ngood))
  y <- rnorm(n, beta[1] + x %*% beta[-1], sigma)
  draws <- lm.spike(y ~ x, niter = niter)
  return(invisible(draws))
}
model <- simulate.lm.spike(n = 1000, p = 50, sigma = .3)
plot(model, inclusion.threshold = .01)
How can I extract the data behind this plot, i.e. a data frame with each predictor's inclusion probability and probability of being positive?

Adapting the PlotMarginalInclusionProbabilities function:
GetMarginalInclusionProbabilities <- function(model,
                                              burn = 0,
                                              inclusion.threshold = 0,
                                              unit.scale = TRUE,
                                              number.of.variables = NULL) {
  ## (The last three arguments are kept from the plotting function's
  ## signature; they are unused here.)
  beta <- model$beta
  if (burn > 0) {
    beta <- beta[-(1:burn), , drop = FALSE]
  }
  ## Inclusion probability: fraction of MCMC draws in which each
  ## coefficient is nonzero.
  inclusion.prob <- colMeans(beta != 0)
  index <- order(inclusion.prob)
  beta <- beta[, index, drop = FALSE]
  inclusion.prob <- inclusion.prob[index]
  compute.positive.prob <- function(x) {
    ## Compute the probability that x is positive, given that it is
    ## nonzero. If all(x == 0) then zero is returned.
    x <- x[x != 0]
    if (length(x) == 0) {
      return(0)
    }
    return(mean(x > 0))
  }
  positive.prob <- apply(beta, 2, compute.positive.prob)
  res <- data.frame(predictor = names(inclusion.prob),
                    inclusion.prob = inclusion.prob,
                    positive.prob = positive.prob)
  return(res[order(-res$inclusion.prob), ])
}
Example:
GetMarginalInclusionProbabilities(model)
# predictor inclusion.prob positive.prob
# (Intercept) (Intercept) 1.000 1
# x1 x1 1.000 0
# x2 x2 0.999 1
# x15 x15 0.014 1
# x43 x43 0.002 1
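If you only need the numbers, the same quantities can be computed directly from the matrix of MCMC draws in model$beta (a minimal sketch; it assumes the draw matrix carries the predictor names as column names, which is what the function above relies on via names(inclusion.prob)):
inc.prob <- colMeans(model$beta != 0)
pos.prob <- apply(model$beta, 2,
                  function(b) if (any(b != 0)) mean(b[b != 0] > 0) else 0)
data.frame(predictor = colnames(model$beta),
           inclusion.prob = inc.prob,
           positive.prob = pos.prob)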

Related

Condition for parameters in a function with optimx in R

Hi, I made a piecewise linear regression in R as below.
library(optimx)  # provides optimx()
x <- ques$Day
y <- ques$cells_L
fun <- function(par, x){
  y1 <- x^0 * par["i1"]
  y1[x >= par["x3"]] <- par["i3"]
  r1 <- x > par["x1"] & x < par["x2"]
  r2 <- x >= par["x2"] & x < par["x3"]
  y1[r1] <- par["i1"] + (par["i2"] - par["i1"]) / (par["x2"] - par["x1"]) * (x[r1] - par["x1"])
  y1[r2] <- par["i2"] + (par["i3"] - par["i2"]) / (par["x3"] - par["x2"]) * (x[r2] - par["x2"])
  y1
}
# sum of squared residuals
SSR <- function(par) {
  sum((y - fun(par, x))^2)
}
ls20 <- optimx(par = c(x1 = 100, x2 = 150, x3 = 200, i1 = 0, i2 = 6, i3 = 0),
               fn = SSR, method = "Nelder-Mead")
plot(ques$Day, ques$cells_L, col = "black", pch = 16)
lines(1:366,
      fun(c(x1 = ls20$x1, x2 = ls20$x2, x3 = ls20$x3,
            i1 = ls20$i1, i2 = ls20$i2, i3 = ls20$i3), 1:366), col = "red")
[result plot]
It works well, with the result shown in the plot above.
However, I want to constrain [i3], the third intercept value, to stay below the value of [i2].
Where do I put that condition on the range of [i3]?
[plot of the desired result]
I want the results to look like the blue line.
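One common way to impose such a constraint with Nelder-Mead (a hedged sketch, not from the original thread; SSR.con and ls20c are illustrative names, reusing fun, x, and y from above) is to penalize the objective whenever the constraint is violated:
SSR.con <- function(par) {
  if (par["i3"] >= par["i2"]) return(1e12)  # reject any i3 >= i2
  sum((y - fun(par, x))^2)
}
ls20c <- optimx(par = c(x1 = 100, x2 = 150, x3 = 200, i1 = 0, i2 = 6, i3 = 0),
                fn = SSR.con, method = "Nelder-Mead")
An alternative is to reparameterize: optimize a log-gap parameter logd and set i3 = i2 - exp(logd), which enforces i3 < i2 exactly without a penalty.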

Bayesian in R: Dimension mismatch in values supplied for betaA

I am working with a matrix of 35 rows and 16 columns. I am trying to run a Bayesian multistate model, but something in my model code prevents it from working. When I run the code in R, I get the error message:
Error in checkForRemoteErrors(val) :
  3 nodes produced errors; first error: RUNTIME ERROR:
  Dimension mismatch in values supplied for betaA
Any help is appreciated and my code is below:
# psi = movement probability
# phi = apparent survival
# p = detection probability
# o = occurrence probability
# load libraries
library(jagsUI)
library(lattice)
library(coda)
library("R2WinBUGS")
library("R2jags")
library(zoo)
devtools::install_github("bstaton1/postpack")
# initializing functions ####
known.state.ms <- function(ms, notseen){
  # notseen: label for 'not seen'
  state <- ms
  state[state == notseen] <- NA
  for (i in 1:dim(ms)[1]){
    m <- min(which(!is.na(state[i,])))
    state[i, m] <- NA
  }
  return(state)
}
# i = 1
# ch = CHY[i,]
# first = f[i]
z_inits = function(ch, first) {
  nt = length(ch)
  to_fill = which(ch == 4 & 1:nt >= first)
  to_keep = which(ch != 4 & 1:nt >= first)
  known = ch; known[to_fill] = NA
  unknown = rep(NA, nt)
  known_alive = rep(NA, nt)
  unknown[to_fill] = 2
  for (t in 1:nt) {
    known_alive[t] = ifelse(any(!is.na(known[t:nt])), 1, 0)
  }
  last_known_alive = max(which(known_alive == 1))
  if (last_known_alive < 16) {
    dead = rep(0, nt)
    for (t in (last_known_alive + 1):nt) {
      dead[t] = sample(c(0, 1), size = 1,
                       prob = matrix(c(0.9, 0.1, 0, 1), 2, 2, byrow = T)[dead[t-1] + 1, ])
    }
    unknown[dead == 1] = 4
  }
  unknown
}
# import data
dat <- read.csv("bass_encounter_history_0.csv")
covs <- read.csv("depth.csv")
depth = covs[,1]
histories <- unlist(lapply(dat$history, function(x) strsplit(x,split="")))
CH <- t(matrix(histories,nrow=16,ncol=35))
CH <- gsub("0",4,CH)
CH <- gsub("A",1,CH)
CH <- gsub("B",2,CH)
CH <- gsub("C",3,CH)
CH <- matrix(as.numeric(CH),nrow=35,ncol=16)
# Built the model####
nind= nrow(CH)
n.occasions = ncol(CH)
f=c(1,1,1,2,2,2,2,2,3,3,3,4,4,4,4,4,4,1,1,1,3,1,1,1,3,3,3,3,5,3,5,6,6,6,6) # initial tagging week
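A quick sanity check of the reshaped data (a small addition, just confirming the dimensions described in the question):
# 35 individuals x 16 occasions, one initial tagging week per individual
stopifnot(nrow(CH) == 35, ncol(CH) == 16, length(f) == nrow(CH))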
jags_model = function() {
  # -------------------------------------------------
  # Parameters:
  # phiA: survival probability at site A
  # phiB: survival probability at site B
  # phiC: survival probability at site C
  # psiA[1,t]: probability of staying in site A
  # psiA[2,t]: movement probability from site A to site B
  # psiB[1,t]: movement probability from site B to site A
  # psiB[2,t]: probability of staying in site B
  # psiB[3,t]: movement probability from site B to site C
  # psiC[1,t]: probability of staying in site C
  # psiC[2,t]: movement probability from site C to site B
  # betaA[i]: the effect of standardized flow on movement probabilities at site A
  # betaB[i]: the effect of standardized flow on movement probabilities at site B
  # betaC[i]: the effect of standardized flow on movement probabilities at site C
  # wA,B,C: the variable weight of the betas, 1 = essential, 0 = insignificant
  # pA: recapture probability at site A
  # pB: recapture probability at site B
  # pC: recapture probability at site C
  # -------------------------------------------------
  # States (S):
  # 1 alive at A
  # 2 alive at B
  # 3 alive at C
  # 4 dead
  # Observations (O):
  # 1 seen at A
  # 2 seen at B
  # 3 seen at C
  # 4 not seen
  # -------------------------------------------------
  # Priors and constraints
  # Survival and recapture: uniform
  phiA ~ dunif(0, 1)
  phiB ~ dunif(0, 1)
  phiC ~ dunif(0, 1)
  pA ~ dunif(0, 1)
  pB ~ dunif(0, 1)
  pC ~ dunif(0, 1)
  wA ~ dbern(.5)
  for(i in 1:3){
    wB[i] ~ dbern(.5)
  }
  wC ~ dbern(.5)
  for(t in 1:(n.occasions-1)){
    logit(psiA[1,t]) <- muA + wA*betaA*x[t]
    psiA[2,t] <- 1 - psiA[1,t]
    logit(psiC[1,t]) <- muC + wC*betaC*x[t]
    psiC[2,t] <- 1 - psiC[1,t]
    for(i in 1:3){
      b[i,t] <- exp(muB[i] + wB[i]*betaB[i]*x[t])
      psiB[i,t] <- b[i,t]/sum(b[,t])
    }
  }
  muA ~ dt(0, 1/1.566^2, 7.763)
  muC ~ dt(0, 1/1.566^2, 7.763)
  mean.psiA <- 1/(1+exp(-muA))
  # it's not really the mean - it's the probability of staying in A at the
  # mean value of x (only b/c x is z-transformed)
  mean.psiC <- 1/(1+exp(-muC))
  betaA ~ dt(0, 1/1.566^2, 7.763)
  betaC ~ dt(0, 1/1.566^2, 7.763)
  for(i in 1:2){
    muB[i] ~ dt(0, 1/1.566^2, 7.763)
    betaB[i] ~ dt(0, 1/1.566^2, 7.763)
  }
  muB[3] <- 0
  betaB[3] <- 0
  # PREDICTED TRANSITION PROBS FOR PLOTTING
  for(r in 1:n.depth){
    for(i in 1:3){
      pred.b[i,r] <- exp(muB[i] + wB[i]*betaB[i]*depthseq[r])
      pred.psiB[i,r] <- pred.b[i,r]/sum(pred.b[,r])
    }
    logit(pred.psiA[1,r]) <- muA + wA*betaA*depthseq[r]
    pred.psiA[2,r] <- 1 - pred.psiA[1,r]
    logit(pred.psiC[1,r]) <- muC + wC*betaC*depthseq[r]
    pred.psiC[2,r] <- 1 - pred.psiC[1,r]
  }
  # Define probabilities of state S(t+1) given S(t)
  for (t in 1:(n.occasions-1)){
    ps[1,t,1] <- phiA * psiA[1,t]
    ps[1,t,2] <- phiA * psiA[2,t]
    ps[1,t,3] <- 0
    ps[1,t,4] <- 1-phiA
    ps[2,t,1] <- phiB * psiB[1,t]
    ps[2,t,2] <- phiB * psiB[2,t]
    ps[2,t,3] <- phiB * psiB[3,t]
    ps[2,t,4] <- 1-phiB
    ps[3,t,1] <- 0
    ps[3,t,2] <- phiC * psiC[2,t]
    ps[3,t,3] <- phiC * psiC[1,t] # switch these so the coefs talk about prob(stay in C)
    ps[3,t,4] <- 1-phiC
    ps[4,t,1] <- 0
    ps[4,t,2] <- 0
    ps[4,t,3] <- 0
    ps[4,t,4] <- 1
    # Define probabilities of O(t) given S(t)
    po[1,t,1] <- pA
    po[1,t,2] <- 0
    po[1,t,3] <- 0
    po[1,t,4] <- 1-pA
    po[2,t,1] <- 0
    po[2,t,2] <- pB
    po[2,t,3] <- 0
    po[2,t,4] <- 1-pB
    po[3,t,1] <- 0
    po[3,t,2] <- 0
    po[3,t,3] <- pC
    po[3,t,4] <- 1-pC
    po[4,t,1] <- 0
    po[4,t,2] <- 0
    po[4,t,3] <- 0
    po[4,t,4] <- 1
  } #t
  # Likelihood
  for (i in 1:nind){
    # Define latent state at first capture
    z[i,f[i]] <- y[i,f[i]]
    for (t in (f[i]+1):n.occasions){
      # State process: draw S(t) given S(t-1)
      z[i,t] ~ dcat(ps[z[i,t-1], t-1,])
      # Observation process: draw O(t) given S(t)
      y[i,t] ~ dcat(po[z[i,t], t-1,])
    } #t
  } #i
}
jags_file = "invasiondepthmodel.txt"
postpack::write_model(jags_model, jags_file)
# Configure the model settings and initial values ####
depthseq = seq(min(depth),max(depth),length.out=100)
n.depth=length(depthseq)
#compile jags data object
jags_data <- list(y = CH, x = depth, depthseq = depthseq, n.depth = n.depth, f = f,
                  n.occasions = n.occasions, nind = nind, z = known.state.ms(CH, 4))
# specify initial values
jags_inits <- function(i){list(
  muA = runif(1,-1,1),
  muB = c(runif(2,-1,1),NA),
  muC = runif(1,-1,1),
  wA = rbinom(3, 1, 0.5),
  wB = rbinom(3, 1, 0.5),
  wC = rbinom(3, 1, 0.5),
  betaA = runif(2,-1,1),
  betaB = c(runif(2,-1,1),NA),
  betaC = runif(2,-1,1),
  phiA = runif(1, 0.5, 1),
  phiB = runif(1, 0.5, 1),
  phiC = runif(1, 0.5, 1),
  pA = runif(1, 0.5, 1),
  pB = runif(1, 0.5, 1),
  pC = runif(1, 0.5, 1),
  z = t(sapply(1:nind, function(i) z_inits(CH[i,], f[i])))
)}
# Parameters monitored
jags_params <- c("phiA","phiB","phiC",
                 "psiA","psiB","psiC",
                 "wA","wB","wC",
                 "muA","muB","muC",
                 "betaA","betaB","betaC",
                 "pA","pB","pC",
                 "pred.psiA","pred.psiB","pred.psiC")
jags_dims = c(
  na = 10000,   # number of samples in adapting phase
  ni = 40000,   # number of post-burn-in samples per chain
  nb = 40000,   # number of burn-in samples
  nt = 20,      # thinning rate
  nc = 3,       # number of chains
  parallel = T  # run chains in parallel?
); with(as.list(jags_dims), ni/nt * nc)  # saved samples across chains: 40000/20 * 3 = 6000
inits = lapply(1:jags_dims["nc"], jags_inits)
# Run the model in JAGS ####
starttime = Sys.time()
cat("MCMC Started: ", format(starttime), "\n")
post = jagsUI::jags.basic(
  data = jags_data,
  model.file = jags_file,
  inits = inits,
  parameters.to.save = jags_params,
  n.adapt = jags_dims["na"],
  n.iter = sum(jags_dims[c("ni", "nb")]),
  n.thin = jags_dims["nt"],
  n.burnin = jags_dims["nb"],
  n.chains = jags_dims["nc"],
  parallel = jags_dims["parallel"],
  verbose = F
)
I was expecting the dimensions of the supplied betaA values to match the model. However, it seems they do not.
In the model, betaA is a scalar: both its use in betaA*x[t] and its prior betaA ~ dt(0, 1/1.566^2, 7.763) indicate a single value. In the initial values, however, it is a vector of length 2: betaA = runif(2,-1,1). You either need to define it as a vector in the model or pass a single value in the inits, as in the sketch below.
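A hedged sketch of corrected inits (the error names only betaA, but betaC, wA, and wC also appear to be scalars in the model, so their lengths are adjusted here too):
jags_inits <- function(i){list(
  muA = runif(1,-1,1),
  muB = c(runif(2,-1,1),NA),
  muC = runif(1,-1,1),
  wA = rbinom(1, 1, 0.5),   # scalar in the model
  wB = rbinom(3, 1, 0.5),
  wC = rbinom(1, 1, 0.5),   # scalar in the model
  betaA = runif(1,-1,1),    # scalar in the model (was runif(2,-1,1))
  betaB = c(runif(2,-1,1),NA),
  betaC = runif(1,-1,1),    # scalar in the model
  phiA = runif(1, 0.5, 1),
  phiB = runif(1, 0.5, 1),
  phiC = runif(1, 0.5, 1),
  pA = runif(1, 0.5, 1),
  pB = runif(1, 0.5, 1),
  pC = runif(1, 0.5, 1),
  z = t(sapply(1:nind, function(i) z_inits(CH[i,], f[i])))
)}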

Using `cor.test()` on ranked data

I would like to do a Spearman correlation test using rank data. How can I do this with cor.test()? I don't want the function to rerank the data.
Additionally, what form does the data need to be in? From the help, it seems to be the raw data as compared to a correlation matrix.
Consider this example
## Hollander & Wolfe (1973), p. 187f.
## Assessment of tuna quality. We compare the Hunter L measure of
## lightness to the averages of consumer panel scores (recoded as
## integer values from 1 to 6 and averaged over 80 such values) in
## 9 lots of canned tuna.
library(tidyverse)
A <- tibble(
  x = c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1),
  y = c( 2.6,  3.1,  2.5,  5.0,  3.6,  4.0,  5.2,  2.8,  3.8)
) %>%
  mutate(rank_x = rank(x),
         rank_y = rank(y))
Spearman's correlation coefficient is defined as Pearson's correlation between the ranked variables:
cor(A$x, A$y, method = "spearman")
#[1] 0.6
cor(A$rank_x, A$rank_y, method = "pearson")
#[1] 0.6
What about cor.test()? Can I use the rank data as its input?
x1 <- cor.test(A$x, A$y, method = "spearman")
x1
# Spearman's rank correlation rho
#
# data: A$x and A$y
# S = 48, p-value = 0.1
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
# rho
# 0.6
x2 <- cor.test(A$rank_x, A$rank_y, method = "pearson")
x2
# Pearson's product-moment correlation
# data: A$rank_x and A$rank_y
# t = 2, df = 7, p-value = 0.09
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
# -0.11 0.90
# sample estimates:
# cor
# 0.6
x3 <- cor.test(A$rank_x, A$rank_y, method = "spearman")
x3
# Spearman's rank correlation rho
#
# data: A$rank_x and A$rank_y
# S = 48, p-value = 0.1
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
# rho
# 0.6
Yes, you should use method = "spearman" for either ranked or original data. The rank data is effectively unchanged inside the function: cor.test() does call rank() again (see the source below), but ranking already-ranked data is a no-op, so the results are identical.
As the help file implies, using method = "pearson" on rank data conducts a Pearson correlation test on the ranks, whose statistic is referred to a t-distribution. However, since ranks are not continuous variables, that approach is not correct.
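A quick check of both claims, reusing A, x1, and x3 from above (a minimal sketch):
all(rank(A$rank_x) == A$rank_x)  # TRUE: ranking already-ranked data changes nothing
x1$p.value == x3$p.value         # TRUE: same Spearman test either way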
getAnywhere(cor.test.default)
A single object matching ‘cor.test.default’ was found
It was found in the following places
registered S3 method for cor.test from namespace stats
namespace:stats
with value
function (x, y, alternative = c("two.sided", "less",
"greater"), method = c("pearson", "kendall",
"spearman"), exact = NULL, conf.level = 0.95, continuity = FALSE,
...)
{
alternative <- match.arg(alternative)
method <- match.arg(method)
DNAME <- paste(deparse1(substitute(x)), "and", deparse1(substitute(y)))
if (!is.numeric(x))
stop("'x' must be a numeric vector")
if (!is.numeric(y))
stop("'y' must be a numeric vector")
if (length(x) != length(y))
stop("'x' and 'y' must have the same length")
OK <- complete.cases(x, y)
x <- x[OK]
y <- y[OK]
n <- length(x)
NVAL <- 0
conf.int <- FALSE
if (method == "pearson") {
if (n < 3L)
stop("not enough finite observations")
method <- "Pearson's product-moment correlation"
names(NVAL) <- "correlation"
r <- cor(x, y)
df <- n - 2L
ESTIMATE <- c(cor = r)
PARAMETER <- c(df = df)
STATISTIC <- c(t = sqrt(df) * r/sqrt(1 - r^2))
if (n > 3) {
if (!missing(conf.level) && (length(conf.level) !=
1 || !is.finite(conf.level) || conf.level < 0 ||
conf.level > 1))
stop("'conf.level' must be a single number between 0 and 1")
conf.int <- TRUE
z <- atanh(r)
sigma <- 1/sqrt(n - 3)
cint <- switch(alternative, less = c(-Inf, z + sigma *
qnorm(conf.level)), greater = c(z - sigma * qnorm(conf.level),
Inf), two.sided = z + c(-1, 1) * sigma * qnorm((1 +
conf.level)/2))
cint <- tanh(cint)
attr(cint, "conf.level") <- conf.level
}
PVAL <- switch(alternative, less = pt(STATISTIC, df),
greater = pt(STATISTIC, df, lower.tail = FALSE),
two.sided = 2 * min(pt(STATISTIC, df), pt(STATISTIC,
df, lower.tail = FALSE)))
}
else {
if (n < 2)
stop("not enough finite observations")
PARAMETER <- NULL
TIES <- (min(length(unique(x)), length(unique(y))) <
n)
if (method == "kendall") {
method <- "Kendall's rank correlation tau"
names(NVAL) <- "tau"
r <- cor(x, y, method = "kendall")
ESTIMATE <- c(tau = r)
if (!is.finite(ESTIMATE)) {
ESTIMATE[] <- NA
STATISTIC <- c(T = NA)
PVAL <- NA
}
else {
if (is.null(exact))
exact <- (n < 50)
if (exact && !TIES) {
q <- round((r + 1) * n * (n - 1)/4)
STATISTIC <- c(T = q)
pkendall <- function(q, n) .Call(C_pKendall,
q, n)
PVAL <- switch(alternative, two.sided = {
if (q > n * (n - 1)/4) p <- 1 - pkendall(q -
1, n) else p <- pkendall(q, n)
min(2 * p, 1)
}, greater = 1 - pkendall(q - 1, n), less = pkendall(q,
n))
}
else {
xties <- table(x[duplicated(x)]) + 1
yties <- table(y[duplicated(y)]) + 1
T0 <- n * (n - 1)/2
T1 <- sum(xties * (xties - 1))/2
T2 <- sum(yties * (yties - 1))/2
S <- r * sqrt((T0 - T1) * (T0 - T2))
v0 <- n * (n - 1) * (2 * n + 5)
vt <- sum(xties * (xties - 1) * (2 * xties +
5))
vu <- sum(yties * (yties - 1) * (2 * yties +
5))
v1 <- sum(xties * (xties - 1)) * sum(yties *
(yties - 1))
v2 <- sum(xties * (xties - 1) * (xties - 2)) *
sum(yties * (yties - 1) * (yties - 2))
var_S <- (v0 - vt - vu)/18 + v1/(2 * n * (n -
1)) + v2/(9 * n * (n - 1) * (n - 2))
if (exact && TIES)
warning("Cannot compute exact p-value with ties")
if (continuity)
S <- sign(S) * (abs(S) - 1)
STATISTIC <- c(z = S/sqrt(var_S))
PVAL <- switch(alternative, less = pnorm(STATISTIC),
greater = pnorm(STATISTIC, lower.tail = FALSE),
two.sided = 2 * min(pnorm(STATISTIC), pnorm(STATISTIC,
lower.tail = FALSE)))
}
}
}
else {
method <- "Spearman's rank correlation rho"
if (is.null(exact))
exact <- TRUE
names(NVAL) <- "rho"
r <- cor(rank(x), rank(y))
ESTIMATE <- c(rho = r)
if (!is.finite(ESTIMATE)) {
ESTIMATE[] <- NA
STATISTIC <- c(S = NA)
PVAL <- NA
}
else {
pspearman <- function(q, n, lower.tail = TRUE) {
if (n <= 1290 && exact)
.Call(C_pRho, round(q) + 2 * lower.tail,
n, lower.tail)
else {
den <- (n * (n^2 - 1))/6
if (continuity)
den <- den + 1
r <- 1 - q/den
pt(r/sqrt((1 - r^2)/(n - 2)), df = n - 2,
lower.tail = !lower.tail)
}
}
q <- (n^3 - n) * (1 - r)/6
STATISTIC <- c(S = q)
if (TIES && exact) {
exact <- FALSE
warning("Cannot compute exact p-value with ties")
}
PVAL <- switch(alternative, two.sided = {
p <- if (q > (n^3 - n)/6) pspearman(q, n, lower.tail = FALSE) else pspearman(q,
n, lower.tail = TRUE)
min(2 * p, 1)
}, greater = pspearman(q, n, lower.tail = TRUE),
less = pspearman(q, n, lower.tail = FALSE))
}
}
}
RVAL <- list(statistic = STATISTIC, parameter = PARAMETER,
p.value = as.numeric(PVAL), estimate = ESTIMATE, null.value = NVAL,
alternative = alternative, method = method, data.name = DNAME)
if (conf.int)
RVAL <- c(RVAL, list(conf.int = cint))
class(RVAL) <- "htest"
RVAL
}
<bytecode: 0x0000018603fa9418>
<environment: namespace:stats>
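The line that settles the question sits in the method == "spearman" branch above:
r <- cor(rank(x), rank(y))
cor.test() always calls rank() internally, and because ranking is idempotent, passing pre-ranked data yields identical results.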

Issue with calculating marginal effects for an ordered logit model in R with ocME

I am attempting to estimate an ordered logit model, including the marginal effects, in R by following the code from this tutorial. I am using polr from the MASS package to estimate the model and ocME from the erer package to calculate the marginal effects.
Estimating the model is no problem.
logitModelSentiment90 <- polr(availability_90_ord ~ mean_sentiment, data = data,
                              Hess = T, method = "logistic")
However, I run into an issue with ocME, which generates the error message below:
ocME(logitModelSentiment90)
Error in eval(predvars, data, env) :
  numeric 'envir' arg not of length one
The documentation for ocME, quoted below, states that the object should come from the polr function, which seems to be exactly what I am doing.
ocME(w, rev.dum = TRUE, digits = 3)
w = an ordered probit or logit model object estimated by polr from the MASS library.
So can anybody help me to understand what I am doing wrong? I have published a subset of my data with the two variables for the model here. In R I have the DV set up as a factor variable, the IV is continuous.
Side note:
I can pass the calculation to Stata from R with RStata to calculate the marginal effects without any problems. But I don't want to have to do this on a regular basis so I want to understand what is causing the issue with R and ocME.
stata("ologit availability_90_ord mean_sentiment
mfx", data.in = data)
. ologit availability_90_ord mean_sentiment
Iteration 0: log likelihood = -15379.121
Iteration 1: log likelihood = -15378.742
Iteration 2: log likelihood = -15378.742
Ordered logistic regression Number of obs = 11,901
LR chi2(1) = 0.76
Prob > chi2 = 0.3835
Log likelihood = -15378.742 Pseudo R2 = 0.0000
------------------------------------------------------------------------------
avail~90_ord | Coef. Std. Err. z P>|z| [95% Conf. Interval]
-------------+----------------------------------------------------------------
mean_senti~t | .0044728 .0051353 0.87 0.384 -.0055922 .0145379
-------------+----------------------------------------------------------------
/cut1 | -1.14947 .0441059 -1.235916 -1.063024
/cut2 | -.5286239 .042808 -.6125261 -.4447217
/cut3 | .3127556 .0426782 .2291079 .3964034
------------------------------------------------------------------------------
. mfx
Marginal effects after ologit
y = Pr(availability_90_ord==1) (predict)
= .23446398
------------------------------------------------------------------------------
variable | dy/dx Std. Err. z P>|z| [ 95% C.I. ] X
---------+--------------------------------------------------------------------
mean_s~t | -.0008028 .00092 -0.87 0.384 -.002609 .001004 7.55768
------------------------------------------------------------------------------
Your model has only one explanatory variable (mean_sentiment), and this seems to be a problem for ocME. Try, for example, adding a second variable to the model:
logitModelSentiment90 <- polr(availability_90_ord ~ mean_sentiment + I(mean_sentiment^2),
                              data = data, Hess = T, method = "logistic")
ocME(logitModelSentiment90)
#                     effect.0 effect.1 effect.2 effect.3
# mean_sentiment        -0.004   -0.001        0    0.006
# I(mean_sentiment^2)    0.000    0.000        0    0.000
With minor modifications, ocME can also run correctly with a single independent variable; the key change is subsetting with drop = FALSE so that a one-variable model frame is not collapsed to a vector. Try the following myocME function
myocME <- function(w, rev.dum = TRUE, digits = 3) {
  if (!inherits(w, "polr")) {
    stop("Need an ordered choice model from 'polr()'.\n")
  }
  if (w$method != "probit" & w$method != "logistic") {
    stop("Need a probit or logit model.\n")
  }
  lev <- w$lev
  J <- length(lev)
  x.name <- attr(x = w$terms, which = "term.labels")
  x2 <- w$model[, x.name, drop = FALSE]
  ww <- paste("~ 1", paste("+", x.name, collapse = " "), collapse = " ")
  x <- model.matrix(as.formula(ww), data = x2)[, -1, drop = FALSE]
  x.bar <- as.matrix(colMeans(x))
  b.est <- as.matrix(coef(w))
  K <- nrow(b.est)
  xb <- t(x.bar) %*% b.est
  z <- c(-10^6, w$zeta, 10^6)
  pfun <- switch(w$method, probit = pnorm, logistic = plogis)
  dfun <- switch(w$method, probit = dnorm, logistic = dlogis)
  V2 <- vcov(w)
  V3 <- rbind(cbind(V2, 0, 0), 0, 0)
  ind <- c(1:K, nrow(V3) - 1, (K + 1):(K + J - 1), nrow(V3))
  V4 <- V3[ind, ]
  V5 <- V4[, ind]
  f.xb <- dfun(z[1:J] - c(xb)) - dfun(z[2:(J + 1)] - c(xb))
  me <- b.est %*% matrix(data = f.xb, nrow = 1)
  colnames(me) <- paste("effect", lev, sep = ".")
  se <- matrix(0, nrow = K, ncol = J)
  for (j in 1:J) {
    u1 <- c(z[j] - xb)
    u2 <- c(z[j + 1] - xb)
    if (w$method == "probit") {
      s1 <- -u1
      s2 <- -u2
    }
    else {
      s1 <- 1 - 2 * pfun(u1)
      s2 <- 1 - 2 * pfun(u2)
    }
    d1 <- dfun(u1) * (diag(1, K, K) - s1 * (b.est %*% t(x.bar)))
    d2 <- -1 * dfun(u2) * (diag(1, K, K) - s2 * (b.est %*% t(x.bar)))
    q1 <- dfun(u1) * s1 * b.est
    q2 <- -1 * dfun(u2) * s2 * b.est
    dr <- cbind(d1 + d2, q1, q2)
    V <- V5[c(1:K, K + j, K + j + 1), c(1:K, K + j, K + j + 1)]
    cova <- dr %*% V %*% t(dr)
    se[, j] <- sqrt(diag(cova))
  }
  colnames(se) <- paste("SE", lev, sep = ".")
  rownames(se) <- colnames(x)
  if (rev.dum) {
    for (k in 1:K) {
      if (identical(sort(unique(x[, k])), c(0, 1))) {
        for (j in 1:J) {
          x.d1 <- x.bar
          x.d1[k, 1] <- 1
          x.d0 <- x.bar
          x.d0[k, 1] <- 0
          ua1 <- z[j] - t(x.d1) %*% b.est
          ub1 <- z[j + 1] - t(x.d1) %*% b.est
          ua0 <- z[j] - t(x.d0) %*% b.est
          ub0 <- z[j + 1] - t(x.d0) %*% b.est
          me[k, j] <- pfun(ub1) - pfun(ua1) - (pfun(ub0) - pfun(ua0))
          d1 <- (dfun(ua1) - dfun(ub1)) %*% t(x.d1) -
            (dfun(ua0) - dfun(ub0)) %*% t(x.d0)
          q1 <- -dfun(ua1) + dfun(ua0)
          q2 <- dfun(ub1) - dfun(ub0)
          dr <- cbind(d1, q1, q2)
          V <- V5[c(1:K, K + j, K + j + 1), c(1:K, K + j, K + j + 1)]
          se[k, j] <- sqrt(c(dr %*% V %*% t(dr)))
        }
      }
    }
  }
  t.value <- me / se
  p.value <- 2 * (1 - pt(abs(t.value), w$df.residual))
  out <- list()
  for (j in 1:J) {
    out[[j]] <- round(cbind(effect = me[, j], error = se[, j],
                            t.value = t.value[, j], p.value = p.value[, j]), digits)
  }
  out[[J + 1]] <- round(me, digits)
  names(out) <- paste("ME", c(lev, "all"), sep = ".")
  result <- listn(w, out)
  class(result) <- "ocME"
  return(result)
}
and run the following code:
logitModelSentiment90 <- polr(availability_90_ord ~ mean_sentiment,
                              data = data, Hess = T, method = "logistic")
myocME(logitModelSentiment90)
#                effect.0 effect.1 effect.2 effect.3
# mean_sentiment   -0.001        0        0    0.001
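For reference, a minimal illustration (not from the original answer) of why a single regressor trips up the stock ocME, and what the drop = FALSE changes above guard against:
d <- data.frame(a = 1:3, b = 4:6)
class(d[, c("a", "b")])        # "data.frame"
class(d[, "a"])                # "integer": a single column collapses to a vector
class(d[, "a", drop = FALSE])  # "data.frame": kept intact for model.matrix()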

Error in df(X0) : argument "df1" is missing, with no default (tracing R code)

I have written two gradient descent functions; the second differs only in that alpha is halved each iteration and the initial alpha is different. I receive a strange error and was unable to trace its cause.
Here's the code:
k = 19000
rho.prime <- function(t, k) ifelse(abs(t) <= k, 2*t, 2*k*sign(t))
dMMSE <- function(b, k = 19000, y = farmland$farm, x = farmland$land){
  n = length(y)
  a = 0
  d = 0
  for (i in 1:n) {
    a = a + rho.prime(y[i] - b[1] - b[2]*x[i], k)
    d = d + x[i]*rho.prime(y[i] - b[1] - b[2]*x[i], k)
  }
  a <- (-a/n)
  d <- (-d/n)
  return(c(a, d))
}
grd = gr.descent(dMMSE, c(3500, 0.33), alpha = 0.0001, verbose = TRUE)
gr.descent2 <- function(dMMSE, x0, alpha = 0.1, eps = 0.001, max.it = 50, verbose = FALSE){
  X1 <- x0
  cond <- TRUE
  iteration <- 0
  if(verbose) cat("X0 =", X1, "\n")
  while(cond){
    iteration <- iteration + 1
    X0 <- X1
    X1 <- X0 - alpha * df(X0)
    alpha <- alpha/2
    cond <- sum((X1 - X0)^2) > eps & iteration < max.it
    if(verbose) cat(paste(sep = "", "X", iteration, " ="), X1, "\n")
  }
  print("mona2")
  print(X1)
  return(X1)
}
grd2 = gr.descent2(dMMSE, c(3500, 0.33), alpha = 0.1, verbose = TRUE)
#(beta0 = grd2[1])
#(beta1 = grd2[2])
So when I run the code I receive this error:
[1] "mona"
[1] 3496.409 -259466.640
X0 = 3500 0.33
Error in df(X0) : argument "df1" is missing, with no default
The error comes from the gr.descent2 function. Any thoughts?
Type this:
?df  # the F distribution density
and notice that the df1 and df2 arguments have no default values, so they must be supplied. That is what is happening here: inside gr.descent2 the gradient argument is named dMMSE, but the body calls df(X0); since no local df exists, R resolves the name to stats::df, the F density, which fails without df1.
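For instance, reproducing the error and a valid call directly (a minimal sketch):
df(0.5)                      # Error in df(0.5): argument "df1" is missing, with no default
df(0.5, df1 = 1, df2 = 100)  # density of the F(1, 100) distribution at 0.5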
integrate(function(x) df(x, 1, 100), 0, 3.84)
# 0.9471727 with absolute error < 1.4e-05
And notice the similarity of the result:
integrate(function(x) dchisq(x, 1), 0, 3.84)
# 0.9499565 with absolute error < 1.4e-05
Here's the answer; note that in the corrected functions the derivative is an argument named der_f, so the body no longer accidentally calls stats::df:
farmland <- read.csv("http://pages.stat.wisc.edu/~gvludwig/327-5/FarmLandArea.csv")
str(farmland)
plot(farm ~ land, data = farmland)
fit = lm(farm ~ land, data = farmland)
abline(fit)  # least squares regression line
library(MASS)  # for rlm()
abline(rlm(farm ~ land, data = farmland), col = "red")
gr.descent <- function(der_f, x0, alpha = 0.0001, eps = 0.001, max.it = 50, verbose = FALSE){
  X1 <- x0
  cond <- TRUE
  iteration <- 0
  if(verbose) cat("X0 =", X1, "\n")
  while(cond){
    iteration <- iteration + 1
    X0 <- X1
    X1 <- X0 - alpha * der_f(X0)
    cond <- sum((X1 - X0)^2) > eps & iteration < max.it
    if(verbose) cat(paste(sep = "", "X", iteration, " ="), X1, "\n")
  }
  print("mona")
  print(X1)
  return(X1)
}
rho <- function(t, k) ifelse(abs(t) <= k, t^2, 2*k*abs(t) - k^2)
k = 19000
rho.prime <- function(t, k) ifelse(abs(t) <= k, 2*t, 2*k*sign(t))
dMMSE <- function(b, k = 19000, y = farmland$farm, x = farmland$land){
  n = length(y)
  a = 0
  d = 0
  for (i in 1:n) {
    a = a + rho.prime(y[i] - b[1] - b[2]*x[i], k)
    d = d + x[i]*rho.prime(y[i] - b[1] - b[2]*x[i], k)
  }
  a <- (-a/n)
  d <- (-d/n)
  return(c(a, d))
}
grd = gr.descent(dMMSE, c(3500, 0.33), alpha = 0.0001, verbose = TRUE)
gr.descent2 <- function(der_f, x0, alpha = 0.1, eps = 0.001, max.it = 50, verbose = FALSE){
  X1 <- x0
  cond <- TRUE
  iteration <- 0
  if(verbose) cat("X0 =", X1, "\n")
  while(cond){
    iteration <- iteration + 1
    X0 <- X1
    X1 <- X0 - alpha * der_f(X0)
    alpha <- alpha/2
    cond <- sum((X1 - X0)^2) > eps & iteration < max.it
    if(verbose) cat(paste(sep = "", "X", iteration, " ="), X1, "\n")
  }
  print("mona2")
  print(X1)
  return(X1)
}
#plot(farm ~ land, data = farmland)
#curve(rho(x, k = 19000), xlim = c(-10, 10), col = "blue", add = TRUE)
grd2 = gr.descent2(dMMSE, c(3500, 0.33), alpha = 0.1, verbose = TRUE)
#(beta0 = grd2[1])
#(beta1 = grd2[2])
