Using nparcomp with user-defined contrasts - r
I would like to test the hypothesis H0: a2 - a1 = b2 - b1 on the following data:
data.csv
a1,a2,b1,b2
0.439,0.066,0.0,0.001
0.451,0.07,0.0,0.0
0.446,0.06,0.0,0.0
0.34,0.056,0.0,0.0
0.294,0.008,0.0,0.0
0.284,0.002,0.001,0.0
1.0,1.0,0.002,0.0
First, I tried ANOVA:
library(car)
data = read.csv('data.csv')
labels = factor(rep(c('a1','a2','b1','b2'),
                    c(nrow(data['a1']), nrow(data['a2']), nrow(data['b1']), nrow(data['b2']))))
x = C(labels, c(-1, 1, 1, -1), 1)
y = c(data[['a1']], data[['a2']], data[['b1']], data[['b2']])
l = lm(y ~ x)
a = Anova(l)
print(a$`Pr(>F)`)
which gives p = 0.1891837.
However, since I cannot assume that the data comes from a normal distribution, I would like to use a non-parametric test instead. I tried using nparcomp with a user-defined contrast matrix:
library(nparcomp)
data = read.csv('data.csv')
x = factor(rep(c('a1','a2','b1','b2'),
               c(nrow(data['a1']), nrow(data['a2']), nrow(data['b1']), nrow(data['b2']))))
y = c(data[['a1']], data[['a2']], data[['b1']], data[['b2']])
nparcomp(y ~ x, data=data.frame(x, y), type="UserDefined", contrast.matrix=t(matrix(c(-1, 1, 1, -1))))
This failed with the error:
Error in nparcomp(y ~ x, data = data.frame(x, y), type =
"UserDefined", : Sums of positive contrast coefficients must be 1!
which I fixed by rescaling the contrast matrix to t(matrix(c(-0.5, 0.5, 0.5, -0.5))). However, this failed with another error:
Error in logit.dev %*% cov.bf : non-conformable arguments
What is the correct way to use nparcomp with user-defined contrasts?
P.S. I am a newcomer to R, so apologies for the bad coding style.
Let me preface this by saying that I'm not familiar with this particular area of statistics, so although my code runs without errors, my modification may have completely changed the design of what you are trying to test - let me know if that's the case. Anyway, I looked through the definition of nparcomp to see what was causing the first error, Sums of positive contrast coefficients must be 1!:
if (type == "UserDefined") {
if (is.null(contrast.matrix)) {
stop("Please eanter a contrast matrix!")
}
Con <- contrast.matrix
rownames(Con) <- paste("C", 1:nrow(Con))
for (rc in 1:nrow(Con)) {
if (sum(Con[rc, ][Con[rc, ] > 0]) != 1) {
stop("Sums of positive contrast coefficients must be 1!")
}
}
colnames(Con) <- fl
}
In the case of your original contrast.matrix of t(matrix(c(-1, 1, 1, -1))), the sum of the positive coefficients evaluates to 2; your second attempt of t(matrix(c(-0.5, 0.5, 0.5, -0.5))) evaluates to 1 in the block of code above (which is why it doesn't produce this error).
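You can reproduce that check outside of nparcomp; a quick sketch (the variable names conVec and conVecScaled are mine, just for illustration):

conVec <- t(matrix(c(-1, 1, 1, -1)))
sum(conVec[1, ][conVec[1, ] > 0])               # 2 -> triggers the error
conVecScaled <- conVec / 2
sum(conVecScaled[1, ][conVecScaled[1, ] > 0])   # 1 -> passes the check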
Your second error, Error in logit.dev %*% cov.bf : non-conformable arguments, comes from multiplying matrices of incompatible dimensions. I didn't trace the root of this particular error (the body of the function is pretty lengthy), but it gave me the idea to expand t(matrix(c(-0.5, 0.5, 0.5, -0.5))) into a symmetric matrix:
contrVec <- t(matrix(c(-1, 1, 1, -1)))
contrMat <- -1 * (matrix(
    outer(contrVec, t(contrVec)),
    ncol = 4) / 2)
##
> contrMat
[,1] [,2] [,3] [,4]
[1,] -0.5 0.5 0.5 -0.5
[2,] 0.5 -0.5 -0.5 0.5
[3,] 0.5 -0.5 -0.5 0.5
[4,] -0.5 0.5 0.5 -0.5
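As an aside (not part of the original answer), the same matrix can be built a bit more compactly with outer(); this is just an equivalent construction of contrMat:

v <- c(-1, 1, 1, -1)
contrMat <- -outer(v, v) / 2   # identical to the 4 x 4 matrix printed above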
Passing this to contrast.matrix does not produce any errors, but as I've said, I am unfamiliar with this particular procedure / function, so I may have completely changed the hypothesis you are trying to test. Here's the full code:
library(car)
library(nparcomp)
##
data <- data.frame(
a1=c(.439,.451,.446,
.340,.294,.284,1.00),
a2=c(.066,.07,.06,.056,
.008,.002,1.00),
b1=c(rep(0.0,5),
.001,.002),
b2=c(.001,
rep(0.0,6)))
##
x <- factor(
rep(names(data),
each=nrow(data)))
##
y <- c(data[['a1']], data[['a2']],
data[['b1']], data[['b2']])
##
contrVec <- t(matrix(c(-1, 1, 1, -1)))
contrMat <- -1*(matrix(
outer(contrVec,
t(contrVec)),
ncol=4)/2)
##
nprce <- nparcomp(
y ~ x,
data=data.frame(x, y),
type="UserDefined",
contrast.matrix=contrMat)
##
> summary(nprce)
#------------Nonparametric Multiple Comparisons for relative contrast effects----------#
- Alternative Hypothesis: True relative contrast effect p is less or equal than 1/2
- Estimation Method: Global Pseudo ranks
- Type of Contrast : UserDefined
- Confidence Level: 95 %
- Method = Logit - Transformation
- Estimation Method: Pairwise rankings
#---------------------------Interpretation--------------------------------------------#
p(a,b) > 1/2 : b tends to be larger than a
#-------------------------------------------------------------------------------------#
#----Data Info-------------------------------------------------------------------------#
Sample Size
1 a1 7
2 a2 7
3 b1 7
4 b2 7
#----Contrast--------------------------------------------------------------------------#
a1 a2 b1 b2
C 1 -0.5 0.5 0.5 -0.5
C 2 0.5 -0.5 -0.5 0.5
C 3 0.5 -0.5 -0.5 0.5
C 4 -0.5 0.5 0.5 -0.5
#----Analysis--------------------------------------------------------------------------#
Comparison Estimator Lower Upper Statistic p.Value
1 C 1 0.429 0.345 0.517 -1.593593 0.1110273
2 C 2 0.571 0.483 0.655 1.593593 0.1110273
3 C 3 0.571 0.483 0.655 1.593593 0.1110273
4 C 4 0.429 0.345 0.517 -1.593593 0.1110273
#----Overall---------------------------------------------------------------------------#
Quantile p.Value
1 1.959966 0.1110273
#--------------------------------------------------------------------------------------#
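If you prefer to pull the numbers out of the fitted object rather than reading them off the printed summary, the pieces shown above are stored in the returned list. A sketch, assuming the component names used by current nparcomp versions (check names(nprce) or str(nprce) if yours differ):

nprce$Analysis           # per-contrast estimates, confidence limits and p-values
nprce$Analysis$p.Value   # e.g. just the p-values
nprce$Contrast           # the contrast matrix actually used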
Related
How to find the optimal cut-off point to minimize both the FNR and FPR in R?
I need to find the optimal threshold to minimize both the false positive rate and the false negative rate, assuming equal weight between the two rates. I wrote the following code:

data = read.csv(
  url("https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv"),
  sep = ",")
library(ROCR)
pred = prediction(data$decile_score/10, data$two_year_recid)
perf = performance(pred, measure = "fnr", x.measure = "fpr")
opt.cut = function(perf, pred) {
  cut.ind = mapply(FUN = function(x, y, p){
    d = (x - 0)^2 + (y - 1)^2
    ind = which(d == min(d))
    c(False_negative_rate = 1 - y[[ind]],
      False_positive_rate = x[[ind]],
      cutoff = p[[ind]])
  }, perf@x.values, perf@y.values, pred@cutoffs)
}
print(opt.cut(perf, pred))

It returns this result:

                    [,1]
False_negative_rate    0
False_positive_rate    0
cutoff               Inf

However, I think there is something wrong with my code.
Well, I think your code is flawed from a logical point of view. You said you want to minimize both the false positive rate and the false negative rate, but then you minimize

d = (x - 0)^2 + (y - 1)^2

which pushes y towards 1 - and here y is the FNR itself, whereas that distance formula is written for 1 - FNR, i.e. the true positive rate. Thus, assuming you want to minimize FPR and FNR, you could simply do:

pred@cutoffs[[1]][which.min(sqrt(perf@x.values[[1]] ^ 2 + perf@y.values[[1]] ^ 2))]
# [1] 0.5

(No need for extra loops, as R is nicely vectorized.) To verify this result, you can calculate FPR and FNR yourself for different cutoffs (which will give you the same results as performance, of course, but it is a good exercise to understand the principles):

t(sapply(pred@cutoffs[[1]], function(co) {
  prediction <- factor(ifelse(data$decile_score / 10 < co, 0, 1), 0:1)
  confusion_matrix <- table(data$two_year_recid, prediction)
  fpr <- confusion_matrix[1, 2] / sum(confusion_matrix[1, ])
  fnr <- confusion_matrix[2, 1] / sum(confusion_matrix[2, ])
  c(cutoff = co, fpr = fpr, fnr = fnr, dist = sqrt(fpr ^ 2 + fnr ^ 2))
}))
#       cutoff        fpr        fnr      dist
#  [1,]    Inf 0.00000000 1.00000000 1.0000000
#  [2,]    1.0 0.02195307 0.90895109 0.9092162
#  [3,]    0.9 0.06056018 0.79975392 0.8020436
#  [4,]    0.8 0.10143830 0.69209474 0.6994890
#  [5,]    0.7 0.16250315 0.58443556 0.6066071
#  [6,]    0.6 0.23391370 0.47431560 0.5288581
#  [7,]    0.5 0.32349230 0.37403876 0.4945223  #### <<- minimum
#  [8,]    0.4 0.43325763 0.27130114 0.5111912
#  [9,]    0.3 0.55084532 0.18486620 0.5810388
# [10,]    0.2 0.71435781 0.09474008 0.7206128
# [11,]    0.1 1.00000000 0.00000000 1.0000000
The first values in perf@x.values, perf@y.values, pred@cutoffs are causing your results: they are 0, 1 and Inf, respectively (at a cutoff of Inf nothing is classified as positive, so the FPR is 0 and the FNR is 1). In order to remove them, loop through each list member and extract the vectors without the first element.

library(ROCR)

opt.cut = function(perf, pred) {
  # drop the first element (cutoff = Inf) of each list member
  x.values <- lapply(perf@x.values, `[`, -1)
  y.values <- lapply(perf@y.values, `[`, -1)
  cutoffs  <- lapply(pred@cutoffs, `[`, -1)
  cut.ind <- mapply(FUN = function(x, y, p){
    d <- x^2 + y^2
    ind <- which.min(d)
    c(False_negative_rate = y[[ind]],
      False_positive_rate = x[[ind]],
      cutoff = p[[ind]])
  }, x.values, y.values, cutoffs)
  cut.ind
}

pred <- prediction(data$decile_score/10, data$two_year_recid)
perf <- performance(pred, measure = "fnr", x.measure = "fpr")
opt.cut(perf, pred)
#                          [,1]
# False_negative_rate 0.3740388
# False_positive_rate 0.3234923
# cutoff              0.5000000
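To see why dropping that first element is safe, you can inspect it directly; a quick check, reusing the pred and perf objects defined above:

pred@cutoffs[[1]][1]    # Inf -> nothing is classified as positive at this cutoff
perf@x.values[[1]][1]   # fpr = 0 there
perf@y.values[[1]][1]   # fnr = 1 there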
Entropy calculation gives NaN - is applying na.omit a valid tweak?
By definition, the entropy is defined as:

entropy <- function (p) sum(-p * log(p))

I'm performing LCA using the poLCA package and trying to calculate entropy, which for some of my models is outputting NaN.

error_prior <- entropy(lca2$P)  # class proportions, model 2
error_post <- mean(apply(lca2$posterior, 1, entropy), na.rm = TRUE)
results[2,8] <- round(((error_prior - error_post) / error_prior), 3)

From the answer to this question, Entropy output is NaN for some class solutions and not others, I learnt that it is caused by zeros in p and that it can be resolved by adding na.omit to the function as follows:

entropy <- function (p) sum(na.omit(-p * log(p)))

My question is: is this technical tweak mathematically valid, without affecting the integrity of the calculation? In my case, around 1/3 of the values in p are zeros. I'm really unsure if I should use na.omit or find another way to resolve this problem.
It is valid, but not transparent at first glance. The reason is that the mathematical limit of x*log(x) as x -> 0 is 0 (we can prove this using L'Hospital's rule). In this regard, the most robust definition of the function should be:

entropy.safe <- function (p) {
  if (any(p > 1 | p < 0)) stop("probability must be between 0 and 1")
  log.p <- numeric(length(p))
  safe <- p != 0
  log.p[safe] <- log(p[safe])
  sum(-p * log.p)
}

But simply dropping the p = 0 cases gives identical results, because the term at p = 0 is 0 and contributes nothing to the sum anyway.

entropy.brutal <- function (p) {
  if (any(p > 1 | p < 0)) stop("probability must be between 0 and 1")
  log.p <- log(p)
  ## the same as sum(na.omit(-p * log.p))
  sum(-p * log.p, na.rm = TRUE)
}

## p has a single 0
( p <- seq(0, 1, by = 0.1) )
# [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
entropy.brutal(p)
#[1] 2.455935
entropy.safe(p)
#[1] 2.455935

## half of p are zeros
p[1:5] <- 0
p
# [1] 0.0 0.0 0.0 0.0 0.0 0.5 0.6 0.7 0.8 0.9 1.0
entropy.brutal(p)
#[1] 1.176081
entropy.safe(p)
#[1] 1.176081

In conclusion, we can use either entropy.brutal or entropy.safe.
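For intuition, the limit the answer relies on is easy to check numerically; a small sketch, not part of the original answer:

x <- 10^-(1:8)
x * log(x)   # tends to 0 as x -> 0, which is why treating 0*log(0) as 0 is safe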
Set constraints in a linear model using R
I am trying to predict future market share using the following linear model.

x <- c(20, 60, 80, 130)
y <- c(0.8, 0.15, 0.05, 0.0)
z <- data.frame(x, y)
z.fit <- lm(y ~ x, data = z)
z.prediction <- predict(z.fit, data.frame(x = c(30, 65, 70, 100)), interval = "prediction")

The predicted values sum to more than 1, see below:

    fit
1 0.544
2 0.301
3 0.267
4 0.059

How can I set a constraint in the code so that the sum of "Share" equals 1?
The question did not define Share, but assuming it is the fitted values, try the CVXR package. Note that a nonnegativity constraint has been added, but you can drop it if negative values are acceptable.

library(CVXR)

b <- Variable(2)
pred <- b[1] + b[2] * x
objective <- Minimize(sum((y - pred)^2))
constraints <- list(sum(pred) == 1, pred >= 0)
problem <- Problem(objective, constraints)
soln <- solve(problem)

bval <- soln$getValue(b)
bval
##              [,1]
## [1,]  0.565217391
## [2,] -0.004347826

# check constraints
predval <- soln$getValue(pred)
round(predval, 5)
##         [,1]
## [1,] 0.47826
## [2,] 0.30435
## [3,] 0.21739
## [4,] 0.00000

sum(predval)
## [1] 1
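If the goal is predictions for the new x values in the question, the fitted coefficients can be reused directly; a sketch using the bval object from above. Note the sum-to-one constraint only applies to the training predictions, so any renormalisation of new points (as below) is a hypothetical choice, not something the answer prescribes:

x.new <- c(30, 65, 70, 100)
pred.new <- bval[1] + bval[2] * x.new
pred.new / sum(pred.new)   # one possible way to rescale new shares to sum to 1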
Generating random variables with specific correlation threshold value
I am generating random variables with a specified range and dimension. I have written the following code for this.

generateRandom <- function(size, scale){
  result <- round(runif(size, 1, scale), 1)
  return(result)
}

flag = TRUE
x <- generateRandom(300, 6)
y <- generateRandom(300, 6)
while(flag){
  corrXY <- cor(x, y)
  if(corrXY >= 0.2){
    flag = FALSE
  } else{
    x <- generateRandom(300, 6)
    y <- generateRandom(300, 6)
  }
}

I want six variables of size 300, all on a scale of 1 to 6 except for one variable, which would be on a scale of 1 to 7, with the following correlation structure among them:

1  0.45  -0.35   0.46   0.25   0.3
      1   0.25   0.29   0.5   -0.3
             1  -0.3    0.1    0.4
                    1   0.4    0.6
                           1  -0.4
                                 1

But when I try to increase the threshold value my program gets very slow. Moreover, I want more than 7 variables of size 300, and between each pair of those variables I want some specific correlation threshold. How would I do it efficiently?
This answer is directly inspired from here and there. We would like to generate 300 samples of a 6-variate uniform distribution with a correlation structure equal to

Rhos <- matrix(0, 6, 6)
Rhos[lower.tri(Rhos)] <- c(0.450, -0.35, 0.46, 0.25, 0.3, 0.25, 0.29, 0.5,
                           -0.3, -0.3, 0.1, 0.4, 0.4, 0.6, -0.4)
Rhos <- Rhos + t(Rhos)
diag(Rhos) <- 1

We first generate from this correlation structure the correlation structure of the Gaussian copula:

Copucov <- 2 * sin(Rhos * pi/6)

This matrix is not positive definite; we use the nearest positive definite matrix instead:

library(Matrix)
Copucov <- cov2cor(nearPD(Copucov)$mat)

This correlation structure can be used as one of the inputs of MASS::mvrnorm:

G <- MASS::mvrnorm(n = 300, mu = rep(0, 6), Sigma = Copucov, empirical = TRUE)

We then transform G into a multivariate uniform sample whose values range from 1 to 6, except for the last variable, which ranges from 1 to 7:

U <- matrix(NA, 300, 6)
U[, 1:5] <- 5 * pnorm(G[, 1:5]) + 1
U[, 6]   <- 6 * pnorm(G[, 6]) + 1

After rounding (and taking the nearest positive definite matrix for the copula's covariance matrix, etc.), the correlation structure is not changed much:

Ur <- round(U, 1)
cor(Ur)
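As a quick sanity check (not in the original answer), you can compare the achieved correlations with the targets, reusing the Ur and Rhos objects from above:

round(cor(Ur) - Rhos, 2)   # discrepancies introduced by nearPD, the copula step and rounding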
Nonlinear Least Squares in R - Levenberg-Marquardt to Fit Heligman-Pollard Model Parameters
I am attempting to reproduce the solutions of a paper by Kostakis. In this paper an abridged mortality table is expanded to a complete life table using the Heligman-Pollard model. The model has 8 parameters which have to be fitted. The author used a modified Gauss-Newton algorithm; this algorithm (E04FDF) is part of the NAG library of computer programs. Should not Levenberg-Marquardt yield the same set of parameters? What is wrong with my code or my application of the LM algorithm?

library(minpack.lm)

## Heligman-Pollard is used to expand an abridged table.
## A nonlinear least squares algorithm is used to fit the parameters on nqx observed over 5-year intervals (5qx)
AGE <- c(0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70)
MORTALITY <- c(0.010384069, 0.001469140, 0.001309318, 0.003814265,
               0.005378395, 0.005985625, 0.006741766, 0.009325056,
               0.014149626, 0.021601755, 0.034271934, 0.053836246,
               0.085287751, 0.136549522, 0.215953304)

## Start parameters for the Heligman-Pollard formula
## (converged set: a=0.0005893, b=0.0043836, c=0.0828424, d=0.000706,
##  e=9.927863, f=22.197312, g=0.00004948, h=1.10003)
## I modified the parameter "a" in order to have start values; the converged set is listed above.
parStart <- list(a=0.0008893, b=0.0043836, c=0.0828424, d=0.000706,
                 e=9.927863, f=22.197312, g=0.00004948, h=1.10003)

## The Heligman-Pollard formula (HP8) = qx/px = ... 8-parameter equation
HP8 <- function(parS, x)
  ifelse(x == 0,
         parS$a^((x + parS$b)^parS$c) + parS$g*parS$h^x,
         parS$a^((x + parS$b)^parS$c) + parS$d*exp(-parS$e*(log(x/parS$f))^2) + parS$g*parS$h^x)

## Define qx = HP8/(1+HP8)
qxPred <- function(parS, x) HP8(parS, x)/(1 + HP8(parS, x))

## Calculate nqx predicted by the HP8 model (nqxPred(parStart, x))
nqxPred <- function(parS, x)
  (1 - (1 - qxPred(parS, x)) * (1 - qxPred(parS, x+1)) * (1 - qxPred(parS, x+2)) *
       (1 - qxPred(parS, x+3)) * (1 - qxPred(parS, x+4)))

## Define the residual function; the relative squared distance is minimized
ResidFun <- function(parS, Observed, x) (nqxPred(parS, x)/Observed - 1)^2

## Apply the nls.lm algorithm
nls.out <- nls.lm(par = parStart, fn = ResidFun, Observed = MORTALITY, x = AGE,
                  control = nls.lm.control(nprint = 1,
                                           ftol = .Machine$double.eps,
                                           ptol = .Machine$double.eps,
                                           maxfev = 10000, maxiter = 500))
summary(nls.out)

## The author used a modified Gauss-Newton algorithm; this algorithm (E04FDF) is part of the NAG library.
## Should not Levenberg-Marquardt yield the same set of parameters?
The bottom line here is that @Roland is absolutely right: this is a very ill-posed problem, and you shouldn't necessarily expect to get reliable answers. Below I've

- cleaned up the code in a few small ways (this is just aesthetic);
- changed the ResidFun to return residuals, not squared residuals (the former is correct, but this doesn't make very much difference);
- explored results from several different optimizers.

It actually looks like the answer you're getting is better than the "converged parameters" you list above, which I'm assuming are the parameters from the original study (can you please provide a reference?).

Load package:

library(minpack.lm)

Data, as a data frame:

d <- data.frame(
  AGE = seq(0, 70, by = 5),
  MORTALITY = c(0.010384069, 0.001469140, 0.001309318, 0.003814265,
                0.005378395, 0.005985625, 0.006741766, 0.009325056,
                0.014149626, 0.021601755, 0.034271934, 0.053836246,
                0.085287751, 0.136549522, 0.215953304))

First view of the data:

library(ggplot2)
(g1 <- ggplot(d, aes(AGE, MORTALITY)) + geom_point())
g1 + geom_smooth()   ## with loess fit

Parameter choices: presumably these are the parameters from the original paper ...

parConv <- c(a=0.0005893, b=0.0043836, c=0.0828424, d=0.000706,
             e=9.927863, f=22.197312, g=0.00004948, h=1.10003)

Perturbed parameters:

parStart <- parConv
parStart["a"] <- parStart["a"] + 3e-4

The formulae:

HP8 <- function(parS, x)
  with(as.list(parS),
       ifelse(x == 0,
              a^((x+b)^c) + g*h^x,
              a^((x+b)^c) + d*exp(-e*(log(x/f))^2) + g*h^x))

## Define qx = HP8/(1+HP8)
qxPred <- function(parS, x) {
  h <- HP8(parS, x)
  h/(1+h)
}

## Calculate nqx predicted by the HP8 model (nqxPred(parStart, x))
nqxPred <- function(parS, x)
  (1 - (1-qxPred(parS, x)) * (1-qxPred(parS, x+1)) * (1-qxPred(parS, x+2)) *
       (1-qxPred(parS, x+3)) * (1-qxPred(parS, x+4)))

## Define the residual function; the relative distance is minimized
ResidFun <- function(parS, Observed, x) (nqxPred(parS, x)/Observed - 1)

N.B. this is changed slightly from the OP's version; nls.lm wants residuals, not squared residuals.

A sum-of-squares function for use with other optimizers:

ssqfun <- function(parS, Observed, x) {
  sum(ResidFun(parS, Observed, x)^2)
}

Applying nls.lm. (Not sure why ftol and ptol were lowered from sqrt(.Machine$double.eps) to .Machine$double.eps -- the former is generally a practical limit to precision ...)

nls.out <- nls.lm(par = parStart, fn = ResidFun,
                  Observed = d$MORTALITY, x = d$AGE,
                  control = nls.lm.control(nprint = 0,
                                           ftol = .Machine$double.eps,
                                           ptol = .Machine$double.eps,
                                           maxfev = 10000, maxiter = 1000))
parNLS <- coef(nls.out)

pred0 <- nqxPred(as.list(parConv), d$AGE)
pred1 <- nqxPred(as.list(parNLS), d$AGE)
dPred <- with(d, rbind(data.frame(AGE, MORTALITY = pred0, w = "conv"),
                       data.frame(AGE, MORTALITY = pred1, w = "nls")))
g1 + geom_line(data = dPred, aes(colour = w))

The lines are indistinguishable, but the parameters have some big differences:

round(cbind(parNLS, parConv), 5)
##     parNLS  parConv
## a  1.00000  0.00059
## b 50.46708  0.00438
## c  3.56799  0.08284
## d  0.00072  0.00071
## e  6.05200  9.92786
## f 21.82347 22.19731
## g  0.00005  0.00005
## h  1.10026  1.10003

d, f, g, h are close, but a, b, c are orders of magnitude different and e is 50% different. Looking at the original equations, what's happening here is that a^((x+b)^c) is getting set to a constant, because a is approaching 1: once a is approximately 1, b and c are essentially irrelevant.
Let's check the correlation (we need a generalized inverse because the matrix is so strongly correlated):

obj <- nls.out
vcov <- with(obj, deviance/(length(fvec) - length(par)) * MASS::ginv(hessian))
cmat <- round(cov2cor(vcov), 1)
dimnames(cmat) <- list(letters[1:8], letters[1:8])
##      a    b    c    d    e    f    g    h
## a  1.0  0.0  0.0  0.0  0.0  0.0 -0.1  0.0
## b  0.0  1.0 -1.0  1.0 -1.0 -1.0 -0.4 -1.0
## c  0.0 -1.0  1.0 -1.0  1.0  1.0  0.4  1.0
## d  0.0  1.0 -1.0  1.0 -1.0 -1.0 -0.4 -1.0
## e  0.0 -1.0  1.0 -1.0  1.0  1.0  0.4  1.0
## f  0.0 -1.0  1.0 -1.0  1.0  1.0  0.4  1.0
## g -0.1 -0.4  0.4 -0.4  0.4  0.4  1.0  0.4
## h  0.0 -1.0  1.0 -1.0  1.0  1.0  0.4  1.0

This is not actually so useful -- it really just confirms that lots of the variables are strongly correlated ...

library(optimx)
mvec <- c('Nelder-Mead', 'BFGS', 'CG', 'L-BFGS-B',
          'nlm', 'nlminb', 'spg', 'ucminf')
opt1 <- optimx(par = parStart, fn = ssqfun,
               Observed = d$MORTALITY, x = d$AGE,
               itnmax = 5000, method = mvec, control = list(kkt = TRUE))
## control=list(all.methods=TRUE,kkt=TRUE)) ## Boom!

##         fvalues      method fns  grs itns conv KKT1 KKT2 xtimes
## 2 8.988466e+307        BFGS  NA NULL NULL 9999   NA   NA  0
## 3 8.988466e+307          CG  NA NULL NULL 9999   NA   NA  0
## 4 8.988466e+307    L-BFGS-B  NA NULL NULL 9999   NA   NA  0
## 5 8.988466e+307         nlm  NA   NA   NA 9999   NA   NA  0
## 7     0.3400858         spg   1   NA    1    3   NA   NA  0.064
## 8     0.3400858      ucminf   1    1 NULL    0   NA   NA  0.032
## 1    0.06099295 Nelder-Mead 501   NA NULL    1   NA   NA  0.252
## 6   0.009275733      nlminb 200 1204  145    1   NA   NA  0.708

This warns about bad scaling, and also finds a variety of different answers: only ucminf claims to have converged, but nlminb gets a better answer -- and the itnmax parameter seems to be ignored ...

opt2 <- nlminb(start = parStart, objective = ssqfun,
               Observed = d$MORTALITY, x = d$AGE,
               control = list(eval.max = 5000, iter.max = 5000))
parNLM <- opt2$par

This finishes, but with a false convergence warning ...

round(cbind(parNLS, parConv, parNLM), 5)
##     parNLS  parConv   parNLM
## a  1.00000  0.00059  1.00000
## b 50.46708  0.00438 55.37270
## c  3.56799  0.08284  3.89162
## d  0.00072  0.00071  0.00072
## e  6.05200  9.92786  6.04416
## f 21.82347 22.19731 21.82292
## g  0.00005  0.00005  0.00005
## h  1.10026  1.10003  1.10026

sapply(list(parNLS, parConv, parNLM),
       ssqfun, Observed = d$MORTALITY, x = d$AGE)
## [1] 0.006346250 0.049972367 0.006315034

It looks like nlminb and minpack.lm are getting similar answers, and are actually doing better than the originally stated parameters (by quite a bit):

pred2 <- nqxPred(as.list(parNLM), d$AGE)
dPred <- with(d, rbind(dPred, data.frame(AGE, MORTALITY = pred2, w = "nlminb")))
g1 + geom_line(data = dPred, aes(colour = w))
ggsave("cmpplot.png")

ggplot(data = dPred, aes(x = AGE, y = MORTALITY - d$MORTALITY, colour = w)) +
  geom_line() + geom_point(aes(shape = w), alpha = 0.3)
ggsave("residplot.png")

Other things one could try would be:

- appropriate scaling -- although a quick test of this doesn't seem to help that much;
- providing analytical gradients;
- using AD Model Builder;
- using the slice function from bbmle to explore whether the old and new parameters seem to represent distinct minima, or whether the old parameters are just a false convergence;
- getting the KKT (Karush-Kuhn-Tucker) criterion calculators from optimx or related packages working for similar checks.

PS: the largest deviations (by far) are for the oldest age classes, which probably also have small samples. From a statistical point of view it would probably be worth doing a fit that weighted by the precision of the individual points ...
@BenBolker, here is an attempt at fitting the parameters with the entire dataset (underlying qx values). I am still not able to reproduce the parameters.

library(minpack.lm)
library(ggplot2)
library(optimx)
getwd()

d <- data.frame(AGE = seq(0, 74),
                MORTALITY = c(869, 58, 40, 37, 36, 35, 32, 28, 29, 23, 24, 22, 24, 28,
                              33, 52, 57, 77, 93, 103, 103, 109, 105, 114, 108, 112, 119,
                              125, 117, 127, 125, 134, 134, 131, 152, 179, 173, 182, 199,
                              203, 232, 245, 296, 315, 335, 356, 405, 438, 445, 535, 594,
                              623, 693, 749, 816, 915, 994, 1128, 1172, 1294, 1473,
                              1544, 1721, 1967, 2129, 2331, 2559, 2901, 3203, 3470,
                              3782, 4348, 4714, 5245, 5646))
d$MORTALITY <- d$MORTALITY/100000

ggplot(d, aes(AGE, MORTALITY)) + geom_point()   ## not allowed to post images
g1 <- ggplot(d, aes(AGE, MORTALITY)) + geom_point()
g1 + geom_smooth()   ## with loess fit

Reported parameters:

parConv <- c(a=0.0005893, b=0.0043836, c=0.0828424, d=0.000706,
             e=9.927863, f=22.197312, g=0.00004948, h=1.10003)
parStart <- parConv
parStart["a"] <- parStart["a"] + 3e-4

## Define qx = HP8/(1+HP8)
HP8 <- function(parS, x)
  with(as.list(parS),
       ifelse(x == 0,
              a^((x+b)^c) + g*h^x,
              a^((x+b)^c) + d*exp(-e*(log(x/f))^2) + g*h^x))

qxPred <- function(parS, x) {
  h <- HP8(parS, x)
  h/(1+h)
}

## Define the residual function; the relative distance is minimized
ResidFun <- function(parS, Observed, x) (qxPred(parS, x)/Observed - 1)

ssqfun <- function(parS, Observed, x) {
  sum(ResidFun(parS, Observed, x)^2)
}

nls.out <- nls.lm(par = parStart, fn = ResidFun,
                  Observed = d$MORTALITY, x = d$AGE,
                  control = nls.lm.control(nprint = 1,
                                           ftol = sqrt(.Machine$double.eps),
                                           ptol = sqrt(.Machine$double.eps),
                                           maxfev = 1000, maxiter = 1000))
parNLS <- coef(nls.out)

pred0 <- qxPred(as.list(parConv), d$AGE)
pred1 <- qxPred(as.list(parNLS), d$AGE)

## bind the data frames for pred0 and pred1 row-wise
dPred <- with(d, rbind(data.frame(AGE, MORTALITY = pred0, w = "conv"),
                       data.frame(AGE, MORTALITY = pred1, w = "nls")))
g1 + geom_line(data = dPred, aes(colour = w))

round(cbind(parNLS, parConv), 7)

mvec <- c('Nelder-Mead', 'BFGS', 'CG', 'L-BFGS-B', 'nlm', 'nlminb', 'spg', 'ucminf')
opt1 <- optimx(par = parStart, fn = ssqfun,
               Observed = d$MORTALITY, x = d$AGE,
               itnmax = 5000, method = mvec,
               control = list(all.methods = TRUE, kkt = TRUE))

get.result(opt1, attribute = c("fvalues", "method", "grs", "itns",
                               "conv", "KKT1", "KKT2", "xtimes"))
##        method       fvalues  grs itns conv KKT1 KKT2 xtimes
## 5         nlm 8.988466e+307   NA   NA 9999   NA   NA   0
## 4    L-BFGS-B 8.988466e+307 NULL NULL 9999   NA   NA   0
## 2          CG 8.988466e+307 NULL NULL 9999   NA   NA   0.02
## 1        BFGS 8.988466e+307 NULL NULL 9999   NA   NA   0
## 3 Nelder-Mead     0.5673864   NA NULL    0   NA   NA   0.42
## 6      nlminb     0.4127198  546   62    0   NA   NA   0.17

opt2 <- nlminb(start = parStart, objective = ssqfun,
               Observed = d$MORTALITY, x = d$AGE,
               control = list(eval.max = 5000, iter.max = 5000))
parNLM <- opt2$par

Check on parameters:

round(cbind(parNLS, parConv, parNLM), 5)
##     parNLS  parConv   parNLM
## a  0.00058  0.00059  0.00058
## b  0.00369  0.00438  0.00369
## c  0.08065  0.08284  0.08065
## d  0.00070  0.00071  0.00070
## e  9.30948  9.92786  9.30970
## f 22.30769 22.19731 22.30769
## g  0.00005  0.00005  0.00005
## h  1.10084  1.10003  1.10084

SSE review:

sapply(list(parNLS, parConv, parNLM),
       ssqfun, Observed = d$MORTALITY, x = d$AGE)
## [1] 0.4127198 0.4169513 0.4127198

Not able to upload graphs, but the code is here.
Still appears that the parameters found in the article are not the best fit when the complete mortality data (not abridged or subset) is used.

## pred2 <- qxPred(as.list(parNLM), d$AGE)
## dPred <- with(d, rbind(dPred, data.frame(AGE, MORTALITY = pred2, w = "nlminb")))
## g1 + geom_line(data = dPred, aes(colour = w))
ggplot(data = dPred, aes(x = AGE, y = MORTALITY - d$MORTALITY, colour = w)) +
  geom_line() + geom_point(aes(shape = w), alpha = 0.3)