xyplot not merging plots when more than two conditioning variables - r

When I run the following code, xyplot produces 4 separate plots 2 by 3 plots,
whereas I want a single 4 by 6 trellis (to save real estate
space on the axis anotation and legends).
Note that my problem is different from this one in that I don't want to
see four set of axis/legends.
Here is some example data:
B <- structure(list(yval = c(0.88, 4.31, 7.52, 3.21, 3.27, 4.93, 4.21,
0.7, 0.68, 0.92, 3.86, 5.67, 9.08, 1.95, 3.27, 1.44, 2.38, 0.85,
0.79, 0.55, 0.79, 10.52, 0.9, 4, 0.78, 2.46, 0.78, 1.64, 2.47,
0.77, 0.83, 0.86, 3.65, 8.25, 0.65, 0.88, 0.95, 4.05, 4.98, 1.43,
4.43, 2.94, 5.52, 0.9, 3.69, 0.79, 0.74, 1.49, 7.29, 0.58, 8.47,
5.82, 0.84, 0.87, 0.69, 1.38, 0.83, 2.32, 0.86, 7.32, 6.73, 6.7,
3.3, 1.58, 2.74, 0.88, 4.2, 3.79, 4.98, 2.54, 1.84, 1.2, 2.59,
11.99, 0.78, 0.92, 0.59, 3.83, 0.92, 2.6, 0.95, 3.18, 2.75, 9.83,
9.81, 0.55, 0.83, 6.29, 1.64, 1.12, 0.65, 3.96, 4.27, 3.99, 20,
0.83, 6.23, 6.81, 0.86, 0.7), xval = c(0.62, 0.81, 9.01, 3.72,
1.49, 3.92, 6.22, 6.64, 5.56, 6.64, 4, 7.36, 9.6, 1, 1.64, 3.34,
3.47, 3.37, 4.34, 6.63, 7.62, 4.07, 5.69, 3.76, 9.74, 1.58, 1.53,
2.62, 1.64, 1.18, 9.79, 9.9, 2.76, 7.96, 5.11, 4.74, 9.92, 0.49,
9.05, 8.59, 0.7, 5.8, 5.34, 3.14, 6.96, 2.05, 8.29, 0.35, 7.52,
6.56, 2.01, 7.92, 3.89, 6.31, 8.64, 6.18, 4.49, 0.63, 7.52, 7.82,
1.25, 9.54, 4.68, 0.4, 1.38, 8.7, 4.71, 8.27, 5.72, 0.75, 6.08,
0.11, 1.38, 0.37, 4.94, 0.53, 7.53, 3.11, 2.73, 4.93, 9.47, 2.18,
4.54, 7.12, 8.28, 6.62, 5.14, 4.42, 0.21, 9.52, 3.77, 6.43, 6.78,
6.87, 9.47, 6.42, 0.81, 8.88, 7.2, 8.68), gval = c(1, 2, 5, 5,
2, 1, 2, 1, 2, 3, 6, 5, 1, 3, 2, 3, 5, 2, 6, 4, 4, 1, 1, 6, 4,
2, 1, 2, 4, 5, 5, 3, 6, 5, 4, 2, 2, 3, 3, 6, 2, 4, 1, 4, 4, 1,
1, 2, 2, 5, 1, 1, 2, 2, 1, 3, 1, 5, 6, 5, 1, 5, 4, 4, 3, 6, 6,
4, 5, 4, 4, 6, 5, 6, 5, 2, 1, 1, 6, 6, 2, 5, 5, 1, 1, 4, 6, 3,
4, 6, 3, 5, 3, 3, 6, 2, 1, 5, 1, 3), type = c(5, 2, 1, 5, 1,
1, 1, 1, 2, 12, 5, 1, 2, 5, 5, 12, 12, 12, 12, 2, 12, 2, 12,
5, 12, 2, 12, 12, 5, 12, 12, 12, 5, 2, 5, 12, 1, 1, 1, 1, 2,
12, 1, 12, 2, 12, 2, 2, 1, 1, 2, 1, 5, 12, 12, 5, 12, 5, 5, 1,
1, 1, 2, 5, 5, 5, 5, 5, 1, 5, 12, 12, 5, 2, 12, 12, 1, 1, 5,
5, 5, 2, 5, 1, 2, 2, 5, 1, 5, 2, 5, 5, 5, 2, 2, 5, 1, 2, 2, 5
), cr = c(0.2, 0.4, 0.4, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4,
0.4, 0.4, 0.2, 0.4, 0.4, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.4,
0.2, 0.2, 0.2, 0.4, 0.2, 0.2, 0.4, 0.4, 0.4, 0.4, 0.2, 0.4, 0.2,
0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.4, 0.2,
0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.2, 0.4, 0.4, 0.4, 0.4,
0.2, 0.4, 0.4, 0.4, 0.4, 0.2, 0.4, 0.4, 0.2, 0.4, 0.4, 0.2, 0.2,
0.2, 0.2, 0.2, 0.4, 0.4, 0.4, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4,
0.2, 0.2, 0.2, 0.4, 0.2, 0.4, 0.4, 0.4, 0.4, 0.2, 0.2), p = c(4,
12, 12, 8, 12, 8, 12, 4, 4, 8, 8, 4, 4, 8, 8, 8, 4, 12, 8, 4,
12, 12, 12, 12, 8, 12, 4, 4, 8, 8, 8, 4, 8, 12, 4, 12, 12, 4,
12, 8, 4, 4, 12, 4, 4, 8, 4, 4, 8, 4, 8, 12, 12, 8, 4, 8, 8,
8, 8, 12, 4, 8, 4, 12, 4, 4, 12, 4, 12, 12, 8, 4, 4, 12, 8, 12,
4, 4, 12, 4, 8, 4, 8, 12, 8, 4, 4, 4, 8, 4, 4, 12, 8, 12, 8,
4, 4, 8, 8, 4), nsamp = c(100, 300, 300, 200, 300, 200, 300,
100, 100, 200, 200, 100, 100, 200, 200, 200, 100, 300, 200, 100,
300, 300, 300, 300, 200, 300, 100, 100, 200, 200, 200, 100, 200,
300, 100, 300, 300, 100, 300, 200, 100, 100, 300, 100, 100, 200,
100, 100, 200, 100, 200, 300, 300, 200, 100, 200, 200, 200, 200,
300, 100, 200, 100, 300, 100, 100, 300, 100, 300, 300, 200, 100,
100, 300, 200, 300, 100, 100, 300, 100, 200, 100, 200, 300, 200,
100, 100, 100, 200, 100, 100, 300, 200, 300, 200, 100, 100, 200,
200, 100)), .Names = c("yval", "xval", "gval", "type", "cr",
"p", "nsamp"), row.names = c(NA, -100L), class = "data.frame")
And here is the code I am running:
library(lattice)
library(latticeExtra)
library(grid)
types<-rep(NA,6)
types[1]<-expression(paste(epsilon,"=",0.2,", p=",4,sep=""))
types[2]<-expression(paste(epsilon,"=",0.2,", p=",8,sep=""))
types[3]<-expression(paste(epsilon,"=",0.2,", p=",12,sep=""))
types[4]<-expression(paste(epsilon,"=",0.4,", p=",4,sep=""))
types[5]<-expression(paste(epsilon,"=",0.4,", p=",8,sep=""))
types[6]<-expression(paste(epsilon,"=",0.4,", p=",12,sep=""))
types<-rep(types,4)
cl<-rainbow(7)[-4]
xyplot(B$yval~B$xval|as.factor(B$p)*as.factor(B$cr)*as.factor(B$type),
group=B$gval, as.table=TRUE,
ylab=expression(kappa(Sigma,S)), col=cl, xlab=expression(nu),
xlim=c(0,10), ylim=c(0,10), type=c("l","g"), lwd=5, cex.lab=2,
strip=function(...){
panel.fill(trellis.par.get("strip.background")$col[1])
type <- types[panel.number()]
grid::grid.text(label=type,x=0.5,y=0.5,gp=gpar(fontsize=20))
grid::grid.rect()
},
key=list(text=list(c("A","B","C","D","E","F"),cex=2),
lines=list(type=rep("l",6), label.cex=2,col=cl,lwd=3),columns=3),
par.settings=list(par.xlab.text=list(cex=2),axis.text=list(cex=2),
par.ylab.text=list(cex=2)))

Three conditioning variables means that it makes a three dimensional grid of panels, where the third dimension is onto multiple pages. One alternative is to only condition on two variables; here I use : to make the first conditioning factor the intersection of the first two original conditioning factors.
xyplot(B$yval~B$xval|as.factor(B$p):as.factor(B$cr)*as.factor(B$type), ...

I think you want layout=c(6,4) somewhere in your call to xyplot. Once you do that you will have to reconfigure many other settings.

Related

Coarsened Exact Matching with cem package- Error in .subset2(x, i, exact = exact)

I'm trying to perform coarsened exact matching on the following data.
> dput(head(cem_data))
structure(list(sex = c(1, 1, 1, 2, 2, 2), age = c(40, 59, 53,
60, 49, 60), edlev = c(3, 3, 3, 2, 3, 3), sw = c(44, 17, 10,
41, 26, 23), sw2 = c(15, 1, 5, 34, 5, 6), som = c(2.14, 0.14,
1.86, 3, 1.71, 2.14), som_2 = c(0.71, 0.14, 2, 2.57, 1.71, 2.14
), ap = c(3.5, 1.5, 1.33, 3.33, 2.67, 2.17), ap_2 = c(3, 0.17,
2.33, 3, 0.83, 1.67), dep = c(2.83, 0.17, 0.33, 2.83, 2.17, 2.33
), dep_2 = c(1.17, 0, 0.33, 2.33, 0.83, 1), int = c(2.86, 1.43,
1, 2, 2.29, 2.14), int_2 = c(2.29, 0.57, 0.14, 2.57, 1.71, 1.43
), pho = c(3.2, 0, 0, 3.4, 0.8, 0.4), pho_2 = c(1.6, 0, 0, 3.2,
0, 0.4), psy_b = c(2.67, 0.11, 0.83, 3.06, 1.61, 1.72), psy_b_2 = c(1.11,
0.06, 0.89, 2.67, 0.94, 1.28), s_wirk = c(4, 2.2, 1.6, 3.2, 1.4,
2.2), s_wirk_2 = c(2.8, 0.8, 1.8, 2.6, 1.6, 1.4), soz_b = c(2.75,
1.5, 1, 2.25, 1.25, 1.5), soz_b_2 = c(2.75, 1, 1, 2.25, 1.5,
1.25), soz_u = c(0.75, 0.75, 1.75, 3.25, 1, 3.25), soz_u_2 = c(1,
0.25, 1.75, 2.5, 2.5, 2), wohl = c(3.6, 1.4, 1.8, 3.4, 3, 3),
wohl_2 = c(2, 0.6, 1.4, 2.8, 2.2, 1.2), au_bei_aufn = c(1,
1, 1, 1, 1, 1), age_reha = c(40.9890410958904, 59.3945205479452,
53.372602739726, 60.2, 49.3342465753425, 60.7534246575342
), group_format = c(0, 0, 0, 0, 0, 0)), row.names = c(6L,
7L, 10L, 15L, 20L, 29L), class = "data.frame")
With the following code:
require(cem)
voll_data <- voll_data %>%
select(-c("auf_nr", "icd_10_1", "icd_10_2", "icd_10_3", "icd_10_4","icd_10_5", "bdi_date", "aufnahme", "entlassung")) %>%
mutate_if(is.factor,as.numeric) %>%
mutate_if(is.character, as.numeric)
cem_data <- data.frame(na.omit(voll_data))
#cem_data_s <- scale(cem_data[,5:26])
#cem_data <- cbind.data.frame(cem_data[, 1:4], cem_data_s, cem_data[, 27:36])
variables <- c("age", "sex", "edlev", "sw","au_bei_aufn")
ungleich2 <- imbalance(cem_data$group_format, data=cem_data)
However, following error is being shown, when calculating the "matt".
Error in .subset2(x, i, exact = exact) : attempt to select less than one element in get1index
7.
(function(x, i, exact) if (is.matrix(i)) as.matrix(x)[[i]] else .subset2(x, i, exact = exact))(x, ..., exact = exact)
6.
[[.data.frame(data, treatment)
5.
data[[treatment]]
4.
is.factor(x)
3.
as.factor(data[[treatment]])
2.
cem.main(treatment = treatment, data = data, cutpoints = cutpoints, drop = drop, k2k = k2k, method = method, mpower = mpower, verbose = verbose, baseline.group = baseline.group, keep.all = keep.all)
1.
cem(treatment = cem_data$group_format, data = cem_data, drop = "sw2", cutpoints = list(age = agecut), grouping = list(edlev_gr))
# automated coarsening
matt <- cem(cem_data$group_format, data = cem_data, drop= "sw2")
print(matt)
Does anyone have an idea what am I doing wrong?
Thanks a lot!!

How to draw a rectangle under the plot lines in plot?

This is my graph:
which I did using the following dataset:
targ_plot = structure(c(4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25,
4.25, 4.25, 3.75, 3.75, 3.75, 3.5, 3.5, 3.5, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3.25, 3.25, 3.25, 3.5, 3.5, 3.5, 3.75, 3.75, 4, 4, 4.25,
4.25, 4.5, 4.5, 4.5, 4.75, 4.75, 4.75, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5.25, 5.25, 5.25, 4.25, 3.75, 3, 3, 3, 2.5, 2.25,
1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
1.75, 2, 2, 2, 2.25, 2.25, 2.25, 2.25, 2, 1.75, 1.75, 1.75, 1.75,
1.75, 1.75, 1.75, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
1.5, 1, 1, 1, 1, 1, 1, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75,
0.4, 0.4, 0.4, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 3.25,
3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 2.75,
2.75, 2.75, 2.5, 2.5, 2.5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2.25, 2.25,
2.25, 2.5, 2.5, 2.5, 2.75, 2.75, 3, 3, 3.25, 3.25, 3.5, 3.5,
3.5, 3.75, 3.75, 3.75, 4, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25,
4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 3.75, 3.25,
2.5, 2, 2, 1.5, 1.25, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.25, 1.25, 1.25, 1.5, 1.5, 1.5,
1.5, 1.25, 1, 1, 1, 1, 1, 1, 1, 0.75, 0.75, 0.75, 0.75, 0.75,
0.75, 0.75, 0.75, 0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.15, 0.15, 0.15, 0.05, 0.05,
0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05,
0.05, 0.05, 0.05, 0.05, 0.05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.25, 2.25, 2.25,
2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 1.75, 1.75, 1.75,
1.5, 1.5, 1.5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.25, 1.25, 1.25, 1.5,
1.5, 1.5, 1.75, 1.75, 2, 2, 2.25, 2.25, 2.5, 2.5, 2.5, 2.75,
2.75, 2.75, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3.25, 3.25,
3.25, 3.25, 2.75, 2, 1, 1, 0.5, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5, 0.5,
0.75, 0.75, 0.75, 0.75, 0.5, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, -0.1, -0.1, -0.1, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2,
-0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.3, -0.3,
-0.3, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4,
-0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4,
-0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4,
-0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.5,
-0.5, -0.5, -0.5, -0.5, 0.499999999999989, 1.24999999999997,
-0.250000000000039, 0, 0.999999999999979, -2, 0, 1.25000000000002,
-0.249999999999995, 2.99999999999998, 8.65, -5.14999999999999,
0.499999999999989, 2.4, 8.44999999999998, 2.99999999999998, 0.950000000000006,
-4.49999999999999, 0.124999999999998, 0, 0.550000000000006, 0.499999999999989,
0, 0, 0.499999999999989, 0.299999999999967, 2.50000000000001,
3.55000000000001, 0.249999999999995, -0.249999999999995, 0, -0.100000000000033,
0.300000000000011, -0.100000000000033, -0.200000000000022, -1.2,
-0.100000000000033, -0.699999999999967, 0, -0.649999999999995,
0, 0.800000000000001, 0, 0, 0, 0, -1.00000000000002, -0.800000000000001,
-0.200000000000022, -1.2, 0.200000000000022, -0.599999999999978,
-2.49999999999999, -0.550000000000006, -1.75000000000001, 0.424999999999986,
0, -0.099999999999989, -0.999999999999979, 0.4, -0.099999999999989,
-0.800000000000001, 0.099999999999989, 0.150000000000006, 0,
-0.100000000000033, 0.150000000000006, -0.350000000000072, -1.49999999999997,
-1.18499999999999, -0.300000000000011, 0.349999999999984, -0.0999999999999446,
0.349999999999984, -0.0500000000000611, 0, -0.100000000000033,
-0.100000000000033, -1.14999999999998, 0, -0.300000000000011,
2.4, -2.6, 2.59999999999998, -6.30000000000002, 0.349999999999984,
3.95, 4.6, -0.900000000000001, 1.35000000000001, 2.3, 0.2, -0.45,
0, -0.5, -0.750000000000001, -0.35, 0, -0.1, -0.25, 0.1, 1.6,
0.4, 0.2, 0.35, 0.3, 0, -0.2, 0.0500000000000056, 0.350000000000006,
-0.0499999999999945, 0.99999999999999, -0.700000000000012, 0,
0.299999999999989, 0.600000000000001, 2.5, 12.2, -14.3, 2.8,
0.1, 0.35, 0.499999999999995, -0.2, 0.3, 1.8, -10.4, 0.700000000000001,
0.85, 0.550000000000001, 0.599999999999999, 0, 0.1, 0, 0.1, 0.1,
-0.2, 0.399999999999999, 0.1, 0, 0, 0, -0.3, 0.45, 0, 0.550000000000001,
0.999999999999998, 0.899999999999998, 2.1, 0.499999999999999,
0, -0.1, -0.8, 0, 0.4, -0.35, 0.505, 0, -1.3, 1.175, 0, 0, -0.0399999999999998,
0, 0.0150000000000011, 0, 0, 5.03, 0.88, 0, 2.405, 0, 0, 0, 0.4,
0, 0.344999999999995, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0849992036819458,
0, 0, 0, 0, 0.964999198913574, 0, 0, 0, 0, 0, -0.455000996589661,
0, 0, 0, 0.549998879432678, 0, -0.510001182556152, 0, 0, 0, 0.499999523162842,
0, 0.499999523162842, 3.41, 0, 3.440002, -0.06, 0, 0.024998,
0.009999), .Dim = c(217L, 4L), .Dimnames = list(NULL, c("MLF",
"MRO", "DFR", "Target")), .Tsp = c(2002, 2020, 12), class = c("mts",
"ts", "matrix"))
colors = c("#00366C", "#909800" , "#79ABE2", "#E16A86")
and this is the code that produced the graph above:
plot(targ_plot, plot.type = "single", ylab = "" , xlab = "" , main = "Target", col = colors, lwd = 2, xaxt = "n")
axis(1, at = seq(2002, 2020, 1), labels = seq(2002, 2020, 1))
legend("bottomright", colnames(targ_plot), col = colors, lwd = 3, bty = "n")
abline(h = 0, col = "black", lty = 2)
rect(2008,16,2009,-17, col= rgb(0,0,1, alpha=0.5), border = FALSE)
As you can see the rectangle is above the lines, I want it to be below the lines and to be gray possibly. I have been looking for similar things everywhere. I couldn't solve the problem.
Can you help me sort this out?
Thanks
Steps:
Create a blank plotting region by plot(..., type = "n").
Draw the rectangle.
Set the graphical parameter new by par(new = T).
Run your plotting code.
Reset graphical parameters.
# (1)
plot(targ_plot, type = "n", plot.type = "single", ylab = "", xlab = "", main = "Target", col = colors, lwd = 2, xaxt = "n")
# (2)
rect(2008, 16, 2009, -17, col = gray(0.5, alpha = 0.5), border = FALSE)
# (3)
op <- par(new = T)
# (4)
plot(targ_plot, plot.type = "single", ylab = "", xlab = "", main = "Target", col = colors, lwd = 2, xaxt = "n")
axis(1, at = seq(2002, 2020, 1), labels = seq(2002, 2020, 1))
legend("bottomright", colnames(targ_plot), col = colors, lwd = 3, bty = "n")
abline(h = 0, col = "black", lty = 2)
# (5)
par(op)

Error in calculating VIF (Variance Inflation Factor)

I am getting the following error when calculating VIF on a small dataset in Rstudio. Could anyone help? I can provide more information on the dataset if needed.
"Error in as.vector(y) - mean(y) non-numeric argument to binary
operator".
Dataset: 80 obs. and 15 variables (all variables are numeric)
Steps Followed:
# 1. Determine correlation
library(corrplot)
cor.data <- cor(train)
corrplot(cor.data, method = 'color')
cor.data
# 2. Build Model
model2 <- lm(Volume~., train)
summary(model2)
# 3. Calculate VIF
library(VIF)
vif(model2)
Here is a sample dataset with 20 obs.
train <- structure(list(Price = c(949, 2249.99, 399, 409.99, 1079.99,
114.22, 379.99, 65.29, 119.99, 16.99, 6.55, 15, 52.5, 21.08,
18.98, 3.6, 3.6, 174.99, 9.99, 670), X.5.Star.Reviews. = c(3,
2, 3, 49, 58, 83, 11, 33, 16, 10, 21, 75, 10, 313, 349, 8, 11,
170, 15, 20), X.4.Star.Reviews. = c(3, 1, 0, 19, 31, 30, 3, 19,
9, 1, 2, 25, 8, 62, 118, 6, 5, 100, 12, 2), X.3.Star.Reviews. = c(2,
0, 0, 8, 11, 10, 0, 12, 2, 1, 2, 6, 5, 13, 27, 3, 2, 23, 4, 4
), X.2.Star.Reviews. = c(0, 0, 0, 3, 7, 9, 0, 5, 0, 0, 4, 3,
0, 8, 7, 2, 2, 20, 0, 2), X.1.Star.Reviews. = c(0, 0, 0, 9, 36,
40, 1, 9, 2, 0, 15, 3, 1, 16, 5, 1, 1, 20, 4, 4), X.Positive.Service.Review. = c(2,
1, 1, 7, 7, 12, 3, 5, 2, 2, 2, 9, 2, 44, 57, 0, 0, 310, 3, 4),
X.Negative.Service.Review. = c(0, 0, 0, 8, 20, 5, 0, 3, 1,
0, 1, 2, 0, 3, 3, 0, 0, 6, 1, 3), X.Would.consumer.recommend.product. = c(0.9,
0.9, 0.9, 0.8, 0.7, 0.3, 0.9, 0.7, 0.8, 0.9, 0.5, 0.2, 0.8,
0.9, 0.9, 0.8, 0.8, 0.8, 0.8, 0.7), X.Shipping.Weight..lbs.. = c(25.8,
50, 17.4, 5.7, 7, 1.6, 7.3, 12, 1.8, 0.75, 1, 2.2, 1.1, 0.35,
0.6, 0.01, 0.01, 1.4, 0.4, 0.25), X.Product.Depth. = c(23.94,
35, 10.5, 15, 12.9, 5.8, 6.7, 7.9, 10.6, 10.7, 7.3, 21.3,
15.6, 5.7, 1.7, 11.5, 11.5, 13.8, 11.1, 5.8), X.Product.Width. = c(6.62,
31.75, 8.3, 9.9, 0.3, 4, 10.3, 6.7, 9.4, 13.1, 7, 1.8, 3,
3.5, 13.5, 8.5, 8.5, 8.2, 7.6, 1.4), X.Product.Height. = c(16.89,
19, 10.2, 1.3, 8.9, 1, 11.5, 2.2, 4.7, 0.6, 1.6, 7.8, 15,
8.3, 10.2, 0.4, 0.4, 0.4, 0.5, 7.8), X.Profit.margin. = c(0.15,
0.25, 0.08, 0.08, 0.09, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05,
0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.15), Volume = c(12,
8, 12, 196, 232, 332, 44, 132, 64, 40, 84, 300, 40, 1252,
1396, 32, 44, 680, 60, 80)), .Names = c("Price", "X.5.Star.Reviews.",
"X.4.Star.Reviews.", "X.3.Star.Reviews.", "X.2.Star.Reviews.",
"X.1.Star.Reviews.", "X.Positive.Service.Review.", "X.Negative.Service.Review.",
"X.Would.consumer.recommend.product.", "X.Shipping.Weight..lbs..",
"X.Product.Depth.", "X.Product.Width.", "X.Product.Height.",
"X.Profit.margin.", "Volume"), row.names = c(NA, 20L), class = "data.frame")
The vif function from the VIF package does not estimates the Variance Inflation Factor(VIF). "It selects variables for a linear model" and "returns a subset of variables for building a linear model."; see here for the description.
What you want is the vif function from the car package.
install.packages("car")
library(car)
vif(model2) # This should do it
Edit: I won't comment specifically on the statistics side, but it seems like you have a perfect fit, something quite unusual, suggesting some problem in your data.
You're giving vif the wrong input. It wants the response y and predictor variables x:
vif(train$Volume,subset(train,select=-Volume),subsize=19)
I had to set the subsize argument to a value <= the number of observations (the default is 200).
There are 2 R libraries "car" and "VIF" which have the same function vif() defined differently. Your result/error depends on which package you have loaded in your current session.
If you use "VIF" library in the session and pass the linear model as parameter to the vif() function then you will get the error given in the initial query, as shown below:
> model1 = lm(Satisfaction~., data1)
> library(VIF)
Attaching package: ‘VIF’
The following object is masked from ‘package:car’:
vif
> vif(model1)
Error in as.vector(y) - mean(y) : non-numeric argument to binary operator
In addition: Warning message:
In mean.default(y) : argument is not numeric or logical: returning NA
If you load "car" library in R session and not "VIF", then you will get the vif numbers as expected for a linear model as shown below:
> model1 = lm(Satisfaction~., data1)
> library(car)
Loading required package: carData
Attaching package: ‘car’
The following object is masked from ‘package:psych’:
logit
> vif(model1)
ProdQual Ecom TechSup CompRes Advertising ProdLine SalesFImage ComPricing
1.635797 2.756694 2.976796 4.730448 1.508933 3.488185 3.439420 1.635000
WartyClaim OrdBilling DelSpeed
3.198337 2.902999 6.516014
All the columns in data1 are numeric. Hope that helps

Multiple stat_function on grouped data with ggplot2

I am studying a data set with multiple observation of a parameter overtime. the data is like:
test<-data.frame(t = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10), int = c(76.44609375, 94.6619686800895, 112.148907103825, 75.1003097802036, 74.1037037037037, 76.7526662128432, 74.0734830988873, 87.9052100068855, 81.0525931336742, 92.1907873244038, 84.0708929788684, 88.8232221775814, 98.1323678006063, 115.175322139789, 91.2653104925053, 76.3661620658949, 152.637799717913, 107.054702135631, 83.4693197755961, 91.658991910392, 81.3991787335206, 106.153762268266, 100.919789842382, 67.2119436084271, 137.558914728682, 89.1182608695652, 156.10352233677, 108.180911207183, 87.9794680354643, 77.7501400560224, 80.7675382653061, 95.6662793399954, 92.5649630541872, 88.3301402668491, 84.3891875746714, 76.4318673395818, 111.413893510815, 82.4753828420879, 119.099190283401, 192.539417212559, 208.49203187251, 106.919937512205, 105.370936371214, 180.028767711464, 130.29369773608, 170.193357597816, 172.703180212014, 178.061569518042, 182.097607918614, 227.066976984743, 153.856101031661, 432.991580916745, 299.143735224586, 144.118156808803, 396.36644895153, 334.538796516231, 350.186359610275, 200.781101530882, 279.866079790223, 122.542700519331, 235.199555308505, 204.924140655867, 229.181848967152, 225.542753383955, 468.308974987739, 269.306058221873, 229.969282013323, 255.553846153846, 621.021220159151, 255.017211703959, 396.658265826583, 273.300663227708, 232.449965010497, 303.343894502483, 276.952483801296, 327.419805194805, 241.136864249474, 457.961489497136, 498.901714285714, 280.9558101473, 322.089588377724, 386.754533152909, 364.356809338521, 340.416035518412, 428.482916666667, 668.447197400487, 387.671341748481, 471.049545829893, 255.8802020688, 361.979536152797, 192.224629418472, 284.088954468803, 170.763997760358, 237.869065100343, 365.08237271854, 294.266488413547, 718.279750479846, 211.599427030671, 294.045375597047, 207.099267015707, 194.209973045822, 251.306358381503, 190.786794766966, 400.396083385976, 183.133240482823, 130.442107867392, 167.231452991453, 345.110896351776, 299.304645622394, 192.078204692282, 121.273544841369, 153.996295438759, 97.6034616378197, 362.80049522462, 130.498551774077, 106.031656035908, 117.682936668011, 90.1247837370242, 140.855475040258, 169.050049067713, 244.290241606527, 120.603356419819, 173.413333333333, 125.896389002872, 206.543873212215, 186.668320340184, 85.0988108720272, 106.57849117175, 102.867232728676, 216.232957110609, 86.6538461538462, 149.459777852575, 212.498573059361, 93.3816390633923, 105.567730417318, 120.095470383275, 137.205696941396, 141.156985871272, 90.578857338351, 84.8457760314342, 127.092660685395, 136.859870967742, 188.406440382942, 86.0879705400982))
class(test)
I managed to plot the density for each time point using:
ggplot(test, aes(int, group = as.factor(t),colour=t))+ geom_density()
But I would like to do the same graph but instead of the density I would like to plot a log normal fit of the density.
I know how to plot the lognormal fitting on one time point using fitdistr and passing parameter to stat_function whit this code
library(MASS)
fit <- fitdistr(subset(test, t == 0,select='int')$int, "lognormal")
ggplot(data=subset(test, t == 0,select='int'), aes(x=int)) +stat_function(fun = dlnorm,args = list(mean = fit$estimate[1], sd = fit$estimate[2]))
But how can I do it for all t with the colour of the line being given by the value of t is it possible to provide a function in the args list?
I thought of another naive solution: Predicting the values of every dlnorm().
## Split up the data according to t
tt <- split(test, test$t)
## Fit a lognormal to every dataset
fits <- lapply(tt, function(x) fitdistr(x$int, "lognormal"))
## Predict values
fitted <- lapply(fits, function(x) dlnorm(x = 1:max(test$int),
mean = x$estimate[1], sd = x$estimate[2]))
## Wrap everything into a data.frame ggplot can handle
plot.data <- data.frame(y = unlist(fitted), int = 1:max(test$int),
t = rep(unique(test$t),
each = length(unlist(fitted))/length(unique(test$t))))
## Plot
ggplot(test, aes(int, group = as.factor(t), colour=t)) +
#geom_density() +
geom_line(data = plot.data, aes(y = y), lwd = 1)
What about a naive solution, adding iteratively stat_function()?
cols <- brewer.pal(length(unique(test$t)),"Set1")
g <- ggplot(data=subset(test, t == 0, select='int'), aes(x=int))
n <- 1
for(i in unique(test$t)){
fit <- fitdistr(subset(test, t == i, select='int')$int, "lognormal")
g <- g+stat_function(fun = dlnorm,
args=list(mean=fit$estimate[1],sd=fit$estimate[2]),
col=cols[n])
n <- n + 1
}
g

Analytical gradient for bisection method nested within objective function

I'm attempting to fit parameters to a data set using optim() in R. The objective function requires iterative root-solving for equation G so that the predicted values p brings the values for G (nested within the objective function) to 0 (or as close as 0 to possible; I use 50 iterations of the bisection method for stability).
Here is the problem: I would really prefer to include an analytical gradient for optim(), but I suspect it isn't possible for an iterated function. However, before I give up on the analytical gradient, I wanted to run this problem by everyone here and see if there might be a solution I'm overlooking. Any thoughts?
Note: before settling on the bisection method, I tried other root-solving methods, but all non-bracketing methods (Newton, etc.) seem to be unstable.
Below is a reproducible example of the problem. With the provided data set and the starting values for optim(), the algorithm converges just fine without an analytical gradient, but it doesn't perform so well for other sets of data and starting values.
#the data set includes two input variables (x1 and x2)
#the response values are k successes out of n trials
x1=c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1.5, 1.5, 1.5, 1.5, 1.75, 1.75, 1.75, 1.75, 2, 2,
2, 2, 2.25, 2.25, 2.25, 2.25, 2.5, 2.5, 2.5, 2.5, 2.75, 2.75,
2.75, 2.75, 3, 3, 3, 3, 3.25, 3.25, 3.25, 3.25, 3.5, 3.5, 3.5,
3.5, 3.75, 3.75, 3.75, 3.75, 4, 4, 4, 4, 4.25, 4.25, 4.25, 4.25,
4.5, 4.5, 4.5, 4.5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.5, 1.5, 1.5, 1.5,
1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.75, 1.75,
1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
1.75, 1.75, 1.75, 1.75, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2.25, 2.25, 2.25,
2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25,
2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25,
2.25, 2.25, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5, 2.5, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75,
2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75,
2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25,
3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25,
3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.5, 3.5,
3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5,
3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5,
3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75,
3.75, 3.75, 3.75, 3.75, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25,
4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25,
4.25, 4.25, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5,
4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5)
x2=c(0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0.1, 0.1, 0.1, 0.15, 0.15, 0.15,
0.15, 0.2, 0.2, 0.2, 0.2, 0.233, 0.233, 0.233, 0.267, 0.267,
0.267, 0.267, 0.3, 0.3, 0.3, 0.3, 0.333, 0.333, 0.333, 0.333,
0.367, 0.367, 0.367, 0.367, 0.4, 0.4, 0.4, 0.4, 0.433, 0.433,
0.433, 0.433, 0.467, 0.467, 0.467, 0.5, 0.5, 0.5, 0.5, 0.55,
0.55, 0.55, 0.55, 0.6, 0.6, 0.6, 0.6, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.267,
0.267, 0.267, 0.267, 0.333, 0.333, 0.333, 0.333, 0.4, 0.4, 0.4,
0.4, 0.467, 0.467, 0.467, 0.467, 0.55, 0.55, 0.55, 0.55, 0.15,
0.15, 0.15, 0.15, 0.233, 0.233, 0.233, 0.233, 0.3, 0.3, 0.3,
0.3, 0.367, 0.367, 0.367, 0.367, 0.433, 0.433, 0.433, 0.433,
0.5, 0.5, 0.5, 0.6, 0.6, 0.6, 0.6, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2,
0.2, 0.2, 0.267, 0.267, 0.267, 0.267, 0.333, 0.333, 0.333, 0.333,
0.4, 0.4, 0.4, 0.4, 0.467, 0.467, 0.467, 0.467, 0.55, 0.55, 0.55,
0.55, 0.15, 0.15, 0.15, 0.15, 0.233, 0.233, 0.233, 0.233, 0.3,
0.3, 0.3, 0.3, 0.367, 0.367, 0.367, 0.367, 0.433, 0.433, 0.433,
0.433, 0.5, 0.5, 0.5, 0.5, 0.6, 0.6, 0.6, 0.6, 0.1, 0.1, 0.1,
0.1, 0.2, 0.2, 0.2, 0.267, 0.267, 0.267, 0.267, 0.333, 0.333,
0.333, 0.333, 0.4, 0.4, 0.4, 0.4, 0.467, 0.467, 0.467, 0.467,
0.55, 0.55, 0.55, 0.55, 0.15, 0.15, 0.15, 0.15, 0.233, 0.233,
0.233, 0.233, 0.3, 0.3, 0.3, 0.3, 0.367, 0.367, 0.367, 0.367,
0.433, 0.433, 0.433, 0.433, 0.5, 0.5, 0.5, 0.5, 0.6, 0.6, 0.6,
0.6, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.267, 0.267, 0.267,
0.267, 0.333, 0.333, 0.333, 0.333, 0.4, 0.4, 0.4, 0.4, 0.467,
0.467, 0.467, 0.467, 0.55, 0.55, 0.55, 0.55, 0.15, 0.15, 0.15,
0.15, 0.233, 0.233, 0.233, 0.3, 0.3, 0.3, 0.3, 0.367, 0.367,
0.367, 0.367, 0.433, 0.433, 0.433, 0.433, 0.5, 0.5, 0.5, 0.6,
0.6, 0.6, 0.6, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.267,
0.267, 0.267, 0.267, 0.333, 0.333, 0.333, 0.333, 0.4, 0.4, 0.4,
0.4, 0.467, 0.467, 0.467, 0.467, 0.55, 0.55, 0.55, 0.55, 0.15,
0.15, 0.15, 0.15, 0.233, 0.233, 0.233, 0.233, 0.3, 0.3, 0.3,
0.3, 0.367, 0.367, 0.367, 0.367, 0.433, 0.433, 0.433, 0.433,
0.5, 0.5, 0.5, 0.5, 0.6, 0.6, 0.6, 0.6, 0.1, 0.1, 0.1, 0.1, 0.2,
0.2, 0.2, 0.2, 0.267, 0.267, 0.267, 0.267, 0.333, 0.333, 0.333,
0.15, 0.15, 0.15, 0.15, 0.233, 0.233, 0.233, 0.233, 0.3, 0.3,
0.3, 0.3, 0.367, 0.367, 0.367, 0.367, 0.433, 0.433, 0.433, 0.433,
0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.267, 0.267, 0.267,
0.267, 0.333, 0.333, 0.333, 0.333, 0.4, 0.4, 0.4, 0.4, 0.15,
0.15, 0.15, 0.15, 0.233, 0.233, 0.233, 0.233, 0.3, 0.3, 0.3,
0.3, 0.367, 0.367, 0.367, 0.367, 0.433, 0.433, 0.433, 0.433)
k=c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 1, 3,
3, 3, 3, 3, 3, 4, 2, 5, 3, 4, 7, 8, 5, 4, 5, 5, 4, 5, 5, 5, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 3, 2, 4, 1, 2,
3, 4, 2, 2, 4, 4, 3, 1, 2, 0, 3, 4, 5, 5, 0, 0, 0, 0, 0, 0, 1,
0, 0, 1, 1, 2, 1, 2, 2, 0, 3, 1, 0, 2, 4, 6, 5, 5, 4, 5, 5, 5,
1, 0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 1, 3, 4, 3, 4, 5, 5, 5, 5, 8,
6, 7, 6, 6, 5, 7, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 2, 1, 1, 3, 3,
2, 1, 3, 6, 2, 5, 3, 3, 5, 6, 5, 5, 5, 1, 0, 1, 1, 2, 1, 1, 1,
3, 4, 2, 5, 5, 3, 4, 4, 6, 4, 6, 5, 6, 5, 5, 5, 5, 4, 5, 5, 0,
0, 0, 0, 0, 2, 0, 2, 3, 3, 3, 2, 3, 3, 1, 4, 4, 4, 4, 3, 5, 6,
5, 5, 5, 5, 5, 1, 4, 1, 2, 2, 3, 4, 2, 5, 5, 5, 5, 5, 4, 5, 7,
6, 7, 6, 5, 5, 5, 7, 5, 5, 5, 5, 5, 0, 1, 0, 0, 3, 2, 3, 3, 1,
2, 2, 2, 4, 2, 3, 2, 5, 5, 5, 5, 4, 6, 5, 6, 5, 5, 6, 5, 3, 5,
2, 4, 5, 3, 5, 5, 6, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 0, 0, 2, 0, 3, 2, 3, 2, 3, 4, 3, 4, 5, 5, 5, 5, 6, 4,
6, 4, 5, 7, 5, 5, 5, 6, 5, 5, 2, 3, 4, 4, 4, 4, 5, 5, 5, 6, 5,
5, 5, 5, 5, 4, 6, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 1, 0, 2, 0,
3, 5, 2, 2, 4, 5, 4, 5, 6, 6, 4, 5, 4, 5, 4, 5, 5, 5, 5, 5, 5,
6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 4, 1, 4, 4, 4, 4, 4, 3, 6, 5,
4, 3, 5, 4, 5, 6, 6, 5, 6, 5, 4, 5, 5, 5, 6, 5, 5, 5, 11, 5,
12, 5, 5, 5, 5, 4, 5, 5, 5)
n=c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5,
6, 5, 6, 5, 5, 5, 5, 7, 5, 6, 8, 8, 6, 5, 6, 5, 5, 5, 5, 5, 6,
5, 5, 5, 5, 7, 11, 8, 7, 5, 5, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5,
4, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 4, 5, 5, 5, 6, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 6, 6, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 7, 6, 7, 6, 5, 5, 5, 5,
5, 5, 5, 5, 5, 6, 5, 6, 6, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5,
8, 6, 7, 6, 6, 5, 7, 5, 5, 5, 5, 6, 5, 5, 5, 7, 7, 6, 5, 6, 5,
5, 5, 5, 6, 6, 4, 6, 6, 5, 5, 6, 6, 5, 5, 5, 5, 5, 5, 7, 5, 5,
4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 4, 6, 5, 6, 5, 5, 5, 5, 4, 5, 5,
5, 5, 6, 6, 5, 6, 5, 4, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5,
6, 5, 5, 5, 5, 5, 5, 6, 5, 6, 7, 4, 6, 5, 5, 5, 5, 5, 5, 4, 5,
7, 6, 7, 6, 5, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 5,
5, 5, 5, 5, 5, 4, 5, 6, 5, 5, 5, 5, 5, 7, 5, 6, 5, 5, 6, 5, 5,
5, 5, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6,
5, 6, 5, 6, 7, 5, 5, 5, 6, 5, 5, 4, 5, 5, 5, 5, 6, 5, 5, 5, 6,
5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 4, 5, 5, 5, 5, 5, 5, 5, 7, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6,
5, 5, 5, 5, 5, 5, 6, 6, 5, 6, 5, 5, 5, 5, 5, 6, 5, 5, 5, 11,
5, 12, 5, 5, 5, 5, 4, 5, 5, 5)
#low_high contains the lower and upper bounds for the bisection method
low_high=vector("list",2)
low_high[["low"]]=rep(0,length(x1))
low_high[["high"]]=rep(1,length(x1))
low_high_list=rep(list(low_high),50)
ll=function(theta)
{
names(theta)=c("b1","m1","b2","m2")
b1=theta[["b1"]]
m1=theta[["m1"]]
b2=theta[["b2"]]
m2=theta[["m2"]]
#bisection function is used to find y which makes G=0
bisection_function=function(prv,nxt)
{
low_high=prv
#G and y are both vectors of the length of the data set (in this example, 469)
y=(low_high[["low"]]+low_high[["high"]])/2
G=-1+(x1/((log(-y/(y-1))-b1)/m1))+(x2/((log(-y/(y-1))-b2)/m2))
low_high[["low"]][G>0]=y[G>0]
low_high[["high"]][G<0]=y[G<0]
return(low_high)
}
#Reduce is the fastest method I've found so far
#(in other words, if there is a better way, I'm certainly open to suggestions!)
low_high=Reduce(bisection_function,low_high_list)
p=(low_high[["low"]]+low_high[["high"]])/2
#sum of log likelihood for binomial distribution
loglik=sum(log((gamma(1+n)/(gamma(1+k)*(gamma(1+n-k))))*((p^k)*((1-p)^(n-k)))))
return(loglik)
}
theta.start=c(b1=-10,m1=10,b2=-10,m2=10)
mle=optim(theta.start,ll,control=list(fnscale=-1),hessian=TRUE)
Thanks!!
Using Vincent's suggestions, I was able to supply an analytic gradient via implicit differentiation. In case anyone else has a similar problem, I have included reproducible code below (to be added after the code included in the question).
Gexpression=parse(text="-1+(x1/((log(-p/(p-1))-b1)/m1))+(x2/((log(-p/(p-1))-b2)/m2))")
nested=function(theta)
{
names(theta)=c("b1","m1","b2","m2")
b1=theta[["b1"]]
m1=theta[["m1"]]
b2=theta[["b2"]]
m2=theta[["m2"]]
#bisection function is used to find y which makes G=0
bisection_function=function(prv,nxt)
{
low_high=prv
#G and y are both vectors of the length of the data set (in this example, 469)
y=(low_high[["low"]]+low_high[["high"]])/2
G=-1+(x1/((log(-y/(y-1))-b1)/m1))+(x2/((log(-y/(y-1))-b2)/m2))
low_high[["low"]][G>0]=y[G>0]
low_high[["high"]][G<0]=y[G<0]
return(low_high)
}
low_high=Reduce(bisection_function,low_high_list)
p=(low_high[["low"]]+low_high[["high"]])/2
return(p)
}
gr=function(theta)
{
names(theta)=c("b1","m1","b2","m2")
b1=theta[["b1"]]
m1=theta[["m1"]]
b2=theta[["b2"]]
m2=theta[["m2"]]
p=nested(theta)
# dll is the derivative of the loglik function, which takes the partial derivative
# of any parameter
dll=function(d_any) (((k / p) * d_any) - (((n - k) / (1 - p))*d_any))
#fd_any takes the partial derivative of the with respect to any parameter
fd_any=function(any) eval(parse(text=paste("-((",as.character(list(D(Gexpression,any))),")/(",as.character(list(D(Gexpression,'p'))),"))",sep="")))
DLb1=dll(fd_any("b1"))
DLb2=dll(fd_any("b2"))
DLm1=dll(fd_any("m1"))
DLm2=dll(fd_any("m2"))
DLb1[is.na(DLb1)]=0
DLb2[is.na(DLb2)]=0
DLm1[is.na(DLm1)]=0
DLm2[is.na(DLm2)]=0
colSums(cbind(b1=DLb1,m1=DLm1,b2=DLb2,m2=DLm2))
}
hs=function(theta)
{
names(theta)=c("b1","m1","b2","m2")
b1=theta[["b1"]]
m1=theta[["m1"]]
b2=theta[["b2"]]
m2=theta[["m2"]]
p=nested(theta)
fd_any_fun=function(any) paste("(-((",as.character(list(D(Gexpression,any))),")/(",as.character(list(D(Gexpression,'p'))),")))",sep="")
dll_fun=function(d_any_fun) paste("((k / p) * (",d_any_fun,")) - (((n - k) / (1 - p))*(",d_any_fun,"))",sep="")
hb1b1=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("b1"))),"b1")))
hb1m1=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("b1"))),"m1")))
hb1b2=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("b1"))),"b2")))
hb1m2=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("b1"))),"m2")))
hb1b1[is.na(hb1b1)]=0
hb1m1[is.na(hb1m1)]=0
hb1b2[is.na(hb1b2)]=0
hb1m2[is.na(hb1m2)]=0
hb1b1=sum(hb1b1)
hb1m1=sum(hb1m1)
hb1b2=sum(hb1b2)
hb1m2=sum(hb1m2)
h1=c(hb1b1,hb1m1,hb1b2,hb1m2)
hm1b1=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("m1"))),"b1")))
hm1m1=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("m1"))),"m1")))
hm1b2=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("m1"))),"b2")))
hm1m2=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("m1"))),"m2")))
hm1b1[is.na(hm1b1)]=0
hm1m1[is.na(hm1m1)]=0
hm1b2[is.na(hm1b2)]=0
hm1m2[is.na(hm1m2)]=0
hm1b1=sum(hm1b1)
hm1m1=sum(hm1m1)
hm1b2=sum(hm1b2)
hm1m2=sum(hm1m2)
h2=c(hm1b1,hm1m1,hm1b2,hm1m2)
hb2b1=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("b2"))),"b1")))
hb2m1=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("b2"))),"m1")))
hb2b2=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("b2"))),"b2")))
hb2m2=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("b2"))),"m2")))
hb2b1[is.na(hb2b1)]=0
hb2m1[is.na(hb2m1)]=0
hb2b2[is.na(hb2b2)]=0
hb2m2[is.na(hb2m2)]=0
hb2b1=sum(hb2b1)
hb2m1=sum(hb2m1)
hb2b2=sum(hb2b2)
hb2m2=sum(hb2m2)
h3=c(hb2b1,hb2m1,hb2b2,hb2m2)
hm2b1=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("m2"))),"b1")))
hm2m1=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("m2"))),"m1")))
hm2b2=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("m2"))),"b2")))
hm2m2=eval(parse(text=D(parse(text=dll_fun(fd_any_fun("m2"))),"m2")))
hm2b1[is.na(hm2b1)]=0
hm2m1[is.na(hm2m1)]=0
hm2b2[is.na(hm2b2)]=0
hm2m2[is.na(hm2m2)]=0
hm2b1=sum(hm2b1)
hm2m1=sum(hm2m1)
hm2b2=sum(hm2b2)
hm2m2=sum(hm2m2)
h4=c(hm2b1,hm2m1,hm2b2,hm2m2)
h=rbind(h1,h2,h3,h4)
return(h)
}
The gradient seems to work fine. For some reason, the estimated Hessian matrix from optim() is different than the gradient calculated in hs(). The resulting standard errors are of the same order of magnitude, at least:
# Standard errors from optim Hessian
sqrt(abs(diag(solve(mle$hessian))))
# Standard errors from analytic Hessian
sqrt(abs(diag(solve(hs(mle$par)))))

Resources