Multiple stat_function on grouped data with ggplot2

I am studying a data set with multiple observations of a parameter over time. The data looks like this:
test<-data.frame(t = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10), int = c(76.44609375, 94.6619686800895, 112.148907103825, 75.1003097802036, 74.1037037037037, 76.7526662128432, 74.0734830988873, 87.9052100068855, 81.0525931336742, 92.1907873244038, 84.0708929788684, 88.8232221775814, 98.1323678006063, 115.175322139789, 91.2653104925053, 76.3661620658949, 152.637799717913, 107.054702135631, 83.4693197755961, 91.658991910392, 81.3991787335206, 106.153762268266, 100.919789842382, 67.2119436084271, 137.558914728682, 89.1182608695652, 156.10352233677, 108.180911207183, 87.9794680354643, 77.7501400560224, 80.7675382653061, 95.6662793399954, 92.5649630541872, 88.3301402668491, 84.3891875746714, 76.4318673395818, 111.413893510815, 82.4753828420879, 119.099190283401, 192.539417212559, 208.49203187251, 106.919937512205, 105.370936371214, 180.028767711464, 130.29369773608, 170.193357597816, 172.703180212014, 178.061569518042, 182.097607918614, 227.066976984743, 153.856101031661, 432.991580916745, 299.143735224586, 144.118156808803, 396.36644895153, 334.538796516231, 350.186359610275, 200.781101530882, 279.866079790223, 122.542700519331, 235.199555308505, 204.924140655867, 229.181848967152, 225.542753383955, 468.308974987739, 269.306058221873, 229.969282013323, 255.553846153846, 621.021220159151, 255.017211703959, 396.658265826583, 273.300663227708, 232.449965010497, 303.343894502483, 276.952483801296, 327.419805194805, 241.136864249474, 457.961489497136, 498.901714285714, 280.9558101473, 322.089588377724, 386.754533152909, 364.356809338521, 340.416035518412, 428.482916666667, 668.447197400487, 387.671341748481, 471.049545829893, 255.8802020688, 361.979536152797, 192.224629418472, 284.088954468803, 170.763997760358, 237.869065100343, 365.08237271854, 294.266488413547, 718.279750479846, 211.599427030671, 294.045375597047, 207.099267015707, 194.209973045822, 251.306358381503, 190.786794766966, 400.396083385976, 183.133240482823, 130.442107867392, 167.231452991453, 345.110896351776, 299.304645622394, 192.078204692282, 121.273544841369, 153.996295438759, 97.6034616378197, 362.80049522462, 130.498551774077, 106.031656035908, 117.682936668011, 90.1247837370242, 140.855475040258, 169.050049067713, 244.290241606527, 120.603356419819, 173.413333333333, 125.896389002872, 206.543873212215, 186.668320340184, 85.0988108720272, 106.57849117175, 102.867232728676, 216.232957110609, 86.6538461538462, 149.459777852575, 212.498573059361, 93.3816390633923, 105.567730417318, 120.095470383275, 137.205696941396, 141.156985871272, 90.578857338351, 84.8457760314342, 127.092660685395, 136.859870967742, 188.406440382942, 86.0879705400982))
I managed to plot the density for each time point using:
ggplot(test, aes(int, group = as.factor(t), colour = t)) + geom_density()
But I would like to make the same graph with a lognormal fit plotted instead of the density itself. I know how to fit a lognormal distribution to a single time point using fitdistr and pass the parameters to stat_function, with this code:
library(MASS)
fit <- fitdistr(subset(test, t == 0, select = 'int')$int, "lognormal")
ggplot(data = subset(test, t == 0, select = 'int'), aes(x = int)) +
  stat_function(fun = dlnorm,
                args = list(meanlog = fit$estimate[1], sdlog = fit$estimate[2]))
But how can I do this for all values of t, with the colour of each line given by t? Is it possible to provide a function in the args list?

I thought of another naive solution: predicting the values of every dlnorm().
## Split up the data according to t
tt <- split(test, test$t)
## Fit a lognormal to every dataset
fits <- lapply(tt, function(x) fitdistr(x$int, "lognormal"))
## Predict values
fitted <- lapply(fits, function(f) dlnorm(x = 1:max(test$int),
                                          meanlog = f$estimate[1],
                                          sdlog = f$estimate[2]))
## Wrap everything into a data.frame ggplot can handle
plot.data <- data.frame(y = unlist(fitted),
                        int = 1:max(test$int),
                        t = rep(unique(test$t),
                                each = length(unlist(fitted))/length(unique(test$t))))
## Plot
ggplot(test, aes(int, group = as.factor(t), colour=t)) +
#geom_density() +
geom_line(data = plot.data, aes(y = y), lwd = 1)

What about another naive solution: iteratively adding stat_function() layers (this uses RColorBrewer for the palette)?
library(RColorBrewer)
## Set1 has at most 9 colours, so interpolate to get one colour per time point
cols <- colorRampPalette(brewer.pal(9, "Set1"))(length(unique(test$t)))
g <- ggplot(data = subset(test, t == 0, select = 'int'), aes(x = int))
n <- 1
for (i in unique(test$t)) {
  fit <- fitdistr(subset(test, t == i, select = 'int')$int, "lognormal")
  g <- g + stat_function(fun = dlnorm,
                         args = list(meanlog = fit$estimate[1],
                                     sdlog = fit$estimate[2]),
                         col = cols[n])
  n <- n + 1
}
g
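For completeness, the loop can also be avoided: ggplot2 accepts a list of layers added with +, so the same idea can be written with lapply(). A minimal sketch (the manual rainbow() palette is an assumption for illustration; unlike mapping colour inside aes(), it produces no legend):
library(MASS)
library(ggplot2)
tps <- unique(test$t)
cols <- setNames(rainbow(length(tps)), tps)
## one stat_function layer per time point, each with its own lognormal fit
layers <- lapply(tps, function(i) {
  fit <- fitdistr(test$int[test$t == i], "lognormal")
  stat_function(fun = dlnorm,
                args = list(meanlog = fit$estimate[1],
                            sdlog = fit$estimate[2]),
                colour = cols[as.character(i)])
})
ggplot(test, aes(x = int)) + layers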

Related

How do I change confidence interval calculation to log-log on R?

If I want to calculate confidence intervals using the coxph and confint functions, how do I change the confidence interval calculation to log-log? My understanding is that log is the default.
I tried conf.type = "log-log", but it did not work; I just got an error message:
library(survival)
coxph(formula = Surv(futime, fustat) ~ tx, data = tki, conf.type="log-log")
fit <- coxph(formula = Surv(futime, fustat) ~ tx, data = tki)
summary(fit)
#output provides HR CIs
confint(fit)
#coefficient CIs
exp(confint(fit))
> dput(tki)
structure(list(futime = c(9.26, 11.06, 2.35, 3.75, 12.4, 10.3, 8.11, 7.29, 6.75, 6.56, 0.26, 1.9, 0.34, 1.63, 1.55, 1.6, 4.78, 2.65, 1.72, 3.63), fustat = c(1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1), tx = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L))
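For what it's worth, conf.type is an argument of survfit(), not coxph(), which would explain the error; as far as I know, the hazard-ratio intervals from summary(coxph(...)) are always computed on the log-coefficient scale and exponentiated. A sketch of the log-log option applied to a Kaplan-Meier fit of the same data:
library(survival)
# conf.type is accepted by survfit(); "log" is the default,
# "log-log" and "plain" are alternatives
km <- survfit(Surv(futime, fustat) ~ tx, data = tki, conf.type = "log-log")
summary(km)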

Randomly sampling iteratively from a data frame

I have the following dataframe that simulates a panel data set (i.e., multiple waves per unit).
dat <- structure(list(x = c(-0.32, -0.26, 0.05, -0.37, -0.37, -0.08,
-0.01, 0.05, 0.19, -0.48, 0.37, 0.05, -0.58, -0.18, -0.04, -0.28,
-0.44, -0.48, 1.05, 0.62, 0.85, 0.42, 0.7, 0.64, -0.19, -0.11,
-0.65, -0.01, 0.39, -0.02, -0.23, -0.6, -0.1, 0.39, 0.33, 0.39,
-0.09, -0.16, 0.26, -0.62, -0.44, -0.6, -0.17, -0.27, -0.12,
-0.53, -0.38, -0.33, -0.17, -0.11, -0.25, -0.92, -0.6, -0.81,
0.75, 0.52, 0.57, 1.32, 1.21, 1.21), y = c(-0.42, -2.01, -1.19,
0.7, 1.28, 1.37, 0.52, 2.04, 2.34, -1.45, 2.84, 0.1, -3.12, 0.22,
-0.06, -1.65, -0.9, -1.5, -0.98, -0.69, 0.15, 1.7, 1.47, 0.15,
0.26, 0.84, 0.35, 0.86, -1.23, -0.74, -1.79, -0.56, -2.15, 2.11,
2.34, 0.57, 0.38, 0.57, 0.97, 0.32, -1.71, -0.8, 1.45, -0.12,
1.93, 2.76, 0.08, -2.8, -0.06, 1.09, -0.4, 0.41, 0.02, -1.61,
1.75, 1.6, -0.19, 0.13, -0.89, -1.1), unit = c(1, 1, 1, 2, 2,
2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9,
9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14, 14,
15, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20,
20, 20), wave = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3,
1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3,
1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3,
1, 2, 3)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -60L), groups = structure(list(unit = c(1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20), .rows = structure(list(1:3, 4:6, 7:9, 10:12, 13:15, 16:18,
19:21, 22:24, 25:27, 28:30, 31:33, 34:36, 37:39, 40:42, 43:45,
46:48, 49:51, 52:54, 55:57, 58:60), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -20L), .drop = TRUE))
I now want to simulate attrition into this data set: Some units drop out with a certain probability in wave 2; some of those who remain drop out in wave 3; and so forth until wave n. The probability remains the same at each step. Note that this approach should be flexible regarding the number of waves.
Here's what I came up with. Though it works, it feels slow to me. However, due to the varying number of waves, I am not sure how to avoid the loop.
library(dplyr)
# number of units and number of observations per unit:
n = 20
n_perunit = 3
# define attrition probability:
attrition = 2/3
# Start with a vector of all units
remaining <- 1:n
# loop through waves beginning with 2
system.time(for (i in 2:n_perunit) {
  n_remaining <- round(length(remaining)*attrition)
  remaining <- sample(remaining, n_remaining)
  dat <- dat %>%
    mutate(drop = ifelse(
      wave >= i & !(unit %in% remaining), TRUE, FALSE)) %>%
    filter(drop == FALSE) %>%
    mutate(drop = NULL)
})
Efficiency:
user system elapsed
0.016 0.000 0.016
Any ideas how to improve this?
EDIT:
Based on @jpsmith's answer (which, as far as I can see, does not work for groups in which nobody drops out, because min(which(dropout == "Yes")) will return Inf for those), I came up with the following:
set.seed(1234)
system.time(if (!is.null(attrition)) {
  # assign a 1 or 0 indicating dropout
  dat <- dat %>%
    mutate(dropout = ifelse(
      wave > 1,
      sample(0:1, n(), prob = c(attrition, 1 - attrition), replace = TRUE),
      0))
  # first get the first (minimum) dropout in each unit...
  dat <- dat %>%
    group_by(unit) %>%
    mutate(min = ifelse(
      length(which(dropout == 1)) > 0, min(which(dropout == 1)), n_perunit)) %>%
    # ... then slice out rows up to that row
    slice(1:min) %>%
    # as this also includes the first dropout row, drop it
    filter(dropout == 0)
})
Efficiency:
user system elapsed
0.01 0.00 0.01
However, slice() produces some annoying warnings - any idea why?
Perhaps I'm wrong, but in effect the attrition is iid after the first wave: each subsequent wave has a probability of dropout - so if you made it to wave 3 then that probability is not conditioned on anything (akin to the probability of flipping a third heads if the first two were heads). If I am reading this correctly, you could assign the dropout simultaneously across waves > 1 and then drop all observations after the first "dropout". This would vectorize everything and be faster.
Code
set.seed(123)
attrition <- 2/3
# Assign "dropout" position
dat$dropout <- ifelse(dat$wave > 1,
                      sample(c("Yes", "No"), length(dat$wave), replace = TRUE,
                             prob = c(attrition, 1 - attrition)),
                      "No")
# Drop all observations after first dropout recorded
dat %>% group_by(unit) %>% slice(seq_len(min(which(dropout == "Yes") - 1)))
Output:
# Groups: unit [20]
# x y unit wave dropout
# <dbl> <dbl> <dbl> <dbl> <chr>
# 1 -0.32 -0.42 1 1 No
# 2 -0.26 -2.01 1 2 No
# 3 -0.37 0.7 2 1 No
# 4 -0.01 0.52 3 1 No
# 5 0.05 2.04 3 2 No
# 6 -0.48 -1.45 4 1 No
# 7 -0.58 -3.12 5 1 No
# 8 -0.18 0.22 5 2 No
# 9 -0.28 -1.65 6 1 No
# 10 1.05 -0.98 7 1 No
# # … with 20 more rows
Since you didn't set a seed or provide a desired output data set, I can't compare, but I'm happy to test this if you provide one.
user system elapsed
0.008 0.001 0.009
Since the number of units remaining after each wave is deterministic, we can do the sampling all in one go.
library(dplyr)
set.seed(5)
n <- 20
n_perunit <- 3
# define attrition probability:
attrition <- 2/3
# Start with a vector of all units
remaining <- 1:n
# loop through waves beginning with 2
fOriginal <- function(df, remaining) {
  for (i in 2:n_perunit) {
    n_remaining <- round(length(remaining)*attrition)
    remaining <- sample(remaining, n_remaining)
    df <- df %>%
      mutate(drop = ifelse(
        wave >= i & !(unit %in% remaining), TRUE, FALSE)) %>%
      filter(drop == FALSE) %>%
      mutate(drop = NULL)
  }
  df
}
fNew <- function(df) {
  # the number of units still in the panel after each wave is deterministic
  nleft <- numeric(n_perunit + 1)
  nleft[1] <- n
  for (i in 2:n_perunit) nleft[i] <- round(nleft[i - 1]*attrition)
  # -diff(nleft) counts how many units last exactly 1, 2, ..., n_perunit waves;
  # randomly assign those "last waves" to units, then keep each unit's rows
  # up to its assigned last wave
  df[df$wave <= sample(rep.int(1:n_perunit, -diff(nleft)))[df$unit], ]
}
dfOrig <- fOriginal(dat, remaining)
dfNew <- fNew(dat)
# the resulting data.frames are not identical due to different random sampling
# methods, but they both have the same number of rows and same wave counts
identical(tabulate(dfOrig$wave), tabulate(dfNew$wave))
#> [1] TRUE
microbenchmark::microbenchmark(fOriginal = fOriginal(dat, remaining),
                               fNew = fNew(dat))
#> Unit: milliseconds
#>      expr     min       lq     mean   median      uq     max neval
#> fOriginal 12.0433 13.24815 14.52889 14.02410 15.0525 23.5338   100
#>      fNew  1.2956  1.41915  1.73176  1.56935  1.7398  5.0738   100

Coarsened Exact Matching with cem package - Error in .subset2(x, i, exact = exact)

I'm trying to perform coarsened exact matching on the following data.
> dput(head(cem_data))
structure(list(sex = c(1, 1, 1, 2, 2, 2), age = c(40, 59, 53,
60, 49, 60), edlev = c(3, 3, 3, 2, 3, 3), sw = c(44, 17, 10,
41, 26, 23), sw2 = c(15, 1, 5, 34, 5, 6), som = c(2.14, 0.14,
1.86, 3, 1.71, 2.14), som_2 = c(0.71, 0.14, 2, 2.57, 1.71, 2.14
), ap = c(3.5, 1.5, 1.33, 3.33, 2.67, 2.17), ap_2 = c(3, 0.17,
2.33, 3, 0.83, 1.67), dep = c(2.83, 0.17, 0.33, 2.83, 2.17, 2.33
), dep_2 = c(1.17, 0, 0.33, 2.33, 0.83, 1), int = c(2.86, 1.43,
1, 2, 2.29, 2.14), int_2 = c(2.29, 0.57, 0.14, 2.57, 1.71, 1.43
), pho = c(3.2, 0, 0, 3.4, 0.8, 0.4), pho_2 = c(1.6, 0, 0, 3.2,
0, 0.4), psy_b = c(2.67, 0.11, 0.83, 3.06, 1.61, 1.72), psy_b_2 = c(1.11,
0.06, 0.89, 2.67, 0.94, 1.28), s_wirk = c(4, 2.2, 1.6, 3.2, 1.4,
2.2), s_wirk_2 = c(2.8, 0.8, 1.8, 2.6, 1.6, 1.4), soz_b = c(2.75,
1.5, 1, 2.25, 1.25, 1.5), soz_b_2 = c(2.75, 1, 1, 2.25, 1.5,
1.25), soz_u = c(0.75, 0.75, 1.75, 3.25, 1, 3.25), soz_u_2 = c(1,
0.25, 1.75, 2.5, 2.5, 2), wohl = c(3.6, 1.4, 1.8, 3.4, 3, 3),
wohl_2 = c(2, 0.6, 1.4, 2.8, 2.2, 1.2), au_bei_aufn = c(1,
1, 1, 1, 1, 1), age_reha = c(40.9890410958904, 59.3945205479452,
53.372602739726, 60.2, 49.3342465753425, 60.7534246575342
), group_format = c(0, 0, 0, 0, 0, 0)), row.names = c(6L,
7L, 10L, 15L, 20L, 29L), class = "data.frame")
With the following code:
require(cem)
voll_data <- voll_data %>%
select(-c("auf_nr", "icd_10_1", "icd_10_2", "icd_10_3", "icd_10_4","icd_10_5", "bdi_date", "aufnahme", "entlassung")) %>%
mutate_if(is.factor,as.numeric) %>%
mutate_if(is.character, as.numeric)
cem_data <- data.frame(na.omit(voll_data))
#cem_data_s <- scale(cem_data[,5:26])
#cem_data <- cbind.data.frame(cem_data[, 1:4], cem_data_s, cem_data[, 27:36])
variables <- c("age", "sex", "edlev", "sw","au_bei_aufn")
ungleich2 <- imbalance(cem_data$group_format, data=cem_data)
However, the following error is shown when calculating matt:
Error in .subset2(x, i, exact = exact) :
  attempt to select less than one element in get1index
7. (function(x, i, exact) if (is.matrix(i)) as.matrix(x)[[i]] else .subset2(x, i, exact = exact))(x, ..., exact = exact)
6. [[.data.frame(data, treatment)
5. data[[treatment]]
4. is.factor(x)
3. as.factor(data[[treatment]])
2. cem.main(treatment = treatment, data = data, cutpoints = cutpoints, drop = drop, k2k = k2k, method = method, mpower = mpower, verbose = verbose, baseline.group = baseline.group, keep.all = keep.all)
1. cem(treatment = cem_data$group_format, data = cem_data, drop = "sw2", cutpoints = list(age = agecut), grouping = list(edlev_gr))
# automated coarsening
matt <- cem(cem_data$group_format, data = cem_data, drop= "sw2")
print(matt)
Does anyone have an idea what I am doing wrong?
Thanks a lot!!
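One hint from the traceback: the failure occurs at data[[treatment]], which works when treatment is a column name but not when it is the column vector itself. If cem() expects the name of the treatment variable (as its documentation suggests), a sketch of the corrected call would be:
# pass the treatment by name, not as a vector
matt <- cem(treatment = "group_format", data = cem_data, drop = "sw2")
print(matt)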

Analytical gradient for bisection method nested within objective function

I'm attempting to fit parameters to a data set using optim() in R. The objective function requires iterative root-solving of an equation G, so that the predicted values p bring G (nested within the objective function) to 0, or as close to 0 as possible; I use 50 iterations of the bisection method for stability.
Here is the problem: I would really prefer to include an analytical gradient for optim(), but I suspect it isn't possible for an iterated function. However, before I give up on the analytical gradient, I wanted to run this problem by everyone here and see if there might be a solution I'm overlooking. Any thoughts?
Note: before settling on the bisection method, I tried other root-solving methods, but all non-bracketing methods (Newton, etc.) seem to be unstable.
Below is a reproducible example of the problem. With the provided data set and the starting values for optim(), the algorithm converges just fine without an analytical gradient, but it doesn't perform so well for other sets of data and starting values.
#the data set includes two input variables (x1 and x2)
#the response values are k successes out of n trials
x1=c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1.5, 1.5, 1.5, 1.5, 1.75, 1.75, 1.75, 1.75, 2, 2,
2, 2, 2.25, 2.25, 2.25, 2.25, 2.5, 2.5, 2.5, 2.5, 2.75, 2.75,
2.75, 2.75, 3, 3, 3, 3, 3.25, 3.25, 3.25, 3.25, 3.5, 3.5, 3.5,
3.5, 3.75, 3.75, 3.75, 3.75, 4, 4, 4, 4, 4.25, 4.25, 4.25, 4.25,
4.5, 4.5, 4.5, 4.5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.5, 1.5, 1.5, 1.5,
1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.75, 1.75,
1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
1.75, 1.75, 1.75, 1.75, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2.25, 2.25, 2.25,
2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25,
2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25,
2.25, 2.25, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5, 2.5, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75,
2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75,
2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25,
3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25,
3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.5, 3.5,
3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5,
3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5,
3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75,
3.75, 3.75, 3.75, 3.75, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25,
4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25,
4.25, 4.25, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5,
4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5)
x2=c(0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0.1, 0.1, 0.1, 0.15, 0.15, 0.15,
0.15, 0.2, 0.2, 0.2, 0.2, 0.233, 0.233, 0.233, 0.267, 0.267,
0.267, 0.267, 0.3, 0.3, 0.3, 0.3, 0.333, 0.333, 0.333, 0.333,
0.367, 0.367, 0.367, 0.367, 0.4, 0.4, 0.4, 0.4, 0.433, 0.433,
0.433, 0.433, 0.467, 0.467, 0.467, 0.5, 0.5, 0.5, 0.5, 0.55,
0.55, 0.55, 0.55, 0.6, 0.6, 0.6, 0.6, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.267,
0.267, 0.267, 0.267, 0.333, 0.333, 0.333, 0.333, 0.4, 0.4, 0.4,
0.4, 0.467, 0.467, 0.467, 0.467, 0.55, 0.55, 0.55, 0.55, 0.15,
0.15, 0.15, 0.15, 0.233, 0.233, 0.233, 0.233, 0.3, 0.3, 0.3,
0.3, 0.367, 0.367, 0.367, 0.367, 0.433, 0.433, 0.433, 0.433,
0.5, 0.5, 0.5, 0.6, 0.6, 0.6, 0.6, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2,
0.2, 0.2, 0.267, 0.267, 0.267, 0.267, 0.333, 0.333, 0.333, 0.333,
0.4, 0.4, 0.4, 0.4, 0.467, 0.467, 0.467, 0.467, 0.55, 0.55, 0.55,
0.55, 0.15, 0.15, 0.15, 0.15, 0.233, 0.233, 0.233, 0.233, 0.3,
0.3, 0.3, 0.3, 0.367, 0.367, 0.367, 0.367, 0.433, 0.433, 0.433,
0.433, 0.5, 0.5, 0.5, 0.5, 0.6, 0.6, 0.6, 0.6, 0.1, 0.1, 0.1,
0.1, 0.2, 0.2, 0.2, 0.267, 0.267, 0.267, 0.267, 0.333, 0.333,
0.333, 0.333, 0.4, 0.4, 0.4, 0.4, 0.467, 0.467, 0.467, 0.467,
0.55, 0.55, 0.55, 0.55, 0.15, 0.15, 0.15, 0.15, 0.233, 0.233,
0.233, 0.233, 0.3, 0.3, 0.3, 0.3, 0.367, 0.367, 0.367, 0.367,
0.433, 0.433, 0.433, 0.433, 0.5, 0.5, 0.5, 0.5, 0.6, 0.6, 0.6,
0.6, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.267, 0.267, 0.267,
0.267, 0.333, 0.333, 0.333, 0.333, 0.4, 0.4, 0.4, 0.4, 0.467,
0.467, 0.467, 0.467, 0.55, 0.55, 0.55, 0.55, 0.15, 0.15, 0.15,
0.15, 0.233, 0.233, 0.233, 0.3, 0.3, 0.3, 0.3, 0.367, 0.367,
0.367, 0.367, 0.433, 0.433, 0.433, 0.433, 0.5, 0.5, 0.5, 0.6,
0.6, 0.6, 0.6, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.267,
0.267, 0.267, 0.267, 0.333, 0.333, 0.333, 0.333, 0.4, 0.4, 0.4,
0.4, 0.467, 0.467, 0.467, 0.467, 0.55, 0.55, 0.55, 0.55, 0.15,
0.15, 0.15, 0.15, 0.233, 0.233, 0.233, 0.233, 0.3, 0.3, 0.3,
0.3, 0.367, 0.367, 0.367, 0.367, 0.433, 0.433, 0.433, 0.433,
0.5, 0.5, 0.5, 0.5, 0.6, 0.6, 0.6, 0.6, 0.1, 0.1, 0.1, 0.1, 0.2,
0.2, 0.2, 0.2, 0.267, 0.267, 0.267, 0.267, 0.333, 0.333, 0.333,
0.15, 0.15, 0.15, 0.15, 0.233, 0.233, 0.233, 0.233, 0.3, 0.3,
0.3, 0.3, 0.367, 0.367, 0.367, 0.367, 0.433, 0.433, 0.433, 0.433,
0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.267, 0.267, 0.267,
0.267, 0.333, 0.333, 0.333, 0.333, 0.4, 0.4, 0.4, 0.4, 0.15,
0.15, 0.15, 0.15, 0.233, 0.233, 0.233, 0.233, 0.3, 0.3, 0.3,
0.3, 0.367, 0.367, 0.367, 0.367, 0.433, 0.433, 0.433, 0.433)
k=c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 1, 3,
3, 3, 3, 3, 3, 4, 2, 5, 3, 4, 7, 8, 5, 4, 5, 5, 4, 5, 5, 5, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 3, 2, 4, 1, 2,
3, 4, 2, 2, 4, 4, 3, 1, 2, 0, 3, 4, 5, 5, 0, 0, 0, 0, 0, 0, 1,
0, 0, 1, 1, 2, 1, 2, 2, 0, 3, 1, 0, 2, 4, 6, 5, 5, 4, 5, 5, 5,
1, 0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 1, 3, 4, 3, 4, 5, 5, 5, 5, 8,
6, 7, 6, 6, 5, 7, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 2, 1, 1, 3, 3,
2, 1, 3, 6, 2, 5, 3, 3, 5, 6, 5, 5, 5, 1, 0, 1, 1, 2, 1, 1, 1,
3, 4, 2, 5, 5, 3, 4, 4, 6, 4, 6, 5, 6, 5, 5, 5, 5, 4, 5, 5, 0,
0, 0, 0, 0, 2, 0, 2, 3, 3, 3, 2, 3, 3, 1, 4, 4, 4, 4, 3, 5, 6,
5, 5, 5, 5, 5, 1, 4, 1, 2, 2, 3, 4, 2, 5, 5, 5, 5, 5, 4, 5, 7,
6, 7, 6, 5, 5, 5, 7, 5, 5, 5, 5, 5, 0, 1, 0, 0, 3, 2, 3, 3, 1,
2, 2, 2, 4, 2, 3, 2, 5, 5, 5, 5, 4, 6, 5, 6, 5, 5, 6, 5, 3, 5,
2, 4, 5, 3, 5, 5, 6, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 0, 0, 2, 0, 3, 2, 3, 2, 3, 4, 3, 4, 5, 5, 5, 5, 6, 4,
6, 4, 5, 7, 5, 5, 5, 6, 5, 5, 2, 3, 4, 4, 4, 4, 5, 5, 5, 6, 5,
5, 5, 5, 5, 4, 6, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 1, 0, 2, 0,
3, 5, 2, 2, 4, 5, 4, 5, 6, 6, 4, 5, 4, 5, 4, 5, 5, 5, 5, 5, 5,
6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 4, 1, 4, 4, 4, 4, 4, 3, 6, 5,
4, 3, 5, 4, 5, 6, 6, 5, 6, 5, 4, 5, 5, 5, 6, 5, 5, 5, 11, 5,
12, 5, 5, 5, 5, 4, 5, 5, 5)
n=c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5,
6, 5, 6, 5, 5, 5, 5, 7, 5, 6, 8, 8, 6, 5, 6, 5, 5, 5, 5, 5, 6,
5, 5, 5, 5, 7, 11, 8, 7, 5, 5, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5,
4, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 4, 5, 5, 5, 6, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 6, 6, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 7, 6, 7, 6, 5, 5, 5, 5,
5, 5, 5, 5, 5, 6, 5, 6, 6, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5,
8, 6, 7, 6, 6, 5, 7, 5, 5, 5, 5, 6, 5, 5, 5, 7, 7, 6, 5, 6, 5,
5, 5, 5, 6, 6, 4, 6, 6, 5, 5, 6, 6, 5, 5, 5, 5, 5, 5, 7, 5, 5,
4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 4, 6, 5, 6, 5, 5, 5, 5, 4, 5, 5,
5, 5, 6, 6, 5, 6, 5, 4, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5,
6, 5, 5, 5, 5, 5, 5, 6, 5, 6, 7, 4, 6, 5, 5, 5, 5, 5, 5, 4, 5,
7, 6, 7, 6, 5, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 5,
5, 5, 5, 5, 5, 4, 5, 6, 5, 5, 5, 5, 5, 7, 5, 6, 5, 5, 6, 5, 5,
5, 5, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6,
5, 6, 5, 6, 7, 5, 5, 5, 6, 5, 5, 4, 5, 5, 5, 5, 6, 5, 5, 5, 6,
5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 4, 5, 5, 5, 5, 5, 5, 5, 7, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6,
5, 5, 5, 5, 5, 5, 6, 6, 5, 6, 5, 5, 5, 5, 5, 6, 5, 5, 5, 11,
5, 12, 5, 5, 5, 5, 4, 5, 5, 5)
#low_high contains the lower and upper bounds for the bisection method
low_high=vector("list",2)
low_high[["low"]]=rep(0,length(x1))
low_high[["high"]]=rep(1,length(x1))
low_high_list=rep(list(low_high),50)
ll = function(theta)
{
  names(theta) = c("b1","m1","b2","m2")
  b1 = theta[["b1"]]
  m1 = theta[["m1"]]
  b2 = theta[["b2"]]
  m2 = theta[["m2"]]
  # the bisection function is used to find the y which makes G = 0
  bisection_function = function(prv, nxt)
  {
    low_high = prv
    # G and y are both vectors of the length of the data set (in this example, 469)
    y = (low_high[["low"]] + low_high[["high"]])/2
    G = -1 + (x1/((log(-y/(y - 1)) - b1)/m1)) + (x2/((log(-y/(y - 1)) - b2)/m2))
    low_high[["low"]][G > 0] = y[G > 0]
    low_high[["high"]][G < 0] = y[G < 0]
    return(low_high)
  }
  # Reduce is the fastest method I've found so far
  # (in other words, if there is a better way, I'm certainly open to suggestions!)
  low_high = Reduce(bisection_function, low_high_list)
  p = (low_high[["low"]] + low_high[["high"]])/2
  # sum of log likelihoods for the binomial distribution
  loglik = sum(log((gamma(1 + n)/(gamma(1 + k)*(gamma(1 + n - k))))*((p^k)*((1 - p)^(n - k)))))
  return(loglik)
}
theta.start=c(b1=-10,m1=10,b2=-10,m2=10)
mle=optim(theta.start,ll,control=list(fnscale=-1),hessian=TRUE)
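As an aside, the binomial log-likelihood inside ll() can be computed more compactly, and with less risk of gamma() overflowing for large n, via dbinom() with log = TRUE; an equivalent drop-in replacement for the loglik line:
# same value as the gamma() formulation, but numerically safer
loglik = sum(dbinom(k, size = n, prob = p, log = TRUE))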
Thanks!!
Using Vincent's suggestions, I was able to supply an analytic gradient via implicit differentiation. In case anyone else has a similar problem, I have included reproducible code below (to be added after the code included in the question).
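The identity this relies on is the implicit function theorem: along the solution path $G(p(\theta), \theta) = 0$, so

$$\frac{\partial p}{\partial \theta_j} = -\frac{\partial G/\partial \theta_j}{\partial G/\partial p},$$

which is exactly what fd_any() below constructs by symbolically differentiating Gexpression.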
Gexpression=parse(text="-1+(x1/((log(-p/(p-1))-b1)/m1))+(x2/((log(-p/(p-1))-b2)/m2))")
nested = function(theta)
{
  names(theta) = c("b1","m1","b2","m2")
  b1 = theta[["b1"]]
  m1 = theta[["m1"]]
  b2 = theta[["b2"]]
  m2 = theta[["m2"]]
  # the bisection function is used to find the y which makes G = 0
  bisection_function = function(prv, nxt)
  {
    low_high = prv
    # G and y are both vectors of the length of the data set (in this example, 469)
    y = (low_high[["low"]] + low_high[["high"]])/2
    G = -1 + (x1/((log(-y/(y - 1)) - b1)/m1)) + (x2/((log(-y/(y - 1)) - b2)/m2))
    low_high[["low"]][G > 0] = y[G > 0]
    low_high[["high"]][G < 0] = y[G < 0]
    return(low_high)
  }
  low_high = Reduce(bisection_function, low_high_list)
  p = (low_high[["low"]] + low_high[["high"]])/2
  return(p)
}
gr = function(theta)
{
  names(theta) = c("b1","m1","b2","m2")
  b1 = theta[["b1"]]
  m1 = theta[["m1"]]
  b2 = theta[["b2"]]
  m2 = theta[["m2"]]
  p = nested(theta)
  # dll applies the chain rule: the derivative of the log-likelihood with
  # respect to p, multiplied by d_any, the partial derivative of p with
  # respect to one of the parameters
  dll = function(d_any) (((k / p) * d_any) - (((n - k) / (1 - p))*d_any))
  # fd_any computes the partial derivative of p with respect to any parameter
  # by implicit differentiation: -(dG/dparameter)/(dG/dp)
  fd_any = function(any) eval(parse(text = paste("-((", as.character(list(D(Gexpression, any))), ")/(", as.character(list(D(Gexpression, 'p'))), "))", sep = "")))
  DLb1 = dll(fd_any("b1"))
  DLb2 = dll(fd_any("b2"))
  DLm1 = dll(fd_any("m1"))
  DLm2 = dll(fd_any("m2"))
  DLb1[is.na(DLb1)] = 0
  DLb2[is.na(DLb2)] = 0
  DLm1[is.na(DLm1)] = 0
  DLm2[is.na(DLm2)] = 0
  colSums(cbind(b1 = DLb1, m1 = DLm1, b2 = DLb2, m2 = DLm2))
}
hs = function(theta)
{
  names(theta) = c("b1","m1","b2","m2")
  b1 = theta[["b1"]]
  m1 = theta[["m1"]]
  b2 = theta[["b2"]]
  m2 = theta[["m2"]]
  p = nested(theta)
  fd_any_fun = function(any) paste("(-((", as.character(list(D(Gexpression, any))), ")/(", as.character(list(D(Gexpression, 'p'))), ")))", sep = "")
  dll_fun = function(d_any_fun) paste("((k / p) * (", d_any_fun, ")) - (((n - k) / (1 - p))*(", d_any_fun, "))", sep = "")
  # each Hessian entry is the gradient expression for one parameter
  # differentiated symbolically with respect to a second parameter, summed
  # over observations, so build the 4 x 4 matrix in a double loop over
  # parameter pairs instead of writing out all 16 combinations by hand
  params = c("b1", "m1", "b2", "m2")
  h = matrix(0, 4, 4, dimnames = list(params, params))
  for (r in params) {
    for (s in params) {
      hrs = eval(parse(text = D(parse(text = dll_fun(fd_any_fun(r))), s)))
      hrs[is.na(hrs)] = 0
      h[r, s] = sum(hrs)
    }
  }
  return(h)
}
The gradient seems to work fine. For some reason, though, the Hessian estimated by optim() differs from the analytic Hessian calculated by hs(). At least the resulting standard errors are of the same order of magnitude:
# Standard errors from optim Hessian
sqrt(abs(diag(solve(mle$hessian))))
# Standard errors from analytic Hessian
sqrt(abs(diag(solve(hs(mle$par)))))
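For reference, a sketch of how the analytic gradient plugs into optim() (gradient-based methods such as BFGS use gr; the default Nelder-Mead ignores it), plus a numerical cross-check of hs() (the numDeriv package is an assumption here; it is not used in the code above):
mle <- optim(theta.start, ll, gr = gr, method = "BFGS",
             control = list(fnscale = -1), hessian = TRUE)
# compare the analytic Hessian of the log-likelihood with a numerical one
library(numDeriv)
max(abs(hs(mle$par) - numDeriv::hessian(ll, mle$par)))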

xyplot not merging plots when more than two conditioning variables

When I run the following code, xyplot produces four separate 2-by-3 plots, whereas I want a single 4-by-6 trellis (to save real estate on the axis annotation and legends).
Note that my problem is different from this one in that I don't want to see four sets of axes/legends.
Here is some example data:
B <- structure(list(yval = c(0.88, 4.31, 7.52, 3.21, 3.27, 4.93, 4.21,
0.7, 0.68, 0.92, 3.86, 5.67, 9.08, 1.95, 3.27, 1.44, 2.38, 0.85,
0.79, 0.55, 0.79, 10.52, 0.9, 4, 0.78, 2.46, 0.78, 1.64, 2.47,
0.77, 0.83, 0.86, 3.65, 8.25, 0.65, 0.88, 0.95, 4.05, 4.98, 1.43,
4.43, 2.94, 5.52, 0.9, 3.69, 0.79, 0.74, 1.49, 7.29, 0.58, 8.47,
5.82, 0.84, 0.87, 0.69, 1.38, 0.83, 2.32, 0.86, 7.32, 6.73, 6.7,
3.3, 1.58, 2.74, 0.88, 4.2, 3.79, 4.98, 2.54, 1.84, 1.2, 2.59,
11.99, 0.78, 0.92, 0.59, 3.83, 0.92, 2.6, 0.95, 3.18, 2.75, 9.83,
9.81, 0.55, 0.83, 6.29, 1.64, 1.12, 0.65, 3.96, 4.27, 3.99, 20,
0.83, 6.23, 6.81, 0.86, 0.7), xval = c(0.62, 0.81, 9.01, 3.72,
1.49, 3.92, 6.22, 6.64, 5.56, 6.64, 4, 7.36, 9.6, 1, 1.64, 3.34,
3.47, 3.37, 4.34, 6.63, 7.62, 4.07, 5.69, 3.76, 9.74, 1.58, 1.53,
2.62, 1.64, 1.18, 9.79, 9.9, 2.76, 7.96, 5.11, 4.74, 9.92, 0.49,
9.05, 8.59, 0.7, 5.8, 5.34, 3.14, 6.96, 2.05, 8.29, 0.35, 7.52,
6.56, 2.01, 7.92, 3.89, 6.31, 8.64, 6.18, 4.49, 0.63, 7.52, 7.82,
1.25, 9.54, 4.68, 0.4, 1.38, 8.7, 4.71, 8.27, 5.72, 0.75, 6.08,
0.11, 1.38, 0.37, 4.94, 0.53, 7.53, 3.11, 2.73, 4.93, 9.47, 2.18,
4.54, 7.12, 8.28, 6.62, 5.14, 4.42, 0.21, 9.52, 3.77, 6.43, 6.78,
6.87, 9.47, 6.42, 0.81, 8.88, 7.2, 8.68), gval = c(1, 2, 5, 5,
2, 1, 2, 1, 2, 3, 6, 5, 1, 3, 2, 3, 5, 2, 6, 4, 4, 1, 1, 6, 4,
2, 1, 2, 4, 5, 5, 3, 6, 5, 4, 2, 2, 3, 3, 6, 2, 4, 1, 4, 4, 1,
1, 2, 2, 5, 1, 1, 2, 2, 1, 3, 1, 5, 6, 5, 1, 5, 4, 4, 3, 6, 6,
4, 5, 4, 4, 6, 5, 6, 5, 2, 1, 1, 6, 6, 2, 5, 5, 1, 1, 4, 6, 3,
4, 6, 3, 5, 3, 3, 6, 2, 1, 5, 1, 3), type = c(5, 2, 1, 5, 1,
1, 1, 1, 2, 12, 5, 1, 2, 5, 5, 12, 12, 12, 12, 2, 12, 2, 12,
5, 12, 2, 12, 12, 5, 12, 12, 12, 5, 2, 5, 12, 1, 1, 1, 1, 2,
12, 1, 12, 2, 12, 2, 2, 1, 1, 2, 1, 5, 12, 12, 5, 12, 5, 5, 1,
1, 1, 2, 5, 5, 5, 5, 5, 1, 5, 12, 12, 5, 2, 12, 12, 1, 1, 5,
5, 5, 2, 5, 1, 2, 2, 5, 1, 5, 2, 5, 5, 5, 2, 2, 5, 1, 2, 2, 5
), cr = c(0.2, 0.4, 0.4, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4,
0.4, 0.4, 0.2, 0.4, 0.4, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.4,
0.2, 0.2, 0.2, 0.4, 0.2, 0.2, 0.4, 0.4, 0.4, 0.4, 0.2, 0.4, 0.2,
0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.4, 0.2,
0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.2, 0.4, 0.4, 0.4, 0.4,
0.2, 0.4, 0.4, 0.4, 0.4, 0.2, 0.4, 0.4, 0.2, 0.4, 0.4, 0.2, 0.2,
0.2, 0.2, 0.2, 0.4, 0.4, 0.4, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4,
0.2, 0.2, 0.2, 0.4, 0.2, 0.4, 0.4, 0.4, 0.4, 0.2, 0.2), p = c(4,
12, 12, 8, 12, 8, 12, 4, 4, 8, 8, 4, 4, 8, 8, 8, 4, 12, 8, 4,
12, 12, 12, 12, 8, 12, 4, 4, 8, 8, 8, 4, 8, 12, 4, 12, 12, 4,
12, 8, 4, 4, 12, 4, 4, 8, 4, 4, 8, 4, 8, 12, 12, 8, 4, 8, 8,
8, 8, 12, 4, 8, 4, 12, 4, 4, 12, 4, 12, 12, 8, 4, 4, 12, 8, 12,
4, 4, 12, 4, 8, 4, 8, 12, 8, 4, 4, 4, 8, 4, 4, 12, 8, 12, 8,
4, 4, 8, 8, 4), nsamp = c(100, 300, 300, 200, 300, 200, 300,
100, 100, 200, 200, 100, 100, 200, 200, 200, 100, 300, 200, 100,
300, 300, 300, 300, 200, 300, 100, 100, 200, 200, 200, 100, 200,
300, 100, 300, 300, 100, 300, 200, 100, 100, 300, 100, 100, 200,
100, 100, 200, 100, 200, 300, 300, 200, 100, 200, 200, 200, 200,
300, 100, 200, 100, 300, 100, 100, 300, 100, 300, 300, 200, 100,
100, 300, 200, 300, 100, 100, 300, 100, 200, 100, 200, 300, 200,
100, 100, 100, 200, 100, 100, 300, 200, 300, 200, 100, 100, 200,
200, 100)), .Names = c("yval", "xval", "gval", "type", "cr",
"p", "nsamp"), row.names = c(NA, -100L), class = "data.frame")
And here is the code I am running:
library(lattice)
library(latticeExtra)
library(grid)
types <- rep(NA, 6)
types[1] <- expression(paste(epsilon, "=", 0.2, ", p=", 4, sep = ""))
types[2] <- expression(paste(epsilon, "=", 0.2, ", p=", 8, sep = ""))
types[3] <- expression(paste(epsilon, "=", 0.2, ", p=", 12, sep = ""))
types[4] <- expression(paste(epsilon, "=", 0.4, ", p=", 4, sep = ""))
types[5] <- expression(paste(epsilon, "=", 0.4, ", p=", 8, sep = ""))
types[6] <- expression(paste(epsilon, "=", 0.4, ", p=", 12, sep = ""))
types <- rep(types, 4)
cl <- rainbow(7)[-4]
xyplot(B$yval ~ B$xval | as.factor(B$p)*as.factor(B$cr)*as.factor(B$type),
       group = B$gval, as.table = TRUE,
       ylab = expression(kappa(Sigma, S)), col = cl, xlab = expression(nu),
       xlim = c(0, 10), ylim = c(0, 10), type = c("l", "g"), lwd = 5, cex.lab = 2,
       strip = function(...) {
         panel.fill(trellis.par.get("strip.background")$col[1])
         type <- types[panel.number()]
         grid::grid.text(label = type, x = 0.5, y = 0.5, gp = gpar(fontsize = 20))
         grid::grid.rect()
       },
       key = list(text = list(c("A","B","C","D","E","F"), cex = 2),
                  lines = list(type = rep("l", 6), label.cex = 2, col = cl, lwd = 3),
                  columns = 3),
       par.settings = list(par.xlab.text = list(cex = 2), axis.text = list(cex = 2),
                           par.ylab.text = list(cex = 2)))
Three conditioning variables means that xyplot makes a three-dimensional grid of panels, where the third dimension is spread across multiple pages. One alternative is to condition on only two variables; here I use : to make the first conditioning factor the interaction of the first two original conditioning factors.
xyplot(B$yval~B$xval|as.factor(B$p):as.factor(B$cr)*as.factor(B$type), ...
I think you want layout = c(6, 4) somewhere in your call to xyplot. Once you do that, you will have to reconfigure many other settings.
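For instance (a sketch combining both suggestions; untested against the full call with the custom strips and key above):
xyplot(yval ~ xval | as.factor(p):as.factor(cr) * as.factor(type),
       data = B, groups = gval, as.table = TRUE, layout = c(6, 4))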
