I'm trying to pass two different sets of values as inputs to a function using mapply, but I'm not sure how to do it. My current code gives an unexpected error. Am I using mapply incorrectly?
Here is a sample of the code:
# Dates that delimit the consecutive fitting windows
t_index <- c(
  "2022-01-01", "2022-01-08", "2022-01-15", "2022-01-22",
  "2022-01-29", "2022-02-05", "2022-02-12", "2022-02-19",
  "2022-02-26", "2022-03-12", "2022-03-26", "2022-04-09",
  "2022-04-23", "2022-05-07", "2022-05-21", "2022-06-11"
)
# Keep the rows of flu2 whose Date is one of those dates and
# take their `day` column
t_index_day <- filter(flu2, Date %in% t_index)$day
# Compartment model with states S, E, I, R and D
#
# Integrates the system below over `times` and returns the trajectory.
#
# alpha, beta, gamma, delta  rate parameters used in the equations below
# S0, E0, I0, R0, D0         initial values of the five states
# N                          population size (divides the beta * I * S term)
# times                      time points passed to the solver
#
# Returns the solver output converted to a data frame.
sir_1 <- function(alpha, beta, gamma, delta, S0, E0, I0, R0, D0, N, times) {
  # the differential equations; N is taken from the enclosing call
  sir_equations <- function(time, variables, parameters) {
    with(as.list(c(variables, parameters)), {
      dS <- -beta * I * S / N
      dE <- beta * I * S / N - alpha * E
      dI <- alpha * E - gamma * I - delta * I
      dR <- gamma * I
      dD <- delta * I
      list(c(dS, dE, dI, dR, dD))
    })
  }

  # the parameter values:
  parameters_values <- c(
    alpha = alpha, beta = beta, gamma = gamma, delta = delta
  )

  # the initial values of the variables:
  initial_values <- c(S = S0, E = E0, I = I0, R = R0, D = D0)

  # solving; deSolve::ode() is called through its namespace so that a
  # missing package raises an error here (require() only returns FALSE)
  out <- deSolve::ode(
    y = initial_values,
    times = times,
    func = sir_equations,
    parms = parameters_values
  )

  # returning the output:
  as.data.frame(out)
}
# Sum-of-squares cost for one fitting window
#
# Rows time[num] to time[num + 1] of `data` form the window. The model is
# started from the first row of the window, and the squared differences
# between simulated I and observed cases (first point dropped) are summed.
ss <- function(alpha,beta, gamma, delta, time, num, data = flu2, N = 50000000) {
  print(num)
  window <- data[time[num]:time[num + 1], ]
  first_cases <- window$cases[1]
  fit <- sir_1(
    alpha = alpha, beta = beta, gamma = gamma, delta = delta, # parameters
    S0 = N - 639083, # cumulative case until now
    E0 = first_cases * alpha,
    I0 = first_cases,
    R0 = first_cases * gamma,
    D0 = window$deaths[1],
    N = N,
    times = window$day # time points
  )
  resid_I <- fit$I[-1] - window$cases[-1]
  sum(resid_I^2)
}
# One index per fitting window (pairs of consecutive entries of t_index).
# seq_len() gives an empty sequence when there are fewer than two dates,
# whereas 1:(n - 1) would count down through 1, 0.
loop <- seq_len(max(length(t_index) - 1L, 0L))
# Candidate values for beta and delta (300 each); `length.out` is spelled
# out instead of relying on partial matching of `le`
beta_val <- seq(from = 0.1, to = 0.43, length.out = 300)
delta_val <- seq(from = 1/10000, to = 1/100, length.out = 300)
# Evaluate ss() for every (beta_val, delta_val) pair; only window 1 is
# handled in the part of the loop shown here (the excerpt ends before the
# closing braces).
for (i in loop){
if (i == 1){
# NOTE(review): mapply() iterates over every argument supplied through
# `...`, so t_index_day and flu2 are split element by element here as well
# instead of reaching ss() whole. Arguments that must be passed unchanged
# belong in MoreArgs = list(...).
ss_val <- mapply(ss,beta_val, delta_val,alpha = 1/5, gamma = 1/7,time = t_index_day, num = i,data = flu2)
print(ss_val)
Error code:
The problem was that mapply iterates over every argument it is given: it tried to split all of the inputs element by element, while I only wanted beta_val and delta_val to be iterated over and the remaining arguments to be passed whole.
To solve this problem I changed the definition of the function ss:
# Sum-of-squares cost for fitting window `num`
#
# The window is rows time[num] to time[num + 1] of `data`; `time` defaults
# to t_index_day. Returns the sum of squared differences between the
# simulated I and the observed cases, skipping the first point.
ss <- function(alpha,beta, gamma, delta, time = t_index_day, num, data = flu2, N = 50000000) {
  print(num)
  rows <- time[num]:time[num + 1]
  data <- data[rows, ]
  seed_cases <- data$cases[1]
  predictions <- sir_1(
    alpha = alpha, beta = beta, gamma = gamma, delta = delta, # parameters
    S0 = N - 639083, # cumulative case until now
    E0 = seed_cases * alpha,
    I0 = seed_cases,
    R0 = seed_cases * gamma,
    D0 = data$deaths[1],
    N = N,
    times = data$day # time points
  )
  squared_error <- (predictions$I[-1] - data$cases[-1])^2
  sum(squared_error)
}
I'm trying to fit the tetracycline data set from Bates & Watts to a compartment model which forms a system of first order differential equations. The system has an analytic solution but I want to use the dede function to estimate the parameters numerically.
I can get parameter estimates which are close to the ones published in Bates and Watts but I'm wondering if I have coded the problem correctly. Specifically, since Bates & Watts account for dead time in their solution, I'm concerned about whether I have coded the use of lagvalue() in the function called DiffEqns correctly.
My programming question relates to coding of the derivatives with lag time. They are currently coded as:
dy1 <- -theta1*y1lag
dy2 <- theta1*y1lag - theta2*y2lag
However, I wonder if the derivatives should be coded instead as:
dy1 <- -theta1*y1lag*y[1]
dy2 <- theta1*y1lag*y[1] - theta2*y2lag*y[2]
Thanks and regards,
# Analyze the tetracycline data set as a two-compartment model
# (see Bates & Watts, "Nonlinear Regression Analysis and Its Applications")
## Note: the differential equations for the compartment model are:
## dy1/dt = -theta1*y1
## dy2/dt = theta1*y1 - theta2*y2
## (see p. 169 in Bates & Watts)
# Load packages
library(FME)
# Create the tetracycline dataset (see p. 281 in Bates & Watts):
# a data frame with 9 rows and the columns `time` and `conc`
tetra <- structure(list(time = c(1, 2, 3, 4, 6, 8, 10, 12, 16),
conc = c(0.7,1.2, 1.4, 1.4, 1.1, 0.8, 0.6, 0.5, 0.3)),
row.names = c(NA, 9L), class = "data.frame")
# Observe that:
#   A) "conc" = data for y2
#   B) there is no data for y1
#   C) data start at time = 1 instead of time = 0
# Create a differential equation model with dead time
#
# t      current time
# y      current state vector (not read: both derivatives are built from
#        the lagged states)
# parms  parameter vector: [1] rate constant for y1, [2] rate constant for
#        y2, [3] amount of y1 at time = 0 (not read here), [4] parameter
#        that accounts for dead time
#
# Returns list(c(dy1, dy2), y1lag = ..., y2lag = ...).
#
# NOTE(review): this is a delay formulation (derivatives depend on the
# state at t - theta4). If the published model is a pure time shift, the
# derivatives would instead be zero before theta4 and use the current
# state y afterwards -- TODO confirm against Bates & Watts.
DiffEqns <- function(t, y, parms) {
  # [[ ]] drops the parameter names so they do not leak into the results
  theta1 <- parms[[1]]
  theta2 <- parms[[2]]
  lag_time <- t - parms[[4]]

  # No history exists before time theta4, so the lagged states are 0
  # there. Plain if/else is used for this scalar test: ifelse() would
  # copy the names of its test onto the result.
  if (lag_time < 0) {
    y1lag <- 0
    y2lag <- 0
  } else {
    y1lag <- lagvalue(lag_time, 1)
    y2lag <- lagvalue(lag_time, 2)
  }

  dy1 <- -theta1 * y1lag
  dy2 <- theta1 * y1lag - theta2 * y2lag
  list(c(dy1, dy2), y1lag = y1lag, y2lag = y2lag)
}
# Find a numerical solution for the system of delay differential equations
# using dede() from deSolve
time <- seq(from = 0, to = 16, by = 0.1)

# Cost function for modFit(): simulates the model for parameter vector P
# (theta1..theta4, in that order) on the `time` grid and compares the
# result with the observations in `tetra` through modCost().
Cost <- function(P) {
  theta <- P[1:4]
  # P[[3]] (theta3) is the starting amount of y1. [[ ]] drops the element
  # name; c(y1 = P[3]) would name the state "y1.theta3" instead of "y1".
  yinit <- c(y1 = P[[3]], conc = 0)
  out <- dede(y = yinit, times = time, func = DiffEqns, parms = theta)
  modCost(model = out, obs = tetra)
}
# starting values for the parameters
theta <- c(theta1 = 0.1, theta2 = 0.2, theta3 = 5, theta4 = 0.2)
# [[ ]] keeps the state name "y1"; theta[3] would produce "y1.theta3"
yinit <- c(y1 = theta[["theta3"]], conc = 0)
# fit all four parameters, each bounded below by 0
CompModFit2 <- modFit(f = Cost, p = theta, lower = rep(0, 4))
FMEtheta <- coef(CompModFit2)
# Compare data to numerical model solution using parameters from modFit
dedeFitted <- dede(
  y = c(y1 = FMEtheta[[3]], conc = 0),
  times = time,
  func = DiffEqns,
  parms = FMEtheta
)
plot(dedeFitted, obs = tetra)
# Parameters from FME are:
# theta1 theta2 theta3 theta4
#0.1193617 0.6974401 10.7188251 0.2206997
# Compare FME parameters to the parameter estimates published in Bates & Watts:
# theta1 theta2 theta3 theta4
# 0.1488 0.7158 10.10 0.4123
What I am trying to do: I have a simple SIR model with a time-varying transmission rate beta, which I have already implemented in R (thanks to #tpetzoldt). The population is N = 10000 and gamma is fixed.
# SIR model with a time-dependent transmission rate
#
# f_beta      function of time that returns the transmission rate
# S0, I0, R0  initial values of the three states
# times       time points passed to the solver
#
# Returns the ode() output; beta is recorded as an extra output column.
sir_1 <- function(f_beta, S0, I0, R0, times) {
  # the differential equations
  sir_equations <- function(time, variables, parameters) {
    # Take the rate function from the parameter list that ode() hands in.
    # (The earlier call to f_gamma() referred to a function that is not
    # defined here and its result was never used, so it is dropped.)
    beta <- parameters$f_beta(time)
    with(as.list(variables), {
      dS <- -beta * I * S / 10000
      dI <- beta * I * S / 10000 - 1/5 * I
      dR <- 1/5 * I
      list(c(dS, dI, dR), beta = beta)
    })
  }

  # time dependent parameter functions
  parameters_values <- list(f_beta = f_beta)

  # the initial values of variables
  initial_values <- c(S = S0, I = I0, R = R0)

  # pass the list built above (the name `parameters` was never defined)
  ode(initial_values, times, sir_equations, parameters_values)
}
# Time grid of 20 steps
times <- seq(from = 0, to = 19)
# Interpolating function for beta: 0.901 to 0.92, one value per time step;
# rule = 2 extends the end values outside the grid
f_beta <- approxfun(
  x = times,
  y = seq(from = 0.901, to = 0.92, by = 0.001),
  rule = 2
)
# Run the model and keep the result as a data frame
out <- as.data.frame(
  sir_1(f_beta = f_beta, S0 = 9990, I0 = 10, R0 = 0, times = times)
)
Now I have some "real" data, with the FME package I want to get the optimal beta parameters at each timestep
# Observations: one value of I per entry of `times`
datareal <- cbind(
  time = times,
  I = c(
    10, 32, 120, 230, 480, 567, 1040, 1743, 2300, 2619,
    3542, 4039, 4231, 6378, 5356, 4987, 3421, 2789, 1789, 1156
  )
)
# Cost function for modFit()
#
# f_beta  either a function of time, or a numeric vector holding one beta
#         value per entry of `times` (the form in which modFit() supplies
#         its parameters)
#
# Returns the modCost() object comparing the simulation with `datareal`.
sir_cost <- function (f_beta) {
  # modFit() passes a plain numeric vector. Without this conversion the
  # call f_beta(time) inside the model skips the numeric value and picks
  # up a function named f_beta from an enclosing environment, so the
  # parameters being fitted never influence the cost.
  if (!is.function(f_beta)) {
    f_beta <- approxfun(x = times, y = f_beta, rule = 2)
  }
  outsir <- as.data.frame(
    sir_1(f_beta = f_beta, S0 = 9990, I0 = 10, R0 = 0, times = times)
  )
  modCost(model = outsir, obs = datareal)
}
# Start value 0.8 for each of the 20 parameters
p <- rep(0.8, times = 20)
# Fit and show the result
Fit <- modFit(f = sir_cost, p = p)
print(Fit)
$par
[1] 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8 0.8
My issues:
For the initial values I used 0.8 at each time step; however, the fit does nothing: it simply returns 0.8 for every time step (even if I use a very high value such as 800, it reports that this is already the best fit). My guess is that time-varying values of the same variable (beta) have to be handled differently from what is shown in the documentation.
Any help is highly appreciated.
I don't think that estimating beta per time step is a good idea. This is inherent in the problem and not a fault of deSolve or FME. If a dynamic model is to be used to estimate time-dependent parameters, I would recommend using a suitable function with fewer knots, e.g. a time-dependent linear, quadratic or spline function with 3-5 knots instead of 20. Then replace approxfun with that function and plug it in. Model fitting is an art, so play with start values and solvers. And read the books.
Note that the following is just a technical demonstration:
library("deSolve")
library("FME")
# SIR model whose transmission rate is given by a function of time
#
# f_beta      function of time returning beta
# S0, I0, R0  initial state values
# times       time points for the solver
#
# Returns the ode() result (invisibly); beta is stored as an extra output.
sir_1 <- function(f_beta, S0, I0, R0, times) {
  # the differential equations
  sir_equations <- function(time, variables, parameters) {
    beta <- parameters$f_beta(time)
    S <- variables[["S"]]
    I <- variables[["I"]]
    infection <- beta * I * S / 10000
    recovery <- 1/5 * I
    list(c(-infection, infection - recovery, recovery), beta = beta)
  }

  invisible(
    ode(
      y = c(S = S0, I = I0, R = R0),
      times = times,
      func = sir_equations,
      parms = list(f_beta = f_beta)
    )
  )
}
times <- seq(from = 0, to = 19)

# method "constant" keeps beta constant over each time step
f_beta <- approxfun(
  x = times,
  y = seq(from = 0.901, to = 0.92, by = 0.001),
  method = "constant",
  rule = 2
)

# simulate with the initial beta values and plot the result
out <- sir_1(f_beta = f_beta, S0 = 9990, I0 = 10, R0 = 0, times = times)
plot(out)

# observed values of I, one per time step
datareal <- cbind(
  time = times,
  I = c(
    10, 32, 120, 230, 480, 567, 1040, 1743, 2300, 2619,
    3542, 4039, 4231, 6378, 5356, 4987, 3421, 2789, 1789, 1156
  )
)

# simulation together with the observations
plot(out, obs = datareal)
# Cost function for modFit(): p holds one beta value per entry of `times`.
# The values become a piecewise-constant function of time, the model is
# run, and modCost() compares the result with `datareal`.
sir_cost <- function (p) {
  outsir <- sir_1(
    f_beta = approxfun(x = times, y = p, method = "constant", rule = 2),
    S0 = 9990, I0 = 10, R0 = 0, times = times
  )
  modCost(model = outsir, obs = datareal)
}
# Play with start values!!!
# First choice: the same start value for all 20 parameters
# (overwritten by the random start values just below)
p <- rep(0.8, 20)
# e.g.: consider random start values
# (seed fixed so that the random start values are reproducible)
set.seed(123)
p <- runif(20, min=0.8, max=1.2)
# try other solvers, especially such with true box constraints
# First attempt: method "Port", every parameter bounded to [0.2, 5]
Fit <- modFit(f = sir_cost, p = p,
lower=rep(0.2, 20), upper=rep(5, 20), # box constraints
method="Port")
summary(Fit) # system is singular (that is what we expected)
# use another solver. Note: it takes a while
# Second attempt: same bounds, method "L-BFGS-B"; replaces the first Fit
Fit <- modFit(f = sir_cost, p = p,
lower=rep(0.2, 20), upper=rep(5, 20), # box constraints
method="L-BFGS-B")
# goes in a surprisingly good direction
Fit$par
# Rebuild the piecewise-constant beta function from the fitted values
# and simulate again
f_beta <- approxfun(x=times, y=Fit$par, method="constant", rule=2)
out2 <- sir_1(f_beta=f_beta, S0 = 9990, I0 = 10, R0 = 0, times = times)
# compare with data
plot(out, out2, obs=datareal)
# but see how unstable beta is
plot(out2)
Fitting a model with time dependent parameters may be a good idea or not, but if there are reasons to do so, I would suggest to restrict the number of parameters and to use a kind of smooth function.
The following example shows how to use a spline for this purpose, but it is of course also possible (and may be preferable) to use a function with some mechanistic meaning.
As a side effect, it was also possible to identify gamma instead of fixing it a-priori. Nevertheless, this is still a technical demonstration, but I leave the scientific question open, whether a time-dependent beta will make any sense.
library("FME")
# SIR model with time-dependent beta and constant gamma
#
# f_beta      function of time returning beta
# gamma       rate multiplying I in the dI and dR equations
# S0, I0, R0  initial state values
# times       time points for the solver
#
# Returns the ode() result; beta is stored as an auxiliary output.
sir_1 <- function(f_beta, gamma, S0, I0, R0, times) {
  # the differential equations
  sir_equations <- function(time, variables, parameters) {
    beta_t <- parameters$f_beta(time)
    rate_out <- parameters$gamma
    S <- variables[["S"]]
    I <- variables[["I"]]
    new_infections <- beta_t * I * S / 10000
    removals <- rate_out * I
    # vector of derivatives (dS, dI, dR), and beta as auxiliary variable
    list(
      c(-new_infections, new_infections - removals, removals),
      beta = beta_t
    )
  }

  ode(
    y = c(S = S0, I = I0, R = R0),
    times = times,
    func = sir_equations,
    # constant parameter and parameter function travel together in a list
    parms = list(f_beta = f_beta, gamma = gamma)
  )
}
times <- seq(from = 0, to = 19)

# observations of I, one per time step
datareal <- data.frame(
  time = times,
  I = c(
    10, 32, 120, 230, 480, 567, 1040, 1743, 2300, 2619,
    3542, 4039, 4231, 6378, 5356, 4987, 3421, 2789, 1789, 1156
  )
)

## define parameter as a vector: gamma and beta
# knot positions for beta; consider more or fewer knots
t_beta <- c(0, 12, 16, 19)
n_beta <- length(t_beta)
# start value 1 at every knot
y_beta <- rep(1, times = n_beta)
# all parameters in one vector: the named gamma first, then the knot values
p <- c(gamma = 1/5, y_beta)
## a small helper function for parameter selection
## Returns the elements of p whose name equals `which`; with
## exclude = TRUE it returns the elements whose name differs instead.
select <- function(p, which, exclude = FALSE) {
  is_match <- names(p) == which
  # xor() flips the selection when exclude is TRUE
  p[xor(is_match, exclude)]
}
## check the helper function
select(p, "gamma")
# argument name written in full instead of relying on partial matching
select(p, "gamma", exclude = TRUE)
## cost function, see ?modCost help page
## p is the combined parameter vector: the element named "gamma" plus the
## beta values at the knots t_beta. A spline through the knot values is
## used as beta(t); the simulation is compared with `datareal`.
sir_cost <- function (p) {
  knot_values <- select(p, "gamma", exclude = TRUE)
  outsir <- sir_1(
    f_beta = splinefun(x = t_beta, y = knot_values),
    gamma = select(p, "gamma"),
    S0 = 9990, I0 = 10, R0 = 0, times = times
  )
  modCost(model = outsir, obs = datareal)
}
## model calibration, see ?modFit
## (bounds are given in the same order as p: gamma first, then the
## n_beta knot values)
Fit <- modFit(f = sir_cost, p = p,
# lower bound to avoid negative values of beta
lower = c(gamma = 0, rep(0.0, n_beta)),
# note: high sensitivity wrt. upper bound
upper = c(gamma=1, rep(2.0, n_beta)),
# an algorithm that supports box constraints
method = "Port")
## all parameters were identifiable
summary(Fit)
## smaller time steps to obtain smooth curves
## (this replaces the `times` grid that was used during fitting)
times <- seq(0, 19, 0.1)
## split components of fitted parameters
gamma <- select(Fit$par, "gamma")
y_beta <- select(Fit$par, "gamma", exclude = TRUE)
## simulate on the fine grid with the fitted spline and the fitted gamma
out2 <- sir_1(f_beta = splinefun(x = t_beta, y = y_beta), gamma,
S0 = 9990, I0 = 10, R0 = 0, times = times)
## show fitted curves and compare simulation with data
## see ?plot.deSolve help page
plot(out2, obs = datareal, which = c("S", "R", "I", "beta"),
las = 1, obspar = list(pch = 16, col = "red"))
# Simulate p-values of the two-sided t-test for one regression coefficient
#
# n           number of observations per simulated data set
# sigma_x     passed on to gen_X()
# rho         passed on to gen_X()
# reps        number of simulated data sets
# coef_index  position of the coefficient being tested. The statistic and
#             its standard error now use the same position; previously
#             the estimate of coefficient 3 was divided by the standard
#             error of coefficient 5.
#             NOTE(review): the default 5 is a guess -- confirm which
#             coefficient is meant.
#
# Returns a numeric vector of `reps` p-values.
#
# Note: the true coefficients at positions 3 and 5 are both 0, so for
# those positions the null hypothesis holds and the share of p-values
# below 0.05 estimates the size of the test (about 0.05), not its power.
pval.dist.sim <- function(n, sigma_x, rho, reps = 2500, coef_index = 5) {
  p <- 5
  sigma <- sqrt(2)
  beta <- c(0.5, 0.5, 0, 0.25, 0)
  mu <- 10

  # generate vector for pvals
  pval.list <- numeric(reps)

  for (r in seq_len(reps)) {
    # generate design matrix
    X <- gen_X(n = n, p = p, rho = rho, sigma_x = sigma_x, mu = mu)

    # (X'X)^{-1}, used for the estimate and for its standard error
    XtXinv <- qr.solve(crossprod(X))

    # errors drawn with standard deviation sigma (it was defined but not
    # used before, so the errors had standard deviation 1)
    y <- X %*% beta + rnorm(n = n, sd = sigma)

    beta.hat <- XtXinv %*% crossprod(X, y)
    sE <- sqrt(sum((y - X %*% beta.hat)^2) / (n - p))

    std.err <- sE * sqrt(XtXinv[coef_index, coef_index])
    t.val <- beta.hat[coef_index] / std.err
    pval.list[r] <- 2 * pt(-abs(t.val), df = n - p)
  }

  pval.list
}
Above is the pval.dist.sim simulation function. I need to run it to generate the p-values from which I build my power curve:
set.seed(3701)
# givens
p <- 5; d <- 2; mu <- 10; sigmasqrd <- 2; reps <- 2500
n.list <- seq(from = 10, to = 150, by = 10)
# create a vector for the estimates of the power
est.power <- numeric(length(n.list))
# create a vector for the left endpoints of the 95% CI
LB.list <- numeric(length(n.list))
# create a vector for the right endpoints of the 95% CI
UB.list <- numeric(length(n.list))
# seq_along() is safe for an empty n.list, unlike 1:length(n.list)
for (j in seq_along(n.list)) {
  # perform the test reps times
  pvals <- pval.dist.sim(n = n.list[j], sigma_x = 1.5, rho = 0.2, reps = reps)
  # record the simulated estimate of the power
  est.power[j] <- mean(pvals < 0.05)
  # compute the 95% conf int
  bounds <- binom.test(
    x = sum(pvals < 0.05), n = reps, conf.level = 0.95
  )$conf.int[1:2]
  LB.list[j] <- bounds[1]
  UB.list[j] <- bounds[2]
}
## plot the power curve estimation
# `type` written in full instead of relying on partial matching of `t`
plot(n.list, est.power, type = "l", xlab = "n", ylab = "Power")
The issue is that the power estimates computed from these p-values are drastically low: I am getting values in the single-digit percentages. What am I doing wrong?