I am trying to simulate cell uptake in R, having ported a model from Berkeley Madonna. The model is comprised of several constants and differential equations to calculate amounts and concentrations. A portion of the code is listed:
library(deSolve)
fb = 0.0510
Km = 23.5
Pdif = 0.429
Vmax = 270
Vol_cell = 9.33
Vol_media = 150
S = 10 #concentration of dosing media
yini = c(Amt_media=(S*Vol_media)-(S*fb*Vol_cell),
Amt_cell=S*fb*Vol_cell,
Amt_total=S*Vol_media,
Con_media=S-(S*fb),
Con_cell=S*fb)
Uptake = function(t, y, p){
dy1 = (- (Pdif * y[1]) + (Pdif * y[2]) - ((Vmax * y[4])/(Km + y[4])))
dy2 = (+ (Pdif * y[1]) - (Pdif * y[2]) + ((Vmax * y[4])/(Km + y[4])))
dy3 = dy1 + dy2
dy4 = dy1 / Vol_media
dy5 = dy2 / Vol_cell
list(c(dy1, dy2, dy3, dy4, dy5))}
times1 = seq(from=0, to=15, by=0.01)
out1 = ode(y=yini, times=times1, func=Uptake, parms=NULL, method="rk4")
The rest of the code is for output to dataframes and plotting. My question then is how to have the code structured to use "S" as a list of several concentrations such that each concentration can be applied to the differential equations (essentially giving me an out1 for S1, out2 for S2, etc, that can then be passed onto a dataframe)? In Berkeley Madonna this was achieved by writing over 35 differential equations, though I'd like to use a simplified approach in R if possible.
The only part where S is used is in the initialization of the yini values. Basically we just need to move that part and the part that runs ode with those values into a new function. Then you can call that function for what ever values you want. For example
#set up
library(deSolve)
fb <- 0.0510
Km <- 23.5
Pdif <- 0.429
Vmax <- 270
Vol_cell <- 9.33
Vol_media <- 150
Uptake <- function(t, y, p){
dy1 = (- (Pdif * y[1]) + (Pdif * y[2]) - ((Vmax * y[4])/(Km + y[4])))
dy2 = (+ (Pdif * y[1]) - (Pdif * y[2]) + ((Vmax * y[4])/(Km + y[4])))
dy3 = dy1 + dy2
dy4 = dy1 / Vol_media
dy5 = dy2 / Vol_cell
list(c(dy1, dy2, dy3, dy4, dy5))}
times1 <- seq(from=0, to=15, by=0.01)
# function with S as a parameter
runConc <- function(S) {
yini <- c(Amt_media=(S*Vol_media)-(S*fb*Vol_cell),
Amt_cell=S*fb*Vol_cell,
Amt_total=S*Vol_media,
Con_media=S-(S*fb),
Con_cell=S*fb)
ode(y=yini, times=times1, func=Uptake, parms=NULL, method="rk4")
}
#run for concentrations 10,20,30
out <- lapply(c(10,20,30), runConc)
This will result in a list object with the results for each concentration. So out[[1]] is the result for S=10, out[[2]] is S=20, etc. We can see the first few lines of each of the results with
lapply(out, head, 3)
# [[1]]
# time Amt_media Amt_cell Amt_total Con_media Con_cell
# [1,] 0.00 1495.242 4.75830 1500 9.490000 0.510000
# [2,] 0.01 1488.103 11.89710 1500 9.442408 1.275145
# [3,] 0.02 1481.028 18.97216 1500 9.395241 2.033457
#
# [[2]]
# time Amt_media Amt_cell Amt_total Con_media Con_cell
# [1,] 0.00 2990.483 9.51660 3000 18.98000 1.020000
# [2,] 0.01 2976.550 23.44980 3000 18.88711 2.513377
# [3,] 0.02 2962.739 37.26072 3000 18.79504 3.993646
#
# [[3]]
# time Amt_media Amt_cell Amt_total Con_media Con_cell
# [1,] 0.00 4485.725 14.27490 4500 28.47000 1.53000
# [2,] 0.01 4465.153 34.84653 4500 28.33286 3.73489
# [3,] 0.02 4444.761 55.23920 4500 28.19690 5.92060
Related
I have to write a code for this equations to find μ_0 and σ_0. equations
Here, Φ[.] is the cumulative standard Normal distribution. There are given values for σ = 2, E[M] = 10 and p = Pr[8 ≤ M ≤12] = 2/3.
My results should be μ_0 ≈ 0.28 and σ_0 ≈ 0.21, but something is wrong with my functions, i think. Can you pls help me?
sigma <- 2
E_M <- 10
Pr <- 2/3
a <- 8
b <- 12
#From first equation we take log(E[M]) = mu_0 + 1/2sigma^2 + 1/2sigma_0^2,
#As sigma = 2 and E[M] = 10 -> mu_0 = 0.303 - 1/2 sigma_0^2
fun <- function(sigma_0)
{pnorm((log(b) - 2 - 0.303 + 1/2 * sigma_0^2)/sigma_0, mean = 0.303 - 1/2 * sigma_0^2, sd = sigma_0) -
pnorm((log(a) - 2 -0.303 + 1/2 * sigma_0^2)/sigma_0, mean = 0.303 - 1/2 * sigma_0^2, sd = sigma_0) - Pr}
sigma_0 <- seq(0.1, 2, 0.05)
uniroot(fun, upper = 2, lower = 0.1)
I am trying to use a for-loop as a repeat counter to add summary data to a test sample. I have tried to use a data.frame, matrix, and a vector push my data out of the for loop and populate a table. The best I have got is filling one complete column in a vector and completing all columns but one row in a data frame.
#try empty vector to populate
large.sample.df <- vector(mode = "double", length = 1000)
#try matrix to populate
large.matrix <- matrix(nrow = 1000, ncol = 3)
matrix.names <- c("mean", "lwr", "upr")
colnames(large.matrix) <- matrix.names
#Try dataframe to populate
large.df <- data.frame(mean="", lwr="", upr="")
#set total length
n <- length(large.sample.df)
#use functions to calculate confidence interval
lwr.ci <- function(a) (mean(a) - 1.96 * (sd(a)/sqrt(length(a))))
upp.ci <- function(a) (mean(a) + 1.96 * (sd(a)/sqrt(length(a))))
#Start new seed count
set.seed(1234)
#begin for loop for mean, lwr, upr CI
for (i in 1:n) {
large.sample <- rgamma(n = 1000, shape = 4, rate = 2)
large.df$mean[i] <- mean(large.sample)
large.df$lwr[i] <- lwr.ci(large.sample)
large.df$upr[i] <- upp.ci(large.sample)
}
Here are two ways to get what you want. First we should distinguish between the sample size and the number of samples:
set.seed(1234)
n <- 1000
samples <- 10 # Keep this small for testing and then increase it
s <- 4
r <- 2
First your loop approach:
results <- data.frame(mean=NA, lwr=NA, upr=NA) # Not "" which makes the variables character strings
set.seed(1234)
for (i in 1:samples) {
x <- rgamma(n, shape = s, rate = r)
mn <- mean(x)
sder <- sd(x)/sqrt(n)
lwr <- mn - 1.96 * sder
upr <- mn + 1.96 * sder
results[i, ] <- c(mn, lwr, upr)
}
results
# mean lwr upr
# 1 2.015193688 1.952431714 2.077955663
# 2 2.024218250 1.962404608 2.086031891
# 3 2.008401293 1.948363928 2.068438658
# 4 1.993061142 1.932020588 2.054101696
# 5 1.975824831 1.912961486 2.038688176
# 6 1.983761126 1.923583927 2.043938325
# 7 1.983166350 1.924890819 2.041441880
# 8 1.975453269 1.915336118 2.035570420
# 9 1.976118333 1.915025748 2.037210918
# 10 2.044088839 1.983435628 2.104742050
Now using replicate
confint <- function(n, s, r) {
x <- rgamma(n, shape = s, rate = r)
mn <- mean(x)
sder <- sd(x)/sqrt(n)
lwr <- mn - 1.96 * sder
upr <- mn + 1.96 * sder
return(c(mean=mn, lwr=lwr, upr=upr))
}
confint(n, s, r) # Test the function
# mean lwr upr
# 1.974328366 1.914003710 2.034653023
set.seed(1234)
results <- replicate(samples, confint(n, s, r))
results <- t(results)
results
# mean lwr upr
# [1,] 2.015193688 1.952431714 2.077955663
# [2,] 2.024218250 1.962404608 2.086031891
# [3,] 2.008401293 1.948363928 2.068438658
# [4,] 1.993061142 1.932020588 2.054101696
# [5,] 1.975824831 1.912961486 2.038688176
# [6,] 1.983761126 1.923583927 2.043938325
# [7,] 1.983166350 1.924890819 2.041441880
# [8,] 1.975453269 1.915336118 2.035570420
# [9,] 1.976118333 1.915025748 2.037210918
# [10,] 2.044088839 1.983435628 2.104742050
Both approaches agree.
I'm trying to solve a non-linear optimization problem with constraints. I wrote the code (see below) to minimize sum of squared errors subject to constraints. I need to find a[1] and a[2] so that they sum up to 1. I provide a vector of initial guesses, the code runs but in the end gives me nothing but again my initial guesses. What do I do wrong? Thank you.
MIS <- c(0.0156, 0.0087, 0.0468)
EDF <- c(0.0008, 0.0088, 0.0059)
QFM <- data.frame(Factor1 = c(100,100,50), Factor2 = c(50,25,100))
qt.mean <- mean(EDF)
qt.sd <- sd(EDF)
z.qt <- (qnorm(EDF) - qt.mean)/qt.sd
weight <- 0.7
alpha <- -2.7
beta <- 1.0
objfun <- function(a) {
RQL <- a[1] * QFM$Factor1 + a[2] * QFM$Factor2
z.ql <- (RQL - mean(RQL))/sd(RQL)
corr.factor <- cor(z.qt, z.ql)
denom <- sqrt(weight ^ 2 + (1 - weight)^2 + 2 * corr.factor * weight * (1- weight))
z.cs <- 1/denom * (weight * z.qt + (1-weight) * z.ql)
z.fs <- alpha + beta * z.cs
return(sum((MIS - pnorm(z.fs))^2))
}
eqn <- function (a) {sum(a)}
solnp(c(0.5,0.5), fun = objfun, eqfun = eqn, eqB = 1, LB = c(0,0), UB = c(1,1))
Iter: 1 fn: 0.002509 Pars: 0.50000 0.50000
solnp--> Completed in 1 iterations
$pars
[1] 0.5 0.5
$convergence
[1] 0
$values
[1] 0.00250929 0.00250929
$lagrange
[,1]
[1,] 0
$hessian
[,1] [,2]
[1,] 1 0
[2,] 0 1
$ineqx0
NULL
$nfuneval
[1] 35
$outer.iter
[1] 1
$elapsed
Time difference of 0.02330089 secs
$vscale
[1] 0.00250929 0.60000000 1.00000000 1.00000000
I tried to implement the AdaBoost algorithm of Freund and Schapire as close to the original as possible (see p. 2 here: http://rob.schapire.net/papers/explaining-adaboost.pdf):
library(rpart)
library(OneR)
maxdepth <- 1
T <- 100 # number of rounds
# Given: (x_1, y_1),...,(x_m, y_m) where x_i element of X, y_i element of {-1, +1}
myocarde <- read.table("http://freakonometrics.free.fr/myocarde.csv", head = TRUE, sep = ";")
#myocarde <- read.table("data/myocarde.csv", header = TRUE, sep = ";")
y <- (myocarde[ , "PRONO"] == "SURVIE") * 2 - 1
x <- myocarde[ , 1:7]
m <- nrow(x)
data <- data.frame(x, y)
# Initialize: D_1(i) = 1/m for i = 1,...,m
D <- rep(1/m, m)
H <- replicate(T, list())
a <- vector(mode = "numeric", T)
set.seed(123)
# For t = 1,...,T
for(t in 1:T) {
# Train weak learner using distribution D_t
# Get weak hypothesis h_t: X -> {-1, +1}
data_D_t <- data[sample(m, 10*m, replace = TRUE, prob = D), ]
H[[t]] <- rpart(y ~., data = data_D_t, maxdepth = maxdepth, method = "class")
# Aim: select h_t with low weighted error: e_t = Pr_i~D_t[h_t(x_i) != y_i]
h <- predict(H[[t]], x, type = "class")
e <- sum(h != y) / m
# Choose a_t = 0.5 * log((1-e) / e)
a[t] <- 0.5 * log((1-e) / e)
# Update for i = 1,...,m: D_t+1(i) = (D_t(i) * exp(-a_t * y_i * h_t(x_i))) / Z_t
# where Z_t is a normalization factor (chosen so that Dt+1 will be a distribution)
D <- D * exp(-a[t] * y * as.numeric(h))
D <- D / sum(D)
}
# Output the final hypothesis: H(x) = sign(sum of a_t * h_t(x) for t=1 to T)
newdata <- x
H_x <- sapply(H, function(x) as.numeric(as.character(predict(x, newdata = newdata, type = "class"))))
H_x <- t(a * t(H_x))
pred <- sign(rowSums(H_x))
#H
#a
eval_model(pred, y)
##
## Confusion matrix (absolute):
## Actual
## Prediction -1 1 Sum
## -1 0 1 1
## 1 29 41 70
## Sum 29 42 71
##
## Confusion matrix (relative):
## Actual
## Prediction -1 1 Sum
## -1 0.00 0.01 0.01
## 1 0.41 0.58 0.99
## Sum 0.41 0.59 1.00
##
## Accuracy:
## 0.5775 (41/71)
##
## Error rate:
## 0.4225 (30/71)
##
## Error rate reduction (vs. base rate):
## -0.0345 (p-value = 0.6436)
As can be seen the accuracy of the model is horrible compared to other AdaBoost implementations, e.g.:
library(JOUSBoost)
## JOUSBoost 2.1.0
boost <- adaboost(as.matrix(x), y, tree_depth = maxdepth, n_rounds = T)
pred <- predict(boost, x)
eval_model(pred, y)
##
## Confusion matrix (absolute):
## Actual
## Prediction -1 1 Sum
## -1 29 0 29
## 1 0 42 42
## Sum 29 42 71
##
## Confusion matrix (relative):
## Actual
## Prediction -1 1 Sum
## -1 0.41 0.00 0.41
## 1 0.00 0.59 0.59
## Sum 0.41 0.59 1.00
##
## Accuracy:
## 1 (71/71)
##
## Error rate:
## 0 (0/71)
##
## Error rate reduction (vs. base rate):
## 1 (p-value < 2.2e-16)
My question
Could you please give me a hint what went wrong in my implementation? Thank you
Edit
The final and corrected code can be found in my blog post: Understanding AdaBoost – or how to turn Weakness into Strength
There are quite a few contributing factors as to why your implementation is not working.
You were not using rpart correctly. Adaboost implementation does not mention upsampling with the weights - but rpart itself can accept weights. My example below shows how rpart should be used for this purpose.
Calculation of the weighted error was wrong. You were calculating the error proportion (number of samples calculated incorrectly divided by number of samples). Adaboost uses the sum of the weights that were incorrectly predicted (sum(D[y != yhat])).
Final predictions seemed to be incorrect too, I just ended up doing a simple loop.
Next time I recommend diving into the source code the the other implementations you are comparing against.
https://github.com/cran/JOUSBoost/blob/master/R/adaboost.R uses almost identical code to my below example - and probably would have helped guide you originally.
Additionally using T as a variable could potentially interfere with the logical TRUE and it's shorthand T, so I'd avoid it.
### packages ###
library(rpart)
library(OneR)
### parameters ###
maxdepth <- 1
rounds <- 100
set.seed(123)
### data ###
myocarde <- read.table("http://freakonometrics.free.fr/myocarde.csv", head = TRUE, sep = ";")
y <- (myocarde[ , "PRONO"] == "SURVIE") * 2 - 1
x <- myocarde[ , 1:7]
m <- nrow(x)
dataset <- data.frame(x, y)
### initialisation ###
D <- rep(1/m, m)
H <- list()
a <- vector(mode = "numeric", length = rounds)
for (i in seq.int(rounds)) {
# train weak learner
H[[i]] = rpart(y ~ ., data = dataset, weights = D, maxdepth = maxdepth, method = "class")
# predictions
yhat <- predict(H[[i]], x, type = "class")
yhat <- as.numeric(as.character(yhat))
# weighted error
e <- sum(D[yhat != y])
# alpha coefficient
a[i] <- 0.5 * log((1 - e) / e)
# updating weights (D)
D <- D * exp(-a[i] * y * yhat)
D <- D / sum(D)
}
# predict with each weak learner on dataset
y_hat_final <- vector(mode = "numeric", length = m)
for (i in seq(rounds)) {
pred = predict(H[[i]], dataset, type = "class")
pred = as.numeric(as.character(pred))
y_hat_final = y_hat_final + (a[i] * pred)
}
pred <- sign(y_hat_final)
eval_model(pred, y)
> eval_model(pred, y)
Confusion matrix (absolute):
Actual
Prediction -1 1 Sum
-1 29 0 29
1 0 42 42
Sum 29 42 71
Confusion matrix (relative):
Actual
Prediction -1 1 Sum
-1 0.41 0.00 0.41
1 0.00 0.59 0.59
Sum 0.41 0.59 1.00
Accuracy:
1 (71/71)
Error rate:
0 (0/71)
Error rate reduction (vs. base rate):
1 (p-value < 2.2e-16)
> Name Date Close CP ttmDaysW ttm Strike Fut Wibor lambda omega alpha beta sigma
1 OW20C1330 2011-01-19 0.60 c 42 0.1673307 3300 2768 0.0425 0.03985676 1.205098e-06 0.05403404 0.9426635 0.010935144
2 OW20C1330 2011-02-16 0.21 c 22 0.0876494 3300 2703 0.0435 0.03285167 5.852091e-07 0.05208226 0.9462142 0.008209948
3 OW20C2150 2011-12-08 745.65 c 71 0.2828685 1500 2233 0.0499 0.05490974 1.213260e-06 0.06837361 0.9296792 0.018583414
4 OW20C2150 2011-12-09 720.80 c 70 0.2788845 1500 2262 0.0499 0.05119041 1.212956e-06 0.06813476 0.9299286 0.019143222
Hi, I created the above dataframe in R which has above 20000 rows. I wrote a code to compute theoretical prices of Options assuming that volatility follow a GARCH(1,1) process. The code works fine but is VERY sluggish. I wonder weather there is any chance to speed it up or Vectorize? I've tried to work it out, but I failed as a beginning R user.Computation is done by Monte Caro Simulation. OW is my Data.Frame
#Monte Carlo Garch(1,1)
nsim=10000
for (i in 1:nrow(OW)){
iopt<-ifelse(OW$CP[i]=="c",1,-1)
sum=0
for (j in 1:nsim){
Sigma2t<-(OW$sigma[i])^2
Eps<-rnorm(1)*OW$sigma[i]
sumSigma2t=0
sumEps=0
for (k in 1:OW$ttmDaysW[i]){
Sigma2t= OW$omega[i] +OW$alpha[i]*(Eps-OW$lambda[i]*sqrt(Sigma2t))^2+OW$beta[i]*Sigma2t
Eps <- rnorm(1)*sqrt(Sigma2t)
sumEps=sumEps+Eps
sumSigma2t = sumSigma2t + Sigma2t
}
Ft<-OW$Fut[i]*exp(-0.5*sumSigma2t+sumEps)
payoff <- max(c(iopt * (Ft - OW$Strike[i]), 0))
sum<-sum+payoff
}
OW$G[i] = exp(-OW$Wibor[i] * OW$ttm[i]) * sum / nsim
}
I have found only this help on my question:Simulation of GARCH in R
Generally, indexing data frames (which you do a lot), takes a lot of time. Consider vectorizing this. Especially since you are computing all those OW$something[i]'s more than 200 million times (since all the nested for loops) where they actually only have to be called 10,000 times (nsim times). See if this runs any quicker.
nsim=10000
for (i in 1:nrow(OW)){
iopt<-ifelse(OW$CP[i]=="c",1,-1)
sum=0
OWSigma <- OW$sigma[i]
OWOmega <- OW$omega[i]
OWAlpha <- OW$alpha[i]
OWLambda <- OW$lambda[i]
OWBeta <- OW$beta[i]
OWFut <- OW$Fut[i]
OWStrike <- OW$Strike[i]
OWTtmDaysW <- OW$ttmDaysW[i]
for (j in 1:nsim){
Sigma2t<-(OWSigma)^2
Eps<-rnorm(1)*OWSigma
sumSigma2t=0
sumEps=0
for (k in 1:OWTtmDaysW){
Sigma2t= OWOmega +OWAlpha*(Eps-OWLambda*sqrt(Sigma2t))^2+OWBeta*Sigma2t
Eps <- rnorm(1)*sqrt(Sigma2t)
sumEps=sumEps+Eps
sumSigma2t = sumSigma2t + Sigma2t
}
Ft<-OWFutexp(-0.5*sumSigma2t+sumEps)
payoff <- max(c(iopt * (Ft - OWStrike, 0))
sum<-sum+payoff
}
OW$G[i] = exp(-OW$Wibor[i] * OW$ttm[i]) * sum / nsim
}
It's vectorized solution to my question!
#Monte Carlo Garch(1,1)
N=10000
system.time(for (i in 1:nrow(OW)){
iopt<-ifelse(OW$CP[i]=="c",1,-1)
h0<- (OW$sigma[i])^2
omega <- OW$omega[i]
alpha1 <- OW$alpha[i]
lambda <- OW$lambda[i]
beta1 <- OW$beta[i]
Fu <- OW$Fut[i]
Str <- OW$Strike[i]
horizon <- OW$ttmDaysW[i]
Wibor<-OW$Wibor[i]
ttm<-OW$ttm[i]
ret <- et <- ht <- matrix(NA, nc = horizon, nr = N)
zt <- matrix(rnorm(N * horizon, 0, 1), nc = horizon)
hB<-matrix(rep(h0,N),nr=N)
eB<-matrix(rnorm(N, 0, 1), nc=1) * sqrt(hB)
Fut<-matrix(rep(Fu,N),nr=N)
Strike<-matrix(rep(Str,N),nr=N)
for(j in 1:horizon){
ifelse(j>1,
ht[, j ] <- omega + alpha1 * (et[, j-1]-lambda*sqrt(ht[, j-1])) ^ 2 + beta1 * ht[, j-1],
ht[, j ] <- omega + alpha1 * (eB -lambda*sqrt(hB))^ 2 + beta1 * hB
)
et[, j] <- zt[, j] * sqrt(ht[, j])
}
Ft<-Fut*exp(-0.5*rowSums(ht)+rowSums(et))
payoff<-pmax(iopt * (Ft - Strike),0)
OW$G[i] = exp(-Wibor *ttm) * sum(payoff) / N
}
)