Applying a function to a data.frame - R

I have a function which looks like this:
function(nsim = 10, maxN = 10000, mu = 0, sigma = 0.1, S0 = 100, endT = 1, K = 100){
  nsim + maxN + mu + sigma + S0 + endT + K
}
(The function here is simplified; the actual function is a simple Black-Scholes pricing model.)
Now, I have a data.frame:
df <- expand.grid(nsim = 10,
                  maxN = 10000,
                  mu = c(0.05, 0.10, 0.15),
                  sigma = c(0.2, 0.4, 0.6),
                  S0 = seq(80, 120, by = 1),
                  endT = c(0.25, 0.50, 0.75),
                  K = 100,
                  sim = sprintf("Sim.%s", 1:10))
This is just a grid of parameter combinations. The question is: how do I apply the previous function to the data frame to compute a new column, using each row's column values as the inputs?

You can add a column with mutate:
library(dplyr)
my_function <- function(nsim = 10, maxN = 10000, mu = 0, sigma = 0.1, S0 = 100, endT = 1, K = 100){
  nsim + maxN + mu + sigma + S0 + endT + K
}
df %>%
  mutate(new_c = my_function(nsim, maxN, mu, sigma, S0, endT, K))
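This works because the function is fully vectorized (it only does arithmetic on its arguments), so mutate can pass whole columns in a single call; the row-wise approaches below are what you need when the function is not vectorized.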

You can use mapply:
apply_fun <- function(nsim = 10, maxN = 10000, mu = 0, sigma = 0.1, S0 = 100, endT = 1, K = 100){
  nsim + maxN + mu + sigma + S0 + endT + K
}
df$price <- mapply(apply_fun, df$nsim, df$maxN, df$mu, df$sigma, df$S0, df$endT, df$K)
If you don't want to write each argument separately, you can also use apply with do.call. Dropping the last column (the non-numeric sim) keeps apply from coercing each row to character:
df$price <- apply(df[-ncol(df)], 1, function(x) do.call(apply_fun, as.list(x)))
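If the purrr package is available, pmap_dbl offers another row-wise idiom (a sketch of my own, not from the original answers): it passes each row's entries as named arguments, so matching is by name rather than by position.
library(purrr)
# drop the non-numeric sim column so only the function's arguments remain
df$price <- pmap_dbl(df[names(df) != "sim"], apply_fun)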

Related

R code for simulating stochastic asset price path

Consider the following model for the evolution of an asset's price: X_t = X_{t-1} * exp(mu + sigma * Z_t), where Z_t takes the value +1 with probability p and -1 with probability 1 - p.
This is what I have done (in R). I could not find a function that randomly outputs +1 or -1, so I decided to adapt the built-in rbinom function.
## This code is in R
rm(list = ls())
library(dplyr)  # for if_else
path <- function(T, mu, sigma, p, x0) {
  x <- rep(NA, T)
  x[1] <- x0
  for (i in 2:T) {
    # rbinom gives 0/1; map it to -1/+1
    z <- if_else(rbinom(1, 1, p) == 0, -1, 1)
    x[i] <- x[i-1] * exp(mu + sigma*z)
  }
  return(x)
}
## Just some testing
x_sim <- path(T = 4, mu = 0, sigma = 0.01, p = 0.5, x0 = 100)
## Actual answer
Np <- 10000
mc <- matrix(nrow = 17, ncol = Np)
for (j in 1:Np) {
  mc[, j] <- path(T = 17, mu = 0, sigma = 0.01, p = 0.5, x0 = 100)
}
# does each path ever reach 100 again after the start?
test <- mc[2:nrow(mc), ] >= 100
sum_test <- colSums(test)
comp <- sum(sum_test >= 1) / length(sum_test)
prob <- 1 - comp
Does this make sense? Any help/tips/advice would be much appreciated. Thanks!
Staying close to your code, I came up with the following. Intuitively, the probability should be rather low given the parameters; I get about 6.7%, which is roughly what your code yields with the parameters from the assignment.
simpath <- function(t, mu, sigma, p, x0, seed){
  # set seed if one is supplied
  if (!missing(seed)) {
    set.seed(seed)
  }
  # set up a matrix for storing the results
  res <- matrix(c(1:t, rep(NA, t*2)), ncol = 3)
  colnames(res) <- c('t', 'z_t', 'x_t')
  # draw all +1/-1 moves at once
  res[, 'z_t'] <- sample(c(1, -1), size = t, prob = c(p, 1-p), replace = TRUE)
  res[1, 3] <- x0
  for (i in 2:t) {
    res[i, 3] <- res[i-1, 3] * exp(mu + sigma*res[i, 2])
  }
  return(res)
}
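Note the ±1 draws: sample(c(1, -1), size = t, prob = c(p, 1 - p), replace = TRUE) replaces the rbinom/if_else workaround from the question and draws all of a path's moves in one call.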
x_sim <- simpath(t = 4, mu = 0, sigma = 0.01, p = 0.5, x0 = 100, seed = 123)
x_sim2 <- simpath(t = 36, mu = 0, sigma = 0.03, p = 0.5, x0 = 100, seed = 123)
## Actual answer
Np <- 100000
mc <- matrix(nrow = 36, ncol = Np)
for (j in 1:Np) {
  mc[, j] <- simpath(t = 36, mu = 0, sigma = 0.03, p = 0.5, x0 = 100)[, 3]
}
test <- mc > 100
sum_test <- colSums(test)
comp <- sum(sum_test == 0) / length(sum_test)
prob <- comp
> prob
[1] 0.06759
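As a rough precision check (an aside of mine, using the usual binomial standard error for a Monte Carlo proportion):
sqrt(0.06759 * (1 - 0.06759) / 100000)  # roughly 0.0008
So with Np = 100000 paths, the estimate of about 6.8% is pinned down to within about ±0.0016 at two standard errors.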

4D Monte Carlo integration in R

How do I integrate a function
F(X[1], X[2], X[3], X[4])
that depends on 4 variables, over 4 dimensions, using the Monte Carlo method?
I mean int_{0}^{1} int_{0}^{1} int_{0}^{1} int_{0}^{1} X[1]*X[2]*X[3]*X[4] dX[1] dX[2] dX[3] dX[4]
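(For this separable example the exact value is (1/2)^4 = 0.0625, a handy check on any Monte Carlo estimate.)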
UPD: the function is
library(kdensity)  # for the kernel density estimates below
data1 <- rnorm(100, 0, 1)
data2 <- rnorm(100, 0.1, 0.5)
data3 <- rnorm(100, 0.2, 0.8)
data4 <- rnorm(100, 0.3, 0.9)
kernel1 <- kdensity(data1, kernel = 'gaussian')
kernel2 <- kdensity(data2, kernel = 'gaussian')
kernel3 <- kdensity(data3, kernel = 'gaussian')
kernel4 <- kdensity(data4, kernel = 'gaussian')
f <- function(X) {
  return(X[1]*kernel1(X[1])*kernel2(X[2])*kernel3(X[3])*kernel4(X[4]))
}
and I want to integrate it:
int_{0}^{1} int_{0}^{1} int_{0}^{1} int_{0}^{1} f dX[1] dX[2] dX[3] dX[4]
Because your integrand factorizes, you can rewrite the nested integral as a product of one-dimensional integrals.
Thus, the solution below may serve as an example for you (with random seed set.seed(1)):
g <- Vectorize(function(ker) integrate(ker, 0, 1)$value)
gE <- function(ker) integrate(function(x) x*ker(x), 0, 1)$value
res1 <- prod(c(gE(kernel1), g(c(kernel2, kernel3, kernel4))))
such that
> res1
[1] 0.01559343
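Here g integrates each kernel density over (0, 1), while gE handles the first factor, which carries the extra x1 from the integrand; the product of the four one-dimensional integrals equals the original four-dimensional integral.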
Nested integral: you need to rewrite your function f first, i.e.,
f <- function(x1, x2, x3, x4) {
  return(x1*kernel1(x1)*kernel2(x2)*kernel3(x3)*kernel4(x4))
}
res2 <- integrate(Vectorize(function(x4)
  integrate(Vectorize(function(x3, x4)
    integrate(Vectorize(function(x2, x3, x4)
      integrate(f, 0, 1, x2, x3, x4)$value),
      0, 1, x3, x4)$value),
    0, 1, x4)$value),
  0, 1)$value
such that
> res2
[1] 0.01559343
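Since the title asks for Monte Carlo specifically, here is a plain Monte Carlo sketch (my addition, not part of the original answer; it reuses kernel1..kernel4 from the question). Draw uniform points in the unit hypercube and average the integrand; because the domain has volume 1, the sample mean estimates the integral.
set.seed(1)
n <- 1e5
U <- matrix(runif(4 * n), ncol = 4)  # n uniform points in [0,1]^4
f_vec <- function(X) X[1]*kernel1(X[1])*kernel2(X[2])*kernel3(X[3])*kernel4(X[4])
vals <- apply(U, 1, f_vec)
mc_est <- mean(vals)        # Monte Carlo estimate of the integral
mc_se <- sd(vals) / sqrt(n) # its standard error
The estimate should agree with res1 and res2 up to a few standard errors.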

R: lapply multinomial test to a list of data frames

I have a data frame A, which I split into a list of 100 data frames, each having 3 rows (in my real data each data frame has 500 rows). Here I show A with 2 elements of the list (rows 1-3; rows 4-6):
A <- data.frame(n = c(0, 1, 2, 0, 1, 2),
                prob = c(0.4, 0.5, 0.1, 0.4, 0.5, 0.1),
                count = c(24878, 33605, 12100, 25899, 34777, 13765))
# This is the list:
nest <- split(A, rep(1:2, each = 3))
I want to apply the multinomial test to each of these data frames and extract the p-value of each test. So far I have done this:
library(EMT)
fun <- function(x){
  multinomial.test(x$count,
                   prob = x$prob,
                   useChisq = FALSE, MonteCarlo = TRUE,
                   ntrial = 100,  # number of withdrawals accomplished
                   atOnce = 100)
}
lapply(nest, fun)
However, I get:
"Error in multinomial.test(x$counts_set, prob = x$norm_genome, useChisq = F, :
Observations have to be stored in a vector, e.g. 'observed <- c(5,2,1)'"
Does anyone have a smarter way of doing this?
Note that the error message refers to x$counts_set and x$norm_genome, column names that don't exist in the data frames shown, so fun was evidently run against differently named columns; with the columns shown here (count and prob), lapply(nest, fun) works as-is. If you prefer, you can also combine the split elements explicitly with the list function and then use lapply:
n <- c(0, 1, 2, 0, 1, 2)
prob <- c(0.4, 0.5, 0.1, 0.4, 0.5, 0.1)
count <- c(24878, 33605, 12100, 25899, 34777, 13765)
A <- cbind.data.frame(n, prob, count)
nest <- split(A, rep(1:2, each = 3))
fun <- function(x){
  multinomial.test(x$count,
                   prob = x$prob,
                   useChisq = FALSE, MonteCarlo = TRUE,
                   ntrial = 100,  # number of withdrawals accomplished
                   atOnce = 100)
}
# Create a list of the split elements
new_list <- list(nest$`1`, nest$`2`)
lapply(new_list, fun)
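Since the goal is the p-value of each test, it can be pulled straight out of each result (using the p.value element, as the dplyr solution below also does):
p_vals <- sapply(nest, function(x) fun(x)$p.value)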
A solution with dplyr:
A <- data.frame(n = c(0, 1, 2, 0, 1, 2),
                prob = c(0.4, 0.5, 0.1, 0.4, 0.5, 0.1),
                count = c(43, 42, 9, 74, 82, 9))
library(dplyr)
library(EMT)  # for multinomial.test
nest <- A %>%
  mutate(pattern = rep(1:2, each = 3)) %>%
  group_by(pattern) %>%
  dplyr::summarize(mn_pvals = multinomial.test(count, prob)$p.value)
nest
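Here group_by/summarize runs one multinomial.test per pattern group and keeps just the p-value, which is exactly the per-data-frame extraction asked for; note it uses the function's default settings rather than the Monte Carlo options above.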

Optimize ensemble model weights with for loop

I have three models' probability predictions: mod.p1, mod.p2, mod.p3.
require(Runuran)
mod.p1 <- urnorm(n = 1000, mean = 0.2, sd = 0.35, lb = 0, ub = 1)
mod.p2 <- urnorm(n = 1000, mean = 0.23, sd = 0.37, lb = 0, ub = 1)
mod.p3 <- urnorm(n = 1000, mean = 0.19, sd = 0.39, lb = 0, ub = 1)
The final outcome is given in Verdict (yes, No):
Verdict <- sample(c("yes", "No"), 1000, replace = TRUE, prob = c(0.2, 0.8))
I have a weight for each model:
i1 <- 0.3
i2 <- 0.3
i3 <- 0.4
Creating the ensemble prediction and calculating the AUC:
Ensemble <- i1*mod.p1 + i2*mod.p2 + i3*mod.p3
require(ROCR)
Ensemble.pred <- prediction(Ensemble, Verdict)
Ensemble.auc <- as.numeric(performance(Ensemble.pred, "auc")@y.values)
Ensemble.auc # 0.52
Now how do I write a for loop to try different values of i1, i2 and i3, and return the values that give the maximum AUC?
I adapted this version to apply the constraints and to explore in 0.1 increments. It uses a slightly brute-force way to restrict the parameters, but that makes it a little more general.
require(Runuran)
require(ROCR)
mod.p1 <- urnorm(n = 1000, mean = 0.2, sd = 0.35, lb = 0, ub = 1)
mod.p2 <- urnorm(n = 1000, mean = 0.23, sd = 0.37, lb = 0, ub = 1)
mod.p3 <- urnorm(n = 1000, mean = 0.19, sd = 0.39, lb = 0, ub = 1)
## creating ensemble predictions and calculating auc
# make the examples reproducible
set.seed(123)
# expand.grid, in base R, returns a dataframe
grid <- expand.grid(
  i1 = seq(0.1, 1, by = 0.1),
  i2 = seq(0.1, 1, by = 0.1),
  i3 = seq(0.1, 1, by = 0.1))
# apply the constraint that the weights sum to 1
# (use a tolerance rather than ==, since the seq() values are floating point)
grid <- grid[abs(grid$i1 + grid$i2 + grid$i3 - 1) < 1e-8, ]
Ensembles <- list()
grid$auc <- NA
for (i in 1:nrow(grid)) {
  # final outcome is given in Verdict (yes, No)
  Verdict <- sample(c("yes", "No"), 1000, replace = TRUE, prob = c(0.2, 0.8))
  this_set <- grid[i, ]
  Ensemble <- (this_set[["i1"]]*mod.p1) + (this_set[["i2"]]*mod.p2) + (this_set[["i3"]]*mod.p3)
  Ensemble.pred <- prediction(Ensemble, Verdict)
  Ensemble.auc <- as.numeric(performance(Ensemble.pred, "auc")@y.values)
  grid$auc[i] <- Ensemble.auc
  Ensembles[i] <- list(Ensemble)
}
# print the best value (note there is another source
# of randomness, so results vary from run to run)
grid[grid$auc == max(grid$auc), ]
#     i1  i2  i3      auc
# 8  0.8 0.1 0.1 0.554802
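(The extra run-to-run randomness comes from resampling Verdict inside the loop; drawing Verdict once before the loop, as the question does, makes the AUC values comparable across weight combinations.)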
Got it, this is what I was looking for:
# creation of dummy data
mod.p1 <- urnorm(n = 1000, mean = 0.2, sd = 0.35, lb = 0, ub = 1)
mod.p2 <- urnorm(n = 1000, mean = 0.23, sd = 0.37, lb = 0, ub = 1)
mod.p3 <- urnorm(n = 1000, mean = 0.19, sd = 0.39, lb = 0, ub = 1)
Verdict <- sample(c("yes", "No"), 1000, replace = TRUE, prob = c(0.2, 0.8))
# loop for optimizing the weights in the ensemble
auc <- 0
i1 <- 0
i2 <- 0
i3 <- 0
for (i in seq(0, 1, 0.05)) {
  for (j in seq(0, 1-i, 0.05)) {
    k <- 1 - i - j
    e <- i1*mod.p1 + i2*mod.p2 + i3*mod.p3
    Ensemble.pred <- prediction(e, Verdict)
    Ensemble.auc <- as.numeric(performance(Ensemble.pred, "auc")@y.values)
    if (Ensemble.auc > auc) {
      auc <- Ensemble.auc
      i1 <- i
      i2 <- j
      i3 <- k
      pred <- e
    }
  }
}
# get final values
auc #0.524
i1 #0
i2 #0.1
i3 #0.9
Your own proposed solution is actually pretty good, I think. The only thing I don't understand is why you don't substitute the i, j, k loop values in the e = i1*mod.p1 + i2*mod.p2 + i3*mod.p3 line (as written, e is computed from the previously stored i1, i2, i3 rather than the current i, j, k). I would adjust it as below, and this works fine.
for (i in seq(0, 1, 0.03)) {
  for (j in seq(0, 1-i, 0.03)) {
    k <- 1 - i - j
    e <- i*mod.p1 + j*mod.p2 + k*mod.p3
    Ensemble.pred <- prediction(e, Verdict)
    Ensemble.auc <- as.numeric(performance(Ensemble.pred, "auc")@y.values)
    if (Ensemble.auc > auc) {
      auc <- Ensemble.auc
      i1 <- i
      i2 <- j
      i3 <- k
      pred <- e
    }
  }
}
# get final values
auc #0.543
i1 #0.84
i2 #0.15
i3 #0.01
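For readability, the AUC evaluation can be pulled out into a helper (a sketch of my own, assuming ROCR is loaded and mod.p1, mod.p2, mod.p3 and Verdict exist as above):
auc_for_weights <- function(w, preds, labels) {
  # weighted ensemble of the prediction vectors
  e <- w[1]*preds[[1]] + w[2]*preds[[2]] + w[3]*preds[[3]]
  as.numeric(performance(prediction(e, labels), "auc")@y.values)
}
auc_for_weights(c(0.3, 0.3, 0.4), list(mod.p1, mod.p2, mod.p3), Verdict)
This makes it easy to score any grid of weight combinations with a single sapply over its rows.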

R package deepnet: why sae.dnn.train does not work with large data sets

I am trying sae.dnn.train() with 5000 cases, 55 inputs and 3 hidden layers.
Why does nn.predict return a vector of NaN?
I am using the following commands:
library(deepnet)
Nrow <- 5000
Ncol <- 55
v <- c(rnorm(Nrow*Ncol, 1, 0.5))
x <- matrix(v, nrow = Nrow, ncol = Ncol)
y <- c(rep(1, Nrow/2), rep(0, Nrow/2))
dnn <- sae.dnn.train(x, y, hidden = c(100, 90, 80),
                     activationfun = "tanh", learningrate = 0.6,
                     momentum = 0.5, learningrate_scale = 1.0,
                     output = "sigm", sae_output = "linear",
                     numepochs = 10, batchsize = 100,
                     hidden_dropout = 0, visible_dropout = 0)
yy <- nn.predict(dnn, x)
