Generating random numbers from the Laplace distribution - r

I have been trying to generate random numbers from the double exponential(Laplace) distribution. I am at a point I can write the code anymore. Any help would be appreciated. The code below is what I have written.
rlaplace = function(u,a,b){
u = c(runif(ns))
for(i in 1:ns){
if(u[i] <= 0.5){
X = a+b*log(2*u)
} else{
X = a-b*log(2*(1-u))
}
}
X
}
z1 = rlaplace(u,a,b)

From the Probability distributions CRAN Task View, there are several packages that already implement the Laplace distribution, notably distr and Runuran.
So you should be able to install distr, for example, and do something like :
library(distr)
D <- DExp(rate = 1)
r(D)(1)
Code taken from the examples of the DExp-class help page.

Try this?
#Using pdf for a laplace RV:
#F(y) = 1/sqrt(2*sigma^2)*exp(sqrt(2)*abs(y-mu)/sigma)
rlaplace = function(n,mu,sigma){
U = runif(n,0,1)
#This will give negative value half of the time
sign = ifelse(rbinom(n,1,.5)>.5,1,-1)
y = mu + sign*sigma/sqrt(2)*log(1-U)
y
}

Related

How can I force r optim to run more iterations?

R Optim stops iterating earlier than I want. I use method="L-BFGS-B" (as I need different bounds for different parameters). I know I can set the maximum of iterations via 'control'>'maxit', but optim does not reach the max. I guess 'control'>'pgtol' and/or 'factr' should help, but apparently they do not.
I do the same optimisation with Excel solver Add-In and therefore I know that R stops iterating too early.
Here is my sample data and code:
dsg <- as.data.frame(cbind(c(0:47)
,c(0.402469136,0.368944099,0.375477721,0.391121435,0.36741817,0.366685299,0.373907486,0.409429755,0.383399692,0.412436098,0.389864409,0.411901702,0.379012346,0.383269431,0.372778178,0.397308798,0.407005188,0.396770412,0.378525076,0.38084766,0.378051956,0.376836815,0.351144888,0.387655975,0.415815896,0.39851447,0.384345349,0.40061633,0.370402697,0.373590499,0.379474943,0.378865913,0.382395269,0.365808609,0.383106843,0.35946353,0.361037542,0.36077482,0.384418935,0.362583824,0.385405581,0.348344335,0.358934922,0.379079876,0.391434446,0.354347971,0.361197833,0.372232682)
,c(0.114814815,0.118012422,0.132153971,0.137563457,0.113412879,0.113819587,0.117105297,0.117003116,0.132768529,0.114580427,0.120072809,0.116621127,0.124691358,0.118103399,0.130523309,0.13783449,0.114587233,0.10441059,0.113704754,0.109561299,0.108298377,0.118025013,0.125106438,0.106440408,0.107985517,0.127293523,0.130639958,0.113993233,0.111258799,0.113139383,0.114220436,0.094720217,0.094661712,0.119814534,0.100816305,0.10081601,0.092889949,0.100408522,0.090772039,0.090377762,0.084900005,0.092355162,0.112520582,0.097859676,0.087209055,0.1041137,0.112856553,0.090746204)
,c(9.18031601,11.09227687,9.83844379,9.64580639,10.22514748,10.23337748,10.40043161,11.42924699,13.81486345,14.13952435,13.61129849,10.83903702,6.88640782,9.04216056,12.02954886,10.72787232,9.4425759,11.13168511,10.81846319,7.78656007,9.72518025,13.7847261,12.33280119,9.26193982,9.44348187,9.84196161,11.74926408,12.84258627,11.7028168,10.15912189,9.40823422,10.91680175,13.23648902,16.4693486,14.21047788,9.13496124,7.57774394,8.51722165,11.76416064,10.1919151,11.73247567,9.81560667,8.74626473,8.28651636,12.22919798,14.78829048,12.31028928,7.84778185)
,c(32.81570128,31.82592469,38.98876493,36.76658375,38.44461603,25.63108488,24.05370986,29.96483401,35.41164119,38.10191701,40.08138389,40.88474396,30.11146104,28.32714529,38.10802983,33.06030547,30.26582152,30.81661426,19.32980669,22.1124164,39.01648731,36.54290113,42.37598936,37.80545142,35.41146597,38.03598825,44.00978984,39.49187432,42.19555313,46.46831371,28.62873468,29.05176428,53.9939235,54.82043874,46.26856583,46.39431442,39.83112353,40.50502621,39.48027012,37.93228955,42.59635965,35.06031045,30.37208461,28.13106896,38.42397418,38.90616994,42.98276083,39.79207105)
,c(3470.0,3927.0,4996.8,3148.7,3882.4,4579.9,4191.0,4328.4,4059.6,3667.6,4074.3,4220.0,3698.0,3660.2,4717.6,3687.2,4259.2,4098.1,4297.1,4321.0,3761.5,3586.7,3952.5,3630.0,3803.1,3863.9,4457.1,4280.6,4083.1,4443.2,4534.8,4510.7,3839.3,4408.0,4404.3,3633.3,4153.2,4129.3,4648.0,4947.3,5136.5,4491.6,5758.0,5423.6,5177.8,5312.8,5241.7,4551.9)
))
vs <- names(dsg)[1:5]
cr <- names(dsg)[6]
attach(dsg)
#a linear regression
minL.RSS <- function(par) {
Zws <- par[1]
for(u in 1:length(vs)) {
Zws <- Zws + par[u+1] * (get(vs[u]) ^ 1)
}
Zws <- (Zws - get(cr))^2
sum(Zws)
}
#same linear regression adding an exponent
minE.RSS <- function(par) {
Zws <- par[1]
for(u in 1:length(vs)) {
Zws <- Zws + par[u+1] * (get(vs[u]) ^ par[u+1+length(vs)])
}
Zws <- (Zws - get(cr))^2
sum(Zws)
}
#running optim for the simple regression
resultL <- optim(par = c(0,rep(0,length(vs))), fn = minL.RSS,
method="L-BFGS-B"
, lower = c(0,-Inf,0,-Inf,-Inf,-Inf)
, upper = c(Inf,rep(c(Inf),length(vs)))
, control = list(maxit = 4000)
)
resultL
#running optim for the regression with exponent, using the parameter start values found with the model before - but they dont change (but should)
resultE <- optim(par = c(resultL$par[1],resultL$par[2:(length(vs)+1)],rep(1,length(vs))), fn = minE.RSS,
method="L-BFGS-B"
, lower = c(0,-Inf,0,-Inf,-Inf,-Inf,rep(c(0.1),length(vs)))
, upper = c(Inf,rep(c(Inf),length(vs)),rep(c(1),length(vs)))
, control = list(maxit = 4000, pgtol = 1e-100)
)
resultE
#using initial parameter values I received from same formula with Excel solver Add-In - the result is getting better=smaller
resultX <- optim(par = c(0,31,0,3500,2860,-31,1,1,1,0.17,1), fn = minE.RSS,
method="L-BFGS-B"
, lower = c(0,-Inf,0,-Inf,-Inf,-Inf,rep(c(0.1),length(vs)))
, upper = c(Inf,rep(c(Inf),length(vs)),rep(c(1),length(vs)))
, control = list(maxit = 4000, pgtol = 1e-100)
)
resultX
detach(dsg)
resultX$value
[1] 8109259
resultL$value
[1] 8175660
resultE$value
[1] 8175660
I tried pgtol and factr with very small and very big values (1e100 / 1e-100), but resultE does not get better than resultL. And I know from Excel solver Add-In that there is a better solution (resultX).
How can I force optim to run more iterations and/or find a solution as good as Excel solver Add-In does?
It seems like factr, ndeps and maxit have been limiting in your case. You can get pretty close to resultX$value when you do:
resultE2 <- optim(par = c(resultL$par[1],resultL$par[2:(length(vs)+1)],rep(1,length(vs))), fn = minE.RSS,
method="L-BFGS-B"
, lower = c(0,-Inf,0,-Inf,-Inf,-Inf,rep(c(0.1),length(vs)))
, upper = c(Inf,rep(c(Inf),length(vs)),rep(c(1),length(vs)))
, control = list(maxit = 1e4, pgtol = 0, ndeps = rep(1e-6, 11), factr=0))
resultE2$value
[1] 8109250

Optimization in R: constrOptim not converging

I am looking at a way to optimize a function in R having several constraints. That's a piece of cake using Excel but I cannot make it work in R.
What I want is to find the set of parameters that maximizes a function under the contraints that parameters should be non-increasing and that the sum of parameters x_i ...x_max is bound for each i.
I wrote a simple example. It works for two parameters but not for three. For three parameters it looks like the optimization procedure is not doing anything.
In real-life cases I would like to use between 12 and 120 parameters so I am a bit worried it does not work with 3 ...
So any help is welcome ... and thanks in advance for the (eventual) reply.
The code for two parameters is (working)
Omp <- function (p)
{
calc <- -p[1]-2*p[2]
return (calc)
}
ui1 <-matrix(c(-1,0,1,-1,-1,-1),ncol =2)
ci1 <-c(-100,-70,0)-0.0000001
init1 <-c(100,0)
(ui1 %*% init1) - ci1
tst <- constrOptim(init1, Omp, grad = NULL, ui = ui1, ci = ci1)
The output is conform expectations:
> sum(tst$par)
[1] 100
> tst$par
[1] 50 50
The code for 3 parameters is (not working)
Opm <- function (p)
{
calc <- -p[1]-2*p[2]-3*p[3]
print(calc)
return (calc)
}
ui1 <-matrix(c(-1,0,0,1,0,-1,-1,0,-1,1,-1,-1,-1,0,-1),ncol =3)
ci1 <-c(-100,-70,0,0,0)-0.0000001
init1 <-c(65,35,0)
(ui1 %*% init1) - ci1
tst <- constrOptim(init1, Opm, grad = NULL, ui = ui1, ci = ci1)
It runs but always remains close to the initial guess.
> tst$par
[1] 6.500000e+01 3.500000e+01 9.685755e-08
Someone else may be able to provide more insight, but your starting values may not be far enough inside the feasible region.
As you stated, this does not produce the expected result:
ui1 <-matrix(c(-1,0,0,1,0,-1,-1,0,-1,1,-1,-1,-1,0,-1),ncol =3)
ci1 <-c(-100,-70,0,0,0)-0.0000001
init1 <-c(65,35,0)
(ui1 %*% init1) - ci1
tst <- constrOptim(init1, Opm, grad = NULL, ui = ui1, ci = ci1)
round(tst$par)
[1] 65 35 0
But adjusting the small offset in ci1 slightly, I get a different result - similar to your expectations in your first example.
ci1 <-c(-100,-70,0,0,0)-0.00001
init1 <-c(65,35,0)
(ui1 %*% init1) - ci1
tst <- constrOptim(init1, Opm, grad = NULL, ui = ui1, ci = ci1)
round(tst$par)
[1] 50 50 0

how to solve negative subscript error in R?

I am trying to normalize the data frame before prediction but I get this error :
Error in seq_len(nrows)[i] :
only 0's may be mixed with negative subscripts
Called from: top level
Here is my code :
library('caret')
load(file = "some dataset path here")
DummyDataSet = data
attach(DummyDataSet)
foldCount = 10
classifyLabels = DummyDataSet$ClassLabel
folds = createFolds(classifyLabels,k=foldCount)
for (foldIndex in 1:foldCount){
cat("----- Start Fold -----\n")
#holding out samples of one fold in each iterration
testFold = DummyDataSet[folds[[foldIndex]],]
testLabels = classifyLabels[folds[[foldIndex]]]
trainFolds = DummyDataSet[-folds[[foldIndex]],]
trainLabels = classifyLabels[-folds[[foldIndex]]]
#Zero mean unit variance normalization to ONLY numerical data
for (k in 1:ncol(trainFolds)){
if (!is.integer(trainFolds[,k])){
params = meanStdCalculator(trainFolds[,k])
trainFolds[,k] = sapply(trainFolds[,k], function(x) (x - params[1])/params[2])
testFold[,k] = sapply(testFold[,k], function(x) (x - params[1])/params[2])
}
}
meanStdCalculator = function(data){
Avg = mean(data)
stdDeviation = sqrt(var(data))
return(c(Avg,stdDeviation))
}
cat("----- Start Fold -----\n")
}
where trainFolds is a fold creating by caret package and its type is data.frame.
I have already read these links :
R Debugging
Subset
Negative Subscripts
but I couldn't find out what is wrong with the indexes?
anybody can help me?

Genetic Algorithm Optimization

I asked a question a few weeks back regarding how one would do optimization in R(Optimizing for Vector Using Optimize R). Now that I have a proper grip with basic optimization in R, I would like to start employing GA's to solve for solutions.
Given the objective function:
div.ratio <- function(weight, vol, cov.mat){
weight <- weight / sum(weight)
dr <- (t(weight) %*% vol) / (sqrt(t(weight) %*% cov.mat %*% (weight)))
return(-dr)
}
I am using genalg package for optimizing, specifically the "rbga.bin" function. But the thing is one cannot seem to pass in more than one parameter, ie can't pass in vol and cov.mat. Am I missing something or understanding this incorrectly.
Edit:
In the genalg package, there is a function called rbga.bin which is the one I am using.
Here is the simple code from previous question that can get you started:
rm(list=ls())
require(RCurl)
sit = getURLContent('https://github.com/systematicinvestor/SIT/raw/master/sit.gz', binary=TRUE, followlocation = TRUE, ssl.verifypeer = FALSE)
con = gzcon(rawConnection(sit, 'rb'))
source(con)
close(con)
load.packages('quantmod')
data <- new.env()
tickers<-spl("VTI,VGK,VWO,GLD,VNQ,TIP,TLT,AGG,LQD")
getSymbols(tickers, src = 'yahoo', from = '1980-01-01', env = data, auto.assign = T)
for(i in ls(data)) data[[i]] = adjustOHLC(data[[i]], use.Adjusted=T)
bt.prep(data, align='remove.na', dates='1990::2013')
prices<-data$prices[,-10]
ret<-na.omit(prices/mlag(prices) - 1)
vol<-apply(ret,2,sd)
cov.mat<-cov(ret)
out <- optim(par = rep(1 / length(vol), length(vol)), # initial guess
fn = div.ratio,
vol = vol,
cov.mat = cov.mat,
method = "L-BFGS-B",
lower = 0,
upper = 1)
opt.weights <- out$par / sum(out$par) #optimal weights
While the above optim function works just fine, I was thinking if it is possible to reproduce this using a GA algorithm. So in the future if I am searching for multiple objectives I will be able to do this faster compared to GA. (I am not sure if it is faster, but this is the step to take to find out)
GAmodel <- rbga.bin(size = 7, #genes
popSize = 200, #initial number of chromosomes
iters = 100, #number of iterations
mutationChance = 0.01, #chance of mutation
evalFunc = div.ratio) #objective function
Doing the above seems to produce an error as div.ratio needs extra paramters, so I am looking for some help in structuring my problem so that it will be able to produce the optimal answer. I hope the above edit clarifies things.
Thanks
This is what you need:
GAmodel <- rbga(stringMin=rep(0, length(vol)), stringMax=rep(1, length(vol)),
popSize = 200,
iters = 100,
mutationChance = 0.01,
evalFunc = function(weight) div.ratio(weight, vol=vol, cov.mat=cov.mat))
(see first and last lines above).
The problems were:
vectors weight and vol must match lengths.
function evalFunc is called with a single parameter, causing the others to be missing. As I understand, you want to optimize in the weight vector only, keeping vol and cov.mat fixed.
If you want weight to be treated as a continuous variable, then use rbga instead.

while loop problem in r

i am trying to get this loop in my r program to work but it is not giving me the results that I desire. I am trying to model an insurance contract where there are n securities that have a fixed likelihood of default vector(data[i,2]) and a payout vector(data[i,1]).
i need to price the value of stop losses at the security level and at the portfolio level. to do this i created two while loops for the conditional vectors of each level (which will be inputed into the function by the user) one while loop to scan through the various securities and a final one to model the various scenarios. i tried to Use R's matrix capabilities to help organize the results.
the problem with this code is that the if statement behaves oddly, not activating and filtering correctly. this causes the program to be slow and provide bad results. it fills the individual protection column always rather than conditioning it on the likelihood vector(data[i,2]). there is a lot of moving parts but overall it is a simple model.
y = years
nr=nrow(data1)
nc=ncol(data1)
isl = individualStopLoss
asl = aggregateStoploss
Lasl = length(asl)
LIsl = length(isl)
claims = vector(mode = "logical",length= asl)
individualProtection = matrix(0,ncol=LIsl,nrow=y)
aggregateProtection = matrix(0,ncol=Lasl ,nrow=y)
expectedClaims = data1[,1]*data1[,2]
expectedClaims = sum(expectedClaims)
k = 1
m=1
while (k<=y)
{j = 1
m = 1
runi = runif(nr, min=0, max=1)
while (m<=Lasl)
{while (j<=LIsl)
{i=1
while (i<=nr)
{if ( runi[i] < data1[i,2] )
{individualProtection[k,j] = individualProtection[k,j] + max(data1[i,1]-isl[j],0)
claims[k] = claims[k] + data1[i,1]
i=i+1
}
else{i= i+1}
}
j=j+1
}
aggregateProtection[k,m]= aggregateProtection[k,m] + max(claims[k] - expectedClaims*asl[m],0)
m = m+1
}
k = k+1
}
Just an example to help you provide a reproducible example, will be deleted when your question is updated.
data1 <- cbind(rnorm(1000),rnorm(1000))
y = sample(rep(1990:2011,1000),1000)
nr=nrow(data1)
nc=ncol(data1)
isl = rnorm(500)
asl = rnorm(500)
Lasl = length(asl)
LIsl = length(isl)

Resources