Porting python CVXPY code to R CVXR - r

I am trying to learn how the CVXR package works, and I was porting a
Python example
by Steve Diamond here:
https://groups.google.com/forum/#!topic/cvxpy/5hBSB9KVbuI
and
http://nbviewer.jupyter.org/github/cvxgrp/cvx_short_course/blob/master/intro/control.ipynb
The R equivalent of the code is below:
set.seed(1)
n = 8
m = 2
T1 = 50
alpha = 0.2
beta = 5
A = diag(n) + alpha*replicate(n, rnorm(n))
B = replicate(m, rnorm(n))
x_0 = beta*replicate(1, rnorm(n))
# Form and solve control problem.
x = Variable(n, T1+1)
u = Variable(m, T1)
states = c()
for (t in 1:T1) {
cost = sum_squares(x[,t+1]) + sum_squares(u[,t])
constr = list(x[, t+1] == A%*%x[, t] + B%*%u[, t],
norm_inf(u[,t]) <= 1)
states = c(states, Problem(Minimize(cost), constr) )
}
# sums problem objectives and concatenates constraints.
prob <- Reduce("+", states)
constraints(prob) <- c(constraints(prob), x[ ,T1] == 0)
constraints(prob) <- c(constraints(prob), x[ ,0] == x_0)
sol <- solve(prob)
I have a challenge with the second-to-last line (it throws an error):
constraints(prob) <- c(constraints(prob), x[ ,0] == x_0)
My guess is that x[ , 0] points to the zero-th index position of the
variable, x, which does not exist in R. But from Python which the
program is converted from, a zero-th index position exists from the
for loop (for t in range(T)). range(T) is a vector starting from 0
- 49.
But in R, the for loop (for (t in 1:T1) ) is for a vector of 1 - 50.
Please, any ideas to help will be much appreciated.
Thank you.

You need to bump up the index number by 1, so x[,1] == x_0 and x[,T1+1] == 0 in the second and third from the last line, respectively. Otherwise, you never set the T1+1 entry.

Related

Area Under the Curve using Simpson's rule in R

I would like to compute the Area Under the Curve defined by a set of experimental values. I created a function to calculate an aproximation of the AUC using the Simpson's rule as I saw in this post. However, the function only works when it receives a vector of odd length. How can I modify the code to add the area of the last trapezoid when the input vector has an even length.
AUC <- function(x, h=1){
# AUC function computes the Area Under the Curve of a time serie using
# the Simpson's Rule (numerical method).
# https://link.springer.com/chapter/10.1007/978-1-4612-4974-0_26
# Arguments
# x: (vector) time serie values
# h: (int) temporal resolution of the time serie. default h=1
n = length(x)-1
xValues = seq(from=1, to=n, by=2)
sum <- list()
for(i in 1:length(xValues)){
n_sub <- xValues[[i]]-1
n <- xValues[[i]]
n_add <- xValues[[i]]+1
v1 <- x[[n_sub+1]]
v2 <- x[[n+1]]
v3 <- x[[n_add+1]]
s <- (h/3)*(v1+4*v2+v3)
sum <- append(sum, s)
}
sum <- unlist(sum)
auc <- sum(sum)
return(auc)
}
Here a data example:
smoothed = c(0.3,0.317,0.379,0.452,0.519,0.573,0.61,0.629,0.628,0.613,0.587,0.556,0.521,
0.485,0.448,0.411,0.363,0.317,0.273,0.227,0.185,0.148,0.12,0.103,0.093,0.086,
0.082,0.079,0.076,0.071,0.066,0.059,0.053,0.051,0.052,0.057,0.067,0.081,0.103,
0.129,0.165,0.209,0.252,0.292,0.328,0.363,0.398,0.431,0.459,0.479,0.491,0.494,
0.488,0.475,0.457,0.43,0.397,0.357,0.316,0.285,0.254,0.227,0.206,0.189,0.181,
0.171,0.157,0.151,0.162,0.192,0.239)
One recommended way to handle an even number of points and still achieve precision is to combine Simpson's 1/3 rule with Simpson's 3/8 rule, which can handle an even number of points. Such approaches can be found in (at least one or perhaps more) engineering textbooks on numerical methods.
However, as a practical matter, you can write a code chunk to check the data length and add a single trapezoid at the end, as was suggested in the last comment of the post to which you linked. I wouldn't assume that it is necessarily as precise as combining Simpson's 1/3 and 3/8 rules, but it is probably reasonable for many applications.
I would double-check my code edits below, but this is the basic idea.
AUC <- function(x, h=1){
# AUC function computes the Area Under the Curve of a time serie using
# the Simpson's Rule (numerical method).
# https://link.springer.com/chapter/10.1007/978-1-4612-4974-0_26
# Arguments
# x: (vector) time serie values
# h: (int) temporal resolution of the time serie. default h=1
#jh edit: check for even data length
#and chop off last data point if even
nn = length(x)
if(length(x) %% 2 == 0){
xlast = x[length(x)]
x = x[-length(x)]
}
n = length(x)-1
xValues = seq(from=1, to=n, by=2)
sum <- list()
for(i in 1:length(xValues)){
n_sub <- xValues[[i]]-1
n <- xValues[[i]]
n_add <- xValues[[i]]+1
v1 <- x[[n_sub+1]]
v2 <- x[[n+1]]
v3 <- x[[n_add+1]]
s <- (h/3)*(v1+4*v2+v3)
sum <- append(sum, s)
}
sum <- unlist(sum)
auc <- sum(sum)
##jh edit: add trapezoid for last two data points to result
if(nn %% 2 == 0){
auc <- auc + (x[length(x)] + xlast)/2 * h
}
return(auc)
}
sm = smoothed[-length(smoothed)]
length(sm)
[1] 70
#even data as an example
AUC(sm)
[1] 20.17633
#original odd data
AUC(smoothed)
[1] 20.389
There may be a good reason for you to prefer using Simpson's rule, but if you're just looking for a quick and efficient estimate of AUC, the trapezoid rule is far easier to implement, and does not require an even number of breaks:
AUC <- function(x, h = 1) sum((x[-1] + x[-length(x)]) / 2 * h)
AUC(smoothed)
#> [1] 20.3945
Here, I show example code that uses the Simpson's 1/3 and 3/8 rules in tandem for the numerical integration of data. As always, the usual caveats about the possibility of coding errors or compatibility issues apply.
The output at the end compares the numerical estimates of this algorithm with the trapezoidal rule using R's "integrate" function.
#Algorithm adapted from:
#Numerical Methods for Engineers, Seventh Edition,
#By Chapra and Canale, page 623
#Modified to accept data instead of functional values
#Modified by: Jeffrey Harkness, M.S.
##Begin Simpson's rule function code
simp13 <- function(dat, h = 1){
ans = 2*h*(dat[1] + 4*dat[2] + dat[3])/6
return(ans)}
simp13m <- function(dat, h = 1){
summ <- dat[1]
n <- length(dat)
nseq <- seq(2,(n-2),2)
for(i in nseq){
summ <- summ + 4*dat[i] + 2*dat[i+1]}
summ <- summ + 4*dat[n-1] + dat[n]
result <- (h*summ)/3
return(result)}
simp38 <- function(dat, h = 1){
ans <- 3*h*(dat[1] + 3*sum(dat[2:3]) + dat[4])/8
return(ans)}
simpson = function(dat, h = 1){
hin = h
len = length(dat)
comp <- len %% 2
##number of segments
if(len == 2){
ans = sum(dat)/2*h} ##n = 2 is the trapezoidal rule
if(len == 3){
ans = simp13(dat, h = hin)}
if(len == 4){
ans = simp38(dat,h = hin)}
if(len == 6){
ans <- simp38(dat[1:4],h = hin) + simp13(dat[4:len],h = hin)}
if(len > 6 & comp == 0){
ans = simp38(dat[1:4],h = hin) + simp13m(dat[4:len],h = hin)}
if(len >= 5 & comp == 1){
ans = simp13m(dat,h = hin)}
return(ans)}
##End Simpson's rule function code
This next section of code shows the performance comparison. This code can easily be altered for different test functions and cases.
The precision difference tends to change with the sample size and test function used; this example is not intended to imply that the difference is always this pronounced.
#other algorithm for comparison purposes, from Allan Cameron above
oa <- function(x, h = 1) sum((x[-1] + x[-length(x)]) / 2 * h)
#Testing and algorithm comparison code
simans = NULL; oaans = NULL; simerr = NULL; oaerr = NULL; mp = NULL
for( j in 1:10){
n = j
#f = function(x) cos(x) + 2 ##Test functions
f = function(x) 0.2 + 25*x - 200*x^2 + 675*x^3 - 900*x^4 + 400*x^5
a = 0;b = 10
h = (b-a)/n
datain = seq(a,b,by = h)
preans = integrate(f,a,b)$value #precise numerical estimate of test function
simans[j] = simpson(f(datain), h = h)
oaans[j] = oa(f(datain), h = h)
(simerr[j] = abs(simans[j] - preans)/preans * 100)
(oaerr[j] = abs(oaans[j] - preans)/preans * 100)
mp[j] = simerr[j] < oaerr[j]
}
(outframe = data.frame("simpsons percent diff" = simerr,"trapezoidal percent diff" = oaerr, "more precise?" = mp, check.names = F))
simpsons percent diff trapezoidal percent diff more precise?
1 214.73489738 214.734897 FALSE
2 15.07958148 64.993410 TRUE
3 6.70203621 29.816799 TRUE
4 0.94247384 16.955208 TRUE
5 0.54830021 10.905620 TRUE
6 0.18616767 7.593825 TRUE
7 0.12051767 5.588209 TRUE
8 0.05890462 4.282980 TRUE
9 0.04087107 3.386525 TRUE
10 0.02412733 2.744500 TRUE

R: find sum of every i < j without using for loop

How to find sum of i<j (i,j = 1 to 25) of i without using for loop in R language.
This equation is what I am trying to code exactly, I need to get the index of both i and j and calculate sum of determination from there.
{(x_i, j_i)}i = 1 to 25
We can use outer
sum(outer(i, j, FUN = `<`))
If we need to find the sum of 'x'
sum(matrix(x, 25, 25)[outer(x, x, FUN = `<`)])
data
i <- 1:25
j <- 1:25
x <- rnorm(25)
For vector x, you can try the code below
sum(cumsum(x)[-length(x)])
# DATA
set.seed(42)
n = 25
v = 1:n
x = rnorm(n)
sum(rep(v, n) < rep(v, each = n))
sum(rep(x, n)[rep(v, n) < rep(v, each = n)])

Faulty NMI implementation in R?

#calculate NMI(c,t) c : cluster assignment , t : ground truth
NMI <- function(c,t){
n <- length(c) # = length(t)
r <- length(unique(c))
g <- length(unique(t))
N <- matrix(0,nrow = r , ncol = g)
for(i in 1:r){
for (j in 1:g){
N[i,j] = sum(t[c == i] == j)
}
}
N_t <- colSums(N)
N_c <- rowSums(N)
B <- (1/n)*log(t( t( (n*N) / N_c ) / N_t))
W <- B*N
I <- sum(W,na.rm = T)
H_c <- sum((1/n)*(N_c * log(N_c/n)) , na.rm = T)
H_t <- sum((1/n)*(N_t * log(N_t/n)) , na.rm = T)
nmi <- I/sqrt(H_c * H_t)
return (nmi)
}
Running this on some clustering benchmarks here gives me a value of the Normalized Mutual information . But , when I compare it with values of NMI obtained from the aricode library , I get values of NMI that generally differ in the second decimal place .
I will be grateful if someone is able to pin-point any possible error that has creeped into this code .
I am including a test case for this using a synthetic case :
library(aricode)
c <- c(1,1,2,2,2,3,3,3,3,4,4,4)
t <- c(1,2,2,2,3,4,3,3,3,4,4,2)
print(aricode::NMI(c , t)) #0.489574
print(NMI(c,t)) #0.5030771
This might be very late for an answer but for the sake of posterity:
The difference is in the way you and the aricode package normalise the index. You divide by sqrt() whereas aricode offers the following options:
function (c1, c2, variant = c("max", "min", "sqrt", "sum", "joint"))
so if you select variant = sqrt you should hopefully get the same answer.
The NMI package uses sum.

Prime numbers from random samples in R

I wrote the following code trying to find all the prime numbers from a random generated data set. sadly it seems something went wrong, could anybody help me.
set.seed(20171106)
n <- 10000
num <- sample(1:100000,n,replace=TRUE)
findPrime <- function(x){
apple<-c()
n<-length(x)
for(i in n){
if(any(x[i]%%(1:(x[i]-1))!=0)) apple <-c(apple,x[i])
}
return(apple)
}
To get results:
type:findPrime(num)
This is the warning message:
Warning message:
In if (x[i]%%(1:(x[i] - 1)) == 0) apple <- c(apple, x[i]) :
the condition has length > 1 and only the first element will be used
so how can I fix the problem?
if statements only accept single elements and in your declaration seems to get the whole vector. I have rewritten your function using a ifelse expression wrapped inside a sapply loop.
I hope this works for you.
findPrime <- function(x = 0){
primes <- c()
# Prime finder
primes <- sapply(X = x,FUN = function(x) {
ifelse(any(x %% (1:(x - 1)) != 0), T, F)}
)
# Select primes
primes <- num[primes]
return(primes)
}
findPrime(num)
I have checked another silly mistake... Inside the function change num for x in the select primes step and invert the F, T outcomes. It should look like this:
findPrime <- function(x = 0){
primes <- c()
# Prime finder
primes <- sapply(X = x,FUN = function(x) {
ifelse(any(x %% (2:(x - 1)) == 0), F, T)}
)
# Select primes
primes <- x[primes]
return(primes)
}
I have just tried it and it works fine.
use package "gmp" which has a function "isprime" which returns 0 for non prime numbers and 2 for prime numbers and then subset the data based on the same
say you have a vector a = c(1:10)
a = c(1:10)
b = gmp::isprime(a)
c = cbind(a,b)
c = as.data.frame(c)
c = c[c$b==2,]
a1 = c$a
a1
In your code: for(i in 1:n), there is the error

Create a results-matrix after using for-loops

I (being an absolut beginner with R and programming) have to do some analysis with R for my thesis in finance :(
The purpose is to simulate data (stock prices) with a GBM and run over the results 2 trading strategies. Within the GBM I'll have to "play" with the veriables "r" and "sigma" (3 different values for each, thus 9 combinations). Each combination needs to be simulated 10000 times over a period T=10, N=250. To all these simulations 2 trading strategies have to be applied - MACD and RSI (within the TTR-package).
Now I'm facing an issue with writing the code :(
#Geometrical Brownian Motion
Sim <- GBM(x, r, sigma, T, N)
x <-100
r <-0
sigma <-1
T <- 10
N <- 250
#Additional info for RSI-strategy
retSim <- ROC(Sim)
SimRSI <- RSI(Sim, 14,)
SimRSI[is.na(SimRSI)] <- 0
#Create a vector for the results of the inner loop
portfolio <- rep(0:N)
portfolio[1] <- 100
runs <- 10000
#Creating vectors for final results of portfolio and simulation after 10000 runs (only the last value of each of the 10000 simulations and portfolio results of the strategy required)
resultsSimGBM <- rep(0:runs)
resultsRSIr1sig1 <- rep(0:runs)
#orders
buyRSI<-portfolio[i-1]*exp(retSim[i])
holdRSI<-portfolio[i-1]
#Simulation
portfolio[1]<-100
i <- 1
j <- 1
#Second loop
for(j in 0:runs){
#Simulation GBM
x <-100
r <-0
sigma <-1
T <- 10
N <- 250
Sim <- GBM(x, r, sigma, T, N)
retSim <- ROC(Sim)
SimRSI <- RSI(Sim, 14,)
SimRSI[is.na(SimRSI)] <- 0
portfolio[1] <- 100
#First loop
for(i in 2:length(Sim)){
#Apply RSI on GBM
buyRSI<-portfolio[i-1]*exp(retSim[i])
holdRSI<-portfolio[i-1]
if(SimRSI[i-1]<50 && SimRSI[i]>50){portfolio[i]=buyRSI}
if(SimRSI[i-1]>50 && SimRSI[i]<50){portfolio[i]=holdRSI}
if(SimRSI[i-1]<50 && SimRSI[i]<50){portfolio[i]=holdRSI}
if(SimRSI[i-1]>50 && SimRSI[i]>50){portfolio[i]=buyRSI}
i <- i+1
}
resultsRSI[j] <- portfolio[N]
resultsSimGBM[j] <- Sim[N]
j <- j+1
}
Anyway, this is what I have until now and it seems to work. However, into the first (inner) loop, I need to include also the second strategy (which until now, singled out) looked following :
#MACD strategy
portfolioMACD[1]<-100
i <- 1
j <- 1
for(j in 0:runs){
Sim <- BMSim
retSim <- ROC(Sim)
SimMACD <- MACD(Sim, 12, 26, 9, myType="EMA")
DataSimMACD <- data.frame(SimMACD)
DataSimMACD$macd[is.na(DataSimMACD$macd)] <- 0
DataSimMACD$signal[is.na(DataSimMACD$signal)] <- 0
for(i in 2:length(Sim)){
buyMACD<-portfolioMACD[i-1]*exp(retSim[i])
sellMACD<-portfolioMACD[i-1]
holdMACD<-portfolioMACD[i-1]*exp(retSim[i])
if(DataSimMACD$macd[i-1]<DataSimMACD$signal[i-1] && DataSimMACD$macd[i]>DataSimMACD$signal[i]){portfolioMACD[i]=buyMACD}
if(DataSimMACD$macd[i-1]>DataSimMACD$signal[i-1] && DataSimMACD$macd[i]<DataSimMACD$signal[i]){portfolioMACD[i]=sellMACD}
if(DataSimMACD$macd[i-1]>DataSimMACD$signal[i-1] && DataSimMACD$macd[i]>DataSimMACD$signal[i]){portfolioMACD[i]=holdMACD}
if(DataSimMACD$macd[i-1]<DataSimMACD$signal[i-1] && DataSimMACD$macd[i]<DataSimMACD$signal[i]){portfolioMACD[i]=sellMACD}
if(DataSimMACD$macd[i]==DataSimMACD$signal[i]){portfolioMACD[i]=sellMACD}
if(DataSimMACD$macd[i-1]==DataSimMACD$signal[i-1] && DataSimMACD$macd[i]!=DataSimMACD$signal[i]){portfolioMACD[i]=buyMACD}
i <- i+1
}
resultsMACD[j] <- portfolioMACD[length(Sim)]
j <- j+1
}
BASICALLY:
1-One Brownian motion has to consist of 2500 elements, to which both trading strategies have to be applied separately
2-this whole procedure has to be repeated 10000 times for each out of 9 combinations of variables r and sigma (r1sigma1, r1sigma2, r1sigma3,.....,r3sigma3) (this I don't have included in my code yet - no clue how to construct those 2 loops around it all...) :(
3-the endresult should be a 10000x27 matrix with 10000rows (for amount of runs) and 27 colums (9x GBM, RSI, MACD) filled only with the 2500th (endvalue) of each simulation (from point 1.) --> how to do it?
SOS! Could someone of you PLEASE PLEASE PLEASE help me with this mess? I'm completely lost and it's my graduation paper -.-
Every help will be highly praised and deeply appreciated!
Thanks in advance and sorry for the long post.
Cheers from Berlin, Ana :)
EDIT AND ANOTHER SIMPLIFIED EXAMPLE
library(sde)
#Vectors for results
Returns <- rep(0:N)
LogReturns <- rep(0:N)
Simulation <- rep(0:N)
ResultsSimulation <- rep(0:runs)
ResultsReturns <- rep(0:runs)
ResultsLog <- rep(0:runs)
runs=50 #how ofthen the Simulation of GBM should be repeated
i <- 1
j <- 1
#second loop
for(j in 2:runs){
Simulation <- GBM(x, r, sigma, T, N)
x=100
r=0
sigma=1
T=1
N=20
#first loop
for(i in 2:length(BM)){
Returns <- ROC(Simulation)
LogReturns[i+1] <- log(Simulation[i+1]/Simulation[i])
i <- i+1
}
ResultsSimulation[j]<-Simulation[N]
ResultsReturns[j]<-Returns[N]
ResultsLog[j]<-LogReturns[N]
j <- j+1
}
ResultsMatrix <- as.matrix(data.frame(ResultsSimulation, ResultsReturns, ResultsLog))
The structure of this example is basically what I have. I need to construct around it 2 more loops which will do the same simulations and calculations for 3 different "r" values and "sigma" values (variables within the GBM-function). The resuls (the final value of each Simulation and calculation from the first loop) should be saved in separate vectors or in a matrix consisting of those --> thus, 27 vestors of length 50 (3 results for each combination of variables r and sigma)
for example, if sigma=0.1; 0.3; 0,6 and r=0,03; 0,05; 0,08
How to construct those loops around it all and save the data accordingly?
Sorry for the questions guys, but I'm really lost :(
Cheers and thanks a lot in advance! At least for reading ;)
Is this close to what you need? If so, you can add new trading functions to return a portfolio, then just call it (see the example):
warning: It took about 1.5 hours to run with N_SIMS = 100000!
get.simulation.GBM = function(TIME = 10, N = 250, N_SIMS = 1000, x0 = 100, sigma = c(0.1, 0.3, 0.6), r = c(0.03, 0.05, 0.08))
{
require(sde)
params = expand.grid(sigma = sigma, r = r)
# pre-allocate matrix
results = matrix(0, ncol = N_SIMS * nrow(params), nrow = N)
for (i in 1:nrow(params))
{
idx.range = ((i - 1)*N_SIMS + 1):((i - 1)*N_SIMS + N_SIMS)
temp.res = replicate(N_SIMS, GBM(x0, r = params[i, 'r'], sigma = params[i, 'sigma'], T = TIME, N = N - 1 ))
results[, idx.range] = temp.res
}
return(results)
}
apply.MACD = function(serie, nFast = 12, nSlow = 26, nSig = 9, p0 = 100)
{
require(TTR)
roc = ROC(serie)
sim.MACD = MACD(serie, nFast, nSlow, nSig, maType = "EMA")
portfolio = rep(0, length = length(serie))
portfolio[1] = p0
sim.MACD[is.na(sim.MACD)] = 0
sim.MACD = as.data.frame(sim.MACD)
for (i in 2:length(serie))
{
buy = portfolio[i - 1] * exp(roc[i])
sell = portfolio[i - 1]
hold = buy
if(sim.MACD$macd[i - 1] < sim.MACD$signal[i - 1] && sim.MACD$macd[i] > sim.MACD$signal[i]){portfolio[i] = buy}
if(sim.MACD$macd[i - 1] > sim.MACD$signal[i - 1] && sim.MACD$macd[i] < sim.MACD$signal[i]){portfolio[i] = sell}
if(sim.MACD$macd[i - 1] > sim.MACD$signal[i - 1] && sim.MACD$macd[i] > sim.MACD$signal[i]){portfolio[i] = hold}
if(sim.MACD$macd[i - 1] < sim.MACD$signal[i - 1] && sim.MACD$macd[i] < sim.MACD$signal[i]){portfolio[i] = sell}
if(sim.MACD$macd[i] == sim.MACD$signal[i]){portfolio[i] = sell}
if(sim.MACD$macd[i - 1] == sim.MACD$signal[i - 1] && sim.MACD$macd[i] != sim.MACD$signal[i]){portfolio[i] = buy}
}
return(portfolio)
}
apply.RSI = function(serie, p0 = 100, n = 14)
{
require(TTR)
roc = ROC(serie)
sim.RSI = RSI(serie, n = n)
sim.RSI[is.na(sim.RSI)] = 0
portfolio = rep(0, length = length(serie))
portfolio[1] = p0
for (i in 2:length(serie))
{
buy = portfolio[i - 1] * exp(roc[i])
hold = portfolio[i - 1]
if(sim.RSI[i - 1] < 50 && sim.RSI[i] > 50){portfolio[i] = buy}
if(sim.RSI[i - 1] > 50 && sim.RSI[i] < 50){portfolio[i] = hold}
if(sim.RSI[i - 1] < 50 && sim.RSI[i] < 50){portfolio[i] = hold}
if(sim.RSI[i - 1] > 50 && sim.RSI[i] > 50){portfolio[i] = buy}
}
return(portfolio)
}
# Example (this is SLOW)
simulation.matrix = get.simulation.GBM()
portfolio.RSI = apply(simulation.matrix, 2, apply.RSI)
portfolio.MACD = apply(simulation.matrix, 2, apply.MACD)
# if you need only the last values
portfolio.RSI.last = tail(portfolio.RSI, 1)
portfolio.MACD.last = tail(portfolio.MACD, 1)

Resources