Improving for loop speed in Julia 1.0

I have a long vector V and a large matrix M. What I want to compute is shown in the Julia code below.
using LinearAlgebra
function myfunction(M, V)
    n = size(V, 1)
    sum = 0
    summ = 0
    for i = 1:n-1
        for j = i+1:n
            a = [i, j]
            Y = V[a]
            X = M[a, a]
            sum += Y' * inv(X) * Y
            summ += tr(X) * Y' * Y
        end
    end
    return sum, summ
end
M = randn(10000, 10000)
V = randn(10000)
@time myfunction(M, V)
Since the vector is long and the matrix is large, this procedure takes a very long time. I have spent a lot of time on this issue and would really appreciate your help!

I would just manually unroll the calculations to avoid allocations:
function myfunction2(M::AbstractMatrix{T}, V::AbstractVector{T}) where {T}
    n = size(V, 1)
    sum = zero(T)
    summ = zero(T)
    for i = 2:n
        for j = 1:i-1
            @inbounds y1, y2 = V[i], V[j]
            y11 = y1*y1
            y12 = y1*y2
            y22 = y2*y2
            @inbounds a, b, c, d = M[i,i], M[i,j], M[j,i], M[j,j]
            sum += (d*y11 - (c+b)*y12 + a*y22) / (a*d - b*c)
            summ += (a+d)*(y11 + y22)
        end
    end
    return sum, summ
end
(note that I make explicit assumptions about M and V)
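For reference, the unrolled expressions come from the closed form of a 2×2 inverse. With $X = \begin{pmatrix} a & b \\ c & d \end{pmatrix}$ and $Y = (y_1, y_2)^\top$:
$$ Y^\top X^{-1} Y = \frac{d\,y_1^2 - (b+c)\,y_1 y_2 + a\,y_2^2}{ad - bc}, \qquad \operatorname{tr}(X)\, Y^\top Y = (a+d)\,(y_1^2 + y_2^2). $$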
EDIT: this version is marginally faster:
function myfunction3(M::AbstractMatrix{T}, V::AbstractVector{T}) where {T}
    n = size(V, 1)
    sum = zero(T)
    summ = zero(T)
    for i = 2:n
        @inbounds y1 = V[i]
        @inbounds a = M[i,i]
        y11 = y1*y1
        for j = 1:i-1
            @inbounds y2 = V[j]
            y12 = y1*y2
            y22 = y2*y2
            @inbounds b, c, d = M[i,j], M[j,i], M[j,j]
            sum += (d*y11 - (c+b)*y12 + a*y22) / (a*d - b*c)
            summ += (a+d)*(y11 + y22)
        end
    end
    return sum, summ
end
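As a rough sanity check, one might compare the versions with BenchmarkTools (timings are machine-dependent; this sketch uses a smaller hypothetical problem size than the 10000×10000 case in the question to keep the quadratic loop affordable):
using BenchmarkTools, LinearAlgebra

M = randn(2_000, 2_000)   # smaller than the question's 10_000 case
V = randn(2_000)

@btime myfunction($M, $V)    # allocates two small arrays per (i, j) pair
@btime myfunction2($M, $V)   # allocation-free unrolled version
@btime myfunction3($M, $V)   # additionally hoists the i-dependent loads out of the inner loop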


Integration of interaction tensor

I need to calculate the following integral several thousand times per time step (the integral and the definitions of its terms were given as equation images in the original post). So far I have implemented it in Julia as:
using StaticArrays
function interactiontensor(C, a1, a2, a3, ϕ, θ)
    n1, n2 = 100, 50
    T = fill(0.0, 3, 3, 3, 3)
    Av = zeros(4, 4)
    invAv = similar(Av)
    xi = Vector{Float64}(undef, 3)
    @inbounds for p ∈ 1:n1
        sinθp = sind(θ[p])
        cosθp = cosd(θ[p])
        for q ∈ 1:n2
            sinϕq = sind(ϕ[q])
            cosϕq = cosd(ϕ[q])
            # -- Director cosines
            xi[1] = sinθp*cosϕq/a1
            xi[2] = sinθp*sinϕq/a2
            xi[3] = cosθp/a3
            Christoffel!(Av, C, xi)
            fillAv!(Av, xi)
            invAv = inv(SMatrix{4,4}(Av))
            tensorT!(T, invAv, xi, sinθp)
            surface += sinθp
        end
    end
    return T ./= surface
end
@inline function Christoffel!(Av, C, xi)
    @inbounds for t ∈ 1:3, r ∈ 1:3
        aux = zero(eltype(C))
        for u ∈ 1:3, s ∈ 1:3
            aux += C[r, s, t, u] * xi[s] * xi[u]
        end
        Av[r, t] = aux
    end
end

@inline function tensorT!(T, invAv, xi, sinθp)
    @inbounds for k ∈ 1:3, i ∈ 1:3
        aux = invAv[i, k]
        for l ∈ 1:3, j ∈ 1:3
            T[i, j, k, l] += aux * xi[j] * xi[l] * sinθp
        end
    end
end

@inline function fillAv!(Av, xi)
    @inbounds for i ∈ 1:3
        xi0 = xi[i]
        Av[i, 4] = xi0
        Av[4, i] = xi0
    end
end
with
using BenchmarkTools   # for @btime
n1, n2 = 100, 100
step = π/n1
dθ, dϕ = π/n1, 2π/n2
θ = rad2deg.(range(dθ, stop = pi, length = n1))
ϕ = rad2deg.(range(dϕ, stop = 2pi, length = n2))
C = @SArray rand(3, 3, 3, 3)
@btime interactiontensor($C, $10.0, $5.0, $1.0, $ϕ, $θ);
# 544.795 μs (4 allocations: 1.08 KiB)
Given the number of times I ideally need to compute this integral, is there any optimization to my implementation, or an alternative approach, to considerably reduce the computational cost?
Here are some suggestions:
sinθp, cosθp = sincosd(θ[p]), i.e. computing sine and cosine in one call.
Initializing xi = @SVector zeros(3) as a static vector and then using Setfield.jl to update it in each iteration, i.e. xi = @set xi[1] = sinθp*cosϕq/a1 (see the short sketch after this list).
Loading the package LoopVectorization.jl and using the @avx macro (very roughly speaking, similar to @simd) to speed up the loops in Christoffel!, tensorT! and fillAv!.
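To illustrate the Setfield point, a minimal sketch (assuming StaticArrays and Setfield are installed): @set builds a new SVector rather than mutating the old one, so its result has to be rebound to the name:
using StaticArrays, Setfield

xi = @SVector zeros(3)
xi = @set xi[1] = 0.5   # returns a new SVector with the first entry replaced; xi is rebound
xi = @set xi[3] = 2.0
# xi is now SVector(0.5, 0.0, 2.0), avoiding the heap allocations of a regular Vector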
On my machine I find that these changes reduce the computation time by more than a factor of 5 (relative to the original function in the OP). The biggest chunk is due to @avx; the second point above amounts to roughly 30%.
julia> @btime interactiontensor_original($C, $10.0, $5.0, $1.0, $ϕ, $θ);
  661.655 μs (5 allocations: 1.28 KiB)

julia> @btime interactiontensor_optimized($C, $10.0, $5.0, $1.0, $ϕ, $θ);
  125.352 μs (4 allocations: 1.17 KiB)
Here is the full modified code (note that I commented out the lines involving surface, which isn't defined in the OP):
using StaticArrays, Setfield, LoopVectorization

function interactiontensor_optimized(C, a1, a2, a3, ϕ, θ)
    n1, n2 = 100, 50
    T = fill(0.0, 3, 3, 3, 3)
    Av = zeros(4, 4)
    invAv = similar(Av)
    xi = @SVector zeros(3)
    @inbounds for p ∈ 1:n1
        sinθp, cosθp = sincosd(θ[p])
        for q ∈ 1:n2
            sinϕq, cosϕq = sincosd(ϕ[q])
            # -- Director cosines (rebinding xi to a new SVector each time)
            xi = @set xi[1] = sinθp*cosϕq/a1
            xi = @set xi[2] = sinθp*sinϕq/a2
            xi = @set xi[3] = cosθp/a3
            Christoffel!(Av, C, xi)
            fillAv!(Av, xi)
            invAv = inv(SMatrix{4,4}(Av))
            tensorT!(T, invAv, xi, sinθp)
            # surface += sinθp
        end
    end
    return T #./= surface
end

@inline function Christoffel!(Av, C, xi)
    @avx for t ∈ 1:3, r ∈ 1:3
        aux = zero(eltype(C))
        for u ∈ 1:3, s ∈ 1:3
            aux += C[r, s, t, u] * xi[s] * xi[u]
        end
        Av[r, t] = aux
    end
end

@inline function tensorT!(T, invAv, xi, sinθp)
    @avx for k ∈ 1:3, i ∈ 1:3
        aux = invAv[i, k]
        for l ∈ 1:3, j ∈ 1:3
            T[i, j, k, l] += aux * xi[j] * xi[l] * sinθp
        end
    end
end

@inline function fillAv!(Av, xi)
    @avx for i ∈ 1:3
        xi0 = xi[i]
        Av[i, 4] = xi0
        Av[4, i] = xi0
    end
end

What do multiple objective functions mean in Julia jump?

I have multiple objective functions for the same model in Julia JuMP, created with @objective inside a for loop. What does it mean to have multiple objective functions in Julia? Which objective is minimized, or are all the objectives minimized jointly? How would they be minimized jointly?
using JuMP
using MosekTools

K = 3
N = 2
penalties = [1.0, 3.9, 8.7]

function fac1(r::Number, i::Number, l::Number)
    fac1 = 1.0
    for m in 0:r-1
        fac1 *= (i-m)*(l-m)
    end
    return fac1
end

function fac2(r::Number, i::Number, l::Number, tau::Float64)
    return tau ^ (i + l - 2r + 1)/(i + l - 2r + 1)
end

function Q_r(i::Number, l::Number, r::Number, tau::Float64)
    if i >= r && l >= r
        return 2 * fac1(r, i, l) * fac2(r, i, l, tau)
    else
        return 0.0
    end
end

function Q(i::Number, l::Number, tau::Number)
    elem = 0
    for r in 0:N
        elem += penalties[r + 1] * Q_r(i, l, r, tau)
    end
    return elem
end

# discrete segment starting times
mat = Array{Float64, 3}(undef, K, N+1, N+1)
function Q_mat()
    for k in 0:K-1
        for i in 1:N+1
            for j in 1:N+1
                mat[k+1, i, j] = Q(i, j, convert(Float64, k))
            end
        end
        return mat
    end
end

function A_tau(r::Number, n::Number, tau::Float64)
    fac = 1
    for m in 1:r
        fac *= (n - (m - 1))
    end
    if n >= r
        return fac * tau ^ (n - r)
    else
        return 0.0
    end
end

function A_tau_mat(tau::Float64)
    mat = Array{Float64, 2}(undef, N+1, N+1)
    for i in 1:N+1
        for j in 1:N+1
            mat[i, j] = A_tau(i, j, tau)
        end
    end
    return mat
end

function A_0(r::Number, n::Number)
    if r == n
        fac = 1
        for m in 1:r
            fac *= r - (m - 1)
        end
        return fac
    else
        return 0.0
    end
end

m = Model(optimizer_with_attributes(Mosek.Optimizer, "QUIET" => false, "INTPNT_CO_TOL_DFEAS" => 1e-7))
@variable(m, A[i=1:K+1, j=1:K, k=1:N+1, l=1:N+1])
@variable(m, p[i=1:K+1, j=1:N+1])
# constraint difference might be a small fractional difference.
# assuming that time difference is 1 second starting from 0.
for i in 1:K
    @constraint(m, -A_tau_mat(convert(Float64, i-1)) * p[i] .+ A_tau_mat(convert(Float64, i-1)) * p[i+1] .== [0.0, 0.0, 0.0])
end
for i in 1:K+1
    @constraint(m, A_tau_mat(convert(Float64, i-1)) * p[i] .== [1.0 12.0 13.0])
end
@constraint(m, A_tau_mat(convert(Float64, K+1)) * p[K+1] .== [0.0 0.0 0.0])
for i in 1:K+1
    @objective(m, Min, p[i]' * Q_mat()[i] * p[i])
end
optimize!(m)
println("p value is ", value.(p))
println(A_tau_mat(0.0), A_tau_mat(1.0), A_tau_mat(2.0))
With standard JuMP you can have only one objective function at a time: running another @objective macro simply overwrites the previous one.
Consider the following code:
julia> m = Model(GLPK.Optimizer);

julia> @variable(m, x >= 0)
x

julia> @objective(m, Max, 2x)
2 x

julia> @objective(m, Min, 2x)
2 x

julia> println(m)
Min 2 x
Subject to
 x >= 0.0
As you can see, only one objective function is left.
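If in doubt, you can query what the model currently holds (a quick check, assuming a reasonably recent JuMP version):
julia> objective_function(m)   # the single objective that is currently set
julia> objective_sense(m)      # MIN_SENSE, MAX_SENSE, or FEASIBILITY_SENSE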
However, there is indeed an area of optimization called multi-criteria (multi-objective) optimization. The goal there is to find the Pareto frontier.
There is a Julia package for handling multi-criteria problems named MultiJuMP. Here is some sample code:
using MultiJuMP, JuMP
using Clp

const mmodel = multi_model(Clp.Optimizer, linear = true)
const y = @variable(mmodel, 0 <= y <= 10.0)
const z = @variable(mmodel, 0 <= z <= 10.0)
@constraint(mmodel, y + z <= 15.0)

const exp_obj1 = @expression(mmodel, -y + 0.05 * z)
const exp_obj2 = @expression(mmodel, 0.05 * y - z)
const obj1 = SingleObjective(exp_obj1)
const obj2 = SingleObjective(exp_obj2)

const multim = get_multidata(mmodel)
multim.objectives = [obj1, obj2]

optimize!(mmodel, method = WeightedSum())
This library also supports plotting of the Pareto frontier.
The disadvantage is that as of today it does not seem to be actively maintained (however it works with the current Julia and JuMP versions).
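If you would rather stay with plain JuMP, a common workaround is to scalarize the objectives yourself, e.g. with a weighted sum. A minimal sketch (assuming GLPK is installed; the weight w is arbitrary here and would be varied to trace out different points of the frontier):
using JuMP, GLPK

m = Model(GLPK.Optimizer)
@variable(m, 0 <= y <= 10)
@variable(m, 0 <= z <= 10)
@constraint(m, y + z <= 15)

obj1 = -y + 0.05z          # first criterion
obj2 = 0.05y - z           # second criterion
w = 0.5                    # weight between the two criteria
@objective(m, Min, w * obj1 + (1 - w) * obj2)

optimize!(m)
println(value(y), " ", value(z))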

How to plot 2 functions with Julia

I am struggling to plot an evaluated function and its Chebyshev approximation.
I am using Julia 1.2.0.
EDIT: Sorry, added completed code.
using Plots
using Printf   # needed for the @printf calls below
pyplot()

mutable struct Cheb_struct
    c::Vector{Float64}
    min::Float64
    max::Float64
end

function cheb_coeff(min::Float64, max::Float64, n::Int, fn::Function)::Cheb_struct
    struc = Cheb_struct(Vector{Float64}(undef, n), min, max)
    f = Vector{Float64}(undef, n)
    p = Vector{Float64}(undef, n)
    max_plus_min = (max + min) / 2
    max_minus_min = (max - min) / 2
    for k in 0:n-1
        p[k+1] = pi * ((k+1) - 0.5) / n
        f[k+1] = fn(max_plus_min + cos(p[k+1])*max_minus_min)
    end
    n2 = 2 / n
    for j in 0:n-1
        s = 0
        for i in 0:n-1
            s += f[i+1]*cos(j*p[i+1])
            struc.c[j+1] = s * n2
        end
    end
    return struc
end

function approximate(struc::Cheb_struct, x::Float64)::Float64
    x1 = (2*x - struc.max - struc.min) / (struc.max - struc.min)
    x2 = 2*x1
    t = s = 0
    for j in length(struc.c):-1:2
        pom = s
        s = x2 * s - t + struc.c[j]
        t = pom
    end
    return (x1 * s - t + struc.c[1] / 2)
end

fn = sin
struc = cheb_coeff(0.0, 1.0, 10, fn)

println("coeff:")
for x in struc.c
    @printf("% .15f\n", x)
end

println("\n x eval approx eval-approx")
for x in struc.min:0.1:struc.max
    eval = fn(x)
    approx = approximate(struc, x)
    @printf("%11.8f %12.8f %12.8f % .3e\n", x, eval, approx, eval - approx)
    display(plot(x=eval, y=approx))
end
I am getting an empty plot window.
I would be very grateful if someone could show me how to plot these two functions.
You should provide working code as an example.
However, the code below shows one way to do the plotting:
using Plots
pyplot()
fn = sin
approxf(x) = sin(x)+rand()/10
x = 0:0.1:1
evalv = fn.(x)
approxv = approxf.(x)
p = plot(evalv,approxv)
using PyPlot
PyPlot.display_figs() #needed when running in IDE such as Atom
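Applied to the code in the question, the same idea is to collect the values first and then plot both curves against x. A sketch that reuses the cheb_coeff and approximate functions defined above:
using Plots

fn = sin
struc = cheb_coeff(0.0, 1.0, 10, fn)

xs = struc.min:0.01:struc.max
evalv = fn.(xs)                           # exact values
approxv = approximate.(Ref(struc), xs)    # Chebyshev approximation at the same points

display(plot(xs, [evalv approxv], label = ["sin(x)" "Chebyshev approx."]))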

Can't call `sort_exercise()`

I am trying to call both functions, starting with sort_exercise.
# reference https://www.geeksforgeeks.org/merge-sort/
# Merges two subarrays of A[]
# First subarray is A[p..m]
# Second subarray is A[m+1..r]
julia> function sort_exercise(A::Vector{Int}, p, m, r)
           n1 = m - p + 1
           n2 = r - m
           # create temp arrays
           L = zeros(Int, n1)
           R = zeros(Int, n2)
           # copy data to temp arrays L[] and R[]
           for i = 1:n1
               L[i] = A[p + i]
           end
           for j = 1:n2
               R[j] = A[m + 1 + j]
           end
           # Merge temp arrays back to A[1..r]
           i = 0 # Initial index of first subarray
           j = 0 # Initial index of second subarray
           k = p # Initial index of merged subarray
           while i < n1; j < n2
               if L[i] <= R[j]
                   A[k] = L[i]
                   i += 1
               else
                   A[k] = R[j]
                   j += 1
               end
               k += 1
           end
           # Copy any possible remaining elements of L[]
           while i < n1
               A[k] = L[i]
               i += 1
               k += 1
           end
           # Copy any possible remaining elements of R[]
           while j < n2
               A[k] = R[j]
               j += 1
               k += 1
           end
       end
sort_exercise (generic function with 1 method)
sort_exercise (generic function with 1 method)
julia> sort_exercise([4, 5, 22, 1, 3], 1, 3, 5)
ERROR: BoundsError: attempt to access 5-element Array{Int64,1} at index [6]
Stacktrace:
[1] sort_exercise(::Array{Int64,1}, ::Int64, ::Int64, ::Int64) at ./REPL[1]:14
julia> function merge_exercise(A::Vector{Int}, p, r)
           if p < r
               # equivalent to `(p + r) / 2` w/o overflow for big p and h (no idea what h is)
               m = (p + (r - 1)) / 2
               # merge first half
               merge_exercise(A, p, m)
               # with second half
               merge_exercise(A, m + 1, r)
               # sort merged halves
               sort_exercise(A, p, m, r)
           end
       end
merge_exercise (generic function with 1 method)
It seems that you have translated the Python code.
In Python, L = [0] * n1 creates an array of size n1 filled with 0. In Julia you can use L = zeros(Int, n1) to accomplish the same; L = zeros(Int, 1) * n1 is just the one-element array [0], which is why you get the out-of-bounds error.
Note that Python's for i in range(1, n1) can also be written in Julia as for i = 1:n1.
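For example (a quick REPL check):
julia> zeros(Int, 3)        # three zeros, like Python's [0] * 3
3-element Array{Int64,1}:
 0
 0
 0

julia> zeros(Int, 1) * 3    # scalar multiplication of the one-element array [0]
1-element Array{Int64,1}:
 0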

Quasi-monte-carlo underperforms in path simulation of Brownian motion. Julia

I'm looking at using Sobol sequences as a variance reduction technique to speed up a simulation. However, the convergence of the QMC method seems very weak.
Does anyone know how or why this is the case? Is the advantage of QMC lost on these high dimensional cases? I get similar results for the Brownian bridge.
The code below attempts to estimate in Julia
$$ \mathbb{E}\left[\int_0^T B_t^2 \, dt\right] $$
where $B_t$ is a standard Brownian motion. The true value is $\frac{1}{2}T^2$.
using PyPlot
import Distributions: Normal
import PyPlot: plt
using Sobol

function pseudo_path_variation(dt, T, M)
    # The pseudo random approach
    N = round(Int, T/dt);
    Z = zeros(N, M);
    d = Normal(0, 1.0);
    sum = 0.0;
    for j in 1:M, i in 1:N-1
        Z[i+1, j] = Z[i, j] + sqrt(dt)*rand(d, 1)[1];
    end
    # Calculate sum
    return sumabs2(Z)*dt/M;
end

function quasi_path_variation(dt, T, M)
    # An attempt at the above using a N dimensional sobol sequence
    N = round(Int, T/dt);
    Z = zeros(N, M);
    d = Normal(0, 1.0);
    sum = 0.0;
    # The N dimensional sobol sequence
    s = SobolSeq(N);
    # Burn in the sequence
    for i in 1:10
        next(s);
    end
    for j in 1:M
        B = next(s);
        for i in 1:N-1
            Z[i+1, j] = Z[i, j] + sqrt(dt)*quantile(d, B[i]);
        end
    end
    # Calculate sum
    return sumabs2(Z)*dt/M;
end

dt = 0.5;
T = 10;
M = 1000;

estims = zeros(M);
for N = 1:M-1
    estims[N+1] = quasi_path_variation(dt, T, N)
end
p = plot(linspace(0, M, M), estims);

estims = zeros(M);
for N = 1:M-1
    estims[N+1] = pseudo_path_variation(dt, T, N)
end
p = plot(linspace(0, M, M), estims);
This is a comment, not an answer, but my SO score is not high enough for comments.
Timing with tic()...toc(): it completed in about 3 seconds.
tic()
dt = 0.5;
T = 10;
...
...
p = plot(linspace(0, M, M), estims);
toc()
elapsed time: 2.928828918 seconds
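As a side note, the code in the question targets a pre-1.0 Julia: sumabs2, linspace, next, and tic()/toc() no longer exist. A rough sketch of the modern replacements (assuming the current Sobol.jl API, which exports next! and supports skip):
using Sobol

s = SobolSeq(20)
skip(s, 10)                                   # burn-in, instead of the manual loop calling next(s)
x = next!(s)                                  # replaces next(s)
total = sum(abs2, x)                          # replaces sumabs2(x)
grid = range(0, stop = 1000, length = 1000)   # replaces linspace(0, 1000, 1000)
t = @elapsed sum(abs2, randn(10^6))           # replaces tic()/toc() timing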
