Why is my Julia code getting slower at higher iterations? - julia

I wrote a main function which uses a stochastic optimization algorithm (Particle Swarm Optimization) to find the optimal solution for an ODE system. I run it 50 times to make sure the optimum is found. At first it ran normally, but now I have found that the calculation time increases as the iteration count increases.
Each of the first ten calculations takes less than 300s, but this grows to 500s for the final calculation. Each calculation seems to take 3~5 seconds longer than the previous one. I have followed the high-performance tips to optimize my code, but it doesn't help.
I am sorry I don't know quite well how to upload my code before, here is the code I wrote below. But in this code, the experimental data is not loaded, I may need to find a way to upload data. In main function, with the increase of i, the time cost is increasing for each calculation.
Oh, by the way, I found another interesting phenomenon. I changed the number of calculations and the calculation time changed again. For the first 20 calculations in the main loop, each calculation took about 300s, and the memory usage fluctuated significantly. Then something I don't understand happened and it sped up: each calculation dropped to roughly a quarter of the time, about 80s. And the memory usage became a straight line like this:
I know that Julia compiles code on the first run and is faster afterwards, but this situation seems different. It looks like Julia runs slowly for the first 20 calculations, then finds a good way to optimize the memory usage and speeds up. After that the program just runs at full speed.
using CSV, DataFrames
using BenchmarkTools
using DifferentialEquations
using Statistics
using Dates
using Base.Threads
using Suppressor
"""
    uniform(dim, lb, ub)

Return a `Vector{Float64}` of length `dim` whose `i`-th entry is drawn uniformly
from the interval between `lb[i]` and `ub[i]`.

Throws `DimensionMismatch` if `lb` or `ub` has fewer than `dim` entries.
"""
function uniform(dim::Int, lb::Array{Float64, 1}, ub::Array{Float64, 1})
    (length(lb) >= dim && length(ub) >= dim) ||
        throw(DimensionMismatch("lb and ub must each have at least dim = $dim entries"))
    arr = rand(Float64, dim)
    # Map every unit-interval draw onto its own coordinate range. The length check
    # above is what makes skipping the bounds checks safe.
    @inbounds for i in 1:dim
        arr[i] = arr[i] * (ub[i] - lb[i]) + lb[i]
    end
    return arr
end
"""
    Problem(cost_func, dim, lb, ub)

Description of a box-constrained minimisation problem.

- `cost_func`: objective, called as `cost_func(position, data_dict)`.
- `dim`: number of decision variables.
- `lb`, `ub`: per-coordinate lower and upper bounds.

The type is parameterised on the type of `cost_func` so the field is concretely
typed; an untyped field is stored as `Any` and every call through it is
dynamically dispatched.
"""
mutable struct Problem{F}
    cost_func::F
    dim::Int
    lb::Array{Float64,1}
    ub::Array{Float64,1}
end

# Keep the converting positional constructor that the non-parametric struct had,
# so e.g. integer-valued bound vectors are still accepted and converted.
Problem(cost_func::F, dim, lb, ub) where {F} = Problem{F}(cost_func, dim, lb, ub)
# Mutable state of one swarm member. PSO overwrites position, velocity and cost on
# every iteration and refreshes the two best_* fields whenever the cost improves.
mutable struct Particle
position::Array{Float64,1} # current coordinates in the search space
velocity::Array{Float64,1} # current step added to position each iteration
cost::Float64 # cost_func value at the current position
best_position::Array{Float64,1} # position with the lowest cost this particle has seen
best_cost::Float64 # cost at best_position
end
# Best solution found by the whole swarm so far; updated by initialize_particles and PSO.
mutable struct Gbest
position::Array{Float64,1} # coordinates of the best point found by any particle
cost::Float64 # cost_func value at that point
end
"""
    PSO(problem, data_dict; max_iter=100, population=100, c1=1.4962, c2=1.4962,
        w=0.7298, wdamp=1.0)

Minimise `problem.cost_func(position, data_dict)` inside the box
`[problem.lb, problem.ub]` with particle swarm optimisation.

# Keywords
- `max_iter`: number of swarm iterations.
- `population`: number of particles.
- `c1`, `c2`: weights of the pull towards the particle's own best and the swarm best.
- `w`: inertia weight, multiplied by `wdamp` after every iteration.

Returns `(gbest, particles)`. Progress is printed on iterations 1, 51, 101, ...

Particles are evaluated in parallel with `Threads.@threads`. The swarm-best record
is shared by all threads, so it is read and updated under a lock.
"""
function PSO(problem, data_dict; max_iter=100,population=100,c1=1.4962,c2=1.4962,w=0.7298,wdamp=1.0)
    dim = problem.dim
    lb = problem.lb
    ub = problem.ub
    cost_func = problem.cost_func
    gbest, particles = initialize_particles(problem, population, data_dict)
    gbest_lock = ReentrantLock()
    # main loop
    for iter in 1:max_iter
        # `w` is reassigned below; the threaded loop body is a closure, so give it a
        # binding that is never reassigned to avoid a boxed, untyped capture.
        inertia = w
        Threads.@threads for i in 1:population
            p = particles[i]::Particle
            # The swarm best is replaced (never mutated in place), so the array
            # obtained here stays consistent while this particle uses it.
            gbest_position = lock(() -> gbest.position, gbest_lock)
            p.velocity .= inertia .* p.velocity .+
                c1 .* rand(dim) .* (p.best_position .- p.position) .+
                c2 .* rand(dim) .* (gbest_position .- p.position)
            p.position .= p.position .+ p.velocity
            # Project back into the feasible box.
            p.position .= max.(p.position, lb)
            p.position .= min.(p.position, ub)
            p.cost = cost_func(p.position, data_dict)
            if p.cost < p.best_cost
                p.best_position = copy(p.position)
                p.best_cost = p.cost
                # Compare-and-update must be atomic, otherwise a worse particle
                # can overwrite a better one written by another thread.
                lock(gbest_lock) do
                    if p.best_cost < gbest.cost
                        gbest.position = copy(p.best_position)
                        gbest.cost = p.best_cost
                    end
                end
            end
        end
        w = w * wdamp
        if iter % 50 == 1
            println("Iteration " * string(iter) * ": Best Cost = " * string(gbest.cost))
            println("Best Position = " * string(gbest.position))
            println()
        end
    end
    return gbest, particles
end
"""
    initialize_particles(problem, population, data_dict)

Create `population` particles at random positions inside the problem bounds with
zero velocity, evaluate each once, and return `(gbest, particles)` where `gbest`
is the best point seen (including one extra random seed point).

`particles` is a concretely typed `Vector{Particle}`; an untyped `[]` would be a
`Vector{Any}` and make every field access in the PSO loop dynamically dispatched.
"""
function initialize_particles(problem, population,data_dict)
    dim = problem.dim
    lb = problem.lb
    ub = problem.ub
    cost_func = problem.cost_func
    gbest_position = uniform(dim, lb, ub)
    gbest = Gbest(gbest_position, cost_func(gbest_position, data_dict))
    particles = Particle[]
    sizehint!(particles, population)
    for _ in 1:population
        position = uniform(dim, lb, ub)
        velocity = zeros(dim)
        cost = cost_func(position, data_dict)
        # best_position needs its own array because position is mutated in place later.
        push!(particles, Particle(position, velocity, cost, copy(position), cost))
        if cost < gbest.cost
            gbest.position = copy(position)
            gbest.cost = cost
        end
    end
    return gbest, particles
end
# Build the three dictionary keys used for heating rate `beta`:
# ("Temperature_XX", "Mass_XX", "MLR_XX"), where XX is `beta` left-padded with
# zeros to at least two characters.
function get_dict_label(beta::Int)
    suffix = lpad(string(beta), 2, '0')
    return map(prefix -> prefix * suffix, ("Temperature_", "Mass_", "MLR_"))
end
# Normalised squared error between reference `x` and model `y`:
# sum((x - y)^2) divided by var(x) * length(x).
function get_error(x::Vector{Float64}, y::Vector{Float64})
    residual = x .- y
    squared_error = sum(abs2, residual)
    scale = length(x) * var(x)
    return squared_error / scale
end
"""
    central_diff(x, y)

Estimate `dy/dx` at every sample point and return it as a `Vector{Float64}`.

Interior points get the mean of the two neighbouring difference quotients
(central difference). The first point uses the forward quotient and the last
point the backward quotient.

Throws `DimensionMismatch` if `x` and `y` differ in length and `ArgumentError`
if fewer than two points are supplied.
"""
function central_diff(x::AbstractArray{Float64}, y::AbstractArray{Float64})
    n = length(x)
    length(y) == n || throw(DimensionMismatch("x and y must have the same length"))
    n >= 2 || throw(ArgumentError("central_diff needs at least two points"))
    # slopes[k] is the difference quotient between samples k and k+1.
    slopes = diff(y) ./ diff(x)
    dydx = Vector{Float64}(undef, n)
    dydx[1] = slopes[1]        # forward difference
    dydx[n] = slopes[n - 1]    # backward difference
    for k in 2:(n - 1)
        dydx[k] = (slopes[k - 1] + slopes[k]) / 2
    end
    return dydx
end
# In-place right-hand side for the two-reaction decomposition model
#   A -> residue + volatile
#   B -> residue + volatile
# written with temperature `T` as the independent variable, which is why both
# rates are divided by the first parameter.
# `p` unpacks to 11 values: that divisor, then (A, E, n, k) for each reaction,
# then the two reference masses m1 and m2.
function decomposition!(dm, m, p, T)
    rate_scale, a_1, e_1, order_1, k_1, a_2, e_2, order_2, k_2, mref_1, mref_2 = p
    gas_const = 8.314
    # A negative mass fraction is clamped to zero before being raised to the order.
    loss_1 = -mref_1 * exp(a_1 - e_1 / gas_const / T) * max(m[1] / mref_1, 0)^order_1 / rate_scale
    loss_2 = -mref_2 * exp(a_2 - e_2 / gas_const / T) * max(m[2] / mref_2, 0)^order_2 / rate_scale
    dm[1] = loss_1
    dm[2] = loss_2
    # Third component grows by the fractions k_1, k_2 of what the reactants lose.
    dm[3] = -k_1 * loss_1 - k_2 * loss_2
    # Fourth component tracks the sum of the first three.
    dm[4] = dm[1] + dm[2] + dm[3]
end
# Load the CSV at `file_path` and return a Dict{String, Vector{Float64}} holding,
# for each heating rate in 5:5:21, the temperature and mass columns restricted to
# temperatures below 780 plus the derivative of mass with respect to temperature
# (stored under the "MLR_xx" key).
function read_file(file_path)
    table = CSV.read(file_path, DataFrame)
    series = Dict{String, Vector{Float64}}()
    for beta in 5:5:21
        temperature_key, mass_key, rate_key = get_dict_label(beta)
        temperature = collect(skipmissing(table[:, temperature_key]))
        mass = collect(skipmissing(table[:, mass_key]))
        # One mask, built from the temperature column, selects rows in both columns.
        below_cutoff = temperature .< 780
        kept_temperature = temperature[below_cutoff]
        kept_mass = mass[below_cutoff]
        series[temperature_key] = kept_temperature
        series[mass_key] = kept_mass
        series[rate_key] = central_diff(kept_temperature, kept_mass)
    end
    return series
end
# Assemble the ODE inputs for one heating rate.
# Returns (m0, T_span, p):
#   m0     - initial state [last optimisation parameter, 1 - that value, 0.0, 1.0]
#   T_span - fixed integration range (300.0, 800.0)
#   p      - parameter vector [beta, ode_parameters..., 1 - last parameter]
function initial_condition(beta::Int64, ode_parameters::Array{Float64,1})
    first_mass = last(ode_parameters)
    second_mass = 1 - first_mass
    p = vcat(beta, ode_parameters, second_mass)
    m0 = [first_mass, second_mass, 0.0, 1.0] # initial mass
    T_span = (300.0, 800.0) # temperature range
    return m0, T_span, p
end
"""
    cost_func(ode_parameters, data_dict)

Objective for the parameter fit. For every heating rate in `5:5:21` the ODE model
is solved at the measured temperatures, and the normalised errors of the mass
curve and of its temperature derivative are added up.

Returns the summed error, or `Inf` as soon as any solve does not finish with
retcode `:Success`, so that the optimiser discards that parameter set.
"""
function cost_func(ode_parameters, data_dict)
    total_error = 0.0
    for beta in 5:5:21
        T_label, M_label, MLR_label = get_dict_label(beta)
        T = data_dict[T_label]::Vector{Float64}
        M = data_dict[M_label]::Vector{Float64}
        MLR = data_dict[MLR_label]::Vector{Float64}
        m0, T_span, p = initial_condition(beta, ode_parameters)
        prob = ODEProblem(decomposition!, m0, T_span, p)
        sol = solve(prob, AutoVern9(Rodas5(autodiff=false)), saveat=T, abstol=1e-8, reltol=1e-8, maxiters=1e4)
        if sol.retcode != :Success
            return Inf
        end
        # Last state component at every saved point, materialised as a plain
        # Vector{Float64} because get_error only accepts that type.
        M_sol = Float64[u[end] for u in sol.u]
        MLR_sol = central_diff(T, M_sol)
        total_error += get_error(MLR, MLR_sol) + get_error(M, M_sol)
    end
    return total_error
end
"""
    main(file_path = raw"F:\\17-Fabric\\17-Fabric (Smoothed) TG.csv")

Run the PSO fit 50 times on the data in `file_path`, print the result and the
wall-clock time of every run, then print the best cost/position over all runs
and the total time in seconds.

Returns the vector of best costs, one entry per run.
"""
function main(file_path = raw"F:\17-Fabric\17-Fabric (Smoothed) TG.csv")
    flush(stdout)
    total_time = 0
    best_costs = Float64[]
    data_dict = read_file(file_path)
    dimension = 9
    lb = [5, 47450, 0.0, 0.0, 24.36, 148010, 0.0, 0.0, 1e-5]
    ub = [25.79, 167700, 5, 1, 58.95, 293890, 5, 1, 0.25]
    problem = Problem(cost_func, dimension, lb, ub)
    global_best_cost = Inf
    # Must exist before the loop: a variable first assigned inside a `for` body is
    # local to that body and would be undefined when printed after the loop.
    global_best_position = Float64[]
    println("-"^100)
    println("Running PSO ...")
    population = 50
    max_iter = 1001
    println("The population is: ", population)
    println("Max iteration is:", max_iter)
    for i in 1:50 # The number of calculation
        start_time = Dates.now()
        println("Current iteration is: ", string(i))
        gbest, particles = PSO(problem, data_dict, max_iter=max_iter, population=population)
        if gbest.cost < global_best_cost
            global_best_cost = gbest.cost
            global_best_position = copy(gbest.position)
        end
        end_time = Dates.now()
        time_duration = round(end_time - start_time, Second)
        total_time += time_duration.value
        push!(best_costs, gbest.cost)
        println()
        println("The Best is:")
        println(gbest.cost)
        println(gbest.position)
        println("The calculation time is: " * string(time_duration))
        println()
        println("-"^50)
    end
    println('-'^100)
    println("Global best cost is: ", global_best_cost)
    println("Global best position is: ", global_best_position)
    println(total_time)
    return best_costs
end
# Run the whole study with stderr output suppressed and report the elapsed time;
# the per-run best costs are kept in the global `best_costs`.
@suppress_err begin
    @time global best_costs = main()
end
So, what is the possible mechanism behind this? Is there a way to avoid this problem? If I increase the particle population and the maximum number of iterations, the added time becomes extremely large and therefore unacceptable.
And what is the possible mechanism for speed up the program I mentioned above? How to trigger this mechanism?

As the parameters of an ODE optimizes it can completely change its characteristics. Your equation could be getting more stiff and require different ODE solvers. There are many other related ways, but you can see how changing parameters could give such a performance issue. It's best to use methods like AutoTsit5(Rodas5()) and the like in such estimation cases because it's hard to know or guess what the performance will be like, and thus adaptiveness in the method choice can be crucial.

Related

How to plot 2 functions with Julia

I am struggling to plot an evaluated function and its Chebyshev approximation.
I am using Julia 1.2.0.
EDIT: Sorry, added completed code.
using Plots
pyplot()
# Chebyshev series over the interval [min, max]; built by cheb_coeff and
# evaluated by approximate.
mutable struct Cheb_struct
c::Vector{Float64} # series coefficients, filled in by cheb_coeff
min::Float64 # lower end of the approximation interval
max::Float64 # upper end of the approximation interval
end
# Compute the `n` Chebyshev coefficients of `fn` on the interval [min, max].
# `fn` is sampled at the n points  mid + cos(pi*(k - 0.5)/n) * halfwidth,
# k = 1..n, and coefficient j (0-based) is (2/n) * sum_k f_k * cos(j * p_k).
# Returns a Cheb_struct holding the coefficients and the interval.
function cheb_coeff(min::Float64, max::Float64, n::Int, fn::Function)::Cheb_struct
    struc = Cheb_struct(Vector{Float64}(undef, n), min, max)
    f = Vector{Float64}(undef, n)
    p = Vector{Float64}(undef, n)
    max_plus_min = (max + min) / 2
    max_minus_min = (max - min) / 2
    for k in 1:n
        p[k] = pi * (k - 0.5) / n
        f[k] = fn(max_plus_min + cos(p[k]) * max_minus_min)
    end
    n2 = 2 / n
    for j in 0:n-1
        s = 0.0 # Float64 from the start so the accumulator never changes type
        for i in 1:n
            s += f[i] * cos(j * p[i])
        end
        # Store once per coefficient, after the sum is complete.
        struc.c[j+1] = s * n2
    end
    return struc
end
# Evaluate the Chebyshev series stored in `struc` at `x`.
# `x` is first mapped linearly from [struc.min, struc.max] to [-1, 1]; the series
# is then summed with a backward recurrence over the coefficients, and the first
# coefficient enters with weight 1/2.
function approximate(struc::Cheb_struct, x::Float64)::Float64
    x1 = (2*x - struc.max - struc.min) / (struc.max - struc.min)
    x2 = 2*x1
    # Float64 zeros keep both recurrence variables type-stable across the loop.
    t = s = 0.0
    for j in length(struc.c):-1:2
        pom = s
        s = x2 * s - t + struc.c[j]
        t = pom
    end
    return (x1 * s - t + struc.c[1] / 2)
end
using Printf

fn = sin
struc = cheb_coeff(0.0, 1.0, 10, fn)
println("coeff:")
for c in struc.c
    @printf("% .15f\n", c)
end
println("\n x eval approx eval-approx")
# Collect the whole curves first: a plot needs vectors of points. Calling plot
# once per scalar value draws nothing visible.
xs = struc.min:0.1:struc.max
exact = fn.(xs)
approx = [approximate(struc, x) for x in xs]
for (x, e, a) in zip(xs, exact, approx)
    @printf("%11.8f %12.8f %12.8f % .3e\n", x, e, a, e - a)
end
# One column per series: both curves are drawn against the same x values.
display(plot(xs, [exact approx]; label=["eval" "approx"], xlabel="x"))
I am getting empty plot window.
I would be very grateful if someone could show me how to plot these two functions.
You should provide a working code as an example.
However the code below can show you how to plot:
using Plots
pyplot()
fn = sin
# Stand-in for the approximation: the exact function plus a little random noise.
approxf(x) = sin(x)+rand()/10
x = 0:0.1:1
evalv = fn.(x)
approxv = approxf.(x)
# Draw both functions against the same x values, one column per series, instead
# of plotting one function's values against the other's.
p = plot(x, [evalv approxv], label=["eval" "approx"])
using PyPlot
PyPlot.display_figs() #needed when running in IDE such as Atom

Julia loops are as slow as R loops

The code below in Julia and R is to show that the estimator of the population variance is a biased estimator, that is it depends on the sample size and no matter how many times we average over different observations, for small number of data points it is not equal to the variance of the population.
It takes for Julia ~10 seconds to finish the two loops and R does it in ~7 seconds.
If I leave the code inside the loops commented then the loops in R and Julia take the same time and if I only sum the iterators by s = s + i+ j Julia finishes in ~0.15s and R in ~0.5s.
Is it that Julia loops are slow or R became fast?
How can I improve the speed of the code below for Julia?
Can the R code become faster?
Julia:
using Plots
# Everything below runs at global scope (no enclosing function).
trials = 100000
sample_size = 10;
# Result matrix: one row per trial, column i-1 for sample size i.
sd = Array{Float64}(trials,sample_size-1)
tic()
for i = 2:sample_size
for j = 1:trials
# Draw i standard-normal values and store mean(res^2) - mean(res)^2.
res = randn(i)
sd[j,i-1] = (1/(i))*(sum(res.^2))-(1/((i)*i))*(sum(res)*sum(res))
end
end
toc()
# Average over the trials (dimension 1), leaving one value per sample size.
sd2 = mean(sd,1)
plot(sd2[1:end])
R:
trials = 100000
sample_size = 10
# Result matrix: one row per trial, column i-1 for sample size i.
sd = matrix(, nrow = trials, ncol = sample_size-1)
start_time = Sys.time()
for(i in 2:sample_size){
for(j in 1:trials){
# Draw i standard-normal values and store mean(res^2) - mean(res)^2.
res <- rnorm(n = i, mean = 0, sd = 1)
sd[j,i-1] = (1/(i))*(sum(res*res))-(1/((i)*i))*(sum(res)*sum(res))
}
}
end_time = Sys.time()
end_time - start_time
# Column means: one averaged value per sample size.
sd2 = apply(sd,2,mean)
plot(sqrt(sd2))
The plot in case anybody is curious!:
One way I could achieve much higher speed is to use a parallel loop, which is very easy to implement in Julia:
using Plots
trials = 100000
sample_size = 10;
# SharedArray so that every worker process can write its part of the result.
sd = SharedArray{Float64}(trials,sample_size-1)
tic()
# The outer loop over sample sizes is distributed across the workers.
# NOTE: without @sync in front of it, @parallel returns before the workers have
# finished, so toc() and mean() below may run on incomplete data.
@parallel for i = 2:sample_size
    for j = 1:trials
        res = randn(i)
        sd[j,i-1] = (1/(i))*(sum(res.^2))-(1/((i)*i))*(sum(res)*sum(res))
    end
end
toc()
sd2 = mean(sd,1)
plot(sd2[1:end])
Using global variables in Julia in general is slow and should give you speed comparable to R. You should wrap your code in a function to make it fast.
Here is a timing from my laptop (I cut out only the relevant part):
julia> function test()
# Same experiment as the question's script, but wrapped in a function so that
# trials, sample_size and sd are local variables instead of globals.
trials = 100000
sample_size = 10;
sd = Array{Float64}(trials,sample_size-1)
tic()
for i = 2:sample_size
for j = 1:trials
res = randn(i)
sd[j,i-1] = (1/(i))*(sum(res.^2))-(1/((i)*i))*(sum(res)*sum(res))
end
end
# toc() is the last expression, so its value is what test() returns.
toc()
end
test (generic function with 1 method)
julia> test()
elapsed time: 0.243233887 seconds
0.243233887
Additionally in Julia if you use randn! instead of randn you can speed it up even more as you avoid reallocation of res vector (I am not doing other optimizations to the code as this optimization is distinct to Julia in comparison to R; all other possible speedups in this code would help Julia and R in a similar way):
julia> function test2()
# Variant of test() that reuses one buffer per sample size instead of allocating
# a new random vector on every trial.
trials = 100000
sample_size = 10;
sd = Array{Float64}(trials,sample_size-1)
tic()
for i = 2:sample_size
# Allocate the length-i buffer once per sample size ...
res = zeros(i)
for j = 1:trials
# ... and refill it in place with fresh normal draws for each trial.
randn!(res)
sd[j,i-1] = (1/(i))*(sum(res.^2))-(1/((i)*i))*(sum(res)*sum(res))
end
end
toc()
end
test2 (generic function with 1 method)
julia> test2()
elapsed time: 0.154881137 seconds
0.154881137
Finally it is better to use BenchmarkTools package to measure execution time in Julia. First tic and toc functions will be removed from Julia 0.7. Second - you mix compilation and execution time if you use them (when running test function twice you will see that the time is reduced on the second run as Julia does not spend time compiling functions).
EDIT:
You can keep trials, sample_size and sd as global variables but then you should prefix them with const. Then it is enough to wrap a loop in a function like this:
# The three globals are declared const, and the loops live in a function.
const trials = 100000;
const sample_size = 10;
const sd = Array{Float64}(trials,sample_size-1);
# Fill the global matrix sd in place; reads trials and sample_size from the
# constants above.
function f()
for i = 2:sample_size
for j = 1:trials
res = randn(i)
sd[j,i-1] = (1/(i))*(sum(res.^2))-(1/((i)*i))*(sum(res)*sum(res))
end
end
end
# Time a single call of f().
tic()
f()
toc()
Now for @parallel:
First, you should use @sync before @parallel to make sure everything works correctly (i.e. that all workers have finished before you move on to the next instruction). To see why this is needed, run the following code on a system with more than one worker:
sd = SharedArray{Float64}(10^6);
# No @sync: execution continues past the loop while workers are still writing.
@parallel for i = 1:2
    if i < 2
        sd[i] = 1
    else
        # The second iteration fills almost the whole array, so it takes much longer.
        for j in 2:10^6
            sd[j] = 1
        end
    end
end
minimum(sd) # most probably prints 0.0
sleep(1)
minimum(sd) # most probably prints 1.0
while this
sd = SharedArray{Float64}(10^6);
# @sync blocks until every worker has finished its share of the loop.
@sync @parallel for i = 1:2
    if i < 2
        sd[i] = 1
    else
        for j in 2:10^6
            sd[j] = 1
        end
    end
end
minimum(sd) # always prints 1.0
Second, the speed improvement is due to the @parallel macro, not SharedArray. If you try your code on Julia with one worker it is also faster. The reason, in short, is that @parallel internally wraps your code inside a function. You can check this by using @macroexpand:
julia> @macroexpand @sync @parallel for i = 2:sample_size
for j = 1:trials
res = randn(i)
sd[j,i-1] = (1/(i))*(sum(res.^2))-(1/((i)*i))*(sum(res)*sum(res))
end
end
quote # task.jl, line 301:
(Base.sync_begin)() # task.jl, line 302:
#19#v = (Base.Distributed.pfor)(begin # distributed\macros.jl, line 172:
function (#20#R, #21#lo::Base.Distributed.Int, #22#hi::Base.Distributed.Int) # distributed\macros.jl, line 173:
for i = #20#R[#21#lo:#22#hi] # distributed\macros.jl, line 174:
begin # REPL[22], line 2:
for j = 1:trials # REPL[22], line 3:
res = randn(i) # REPL[22], line 4:
sd[j, i - 1] = (1 / i) * sum(res .^ 2) - (1 / (i * i)) * (sum(res) * sum(res))
end
end
end
end
end, 2:sample_size) # task.jl, line 303:
(Base.sync_end)() # task.jl, line 304:
#19#v
end

How can I add results of a computing to a Dictionary in parallel in Julia?

I would like to parallelize the computation of the function solveZeros for the different elements in S. The function are written below:
"""
    solveZeros(S)

Solve for the zero of the linear equation described by each `(a, b)` pair in `S`
and return a dictionary with the pair `(a, b)` as key and the value returned by
`bisect(a, b)` as item.
"""
function solveZeros(S)
    results = Dict{}()
    for (a, b) in S
        results[(a, b)] = bisect(a, b)
    end
    return results
end
"""
    bisect(a, b)

Use bisection on the interval `[0, 100]` to find a root of the linear function
with slope `a` and intercept `b`.

Returns the first midpoint whose function value is smaller than `1e-1` in
magnitude, or `nothing` if the interval shrinks below `1e-2` without finding one.
"""
function bisect(a, b)
    # Float64 from the start: `mid` is a float, so integer bounds would change type.
    low, high = 0.0, 100.0
    while (high - low) > 1E-2
        mid = low + (high - low) / 2
        residual = linearEquation(a, b, mid)
        if abs(residual) < 1E-1
            return mid
        elseif a * residual > 0
            # The line has already passed through zero before `mid` (for a rising
            # line the value is positive, for a falling line negative).
            high = mid
        else
            low = mid
        end
    end
    return nothing
end

"""
    linearEquation(a, b, x)

Evaluate the line with slope `a` and intercept `b` at `x`.
"""
function linearEquation(a, b, x)
    return a * x + b
end
S = Array([(1., -10), (1., -20)])
Can somebody kindly explain how to parallelize the computation of the function solveZeros? This is a working example. In my actual computation, the functions solveZero and bisect and linearEqauation are drawn from different modules. How can I initialize these functions accordingly for parallel computation?

FOR loops and range in Julia

When I try to define range in a for loop when the range is less than 1 I get errors.
For example the following code:
i = linspace(0, 3, 200)
graph = zeros(length(i), 1)
for j in 0:0.015:3
# j runs over the values 0, 0.015, 0.03, ... and is used directly as the array
# index on the next line; this is the line the reported error comes from.
graph[j] = j*cos(j^2)
end
Reports the following error: ERROR: BoundsError()
Why is that?
As StefanKarpinski noted, it is not the for loop (variable) that only takes integers, but the array index. You cannot access the 0th or the 0.015th element of an array.
How about this:
# Sample j*cos(j^2) at 200 evenly spaced points j in [0, 3].
x = range(0, stop=3, length=200)
y = zeros(length(x))
for idx in eachindex(x)
    y[idx] = x[idx] * cos(x[idx]^2)
end
Or even:
# Same sampling as a single in-place map over the range.
x = range(0, stop=3, length=200)
y = zeros(length(x))
map!(v -> v * cos(v * v), y, x)
IMHO, the for loop takes more space without being clearer. Not sure what is considered "julianic", but in the Python world I think most people would go for a list comprehension:
tic()
x = linspace(0, 3, 200)
# Comprehension: one element j*cos(j*j) for every point j of x.
y = [j*cos(j*j) for j in x]
toc()
elapsed time: 0.014455408 seconds
Even nicer to my eyes and faster is:
tic()
x = linspace(0, 3, 200)
# Every operation is dotted, including the call to cos, so the whole expression
# is applied element by element.
y = x .* cos.(x .^ 2)
toc()
elapsed time: 0.000600354 seconds
where the . in .* or .^ indicates you're applying the method/function element by element.
Not sure why this is faster. A Julia expert may want to help us with that.

translating matlab script to R

I've just been working though converting some MATLAB scripts to work in R, however having never used MATLAB in my life, and not exactly being an expert on R I'm having some trouble.
Edit: It's a script I was given designed to correct temperature measurements for lag generated by insulation mass effects. My understanding is that It looks at the rate of change of the temperature and attempts to adjust for errors generated by the response time of the sensor. Unfortunately there is no literature available to me to give me an indication of the numbers i am expecting from the function, and the only way to find out will be to experimentally test it at a later date.
the original script:
function [Tc, dT] = CTD_TempTimelagCorrection(T0,Tau,t)
% CTD_TempTimelagCorrection  Apply a 3-point weighted filter to a temperature series.
%   T0  - vector of measured temperatures
%   Tau - lag duration
%   t   - sampling interval (same units as Tau)
%   Tc  - corrected series; the first element and the last two are copied from T0
%   dT  - applied correction, Tc - T0
N1 = Tau/t;
Tc = T0;
N = 3;
half = ceil(N/2);
% Compute weights. They depend only on N and N1, so compute them once.
A = nan(N,1);
for k=1:N
    A(k) = (1/N) + N1 * ((12*k - (6*(N+1))) / (N*(N^2 - 1)));
end
A = A./sum(A);
% Verify unity, with a tolerance because the sum is a floating-point value.
if abs(sum(A) - 1) > 1e-12
    disp('Error: Sum of weights is not unity');
end
for j=half:numel(T0)-half
    Comp = nan(N,1);
    % Compute components from the window T0(j-1), T0(j), T0(j+1)
    for k=1:N
        Comp(k) = A(k)*T0(j - half + k);
    end
    Tc(j) = sum(Comp);
end
% Computed after the loop so dT is defined even when the loop body never runs.
dT = Tc - T0;
where I've managed to get to:
CTD_TempTimelagCorrection <- function(temp,Tau,t){
  ## Apply a 3-point weighted filter to a temperature series and return the
  ## applied correction (filtered series minus input).
  ##   temp - vector of measured temperatures
  ##   Tau  - lag duration
  ##   t    - sampling interval (same units as Tau)
  N1 = Tau/t
  Tc = temp
  N = 3
  half = ceiling(N/2)
  # Compute weights. They depend only on N and N1, so compute them once.
  A = numeric(N)
  for(k in 1:N){
    A[k] = (1/N) + N1 * ((12*k - (6*(N+1))) / (N*(N^2 - 1)))
  }
  A = A/sum(A)
  # Verify unity, with a tolerance because the sum is a floating-point value.
  if(!isTRUE(all.equal(sum(A), 1))){
    print("Error: Sum of weights is not unity")
  }
  # In R `:` binds tighter than binary minus, so `a:b-c` means `(a:b)-c` and
  # would start the loop at index 0. The upper bound is computed separately.
  first = half
  last = length(temp) - half
  # Guard the range: in R `2:1` counts down instead of being empty.
  if(last >= first){
    for(i in first:last){
      # Compute components from the window temp[i-1], temp[i], temp[i+1]
      Comp = numeric(N)
      for(k in 1:N){
        Comp[k] = A[k]*temp[i - half + k]
      }
      Tc[i] = sum(Comp)
    }
  }
  dT = Tc - temp
  return(dT)
}
I think the problem is the Comp[k] line, could someone point out what I've done wrong? I'm not sure I can select the elements of the array in such a way.
by the way, Tau = 1, t = 0.5 and temp (or T0) will be a vector.
Thanks
edit: apparently my description is too brief in explaining my code samples, not really sure what more I could write that would be relevant and not just wasting peoples time. Is this enough Mr Filter?
The error is as follows:
Error in Comp[k] = A[k] * temp[i - (ceiling(N/2)) + k] :
replacement has length zero
In addition: Warning message:
In Comp[k] = A[k] * temp[i - (ceiling(N/2)) + k] :
number of items to replace is not a multiple of replacement length
If you write print(i - (ceiling(N/2)) + k) before that line, you will see that you are using incorrect indices for temp[i - (ceiling(N/2)) + k], which means that nothing is returned to be inserted into Comp[k]. I assume this problem is due to Matlab allowing the use of 0 as an index and not R, and the way negative indices are handled (they don't work the same in both languages). You need to implement a fix to return the correct indices.

Resources