(Julia 1.x) BoundsError using pmap?

I am having trouble with pmap() throwing a BoundsError when setting the values of array elements - my code works for 1 worker but not for >1. I have written a minimal working example which roughly follows the real code flow:
Get source data
Define set of points over which to iterate
Initialise array points to be calculated
Calculate each array point
The main file:
# pmapdemo.jl
using Distributed
# addprocs(length(Sys.cpu_info())) # uncomment this line for error
@everywhere include(joinpath(@__DIR__, "pmapdemo2.jl"))

function main()
    # Get source data
    source = Dict{String, Any}("t"=>zeros(5),
                               "x"=>zeros(5,6),
                               "y"=>zeros(5,3),
                               "z"=>zeros(5,3))
    # Define set of points over which to iterate
    iterset = Dict{String, Any}("t"=>source["t"],
                                "x"=>source["x"],
                                "y"=>fill(2, size(source["t"])[1], 1),
                                "z"=>fill(2, size(source["t"])[1], 1))
    data = Dict{String, Any}()
    # Initialise array points to be calculated
    MyMod.initialisearray!(data, iterset)
    # Calculate each array point
    MyMod.calcarray!(data, iterset, source)
    @show data
end

main()
The functionality file:
# pmapdemo2.jl
module MyMod
using Distributed
@everywhere using SharedArrays

# Initialise data array
function initialisearray!(data, fieldset)
    zerofield::SharedArray{Float64, 4} = zeros(size(fieldset["t"])[1],
                                               size(fieldset["x"])[2],
                                               size(fieldset["y"])[2],
                                               size(fieldset["z"])[2])
    data["field"] = deepcopy(zerofield)
end

# Calculate values of array elements according to values in source
function calcpoint!((data, source, a, b, c, d))
    data["field"][a,b,c,d] = rand()
end

# Set values in array
function calcarray!(data, iterset, source)
    for a in eachindex(iterset["t"])
        # [additional functionality f(a) here]
        b = eachindex(iterset["x"][a,:])
        c = eachindex(iterset["y"][a,:])
        d = eachindex(iterset["z"][a,:])
        pmap(calcpoint!, Iterators.product(Iterators.repeated(data,1), Iterators.repeated(source,1), Iterators.repeated(a,1), b, c, d))
    end
end
end
The error output:
ERROR: LoadError: On worker 2:
BoundsError: attempt to access 0×0×0×0 Array{Float64,4} at index [1]
setindex! at ./array.jl:767 [inlined]
setindex! at /build/julia/src/julia-1.1.1/usr/share/julia/stdlib/v1.1/SharedArrays/src/SharedArrays.jl:500 [inlined]
_setindex! at ./abstractarray.jl:1043
setindex! at ./abstractarray.jl:1020
calcpoint! at /home/dave/pmapdemo2.jl:25
#112 at /build/julia/src/julia-1.1.1/usr/share/julia/stdlib/v1.1/Distributed/src/process_messages.jl:269
run_work_thunk at /build/julia/src/julia-1.1.1/usr/share/julia/stdlib/v1.1/Distributed/src/process_messages.jl:56
macro expansion at /build/julia/src/julia-1.1.1/usr/share/julia/stdlib/v1.1/Distributed/src/process_messages.jl:269 [inlined]
#111 at ./task.jl:259
Stacktrace:
[1] (::getfield(Base, Symbol("##696#698")))(::Task) at ./asyncmap.jl:178
[2] foreach(::getfield(Base, Symbol("##696#698")), ::Array{Any,1}) at ./abstractarray.jl:1866
[3] maptwice(::Function, ::Channel{Any}, ::Array{Any,1}, ::Base.Iterators.ProductIterator{Tuple{Base.Iterators.Take{Base.Iterators.Repeated{Dict{String,Any}}},Base.Iterators.Take{Base.Iterators.Repeated{Dict{String,Any}}},Base.Iterators.Take{Base.Iterators.Repeated{Int64}},Base.OneTo{Int64},Base.OneTo{Int64},Base.OneTo{Int64}}}) at ./asyncmap.jl:178
[4] #async_usemap#681 at ./asyncmap.jl:154 [inlined]
[5] #async_usemap at ./none:0 [inlined]
[6] #asyncmap#680 at ./asyncmap.jl:81 [inlined]
[7] #asyncmap at ./none:0 [inlined]
[8] #pmap#213(::Bool, ::Int64, ::Nothing, ::Array{Any,1}, ::Nothing, ::Function, ::Function, ::WorkerPool, ::Base.Iterators.ProductIterator{Tuple{Base.Iterators.Take{Base.Iterators.Repeated{Dict{String,Any}}},Base.Iterators.Take{Base.Iterators.Repeated{Dict{String,Any}}},Base.Iterators.Take{Base.Iterators.Repeated{Int64}},Base.OneTo{Int64},Base.OneTo{Int64},Base.OneTo{Int64}}}) at /build/julia/src/julia-1.1.1/usr/share/julia/stdlib/v1.1/Distributed/src/pmap.jl:126
[9] pmap(::Function, ::WorkerPool, ::Base.Iterators.ProductIterator{Tuple{Base.Iterators.Take{Base.Iterators.Repeated{Dict{String,Any}}},Base.Iterators.Take{Base.Iterators.Repeated{Dict{String,Any}}},Base.Iterators.Take{Base.Iterators.Repeated{Int64}},Base.OneTo{Int64},Base.OneTo{Int64},Base.OneTo{Int64}}}) at /build/julia/src/julia-1.1.1/usr/share/julia/stdlib/v1.1/Distributed/src/pmap.jl:101
[10] #pmap#223(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Base.Iterators.ProductIterator{Tuple{Base.Iterators.Take{Base.Iterators.Repeated{Dict{String,Any}}},Base.Iterators.Take{Base.Iterators.Repeated{Dict{String,Any}}},Base.Iterators.Take{Base.Iterators.Repeated{Int64}},Base.OneTo{Int64},Base.OneTo{Int64},Base.OneTo{Int64}}}) at /build/julia/src/julia-1.1.1/usr/share/julia/stdlib/v1.1/Distributed/src/pmap.jl:156
[11] pmap(::Function, ::Base.Iterators.ProductIterator{Tuple{Base.Iterators.Take{Base.Iterators.Repeated{Dict{String,Any}}},Base.Iterators.Take{Base.Iterators.Repeated{Dict{String,Any}}},Base.Iterators.Take{Base.Iterators.Repeated{Int64}},Base.OneTo{Int64},Base.OneTo{Int64},Base.OneTo{Int64}}}) at /build/julia/src/julia-1.1.1/usr/share/julia/stdlib/v1.1/Distributed/src/pmap.jl:156
[12] calcarray!(::Dict{String,Any}, ::Dict{String,Any}, ::Dict{String,Any}) at /home/dave/pmapdemo2.jl:20
[13] main() at /home/dave/pmapdemo.jl:19
[14] top-level scope at none:0
in expression starting at /home/dave/pmapdemo.jl:23
In pmapdemo2.jl, replacing data["field"][a,b,c,d] = rand() with @show a, b, c, d demonstrates that all workers are running and have full access to the variables being passed; however, replacing it instead with @show data["field"] throws the same error. Surely the entire purpose of SharedArrays is to avoid this? Or am I misunderstanding how to use it with pmap?
This is a crosspost from the Julia discourse here.

pmap will do the work of passing the data to the processes, so you don't need to use SharedArrays. Typically, the function provided to pmap (and indeed map) will be a pure function (one that doesn't mutate any variable) which returns one element of an output array. That function is mapped across each element of the input array, and pmap will construct the output array for you. For example, in your case, the code may look a bit like this:
calcpoint(source, (a,b,c,d)) = rand() # or some function of source and the indices a,b,c,d
data["field"] = pmap(calcpoint, Iterators.repeated(source), Iterators.product(a,b,c,d))

Related

How to pass a list of parameters to workers in Julia Distributed

With Julia 1.5.3, I wanted to pass a list of parameters to the distributed workers.
I first tried it in a non-distributed way:
using Distributed
@everywhere begin
    using SharedArrays
    solve(a,b,c) = return (1,2,3)
    d_rates = LinRange(0.01, 0.33, 5)
    m_rates = LinRange(0.01, 0.25, 5)
    population_size = 10^3
    max_iterations_perloop = 10^3
    nb_repeats = 2
    nb_params = length(d_rates)*length(m_rates)*nb_repeats
    para = enumerate(Base.product(d_rates, m_rates, population_size, max_iterations_perloop, 1:nb_repeats))
    results = SharedArray{Tuple{Int, Int, Int}}(nb_params)
end

for (y, x) in para
    results[y] = solve(x[1], x[2], x[3])
end
which worked fine. I then changed the final loop to:
@sync @distributed for (y, x) in para
    results[y] = solve(x[1], x[2], x[3])
end
I then got an error (truncated):
ERROR: LoadError: TaskFailedException:
MethodError: no method matching firstindex(::Base.Iterators.Enumerate{Base.Iterators.ProductIterator{Tuple{LinRange{Float64},LinRange{Float64},Int64,Int64,UnitRange{Int64}}}})
Closest candidates are:
firstindex(::Cmd) at process.jl:638
firstindex(::Core.SimpleVector) at essentials.jl:599
firstindex(::Base64.Buffer) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Base64/src/buffer.jl:18
...
Stacktrace:
[1] (::Distributed.var"#159#161"{var"#271#272",Base.Iterators.Enumerate{Base.Iterators.ProductIterator{Tuple{LinRange{Float64},LinRange{Float64},Int64,Int64,UnitRange{Int64}}}}})() at ./task.jl:332
Stacktrace:
[1] sync_end(::Channel{Any}) at ./task.jl:314
[2] top-level scope at task.jl:333
[3] include_string(::Function, ::Module, ::String, ::String) at ./loading.jl:1088
[4] include_string(::Module, ::String, ::String) at ./loading.jl:1096
[5] invokelatest(::Any, ::Any, ::Vararg{Any,N} where N; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at ./essentials.jl:710
[6] invokelatest(::Any, ::Any, ::Vararg{Any,N} where N) at ./essentials.jl:709
Is it possible to pass such a list, and if so, how?
I assume that all your workers are on a single server and that you have actually added some workers using the addprocs command. The first problem with your code is that you create the SharedArray on all workers. Rather than that, the SharedArray constructor has the following syntax:
help?> SharedArray
  SharedArray{T}(dims::NTuple; init=false, pids=Int[])
  SharedArray{T,N}(...)

  Construct a SharedArray of a bits type T and size dims across the processes specified by pids - all of which have to be on the same host. (...)
This means that you create the SharedArray only once, from the master worker, and you can specify the workers that are aware of it using the pids argument (if you do not specify pids, all worker processes have access).
Hence your code will look like this:
using Distributed, SharedArrays
addprocs(4)
@everywhere using SharedArrays
@everywhere solve(a,b,c) = return (1,2,3)
# (...) # your setup code without @everywhere
results = SharedArray{Tuple{Int, Int, Int}}(nb_params)
@sync @distributed for (y, x) in collect(para)
    results[y] = solve(x[1], x[2], x[3])
end
Note that you will need collect because the @distributed macro needs to know the length of the collection it partitions across workers, and it does not work well with iterators.
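For reference, a complete, runnable sketch of that corrected pattern might look as follows; the solve body and the parameter grids are placeholders standing in for the real setup:
using Distributed, SharedArrays
addprocs(4)
@everywhere using SharedArrays

# Placeholder standing in for the real solver; must be defined on every worker.
@everywhere solve(a, b, c) = (1, 2, 3)

d_rates = LinRange(0.01, 0.33, 5)
m_rates = LinRange(0.01, 0.25, 5)
nb_repeats = 2
nb_params = length(d_rates) * length(m_rates) * nb_repeats

# Materialise the iterator so @distributed can index and partition it.
para = vec(collect(enumerate(Base.product(d_rates, m_rates, 1:nb_repeats))))

# Created once on the master; all workers see the same underlying memory.
results = SharedArray{Tuple{Int, Int, Int}}(nb_params)

@sync @distributed for (y, x) in para
    results[y] = solve(x[1], x[2], x[3])
end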

Largest prime factor algorithm returning out of memory error

I'm trying to learn Julia via Project Euler, and for the largest prime factor problem I ran into an out-of-memory error that I don't understand.
I have tried the same solution in Python and had no issues.
function Primes(n)
    numbers = Set{Int64}(2:n)
    primes = Int64[]
    while length(numbers)!=0
        p = pop!(numbers,minimum(numbers))
        push!(primes,p)
        if length(numbers)!=0
            numbers = setdiff(numbers,Set{Int64}(2*p:p:maximum(numbers)))
        end
    end
    return primes
end

function LPF(n)
    primes = Primes(n//2)
    for p in primes
        if n%p==0
            return p
        end
    end
end

LPF(600851475143)
this is my error message:
OutOfMemoryError()
Stacktrace:
[1] _growend! at .\array.jl:811 [inlined]
[2] resize! at .\array.jl:1003 [inlined]
[3] rehash!(::Dict{Int64,Nothing}, ::Int64) at .\dict.jl:183
[4] sizehint! at .\dict.jl:242 [inlined]
[5] sizehint! at .\set.jl:64 [inlined]
[6] union!(::Set{Int64}, ::UnitRange{Rational{Int64}}) at .\abstractset.jl:79
[7] Type at .\set.jl:10 [inlined]
[8] Primes(::Rational{Int64}) at .\In[14]:2
[9] LPF(::Int64) at .\In[24]:2
[10] top-level scope at In[25]:1

Julia provides wrong numerical result

When I tried to calculate
julia> -2.3^-7.6
-0.0017818389423254909
But the result given by my calculator is
0.0005506 + 0.001694 i
Just to be safe I tried it again, and this time it complains. Why did it not complain the first time?
julia> a = -2.3; b = -7.6; a^b
ERROR: DomainError with -2.3:
Exponentiation yielding a complex result requires a complex argument.
Replace x^y with (x+0im)^y, Complex(x)^y, or similar.
Stacktrace:
[1] throw_exp_domainerror(::Float64) at ./math.jl:35
[2] ^(::Float64, ::Float64) at ./math.jl:769
[3] top-level scope at none:0
[4] eval at ./boot.jl:319 [inlined]
[5] #85 at /Users/ssiew/.julia/packages/Atom/jodeb/src/repl.jl:129 [inlined]
[6] with_logstate(::getfield(Main, Symbol("##85#87")),::Base.CoreLogging.LogState) at ./logging.jl:397
[7] with_logger(::Function, ::Atom.Progress.JunoProgressLogger) at ./logging.jl:493
[8] top-level scope at /Users/ssiew/.julia/packages/Atom/jodeb/src/repl.jl:128
This is an order of operations issue. You can see how Julia parses that expression:
julia> parse("-2.3^-7.6")
:(-(2.3 ^ -7.6))
and so the reason you don't see an error is that you're actually computing 2.3 ^ (-7.6), which is 0.0017818389423254909, and then flipping the sign.
Your second approach is equivalent to making sure that the "x" in "x^y" is really negative, or:
julia> parse("(-2.3)^-7.6")
:(-2.3 ^ -7.6)
julia> eval(parse("(-2.3)^-7.6"))
ERROR: DomainError:
Exponentiation yielding a complex result requires a complex argument.
Replace x^y with (x+0im)^y, Complex(x)^y, or similar.
Stacktrace:
[1] nan_dom_err at ./math.jl:300 [inlined]
[2] ^(::Float64, ::Float64) at ./math.jl:699
[3] eval(::Module, ::Any) at ./boot.jl:235
[4] eval(::Any) at ./boot.jl:234
And if we follow that instruction, we get what you expect:
julia> Complex(-2.3)^-7.6
0.0005506185144176565 + 0.0016946295370871215im
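As an aside not found in the original answer: the transcript above is from an older Julia, where parse worked at top level. On Julia 1.0 and later the same check goes through Meta.parse, and the precedence behaviour is unchanged:
julia> Meta.parse("-2.3^-7.6")
:(-(2.3 ^ -7.6))

julia> -2.3^-7.6 == -(2.3^-7.6)
true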

Maximum Likelihood - using Optim package

Dear users of the Julia language, I have a problem when using the optimize function of the Optim package. What is the error in the code below?
using Optim
using Distributions

rng = MersenneTwister(1234);
d = Weibull(1,1)
x = rand(d,1000)

function pdf_weibull(x, lambda, k)
    k/lambda * (x/lambda).^(k-1) * exp((-x/lambda)^k)
end

function obj(x::Vector, lambda, k)
    soma = 0
    for i in x
        soma = soma + log(pdf_weibull(i,lambda,k))
    end
    -soma
end

obj(x, pars) = obj(x, pars...)

optimize(vars -> obj(x, vars...), [1.0,1.0])
Output
julia> optimize(vars -> obj(x, vars...), [1.0,1.0])
ERROR: DomainError:
Exponentiation yielding a complex result requires a complex argument.
Replace x^y with (x+0im)^y, Complex(x)^y, or similar.
Stacktrace:
[1] nan_dom_err at ./math.jl:300 [inlined]
[2] ^ at ./math.jl:699 [inlined]
[3] (::##2#4)(::Float64, ::Float64, ::Float64) at ./<missing>:0
[4] pdf_weibull(::Float64, ::Float64, ::Float64) at ./REPL[6]:2
[5] obj(::Array{Float64,1}, ::Float64, ::Float64) at ./REPL[7]:4
[6] (::##5#6)(::Array{Float64,1}) at ./REPL[11]:1
[7] value(::NLSolversBase.NonDifferentiable{Float64,Array{Float64,1},Val{false}}, ::Array{Float64,1}) at /home/pedro/.julia/v0.6/NLSolversBase/src/interface.jl:19
[8] initial_state(::Optim.NelderMead{Optim.AffineSimplexer,Optim.AdaptiveParameters}, ::Optim.Options{Float64,Void}, ::NLSolversBase.NonDifferentiable{Float64,Array{Float64,1},Val{false}}, ::Array{Float64,1}) at /home/pedro/.julia/v0.6/Optim/src/multivariate/solvers/zeroth_order/nelder_mead.jl:139
[9] optimize(::NLSolversBase.NonDifferentiable{Float64,Array{Float64,1},Val{false}}, ::Array{Float64,1}, ::Optim.NelderMead{Optim.AffineSimplexer,Optim.AdaptiveParameters}, ::Optim.Options{Float64,Void}) at /home/pedro/.julia/v0.6/Optim/src/multivariate/optimize/optimize.jl:25
[10] #optimize#151(::Array{Any,1}, ::Function, ::Tuple{##5#6}, ::Array{Float64,1}) at /home/pedro/.julia/v0.6/Optim/src/multivariate/optimize/interface.jl:62
[11] #optimize#148(::Array{Any,1}, ::Function, ::Function, ::Array{Float64,1}) at /home/pedro/.julia/v0.6/Optim/src/multivariate/optimize/interface.jl:52
[12] optimize(::Function, ::Array{Float64,1}) at /home/pedro/.julia/v0.6/Optim/src/multivariate/optimize/interface.jl:52
[13] macro expansion at ./REPL.jl:97 [inlined]
[14] (::Base.REPL.##1#2{Base.REPL.REPLBackend})() at ./event.jl:73
It is a simple problem to obtain the maximum likelihood estimates of the parameters that index the Weibull distribution.
Best regards.
The reason for your problem is that your definition of pdf_weibull is incorrect. Here is a corrected definition:
function pdf_weibull(x, lambda, k)
    k/lambda * (x/lambda)^(k-1) * exp(-(x/lambda)^k)
end
Note that I have moved the - sign in the exp part of the expression. If you change this, all will work as expected.
Now - why does Julia complain with a DomainError? The reason is that, because of the error in your code, you try to calculate the value of something like (-1.0)^0.5. In Julia, ^ is implemented in a type-stable way. This means, in particular, that when it is passed Float64 as both arguments, it is guaranteed to return a Float64 or throw an error. Clearly (-1.0)^0.5 cannot be computed in the real domain - that is why an error is thrown. If you passed (-1+0im)^0.5 there would be no error, as we are passing a complex number to ^, so the result can also be a complex number in a type-stable way.
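A quick REPL illustration of that type-stability rule (an aside, not part of the original answer; stack trace omitted):
julia> (-1.0)^0.5
ERROR: DomainError with -1.0:
Exponentiation yielding a complex result requires a complex argument.
Replace x^y with (x+0im)^y, Complex(x)^y, or similar.

julia> (-1.0 + 0im)^0.5
6.123233995736766e-17 + 1.0im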

BenchmarkTools' @belapsed not working on symbol

Introduction
I have a directory with the following structure
--> Report
--> Problems
--> PE_001
--> Julia
PE_001.naive.jl
PE_001.jl
--> Benchmarks
test_001.txt
test_002.txt
--> Results
--> PE_002
.
.
.
--> PE_XXX
--> Benchmark
I am attempting to iterate over all the Julia files and benchmark them against the benchmarking data located under the top directory Benchmark. I do not want to have to cd into each directory and run @belapsed from the Julia command line to time every function individually.
To solve this problem I wrote the following code, which is supposed to be located under Benchmarks in the hierarchy above. However, I made it slightly simpler for illustrative purposes.
Attempt at solution
EDIT: The code below does NOT follow the hierarchy outlined above. To quickly reproduce the error, the code below has been written in such a way that all the files can be placed in the same directory.
benchmark.jl
include("PE_002.jl")
using BenchmarkTools
function get_file_path(PE=1)
current_folder = pwd()
PE_folder = "/PE_" * lpad(string(PE),3,"0")
dirname(pwd()) * "/Problems" * PE_folder * "/Julia"
end
function include_files(PE_dir)
for filename in readdir(PE_dir)
if !startswith(filename, "benchmark")
filepath = PE_dir * "/" * filename
#everywhere include($filepath)
end
end
end
function benchmark_files(PE_dir)
for filename in readdir(PE_dir)
if !startswith(filename, "benchmark")
f = getfield(Main, Symbol(filename[1:end-3]))
# Produces an error
println(#belapsed f())
end
end
end
# Works
println(#belapsed PE_002())
PE_dir = pwd()
include_files(PE_dir)
benchmark_files(PE_dir)
PE_002.jl
function PE_002(limit = 4*10^6)
    a, b = 0, 2
    while b < limit
        a, b = b, 4 * b + a
    end
    div(a + b - 2, 4)
end
PE_002_naive.jl
function PE_002_naive(limit=4 * 10^6, F_1=1, F_2=2)
    total = 0
    while F_2 < limit
        if F_2 % 2 == 0
            total += F_2
        end
        F_1, F_2 = F_2, F_1 + F_2
    end
    total
end
test_001.txt
0*10**(2**0)
4*10**(2**0)
4*10**(2**1)
4*10**(2**2)
4*10**(2**3)
4*10**(2**4)
4*10**(2**5)
4*10**(2**6)
Question
Interestingly enough, including the file PE_002 and then running @belapsed works; however, obtaining the filename from the directory, turning it into a symbol, and then trying to time it with @belapsed fails.
I know @elapsed works; however, due to garbage collection it is not nearly accurate enough for my needs.
Is there a simple way to benchmark all files in a remote directory using BenchmarkTools or a similarly accurate tool?
All I need is a single number representing mean / average running time from each file.
EDIT 2: Per request I have included the full error message below
~/P/M/Julia-belaps ❯❯❯ julia benchmark.jl
9.495495495495496e-9
ERROR: LoadError: UndefVarError: f not defined
Stacktrace:
[1] ##core#665() at /home/oisov/.julia/v0.6/BenchmarkTools/src/execution.jl:290
[2] ##sample#666(::BenchmarkTools.Parameters) at /home/oisov/.julia/v0.6/BenchmarkTools/src/execution.jl:296
[3] #_run#6(::Bool, ::String, ::Array{Any,1}, ::Function, ::BenchmarkTools.Benchmark{Symbol("##benchmark#664")}, ::BenchmarkTools.Parameters) at /home/oisov/.julia/v0.6/BenchmarkTools/src/execution.jl:324
[4] (::BenchmarkTools.#kw##_run)(::Array{Any,1}, ::BenchmarkTools.#_run, ::BenchmarkTools.Benchmark{Symbol("##benchmark#664")}, ::BenchmarkTools.Parameters) at ./<missing>:0
[5] anonymous at ./<missing>:?
[6] #run_result#16(::Array{Any,1}, ::Function, ::BenchmarkTools.Benchmark{Symbol("##benchmark#664")}, ::BenchmarkTools.Parameters) at /home/oisov/.julia/v0.6/BenchmarkTools/src/execution.jl:40
[7] (::BenchmarkTools.#kw##run_result)(::Array{Any,1}, ::BenchmarkTools.#run_result, ::BenchmarkTools.Benchmark{Symbol("##benchmark#664")}, ::BenchmarkTools.Parameters) at ./<missing>:0
[8] #run#17(::Array{Any,1}, ::Function, ::BenchmarkTools.Benchmark{Symbol("##benchmark#664")}, ::BenchmarkTools.Parameters) at /home/oisov/.julia/v0.6/BenchmarkTools/src/execution.jl:43
[9] (::Base.#kw##run)(::Array{Any,1}, ::Base.#run, ::BenchmarkTools.Benchmark{Symbol("##benchmark#664")}, ::BenchmarkTools.Parameters) at ./<missing>:0 (repeats 2 times)
[10] macro expansion at /home/oisov/.julia/v0.6/BenchmarkTools/src/execution.jl:208 [inlined]
[11] benchmark_files(::String) at /home/oisov/Programming/Misc/Julia-belaps/benchmark.jl:26
[12] include_from_node1(::String) at ./loading.jl:569
[13] include(::String) at ./sysimg.jl:14
[14] process_options(::Base.JLOptions) at ./client.jl:305
[15] _start() at ./client.jl:371
while loading /home/oisov/Programming/Misc/Julia-belaps/benchmark.jl, in expression starting on line 36
Change println(@belapsed f()) to println(@belapsed $f()). I can't fully explain it, but here is a link to the relevant part of the docs.
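A rough sketch of why the interpolation helps: BenchmarkTools evaluates the benchmarked expression in its own scope, so a local variable such as f is not visible there (hence UndefVarError); $f splices the value of f into the expression instead. The helper below is a made-up illustration of the pattern, not the asker's actual benchmark.jl:
using BenchmarkTools

# Stand-in for one of the included problem functions.
function PE_002(limit = 4*10^6)
    a, b = 0, 2
    while b < limit
        a, b = b, 4 * b + a
    end
    div(a + b - 2, 4)
end

function benchmark_function(name::AbstractString)
    f = getfield(Main, Symbol(name))
    # $f splices the *value* of the local variable `f` into the benchmark
    # expression; without it, the expression refers to a name that does not
    # exist in the scope where BenchmarkTools evaluates it.
    @belapsed $f()
end

println(benchmark_function("PE_002"))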
