Julia and Zygote: Adjoint Error When Doing QR Factorization - julia

I have written a surrogate function called GEKPLS and I am trying to make the code work with an optimizer to find the optimal theta parameter values.
As a first step, I'm trying to make it work with Zygote and have the following code:
"""
    min_rlfv(theta)

Build a `GEKPLS` surrogate with the given `theta` and return the negated
`reduced_likelihood_function_value` of that surrogate.
"""
function min_rlfv(theta)
    surrogate = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, theta)
    return -(surrogate.reduced_likelihood_function_value)
end
# Differentiate `min_rlfv` with Zygote at theta = [0.01, 0.1].
Zygote.gradient(min_rlfv, [0.01, 0.1])
Running the above code results in the following error message:
ERROR: Need an adjoint for constructor LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}}. Gradient is of type LinearAlgebra.Transpose{Float64, Matrix{Float64}}
The stacktrace leads to the following function. The highlighted line in my code editor is Q, G = qr(Ft) from the following code block:
# Compute the reduced likelihood value for the hyperparameters `theta`.
# Returns the tuple (beta, gamma, reduced_likelihood_function_value).
# NOTE(review): judging by their names, `d` holds pairwise distances, `ij` the
# matching (row, column) index pairs and `y_norma` the normalised targets —
# confirm against the caller.
function _reduced_likelihood_function(theta, kernel_type, d, nt, ij, y_norma, noise = 0.0)
# Placeholder only: unconditionally overwritten before the return below.
reduced_likelihood_function_value = -Inf
nugget = 1000000.0 * eps() #a jitter for numerical stability;
# `r` is assigned only for the "squar_exp" kernel; with any other `kernel_type`
# it stays undefined and the first use of `r` in the loop below fails.
if kernel_type == "squar_exp"
r = squar_exp(theta, d)
end
# Start from a diagonal matrix with (1 + nugget + noise) on the diagonal ...
R = (I + zeros(nt, nt)) .* (1.0 + nugget + noise)
# ... then write r[k] into both (i, j) and (j, i) so that R is symmetric.
for k in 1:size(ij)[1]
R[ij[k, 1], ij[k, 2]] = r[k]
R[ij[k, 2], ij[k, 1]] = r[k]
end
# Lower-triangular Cholesky factor of R.
C = cholesky(R).L
F = ones(nt, 1)
Ft = C \ F
# This factorization is the line the Zygote error quoted in the post points at.
Q, G = qr(Ft)
Q = Array(Q)
Yt = C \ y_norma
# `⋅` is the dot product, so the right-hand side is a one-element vector.
beta = G \ [(transpose(Q) ⋅ Yt)]
# Residual of Yt after removing the fitted Ft .* beta term.
rho = Yt .- (Ft .* beta)
gamma = transpose(C) \ rho
# Mean of the squared residuals (column-wise sum divided by nt).
sigma2 = sum((rho) .^ 2, dims = 1) / nt
# Product of diag(C).^(2/nt); since C is the Cholesky factor of R this equals det(R)^(1/nt).
detR = prod(diag(C) .^ (2.0 / nt))
reduced_likelihood_function_value = -nt * log10(sum(sigma2)) - nt * log10(detR)
return beta, gamma, reduced_likelihood_function_value
end
Any pointers on how this can be fixed?

Related

Changing Sampled Parameter Type in Turing.jl Model

This is my Julia code to simulate data and sample from a Turing.jl model:
using LinearAlgebra, Distributions, StatsBase
using Turing, FillArrays, DynamicHMC, LabelledArrays
using NNlib, GLM
using CSV, DataFrames
"""
    generate_hmnl_data(R=100, S=30, C=3, Theta=ones(2, 4), Sigma=Matrix(Diagonal(fill(0.1, 4))))

Simulate a data set and return it as a named tuple.

`X` is an `R × S × C × K` array of standard-normal draws, where `K` is the
number of columns of `Theta`. `Z` is `G × R` (with `G` the number of rows of
`Theta`): its first row is all ones and any further rows are standard-normal
draws. Column `r` of `beta_true` is drawn from
`MvNormal(Theta' * Z[:, r], Sigma)`, and `Y[r, s]` is sampled from `1:C` with
weights `exp.(X[r, s, :, :] * beta_true[:, r])`.

`Z[:, r]` and `Theta` are printed once per `r` while the data are generated.
"""
function generate_hmnl_data(R::Int=100, S::Int=30, C::Int=3,
                            Theta::Array{Float64, 2}=ones(2, 4),
                            Sigma::Array{Float64, 2}=Matrix(Diagonal(fill(0.1, 4))))
    G, K = size(Theta)
    choices = Array{Int64}(undef, R, S)
    design = randn(R, S, C, K)
    covariates = Array{Float64}(undef, G, R)
    covariates[1, :] .= 1
    if G > 1
        covariates[2:G, :] = randn(R * (G - 1))
    end
    coefs = Array{Float64}(undef, K, R)
    for r in 1:R
        println(covariates[:, r])
        println(Theta)
        coefs[:, r] = rand(MvNormal(Theta' * covariates[:, r], Sigma))
        for s in 1:S
            utilities = design[r, s, :, :] * coefs[:, r]
            choices[r, s] = sample(1:C, Weights(exp.(utilities)))
        end
    end
    return (R=R, S=S, C=C, K=K, G=G, Y=choices, X=design, Z=covariates,
            beta_true=coefs, Theta_true=Theta, Sigma_true=Sigma)
end
# Simulate one data set using every default argument.
d1 = generate_hmnl_data()
# Turing model as posted in the question. `Theta` and `Beta` are allocated with
# `zeros(...)`, i.e. as Float64 arrays, before being sampled into; the post
# reports that sampling from this version fails with the errors quoted below it.
@model function hmnl(G::Int, Y::Matrix{Int64}, X::Array{Float64}, Z::Matrix{Float64})
    R, S, C, K = size(X)
    Theta = zeros(K, G)
    for k in 1:K
        for g in 1:G
            Theta[k, g] ~ Normal(0, 10)
        end
    end
    Sigma ~ InverseWishart(K, diagm(ones(K)))
    Beta = zeros(K, R)
    println(eltype(Beta))
    for r in 1:R
        Beta[:, r] ~ MvNormal(Theta * Z[:, r], Sigma)
        println(typeof(Beta[:, r]))
        for s in 1:S
            # Attempted workaround from the question: force a Float64 copy.
            beta_r = copy(Beta[:, r])
            beta_r = convert(Vector{Float64}, beta_r)
            ut_rs = X[r, s, :, :] * beta_r
            v = softmax(ut_rs)
            Y[r, s] ~ Categorical(v)
        end
    end
end
# NOTE(review): presumably step size 0.05 and 10 leapfrog steps — confirm
# against the HMC constructor of the Turing version in use.
sampler = HMC(.05, 10)
# Instantiate the model on the simulated data and draw 1000 samples.
test_mod = hmnl(d1.G, d1.Y, d1.X, d1.Z)
chains = sample(test_mod, sampler, 1_000)
I get this error when I try to sample from the model: MethodError: no method matching float(::Type{Any}). The sampling statement Beta[:, r] ~ MvNormal(Theta * Z[:, r], Sigma) changes Beta[:, r] to type Vector{Any}.
I have tried
beta_r = copy(Beta[:, r])
beta_r = convert(Vector{Float64}, beta_r)
ut_rs = X[r, s, :, :] * beta_r
But then I get this error instead:
ERROR: TypeError: in typeassert, expected Float64, got a value of type ForwardDiff.Dual{Nothing, Float64, 12}
So it's messing with Turing AD somehow. I'm new to Turing and can't understand the right way to do this.
I'm reposting an answer from Tor Fjelde (https://github.com/torfjelde) which I received on Github. For Turing to work you need to ensure types in your model can be inferred. I wasn't doing that. https://turing.ml/v0.22/docs/using-turing/performancetips#ensure-that-types-in-your-model-can-be-inferred
This function worked:
# Working version from the answer: the element type `T` is a model argument
# (default Float64) and is used when allocating `Theta` and `Beta`, instead of
# hard-coding Float64 through `zeros(K, G)` / `zeros(K, R)`.
@model function hmnl(G::Int, Y::Matrix{Int64}, X::Array{Float64}, Z::Matrix{Float64}, ::Type{T} = Float64) where {T}
    R, S, C, K = size(X)
    Theta = zeros(T, K, G)
    for k in 1:K
        for g in 1:G
            Theta[k, g] ~ Normal(0, 10)
        end
    end
    Sigma ~ InverseWishart(K, diagm(ones(K)))
    Beta = zeros(T, K, R)
    println(eltype(Beta))
    for r in 1:R
        Beta[:, r] ~ MvNormal(Theta * Z[:, r], Sigma)
        println(typeof(Beta[:, r]))
        for s in 1:S
            ut_rs = X[r, s, :, :] * Beta[:, r]
            v = softmax(ut_rs)
            Y[r, s] ~ Categorical(v)
        end
    end
end

Can't seem to get NLsolve to converge in Julia. Can you suggest any tips?

I'm trying to solve a life cycle problem in economics using Julia but I'm having trouble with NLsolve. The model boils down to solving a two-equation system to find optimal leisure hours and capital stock in each working period. After retirement the economic agent sets leisure = 1, so I only need to solve a single nonlinear equation for capital. This part works fine. It's solving the two-equation system that seems to break down.
I'm fairly new to Julia and to programming in general, so any advice would be very helpful. Advice, pointers, and recommendations on all aspects of the code would also be greatly appreciated. The model is solved backwards from the final time period.
My attempt
using Parameters
using Roots
using Plots
using NLsolve
using ForwardDiff
# Keyword constructor for the model parameters (a named tuple with defaults).
# In the code below, β multiplies next period's marginal utility, τ scales the
# wage term as (1-τ)*w, T sizes the capital/leisure arrays and Ret splits the
# retirement loop from the working loop. α and δ are not used by the functions
# shown here.
Model = @with_kw (α = 0.66,
                  δ = 0.02,
                  τ = 0.015,
                  β = 1/1.01,
                  T = 70,
                  Ret = 40,
                  );
"""
    du_c(c, l, η=2, γ=2)

Return `(c + 1e-6)^(-η) * l^((1 - η) * γ)` when both `c` and `l` are strictly
positive, and `Inf` otherwise.
"""
function du_c(c, l, η=2, γ=2)
    # Outside the admissible region (c ≤ 0 or l ≤ 0) the value is Inf.
    (c > 0 && l > 0) || return Inf
    return (c + 1e-6)^(-η) * l^((1 - η) * γ)
end
"""
    du_l(c, l, η=2, γ=2)

Return `γ * (c + 1e-6)^(1 - η) * l^(γ * (1 - η) - 1)` when both `c` and `l`
are strictly positive, and `Inf` otherwise.
"""
function du_l(c, l, η=2, γ=2)
    # Outside the admissible region (c ≤ 0 or l ≤ 0) the value is Inf.
    (l > 0 && c > 0) || return Inf
    return γ * (c + 1e-6)^(1 - η) * l^(γ * (1 - η) - 1)
end
# Euler-equation residual for a working period `t` that is followed by another
# working period. `x` is today's capital and `y` today's leisure; `k` and `l`
# hold the capital and leisure values already solved for later periods.
function create_euler_work(x, y, m, k, l, r, w, t)
    # x = today's capital, y = leisure
    @unpack α, β, τ, δ, T, Ret = m
    # Consumption today and next period.
    c_1 = x*(1+r) + (1-τ)*w*(1-y) - k[t+1]
    c_2 = k[t+1]*(1+r) + (1-τ)*w*(1-l[t+1]) - k[t+2]
    return du_c(c_1,y) - β*(1+r)*du_c(c_2,l[t+1])
end
# Euler-equation residual for a retired period `t`: leisure is fixed at 1 in
# both periods and income is `b` instead of the wage term. `x` is today's
# capital; `k` holds the capital values already solved for later periods.
function create_euler_retire(x, m, k, r, b, t)
    # Holds at time periods Ret onwards
    @unpack α, β, τ, δ, T, Ret = m
    # Consumption today and next period.
    c_1 = x*(1+r) + b - k[t+1]
    c_2 = k[t+1]*(1+r) + b - k[t+2]
    return du_c(c_1,1) - β*(1+r)*du_c(c_2,1)
end
# Euler-equation residual for the period that uses wage income today and `b`
# with leisure fixed at 1 next period. `x` is today's capital and `y` today's
# leisure; `k` holds the capital values already solved for later periods.
function create_euler_lyw(x, y, m, k, r, w, b, t)
    # x = today's capital, y = leisure
    @unpack α, β, τ, δ, T, Ret = m
    # Consumption today (wage income) and next period (income `b`).
    c_1 = x*(1+r) + (1-τ)*w*(1-y) - k[t+1]
    c_2 = k[t+1]*(1+r) + b - k[t+2]
    return du_c(c_1,y) - β*(1+r)*du_c(c_2,1)
end
# Within-period first-order condition residual:
# du_l(c, y) - (1-τ)*w*du_c(c, y), with `c` the consumption implied by today's
# capital `x`, leisure `y` and next period's capital `k[t+1]`.
function create_foc(x, y, m, k, r, w, t)
    # x = today's capital, y = leisure
    @unpack α, β, τ, δ, T = m
    c = x*(1+r) + (1-τ)*w*(1-y) - k[t+1]
    return du_l(c,y) - (1-τ)*w*du_c(c,y)
end
# Solve the life-cycle problem backwards from the final period.
#
# `m` supplies the parameters, `guess` is stored as capital in period T, and
# `r`, `w`, `b` are forwarded to the Euler / first-order-condition residuals.
# Returns `(k[1] - initial, k, l)`: the gap between the solved period-1 capital
# and `initial`, plus the full capital and leisure paths.
function life_cycle(m, guess, r, w, b, initial)
    @unpack α, β, τ, δ, T, Ret = m
    k = zeros(T+1);   # k[T+1] stays 0
    l = zeros(T);
    k[T] = guess
    println("Period t = $(T+1) Retirment, k = $(k[T+1]), l.0 = NA")
    println("Period t = $T Retirment, k = $(k[T]), l = 1.0")
    ########################## Retirment ################################
    # Leisure is fixed at 1, so each period is a single equation in capital,
    # solved by bracketing on (0, 100).
    for t in T-1:-1:Ret+1
        euler(x) = create_euler_retire(x, m, k, r, b, t)
        k[t] = find_zero(euler, (0,100))
        l[t] = 1
        println("Period t = $t Retirment, k = $(k[t]), l = $(l[t])")
    end
    ###################### Retirement Year #############################
    # Period Ret: two unknowns (capital, leisure), two equations.
    for t in Ret:Ret
        euler(x,y) = create_euler_lyw(x, y, m, k, r, w, b, t)
        foc(x,y) = create_foc(x, y, m, k, r, w, t)
        function f!(F, x)
            F[1] = euler(x[1], x[2])
            F[2] = foc(x[1], x[2])
        end
        # Every period starts from the same initial point [5; 0.7].
        res = nlsolve(f!, [5; 0.7], autodiff = :forward)
        k[t] = res.zero[1]
        l[t] = res.zero[2]
        println("Period t = $t Working, k = $(k[t]), l = $(l[t])")
    end
    ############################ Working ###############################
    # Periods Ret-1 down to 1: same two-equation system, using the leisure
    # already solved for period t+1.
    for t in Ret-1:-1:1
        euler(x,y) = create_euler_work(x, y, m, k, l, r, w, t)
        foc(x,y) = create_foc(x, y, m, k, r, w, t)
        function f!(F, x)
            F[1] = euler(x[1], x[2])
            F[2] = foc(x[1], x[2])
        end
        res = nlsolve(f!, [5; 0.7], autodiff = :forward)
        k[t] = res.zero[1]
        l[t] = res.zero[2]
        println("Period t = $t Working, k = $(k[t]), l = $(l[t])")
    end
    #####################################################################
    return k[1] - initial, k, l
end
# Build the parameter set with all defaults.
m = Model();
# Arguments after `m`, in order: guess = 0.3, r = 0.03, w = 1.0, b = 0.0, initial = 0.0.
residual, k, l = life_cycle(m, 0.3, 0.03, 1.0, 0.0, 0.0)
The code seems to break on period 35 with the error "During the resolution of the nonlinear system, the evaluation of following equations resulted in a non-finite number: [1,2]" However the solutions seem to go weird at period 37.

Can't get performant Julia Turing model

I've tried to reproduce the model from a PYMC3 and Stan comparison. But it seems to run slowly and when I look at @code_warntype there are some things -- K and N I think -- which the compiler seemingly calls Any.
I've tried adding types -- though I can't add types to turing_model's arguments and things are complicated within turing_model because it's using autodiff variables and not the usuals. I put all the code into the function do_it to avoid globals, because they say that globals can slow things down. (It actually seems slower, though.)
Any suggestions as to what's causing the problem? The turing_model code is what's iterating, so that should make the most difference.
using Turing, StatsPlots, Random
# Logistic function: 1 / (1 + exp(-x)).
function sigmoid(x)
    denominator = 1.0 + exp(-x)
    return 1.0 / denominator
end
# Divide `w0` and every entry of `w1` by the Euclidean length of the combined
# vector (w0, w1...). Returns the pair (scaled w0, scaled w1).
function scale(w0::Float64, w1::Array{Float64,1})
    magnitude = sqrt(w0^2 + sum(w1 .^ 2))
    return w0 / magnitude, w1 ./ magnitude
end
# Generate a synthetic logistic-regression data set, define the Turing model
# inside the function (to avoid globals) and time one NUTS run.
# NOTE(review): the signature declares a `Chains` return type, but the return
# statement builds a named tuple `(w_true, chains)` — confirm which is intended.
function do_it(iterations::Int64)::Chains
    K = 10 # predictor dimension
    N = 1000 # number of data samples
    X = rand(N, K) # predictors (1000, 10)
    w1 = rand(K) # weights (10,)
    w0 = -median(X * w1) # 50% of elements for each class (number)
    w0, w1 = scale(w0, w1) # unit length (euclidean)
    w_true = [w0, w1...]
    y = (w0 .+ (X * w1)) .> 0.0 # labels
    y = [Float64(x) for x in y]
    σ = 5.0
    # K×K matrix with σ on the diagonal and 0.0 elsewhere.
    σm = [x == y ? σ : 0.0 for x in 1:K, y in 1:K]
    @model turing_model(X, y, σ, σm) = begin
        w0_pred ~ Normal(0.0, σ)
        w1_pred ~ MvNormal(σm)
        p = sigmoid.(w0_pred .+ (X * w1_pred))
        @inbounds for n in 1:length(y)
            y[n] ~ Bernoulli(p[n])
        end
    end
    @time chain = sample(turing_model(X, y, σ, σm), NUTS(iterations, 200, 0.65));
    # ϵ = 0.5
    # τ = 10
    # @time chain = sample(turing_model(X, y, σ), HMC(iterations, ϵ, τ));
    return (w_true=w_true, chains=chain::Chains)
end
# Run the experiment with iterations = 1000.
chain = do_it(1000)

Gradient descent implementation is not working in Julia

I am trying to Implement gradient Descent algorithm from scratch to find the slope and intercept value for my linear fit line.
Using the package and calculating slope and intercept, I get slope = 0.04 and intercept = 7.2 but when I use my gradient descent algorithm for the same problem, I get slope and intercept both values = (-infinity,-infinity)
Here is my code
# NOTE(review): both vectors contain `141,5` where the surrounding sequence
# suggests `14,15` — possibly a typo; confirm before relying on these values.
x= [1,2,3,4,5,6,7,8,9,10,11,12,13,141,5,16,17,18,19,20]
y=[2,3,4,5,6,7,8,9,10,11,12,13,141,5,16,17,18,19,20,21]
# Per-sample update of a line fit `slope * x + intercept` over the global data
# vectors `x` and `y`: 10000 passes, step 0.001. Returns (slope, intercept).
# The update adds the scaled partial derivatives, exactly as in the question.
function GradientDescent()
    slope = 0
    intercept = 0
    step = 0.001
    for _ in 1:10000, k in 1:length(x)
        residual = y[k] - (slope * x[k] + intercept) # error in predicted value
        grad_slope = 2 * residual * (-x[k])          # partial derivative w.r.t. slope
        grad_intercept = 2 * residual * (-1)         # partial derivative w.r.t. intercept
        slope += grad_slope * step
        intercept += grad_intercept * step
    end
    return slope, intercept
end
Values = GradientDescent() # after running values = (-inf,-inf)
I have not done the math, but instead wrote the tests. It seems you got a sign error when assigning m and c.
Also, writing the tests really helps, and Julia makes it simple :)
# Per-sample gradient descent for a line fit `slope * x + intercept` to the
# data (`x`, `y`): 10000 passes, step 0.001, starting from (0.0, 0.0).
# Returns (slope, intercept).
function GradientDescent(x, y)
    slope, intercept = 0.0, 0.0
    step = 0.001
    for _ in 1:10000, k in 1:length(x)
        residual = y[k] - (slope * x[k] + intercept)
        grad_slope = 2 * residual * (-x[k])
        grad_intercept = 2 * residual * (-1)
        # Step against the gradient.
        slope -= grad_slope * step
        intercept -= grad_intercept * step
    end
    return slope, intercept
end
# `Base.Test` is the pre-0.7 location of the test framework; on Julia ≥ 0.7 it
# is the `Test` standard library (`using Test`).
using Base.Test
# Fit exact lines y = slope * x + intercept on x = 1:20 and check that both
# coefficients are recovered for every slope/intercept combination.
@testset "gradient descent" begin
    @testset "slope $slope" for slope in [0, 1, 2]
        @testset "intercept for $intercept" for intercept in [0, 1, 2]
            x = 1:20
            y = broadcast(x -> slope * x + intercept, x)
            computed_slope, computed_intercept = GradientDescent(x, y)
            @test slope ≈ computed_slope atol=1e-8
            @test intercept ≈ computed_intercept atol=1e-8
        end
    end
end
I can't get your exact numbers, but this is close. Perhaps it helps?
# 141 ?
datax = [1,2,3,4,5,6,7,8,9,10,11,12,13,141,5,16,17,18,19,20]
datay = [2,3,4,5,6,7,8,9,10,11,12,13,141,5,16,17,18,19,20,21]
# Per-sample gradient descent for a line fit `slope * x + intercept` over the
# global vectors `datax` / `datay`: 10000 passes with step 0.00001.
# Returns (slope, intercept).
function gradientdescent()
    slope = 0
    intercept = 0
    step = 0.00001
    for _ in 1:10000, i in 1:length(datay)
        xi = datax[i]
        yi = datay[i]
        residual = yi - (slope * xi + intercept)
        grad_slope = 2residual * xi
        grad_intercept = 2residual
        slope += grad_slope * step
        intercept += grad_intercept * step
    end
    return slope, intercept
end
gradientdescent()
(-0.04, 17.35)
It seems that adjusting the learning rate is critical...

Optim using gradient Error: "no method matching"

I’m trying to optimize a function using one of the algorithms that require a gradient. Basically I’m trying to learn how to optimize a function using a gradient in Julia. I’m fairly confident that my gradient is specified correctly. I know this because the similarly defined Matlab function for the gradient gives me the same values as in Julia for some test values of the arguments. Also, the Matlab version using fminunc with the gradient seems to optimize the function fine.
However when I run the Julia script, I seem to get the following error:
julia> include("ex2b.jl")
ERROR: `g!` has no method matching g!(::Array{Float64,1}, ::Array{Float64,1})
while loading ...\ex2b.jl, in expression starting on line 64
I'm running Julia 0.3.2 on a windows 7 32bit machine. Here is the code (basically a translation of some Matlab to Julia):
using Optim
# Expand the two feature columns `X1` and `X2` into polynomial terms
# X1.^(i-j) .* X2.^j, appended column by column after a leading column of ones.
# NOTE(review): two-argument `range` meant (start, length) on the Julia 0.3.x
# this post targets, but means (start, stop) on Julia ≥ 1.7, so the set of
# (i, j) terms generated depends on the Julia version — confirm which is intended.
function mapFeature(X1, X2)
degrees = 5
# First column: one entry of 1.0 per row of X1.
out = ones(size(X1)[1])
for i in range(1, degrees+1)
for j in range(0, i+1)
# Force the term into a single column before appending it.
term = reshape( (X1.^(i-j) .* X2.^(j)), size(X1.^(i-j))[1], 1)
out = hcat(out, term)
end
end
return out
end
# Logistic function 1 / (1 + exp(-z)). The `./` is kept from the original so
# the numerator broadcasts over whatever `1 + exp(-z)` evaluates to.
sigmoid(z) = 1 ./ (1 + exp(-z))
# Regularised logistic-regression cost for parameters `theta` on data (X, y).
# The penalty `lam / (2m) * sum(theta[2:end].^2)` skips the first parameter.
function costFunc_logistic(theta, X, y, lam)
    m = length(y)
    h = sigmoid(X * theta)
    penalty = sum(theta[2:end].^2) * lam / (2 * m)
    loss = sum( (-y .* log(h) - (1 - y) .* log(1 - h)) ) ./ m
    return loss + penalty
end
# Gradient of the regularised logistic-regression cost. Despite the `!` in the
# name, no argument is modified: a freshly computed gradient vector is returned.
# The regularisation term is added to every entry except the first.
function costFunc_logistic_gradient!(theta, X, y, lam, m)
    residual = sigmoid(X * theta) .- y
    grad = (X' * residual) ./ m
    grad[2:end] = grad[2:end] + (theta[2:end] .* lam) / m
    return grad
end
# Load the data set; the first two columns are the raw features.
data = readcsv("ex2data2.txt")
X = mapFeature(data[:,1], data[:,2])
# m = number of rows, n = number of columns of the raw data.
m, n = size(data)
# The last column holds the labels.
y = data[:, end]
# Start from all-zero parameters, one per column of the expanded X.
theta = zeros(size(X)[2])
lam = 1.0
f(theta::Array) = costFunc_logistic(theta, X, y, lam)
# `g!` is defined with a single argument here, whereas the error quoted in the
# post shows it being called with two array arguments.
g!(theta::Array) = costFunc_logistic_gradient!(theta, X, y, lam, m)
optimize(f, g!, theta, method = :l_bfgs)
And here is some of the data:
0.051267,0.69956,1
-0.092742,0.68494,1
-0.21371,0.69225,1
-0.375,0.50219,1
-0.51325,0.46564,1
-0.52477,0.2098,1
-0.39804,0.034357,1
-0.30588,-0.19225,1
0.016705,-0.40424,1
0.13191,-0.51389,1
0.38537,-0.56506,1
0.52938,-0.5212,1
0.63882,-0.24342,1
0.73675,-0.18494,1
0.54666,0.48757,1
0.322,0.5826,1
0.16647,0.53874,1
-0.046659,0.81652,1
-0.17339,0.69956,1
-0.47869,0.63377,1
-0.60541,0.59722,1
-0.62846,0.33406,1
-0.59389,0.005117,1
-0.42108,-0.27266,1
-0.11578,-0.39693,1
0.20104,-0.60161,1
0.46601,-0.53582,1
0.67339,-0.53582,1
-0.13882,0.54605,1
-0.29435,0.77997,1
-0.26555,0.96272,1
-0.16187,0.8019,1
-0.17339,0.64839,1
-0.28283,0.47295,1
-0.36348,0.31213,1
-0.30012,0.027047,1
-0.23675,-0.21418,1
-0.06394,-0.18494,1
0.062788,-0.16301,1
0.22984,-0.41155,1
0.2932,-0.2288,1
0.48329,-0.18494,1
0.64459,-0.14108,1
0.46025,0.012427,1
0.6273,0.15863,1
0.57546,0.26827,1
0.72523,0.44371,1
0.22408,0.52412,1
0.44297,0.67032,1
0.322,0.69225,1
0.13767,0.57529,1
-0.0063364,0.39985,1
-0.092742,0.55336,1
-0.20795,0.35599,1
-0.20795,0.17325,1
-0.43836,0.21711,1
-0.21947,-0.016813,1
-0.13882,-0.27266,1
0.18376,0.93348,0
0.22408,0.77997,0
Let me know if you guys need additional details. Btw, this relates to a coursera machine learning course if curious.
The gradient should not be a function to compute the gradient,
but a function to store it
(hence the exclamation mark in the function name, and the second argument in the error message).
The following seems to work.
# In-place gradient for the optimizer: compute the gradient at `theta` and
# write it into `storage` rather than only returning it.
function g!(theta::Array, storage::Array)
    gradient = costFunc_logistic_gradient!(theta, X, y, lam, m)
    storage[:] = gradient
end
optimize(f, g!, theta, method = :l_bfgs)
The same using closures and currying (version for those who got used to a function that returns the cost and gradient):
# Build the pair (cost, gradient!) of closures over the data `X`, `y` and the
# regularisation weight `λ`. The outer `θ` argument is not used: each closure
# takes its own `θ`. The second closure writes the gradient into `storage`.
function cost_gradient(θ, X, y, λ)
    m = length(y)
    cost = function (θ::Array)
        h = sigmoid(X * θ) #(m,n+1)*(n+1,1) -> (m,1)
        return (1 / m) * sum(-y .* log(h) .- (1 - y) .* log(1 - h)) + λ / (2 * m) * sum(θ[2:end] .^ 2)
    end
    gradient! = function (θ::Array, storage::Array)
        h = sigmoid(X * θ) #(m,n+1)*(n+1,1) -> (m,1)
        # The leading 0 leaves the first parameter unregularised.
        storage[:] = (1 / m) * (X' * (h .- y)) + (λ / m) * [0; θ[2:end]]
    end
    return cost, gradient!
end
Then, somewhere in the code:
# NOTE(review): `n`, `λ` and `your_iterations` are not defined in this snippet;
# `your_iterations` reads as a placeholder to fill in — confirm `n` matches the
# number of columns of X.
initialθ = zeros(n,1);
# Get the cost closure and the in-place gradient closure.
f, g! = cost_gradient(initialθ, X, y, λ);
res = optimize(f, g!, initialθ, method = :cg, iterations = your_iterations);
θ = res.minimum;

Resources