Unable to understand dimension mismatch error in Julia - julia

I’m a beginner with Julia and ML. I’m attempting to re-use code from the Flux Model Zoo, specifically this, to classify images from this dataset. Below is my version of the code - I modified the data load and the params in the build_model to account for the difference in image size and the number of character types to be classified. The original had 28x28 and 10 digits, the arabic character set had 32x32 images and 28 characters.
function getimages(filename)
filepath = pwd() * "/images/" * filename
mtrx = Matrix(DataFrame(CSV.File(filepath)))
r, _ = size(mtrx)
v = Vector{Matrix{Int64}}()
for i = 1:r
push!(v, reshape(m[i, :], 32, 32))
end
v
end
function getlabels(filename)
filepath = pwd() * "/images/" * filename
vec(Matrix(DataFrame(CSV.File(filepath))))
end
function load_data(args)
train_data_file = "csvTrainImages.csv"
test_data_file = "csvTestImages.csv"
train_label_file = "csvTrainLabel.csv"
test_label_file = "csvTestLabel.csv"
train_data = getimages(train_data_file)
test_data = getimages(test_data_file)
train_labels = getlabels(train_label_file)
test_labels = getlabels(test_label_file)
xtrain = Flux.flatten(train_data)
xtest = Flux.flatten(test_data)
ytrain, ytest = onehotbatch(train_labels, 1:28), onehotbatch(test_labels, 1:28)
train_loader = DataLoader((xtrain, ytrain), batchsize=args.batchsize, shuffle=true)
test_loader = DataLoader((xtest, ytest), batchsize=args.batchsize)
return train_loader, test_loader
end
function build_model(; imgsize=(32,32,1), nclasses=28)
return Chain(
Dense(prod(imgsize), 32, relu),
Dense(32, nclasses))
end
function loss_and_accuracy(data_loader, model, device)
acc = 0
ls = 0.0f0
num = 0
for (x, y) in data_loader
x, y = device(x), device(y)
ŷ = model(x)
ls += logitcrossentropy(model(x), y, agg=sum)
acc += sum(onecold(cpu(model(x))) .== onecold(cpu(y)))
num += size(x, 2)
end
return ls / num, acc / num
end
#kwdef mutable struct Args
η::Float64 = 3e-4 # learning rate
batchsize::Int = 256 # batch size
epochs::Int = 10 # number of epochs
use_cuda::Bool = true # use gpu (if cuda available)
end
function train(; kws...)
args = Args(; kws...) # collect options in a struct for convenience
if CUDA.functional() && args.use_cuda
#info "Training on CUDA GPU"
CUDA.allowscalar(false)
device = gpu
else
#info "Training on CPU"
device = cpu
end
# Create test and train dataloaders
train_loader, test_loader = load_data(args)
# Construct model
model = build_model() |> device
ps = Flux.params(model) # model's trainable parameters
## Optimizer
opt = ADAM(args.η)
## Training
for epoch in 1:args.epochs
for (x, y) in train_loader
x, y = device(x), device(y) # transfer data to device
gs = gradient(() -> logitcrossentropy(model(x), y), ps) # compute gradient
Flux.Optimise.update!(opt, ps, gs) # update parameters
end
# Report on train and test
train_loss, train_acc = loss_and_accuracy(train_loader, model, device)
test_loss, test_acc = loss_and_accuracy(test_loader, model, device)
println("Epoch=$epoch")
println(" train_loss = $train_loss, train_accuracy = $train_acc")
println(" test_loss = $test_loss, test_accuracy = $test_acc")
end
end
I get the following error when I train the model. Specifically, during the gradient computation. Could you help me understand which two matrices the error refers to and point me towards a solution? My guess is that it has to do with the build_model params, but I’m not quite sure what needs to change and how.
DimensionMismatch("matrix A has dimensions (32,1024), matrix B has dimensions (1,256)")
macro expansion#interface2.jl:0[inlined]
_pullback(::Zygote.Context, ::typeof(throw), ::DimensionMismatch)#interface2.jl:9
_pullback#matmul.jl:814[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra._generic_matmatmul!), ::Matrix{Matrix{Float32}}, ::Char, ::Char, ::Matrix{Float32}, ::Matrix{Matrix{Int64}}, ::LinearAlgebra.MulAddMul{true, true, Bool, Bool})#interface2.jl:0
_pullback#matmul.jl:802[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra.generic_matmatmul!), ::Matrix{Matrix{Float32}}, ::Char, ::Char, ::Matrix{Float32}, ::Matrix{Matrix{Int64}}, ::LinearAlgebra.MulAddMul{true, true, Bool, Bool})#interface2.jl:0
_pullback#matmul.jl:302[inlined]
_pullback#matmul.jl:275[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra.mul!), ::Matrix{Matrix{Float32}}, ::Matrix{Float32}, ::Matrix{Matrix{Int64}})#interface2.jl:0
_pullback#matmul.jl:153[inlined]
_pullback(::Zygote.Context, ::typeof(*), ::Matrix{Float32}, ::Matrix{Matrix{Int64}})#interface2.jl:0
_pullback#basic.jl:147[inlined] ....

Solved by fixing the get images method as below.
function getimages(filename)
filepath = pwd() * "/images/" * filename
mtrx = Matrix(DataFrame(CSV.File(filepath)))
return mtrx'
end

Related

TypeError: Caught TypeError in DataLoader worker process 0. TypeError: 'KeyError' object is not iterable

from torchvision_starter.engine import train_one_epoch, evaluate
from torchvision_starter import utils
import multiprocessing
import time
n_cpu = multiprocessing.cpu_count()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
_ = model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.2,
verbose=True
)
# Let's train for 10 epochs
num_epochs = 1
start = time.time()
for epoch in range(10, 10 + num_epochs):
# train for one epoch, printing every 10 iterations
train_one_epoch(model, optimizer, data_loaders['train'], device, epoch, print_freq=10)
# update the learning rate
lr_scheduler.step()
# evaluate on the validation dataset
evaluate(model, data_loaders['valid'], device=device)
stop = time.time()
print(f"\n\n{num_epochs} epochs in {stop - start} s ({(stop-start) / 3600:.2f} hrs)")
Before I move on to this part, everything is OK. But after I run the part, the error is like below:
I have tried to add drop_last to the helper.py's function like:
data_loaders["train"] = torch.utils.data.DataLoader(
train_data,
batch_size=batch_size,
sampler=train_sampler,
num_workers=num_workers,
collate_fn=utils.collate_fn,
drop_last=True
)
But it doesn't work. By the way, the torch and torchvision are compatible and Cuda is available.
I wonder how to fix it.
The get_data_loaders function:
def get_data_loaders(
folder, batch_size: int = 2, valid_size: float = 0.2, num_workers: int = -1, limit: int = -1, thinning: int = None
):
"""
Create and returns the train_one_epoch, validation and test data loaders.
:param foder: folder containing the dataset
:param batch_size: size of the mini-batches
:param valid_size: fraction of the dataset to use for validation. For example 0.2
means that 20% of the dataset will be used for validation
:param num_workers: number of workers to use in the data loaders. Use -1 to mean
"use all my cores"
:param limit: maximum number of data points to consider
:param thinning: take every n-th frame, instead of all frames
:return a dictionary with 3 keys: 'train_one_epoch', 'valid' and 'test' containing respectively the
train_one_epoch, validation and test data loaders
"""
if num_workers == -1:
# Use all cores
num_workers = multiprocessing.cpu_count()
# We will fill this up later
data_loaders = {"train": None, "valid": None, "test": None}
# create 3 sets of data transforms: one for the training dataset,
# containing data augmentation, one for the validation dataset
# (without data augmentation) and one for the test set (again
# without augmentation)
data_transforms = {
"train": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=True),
"valid": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
"test": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
}
# Create train and validation datasets
train_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["train"],
train=True,
thinning=thinning
)
# The validation dataset is a split from the train_one_epoch dataset, so we read
# from the same folder, but we apply the transforms for validation
valid_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["valid"],
train=True,
thinning=thinning
)
# obtain training indices that will be used for validation
n_tot = len(train_data)
indices = torch.randperm(n_tot)
# If requested, limit the number of data points to consider
if limit > 0:
indices = indices[:limit]
n_tot = limit
split = int(math.ceil(valid_size * n_tot))
train_idx, valid_idx = indices[split:], indices[:split]
# define samplers for obtaining training and validation batches
train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx) # =
# prepare data loaders
data_loaders["train"] = torch.utils.data.DataLoader(
train_data,
batch_size=batch_size,
sampler=train_sampler,
num_workers=num_workers,
collate_fn=utils.collate_fn,
drop_last=True
)
data_loaders["valid"] = torch.utils.data.DataLoader(
valid_data, # -
batch_size=batch_size, # -
sampler=valid_sampler, # -
num_workers=num_workers, # -
collate_fn=utils.collate_fn,
drop_last=True
)
# Now create the test data loader
test_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["test"],
train=False,
thinning=thinning
)
if limit > 0:
indices = torch.arange(limit)
test_sampler = torch.utils.data.SubsetRandomSampler(indices)
else:
test_sampler = None
data_loaders["test"] = torch.utils.data.DataLoader(
test_data,
batch_size=batch_size,
shuffle=False,
num_workers=num_workers,
sampler=test_sampler,
collate_fn=utils.collate_fn,
drop_last=True
# -
)
return data_loaders
class UdacitySelfDrivingDataset(torch.utils.data.Dataset):
# Mean and std of the dataset to be used in nn.Normalize
mean = torch.tensor([0.3680, 0.3788, 0.3892])
std = torch.tensor([0.2902, 0.3069, 0.3242])
def __init__(self, root, transform, train=True, thinning=None):
super().__init__()
self.root = os.path.abspath(os.path.expandvars(os.path.expanduser(root)))
self.transform = transform
# load datasets
if train:
self.df = pd.read_csv(os.path.join(self.root, "labels_train.csv"))
else:
self.df = pd.read_csv(os.path.join(self.root, "labels_test.csv"))
# Index by file id (i.e., a sequence of the same length as the number of images)
codes, uniques = pd.factorize(self.df['frame'])
if thinning:
# Take every n-th rows. This makes sense because the images are
# frames of videos from the car, so we are essentially reducing
# the frame rate
thinned = uniques[::thinning]
idx = self.df['frame'].isin(thinned)
print(f"Keeping {thinned.shape[0]} of {uniques.shape[0]} images")
print(f"Keeping {idx.sum()} objects out of {self.df.shape[0]}")
self.df = self.df[idx].reset_index(drop=True)
# Recompute codes
codes, uniques = pd.factorize(self.df['frame'])
self.n_images = len(uniques)
self.df['image_id'] = codes
self.df.set_index("image_id", inplace=True)
self.classes = ['car', 'truck', 'pedestrian', 'bicyclist', 'light']
self.colors = ['cyan', 'blue', 'red', 'purple', 'orange']
#property
def n_classes(self):
return len(self.classes)
def __getitem__(self, idx):
if idx in self.df.index:
row = self.df.loc[[idx]]
else:
return KeyError(f"Element {idx} not in dataframe")
# load images fromm file
img_path = os.path.join(self.root, "images", row['frame'].iloc[0])
img = Image.open(img_path).convert("RGB")
# Exclude bogus boxes with 0 height or width
h = row['ymax'] - row['ymin']
w = row['xmax'] - row['xmin']
filter_idx = (h > 0) & (w > 0)
row = row[filter_idx]
# get bounding box coordinates for each mask
boxes = row[['xmin', 'ymin', 'xmax', 'ymax']].values
# convert everything into a torch.Tensor
boxes = torch.as_tensor(boxes, dtype=torch.float32)
# get the labels
labels = torch.as_tensor(row['class_id'].values, dtype=int)
image_id = torch.tensor([idx])
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# assume no crowd for everything
iscrowd = torch.zeros((row.shape[0],), dtype=torch.int64)
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
if self.transform is not None:
img, target = self.transform(img, target)
return img, target
def __len__(self):
return self.n_images
def plot(self, idx, renormalize=True, predictions=None, threshold=0.5, ax=None):
image, label_js = self[idx]
if renormalize:
# Invert the T.Normalize transform
unnormalize = T.Compose(
[
T.Normalize(mean = [ 0., 0., 0. ], std = 1 / type(self).std),
T.Normalize(mean = -type(self).mean, std = [ 1., 1., 1. ])
]
)
image, label_js = unnormalize(image, label_js)
if ax is None:
fig, ax = plt.subplots(figsize=(8, 8))
_ = ax.imshow(torch.permute(image, [1, 2, 0]))
for i, box in enumerate(label_js['boxes']):
xy = (box[0], box[1])
h, w = (box[2] - box[0]), (box[3] - box[1])
r = patches.Rectangle(xy, h, w, fill=False, color=self.colors[label_js['labels'][i]-1], lw=2, alpha=0.5)
ax.add_patch(r)
if predictions is not None:
# Make sure the predictions are on the CPU
for k in predictions:
predictions[k] = predictions[k].detach().cpu().numpy()
for i, box in enumerate(predictions['boxes']):
if predictions['scores'][i] > threshold:
xy = (box[0], box[1])
h, w = (box[2] - box[0]), (box[3] - box[1])
r = patches.Rectangle(xy, h, w, fill=False, color=self.colors[predictions['labels'][i]-1], lw=2, linestyle=':')
ax.add_patch(r)
_ = ax.axis("off")
return ax

Error when calculating RHS of ode "no method matching Float64(::Num)"

I have some code that uses a function to calculate some changes in concentration, but I get an error of:
ERROR: LoadError: MethodError: no method matching Float64(::Num)
Closest candidates are:
(::Type{T})(::Real, ::RoundingMode) where T<:AbstractFloat at rounding.jl:200
(::Type{T})(::T) where T<:Number at boot.jl:760
(::Type{T})(::AbstractChar) where T<:Union{AbstractChar, Number} at char.jl:50
I have attached a MWE below.
The code initializes some parameters, and uses the initialized parameters to calculate additional parameters (Ke and kb), then inputs these parameters into my function oderhs(c,Ke,kb,aw,aw²,aw³,ρζ,ρζ²,ρζ³,γ,γ²) which should return dc which is my solution vector that I require.
using DifferentialEquations
#parameters t c0[1:4] Ke[1:2] kb[1:2] aw aw² aw³ ρ ζ ρζ ρζ² γ γ² T
# Calculate parameters
ρ = 0.592
ζ = 1.0
ρζ = ρ*ζ
ρζ² = ρζ*ρζ
ρζ³ = ρζ*ρζ²
aw = 0.995
aw² = aw*aw
aw³ = aw*aw²
γ = 1.08
γ² = γ*γ
T = 590.0
# calculate equilibrium constants
Ke[01] = (1.0E-06)*10.0^(-4.098 + (-3245.2/T) + (2.2362E+05/(T^2)) + (-3.9984E+07/(T^3)) + (log10(ρ) * (13.957 + (-1262.3/T) + (8.5641E+05/(T^2)))) )
Ke[02] = 10^(28.6059+0.012078*T+(1573.21/T)-13.2258*log10(T))
# calculate backward rate constants
kb[01] = Ke[01]*ρζ²/γ²
kb[02] = Ke[02]*γ/ρζ
# set initial concentrations
c0 = [0.09897, 0.01186, 2.94e-5, 4.17e-8]
function oderhs(c,Ke,kb,aw,aw²,aw³,ρζ,ρζ²,ρζ³,γ,γ²)
# rename c to their corresponding species
H₃BO₃ = c[1]; H₄BO₄⁻ = c[2]; OH⁻ = c[3]; H⁺ = c[4];
# rename Ke to their corresponding reactions
Ke_iw1 = Ke[1]; Ke_ba1 = Ke[2];
# rename kb to their corresponding reactions
kb_iw1 = kb[1]; kb_ba1 = kb[2];
# determine the rate of reaction for each reaction
r_iw1 = kb_iw1*(H⁺*OH⁻ - Ke_iw1*ρζ²*aw/γ²)
r_ba1 = kb_ba1*(H₄BO₄⁻ - H₃BO₃*OH⁻*Ke_ba1*γ/ρζ)
dc = zeros(eltype(c),4)
# calculate the change in species concentration
dc[1] = r_ba1
dc[2] = r_ba1
dc[3] = r_iw1 + r_ba1
dc[4] = r_iw1
return dc
end
dc = oderhs(c0,Ke,kb,aw,aw²,aw³,ρζ,ρζ²,ρζ³,γ,γ²)
zeros(eltype(c),4) creates an Array of Float64, which isn't what you want because you're trying to create a symbolic version of the ODE equations (right? otherwise this doesn't make sense). Thus you want to this be like zeros(Num,4), so that the return is the symbolic equations, and then you'd generate the actual code for DifferentialEquations.jl from the ModelingToolkit.jl ODESystem.

Why is Flux.jl throwing a "Warning: Slow Fallback implementation" and DimensionMismatch?

I am trying to create a convolutional neural network to classify MNIST data with Flux in Julia. I downloaded the data as a csv from this link: https://www.kaggle.com/oddrationale/mnist-in-csv.
My code is below:
using CSV
using DataFrames
using Images
using Base.Iterators: partition
using Flux
sqrt(x) = convert(Int64, floor(x^0.5))
matrixize(x) = cat([x[i:i+sqrt(length(x))-1] for i in 1:sqrt(length(x)):length(x)]..., dims=2)'
img(x) = Gray.(x)
process(row) = img(matrixize(row[2:length(row)])), convert(Int, 255*row[1]) |> gpu
train_data = DataFrame(CSV.File("MNIST_data/mnist_train.csv"))
train_X = []
train_y = Int64[]
for row in eachrow(train_data)
row = convert(Array, row)
row = [i/255 for i in row]
X, y = process(row)
push!(train_X, X)
push!(train_y, y)
end
train_y = Flux.onehotbatch(train_y, 0:9)
train = [(cat(float.(train_X[i])..., dims=4), train_y[:, i]) for i in partition(1:size(train_data)[1], 1000)] |> gpu
test_data = DataFrame(CSV.File("MNIST_data/mnist_train.csv"))
test_X = []
test_y = Int64[]
for row in eachrow(test_data)
row = convert(Array, row)
row = [i/255 for i in row]
X, y = process(row)
push!(test_X, X)
push!(test_y, y)
end
test_y = Flux.onehotbatch(test_y, 0:9)
println("Pre-processing Complete")
m = Chain(
Conv((5, 5), 1=>16, relu),
MaxPool((2, 2)),
Conv((5, 5), 16=>8, relu),
MaxPool((2, 2)),
Flux.flatten,
Dense(200, 100),
Dense(100, 10),
Flux.softmax
) |> gpu
loss(x, y) = Flux.Losses.crossentropy(m(x), y) |> gpu
opt = Momentum(0.01) |> gpu
println("Model Creation Complete")
println()
epochs = 10
for i in 1:epochs
for j in train
gs = gradient(params(m)) do
l = loss(j...)
end
update!(opt, params(m), gs)
end
#show accuracy(test_X, test_y)
end
println()
#show accuracy(test_X, test_y)
When I check values of test_X, test_y, train_X, and train_y, they are all in the appropriate format, but I get this error when I try to run the code:
┌ Warning: Slow fallback implementation invoked for conv! You probably don't want this; check your datatypes.
│ yT = Float64
│ T1 = Gray{Float64}
│ T2 = Float32
└ # NNlib /Users/satvikd/.julia/packages/NNlib/PI8Xh/src/conv.jl:206
┌ Warning: Slow fallback implementation invoked for conv! You probably don't want this; check your datatypes.
│ yT = Float64
│ T1 = Float64
│ T2 = Float32
└ # NNlib /Users/satvikd/.julia/packages/NNlib/PI8Xh/src/conv.jl:206
DimensionMismatch("A has dimensions (100,200) but B has dimensions (128,1000)")
The stack trace refers to line 55, the one with the gradient.
Any help would be appreciated.
It seems that you need to check your types, since one of them is float32 while other are float64. Typically flux defaults to using Float32, since that's plenty of precision for Deep learning tasks. You could use Flux.f64/f32 as well.

What's the NeuralNetDiffEq.jl equivalent of this piece of code for solving ODE in Julia?

I am trying to make documentation for NeuraNetDiffEq.jl. I find an ODE solution with Flux.jl from this amazing tutorial here https://mitmath.github.io/18S096SciML/lecture2/ml .
using Flux
using DifferentialEquations
using LinearAlgebra
using Plots
using Statistics
NNODE = Chain(x -> [x],
Dense(1, 32, tanh),
Dense(32, 1),
first)
NNODE(1.0)
g(t) = t * NNODE(t) + 1f0
ϵ = sqrt(eps(Float32))
loss() = mean(abs2(((g(t + ϵ) - g(t)) / ϵ) - cos(2π * t)) for t in 0:1f-2:1f0)
opt = Flux.Descent(0.01)
data = Iterators.repeated((), 5000)
iter = 0
cb = function () # callback function to observe training
global iter += 1
if iter % 500 == 0
display(loss())
end
end
display(loss())
Flux.train!(loss, Flux.params(NNODE), data, opt; cb=cb)
t = 0:0.001:1.0
plot(t,g.(t),label="NN")
plot!(t,1.0 .+ sin.(2π .* t) / 2π, label="True")
I am having trouble understanding the parameters involved to invoke training process for NueralNetDiffEq.jl as in:
function DiffEqBase.solve(
prob::DiffEqBase.AbstractODEProblem,
alg::NeuralNetDiffEqAlgorithm,
args...;
dt,
timeseries_errors = true,
save_everystep=true,
adaptive=false,
abstol = 1f-6,
verbose = false,
maxiters = 100)
What would be a valid input for alg parameter? What would be the equivalent code in NeuralNetDiffEq.jl for the above ODE example?

How can I access the trained parameters of a Neural ODE in Julia?

I'm trying to fit one Neural ODE to a time series usind Julia's DiffEqFlux. Here my code:
u0 = Float32[2.;0]
train_size = 15
tspan_train = (0.0f0,0.75f0)
function trueODEfunc(du,u,p,t)
true_A = [-0.1 2.0; -2.0 -0.1]
du .= ((u.^3)'true_A)'
end
t_train = range(tspan_train[1],tspan_train[2],length = train_size)
prob = ODEProblem(trueODEfunc, u0, tspan_train)
ode_data_train = Array(solve(prob, Tsit5(),saveat=t_train))
dudt = Chain(
Dense(2,50,tanh),
Dense(50,2))
ps = Flux.params(dudt)
n_ode = NeuralODE(dudt, tspan_train, Tsit5(), saveat = t_train, reltol=1e-7, abstol=1e-9)
**n_ode.p**
function predict_n_ode(p)
n_ode(u0,p)
end
function loss_n_ode(p)
pred = predict_n_ode(p)
loss = sum(abs2, ode_data_train .- pred)
loss,pred
end
final_p = []
losses = []
cb = function(p,l,pred)
display(l)
display(p)
push!(final_p, p)
push!(losses,l)
pl = scatter(t_train, ode_data_train[1,:],label="data")
scatter!(pl,t_train,pred[1,:],label="prediction")
display(plot(pl))
end
DiffEqFlux.sciml_train!(loss_n_ode, n_ode.p, ADAM(0.05), cb = cb, maxiters = 100)
**n_ode.p**
The problem is that calling n_ode.p (or Flux.params(dudt)) before and after the train function gives me back the save values. I would have expected to receive the latest updated values from the training. That's why I've created an array to gather all parameter values during the training and then access it to get the updated parameters.
Am I doing something wrong in the code? Does the train function automatically update the parameters? If not how to enforce it?
Thanks in advance!
The result is an object that holds the best parameters. Here's a complete example:
using DiffEqFlux, OrdinaryDiffEq, Flux, Optim, Plots
u0 = Float32[2.; 0.]
datasize = 30
tspan = (0.0f0,1.5f0)
function trueODEfunc(du,u,p,t)
true_A = [-0.1 2.0; -2.0 -0.1]
du .= ((u.^3)'true_A)'
end
t = range(tspan[1],tspan[2],length=datasize)
prob = ODEProblem(trueODEfunc,u0,tspan)
ode_data = Array(solve(prob,Tsit5(),saveat=t))
dudt2 = FastChain((x,p) -> x.^3,
FastDense(2,50,tanh),
FastDense(50,2))
n_ode = NeuralODE(dudt2,tspan,Tsit5(),saveat=t)
function predict_n_ode(p)
n_ode(u0,p)
end
function loss_n_ode(p)
pred = predict_n_ode(p)
loss = sum(abs2,ode_data .- pred)
loss,pred
end
loss_n_ode(n_ode.p) # n_ode.p stores the initial parameters of the neural ODE
cb = function (p,l,pred;doplot=false) #callback function to observe training
display(l)
# plot current prediction against data
if doplot
pl = scatter(t,ode_data[1,:],label="data")
scatter!(pl,t,pred[1,:],label="prediction")
display(plot(pl))
end
return false
end
# Display the ODE with the initial parameter values.
cb(n_ode.p,loss_n_ode(n_ode.p)...)
res1 = DiffEqFlux.sciml_train(loss_n_ode, n_ode.p, ADAM(0.05), cb = cb, maxiters = 300)
cb(res1.minimizer,loss_n_ode(res1.minimizer)...;doplot=true)
res2 = DiffEqFlux.sciml_train(loss_n_ode, res1.minimizer, LBFGS(), cb = cb)
cb(res2.minimizer,loss_n_ode(res2.minimizer)...;doplot=true)
# result is res2 as an Optim.jl object
# res2.minimizer are the best parameters
# res2.minimum is the best loss
At the end, the sciml_train function returns a result object that holds information about the optimization, including the final parameters as .minimizer.

Resources