I am trying to create a convolutional neural network to classify MNIST data with Flux in Julia. I downloaded the data as a csv from this link: https://www.kaggle.com/oddrationale/mnist-in-csv.
My code is below:
using CSV
using DataFrames
using Images
using Base.Iterators: partition
using Flux
sqrt(x) = convert(Int64, floor(x^0.5))
matrixize(x) = cat([x[i:i+sqrt(length(x))-1] for i in 1:sqrt(length(x)):length(x)]..., dims=2)'
img(x) = Gray.(x)
process(row) = img(matrixize(row[2:length(row)])), convert(Int, 255*row[1]) |> gpu
train_data = DataFrame(CSV.File("MNIST_data/mnist_train.csv"))
train_X = []
train_y = Int64[]
for row in eachrow(train_data)
row = convert(Array, row)
row = [i/255 for i in row]
X, y = process(row)
push!(train_X, X)
push!(train_y, y)
end
train_y = Flux.onehotbatch(train_y, 0:9)
train = [(cat(float.(train_X[i])..., dims=4), train_y[:, i]) for i in partition(1:size(train_data)[1], 1000)] |> gpu
test_data = DataFrame(CSV.File("MNIST_data/mnist_train.csv"))
test_X = []
test_y = Int64[]
for row in eachrow(test_data)
row = convert(Array, row)
row = [i/255 for i in row]
X, y = process(row)
push!(test_X, X)
push!(test_y, y)
end
test_y = Flux.onehotbatch(test_y, 0:9)
println("Pre-processing Complete")
m = Chain(
Conv((5, 5), 1=>16, relu),
MaxPool((2, 2)),
Conv((5, 5), 16=>8, relu),
MaxPool((2, 2)),
Flux.flatten,
Dense(200, 100),
Dense(100, 10),
Flux.softmax
) |> gpu
loss(x, y) = Flux.Losses.crossentropy(m(x), y) |> gpu
opt = Momentum(0.01) |> gpu
println("Model Creation Complete")
println()
epochs = 10
for i in 1:epochs
for j in train
gs = gradient(params(m)) do
l = loss(j...)
end
update!(opt, params(m), gs)
end
#show accuracy(test_X, test_y)
end
println()
#show accuracy(test_X, test_y)
When I check values of test_X, test_y, train_X, and train_y, they are all in the appropriate format, but I get this error when I try to run the code:
┌ Warning: Slow fallback implementation invoked for conv! You probably don't want this; check your datatypes.
│ yT = Float64
│ T1 = Gray{Float64}
│ T2 = Float32
└ # NNlib /Users/satvikd/.julia/packages/NNlib/PI8Xh/src/conv.jl:206
┌ Warning: Slow fallback implementation invoked for conv! You probably don't want this; check your datatypes.
│ yT = Float64
│ T1 = Float64
│ T2 = Float32
└ # NNlib /Users/satvikd/.julia/packages/NNlib/PI8Xh/src/conv.jl:206
DimensionMismatch("A has dimensions (100,200) but B has dimensions (128,1000)")
The stack trace refers to line 55, the one with the gradient.
Any help would be appreciated.
It seems that you need to check your types, since one of them is float32 while other are float64. Typically flux defaults to using Float32, since that's plenty of precision for Deep learning tasks. You could use Flux.f64/f32 as well.
Related
I need to create a dataset of 1000 graphs. I used the following code:
data_list = []
ngraphs = 1000
for i in range(ngraphs):
num_nodes = randint(10,500)
num_edges = randint(10,num_nodes*(num_nodes - 1))
f1 = np.random.randint(10, size=(num_nodes))
f2 = np.random.randint(10,20, size=(num_nodes))
f3 = np.random.randint(20,30, size=(num_nodes))
f_final = np.stack((f1,f2,f3), axis=1)
capital = 2*f1 + f2 - f3
f1_t = torch.from_numpy(f1)
f2_t = torch.from_numpy(f2)
f3_t = torch.from_numpy(f3)
capital_t = torch.from_numpy(capital)
capital_t = capital_t.type(torch.LongTensor)
x = torch.from_numpy(f_final)
x = x.type(torch.LongTensor)
edge_index = torch.randint(low=0, high=num_nodes, size=(num_edges,2), dtype=torch.long)
edge_attr = torch.randint(low=0, high=50, size=(num_edges,1), dtype=torch.long)
data = Data(x = x, edge_index = edge_index.t().contiguous(), y = capital_t, edge_attr=edge_attr )
data_list.append(data)
This works. But when I run my training function as follows:
for epoch in range(1, 500):
loss = train()
print(f'Loss: {loss:.4f}')
I keep getting the following error:
RuntimeError Traceback (most recent call
last) in ()
1 for epoch in range(1, 500):
----> 2 loss = train()
3 print(f'Loss: {loss:.4f}')
5 frames /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py
in linear(input, weight, bias) 1845 if
has_torch_function_variadic(input, weight): 1846 return
handle_torch_function(linear, (input, weight), input, weight,
bias=bias)
-> 1847 return torch._C._nn.linear(input, weight, bias) 1848 1849
RuntimeError: expected scalar type Float but found Long
Can someone help me troubleshoot this. Or make a 1000 graph dataset that doesn't throw this error.
Change your x and y tensor into FloatTensor, since Linear layer in python only accept FloatTensor inputs
I’m a beginner with Julia and ML. I’m attempting to re-use code from the Flux Model Zoo, specifically this, to classify images from this dataset. Below is my version of the code - I modified the data load and the params in the build_model to account for the difference in image size and the number of character types to be classified. The original had 28x28 and 10 digits, the arabic character set had 32x32 images and 28 characters.
function getimages(filename)
filepath = pwd() * "/images/" * filename
mtrx = Matrix(DataFrame(CSV.File(filepath)))
r, _ = size(mtrx)
v = Vector{Matrix{Int64}}()
for i = 1:r
push!(v, reshape(m[i, :], 32, 32))
end
v
end
function getlabels(filename)
filepath = pwd() * "/images/" * filename
vec(Matrix(DataFrame(CSV.File(filepath))))
end
function load_data(args)
train_data_file = "csvTrainImages.csv"
test_data_file = "csvTestImages.csv"
train_label_file = "csvTrainLabel.csv"
test_label_file = "csvTestLabel.csv"
train_data = getimages(train_data_file)
test_data = getimages(test_data_file)
train_labels = getlabels(train_label_file)
test_labels = getlabels(test_label_file)
xtrain = Flux.flatten(train_data)
xtest = Flux.flatten(test_data)
ytrain, ytest = onehotbatch(train_labels, 1:28), onehotbatch(test_labels, 1:28)
train_loader = DataLoader((xtrain, ytrain), batchsize=args.batchsize, shuffle=true)
test_loader = DataLoader((xtest, ytest), batchsize=args.batchsize)
return train_loader, test_loader
end
function build_model(; imgsize=(32,32,1), nclasses=28)
return Chain(
Dense(prod(imgsize), 32, relu),
Dense(32, nclasses))
end
function loss_and_accuracy(data_loader, model, device)
acc = 0
ls = 0.0f0
num = 0
for (x, y) in data_loader
x, y = device(x), device(y)
ŷ = model(x)
ls += logitcrossentropy(model(x), y, agg=sum)
acc += sum(onecold(cpu(model(x))) .== onecold(cpu(y)))
num += size(x, 2)
end
return ls / num, acc / num
end
#kwdef mutable struct Args
η::Float64 = 3e-4 # learning rate
batchsize::Int = 256 # batch size
epochs::Int = 10 # number of epochs
use_cuda::Bool = true # use gpu (if cuda available)
end
function train(; kws...)
args = Args(; kws...) # collect options in a struct for convenience
if CUDA.functional() && args.use_cuda
#info "Training on CUDA GPU"
CUDA.allowscalar(false)
device = gpu
else
#info "Training on CPU"
device = cpu
end
# Create test and train dataloaders
train_loader, test_loader = load_data(args)
# Construct model
model = build_model() |> device
ps = Flux.params(model) # model's trainable parameters
## Optimizer
opt = ADAM(args.η)
## Training
for epoch in 1:args.epochs
for (x, y) in train_loader
x, y = device(x), device(y) # transfer data to device
gs = gradient(() -> logitcrossentropy(model(x), y), ps) # compute gradient
Flux.Optimise.update!(opt, ps, gs) # update parameters
end
# Report on train and test
train_loss, train_acc = loss_and_accuracy(train_loader, model, device)
test_loss, test_acc = loss_and_accuracy(test_loader, model, device)
println("Epoch=$epoch")
println(" train_loss = $train_loss, train_accuracy = $train_acc")
println(" test_loss = $test_loss, test_accuracy = $test_acc")
end
end
I get the following error when I train the model. Specifically, during the gradient computation. Could you help me understand which two matrices the error refers to and point me towards a solution? My guess is that it has to do with the build_model params, but I’m not quite sure what needs to change and how.
DimensionMismatch("matrix A has dimensions (32,1024), matrix B has dimensions (1,256)")
macro expansion#interface2.jl:0[inlined]
_pullback(::Zygote.Context, ::typeof(throw), ::DimensionMismatch)#interface2.jl:9
_pullback#matmul.jl:814[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra._generic_matmatmul!), ::Matrix{Matrix{Float32}}, ::Char, ::Char, ::Matrix{Float32}, ::Matrix{Matrix{Int64}}, ::LinearAlgebra.MulAddMul{true, true, Bool, Bool})#interface2.jl:0
_pullback#matmul.jl:802[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra.generic_matmatmul!), ::Matrix{Matrix{Float32}}, ::Char, ::Char, ::Matrix{Float32}, ::Matrix{Matrix{Int64}}, ::LinearAlgebra.MulAddMul{true, true, Bool, Bool})#interface2.jl:0
_pullback#matmul.jl:302[inlined]
_pullback#matmul.jl:275[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra.mul!), ::Matrix{Matrix{Float32}}, ::Matrix{Float32}, ::Matrix{Matrix{Int64}})#interface2.jl:0
_pullback#matmul.jl:153[inlined]
_pullback(::Zygote.Context, ::typeof(*), ::Matrix{Float32}, ::Matrix{Matrix{Int64}})#interface2.jl:0
_pullback#basic.jl:147[inlined] ....
Solved by fixing the get images method as below.
function getimages(filename)
filepath = pwd() * "/images/" * filename
mtrx = Matrix(DataFrame(CSV.File(filepath)))
return mtrx'
end
I have the following function that uses symbolics in Julia. Everything works fine until the moment of plotting
using Distributions
using Plots
using Symbolics
using SymbolicUtils
function BinomialMeasure(iter::Int64, p::Float64, current_level = nothing)
#variables m0 m1
if current_level == nothing
current_level = [1]
end
next_level = []
for item in current_level
append!(next_level, m0*item)
append!(next_level, m1*item)
end
If iter != 0
current_level = next_level
return BinomialMeasure(iter - 1, p , current_level)
else
return [substitute(i, Dict([m0 => p, m1 => 1 - p])) for i in next_level]
end
end
y = BinomialMeasure(10, 0.4)
x = [( i + 1 ) / length(y) for i = 1:length(y) ]
append!(x, 0)
append!(y,0)
plot(x,y)
Then it returns the following:
MethodError: no method matching AbstractFloat(::Num)
Closest candidates are:
AbstractFloat(::Real, !Matched::RoundingMode) where T<:AbstractFloat at rounding.jl:200
AbstractFloat(::T) where T<:Number at boot.jl:716
AbstractFloat(!Matched::Bool) at float.jl:258
y is an Array{Num,1} and x is an Array{Float64,1}.
I tried map(float, y), convert(float,y) and float(y), but I think it not possible to convert a type Num to a Float64 or at least I don't know how to do it.
you can access the field val without using string and parse
y_val = [i.val for i in y]
this will of course have way better performance than parsing a string
Here is the great example from StatWithJuliaBook (please find the following)
It demos how to smooth a plot of stary sky stars.png
My question is about argmax().I. According to the author, "Note the use of the trailing “.I” at the end of each argmax, which extracts the values of the co-ordinates in column-major."
What does it mean? Is there other parameter? I can't find any description in the document.
According to author, it seems to be the position of column-wise maxmum value, yet when I tried argmax(gImg, dims=2), the result is different.
#julia> yOriginal, xOriginal = argmax(gImg).I
#(192, 168)
#julia> yy, xx = argmax(gImg, dims = 2)
#400×1 Matrix{CartesianIndex{2}}:
# CartesianIndex(1, 187)
# CartesianIndex(2, 229)
⋮
# CartesianIndex(399, 207)
# CartesianIndex(400, 285)
#julia> yy, xx
#(CartesianIndex(1, 187), CartesianIndex(2, 229))
Please advise.
using Plots, Images; pyplot()
img = load("stars.png")
gImg = red.(img)*0.299 + green.(img)*0.587 + blue.(img)*0.114
rows, cols = size(img)
println("Highest intensity pixel: ", findmax(gImg))
function boxBlur(image,x,y,d)
if x<=d || y<=d || x>=cols-d || y>=rows-d
return image[x,y]
else
total = 0.0
for xi = x-d:x+d
for yi = y-d:y+d
total += image[xi,yi]
end
end
return total/((2d+1)^2)
end
end
blurImg = [boxBlur(gImg,x,y,5) for x in 1:cols, y in 1:rows]
yOriginal, xOriginal = argmax(gImg).I
yBoxBlur, xBoxBlur = argmax(blurImg).I
p1 = heatmap(gImg, c=:Greys, yflip=true)
p1 = scatter!((xOriginal, yOriginal), ms=60, ma=0, msw=4, msc=:red)
p2 = heatmap(blurImg, c=:Greys, yflip=true)
p2 = scatter!((xBoxBlur, yBoxBlur), ms=60, ma=0, msw=4, msc=:red)
plot(p1, p2, size=(800, 400), ratio=:equal, xlims=(0,cols), ylims=(0,rows),
colorbar_entry=false, border=:none, legend=:none)
I is a field in an object of type CartesianIndex which is returned by argmax when its argument has more than 1 dimension.
If in doubt always try using dump.
Please consider the code below:
julia> arr = rand(4,4)
4×4 Matrix{Float64}:
0.971271 0.0350186 0.20805 0.284678
0.348161 0.19649 0.30343 0.291894
0.385583 0.990593 0.216894 0.814146
0.283823 0.750008 0.266643 0.473104
julia> el = argmax(arr)
CartesianIndex(3, 2)
julia> dump(el)
CartesianIndex{2}
I: Tuple{Int64, Int64}
1: Int64 3
2: Int64 2
However, getting CartesianIndex object data via its internal structure is not very elegant. The nice Julian way to do it is to use the appropriate method:
julia> Tuple(el)
(3, 2)
Or just access the indices directly:
julia> el[1], el[2]
(3, 2)
I have come across an implementation of SG-filter in Julia at this link. When I execute the function apply_filter, an error is returned -
UndefVarError: apply_filter not defined
I think this is an implementation for a previous version of Julia (?). I am executing this in Julia 1.0 as of now. Couldn't find documentation about the defined types, which is where my guess is concerning the error
I would like to forewarn the user about using the function savitzkyGolay in Julia. There is a mismatch with the result from Scipy implementation (which must have undergone several iterations of checking by the community)
#pyimport scipy.signal as ss
x=[1,2,3,4,5,6,7,8,9,10]
savitzkyGolay(x,5,1)
10-element Array{Float64,1}:
1.6000000000000003
2.200000000000001
3.0
4.0
5.000000000000001
6.000000000000001
7.0
8.0
8.8
9.400000000000002
#Python's scipy implementation
ss.savgol_filter(x,5,1)
10-element Array{Float64,1}:
1.0000000000000007
2.0000000000000004
2.9999999999999996
3.999999999999999
4.999999999999999
5.999999999999999
6.999999999999998
7.999999999999998
8.999999999999996
9.999999999999995
If it can help, I have simplified the code.
using Pkg, LinearAlgebra, DSP, Plots
function vandermonde(halfWindow, polyDeg)
x=[1.0*i for i in -halfWindow:halfWindow]
n = polyDeg+1
m = length(x)
V = zeros(m, n)
for i = 1:m
V[i,1] = 1.0
end
for j = 2:n
for i = 1:m
V[i,j] = x[i] * V[i,j-1]
end
end
return V
end
function SG(halfWindow, polyDeg)
V = vandermonde(halfWindow,polyDeg)
Q,R=qr(V)
n = polyDeg+1
m = 2*halfWindow+1
R1 = vcat(R, zeros(m-n,n))
sg = R1\Q'
for i in 1:(polyDeg+1)
sg[i,:] = sg[i,:]*factorial(i-1)
end
return sg'
end
function apply_filter(filter,signal)
halfWindow = round(Int,(length(filter)-1)/2)
padded_signal = [signal[1]*ones(halfWindow);signal;signal[end]*ones(halfWindow)]
filter_cross_signal = conv(filter[end:-1:1], padded_signal)
return filter_cross_signal[2*halfWindow+1:end-2*halfWindow]
end
Here is how I use it :
mean_speed_unfiltered = readdlm("mean_speeds_raw_-2.txt")
sg = SG(500,2); # halt-window, polynomal degree
t = 10*10^(-3)#s #time of the simulation
dt = 0.1/γ; #time step
Nt = convert(Int, round(t/dt)); #number of iteration
#Smooth the mean speed curve:
mean_speeds_smoothed = apply_filter(sg[:,1],mean_speed_unfiltered)
png(plot([j*dt for j=0:Nt] , mean_speeds_smoothed, title = "Smoothed mean speed over
time", xlabel = "t (s)"), "Mean_speed_filtered_SG")
derivative_mean_speeds_smoothed = apply_filter(sg[:,2],mean_speed_unfiltered)
plt1 = plot(mean_speeds_smoothed,derivative_mean_speeds_smoothed, title = "derivative mean speed over speed", xlabel = "<v>(t) (s)", ylabel = "d<v(t)>/dt")
png(plt1, "Force_SG_1D2Lasers")
However it seems to me that the code presented in https://gist.github.com/lnacquaroli/c97fbc9a15488607e236b3472bcdf097#file-savitzkygolay-jl-L34 is faster.