Commit
Merge pull request #8 from JuliaGNI/dev-manifold-optimizers
Implemented Householder reflections with modifications.
michakraus authored May 26, 2023
2 parents 604354f + 47cd4e6 commit 42c7fda
Showing 66 changed files with 2,035 additions and 629 deletions.
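The commit message above mentions Householder reflections. For orientation only, here is a generic, minimal sketch of a plain Householder reflection in Julia — standard linear algebra, not the modified implementation this PR adds:

```julia
using LinearAlgebra

# H = I - 2vv'/(v'v) reflects vectors across the hyperplane orthogonal to v;
# H is symmetric, orthogonal and involutory (H² = I).
function householder(v::AbstractVector)
    I - 2 * (v * v') / dot(v, v)
end

v = [1.0, 2.0, 2.0]
H = householder(v)
@assert H * v ≈ -v             # v itself is reflected onto -v
@assert H' * H ≈ I(length(v))  # orthogonality
```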
2 changes: 0 additions & 2 deletions Project.toml
@@ -10,7 +10,6 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e"
NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
@@ -25,7 +24,6 @@ Distances = "0.10"
ForwardDiff = "0.10"
HDF5 = "0.16"
Lux = "0.4"
Manifolds = "0.8"
NLsolve = "4"
NNlib = "0.8"
Optimisers = "0.2"
6 changes: 5 additions & 1 deletion README.md
@@ -7,4 +7,8 @@
[![CI](https://github.com/JuliaGNI/GeometricMachineLearning.jl/workflows/CI/badge.svg)](https://github.com/JuliaGNI/GeometricMachineLearning.jl/actions?query=workflow:CI)
[![Codecov Status](https://codecov.io/gh/JuliaGNI/GeometricMachineLearning.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaGNI/GeometricMachineLearning.jl)

This package implements various scientific machine learning models that aim at learning dynamical systems with geometric structure, such as Hamiltonian (symplectic) or Lagrangian (variational) systems.
`GeometricMachineLearning.jl` offers a flexible tool for designing neural networks for dynamical systems with geometric structure, such as Hamiltonian (symplectic) or Lagrangian (variational) systems.

At its core every neural network comprises three components: a neural network architecture, a loss function and an optimizer.

Traditionally, physical properties have been encoded into the loss function (the PINN approach), but in `GeometricMachineLearning.jl` this is done exclusively through the architectures and the optimizers of the neural network, thus giving theoretical guarantees that these properties are actually preserved.
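For illustration, a minimal sketch of the training workflow the new scripts in this commit follow. The layer and optimizer names (`Gradient`, `AdamOptimizer`, `init_optimizer_cache`, `optimization_step!`) are taken from the diffs below; the data pair is made up, and this is a sketch rather than a tested example:

```julia
using GeometricMachineLearning, Lux, LinearAlgebra
import Random, Zygote

# a small SympNet-style model built from Gradient layers
model = Chain(Gradient(2, 10, tanh), Gradient(2, 10, tanh; change_q=false))
ps, st = Lux.setup(Random.default_rng(), model)

# simple one-step prediction loss
loss(ps, z, z′) = norm(Lux.apply(model, z, ps, st)[1] - z′)

# optimizer and its cache; per the README text above, structure such as manifold
# constraints is handled in the optimizer update rather than in the loss
o = AdamOptimizer()
cache = init_optimizer_cache(model, o)

z, z′ = [0.1, 0.2], [0.15, 0.18]   # made-up (input, target) pair
dp = Zygote.gradient(ps -> loss(ps, z, z′), ps)[1]
optimization_step!(o, model, ps, cache, dp)
```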
76 changes: 76 additions & 0 deletions scripts/autoencoder.jl
@@ -0,0 +1,76 @@
using MLDatasets
using Random
using GeometricMachineLearning
using LinearAlgebra

using Lux
#Lux is needed for this flatten operation -> should be removed!
import Flux, Zygote

train_x, train_y = MNIST(split=:train)[:]
#for visualization
#using ImageInTerminal, ImageShow
#convert2image(MNIST, train_x[:,:,1])

train_x = Flux.flatten(train_x) |> gpu
train_y = Flux.onehotbatch(train_y, 0:9) |> gpu

#encoder layer
Ψᵉ = Chain(
    Dense(28*28, 16, tanh),
    Dense(16, 16, tanh),
    Dense(16, 10, Lux.σ)
)

#decoder layer
Ψᵈ = Chain(
    Dense(10, 16, tanh),
    Dense(16, 16, tanh),
    Dense(16, 28*28, Lux.σ)
)

const model = Chain(Ψᵉ, Ψᵈ)

ps, st = Lux.setup(Random.default_rng(), model) .|> gpu

# loss for a single sample (autoencoder reconstruction error)
function loss_sing(ps, x, y)
    norm(Lux.apply(model, x, ps, st)[1] - x)
end

function loss_sing(ps, train_x, train_y, index)
    loss_sing(ps, train_x[:, index], train_y[:, index])
end
function full_loss(ps, train_x, train_y)
    num = size(train_x, 2)
    mapreduce(index -> loss_sing(ps, train_x, train_y, index), +, 1:num)
end

o = AdamOptimizer()
cache = init_optimizer_cache(model, o)
println("initial loss: ", full_loss(ps, train_x, train_y))

training_steps = 100

num = size(train_x, 2)
batch_size = 10

for i in 1:training_steps
    #@time dp = Zygote.gradient(loss_closure, ps)[1]

    # draw one random sample and compute its gradient via a pullback
    index₁ = Int(ceil(rand()*num))
    x = train_x[:, index₁]
    y = train_y[:, index₁]
    l, pb = Zygote.pullback(ps -> loss_sing(ps, x, y), ps)
    dp = pb(one(l))[1]

    # accumulate the gradients of the remaining samples in the batch
    indices = Int.(ceil.(rand(batch_size - 1)*num))
    for index in indices
        x = train_x[:, index]
        y = train_y[:, index]
        l, pb = Zygote.pullback(ps -> loss_sing(ps, x, y), ps)
        dp = _add(dp, pb(one(l))[1])
    end
    optimization_step!(o, model, ps, cache, dp)
end
println("final loss: ", full_loss(ps, train_x, train_y))
72 changes: 72 additions & 0 deletions scripts/classifier.jl
@@ -0,0 +1,72 @@
using MLDatasets
using Random
using GeometricMachineLearning
using LinearAlgebra
using ProgressMeter
using Lux

#Lux is needed for this flatten operation -> should be removed!
import Flux, Zygote

train_x, train_y = MNIST(split=:train)[:]
test_x, test_y = MNIST(split=:test)[:]

#for visualization
#using ImageInTerminal, ImageShow
#convert2image(MNIST, train_x[:,:,1])

train_x = Flux.flatten(train_x) #|> gpu
train_y = Flux.onehotbatch(train_y, 0:9) #|> gpu
test_x = Flux.flatten(test_x) #|> gpu
test_y = Flux.onehotbatch(test_y, 0:9) #|> gpu

#encoder layer
Ψᵉ = Chain(
    Dense(28*28, 64, tanh),
    Dense(64, 64, tanh),
    Dense(64, 10, Lux.σ)
)

ps, st = Lux.setup(Random.default_rng(), Ψᵉ) #.|> gpu

#loss_sing
function loss_sing(ps, x, y)
    norm(Lux.apply(Ψᵉ, x, ps, st)[1] - y)
end
function loss_sing(ps, train_x, train_y, index)
    loss_sing(ps, train_x[:, index], train_y[:, index])
end
function full_loss(ps, train_x, train_y)
    num = size(train_x, 2)
    mapreduce(index -> loss_sing(ps, train_x, train_y, index), +, 1:num)
end

num = size(train_x,2)
batch_size = 64
training_steps = 100


o = AdamOptimizer()
cache = init_optimizer_cache(Ψᵉ, o)

println("initial loss: ", full_loss(ps, train_x, train_y)/num)

@showprogress "Training network ..." for i in 1:training_steps
index₁ = Int(ceil(rand()*num))
x = train_x[:, index₁] #|> gpu
y = train_y[:, index₁] #|> gpu
l, pb = Zygote.pullback(ps -> loss_sing(ps, x, y), ps)
dp = pb(one(l))[1]

indices = Int.(ceil.(rand(batch_size -1)*num))
for index in indices
x = train_x[:, index] #|> gpu
y = train_y[:, index] #|> gpu
l, pb = Zygote.pullback(ps -> loss_sing(ps, x, y), ps)
dp = _add(dp, pb(one(l))[1])
end
optimization_step!(o, Ψᵉ, ps, cache, dp)
end
println("final loss: ", full_loss(ps, train_x, train_y)/num)

println("\nfinal test loss: ", full_loss(ps, test_x, test_y)/size(test_x, 2))
41 changes: 41 additions & 0 deletions scripts/normal_forms/non_rev_ham.jl
@@ -0,0 +1,41 @@
using GeometricMachineLearning, Lux, LinearAlgebra
import Random, Zygote

τᵋ = Chain(Gradient(2, 10, tanh),
           Gradient(2, 10, tanh; change_q=false))

Hᵋ = Chain(Dense(2, 10, tanh), Dense(10, 1, use_bias=false))

ps_τ, st_τ = Lux.setup(Random.default_rng(), τᵋ)
ps_F, st_F = Lux.setup(Random.default_rng(), Hᵋ)

Fᵋ(z, ps_F) = SymplecticMatrix(1)*(Zygote.gradient(z -> sum(Hᵋ(z, ps_F, st_F)[1]), z)[1])
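# Fᵋ is the Hamiltonian vector field generated by Hᵋ: Fᵋ(z) = J ∇Hᵋ(z), with J the canonical symplectic matrix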

f(z) = [3*z[1]*z[2]^2, -z[2]^3]
A = [0. -1.; 1. 0.]
expA(θ) = [cos(θ) -sin(θ); sin(θ) cos(θ)]

fτᵋ(z, ps_τ) = τᵋ(z, ps_τ, st_τ)[1] |> f
#const
θpoints = range(0, 2*π, 100)[1:end-1]
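# rotational average: accumulate expA(-θ) * fτᵋ(expA(θ) * z) over the sampled angles θ, then normalize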
function Πₐfτᵋ(z, ps_τ)
    output = zero(z)
    for θ in θpoints
        output += expA(-θ)*fτᵋ(expA(θ)*z, ps_τ)
    end
    output/(2*π*length(θpoints))
end

Jτᵋ(z, ps_τ) = Zygote.jacobian(z -> τᵋ(z, ps_τ, st_τ)[1], z)[1]

function ΠₐJτᵋ(z, ps_τ)
    output = zeros(2, 2)
    for θ in θpoints
        output += expA(-θ)*Jτᵋ(expA(θ)*z, ps_τ)
        #display(output)
    end
    output/(2*π*length(θpoints))
end

loss_1(z, ps_τ, ps_F) = norm(ΠₐJτᵋ(z, ps_τ)*Fᵋ(z, ps_F) - Πₐfτᵋ(z, ps_τ))
loss_2(z, ps_τ, ps_F, ε=.1) = norm(Jτᵋ(z, ps_τ)*A*z - A*(τᵋ(z, ps_τ, st_τ)[1]) - ε*(fτᵋ(z, ps_τ) - Jτᵋ(z, ps_τ)*Fᵋ(z, ps_F)))
28 changes: 16 additions & 12 deletions scripts/sympnet_pendulum.jl
@@ -8,6 +8,7 @@ using LinearAlgebra
using Lux
using Plots
using Zygote
using ProgressMeter

import Random

@@ -18,47 +19,50 @@ q, p = pendulum_data()
plt = plot(q, p, label="Training data.")

# Sympnet model
model = Chain( Gradient(2, 10, tanh),
model = Chain( Gradient(2, 10, tanh),
Gradient(2, 10, tanh; change_q=false),
Gradient(2, 10, tanh),
Gradient(2, 10, tanh; change_q=false)
#Gradient(2, 10, tanh),
#Gradient(2, 10, tanh; change_q=false)
)

ps, st = Lux.setup(Random.default_rng(), model)

function loss_sing(ps, q, p, index)
    qp_new = Lux.apply(model, [q[index-1], p[index-1]], ps, st)[1]
    norm(qp_new - [q[index], p[index]])
end


# defines a loss function
function loss(ps, q, p, batch_size=10)
    loss = 0
    ntime = lastindex(q)
    for i in 1:batch_size
        index = Int(ceil(rand()*ntime))
        qp_new = Lux.apply(model, [q[index-1], p[index-1]], ps, st)[1]
        loss += norm(qp_new - [q[index], p[index]])
        loss += loss_sing(ps, q, p, index)
    end
    loss
end

function full_loss(ps, q, p)
    loss = 0
    for i in 1:lastindex(q)
        qp_new = Lux.apply(model, [q[i-1], p[i-1]], ps, st)[1]
        loss += norm(qp_new - [q[i], p[i]])
        loss += loss_sing(ps, q, p, i)
    end
    loss
end

# define momentum optimizer and initialize
o = MomentumOptimizer(1e-2, 0.5)
o = AdamOptimizer()
# initial gradients for calling Cache constructor
dp_in = Zygote.gradient(ps -> loss(ps, q, p), ps)[1]
cache = MomentumOptimizerCache(o, model, ps, dp_in)
cache = init_optimizer_cache(model, o)

# training
println("initial loss: ", full_loss(ps, q, p))
training_steps = 1000
for i in 1:training_steps
@showprogress for i in 1:training_steps
    dp = Zygote.gradient(ps -> loss(ps, q, p), ps)[1]
    apply!(o, cache, model, ps, dp)
    optimization_step!(o, model, ps, cache, dp)
end
println("final loss: ", full_loss(ps, q, p))

72 changes: 72 additions & 0 deletions scripts/transformer.jl
@@ -0,0 +1,72 @@
using GeometricMachineLearning, LinearAlgebra, ProgressMeter
import Lux, Zygote, Random, MLDatasets, Flux

# MNIST images are 28×28; a patch length of 7 gives (28÷7)² = 16 patches per image, each flattened to 7² = 49 pixels
image_dim = 28
patch_length = 7
n_heads = 7
n_layers = 5
patch_number = (image_dim÷patch_length)^2

train_x, train_y = MLDatasets.MNIST(split=:train)[:]
test_x, test_y = MLDatasets.MNIST(split=:test)[:]


#preprocessing steps
train_x = Tuple(map(i -> split_and_flatten(train_x[:,:,i], patch_length), 1:size(train_x,3)))
test_x = Tuple(map(i -> split_and_flatten(test_x[:,:,i], patch_length), 1:size(test_x,3)))

#implement this encoding yourself!
train_y = Flux.onehotbatch(train_y, 0:9) #|> gpu
test_y = Flux.onehotbatch(test_y, 0:9) #|> gpu


#encoder layer - final layer has to be added for evaluation purposes!
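# Stiefel=true presumably places the transformer's projection weights on the Stiefel manifold, to be handled by the manifold optimizers this PR develops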
Ψᵉ = Lux.Chain(
    Transformer(patch_length^2, n_heads, n_layers, Stiefel=true),
    Lux.Dense(patch_length^2, 10, Lux.σ, use_bias=false)
)

ps, st = Lux.setup(Random.default_rng(), Ψᵉ) # .|> gpu

#loss_sing
function loss_sing(ps, x, y)
    norm(Lux.apply(Ψᵉ, x, ps, st)[1][:,1] - y)
end
function loss_sing(ps, train_x, train_y, index)
    loss_sing(ps, train_x[index], train_y[:, index])
end
function full_loss(ps, train_x, train_y)
    num = length(train_x)
    mapreduce(index -> loss_sing(ps, train_x, train_y, index), +, 1:num)
end

num = length(train_x)
batch_size = 64
training_steps = 1000

o = AdamOptimizer()
cache = init_optimizer_cache(Ψᵉ, o)

println("initial loss: ", full_loss(ps, train_x, train_y)/num)

@showprogress "Training network ..." for i in 1:training_steps
index₁ = Int(ceil(rand()*num))
x = train_x[index₁] #|> gpu
y = train_y[:, index₁] #|> gpu
l, pb = Zygote.pullback(ps -> loss_sing(ps, x, y), ps)
dp = pb(one(l))[1]
#dp = Zyogte.gradient(ps -> loss_sing(ps, x, y), ps)[1]

indices = Int.(ceil.(rand(batch_size -1)*num))
for index in indices
x = train_x[index] #|> gpu
y = train_y[:, index] #|> gpu
l, pb = Zygote.pullback(ps -> loss_sing(ps, x, y), ps)
dp = _add(dp, pb(one(l))[1])
end
optimization_step!(o, Ψᵉ, ps, cache, dp)
end
println("final loss: ", full_loss(ps, train_x, train_y)/num)

println("\nfinal test loss: ", full_loss(ps, test_x, test_y)/length(test_x))