
Commit

Merge pull request #179 from JuliaGNI/compathelper/new_version/2024-12-06-01-00-12-914-00751357659

CompatHelper: bump compat for AbstractNeuralNetworks to 0.5
michakraus authored Dec 17, 2024
2 parents cde4d3f + 0bd231b commit 1774815
Showing 103 changed files with 263 additions and 249 deletions.
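Nearly every hunk below applies one of two substitutions required by AbstractNeuralNetworks 0.5: the field access `nn.params` is replaced by the accessor `params(nn)`, and `KernelAbstractions.get_backend` is replaced by `networkbackend`. The following is a minimal sketch of the new calls, not part of the diff; the model and its sizes are made up and mirror the doctest in `src/architectures/autoencoder.jl` further down.

```julia
using GeometricMachineLearning
using GeometricMachineLearning: params, networkbackend

# A small throwaway network, only to illustrate the two accessors.
model = Chain(Dense(5, 3, tanh; use_bias = false), Dense(3, 2, identity; use_bias = false))
nn = NeuralNetwork(model)

ps = params(nn)               # replaces the field access nn.params
backend = networkbackend(nn)  # replaces KernelAbstractions.get_backend(...)
```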
2 changes: 1 addition & 1 deletion .githooks/pre-push
@@ -1,4 +1,4 @@
# pre-push git hook that runs all tests before pushing
# pre-push git hook that runs all tests before pushin

red='\033[0;31m'
green='\033[0;32m'
2 changes: 1 addition & 1 deletion Project.toml
@@ -31,7 +31,7 @@ UpdateJulia = "770da0de-323d-4d28-9202-0e205c1e0aff"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
AbstractNeuralNetworks = "0.4"
AbstractNeuralNetworks = "0.5"
BandedMatrices = "1"
ChainRules = "1"
ChainRulesCore = "1"
1 change: 1 addition & 0 deletions docs/Project.toml
@@ -1,5 +1,6 @@
[deps]
Bibliography = "f1be7e48-bf82-45af-a471-ae754a193061"
BrenierTwoFluid = "698bc5df-bacc-4e45-9592-41ae9e406d75"
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
5 changes: 3 additions & 2 deletions docs/src/tutorials/adjusting_the_loss_function.md
@@ -14,6 +14,7 @@ We again consider training a SympNet on the data coming from a harmonic oscillat

```@example change_loss
using GeometricMachineLearning # hide
using GeometricMachineLearning: params # hide
using GeometricIntegrators: integrate, ImplicitMidpoint # hide
using GeometricProblems.HarmonicOscillator: hodeproblem
import Random # hide
@@ -45,7 +46,7 @@ function network_parameter_norm(params::NeuralNetworkParameters)
sum([network_parameter_norm(params[key]) for key in keys(params)])
end
network_parameter_norm(nn.params)
network_parameter_norm(params(nn))
```

We now implement a custom loss such that:
@@ -80,7 +81,7 @@ print(loss_array[end])
We see that the norm of the parameters is lower:

```@example change_loss
network_parameter_norm(nn_custom.params)
network_parameter_norm(params(nn_custom))
```

We can also compare the solutions of the two networks:
7 changes: 4 additions & 3 deletions docs/src/tutorials/grassmann_layer.md
@@ -96,6 +96,7 @@ Before we can use the Wasserstein distance however to train the neural network w

```@example rosenbrock
using GeometricMachineLearning # hide
using GeometricMachineLearning: params # hide
using Zygote, BrenierTwoFluid
using LinearAlgebra: norm # hide
import Random # hide
@@ -111,7 +112,7 @@ nothing # hide
We then *lift* the neural network parameters via [`GlobalSection`](@ref).

```@example rosenbrock
λY = GlobalSection(nn.params)
λY = GlobalSection(params(nn))
nothing # hide
```

@@ -280,9 +281,9 @@ CairoMakie.activate!() # hide
const training_steps = 80
loss_array = zeros(training_steps)
for i in 1:training_steps
val, dp = compute_gradient(nn.params)
val, dp = compute_gradient(params(nn))
loss_array[i] = val
optimization_step!(optimizer, λY, nn.params, dp.params)
optimization_step!(optimizer, λY, params(nn), dp.params)
end
```

2 changes: 1 addition & 1 deletion docs/src/tutorials/mnist/mnist_tutorial.md
@@ -86,7 +86,7 @@ Here we have chosen a [`ClassificationTransformer`](@ref), i.e. a composition of
We now have to initialize the neural network weights. This is done with the constructor for `NeuralNetwork`:

```@example mnist
backend = GeometricMachineLearning.get_backend(dl)
backend = GeometricMachineLearning.networkbackend(dl)
T = eltype(dl)
nn1 = NeuralNetwork(model1, backend, T)
nn2 = NeuralNetwork(model2, backend, T)
8 changes: 4 additions & 4 deletions docs/src/tutorials/volume_preserving_attention.md
@@ -6,7 +6,7 @@ In here we demonstrate the differences between the two approaches for computing

```@example volume_preserving_attention
using GeometricMachineLearning # hide
using GeometricMachineLearning: FeedForwardLoss, TransformerLoss # hide
using GeometricMachineLearning: FeedForwardLoss, TransformerLoss, params # hide
import Random # hide
Random.seed!(123) # hide
@@ -199,15 +199,15 @@ initial_condition = dl.input[:, 1:seq_length, 2]
function make_networks_neural_network_integrators(nn_skew, nn_arb, nn_comp)
nn_skew = NeuralNetwork(GeometricMachineLearning.DummyTransformer(seq_length),
nn_skew.model,
nn_skew.params,
params(nn_skew),
CPU())
nn_arb = NeuralNetwork(GeometricMachineLearning.DummyTransformer(seq_length),
nn_arb.model,
nn_arb.params,
params(nn_arb),
CPU())
nn_comp = NeuralNetwork(GeometricMachineLearning.DummyNNIntegrator(),
nn_comp.model,
nn_comp.params,
params(nn_comp),
CPU())
nn_skew, nn_arb, nn_comp
1 change: 1 addition & 0 deletions src/GeometricMachineLearning.jl
@@ -29,6 +29,7 @@ module GeometricMachineLearning
import AbstractNeuralNetworks: GlorotUniform
import AbstractNeuralNetworks: params, architecture, model, dim
import AbstractNeuralNetworks: AbstractPullback, NetworkLoss, _compute_loss
import AbstractNeuralNetworks: networkbackend
# export params, architetcure, model
export dim
import NNlib: σ, sigmoid, softmax
27 changes: 13 additions & 14 deletions src/architectures/autoencoder.jl
@@ -82,11 +82,10 @@ We show how to make an encoder from a custom architecture:
```jldoctest
using GeometricMachineLearning
using GeometricMachineLearning: UnknownEncoder
using GeometricMachineLearning: UnknownEncoder, params
model = Chain(Dense(5, 3, tanh; use_bias = false), Dense(3, 2, identity; use_bias = false))
params = NeuralNetworkParameters(initialparameters(model))
nn = NeuralNetwork(UnknownEncoder(5, 2, 2), model, params, CPU())
nn = NeuralNetwork(UnknownEncoder(5, 2, 2), model, params(NeuralNetwork(model)), CPU())
typeof(nn) <: NeuralNetwork{<:GeometricMachineLearning.Encoder}
@@ -173,7 +172,7 @@ end
function encoder_parameters(nn::NeuralNetwork{<:AutoEncoder})
n_encoder_layers = length(encoder_model(nn.architecture).layers)
keys = Tuple(Symbol.(["L$(i)" for i in 1:n_encoder_layers]))
NeuralNetworkParameters(NamedTuple{keys}(Tuple([nn.params[key] for key in keys])))
NeuralNetworkParameters(NamedTuple{keys}(Tuple([params(nn)[key] for key in keys])))
end

# """
@@ -183,13 +182,13 @@ end
# """
function decoder_parameters(nn::NeuralNetwork{<:AutoEncoder})
n_decoder_layers = length(decoder_model(nn.architecture).layers)
all_keys = keys(nn.params)
# "old keys" are the ones describing the correct parameters in nn.params
all_keys = keys(params(nn))
# "old keys" are the ones describing the correct parameters in params(nn)
keys_old = Tuple(Symbol.(["L$(i)" for i in (length(all_keys) - (n_decoder_layers - 1)):length(all_keys)]))
n_keys = length(keys_old)
# "new keys" are the ones describing the keys in the new NamedTuple
keys_new = Tuple(Symbol.(["L$(i)" for i in 1:n_keys]))
NeuralNetworkParameters(NamedTuple{keys_new}(Tuple([nn.params[key] for key in keys_old])))
NeuralNetworkParameters(NamedTuple{keys_new}(Tuple([params(nn)[key] for key in keys_old])))
end

function Chain(arch::AutoEncoder)
@@ -205,14 +204,14 @@ function encoder(nn::NeuralNetwork{<:AutoEncoder})
NeuralNetwork( UnknownEncoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_blocks),
encoder_model(nn.architecture),
encoder_parameters(nn),
get_backend(nn))
networkbackend(nn))
end

function _encoder(nn::NeuralNetwork, full_dim::Integer, reduced_dim::Integer)
NeuralNetwork( UnknownEncoder(full_dim, reduced_dim, length(nn.model.layers)),
nn.model,
nn.params,
get_backend(nn))
params(nn),
networkbackend(nn))
end

function input_dimension(::AbstractExplicitLayer{M, N}) where {M, N}
@@ -242,11 +241,11 @@ end
Obtain the *decoder* from an [`AutoEncoder`](@ref) neural network.
"""
function decoder(nn::NeuralNetwork{<:AutoEncoder})
NeuralNetwork(UnknownDecoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_blocks), decoder_model(nn.architecture), decoder_parameters(nn), get_backend(nn))
NeuralNetwork(UnknownDecoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_blocks), decoder_model(nn.architecture), decoder_parameters(nn), networkbackend(nn))
end

function _decoder(nn::NeuralNetwork, full_dim::Integer, reduced_dim::Integer)
NeuralNetwork(UnknownDecoder(full_dim, reduced_dim, length(nn.model.layers)), nn.model, nn.params, get_backend(nn))
NeuralNetwork(UnknownDecoder(full_dim, reduced_dim, length(nn.model.layers)), nn.model, params(nn), networkbackend(nn))
end

@doc raw"""
@@ -263,9 +262,9 @@ function decoder(nn::NeuralNetwork)
end

function encoder(nn::NeuralNetwork{<:SymplecticCompression})
NeuralNetwork(UnknownSymplecticEncoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_blocks), encoder_model(nn.architecture), encoder_parameters(nn), get_backend(nn))
NeuralNetwork(UnknownSymplecticEncoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_blocks), encoder_model(nn.architecture), encoder_parameters(nn), networkbackend(nn))
end

function decoder(nn::NeuralNetwork{<:SymplecticCompression})
NeuralNetwork(UnknownSymplecticDecoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_blocks), decoder_model(nn.architecture), decoder_parameters(nn), get_backend(nn))
NeuralNetwork(UnknownSymplecticDecoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_blocks), decoder_model(nn.architecture), decoder_parameters(nn), networkbackend(nn))
end
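As an aside, the key re-indexing that `decoder_parameters` performs is easy to see on plain data; a sketch with a made-up layer count (not part of the diff):

```julia
# The last n_decoder_layers entries of params(nn) are renamed L1, L2, ... so that
# the decoder can run as a standalone network.
all_keys = (:L1, :L2, :L3, :L4, :L5)
n_decoder_layers = 2
keys_old = Tuple(Symbol.(["L$(i)" for i in (length(all_keys) - (n_decoder_layers - 1)):length(all_keys)]))
keys_new = Tuple(Symbol.(["L$(i)" for i in 1:length(keys_old)]))
(keys_old, keys_new)   # ((:L4, :L5), (:L1, :L2))
```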
4 changes: 2 additions & 2 deletions src/architectures/hamiltonian_neural_network.jl
@@ -25,10 +25,10 @@ function Chain(nn::HamiltonianNeuralNetwork)
end

# gradient of the Hamiltonian Neural Network
gradient(nn::AbstractNeuralNetwork{<:HamiltonianNeuralNetwork}, x, params = nn.params) = Zygote.gradient(ξ -> sum(nn(ξ, params)), x)[1]
gradient(nn::AbstractNeuralNetwork{<:HamiltonianNeuralNetwork}, x, params = params(nn)) = Zygote.gradient(ξ -> sum(nn(ξ, params)), x)[1]

# vector field of the Hamiltonian Neural Network
function vectorfield(nn::AbstractNeuralNetwork{<:HamiltonianNeuralNetwork}, x, params = nn.params)
function vectorfield(nn::AbstractNeuralNetwork{<:HamiltonianNeuralNetwork}, x, params = params(nn))
n_dim = length(x)÷2
I = Diagonal(ones(n_dim))
Z = zeros(n_dim,n_dim)
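The body of `vectorfield` is cut off by the diff, but the `I` and `Z` blocks it builds are the ingredients of the canonical symplectic matrix. A hedged plain-array sketch of that structure (the actual return value of `vectorfield` is not shown here):

```julia
using LinearAlgebra: Diagonal

n_dim = 2                            # made-up half-dimension
Id = Matrix(Diagonal(ones(n_dim)))   # the I constructed in vectorfield
Z  = zeros(n_dim, n_dim)             # the Z constructed in vectorfield
J  = [Z Id; -Id Z]                   # canonical symplectic matrix
∇H = [1.0, 2.0, 3.0, 4.0]            # placeholder for gradient(nn, x)
ż = J * ∇H                          # Hamiltonian vector field: q̇ = ∂H/∂p, ṗ = -∂H/∂q
```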
8 changes: 4 additions & 4 deletions src/architectures/lagrangian_neural_network.jl
@@ -29,14 +29,14 @@ end


# gradient of the Lagrangian Neural Network
∇L(nn::NeuralNetwork{<:LagrangianNeuralNetwork}, x, params = nn.params) = Zygote.gradient(x->sum(nn(x, params)), x)[1]
∇L(nn::NeuralNetwork{<:LagrangianNeuralNetwork}, x, params = params(nn)) = Zygote.gradient(x->sum(nn(x, params)), x)[1]

# hessian of the Lagrangian Neural Network
∇∇L(nn::NeuralNetwork{<:LagrangianNeuralNetwork}, q, q̇, params = nn.params) = Zygote.hessian(x->sum(nn(x, params)),[q...,q̇...])
∇∇L(nn::NeuralNetwork{<:LagrangianNeuralNetwork}, q, q̇, params = params(nn)) = Zygote.hessian(x->sum(nn(x, params)),[q...,q̇...])

∇q̇∇q̇L(nn::NeuralNetwork{<:LagrangianNeuralNetwork}, q, q̇, params = nn.params) = ∇∇L(nn, q, q̇, params)[(1+length(q̇)):end,(1+length(q̇)):end]
∇q̇∇q̇L(nn::NeuralNetwork{<:LagrangianNeuralNetwork}, q, q̇, params = params(nn)) = ∇∇L(nn, q, q̇, params)[(1+length(q̇)):end,(1+length(q̇)):end]

∇q∇q̇L(nn::NeuralNetwork{<:LagrangianNeuralNetwork}, q, q̇, params = nn.params) = ∇∇L(nn, q, q̇, params)[1:length(q),(1+length(q̇)):end]
∇q∇q̇L(nn::NeuralNetwork{<:LagrangianNeuralNetwork}, q, q̇, params = params(nn)) = ∇∇L(nn, q, q̇, params)[1:length(q),(1+length(q̇)):end]



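The block indexing in `∇q̇∇q̇L` and `∇q∇q̇L` is independent of the neural network; a plain-array sketch with made-up sizes:

```julia
# For x = [q..., q̇...] the full Hessian of L is 2n x 2n; the q̇q̇ block is its
# lower-right n x n corner and the qq̇ block its upper-right corner.
q, q̇ = rand(2), rand(2)
H = rand(4, 4)                                        # placeholder for ∇∇L(nn, q, q̇)
H_q̇q̇ = H[(1 + length(q̇)):end, (1 + length(q̇)):end]
H_qq̇ = H[1:length(q), (1 + length(q̇)):end]
```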
4 changes: 2 additions & 2 deletions src/architectures/neural_network_integrator.jl
@@ -22,7 +22,7 @@ abstract type NeuralNetworkIntegrator <: Architecture end
function Base.iterate(nn::NeuralNetwork{<:NeuralNetworkIntegrator}, ics::AT; n_points = 100) where {T, AT<:AbstractVector{T}}

n_dim = length(ics)
backend = KernelAbstractions.get_backend(ics)
backend = networkbackend(ics)

# Array to store the predictions
valuation = KernelAbstractions.allocate(backend, T, n_dim, n_points)
@@ -97,7 +97,7 @@ The number of integration steps that should be performed.
function Base.iterate(nn::NeuralNetwork{<:NeuralNetworkIntegrator}, ics::BT; n_points = 100) where {T, AT<:AbstractVector{T}, BT<:NamedTuple{(:q, :p), Tuple{AT, AT}}}

n_dim2 = length(ics.q)
backend = KernelAbstractions.get_backend(ics.q)
backend = networkbackend(ics.q)

# Array to store the predictions
valuation = (q = KernelAbstractions.allocate(backend, T, n_dim2, n_points), p = KernelAbstractions.allocate(backend, T, n_dim2, n_points))
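The allocation pattern in `Base.iterate` can be reproduced on its own; a sketch under the assumption that `networkbackend` also accepts plain arrays, as its use on `ics` above suggests:

```julia
using KernelAbstractions
using GeometricMachineLearning: networkbackend

# Query the backend of the initial condition and allocate the prediction buffer there,
# so the same code runs on CPU and GPU arrays alike (sizes are made up).
ics = rand(Float64, 4)
backend = networkbackend(ics)      # replaces KernelAbstractions.get_backend(ics)
valuation = KernelAbstractions.allocate(backend, Float64, length(ics), 100)
```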
4 changes: 2 additions & 2 deletions src/architectures/psd.jl
@@ -57,8 +57,8 @@ function solve!(nn::NeuralNetwork{<:PSDArch}, input::AbstractMatrix)
half_of_dimension_in_big_space = nn.architecture.full_dim ÷ 2
@views input_qp = hcat(input[1 : half_of_dimension_in_big_space, :], input[(half_of_dimension_in_big_space + 1) : end, :])
U_term = svd(input_qp).U
@views nn.params[1].weight.A .= U_term[:, 1 : nn.architecture.reduced_dim ÷ 2]
@views nn.params[2].weight.A .= U_term[:, 1 : nn.architecture.reduced_dim ÷ 2]
@views params(nn)[1].weight.A .= U_term[:, 1 : nn.architecture.reduced_dim ÷ 2]
@views params(nn)[2].weight.A .= U_term[:, 1 : nn.architecture.reduced_dim ÷ 2]

AutoEncoderLoss()(nn, input)
end
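What `solve!` computes here can be sketched with plain arrays (all sizes made up): the q- and p-parts of the snapshot matrix are stacked side by side and the leading left singular vectors are written into both PSD weights, the cotangent-lift construction of symplectic model order reduction.

```julia
using LinearAlgebra: svd

full_dim, reduced_dim, n_snapshots = 6, 2, 20
input = rand(full_dim, n_snapshots)
half = full_dim ÷ 2
input_qp = hcat(input[1:half, :], input[(half + 1):end, :])
Φ = svd(input_qp).U[:, 1:(reduced_dim ÷ 2)]   # written into both weight matrices
```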
4 changes: 2 additions & 2 deletions src/architectures/symplectic_autoencoder.jl
@@ -151,10 +151,10 @@ end

function encoder(nn::NeuralNetwork{<:SymplecticAutoencoder})
arch = NonLinearSymplecticEncoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_layers, nn.architecture.n_encoder_blocks, nn.architecture.sympnet_upscale, nn.architecture.activation)
NeuralNetwork(arch, encoder_model(nn.architecture), encoder_parameters(nn), get_backend(nn))
NeuralNetwork(arch, encoder_model(nn.architecture), encoder_parameters(nn), networkbackend(nn))
end

function decoder(nn::NeuralNetwork{<:SymplecticAutoencoder})
arch = NonLinearSymplecticDecoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_decoder_layers, nn.architecture.n_decoder_blocks, nn.architecture.sympnet_upscale, nn.architecture.activation)
NeuralNetwork(arch, decoder_model(nn.architecture), decoder_parameters(nn), get_backend(nn))
NeuralNetwork(arch, decoder_model(nn.architecture), decoder_parameters(nn), networkbackend(nn))
end
4 changes: 2 additions & 2 deletions src/architectures/transformer_integrator.jl
@@ -48,7 +48,7 @@ function Base.iterate(nn::NeuralNetwork{<:TransformerIntegrator}, ics::NamedTupl
seq_length = typeof(nn.architecture) <: StandardTransformerIntegrator ? size(ics.q, 2) : nn.architecture.seq_length

n_dim = size(ics.q, 1)
backend = KernelAbstractions.get_backend(ics.q)
backend = networkbackend(ics.q)

n_iterations = Int(ceil((n_points - seq_length) / prediction_window))
# Array to store the predictions
@@ -84,7 +84,7 @@ function Base.iterate(nn::NeuralNetwork{<:TransformerIntegrator}, ics::AT; n_poi
end

n_dim = size(ics, 1)
backend = KernelAbstractions.get_backend(ics)
backend = networkbackend(ics)

n_iterations = Int(ceil((n_points - seq_length) / prediction_window))
# Array to store the predictions
2 changes: 1 addition & 1 deletion src/arrays/grassmann_lie_algebra_horizontal.jl
@@ -80,7 +80,7 @@ end
Base.parent(A::GrassmannLieAlgHorMatrix) = (A.B, )
Base.size(A::GrassmannLieAlgHorMatrix) = (A.N, A.N)

KernelAbstractions.get_backend(B::GrassmannLieAlgHorMatrix) = KernelAbstractions.get_backend(B.B)
networkbackend(B::GrassmannLieAlgHorMatrix) = networkbackend(B.B)

function Base.getindex(A::GrassmannLieAlgHorMatrix{T}, i::Integer, j::Integer) where {T}
if i ≤ A.n
2 changes: 1 addition & 1 deletion src/arrays/lower_triangular.jl
@@ -81,7 +81,7 @@ end
function map_to_lo(A::AbstractMatrix{T}) where T
n = size(A, 1)
@assert size(A, 2) == n
backend = KernelAbstractions.get_backend(A)
backend = networkbackend(A)
S = KernelAbstractions.zeros(backend, T, n * (n - 1) ÷ 2)
assign_Skew_val! = assign_Skew_val_kernel!(backend)
for i in 2:n
12 changes: 6 additions & 6 deletions src/arrays/skew_symmetric.jl
@@ -110,7 +110,7 @@ end

function Base.:+(A::SkewSymMatrix{T}, B::AbstractMatrix{T}) where T
@assert size(A) == size(B)
backend = KernelAbstractions.get_backend(B)
backend = networkbackend(B)
addition! = addition_kernel!(backend)
C = KernelAbstractions.allocate(backend, T, size(A)...)
addition!(C, A.S, B; ndrange = size(A))
@@ -215,7 +215,7 @@ LinearAlgebra.rmul!(C::SkewSymMatrix, α::Real) = mul!(C, C, α)
function Base.:*(A::SkewSymMatrix{T}, B::AbstractMatrix{T}) where T
m1, m2 = size(B)
@assert m1 == A.n
backend = KernelAbstractions.get_backend(A)
backend = networkbackend(A)
C = KernelAbstractions.allocate(backend, T, A.n, m2)

skew_mat_mul! = skew_mat_mul_kernel!(backend)
@@ -245,7 +245,7 @@ function Base.:*(A::SkewSymMatrix, b::AbstractVector{T}) where T
end

function Base.one(A::SkewSymMatrix{T}) where T
backend = KernelAbstractions.get_backend(A.S)
backend = networkbackend(A.S)
unit_matrix = KernelAbstractions.zeros(backend, T, A.n, A.n)
write_ones! = write_ones_kernel!(backend)
write_ones!(unit_matrix, ndrange=A.n)
@@ -290,8 +290,8 @@ function Base.zero(A::SkewSymMatrix)
SkewSymMatrix(zero(A.S), A.n)
end

function KernelAbstractions.get_backend(A::SkewSymMatrix)
KernelAbstractions.get_backend(A.S)
function networkbackend(A::SkewSymMatrix)
networkbackend(A.S)
end

function assign!(B::SkewSymMatrix{T}, C::SkewSymMatrix{T}) where T
@@ -311,7 +311,7 @@ function map_to_Skew(A::AbstractMatrix{T}) where T
n = size(A, 1)
@assert size(A, 2) == n
A_skew = T(.5)*(A - A')
backend = KernelAbstractions.get_backend(A)
backend = networkbackend(A)
S = if n != 1
KernelAbstractions.zeros(backend, T, n * (n - 1) ÷ 2)
else
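The projection in `map_to_Skew` is the ordinary skew-symmetric part of a matrix; a plain-array sketch (the 4 x 4 size is made up):

```julia
A = rand(4, 4)
A_skew = 0.5 * (A - A')   # what map_to_Skew stores, packed into a vector of length n(n - 1) ÷ 2
A_skew ≈ -A_skew'         # true: the result is skew-symmetric
```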
6 changes: 3 additions & 3 deletions src/arrays/stiefel_lie_algebra_horizontal.jl
@@ -273,8 +273,8 @@ function Base.zero(B::StiefelLieAlgHorMatrix)
)
end

function KernelAbstractions.get_backend(B::StiefelLieAlgHorMatrix)
KernelAbstractions.get_backend(B.B)
function networkbackend(B::StiefelLieAlgHorMatrix)
networkbackend(B.B)
end

# assign funciton; also implement this for other arrays!
@@ -302,7 +302,7 @@ function assign!(A::AbstractArray, B::AbstractArray)
end

function Base.one(B::StiefelLieAlgHorMatrix{T}) where T
backend = get_backend(B)
backend = networkbackend(B)
oneB = KernelAbstractions.zeros(backend, T, B.N, B.N)
write_ones! = write_ones_kernel!(backend)
write_ones!(oneB; ndrange = B.N)