From 55dc372cab485269deecb2bc6e92c930e93645d7 Mon Sep 17 00:00:00 2001 From: ayushinav Date: Thu, 4 Jul 2024 09:48:31 -0400 Subject: [PATCH 01/16] deeponet multi-output fix --- src/LuxNeuralOperators.jl | 5 +- src/deeponet.jl | 104 ++++++++++++++++++++++++-------------- src/display.jl | 22 ++++++++ src/utils.jl | 35 +++++++++++++ test/deeponet_tests.jl | 70 +++++++++++++------------ 5 files changed, 163 insertions(+), 73 deletions(-) create mode 100644 src/display.jl diff --git a/src/LuxNeuralOperators.jl b/src/LuxNeuralOperators.jl index 5dd908b..27bb836 100644 --- a/src/LuxNeuralOperators.jl +++ b/src/LuxNeuralOperators.jl @@ -7,9 +7,9 @@ using PrecompileTools: @recompile_invalidations using ChainRulesCore: ChainRulesCore, NoTangent using ConcreteStructs: @concrete using FFTW: FFTW, irfft, rfft - using Lux + using Lux: _print_wrapper_model using LuxCore: LuxCore, AbstractExplicitLayer - using NNlib: NNlib, ⊠ + using NNlib: NNlib, ⊠, batched_adjoint using Random: Random, AbstractRNG using Reexport: @reexport end @@ -26,6 +26,7 @@ include("layers.jl") include("fno.jl") include("deeponet.jl") +include("display.jl") export FourierTransform export SpectralConv, OperatorConv, SpectralKernel, OperatorKernel diff --git a/src/deeponet.jl b/src/deeponet.jl index c893dbb..59fb951 100644 --- a/src/deeponet.jl +++ b/src/deeponet.jl @@ -11,6 +11,8 @@ Constructs a DeepONet composed of Dense layers. Make sure the last node of `bran - `trunk`: Tuple of integers containing the number of nodes in each layer for trunk net - `branch_activation`: activation function for branch net - `trunk_activation`: activation function for trunk net + - `additional`: `Lux` network to pass the output of DeepONet, to include additional operations +for embeddings, defaults to `nothing` ## References @@ -22,34 +24,27 @@ operators", doi: https://arxiv.org/abs/1910.03193 ```jldoctest julia> deeponet = DeepONet(; branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16)) -@compact( - branch = Chain( +Branch net : +( + Chain( layer_1 = Dense(64 => 32), # 2_080 parameters layer_2 = Dense(32 => 32), # 1_056 parameters layer_3 = Dense(32 => 16), # 528 parameters ), - trunk = Chain( +) + +Trunk net : +( + Chain( layer_1 = Dense(1 => 8), # 16 parameters layer_2 = Dense(8 => 8), # 72 parameters layer_3 = Dense(8 => 16), # 144 parameters ), -) do (u, y) - t = trunk(y) - b = branch(u) - @argcheck ndims(t) == ndims(b) + 1 || ndims(t) == ndims(b) - @argcheck size(t, 1) == size(b, 1) "Branch and Trunk net must share the same amount of nodes in the last layer. Otherwise Σᵢ bᵢⱼ tᵢₖ won't work." - b_ = if ndims(t) == ndims(b) - b - else - reshape(b, size(b, 1), 1, (size(b))[2:end]...) - end - return dropdims(sum(t .* b_; dims = 1); dims = 1) -end # Total: 3_896 parameters, - # plus 0 states. +) ``` """ function DeepONet(; branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), - branch_activation=identity, trunk_activation=identity) + branch_activation=identity, trunk_activation=identity, additional = nothing) # checks for last dimension size @argcheck branch[end]==trunk[end] "Branch and Trunk net must share the same amount of \ @@ -62,7 +57,7 @@ function DeepONet(; branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), trunk_net = Chain([Dense(trunk[i] => trunk[i + 1], trunk_activation) for i in 1:(length(trunk) - 1)]...) - return DeepONet(branch_net, trunk_net) + return DeepONet(branch_net, trunk_net, additional = additional) end """ @@ -76,6 +71,11 @@ nets output should have the same first dimension. - `branch`: `Lux` network to be used as branch net. 
- `trunk`: `Lux` network to be used as trunk net. +## Keyword Arguments + +- `additional`: `Lux` network to pass the output of DeepONet, to include additional operations +for embeddings, defaults to `nothing` + ## References [1] Lu Lu, Pengzhan Jin, George Em Karniadakis, "DeepONet: Learning nonlinear operators for @@ -90,43 +90,69 @@ julia> branch_net = Chain(Dense(64 => 32), Dense(32 => 32), Dense(32 => 16)); julia> trunk_net = Chain(Dense(1 => 8), Dense(8 => 8), Dense(8 => 16)); julia> deeponet = DeepONet(branch_net, trunk_net) -@compact( - branch = Chain( +Branch net : +( + Chain( layer_1 = Dense(64 => 32), # 2_080 parameters layer_2 = Dense(32 => 32), # 1_056 parameters layer_3 = Dense(32 => 16), # 528 parameters ), - trunk = Chain( +) + +Trunk net : +( + Chain( layer_1 = Dense(1 => 8), # 16 parameters layer_2 = Dense(8 => 8), # 72 parameters layer_3 = Dense(8 => 16), # 144 parameters ), -) do (u, y) - t = trunk(y) - b = branch(u) - @argcheck ndims(t) == ndims(b) + 1 || ndims(t) == ndims(b) - @argcheck size(t, 1) == size(b, 1) "Branch and Trunk net must share the same amount of nodes in the last layer. Otherwise Σᵢ bᵢⱼ tᵢₖ won't work." - b_ = if ndims(t) == ndims(b) - b - else - reshape(b, size(b, 1), 1, (size(b))[2:end]...) - end - return dropdims(sum(t .* b_; dims = 1); dims = 1) -end # Total: 3_896 parameters, - # plus 0 states. +) + +julia> branch_net = Chain(Dense(64 => 32), Dense(32 => 32), Dense(32 => 16)); + +julia> trunk_net = Chain(Dense(1 => 8), Dense(8 => 8), Dense(8 => 16)); + +julia> additional = Chain(Dense(1 => 4)); + +julia> deeponet = DeepONet(branch_net, trunk_net, additional = additional) +Branch net : +( + Chain( + layer_1 = Dense(64 => 32), # 2_080 parameters + layer_2 = Dense(32 => 32), # 1_056 parameters + layer_3 = Dense(32 => 16), # 528 parameters + ), +) + +Trunk net : +( + Chain( + layer_1 = Dense(1 => 8), # 16 parameters + layer_2 = Dense(8 => 8), # 72 parameters + layer_3 = Dense(8 => 16), # 144 parameters + ), +) + +Additional net : +( + Dense(1 => 4), # 8 parameters +) ``` """ -function DeepONet(branch::L1, trunk::L2) where {L1, L2} - return @compact(; branch, trunk, dispatch=:DeepONet) do (u, y) +function DeepONet(branch::L1, trunk::L2; additional = nothing) where {L1, L2} + return @compact(; branch, trunk, additional, dispatch=:DeepONet) do (u, y) t = trunk(y) # p x N x nb... b = branch(u) # p x nb... - @argcheck ndims(t) == ndims(b) + 1 || ndims(t) == ndims(b) @argcheck size(t, 1)==size(b, 1) "Branch and Trunk net must share the same \ amount of nodes in the last layer. Otherwise \ Σᵢ bᵢⱼ tᵢₖ won't work." - b_ = ndims(t) == ndims(b) ? b : reshape(b, size(b, 1), 1, size(b)[2:end]...) 
- @return dropdims(sum(t .* b_; dims=1); dims=1) + if isnothing(additional) + out_ = __project(b, t) + else + out_ = additional(__project(b, t)) + end + @return out_ end end diff --git a/src/display.jl b/src/display.jl new file mode 100644 index 0000000..24b6ac2 --- /dev/null +++ b/src/display.jl @@ -0,0 +1,22 @@ +function Base.show(io::IO, model::conv) where {conv <: OperatorConv} + # print(io, model.name*"() # "*string(Lux.parameterlength(model))*" parameters") + print(io, model.name) +end + +function Base.show(io::IO, ::MIME"text/plain", model::conv) where {conv <: OperatorConv} + show(io, model.name) +end + +function Base.show(io::IO, model::Lux.CompactLuxLayer{:DeepONet}) + _print_wrapper_model(io, "Branch net :\n", model.layers.branch) + print(io, "\n \n") + _print_wrapper_model(io, "Trunk net :\n", model.layers.trunk) + if :additional in keys(model.layers) + print(io, "\n \n") + _print_wrapper_model(io, "Additional net :\n", model.layers.additional) + end +end + +function Base.show(io::IO, ::MIME"text/plain", x::CompactLuxLayer{:DeepONet}) + show(io, x) +end diff --git a/src/utils.jl b/src/utils.jl index 129c38f..556219f 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,2 +1,37 @@ # Temporarily capture certain calls like AMDGPU for ComplexFloats @inline __batched_mul(x, y) = x ⊠ y + +@inline function __project(b::AbstractArray{T1, 2}, t::AbstractArray{T2, 3}) where {T1, T2} + # b : p x nb + # t : p x N x nb + @show size.([b, t]) + b_ = reshape(b, size(b, 1), 1, size(b, 2)) # p x 1 x nb + return dropdims(sum(b_ .*t, dims = 1), dims = 1) # N x nb +end + +@inline function __project(b::AbstractArray{T1, 3}, t::AbstractArray{T2, 3}) where {T1, T2} + # b : p x u x nb + # t : p x N x nb + @show size.([b, t]) + if size(b, 2) == 1 || size(t, 2) == 1 + return sum(b .* t, dims = 1) # 1 x N x nb + else + return LuxNeuralOperators.__batched_mul(batched_adjoint(t), b) # N x p x nb + end +end + +@inline function __project(b::AbstractArray{T1, N}, t::AbstractArray{T2, 3}) where {T1, T2, N} + # b : p x u_size x nb + # t : p x N x nb + @show size.([b, t]) + + u_size = size(b)[2:end-1] + + b_ = reshape(b, size(b,1), 1, u_size..., size(b)[end]) + # p x u_size x 1 x nb + + t_ = reshape(t, size(t)[1:2]..., ones(eltype(u_size), length(u_size))..., size(t)[end]) + # p x (1,1,1...) X N x nb + + return dropdims(sum(b_ .* t_; dims = 1), dims = 1) # u_size x N x nb +end \ No newline at end of file diff --git a/test/deeponet_tests.jl b/test/deeponet_tests.jl index 14aeabc..1ffb57b 100644 --- a/test/deeponet_tests.jl +++ b/test/deeponet_tests.jl @@ -2,47 +2,53 @@ @testset "BACKEND: $(mode)" for (mode, aType, dev, ongpu) in MODES rng = StableRNG(12345) - u = rand(Float32, 64, 5) |> aType # sensor_points x nb - y = rand(Float32, 1, 10, 5) |> aType # ndims x N x nb - out_size = (10, 5) - - deeponet = DeepONet(; branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16)) - - ps, st = Lux.setup(rng, deeponet) |> dev + setups = [ + (u_size=(64, 5), y_size=(1, 10, 5), out_size=(10, 5), + branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), name="Scalar"), + (u_size=(64, 3, 5), y_size=(4, 10, 5), out_size=(10, 3, 5), + branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), name="Vector"), + (u_size=(64, 4, 3, 3, 5), y_size=(4, 10, 5), out_size=(10, 4, 3, 3, 5), + branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), name="Tensor")] + + @testset "$(setup.name)" for setup in setups + u = rand(Float32, setup.u_size...) |> aType + y = rand(Float32, setup.y_size...) 
|> aType + deeponet = DeepONet(; branch=setup.branch, trunk=setup.trunk) + ps, st = Lux.setup(rng, deeponet) |> dev - @inferred deeponet((u, y), ps, st) - @jet deeponet((u, y), ps, st) + @inferred deeponet((u, y), ps, st) + @jet deeponet((u, y), ps, st) - pred = first(deeponet((u, y), ps, st)) - @test size(pred) == out_size + pred = first(deeponet(u, y), ps, st) + @test size(setup.out_size) == size(pred) + end - deeponet = DeepONet(Chain(Dense(64 => 32), Dense(32 => 32), Dense(32 => 16)), - Chain(Dense(1 => 8), Dense(8 => 8), Dense(8 => 16))) + @testset "Additonal layer" begin + u = rand(Float32, 64, 1, 5) |> aType # sensor_points x nb + y = rand(Float32, 1, 10, 5) |> aType # ndims x N x nb + out_size = (4, 10, 5) - ps, st = Lux.setup(rng, deeponet) |> dev + branch_net = Chain(Dense(64 => 32), Dense(32 => 32), Dense(32 => 16)) + trunk_net = Chain(Dense(1 => 8), Dense(8 => 8), Dense(8 => 16)) + additional = Chain(Dense(1 => 4)) + deeponet = DeepONet(branch_net, trunk_net; additional=additional) - @inferred deeponet((u, y), ps, st) - @jet deeponet((u, y), ps, st) + ps, st = Lux.setup(rng, deeponet) + @inferred deeponet((u, y), ps, st) + @jet deeponet((u, y), ps, st) - pred = first(deeponet((u, y), ps, st)) - @test size(pred) == out_size + pred = first(deeponet((u, y), ps, st)) + @test size(out_size) == size(pred) + end - deeponet = DeepONet(Chain(Dense(64 => 32), Dense(32 => 32), Dense(32 => 20)), - Chain(Dense(1 => 8), Dense(8 => 8), Dense(8 => 16))) - ps, st = Lux.setup(rng, deeponet) |> dev - @test_throws ArgumentError deeponet((u, y), ps, st) + @testset "Embedding layer mismatch" begin + u = rand(Float32, 64, 5) |> aType + y = rand(Float32, 1, 10, 5) |> aType - @testset "higher-dim input #7" begin - u = ones(Float32, 10, 10, 10) |> aType - v = ones(Float32, 1, 10, 10) |> aType - deeponet = DeepONet(; branch=(10, 10, 10), trunk=(1, 10, 10)) + deeponet = DeepONet(Chain(Dense(64 => 32), Dense(32 => 32), Dense(32 => 20)), + Chain(Dense(1 => 8), Dense(8 => 8), Dense(8 => 16))) ps, st = Lux.setup(rng, deeponet) |> dev - - y, st_ = deeponet((u, v), ps, st) - @test size(y) == (10, 10) - - @inferred deeponet((u, v), ps, st) - @jet deeponet((u, v), ps, st) + @test_throws ArgumentError deeponet((u, y), ps, st) end end end From 36745f337c62adb3d6b2c13e93600e6553434b46 Mon Sep 17 00:00:00 2001 From: ayushinav Date: Thu, 4 Jul 2024 10:45:12 -0400 Subject: [PATCH 02/16] test bug fix --- src/display.jl | 14 +++++++------- src/utils.jl | 12 ++++-------- test/deeponet_tests.jl | 14 +++++++------- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/display.jl b/src/display.jl index 24b6ac2..07bc8a0 100644 --- a/src/display.jl +++ b/src/display.jl @@ -1,11 +1,11 @@ -function Base.show(io::IO, model::conv) where {conv <: OperatorConv} - # print(io, model.name*"() # "*string(Lux.parameterlength(model))*" parameters") - print(io, model.name) -end +# function Base.show(io::IO, model::conv) where {conv <: OperatorConv} +# # print(io, model.name*"() # "*string(Lux.parameterlength(model))*" parameters") +# print(io, model.name) +# end -function Base.show(io::IO, ::MIME"text/plain", model::conv) where {conv <: OperatorConv} - show(io, model.name) -end +# function Base.show(io::IO, ::MIME"text/plain", model::conv) where {conv <: OperatorConv} +# show(io, model.name) +# end function Base.show(io::IO, model::Lux.CompactLuxLayer{:DeepONet}) _print_wrapper_model(io, "Branch net :\n", model.layers.branch) diff --git a/src/utils.jl b/src/utils.jl index 556219f..2d1cc0a 100644 --- a/src/utils.jl +++ 
b/src/utils.jl @@ -4,7 +4,6 @@ @inline function __project(b::AbstractArray{T1, 2}, t::AbstractArray{T2, 3}) where {T1, T2} # b : p x nb # t : p x N x nb - @show size.([b, t]) b_ = reshape(b, size(b, 1), 1, size(b, 2)) # p x 1 x nb return dropdims(sum(b_ .*t, dims = 1), dims = 1) # N x nb end @@ -12,26 +11,23 @@ end @inline function __project(b::AbstractArray{T1, 3}, t::AbstractArray{T2, 3}) where {T1, T2} # b : p x u x nb # t : p x N x nb - @show size.([b, t]) if size(b, 2) == 1 || size(t, 2) == 1 return sum(b .* t, dims = 1) # 1 x N x nb else - return LuxNeuralOperators.__batched_mul(batched_adjoint(t), b) # N x p x nb + return __batched_mul(batched_adjoint(t), b) # N x p x nb end end @inline function __project(b::AbstractArray{T1, N}, t::AbstractArray{T2, 3}) where {T1, T2, N} # b : p x u_size x nb # t : p x N x nb - @show size.([b, t]) - u_size = size(b)[2:end-1] b_ = reshape(b, size(b,1), 1, u_size..., size(b)[end]) - # p x u_size x 1 x nb + # p x 1 x u_size x nb t_ = reshape(t, size(t)[1:2]..., ones(eltype(u_size), length(u_size))..., size(t)[end]) - # p x (1,1,1...) X N x nb + # p x N x (1,1,1...) x nb - return dropdims(sum(b_ .* t_; dims = 1), dims = 1) # u_size x N x nb + return dropdims(sum(b_ .* t_; dims = 1), dims = 1) # N x u_size x nb end \ No newline at end of file diff --git a/test/deeponet_tests.jl b/test/deeponet_tests.jl index 1ffb57b..a367533 100644 --- a/test/deeponet_tests.jl +++ b/test/deeponet_tests.jl @@ -6,21 +6,21 @@ (u_size=(64, 5), y_size=(1, 10, 5), out_size=(10, 5), branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), name="Scalar"), (u_size=(64, 3, 5), y_size=(4, 10, 5), out_size=(10, 3, 5), - branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), name="Vector"), + branch=(64, 32, 32, 16), trunk=(4, 8, 8, 16), name="Vector"), (u_size=(64, 4, 3, 3, 5), y_size=(4, 10, 5), out_size=(10, 4, 3, 3, 5), - branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), name="Tensor")] + branch=(64, 32, 32, 16), trunk=(4, 8, 8, 16), name="Tensor")] @testset "$(setup.name)" for setup in setups u = rand(Float32, setup.u_size...) |> aType y = rand(Float32, setup.y_size...) |> aType deeponet = DeepONet(; branch=setup.branch, trunk=setup.trunk) - ps, st = Lux.setup(rng, deeponet) |> dev + ps, st = Lux.setup(rng, deeponet) |> dev @inferred deeponet((u, y), ps, st) @jet deeponet((u, y), ps, st) - pred = first(deeponet(u, y), ps, st) - @test size(setup.out_size) == size(pred) + pred = first(deeponet((u, y), ps, st)) + @test setup.out_size == size(pred) end @testset "Additonal layer" begin @@ -33,12 +33,12 @@ additional = Chain(Dense(1 => 4)) deeponet = DeepONet(branch_net, trunk_net; additional=additional) - ps, st = Lux.setup(rng, deeponet) + ps, st = Lux.setup(rng, deeponet) |> dev @inferred deeponet((u, y), ps, st) @jet deeponet((u, y), ps, st) pred = first(deeponet((u, y), ps, st)) - @test size(out_size) == size(pred) + @test out_size == size(pred) end @testset "Embedding layer mismatch" begin From acd35d1f6908dd6c04aafd1fc3082e78f57e1efc Mon Sep 17 00:00:00 2001 From: ayushinav Date: Thu, 4 Jul 2024 10:47:02 -0400 Subject: [PATCH 03/16] format --- src/deeponet.jl | 17 +++++++++-------- src/utils.jl | 13 +++++++------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/deeponet.jl b/src/deeponet.jl index 59fb951..d034da0 100644 --- a/src/deeponet.jl +++ b/src/deeponet.jl @@ -12,7 +12,7 @@ Constructs a DeepONet composed of Dense layers. 
Make sure the last node of `bran - `branch_activation`: activation function for branch net - `trunk_activation`: activation function for trunk net - `additional`: `Lux` network to pass the output of DeepONet, to include additional operations -for embeddings, defaults to `nothing` + for embeddings, defaults to `nothing` ## References @@ -43,8 +43,9 @@ Trunk net : ) ``` """ -function DeepONet(; branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), - branch_activation=identity, trunk_activation=identity, additional = nothing) +function DeepONet(; + branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), branch_activation=identity, + trunk_activation=identity, additional=nothing) # checks for last dimension size @argcheck branch[end]==trunk[end] "Branch and Trunk net must share the same amount of \ @@ -57,7 +58,7 @@ function DeepONet(; branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), trunk_net = Chain([Dense(trunk[i] => trunk[i + 1], trunk_activation) for i in 1:(length(trunk) - 1)]...) - return DeepONet(branch_net, trunk_net, additional = additional) + return DeepONet(branch_net, trunk_net; additional=additional) end """ @@ -73,8 +74,8 @@ nets output should have the same first dimension. ## Keyword Arguments -- `additional`: `Lux` network to pass the output of DeepONet, to include additional operations -for embeddings, defaults to `nothing` + - `additional`: `Lux` network to pass the output of DeepONet, to include additional operations + for embeddings, defaults to `nothing` ## References @@ -114,7 +115,7 @@ julia> trunk_net = Chain(Dense(1 => 8), Dense(8 => 8), Dense(8 => 16)); julia> additional = Chain(Dense(1 => 4)); -julia> deeponet = DeepONet(branch_net, trunk_net, additional = additional) +julia> deeponet = DeepONet(branch_net, trunk_net; additional=additional) Branch net : ( Chain( @@ -139,7 +140,7 @@ Additional net : ) ``` """ -function DeepONet(branch::L1, trunk::L2; additional = nothing) where {L1, L2} +function DeepONet(branch::L1, trunk::L2; additional=nothing) where {L1, L2} return @compact(; branch, trunk, additional, dispatch=:DeepONet) do (u, y) t = trunk(y) # p x N x nb... b = branch(u) # p x nb... diff --git a/src/utils.jl b/src/utils.jl index 2d1cc0a..dafd5d4 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -5,29 +5,30 @@ # b : p x nb # t : p x N x nb b_ = reshape(b, size(b, 1), 1, size(b, 2)) # p x 1 x nb - return dropdims(sum(b_ .*t, dims = 1), dims = 1) # N x nb + return dropdims(sum(b_ .* t; dims=1); dims=1) # N x nb end @inline function __project(b::AbstractArray{T1, 3}, t::AbstractArray{T2, 3}) where {T1, T2} # b : p x u x nb # t : p x N x nb if size(b, 2) == 1 || size(t, 2) == 1 - return sum(b .* t, dims = 1) # 1 x N x nb + return sum(b .* t; dims=1) # 1 x N x nb else return __batched_mul(batched_adjoint(t), b) # N x p x nb end end -@inline function __project(b::AbstractArray{T1, N}, t::AbstractArray{T2, 3}) where {T1, T2, N} +@inline function __project( + b::AbstractArray{T1, N}, t::AbstractArray{T2, 3}) where {T1, T2, N} # b : p x u_size x nb # t : p x N x nb - u_size = size(b)[2:end-1] + u_size = size(b)[2:(end - 1)] - b_ = reshape(b, size(b,1), 1, u_size..., size(b)[end]) + b_ = reshape(b, size(b, 1), 1, u_size..., size(b)[end]) # p x 1 x u_size x nb t_ = reshape(t, size(t)[1:2]..., ones(eltype(u_size), length(u_size))..., size(t)[end]) # p x N x (1,1,1...) 
x nb - return dropdims(sum(b_ .* t_; dims = 1), dims = 1) # N x u_size x nb + return dropdims(sum(b_ .* t_; dims=1); dims=1) # N x u_size x nb end \ No newline at end of file From ec0644a50950559c37501d4df366bd88921431f2 Mon Sep 17 00:00:00 2001 From: ayushinav Date: Fri, 5 Jul 2024 01:56:24 -0400 Subject: [PATCH 04/16] compat with additional layer --- src/LuxNeuralOperators.jl | 2 +- src/deeponet.jl | 26 ++++++++++------- src/display.jl | 22 -------------- src/utils.jl | 60 ++++++++++++++++++++++++++++++++------- test/deeponet_tests.jl | 33 +++++++++++++-------- 5 files changed, 88 insertions(+), 55 deletions(-) delete mode 100644 src/display.jl diff --git a/src/LuxNeuralOperators.jl b/src/LuxNeuralOperators.jl index 27bb836..545931c 100644 --- a/src/LuxNeuralOperators.jl +++ b/src/LuxNeuralOperators.jl @@ -12,6 +12,7 @@ using PrecompileTools: @recompile_invalidations using NNlib: NNlib, ⊠, batched_adjoint using Random: Random, AbstractRNG using Reexport: @reexport + import Base: show end const CRC = ChainRulesCore @@ -26,7 +27,6 @@ include("layers.jl") include("fno.jl") include("deeponet.jl") -include("display.jl") export FourierTransform export SpectralConv, OperatorConv, SpectralKernel, OperatorKernel diff --git a/src/deeponet.jl b/src/deeponet.jl index d034da0..e044ef6 100644 --- a/src/deeponet.jl +++ b/src/deeponet.jl @@ -32,7 +32,7 @@ Branch net : layer_3 = Dense(32 => 16), # 528 parameters ), ) - + Trunk net : ( Chain( @@ -99,7 +99,7 @@ Branch net : layer_3 = Dense(32 => 16), # 528 parameters ), ) - + Trunk net : ( Chain( @@ -142,18 +142,24 @@ Additional net : """ function DeepONet(branch::L1, trunk::L2; additional=nothing) where {L1, L2} return @compact(; branch, trunk, additional, dispatch=:DeepONet) do (u, y) - t = trunk(y) # p x N x nb... - b = branch(u) # p x nb... + t = trunk(y) # p x N x nb + b = branch(u) # p x u_size... x nb @argcheck size(t, 1)==size(b, 1) "Branch and Trunk net must share the same \ amount of nodes in the last layer. Otherwise \ Σᵢ bᵢⱼ tᵢₖ won't work." 
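# --- editor's aside (not part of the patch): a minimal, self-contained sketch of the
# contraction Σᵢ bᵢⱼ tᵢₖ that `__project(b, t, nothing)` performs in the simplest case,
# assuming a 2-D branch output (p × nb) and a 3-D trunk output (p × N × nb). It mirrors
# the method added in src/utils.jl and is for illustration only. ---
function project_scalar(b::AbstractMatrix, t::AbstractArray{<:Real, 3})
    b_ = reshape(b, size(b, 1), 1, size(b, 2))     # p × 1 × nb, so it broadcasts against t
    return dropdims(sum(b_ .* t; dims=1); dims=1)  # sum over the p latent nodes → N × nb
end

b = rand(Float32, 16, 5)       # branch output: p = 16 nodes, nb = 5 batch samples
t = rand(Float32, 16, 10, 5)   # trunk output: p = 16 nodes, N = 10 query points, nb = 5
@assert size(project_scalar(b, t)) == (10, 5)
# --- end of editor's aside ---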
- if isnothing(additional) - out_ = __project(b, t) - else - out_ = additional(__project(b, t)) - end - @return out_ + @return __project(b, t, additional) + end +end + +function Base.show(io::IO, ::MIME"text/plain", x::CompactLuxLayer{:DeepONet}) + # show(io, x) + _print_wrapper_model(io, "Branch net :\n", x.layers.branch) + print(io, "\n \n") + _print_wrapper_model(io, "Trunk net :\n", x.layers.trunk) + if :additional in keys(x.layers) + print(io, "\n \n") + _print_wrapper_model(io, "Additional net :\n", x.layers.additional) end end diff --git a/src/display.jl b/src/display.jl deleted file mode 100644 index 07bc8a0..0000000 --- a/src/display.jl +++ /dev/null @@ -1,22 +0,0 @@ -# function Base.show(io::IO, model::conv) where {conv <: OperatorConv} -# # print(io, model.name*"() # "*string(Lux.parameterlength(model))*" parameters") -# print(io, model.name) -# end - -# function Base.show(io::IO, ::MIME"text/plain", model::conv) where {conv <: OperatorConv} -# show(io, model.name) -# end - -function Base.show(io::IO, model::Lux.CompactLuxLayer{:DeepONet}) - _print_wrapper_model(io, "Branch net :\n", model.layers.branch) - print(io, "\n \n") - _print_wrapper_model(io, "Trunk net :\n", model.layers.trunk) - if :additional in keys(model.layers) - print(io, "\n \n") - _print_wrapper_model(io, "Additional net :\n", model.layers.additional) - end -end - -function Base.show(io::IO, ::MIME"text/plain", x::CompactLuxLayer{:DeepONet}) - show(io, x) -end diff --git a/src/utils.jl b/src/utils.jl index dafd5d4..d732457 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,34 +1,74 @@ # Temporarily capture certain calls like AMDGPU for ComplexFloats @inline __batched_mul(x, y) = x ⊠ y -@inline function __project(b::AbstractArray{T1, 2}, t::AbstractArray{T2, 3}) where {T1, T2} +@inline function __project(b::AbstractArray{T1, 2}, t::AbstractArray{T2, 3}, + additional::Nothing) where {T1, T2} # b : p x nb # t : p x N x nb b_ = reshape(b, size(b, 1), 1, size(b, 2)) # p x 1 x nb return dropdims(sum(b_ .* t; dims=1); dims=1) # N x nb end -@inline function __project(b::AbstractArray{T1, 3}, t::AbstractArray{T2, 3}) where {T1, T2} +@inline function __project(b::AbstractArray{T1, 3}, t::AbstractArray{T2, 3}, + additional::Nothing) where {T1, T2} # b : p x u x nb # t : p x N x nb if size(b, 2) == 1 || size(t, 2) == 1 return sum(b .* t; dims=1) # 1 x N x nb else - return __batched_mul(batched_adjoint(t), b) # N x p x nb + return __batched_mul(batched_adjoint(b), t) # u x N x b end end +@inline function __project(b::AbstractArray{T1, N}, t::AbstractArray{T2, 3}, + additional::Nothing) where {T1, T2, N} + # b : p x u_size x nb + # t : p x N x nb + u_size = size(b)[2:(end - 1)] + + b_ = reshape(b, size(b, 1), u_size..., 1, size(b)[end]) + # p x u_size x 1 x nb + + t_ = reshape(t, size(t, 1), ones(eltype(u_size), length(u_size))..., size(t)[2:end]...) + # p x (1,1,1...) 
x N x nb + + return dropdims(sum(b_ .* t_; dims=1); dims=1) # u_size x N x nb +end + +@inline function __project( + b::AbstractArray{T1, 2}, t::AbstractArray{T2, 3}, additional::T) where {T1, T2, T} + # b : p x nb + # t : p x N x nb + b_ = reshape(b, size(b, 1), 1, size(b, 2)) # p x 1 x nb + return additional(b_ .* t) # p x N x nb => out_dims x N x nb +end + @inline function __project( - b::AbstractArray{T1, N}, t::AbstractArray{T2, 3}) where {T1, T2, N} + b::AbstractArray{T1, 3}, t::AbstractArray{T2, 3}, additional::T) where {T1, T2, T} + # b : p x u x nb + # t : p x N x nb + + if size(b, 2) == 1 || size(t, 2) == 1 + return additional(b .* t) # p x N x nb => out_dims x N x nb + else + b_ = reshape(b, size(b)[1:2]..., 1, size(b, 3)) # p x u x 1 x nb + t_ = reshape(t, size(t, 1), 1, size(t)[2:end]...) # p x 1 x N x nb + + return additional(b_ .* t_) # p x u x N x nb => out_size x N x nb + end +end + +@inline function __project(b::AbstractArray{T1, N}, t::AbstractArray{T2, 3}, + additional::T) where {T1, T2, N, T} # b : p x u_size x nb # t : p x N x nb u_size = size(b)[2:(end - 1)] - b_ = reshape(b, size(b, 1), 1, u_size..., size(b)[end]) - # p x 1 x u_size x nb + b_ = reshape(b, size(b, 1), u_size..., 1, size(b)[end]) + # p x u_size x 1 x nb - t_ = reshape(t, size(t)[1:2]..., ones(eltype(u_size), length(u_size))..., size(t)[end]) - # p x N x (1,1,1...) x nb + t_ = reshape(t, size(t, 1), ones(eltype(u_size), length(u_size))..., size(t)[2:end]...) + # p x (1,1,1...) x N x nb - return dropdims(sum(b_ .* t_; dims=1); dims=1) # N x u_size x nb -end \ No newline at end of file + return additional(b_ .* t_) # p x u_size x N x nb => out_size x N x nb +end diff --git a/test/deeponet_tests.jl b/test/deeponet_tests.jl index a367533..e71bfdb 100644 --- a/test/deeponet_tests.jl +++ b/test/deeponet_tests.jl @@ -5,9 +5,11 @@ setups = [ (u_size=(64, 5), y_size=(1, 10, 5), out_size=(10, 5), branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), name="Scalar"), - (u_size=(64, 3, 5), y_size=(4, 10, 5), out_size=(10, 3, 5), + (u_size=(64, 1, 5), y_size=(1, 10, 5), out_size=(1, 10, 5), + branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), name="Scalar II"), + (u_size=(64, 3, 5), y_size=(4, 10, 5), out_size=(3, 10, 5), branch=(64, 32, 32, 16), trunk=(4, 8, 8, 16), name="Vector"), - (u_size=(64, 4, 3, 3, 5), y_size=(4, 10, 5), out_size=(10, 4, 3, 3, 5), + (u_size=(64, 4, 3, 3, 5), y_size=(4, 10, 5), out_size=(4, 3, 3, 10, 5), branch=(64, 32, 32, 16), trunk=(4, 8, 8, 16), name="Tensor")] @testset "$(setup.name)" for setup in setups @@ -23,22 +25,29 @@ @test setup.out_size == size(pred) end - @testset "Additonal layer" begin - u = rand(Float32, 64, 1, 5) |> aType # sensor_points x nb - y = rand(Float32, 1, 10, 5) |> aType # ndims x N x nb - out_size = (4, 10, 5) - - branch_net = Chain(Dense(64 => 32), Dense(32 => 32), Dense(32 => 16)) - trunk_net = Chain(Dense(1 => 8), Dense(8 => 8), Dense(8 => 16)) - additional = Chain(Dense(1 => 4)) - deeponet = DeepONet(branch_net, trunk_net; additional=additional) + setups = [ + (u_size=(64, 5), y_size=(1, 10, 5), out_size=(4, 10, 5), + branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), + additional=Dense(16 => 4), name="Scalar"), + (u_size=(64, 1, 5), y_size=(1, 10, 5), out_size=(4, 10, 5), + branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16), + additional=Dense(16 => 4), name="Scalar II"), + (u_size=(64, 3, 5), y_size=(8, 10, 5), out_size=(4, 3, 10, 5), + branch=(64, 32, 32, 16), trunk=(8, 8, 8, 16), + additional=Dense(16 => 4), name="Vector")] + + @testset "Additonal layer: $(setup.name)" for setup 
in setups + u = rand(Float32, setup.u_size...) |> aType + y = rand(Float32, setup.y_size...) |> aType + deeponet = DeepONet(; + branch=setup.branch, trunk=setup.trunk, additional=setup.additional) ps, st = Lux.setup(rng, deeponet) |> dev @inferred deeponet((u, y), ps, st) @jet deeponet((u, y), ps, st) pred = first(deeponet((u, y), ps, st)) - @test out_size == size(pred) + @test setup.out_size == size(pred) end @testset "Embedding layer mismatch" begin From a0e57d233803845968c975a1da1fdc46892a6263 Mon Sep 17 00:00:00 2001 From: ayushinav Date: Wed, 10 Jul 2024 00:20:02 -0400 Subject: [PATCH 05/16] inference tests --- src/deeponet.jl | 2 -- test/deeponet_tests.jl | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/deeponet.jl b/src/deeponet.jl index e044ef6..a90cc37 100644 --- a/src/deeponet.jl +++ b/src/deeponet.jl @@ -32,7 +32,6 @@ Branch net : layer_3 = Dense(32 => 16), # 528 parameters ), ) - Trunk net : ( Chain( @@ -99,7 +98,6 @@ Branch net : layer_3 = Dense(32 => 16), # 528 parameters ), ) - Trunk net : ( Chain( diff --git a/test/deeponet_tests.jl b/test/deeponet_tests.jl index e71bfdb..3291689 100644 --- a/test/deeponet_tests.jl +++ b/test/deeponet_tests.jl @@ -18,8 +18,8 @@ deeponet = DeepONet(; branch=setup.branch, trunk=setup.trunk) ps, st = Lux.setup(rng, deeponet) |> dev - @inferred deeponet((u, y), ps, st) - @jet deeponet((u, y), ps, st) + @inferred first(deeponet((u, y), ps, st)) + @jet first(deeponet((u, y), ps, st)) pred = first(deeponet((u, y), ps, st)) @test setup.out_size == size(pred) @@ -43,8 +43,8 @@ branch=setup.branch, trunk=setup.trunk, additional=setup.additional) ps, st = Lux.setup(rng, deeponet) |> dev - @inferred deeponet((u, y), ps, st) - @jet deeponet((u, y), ps, st) + @inferred first(deeponet((u, y), ps, st)) + @jet first(deeponet((u, y), ps, st)) pred = first(deeponet((u, y), ps, st)) @test setup.out_size == size(pred) From f238d8084c38232c39f4ef57b2746045ee6f1ea0 Mon Sep 17 00:00:00 2001 From: ayushinav Date: Wed, 10 Jul 2024 00:32:16 -0400 Subject: [PATCH 06/16] explicit import test fix --- src/LuxNeuralOperators.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/LuxNeuralOperators.jl b/src/LuxNeuralOperators.jl index 545931c..54a0022 100644 --- a/src/LuxNeuralOperators.jl +++ b/src/LuxNeuralOperators.jl @@ -12,7 +12,6 @@ using PrecompileTools: @recompile_invalidations using NNlib: NNlib, ⊠, batched_adjoint using Random: Random, AbstractRNG using Reexport: @reexport - import Base: show end const CRC = ChainRulesCore From a99e89f12b2acd44ee5cf0e29178132e3adffee6 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Wed, 10 Jul 2024 23:00:21 -0700 Subject: [PATCH 07/16] chore: set version to 1.0.0-DEV --- .gitignore | 2 ++ Project.toml | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c7e6298..38ed5d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ Manifest.toml .vscode +wip +examples diff --git a/Project.toml b/Project.toml index 27ea2cb..a43a020 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "LuxNeuralOperators" uuid = "c0ba2cc5-a80b-46ec-84b3-091eb317b01d" authors = ["Avik Pal "] -version = "0.1.0" +version = "1.0.0-DEV" [deps] ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197" @@ -23,7 +23,7 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" LuxNeuralOperatorsAMDGPUExt = "AMDGPU" [compat] -AMDGPU = "0.8.4, 0.9" +AMDGPU = "0.9.6" Aqua = "0.8.7" ArgCheck = "2.3.0" ChainRulesCore = "1.24.0" From eabe3842765d6fcc4e37181ab0a840396a4b373a Mon Sep 17 00:00:00 
2001 From: Avik Pal Date: Wed, 10 Jul 2024 23:02:35 -0700 Subject: [PATCH 08/16] chore: start migration to NeuralOperators.jl --- LocalPreferences.toml | 2 +- Project.toml | 6 +++--- README.md | 4 ++-- ...orsAMDGPUExt.jl => NeuralOperatorsAMDGPUExt.jl} | 6 +++--- src/{LuxNeuralOperators.jl => NeuralOperators.jl} | 2 +- test/qa_tests.jl | 14 +++++++------- 6 files changed, 17 insertions(+), 17 deletions(-) rename ext/{LuxNeuralOperatorsAMDGPUExt.jl => NeuralOperatorsAMDGPUExt.jl} (74%) rename src/{LuxNeuralOperators.jl => NeuralOperators.jl} (96%) diff --git a/LocalPreferences.toml b/LocalPreferences.toml index 22c70b2..beeea5a 100644 --- a/LocalPreferences.toml +++ b/LocalPreferences.toml @@ -1,2 +1,2 @@ [LuxTestUtils] -target_modules = ["LuxNeuralOperators", "Lux", "LuxLib"] +target_modules = ["NeuralOperators", "Lux", "LuxLib"] diff --git a/Project.toml b/Project.toml index a43a020..d56b698 100644 --- a/Project.toml +++ b/Project.toml @@ -1,5 +1,5 @@ -name = "LuxNeuralOperators" -uuid = "c0ba2cc5-a80b-46ec-84b3-091eb317b01d" +name = "NeuralOperators" +uuid = "ea5c82af-86e5-48da-8ee1-382d6ad7af4b" authors = ["Avik Pal "] version = "1.0.0-DEV" @@ -20,7 +20,7 @@ WeightInitializers = "d49dbf32-c5c2-4618-8acc-27bb2598ef2d" AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" [extensions] -LuxNeuralOperatorsAMDGPUExt = "AMDGPU" +NeuralOperatorsAMDGPUExt = "AMDGPU" [compat] AMDGPU = "0.9.6" diff --git a/README.md b/README.md index dff494d..275bd73 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# LuxNeuralOperators +# NeuralOperators -LuxNeuralOperators are NeuralOperators built using [Lux.jl](https://lux.csail.mit.edu/). +NeuralOperators are NeuralOperators built using [Lux.jl](https://lux.csail.mit.edu/). For a version built using an old-generation framework [Flux.jl](https://fluxml.ai/), see [NeuralOperators.jl](https://docs.sciml.ai/NeuralOperators/stable/) diff --git a/ext/LuxNeuralOperatorsAMDGPUExt.jl b/ext/NeuralOperatorsAMDGPUExt.jl similarity index 74% rename from ext/LuxNeuralOperatorsAMDGPUExt.jl rename to ext/NeuralOperatorsAMDGPUExt.jl index 14a5217..d9f00a3 100644 --- a/ext/LuxNeuralOperatorsAMDGPUExt.jl +++ b/ext/NeuralOperatorsAMDGPUExt.jl @@ -1,10 +1,10 @@ -module LuxNeuralOperatorsAMDGPUExt +module NeuralOperatorsAMDGPUExt using AMDGPU: AnyROCArray -using LuxNeuralOperators: LuxNeuralOperators +using NeuralOperators: NeuralOperators # This should be upstreamed to NNlib before we release this package -@inline function LuxNeuralOperators.__batched_mul( +@inline function NeuralOperators.__batched_mul( x::AnyROCArray{<:Union{ComplexF16, ComplexF32, ComplexF64}, 3}, y::AnyROCArray{<:Union{ComplexF16, ComplexF32, ComplexF64}, 3}) # FIXME: This is not good for performance but that is okay for now diff --git a/src/LuxNeuralOperators.jl b/src/NeuralOperators.jl similarity index 96% rename from src/LuxNeuralOperators.jl rename to src/NeuralOperators.jl index 54a0022..dd19da4 100644 --- a/src/LuxNeuralOperators.jl +++ b/src/NeuralOperators.jl @@ -1,4 +1,4 @@ -module LuxNeuralOperators +module NeuralOperators using PrecompileTools: @recompile_invalidations diff --git a/test/qa_tests.jl b/test/qa_tests.jl index 210982a..e5587cf 100644 --- a/test/qa_tests.jl +++ b/test/qa_tests.jl @@ -1,22 +1,22 @@ @testitem "doctests: Quality Assurance" tags=[:qa] begin - using Documenter, LuxNeuralOperators + using Documenter, NeuralOperators DocMeta.setdocmeta!( - LuxNeuralOperators, :DocTestSetup, :(using LuxNeuralOperators); recursive=true) - doctest(LuxNeuralOperators; manual=false) + 
NeuralOperators, :DocTestSetup, :(using NeuralOperators); recursive=true) + doctest(NeuralOperators; manual=false) end @testitem "Aqua: Quality Assurance" tags=[:qa] begin using Aqua - Aqua.test_all(LuxNeuralOperators) + Aqua.test_all(NeuralOperators) end @testitem "Explicit Imports: Quality Assurance" tags=[:qa] begin using ExplicitImports # Skip our own packages - @test check_no_implicit_imports(LuxNeuralOperators; skip=(Base, Core, Lux)) === nothing - @test check_no_stale_explicit_imports(LuxNeuralOperators) === nothing - @test check_all_qualified_accesses_via_owners(LuxNeuralOperators) === nothing + @test check_no_implicit_imports(NeuralOperators; skip=(Base, Core, Lux)) === nothing + @test check_no_stale_explicit_imports(NeuralOperators) === nothing + @test check_all_qualified_accesses_via_owners(NeuralOperators) === nothing end From 37814053dbb5333b1151a385a0c903f55f07c033 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Wed, 10 Jul 2024 23:08:11 -0700 Subject: [PATCH 09/16] refactor: remove the ext in favor of LuxDeviceUtils --- Project.toml | 9 +++------ ext/NeuralOperatorsAMDGPUExt.jl | 14 -------------- 2 files changed, 3 insertions(+), 20 deletions(-) delete mode 100644 ext/NeuralOperatorsAMDGPUExt.jl diff --git a/Project.toml b/Project.toml index d56b698..a9ff616 100644 --- a/Project.toml +++ b/Project.toml @@ -10,18 +10,13 @@ ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" Lux = "b2108857-7c20-44ae-9111-449ecde12c47" LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623" +LuxDeviceUtils = "34f89e08-e1d5-43b4-8944-0b49ac560553" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" WeightInitializers = "d49dbf32-c5c2-4618-8acc-27bb2598ef2d" -[weakdeps] -AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" - -[extensions] -NeuralOperatorsAMDGPUExt = "AMDGPU" - [compat] AMDGPU = "0.9.6" Aqua = "0.8.7" @@ -34,6 +29,7 @@ FFTW = "1.8.0" Lux = "0.5.56" LuxCUDA = "0.3.2" LuxCore = "0.1.15" +LuxDeviceUtils = "0.1.24" LuxTestUtils = "0.1.15" NNlib = "0.9.17" Optimisers = "0.3.3" @@ -48,6 +44,7 @@ Zygote = "0.6.70" julia = "1.10" [extras] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7" diff --git a/ext/NeuralOperatorsAMDGPUExt.jl b/ext/NeuralOperatorsAMDGPUExt.jl deleted file mode 100644 index d9f00a3..0000000 --- a/ext/NeuralOperatorsAMDGPUExt.jl +++ /dev/null @@ -1,14 +0,0 @@ -module NeuralOperatorsAMDGPUExt - -using AMDGPU: AnyROCArray -using NeuralOperators: NeuralOperators - -# This should be upstreamed to NNlib before we release this package -@inline function NeuralOperators.__batched_mul( - x::AnyROCArray{<:Union{ComplexF16, ComplexF32, ComplexF64}, 3}, - y::AnyROCArray{<:Union{ComplexF16, ComplexF32, ComplexF64}, 3}) - # FIXME: This is not good for performance but that is okay for now - return stack(*, eachslice(x; dims=3), eachslice(y; dims=3)) -end - -end From 1db83a640ff613eeea4dbc9c401dcab4aba28918 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Wed, 10 Jul 2024 23:38:57 -0700 Subject: [PATCH 10/16] fix: update doctests to not check printing --- .buildkite/pipeline.yml | 10 +---- .github/workflows/CI.yml | 2 - codecov.yml | 3 -- src/deeponet.jl | 82 ++++++++++------------------------------ src/fno.jl | 26 ++++--------- src/layers.jl | 64 
+++++-------------------------- test/qa_tests.jl | 2 +- 7 files changed, 40 insertions(+), 149 deletions(-) delete mode 100644 codecov.yml diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 5503def..7979b22 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -7,16 +7,13 @@ steps: test_args: "--quickfail" - JuliaCI/julia-coverage#v1: codecov: true - dirs: - - src - - ext agents: queue: "juliagpu" cuda: "*" env: BACKEND_GROUP: "CUDA" if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 240 + timeout_in_minutes: 60 matrix: setup: julia: @@ -30,9 +27,6 @@ steps: test_args: "--quickfail" - JuliaCI/julia-coverage#v1: codecov: true - dirs: - - src - - ext env: JULIA_AMDGPU_CORE_MUST_LOAD: "1" JULIA_AMDGPU_HIP_MUST_LOAD: "1" @@ -43,7 +37,7 @@ steps: rocm: "*" rocmgpu: "*" if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 240 + timeout_in_minutes: 60 matrix: setup: julia: diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 6596d9d..076c39f 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -41,8 +41,6 @@ jobs: RETESTITEMS_NWORKERS: 4 RETESTITEMS_NWORKER_THREADS: 2 - uses: julia-actions/julia-processcoverage@v1 - with: - directories: src,ext - uses: codecov/codecov-action@v4 with: files: lcov.info diff --git a/codecov.yml b/codecov.yml deleted file mode 100644 index 0398f92..0000000 --- a/codecov.yml +++ /dev/null @@ -1,3 +0,0 @@ -codecov: - notify: - wait_for_ci: false diff --git a/src/deeponet.jl b/src/deeponet.jl index a90cc37..9072880 100644 --- a/src/deeponet.jl +++ b/src/deeponet.jl @@ -23,23 +23,16 @@ operators", doi: https://arxiv.org/abs/1910.03193 ## Example ```jldoctest -julia> deeponet = DeepONet(; branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16)) -Branch net : -( - Chain( - layer_1 = Dense(64 => 32), # 2_080 parameters - layer_2 = Dense(32 => 32), # 1_056 parameters - layer_3 = Dense(32 => 16), # 528 parameters - ), -) -Trunk net : -( - Chain( - layer_1 = Dense(1 => 8), # 16 parameters - layer_2 = Dense(8 => 8), # 72 parameters - layer_3 = Dense(8 => 16), # 144 parameters - ), -) +julia> deeponet = DeepONet(; branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16)); + +julia> ps, st = Lux.setup(Xoshiro(), deeponet); + +julia> u = rand(Float32, 64, 5); + +julia> y = rand(Float32, 1, 10, 5); + +julia> size(first(deeponet((u, y), ps, st))) +(10, 5) ``` """ function DeepONet(; @@ -48,8 +41,8 @@ function DeepONet(; # checks for last dimension size @argcheck branch[end]==trunk[end] "Branch and Trunk net must share the same amount of \ - nodes in the last layer. Otherwise Σᵢ bᵢⱼ tᵢₖ won't \ - work." + nodes in the last layer. Otherwise Σᵢ bᵢⱼ tᵢₖ won't \ + work." branch_net = Chain([Dense(branch[i] => branch[i + 1], branch_activation) for i in 1:(length(branch) - 1)]...) 
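# --- editor's aside (not part of the patch): the trimmed doctest above no longer shows the
# `additional` keyword, so this is a hedged usage sketch. The shapes and the Dense(16 => 4)
# head are taken from the test setups in test/deeponet_tests.jl earlier in this series; the
# Xoshiro seed is arbitrary. ---
using Lux, NeuralOperators, Random

deeponet = DeepONet(; branch=(64, 32, 32, 16), trunk=(1, 8, 8, 16),
    additional=Dense(16 => 4))                 # post-processes the p = 16 latent product
ps, st = Lux.setup(Xoshiro(0), deeponet)
u = rand(Float32, 64, 5)                       # sensor values: 64 points × 5 batch samples
y = rand(Float32, 1, 10, 5)                    # query coordinates: 1 dim × 10 points × 5 batch
@assert size(first(deeponet((u, y), ps, st))) == (4, 10, 5)
# --- end of editor's aside ---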
@@ -89,53 +82,16 @@ julia> branch_net = Chain(Dense(64 => 32), Dense(32 => 32), Dense(32 => 16)); julia> trunk_net = Chain(Dense(1 => 8), Dense(8 => 8), Dense(8 => 16)); -julia> deeponet = DeepONet(branch_net, trunk_net) -Branch net : -( - Chain( - layer_1 = Dense(64 => 32), # 2_080 parameters - layer_2 = Dense(32 => 32), # 1_056 parameters - layer_3 = Dense(32 => 16), # 528 parameters - ), -) -Trunk net : -( - Chain( - layer_1 = Dense(1 => 8), # 16 parameters - layer_2 = Dense(8 => 8), # 72 parameters - layer_3 = Dense(8 => 16), # 144 parameters - ), -) +julia> deeponet = DeepONet(branch_net, trunk_net); -julia> branch_net = Chain(Dense(64 => 32), Dense(32 => 32), Dense(32 => 16)); +julia> ps, st = Lux.setup(Xoshiro(), deeponet); -julia> trunk_net = Chain(Dense(1 => 8), Dense(8 => 8), Dense(8 => 16)); +julia> u = rand(Float32, 64, 5); + +julia> y = rand(Float32, 1, 10, 5); -julia> additional = Chain(Dense(1 => 4)); - -julia> deeponet = DeepONet(branch_net, trunk_net; additional=additional) -Branch net : -( - Chain( - layer_1 = Dense(64 => 32), # 2_080 parameters - layer_2 = Dense(32 => 32), # 1_056 parameters - layer_3 = Dense(32 => 16), # 528 parameters - ), -) - -Trunk net : -( - Chain( - layer_1 = Dense(1 => 8), # 16 parameters - layer_2 = Dense(8 => 8), # 72 parameters - layer_3 = Dense(8 => 16), # 144 parameters - ), -) - -Additional net : -( - Dense(1 => 4), # 8 parameters -) +julia> size(first(deeponet((u, y), ps, st))) +(10, 5) ``` """ function DeepONet(branch::L1, trunk::L2; additional=nothing) where {L1, L2} diff --git a/src/fno.jl b/src/fno.jl index d6e8f31..26f67d0 100644 --- a/src/fno.jl +++ b/src/fno.jl @@ -27,24 +27,14 @@ kernels, and two `Dense` layers to project data back to the scalar field of inte ## Example ```jldoctest -julia> FourierNeuralOperator(gelu; chs=(2, 64, 64, 128, 1), modes=(16,)) -FourierNeuralOperator( - lifting = Dense(2 => 64), # 192 parameters - mapping = @compact( - l₁ = Dense(64 => 64), # 4_160 parameters - l₂ = OperatorConv{FourierTransform}(64 => 64, (16,); permuted = false)(), # 65_536 parameters - activation = gelu, - ) do x::AbstractArray - l₁x = l₁(x) - l₂x = l₂(x) - return @__dot__(activation(l₁x + l₂x)) - end, - project = Chain( - layer_1 = Dense(64 => 128, gelu), # 8_320 parameters - layer_2 = Dense(128 => 1), # 129 parameters - ), -) # Total: 78_337 parameters, - # plus 1 states. +julia> fno = FourierNeuralOperator(gelu; chs=(2, 64, 64, 128, 1), modes=(16,)); + +julia> ps, st = Lux.setup(Xoshiro(), fno); + +julia> u = rand(Float32, 2, 1024, 5); + +julia> size(first(fno(u, ps, st))) +(1, 1024, 5) ``` """ function FourierNeuralOperator( diff --git a/src/layers.jl b/src/layers.jl index 1bac045..a970f14 100644 --- a/src/layers.jl +++ b/src/layers.jl @@ -20,11 +20,9 @@ ## Example ```jldoctest -julia> OperatorConv(2 => 5, (16,), FourierTransform{ComplexF32}) -OperatorConv{FourierTransform}(2 => 5, (16,); permuted = false)() # 160 parameters +julia> OperatorConv(2 => 5, (16,), FourierTransform{ComplexF32}); -julia> OperatorConv(2 => 5, (16,), FourierTransform{ComplexF32}; permuted=Val(true)) -OperatorConv{FourierTransform}(2 => 5, (16,); permuted = true)() # 160 parameters +julia> OperatorConv(2 => 5, (16,), FourierTransform{ComplexF32}; permuted=Val(true)); ``` """ @concrete struct OperatorConv{perm, T <: AbstractTransform} <: AbstractExplicitLayer @@ -78,11 +76,9 @@ Construct a `OperatorConv` with `FourierTransform{ComplexF32}` as the transform. 
## Example ```jldoctest -julia> SpectralConv(2 => 5, (16,)) -OperatorConv{FourierTransform}(2 => 5, (16,); permuted = false)() # 160 parameters +julia> SpectralConv(2 => 5, (16,)); -julia> SpectralConv(2 => 5, (16,); permuted=Val(true)) -OperatorConv{FourierTransform}(2 => 5, (16,); permuted = true)() # 160 parameters +julia> SpectralConv(2 => 5, (16,); permuted=Val(true)); ``` """ SpectralConv(args...; kwargs...) = OperatorConv( @@ -112,29 +108,9 @@ All the keyword arguments are passed to the [`OperatorConv`](@ref) constructor. ## Example ```jldoctest -julia> OperatorKernel(2 => 5, (16,), FourierTransform{ComplexF64}) -@compact( - l₁ = Dense(2 => 5), # 15 parameters - l₂ = OperatorConv{FourierTransform}(2 => 5, (16,); permuted = false)(), # 160 parameters - activation = identity, -) do x::AbstractArray - l₁x = l₁(x) - l₂x = l₂(x) - return @__dot__(activation(l₁x + l₂x)) -end # Total: 175 parameters, - # plus 1 states. - -julia> OperatorKernel(2 => 5, (16,), FourierTransform{ComplexF64}; permuted=Val(true)) -@compact( - l₁ = Conv((1,), 2 => 5), # 15 parameters - l₂ = OperatorConv{FourierTransform}(2 => 5, (16,); permuted = true)(), # 160 parameters - activation = identity, -) do x::AbstractArray - l₁x = l₁(x) - l₂x = l₂(x) - return @__dot__(activation(l₁x + l₂x)) -end # Total: 175 parameters, - # plus 1 states. +julia> OperatorKernel(2 => 5, (16,), FourierTransform{ComplexF64}); + +julia> OperatorKernel(2 => 5, (16,), FourierTransform{ComplexF64}; permuted=Val(true)); ``` """ function OperatorKernel(ch::Pair{<:Integer, <:Integer}, modes::Dims{N}, transform::Type{TR}, @@ -160,29 +136,9 @@ Construct a `OperatorKernel` with `FourierTransform{ComplexF32}` as the transfor ## Example ```jldoctest -julia> SpectralKernel(2 => 5, (16,)) -@compact( - l₁ = Dense(2 => 5), # 15 parameters - l₂ = OperatorConv{FourierTransform}(2 => 5, (16,); permuted = false)(), # 160 parameters - activation = identity, -) do x::AbstractArray - l₁x = l₁(x) - l₂x = l₂(x) - return @__dot__(activation(l₁x + l₂x)) -end # Total: 175 parameters, - # plus 1 states. - -julia> SpectralKernel(2 => 5, (16,); permuted=Val(true)) -@compact( - l₁ = Conv((1,), 2 => 5), # 15 parameters - l₂ = OperatorConv{FourierTransform}(2 => 5, (16,); permuted = true)(), # 160 parameters - activation = identity, -) do x::AbstractArray - l₁x = l₁(x) - l₂x = l₂(x) - return @__dot__(activation(l₁x + l₂x)) -end # Total: 175 parameters, - # plus 1 states. 
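# --- editor's aside (not part of the patch): the shortened doctests above no longer show a
# forward pass, so here is an illustrative sketch. The channels × grid × batch layout for the
# default (non-permuted) mode and the (5, 1024, 5) output shape are inferred from the
# FourierNeuralOperator doctest, i.e. assumptions rather than something stated here. ---
using Lux, NeuralOperators, Random

layer = SpectralKernel(2 => 5, (16,))          # 2 → 5 channels, keep 16 Fourier modes
ps, st = Lux.setup(Xoshiro(0), layer)
x = rand(Float32, 2, 1024, 5)                  # channels × grid points × batch
y, _ = layer(x, ps, st)
size(y)                                        # expected (5, 1024, 5)
# --- end of editor's aside ---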
+julia> SpectralKernel(2 => 5, (16,)); + +julia> SpectralKernel(2 => 5, (16,); permuted=Val(true)); ``` """ function SpectralKernel(ch::Pair{<:Integer, <:Integer}, modes::Dims{N}, diff --git a/test/qa_tests.jl b/test/qa_tests.jl index e5587cf..fd555fa 100644 --- a/test/qa_tests.jl +++ b/test/qa_tests.jl @@ -2,7 +2,7 @@ using Documenter, NeuralOperators DocMeta.setdocmeta!( - NeuralOperators, :DocTestSetup, :(using NeuralOperators); recursive=true) + NeuralOperators, :DocTestSetup, :(using NeuralOperators, Random); recursive=true) doctest(NeuralOperators; manual=false) end From 1926a264f277e02416d8dcc9a185873cfb8452db Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Wed, 10 Jul 2024 23:44:07 -0700 Subject: [PATCH 11/16] ci: use updated CI scripts --- .buildkite/pipeline.yml | 63 +++++++------------ .buildkite/scripts/diff.sh | 13 ++++ .buildkite/scripts/find_branch_point.sh | 6 ++ .buildkite/testing.yml | 49 +++++++++++++++ .github/workflows/CI.yml | 80 +++++++++++++++++++++++-- .github/workflows/Downgrade.yml | 41 ------------- .github/workflows/FormatCheck.yml | 9 --- .github/workflows/Invalidations.yml | 40 ------------- .github/workflows/QualityCheck.yml | 19 ++++++ README.md | 5 +- src/layers.jl | 4 ++ 11 files changed, 188 insertions(+), 141 deletions(-) create mode 100755 .buildkite/scripts/diff.sh create mode 100755 .buildkite/scripts/find_branch_point.sh create mode 100644 .buildkite/testing.yml delete mode 100644 .github/workflows/Downgrade.yml delete mode 100644 .github/workflows/FormatCheck.yml delete mode 100644 .github/workflows/Invalidations.yml create mode 100644 .github/workflows/QualityCheck.yml diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 7979b22..2c00e63 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,49 +1,26 @@ steps: - - label: ":julia: Julia {{matrix.julia}} + CUDA GPU" - plugins: - - JuliaCI/julia#v1: - version: "{{matrix.julia}}" - - JuliaCI/julia-test#v1: - test_args: "--quickfail" - - JuliaCI/julia-coverage#v1: - codecov: true + - label: "Triggering Pipelines (Pull Request)" + if: "build.pull_request.base_branch == 'main'" agents: queue: "juliagpu" - cuda: "*" - env: - BACKEND_GROUP: "CUDA" - if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 60 - matrix: - setup: - julia: - - "1" - - - label: ":julia: Julia: {{matrix.julia}} + AMD GPU" plugins: - - JuliaCI/julia#v1: - version: "{{matrix.julia}}" - - JuliaCI/julia-test#v1: - test_args: "--quickfail" - - JuliaCI/julia-coverage#v1: - codecov: true - env: - JULIA_AMDGPU_CORE_MUST_LOAD: "1" - JULIA_AMDGPU_HIP_MUST_LOAD: "1" - JULIA_AMDGPU_DISABLE_ARTIFACTS: "1" - BACKEND_GROUP: "AMDGPU" + - monebag/monorepo-diff#v2.5.9: + diff: ".buildkite/scripts/diff.sh $BUILDKITE_COMMIT" + interpolation: false + watch: + - path: + - "src/" + - "ext/" + - "test/" + - "Project.toml" + - ".buildkite/" + config: + command: "buildkite-agent pipeline upload .buildkite/testing.yml" + agents: + queue: "juliagpu" + + - label: "Triggering Pipelines (Main Branch / Tag)" + if: build.branch == "main" || build.tag != null agents: queue: "juliagpu" - rocm: "*" - rocmgpu: "*" - if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 60 - matrix: - setup: - julia: - - "1" - -env: - RETESTITEMS_NWORKERS: 4 - RETESTITEMS_NWORKER_THREADS: 2 - SECRET_CODECOV_TOKEN: 
"vn5M+4wSwUFje6fl6UB/Q/rTmLHu3OlCCMgoPOXPQHYpLZTLz2hOHsV44MadAnxw8MsNVxLKZlXBKqP3IydU9gUfV7QUBtnvbUmIvgUHbr+r0bVaIVVhw6cnd0s8/b+561nU483eRJd35bjYDOlO+V5eDxkbdh/0bzLefXNXy5+ALxsBYzsp75Sx/9nuREfRqWwU6S45mne2ZlwCDpZlFvBDXQ2ICKYXpA45MpxhW9RuqfpQdi6sSR6I/HdHkV2cuJO99dqqh8xfUy6vWPC/+HUVrn9ETsrXtayX1MX3McKj869htGICpR8vqd311HTONYVprH2AN1bJqr5MOIZ8Xg==;U2FsdGVkX1+W55pTI7zq+NwYrbK6Cgqe+Gp8wMCmXY+W10aXTB0bS6zshiDYSQ1Y3piT91xFyNhS+9AsajY0yQ==" + command: "buildkite-agent pipeline upload .buildkite/testing.yml" diff --git a/.buildkite/scripts/diff.sh b/.buildkite/scripts/diff.sh new file mode 100755 index 0000000..b73437f --- /dev/null +++ b/.buildkite/scripts/diff.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -ueo pipefail + +# Script to output the diff where the branch was created +# Usage: ./diff.sh $BUILDKITE_COMMIT + +COMMIT_HASH=$1 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +BRANCH_POINT_COMMIT=$($SCRIPT_DIR/find_branch_point.sh "$COMMIT_HASH") +echo >&2 "Cannot find latest build. Running diff against: $BRANCH_POINT_COMMIT" +diff=$(git diff --name-only "$BRANCH_POINT_COMMIT") +echo "$diff" diff --git a/.buildkite/scripts/find_branch_point.sh b/.buildkite/scripts/find_branch_point.sh new file mode 100755 index 0000000..f829535 --- /dev/null +++ b/.buildkite/scripts/find_branch_point.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -ue + +diff -u <(git rev-list --first-parent "$1") \ + <(git rev-list --first-parent main) | \ + sed -ne 's/^ //p' | head -1 diff --git a/.buildkite/testing.yml b/.buildkite/testing.yml new file mode 100644 index 0000000..7979b22 --- /dev/null +++ b/.buildkite/testing.yml @@ -0,0 +1,49 @@ +steps: + - label: ":julia: Julia {{matrix.julia}} + CUDA GPU" + plugins: + - JuliaCI/julia#v1: + version: "{{matrix.julia}}" + - JuliaCI/julia-test#v1: + test_args: "--quickfail" + - JuliaCI/julia-coverage#v1: + codecov: true + agents: + queue: "juliagpu" + cuda: "*" + env: + BACKEND_GROUP: "CUDA" + if: build.message !~ /\[skip tests\]/ + timeout_in_minutes: 60 + matrix: + setup: + julia: + - "1" + + - label: ":julia: Julia: {{matrix.julia}} + AMD GPU" + plugins: + - JuliaCI/julia#v1: + version: "{{matrix.julia}}" + - JuliaCI/julia-test#v1: + test_args: "--quickfail" + - JuliaCI/julia-coverage#v1: + codecov: true + env: + JULIA_AMDGPU_CORE_MUST_LOAD: "1" + JULIA_AMDGPU_HIP_MUST_LOAD: "1" + JULIA_AMDGPU_DISABLE_ARTIFACTS: "1" + BACKEND_GROUP: "AMDGPU" + agents: + queue: "juliagpu" + rocm: "*" + rocmgpu: "*" + if: build.message !~ /\[skip tests\]/ + timeout_in_minutes: 60 + matrix: + setup: + julia: + - "1" + +env: + RETESTITEMS_NWORKERS: 4 + RETESTITEMS_NWORKER_THREADS: 2 + SECRET_CODECOV_TOKEN: "vn5M+4wSwUFje6fl6UB/Q/rTmLHu3OlCCMgoPOXPQHYpLZTLz2hOHsV44MadAnxw8MsNVxLKZlXBKqP3IydU9gUfV7QUBtnvbUmIvgUHbr+r0bVaIVVhw6cnd0s8/b+561nU483eRJd35bjYDOlO+V5eDxkbdh/0bzLefXNXy5+ALxsBYzsp75Sx/9nuREfRqWwU6S45mne2ZlwCDpZlFvBDXQ2ICKYXpA45MpxhW9RuqfpQdi6sSR6I/HdHkV2cuJO99dqqh8xfUy6vWPC/+HUVrn9ETsrXtayX1MX3McKj869htGICpR8vqd311HTONYVprH2AN1bJqr5MOIZ8Xg==;U2FsdGVkX1+W55pTI7zq+NwYrbK6Cgqe+Gp8wMCmXY+W10aXTB0bS6zshiDYSQ1Y3piT91xFyNhS+9AsajY0yQ==" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 076c39f..037da9c 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -3,22 +3,36 @@ on: pull_request: branches: - main + paths: + - "src/**" + - "ext/**" + - "test/**" + - "Project.toml" + - ".github/workflows/CI.yml" push: branches: - main + concurrency: # Skip intermediate builds: always. # Cancel intermediate builds: only if it is a pull request build. 
group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + jobs: - test: - runs-on: ubuntu-latest + ci: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} + if: ${{ !contains(github.event.head_commit.message, '[skip tests]') }} + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: version: - "1" + os: + - ubuntu-latest + - macos-latest + - windows-latest steps: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 @@ -36,10 +50,6 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - env: - BACKEND_GROUP: "CPU" - RETESTITEMS_NWORKERS: 4 - RETESTITEMS_NWORKER_THREADS: 2 - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v4 with: @@ -47,3 +57,61 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} verbose: true fail_ci_if_error: true + + downgrade: + if: ${{ !contains(github.event.head_commit.message, '[skip tests]') && github.base_ref == github.event.repository.default_branch }} + name: Downgrade Julia ${{ matrix.version }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + version: ["1"] + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.version }} + - uses: julia-actions/julia-downgrade-compat@v1 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v4 + with: + files: lcov.info + token: ${{ secrets.CODECOV_TOKEN }} + verbose: true + fail_ci_if_error: true + + invalidations: + # Only run on PRs to the default branch. + # In the PR trigger above branches can be specified only explicitly whereas this check should work for master, main, or any other default branch + if: github.base_ref == github.event.repository.default_branch + runs-on: ubuntu-latest + steps: + - uses: julia-actions/setup-julia@v2 + with: + version: "1" + - uses: actions/checkout@v4 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-invalidations@v1 + id: invs_pr + + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.repository.default_branch }} + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-invalidations@v1 + id: invs_default + + - name: Report invalidation counts + run: | + echo "Invalidations on default branch: ${{ steps.invs_default.outputs.total }} (${{ steps.invs_default.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY + echo "This branch: ${{ steps.invs_pr.outputs.total }} (${{ steps.invs_pr.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY + - name: Check if the PR does increase number of invalidations + if: steps.invs_pr.outputs.total > steps.invs_default.outputs.total + run: exit 1 + +env: + BACKEND_GROUP: "CPU" + RETESTITEMS_NWORKERS: 4 + RETESTITEMS_NWORKER_THREADS: 2 diff --git a/.github/workflows/Downgrade.yml b/.github/workflows/Downgrade.yml deleted file mode 100644 index 695c8ef..0000000 --- a/.github/workflows/Downgrade.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: Downgrade -on: - pull_request: - branches: - - main - paths-ignore: - - 'docs/**' - push: - branches: - - master - paths-ignore: - - 'docs/**' -jobs: - test: - runs-on: ubuntu-latest - strategy: - matrix: - version: ['1'] - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - with: - version: ${{ matrix.version }} - - uses: cjdoris/julia-downgrade-compat-action@v1 - with: - skip: Pkg,TOML,Statistics,Random,Artifacts,LazyArtifacts - - 
uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 - env: - BACKEND_GROUP: "CPU" - RETESTITEMS_NWORKERS: 4 - RETESTITEMS_NWORKER_THREADS: 2 - - uses: julia-actions/julia-processcoverage@v1 - with: - directories: src,ext - - uses: codecov/codecov-action@v4 - with: - files: lcov.info - token: ${{ secrets.CODECOV_TOKEN }} - verbose: true - fail_ci_if_error: true \ No newline at end of file diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml deleted file mode 100644 index 76aebbb..0000000 --- a/.github/workflows/FormatCheck.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: Format suggestions - -on: [pull_request] - -jobs: - code-style: - runs-on: ubuntu-latest - steps: - - uses: julia-actions/julia-format@v3 \ No newline at end of file diff --git a/.github/workflows/Invalidations.yml b/.github/workflows/Invalidations.yml deleted file mode 100644 index 7ed9990..0000000 --- a/.github/workflows/Invalidations.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Invalidations - -on: - pull_request: - -concurrency: - # Skip intermediate builds: always. - # Cancel intermediate builds: always. - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - evaluate: - # Only run on PRs to the default branch. - # In the PR trigger above branches can be specified only explicitly whereas this check should work for master, main, or any other default branch - if: github.base_ref == github.event.repository.default_branch - runs-on: ubuntu-latest - steps: - - uses: julia-actions/setup-julia@v2 - with: - version: "1" - - uses: actions/checkout@v4 - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-invalidations@v1 - id: invs_pr - - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.repository.default_branch }} - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-invalidations@v1 - id: invs_default - - - name: Report invalidation counts - run: | - echo "Invalidations on default branch: ${{ steps.invs_default.outputs.total }} (${{ steps.invs_default.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY - echo "This branch: ${{ steps.invs_pr.outputs.total }} (${{ steps.invs_pr.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY - - name: Check if the PR does increase number of invalidations - if: steps.invs_pr.outputs.total > steps.invs_default.outputs.total - run: exit 1 diff --git a/.github/workflows/QualityCheck.yml b/.github/workflows/QualityCheck.yml new file mode 100644 index 0000000..72323bd --- /dev/null +++ b/.github/workflows/QualityCheck.yml @@ -0,0 +1,19 @@ +name: Code Quality Check + +on: [pull_request] + +jobs: + code-style: + name: Format Suggestions + runs-on: ubuntu-latest + steps: + - uses: julia-actions/julia-format@v3 + + typos-check: + name: Spell Check with Typos + runs-on: ubuntu-latest + steps: + - name: Checkout Actions Repository + uses: actions/checkout@v4 + - name: Check spelling + uses: crate-ci/typos@v1.23.1 diff --git a/README.md b/README.md index 275bd73..514dbad 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # NeuralOperators -NeuralOperators are NeuralOperators built using [Lux.jl](https://lux.csail.mit.edu/). +NeuralOperators built using [Lux.jl](https://lux.csail.mit.edu/). For a version built using an old-generation framework [Flux.jl](https://fluxml.ai/), see -[NeuralOperators.jl](https://docs.sciml.ai/NeuralOperators/stable/) +[NeuralOperators.jl](https://docs.sciml.ai/NeuralOperators/stable/), though that version +will be soon replaced by this one. 
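The invalidations job folded into CI.yml above fails a PR when it increases the number of method invalidations relative to the default branch. For chasing such a failure locally, the sketch below shows roughly the quantity the `julia-invalidations` action reports; it is not part of this patch, and SnoopCompileCore / SnoopCompile are assumed to be available in the active environment rather than being dependencies of this PR.

```julia
# Rough local equivalent of the CI invalidation check (illustrative only;
# SnoopCompileCore / SnoopCompile are assumptions, not dependencies of this PR).
using SnoopCompileCore

# Record method invalidations triggered while loading the package.
invalidations = @snoopr using NeuralOperators

using SnoopCompile  # load after recording so its own methods are not counted

# The CI job compares this total between the PR branch and the default branch
# and fails the build if it grew.
@show length(SnoopCompile.uinvalidated(invalidations))

# The per-method breakdown is the useful part when debugging a regression;
# the trees are sorted so the worst offenders come last.
trees = invalidation_trees(invalidations)
isempty(trees) || display(trees[end])
```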
diff --git a/src/layers.jl b/src/layers.jl index a970f14..0f23635 100644 --- a/src/layers.jl +++ b/src/layers.jl @@ -23,6 +23,7 @@ julia> OperatorConv(2 => 5, (16,), FourierTransform{ComplexF32}); julia> OperatorConv(2 => 5, (16,), FourierTransform{ComplexF32}; permuted=Val(true)); + ``` """ @concrete struct OperatorConv{perm, T <: AbstractTransform} <: AbstractExplicitLayer @@ -79,6 +80,7 @@ Construct a `OperatorConv` with `FourierTransform{ComplexF32}` as the transform. julia> SpectralConv(2 => 5, (16,)); julia> SpectralConv(2 => 5, (16,); permuted=Val(true)); + ``` """ SpectralConv(args...; kwargs...) = OperatorConv( @@ -111,6 +113,7 @@ All the keyword arguments are passed to the [`OperatorConv`](@ref) constructor. julia> OperatorKernel(2 => 5, (16,), FourierTransform{ComplexF64}); julia> OperatorKernel(2 => 5, (16,), FourierTransform{ComplexF64}; permuted=Val(true)); + ``` """ function OperatorKernel(ch::Pair{<:Integer, <:Integer}, modes::Dims{N}, transform::Type{TR}, @@ -139,6 +142,7 @@ Construct a `OperatorKernel` with `FourierTransform{ComplexF32}` as the transfor julia> SpectralKernel(2 => 5, (16,)); julia> SpectralKernel(2 => 5, (16,); permuted=Val(true)); + ``` """ function SpectralKernel(ch::Pair{<:Integer, <:Integer}, modes::Dims{N}, From ddb1f75f8acd9377a7c90dab2c073fbcb67c2a7e Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Wed, 10 Jul 2024 23:53:14 -0700 Subject: [PATCH 12/16] test: lazy install cuda and amdgpu --- Project.toml | 12 ++++-------- src/NeuralOperators.jl | 23 ++++++++++------------- test/runtests.jl | 20 ++++++++++++++++++-- test/shared_testsetup.jl | 29 +++++++++++++---------------- 4 files changed, 45 insertions(+), 39 deletions(-) diff --git a/Project.toml b/Project.toml index a9ff616..816f6d0 100644 --- a/Project.toml +++ b/Project.toml @@ -12,28 +12,25 @@ Lux = "b2108857-7c20-44ae-9111-449ecde12c47" LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623" LuxDeviceUtils = "34f89e08-e1d5-43b4-8944-0b49ac560553" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" -PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" WeightInitializers = "d49dbf32-c5c2-4618-8acc-27bb2598ef2d" [compat] -AMDGPU = "0.9.6" Aqua = "0.8.7" ArgCheck = "2.3.0" ChainRulesCore = "1.24.0" ConcreteStructs = "0.2.3" Documenter = "1.4.1" -ExplicitImports = "1.6.0" +ExplicitImports = "1.9.0" FFTW = "1.8.0" Lux = "0.5.56" -LuxCUDA = "0.3.2" LuxCore = "0.1.15" LuxDeviceUtils = "0.1.24" LuxTestUtils = "0.1.15" NNlib = "0.9.17" Optimisers = "0.3.3" -PrecompileTools = "1.2.1" +Pkg = "1.10" Random = "1.10" ReTestItems = "1.24.0" Reexport = "1.2.2" @@ -44,17 +41,16 @@ Zygote = "0.6.70" julia = "1.10" [extras] -AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7" -LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda" LuxTestUtils = "ac9de150-d08f-4546-94fb-7472b5760531" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Aqua", "Documenter", "ExplicitImports", "AMDGPU", "LuxCUDA", "LuxTestUtils", "Optimisers", "ReTestItems", "StableRNGs", "Test", "Zygote"] +test = ["Aqua", "Documenter", "ExplicitImports", 
"LuxTestUtils", "Optimisers", "Pkg", "ReTestItems", "StableRNGs", "Test", "Zygote"] diff --git a/src/NeuralOperators.jl b/src/NeuralOperators.jl index dd19da4..0bf3abb 100644 --- a/src/NeuralOperators.jl +++ b/src/NeuralOperators.jl @@ -1,18 +1,15 @@ module NeuralOperators -using PrecompileTools: @recompile_invalidations - -@recompile_invalidations begin - using ArgCheck: @argcheck - using ChainRulesCore: ChainRulesCore, NoTangent - using ConcreteStructs: @concrete - using FFTW: FFTW, irfft, rfft - using Lux: _print_wrapper_model - using LuxCore: LuxCore, AbstractExplicitLayer - using NNlib: NNlib, ⊠, batched_adjoint - using Random: Random, AbstractRNG - using Reexport: @reexport -end +using ArgCheck: @argcheck +using ChainRulesCore: ChainRulesCore, NoTangent +using ConcreteStructs: @concrete +using FFTW: FFTW, irfft, rfft +using Lux +using LuxCore: LuxCore, AbstractExplicitLayer +using LuxDeviceUtils: get_device, LuxAMDGPUDevice +using NNlib: NNlib, ⊠ +using Random: Random, AbstractRNG +using Reexport: @reexport const CRC = ChainRulesCore diff --git a/test/runtests.jl b/test/runtests.jl index 8ba7978..a72e768 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,3 +1,19 @@ -using ReTestItems +using ReTestItems, Pkg, ReTestItems, Test -ReTestItems.runtests(@__DIR__) +const BACKEND_GROUP = lowercase(get(ENV, "BACKEND_GROUP", "all")) + +const EXTRA_PKGS = String[] +(BACKEND_GROUP == "all" || BACKEND_GROUP == "cuda") && push!(EXTRA_PKGS, "LuxCUDA") +(BACKEND_GROUP == "all" || BACKEND_GROUP == "amdgpu") && push!(EXTRA_PKGS, "AMDGPU") + +if !isempty(EXTRA_PKGS) + @info "Installing Extra Packages for testing" EXTRA_PKGS=EXTRA_PKGS + Pkg.add(EXTRA_PKGS) + Pkg.update() + Base.retry_load_extensions() + Pkg.instantiate() +end + +@testset "ReTestItems Tests" begin + ReTestItems.runtests(@__DIR__) +end diff --git a/test/shared_testsetup.jl b/test/shared_testsetup.jl index 9876c86..b5d00bc 100644 --- a/test/shared_testsetup.jl +++ b/test/shared_testsetup.jl @@ -1,20 +1,26 @@ @testsetup module SharedTestSetup import Reexport: @reexport -@reexport using Lux, LuxCUDA, AMDGPU, Zygote, Optimisers, Random, StableRNGs +@reexport using Lux, Zygote, Optimisers, Random, StableRNGs using LuxTestUtils: @jet, @test_gradients -CUDA.allowscalar(false) +const BACKEND_GROUP = lowercase(get(ENV, "BACKEND_GROUP", "All")) -const BACKEND_GROUP = get(ENV, "BACKEND_GROUP", "All") +if BACKEND_GROUP == "all" || BACKEND_GROUP == "cuda" + using LuxCUDA +end + +if BACKEND_GROUP == "all" || BACKEND_GROUP == "amdgpu" + using AMDGPU +end -cpu_testing() = BACKEND_GROUP == "All" || BACKEND_GROUP == "CPU" +cpu_testing() = BACKEND_GROUP == "all" || BACKEND_GROUP == "cpu" function cuda_testing() - return (BACKEND_GROUP == "All" || BACKEND_GROUP == "CUDA") && + return (BACKEND_GROUP == "all" || BACKEND_GROUP == "cuda") && LuxDeviceUtils.functional(LuxCUDADevice) end function amdgpu_testing() - return (BACKEND_GROUP == "All" || BACKEND_GROUP == "AMDGPU") && + return (BACKEND_GROUP == "all" || BACKEND_GROUP == "amdgpu") && LuxDeviceUtils.functional(LuxAMDGPUDevice) end @@ -26,14 +32,6 @@ const MODES = begin modes end -# Some Helper Functions -function get_default_rng(mode::String) - dev = mode == "CPU" ? LuxCPUDevice() : - mode == "CUDA" ? LuxCUDADevice() : mode == "AMDGPU" ? LuxAMDGPUDevice() : nothing - rng = default_device_rng(dev) - return rng isa TaskLocalRNG ? copy(rng) : deepcopy(rng) -end - train!(args...; kwargs...) = train!(MSELoss(), AutoZygote(), args...; kwargs...) 
function train!(loss, backend, model, ps, st, data; epochs=10) @@ -50,7 +48,6 @@ function train!(loss, backend, model, ps, st, data; epochs=10) end export @jet, @test_gradients, check_approx -export BACKEND_GROUP, MODES, cpu_testing, cuda_testing, amdgpu_testing, get_default_rng, - train! +export BACKEND_GROUP, MODES, cpu_testing, cuda_testing, amdgpu_testing, train! end From d2beb52eb21d2d18ab3ebe616c2da192837d3b15 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 11 Jul 2024 00:06:59 -0700 Subject: [PATCH 13/16] ci: create local preferences in CI script --- .buildkite/testing.yml | 4 ++++ .github/workflows/CI.yml | 20 ++++++++++++++++++++ LocalPreferences.toml | 2 -- test/runtests.jl | 2 +- 4 files changed, 25 insertions(+), 3 deletions(-) delete mode 100644 LocalPreferences.toml diff --git a/.buildkite/testing.yml b/.buildkite/testing.yml index 7979b22..605332b 100644 --- a/.buildkite/testing.yml +++ b/.buildkite/testing.yml @@ -7,6 +7,8 @@ steps: test_args: "--quickfail" - JuliaCI/julia-coverage#v1: codecov: true + commands: | + printf "[LuxTestUtils]\ntarget_modules = [\"NeuralOperators\", \"Lux\", \"LuxLib\"]\n[LuxLib]\ninstability_check = \"error\"\n[LuxCore]\ninstability_check = \"error\"\n" > LocalPreferences.toml agents: queue: "juliagpu" cuda: "*" @@ -27,6 +29,8 @@ steps: test_args: "--quickfail" - JuliaCI/julia-coverage#v1: codecov: true + commands: | + printf "[LuxTestUtils]\ntarget_modules = [\"NeuralOperators\", \"Lux\", \"LuxLib\"]\n[LuxLib]\ninstability_check = \"error\"\n[LuxCore]\ninstability_check = \"error\"\n" > LocalPreferences.toml env: JULIA_AMDGPU_CORE_MUST_LOAD: "1" JULIA_AMDGPU_HIP_MUST_LOAD: "1" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 037da9c..a68b3d7 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -35,6 +35,16 @@ jobs: - windows-latest steps: - uses: actions/checkout@v4 + - uses: DamianReeves/write-file-action@master + with: + path: "LocalPreferences.toml" + contents: | + [LuxTestUtils] + target_modules = ["NeuralOperators", "Lux", "LuxLib"] + [LuxLib] + instability_check = "error" + [LuxCore] + instability_check = "error" - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} @@ -68,6 +78,16 @@ jobs: version: ["1"] steps: - uses: actions/checkout@v4 + - uses: DamianReeves/write-file-action@master + with: + path: "LocalPreferences.toml" + contents: | + [LuxTestUtils] + target_modules = ["NeuralOperators", "Lux", "LuxLib"] + [LuxLib] + instability_check = "error" + [LuxCore] + instability_check = "error" - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} diff --git a/LocalPreferences.toml b/LocalPreferences.toml deleted file mode 100644 index beeea5a..0000000 --- a/LocalPreferences.toml +++ /dev/null @@ -1,2 +0,0 @@ -[LuxTestUtils] -target_modules = ["NeuralOperators", "Lux", "LuxLib"] diff --git a/test/runtests.jl b/test/runtests.jl index a72e768..765fe75 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,6 +14,6 @@ if !isempty(EXTRA_PKGS) Pkg.instantiate() end -@testset "ReTestItems Tests" begin +@testset "NeuralOperators.jl Tests" begin ReTestItems.runtests(@__DIR__) end From de7085171d0bf7ce2f294508a28af781e3b6d32d Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 11 Jul 2024 00:08:56 -0700 Subject: [PATCH 14/16] test: more explicit imports testing --- test/qa_tests.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/qa_tests.jl b/test/qa_tests.jl index fd555fa..0e80dbd 100644 --- a/test/qa_tests.jl +++ b/test/qa_tests.jl @@ -18,5 +18,9 @@ 
end # Skip our own packages @test check_no_implicit_imports(NeuralOperators; skip=(Base, Core, Lux)) === nothing @test check_no_stale_explicit_imports(NeuralOperators) === nothing + @test check_no_self_qualified_accesses(NeuralOperators) === nothing + @test check_all_explicit_imports_via_owners(NeuralOperators) === nothing @test check_all_qualified_accesses_via_owners(NeuralOperators) === nothing + @test_broken check_all_explicit_imports_are_public(NeuralOperators) === nothing # mostly upstream problems + @test_broken check_all_qualified_accesses_are_public(NeuralOperators) === nothing # mostly upstream problems end From f7f1ee25e7f53af92cb3b71f17e3779fd4b17330 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 11 Jul 2024 00:17:02 -0700 Subject: [PATCH 15/16] test: display layers --- test/fno_tests.jl | 4 ++-- test/layers_tests.jl | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/fno_tests.jl b/test/fno_tests.jl index af13909..b9b8dac 100644 --- a/test/fno_tests.jl +++ b/test/fno_tests.jl @@ -10,12 +10,12 @@ @testset "$(length(setup.modes))D: permuted = $(setup.permuted)" for setup in setups fno = FourierNeuralOperator(; setup.chs, setup.modes, setup.permuted) + display(fno) + ps, st = Lux.setup(rng, fno) |> dev x = rand(rng, Float32, setup.x_size...) |> aType y = rand(rng, Float32, setup.y_size...) |> aType - ps, st = Lux.setup(rng, fno) |> dev - @inferred fno(x, ps, st) @jet fno(x, ps, st) diff --git a/test/layers_tests.jl b/test/layers_tests.jl index 5724b3a..293182b 100644 --- a/test/layers_tests.jl +++ b/test/layers_tests.jl @@ -22,6 +22,7 @@ l1 = p ? Conv(ntuple(_ -> 1, length(setup.m)), in_chs => first(ch)) : Dense(in_chs => first(ch)) m = Chain(l1, op(ch, setup.m; setup.permuted)) + display(m) ps, st = Lux.setup(rng, m) |> dev x = rand(rng, Float32, setup.x_size...) |> aType From a8149e2ef31708fc40a0509dd0c0e063b1c3c980 Mon Sep 17 00:00:00 2001 From: ayushinav Date: Thu, 11 Jul 2024 23:15:53 -0400 Subject: [PATCH 16/16] fixes --- src/deeponet.jl | 11 ----------- src/utils.jl | 4 ++-- test/deeponet_tests.jl | 2 +- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/src/deeponet.jl b/src/deeponet.jl index 9072880..aadcefc 100644 --- a/src/deeponet.jl +++ b/src/deeponet.jl @@ -106,14 +106,3 @@ function DeepONet(branch::L1, trunk::L2; additional=nothing) where {L1, L2} @return __project(b, t, additional) end end - -function Base.show(io::IO, ::MIME"text/plain", x::CompactLuxLayer{:DeepONet}) - # show(io, x) - _print_wrapper_model(io, "Branch net :\n", x.layers.branch) - print(io, "\n \n") - _print_wrapper_model(io, "Trunk net :\n", x.layers.trunk) - if :additional in keys(x.layers) - print(io, "\n \n") - _print_wrapper_model(io, "Additional net :\n", x.layers.additional) - end -end diff --git a/src/utils.jl b/src/utils.jl index 81d942a..5f37e01 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,5 +1,4 @@ # Temporarily capture certain calls like AMDGPU for ComplexFloats - @inline __batched_mul(x, y) = __batched_mul(x, y, get_device((x, y))) @inline function __batched_mul( x::AbstractArray{<:Number, 3}, y::AbstractArray{<:Number, 3}, _) @@ -9,6 +8,7 @@ end x::AbstractArray{<:Complex, 3}, y::AbstractArray{<:Complex, 3}, ::LuxAMDGPUDevice) # FIXME: This is not good for performance but that is okay for now return stack(*, eachslice(x; dims=3), eachslice(y; dims=3)) +end @inline function __project(b::AbstractArray{T1, 2}, t::AbstractArray{T2, 3}, additional::Nothing) where {T1, T2} @@ -80,4 +80,4 @@ end # p x (1,1,1...) 
x N x nb return additional(b_ .* t_) # p x u_size x N x nb => out_size x N x nb - +end diff --git a/test/deeponet_tests.jl b/test/deeponet_tests.jl index 4cd98eb..80a3985 100644 --- a/test/deeponet_tests.jl +++ b/test/deeponet_tests.jl @@ -36,7 +36,7 @@ branch=(64, 32, 32, 16), trunk=(8, 8, 8, 16), additional=Dense(16 => 4), name="Vector")] - @testset "Additonal layer: $(setup.name)" for setup in setups + @testset "Additional layer: $(setup.name)" for setup in setups u = rand(Float32, setup.u_size...) |> aType y = rand(Float32, setup.y_size...) |> aType deeponet = DeepONet(;
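The shape comments in `src/utils.jl` are terse, so the following standalone sketch spells out the projection step they describe for the simplest case, where the branch input carries no extra spatial dimensions. It is plain Julia, illustrative only and not part of the patch, with a random weight matrix standing in for the `Dense(16 => 4)` used as the `additional` network in `test/deeponet_tests.jl`.

```julia
# Shapes used by the DeepONet projection step (illustrative, plain Julia).
p, N, nb = 16, 10, 5              # latent size, query points, batch size
b = rand(Float32, p, nb)          # branch output: p x nb
t = rand(Float32, p, N, nb)       # trunk output:  p x N x nb

# Without an `additional` network: broadcast the branch output against every
# query point and contract over the latent dimension p.
b_ = reshape(b, p, 1, nb)                       # p x 1 x nb
out = dropdims(sum(b_ .* t; dims=1); dims=1)    # N x nb

# With an `additional` network the latent dimension is kept and mapped to the
# requested output size instead, giving out_size x N x nb. A weight matrix
# stands in for the `Dense(16 => 4)` layer here.
W = rand(Float32, 4, p)
h = b_ .* t                                     # p x N x nb
out_additional = reshape(W * reshape(h, p, :), 4, N, nb)

@assert size(out) == (N, nb)
@assert size(out_additional) == (4, N, nb)
```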