From 4883e789ca3f1440080033bd120e84c06f7c8d0d Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Sun, 6 Oct 2024 09:16:02 +0200 Subject: [PATCH 1/4] Update autodiff tutorial --- docs/Project.toml | 4 +- docs/src/tutorials/nonlinear/operator_ad.jl | 103 +++++++++++++++++++- 2 files changed, 101 insertions(+), 6 deletions(-) diff --git a/docs/Project.toml b/docs/Project.toml index abdd0302752..b7650c4841c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -3,6 +3,7 @@ CDDLib = "3391f64e-dcde-5f30-b752-e11513730f60" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" Clarabel = "61c947e1-3e6d-4ee4-985a-eec8c727bd6e" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" @@ -43,12 +44,13 @@ CDDLib = "=0.9.4" CSV = "0.10" Clarabel = "=0.9.0" DataFrames = "1" +DifferentiationInterface = "0.6.5" DimensionalData = "0.27.3" Distributions = "0.25" Documenter = "=1.6.0" DocumenterCitations = "1" Dualization = "0.5" -Enzyme = "0.12.14" +Enzyme = "0.13.7" ForwardDiff = "0.10" GLPK = "=1.2.1" HTTP = "1.5.4" diff --git a/docs/src/tutorials/nonlinear/operator_ad.jl b/docs/src/tutorials/nonlinear/operator_ad.jl index e1d08275497..dfa0e6c518d 100644 --- a/docs/src/tutorials/nonlinear/operator_ad.jl +++ b/docs/src/tutorials/nonlinear/operator_ad.jl @@ -35,6 +35,7 @@ using JuMP import Enzyme +import DifferentiationInterface as DI import ForwardDiff import Ipopt import Test @@ -248,18 +249,18 @@ Test.@test ≈(analytic_g, enzyme_g) # differentiation. # The code to implement the Hessian in Enzyme is complicated, so we will not -# explain it in detail; see the [Enzyme documentation](https://enzymead.github.io/Enzyme.jl/v0.11.20/generated/autodiff/#Vector-forward-over-reverse). +# explain it in detail; see the [Enzyme documentation](https://enzymead.github.io/Enzyme.jl/stable/generated/autodiff/#Vector-forward-over-reverse). function enzyme_∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N} ## direction(i) returns a tuple with a `1` in the `i`'th entry and `0` ## otherwise direction(i) = ntuple(j -> Enzyme.Active(T(i == j)), N) ## As the inner function, compute the gradient using Reverse mode - ∇f_deferred(x...) = Enzyme.autodiff_deferred(Enzyme.Reverse, f, x...)[1] + ∇f(x...) = Enzyme.autodiff(Enzyme.Reverse, f, Enzyme.Active, x...)[1] ## For the outer autodiff, use Forward mode. hess = Enzyme.autodiff( Enzyme.Forward, - ∇f_deferred, + ∇f, ## Compute multiple evaluations of Forward mode, each time using `x` but ## initializing with a different direction. Enzyme.BatchDuplicated.(Enzyme.Active.(x), ntuple(direction, N))..., @@ -296,10 +297,10 @@ function enzyme_derivatives(f::Function) end function ∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N} direction(i) = ntuple(j -> Enzyme.Active(T(i == j)), N) - ∇f_deferred(x...) = Enzyme.autodiff_deferred(Enzyme.Reverse, f, x...)[1] + ∇f(x...) = Enzyme.autodiff(Enzyme.Reverse, f, Enzyme.Active, x...)[1] hess = Enzyme.autodiff( Enzyme.Forward, - ∇f_deferred, + ∇f, Enzyme.BatchDuplicated.(Enzyme.Active.(x), ntuple(direction, N))..., )[1] for j in 1:N, i in 1:j @@ -324,3 +325,95 @@ function enzyme_rosenbrock() end enzyme_rosenbrock() + +# ## DifferentiationInterface + +#= +Beyond ForwardDiff and Enzyme, Julia offers [many different autodiff packages](https://juliadiff.org/). +Thanks to [DifferentiationInterface.jl](https://github.com/gdalle/DifferentiationInterface.jl) (or DI for short), you can write generic gradient and Hessian functions that work with most of them (although there are still some rough edges with Enzyme). +=# + +#= +All the necessary information about your choice of autodiff package is encoded in a "backend object" like this one. +It actually comes from another package called [ADTypes.jl](https://github.com/SciML/ADTypes.jl), but DI re-exports it: +=# + +DI.AutoForwardDiff() # could be AutoZygote(), AutoFiniteDiff(), etc + +# ### Gradient + +# Apart from providing the backend object, the syntax below remains very similar. + +function di_∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N} + DI.gradient!(Base.splat(f), g, DI.AutoForwardDiff(), collect(x)) + return +end + +# Let's check that we find the analytic solution: + +di_g = zeros(2) +di_∇f(di_g, x...) +Test.@test ≈(analytic_g, di_g) + +# ### Hessian + +# The Hessian follows exactly the same logic, except we only need the lower triangle. + +function di_∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N} + H_bothtriangles = DI.hessian(Base.splat(f), DI.AutoForwardDiff(), collect(x)) + for i in 1:N, j in 1:i + H[i, j] = H_bothtriangles[i, j] + end + return +end + +# Let's check that we find the analytic solution: + +di_H = zeros(2, 2) +di_∇²f(di_H, x...) +Test.@test ≈(analytic_H, di_H) + +# ### JuMP example + +#= +The code for computing the gradient and Hessian using DI can be re-used for many operators. +Thus, it is helpful to encapsulate it into the function: +=# + +""" + di_derivatives(f::Function, backend) -> Tuple{Function,Function} + +Return a tuple of functions that evaluate the gradient and Hessian of `f` using +DifferentiationInterface.jl with any given backend. +""" +function di_derivatives(f::Function, backend) + function ∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N} + DI.gradient!(Base.splat(f), g, backend, collect(x)) + return + end + function ∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N} + H_bothtriangles = DI.hessian(Base.splat(f), backend, collect(x)) + for i in 1:N, j in 1:i + H[i, j] = H_bothtriangles[i, j] + end + return + end + return ∇f, ∇²f +end + +# Here's an example using `di_derivatives`: + +function di_rosenbrock(backend) + model = Model(Ipopt.Optimizer) + set_silent(model) + @variable(model, x[1:2]) + @operator(model, op_rosenbrock, 2, f, di_derivatives(f, backend)...) + @objective(model, Min, op_rosenbrock(x[1], x[2])) + optimize!(model) + Test.@test is_solved_and_feasible(model) + return value.(x) +end + +#- + +di_rosenbrock(DI.AutoForwardDiff()) From 41fca677a35de094f33ae2c73a2c9297e71cb8ca Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Mon, 7 Oct 2024 11:02:56 +1300 Subject: [PATCH 2/4] Update operator_ad.jl --- docs/src/tutorials/nonlinear/operator_ad.jl | 74 ++++++++++++--------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/docs/src/tutorials/nonlinear/operator_ad.jl b/docs/src/tutorials/nonlinear/operator_ad.jl index dfa0e6c518d..0031e293c43 100644 --- a/docs/src/tutorials/nonlinear/operator_ad.jl +++ b/docs/src/tutorials/nonlinear/operator_ad.jl @@ -35,7 +35,7 @@ using JuMP import Enzyme -import DifferentiationInterface as DI +import DifferentiationInterface import ForwardDiff import Ipopt import Test @@ -328,24 +328,31 @@ enzyme_rosenbrock() # ## DifferentiationInterface -#= -Beyond ForwardDiff and Enzyme, Julia offers [many different autodiff packages](https://juliadiff.org/). -Thanks to [DifferentiationInterface.jl](https://github.com/gdalle/DifferentiationInterface.jl) (or DI for short), you can write generic gradient and Hessian functions that work with most of them (although there are still some rough edges with Enzyme). -=# +# Julia offers [many different autodiff packages](https://juliadiff.org/). +# [DifferentiationInterface.jl](https://github.com/gdalle/DifferentiationInterface.jl) +# is a package that provides an abstraction layer across multiple underlying +# autodiff libaries. -#= -All the necessary information about your choice of autodiff package is encoded in a "backend object" like this one. -It actually comes from another package called [ADTypes.jl](https://github.com/SciML/ADTypes.jl), but DI re-exports it: -=# +# All the necessary information about your choice of underlyingg autodiff +# package is encoded in a "backend object" like this one: -DI.AutoForwardDiff() # could be AutoZygote(), AutoFiniteDiff(), etc +DifferentiationInterface.AutoForwardDiff() + +# This type comes from another package called [ADTypes.jl](https://github.com/SciML/ADTypes.jl), +# but DifferentiationInterface re-exports it. Other options include +# `AutoZygote()` and `AutoFiniteDiff()`. # ### Gradient -# Apart from providing the backend object, the syntax below remains very similar. +# Apart from providing the backend object, the syntax below remains very +# similar: -function di_∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N} - DI.gradient!(Base.splat(f), g, DI.AutoForwardDiff(), collect(x)) +function di_∇f( + g::AbstractVector{T}, + x::Vararg{T,N}; + backend = DifferentiationInterface.AutoForwardDiff(), +) where {T,N} + DifferentiationInterface.gradient!(splat(f), g, backend, collect(x)) return end @@ -357,12 +364,17 @@ Test.@test ≈(analytic_g, di_g) # ### Hessian -# The Hessian follows exactly the same logic, except we only need the lower triangle. +# The Hessian follows exactly the same logic, except we need only the lower +# triangle. -function di_∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N} - H_bothtriangles = DI.hessian(Base.splat(f), DI.AutoForwardDiff(), collect(x)) +function di_∇²f( + H::AbstractMatrix{T}, + x::Vararg{T,N}; + backend = DifferentiationInterface.AutoForwardDiff(), +) where {T,N} + H_dense = DifferentiationInterface.hessian(splat(f), backend, collect(x)) for i in 1:N, j in 1:i - H[i, j] = H_bothtriangles[i, j] + H[i, j] = H_dense[i, j] end return end @@ -375,26 +387,26 @@ Test.@test ≈(analytic_H, di_H) # ### JuMP example -#= -The code for computing the gradient and Hessian using DI can be re-used for many operators. -Thus, it is helpful to encapsulate it into the function: -=# +# The code for computing the gradient and Hessian using DifferentiationInterface +# can be re-used for many operators. Thus, it is helpful to encapsulate it into +# the function: """ - di_derivatives(f::Function, backend) -> Tuple{Function,Function} + di_derivatives(f::Function; backend) -> Tuple{Function,Function} Return a tuple of functions that evaluate the gradient and Hessian of `f` using -DifferentiationInterface.jl with any given backend. +DifferentiationInterface.jl with any given `backend`. """ -function di_derivatives(f::Function, backend) +function di_derivatives(f::Function; backend) function ∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N} - DI.gradient!(Base.splat(f), g, backend, collect(x)) + DifferentiationInterface.gradient!(splat(f), g, backend, collect(x)) return end function ∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N} - H_bothtriangles = DI.hessian(Base.splat(f), backend, collect(x)) + H_dense = + DifferentiationInterface.hessian(splat(f), backend, collect(x)) for i in 1:N, j in 1:i - H[i, j] = H_bothtriangles[i, j] + H[i, j] = H_dense[i, j] end return end @@ -403,17 +415,15 @@ end # Here's an example using `di_derivatives`: -function di_rosenbrock(backend) +function di_rosenbrock(; backend) model = Model(Ipopt.Optimizer) set_silent(model) @variable(model, x[1:2]) - @operator(model, op_rosenbrock, 2, f, di_derivatives(f, backend)...) + @operator(model, op_rosenbrock, 2, f, di_derivatives(f; backend)...) @objective(model, Min, op_rosenbrock(x[1], x[2])) optimize!(model) Test.@test is_solved_and_feasible(model) return value.(x) end -#- - -di_rosenbrock(DI.AutoForwardDiff()) +di_rosenbrock(; backend = DifferentiationInterface.AutoForwardDiff()) From b745a2f38a2ca623ad4362703a032da9c9e80d25 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Mon, 7 Oct 2024 14:03:40 +1300 Subject: [PATCH 3/4] Apply suggestions from code review --- docs/src/tutorials/nonlinear/operator_ad.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/tutorials/nonlinear/operator_ad.jl b/docs/src/tutorials/nonlinear/operator_ad.jl index 0031e293c43..5cd46217474 100644 --- a/docs/src/tutorials/nonlinear/operator_ad.jl +++ b/docs/src/tutorials/nonlinear/operator_ad.jl @@ -331,9 +331,9 @@ enzyme_rosenbrock() # Julia offers [many different autodiff packages](https://juliadiff.org/). # [DifferentiationInterface.jl](https://github.com/gdalle/DifferentiationInterface.jl) # is a package that provides an abstraction layer across multiple underlying -# autodiff libaries. +# autodiff libraries. -# All the necessary information about your choice of underlyingg autodiff +# All the necessary information about your choice of underlying autodiff # package is encoded in a "backend object" like this one: DifferentiationInterface.AutoForwardDiff() From 23c82e84b85ac1768e27a35c6502a2a1fb4530e1 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Mon, 7 Oct 2024 14:35:38 +1300 Subject: [PATCH 4/4] Update operator_ad.jl --- docs/src/tutorials/nonlinear/operator_ad.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/tutorials/nonlinear/operator_ad.jl b/docs/src/tutorials/nonlinear/operator_ad.jl index 5cd46217474..5bd1288379d 100644 --- a/docs/src/tutorials/nonlinear/operator_ad.jl +++ b/docs/src/tutorials/nonlinear/operator_ad.jl @@ -34,8 +34,8 @@ # This tutorial uses the following packages: using JuMP -import Enzyme import DifferentiationInterface +import Enzyme import ForwardDiff import Ipopt import Test @@ -330,7 +330,7 @@ enzyme_rosenbrock() # Julia offers [many different autodiff packages](https://juliadiff.org/). # [DifferentiationInterface.jl](https://github.com/gdalle/DifferentiationInterface.jl) -# is a package that provides an abstraction layer across multiple underlying +# is a package that provides an abstraction layer across a few underlying # autodiff libraries. # All the necessary information about your choice of underlying autodiff