diff --git a/Project.toml b/Project.toml index 003e0a0..ba03f4b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,42 +1,31 @@ name = "LogDensityProblemsAD" uuid = "996a588d-648d-4e1f-a8f0-a84b347e47b1" authors = ["Tamás K. Papp "] -version = "1.9.0" +version = "2.0.0" [deps] +ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" Requires = "ae029012-a4dd-5104-9daa-d747884805df" -SimpleUnPack = "ce78b400-467f-4804-87d8-8f486da07d0a" [weakdeps] -ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" -BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" -ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" -Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [extensions] -LogDensityProblemsADADTypesExt = "ADTypes" LogDensityProblemsADEnzymeExt = "Enzyme" LogDensityProblemsADFiniteDifferencesExt = "FiniteDifferences" -LogDensityProblemsADForwardDiffBenchmarkToolsExt = ["BenchmarkTools", "ForwardDiff"] LogDensityProblemsADForwardDiffExt = "ForwardDiff" -LogDensityProblemsADReverseDiffExt = "ReverseDiff" -LogDensityProblemsADTrackerExt = "Tracker" -LogDensityProblemsADZygoteExt = "Zygote" [compat] -ADTypes = "0.1.7, 0.2, 1" +ADTypes = "1.1" +DifferentiationInterface = "0.3.4" DocStringExtensions = "0.8, 0.9" -Enzyme = "0.11, 0.12" -FiniteDifferences = "0.12" LogDensityProblems = "1, 2" -Requires = "0.5, 1" -SimpleUnPack = "1" +Requires = "1.3" julia = "1.6" [extras] diff --git a/ext/DiffResults_helpers.jl b/ext/DiffResults_helpers.jl deleted file mode 100644 index 136b8d4..0000000 --- a/ext/DiffResults_helpers.jl +++ /dev/null @@ -1,28 +0,0 @@ -### -### Helper functions for working with DiffResults. -### Only included when required by AD wrappers. -### Requires that `DiffResults` and `DocStringExtensions.SIGNATURES` are available. -### - -""" -$(SIGNATURES) - -Allocate a DiffResults buffer for a gradient, taking the element type of `x` into account -(heuristically). -""" -function _diffresults_buffer(x) - T = eltype(x) - S = T <: Real ? float(T) : Float64 # heuristic - DiffResults.MutableDiffResult(zero(S), (similar(x, S), )) -end - -""" -$(SIGNATURES) - -Extract a return value for [`logdensity_and_gradient`](@ref) from a DiffResults buffer, -constructed with [`diffresults_buffer`](@ref). Gradient is not copied as caller created the -vector. -""" -function _diffresults_extract(diffresult::DiffResults.DiffResult) - DiffResults.value(diffresult), DiffResults.gradient(diffresult) -end diff --git a/ext/LogDensityProblemsADADTypesExt.jl b/ext/LogDensityProblemsADADTypesExt.jl deleted file mode 100644 index 588f276..0000000 --- a/ext/LogDensityProblemsADADTypesExt.jl +++ /dev/null @@ -1,53 +0,0 @@ -module LogDensityProblemsADADTypesExt - -if isdefined(Base, :get_extension) - import LogDensityProblemsAD - import ADTypes -else - import ..LogDensityProblemsAD - import ..ADTypes -end - -""" - ADgradient(ad::ADTypes.AbstractADType, ℓ) - -Wrap log density `ℓ` using automatic differentiation (AD) of type `ad` to obtain a gradient. - -Currently, -- `ad::ADTypes.AutoEnzyme` -- `ad::ADTypes.AutoForwardDiff` -- `ad::ADTypes.AutoReverseDiff` -- `ad::ADTypes.AutoTracker` -- `ad::ADTypes.AutoZygote` -are supported. 
-The AD configuration specified by `ad` is forwarded to the corresponding calls of `ADgradient(Val(...), ℓ)`. -""" -LogDensityProblemsAD.ADgradient(::ADTypes.AbstractADType, ℓ) - -function LogDensityProblemsAD.ADgradient(::ADTypes.AutoEnzyme, ℓ) - return LogDensityProblemsAD.ADgradient(Val(:Enzyme), ℓ) -end - -function LogDensityProblemsAD.ADgradient(ad::ADTypes.AutoForwardDiff{C}, ℓ) where {C} - if C === nothing - # Use default chunk size - return LogDensityProblemsAD.ADgradient(Val(:ForwardDiff), ℓ; tag = ad.tag) - else - return LogDensityProblemsAD.ADgradient(Val(:ForwardDiff), ℓ; chunk = C, tag = ad.tag) - end -end - -function LogDensityProblemsAD.ADgradient(ad::ADTypes.AutoReverseDiff, ℓ) - return LogDensityProblemsAD.ADgradient(Val(:ReverseDiff), ℓ; compile = Val(ad.compile)) -end - -function LogDensityProblemsAD.ADgradient(::ADTypes.AutoTracker, ℓ) - return LogDensityProblemsAD.ADgradient(Val(:Tracker), ℓ) -end - - -function LogDensityProblemsAD.ADgradient(::ADTypes.AutoZygote, ℓ) - return LogDensityProblemsAD.ADgradient(Val(:Zygote), ℓ) -end - -end # module diff --git a/ext/LogDensityProblemsADEnzymeExt.jl b/ext/LogDensityProblemsADEnzymeExt.jl index 42ff7bb..e384d00 100644 --- a/ext/LogDensityProblemsADEnzymeExt.jl +++ b/ext/LogDensityProblemsADEnzymeExt.jl @@ -1,78 +1,26 @@ -""" -Gradient AD implementation using Enzyme. -""" module LogDensityProblemsADEnzymeExt if isdefined(Base, :get_extension) - using LogDensityProblemsAD: ADGradientWrapper, logdensity - using LogDensityProblemsAD.SimpleUnPack: @unpack - - import LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import Enzyme + using ADTypes: AutoEnzyme + using Enzyme: Reverse + using LogDensityProblemsAD: LogDensityProblemsAD, ADgradient, logdensity else - using ..LogDensityProblemsAD: ADGradientWrapper, logdensity - using ..LogDensityProblemsAD.SimpleUnPack: @unpack - - import ..LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import ..Enzyme -end - -struct EnzymeGradientLogDensity{L,M<:Union{Enzyme.ForwardMode,Enzyme.ReverseMode},S} <: ADGradientWrapper - ℓ::L - mode::M - shadow::S # only used in forward mode + using ..ADTypes: AutoEnzyme + using ..Enzyme: Reverse + using ..LogDensityProblemsAD: LogDensityProblemsAD, ADgradient, logdensity end -""" - ADgradient(:Enzyme, ℓ; kwargs...) - ADgradient(Val(:Enzyme), ℓ; kwargs...) - -Gradient using algorithmic/automatic differentiation via Enzyme. - -# Keyword arguments - -- `mode::Enzyme.Mode`: Differentiation mode (default: `Enzyme.Reverse`). - Currently only `Enzyme.Reverse` and `Enzyme.Forward` are supported. - -- `shadow`: Collection of one-hot vectors for each entry of the inputs `x` to the log density - `ℓ`, or `nothing` (default: `nothing`). This keyword argument is only used in forward - mode. By default, it will be recomputed in every call of `logdensity_and_gradient(ℓ, x)`. - For performance reasons it is recommended to compute it only once when calling `ADgradient`. - The one-hot vectors can be constructed, e.g., with `Enzyme.onehot(x)`. 
-""" -function ADgradient(::Val{:Enzyme}, ℓ; mode::Enzyme.Mode = Enzyme.Reverse, shadow = nothing) - mode isa Union{Enzyme.ForwardMode,Enzyme.ReverseMode} || - throw(ArgumentError("currently automatic differentiation via Enzyme only supports " * - "`Enzyme.Forward` and `Enzyme.Reverse` modes")) - if mode isa Enzyme.ReverseMode && shadow !== nothing - @info "keyword argument `shadow` is ignored in reverse mode" - shadow = nothing +function LogDensityProblemsAD.ADgradient( + ::Val{:Enzyme}, + ℓ; + mode=Reverse, + shadow=nothing, +) + if !isnothing(shadow) + @warn "keyword argument `shadow` is now ignored" end - return EnzymeGradientLogDensity(ℓ, mode, shadow) -end - -function Base.show(io::IO, ∇ℓ::EnzymeGradientLogDensity) - print(io, "Enzyme AD wrapper for ", ∇ℓ.ℓ, " with ", - ∇ℓ.mode isa Enzyme.ForwardMode ? "forward" : "reverse", " mode") -end - -function logdensity_and_gradient(∇ℓ::EnzymeGradientLogDensity{<:Any,<:Enzyme.ForwardMode}, - x::AbstractVector) - @unpack ℓ, mode, shadow = ∇ℓ - _shadow = shadow === nothing ? Enzyme.onehot(x) : shadow - y, ∂ℓ_∂x = Enzyme.autodiff(mode, logdensity, Enzyme.BatchDuplicated, - Enzyme.Const(ℓ), - Enzyme.BatchDuplicated(x, _shadow)) - return y, collect(∂ℓ_∂x) -end - -function logdensity_and_gradient(∇ℓ::EnzymeGradientLogDensity{<:Any,<:Enzyme.ReverseMode}, - x::AbstractVector) - @unpack ℓ = ∇ℓ - ∂ℓ_∂x = zero(x) - _, y = Enzyme.autodiff(Enzyme.ReverseWithPrimal, logdensity, Enzyme.Active, - Enzyme.Const(ℓ), Enzyme.Duplicated(x, ∂ℓ_∂x)) - y, ∂ℓ_∂x + backend = AutoEnzyme(; mode) + return ADgradient(backend, ℓ) end end # module diff --git a/ext/LogDensityProblemsADFiniteDifferencesExt.jl b/ext/LogDensityProblemsADFiniteDifferencesExt.jl index cfd03fb..7b2ef3c 100644 --- a/ext/LogDensityProblemsADFiniteDifferencesExt.jl +++ b/ext/LogDensityProblemsADFiniteDifferencesExt.jl @@ -1,51 +1,19 @@ -""" -Gradient implementation using FiniteDifferences. -""" module LogDensityProblemsADFiniteDifferencesExt if isdefined(Base, :get_extension) - using LogDensityProblemsAD: ADGradientWrapper, logdensity - using LogDensityProblemsAD.SimpleUnPack: @unpack - - import LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import FiniteDifferences + using ADTypes: AutoFiniteDifferences + import FiniteDifferences: central_fdm + using LogDensityProblemsAD: LogDensityProblemsAD, ADgradient else - using ..LogDensityProblemsAD: ADGradientWrapper, logdensity - using ..LogDensityProblemsAD.SimpleUnPack: @unpack - - import ..LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import ..FiniteDifferences -end - -struct FiniteDifferencesGradientLogDensity{L,M} <: ADGradientWrapper - ℓ::L - "finite difference method" - fdm::M -end - -""" - ADgradient(:FiniteDifferences, ℓ; fdm = central_fdm(5, 1)) - ADgradient(Val(:FiniteDifferences), ℓ; fdm = central_fdm(5, 1)) - -Gradient using FiniteDifferences, mainly intended for checking results from other algorithms. - -# Keyword arguments - -- `fdm`: the finite difference method. Defaults to `central_fdm(5, 1)`. 
-""" -function ADgradient(::Val{:FiniteDifferences}, ℓ; fdm = FiniteDifferences.central_fdm(5, 1)) - FiniteDifferencesGradientLogDensity(ℓ, fdm) -end - -function Base.show(io::IO, ∇ℓ::FiniteDifferencesGradientLogDensity) - print(io, "FiniteDifferences AD wrapper for ", ∇ℓ.ℓ, " with ", ∇ℓ.fdm) + using ..ADTypes: AutoFiniteDifferences + import ..FiniteDifferences: central_fdm + using ..LogDensityProblemsAD: LogDensityProblemsAD, ADgradient end -function logdensity_and_gradient(∇ℓ::FiniteDifferencesGradientLogDensity, x::AbstractVector) - @unpack ℓ, fdm = ∇ℓ - y = logdensity(ℓ, x) - ∇y = only(FiniteDifferences.grad(fdm, Base.Fix1(logdensity, ℓ), x)) - y, ∇y +function LogDensityProblemsAD.ADgradient(::Val{:FiniteDifferences}, ℓ) + fdm = central_fdm(5, 1) + backend = AutoFiniteDifferences(; fdm) + ADgradient(backend, ℓ) end end # module diff --git a/ext/LogDensityProblemsADForwardDiffBenchmarkToolsExt.jl b/ext/LogDensityProblemsADForwardDiffBenchmarkToolsExt.jl deleted file mode 100644 index ef99a4b..0000000 --- a/ext/LogDensityProblemsADForwardDiffBenchmarkToolsExt.jl +++ /dev/null @@ -1,65 +0,0 @@ -""" -Utilities for benchmarking a log density problem with various chunk sizes using ForwardDiff. - -Loaded when both ForwardDiff and BenchmarkTools are loaded. -""" -module LogDensityProblemsADForwardDiffBenchmarkToolsExt - -if isdefined(Base, :get_extension) - using LogDensityProblemsAD: ADgradient, SIGNATURES, dimension, logdensity_and_gradient - using BenchmarkTools: @belapsed - using ForwardDiff: Chunk - - import LogDensityProblemsAD: benchmark_ForwardDiff_chunks, heuristic_chunks -else - using ..LogDensityProblemsAD: ADgradient, SIGNATURES, dimension, logdensity_and_gradient - using ..BenchmarkTools: @belapsed - using ..ForwardDiff: Chunk - - import ..LogDensityProblemsAD: benchmark_ForwardDiff_chunks, heuristic_chunks -end - -""" -$(SIGNATURES) - -Default chunk sizes to try for benchmarking. Fewer than `M`, always contains `1` and `N`. -""" -function heuristic_chunks(N, M = 20) - step = max(N ÷ M, 1) - Ns = 1:step:N - if N ∉ Ns - Ns = vcat(Ns, N) - end - Ns -end - -""" -$(SIGNATURES) - -Benchmark a log density problem with various chunk sizes using ForwardDiff. - -`chunks`, which defaults to all possible chunk sizes, determines the chunks that are tried. - -The function returns `chunk => time` pairs, where `time` is the benchmarked runtime in -seconds, as determined by `BenchmarkTools.@belapsed`. The gradient is evaluated at `x` -(defaults to zeros). - -*Runtime may be long* because of tuned benchmarks, so when `markprogress == true` (the -default), dots are printed to mark progress. - -This function is not exported, but part of the API. - -*It is loaded conditionally when both `ForwardDiff` and `BenchmarkTools` are loaded.* -""" -function benchmark_ForwardDiff_chunks(ℓ; - chunks = heuristic_chunks(dimension(ℓ), 20), - markprogress = true, - x = zeros(dimension(ℓ))) - map(chunks) do chunk - ∇ℓ = ADgradient(Val(:ForwardDiff), ℓ; chunk = Chunk(chunk)) - markprogress && print(".") - chunk => @belapsed logdensity_and_gradient($(∇ℓ), $(x)) - end -end - -end # module diff --git a/ext/LogDensityProblemsADForwardDiffExt.jl b/ext/LogDensityProblemsADForwardDiffExt.jl index 893912b..f349cea 100644 --- a/ext/LogDensityProblemsADForwardDiffExt.jl +++ b/ext/LogDensityProblemsADForwardDiffExt.jl @@ -1,122 +1,34 @@ -""" -Gradient AD implementation using ForwardDiff. 
-""" module LogDensityProblemsADForwardDiffExt if isdefined(Base, :get_extension) - using LogDensityProblemsAD: ADGradientWrapper, SIGNATURES, dimension, logdensity - using LogDensityProblemsAD.SimpleUnPack: @unpack - - import LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import ForwardDiff - import ForwardDiff: DiffResults + using ADTypes: AutoForwardDiff + using ForwardDiff: Chunk + using LogDensityProblemsAD: LogDensityProblemsAD, ADgradient, dimension else - using ..LogDensityProblemsAD: ADGradientWrapper, SIGNATURES, dimension, logdensity - using ..LogDensityProblemsAD.SimpleUnPack: @unpack - - import ..LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import ..ForwardDiff - import ..ForwardDiff: DiffResults -end - -# Load DiffResults helpers -include("DiffResults_helpers.jl") - -struct ForwardDiffLogDensity{L, C <: ForwardDiff.Chunk, T, - G <: Union{Nothing,ForwardDiff.GradientConfig}} <: ADGradientWrapper - "supports zero-order evaluation `logdensity(ℓ, x)`" - ℓ::L - "chunk size for ForwardDiff" - chunk::C - "tag, or `nothing` for the default" - tag::T - "gradient config, or `nothing` if created for each evaluation" - gradient_config::G -end - -function Base.show(io::IO, ℓ::ForwardDiffLogDensity) - print(io, "ForwardDiff AD wrapper for ", ℓ.ℓ, - ", w/ chunk size ", ForwardDiff.chunksize(ℓ.chunk)) -end - -_ensure_chunk(chunk::ForwardDiff.Chunk) = chunk -_ensure_chunk(chunk::Integer) = ForwardDiff.Chunk(chunk) - -_default_chunk(ℓ) = _ensure_chunk(dimension(ℓ)) - -function Base.copy(fℓ::ForwardDiffLogDensity{L,C,T,<:ForwardDiff.GradientConfig}) where {L,C,T} - @unpack ℓ, chunk, tag, gradient_config = fℓ - ForwardDiffLogDensity(ℓ, chunk, tag, copy(gradient_config)) -end - -""" -$(SIGNATURES) - -Make a `ForwardDiff.GradientConfig` for function `f` and input `x`. `tag = nothing` generates the default tag. -""" -function _make_gradient_config(f::F, x, chunk::ForwardDiff.Chunk, tag) where {F} - gradient_config = if tag ≡ nothing - ForwardDiff.GradientConfig(f, x, chunk) - elseif tag isa ForwardDiff.Tag - ForwardDiff.GradientConfig(f, x, chunk, tag) - else - ForwardDiff.GradientConfig(f, x, chunk, ForwardDiff.Tag(tag, eltype(x))) - end - gradient_config + using ..ADTypes: AutoForwardDiff + using ..ForwardDiff: Chunk + using ..LogDensityProblemsAD: LogDensityProblemsAD, ADgradient, dimension end -""" - ADgradient(:ForwardDiff, ℓ; chunk, tag, x) - ADgradient(Val(:ForwardDiff), ℓ; chunk, tag, x) - -Wrap a log density that supports evaluation of `Value` to handle `ValueGradient`, using -`ForwardDiff`. - -Keyword arguments: - -- `chunk` can be used to set the chunk size, an integer or a `ForwardDiff.Chunk` - -- `tag` (default: `nothing`) can be used to set a tag for `ForwardDiff`. - If `tag` is neither `nothing` nor a `ForwardDiff.Tag`, the tag for `ForwardDiff` is set - to `ForwardDiff.Tag(tag, eltype(x))` where `x` is the vector at which the logdensity and - its gradient are evaluated. - -- `x` (default: `nothing`) will be used to preallocate a `ForwardDiff.GradientConfig` with - the given vector. With the default, one is created for each evaluation. - -Note that **pre-allocating a `ForwardDiff.GradientConfig` is not thread-safe**. 
You can -[`copy`](@ref) the results for concurrent evaluation: -```julia -∇ℓ1 = ADgradient(:ForwardDiff, ℓ; x = zeros(dimension(ℓ))) -∇ℓ2 = copy(∇ℓ1) # you can now use both, in different threads -``` - -See also the ForwardDiff documentation regarding -[`ForwardDiff.GradientConfig`](https://juliadiff.org/ForwardDiff.jl/stable/user/api/#Preallocating/Configuring-Work-Buffers) -and [chunks and tags](https://juliadiff.org/ForwardDiff.jl/stable/user/advanced/). -""" -function ADgradient(::Val{:ForwardDiff}, ℓ; - chunk::Union{Integer,ForwardDiff.Chunk} = _default_chunk(ℓ), - tag = nothing, - x::Union{Nothing,AbstractVector} = nothing) - _chunk = _ensure_chunk(chunk) - gradient_config = if x ≡ nothing - nothing +_get_chunksize(::Chunk{C}) where {C} = C +_get_chunksize(chunk::Integer) = chunk + +_default_chunk(ℓ) = _get_chunksize(dimension(ℓ)) + +function LogDensityProblemsAD.ADgradient( + ::Val{:ForwardDiff}, + ℓ; + chunk::Union{Integer,Chunk}=_default_chunk(ℓ), + tag=nothing, + x::Union{Nothing,AbstractVector}=nothing, +) + chunksize = _get_chunksize(chunk) + backend = AutoForwardDiff{chunksize,typeof(tag)}(tag) + if isnothing(x) + return ADgradient(backend, ℓ) else - _make_gradient_config(Base.Fix1(logdensity, ℓ), x, _chunk, tag) - end - ForwardDiffLogDensity(ℓ, _chunk, tag, gradient_config) -end - -function logdensity_and_gradient(fℓ::ForwardDiffLogDensity, x::AbstractVector) - @unpack ℓ, chunk, tag, gradient_config = fℓ - buffer = _diffresults_buffer(x) - ℓ′ = Base.Fix1(logdensity, ℓ) - if gradient_config ≡ nothing - gradient_config = _make_gradient_config(ℓ′, x, chunk, tag) + return ADgradient(backend, ℓ, x) end - result = ForwardDiff.gradient!(buffer, ℓ′, x, gradient_config) - _diffresults_extract(result) end end # module diff --git a/ext/LogDensityProblemsADReverseDiffExt.jl b/ext/LogDensityProblemsADReverseDiffExt.jl deleted file mode 100644 index d101a7b..0000000 --- a/ext/LogDensityProblemsADReverseDiffExt.jl +++ /dev/null @@ -1,79 +0,0 @@ -""" -Gradient AD implementation using ReverseDiff. -""" -module LogDensityProblemsADReverseDiffExt - -if isdefined(Base, :get_extension) - using LogDensityProblemsAD: ADGradientWrapper, SIGNATURES, dimension, logdensity - using LogDensityProblemsAD.SimpleUnPack: @unpack - - import LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import ReverseDiff - import ReverseDiff: DiffResults -else - using ..LogDensityProblemsAD: ADGradientWrapper, SIGNATURES, dimension, logdensity - using ..LogDensityProblemsAD.SimpleUnPack: @unpack - - import ..LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import ..ReverseDiff - import ..ReverseDiff: DiffResults -end - -# Load DiffResults helpers -include("DiffResults_helpers.jl") - -struct ReverseDiffLogDensity{L,C} <: ADGradientWrapper - ℓ::L - compiledtape::C -end - -""" - ADgradient(:ReverseDiff, ℓ; compile=Val(false), x=nothing) - ADgradient(Val(:ReverseDiff), ℓ; compile=Val(false), x=nothing) - -Gradient using algorithmic/automatic differentiation via ReverseDiff. - -If `compile isa Val{true}`, a tape of the log density computation is created upon construction of the gradient function and used in every evaluation of the gradient. -One may provide an example input `x::AbstractVector` of the log density function. -If `x` is `nothing` (the default), the tape is created with input `zeros(dimension(ℓ))`. - -By default, no tape is created. - -!!! 
note - Using a compiled tape can lead to significant performance improvements when the gradient of the log density - is evaluated multiple times (possibly for different inputs). - However, if the log density contains branches, use of a compiled tape can lead to silently incorrect results. -""" -function ADgradient(::Val{:ReverseDiff}, ℓ; - compile::Union{Val{true},Val{false}}=Val(false), - x::Union{Nothing,AbstractVector}=nothing) - ReverseDiffLogDensity(ℓ, _compiledtape(ℓ, compile, x)) -end - -_compiledtape(ℓ, compile, x) = nothing -_compiledtape(ℓ, ::Val{true}, ::Nothing) = _compiledtape(ℓ, Val(true), zeros(dimension(ℓ))) -function _compiledtape(ℓ, ::Val{true}, x) - tape = ReverseDiff.GradientTape(Base.Fix1(logdensity, ℓ), x) - return ReverseDiff.compile(tape) -end - -function Base.show(io::IO, ∇ℓ::ReverseDiffLogDensity) - print(io, "ReverseDiff AD wrapper for ", ∇ℓ.ℓ, " (") - if ∇ℓ.compiledtape === nothing - print(io, "no ") - end - print(io, "compiled tape)") -end - -function logdensity_and_gradient(∇ℓ::ReverseDiffLogDensity, x::AbstractVector) - @unpack ℓ, compiledtape = ∇ℓ - buffer = _diffresults_buffer(x) - if compiledtape === nothing - result = ReverseDiff.gradient!(buffer, Base.Fix1(logdensity, ℓ), x) - else - result = ReverseDiff.gradient!(buffer, compiledtape, x) - end - _diffresults_extract(result) -end - -end # module diff --git a/ext/LogDensityProblemsADTrackerExt.jl b/ext/LogDensityProblemsADTrackerExt.jl deleted file mode 100644 index 2cab08a..0000000 --- a/ext/LogDensityProblemsADTrackerExt.jl +++ /dev/null @@ -1,46 +0,0 @@ -""" -Gradient AD implementation using Tracker. -""" -module LogDensityProblemsADTrackerExt - -if isdefined(Base, :get_extension) - using LogDensityProblemsAD: ADGradientWrapper, logdensity - using LogDensityProblemsAD.SimpleUnPack: @unpack - - import LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import Tracker -else - using ..LogDensityProblemsAD: ADGradientWrapper, logdensity - using ..LogDensityProblemsAD.SimpleUnPack: @unpack - - import ..LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import ..Tracker -end - -struct TrackerGradientLogDensity{L} <: ADGradientWrapper - ℓ::L -end - -""" - ADgradient(:Tracker, ℓ) - ADgradient(Val(:Tracker), ℓ) - -Gradient using algorithmic/automatic differentiation via Tracker. - -This package has been deprecated in favor of Zygote, but we keep the interface available. -""" -ADgradient(::Val{:Tracker}, ℓ) = TrackerGradientLogDensity(ℓ) - -Base.show(io::IO, ∇ℓ::TrackerGradientLogDensity) = print(io, "Tracker AD wrapper for ", ∇ℓ.ℓ) - -function logdensity_and_gradient(∇ℓ::TrackerGradientLogDensity, x::AbstractVector{T}) where {T} - @unpack ℓ = ∇ℓ - y, back = Tracker.forward(x -> logdensity(ℓ, x), x) - yval = Tracker.data(y) - # work around https://github.com/FluxML/Flux.jl/issues/497 - z = T <: Real ? zero(T) : 0.0 - S = typeof(z + 0.0) - S(yval)::S, (S.(first(Tracker.data.(back(1)))))::Vector{S} -end - -end # module diff --git a/ext/LogDensityProblemsADZygoteExt.jl b/ext/LogDensityProblemsADZygoteExt.jl deleted file mode 100644 index 4b57c6c..0000000 --- a/ext/LogDensityProblemsADZygoteExt.jl +++ /dev/null @@ -1,40 +0,0 @@ -""" -Gradient AD implementation using Zygote. 
-""" -module LogDensityProblemsADZygoteExt - -if isdefined(Base, :get_extension) - using LogDensityProblemsAD: ADGradientWrapper, logdensity - using LogDensityProblemsAD.SimpleUnPack: @unpack - - import LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import Zygote -else - using ..LogDensityProblemsAD: ADGradientWrapper, logdensity - using ..LogDensityProblemsAD.SimpleUnPack: @unpack - - import ..LogDensityProblemsAD: ADgradient, logdensity_and_gradient - import ..Zygote -end - -struct ZygoteGradientLogDensity{L} <: ADGradientWrapper - ℓ::L -end - -""" - ADgradient(:Zygote, ℓ) - ADgradient(Val(:Zygote), ℓ) - -Gradient using algorithmic/automatic differentiation via Zygote. -""" -ADgradient(::Val{:Zygote}, ℓ) = ZygoteGradientLogDensity(ℓ) - -Base.show(io::IO, ∇ℓ::ZygoteGradientLogDensity) = print(io, "Zygote AD wrapper for ", ∇ℓ.ℓ) - -function logdensity_and_gradient(∇ℓ::ZygoteGradientLogDensity, x::AbstractVector) - @unpack ℓ = ∇ℓ - y, back = Zygote.pullback(Base.Fix1(logdensity, ℓ), x) - y, first(back(Zygote.sensitivity(y))) -end - -end # module diff --git a/src/LogDensityProblemsAD.jl b/src/LogDensityProblemsAD.jl index c2bb78f..348a57e 100644 --- a/src/LogDensityProblemsAD.jl +++ b/src/LogDensityProblemsAD.jl @@ -3,99 +3,121 @@ Automatic differentiation backends for LogDensityProblems. """ module LogDensityProblemsAD -export ADgradient - +using ADTypes +import DifferentiationInterface as DI using DocStringExtensions: SIGNATURES -import LogDensityProblems: logdensity, logdensity_and_gradient, capabilities, dimension -using LogDensityProblems: LogDensityOrder - -import SimpleUnPack - -##### -##### AD wrappers --- interface and generic code -##### - -""" -An abstract type that wraps another log density for calculating the gradient via AD. - -Automatically defines the methods `capabilities`, `dimension`, and `logdensity` forwarding -to the field `ℓ`, subtypes should define a [`logdensity_and_gradientent`](@ref). - -This is an implementation helper, not part of the API. 
-""" -abstract type ADGradientWrapper end +using LogDensityProblems: + LogDensityProblems, + LogDensityOrder, + logdensity, + logdensity_and_gradient, + capabilities, + dimension + +if !isdefined(Base, :get_extension) + using Requires: @require +end -logdensity(ℓ::ADGradientWrapper, x::AbstractVector) = logdensity(ℓ.ℓ, x) +export ADgradient -capabilities(::Type{<:ADGradientWrapper}) = LogDensityOrder{1}() +## Internal type -dimension(ℓ::ADGradientWrapper) = dimension(ℓ.ℓ) +struct ADgradientDI{B<:AbstractADType,L,E<:Union{DI.GradientExtras,Nothing}} + backend::B + ℓ::L + extras::E +end -Base.parent(ℓ::ADGradientWrapper) = ℓ.ℓ +LogDensityProblems.logdensity(∇ℓ::ADgradientDI, x::AbstractVector) = logdensity(∇ℓ.ℓ, x) +LogDensityProblems.capabilities(::Type{<:ADgradientDI}) = LogDensityOrder{1}() +LogDensityProblems.dimension(∇ℓ::ADgradientDI) = dimension(∇ℓ.ℓ) +Base.parent(∇ℓ::ADgradientDI) = ∇ℓ.ℓ +Base.copy(∇ℓ::ADgradientDI) = deepcopy(∇ℓ) + +function LogDensityProblems.logdensity_and_gradient( + ∇ℓ::ADgradientDI{B,<:Any,Nothing}, + x::AbstractVector{T}, +) where {B,T} + y, g = DI.value_and_gradient(Base.Fix1(logdensity, ∇ℓ.ℓ), ∇ℓ.backend, x) + if B <: AutoTracker + R = Base.promote_op(logdensity, typeof(∇ℓ), typeof(x)) + TF = float(T) + return convert(R, y)::R, convert(Vector{TF}, g)::Vector{TF} + else + return y, g + end +end -Base.copy(x::ADGradientWrapper) = x # no-op, except for ForwardDiff +function LogDensityProblems.logdensity_and_gradient( + ∇ℓ::ADgradientDI{B,<:Any,<:DI.GradientExtras}, + x::AbstractVector{T}, +) where {B,T} + y, g = DI.value_and_gradient(Base.Fix1(logdensity, ∇ℓ.ℓ), ∇ℓ.backend, x, ∇ℓ.extras) + if B <: AutoTracker + R = promote_type(T, Float64) + return convert(R, y)::R, convert(Vector{R}, g)::Vector{R} + else + return y, g + end +end -""" -$(SIGNATURES) +## Constructor from ADTypes -Wrap `P` using automatic differentiation to obtain a gradient. +function ADgradient(backend::AbstractADType, ℓ) + return ADgradientDI(backend, ℓ, nothing) +end -`kind` is usually a `Val` type with a symbol that refers to a package, for example -```julia -ADgradient(Val(:ForwardDiff), P) -ADgradient(Val(:ReverseDiff), P) -ADgradient(Val(:Zygote), P) -``` -Some methods may be loaded only conditionally after the relevant package is loaded (eg -`using Zygote`). +function ADgradient(backend::AbstractADType, ℓ, x::AbstractVector) + extras = DI.prepare_gradient(Base.Fix1(logdensity, ℓ), backend, x) + return ADgradientDI(backend, ℓ, extras) +end -The symbol can also be used directly as eg +## Constructor from symbols -```julia -ADgradient(:ForwardDiff, P) -``` +function ADgradient(kind::Symbol, ℓ; kwargs...) + return ADgradient(Val{kind}(), ℓ; kwargs...) +end -and should mostly be equivalent if the compiler manages to fold the constant. +function ADgradient(v::Val{kind}, ℓ; kwargs...) where {kind} + @info "Don't know how to AD with $(kind), consider `import $(kind)` if there is such a package." + throw(MethodError(ADgradient, (v, ℓ))) +end -The function `parent` can be used to retrieve the original argument. +function ADgradient( + ::Val{:ReverseDiff}, + ℓ; + compile::Val{comp}=Val(false), + x::Union{AbstractVector,Nothing}=nothing, +) where {comp} + backend = AutoReverseDiff(; compile=comp) + if isnothing(x) + return ADgradient(backend, ℓ) + else + return ADgradient(backend, ℓ, x) + end +end -!!! note - With the default options, automatic differentiation preserves thread-safety. See - exceptions and workarounds in the docstring for each backend. -""" -ADgradient(kind::Symbol, P; kwargs...) 
= ADgradient(Val{kind}(), P; kwargs...) +function ADgradient(::Val{:Tracker}, ℓ) + return ADgradient(AutoTracker(), ℓ) +end -function ADgradient(v::Val{kind}, P; kwargs...) where kind - @info "Don't know how to AD with $(kind), consider `import $(kind)` if there is such a package." - throw(MethodError(ADgradient, (v, P))) +function ADgradient(::Val{:Zygote}, ℓ) + return ADgradient(AutoZygote(), ℓ) end -##### -##### Empty method definitions for easier discoverability and backward compatibility -##### -function benchmark_ForwardDiff_chunks end -function heuristic_chunks end +## Initialization -# Backward compatible AD wrappers on Julia versions that do not support extensions -# TODO: Replace with proper version -const EXTENSIONS_SUPPORTED = isdefined(Base, :get_extension) -if !EXTENSIONS_SUPPORTED - using Requires: @require -end -@static if !EXTENSIONS_SUPPORTED +@static if !isdefined(Base, :get_extension) function __init__() - @require ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" include("../ext/LogDensityProblemsADADTypesExt.jl") - @require FiniteDifferences="26cc04aa-876d-5657-8c51-4c34ba976000" include("../ext/LogDensityProblemsADFiniteDifferencesExt.jl") - @require ForwardDiff="f6369f11-7733-5829-9624-2563aa707210" begin + @require Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" begin + include("../ext/LogDensityProblemsADEnzymeExt.jl") + end + @require FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" begin + include("../ext/LogDensityProblemsADFiniteDifferencesExt.jl") + end + @require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" begin include("../ext/LogDensityProblemsADForwardDiffExt.jl") - @require BenchmarkTools="6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" begin - include("../ext/LogDensityProblemsADForwardDiffBenchmarkToolsExt.jl") - end end - @require Tracker="9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" include("../ext/LogDensityProblemsADTrackerExt.jl") - @require Zygote="e88e6eb3-aa80-5325-afca-941959d7151f" include("../ext/LogDensityProblemsADZygoteExt.jl") - @require ReverseDiff="37e2e3b7-166d-5795-8a7a-e32c996b4267" include("../ext/LogDensityProblemsADReverseDiffExt.jl") - @require Enzyme="7da242da-08ed-463a-9acd-ee780be4f1d9" include("../ext/LogDensityProblemsADEnzymeExt.jl") end end diff --git a/test/runtests.jl b/test/runtests.jl index c621922..6dd15d1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -68,26 +68,20 @@ dimension(::TestLogDensity2) = 20 struct TestTag end # Allow tag type in gradient etc. 
calls of the log density function -ForwardDiff.checktag(::Type{ForwardDiff.Tag{TestTag, V}}, ::Base.Fix1{typeof(logdensity),typeof(TestLogDensity())}, ::AbstractArray{V}) where {V} = true +ForwardDiff.checktag(::Type{ForwardDiff.Tag{TestTag,V}}, ::Base.Fix1{typeof(logdensity),typeof(TestLogDensity())}, ::AbstractArray{V}) where {V} = true @testset "AD via ReverseDiff" begin ℓ = TestLogDensity() ∇ℓ_default = ADgradient(:ReverseDiff, ℓ) ∇ℓ_nocompile = ADgradient(:ReverseDiff, ℓ; compile=Val(false)) - for ∇ℓ in (∇ℓ_default, ∇ℓ_nocompile) - @test repr(∇ℓ) == "ReverseDiff AD wrapper for " * repr(ℓ) * " (no compiled tape)" - end # ADTypes support @test ADgradient(ADTypes.AutoReverseDiff(), ℓ) === ∇ℓ_default @test ADgradient(ADTypes.AutoReverseDiff(; compile = false), ℓ) === ∇ℓ_nocompile ∇ℓ_compile = ADgradient(:ReverseDiff, ℓ; compile=Val(true)) - ∇ℓ_compile_x = ADgradient(:ReverseDiff, ℓ; compile=Val(true), x=rand(3)) - for ∇ℓ in (∇ℓ_compile, ∇ℓ_compile_x) - @test repr(∇ℓ) == "ReverseDiff AD wrapper for " * repr(ℓ) * " (compiled tape)" - end + ∇ℓ_compile_x = ADgradient(:ReverseDiff, ℓ; compile=Val(true), x=randexp(3)) # ADTypes support @test typeof(ADgradient(ADTypes.AutoReverseDiff(; compile = true), ℓ)) === typeof(∇ℓ_compile) @@ -97,14 +91,14 @@ ForwardDiff.checktag(::Type{ForwardDiff.Tag{TestTag, V}}, ::Base.Fix1{typeof(log @test capabilities(∇ℓ) ≡ LogDensityOrder(1) for _ in 1:100 - x = rand(3) + x = randexp(3) @test @inferred(logdensity(∇ℓ, x)) ≅ test_logdensity(x) @test @inferred(logdensity_and_gradient(∇ℓ, x)) ≅ (test_logdensity(x), test_gradient(x)) x = -x @test @inferred(logdensity(∇ℓ, x)) ≅ test_logdensity(x) - if ∇ℓ.compiledtape === nothing + if ∇ℓ.extras === nothing # Recompute tape => correct results @test @inferred(logdensity_and_gradient(∇ℓ, x)) ≅ (test_logdensity(x), zero(x)) @@ -120,16 +114,15 @@ end @testset "AD via ForwardDiff" begin ℓ = TestLogDensity() ∇ℓ = ADgradient(:ForwardDiff, ℓ) - @test repr(∇ℓ) == "ForwardDiff AD wrapper for " * repr(ℓ) * ", w/ chunk size 3" @test dimension(∇ℓ) == 3 @test capabilities(∇ℓ) ≡ LogDensityOrder(1) # ADTypes support - @test ADgradient(ADTypes.AutoForwardDiff(), ℓ) === ∇ℓ + @test ADgradient(ADTypes.AutoForwardDiff(; chunksize=dimension(ℓ)), ℓ) === ∇ℓ for _ in 1:100 - x = randn(3) + x = randexp(3) @test @inferred(logdensity(∇ℓ, x)) ≅ test_logdensity(x) @test @inferred(logdensity_and_gradient(∇ℓ, x)) ≅ (test_logdensity(x), test_gradient(x)) @@ -154,7 +147,7 @@ end @test @inferred(logdensity(∇ℓ, x)) ≅ test_logdensity(x) @test @inferred(logdensity_and_gradient(∇ℓ, x)) ≅ (test_logdensity(x), test_gradient(x)) - @test @inferred(copy(∇ℓ)).gradient_config ≢ ∇ℓ.gradient_config + @test @inferred(copy(∇ℓ)).extras.config ≢ ∇ℓ.extras.config # custom tag + preallocated gradient config for T in (Float32, Float64) @@ -165,7 +158,7 @@ end @test @inferred(logdensity(∇ℓ, x)) ≅ test_logdensity(x) @test @inferred(logdensity_and_gradient(∇ℓ, x)) ≅ (test_logdensity(x), test_gradient(x)) - @test @inferred(copy(∇ℓ)).gradient_config ≢ ∇ℓ.gradient_config + @test @inferred(copy(∇ℓ)).extras.config ≢ ∇ℓ.extras.config end end @@ -182,7 +175,7 @@ end # cf https://github.com/tpapp/LogDensityProblemsAD.jl/pull/3 ℓ = TestLogDensity() ∇ℓ = ADgradient(:ForwardDiff, ℓ) - x = zeros(3) + x = randexp(3) y = ComponentVector(x = x) @test @inferred(logdensity(∇ℓ, y)) ≅ test_logdensity(x) @test @inferred(logdensity_and_gradient(∇ℓ, y)) ≅ @@ -193,21 +186,16 @@ end (test_logdensity(x), test_gradient(x)) end -@testset "chunk heuristics for ForwardDiff" begin - @test 
LogDensityProblemsAD.heuristic_chunks(82) == vcat(1:4:81, [82]) -end - @testset "AD via Tracker" begin ℓ = TestLogDensity() ∇ℓ = ADgradient(:Tracker, ℓ) - @test repr(∇ℓ) == "Tracker AD wrapper for " * repr(ℓ) @test dimension(∇ℓ) == 3 @test capabilities(∇ℓ) ≡ LogDensityOrder(1) for _ in 1:100 - x = randn(3) + x = randexp(3) @test @inferred(logdensity(∇ℓ, x)) ≅ test_logdensity(x) @test @inferred(logdensity_and_gradient(∇ℓ, x)) ≅ (test_logdensity(x), test_gradient(x)) - end + end # ADTypes support @test ADgradient(ADTypes.AutoTracker(), ℓ) === ∇ℓ @@ -216,11 +204,10 @@ end @testset "AD via Zygote" begin ℓ = TestLogDensity(test_logdensity1) ∇ℓ = ADgradient(:Zygote, ℓ) - @test repr(∇ℓ) == "Zygote AD wrapper for " * repr(ℓ) @test dimension(∇ℓ) == 3 @test capabilities(∇ℓ) ≡ LogDensityOrder(1) for _ in 1:100 - x = randn(3) + x = randexp(3) @test @inferred(logdensity(∇ℓ, x)) ≅ test_logdensity1(x) @test logdensity_and_gradient(∇ℓ, x) ≅ (test_logdensity1(x), test_gradient(x)) end @@ -234,43 +221,36 @@ end ∇ℓ_reverse = ADgradient(:Enzyme, ℓ) @test ∇ℓ_reverse === ADgradient(:Enzyme, ℓ; mode=Enzyme.Reverse) - @test repr(∇ℓ_reverse) == "Enzyme AD wrapper for " * repr(ℓ) * " with reverse mode" # ADTypes support - @test ADgradient(ADTypes.AutoEnzyme(), ℓ) === ∇ℓ_reverse + @test ADgradient(ADTypes.AutoEnzyme(; mode=Enzyme.Reverse), ℓ) === ∇ℓ_reverse ∇ℓ_forward = ADgradient(:Enzyme, ℓ; mode=Enzyme.Forward) - ∇ℓ_forward_shadow = ADgradient(:Enzyme, ℓ; - mode=Enzyme.Forward, - shadow=Enzyme.onehot(Vector{Float64}(undef, dimension(ℓ)))) - for ∇ℓ in (∇ℓ_forward, ∇ℓ_forward_shadow) - @test repr(∇ℓ) == "Enzyme AD wrapper for " * repr(ℓ) * " with forward mode" - end + ∇ℓ_forward_shadow = ADgradient( + :Enzyme, + ℓ; + mode=Enzyme.Forward, + shadow=Enzyme.onehot(Vector{Float64}(undef, dimension(ℓ))), + ) for ∇ℓ in (∇ℓ_reverse, ∇ℓ_forward, ∇ℓ_forward_shadow) @test dimension(∇ℓ) == 3 @test capabilities(∇ℓ) ≡ LogDensityOrder(1) for _ in 1:100 - x = randn(3) + x = randexp(3) @test @inferred(logdensity(∇ℓ, x)) ≅ test_logdensity1(x) @test logdensity_and_gradient(∇ℓ, x) ≅ (test_logdensity1(x), test_gradient(x)) end end - - # Branches in `ADgradient` - @test_throws ArgumentError ADgradient(:Enzyme, ℓ; mode=EnzymeTestMode()) - ∇ℓ = @test_logs (:info, "keyword argument `shadow` is ignored in reverse mode") ADgradient(:Enzyme, ℓ; shadow = (1,)) - @test ∇ℓ.shadow === nothing end @testset "AD via FiniteDifferences" begin ℓ = TestLogDensity(test_logdensity1) ∇ℓ = ADgradient(:FiniteDifferences, ℓ) - @test contains(repr(∇ℓ), "FiniteDifferences AD wrapper for " * repr(ℓ)) @test dimension(∇ℓ) == 3 @test capabilities(∇ℓ) ≡ LogDensityOrder(1) - for _ in 1:100 - x = randn(3) + for _ = 1:100 + x = randexp(3) @test @inferred(logdensity(∇ℓ, x)) ≅ test_logdensity1(x) @test ≅(logdensity_and_gradient(∇ℓ, x), (test_logdensity1(x), test_gradient(x)); atol = 1e-5) end @@ -280,9 +260,3 @@ end msg = "Don't know how to AD with Foo, consider `import Foo` if there is such a package." @test_logs((:info, msg), @test_throws(MethodError, ADgradient(:Foo, TestLogDensity2()))) end - -@testset "benchmark ForwardDiff chunk size" begin - b = LogDensityProblemsAD.benchmark_ForwardDiff_chunks(TestLogDensity2()) - @test b isa Vector{Pair{Int,Float64}} - @test length(b) ≤ 20 -end
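
Since the diff above removes every hand-written AD wrapper in favor of routing through ADTypes and DifferentiationInterface, a short usage sketch of the resulting API may help reviewers exercise the new code path. This is illustrative only and not part of the diff: `MyNormal` is a hypothetical problem type invented for the example, and the snippet assumes the updated package is installed together with ADTypes and ForwardDiff.

```julia
# Illustrative sketch only -- `MyNormal` is a made-up problem type for this example.
using ADTypes: AutoForwardDiff
using LogDensityProblems: LogDensityProblems, LogDensityOrder, logdensity_and_gradient
using LogDensityProblemsAD: ADgradient
import ForwardDiff  # the chosen backend package still has to be loaded

# A toy log density implementing the zero-order LogDensityProblems interface.
struct MyNormal end
LogDensityProblems.logdensity(::MyNormal, x) = -sum(abs2, x) / 2
LogDensityProblems.dimension(::MyNormal) = 3
LogDensityProblems.capabilities(::Type{MyNormal}) = LogDensityOrder{0}()

ℓ = MyNormal()

# ADTypes backends are now handled directly, with gradients computed via
# DifferentiationInterface.
∇ℓ = ADgradient(AutoForwardDiff(), ℓ)

# Passing an example input prepares the DI gradient extras once, up front.
∇ℓ_prepared = ADgradient(AutoForwardDiff(), ℓ, zeros(3))

# The symbol-based constructors remain as thin wrappers over the ADTypes path.
∇ℓ_sym = ADgradient(:ForwardDiff, ℓ; chunk = 3)

y, g = logdensity_and_gradient(∇ℓ, ones(3))  # log density value and its gradient
```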
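Two design points from the diff are worth keeping in mind for the sketch above, as far as I can tell from the new `src/LogDensityProblemsAD.jl`: passing an example input to `ADgradient(backend, ℓ, x)` calls `DI.prepare_gradient` once and stores the resulting extras in the `ADgradientDI` wrapper, whereas the two-argument form stores no preparation; and `Base.copy` on the wrapper is now a `deepcopy`, which replaces the ForwardDiff-specific `copy` method that the old docstring recommended for reusing a preallocated gradient config across threads.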