diff --git a/test/runtests.jl b/test/runtests.jl
index beed52c511..332475c953 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -8,2323 +8,11 @@ if isfile(preferences_file) && !isfile(test_preferences_file)
 end
 end
 
-using GPUCompiler
 using Enzyme
 using Test
-using FiniteDifferences
-using ForwardDiff
-using Aqua
-using Statistics
-using LinearAlgebra
 
 using Enzyme_jll
 @info "Testing against" Enzyme_jll.libEnzyme
 
-# Test against FiniteDifferences
-function test_scalar(f, x; rtol=1e-9, atol=1e-9, fdm=central_fdm(5, 1), kwargs...)
-    ∂x, = autodiff(Reverse, f, Active, Active(x))[1]
-    if typeof(x) <: Complex
-    else
-      @test isapprox(∂x, fdm(f, x); rtol=rtol, atol=atol, kwargs...)
-    end
-
-    rm = ∂x
-    if typeof(x) <: Integer
-        x = Float64(x)
-    end
-    ∂x, = autodiff(Forward, f, Duplicated(x, one(typeof(x))))
-    if typeof(x) <: Complex
-      @test ∂x ≈ rm
-    else
-      @test isapprox(∂x, fdm(f, x); rtol=rtol, atol=atol, kwargs...)
-    end
-end
-
-function test_matrix_to_number(f, x; rtol=1e-9, atol=1e-9, fdm=central_fdm(5, 1), kwargs...)
-    dx_fd = map(eachindex(x)) do i
-        fdm(x[i]) do xi
-            x2 = copy(x)
-            x2[i] = xi
-            f(x2)
-        end
-    end
-
-    dx = zero(x)
-    autodiff(Reverse, f, Active, Duplicated(x, dx))
-    @test isapprox(reshape(dx, length(dx)), dx_fd; rtol=rtol, atol=atol, kwargs...)
-
-    dx_fwd = map(eachindex(x)) do i
-        dx = zero(x)
-        dx[i] = 1
-        ∂x = autodiff(Forward, f, Duplicated(x, dx))
-        isempty(∂x) ? zero(eltype(dx)) : ∂x[1]
-    end
-    @test isapprox(dx_fwd, dx_fd; rtol=rtol, atol=atol, kwargs...)
-end
-
-Aqua.test_all(Enzyme, unbound_args=false, piracy=false)
-
-include("abi.jl")
-include("typetree.jl")
-
-@static if Enzyme.EnzymeRules.issupported()
-    include("rules.jl")
-    include("rrules.jl")
-    include("kwrules.jl")
-    include("kwrrules.jl")
-    @static if VERSION ≥ v"1.9-"
-        # XXX invalidation does not work on Julia 1.8
-        include("ruleinvalidation.jl")
-    end
-end
-
-f0(x) = 1.0 + x
-function vrec(start, x)
-    if start > length(x)
-        return 1.0
-    else
-        return x[start] * vrec(start+1, x)
-    end
-end
-
-@testset "Internal tests" begin
-    @assert Enzyme.Compiler.active_reg(Tuple{Float32,Float32,Int})
-    @assert !Enzyme.Compiler.active_reg(Tuple{NamedTuple{(), Tuple{}}, NamedTuple{(), Tuple{}}})
-    @assert !Enzyme.Compiler.active_reg(Base.RefValue{Float32})
-    world = codegen_world_age(typeof(f0), Tuple{Float64})
-    thunk_a = Enzyme.Compiler.thunk(Val(world), Const{typeof(f0)}, Active, Tuple{Active{Float64}}, Val(API.DEM_ReverseModeCombined), Val(1), Val((false, false)), Val(false), Val(false), DefaultABI)
-    thunk_b = Enzyme.Compiler.thunk(Val(world), Const{typeof(f0)}, Const, Tuple{Const{Float64}}, Val(API.DEM_ReverseModeCombined), Val(1), Val((false, false)), Val(false), Val(false), DefaultABI)
-    thunk_c = Enzyme.Compiler.thunk(Val(world), Const{typeof(f0)}, Active{Float64}, Tuple{Active{Float64}}, Val(API.DEM_ReverseModeCombined), Val(1), Val((false, false)), Val(false), Val(false), DefaultABI)
-    thunk_d = Enzyme.Compiler.thunk(Val(world), Const{typeof(f0)}, Active{Float64}, Tuple{Active{Float64}}, Val(API.DEM_ReverseModeCombined), Val(1), Val((false, false)), Val(false), Val(false), DefaultABI)
-    @test thunk_a.adjoint !== thunk_b.adjoint
-    @test thunk_c.adjoint === thunk_a.adjoint
-    @test thunk_c.adjoint === thunk_d.adjoint
-
-    @test thunk_a(Const(f0), Active(2.0), 1.0) == ((1.0,),)
-    @test thunk_a(Const(f0), Active(2.0), 2.0) == ((2.0,),)
-    @test thunk_b(Const(f0), Const(2.0)) === ((nothing,),)
-
-    forward, pullback = Enzyme.Compiler.thunk(Val(world), Const{typeof(f0)}, Active, Tuple{Active{Float64}}, Val(Enzyme.API.DEM_ReverseModeGradient), Val(1), Val((false, false)), Val(false), Val(false), DefaultABI)
-
-    @test forward(Const(f0), Active(2.0)) == (nothing,nothing,nothing)
-    @test pullback(Const(f0), Active(2.0), 1.0, nothing) == ((1.0,),)
-
-    function mul2(x)
-        x[1] * x[2]
-    end
-    d = Duplicated([3.0, 5.0], [0.0, 0.0])
-
-    world = codegen_world_age(typeof(mul2), Tuple{Vector{Float64}})
-    forward, pullback = Enzyme.Compiler.thunk(Val(world), Const{typeof(mul2)}, Active, Tuple{Duplicated{Vector{Float64}}}, Val(Enzyme.API.DEM_ReverseModeGradient), Val(1), Val((false, true)), Val(false), Val(false), DefaultABI)
-    res = forward(Const(mul2), d)
-    @test typeof(res[1]) == Tuple{Float64, Float64}
-    pullback(Const(mul2), d, 1.0, res[1])
-    @test d.dval[1] ≈ 5.0
-    @test d.dval[2] ≈ 3.0
-
-    d = Duplicated([3.0, 5.0], [0.0, 0.0])
-    world = codegen_world_age(typeof(vrec), Tuple{Int, Vector{Float64}})
-    forward, pullback = Enzyme.Compiler.thunk(Val(world), Const{typeof(vrec)}, Active, Tuple{Const{Int}, Duplicated{Vector{Float64}}}, Val(Enzyme.API.DEM_ReverseModeGradient), Val(1), Val((false, false, true)), Val(false), Val(false), DefaultABI)
-    res = forward(Const(vrec), Const(Int(1)), d)
-    pullback(Const(vrec), Const(1), d, 1.0, res[1])
-    @test d.dval[1] ≈ 5.0
-    @test d.dval[2] ≈ 3.0
-
-    # @test thunk_split.primal !== C_NULL
-    # @test thunk_split.primal !== thunk_split.adjoint
-    # @test thunk_a.adjoint !== thunk_split.adjoint
-end
-
-@testset "Reflection" begin
-    Enzyme.Compiler.enzyme_code_typed(Active, Tuple{Active{Float64}}) do x
-        x ^ 2
-    end
-    sprint() do io
-        Enzyme.Compiler.enzyme_code_native(io, f0, Active, Tuple{Active{Float64}})
-    end
-
-    sprint() do io
-        Enzyme.Compiler.enzyme_code_llvm(io, f0, Active, Tuple{Active{Float64}})
-    end
-end
-
-
-# @testset "Split Tape" begin
-#     f(x) = x[1] * x[1]
-
-#     thunk_split = Enzyme.Compiler.thunk(f, Tuple{Duplicated{Array{Float64,1}}}, Val(Enzyme.API.DEM_ReverseModeGradient))
-#     @test thunk_split.primal !== C_NULL
-#     @test thunk_split.primal !== thunk_split.adjoint
-# end
-
-make3() = (1.0, 2.0, 3.0)
-
-
-@testset "Simple tests" begin
-    f1(x) = 1.0 + x
-    f2(x) = x*x
-    @test autodiff(Reverse, f1, Active, Active(1.0))[1][1] ≈ 1.0
-    @test autodiff(Forward, f1, DuplicatedNoNeed, Duplicated(1.0, 1.0))[1] ≈ 1.0
-    @test autodiff(Forward, f1, Duplicated, Duplicated(1.0, 1.0))[2] ≈ 1.0
-    @test autodiff(Reverse, f2, Active, Active(1.0))[1][1] ≈ 2.0
-    @test autodiff(Forward, f2, Duplicated(1.0, 1.0))[1] ≈ 2.0
-    tup = autodiff(Forward, f2, BatchDuplicated(1.0, (1.0, 2.0, 3.0)))[1]
-    @test tup[1] ≈ 2.0
-    @test tup[2] ≈ 4.0
-    @test tup[3] ≈ 6.0
-    tup = autodiff(Forward, f2, BatchDuplicatedFunc{Float64, 3, typeof(make3)}(1.0))[1]
-    @test tup[1] ≈ 2.0
-    @test tup[2] ≈ 4.0
-    @test tup[3] ≈ 6.0
-    @test autodiff(Reverse, tanh, Active, Active(1.0))[1][1] ≈ 0.41997434161402606939
-    @test autodiff(Forward, tanh, Duplicated(1.0, 1.0))[1] ≈ 0.41997434161402606939
-    @test autodiff(Reverse, tanh, Active, Active(1.0f0))[1][1] ≈ Float32(0.41997434161402606939)
-    @test autodiff(Forward, tanh, Duplicated(1.0f0, 1.0f0))[1] ≈ Float32(0.41997434161402606939)
-
-    for T in (Float64, Float32, Float16)
-        res = autodiff(Reverse, tanh, Active, Active(T(1.0)))[1][1]
-        @test res isa T
-        @test res ≈ T(0.41997434161402606939)
-        res = autodiff(Forward, tanh, Duplicated(T(1.0), T(1.0)))[1]
-        @test res isa T
-        @test res ≈ T(0.41997434161402606939)
-    end
-
-    test_scalar(f1, 1.0)
-    test_scalar(f2, 1.0)
-    test_scalar(log2, 1.0)
-    test_scalar(log1p, 1.0)
-
-    test_scalar(log10, 1.0)
-    test_scalar(Base.acos, 0.9)
-
-    test_scalar(Base.atan, 0.9)
-
-    res = autodiff(Reverse, Base.atan, Active, Active(0.9), Active(3.4))[1]
-    @test res[1] ≈ ForwardDiff.derivative(x->Base.atan(x, 3.4), 0.9)
-    @test res[2] ≈ ForwardDiff.derivative(x->Base.atan(0.9, x), 3.4)
-
-    test_scalar(cbrt, 1.0)
-    test_scalar(cbrt, 1.0f0; rtol = 1.0e-5, atol = 1.0e-5)
-    test_scalar(Base.sinh, 1.0)
-    test_scalar(Base.cosh, 1.0)
-    test_scalar(Base.sinc, 2.2)
-    test_scalar(Base.FastMath.sinh_fast, 1.0)
-    test_scalar(Base.FastMath.cosh_fast, 1.0)
-    test_scalar(Base.FastMath.exp_fast, 1.0)
-    test_scalar(Base.exp10, 1.0)
-    test_scalar(Base.exp2, 1.0)
-    test_scalar(Base.expm1, 1.0)
-    test_scalar(x->rem(x, 1), 0.7)
-    test_scalar(x->rem2pi(x,RoundDown), 0.7)
-    test_scalar(x->fma(x,x+1,x/3), 2.3)
-    
-    @test autodiff(Forward, sincos, Duplicated(1.0, 1.0))[1][1] ≈ cos(1.0)
-
-    @test autodiff(Reverse, (x)->log(x), Active(2.0)) == ((0.5,),)
-end
-
-@testset "Simple Exception" begin
-    f_simple_exc(x, i) = ccall(:jl_, Cvoid, (Any,), x[i])
-    y = [1.0, 2.0]
-    f_x = zero.(y)
-    @test_throws BoundsError autodiff(Reverse, f_simple_exc, Duplicated(y, f_x), 0)
-end
-
-
-@testset "Duplicated" begin
-    x = Ref(1.0)
-    y = Ref(2.0)
-
-    ∇x = Ref(0.0)
-    ∇y = Ref(0.0)
-
-    autodiff(Reverse, (a,b)->a[]*b[], Active, Duplicated(x, ∇x), Duplicated(y, ∇y))
-
-    @test ∇y[] == 1.0
-    @test ∇x[] == 2.0
-end
-
-@testset "Simple tests" begin
-    g(x) = real((x + im)*(1 - im*x))
-    @test first(autodiff(Reverse, g, Active, Active(2.0))[1]) ≈ 2.0
-    @test first(autodiff(Forward, g, Duplicated(2.0, 1.0))) ≈ 2.0
-    @test first(autodiff(Reverse, g, Active, Active(3.0))[1]) ≈ 2.0
-    @test first(autodiff(Forward, g, Duplicated(3.0, 1.0))) ≈ 2.0
-    test_scalar(g, 2.0)
-    test_scalar(g, 3.0)
-end
-
-@testset "Base functions" begin
-    f1(x) = prod(ntuple(i -> i * x, 3))
-    @test autodiff(Reverse, f1, Active, Active(2.0))[1][1] == 72
-    @test autodiff(Forward, f1, Duplicated(2.0, 1.0))[1]   == 72
-
-    f2(x) = x * something(nothing, 2)
-    @test autodiff(Reverse, f2, Active, Active(1.0))[1][1] == 2
-    @test autodiff(Forward, f2, Duplicated(1.0, 1.0))[1]   == 2
-
-    f3(x) = x * sum(unique([x, 2.0, 2.0, 3.0]))
-    @test autodiff(Reverse, f3, Active, Active(1.0))[1][1] == 7
-    @test autodiff(Forward, f3, Duplicated(1.0, 1.0))[1]   == 7
-
-    for rf in (reduce, foldl, foldr)
-        f4(x) = rf(*, [1.0, x, x, 3.0])
-        @test autodiff(Reverse, f4, Active, Active(2.0))[1][1] == 12
-        @test autodiff(Forward, f4, Duplicated(2.0, 1.0))[1]   == 12
-    end
-
-    f5(x) = sum(accumulate(+, [1.0, x, x, 3.0]))
-    @test autodiff(Reverse, f5, Active, Active(2.0))[1][1] == 5
-    @test autodiff(Forward, f5, Duplicated(2.0, 1.0))[1]   == 5
-
-    f6(x) = x |> inv |> abs
-    @test autodiff(Reverse, f6, Active, Active(-2.0))[1][1] == 1/4
-    @test autodiff(Forward, f6, Duplicated(-2.0, 1.0))[1]   == 1/4
-
-    f7(x) = (inv ∘ abs)(x)
-    @test autodiff(Reverse, f7, Active, Active(-2.0))[1][1] == 1/4
-    @test autodiff(Forward, f7, Duplicated(-2.0, 1.0))[1]   == 1/4
-
-    f8(x) = x * count(i -> i > 1, [0.5, x, 1.5])
-    @test autodiff(Reverse, f8, Active, Active(2.0))[1][1] == 2
-    @test autodiff(Forward, f8, Duplicated(2.0, 1.0))[1]   == 2
-
-    function f9(x)
-        y = []
-        foreach(i -> push!(y, i^2), [1.0, x, x])
-        return sum(y)
-    end
-    @test autodiff(Reverse, f9, Active, Active(2.0))[1][1] == 8
-    @test autodiff(Forward, f9, Duplicated(2.0, 1.0))[1]   == 8
-
-    f10(x) = hypot(x, 2x)
-    @test autodiff(Reverse, f10, Active, Active(2.0))[1][1] == sqrt(5)
-end
-
-@testset "Taylor series tests" begin
-
-# Taylor series for `-log(1-x)`
-# eval at -log(1-1/2) = -log(1/2)
-function euroad(f::T) where T
-    g = zero(T)
-    for i in 1:10^7
-        g += f^i / i
-    end
-    return g
-end
-
-euroad′(x) = first(autodiff(Reverse, euroad, Active, Active(x)))[1]
-
-@test euroad(0.5) ≈ -log(0.5) # -log(1-x)
-@test euroad′(0.5) ≈ 2.0 # d/dx -log(1-x) = 1/(1-x)
-test_scalar(euroad, 0.5)
-end
-
-@testset "Nested AD" begin
-    tonest(x,y) = (x + y)^2
-
-    @test autodiff(Forward, (x,y) -> autodiff_deferred(Forward, tonest, Duplicated(x, 1.0), Const(y))[1], Const(1.0), Duplicated(2.0, 1.0))[1] ≈ 2.0
-end
-
-@testset "Hessian" begin
-    function origf(x::Array{Float64}, y::Array{Float64})
-        y[1] = x[1] * x[1] + x[2] * x[1]
-        return nothing
-    end
-
-    function grad(x, dx, y, dy)
-      Enzyme.autodiff_deferred(Reverse, origf, Duplicated(x, dx), DuplicatedNoNeed(y, dy))
-      nothing
-    end
-
-    x = [2.0, 2.0]
-    y = Vector{Float64}(undef, 1)
-    dx = [0.0, 0.0]
-    dy = [1.0]
-
-    grad(x, dx, y, dy)
-
-    vx = ([1.0, 0.0], [0.0, 1.0])
-    hess = ([0.0, 0.0], [0.0, 0.0])
-    dx2 = [0.0, 0.0]
-    dy = [1.0]
-
-    Enzyme.autodiff(Enzyme.Forward, grad,
-                    Enzyme.BatchDuplicated(x, vx),
-                    Enzyme.BatchDuplicated(dx2, hess),
-                    Const(y),
-                    Const(dy))
-
-    @test dx ≈ dx2
-    @test hess[1][1] ≈ 2.0
-    @test hess[1][2] ≈ 1.0
-    @test hess[2][1] ≈ 1.0
-    @test hess[2][2] ≈ 0.0
-end
-
-@testset "Array tests" begin
-
-    function arsum(f::Array{T}) where T
-        g = zero(T)
-        for elem in f
-            g += elem
-        end
-        return g
-    end
-
-    inp = Float64[1.0, 2.0]
-    dinp = Float64[0.0, 0.0]
-    autodiff(Reverse, arsum, Active, Duplicated(inp, dinp))
-    @test inp ≈ Float64[1.0, 2.0]
-    @test dinp ≈ Float64[1.0, 1.0]
-
-    @test autodiff(Forward, arsum, Duplicated(inp, dinp))[1] ≈ 2.0
-
-    # On Julia 1.6 the gradients are wrong (1.0 too large) and on 1.7 it errors
-    @static if VERSION ≥ v"1.8-"
-        function f1(m)
-            s = 0.0
-            for (i, col) in enumerate(eachcol(m))
-                s += i * sum(col)
-            end
-            return s
-        end
-
-        m = Float64[1 2 3; 4 5 6; 7 8 9]
-        dm = zero(m)
-        autodiff(Reverse, f1, Active, Duplicated(m, dm))
-        @test dm == Float64[1 2 3; 1 2 3; 1 2 3]
-
-        function f2(m)
-            s = 0.0
-            for (i, col) in enumerate(eachrow(m))
-                s += i * sum(col)
-            end
-            return s
-        end
-
-        dm = zero(m)
-        autodiff(Reverse, f2, Active, Duplicated(m, dm))
-        @test dm == Float64[1 1 1; 2 2 2; 3 3 3]
-    end
-end
-
-@testset "Advanced array tests" begin
-    function arsum2(f::Array{T}) where T
-        return sum(f)
-    end
-    inp = Float64[1.0, 2.0]
-    dinp = Float64[0.0, 0.0]
-    autodiff(Reverse, arsum2, Active, Duplicated(inp, dinp))
-    @test inp ≈ Float64[1.0, 2.0]
-    @test dinp ≈ Float64[1.0, 1.0]
-
-    @test autodiff(Forward, arsum2, Duplicated(inp, dinp))[1] ≈ 2.0
-end
-
-@testset "Dict" begin
-    params = Dict{Symbol, Float64}()
-    dparams = Dict{Symbol, Float64}()
-
-    params[:var] = 10.0
-    dparams[:var] = 0.0
-
-    f_dict(params, x) = params[:var] * x
-
-    @test autodiff(Reverse, f_dict, Const(params), Active(5.0)) == ((nothing, 10.0,),)
-    @test autodiff(Reverse, f_dict, Duplicated(params, dparams), Active(5.0)) == ((nothing, 10.0,),)
-    @test dparams[:var] == 5.0
-
-
-    mutable struct MD
-        v::Float64
-        d::Dict{Symbol, MD}
-    end
-
-    # TODO without Float64 on return
-    # there is a potential phi bug
-    function sum_rec(d::Dict{Symbol,MD})::Float64
-        s = 0.0
-        for k in keys(d)
-            s += d[k].v
-            s += sum_rec(d[k].d)
-        end
-        return s
-    end
-
-    par = Dict{Symbol, MD}()
-    par[:var] = MD(10.0, Dict{Symbol, MD}())
-    par[:sub] = MD(2.0, Dict{Symbol, MD}(:a=>MD(3.0, Dict{Symbol, MD}())))
-
-    dpar = Dict{Symbol, MD}()
-    dpar[:var] = MD(0.0, Dict{Symbol, MD}())
-    dpar[:sub] = MD(0.0, Dict{Symbol, MD}(:a=>MD(0.0, Dict{Symbol, MD}())))
-
-    # TODO
-    # autodiff(Reverse, sum_rec, Duplicated(par, dpar))
-    # @show par, dpar, sum_rec(par)
-    # @test dpar[:var].v ≈ 1.0
-    # @test dpar[:sub].v ≈ 1.0
-    # @test dpar[:sub].d[:a].v ≈ 1.0
-end
-
-let
-    function loadsin(xp)
-        x = @inbounds xp[1]
-        @inbounds xp[1] = 0.0
-        sin(x)
-    end
-    global invsin
-    function invsin(xp)
-        xp = Base.invokelatest(convert, Vector{Float64}, xp)
-        loadsin(xp)
-    end
-    x = [2.0]
-end
-
-@testset "Struct return" begin
-    x = [2.0]
-    dx = [0.0]
-    @test Enzyme.autodiff(Reverse, invsin, Active, Duplicated(x, dx)) == ((nothing,),)
-    @test dx[1] == -0.4161468365471424
-end
-
-function grad_closure(f, x)
-    function noretval(x,res)
-        y = f(x)
-        copyto!(res,y)
-        return nothing
-    end
-    n = length(x)
-    dx = zeros(n)
-    y  = zeros(n)
-    dy = zeros(n)
-    dy[1] = 1.0
-
-    autodiff(Reverse, noretval, Duplicated(x,dx), Duplicated(y, dy))
-    return dx
-end
-
-@testset "Closure" begin
-    x = [2.0,6.0]
-    dx = grad_closure(x->[x[1], x[2]], x)
-    @test dx == [1.0, 0.0]
-end
-
-@testset "Advanced array tests sq" begin
-    function arsumsq(f::Array{T}) where T
-        return sum(f) * sum(f)
-    end
-    inp = Float64[1.0, 2.0]
-    dinp = Float64[0.0, 0.0]
-    autodiff(Reverse, arsumsq, Active, Duplicated(inp, dinp))
-    @test inp ≈ Float64[1.0, 2.0]
-    @test dinp ≈ Float64[6.0, 6.0]
-end
-
-@testset "Bithacks" begin
-    function fneg(x::Float64)
-        xptr = reinterpret(Int64, x)
-        y = Int64(-9223372036854775808)
-        out = y ⊻ xptr;
-        return reinterpret(Float64, out)
-    end
-    @test autodiff(Reverse, fneg, Active, Active(2.0))[1][1] ≈ -1.0
-    @test autodiff(Forward, fneg, Duplicated(2.0, 1.0))[1] ≈ -1.0
-    function expor(x::Float64)
-        xptr = reinterpret(Int64, x)
-        y = UInt64(4607182418800017408)
-        out = y | xptr;
-        return reinterpret(Float64, out)
-    end
-    @test autodiff(Reverse, expor, Active, Active(0.42))[1][1] ≈ 4.0
-    @test autodiff(Forward, expor, Duplicated(0.42, 1.0))[1] ≈ 4.0
-end
-
-@testset "Reshape Activity" begin
-    function f(x, bias)
-        mout = x + @inbounds vec(bias)[1]
-       sin(mout)
-    end
-
-    x  = [2.0,]
-
-    bias = Float32[0.0;;;]
-    res = Enzyme.autodiff(Reverse, f, Active, Active(x[1]), Const(bias))
-    
-    @test bias[1][1] ≈ 0.0
-    @test res[1][1] ≈ cos(x[1])
-end
-
-@testset "GC" begin
-    function gc_alloc(x)  # Basically g(x) = x^2
-        a = Array{Float64, 1}(undef, 10)
-        for n in 1:length(a)
-            a[n] = x^2
-        end
-        return mean(a)
-    end
-    @test autodiff(Reverse, gc_alloc, Active, Active(5.0))[1][1] ≈ 10
-    @test autodiff(Forward, gc_alloc, Duplicated(5.0, 1.0))[1] ≈ 10
-
-    A = Float64[2.0, 3.0]
-    B = Float64[4.0, 5.0]
-    dB = Float64[0.0, 0.0]
-    f = (X, Y) -> sum(X .* Y)
-    Enzyme.autodiff(Reverse, f, Active, A, Duplicated(B, dB))
-
-    function gc_copy(x)  # Basically g(x) = x^2
-        a = x * ones(10)
-        for n in 1:length(a)
-            a[n] = x^2
-        end
-        return mean(a)
-    end
-
-    @test Enzyme.autodiff(Reverse, gc_copy, Active, Active(5.0))[1][1] ≈ 10
-    @test Enzyme.autodiff(Forward, gc_copy, Duplicated(5.0, 1.0))[1] ≈ 10
-end
-
-@testset "Null init tape" begin
-    struct Leaf
-        params::NamedTuple
-    end
-
-    function LeafF(n::Leaf)::Float32
-        y = first(n.params.b2)
-        r = convert(Tuple{Float32}, (y,))
-        return r[1]
-    end
-
-    ps =
-        (
-            b2 = 1.0f0,
-        )
-
-    grads =
-        (
-            b2 = 0.0f0,
-        )
-
-    t1 = Leaf(ps)
-    t1Grads = Leaf(grads)
-
-    forward, pullback = Enzyme.autodiff_thunk(ReverseSplitModified(ReverseSplitNoPrimal, Val((false, true))), Const{typeof(LeafF)}, Active, Duplicated{Leaf})
-    tape, primal, shadow = forward(Const(LeafF), Duplicated(t1, t1Grads))
-
-
-    struct Foo2{X,Y}
-        x::X
-        y::Y
-    end
-
-    test_f(f::Foo2) = f.x^2
-    res = autodiff(Reverse, test_f, Active(Foo2(3.0, :two)))[1][1]
-    @test res.x ≈ 6.0
-    @test res.y == nothing
-end
-
-@testset "Null init union" begin
-    @noinline function unionret(itr, cond)
-        if cond
-            return Base._InitialValue()
-        else
-            return itr[1]
-        end
-    end
-
-    function fwdunion(data::Vector{Float64})::Real
-        unionret(data, false)
-    end
-
-    data = ones(Float64, 500)
-    ddata = zeros(Float64, 500)
-
-    forward, pullback = Enzyme.autodiff_thunk(ReverseSplitModified(ReverseSplitNoPrimal, Val((false, true))), Const{typeof(fwdunion)}, Active, Duplicated{Vector{Float64}})
-    tape, primal, shadow = forward(Const(fwdunion), Duplicated(data, ddata))
-
-	function firstimpl(itr)
-		v = firstfold(itr)
-		@assert !(v isa Base._InitialValue)
-		return v
-	end
-
-	function firstfold(itr)
-		op, itr = Base._xfadjoint(Base.BottomRF(Base.add_sum), Base.Generator(Base.identity, itr))
-		y = iterate(itr)
-		init = Base._InitialValue()
-		y === nothing && return init
-		v = op(init, y[1])
-		return v
-	end
-
-	function smallrf(weights::Vector{Float64}, data::Vector{Float64})::Float64
-		itr1 = (weight for (weight, mean) in zip(weights, weights))
-
-		itr2 = (firstimpl(itr1) for x in data)
-
-		firstimpl(itr2)
-	end
-
-	data = ones(Float64, 1)
-
-	weights = [0.2]
-	dweights = [0.0]
-    # Technically this test doesn't need runtimeactivity since the closure combo of active itr1 and const data
-    # doesn't use any of the const data values, but now that we error for activity confusion, we need to
-    # mark runtimeActivity to let this pass
-    Enzyme.API.runtimeActivity!(true)
-	Enzyme.autodiff(Enzyme.Reverse, smallrf, Enzyme.Duplicated(weights, dweights), Enzyme.Const(data))
-    @test dweights[1] ≈ 1.
-
-    function invokesum(weights::Vector{Float64}, data::Vector{Float64})::Float64
-        sum(
-            sum(
-                weight
-                for (weight, mean) in zip(weights, weights)
-            )
-            for x in data
-        )
-    end
-
-    data = ones(Float64, 20)
-
-    weights = [0.2, 0.8]
-    dweights = [0.0, 0.0]
-
-    Enzyme.autodiff(Enzyme.Reverse, invokesum, Enzyme.Duplicated(weights, dweights), Enzyme.Const(data))
-    Enzyme.API.runtimeActivity!(false)
-    @test dweights[1] ≈ 20.
-    @test dweights[2] ≈ 20.
-end
-
-# dot product (https://github.com/EnzymeAD/Enzyme.jl/issues/495)
-@testset "Dot product" for T in (Float32, Float64)
-    xx = rand(T, 10)
-    grads = zeros(T, size(xx))
-    autodiff(Reverse, (y) -> mapreduce(x -> x*x, +, y), Duplicated(xx, grads))
-    @test xx .* 2 == grads
-
-    xx = rand(T, 10)
-    grads = zeros(T, size(xx))
-    autodiff(Reverse, (x) -> sum(x .* x), Duplicated(xx, grads))
-    @test xx .* 2 == grads
-
-    xx = rand(T, 10)
-    grads = zeros(T, size(xx))
-    autodiff(Reverse, (x) -> x' * x, Duplicated(xx, grads))
-    @test xx .* 2 == grads
-end
-
-@testset "Compare against" begin
-    x = 3.0
-    fd = central_fdm(5, 1)(sin, x)
-
-    @test fd ≈ ForwardDiff.derivative(sin, x)
-    @test fd ≈ first(autodiff(Reverse, sin, Active, Active(x)))[1]
-    @test fd ≈ first(autodiff(Forward, sin, Duplicated(x, 1.0)))
-
-    x = 0.2 + sin(3.0)
-    fd = central_fdm(5, 1)(asin, x)
-
-    @test fd ≈ ForwardDiff.derivative(asin, x)
-    @test fd ≈ first(autodiff(Reverse, asin, Active, Active(x)))[1]
-    @test fd ≈ first(autodiff(Forward, asin, Duplicated(x, 1.0)))
-    test_scalar(asin, x)
-
-    function foo(x)
-        a = sin(x)
-        b = 0.2 + a
-        c = asin(b)
-        return c
-    end
-
-    x = 3.0
-    fd = central_fdm(5, 1)(foo, x)
-
-    @test fd ≈ ForwardDiff.derivative(foo, x)
-    @test fd ≈ first(autodiff(Reverse, foo, Active, Active(x)))[1]
-    @test fd ≈ first(autodiff(Forward, foo, Duplicated(x, 1.0)))
-    test_scalar(foo, x)
-
-    # Input type shouldn't matter
-    x = 3
-    @test fd ≈ ForwardDiff.derivative(foo, x)
-    @test fd ≈ first(autodiff(Reverse, foo, Active, Active(x)))[1]
-    # They do matter for duplicated, which can't be auto promoted
-    # @test fd ≈ first(autodiff(Forward, foo, Duplicated(x, 1)))
-
-    f74(a, c) = a * √c
-    @test √3 ≈ first(autodiff(Reverse, f74, Active, Active(2), 3))[1]
-    @test √3 ≈ first(autodiff(Forward, f74, Duplicated(2.0, 1.0), 3))
-end
-
-@testset "SinCos" begin
-	function sumsincos(theta)
-		a, b = sincos(theta)
-		return a + b
-	end
-    test_scalar(sumsincos, 1.0, rtol=1e-5, atol=1e-5)
-end
-
-@testset "BoxFloat" begin
-    function boxfloat(x)
-        x = ccall(:jl_box_float64, Any, (Float64,), x)
-        (sin(x)::Float64 + x)::Float64
-    end
-    @test 0.5838531634528576 ≈ Enzyme.autodiff(Reverse, boxfloat, Active, Active(2.0))[1][1]
-    @test 0.5838531634528576 ≈ Enzyme.autodiff(Forward, boxfloat, DuplicatedNoNeed, Duplicated(2.0, 1.0))[1]
-    res = Enzyme.autodiff(Forward, boxfloat, BatchDuplicatedNoNeed, BatchDuplicated(2.0, (1.0, 2.0)))[1]
-    @test 0.5838531634528576 ≈ res[1]
-    @test 1.1677063269057153 ≈ res[2]
-end
-
-"""
-    J(ν, z) := ∑ (−1)^k / Γ(k+1) / Γ(k+ν+1) * (z/2)^(ν+2k)
-"""
-function mybesselj(ν, z, atol=1e-8)
-    k = 0
-    s = (z/2)^ν / factorial(ν)
-    out = s
-    while abs(s) > atol
-        k += 1
-        s *= (-1) / k / (k+ν) * (z/2)^2
-        out += s
-    end
-    out
-end
-mybesselj0(z) = mybesselj(0, z)
-mybesselj1(z) = mybesselj(1, z)
-
-@testset "Bessel" begin
-    autodiff(Reverse, mybesselj, Active, Const(0), Active(1.0))
-    autodiff(Reverse, mybesselj, Active, 0, Active(1.0))
-    autodiff(Forward, mybesselj, Const(0), Duplicated(1.0, 1.0))
-    autodiff(Forward, mybesselj, 0, Duplicated(1.0, 1.0))
-    @testset "besselj0/besselj1" for x in (1.0, -1.0, 0.0, 0.5, 10, -17.1,) # 1.5 + 0.7im)
-        test_scalar(mybesselj0, x, rtol=1e-5, atol=1e-5)
-        test_scalar(mybesselj1, x, rtol=1e-5, atol=1e-5)
-    end
-end
-
-# Ensure that this returns an error, and not a crash
-# https://github.com/EnzymeAD/Enzyme.jl/issues/368
-abstract type TensorProductBasis <: Function end
-
-struct LegendreBasis <: TensorProductBasis
-    n::Int
-end
-
-function (basis::LegendreBasis)(x)
-    return x
-end
-
-struct MyTensorLayer
-    model::Array{TensorProductBasis}
-end
-
-function fn(layer::MyTensorLayer, x)
-    model = layer.model
-    return model[1](x)
-end
-
-const nn = MyTensorLayer([LegendreBasis(10)])
-
-function dxdt_pred(x)
-  return fn(nn, x)
-end
-
-@testset "AbstractType calling convention" begin
-    # TODO get rid of runtime activity
-    Enzyme.API.runtimeActivity!(true)
-    @test 1.0 ≈ Enzyme.autodiff(Reverse, dxdt_pred, Active(1.0))[1][1]
-    Enzyme.API.runtimeActivity!(false)
-end
-
-
-mutable struct RTGData
-	x
-end
-
-@noinline function rtg_sub(V, cv)
-	return cv
-end
-
-@noinline function rtg_cast(cv)
-	return cv
-end
-
-function rtg_f(V,@nospecialize(cv))
-	s = rtg_sub(V, Base.inferencebarrier(cv))::RTGData
-	s = rtg_cast(Base.inferencebarrier(s.x))::Float64
-	return s
-end
-
-@testset "RuntimeActivity generic call" begin
-    Enzyme.API.runtimeActivity!(true)
-    res = autodiff(Forward, rtg_f, Duplicated, Duplicated([0.2], [1.0]), Const(RTGData(3.14)))
-    @test 3.14 ≈ res[1]
-    @test 0.0 ≈ res[2]
-    Enzyme.API.runtimeActivity!(false)
-end
-
-@inline function myquantile(v::AbstractVector, p::Real; alpha)
-    n = length(v)
-    
-    m = 1.0 + p * (1.0 - alpha - 1.0)
-    aleph = n*p + oftype(p, m)
-    j = clamp(trunc(Int, aleph), 1, n-1)
-    γ = clamp(aleph - j, 0, 1)
-
-    if n == 1
-        a = @inbounds v[1]
-        b = @inbounds v[1]
-    else
-        a = @inbounds v[j]
-        b = @inbounds v[j + 1]
-    end
-    
-    return a + γ*(b-a)
-end
-
-function fquantile(x)
-    v = [1.0, x]
-    return @inbounds (map(y->myquantile(v, y, alpha=1.), [0.7]))[1]
-end
-
-@testset "Attributor issues" begin
-
-    cor = fquantile(2.0)
-    res = autodiff(Forward, fquantile, Duplicated,Duplicated(2.0, 1.0))
-    @test cor ≈ res[1]
-    @test 0.7 ≈ res[2]
-
-end
-
-## https://github.com/JuliaDiff/ChainRules.jl/tree/master/test/rulesets
-if !Sys.iswindows()
-    include("packages/specialfunctions.jl")
-end
-
-@testset "Threads" begin
-    cmd = `$(Base.julia_cmd()) --threads=1 --startup-file=no threads.jl`
-   	@test success(pipeline(cmd, stderr=stderr, stdout=stdout))
-    cmd = `$(Base.julia_cmd()) --threads=2 --startup-file=no threads.jl`
-   	@test success(pipeline(cmd, stderr=stderr, stdout=stdout))
-end
-
-@testset "DiffTest" begin
-    include("DiffTests.jl")
-
-    n = 1 + rand()
-    x, y = 1 .+ rand(5, 5), 1 .+ rand(5)
-    A, B = 1 .+ rand(5, 5), 1 .+ rand(5, 5)
-
-    # f returns Number
-    @testset "Number to Number" for f in DiffTests.NUMBER_TO_NUMBER_FUNCS
-        test_scalar(f, n)
-    end
-
-    @testset "Vector to Number" for f in DiffTests.VECTOR_TO_NUMBER_FUNCS
-        test_matrix_to_number(f, y; rtol=1e-6, atol=1e-6)
-    end
-
-    @testset "Matrix to Number" for f in DiffTests.MATRIX_TO_NUMBER_FUNCS
-        test_matrix_to_number(f, x; rtol=1e-6, atol=1e-6)
-    end
-
-    # TODO(vchuravy/wsmoses): Enable these tests
-    # for f in DiffTests.TERNARY_MATRIX_TO_NUMBER_FUNCS
-    #     @test isa(f(A, B, x), Number)
-    # end
-
-    # # f returns Array
-
-    # for f in DiffTests.NUMBER_TO_ARRAY_FUNCS
-    #     @test isa(f(n), Array)
-    # end
-
-    # for f in DiffTests.ARRAY_TO_ARRAY_FUNCS
-    #     @test isa(f(A), Array)
-    #     @test isa(f(y), Array)
-    # end
-
-    # for f in DiffTests.MATRIX_TO_MATRIX_FUNCS
-    #     @test isa(f(A), Array)
-    # end
-
-    # for f in DiffTests.BINARY_MATRIX_TO_MATRIX_FUNCS
-    #     @test isa(f(A, B), Array)
-    # end
-
-    # # f! returns Nothing
-
-    # for f! in DiffTests.INPLACE_ARRAY_TO_ARRAY_FUNCS
-    #     @test isa(f!(y, x), Nothing)
-    # end
-
-    # for f! in DiffTests.INPLACE_NUMBER_TO_ARRAY_FUNCS
-    #     @test isa(f!(y, n), Nothing)
-    # end
-
-end
-
-@testset "IO" begin
-
-    function printsq(x)
-        println(x)
-        x*x
-    end
-
-    @test 4.6 ≈ first(autodiff(Reverse, printsq, Active, Active(2.3)))[1]
-    @test 4.6 ≈ first(autodiff(Forward, printsq, Duplicated(2.3, 1.0)))
-
-    function tostring(x)
-        string(x)
-        x*x
-    end
-
-    @test 4.6 ≈ first(autodiff(Reverse, tostring, Active, Active(2.3)))[1]
-    @test 4.6 ≈ first(autodiff(Forward, tostring, Duplicated(2.3, 1.0)))
-end
-
-@testset "hmlstm" begin
-    sigm(x)  = @fastmath 1 / (1 + exp(-x))
-    @fastmath function hmlstm_update_c_scalar(z, zb, c, f, i, g)
-        if z == 1.0f0 # FLUSH
-            return sigm(i) * tanh(g)
-        elseif zb == 0.0f0 # COPY
-            return c
-        else # UPDATE
-            return sigm(f) * c + sigm(i) * tanh(g)
-        end
-    end
-
-    N = 64
-    Z = round.(rand(Float32, N))
-    Zb = round.(rand(Float32, N))
-    C = rand(Float32, N, N)
-    F = rand(Float32, N, N)
-    I = rand(Float32, N, N)
-    G = rand(Float32, N, N)
-
-    function broadcast_hmlstm(out, Z, Zb, C, F, I, G)
-        out .= hmlstm_update_c_scalar.(Z, Zb, C, F, I, G)
-        return nothing
-    end
-
-    ∇C = zeros(Float32, N, N)
-    ∇F = zeros(Float32, N, N)
-    ∇I = zeros(Float32, N, N)
-    ∇G = zeros(Float32, N, N)
-
-    # TODO(wsmoses): Check after updating Enzyme_jll
-    # autodiff(broadcast_hmlstm, Const,
-    #          Const(zeros(Float32, N, N)), Const(Z), Const(Zb),
-    #          Duplicated(C, ∇C), Duplicated(F, ∇F), Duplicated(I, ∇I), Duplicated(G, ∇G))
-    # fwddiff(broadcast_hmlstm, Const,
-    #          Const(zeros(Float32, N, N)), Const(Z), Const(Zb),
-    #          Duplicated(C, ∇C), Duplicated(F, ∇F), Duplicated(I, ∇I), Duplicated(G, ∇G))
-end
-
-@testset "No speculation" begin
-	mutable struct SpecFoo
-
-		iters::Int
-		a::Float64
-		b::Vector{Float64}
-
-	end
-
-	function f(Foo)
-		for i = 1:Foo.iters
-
-			c = -1.0
-
-			if Foo.a < 0.0
-				X = (-Foo.a)^0.25
-				c = 2*log(X)
-			end
-
-			# set b equal to desired result
-			Foo.b[1] = 1.0 / c
-
-			return nothing
-		end
-	end
-
-	foo  = SpecFoo(1, 1.0, zeros(Float64, 1))
-	dfoo = SpecFoo(0, 0.0, zeros(Float64, 1))
-
-	# should not throw a domain error, which
-	# will occur if the pow is mistakenly speculated
-	Enzyme.autodiff(Reverse, f, Duplicated(foo, dfoo))
-end
-
-genlatestsin(x)::Float64 = Base.invokelatest(sin, x)
-function genlatestsinx(xp)
-    x = @inbounds xp[1]
-    @inbounds xp[1] = 0.0
-    Base.invokelatest(sin, x)::Float64 + 1
-end
-
-function loadsin(xp)
-    x = @inbounds xp[1]
-    @inbounds xp[1] = 0.0
-    sin(x)
-end
-function invsin(xp)
-    xp = Base.invokelatest(convert, Vector{Float64}, xp)
-    loadsin(xp)
-end
-
-@testset "generic" begin
-    @test -0.4161468365471424 ≈ Enzyme.autodiff(Reverse, genlatestsin, Active, Active(2.0))[1][1]
-    @test -0.4161468365471424 ≈ Enzyme.autodiff(Forward, genlatestsin, Duplicated(2.0, 1.0))[1]
-
-    x = [2.0]
-    dx = [0.0]
-    Enzyme.autodiff(Reverse, genlatestsinx, Active, Duplicated(x, dx))
-    @test 0 ≈ x[1]
-    @test -0.4161468365471424 ≈ dx[1]
-
-    x = [2.0]
-    dx = [0.0]
-    Enzyme.autodiff(Reverse, invsin, Active, Duplicated(x, dx))
-    @test 0 ≈ x[1]
-    @test -0.4161468365471424 ≈ dx[1]
-
-	function inactive_gen(x)
-		n = 1
-		for k in 1:2
-			y = falses(n)
-		end
-		return x
-	end
-    @test 1.0 ≈ Enzyme.autodiff(Reverse, inactive_gen, Active, Active(1E4))[1][1]
-	@test 1.0 ≈ Enzyme.autodiff(Forward, inactive_gen, Duplicated(1E4, 1.0))[1]
-end
-
-@testset "invoke" begin
-    @noinline apply(@nospecialize(func)) = func()
-
-    function invtest(arr)
-        function f()
-           arr[1] *= 5.0
-           nothing
-        end
-        apply(f)
-    end
-
-    x  = [2.0]
-    dx = [1.0]
-
-    Enzyme.autodiff(Reverse, invtest, Duplicated(x, dx))
-
-    @test 10.0 ≈ x[1]
-    @test 5.0 ≈ dx[1]
-end
-
-@testset "Batch Generics" begin
-    function mul2ip(y)
-        y[1] *= 2
-        return nothing
-    end
-
-    function fwdlatestfooip(y)
-        Base.invokelatest(mul2ip, y)
-    end
-
-    x = [1.0, 2.0, 3.0]
-    dx = [1.0, 1.0, 1.0]
-    dx2 = [10.0, 20.0, 30.0]
-
-    res = Enzyme.autodiff(Forward, fwdlatestfooip, Const, BatchDuplicated(x, (dx, dx2)))
-    @test 2.0 ≈ dx[1]
-    @test 20.0 ≈ dx2[1]
-
-    function mul2(y)
-        return y[1] * 2
-    end
-
-    function fwdlatestfoo(y)
-        Base.invokelatest(mul2, y)
-    end
-
-    x = [1.0, 2.0, 3.0]
-    dx = [1.0, 1.0, 1.0]
-    dx2 = [10.0, 20.0, 30.0]
-
-    res = Enzyme.autodiff(Forward, fwdlatestfoo, BatchDuplicated, BatchDuplicated(x, (dx, dx2)))
-
-    @test 2.0 ≈ res[1][1]
-    @test 2.0 ≈ res[2][1]
-    @test 20.0 ≈ res[2][2]
-
-    res = Enzyme.autodiff(Forward, fwdlatestfoo, BatchDuplicatedNoNeed, BatchDuplicated(x, (dx, dx2)))
-    @test 2.0 ≈ res[1][1]
-    @test 20.0 ≈ res[1][2]
-
-
-    function revfoo(out, x)
-        out[] = x*x
-        nothing
-    end
-
-    out = Ref(0.0)
-    dout = Ref(1.0)
-    dout2 = Ref(10.0)
-
-    res = Enzyme.autodiff(Reverse, revfoo, BatchDuplicated(out, (dout, dout2)), Active(2.0))[1][2]
-    @test 4.0 ≈ res[1]
-    @test 40.0 ≈ res[2]
-    @test 0.0 ≈ dout[]
-    @test 0.0 ≈ dout2[]
-
-    out = Ref(0.0)
-    dout = Ref(1.0)
-    dout2 = Ref(10.0)
-
-    function rev_lq(y)
-        return y * y
-    end
-    function revfoo2(out, x)
-        out[] = Base.invokelatest(rev_lq, x)::Float64
-        nothing
-    end
-    res = Enzyme.autodiff(Reverse, revfoo2, BatchDuplicated(out, (dout, dout2)), Active(2.0))[1][2]
-    @test 4.0 ≈ res[1]
-    @test 40.0 ≈ res[2]
-    @test 0.0 ≈ dout[]
-    @test 0.0 ≈ dout2[]
-
-end
-
-@testset "Dynamic Val Construction" begin
-
-    dyn_f(::Val{D}) where D = prod(D)
-    dyn_mwe(x, t) = x / dyn_f(Val(t))
-
-    @test 0.5 ≈ Enzyme.autodiff(Reverse, dyn_mwe, Active, Active(1.0), Const((1, 2)))[1][1]
-end
-
-@testset "broadcast" begin
-    A = rand(10); B = rand(10); R = similar(A)
-    dA = zero(A); dB = zero(B); dR = fill!(similar(R), 1)
-
-    function foo_bc!(R, A, B)
-        R .= A .+ B
-        return nothing
-    end
-
-    autodiff(Reverse, foo_bc!, Const, Duplicated(R, dR), Duplicated(A, dA), Duplicated(B, dB))
-
-    # works since aliasing is "simple"
-    autodiff(Reverse, foo_bc!, Const, Duplicated(R, dR), Duplicated(R, dR), Duplicated(B, dB))
-
-    A = rand(10,10); B = rand(10, 10)
-    dA = zero(A); dB = zero(B); dR = fill!(similar(A), 1)
-
-    autodiff(Reverse, foo_bc!, Const, Duplicated(A, dR), Duplicated(transpose(A), transpose(dA)), Duplicated(B, dB))
-end
-
-
-@testset "DuplicatedReturn" begin
-    moo(x) = fill(x, 10)
-
-    @test_throws ErrorException autodiff(Reverse, moo, Active(2.1))
-    fo, = autodiff(Forward, moo, Duplicated(2.1, 1.0))
-    for i in 1:10
-        @test 1.0 ≈ fo[i]
-    end
-
-    @test_throws ErrorException autodiff(Forward, x->x, Active(2.1))
-end
-
-@testset "Mismatched return" begin
-    @test_throws ErrorException autodiff(Reverse, _->missing, Active, Active(2.1))
-    @test_throws ErrorException autodiff_deferred(Reverse, _->missing, Active, Active(2.1))
-end
-
-@testset "GCPreserve" begin
-    function f(x, y)
-        GC.@preserve x y begin
-            ccall(:memcpy, Cvoid,
-                (Ptr{Float64},Ptr{Float64},Csize_t), x, y, 8)
-        end
-        nothing
-    end
-    autodiff(Reverse, f, Duplicated([1.0], [0.0]), Duplicated([1.0], [0.0]))
-    autodiff(Forward, f, Duplicated([1.0], [0.0]), Duplicated([1.0], [0.0]))
-end
-
-@testset "Copy" begin
-    function advance(u_v_eta)
-        eta = copy(u_v_eta)
-        return @inbounds eta[1]
-    end
-
-    u_v_eta = [0.0]
-    ad_struct = [1.0]
-
-    autodiff(Reverse, advance, Active, Duplicated(u_v_eta, ad_struct))
-    @test ad_struct[1] ≈ 2.0
-
-    function advance2(u_v_eta)
-        eta = copy(u_v_eta)
-        return @inbounds eta[1][]
-    end
-
-    u_v_eta = [Ref(0.0)]
-    ad_struct = [Ref(1.0)]
-
-    autodiff(Reverse, advance2, Active, Duplicated(u_v_eta, ad_struct))
-    @test ad_struct[1][] ≈ 2.0
-
-
-    function incopy(u_v_eta, val, i)
-        eta = copy(u_v_eta)
-        eta[1] = val
-        return @inbounds eta[i]
-    end
-
-    u_v_eta = [0.0]
-
-    v = autodiff(Reverse, incopy, Active, Const(u_v_eta), Active(3.14), 1)[1][2]
-    @test v ≈ 1.0
-    @test u_v_eta[1] ≈ 0.0
-
-    function incopy2(val, i)
-        eta = Float64[2.3]
-        eta[1] = val
-        return @inbounds eta[i]
-    end
-
-    v = autodiff(Reverse, incopy2, Active, Active(3.14), 1)[1][1]
-    @test v ≈ 1.0
-end
-
-
-@testset "GCPreserve2" begin
-    function f!(a_out, a_in)
-           a_out[1:end-1] .= a_in[2:end]
-           return nothing
-    end
-    a_in = rand(4)
-    a_out = a_in
-
-    shadow_a_out = ones(4)
-    shadow_a_in = shadow_a_out
-
-    autodiff(Reverse, f!, Const, Duplicated(a_out, shadow_a_out), Duplicated(a_in, shadow_a_in))
-
-    @test shadow_a_in ≈ Float64[0.0, 1.0, 1.0, 2.0]
-    @test shadow_a_out ≈ Float64[0.0, 1.0, 1.0, 2.0]
-
-    autodiff(Forward, f!, Const, Duplicated(a_out, shadow_a_out), Duplicated(a_in, shadow_a_in))
-
-    @test shadow_a_in ≈ Float64[1.0, 1.0, 2.0, 2.0]
-    @test shadow_a_out ≈ Float64[1.0, 1.0, 2.0, 2.0]
-end
-
-@testset "UndefVar" begin
-    function f_undef(x, y)
-        if x
-            undefinedfnthowmagic()
-        end
-        y
-    end
-    @test 1.0 ≈ autodiff(Reverse, f_undef, false, Active(2.14))[1][2]
-    @test_throws Base.UndefVarError autodiff(Reverse, f_undef, true, Active(2.14))
-
-    @test 1.0 ≈ autodiff(Forward, f_undef, false, Duplicated(2.14, 1.0))[1]
-    @test_throws Base.UndefVarError autodiff(Forward, f_undef, true, Duplicated(2.14, 1.0))
-end
-
-@testset "Return GC error" begin
-	t = 0.0
-
-	function tobedifferentiated(cond, a)::Float64
-		if cond
-			t + t
-		else
-			0.0
-		end
-	end
-
-    @test 0.0 ≈ autodiff(Reverse, tobedifferentiated, true, Active(2.1))[1][2]
-	@test 0.0 ≈ autodiff(Forward, tobedifferentiated, true, Duplicated(2.1, 1.0))[1]
-
-	function tobedifferentiated2(cond, a)::Float64
-		if cond
-			a + t
-		else
-			0.0
-		end
-	end
-
-    @test 1.0 ≈ autodiff(Reverse, tobedifferentiated2, true, Active(2.1))[1][2]
-	@test 1.0 ≈ autodiff(Forward, tobedifferentiated2, true, Duplicated(2.1, 1.0))[1]
-
-    @noinline function copy(dest, p1, cond)
-        bc = convert(Broadcast.Broadcasted{Nothing}, Broadcast.instantiate(p1))
-
-        if cond
-            return nothing
-        end
-
-        bc2 = Broadcast.preprocess(dest, bc)
-        @inbounds    dest[1] = bc2[1]
-
-        nothing
-    end
-
-    function mer(F, F_H, cond)
-        p1 = Base.broadcasted(Base.identity, F_H)
-        copy(F, p1, cond)
-
-        # Force an apply generic
-        flush(stdout)
-        nothing
-    end
-
-    L_H = Array{Float64, 1}(undef, 2)
-    L = Array{Float64, 1}(undef, 2)
-
-    F_H = [1.0, 0.0]
-    F = [1.0, 0.0]
-
-    autodiff(Reverse, mer, Duplicated(F, L), Duplicated(F_H, L_H), true)
-    autodiff(Forward, mer, Duplicated(F, L), Duplicated(F_H, L_H), true)
-end
-
-@testset "GC Sret" begin
-    @noinline function _get_batch_statistics(x)
-        batchmean = @inbounds x[1]
-        return (x, x)
-    end
-
-    @noinline function _normalization_impl(x)
-        _stats = _get_batch_statistics(x)
-        return x
-    end
-
-    function gcloss(x)
-        _normalization_impl(x)[1]
-        return nothing
-    end
-
-    x = randn(10)
-    dx = zero(x)
-
-    Enzyme.autodiff(Reverse, gcloss, Duplicated(x, dx))
-end
-
-typeunknownvec = Float64[]
-
-@testset "GC Sret 2" begin
-
-    struct AGriddedInterpolation{K<:Tuple{Vararg{AbstractVector}}} <: AbstractArray{Float64, 1}
-        knots::K
-        v::Int
-    end
-
-    function AGriddedInterpolation(A::AbstractArray{Float64, 1})
-        knots = (A,)
-        use(A)
-        AGriddedInterpolation{typeof(knots)}(knots, 2)
-    end
-
-    function ainterpolate(A::AbstractArray{Float64,1})
-        AGriddedInterpolation(A)
-    end
-
-    function cost(C::Vector{Float64})
-        zs = typeunknownvec
-        ainterpolate(zs)
-        return nothing
-    end
-
-    A = Float64[]
-    dA = Float64[]
-    @test_throws Base.UndefVarError autodiff(Reverse, cost, Const, Duplicated(A, dA))
-end
-
-@testset "No Decayed / GC" begin
-    @noinline function deduplicate_knots!(knots)
-        last_knot = first(knots)
-        for i = eachindex(knots)
-            if i == 1
-                continue
-            end
-            if knots[i] == last_knot
-                @warn knots[i]
-                @inbounds knots[i] *= knots[i]
-            else
-                last_knot = @inbounds knots[i]
-            end
-        end
-    end
-
-    function cost(C::Vector{Float64})
-        deduplicate_knots!(C)
-        @inbounds C[1] = 0
-        return nothing
-    end
-    A = Float64[1, 3, 3, 7]
-    dA = Float64[1, 1, 1, 1]
-    autodiff(Reverse, cost, Const, Duplicated(A, dA))
-    @test dA ≈ [0.0, 1.0, 6.0, 1.0]
-end
-
-@testset "Split GC" begin
-    @noinline function bmat(x)
-        data = [x]
-        return data
-    end
-
-    function f(x::Float64)
-        @inbounds return bmat(x)[1]
-    end
-    @test 1.0 ≈ autodiff(Reverse, f, Active(0.1))[1][1]
-    @test 1.0 ≈ autodiff(Forward, f, Duplicated(0.1, 1.0))[1]
-end
-
-@testset "Array Copy" begin
-	F = [2.0, 3.0]
-
-	dF = [0.0, 0.0]
-
-	function copytest(F)
-		F2 = copy(F)
-		@inbounds F[1] = 1.234
-		@inbounds F[2] = 5.678
-		@inbounds F2[1] * F2[2]
-	end
-	autodiff(Reverse, copytest, Duplicated(F, dF))
-	@test F ≈ [1.234, 5.678]
-	@test dF ≈ [3.0, 2.0]
-
-    @test 31.0 ≈ autodiff(Forward, copytest, Duplicated([2.0, 3.0], [7.0, 5.0]))[1]
-
-    function sh(x)
-        Base.sizehint!(x, length(x))
-        nothing
-    end
-
-    autodiff(Reverse, sh, Duplicated([1.0], [0.0]))
-end
-
-@testset "No inference" begin
-    c = 5.0
-    @test 5.0 ≈ autodiff(Reverse, (A,)->c * A, Active, Active(2.0))[1][1]
-    @test 5.0 ≈ autodiff(Forward, (A,)->c * A, Duplicated(2.0, 1.0))[1]
-end
-
-@testset "Recursive GC" begin
-    function modf!(a)
-        as = [zero(a) for _ in 1:2]
-        a .+= sum(as)
-        return nothing
-    end
-
-    a = rand(5)
-    da = zero(a)
-    autodiff(Reverse, modf!, Duplicated(a, da))
-end
-
-@testset "Type-instable capture" begin
-    L = Array{Float64, 1}(undef, 2)
-
-    F = [1.0, 0.0]
-
-    function main()
-        t = 0.0
-
-        function cap(m)
-            t = m
-        end
-
-        @noinline function inner(F, cond)
-            if cond
-                genericcall(F)
-            end
-        end
-
-        function tobedifferentiated(F, cond)
-            inner(F, cond)
-            # Force an apply generic
-            -t
-            nothing
-        end
-        autodiff(Reverse, tobedifferentiated, Duplicated(F, L), false)
-        autodiff(Forward, tobedifferentiated, Duplicated(F, L), false)
-    end
-
-    main()
-end
-
-@testset "Arrays are double pointers" begin
-    @noinline function func_scalar(X)
-        return X
-    end
-
-    function timsteploop_scalar(FH1)
-        G = Float64[FH1]
-        k1 = @inbounds func_scalar(G[1])
-        return k1
-    end
-    @test Enzyme.autodiff(Reverse, timsteploop_scalar, Active(2.0))[1][1] ≈ 1.0
-    @test Enzyme.autodiff(Forward, timsteploop_scalar, Duplicated(2.0, 1.0))[1] ≈ 1.0
-
-    @noinline function func(X)
-        return @inbounds X[1]
-    end
-    function timsteploop(FH1)
-        G = Float64[FH1]
-        k1 = func(G)
-        return k1
-    end
-    @test Enzyme.autodiff(Reverse, timsteploop, Active(2.0))[1][1] ≈ 1.0
-    @test Enzyme.autodiff(Forward, timsteploop, Duplicated(2.0, 1.0))[1] ≈ 1.0
-end
-
-@testset "Type" begin
-    function foo(in::Ptr{Cvoid}, out::Ptr{Cvoid})
-        markType(Float64, in)
-        ccall(:memcpy,Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), out, in, 8)
-    end
-
-    x = [2.0]
-    y = [3.0]
-    dx = [5.0]
-    dy = [7.0]
-
-    @test markType(x) === nothing
-    @test markType(zeros(Float32, 64)) === nothing
-    @test markType(view(zeros(64), 16:32)) === nothing
-
-    GC.@preserve x y begin
-        foo(Base.unsafe_convert(Ptr{Cvoid}, x), Base.unsafe_convert(Ptr{Cvoid}, y))
-    end
-
-    GC.@preserve x y dx dy begin
-      autodiff(Reverse, foo,
-                Duplicated(Base.unsafe_convert(Ptr{Cvoid}, x), Base.unsafe_convert(Ptr{Cvoid}, dx)),
-                Duplicated(Base.unsafe_convert(Ptr{Cvoid}, y), Base.unsafe_convert(Ptr{Cvoid}, dy)))
-    end
-end
-
-@testset "GetField" begin
-    mutable struct MyType
-       x::Float64
-    end
-
-    getfield_idx(v, idx) = ccall(:jl_get_nth_field_checked, Any, (Any, UInt), v, idx)
-
-    function gf(v::MyType, fld::Symbol)
-       x = getfield(v, fld)
-       x = x::Float64
-       2 * x
-    end
-
-    function gf(v::MyType, fld::Integer)
-       x = getfield_idx(v, fld)
-       x = x::Float64
-       2 * x
-    end
-
-    function gf2(v::MyType, fld::Integer, fld2::Integer)
-       x = getfield_idx(v, fld)
-       y = getfield_idx(v, fld2)
-       x + y
-    end
-
-    function gf2(v::MyType, fld::Symbol, fld2::Symbol)
-       x = getfield(v, fld)
-       y = getfield(v, fld2)
-       x + y
-    end
-
-    mx = MyType(3.0)
-    dx = MyType(0.0)
-
-    Enzyme.autodiff(Reverse, gf, Active, Duplicated(mx, dx), Const(:x))
-    @test mx.x ≈ 3.0
-    @test dx.x ≈ 2.0
-
-
-    mx = MyType(3.0)
-    dx = MyType(0.0)
-
-    Enzyme.autodiff(Reverse, gf, Active, Duplicated(mx, dx), Const(0))
-    @test mx.x ≈ 3.0
-    @test dx.x ≈ 2.0
-
-
-    mx = MyType(3.0)
-    dx = MyType(0.0)
-
-    Enzyme.autodiff(Reverse, gf2, Active, Duplicated(mx, dx), Const(:x), Const(:x))
-    @test mx.x ≈ 3.0
-    @test dx.x ≈ 2.0
-
-    mx = MyType(3.0)
-    dx = MyType(0.0)
-
-    Enzyme.autodiff(Reverse, gf2, Active, Duplicated(mx, dx), Const(0), Const(0))
-    @test mx.x ≈ 3.0
-    @test dx.x ≈ 2.0
-
-    function forbatch(v, fld::Symbol, out)
-        x = getfield(v, fld)
-        x = x::Float64
-        out[] = 2 * x
-        nothing
-    end
-    function forbatch(v, fld::Integer, out)
-        x = getfield_idx(v, fld)
-        x = x::Float64
-        out[] = 2 * x
-        nothing
-    end
-
-    mx = MyType(3.0)
-    dx = MyType(0.0)
-    dx2 = MyType(0.0)
-
-    Enzyme.autodiff(Reverse, forbatch, Const, BatchDuplicated(mx, (dx, dx2)), Const(:x), BatchDuplicated(Ref(0.0), (Ref(1.0), Ref(3.14))))
-    @test mx.x ≈ 3.0
-    @test dx.x ≈ 2.0
-    @test dx2.x ≈ 6.28
-
-    mx = MyType(3.0)
-    dx = MyType(0.0)
-    dx2 = MyType(0.0)
-
-    Enzyme.autodiff(Reverse, forbatch, Const, BatchDuplicated(mx, (dx, dx2)), Const(0), BatchDuplicated(Ref(0.0), (Ref(1.0), Ref(3.14))))
-    @test mx.x ≈ 3.0
-    @test dx.x ≈ 2.0
-    @test dx2.x ≈ 6.28
-
-    mutable struct MyType2
-       x::Float64
-       y::Float64
-    end
-
-    function sf2(v::MyType2, fld, fld2)
-       x = getfield(v, fld)
-       x = x::Float64
-       r = 2 * x
-       x = setfield!(v, fld2, r)
-       return nothing
-    end
-
-    mt2 = MyType2(3.0, 642.0)
-    dmt2 = MyType2(1.2, 541.0)
-
-    Enzyme.autodiff(Forward, sf2, Duplicated(mt2, dmt2), Const(:x), Const(:y))
-    @test mt2.x ≈ 3.0
-    @test mt2.y ≈ 6.0
-    @test dmt2.x ≈ 1.2
-    @test dmt2.y ≈ 2.4
-end
-
-@testset "BLAS" begin
-    x = [2.0, 3.0]
-    dx = [0.2,0.3]
-    y = [5.0, 7.0]
-    dy = [0.5,0.7]
-    Enzyme.autodiff(Reverse, (x,y)->x' * y, Duplicated(x, dx), Duplicated(y, dy))
-    @show x, dx, y, dy
-    @test dx ≈ [5.2, 7.3]
-    @test dy ≈ [2.5, 3.7]
-
-    f_exc(x) = sum(x*x)
-    y = [[1.0, 2.0] [3.0,4.0]]
-    f_x = zero.(y)
-    Enzyme.autodiff(Reverse, f_exc, Duplicated(y, f_x))
-    @test f_x ≈ [7.0 9.0; 11.0 13.0]
-end
-
-@testset "Exception" begin
-
-    f_no_derv(x) = ccall("extern doesnotexist", llvmcall, Float64, (Float64,), x)
-    @test_throws Enzyme.Compiler.NoDerivativeException autodiff(Reverse, f_no_derv, Active, Active(0.5))
-
-    f_union(cond, x) = cond ? x : 0
-    g_union(cond, x) = f_union(cond,x)*x
-    if sizeof(Int) == sizeof(Int64)
-        @test_throws Enzyme.Compiler.IllegalTypeAnalysisException autodiff(Reverse, g_union, Active, true, Active(1.0))
-    else
-        @test_throws Enzyme.Compiler.IllegalTypeAnalysisException autodiff(Reverse, g_union, Active, true, Active(1.0f0))
-    end
-    # TODO: Add test for NoShadowException
-end
-
-function objective!(x, loss, R)
-    for i in 1:1000
-        y = zeros(3)
-        y[1] = R[1,1] * x[1] + R[1,2] * x[2] + R[1,3] * x[3]
-
-        loss[] = y[1]
-    end
-    return nothing
-end;
-
-@testset "Static tape allocation" begin
-    x = zeros(3)
-    R = [1.0 0.0 0.0
-        0.0 1.0 0.0
-        0.0 0.0 1.0]
-    loss = Ref(0.0)
-    dloss = Ref(1.0)
-
-    autodiff(Reverse, objective!, Duplicated(x, zero(x)), Duplicated(loss, dloss), R)
-
-    @test loss[] ≈ 0.0
-    @show dloss[] ≈ 0.0
-end
-
-@testset "Union return" begin
-    function unionret(a, out, cond)
-        if cond
-            out[] = a
-        end
-    end
-
-    out = Ref(0.0)
-    dout = Ref(1.0)
-    @test 2.0 ≈ Enzyme.autodiff(Reverse, unionret, Active, Active(2.0), Duplicated(out, dout), true)[1][1]
-end
-
-@testset "Array push" begin
-
-    function pusher(x, y)
-        push!(x, y)
-        x[1] + x[2]
-    end
-
-    x  = [2.3]
-    dx = [0.0]
-    @test 1.0 ≈ first(Enzyme.autodiff(Reverse, pusher, Duplicated(x, dx), Active(2.0)))[2]
-    @test x ≈ [2.3, 2.0]
-    @test dx ≈ [1.0]
-
-    function double_push(x)
-        a = [0.5]
-        push!(a, 1.0)
-        push!(a, 1.0)
-        return x
-    end
-    y, = Enzyme.autodiff(Reverse, double_push,Active(1.0))[1]
-    @test y == 1.0
-
-    function aloss(a, arr)
-        for i in 1:2500
-            push!(arr, a)
-        end
-        return @inbounds arr[2500]
-    end
-    arr = Float64[]
-    darr = Float64[]
-
-    y = autodiff(
-        Reverse,
-        aloss,
-        Active,
-        Active(1.0),
-        Duplicated(arr, darr)
-       )[1][1]
-    @test y == 1.0
-end
-
-@testset "Batch Forward" begin
-    square(x)=x*x
-    bres = autodiff(Forward, square, BatchDuplicatedNoNeed, BatchDuplicated(3.0, (1.0, 2.0, 3.0)))
-    @test length(bres) == 1
-    @test length(bres[1]) == 3
-    @test bres[1][1] ≈  6.0
-    @test bres[1][2] ≈ 12.0
-    @test bres[1][3] ≈ 18.0
-
-    bres = autodiff(Forward, square, BatchDuplicatedNoNeed, BatchDuplicated(3.0 + 7.0im, (1.0+0im, 2.0+0im, 3.0+0im)))
-    @test bres[1][1] ≈  6.0 + 14.0im
-    @test bres[1][2] ≈ 12.0 + 28.0im
-    @test bres[1][3] ≈ 18.0 + 42.0im
-
-    squareidx(x)=x[1]*x[1]
-    inp = Float32[3.0]
-
-    # Shadow offset is not the same as primal so following doesn't work
-    # d_inp = Float32[1.0, 2.0, 3.0]
-    # autodiff(Forward, squareidx, BatchDuplicatedNoNeed, BatchDuplicated(view(inp, 1:1), (view(d_inp, 1:1), view(d_inp, 2:2), view(d_inp, 3:3))))
-
-    d_inp = (Float32[1.0], Float32[2.0], Float32[3.0])
-    bres = autodiff(Forward, squareidx, BatchDuplicatedNoNeed, BatchDuplicated(inp, d_inp))
-    @test bres[1][1] ≈  6.0
-    @test bres[1][2] ≈ 12.0
-    @test bres[1][3] ≈ 18.0
-end
-
-@testset "Batch Reverse" begin
-    function refbatchbwd(out, x)
-        v = x[]
-        out[1] = v
-        out[2] = v*v
-        out[3] = v*v*v
-        nothing
-    end
-
-    dxs = (Ref(0.0), Ref(0.0), Ref(0.0))
-    out = Float64[0,0,0]
-    x = Ref(2.0)
-
-    autodiff(Reverse, refbatchbwd, BatchDuplicated(out, Enzyme.onehot(out)), BatchDuplicated(x, dxs))
-    @test dxs[1][] ≈  1.0
-    @test dxs[2][] ≈  4.0
-    @test dxs[3][] ≈ 12.0
-
-    function batchbwd(out, v)
-        out[1] = v
-        out[2] = v*v
-        out[3] = v*v*v
-        nothing
-    end
-
-    bres = Enzyme.autodiff(Reverse, batchbwd, BatchDuplicated(out, Enzyme.onehot(out)), Active(2.0))[1]
-    @test length(bres) == 2
-    @test length(bres[2]) == 3
-    @test bres[2][1] ≈  1.0
-    @test bres[2][2] ≈  4.0
-    @test bres[2][3] ≈ 12.0
-end
-
-@testset "Jacobian" begin
-    function inout(v)
-       [v[2], v[1]*v[1], v[1]*v[1]*v[1]]
-    end
-
-    jac = Enzyme.jacobian(Reverse, inout, [2.0, 3.0], #=n_outs=# Val(3), Val(1))
-    @test size(jac) == (3, 2)
-    @test jac ≈ [ 0.0   1.0;
-                  4.0   0.0;
-                  12.0  0.0]
-
-    jac = Enzyme.jacobian(Forward, inout, [2.0, 3.0], Val(1))
-    @test size(jac) == (3, 2)
-    @test jac ≈ [ 0.0   1.0;
-                  4.0   0.0;
-                  12.0  0.0]
-
-    @test jac == Enzyme.jacobian(Forward, inout, [2.0, 3.0])
-    @test jac == ForwardDiff.jacobian(inout, [2.0, 3.0])
-
-    jac = Enzyme.jacobian(Reverse, inout, [2.0, 3.0], #=n_outs=# Val(3), Val(2))
-    @test size(jac) == (3, 2)
-    @test jac ≈ [ 0.0   1.0;
-                  4.0   0.0;
-                  12.0  0.0]
-
-    jac = Enzyme.jacobian(Forward, inout, [2.0, 3.0], Val(2))
-    @test size(jac) == (3, 2)
-    @test jac ≈ [ 0.0   1.0;
-                  4.0   0.0;
-                  12.0  0.0]
-
-    function f_test_1(A, x)
-        utmp = A*x[2:end] .+ x[1]
-        return utmp
-    end
-
-    function f_test_2(A, x)
-        utmp = Vector{Float64}(undef, length(x)-1)
-        utmp .= A*x[2:end] .+ x[1]
-        return utmp
-    end
-
-    function f_test_3!(u, A, x)
-        utmp .= A*x[2:end] .+ x[1]
-    end
-
-    J_r_1(A, x) = Enzyme.jacobian(Reverse, θ -> f_test_1(A, θ), x, Val(5))
-    J_r_2(A, x) = Enzyme.jacobian(Reverse, θ -> f_test_2(A, θ), x, Val(5))
-    J_r_3(u, A, x) = Enzyme.jacobian(Reverse, θ -> f_test_3!(u, A, θ), x, Val(5))
-
-    J_f_1(A, x) = Enzyme.jacobian(Forward, θ -> f_test_1(A, θ), x)
-    J_f_2(A, x) = Enzyme.jacobian(Forward, θ -> f_test_2(A, θ), x)
-    J_f_3(u, A, x) = Enzyme.jacobian(Forward, θ -> f_test_3!(u, A, θ), x)
-
-    x = ones(6)
-    A = Matrix{Float64}(LinearAlgebra.I, 5, 5)
-    u = Vector{Float64}(undef, 5)
-
-    @test J_r_1(A, x) == [
-        1.0  1.0  0.0  0.0  0.0  0.0;
-        1.0  0.0  1.0  0.0  0.0  0.0;
-        1.0  0.0  0.0  1.0  0.0  0.0;
-        1.0  0.0  0.0  0.0  1.0  0.0;
-        1.0  0.0  0.0  0.0  0.0  1.0;
-    ]
-
-    @test J_r_2(A, x) == [
-        1.0  1.0  0.0  0.0  0.0  0.0;
-        1.0  0.0  1.0  0.0  0.0  0.0;
-        1.0  0.0  0.0  1.0  0.0  0.0;
-        1.0  0.0  0.0  0.0  1.0  0.0;
-        1.0  0.0  0.0  0.0  0.0  1.0;
-    ]
-
-    @test J_f_1(A, x) == [
-        1.0  1.0  0.0  0.0  0.0  0.0;
-        1.0  0.0  1.0  0.0  0.0  0.0;
-        1.0  0.0  0.0  1.0  0.0  0.0;
-        1.0  0.0  0.0  0.0  1.0  0.0;
-        1.0  0.0  0.0  0.0  0.0  1.0;
-    ]
-    @test J_f_2(A, x) == [
-        1.0  1.0  0.0  0.0  0.0  0.0;
-        1.0  0.0  1.0  0.0  0.0  0.0;
-        1.0  0.0  0.0  1.0  0.0  0.0;
-        1.0  0.0  0.0  0.0  1.0  0.0;
-        1.0  0.0  0.0  0.0  0.0  1.0;
-    ]
-
-    # @show J_r_3(u, A, x)
-    # @show J_f_3(u, A, x)
-end
-
-@testset "Forward on Reverse" begin
-
-	function speelpenning(y, x)
-		ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
-								  y, x, 2 * 8)
-		return nothing
-	end
-
-	x = [0.5, 0.3]
-	y = zeros(2)
-    dx = ones(2)
-    rx = zeros(2)
-    drx = zeros(2)
-    dy = zeros(2)
-    ry = ones(2)
-    dry = zeros(2)
-
-    function foo(y, dy, x, dx)
-        autodiff_deferred(Reverse, speelpenning, Const, Duplicated(y, dy), Duplicated(x, dx))
-        return nothing
-    end
-
-    autodiff(Forward, foo, Duplicated(x, dx), Duplicated(rx, drx), Duplicated(y, dy), Duplicated(ry, dry))
-end
-
-using  Documenter
-DocMeta.setdocmeta!(Enzyme, :DocTestSetup, :(using Enzyme); recursive=true)
-@testset "DocTests" begin
-    doctest(Enzyme; manual = false)
-end
-
-using Random
-
-@testset "Random" begin
-    f_rand(x) = x*rand()
-    f_randn(x, N) = x*sum(randn(N))
-    @test 0 <= autodiff(Reverse, f_rand, Active, Active(1.0))[1][1] < 1
-    @test !iszero(autodiff(Reverse, f_randn, Active, Active(1.0), Const(64))[1][1])
-    @test iszero(autodiff(Reverse, x -> rand(), Active, Active(1.0))[1][1])
-    @test iszero(autodiff(Reverse, (x, N) -> sum(randn(N)), Active, Active(1.0), Const(64))[1][1])
-    @test autodiff(Reverse, x -> x * sum(randcycle(5)), Active, Active(1.0))[1][1] == 15
-    @test autodiff(Reverse, x -> x * sum(randperm( 5)), Active, Active(1.0))[1][1] == 15
-    @test autodiff(Reverse, x -> x * sum(shuffle(1:5)), Active, Active(1.0))[1][1] == 15
-end
-
-@testset "Reshape" begin
-
-	function rs(x)
-		y = reshape(x, 2, 2)
-		y[1,1] *= y[1, 2]
-		y[2, 2] *= y[2, 1]
-		nothing
-	end
-
-    data = Float64[1.,2.,3.,4.]
-	ddata = ones(4)
-
-	autodiff(Reverse, rs, Duplicated(data, ddata))
-	@test ddata ≈ [3.0, 5.0, 2.0, 2.0]
-
-    data = Float64[1.,2.,3.,4.]
-	ddata = ones(4)
-	autodiff(Forward, rs, Duplicated(data, ddata))
-	@test ddata ≈ [4.0, 1.0, 1.0, 6.0]
-end
-
-@testset "Copy Broadcast arg" begin
-	x = Float32[3]
-	w = Float32[1]
-	dw = zero(w)
-
-	function inactiveArg(w, x, cond)
-	   if cond
-		  x = copy(x)
-	   end
-	  @inbounds w[1] * x[1]
-	end
-
-	Enzyme.autodiff(Reverse, inactiveArg, Active, Duplicated(w, dw), Const(x), Const(false))
-
-    @test x ≈ [3.0]
-    @test w ≈ [1.0]
-    @test dw ≈ [3.0]
-
-    x = Float32[3]
-
-    function loss(w, x, cond)
-      dest = Array{Float32}(undef, 1)
-      r = cond ? copy(x) : x
-      res = @inbounds w[1] * r[1]
-      @inbounds dest[1] = res
-      res
-    end
-
-    dw = Enzyme.autodiff(Reverse, loss, Active, Active(1.0), Const(x), Const(false))[1]
-
-    @test x ≈ [3.0]
-    @test dw[1] ≈ 3.0
-
-    c = ones(3)
-    inner(e) = c .+ e
-    fres = Enzyme.autodiff(Enzyme.Forward, inner, Duplicated{Vector{Float64}}, Duplicated([0., 0., 0.], [1., 1., 1.]))[1]
-    @test c ≈ [1.0, 1.0, 1.0]
-    @test fres ≈ [1.0, 1.0, 1.0]
-end
-
-@testset "View Splat" begin
-	function getloc(locs, i)
-		loss = 0.0
-		if i==1
-			x, y = 0.0, 0.0
-		else
-		# correct
-			# x, y = locs[1,i-1], locs[2,i-1]
-		# incorrect
-		x, y = @inbounds locs[:,i-1]
-		end
-		loss += y
-		return loss
-	end
-
-	x0 = ones(2, 9)
-	din = zeros(2, 9)
-	Enzyme.autodiff(Reverse, getloc, Duplicated(x0, din), Const(2))
-	@test din[1, 1] ≈ 0.0
-	@test din[2, 1] ≈ 1.0
-end
-
-@testset "Uncached batch sizes" begin
-    genericsin(x) = Base.invokelatest(sin, x)
-    res = Enzyme.autodiff(Forward, genericsin, BatchDuplicated(2.0, NTuple{10,Float64}((Float64(i) for i in 1:10))))[1]
-    for (i, v) in enumerate(res)
-        @test v ≈ i * -0.4161468365471424
-    end
-    @assert length(res) == 10
-    res = Enzyme.autodiff(Forward, genericsin, BatchDuplicated(2.0, NTuple{40,Float64}((Float64(i) for i in 1:40))))[1]
-    for (i, v) in enumerate(res)
-        @test v ≈ i * -0.4161468365471424
-    end
-    @assert length(res) == 40
-end
-@testset "Large dynamic tape" begin
-
-	function ldynloss(X, Y, ps, bs)
-		ll = 0.0f0
-		for (x, y) in zip(X, Y)
-			yhat = ps * x .+ bs
-			ll += (yhat[1] - y)^2
-		end
-		return ll
-	end
-
-	ps = randn(Float32, (1, 5))
-	bs = randn(Float32)
-
-	X = map(x->rand(Float32, 5), 1:1000)
-	Y = map(x->rand(Float32), 1:1000)
-
-	grads = zero(ps)
-	for epoch=1:1000
-		fill!(grads, 0)
-		autodiff(Reverse, ldynloss, Const(X), Const(Y), Duplicated(ps, grads), Active(bs))
-	end
-
-end
-
-@testset "Union return getproperty" begin
-	using Enzyme
-
-	struct DOSData
-		interp_func
-	end
-
-	function get_dos(Ef=0.)
-		return x->x+Ef
-	end
-
-	struct MyMarcusHushChidseyDOS
-		A::Float64
-		dos::DOSData
-	end
-
-	mhcd = MyMarcusHushChidseyDOS(0.3,  DOSData(get_dos()));
-
-	function myintegrand(V, a_r)
-		function z(E)
-			dos = mhcd.dos
-
-			interp = dos.interp_func
-
-			res = interp(V)
-
-			return res
-		end
-		return z
-	end
-
-	function f2(V)
-		fn = myintegrand(V, 1.0)
-
-		fn(0.0)
-	end
-
-    Enzyme.API.runtimeActivity!(true)
-	res = autodiff(Forward, f2, Duplicated, Duplicated(0.2, 1.0))
-    Enzyme.API.runtimeActivity!(false)
-    @test res[1] ≈ 0.2
-    # broken as the return of an apply generic is {primal, primal}
-    # but since the return is abstractfloat doing the 
-    @static if VERSION ≥ v"1.9-" && !(VERSION ≥ v"1.10-" )
-        @test_broken res[2] ≈ 1.0
-    else
-        @test res[2] ≈ 1.0
-    end
-end
-
-@testset "Static activity" begin
-
-    struct Test2{T}
-        obs::T
-    end
-
-    function test(t, x)
-        o = t.obs
-        y = (x .- o)
-        yv = @inbounds y[1]
-        return yv*yv
-    end
-
-    obs = [1.0]
-    t = Test2(obs)
-
-    x0 = [0.0]
-    dx0 = [0.0]
-
-    autodiff(Reverse, test, Const(t), Duplicated(x0, dx0))
-
-    @test obs[1] ≈ 1.0
-    @test x0[1] ≈ 0.0
-    @test dx0[1] ≈ -2.0
-
-end
-
-@testset "Const Activity through intermediate" begin
-    struct RHS_terms
-        eta1::Vector{Float64}
-        u_t::Vector{Float64}
-        eta_t::Vector{Float64}
-    end
-
-    @noinline function comp_u_v_eta_t(rhs)
-        Base.unsafe_copyto!(rhs.eta_t, 1, rhs.u_t, 1, 1)
-        return nothing
-    end
-
-    function advance(eta, rhs)
-
-        @inbounds rhs.eta1[1] = @inbounds eta[1]
-
-        comp_u_v_eta_t(rhs)
-
-        @inbounds eta[1] = @inbounds rhs.eta_t[1]
-
-        return nothing
-
-    end
-
-    rhs_terms = RHS_terms(zeros(1), zeros(1), zeros(1))
-
-    u_v_eta = Float64[NaN]
-    ad_eta = zeros(1)
-
-    autodiff(Reverse, advance,
-        Duplicated(u_v_eta, ad_eta),
-        Const(rhs_terms),
-    )
-    @test ad_eta[1] ≈ 0.0
-end
-
-@testset "Type preservation" begin
-    # Float16 fails due to #870
-    for T in (Float64, Float32, #=Float16=#)
-        res = autodiff(Reverse, x -> x * 2.0, Active, Active(T(1.0)))[1][1]
-        @test res isa T
-        @test res == 2
-    end
-end
-
-@testset "Statistics" begin
-    f1(x) = var([x, 2.0, 3.0])
-    @test autodiff(Reverse, f1, Active, Active(0.0))[1][1] ≈ -5/3
-    @test autodiff(Forward, f1, Duplicated(0.0, 1.0))[1]   ≈ -5/3
-
-    f2(x) = varm([x, 2.0, 3.0], 5/3)
-    @test autodiff(Reverse, f2, Active, Active(0.0))[1][1] ≈ -5/3
-    @test autodiff(Forward, f2, Duplicated(0.0, 1.0))[1]   ≈ -5/3
-
-    f3(x) = std([x, 2.0, 3.0])
-    @test autodiff(Reverse, f3, Active, Active(0.0))[1][1] ≈ -0.54554472559
-    @test autodiff(Forward, f3, Duplicated(0.0, 1.0))[1]   ≈ -0.54554472559
-
-    f4(x) = stdm([x, 2.0, 3.0], 5/3)
-    @test autodiff(Reverse, f4, Active, Active(0.0))[1][1] ≈ -0.54554472559
-    @test autodiff(Forward, f4, Duplicated(0.0, 1.0))[1]   ≈ -0.54554472559
-
-    f5(x) = cor([2.0, x, 1.0], [1.0, 2.0, 3.0])
-    @test autodiff(Reverse, f5, Active, Active(4.0))[1][1] ≈ 0.11690244120
-    @test autodiff(Forward, f5, Duplicated(4.0, 1.0))[1]   ≈ 0.11690244120
-
-    f6(x) = cov([2.0, x, 1.0])
-    @test autodiff(Reverse, f6, Active, Active(4.0))[1][1] ≈ 5/3
-    @test autodiff(Forward, f6, Duplicated(4.0, 1.0))[1]   ≈ 5/3
-
-    f7(x) = median([2.0, 1.0, x])
-    # Fails on Julia 1.9 due to #880
-    #=@test autodiff(Reverse, f7, Active, Active(1.5))[1][1] == 1
-    @test autodiff(Forward, f7, Duplicated(1.5, 1.0))[1]   == 1
-    @test autodiff(Reverse, f7, Active, Active(2.5))[1][1] == 0
-    @test autodiff(Forward, f7, Duplicated(2.5, 1.0))[1]   == 0=#
-
-    f8(x) = middle([2.0, x, 1.0])
-    @test autodiff(Reverse, f8, Active, Active(2.5))[1][1] == 0.5
-    @test autodiff(Forward, f8, Duplicated(2.5, 1.0))[1]   == 0.5
-    @test autodiff(Reverse, f8, Active, Active(1.5))[1][1] == 0
-    @test autodiff(Forward, f8, Duplicated(1.5, 1.0))[1]   == 0
-
-    # On Julia 1.6 the gradients are wrong (0.7 not 1.2) and on 1.7 it errors
-    @static if VERSION ≥ v"1.8-"
-        f9(x) = sum(quantile([1.0, x], [0.5, 0.7]))
-        @test autodiff(Reverse, f9, Active, Active(2.0))[1][1] == 1.2
-        @test autodiff(Forward, f9, Duplicated(2.0, 1.0))[1]   == 1.2
-    end
-end
+f(x) = hypot(x, 2x)
+@test autodiff(Reverse, f, Active, Active(2.0))[1][1] == sqrt(5)