From 0c6345fd7905ee138dab61bfc90ec4865ed40cdf Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Tue, 6 Sep 2022 23:23:53 +0900 Subject: [PATCH] inlining: relax `finalizer` inlining control-flow restriction Eager `finalizer` inlining (#45272) currently has a restriction that requires all the def/uses are in a same basic block. This commit relaxes that restriction a bit by allowing def/uses to involve control flow when all of them are dominated by a `finalizer` call to be inlined, since in that case it is safe to insert the body of `finalizer` at the end of all the def/uses, e.g. ```julia Base.@assume_effects :nothrow safeprint(@nospecialize x...) = print(x...) @noinline nothrow_side_effect(x) = Base.@assume_effects :total !:effect_free @ccall jl_(x::Any)::Cvoid mutable struct DoAllocWithFieldInter x end function register_finalizer!(obj::DoAllocWithFieldInter) finalizer(obj) do this nothrow_side_effect(nothing) end end let src = code_typed1() do for i = -1000:1000 o = DoAllocWithFieldInter(i) register_finalizer!(o) if i == 1000 safeprint(o.x, '\n') elseif i > 0 safeprint(o.x) end end end # the finalzier call gets inlined @test count(isinvoke(:nothrow_side_effect), src.code) == 1 end ``` There is a room for further improvement though -- if we have a way to compute the post-domination, we can also inline a `finalizer` call in a case when a `finalizer` block is post-dominated by all the def/uses e.g. ``` let src = code_typed1() do for i = -1000:1000 o = DoAllocWithFieldInter(i) if i == 1000 safeprint(o.x, '\n') elseif i > 0 safeprint(o.x) end register_finalizer!(o) end end # currently this is broken @test_broken count(isinvoke(:nothrow_side_effect), src.code) == 1 end ``` --- base/compiler/ssair/passes.jl | 54 +++++++++++++--------- test/compiler/inline.jl | 85 +++++++++++++++++++++++++++++++++++ test/compiler/irutils.jl | 7 ++- 3 files changed, 123 insertions(+), 23 deletions(-) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 43becd18d97e6a..18eb0ab7a04b00 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -1100,32 +1100,42 @@ end is_nothrow(ir::IRCode, pc::Int) = ir.stmts[pc][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) ≠ 0 -function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse::SSADefUse, inlining::InliningState) - # For now: Require that all uses and defs are in the same basic block, - # so that live range calculations are easy. - bb = ir.cfg.blocks[block_for_inst(ir.cfg, first(defuse.uses).idx)] +function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse::SSADefUse, inlining::InliningState, lazydomtree::LazyDomtree) + # For now: Require that all the uses and defs are dominated by `finalizer` block + # TODO: further relax this restriction by allowing a case when `finalizer` block is post-dominated by them minval::Int = typemax(Int) maxval::Int = 0 - - function check_in_range(x::Union{Int,SSAUse}) - if isa(x, SSAUse) - didx = x.idx + function update_idx!(duidx::Int) + if duidx < minval + minval = duidx + end + if duidx > maxval + maxval = duidx + end + end + + domtree = get!(lazydomtree) + finalizer_bb = block_for_inst(ir, finalizer_idx) + update_idx!(finalizer_idx) + alloc_bb = block_for_inst(ir, idx) + dominates(domtree, alloc_bb, finalizer_bb) || return nothing + update_idx!(idx) + function check_defuse(x::Union{Int,SSAUse}) + duidx = x isa SSAUse ? x.idx : x + duidx == finalizer_idx && return true + bb = block_for_inst(ir, duidx) + if dominates(domtree, finalizer_bb, bb) + update_idx!(duidx) + return true + # TODO elseif post_dominates(domtree, bb, finalizer_bb) + # update_idx!(duidx) + # return true else - didx = x - end - didx in bb.stmts || return false - if didx < minval - minval = didx + return false end - if didx > maxval - maxval = didx - end - return true end - - check_in_range(idx) || return nothing - all(check_in_range, defuse.uses) || return nothing - all(check_in_range, defuse.defs) || return nothing + all(check_defuse, defuse.uses) || return nothing + all(check_defuse, defuse.defs) || return nothing # For now: Require all statements in the basic block range to be nothrow. all(minval:maxval) do idx::Int @@ -1184,7 +1194,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse end end if finalizer_idx !== nothing && inlining !== nothing - try_resolve_finalizer!(ir, idx, finalizer_idx, defuse, inlining) + try_resolve_finalizer!(ir, idx, finalizer_idx, defuse, inlining, lazydomtree) continue end # Partition defuses by field diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index eb2aa572392f8f..00b0e76c09aed8 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -1376,6 +1376,91 @@ let @test get_finalization_count() == 1000 end +# tests finalizer inlining when def/uses involve control flow +Base.@assume_effects :nothrow safeprint(@nospecialize x...) = print(x...) +mutable struct DoAllocWithField + x + function DoAllocWithField(x) + finalizer(new(x)) do this + nothrow_side_effect(nothing) + end + end +end +mutable struct DoAllocWithFieldInter + x +end +function register_finalizer!(obj::DoAllocWithFieldInter) + finalizer(obj) do this + nothrow_side_effect(nothing) + end +end +let src = code_typed1() do + for i = -1000:1000 + o = DoAllocWithField(i) + if i == 1000 + safeprint(o.x, '\n') + elseif i > 0 + safeprint(o.x) + end + end + end + @test count(isinvoke(:nothrow_side_effect), src.code) == 1 +end +let src = code_typed1() do + for i = -1000:1000 + o = DoAllocWithField(0) + o.x = i # with `setfield!` + if i == 1000 + safeprint(o.x, '\n') + elseif i > 0 + safeprint(o.x) + end + end + end + @test count(isinvoke(:nothrow_side_effect), src.code) == 1 +end +let src = code_typed1() do + for i = -1000:1000 + o = DoAllocWithFieldInter(i) + register_finalizer!(o) + if i == 1000 + safeprint(o.x, '\n') + elseif i > 0 + safeprint(o.x) + end + end + end + @test count(isinvoke(:nothrow_side_effect), src.code) == 1 +end +let src = code_typed1() do + for i = -1000:1000 + o = DoAllocWithFieldInter(0) + o.x = i # with `setfield!`` + register_finalizer!(o) + if i == 1000 + safeprint(o.x, '\n') + elseif i > 0 + safeprint(o.x) + end + end + end + @test count(isinvoke(:nothrow_side_effect), src.code) == 1 +end +let src = code_typed1() do + for i = -1000:1000 + o = DoAllocWithFieldInter(i) + if i == 1000 + safeprint(o.x, '\n') + elseif i > 0 + safeprint(o.x) + end + register_finalizer!(o) + end + end + # TODO we can fix this case by checking a case when a finalizer block is post-dominated by all the def/uses + @test_broken count(isinvoke(:nothrow_side_effect), src.code) == 1 +end + # optimize `[push!|pushfirst!](::Vector{Any}, x...)` @testset "optimize `$f(::Vector{Any}, x...)`" for f = Any[push!, pushfirst!] @eval begin diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl index b44a656ea7b341..76f883d6cea2c3 100644 --- a/test/compiler/irutils.jl +++ b/test/compiler/irutils.jl @@ -17,7 +17,12 @@ function iscall((src, f)::Tuple{IR,Base.Callable}, @nospecialize(x)) where IR<:U singleton_type(argextype(x, src)) === f end end -iscall(pred::Base.Callable, @nospecialize(x)) = isexpr(x, :call) && pred(x.args[1]) +function iscall(pred::Base.Callable, @nospecialize(x)) + if isexpr(x, :(=)) + x = x.args[2] + end + return isexpr(x, :call) && pred(x.args[1]) +end # check if `x` is a statically-resolved call of a function whose name is `sym` isinvoke(y) = @nospecialize(x) -> isinvoke(y, x)