From da04e84598bed798121d279bade1335d836ade0d Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 11 May 2022 06:59:44 +0000 Subject: [PATCH 1/2] Eager finalizer insertion This is a variant of the eager-finalization idea (e.g. as seen in #44056), but with a focus on the mechanism of finalizer insertion, since I need a similar pass downstream. Integration of EscapeAnalysis is left to #44056. My motivation for this change is somewhat different. In particular, I want to be able to insert finalize call such that I can subsequently SROA the mutable object. This requires a couple design points that are more stringent than the pass from #44056, so I decided to prototype them as an independent PR. The primary things I need here that are not seen in #44056 are: - The ability to forgo finalizer registration with the runtime entirely (requires additional legality analyis) - The ability to inline the registered finalizer at the deallocation point (to enable subsequent SROA) To this end, adding a finalizer is promoted to a builtin that is recognized by inference and inlining (such that inference can produce an inferred version of the finalizer for inlining). The current status is that this fixes the minimal example I wanted to have work, but does not yet extend to the motivating case I had. Nevertheless, I felt that this was a good checkpoint to synchronize with other efforts along these lines. Currently working demo: ``` julia> const total_deallocations = Ref{Int}(0) Base.RefValue{Int64}(0) julia> mutable struct DoAlloc function DoAlloc() this = new() Core._add_finalizer(this, function(this) global total_deallocations[] += 1 end) return this end end julia> function foo() for i = 1:1000 DoAlloc() end end foo (generic function with 1 method) julia> @code_llvm foo() ; @ REPL[3]:1 within `foo` define void @julia_foo_111() #0 { top: %.promoted = load i64, i64* inttoptr (i64 140370001753968 to i64*), align 16 ; @ REPL[3]:2 within `foo` %0 = add i64 %.promoted, 1000 ; @ REPL[3] within `foo` store i64 %0, i64* inttoptr (i64 140370001753968 to i64*), align 16 ; @ REPL[3]:4 within `foo` ret void } ``` --- base/compiler/abstractinterpretation.jl | 11 ++ base/compiler/optimize.jl | 5 +- base/compiler/ssair/inlining.jl | 133 +++++++++++++++------ base/compiler/ssair/ir.jl | 60 +++++----- base/compiler/ssair/passes.jl | 149 ++++++++++++++++++++++-- base/compiler/stmtinfo.jl | 11 ++ base/compiler/tfuncs.jl | 2 +- src/builtins.c | 2 +- test/compiler/inline.jl | 87 ++++++++++++++ 9 files changed, 382 insertions(+), 78 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 734b3d1859cb2..13a2a711ea747 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -1589,6 +1589,15 @@ function invoke_rewrite(xs::Vector{Any}) return newxs end +function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState) + if length(argtypes) == 3 + finalizer_argvec = Any[argtypes[2], argtypes[3]] + call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), sv, 1) + return CallMeta(Nothing, Effects(), FinalizerInfo(call.info, call.effects)) + end + return CallMeta(Nothing, Effects(), false) +end + # call where the function is known exactly function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), arginfo::ArgInfo, sv::InferenceState, @@ -1603,6 +1612,8 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), return abstract_invoke(interp, arginfo, sv) elseif f === modifyfield! return abstract_modifyfield!(interp, argtypes, sv) + elseif f === Core.finalizer + return abstract_finalizer(interp, argtypes, sv) end rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods) return CallMeta(rt, builtin_effects(f, argtypes, rt), false) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index af4ef61704c1d..701aca7e0d551 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -27,6 +27,9 @@ const IR_FLAG_THROW_BLOCK = 0x01 << 3 # This statement may be removed if its result is unused. In particular it must # thus be both pure and effect free. const IR_FLAG_EFFECT_FREE = 0x01 << 4 +# This statement was proven not to throw +const IR_FLAG_NOTHROW = 0x01 << 5 + const TOP_TUPLE = GlobalRef(Core, :tuple) @@ -564,7 +567,7 @@ function run_passes( @pass "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds) # @timeit "verify 2" verify_ir(ir) @pass "compact 2" ir = compact!(ir) - @pass "SROA" ir = sroa_pass!(ir) + @pass "SROA" ir = sroa_pass!(ir, sv.inlining) @pass "ADCE" ir = adce_pass!(ir) @pass "type lift" ir = type_lift_pass!(ir) @pass "compact 3" ir = compact!(ir) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 6dac8b60012c1..5f24cb57fc580 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -308,21 +308,17 @@ function finish_cfg_inline!(state::CFGInliningState) end end -function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, - linetable::Vector{LineInfoNode}, item::InliningTodo, - boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) - # Ok, do the inlining here - spec = item.spec::ResolvedInliningSpec - sparam_vals = item.mi.sparam_vals - def = item.mi.def::Method +function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCode, + inlinee::Method, + inlined_at::Int32) + coverage = coverage_enabled(inlinee.module) linetable_offset::Int32 = length(linetable) # Append the linetable of the inlined function to our line table - inlined_at = compact.result[idx][:line] topline::Int32 = linetable_offset + Int32(1) - coverage = coverage_enabled(def.module) coverage_by_path = JLOptions().code_coverage == 3 - push!(linetable, LineInfoNode(def.module, def.name, def.file, def.line, inlined_at)) - oldlinetable = spec.ir.linetable + push!(linetable, LineInfoNode(inlinee.module, inlinee.name, inlinee.file, inlinee.line, inlined_at)) + oldlinetable = inlinee_ir.linetable + extra_coverage_line = 0 for oldline in 1:length(oldlinetable) entry = oldlinetable[oldline] if !coverage && coverage_by_path && is_file_tracked(entry.file) @@ -341,8 +337,25 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector end push!(linetable, newentry) end - if coverage && spec.ir.stmts[1][:line] + linetable_offset != topline - insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, topline)) + if coverage && inlinee_ir.stmts[1][:line] + linetable_offset != topline + extra_coverage_line = topline + end + return linetable_offset, extra_coverage_line +end + +function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, + linetable::Vector{LineInfoNode}, item::InliningTodo, + boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) + # Ok, do the inlining here + spec = item.spec::ResolvedInliningSpec + sparam_vals = item.mi.sparam_vals + def = item.mi.def::Method + inlined_at = compact.result[idx][:line] + linetable_offset::Int32 = length(linetable) + topline::Int32 = linetable_offset + Int32(1) + linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, item.spec.ir, def, inlined_at) + if extra_coverage_line != 0 + insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line)) end if def.isva nargs_def = Int(def.nargs::Int32) @@ -839,7 +852,7 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) src === nothing && return compileable_specialization(et, match, effects) et !== nothing && push!(et, mi) - return InliningTodo(mi, src, effects) + return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects) end function resolve_todo((; fully_covered, atype, cases, #=bbs=#)::UnionSplit, state::InliningState, flag::UInt8) @@ -861,7 +874,8 @@ function validate_sparams(sparams::SimpleVector) end function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, - flag::UInt8, state::InliningState) + flag::UInt8, state::InliningState, + do_resolve::Bool = true) method = match.method spec_types = match.spec_types @@ -895,7 +909,7 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, todo = InliningTodo(mi, match, argtypes) # If we don't have caches here, delay resolving this MethodInstance # until the batch inlining step (or an external post-processing pass) - state.mi_cache === nothing && return todo + do_resolve && state.mi_cache === nothing && return todo return resolve_todo(todo, state, flag) end @@ -904,17 +918,12 @@ function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects) return InliningTodo(mi, ResolvedInliningSpec(ir, effects)) end -function InliningTodo(mi::MethodInstance, src::Union{CodeInfo, Vector{UInt8}}, effects::Effects) - if !isa(src, CodeInfo) - src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo - else - src = copy(src) - end - @timeit "inline IR inflation" begin - ir = inflate_ir!(src, mi)::IRCode - return InliningTodo(mi, ResolvedInliningSpec(ir, effects)) - end +function retrieve_ir_for_inlining(mi::MethodInstance, src::Array{UInt8, 1}) + src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo + return inflate_ir!(src, mi) end +retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo) = inflate_ir(src, mi)::IRCode +retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode) = copy(ir) function handle_single_case!( ir::IRCode, idx::Int, stmt::Expr, @@ -1196,7 +1205,7 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto end end - if sig.f !== Core.invoke && is_builtin(sig) + if sig.f !== Core.invoke && sig.f !== Core.finalizer && is_builtin(sig) # No inlining for builtins (other invoke/apply/typeassert) return nothing end @@ -1213,9 +1222,10 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto end # TODO inline non-`isdispatchtuple`, union-split callsites? -function analyze_single_call!( - ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8, - sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) +function compute_inlining_cases( + infos::Vector{MethodMatchInfo}, flag::UInt8, + sig::Signature, state::InliningState, + do_resolve::Bool = true) argtypes = sig.argtypes cases = InliningCase[] local any_fully_covered = false @@ -1232,7 +1242,7 @@ function analyze_single_call!( continue end for match in meth - handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true) + handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true, do_resolve) any_fully_covered |= match.fully_covers end end @@ -1242,8 +1252,18 @@ function analyze_single_call!( filter!(case::InliningCase->isdispatchtuple(case.sig), cases) end - handle_cases!(ir, idx, stmt, argtypes_to_type(argtypes), cases, - handled_all_cases & any_fully_covered, todo, state.params) + return cases, handled_all_cases & any_fully_covered +end + +function analyze_single_call!( + ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8, + sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) + + r = compute_inlining_cases(infos, flag, sig, state) + r === nothing && return nothing + cases, all_covered = r + handle_cases!(ir, idx, stmt, argtypes_to_type(sig.argtypes), cases, + all_covered, todo, state.params) end # similar to `analyze_single_call!`, but with constant results @@ -1299,14 +1319,15 @@ end function handle_match!( match::MethodMatch, argtypes::Vector{Any}, flag::UInt8, state::InliningState, - cases::Vector{InliningCase}, allow_abstract::Bool = false) + cases::Vector{InliningCase}, allow_abstract::Bool = false, + do_resolve::Bool = true) spec_types = match.spec_types allow_abstract || isdispatchtuple(spec_types) || return false # we may see duplicated dispatch signatures here when a signature gets widened # during abstract interpretation: for the purpose of inlining, we can just skip # processing this dispatch candidate _any(case->case.sig === spec_types, cases) && return true - item = analyze_method!(match, argtypes, flag, state) + item = analyze_method!(match, argtypes, flag, state, do_resolve) item === nothing && return false push!(cases, InliningCase(spec_types, item)) return true @@ -1419,6 +1440,48 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) continue end + # Handle finalizer + if sig.f === Core.finalizer + if isa(info, FinalizerInfo) + # Only inline finalizers that are known nothrow and notls. + # This avoids having to set up state for finalizer isolation + (is_nothrow(info.effects) && is_notaskstate(info.effects)) || continue + + info = info.info + if isa(info, MethodMatchInfo) + infos = MethodMatchInfo[info] + elseif isa(info, UnionSplitInfo) + infos = info.matches + else + continue + end + + ft = argextype(stmt.args[2], ir) + has_free_typevars(ft) && return nothing + f = singleton_type(ft) + argtypes = Vector{Any}(undef, 2) + argtypes[1] = ft + argtypes[2] = argextype(stmt.args[3], ir) + sig = Signature(f, ft, argtypes) + + cases, all_covered = compute_inlining_cases(infos, UInt8(0), sig, state, false) + length(cases) == 0 && continue + if all_covered && length(cases) == 1 + if isa(cases[1], InliningCase) + case1 = cases[1].item + if isa(case1, InliningTodo) + push!(stmt.args, true) + push!(stmt.args, case1.mi) + elseif isa(case1, InvokeCase) + push!(stmt.args, false) + push!(stmt.args, case1.invoke) + end + end + end + continue + end + end + # if inference arrived here with constant-prop'ed result(s), # we can perform a specialized analysis for just this case if isa(info, ConstCallInfo) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 548c19eb031e7..e96fe2e92d0c1 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -163,36 +163,6 @@ const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue} # SSA-indexed nodes - -struct NewInstruction - stmt::Any - type::Any - info::Any - # If nothing, copy the line from previous statement - # in the insertion location - line::Union{Int32, Nothing} - flag::UInt8 - - ## Insertion options - - # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced). - # Don't bother redoing so on insertion. - effect_free_computed::Bool - NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info), - line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) = - new(stmt, type, info, line, flag, effect_free_computed) -end -NewInstruction(@nospecialize(stmt), @nospecialize(type)) = - NewInstruction(stmt, type, nothing) -NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) = - NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false) - -effect_free(inst::NewInstruction) = - NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true) -non_effect_free(inst::NewInstruction) = - NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true) - - struct InstructionStream inst::Vector{Any} type::Vector{Any} @@ -292,6 +262,36 @@ function add!(new::NewNodeStream, pos::Int, attach_after::Bool) end copy(nns::NewNodeStream) = NewNodeStream(copy(nns.stmts), copy(nns.info)) +struct NewInstruction + stmt::Any + type::Any + info::Any + # If nothing, copy the line from previous statement + # in the insertion location + line::Union{Int32, Nothing} + flag::UInt8 + + ## Insertion options + + # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced). + # Don't bother redoing so on insertion. + effect_free_computed::Bool + NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info), + line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) = + new(stmt, type, info, line, flag, effect_free_computed) +end +NewInstruction(@nospecialize(stmt), @nospecialize(type)) = + NewInstruction(stmt, type, nothing) +NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) = + NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false) +NewInstruction(@nospecialize(stmt), meta::Instruction; line::Union{Int32, Nothing}=nothing) = + NewInstruction(stmt, meta[:type], meta[:info], line === nothing ? meta[:line] : line, meta[:flag], true) + +effect_free(inst::NewInstruction) = + NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true) +non_effect_free(inst::NewInstruction) = + NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true) + struct IRCode stmts::InstructionStream argtypes::Vector{Any} diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 9d36e52fb9f86..c9bc91d9aac48 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -14,6 +14,7 @@ GetfieldUse(idx::Int) = SSAUse(:getfield, idx) PreserveUse(idx::Int) = SSAUse(:preserve, idx) NoPreserve() = SSAUse(:nopreserve, 0) IsdefinedUse(idx::Int) = SSAUse(:isdefined, idx) +AddFinalizerUse(idx::Int) = SSAUse(:add_finalizer, idx) """ du::SSADefUse @@ -735,7 +736,7 @@ its argument). In a case when all usages are fully eliminated, `struct` allocation may also be erased as a result of succeeding dead code elimination. """ -function sroa_pass!(ir::IRCode) +function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothing) compact = IncrementalCompact(ir) defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() @@ -744,7 +745,7 @@ function sroa_pass!(ir::IRCode) for ((_, idx), stmt) in compact # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement) isa(stmt, Expr) || continue - is_setfield = is_isdefined = false + is_setfield = is_isdefined = is_finalizer = false field_ordering = :unspecified if is_known_call(stmt, setfield!, compact) 4 <= length(stmt.args) <= 5 || continue @@ -767,6 +768,13 @@ function sroa_pass!(ir::IRCode) field_ordering = argextype(stmt.args[4], compact) widenconst(field_ordering) === Bool && (field_ordering = :unspecified) end + elseif is_known_call(stmt, Core.finalizer, compact) + 3 <= length(stmt.args) <= 5 || continue + # Inlining performs legality checks on the finalizer to determine + # whether or not we may inline it. If so, it appends extra arguments + # at the end of the intrinsic. Detect that here. + length(stmt.args) == 5 || continue + is_finalizer = true elseif isexpr(stmt, :foreigncall) nccallargs = length(stmt.args[3]::SimpleVector) preserved = Int[] @@ -824,10 +832,13 @@ function sroa_pass!(ir::IRCode) # analyze this `getfield` / `isdefined` / `setfield!` call - field = try_compute_field_stmt(compact, stmt) - field === nothing && continue - - val = stmt.args[2] + if !is_finalizer + field = try_compute_field_stmt(compact, stmt) + field === nothing && continue + val = stmt.args[2] + else + val = stmt.args[3] + end struct_typ = unwrap_unionall(widenconst(argextype(val, compact))) if isa(struct_typ, Union) && struct_typ <: Tuple @@ -864,14 +875,16 @@ function sroa_pass!(ir::IRCode) push!(defuse.defs, idx) elseif is_isdefined push!(defuse.uses, IsdefinedUse(idx)) + elseif is_finalizer + push!(defuse.uses, AddFinalizerUse(idx)) else push!(defuse.uses, GetfieldUse(idx)) end union!(mid, intermediaries) end continue - elseif is_setfield - continue # invalid `setfield!` call, but just ignore here + elseif is_setfield || is_finalizer + continue # invalid `setfield!` or `Core.finalizer` call, but just ignore here elseif is_isdefined continue # TODO? end @@ -921,7 +934,7 @@ function sroa_pass!(ir::IRCode) used_ssas = copy(compact.used_ssas) simple_dce!(compact, (x::SSAValue) -> used_ssas[x.id] -= 1) ir = complete(compact) - sroa_mutables!(ir, defuses, used_ssas, lazydomtree) + sroa_mutables!(ir, defuses, used_ssas, lazydomtree, inlining) return ir else simple_dce!(compact) @@ -929,7 +942,60 @@ function sroa_pass!(ir::IRCode) end end -function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree) +function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, inlining::InliningState) + code = get(inlining.mi_cache, mi, nothing) + if code isa CodeInstance + if use_const_api(code) + # No code in the function - Nothing to do + inlining.et !== nothing && push!(inlining.et, mi) + return true + end + src = code.inferred + else + src = code + end + + src = inlining_policy(inlining.interp, src, IR_FLAG_NULL, mi, Any[]) + src === nothing && return false + src = retrieve_ir_for_inlining(mi, src) + + # For now: Require finalizer to only have one basic block + length(src.cfg.blocks) == 1 || return false + + # Ok, we're committed to inlining the finalizer + inlining.et !== nothing && push!(inlining.et, mi) + + linetable_offset, extra_coverage_line = ir_inline_linetable!(ir.linetable, src, mi.def, ir[SSAValue(idx)][:line]) + if extra_coverage_line != 0 + insert_node!(ir, idx, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line)) + end + + # TODO: Use the actual inliner here rather than open coding this special + # purpose inliner. + spvals = mi.sparam_vals + ssa_rename = Vector{Any}(undef, length(src.stmts)) + for idx′ = 1:length(src.stmts) + urs = userefs(src[SSAValue(idx′)][:inst]) + for ur in urs + if isa(ur[], SSAValue) + ur[] = ssa_rename[ur[].id] + elseif isa(ur[], Argument) + ur[] = argexprs[ur[].n] + elseif isexpr(ur[], :static_parameter) + ur[] = spvals[ur[].args[1]] + end + end + # TODO: Scan newly added statement into the sroa defuse struct + stmt = urs[] + isa(stmt, ReturnNode) && continue + inst = src[SSAValue(idx′)] + ssa_rename[idx′] = insert_node!(ir, idx, NewInstruction(stmt, inst; line = inst[:line] + linetable_offset), true) + end + return true +end + +is_nothrow(ir::IRCode, pc::Int) = ir.stmts[pc][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) ≠ 0 +function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState}) for (idx, (intermediaries, defuse)) in defuses intermediaries = collect(intermediaries) # Check if there are any uses we did not account for. If so, the variable @@ -952,9 +1018,72 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # error at runtime, but is not illegal to have in the IR. ismutabletype(typ) || continue typ = typ::DataType + # First check for any add_finalizer calls + add_finalizer_idx = nothing + for use in defuse.uses + if use.kind === :add_finalizer + # For now: Only allow one add_finalizer per allocation + add_finalizer_idx !== nothing && @goto skip + add_finalizer_idx = use.idx + end + end + if add_finalizer_idx !== nothing + # For now: Require that all uses and defs are in the same basic block, + # so that live range calculations are easy. + bb = ir.cfg.blocks[block_for_inst(ir.cfg, first(defuse.uses).idx)] + minval::Int = typemax(Int) + maxval::Int = 0 + + check_in_range(defuse) = check_in_range(defuse.idx) + function check_in_range(didx::Int) + didx in bb.stmts || return false + if didx < minval + minval = didx + end + if didx > maxval + maxval = didx + end + return true + end + + check_in_range(idx) || continue + _all(check_in_range, defuse.uses) || continue + _all(check_in_range, defuse.defs) || continue + + # For now: Require all statements in the basic block range to be + # nothrow. + all_nothrow = _all(idx->is_nothrow(ir, idx) || idx == add_finalizer_idx, minval:maxval) + all_nothrow || continue + + # Ok, finalizer rewrite is legal. + add_finalizer_stmt = ir[SSAValue(add_finalizer_idx)][:inst] + argexprs = Any[add_finalizer_stmt.args[2], add_finalizer_stmt.args[3]] + may_inline = add_finalizer_stmt.args[4]::Bool + mi = add_finalizer_stmt.args[5]::Union{MethodInstance, Nothing} + if may_inline && mi !== nothing + if try_inline_finalizer!(ir, argexprs, maxval, add_finalizer_stmt.args[5], inlining) + @goto done_finalizer + end + mi = compileable_specialization(inlining.et, mi, Effects()).invoke + end + if mi !== nothing + insert_node!(ir, maxval, + NewInstruction(Expr(:invoke, mi, argexprs...), Nothing), + true) + else + insert_node!(ir, maxval, + NewInstruction(Expr(:call, argexprs...), Nothing), + true) + end + @label done_finalizer + # Erase call to add_finalizer + ir[SSAValue(add_finalizer_idx)][:inst] = nothing + continue + end # Partition defuses by field fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)] all_eliminated = all_forwarded = true + has_finalizer = false for use in defuse.uses if use.kind === :preserve for du in fielddefuse diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl index 3f9a562061a12..72b4c8b829c06 100644 --- a/base/compiler/stmtinfo.jl +++ b/base/compiler/stmtinfo.jl @@ -183,4 +183,15 @@ struct ReturnTypeCallInfo info::Any end +""" + info::FinalizerInfo + +Represents the information of a potential (later) call to the finalizer on the given +object type. +""" +struct FinalizerInfo + info::Any + effects::Effects +end + @specialize diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index bd5f4e4b30575..52fd2a42911d6 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -559,7 +559,7 @@ add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_elt add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5) add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5) add_tfunc(donotdelete, 0, INT_INF, (@nospecialize args...)->Nothing, 0) -add_tfunc(Core.finalizer, 2, 2, (@nospecialize args...)->Nothing, 5) +add_tfunc(Core.finalizer, 2, 4, (@nospecialize args...)->Nothing, 5) # more accurate typeof_tfunc for vararg tuples abstract only in length function typeof_concrete_vararg(t::DataType) diff --git a/src/builtins.c b/src/builtins.c index 26363491846f9..2e93a752c3d29 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1602,7 +1602,7 @@ JL_CALLABLE(jl_f_donotdelete) JL_CALLABLE(jl_f_finalizer) { - JL_NARGS(finalizer, 2, 2); + JL_NARGS(finalizer, 2, 4); jl_task_t *ct = jl_current_task; jl_gc_add_finalizer_(ct->ptls, args[1], args[0]); return jl_nothing; diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index a75372075da06..3dac08370c123 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -1288,3 +1288,90 @@ let src = code_typed1(Tuple{Int}) do x end @test count(x -> isa(x, Core.GlobalRef) && x.name === :nothing, src.code) == 0 end + +# Test that we can inline a finalizer for a struct that does not otherwise escape +@noinline nothrow_side_effect(x) = + @Base.assume_effects :total !:effect_free @ccall jl_(x::Any)::Cvoid + +mutable struct DoAllocNoEscape + function DoAllocNoEscape() + finalizer(new()) do this + nothrow_side_effect(nothing) + end + end +end + +let src = code_typed1() do + for i = 1:1000 + DoAllocNoEscape() + end + end + @test count(isnew, src.code) == 0 +end + +# Test that finalizer elision doesn't cause a throw to be inlined into a function +# that shouldn't have it +const finalizer_should_throw = Ref{Bool}(true) +mutable struct DoAllocFinalizerThrows + function DoAllocFinalizerThrows() + finalizer(new()) do this + finalizer_should_throw[] && error("Unexpected finalizer throw") + end + end +end + +function f_finalizer_throws() + prev = GC.enable(false) + for i = 1:100 + DoAllocFinalizerThrows() + end + finalizer_should_throw[] = false + GC.enable(prev) + GC.gc() + return true +end + +@test f_finalizer_throws() + +# Test finalizers with static parameters +global last_finalizer_type::Type = Any +mutable struct DoAllocNoEscapeSparam{T} + x::T + function finalizer_sparam(d::DoAllocNoEscapeSparam{T}) where {T} + nothrow_side_effect(nothing) + nothrow_side_effect(T) + end + function DoAllocNoEscapeSparam{T}(x::T) where {T} + finalizer(finalizer_sparam, new{T}(x)) + end +end +DoAllocNoEscapeSparam(x::T) where {T} = DoAllocNoEscapeSparam{T}(x) + +let src = code_typed1(Tuple{Any}) do x + for i = 1:1000 + DoAllocNoEscapeSparam(x) + end + end + # This requires more inlining enhancments. For now just make sure this + # doesn't error. + @test count(isnew, src.code) in (0, 1) # == 0 +end + +# Test noinline finalizer +@noinline function noinline_finalizer(d) + nothrow_side_effect(nothing) +end +mutable struct DoAllocNoEscapeNoInline + function DoAllocNoEscapeNoInline() + finalizer(noinline_finalizer, new()) + end +end + +let src = code_typed1() do + for i = 1:1000 + DoAllocNoEscapeNoInline() + end + end + @test count(isnew, src.code) == 1 + @test count(isinvoke(:noinline_finalizer), src.code) == 1 +end From 030be84265f41ac8aa23e3f3c12e710b4971fb50 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 30 May 2022 21:46:14 +0000 Subject: [PATCH 2/2] rm redundant copy --- base/compiler/ssair/inlining.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 25146f7d3bd4a..6b3c5b2e44c34 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -914,7 +914,6 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, end function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects) - ir = copy(ir) return InliningTodo(mi, ResolvedInliningSpec(ir, effects)) end