precompile: compile inlinable methods that were inferred #56156
Conversation
Force-pushed from 1c706b2 to f311c6a (compare)
On my machine, this causes my sysimage to grow.
Oof.
Could we still throw out some of the smallest functions? E.g. have an inlining cost threshold that allows medium-sized functions to be cached, but not tiny ones that mostly forward things?
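A minimal sketch of what such a threshold could look like, purely for illustration (the helper name `should_precompile` and the threshold value are assumptions, not anything in this PR; the real gate is the `jl_ir_inlining_cost(inferred) == UINT16_MAX` check in `precompile_enq_specialization_`):

```julia
# Hypothetical decision helper, not part of this PR. `cost` models what
# jl_ir_inlining_cost reports: typemax(UInt16) means "not inlineable",
# smaller values mean cheaper to inline.
const NOT_INLINEABLE = typemax(UInt16)

function should_precompile(cost::UInt16; tiny_threshold::UInt16 = UInt16(20))
    cost == NOT_INLINEABLE && return true  # never inlined: always needs its own entry point
    # medium-sized inlinable methods keep a compiled entry point; tiny
    # forwarding methods below the threshold rely on call-site inlining
    return cost >= tiny_threshold
end

should_precompile(UInt16(5))       # tiny forwarder -> false
should_precompile(UInt16(200))     # medium-sized method -> true
should_precompile(NOT_INLINEABLE)  # not inlineable -> true
```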
The code generation policy here previously assumed that inlinable code almost never ends up being `invoke`d, but it has the unfortunate side effect that making more code inline-eligible means fewer entry points to your code end up compiled. Intuitively, I think users expect function calls that ran during pre-compilation to be compiled and available (regardless of the context they are called from), which requires us to pre-compile these.

This change may not be viable due to the increase in code size, but I wanted to open it to see what the effects are.
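As a rough illustration of that expectation (the package and function names below are hypothetical, and the comments restate the behavior described above rather than measured results):

```julia
# Hypothetical package whose precompile workload only exercises `tiny`
# through a type-stable caller, so `tiny` is inline-eligible and is
# inlined rather than kept as a standalone compiled entry point.
module DemoPkg

tiny(x::Int) = x + 1          # small, inline-eligible
caller(x::Int) = tiny(x) * 2  # `tiny` is inlined into this body

precompile(caller, (Int,))    # the workload only covers the caller

end

# The expectation stated above: even though `tiny(::Int)` was inferred and
# inlined during pre-compilation, a later call that needs a real entry point,
# e.g. `f = DemoPkg.tiny; f(1)`, may still have to compile `tiny` at runtime
# under the previous policy.
```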
Force-pushed from f311c6a to cdb197e (compare)
I thought it might be better to leverage some runtime behavior of the program for this issue, so I tried the following approach:

```diff
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index 8b85f7c6f3..216d51426e 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -1036,6 +1036,14 @@ end
_uncompressed_ir(codeinst::CodeInstance, s::String) =
ccall(:jl_uncompress_ir, Ref{CodeInfo}, (Any, Any, Any), codeinst.def.def::Method, codeinst, s)
+function force_compile_inlineable(codeinst::CodeInstance)
+ inferred = @atomic :monotonic codeinst.inferred
+ if inferred isa MaybeCompressed && is_inlineable(inferred)
+ ccall(:jl_force_compile_codeinst, Cvoid, (Any,), codeinst)
+ end
+ nothing
+end
+
# compute (and cache) an inferred AST and return type
function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mode::UInt8)
start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
@@ -1048,6 +1056,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mod
return code
end
if ci_meets_requirement(code, source_mode)
+ force_compile_inlineable(code)
ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
return code
end
@@ -1075,6 +1084,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mod
return code
end
if ci_meets_requirement(code, source_mode)
+ force_compile_inlineable(code)
engine_reject(interp, ci)
ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
return code
@@ -1112,6 +1122,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mod
finish!(interp, frame; can_discard_trees) # redo finish! with the correct can_discard_trees parameter value
@assert ci_meets_requirement(ci, source_mode)
end
+ force_compile_inlineable(ci)
return ci
end
diff --git a/src/gf.c b/src/gf.c
index fc2e62ebff..29b1d3c36c 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -581,6 +581,10 @@ JL_DLLEXPORT void jl_update_codeinst(
jl_atomic_store_relaxed(&codeinst->max_world, max_world); // since the edges shouldn't change after jl_fill_codeinst
}
+JL_DLLEXPORT void jl_force_compile_codeinst(jl_code_instance_t *codeinst) {
+ jl_atomic_store_relaxed(&codeinst->precompile, 1);
+}
+
JL_DLLEXPORT void jl_fill_codeinst(
jl_code_instance_t *codeinst,
jl_value_t *rettype, jl_value_t *exctype,
diff --git a/src/precompile_utils.c b/src/precompile_utils.c
index b361bbebc5..183e29d206 100644
--- a/src/precompile_utils.c
+++ b/src/precompile_utils.c
@@ -188,7 +188,7 @@ static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closur
else if (jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return) {
jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred);
if (inferred && inferred != jl_nothing && jl_options.compile_enabled != JL_OPTIONS_COMPILE_ALL &&
- (jl_options.trim == JL_TRIM_NO || jl_ir_inlining_cost(inferred) == UINT16_MAX)) {
+ ((jl_ir_inlining_cost(inferred) == UINT16_MAX) || jl_atomic_load_relaxed(&codeinst->precompile))) {
do_compile = 1;
}
         else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) {
```

The basic idea is that even if it is inlineable, I thought it might be more efficient to … However, this approach is incomplete: if a code instance that was first compiled from a type-stable caller context is later called from another dynamic-dispatch context, we would need a mechanism to re-enqueue that code instance via …
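A hypothetical sequence illustrating that gap (the names are made up; the comments paraphrase the limitation described above rather than asserting exact compiler behavior):

```julia
small(x) = x + 1      # inline-eligible method

stable(x) = small(x)  # `small(::Int)` is first inferred from this type-stable
                      # caller, so its CodeInstance is cached without ever being
                      # flagged for standalone compilation

dyn(f, x) = f(x)      # a later dynamic-dispatch call reaches the same cached
                      # CodeInstance; the sketch above has no way to re-enqueue
                      # it for compilation at that point

stable(1)
dyn(small, 1)
```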
Review comment on the `src/precompile_utils.c` hunk:

```diff
-            inferred != jl_nothing &&
-            (jl_options.compile_enabled != JL_OPTIONS_COMPILE_ALL && jl_ir_inlining_cost(inferred) == UINT16_MAX)) {
+        if (inferred && inferred != jl_nothing && jl_options.compile_enabled != JL_OPTIONS_COMPILE_ALL &&
+            (jl_options.trim == JL_TRIM_NO || jl_ir_inlining_cost(inferred) == UINT16_MAX)) {
             do_compile = 1;
         }
         else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) {
```
> Intuitively I think users expect function calls that ran during pre-compilation to be compiled and available

This line should already do exactly that.