Skip to content

Commit

Permalink
Make cumulative_compile_time_ns into a global, atomic variable.
Browse files Browse the repository at this point in the history
Instead of keeping per-thread compilation time, this change keeps a
single global counter of compilation time, updated with atomic operations.

Fixes #41739

```julia
julia> include("/Users/nathandaly/Downloads/compilation-task-migration-17-example.jl")
start thread: 2
end thread: 2
  5.185706 seconds (3.53 M allocations: 2.570 GiB, 7.34% gc time, 15.57% compilation time)

julia> include("/Users/nathandaly/Downloads/compilation-task-migration-17-example.jl")
start thread: 3
WARNING: replacing module M.
end thread: 1
  4.110316 seconds (18.23 k allocations: 2.391 GiB, 5.67% gc time, 0.24% compilation time)
```
  • Loading branch information
NHDaly committed Jul 31, 2021
1 parent bf4ce55 commit ccfcc4f
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 48 deletions.
7 changes: 0 additions & 7 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,6 @@ cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_befor
# end a compile-time measurement: calls the runtime's `jl_cumulative_compile_time_ns_after`
# and returns the cumulative compilation time it reports, as a UInt64
cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())
# cumulative time spent on compilation, in nanoseconds, as reported by the runtime's
# process-wide counter (time is only accumulated while compile-time measurement is enabled).
cumulative_compile_time_ns() = ccall(:jl_cumulative_compile_time_ns, UInt64, ())
# Sum of `cumulative_compile_time_ns()` sampled once per iteration of a
# `Threads.@threads` loop that has one iteration per thread.
# NOTE(review): presumably meant to total per-thread counters; that assumes each
# iteration lands on a different thread -- confirm against the scheduler in use.
function process_cumulative_compile_time_ns()
    per_thread = zeros(UInt, Threads.nthreads())
    Threads.@threads for slot in eachindex(per_thread)
        per_thread[slot] = cumulative_compile_time_ns()
    end
    return sum(per_thread)
end

# total time spent in garbage collection, in nanoseconds
gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ())
Expand Down
12 changes: 6 additions & 6 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
if (jl_atomic_load(&jl_measure_compile_time))
compiler_start_time = jl_hrtime();

CompilationPolicy policy = (CompilationPolicy) _policy;
Expand Down Expand Up @@ -415,8 +415,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
}

data->M = std::move(clone);
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (jl_atomic_load(&jl_measure_compile_time))
jl_atomic_fetch_add(&jl_measure_compile_time, (jl_hrtime() - compiler_start_time));
if (policy == CompilationPolicy::ImagingMode)
imaging_mode = 0;
JL_UNLOCK(&codegen_lock); // Might GC
Expand Down Expand Up @@ -917,7 +917,7 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
if (jl_atomic_load(&jl_measure_compile_time))
compiler_start_time = jl_hrtime();
std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);

Expand All @@ -942,8 +942,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
m.release(); // the return object `llvmf` will be the owning pointer
}
JL_GC_POP();
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (jl_atomic_load(&jl_measure_compile_time))
jl_atomic_fetch_add(&jl_measure_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock); // Might GC
if (F)
return F;
Expand Down
7 changes: 3 additions & 4 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3164,15 +3164,14 @@ static uint64_t inference_start_time = 0;
// Take the type-inference lock and, if compile-time measurement is enabled,
// record the timestamp that the matching jl_typeinf_end() subtracts from.
JL_DLLEXPORT void jl_typeinf_begin(void)
{
    JL_LOCK(&typeinf_lock);
    // jl_measure_compile_time is a single process-wide counter, read atomically;
    // it is no longer an array indexed by thread id.
    if (jl_atomic_load(&jl_measure_compile_time))
        inference_start_time = jl_hrtime();
}

// Release the type-inference lock. When this is the outermost hold of the lock
// (count == 1) and compile-time measurement is enabled, add the time elapsed
// since jl_typeinf_begin() to the process-wide cumulative counter.
JL_DLLEXPORT void jl_typeinf_end(void)
{
    // The elapsed time must be added to jl_cumulative_compile_time; adding it to
    // jl_measure_compile_time would corrupt the counter that enables measurement.
    if (typeinf_lock.count == 1 && jl_atomic_load(&jl_measure_compile_time))
        jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time));
    JL_UNLOCK(&typeinf_lock);
}

Expand Down
41 changes: 17 additions & 24 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,19 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
// Begin a compile-time measurement: atomically increment the process-wide
// enable counter and return the cumulative compile time observed right after.
extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns_before()
{
    jl_atomic_fetch_add(&jl_measure_compile_time, 1);
    return jl_atomic_load(&jl_cumulative_compile_time);
}
// End a compile-time measurement: atomically decrement the process-wide
// enable counter and return the cumulative compile time observed right after.
extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns_after()
{
    jl_atomic_fetch_add(&jl_measure_compile_time, -1);
    return jl_atomic_load(&jl_cumulative_compile_time);
}

// Return the current value of the process-wide cumulative compile-time counter
// without changing whether measurement is enabled.
extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns() {
    return jl_atomic_load(&jl_cumulative_compile_time);
}

// this generates llvm code for the lambda info
Expand Down Expand Up @@ -239,8 +236,7 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
{
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
if (jl_atomic_load(&jl_measure_compile_time))
compiler_start_time = jl_hrtime();
jl_codegen_params_t params;
jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
Expand All @@ -264,8 +260,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
if (success && llvmmod == NULL)
jl_add_to_ee(std::unique_ptr<Module>(into));
}
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && jl_atomic_load(&jl_measure_compile_time))
jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock);
return success;
}
Expand Down Expand Up @@ -321,8 +317,7 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
{
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
if (jl_atomic_load(&jl_measure_compile_time))
compiler_start_time = jl_hrtime();
// if we don't have any decls already, try to generate it now
jl_code_info_t *src = NULL;
Expand Down Expand Up @@ -360,8 +355,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
else {
codeinst = NULL;
}
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && jl_atomic_load(&jl_measure_compile_time))
jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock);
JL_GC_POP();
return codeinst;
Expand All @@ -375,8 +370,7 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
}
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
if (jl_atomic_load(&jl_measure_compile_time))
compiler_start_time = jl_hrtime();
if (unspec->invoke == NULL) {
jl_code_info_t *src = NULL;
Expand Down Expand Up @@ -404,8 +398,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
}
JL_GC_POP();
}
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && jl_atomic_load(&jl_measure_compile_time))
jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock); // Might GC
}

Expand All @@ -428,8 +422,7 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
// so create an exception here so we can print pretty our lies
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
if (jl_atomic_load(&jl_measure_compile_time))
compiler_start_time = jl_hrtime();
specfptr = (uintptr_t)codeinst->specptr.fptr;
if (specfptr == 0) {
Expand All @@ -454,8 +447,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
}
JL_GC_POP();
}
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (jl_atomic_load(&jl_measure_compile_time))
jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock);
}
if (specfptr != 0)
Expand Down
5 changes: 3 additions & 2 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,9 @@ static inline uint64_t cycleclock(void)

#include "timing.h"

// Global *atomic* integers controlling *process-wide* measurement of compilation time.
// Access them only through jl_atomic_load / jl_atomic_fetch_add.
//   jl_measure_compile_time:    nonzero while compile-time measurement is enabled.
//   jl_cumulative_compile_time: accumulated jl_hrtime() deltas spent compiling.
extern volatile uint8_t jl_measure_compile_time;
extern volatile uint64_t jl_cumulative_compile_time;

#ifdef _COMPILER_MICROSOFT_
# define jl_return_address() ((uintptr_t)_ReturnAddress())
Expand Down
2 changes: 1 addition & 1 deletion src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_M
ptls->io_wait = 0;
// @time needs its compile timer disabled on error,
// and cannot use a try-finally as it would break scope for assignments
jl_measure_compile_time[ptls->tid] = 0;
jl_atomic_fetch_add(&jl_measure_compile_time, -1);
JL_GC_PUSH1(&exception);
jl_gc_unsafe_enter(ptls);
if (exception) {
Expand Down
6 changes: 2 additions & 4 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,8 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
#endif

jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
// Process-wide compile-time measurement state, updated with atomic operations.
// Both start at zero: measurement disabled, no compile time accumulated.
volatile uint8_t jl_measure_compile_time = 0;
volatile uint64_t jl_cumulative_compile_time = 0;

// return calling thread's ID
// Also update the suspended_threads list in signals-mach when changing the
Expand Down Expand Up @@ -467,8 +467,6 @@ void jl_init_threading(void)
}
if (jl_n_threads <= 0)
jl_n_threads = 1;
jl_measure_compile_time = (uint8_t*)calloc(jl_n_threads, sizeof(*jl_measure_compile_time));
jl_cumulative_compile_time = (uint64_t*)calloc(jl_n_threads, sizeof(*jl_cumulative_compile_time));
#ifndef __clang_analyzer__
jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
#endif
Expand Down

0 comments on commit ccfcc4f

Please sign in to comment.