From 8bf9725640ebe7c07b44238c2006e269dfae5908 Mon Sep 17 00:00:00 2001 From: Ian Date: Fri, 3 Sep 2021 15:46:57 -0400 Subject: [PATCH] use two NULLs to mark block end (and strip one when stripping metadata) --- src/signal-handling.c | 4 ++-- src/signals-mach.c | 3 ++- src/signals-unix.c | 3 ++- src/signals-win.c | 3 ++- stdlib/Profile/src/Profile.jl | 41 ++++++++++++++++++----------------- 5 files changed, 29 insertions(+), 25 deletions(-) diff --git a/src/signal-handling.c b/src/signal-handling.c index 2939d73b25e81d..4743d8b37220aa 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -37,8 +37,8 @@ void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64 JL_DLLEXPORT int jl_profile_is_buffer_full(void) { - // the `+ 5` is for the block terminator `0` plus 4 metadata entries - return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 5 > bt_size_max; + // the `+ 6` is for the two block terminators `0` plus 4 metadata entries + return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 6 > bt_size_max; } static uint64_t jl_last_sigint_trigger = 0; diff --git a/src/signals-mach.c b/src/signals-mach.c index cfcbf64e6a55e0..1fc1ff98f2e8cb 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -602,7 +602,8 @@ void *mach_profile_listener(void *arg) // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1; - // Mark the end of this block with 0 + // Mark the end of this block with two 0's + bt_data_prof[bt_size_cur++].uintptr = 0; bt_data_prof[bt_size_cur++].uintptr = 0; } // We're done! Resume the thread. diff --git a/src/signals-unix.c b/src/signals-unix.c index 414504d8f742f1..460ef4d80518eb 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -800,7 +800,8 @@ static void *signal_listener(void *arg) // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1; - // Mark the end of this block with 0 + // Mark the end of this block with two 0's + bt_data_prof[bt_size_cur++].uintptr = 0; bt_data_prof[bt_size_cur++].uintptr = 0; } } diff --git a/src/signals-win.c b/src/signals-win.c index ba12f683cc7134..dd8dca959cabf3 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -375,7 +375,8 @@ static DWORD WINAPI profile_bt( LPVOID lparam ) // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1; - // Mark the end of this block with 0 + // Mark the end of this block with two 0's + bt_data_prof[bt_size_cur++].uintptr = 0; bt_data_prof[bt_size_cur++].uintptr = 0; } jl_unlock_profile(); diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 0edce5fd4eeaed..91edf2e2bcb824 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -7,6 +7,8 @@ module Profile import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame +const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping) + # deprecated functions: use `getdict` instead lookup(ip::UInt) = lookup(convert(Ptr{Cvoid}, ip)) @@ -41,9 +43,9 @@ end Configure the `delay` between backtraces (measured in seconds), and the number `n` of instruction pointers that may be stored per thread. Each instruction pointer corresponds to a single line of code; backtraces generally consist of a long -list of instruction pointers. Note that 5 spaces for instruction pointers per backtrace are used to store metadata and a marker. -Current settings can be obtained by calling this function with no arguments, and each can be set independently using keywords -or in the order `(n, delay)`. +list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two +NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently +using keywords or in the order `(n, delay)`. !!! compat "Julia 1.8" As of Julia 1.8, this function allocates space for `n` instruction pointers per thread being profiled. @@ -287,10 +289,10 @@ function get_thread_ids(data::Vector{<:Unsigned}, taskid = nothing) end function is_block_end(data, i) - i < 2 && return false - # 32-bit linux has been seen to have rogue ips equal to 0 so we cannot just rely on zeros being the block end. - # Also check for the previous entry looking like an idle state metadata entry which can only be 1 or 2 - return data[i] == 0 && in(data[i - 1], [1,2]) + i < nmeta + 1 && return false + # 32-bit linux has been seen to have rogue NULL ips, so we use two to indicate block end, where the 2nd is the + # actual end index + return data[i] == 0 && data[i - 1] == 0 end """ @@ -520,14 +522,13 @@ function fetch(;include_meta = false) nblocks += 1 end end - nmeta = 4 # number of metadata fields (threadid, taskid, cpu_cycle_clock, thread_sleeping) - data_stripped = Vector{UInt}(undef, length(data) - (nblocks * nmeta)) + data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1))) j = length(data_stripped) i = length(data) while i > 0 && j > 0 data_stripped[j] = data[i] if is_block_end(data, i) - i -= nmeta + i -= (nmeta + 1) # metadata fields and the extra NULL IP end i -= 1 j -= 1 @@ -554,14 +555,14 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, skip = false nsleeping = 0 for i in startframe:-1:1 - startframe - 1 <= i <= startframe - 4 && continue # skip metadata (it's read ahead below) + startframe - 1 <= i <= startframe - (nmeta + 1) && continue # skip metadata (it's read ahead below) and extra block-end NULL IP ip = data[i] if is_block_end(data, i) # read metadata - thread_sleeping = data[i - 1] - 1 # subtract 1 as state is incremented to avoid being equal to 0 - # cpu_cycle_clock = data[i - 2] - taskid = data[i - 3] - threadid = data[i - 4] + thread_sleeping = data[i - 2] - 1 # subtract 1 as state is incremented to avoid being equal to 0 + # cpu_cycle_clock = data[i - 3] + taskid = data[i - 4] + threadid = data[i - 5] if !in(threadid, threads) || !in(taskid, tasks) skip = true continue @@ -802,14 +803,14 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI skip = false nsleeping = 0 for i in startframe:-1:1 - startframe - 1 <= i <= startframe - 4 && continue # skip metadata (its read ahead below) + startframe - 1 <= i <= startframe - (nmeta + 1) && continue # skip metadata (its read ahead below) and extra block end NULL IP ip = all[i] if is_block_end(all, i) # read metadata - thread_sleeping = all[i - 1] - 1 # subtract 1 as state is incremented to avoid being equal to 0 - # cpu_cycle_clock = all[i - 2] - taskid = all[i - 3] - threadid = all[i - 4] + thread_sleeping = all[i - 2] - 1 # subtract 1 as state is incremented to avoid being equal to 0 + # cpu_cycle_clock = all[i - 3] + taskid = all[i - 4] + threadid = all[i - 5] if !in(threadid, threads) || !in(taskid, tasks) skip = true continue