Skip to content

Commit

Permalink
use two NULLs to mark block end (and strip one when stripping metadata)
Browse files Browse the repository at this point in the history
  • Loading branch information
IanButterworth committed Sep 3, 2021
1 parent 2fccb63 commit 8bf9725
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 25 deletions.
4 changes: 2 additions & 2 deletions src/signal-handling.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64

JL_DLLEXPORT int jl_profile_is_buffer_full(void)
{
// the `+ 5` is for the block terminator `0` plus 4 metadata entries
return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 5 > bt_size_max;
// the `+ 6` is for the two block terminators `0` plus 4 metadata entries
return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 6 > bt_size_max;
}

static uint64_t jl_last_sigint_trigger = 0;
Expand Down
3 changes: 2 additions & 1 deletion src/signals-mach.c
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,8 @@ void *mach_profile_listener(void *arg)
// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;

// Mark the end of this block with 0
// Mark the end of this block with two 0's
bt_data_prof[bt_size_cur++].uintptr = 0;
bt_data_prof[bt_size_cur++].uintptr = 0;
}
// We're done! Resume the thread.
Expand Down
3 changes: 2 additions & 1 deletion src/signals-unix.c
Original file line number Diff line number Diff line change
Expand Up @@ -800,7 +800,8 @@ static void *signal_listener(void *arg)
// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;

// Mark the end of this block with 0
// Mark the end of this block with two 0's
bt_data_prof[bt_size_cur++].uintptr = 0;
bt_data_prof[bt_size_cur++].uintptr = 0;
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/signals-win.c
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,8 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;

// Mark the end of this block with 0
// Mark the end of this block with two 0's
bt_data_prof[bt_size_cur++].uintptr = 0;
bt_data_prof[bt_size_cur++].uintptr = 0;
}
jl_unlock_profile();
Expand Down
41 changes: 21 additions & 20 deletions stdlib/Profile/src/Profile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ module Profile

import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame

const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)

# deprecated functions: use `getdict` instead
lookup(ip::UInt) = lookup(convert(Ptr{Cvoid}, ip))

Expand Down Expand Up @@ -41,9 +43,9 @@ end
Configure the `delay` between backtraces (measured in seconds), and the number `n` of instruction pointers that may be
stored per thread. Each instruction pointer corresponds to a single line of code; backtraces generally consist of a long
list of instruction pointers. Note that 5 spaces for instruction pointers per backtrace are used to store metadata and a marker.
Current settings can be obtained by calling this function with no arguments, and each can be set independently using keywords
or in the order `(n, delay)`.
list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two
NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently
using keywords or in the order `(n, delay)`.
!!! compat "Julia 1.8"
As of Julia 1.8, this function allocates space for `n` instruction pointers per thread being profiled.
Expand Down Expand Up @@ -287,10 +289,10 @@ function get_thread_ids(data::Vector{<:Unsigned}, taskid = nothing)
end

function is_block_end(data, i)
i < 2 && return false
# 32-bit linux has been seen to have rogue ips equal to 0 so we cannot just rely on zeros being the block end.
# Also check for the previous entry looking like an idle state metadata entry which can only be 1 or 2
return data[i] == 0 && in(data[i - 1], [1,2])
i < nmeta + 1 && return false
# 32-bit linux has been seen to have rogue NULL ips, so we use two to indicate block end, where the 2nd is the
# actual end index
return data[i] == 0 && data[i - 1] == 0
end

"""
Expand Down Expand Up @@ -520,14 +522,13 @@ function fetch(;include_meta = false)
nblocks += 1
end
end
nmeta = 4 # number of metadata fields (threadid, taskid, cpu_cycle_clock, thread_sleeping)
data_stripped = Vector{UInt}(undef, length(data) - (nblocks * nmeta))
data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1)))
j = length(data_stripped)
i = length(data)
while i > 0 && j > 0
data_stripped[j] = data[i]
if is_block_end(data, i)
i -= nmeta
i -= (nmeta + 1) # metadata fields and the extra NULL IP
end
i -= 1
j -= 1
Expand All @@ -554,14 +555,14 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
skip = false
nsleeping = 0
for i in startframe:-1:1
startframe - 1 <= i <= startframe - 4 && continue # skip metadata (it's read ahead below)
startframe - 1 <= i <= startframe - (nmeta + 1) && continue # skip metadata (it's read ahead below) and extra block-end NULL IP
ip = data[i]
if is_block_end(data, i)
# read metadata
thread_sleeping = data[i - 1] - 1 # subtract 1 as state is incremented to avoid being equal to 0
# cpu_cycle_clock = data[i - 2]
taskid = data[i - 3]
threadid = data[i - 4]
thread_sleeping = data[i - 2] - 1 # subtract 1 as state is incremented to avoid being equal to 0
# cpu_cycle_clock = data[i - 3]
taskid = data[i - 4]
threadid = data[i - 5]
if !in(threadid, threads) || !in(taskid, tasks)
skip = true
continue
Expand Down Expand Up @@ -802,14 +803,14 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
skip = false
nsleeping = 0
for i in startframe:-1:1
startframe - 1 <= i <= startframe - 4 && continue # skip metadata (its read ahead below)
startframe - 1 <= i <= startframe - (nmeta + 1) && continue # skip metadata (its read ahead below) and extra block end NULL IP
ip = all[i]
if is_block_end(all, i)
# read metadata
thread_sleeping = all[i - 1] - 1 # subtract 1 as state is incremented to avoid being equal to 0
# cpu_cycle_clock = all[i - 2]
taskid = all[i - 3]
threadid = all[i - 4]
thread_sleeping = all[i - 2] - 1 # subtract 1 as state is incremented to avoid being equal to 0
# cpu_cycle_clock = all[i - 3]
taskid = all[i - 4]
threadid = all[i - 5]
if !in(threadid, threads) || !in(taskid, tasks)
skip = true
continue
Expand Down

0 comments on commit 8bf9725

Please sign in to comment.