From a62fc7a453a18ba583b0266616ff43fe9e787604 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Wed, 22 Nov 2023 16:57:34 +0000 Subject: [PATCH 01/17] [PROF-8667] Heap Profiling - Part 1 - Setup --- benchmarks/profiler_sample_loop_v2.rb | 6 +- .../collectors_cpu_and_wall_time_worker.c | 63 ++++++-- .../collectors_thread_context.c | 12 ++ .../collectors_thread_context.h | 1 + .../extconf.rb | 5 + .../heap_recorder.c | 85 ++++++++++ .../heap_recorder.h | 101 ++++++++++++ .../libdatadog_helpers.c | 27 ++++ .../libdatadog_helpers.h | 6 + .../stack_recorder.c | 151 +++++++++++++++++- .../stack_recorder.h | 3 + lib/datadog/core/configuration/settings.rb | 61 ++++--- .../collectors/cpu_and_wall_time_worker.rb | 13 +- lib/datadog/profiling/component.rb | 56 ++++++- lib/datadog/profiling/ext.rb | 24 +++ lib/datadog/profiling/stack_recorder.rb | 4 +- spec/datadog/profiling/stack_recorder_spec.rb | 108 ++++++++++++- 17 files changed, 669 insertions(+), 57 deletions(-) create mode 100644 ext/ddtrace_profiling_native_extension/heap_recorder.c create mode 100644 ext/ddtrace_profiling_native_extension/heap_recorder.h diff --git a/benchmarks/profiler_sample_loop_v2.rb b/benchmarks/profiler_sample_loop_v2.rb index 0afc5d6cc70..4722a6c050f 100644 --- a/benchmarks/profiler_sample_loop_v2.rb +++ b/benchmarks/profiler_sample_loop_v2.rb @@ -16,7 +16,11 @@ class ProfilerSampleLoopBenchmark PROFILER_OVERHEAD_STACK_THREAD = Thread.new { sleep } def create_profiler - @recorder = Datadog::Profiling::StackRecorder.new(cpu_time_enabled: true, alloc_samples_enabled: true) + @recorder = Datadog::Profiling::StackRecorder.new( + cpu_time_enabled: true, + alloc_samples_enabled: false, + heap_samples_enabled: false + ) @collector = Datadog::Profiling::Collectors::ThreadContext.new( recorder: @recorder, max_frames: 400, tracer: nil, endpoint_collection_enabled: false, timeline_enabled: false ) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c 
b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index e33cf760433..aa32adf8764 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -83,7 +83,9 @@ struct cpu_and_wall_time_worker_state { bool allocation_counting_enabled; bool no_signals_workaround_enabled; bool dynamic_sampling_rate_enabled; - int allocation_sample_every; // Temporarily used for development/testing of allocation profiling + int allocation_sample_every; + bool allocation_profiling_enabled; + bool heap_profiling_enabled; VALUE self_instance; VALUE thread_context_collector_instance; VALUE idle_sampling_helper_instance; @@ -91,6 +93,7 @@ struct cpu_and_wall_time_worker_state { dynamic_sampling_rate_state dynamic_sampling_rate; VALUE gc_tracepoint; // Used to get gc start/finish information VALUE object_allocation_tracepoint; // Used to get allocation counts and allocation profiling + VALUE object_free_tracepoint; // Used to figure out live objects for heap profiling // These are mutable and used to signal things between the worker thread and other threads @@ -152,7 +155,9 @@ static VALUE _native_initialize( VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, - VALUE allocation_sample_every + VALUE allocation_sample_every, + VALUE allocation_profiling_enabled, + VALUE heap_profiling_enabled ); static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr); static VALUE _native_sampling_loop(VALUE self, VALUE instance); @@ -186,6 +191,7 @@ static void reset_stats(struct cpu_and_wall_time_worker_state *state); static void sleep_for(uint64_t time_ns); static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self); static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused); +static void on_freeobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused); static void 
disable_tracepoints(struct cpu_and_wall_time_worker_state *state); static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self); static VALUE rescued_sample_allocation(VALUE tracepoint_data); @@ -226,7 +232,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) { // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new); - rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 8); + rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 10); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1); @@ -268,12 +274,15 @@ static VALUE _native_new(VALUE klass) { state->no_signals_workaround_enabled = false; state->dynamic_sampling_rate_enabled = true; state->allocation_sample_every = 0; + state->allocation_profiling_enabled = false; + state->heap_profiling_enabled = false; state->thread_context_collector_instance = Qnil; state->idle_sampling_helper_instance = Qnil; state->owner_thread = Qnil; dynamic_sampling_rate_init(&state->dynamic_sampling_rate); state->gc_tracepoint = Qnil; state->object_allocation_tracepoint = Qnil; + state->object_free_tracepoint = Qnil; atomic_init(&state->should_run, false); state->failure_exception = Qnil; @@ -295,13 +304,17 @@ static VALUE _native_initialize( VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, - VALUE allocation_sample_every + VALUE allocation_sample_every, + VALUE allocation_profiling_enabled, + VALUE heap_profiling_enabled ) { 
ENFORCE_BOOLEAN(gc_profiling_enabled); ENFORCE_BOOLEAN(allocation_counting_enabled); ENFORCE_BOOLEAN(no_signals_workaround_enabled); ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled); ENFORCE_TYPE(allocation_sample_every, T_FIXNUM); + ENFORCE_BOOLEAN(allocation_profiling_enabled); + ENFORCE_BOOLEAN(heap_profiling_enabled); struct cpu_and_wall_time_worker_state *state; TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state); @@ -311,15 +324,22 @@ static VALUE _native_initialize( state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue); state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue); state->allocation_sample_every = NUM2INT(allocation_sample_every); + state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue); + state->heap_profiling_enabled = (heap_profiling_enabled == Qtrue); - if (state->allocation_sample_every < 0) { - rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be >= 0.", state->allocation_sample_every); + if (state->allocation_sample_every <= 0) { + rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. 
This value must be > 0.", state->allocation_sample_every); + } + + if (state->heap_profiling_enabled && !state->allocation_profiling_enabled) { + rb_raise(rb_eArgError, "Heap profiling requires allocation profiling to be enabled but it isn't."); } state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance); state->idle_sampling_helper_instance = idle_sampling_helper_instance; state->gc_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_GC_ENTER | RUBY_INTERNAL_EVENT_GC_EXIT, on_gc_event, NULL /* unused */); state->object_allocation_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_NEWOBJ, on_newobj_event, NULL /* unused */); + state->object_free_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_FREEOBJ, on_freeobj_event, NULL /* unused */); return Qtrue; } @@ -335,6 +355,7 @@ static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr) { rb_gc_mark(state->stop_thread); rb_gc_mark(state->gc_tracepoint); rb_gc_mark(state->object_allocation_tracepoint); + rb_gc_mark(state->object_free_tracepoint); } // Called in a background thread created in CpuAndWallTimeWorker#start @@ -632,7 +653,8 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) { // because they may raise exceptions. 
install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal"); if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint); - if (state->allocation_counting_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); + if (state->allocation_counting_enabled || state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); + if (state->heap_profiling_enabled) rb_tracepoint_enable(state->object_free_tracepoint); rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state); @@ -916,15 +938,11 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) return; } - // @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous - // invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided - // to keep this here for consistency -- every call to the thread context (other than the special gc calls which are - // defined as not being able to allocate) sets this. state->during_sample = true; // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before // beta), and having something here allows us to test the rest of feature, sampling decision aside. - if (state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) { + if (state->allocation_profiling_enabled && state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) { // Rescue against any exceptions that happen during sampling safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance); } @@ -932,9 +950,30 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) state->during_sample = false; } +// Safety: This function may get called while Ruby is doing garbage collection. 
While Ruby is doing garbage collection, +// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation. +// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)! +static void on_freeobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) { + struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above + + // This should not happen in a normal situation because the tracepoint is always enabled after the instance is set + // and disabled before it is cleared, but just in case... + if (state == NULL) return; + + // NOTE: Because this is likely to be happening during GC, handling of this tracepoint does not do any allocation. + // We also do not want to lose any frees as that would affect the accuracy of our live heap tracking so we skip + // the typical `state->during_sample` dropping that other sampling tracepoints have. 
+ + rb_trace_arg_t *data = rb_tracearg_from_tracepoint(tracepoint_data); + VALUE freed_object = rb_tracearg_object(data); + + thread_context_collector_sample_free(state->thread_context_collector_instance, freed_object); +} + static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) { rb_tracepoint_disable(state->gc_tracepoint); rb_tracepoint_disable(state->object_allocation_tracepoint); + rb_tracepoint_disable(state->object_free_tracepoint); } static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) { diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 29bbd50051b..8183825ade7 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -1203,6 +1203,8 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in } } + track_obj_allocation(state->recorder_instance, new_object, sample_weight); + trigger_sample_for_thread( state, /* thread: */ current_thread, @@ -1216,6 +1218,16 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in ); } +// Safety: This function may get called while Ruby is doing garbage collection. While Ruby is doing garbage collection, +// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation. +// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)! +void thread_context_collector_sample_free(VALUE self_instance, VALUE freed_object) { + struct thread_context_collector_state *state; + TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state); + + record_obj_free(state->recorder_instance, freed_object); +} + // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec. 
// It SHOULD NOT be used for other purposes. static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object) { diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.h b/ext/ddtrace_profiling_native_extension/collectors_thread_context.h index 88dfabe9615..2bbf2bf1108 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.h @@ -8,6 +8,7 @@ void thread_context_collector_sample( VALUE profiler_overhead_stack_thread ); void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object); +void thread_context_collector_sample_free(VALUE self_instance, VALUE freed_object); VALUE thread_context_collector_sample_after_gc(VALUE self_instance); void thread_context_collector_on_gc_start(VALUE self_instance); void thread_context_collector_on_gc_finish(VALUE self_instance); diff --git a/ext/ddtrace_profiling_native_extension/extconf.rb b/ext/ddtrace_profiling_native_extension/extconf.rb index 933fc87d157..064b36e7445 100644 --- a/ext/ddtrace_profiling_native_extension/extconf.rb +++ b/ext/ddtrace_profiling_native_extension/extconf.rb @@ -120,6 +120,11 @@ def add_compiler_flag(flag) add_compiler_flag '-Wall' add_compiler_flag '-Wextra' +if ENV['DDTRACE_DEBUG'] + CONFIG['optflags'] = '-O0' + CONFIG['debugflags'] = '-ggdb3' +end + if RUBY_PLATFORM.include?('linux') # Supposedly, the correct way to do this is # ``` diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c new file mode 100644 index 00000000000..7d2ecc606ea --- /dev/null +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -0,0 +1,85 @@ +#include "heap_recorder.h" +#include +#include "ruby/st.h" +#include "ruby/util.h" +#include "ruby_helpers.h" +#include + +// Allows storing data passed to 
::start_heap_allocation_recording to make it accessible to +// ::end_heap_allocation_recording. +// +// obj != Qnil flags this struct as holding a valid partial heap recording. +typedef struct { + VALUE obj; + live_object_data object_data; +} partial_heap_recording; + +struct heap_recorder { + // Data for a heap recording that was started but not yet ended + partial_heap_recording active_recording; +}; + +// ========================== +// Heap Recorder External API +// +// WARN: Except for ::heap_recorder_for_each_live_object, we always assume interaction with these APIs +// happens under the GVL. +// +// ========================== +heap_recorder* heap_recorder_new(void) { + heap_recorder* recorder = ruby_xmalloc(sizeof(heap_recorder)); + + recorder->active_recording = (partial_heap_recording) { + .obj = Qnil, + .object_data = {0}, + }; + + return recorder; +} + +void heap_recorder_free(struct heap_recorder* recorder) { + ruby_xfree(recorder); +} + +void start_heap_allocation_recording(heap_recorder* heap_recorder, VALUE new_obj, unsigned int weight) { + heap_recorder->active_recording = (partial_heap_recording) { + .obj = new_obj, + .object_data = (live_object_data) { + .weight = weight, + }, + }; +} + +void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) { + partial_heap_recording *active_recording = &heap_recorder->active_recording; + + VALUE new_obj = active_recording->obj; + if (new_obj == Qnil) { + // Recording ended without having been started? + rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started"); + } + + // From now on, mark active recording as invalid so we can short-circuit at any point and + // not end up with a still active recording. new_obj still holds the object for this recording + active_recording->obj = Qnil; + + // TODO: Implement +} + +// WARN: This can get called during Ruby GC. NO HEAP ALLOCATIONS OR EXCEPTIONS ARE ALLOWED. 
+void record_heap_free(heap_recorder *heap_recorder, VALUE obj) { + // TODO: Implement +} + +void heap_recorder_flush(heap_recorder *heap_recorder) { + // TODO: Implement +} + +void heap_recorder_for_each_live_object( + heap_recorder *heap_recorder, + void (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg), + void *for_each_callback_extra_arg, + bool with_gvl) { + // TODO: Implement +} + diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.h b/ext/ddtrace_profiling_native_extension/heap_recorder.h new file mode 100644 index 00000000000..e189e206df2 --- /dev/null +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.h @@ -0,0 +1,101 @@ +#pragma once + +#include +#include + +// A heap recorder keeps track of a collection of live heap objects. +// +// All allocations observed by this recorder for which a corresponding free was +// not yet observed are deemed as alive and can be iterated on to produce a +// live heap profile. +// +// WARN: Unless otherwise stated the heap recorder APIs assume calls are done +// under the GVL. +typedef struct heap_recorder heap_recorder; + +// Extra data associated with each live object being tracked. +typedef struct live_object_data { + // The weight of this object from a sampling perspective. + // + // A notion of weight is preserved for each tracked object to allow for an approximate + // extrapolation to an unsampled view. + // + // Example: If we were sampling every 50 objects, then each sampled object + // could be seen as being representative of 50 objects. + unsigned int weight; +} live_object_data; + +// Data that is made available to iterators of heap recorder data for each live object +// tracked therein. +typedef struct { + ddog_prof_Slice_Location locations; + live_object_data object_data; +} heap_recorder_iteration_data; + +// Initialize a new heap recorder. +heap_recorder* heap_recorder_new(void); + +// Free a previously initialized heap recorder. 
+void heap_recorder_free(heap_recorder *heap_recorder); + +// Start a heap allocation recording on the heap recorder for a new object. +// +// This heap allocation recording needs to be ended via ::end_heap_allocation_recording +// before it will become fully committed and able to be iterated on. +// +// @param new_obj +// The newly allocated Ruby object/value. +// @param weight +// The sampling weight of this object. +// +// WARN: It needs to be paired with a ::end_heap_allocation_recording call. +void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight); + +// End a previously started heap allocation recording on the heap recorder. +// +// It is at this point that an allocated object will become fully tracked and able to be iterated on. +// +// @param locations The stacktrace representing the location of the allocation. +// +// WARN: It is illegal to call this without previously having called ::start_heap_allocation_recording. +void end_heap_allocation_recording(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations); + +// Record a heap free on the heap recorder. +// +// Contrary to heap allocations, no sampling should be applied to frees. Missing a free event +// risks negatively affecting the accuracy of the live state of tracked objects and thus the accuracy +// of the resulting profiles. +// +// Two things can happen depending on the object: +// * The object isn't being tracked: the operation is a no-op. +// * The object is being tracked: it is marked as no longer alive and will not appear in the next +// iteration. +// +// @param obj The object that was freed. +// +// NOTE: This function is safe to be called during a Ruby GC as it guarantees no heap mutations +// during its execution. +void record_heap_free(heap_recorder *heap_recorder, VALUE obj); + +// Flush any intermediate state that might be queued inside the heap recorder. 
+// +// NOTE: This should usually be called before iteration to ensure data is as little stale as possible. +void heap_recorder_flush(heap_recorder *heap_recorder); + +// Iterate over each live object being tracked by the heap recorder. +// +// @param for_each_callback +// A callback function that shall be called for each live object being tracked +// by the heap recorder. Alongside the iteration_data for each live object, +// a second argument will be forwarded with the contents of the optional +// for_each_callback_extra_arg. +// @param for_each_callback_extra_arg +// Optional (NULL if empty) extra data that should be passed to the +// callback function alongside the data for each live tracked object. +// @param with_gvl +// True if we're calling this while holding the GVL, false otherwise. +void heap_recorder_for_each_live_object( + heap_recorder *heap_recorder, + void (*for_each_callback)(heap_recorder_iteration_data data, void* extra_arg), + void *for_each_callback_extra_arg, + bool with_gvl); diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c index 43bd02c4a07..07f2236decc 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c @@ -40,3 +40,30 @@ ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type) { default: return DDOG_CHARSLICE_C("BUG: Unknown value for ruby_value_type"); } } + +#define MAX_DDOG_ERR_MESSAGE_SIZE 128 +#define DDOG_ERR_MESSAGE_TRUNCATION_SUFFIX "..." 
+ +void grab_gvl_and_raise_ddogerr_and_drop(const char *while_context, ddog_Error *error) { + char error_msg[MAX_DDOG_ERR_MESSAGE_SIZE]; + ddog_CharSlice error_msg_slice = ddog_Error_message(error); + size_t copy_size = error_msg_slice.len; + // Account for extra null char for proper cstring + size_t error_msg_size = copy_size + 1; + bool truncated = false; + if (error_msg_size > MAX_DDOG_ERR_MESSAGE_SIZE) { + // if it seems like the error msg won't fit, lets truncate things by limiting + // the copy_size to MAX_SIZE minus the size of the truncation suffix + // (which already includes space for a terminating '\0' due to how sizeof works) + copy_size = MAX_DDOG_ERR_MESSAGE_SIZE - sizeof(DDOG_ERR_MESSAGE_TRUNCATION_SUFFIX); + error_msg_size = MAX_DDOG_ERR_MESSAGE_SIZE; + truncated = true; + } + strncpy(error_msg, error_msg_slice.ptr, copy_size); + if (truncated) { + strcpy(error_msg + copy_size, DDOG_ERR_MESSAGE_TRUNCATION_SUFFIX); + } + error_msg[error_msg_size - 1] = '\0'; + ddog_Error_drop(error); + grab_gvl_and_raise(rb_eRuntimeError, "Libstreaming error while (%s): %s", while_context, error_msg); +} diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h index edd11e31954..9a1734327a8 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h @@ -24,6 +24,12 @@ inline static VALUE get_error_details_and_drop(ddog_Error *error) { return result; } +// Utility function to be able to easily raise a ddog_Error outside the GVL. +// The message will look like 'Libstreaming error while : ' +NORETURN( + void grab_gvl_and_raise_ddogerr_and_drop(const char *while_context, ddog_Error *error); +); + // Used for pretty printing this Ruby enum. Returns "T_UNKNOWN_OR_MISSING_RUBY_VALUE_TYPE_ENTRY" for unknown elements. 
// In practice, there's a few types that the profiler will probably never encounter, but I've added all entries of // ruby_value_type that Ruby uses so that we can also use this for debugging. diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index 41729b06c0d..503c677f170 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -7,6 +7,7 @@ #include "libdatadog_helpers.h" #include "ruby_helpers.h" #include "time_helpers.h" +#include "heap_recorder.h" // Used to wrap a ddog_prof_Profile in a Ruby object and expose Ruby-level serialization APIs // This file implements the native bits of the Datadog::Profiling::StackRecorder class @@ -152,18 +153,23 @@ static VALUE stack_recorder_class = Qnil; #define WALL_TIME_VALUE_ID 2 #define ALLOC_SAMPLES_VALUE {.type_ = VALUE_STRING("alloc-samples"), .unit = VALUE_STRING("count")} #define ALLOC_SAMPLES_VALUE_ID 3 +#define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")} +#define HEAP_SAMPLES_VALUE_ID 4 -static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE}; +static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE}; // This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it // occupies on the all_value_types array. // E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types. 
-static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID}; +static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID}; #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType)) // Contains native state for each instance struct stack_recorder_state { + // Heap recorder instance + heap_recorder *heap_recorder; + pthread_mutex_t slot_one_mutex; ddog_prof_Profile slot_one_profile; @@ -199,11 +205,11 @@ static VALUE _native_new(VALUE klass); static void initialize_slot_concurrency_control(struct stack_recorder_state *state); static void initialize_profiles(struct stack_recorder_state *state, ddog_prof_Slice_ValueType sample_types); static void stack_recorder_typed_data_free(void *data); -static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled); +static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled); static VALUE _native_serialize(VALUE self, VALUE recorder_instance); static VALUE ruby_time_from(ddog_Timespec ddprof_time); static void *call_serialize_without_gvl(void *call_args); -static struct active_slot_pair sampler_lock_active_profile(); +static struct active_slot_pair sampler_lock_active_profile(struct stack_recorder_state *state); static void sampler_unlock_active_profile(struct active_slot_pair active_slot); static ddog_prof_Profile *serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state); static VALUE _native_active_slot(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); @@ -215,6 +221,9 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_ static void serializer_set_start_timestamp_for_next_profile(struct 
stack_recorder_state *state, ddog_Timespec start_time); static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint); static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */); +static VALUE _native_track_obj_allocation(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight); +static VALUE _native_record_obj_free(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE obj); + void stack_recorder_init(VALUE profiling_module) { stack_recorder_class = rb_define_class_under(profiling_module, "StackRecorder", rb_cObject); @@ -231,13 +240,15 @@ void stack_recorder_init(VALUE profiling_module) { // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. rb_define_alloc_func(stack_recorder_class, _native_new); - rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 3); + rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 4); rb_define_singleton_method(stack_recorder_class, "_native_serialize", _native_serialize, 1); rb_define_singleton_method(stack_recorder_class, "_native_reset_after_fork", _native_reset_after_fork, 1); rb_define_singleton_method(testing_module, "_native_active_slot", _native_active_slot, 1); rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3); + rb_define_singleton_method(testing_module, "_native_track_obj_allocation", _native_track_obj_allocation, 3); + rb_define_singleton_method(testing_module, "_native_record_obj_free", _native_record_obj_free, 2); ok_symbol = ID2SYM(rb_intern_const("ok")); error_symbol = ID2SYM(rb_intern_const("error")); @@ -272,6 
+283,8 @@ static VALUE _native_new(VALUE klass) { VALUE stack_recorder = TypedData_Wrap_Struct(klass, &stack_recorder_typed_data, state); + state->heap_recorder = heap_recorder_new(); + // Note: Don't raise exceptions after this point, since it'll lead to libdatadog memory leaking! initialize_profiles(state, sample_types); @@ -320,23 +333,32 @@ static void stack_recorder_typed_data_free(void *state_ptr) { pthread_mutex_destroy(&state->slot_two_mutex); ddog_prof_Profile_drop(&state->slot_two_profile); + if (state->heap_recorder != NULL) { + // Heap recorder will only be initialized if we're actually collecting heap samples. + // If it is initialized we need to free it... + heap_recorder_free(state->heap_recorder); + } ruby_xfree(state); } -static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled) { +static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled) { ENFORCE_BOOLEAN(cpu_time_enabled); ENFORCE_BOOLEAN(alloc_samples_enabled); + ENFORCE_BOOLEAN(heap_samples_enabled); struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); - if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default + if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue && heap_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default // When some sample types are disabled, we need to reconfigure libdatadog to record less types, // as well as reconfigure the position_for array to push the disabled types to the end so they don't get recorded. // See record_sample for details on the use of position_for. - state->enabled_values_count = ALL_VALUE_TYPES_COUNT - (cpu_time_enabled == Qtrue ? 0 : 1) - (alloc_samples_enabled == Qtrue? 
0 : 1); + state->enabled_values_count = ALL_VALUE_TYPES_COUNT - + (cpu_time_enabled == Qtrue ? 0 : 1) - + (alloc_samples_enabled == Qtrue? 0 : 1) - + (heap_samples_enabled == Qtrue ? 0 : 1); ddog_prof_ValueType enabled_value_types[ALL_VALUE_TYPES_COUNT]; uint8_t next_enabled_pos = 0; @@ -364,6 +386,18 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_disabled_pos++; } + if (heap_samples_enabled == Qtrue) { + enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE; + state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++; + } else { + state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++; + + // Turns out heap sampling is disabled but we initialized everything in _native_new + // assuming all samples were enabled. We need to deinitialize the heap recorder. + heap_recorder_free(state->heap_recorder); + state->heap_recorder = NULL; + } + ddog_prof_Profile_drop(&state->slot_one_profile); ddog_prof_Profile_drop(&state->slot_two_profile); @@ -381,6 +415,16 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan // Need to do this while still holding on to the Global VM Lock; see comments on method for why serializer_set_start_timestamp_for_next_profile(state, finish_timestamp); + if (state->heap_recorder != NULL) { + // Flush any pending data in the heap recorder prior to doing the iteration during serialization. + // This needs to happen while holding on to the GVL + // TODO: Can this deadlock with the sampler thread due to GVL or Ruby specific things? + // 1. Sampler is in the middle of recording a heap allocation. + // 2. Recorder gets scheduled and tries to acquire the lock, waiting if needed. + // 3. Are we guaranteed that the sampler can make progress? 
+ heap_recorder_flush(state->heap_recorder); + } + // We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this // is pending struct call_serialize_without_gvl_arguments args = {.state = state, .finish_timestamp = finish_timestamp, .serialize_ran = false}; @@ -442,6 +486,15 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns; metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples; + if (values.alloc_samples != 0 && state->heap_recorder != NULL) { + // If we got an allocation sample and our heap recorder is initialized, + // end the heap allocation recording to commit the heap sample. + // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening + // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can + // be fixed with some refactoring but for now this leads to a less impactful change. + end_heap_allocation_recording(state->heap_recorder, locations); + } + ddog_prof_Profile_Result result = ddog_prof_Profile_add( active_slot.profile, (ddog_prof_Sample) { @@ -459,6 +512,30 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, } } +void track_obj_allocation(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) { + struct stack_recorder_state *state; + TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); + if (state->heap_recorder == NULL) { + // we aren't gathering heap data so we can ignore this + return; + } + // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening + // very late in the allocation-sampling path (which is shared with the cpu sampling path). 
This can + // be fixed with some refactoring but for now this leads to a less impactful change. + start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight); +} + +// WARN: This can get called during Ruby GC. NO HEAP ALLOCATIONS OR EXCEPTIONS ARE ALLOWED. +void record_obj_free(VALUE recorder_instance, VALUE freed_object) { + struct stack_recorder_state *state; + TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); + if (state->heap_recorder == NULL) { + // we aren't gathering heap data so we can ignore this + return; + } + record_heap_free(state->heap_recorder, freed_object); +} + void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); @@ -474,10 +551,54 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_ } } +// Heap recorder iteration context allows us access to stack recorder state and profile being serialized +// during iteration of heap recorder live objects. 
+typedef struct heap_recorder_iteration_context { + struct stack_recorder_state *state; + ddog_prof_Profile *profile; +} heap_recorder_iteration_context; + +static void add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) { + heap_recorder_iteration_context *context = (heap_recorder_iteration_context*) extra_arg; + + int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0}; + uint8_t *position_for = context->state->position_for; + + metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = iteration_data.object_data.weight; + + ddog_prof_Profile_Result result = ddog_prof_Profile_add( + context->profile, + (ddog_prof_Sample) { + .locations = iteration_data.locations, + .values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count}, + }, + 0 + ); + + if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { + grab_gvl_and_raise_ddogerr_and_drop("recording heap sample", &result.err); + } +} + +static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile) { + heap_recorder_iteration_context iteration_context = { + .state = state, + .profile = profile + }; + heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context, false); +} + static void *call_serialize_without_gvl(void *call_args) { struct call_serialize_without_gvl_arguments *args = (struct call_serialize_without_gvl_arguments *) call_args; args->profile = serializer_flip_active_and_inactive_slots(args->state); + + if (args->state->heap_recorder != NULL) { + // Now that we have the inactive profile with all but heap samples, lets fill it with heap data + // without needing to race with the active sampler + build_heap_profile_without_gvl(args->state, args->profile); + } + // Note: The profile gets reset by the serialize call args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is 
optional */, NULL /* start_time is optional */); args->serialize_ran = true; @@ -599,6 +720,9 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_ reset_profile(&state->slot_one_profile, /* start_time: */ NULL); reset_profile(&state->slot_two_profile, /* start_time: */ NULL); + // After a fork, all tracked live objects should still be alive in the child process so we purposefully DON'T + // reset the heap recorder. The live heap it is tracking is still alive after all. + return Qtrue; } @@ -616,6 +740,17 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_ return Qtrue; } +static VALUE _native_track_obj_allocation(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) { + ENFORCE_TYPE(weight, T_FIXNUM); + track_obj_allocation(recorder_instance, new_obj, NUM2UINT(weight)); + return Qtrue; +} + +static VALUE _native_record_obj_free(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE obj) { + record_obj_free(recorder_instance, obj); + return Qtrue; +} + static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */) { ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(profile, start_time); if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h index 2e871069019..aa492cacfa9 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.h +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.h @@ -1,6 +1,7 @@ #pragma once #include +#include typedef struct { int64_t cpu_time_ns; @@ -21,4 +22,6 @@ typedef struct sample_labels { void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels); void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint); +void track_obj_allocation(VALUE 
recorder_instance, VALUE new_object, unsigned int sample_weight); +void record_obj_free(VALUE recorder_instance, VALUE freed_object); VALUE enforce_recorder_instance(VALUE object); diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index 0b8ac838742..bbb071b8c40 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -314,34 +314,53 @@ def initialize(*_) # Can be used to enable/disable the Datadog::Profiling.allocation_count feature. # - # This feature is safe and enabled by default on Ruby 2.x, but has a few caveats on Ruby 3.x. - # - # Caveat 1 (severe): - # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 this is disabled by default because it - # can trigger a VM bug that causes a segmentation fault during garbage collection of Ractors - # (https://bugs.ruby-lang.org/issues/18464). We don't recommend using this feature on such Rubies. - # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0. - # - # Caveat 2 (annoyance): - # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets - # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on. - # Thus this feature is only usable if you're not using Ractors. - # - # Caveat 3 (severe): - # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, - # https://github.com/ruby/ruby/pull/7464) so that's an extra reason why it's not safe on those Rubies. - # This bug is fixed on Ruby versions 3.2.3 and 3.3.0. + # This feature is safe and enabled by default only on Rubies where we haven't identified issues. + # Refer to {Datadog::Profiling::Ext::IS_ALLOCATION_SAMPLING_SUPPORTED} for the details. # # @default `true` on Ruby 2.x and 3.1.4+, 3.2.3+ and 3.3.0+; `false` for Ruby 3.0 and unpatched Rubies.
option :allocation_counting_enabled do |o| + o.type :bool o.default do - RUBY_VERSION.start_with?('2.') || - (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION >= '3.1.4') || - (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION >= '3.2.3') || - RUBY_VERSION >= '3.3.' + Profiling::Ext::IS_ALLOCATION_SAMPLING_SUPPORTED end end + # Can be used to enable/disable collection of allocation profiles. + # + # This feature is alpha and disabled by default + # + # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED` environment variable as a boolean, otherwise `false` + option :experimental_allocation_enabled do |o| + o.type :bool + o.env 'DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' + o.default false + end + + # Can be used to enable/disable the collection of heap profiles. + # + # This feature is alpha and disabled by default + # + # @default `DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED` environment variable as a boolean, otherwise `false` + option :experimental_heap_enabled do |o| + o.type :bool + o.env 'DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' + o.default false + end + + # Can be used to configure the allocation sampling rate: a sample will be collected every x allocations. + # + # The lower the value, the more accuracy in allocation and heap tracking but the bigger the overhead. In + # particular, a value of 1 will sample ALL allocations. + # + # This feature is not supported in all Rubies. Refer to {Datadog::Profiling::Ext::IS_ALLOCATION_SAMPLING_SUPPORTED} + # + # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE` environment variable, otherwise `50`. + option :experimental_allocation_sample_rate do |o| + o.type :int + o.env 'DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' + o.default 50 + end + # Can be used to disable checking which version of `libmysqlclient` is being used by the `mysql2` gem. # # This setting is only used when the `mysql2` gem is installed.
diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index 0185b6b63d0..cb3182aab00 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -22,7 +22,9 @@ def initialize( # **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the # profiler overhead! dynamic_sampling_rate_enabled: true, - allocation_sample_every: 0 # Currently only for testing; Setting this to > 0 can add a lot of overhead! + allocation_sample_every:, + allocation_profiling_enabled:, + heap_profiling_enabled: ) unless dynamic_sampling_rate_enabled Datadog.logger.warn( @@ -30,13 +32,6 @@ def initialize( ) end - if allocation_counting_enabled && allocation_sample_every > 0 - Datadog.logger.warn( - "Enabled experimental allocation profiling: allocation_sample_every=#{allocation_sample_every}. This is " \ - 'experimental, not recommended, and will increase overhead!' 
- ) - end - self.class._native_initialize( self, thread_context_collector, @@ -46,6 +41,8 @@ def initialize( no_signals_workaround_enabled, dynamic_sampling_rate_enabled, allocation_sample_every, + allocation_profiling_enabled, + heap_profiling_enabled, ) @worker_thread = nil @failure_exception = nil diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index fe2ba74f758..895559d2495 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -41,10 +41,14 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) no_signals_workaround_enabled = no_signals_workaround_enabled?(settings) timeline_enabled = settings.profiling.advanced.experimental_timeline_enabled + allocation_sample_every = get_allocation_sample_every(settings) + allocation_profiling_enabled = enable_allocation_profiling?(settings, allocation_sample_every) + heap_profiling_enabled = enable_heap_profiling?(settings, allocation_profiling_enabled) recorder = Datadog::Profiling::StackRecorder.new( cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently - alloc_samples_enabled: false, # Always disabled for now -- work in progress + alloc_samples_enabled: allocation_profiling_enabled, + heap_samples_enabled: heap_profiling_enabled, ) thread_context_collector = Datadog::Profiling::Collectors::ThreadContext.new( recorder: recorder, @@ -58,7 +62,9 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: thread_context_collector, - allocation_sample_every: 0, + allocation_sample_every: allocation_sample_every, + allocation_profiling_enabled: allocation_profiling_enabled, + heap_profiling_enabled: heap_profiling_enabled, ) internal_metadata = { @@ -110,6 +116,52 @@ def 
self.build_profiler_component(settings:, agent_settings:, optional_tracer:) end end + private_class_method def self.get_allocation_sample_every(settings) + allocation_sample_rate = settings.profiling.advanced.experimental_allocation_sample_rate + + if allocation_sample_rate <= 0 + raise("Allocation sample rate must be a positive integer. Was #{allocation_sample_rate}") + end + + allocation_sample_rate + end + + private_class_method def self.enable_allocation_profiling?(settings, allocation_sample_every) + allocation_profiling_enabled = settings.profiling.advanced.experimental_allocation_enabled + + if allocation_profiling_enabled + Datadog.logger.warn( + "Enabled experimental allocation profiling: allocation_sample_rate=#{allocation_sample_every}. This is " \ + 'experimental, not recommended, and will increase overhead!' + ) + end + + if allocation_profiling_enabled && !Ext::IS_ALLOCATION_SAMPLING_SUPPORTED + Datadog.logger.warn( + "Current Ruby version (#{RUBY_VERSION}) does not officially support allocation profiling but it was " \ + 'requested. There may be unexpected problems during execution.' + ) + end + + allocation_profiling_enabled + end + + private_class_method def self.enable_heap_profiling?(settings, allocation_profiling_enabled) + heap_profiling_enabled = settings.profiling.advanced.experimental_heap_enabled + + if heap_profiling_enabled && !allocation_profiling_enabled + raise('Heap profiling requires allocation profiling to be enabled') + end + + if heap_profiling_enabled + Datadog.logger.warn( + 'Enabled experimental heap profiling. This is experimental, not recommended, and will increase overhead!'
+ ) + end + + heap_profiling_enabled + end + private_class_method def self.no_signals_workaround_enabled?(settings) # rubocop:disable Metrics/MethodLength setting_value = settings.profiling.advanced.no_signals_workaround_enabled legacy_ruby_that_should_use_workaround = RUBY_VERSION.start_with?('2.3.', '2.4.', '2.5.') diff --git a/lib/datadog/profiling/ext.rb b/lib/datadog/profiling/ext.rb index 2122ec7a95f..b3836e2e6f4 100644 --- a/lib/datadog/profiling/ext.rb +++ b/lib/datadog/profiling/ext.rb @@ -9,6 +9,30 @@ module Ext ENV_AGENTLESS = 'DD_PROFILING_AGENTLESS' ENV_ENDPOINT_COLLECTION_ENABLED = 'DD_PROFILING_ENDPOINT_COLLECTION_ENABLED' + # Allocation sampling is safe and supported on Ruby 2.x, but has a few caveats on Ruby 3.x. + # + # TL;DR: Supported on (2.x, 3.1.4+, 3.2.3+, and 3.3.0+). + # + # Caveat 1 (severe): + # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 this is disabled by default because it + # can trigger a VM bug that causes a segmentation fault during garbage collection of Ractors + # (https://bugs.ruby-lang.org/issues/18464). We don't recommend using this feature on such Rubies. + # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0. + # + # Caveat 2 (annoyance): + # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets + # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on. + # Thus this feature is only usable if you're not using Ractors. + # + # Caveat 3 (severe): + # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, + # https://github.com/ruby/ruby/pull/7464) so that's an extra reason why it's not safe on those Rubies. + # This bug is fixed on Ruby versions 3.2.3 and 3.3.0. 
+ IS_ALLOCATION_SAMPLING_SUPPORTED = RUBY_VERSION.start_with?('2.') || + (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION >= '3.1.4') || + (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION >= '3.2.3') || + RUBY_VERSION >= '3.3.' + module Transport module HTTP FORM_FIELD_TAG_ENV = 'env' diff --git a/lib/datadog/profiling/stack_recorder.rb b/lib/datadog/profiling/stack_recorder.rb index ccbe710dafd..3153a4982c5 100644 --- a/lib/datadog/profiling/stack_recorder.rb +++ b/lib/datadog/profiling/stack_recorder.rb @@ -4,7 +4,7 @@ module Profiling # Note that `record_sample` is only accessible from native code. # Methods prefixed with _native_ are implemented in `stack_recorder.c` class StackRecorder - def initialize(cpu_time_enabled:, alloc_samples_enabled:) + def initialize(cpu_time_enabled:, alloc_samples_enabled:, heap_samples_enabled:) # This mutex works in addition to the fancy C-level mutexes we have in the native side (see the docs there). # It prevents multiple Ruby threads calling serialize at the same time -- something like # `10.times { Thread.new { stack_recorder.serialize } }`. @@ -13,7 +13,7 @@ def initialize(cpu_time_enabled:, alloc_samples_enabled:) # accidentally happening. @no_concurrent_synchronize_mutex = Mutex.new - self.class._native_initialize(self, cpu_time_enabled, alloc_samples_enabled) + self.class._native_initialize(self, cpu_time_enabled, alloc_samples_enabled, heap_samples_enabled) end def serialize diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 585d41f9d44..38f98e0c0f2 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -7,9 +7,16 @@ let(:numeric_labels) { [] } let(:cpu_time_enabled) { true } let(:alloc_samples_enabled) { true } + # Disabling these by default since they require some extra setup and produce separate samples. + # Enabling this is tested in a particular context below. 
+ let(:heap_samples_enabled) { false } subject(:stack_recorder) do - described_class.new(cpu_time_enabled: cpu_time_enabled, alloc_samples_enabled: alloc_samples_enabled) + described_class.new( + cpu_time_enabled: cpu_time_enabled, + alloc_samples_enabled: alloc_samples_enabled, + heap_samples_enabled: heap_samples_enabled + ) end # NOTE: A lot of libdatadog integration behaviors are tested in the Collectors::Stack specs, since we need actual @@ -114,6 +121,7 @@ def slot_two_mutex_locked? context 'when all profile types are enabled' do let(:cpu_time_enabled) { true } let(:alloc_samples_enabled) { true } + let(:heap_samples_enabled) { true } it 'returns a pprof with the configured sample types' do expect(sample_types_from(decoded_profile)).to eq( @@ -121,6 +129,7 @@ def slot_two_mutex_locked? 'cpu-samples' => 'count', 'wall-time' => 'nanoseconds', 'alloc-samples' => 'count', + 'heap-live-samples' => 'count', ) end end @@ -128,12 +137,14 @@ def slot_two_mutex_locked? context 'when cpu-time is disabled' do let(:cpu_time_enabled) { false } let(:alloc_samples_enabled) { true } + let(:heap_samples_enabled) { true } it 'returns a pprof without the cpu-type type' do expect(sample_types_from(decoded_profile)).to eq( 'cpu-samples' => 'count', 'wall-time' => 'nanoseconds', 'alloc-samples' => 'count', + 'heap-live-samples' => 'count', ) end end @@ -141,12 +152,29 @@ def slot_two_mutex_locked? 
context 'when alloc-samples is disabled' do let(:cpu_time_enabled) { true } let(:alloc_samples_enabled) { false } + let(:heap_samples_enabled) { true } it 'returns a pprof without the alloc-samples type' do expect(sample_types_from(decoded_profile)).to eq( 'cpu-time' => 'nanoseconds', 'cpu-samples' => 'count', 'wall-time' => 'nanoseconds', + 'heap-live-samples' => 'count', + ) + end + end + + context 'when heap-live-samples is disabled' do + let(:cpu_time_enabled) { true } + let(:alloc_samples_enabled) { true } + let(:heap_samples_enabled) { false } + + it 'returns a pprof without the heap-live-samples type' do + expect(sample_types_from(decoded_profile)).to eq( + 'cpu-time' => 'nanoseconds', + 'cpu-samples' => 'count', + 'wall-time' => 'nanoseconds', + 'alloc-samples' => 'count', ) end end @@ -154,8 +182,9 @@ def slot_two_mutex_locked? context 'when all optional types are disabled' do let(:cpu_time_enabled) { false } let(:alloc_samples_enabled) { false } + let(:heap_samples_enabled) { false } - it 'returns a pprof with without the optional types' do + it 'returns a pprof without the optional types' do expect(sample_types_from(decoded_profile)).to eq( 'cpu-samples' => 'count', 'wall-time' => 'nanoseconds', @@ -198,7 +227,12 @@ def sample_types_from(decoded_profile) it 'encodes the sample with the metrics provided' do expect(samples.first.values) - .to eq(:'cpu-time' => 123, :'cpu-samples' => 456, :'wall-time' => 789, :'alloc-samples' => 4242) + .to eq( + :'cpu-time' => 123, + :'cpu-samples' => 456, + :'wall-time' => 789, + :'alloc-samples' => 4242, + ) end context 'when disabling an optional profile sample type' do @@ -302,6 +336,74 @@ def sample_types_from(decoded_profile) end end + describe 'heap samples' do + let(:metric_values1) { { 'cpu-time' => 101, 'cpu-samples' => 1, 'wall-time' => 789, 'alloc-samples' => 10 } } + let(:metric_values2) { { 'cpu-time' => 102, 'cpu-samples' => 2, 'wall-time' => 790, 'alloc-samples' => 11 } } + let(:labels) { { 'label_a' => 
'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } + let(:sample_rate) { 50 } + + let(:a_string) { 'a beautiful string' } + let(:another_string) { 'a fearsome string' } + let(:an_array) { [1..10] } + let(:another_array) { [-10..-1] } + let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true } } + let(:another_hash) { { 'z' => -1, 'y' => '-2', 'x' => false } } + + let(:allocated_objects) do + [a_string, an_array, another_string, another_array, a_hash, another_hash] + end + + let(:freed_objects) do + ['this rando', another_string, 'that rando', 'another rando', another_array, another_hash] + end + + let(:samples) { samples_from_pprof(encoded_pprof) } + + before do + allocated_objects.each_with_index do |obj, i| + # Heap sampling currently requires this 2-step process to first pass data about the allocated object... + described_class::Testing._native_track_obj_allocation(stack_recorder, obj, sample_rate) + # ...and then pass data about the allocation stacktrace (with 2 distinct stacktraces) + if i.even? 
+ Datadog::Profiling::Collectors::Stack::Testing + ._native_sample(Thread.current, stack_recorder, metric_values1, labels, numeric_labels, 400, false) + else + Datadog::Profiling::Collectors::Stack::Testing + ._native_sample(Thread.current, stack_recorder, metric_values2, labels, numeric_labels, 400, false) + end + end + + freed_objects.each do |obj| + described_class::Testing._native_record_obj_free(stack_recorder, obj) + end + end + + context 'when disabled' do + let(:heap_samples_enabled) { false } + + it 'are omitted from the profile' do + # We sample from 2 distinct locations + expect(samples.size).to eq(2) + expect(samples.filter { |s| s.values.key?('heap-live-samples') }).to eq([]) + end + end + + context 'when enabled' do + let(:heap_samples_enabled) { true } + + it 'are included in the profile' do + pending 'heap_recorder implementation is currently missing' + # We sample from 2 distinct locations but heap samples don't have the same + # labels as the others so they get duped.
+ expect(samples.size).to eq(4) + + expect(samples.sum { |s| s.values[:'heap-live-samples'] || 0 }).to eq( + [a_string, an_array, a_hash].size * sample_rate + ) + end + end + end + context 'when there is a failure during serialization' do before do allow(Datadog.logger).to receive(:error) From 6cff321748ee128025242d5aa97fa0e6b3d0d922 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Fri, 24 Nov 2023 16:51:16 +0000 Subject: [PATCH 02/17] [PROF-8667] Fix CI --- lib/datadog/core/configuration/settings.rb | 2 +- lib/datadog/profiling/component.rb | 2 +- lib/datadog/profiling/ext.rb | 14 ++++++++++---- .../collectors/cpu_and_wall_time_worker.rbs | 6 +++++- sig/datadog/profiling/component.rbs | 3 +++ sig/datadog/profiling/ext.rbs | 2 ++ sig/datadog/profiling/stack_recorder.rbs | 3 ++- 7 files changed, 24 insertions(+), 8 deletions(-) diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index bbb071b8c40..2e00ce81672 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -321,7 +321,7 @@ def initialize(*_) option :allocation_counting_enabled do |o| o.type :bool o.default do - Profiling::Ext::IS_ALLOCATION_SAMPLING_SUPPORTED + Profiling::Ext.allocation_sampling_supported? end end diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index 895559d2495..c72e3ffc36c 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -136,7 +136,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) ) end - if allocation_profiling_enabled && !Ext::IS_ALLOCATION_SAMPLING_SUPPORTED + if allocation_profiling_enabled && !Ext.allocation_sampling_supported? Datadog.logger.warn( "Current Ruby version (#{RUBY_VERSION}) does not officially support allocation profiling but it was " \ 'requested. There may be unexpected problems during execution.' 
diff --git a/lib/datadog/profiling/ext.rb b/lib/datadog/profiling/ext.rb index b3836e2e6f4..9d05128a3c7 100644 --- a/lib/datadog/profiling/ext.rb +++ b/lib/datadog/profiling/ext.rb @@ -9,6 +9,8 @@ module Ext ENV_AGENTLESS = 'DD_PROFILING_AGENTLESS' ENV_ENDPOINT_COLLECTION_ENABLED = 'DD_PROFILING_ENDPOINT_COLLECTION_ENABLED' + module_function + # Allocation sampling is safe and supported on Ruby 2.x, but has a few caveats on Ruby 3.x. # # TL;DR: Supported on (2.x, 3.1.4+, 3.2.3+, and 3.3.0+). @@ -28,10 +30,14 @@ module Ext # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, # https://github.com/ruby/ruby/pull/7464) so that's an extra reason why it's not safe on those Rubies. # This bug is fixed on Ruby versions 3.2.3 and 3.3.0. - IS_ALLOCATION_SAMPLING_SUPPORTED = RUBY_VERSION.start_with?('2.') || - (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION >= '3.1.4') || - (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION >= '3.2.3') || - RUBY_VERSION >= '3.3.' + # + # NOTE: This is a function to facilitate testing via const_stubbing. + def allocation_sampling_supported? + RUBY_VERSION.start_with?('2.') || + (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION >= '3.1.4') || + (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION >= '3.2.3') || + RUBY_VERSION >= '3.3.' 
+ end module Transport module HTTP diff --git a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs index d4876a7bd63..9b1bc1df3d5 100644 --- a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs +++ b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs @@ -14,7 +14,9 @@ module Datadog thread_context_collector: Datadog::Profiling::Collectors::ThreadContext, ?idle_sampling_helper: Datadog::Profiling::Collectors::IdleSamplingHelper, ?dynamic_sampling_rate_enabled: bool, - ?allocation_sample_every: Integer, + allocation_sample_every: Integer, + allocation_profiling_enabled: bool, + heap_profiling_enabled: bool, ) -> void def self._native_initialize: ( @@ -26,6 +28,8 @@ module Datadog bool no_signals_workaround_enabled, bool dynamic_sampling_rate_enabled, ::Integer allocation_sample_every, + bool allocation_profiling_enabled, + bool heap_profiling_enabled, ) -> true def start: (?on_failure_proc: ::Proc?) -> bool? 
diff --git a/sig/datadog/profiling/component.rbs b/sig/datadog/profiling/component.rbs index c3b8bc92240..d7bd1231b7a 100644 --- a/sig/datadog/profiling/component.rbs +++ b/sig/datadog/profiling/component.rbs @@ -19,6 +19,9 @@ module Datadog ) -> untyped def self.enable_gc_profiling?: (untyped settings) -> bool + def self.get_allocation_sample_every: (untyped settings) -> ::Integer + def self.enable_allocation_profiling?: (untyped settings, ::Integer allocation_sample_every) -> bool + def self.enable_heap_profiling?: (untyped settings, bool allocation_profiling_enabled) -> bool def self.no_signals_workaround_enabled?: (untyped settings) -> bool diff --git a/sig/datadog/profiling/ext.rbs b/sig/datadog/profiling/ext.rbs index 106a866e4f8..c35479cef9c 100644 --- a/sig/datadog/profiling/ext.rbs +++ b/sig/datadog/profiling/ext.rbs @@ -7,6 +7,8 @@ module Datadog ENV_AGENTLESS: "DD_PROFILING_AGENTLESS" ENV_ENDPOINT_COLLECTION_ENABLED: "DD_PROFILING_ENDPOINT_COLLECTION_ENABLED" + def self.allocation_sampling_supported?: -> bool + module Transport module HTTP FORM_FIELD_TAG_ENV: "env" diff --git a/sig/datadog/profiling/stack_recorder.rbs b/sig/datadog/profiling/stack_recorder.rbs index a3ff67b7e38..0f61f9c8ab9 100644 --- a/sig/datadog/profiling/stack_recorder.rbs +++ b/sig/datadog/profiling/stack_recorder.rbs @@ -3,12 +3,13 @@ module Datadog class StackRecorder @no_concurrent_synchronize_mutex: ::Thread::Mutex - def initialize: (cpu_time_enabled: bool, alloc_samples_enabled: bool) -> void + def initialize: (cpu_time_enabled: bool, alloc_samples_enabled: bool, heap_samples_enabled: bool) -> void def self._native_initialize: ( Datadog::Profiling::StackRecorder recorder_instance, bool cpu_time_enabled, bool alloc_samples_enabled, + bool heap_samples_enabled, ) -> true def serialize: () -> untyped From 043b3e730674dceff76fd18ae3926ac47b39f44f Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Fri, 24 Nov 2023 17:28:07 +0000 Subject: [PATCH 03/17] [PROF-8667] More CI fixes 
--- .../heap_recorder.c | 6 ++++ .../collectors/cpu_and_wall_time_worker.rb | 8 ++--- lib/datadog/profiling/component.rb | 32 ++++++++++++------- sig/datadog/profiling/component.rbs | 12 +++++++ 4 files changed, 42 insertions(+), 16 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c index 7d2ecc606ea..4f8ca413015 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.c +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -50,6 +50,10 @@ void start_heap_allocation_recording(heap_recorder* heap_recorder, VALUE new_obj }; } +// TODO: Remove when things get implemented +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" + void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) { partial_heap_recording *active_recording = &heap_recorder->active_recording; @@ -83,3 +87,5 @@ void heap_recorder_for_each_live_object( // TODO: Implement } +// TODO: Remove when things get implemented +#pragma GCC diagnostic pop diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index cb3182aab00..81530803ee3 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -18,13 +18,13 @@ def initialize( allocation_counting_enabled:, no_signals_workaround_enabled:, thread_context_collector:, - idle_sampling_helper: IdleSamplingHelper.new, + allocation_sample_every:, + allocation_profiling_enabled:, + heap_profiling_enabled:, # **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the # profiler overhead! 
dynamic_sampling_rate_enabled: true, - allocation_sample_every:, - allocation_profiling_enabled:, - heap_profiling_enabled: + idle_sampling_helper: IdleSamplingHelper.new ) unless dynamic_sampling_rate_enabled Datadog.logger.warn( diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index c72e3ffc36c..034c606b554 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -45,18 +45,8 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) allocation_profiling_enabled = enable_allocation_profiling?(settings, allocation_sample_every) heap_profiling_enabled = enable_heap_profiling?(settings, allocation_profiling_enabled) - recorder = Datadog::Profiling::StackRecorder.new( - cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently - alloc_samples_enabled: allocation_profiling_enabled, - heap_samples_enabled: heap_profiling_enabled, - ) - thread_context_collector = Datadog::Profiling::Collectors::ThreadContext.new( - recorder: recorder, - max_frames: settings.profiling.advanced.max_frames, - tracer: optional_tracer, - endpoint_collection_enabled: settings.profiling.advanced.endpoint.collection.enabled, - timeline_enabled: timeline_enabled, - ) + recorder = build_recorder(allocation_profiling_enabled, heap_profiling_enabled) + thread_context_collector = build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled) worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new( gc_profiling_enabled: enable_gc_profiling?(settings), allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled, @@ -79,6 +69,24 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) Profiling::Profiler.new(worker: worker, scheduler: scheduler) end + private_class_method def self.build_recorder(allocation_profiling_enabled, heap_profiling_enabled) + Datadog::Profiling::StackRecorder.new( + 
cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently + alloc_samples_enabled: allocation_profiling_enabled, + heap_samples_enabled: heap_profiling_enabled, + ) + end + + private_class_method def self.build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled) + Datadog::Profiling::Collectors::ThreadContext.new( + recorder: recorder, + max_frames: settings.profiling.advanced.max_frames, + tracer: optional_tracer, + endpoint_collection_enabled: settings.profiling.advanced.endpoint.collection.enabled, + timeline_enabled: timeline_enabled, + ) + end + private_class_method def self.build_profiler_exporter(settings, recorder, internal_metadata:) code_provenance_collector = (Profiling::Collectors::CodeProvenance.new if settings.profiling.advanced.code_provenance_enabled) diff --git a/sig/datadog/profiling/component.rbs b/sig/datadog/profiling/component.rbs index d7bd1231b7a..0186b0670b0 100644 --- a/sig/datadog/profiling/component.rbs +++ b/sig/datadog/profiling/component.rbs @@ -7,6 +7,18 @@ module Datadog optional_tracer: Datadog::Tracing::Tracer?, ) -> Datadog::Profiling::Profiler? + def self.build_recorder: ( + bool allocation_profiling_enabled, + bool heap_profiling_enabled, + ) -> Datadog::Profiling::StackRecorder + + def self.build_thread_context_collector: ( + untyped settings, + Datadog::Profiling::StackRecorder recorder, + Datadog::Tracing::Tracer? optional_tracer, + bool timeline_enabled, + ) -> Datadog::Profiling::Collectors::ThreadContext + def self.build_profiler_exporter: ( untyped settings, Datadog::Profiling::StackRecorder recorder, From 04b4f0141574dbd1a8763b921b8003400ec68c51 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Mon, 27 Nov 2023 16:26:26 +0000 Subject: [PATCH 04/17] [PROF-8667] Address initial comments, more tests and more test fixes. 
--- .../collectors_cpu_and_wall_time_worker.c | 16 +- .../collectors_thread_context.c | 2 +- .../stack_recorder.c | 4 +- .../stack_recorder.h | 2 +- lib/datadog/core/configuration/settings.rb | 23 +- .../collectors/cpu_and_wall_time_worker.rb | 2 - lib/datadog/profiling/component.rb | 55 +++-- lib/datadog/profiling/ext.rb | 30 --- .../collectors/cpu_and_wall_time_worker.rbs | 2 - sig/datadog/profiling/ext.rbs | 2 - .../core/configuration/settings_spec.rb | 111 ++++++++-- .../cpu_and_wall_time_worker_spec.rb | 208 +++++++++++++----- spec/datadog/profiling/component_spec.rb | 138 +++++++++++- spec/datadog/profiling/spec_helper.rb | 10 +- 14 files changed, 439 insertions(+), 166 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index aa32adf8764..71ab9451e7a 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -80,7 +80,6 @@ struct cpu_and_wall_time_worker_state { // These are immutable after initialization bool gc_profiling_enabled; - bool allocation_counting_enabled; bool no_signals_workaround_enabled; bool dynamic_sampling_rate_enabled; int allocation_sample_every; @@ -152,7 +151,6 @@ static VALUE _native_initialize( VALUE thread_context_collector_instance, VALUE gc_profiling_enabled, VALUE idle_sampling_helper_instance, - VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, VALUE allocation_sample_every, @@ -232,7 +230,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) { // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. 
rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new); - rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 10); + rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1); @@ -270,7 +268,6 @@ static VALUE _native_new(VALUE klass) { // being leaked. state->gc_profiling_enabled = false; - state->allocation_counting_enabled = false; state->no_signals_workaround_enabled = false; state->dynamic_sampling_rate_enabled = true; state->allocation_sample_every = 0; @@ -301,7 +298,6 @@ static VALUE _native_initialize( VALUE thread_context_collector_instance, VALUE gc_profiling_enabled, VALUE idle_sampling_helper_instance, - VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, VALUE allocation_sample_every, @@ -309,7 +305,6 @@ static VALUE _native_initialize( VALUE heap_profiling_enabled ) { ENFORCE_BOOLEAN(gc_profiling_enabled); - ENFORCE_BOOLEAN(allocation_counting_enabled); ENFORCE_BOOLEAN(no_signals_workaround_enabled); ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled); ENFORCE_TYPE(allocation_sample_every, T_FIXNUM); @@ -320,7 +315,6 @@ static VALUE _native_initialize( TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state); state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue); - state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue); state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue); 
state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue); state->allocation_sample_every = NUM2INT(allocation_sample_every); @@ -653,7 +647,7 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) { // because they may raise exceptions. install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal"); if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint); - if (state->allocation_counting_enabled || state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); + if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); if (state->heap_profiling_enabled) rb_tracepoint_enable(state->object_free_tracepoint); rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state); @@ -910,9 +904,9 @@ static void sleep_for(uint64_t time_ns) { } static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) { - bool is_profiler_running = active_sampler_instance_state != NULL; + bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled; - return is_profiler_running ? ULL2NUM(allocation_count) : Qnil; + return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil; } // Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint` @@ -942,7 +936,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before // beta), and having something here allows us to test the rest of feature, sampling decision aside. 
- if (state->allocation_profiling_enabled && state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) { + if (((allocation_count % state->allocation_sample_every) == 0)) { // Rescue against any exceptions that happen during sampling safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance); } diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 8183825ade7..5bda980132d 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -1203,7 +1203,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in } } - track_obj_allocation(state->recorder_instance, new_object, sample_weight); + track_object(state->recorder_instance, new_object, sample_weight); trigger_sample_for_thread( state, diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index 503c677f170..f3771a541ab 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -512,7 +512,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, } } -void track_obj_allocation(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) { +void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); if (state->heap_recorder == NULL) { @@ -742,7 +742,7 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_ static VALUE _native_track_obj_allocation(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) { ENFORCE_TYPE(weight, T_FIXNUM); - 
track_obj_allocation(recorder_instance, new_obj, NUM2UINT(weight)); + track_object(recorder_instance, new_obj, NUM2UINT(weight)); return Qtrue; } diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h index aa492cacfa9..a68e2d51f0d 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.h +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.h @@ -22,6 +22,6 @@ typedef struct sample_labels { void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels); void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint); -void track_obj_allocation(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight); +void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight); void record_obj_free(VALUE recorder_instance, VALUE freed_object); VALUE enforce_recorder_instance(VALUE object); diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index 2e00ce81672..f7a3baefa38 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -314,14 +314,14 @@ def initialize(*_) # Can be used to enable/disable the Datadog::Profiling.allocation_count feature. # - # This feature is safe and enabled by default only on Rubies where we haven't identified issues. - # Refer to {Datadog::Profiling::Ext::IS_ALLOC_SAMPLING_SUPPORTED} for the details. - # - # @default `true` on Ruby 2.x and 3.1.4+, 3.2.3+ and 3.3.0+; `false` for Ruby 3.0 and unpatched Rubies. + # This feature is now controlled via {:experimental_allocation_enabled} option :allocation_counting_enabled do |o| - o.type :bool - o.default do - Profiling::Ext.allocation_sampling_supported? 
+ o.after_set do + Datadog.logger.warn( + 'The profiling.advanced.allocation_counting_enabled setting has been deprecated for removal and no ' \ + 'longer does anything. Please remove it from your Datadog.configure block. ' \ + 'Allocation counting is now controlled by the `experimental_allocation_enabled` setting instead.' + ) end end @@ -329,6 +329,12 @@ def initialize(*_) # # This feature is alpha and disabled by default # + # @warn This feature is not supported/safe in all Rubies. Details in {Datadog::Profiling::Component} but + # in summary, this should be supported on Ruby 2.x, 3.1.4+, 3.2.3+ and 3.3.0+. Enabling it on + # unsupported Rubies may result in unexpected behaviour, including crashes. + # + # @note Heap profiles are not yet GA in the Datadog UI, get in touch if you want to help us test it. + # # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED` environment variable as a boolean, otherwise `false` option :experimental_allocation_enabled do |o| o.type :bool @@ -340,6 +346,9 @@ def initialize(*_) # # This feature is alpha and disabled by default # + # @warn To enable heap profiling you are required to also enable allocation profiling. + # @note Heap profiles are not yet GA in the Datadog UI, get in touch if you want to help us test it. 
+ # # @default `DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED` environment variable as a boolean, otherwise `false` option :experimental_heap_enabled do |o| o.type :bool diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index 81530803ee3..b9a9055398c 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -15,7 +15,6 @@ class CpuAndWallTimeWorker def initialize( gc_profiling_enabled:, - allocation_counting_enabled:, no_signals_workaround_enabled:, thread_context_collector:, allocation_sample_every:, @@ -37,7 +36,6 @@ def initialize( thread_context_collector, gc_profiling_enabled, idle_sampling_helper, - allocation_counting_enabled, no_signals_workaround_enabled, dynamic_sampling_rate_enabled, allocation_sample_every, diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index 034c606b554..76ccc4a65e3 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -49,7 +49,6 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) thread_context_collector = build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled) worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new( gc_profiling_enabled: enable_gc_profiling?(settings), - allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: thread_context_collector, allocation_sample_every: allocation_sample_every, @@ -60,6 +59,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) internal_metadata = { no_signals_workaround_enabled: no_signals_workaround_enabled, timeline_enabled: timeline_enabled, + allocation_sample_every: allocation_sample_every, }.freeze exporter = 
build_profiler_exporter(settings, recorder, internal_metadata: internal_metadata) @@ -128,37 +128,64 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) allocation_sample_rate = settings.profiling.advanced.experimental_allocation_sample_rate if allocation_sample_rate <= 0 - raise("Allocation sample rate must be a positive integer. Was #{allocation_sample_rate}") + raise ArgumentError, "Allocation sample rate must be a positive integer. Was #{allocation_sample_rate}" end allocation_sample_rate end private_class_method def self.enable_allocation_profiling?(settings, allocation_sample_every) - allocation_profiling_enabled = settings.profiling.advanced.experimental_allocation_enabled + unless settings.profiling.advanced.experimental_allocation_enabled + # Allocation profiling disabled, short-circuit out + return false + end - if allocation_profiling_enabled - Datadog.logger.warn( - "Enabled experimental allocation profiling: allocation_sample_rate=#{allocation_sample_every}. This is " \ - 'experimental, not recommended, and will increase overhead!' - ) + # Allocation sampling is safe and supported on Ruby 2.x, but has a few caveats on Ruby 3.x. + + # SEVERE - All configurations + # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, + # https://github.com/ruby/ruby/pull/7464) that makes this crash in any configuration. This bug is + # fixed on Ruby versions 3.2.3 and 3.3.0. + if RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION < '3.2.3' + raise "Sorry, allocation profiling is not supported in #{RUBY_VERSION}. Please use 3.1.x, 3.2.3 or 3.3.0+" end - if allocation_profiling_enabled && !Ext.allocation_sampling_supported? + # SEVERE - Only with Ractors + # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 allocation profiling can trigger a VM bug + # that causes a segmentation fault during garbage collection of Ractors + # (https://bugs.ruby-lang.org/issues/18464). 
We don't recommend using this feature on such Rubies. + # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0. + if RUBY_VERSION.start_with?('3.0.') || + (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION < '3.1.4') || + (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION < '3.2.3') + Datadog.logger.warn( + "Current Ruby version (#{RUBY_VERSION}) has a VM bug where enabling allocation profiling while using "\ + 'Ractors may cause unexpected issues, including crashes. This does not happen if Ractors are not used.' + ) + # ANNOYANCE - Only with Ractors + # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets + # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on. + elsif RUBY_VERSION.start_with?('3.') Datadog.logger.warn( - "Current Ruby version (#{RUBY_VERSION}) does not officially support allocation profiling but it was " \ - 'requested. There may be unexpected problems during execution.' + 'In all known versions of Ruby 3.x, using Ractors may result in allocation profiling unexpectedly stopping. '\ + 'Note that this stop has no impact on your application\'s stability or performance. This does not happen if ' \ + 'Ractors are not used.' + ) end - allocation_profiling_enabled + Datadog.logger.warn( + "Enabled experimental allocation profiling: allocation_sample_rate=#{allocation_sample_every}. This is " \ + 'experimental, not recommended, and will increase overhead!'
+ ) + + true end private_class_method def self.enable_heap_profiling?(settings, allocation_profiling_enabled) - heap_profiling_enabled = settings.profiling.advanced.experimental_allocation_enabled + heap_profiling_enabled = settings.profiling.advanced.experimental_heap_enabled if heap_profiling_enabled && !allocation_profiling_enabled - raise('Heap profiling requires allocation profiling to be enabled') + raise ArgumentError, 'Heap profiling requires allocation profiling to be enabled' end if heap_profiling_enabled diff --git a/lib/datadog/profiling/ext.rb b/lib/datadog/profiling/ext.rb index 9d05128a3c7..2122ec7a95f 100644 --- a/lib/datadog/profiling/ext.rb +++ b/lib/datadog/profiling/ext.rb @@ -9,36 +9,6 @@ module Ext ENV_AGENTLESS = 'DD_PROFILING_AGENTLESS' ENV_ENDPOINT_COLLECTION_ENABLED = 'DD_PROFILING_ENDPOINT_COLLECTION_ENABLED' - module_function - - # Allocation sampling is safe and supported on Ruby 2.x, but has a few caveats on Ruby 3.x. - # - # TL;DR: Supported on (2.x, 3.1.4+, 3.2.3+, and 3.3.0+). - # - # Caveat 1 (severe): - # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 this is disabled by default because it - # can trigger a VM bug that causes a segmentation fault during garbage collection of Ractors - # (https://bugs.ruby-lang.org/issues/18464). We don't recommend using this feature on such Rubies. - # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0. - # - # Caveat 2 (annoyance): - # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets - # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on. - # Thus this feature is only usable if you're not using Ractors. - # - # Caveat 3 (severe): - # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, - # https://github.com/ruby/ruby/pull/7464) so that's an extra reason why it's not safe on those Rubies. 
- # This bug is fixed on Ruby versions 3.2.3 and 3.3.0. - # - # NOTE: This is a function to facilitate testing via const_stubbing. - def allocation_sampling_supported? - RUBY_VERSION.start_with?('2.') || - (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION >= '3.1.4') || - (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION >= '3.2.3') || - RUBY_VERSION >= '3.3.' - end - module Transport module HTTP FORM_FIELD_TAG_ENV = 'env' diff --git a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs index 9b1bc1df3d5..22d2041472d 100644 --- a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs +++ b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs @@ -9,7 +9,6 @@ module Datadog def initialize: ( gc_profiling_enabled: bool, - allocation_counting_enabled: bool, no_signals_workaround_enabled: bool, thread_context_collector: Datadog::Profiling::Collectors::ThreadContext, ?idle_sampling_helper: Datadog::Profiling::Collectors::IdleSamplingHelper, @@ -24,7 +23,6 @@ module Datadog ThreadContext thread_context_collector, bool gc_profiling_enabled, IdleSamplingHelper idle_sampling_helper, - bool allocation_counting_enabled, bool no_signals_workaround_enabled, bool dynamic_sampling_rate_enabled, ::Integer allocation_sample_every, diff --git a/sig/datadog/profiling/ext.rbs b/sig/datadog/profiling/ext.rbs index c35479cef9c..106a866e4f8 100644 --- a/sig/datadog/profiling/ext.rbs +++ b/sig/datadog/profiling/ext.rbs @@ -7,8 +7,6 @@ module Datadog ENV_AGENTLESS: "DD_PROFILING_AGENTLESS" ENV_ENDPOINT_COLLECTION_ENABLED: "DD_PROFILING_ENDPOINT_COLLECTION_ENABLED" - def self.allocation_sampling_supported?: -> bool - module Transport module HTTP FORM_FIELD_TAG_ENV: "env" diff --git a/spec/datadog/core/configuration/settings_spec.rb b/spec/datadog/core/configuration/settings_spec.rb index 8e676e504d6..45e542670d2 100644 --- a/spec/datadog/core/configuration/settings_spec.rb +++ 
b/spec/datadog/core/configuration/settings_spec.rb @@ -497,39 +497,108 @@ end end - describe '#allocation_counting_enabled' do - subject(:allocation_counting_enabled) { settings.profiling.advanced.allocation_counting_enabled } + describe '#experimental_allocation_enabled' do + subject(:experimental_allocation_enabled) { settings.profiling.advanced.experimental_allocation_enabled } - before { stub_const('RUBY_VERSION', testing_version) } + context 'when DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' do + around do |example| + ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' => environment) do + example.run + end + end - context 'on Ruby 2.x' do - let(:testing_version) { '2.3.0 ' } - it { is_expected.to be true } - end + context 'is not defined' do + let(:environment) { nil } - ['3.0.0', '3.1.0', '3.1.3', '3.2.0', '3.2.2'].each do |broken_ruby| - context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/18464 (#{broken_ruby})" do - let(:testing_version) { broken_ruby } it { is_expected.to be false } end + + [true, false].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value } + end + end end + end - ['3.1.4', '3.2.3', '3.3.0'].each do |fixed_ruby| - context "on a Ruby 3 version where https://bugs.ruby-lang.org/issues/18464 is fixed (#{fixed_ruby})" do - let(:testing_version) { fixed_ruby } - it { is_expected.to be true } + describe '#experimental_allocation_enabled=' do + it 'updates the #experimental_allocation_enabled setting' do + expect { settings.profiling.advanced.experimental_allocation_enabled = true } + .to change { settings.profiling.advanced.experimental_allocation_enabled } + .from(false) + .to(true) + end + end + + describe '#experimental_heap_enabled' do + subject(:experimental_heap_enabled) { settings.profiling.advanced.experimental_heap_enabled } + + context 'when DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' do + around do |example| + 
ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } + + it { is_expected.to be false } + end + + [true, false].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value } + end end end end - describe '#allocation_counting_enabled=' do - it 'updates the #allocation_counting_enabled setting' do - settings.profiling.advanced.allocation_counting_enabled = true + describe '#experimental_heap_enabled=' do + it 'updates the #experimental_heap_enabled setting' do + expect { settings.profiling.advanced.experimental_heap_enabled = true } + .to change { settings.profiling.advanced.experimental_heap_enabled } + .from(false) + .to(true) + end + end - expect { settings.profiling.advanced.allocation_counting_enabled = false } - .to change { settings.profiling.advanced.allocation_counting_enabled } - .from(true) - .to(false) + describe '#experimental_allocation_sample_rate' do + subject(:experimental_allocation_sample_rate) { settings.profiling.advanced.experimental_allocation_sample_rate } + + context 'when DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' do + around do |example| + ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } + + it { is_expected.to be 50 } + end + + [100, 30.5].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value.to_i } + end + end + end + end + + describe '#experimental_allocation_sample_rate=' do + it 'updates the #experimental_allocation_sample_rate setting' do + expect { settings.profiling.advanced.experimental_allocation_sample_rate = 100 } + .to change { settings.profiling.advanced.experimental_allocation_sample_rate } + .from(50) + .to(100) end end diff --git 
a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index e48655979d4..2587d0bd505 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -7,10 +7,12 @@ # drops support for these Rubies globally let(:described_class) { Datadog::Profiling::Collectors::CpuAndWallTimeWorker } - let(:recorder) { build_stack_recorder } let(:endpoint_collection_enabled) { true } let(:gc_profiling_enabled) { true } - let(:allocation_counting_enabled) { true } + let(:allocation_sample_every) { 50 } + let(:allocation_profiling_enabled) { false } + let(:heap_profiling_enabled) { false } + let(:recorder) { build_stack_recorder(true) } let(:no_signals_workaround_enabled) { false } let(:timeline_enabled) { false } let(:options) { {} } @@ -18,9 +20,11 @@ subject(:cpu_and_wall_time_worker) do described_class.new( gc_profiling_enabled: gc_profiling_enabled, - allocation_counting_enabled: allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: build_thread_context_collector(recorder), + allocation_sample_every: allocation_sample_every, + allocation_profiling_enabled: allocation_profiling_enabled, + heap_profiling_enabled: heap_profiling_enabled, **options ) end @@ -56,13 +60,21 @@ end describe '#start' do + let(:expected_worker_initialization_error) { nil } + subject(:start) do cpu_and_wall_time_worker.start wait_until_running end after do - cpu_and_wall_time_worker.stop + # If one of the tests makes cpu_and_wall_time_worker fail to initialize, it will fail again + # here in the after. We ignore the error if it is expected. 
+ begin + cpu_and_wall_time_worker.stop + rescue => e + raise if !expected_worker_initialization_error || !e.is_a?(expected_worker_initialization_error) + end end it 'creates a new thread' do @@ -434,25 +446,22 @@ end end - context 'when allocation sampling is enabled' do - let(:options) { { allocation_sample_every: 1 } } + context 'when allocation profiling is enabled' do + let(:allocation_profiling_enabled) { true } + let(:test_num_allocated_object) { 123 } + # Sample all allocations to get easy-to-reason about numbers that are also not flaky in CI. + let(:allocation_sample_every) { 1 } before do allow(Datadog.logger).to receive(:warn) end - it 'logs a warning message mentioning this is experimental' do - expect(Datadog.logger).to receive(:warn).with(/Enabled experimental allocation profiling/) - - start - end - it 'records allocated objects' do stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo)) start - 123.times { CpuAndWallTimeWorkerSpec::TestStruct.new } + test_num_allocated_object.times { CpuAndWallTimeWorkerSpec::TestStruct.new } allocation_line = __LINE__ - 1 cpu_and_wall_time_worker.stop @@ -461,7 +470,7 @@ samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current) .find { |s| s.labels[:'allocation class'] == 'CpuAndWallTimeWorkerSpec::TestStruct' } - expect(allocation_sample.values).to include(:'alloc-samples' => 123) + expect(allocation_sample.values).to include(:'alloc-samples' => test_num_allocated_object) expect(allocation_sample.locations.first.lineno).to eq allocation_line end @@ -482,7 +491,7 @@ end context 'when allocation sampling is disabled' do - let(:options) { { allocation_sample_every: 0 } } + let(:allocation_profiling_enabled) { false } it 'does not record allocations' do stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo)) @@ -497,6 +506,61 @@ end end + context 'when heap profiling is enabled' do + let(:allocation_profiling_enabled) { true } + let(:heap_profiling_enabled) { true } + 
let(:test_num_allocated_object) { 123 } + # Sample all allocations to get easy-to-reason about numbers that are also not flaky in CI. + let(:allocation_sample_every) { 1 } + + before do + allow(Datadog.logger).to receive(:warn) + end + + it 'records live heap objects' do + pending "heap profiling isn't actually implemented just yet" + + stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo)) + + start + + test_num_allocated_object.times { CpuAndWallTimeWorkerSpec::TestStruct.new } + allocation_line = __LINE__ - 1 + + cpu_and_wall_time_worker.stop + + total_heap_sampled_for_test_struct = samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current) + .filter { |s| s.locations.first.lineno == allocation_line && s.locations.first.path == __FILE__ } + .sum { |s| s.values[:'heap-live-samples'] } + + expect(total_heap_sampled_for_test_struct).to eq test_num_allocated_object + end + + context 'but allocation profiling is disabled' do + let(:allocation_profiling_enabled) { false } + let(:expected_worker_initialization_error) { ArgumentError } + it 'raises an ArgumentError' do + expect { cpu_and_wall_time_worker }.to raise_error(ArgumentError, /requires allocation profiling/) + end + end + end + + context 'when heap profiling is disabled' do + let(:allocation_profiling_enabled) { false } + + it 'does not record allocations' do + stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo)) + + start + + 123.times { CpuAndWallTimeWorkerSpec::TestStruct.new } + + cpu_and_wall_time_worker.stop + + expect(samples_from_pprof(recorder.serialize!).map(&:values)).to all(include(:'heap-live-samples' => 0)) + end + end + context 'Process::Waiter crash regression tests' do # On Ruby 2.3 to 2.6, there's a crash when accessing instance variables of the `process_waiter_thread`, # see https://bugs.ruby-lang.org/issues/17807 . @@ -575,7 +639,7 @@ # so this must be disabled when interacting with Ractors. 
let(:gc_profiling_enabled) { false } # ...same thing for the tracepoint for allocation counting/profiling :( - let(:allocation_counting_enabled) { false } + let(:allocation_profiling_enabled) { false } describe 'handle_sampling_signal' do include_examples 'does not trigger a sample', @@ -747,61 +811,83 @@ it { is_expected.to be nil } end - context 'when CpuAndWallTimeWorker has been started' do - before do - cpu_and_wall_time_worker.start - wait_until_running - end + context 'when allocation profiling is enabled' do + let(:allocation_profiling_enabled) { true } - after do - cpu_and_wall_time_worker.stop - end + context 'when CpuAndWallTimeWorker has been started' do + before do + cpu_and_wall_time_worker.start + wait_until_running + end - it 'returns the number of allocations between two calls of the method' do - # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all - # it needs to - new_object = proc { Object.new } - 1.times(&new_object) + after do + cpu_and_wall_time_worker.stop + end - before_allocations = described_class._native_allocation_count - 100.times(&new_object) - after_allocations = described_class._native_allocation_count + it 'returns the number of allocations between two calls of the method' do + # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all + # it needs to + new_object = proc { Object.new } + 1.times(&new_object) - expect(after_allocations - before_allocations).to be 100 - end + before_allocations = described_class._native_allocation_count + 100.times(&new_object) + after_allocations = described_class._native_allocation_count - it 'returns different numbers of allocations for different threads' do - # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all - # it needs to - new_object = proc { Object.new } - 1.times(&new_object) + expect(after_allocations - 
before_allocations).to be 100 + end - t1_can_run = Queue.new - t1_has_run = Queue.new - before_t1 = nil - after_t1 = nil + it 'returns different numbers of allocations for different threads' do + # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all + # it needs to + new_object = proc { Object.new } + 1.times(&new_object) - background_t1 = Thread.new do - before_t1 = described_class._native_allocation_count - t1_can_run.pop + t1_can_run = Queue.new + t1_has_run = Queue.new + before_t1 = nil + after_t1 = nil - 100.times(&new_object) - after_t1 = described_class._native_allocation_count - t1_has_run << true + background_t1 = Thread.new do + before_t1 = described_class._native_allocation_count + t1_can_run.pop + + 100.times(&new_object) + after_t1 = described_class._native_allocation_count + t1_has_run << true + end + + before_allocations = described_class._native_allocation_count + t1_can_run << true + t1_has_run.pop + after_allocations = described_class._native_allocation_count + + background_t1.join + + # This test checks that even though we observed 100 allocations in a background thread t1, the counters for + # the current thread were not affected by this change + + expect(after_t1 - before_t1).to be 100 + expect(after_allocations - before_allocations).to be < 10 end + end - before_allocations = described_class._native_allocation_count - t1_can_run << true - t1_has_run.pop - after_allocations = described_class._native_allocation_count + context 'when allocation profiling is disabled' do + let(:allocation_profiling_enabled) { false } - background_t1.join + it 'still returns nil after a bunch of allocations' do + # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all + # it needs to + new_object = proc { Object.new } + 1.times(&new_object) - # This test checks that even though we observed 100 allocations in a background thread t1, the counters for - 
# the current thread were not affected by this change + before_allocations = described_class._native_allocation_count + 100.times(&new_object) + after_allocations = described_class._native_allocation_count - expect(after_t1 - before_t1).to be 100 - expect(after_allocations - before_allocations).to be < 10 + expect(before_allocations).to be nil + expect(after_allocations).to be nil + end end end end @@ -823,9 +909,11 @@ def samples_from_pprof_without_gc_and_overhead(pprof_data) def build_another_instance described_class.new( gc_profiling_enabled: gc_profiling_enabled, - allocation_counting_enabled: allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, - thread_context_collector: build_thread_context_collector(build_stack_recorder) + thread_context_collector: build_thread_context_collector(build_stack_recorder), + allocation_sample_every: allocation_sample_every, + allocation_profiling_enabled: allocation_profiling_enabled, + heap_profiling_enabled: heap_profiling_enabled ) end diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index 7d806e3b543..df0293e7366 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -76,10 +76,11 @@ expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with( gc_profiling_enabled: anything, - allocation_counting_enabled: anything, no_signals_workaround_enabled: :no_signals_result, thread_context_collector: instance_of(Datadog::Profiling::Collectors::ThreadContext), - allocation_sample_every: 0, + allocation_sample_every: kind_of(Integer), + allocation_profiling_enabled: false, + heap_profiling_enabled: false, ) build_profiler_component @@ -119,34 +120,146 @@ end end - context 'when allocation_counting_enabled is enabled' do + context 'when allocation profiling is enabled' do before do - settings.profiling.advanced.allocation_counting_enabled = true + 
settings.profiling.advanced.experimental_allocation_enabled = true + stub_const('RUBY_VERSION', testing_version) + end + + context 'on Ruby 2.x' do + let(:testing_version) { '2.3.0 ' } + + it 'initializes a CpuAndWallTimeWorker with allocation_profiling set to true and warns' do + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( + allocation_profiling_enabled: true, + ) + + expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/) + expect(Datadog.logger).to_not receive(:warn).with(/Ractor/) + + build_profiler_component + end + end + + ['3.2.0', '3.2.1', '3.2.2'].each do |broken_ruby| + context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/19482 (#{broken_ruby})" do + let(:testing_version) { broken_ruby } + + it 'raises an error about lack of support during initialization of a CpuAndWallTimeWorker' do + expect { build_profiler_component }.to raise_error(/not supported.+Please use/) + end + end + end + + ['3.0.0', '3.1.0', '3.1.3'].each do |broken_ractors_ruby| + context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/18464 (#{broken_ractors_ruby})" do + let(:testing_version) { broken_ractors_ruby } + + it 'initializes a CpuAndWallTimeWorker with allocation_profiling forcibly set to true and warns' do + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( + allocation_profiling_enabled: true, + ) + + expect(Datadog.logger).to receive(:warn).with(/Ractors.+crashes/) + expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/) + + build_profiler_component + end + end + end + + ['3.1.4', '3.2.3', '3.3.0'].each do |fixed_ruby| + context "on a Ruby 3 version where https://bugs.ruby-lang.org/issues/18464 is fixed (#{fixed_ruby})" do + let(:testing_version) { fixed_ruby } + it 'initializes a CpuAndWallTimeWorker with allocation_profiling forcibly set to true and warns' do + 
expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( + allocation_profiling_enabled: true, + ) + + expect(Datadog.logger).to receive(:warn).with(/Ractors.+stopping/) + expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/) + + build_profiler_component + end + end + end + end + + context 'when allocation profiling is disabled' do + before do + settings.profiling.advanced.experimental_allocation_enabled = false end - it 'initializes a CpuAndWallTimeWorker collector with allocation_counting_enabled set to true' do + it 'initializes a CpuAndWallTimeWorker collector with allocation_profiling set to false' do expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( - allocation_counting_enabled: true, + allocation_profiling_enabled: false, ) build_profiler_component end end - context 'when allocation_counting_enabled is disabled' do + context 'when heap profiling is enabled' do + before do + settings.profiling.advanced.experimental_heap_enabled = true + # Universally supported ruby version for allocation profiling, we don't want to test those + # edge cases here + stub_const('RUBY_VERSION', '2.7.2') + end + + context 'and allocation profiling disabled' do + before do + settings.profiling.advanced.experimental_allocation_enabled = false + end + + it 'raises an ArgumentError during CpuAndWallTimeWorker initialization' do + expect { build_profiler_component }.to raise_error(ArgumentError, /requires allocation profiling/) + end + end + + context 'and allocation profiling enabled and supported' do + before do + settings.profiling.advanced.experimental_allocation_enabled = true + end + + it 'initializes a CpuAndWallTimeWorker with heap_profiling set to true and warns' do + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( + allocation_profiling_enabled: true, + ) + + expect(Datadog.logger).to 
receive(:warn).with(/experimental allocation profiling/) + expect(Datadog.logger).to receive(:warn).with(/experimental heap profiling/) + + build_profiler_component + end + end + end + + context 'when heap profiling is disabled' do before do - settings.profiling.advanced.allocation_counting_enabled = false + settings.profiling.advanced.experimental_heap_enabled = false end - it 'initializes a CpuAndWallTimeWorker collector with allocation_counting_enabled set to false' do + it 'initializes a CpuAndWallTimeWorker collector with allocation_profiling set to false' do expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( - allocation_counting_enabled: false, + heap_profiling_enabled: false, ) build_profiler_component end end + describe '#allocation_counting_enabled=' do + it 'updates the #allocation_counting_enabled setting' do + settings.profiling.advanced.allocation_counting_enabled = true + + expect { settings.profiling.advanced.allocation_counting_enabled = false } + .to change { settings.profiling.advanced.allocation_counting_enabled } + .from(true) + .to(false) + end + end + it 'sets up the Profiler with the CpuAndWallTimeWorker collector' do expect(Datadog::Profiling::Profiler).to receive(:new).with( worker: instance_of(Datadog::Profiling::Collectors::CpuAndWallTimeWorker), @@ -163,17 +276,20 @@ build_profiler_component end - it 'sets up the Exporter internal_metadata with no_signals_workaround_enabled and timeline_enabled settings' do + it 'sets up the Exporter internal_metadata with relevant settings' do allow(Datadog::Profiling::Collectors::ThreadContext).to receive(:new) allow(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new) expect(described_class).to receive(:no_signals_workaround_enabled?).and_return(:no_signals_result) expect(settings.profiling.advanced).to receive(:experimental_timeline_enabled).and_return(:timeline_result) + expect(settings.profiling.advanced).to 
receive(:experimental_allocation_sample_rate) + .and_return(123) expect(Datadog::Profiling::Exporter).to receive(:new).with( hash_including( internal_metadata: { no_signals_workaround_enabled: :no_signals_result, timeline_enabled: :timeline_result, + allocation_sample_every: 123, } ) ) diff --git a/spec/datadog/profiling/spec_helper.rb b/spec/datadog/profiling/spec_helper.rb index 6d15cf22170..75f17183e9e 100644 --- a/spec/datadog/profiling/spec_helper.rb +++ b/spec/datadog/profiling/spec_helper.rb @@ -72,8 +72,14 @@ def samples_for_thread(samples, thread) samples.select { |sample| object_id_from(sample.labels.fetch(:'thread id')) == thread.object_id } end - def build_stack_recorder - Datadog::Profiling::StackRecorder.new(cpu_time_enabled: true, alloc_samples_enabled: true) + # We disable heap_sample collection by default in tests since it requires some extra mocking/ + # setup for it to properly work. + def build_stack_recorder(heap_samples_enabled = false) + Datadog::Profiling::StackRecorder.new( + cpu_time_enabled: true, + alloc_samples_enabled: true, + heap_samples_enabled: heap_samples_enabled + ) end end From 350058f8241f4f2fa83372c547daae9a78d85321 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Mon, 27 Nov 2023 17:01:50 +0000 Subject: [PATCH 05/17] [PROF-8667] Slightly simpler skipping of after on initialization error --- .../collectors/cpu_and_wall_time_worker_spec.rb | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 2587d0bd505..aefb6ac6210 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -67,14 +67,9 @@ wait_until_running end + @skip_cleanup = false after do - # If one of the tests makes cpu_and_wall_time_worker fail to initialize, it will fail again - # here in the after. 
We ignore the error if it is expected. - begin - cpu_and_wall_time_worker.stop - rescue => e - raise if !expected_worker_initialization_error || !e.is_a?(expected_worker_initialization_error) - end + cpu_and_wall_time_worker.stop unless @skip_cleanup end it 'creates a new thread' do @@ -538,8 +533,9 @@ context 'but allocation profiling is disabled' do let(:allocation_profiling_enabled) { false } - let(:expected_worker_initialization_error) { ArgumentError } it 'raises an ArgumentError' do + # We don't want the after hook to execute cpu_and_wall_time_worker.stop otherwise it'll reraise the error + @skip_cleanup = true expect { cpu_and_wall_time_worker }.to raise_error(ArgumentError, /requires allocation profiling/) end end From 4614b2b66e1141d2695bdf4c7aeed5b4e662b01d Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Tue, 28 Nov 2023 10:42:11 +0000 Subject: [PATCH 06/17] [PROF-8667] Fix two extra issues with CI checks --- ext/ddtrace_profiling_native_extension/stack_recorder.c | 1 + .../profiling/collectors/cpu_and_wall_time_worker_spec.rb | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index f3771a541ab..0b431283b93 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -571,6 +571,7 @@ static void add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio (ddog_prof_Sample) { .locations = iteration_data.locations, .values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count}, + .labels = {0}, }, 0 ); diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index aefb6ac6210..a187497fa96 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ 
b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -525,7 +525,7 @@ cpu_and_wall_time_worker.stop total_heap_sampled_for_test_struct = samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current) - .filter { |s| s.locations.first.lineno == allocation_line && s.locations.first.path == __FILE__ } + .select { |s| s.locations.first.lineno == allocation_line && s.locations.first.path == __FILE__ } .sum { |s| s.values[:'heap-live-samples'] } expect(total_heap_sampled_for_test_struct).to eq test_num_allocated_object From e61e72e8c0cc151e724bc0e3b9aa9b33e29b7449 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Tue, 28 Nov 2023 10:51:09 +0000 Subject: [PATCH 07/17] [PROF-8667] One more missing filter->select. --- spec/datadog/profiling/stack_recorder_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 38f98e0c0f2..e6b8c8dd7d1 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -384,7 +384,7 @@ def sample_types_from(decoded_profile) it 'are ommitted from the profile' do # We sample from 2 distinct locations expect(samples.size).to eq(2) - expect(samples.filter { |s| s.values.key?('heap-live-samples') }).to eq([]) + expect(samples.select { |s| s.values.key?('heap-live-samples') }).to eq([]) end end From b9397665898fc5b181aa5ac3af6afc5faf5ba140 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Thu, 30 Nov 2023 12:32:13 +0000 Subject: [PATCH 08/17] [PROF-8667] More comment addressing --- .../collectors_cpu_and_wall_time_worker.c | 2 +- .../heap_recorder.c | 42 +++++++++-- .../heap_recorder.h | 11 ++- .../libdatadog_helpers.c | 33 ++++----- .../libdatadog_helpers.h | 9 ++- .../stack_recorder.c | 72 ++++++++++--------- lib/datadog/core/configuration/settings.rb | 4 +- lib/datadog/profiling/component.rb | 16 +++-- .../core/configuration/settings_spec.rb 
| 8 +++ .../cpu_and_wall_time_worker_spec.rb | 51 +++++++------ spec/datadog/profiling/component_spec.rb | 23 +++--- spec/datadog/profiling/spec_helper.rb | 2 +- spec/datadog/profiling/stack_recorder_spec.rb | 35 ++++++--- 13 files changed, 182 insertions(+), 126 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 71ab9451e7a..362855c5af8 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -936,7 +936,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before // beta), and having something here allows us to test the rest of feature, sampling decision aside. - if (((allocation_count % state->allocation_sample_every) == 0)) { + if (allocation_count % state->allocation_sample_every == 0) { // Rescue against any exceptions that happen during sampling safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance); } diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c index 4f8ca413015..8effc6b4971 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.c +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -22,6 +22,8 @@ struct heap_recorder { // ========================== // Heap Recorder External API // +// WARN: All these APIs should support receiving a NULL heap_recorder, resulting in a noop. +// // WARN: Except for ::heap_recorder_for_each_live_object, we always assume interaction with these APIs // happens under the GVL. 
// @@ -38,10 +40,26 @@ heap_recorder* heap_recorder_new(void) { } void heap_recorder_free(struct heap_recorder* recorder) { + if (recorder == NULL) { + return; + } + ruby_xfree(recorder); } -void start_heap_allocation_recording(heap_recorder* heap_recorder, VALUE new_obj, unsigned int weight) { +// TODO: Remove when things get implemented +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" + +void heap_recorder_after_fork(heap_recorder *heap_recorder) { + // TODO: Implement +} + +void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight) { + if (heap_recorder == NULL) { + return; + } + heap_recorder->active_recording = (partial_heap_recording) { .obj = new_obj, .object_data = (live_object_data) { @@ -50,11 +68,11 @@ void start_heap_allocation_recording(heap_recorder* heap_recorder, VALUE new_obj }; } -// TODO: Remove when things get implemented -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" - void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) { + if (heap_recorder == NULL) { + return; + } + partial_heap_recording *active_recording = &heap_recorder->active_recording; VALUE new_obj = active_recording->obj; @@ -72,18 +90,30 @@ void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_pro // WARN: This can get called during Ruby GC. NO HEAP ALLOCATIONS OR EXCEPTIONS ARE ALLOWED. 
void record_heap_free(heap_recorder *heap_recorder, VALUE obj) { + if (heap_recorder == NULL) { + return; + } + // TODO: Implement } void heap_recorder_flush(heap_recorder *heap_recorder) { + if (heap_recorder == NULL) { + return; + } + // TODO: Implement } void heap_recorder_for_each_live_object( heap_recorder *heap_recorder, - void (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg), + bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg), void *for_each_callback_extra_arg, bool with_gvl) { + if (heap_recorder == NULL) { + return; + } + // TODO: Implement } diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.h b/ext/ddtrace_profiling_native_extension/heap_recorder.h index e189e206df2..e7a09a63477 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.h +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.h @@ -9,6 +9,9 @@ // not yet observed are deemed as alive and can be iterated on to produce a // live heap profile. // +// NOTE: All public APIs of heap_recorder support receiving a NULL heap_recorder +// in which case the behaviour will be a noop. +// // WARN: Unless otherwise stated the heap recorder APIs assume calls are done // under the GVL. typedef struct heap_recorder heap_recorder; @@ -38,6 +41,9 @@ heap_recorder* heap_recorder_new(void); // Free a previously initialized heap recorder. void heap_recorder_free(heap_recorder *heap_recorder); +// Do any cleanup needed after forking. +void heap_recorder_after_fork(heap_recorder *heap_recorder); + // Start a heap allocation recording on the heap recorder for a new object. // // This heap allocation recording needs to be ended via ::end_heap_allocation_recording @@ -88,7 +94,8 @@ void heap_recorder_flush(heap_recorder *heap_recorder); // A callback function that shall be called for each live object being tracked // by the heap recorder. 
Alongside the iteration_data for each live object, // a second argument will be forwarded with the contents of the optional -// for_each_callback_extra_arg. +// for_each_callback_extra_arg. Iteration will continue until the callback +// returns false or we run out of objects. // @param for_each_callback_extra_arg // Optional (NULL if empty) extra data that should be passed to the // callback function alongside the data for each live tracked object. @@ -96,6 +103,6 @@ void heap_recorder_flush(heap_recorder *heap_recorder); // True if we're calling this while holding the GVL, false otherwise. void heap_recorder_for_each_live_object( heap_recorder *heap_recorder, - void (*for_each_callback)(heap_recorder_iteration_data data, void* extra_arg), + bool (*for_each_callback)(heap_recorder_iteration_data data, void* extra_arg), void *for_each_callback_extra_arg, bool with_gvl); diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c index 07f2236decc..f6f8b69b1b9 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c @@ -41,29 +41,22 @@ ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type) { } } -#define MAX_DDOG_ERR_MESSAGE_SIZE 128 -#define DDOG_ERR_MESSAGE_TRUNCATION_SUFFIX "..." 
+size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity) { + if (capacity == 0 || string == NULL) { + // short-circuit, we can't write anything + ddog_Error_drop(error); + return 0; + } -void grab_gvl_and_raise_ddogerr_and_drop(const char *while_context, ddog_Error *error) { - char error_msg[MAX_DDOG_ERR_MESSAGE_SIZE]; ddog_CharSlice error_msg_slice = ddog_Error_message(error); - size_t copy_size = error_msg_slice.len; + size_t error_msg_size = error_msg_slice.len; // Account for extra null char for proper cstring - size_t error_msg_size = copy_size + 1; - bool truncated = false; - if (error_msg_size > MAX_DDOG_ERR_MESSAGE_SIZE) { - // if it seems like the error msg won't fit, lets truncate things by limiting - // the copy_size to MAX_SIZE minus the size of the truncation suffix - // (which already includes space for a terminating '\0' due to how sizeof works) - copy_size = MAX_DDOG_ERR_MESSAGE_SIZE - sizeof(DDOG_ERR_MESSAGE_TRUNCATION_SUFFIX); - error_msg_size = MAX_DDOG_ERR_MESSAGE_SIZE; - truncated = true; - } - strncpy(error_msg, error_msg_slice.ptr, copy_size); - if (truncated) { - strcpy(error_msg + copy_size, DDOG_ERR_MESSAGE_TRUNCATION_SUFFIX); + if (error_msg_size >= capacity) { + // Error message too big, lets truncate it to capacity - 1 to allow for extra null at end + error_msg_size = capacity - 1; } - error_msg[error_msg_size - 1] = '\0'; + strncpy(string, error_msg_slice.ptr, error_msg_size); + string[error_msg_size] = '\0'; ddog_Error_drop(error); - grab_gvl_and_raise(rb_eRuntimeError, "Libstreaming error while (%s): %s", while_context, error_msg); + return error_msg_size; } diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h index 9a1734327a8..7d4593fa550 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h @@ -24,11 +24,10 @@ inline static VALUE 
get_error_details_and_drop(ddog_Error *error) { return result; } -// Utility function to be able to easily raise a ddog_Error outside the GVL. -// The message will look like 'Libstreaming error while : ' -NORETURN( - void grab_gvl_and_raise_ddogerr_and_drop(const char *while_context, ddog_Error *error); -); +// Utility function to be able to extract an error cstring from a ddog_Error. +// Returns the amount of characters written to string (which are necessarily +// bounded by capacity - 1 since the string will be null-terminated). +size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity); // Used for pretty printing this Ruby enum. Returns "T_UNKNOWN_OR_MISSING_RUBY_VALUE_TYPE_ENTRY" for unknown elements. // In practice, there's a few types that the profiler will probably never encounter, but I've added all entries of diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index 0b431283b93..05ea9076435 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -221,7 +221,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_ static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time); static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint); static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */); -static VALUE _native_track_obj_allocation(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight); +static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight); static VALUE _native_record_obj_free(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE obj); @@ -247,7 +247,7 @@ void 
stack_recorder_init(VALUE profiling_module) { rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3); - rb_define_singleton_method(testing_module, "_native_track_obj_allocation", _native_track_obj_allocation, 3); + rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 3); rb_define_singleton_method(testing_module, "_native_record_obj_free", _native_record_obj_free, 2); ok_symbol = ID2SYM(rb_intern_const("ok")); @@ -283,6 +283,10 @@ static VALUE _native_new(VALUE klass) { VALUE stack_recorder = TypedData_Wrap_Struct(klass, &stack_recorder_typed_data, state); + // NOTE: We initialize this because we want a new recorder to be operational even without initialization and our + // default is everything enabled. However, if during recording initialization it turns out we don't want + // heap samples, we will free and reset heap_recorder to NULL, effectively disabling all behaviour specific + // to heap profiling (all calls to heap_recorder_* with a NULL heap recorder are noops). state->heap_recorder = heap_recorder_new(); // Note: Don't raise exceptions after this point, since it'll lead to libdatadog memory leaking! @@ -415,15 +419,9 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan // Need to do this while still holding on to the Global VM Lock; see comments on method for why serializer_set_start_timestamp_for_next_profile(state, finish_timestamp); - if (state->heap_recorder != NULL) { - // Flush any pending data in the heap recorder prior to doing the iteration during serialization. - // This needs to happen while holding on to the GVL - // TODO: Can this deadlock with the sampler thread due to GVL or Ruby specific things? - // 1. 
Sampler is in the middle of recording a heap allocation. - // 2. Recorder gets scheduled and tries to acquire the lock, waiting if needed. - // 3. Are we guaranteed that the sampler can make progress? - heap_recorder_flush(state->heap_recorder); - } + // Flush any pending data in the heap recorder prior to doing the iteration during serialization. + // This needs to happen while holding on to the GVL + heap_recorder_flush(state->heap_recorder); // We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this // is pending @@ -486,9 +484,8 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns; metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples; - if (values.alloc_samples != 0 && state->heap_recorder != NULL) { - // If we got an allocation sample and our heap recorder is initialized, - // end the heap allocation recording to commit the heap sample. + if (values.alloc_samples != 0) { + // If we got an allocation sample end the heap allocation recording to commit the heap sample. // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can // be fixed with some refactoring but for now this leads to a less impactful change. 
@@ -515,10 +512,6 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); - if (state->heap_recorder == NULL) { - // we aren't gathering heap data so we can ignore this - return; - } // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can // be fixed with some refactoring but for now this leads to a less impactful change. @@ -529,10 +522,6 @@ void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample void record_obj_free(VALUE recorder_instance, VALUE freed_object) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); - if (state->heap_recorder == NULL) { - // we aren't gathering heap data so we can ignore this - return; - } record_heap_free(state->heap_recorder, freed_object); } @@ -551,14 +540,18 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_ } } +#define MAX_LEN_HEAP_ITERATION_ERROR_MSG 256 + // Heap recorder iteration context allows us access to stack recorder state and profile being serialized // during iteration of heap recorder live objects. 
typedef struct heap_recorder_iteration_context { struct stack_recorder_state *state; ddog_prof_Profile *profile; + bool error; + char errorMsg[MAX_LEN_HEAP_ITERATION_ERROR_MSG]; } heap_recorder_iteration_context; -static void add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) { +static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) { heap_recorder_iteration_context *context = (heap_recorder_iteration_context*) extra_arg; int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0}; @@ -577,16 +570,32 @@ static void add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio ); if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { - grab_gvl_and_raise_ddogerr_and_drop("recording heap sample", &result.err); + read_ddogerr_string_and_drop(&result.err, context->errorMsg, MAX_LEN_HEAP_ITERATION_ERROR_MSG); + context->error = true; + // By returning false we cancel the iteration + return false; } + + // Keep on iterating to next item! + return true; } static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile) { heap_recorder_iteration_context iteration_context = { .state = state, - .profile = profile + .profile = profile, + .error = false, + .errorMsg = {0}, }; heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context, false); + if (iteration_context.error) { + // We wait until we're out of the iteration to grab the gvl and raise. This is important because during + // iteration we first grab the records_mutex and raising requires grabbing the GVL. When sampling, we are + // in the opposite situation: we have the GVL and may need to grab the records_mutex for mutation. This + // different ordering can lead to deadlocks. 
By delaying the raise here until after we no longer hold + // records_mutex, we prevent this different-lock-acquisition-order situation. + grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", iteration_context.errorMsg); + } } static void *call_serialize_without_gvl(void *call_args) { @@ -594,11 +603,9 @@ static void *call_serialize_without_gvl(void *call_args) { args->profile = serializer_flip_active_and_inactive_slots(args->state); - if (args->state->heap_recorder != NULL) { - // Now that we have the inactive profile with all but heap samples, lets fill it with heap data - // without needing to race with the active sampler - build_heap_profile_without_gvl(args->state, args->profile); - } + // Now that we have the inactive profile with all but heap samples, lets fill it with heap data + // without needing to race with the active sampler + build_heap_profile_without_gvl(args->state, args->profile); // Note: The profile gets reset by the serialize call args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */); @@ -721,8 +728,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_ reset_profile(&state->slot_one_profile, /* start_time: */ NULL); reset_profile(&state->slot_two_profile, /* start_time: */ NULL); - // After a fork, all tracked live objects should still be alive in the child process so we purposefully DON'T - // reset the heap recorder. The live heap it is tracking is still alive after all. 
+ heap_recorder_after_fork(state->heap_recorder); return Qtrue; } @@ -741,7 +747,7 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_ return Qtrue; } -static VALUE _native_track_obj_allocation(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) { +static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) { ENFORCE_TYPE(weight, T_FIXNUM); track_object(recorder_instance, new_obj, NUM2UINT(weight)); return Qtrue; diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index f7a3baefa38..7050089d0a3 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -333,7 +333,7 @@ def initialize(*_) # in summary, this should be supported on Ruby 2.x, 3.1.4+, 3.2.3+ and 3.3.0+. Enabling it on # unsupported Rubies may result in unexpected behaviour, including crashes. # - # @note Heap profiles are not yet GA in the Datadog UI, get in touch if you want to help us test it. + # @note Allocation profiles are not yet GA in the Datadog UI, get in touch if you want to help us test it. # # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED` environment variable as a boolean, otherwise `false` option :experimental_allocation_enabled do |o| @@ -361,8 +361,6 @@ def initialize(*_) # The lower the value, the more accuracy in allocation and heap tracking but the bigger the overhead. In # particular, a value of 1 will sample ALL allocations. # - # This feature is not supported in all Rubies. Refer to {Datadog::Profiling::Ext::IS_ALLOC_SAMPLING_SUPPORTED} - # # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE` environment variable, otherwise `50`. 
option :experimental_allocation_sample_rate do |o| o.type :int diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index 76ccc4a65e3..f986e76a2e0 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -147,7 +147,12 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) # https://github.com/ruby/ruby/pull/7464) that makes this crash in any configuration. This bug is # fixed on Ruby versions 3.2.3 and 3.3.0. if RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION < '3.2.3' - raise "Sorry, allocation profiling is not supported in #{RUBY_VERSION}. Please use 3.1.x, 3.2.3 or 3.3.0+" + Datadog.logger.warn( + 'Allocation profiling is not supported in Ruby versions 3.2.0, 3.2.1 and 3.2.2 and will be forcibly '\ + 'disabled. This is due to a VM bug that can lead to crashes (https://bugs.ruby-lang.org/issues/19482). '\ + 'Other Ruby versions do not suffer from this issue.' + ) + return false end # SEVERE - Only with Ractors @@ -160,16 +165,17 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION < '3.2.3') Datadog.logger.warn( "Current Ruby version (#{RUBY_VERSION}) has a VM bug where enabling allocation profiling while using "\ - 'Ractors may cause unexpected issues, including crashes. This does not happen if Ractors are not used.' + 'Ractors may cause unexpected issues, including crashes (https://bugs.ruby-lang.org/issues/18464). '\ + 'This does not happen if Ractors are not used.' ) # ANNOYANCE - Only with Ractors # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on. elsif RUBY_VERSION.start_with?('3.') Datadog.logger.warn( - 'In all known versions of Ruby 3.x, using Ractors may result in allocation profiling unexpectedly stopping. 
'\ - 'Note that this stop has no impact in your application stability or performance. This does not happen if ' \ - 'Ractors are not used.' + 'In all known versions of Ruby 3.x, using Ractors may result in allocation profiling unexpectedly ' \ + 'stopping (https://bugs.ruby-lang.org/issues/19112). Note that this stop has no impact in your ' \ + 'application stability or performance. This does not happen if Ractors are not used.' ) end diff --git a/spec/datadog/core/configuration/settings_spec.rb b/spec/datadog/core/configuration/settings_spec.rb index 45e542670d2..1a76f5b3393 100644 --- a/spec/datadog/core/configuration/settings_spec.rb +++ b/spec/datadog/core/configuration/settings_spec.rb @@ -497,6 +497,14 @@ end end + describe '#allocation_counting_enabled=' do + it 'logs a warning informing customers this no longer does anything' do + expect(Datadog.logger).to receive(:warn).with(/no longer does anything/) + + settings.profiling.advanced.allocation_counting_enabled = false + end + end + describe '#experimental_allocation_enabled' do subject(:experimental_allocation_enabled) { settings.profiling.advanced.experimental_allocation_enabled } diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index a187497fa96..0f2c8c42cb9 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -12,7 +12,7 @@ let(:allocation_sample_every) { 50 } let(:allocation_profiling_enabled) { false } let(:heap_profiling_enabled) { false } - let(:recorder) { build_stack_recorder(true) } + let(:recorder) { build_stack_recorder(heap_samples_enabled: true) } let(:no_signals_workaround_enabled) { false } let(:timeline_enabled) { false } let(:options) { {} } @@ -67,7 +67,10 @@ wait_until_running end - @skip_cleanup = false + before do + @skip_cleanup = false + end + after do cpu_and_wall_time_worker.stop 
unless @skip_cleanup end @@ -524,11 +527,10 @@ cpu_and_wall_time_worker.stop - total_heap_sampled_for_test_struct = samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current) - .select { |s| s.locations.first.lineno == allocation_line && s.locations.first.path == __FILE__ } - .sum { |s| s.values[:'heap-live-samples'] } + relevant_sample = samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current) + .find { |s| s.locations.first.lineno == allocation_line && s.locations.first.path == __FILE__ } - expect(total_heap_sampled_for_test_struct).to eq test_num_allocated_object + expect(relevant_sample.values[':heap-live-samples']).to eq test_num_allocated_object end context 'but allocation profiling is disabled' do @@ -807,17 +809,21 @@ it { is_expected.to be nil } end - context 'when allocation profiling is enabled' do - let(:allocation_profiling_enabled) { true } + context 'when CpuAndWallTimeWorker has been started' do + before do + cpu_and_wall_time_worker.start + wait_until_running + end - context 'when CpuAndWallTimeWorker has been started' do - before do - cpu_and_wall_time_worker.start - wait_until_running - end + after do + cpu_and_wall_time_worker.stop + end + + context 'when allocation profiling is enabled' do + let(:allocation_profiling_enabled) { true } - after do - cpu_and_wall_time_worker.stop + it 'always returns a >= 0 value' do + expect(described_class._native_allocation_count).to be >= 0 end it 'returns the number of allocations between two calls of the method' do @@ -870,19 +876,10 @@ context 'when allocation profiling is disabled' do let(:allocation_profiling_enabled) { false } + it 'always returns a nil value' do + 100.times { Object.new } - it 'still returns nil after a bunch of allocations' do - # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all - # it needs to - new_object = proc { Object.new } - 1.times(&new_object) - - before_allocations = 
described_class._native_allocation_count - 100.times(&new_object) - after_allocations = described_class._native_allocation_count - - expect(before_allocations).to be nil - expect(after_allocations).to be nil + expect(described_class._native_allocation_count).to be nil end end end diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index df0293e7366..b27910f6f05 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -145,8 +145,16 @@ context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/19482 (#{broken_ruby})" do let(:testing_version) { broken_ruby } - it 'raises an error about lack of support during initialization of a CpuAndWallTimeWorker' do - expect { build_profiler_component }.to raise_error(/not supported.+Please use/) + it 'initializes a CpuAndWallTimeWorker with allocation_profiling forcibly set to false and warns' do + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( + allocation_profiling_enabled: false, + ) + + expect(Datadog.logger).to receive(:warn).with(/forcibly disabled/) + expect(Datadog.logger).to_not receive(:warn).with(/Ractor/) + expect(Datadog.logger).to_not receive(:warn).with(/experimental allocation profiling/) + + build_profiler_component end end end @@ -249,17 +257,6 @@ end end - describe '#allocation_counting_enabled=' do - it 'updates the #allocation_counting_enabled setting' do - settings.profiling.advanced.allocation_counting_enabled = true - - expect { settings.profiling.advanced.allocation_counting_enabled = false } - .to change { settings.profiling.advanced.allocation_counting_enabled } - .from(true) - .to(false) - end - end - it 'sets up the Profiler with the CpuAndWallTimeWorker collector' do expect(Datadog::Profiling::Profiler).to receive(:new).with( worker: instance_of(Datadog::Profiling::Collectors::CpuAndWallTimeWorker), diff --git 
a/spec/datadog/profiling/spec_helper.rb b/spec/datadog/profiling/spec_helper.rb index 75f17183e9e..e02c1af213b 100644 --- a/spec/datadog/profiling/spec_helper.rb +++ b/spec/datadog/profiling/spec_helper.rb @@ -74,7 +74,7 @@ def samples_for_thread(samples, thread) # We disable heap_sample collection by default in tests since it requires some extra mocking/ # setup for it to properly work. - def build_stack_recorder(heap_samples_enabled = false) + def build_stack_recorder(heap_samples_enabled: false) Datadog::Profiling::StackRecorder.new( cpu_time_enabled: true, alloc_samples_enabled: true, diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index e6b8c8dd7d1..28ac1c6bd27 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -337,15 +337,15 @@ def sample_types_from(decoded_profile) end describe 'heap samples' do - let(:metric_values1) { { 'cpu-time' => 101, 'cpu-samples' => 1, 'wall-time' => 789, 'alloc-samples' => 10 } } - let(:metric_values2) { { 'cpu-time' => 102, 'cpu-samples' => 2, 'wall-time' => 790, 'alloc-samples' => 11 } } - let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } let(:sample_rate) { 50 } + let(:metric_values1) { { 'cpu-time' => 101, 'cpu-samples' => 1, 'wall-time' => 789, 'alloc-samples' => sample_rate } } + let(:metric_values2) { { 'cpu-time' => 102, 'cpu-samples' => 2, 'wall-time' => 790, 'alloc-samples' => sample_rate } } + let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } let(:a_string) { 'a beautiful string' } let(:another_string) { 'a fearsome string' } - let(:an_array) { [1..10] } - let(:another_array) { [-10..-1] } + let(:an_array) { (1..10).to_a } + let(:another_array) { (-10..-1).to_a } let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true } } let(:another_hash) { { 'z' => -1, 'y' => '-2', 'x' => false } } @@ -362,7 +362,7 @@ def 
sample_types_from(decoded_profile) before do allocated_objects.each_with_index do |obj, i| # Heap sampling currently requires this 2-step process to first pass data about the allocated object... - described_class::Testing._native_track_obj_allocation(stack_recorder, obj, sample_rate) + described_class::Testing._native_track_object(stack_recorder, obj, sample_rate) # ...and then pass data about the allocation stacktrace (with 2 distinct stacktraces) if i.even? Datadog::Profiling::Collectors::Stack::Testing @@ -391,16 +391,31 @@ def sample_types_from(decoded_profile) context 'when enabled' do let(:heap_samples_enabled) { true } + let(:heap_samples) do + samples.select { |s| s.values[:'heap-live-samples'] > 0 } + end + + let(:non_heap_samples) do + samples.select { |s| s.values[:'heap-live-samples'] == 0 } + end + it 'are included in the profile' do pending 'heap_recorder implementation is currently missing' - # We sample from 2 distinct locations but heap samples don't have the same - # labels as the others so they get duped. 
- expect(samples.size).to eq(4) - expect(samples.sum { |s| s.values[:'heap-live-samples'] || 0 }).to eq( + # We sample from 2 distinct locations + expect(heap_samples.size).to eq(2) + + expect(heap_samples.sum { |s| s.values[:'heap-live-samples'] || 0 }).to eq( [a_string, an_array, a_hash].size * sample_rate ) end + + it 'do not corrupt/overwrite non-heap-samples' do + expect(non_heap_samples.size).to eq(2) + + expect(non_heap_samples.sum { |s| s.values[:'cpu-time'] }).to be > 0 + expect(non_heap_samples.sum { |s| s.values[:'alloc-samples'] }).to eq(allocated_objects.size * sample_rate) + end end end From e94b26033d6f51838c866cf1cabc0d8fa4965158 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Thu, 30 Nov 2023 12:37:43 +0000 Subject: [PATCH 09/17] [PROF-8667] Missing comment --- ext/ddtrace_profiling_native_extension/heap_recorder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c index 8effc6b4971..edcc9369041 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.c +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -105,6 +105,7 @@ void heap_recorder_flush(heap_recorder *heap_recorder) { // TODO: Implement } +// WARN: If with_gvl = True, NO HEAP ALLOCATIONS, EXCEPTIONS or RUBY CALLS ARE ALLOWED. void heap_recorder_for_each_live_object( heap_recorder *heap_recorder, bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg), From f51c2546a6e4a8f45ea049c7482c4baf3d03e020 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Thu, 30 Nov 2023 15:27:17 +0000 Subject: [PATCH 10/17] [PROF-8667] Migrate to an approach with no free tracepointing. 
--- .../collectors_cpu_and_wall_time_worker.c | 43 +++------------- .../collectors_thread_context.c | 10 ---- .../collectors_thread_context.h | 1 - .../heap_recorder.c | 9 ---- .../heap_recorder.h | 17 ------- .../stack_recorder.c | 14 ----- .../stack_recorder.h | 1 - .../collectors/cpu_and_wall_time_worker.rb | 2 - lib/datadog/profiling/component.rb | 1 - .../cpu_and_wall_time_worker_spec.rb | 29 +++-------- spec/datadog/profiling/component_spec.rb | 51 +++++++++++-------- spec/datadog/profiling/stack_recorder_spec.rb | 25 +++------ 12 files changed, 52 insertions(+), 151 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 362855c5af8..29f1a166437 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -154,8 +154,7 @@ static VALUE _native_initialize( VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, VALUE allocation_sample_every, - VALUE allocation_profiling_enabled, - VALUE heap_profiling_enabled + VALUE allocation_profiling_enabled ); static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr); static VALUE _native_sampling_loop(VALUE self, VALUE instance); @@ -189,7 +188,6 @@ static void reset_stats(struct cpu_and_wall_time_worker_state *state); static void sleep_for(uint64_t time_ns); static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self); static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused); -static void on_freeobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused); static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state); static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self); static VALUE rescued_sample_allocation(VALUE tracepoint_data); @@ -230,7 +228,7 @@ void 
collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) { // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new); - rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9); + rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 8); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1); @@ -301,15 +299,13 @@ static VALUE _native_initialize( VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, VALUE allocation_sample_every, - VALUE allocation_profiling_enabled, - VALUE heap_profiling_enabled + VALUE allocation_profiling_enabled ) { ENFORCE_BOOLEAN(gc_profiling_enabled); ENFORCE_BOOLEAN(no_signals_workaround_enabled); ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled); ENFORCE_TYPE(allocation_sample_every, T_FIXNUM); ENFORCE_BOOLEAN(allocation_profiling_enabled); - ENFORCE_BOOLEAN(heap_profiling_enabled); struct cpu_and_wall_time_worker_state *state; TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state); @@ -319,21 +315,15 @@ static VALUE _native_initialize( state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue); state->allocation_sample_every = NUM2INT(allocation_sample_every); state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue); - state->heap_profiling_enabled = (heap_profiling_enabled == Qtrue); if (state->allocation_sample_every <= 0) { rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. 
This value must be > 0.", state->allocation_sample_every); } - if (state->heap_profiling_enabled && !state->allocation_profiling_enabled) { - rb_raise(rb_eArgError, "Heap profiling requires allocation profiling to be enabled but it isn't."); - } - state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance); state->idle_sampling_helper_instance = idle_sampling_helper_instance; state->gc_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_GC_ENTER | RUBY_INTERNAL_EVENT_GC_EXIT, on_gc_event, NULL /* unused */); state->object_allocation_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_NEWOBJ, on_newobj_event, NULL /* unused */); - state->object_free_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_FREEOBJ, on_freeobj_event, NULL /* unused */); return Qtrue; } @@ -349,7 +339,6 @@ static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr) { rb_gc_mark(state->stop_thread); rb_gc_mark(state->gc_tracepoint); rb_gc_mark(state->object_allocation_tracepoint); - rb_gc_mark(state->object_free_tracepoint); } // Called in a background thread created in CpuAndWallTimeWorker#start @@ -648,7 +637,6 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) { install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal"); if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint); if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); - if (state->heap_profiling_enabled) rb_tracepoint_enable(state->object_free_tracepoint); rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state); @@ -932,6 +920,10 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) return; } + // @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous + // invocation is still pending, (e.g. 
it wouldn't call `on_newobj_event` while it's already running), but I decided + // to keep this here for consistency -- every call to the thread context (other than the special gc calls which are + // defined as not being able to allocate) sets this. state->during_sample = true; // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before @@ -944,30 +936,9 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) state->during_sample = false; } -// Safety: This function may get called while Ruby is doing garbage collection. While Ruby is doing garbage collection, -// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation. -// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)! -static void on_freeobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) { - struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above - - // This should not happen in a normal situation because the tracepoint is always enabled after the instance is set - // and disabled before it is cleared, but just in case... - if (state == NULL) return; - - // NOTE: Because this is likely to be happening during GC, handling of this tracepoint does not do any allocation. - // We also do not want to lose any frees as that would affect the accuracy of our live heap tracking so we skip - // the typical `state->during_sample` dropping that other sampling tracepoints have. 
- - rb_trace_arg_t *data = rb_tracearg_from_tracepoint(tracepoint_data); - VALUE freed_object = rb_tracearg_object(data); - - thread_context_collector_sample_free(state->thread_context_collector_instance, freed_object); -} - static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) { rb_tracepoint_disable(state->gc_tracepoint); rb_tracepoint_disable(state->object_allocation_tracepoint); - rb_tracepoint_disable(state->object_free_tracepoint); } static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) { diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 5bda980132d..282ff8282b1 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -1218,16 +1218,6 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in ); } -// Safety: This function may get called while Ruby is doing garbage collection. While Ruby is doing garbage collection, -// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation. -// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)! -void thread_context_collector_sample_free(VALUE self_instance, VALUE freed_object) { - struct thread_context_collector_state *state; - TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state); - - record_obj_free(state->recorder_instance, freed_object); -} - // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec. // It SHOULD NOT be used for other purposes. 
static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object) { diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.h b/ext/ddtrace_profiling_native_extension/collectors_thread_context.h index 2bbf2bf1108..88dfabe9615 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.h @@ -8,7 +8,6 @@ void thread_context_collector_sample( VALUE profiler_overhead_stack_thread ); void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object); -void thread_context_collector_sample_free(VALUE self_instance, VALUE freed_object); VALUE thread_context_collector_sample_after_gc(VALUE self_instance); void thread_context_collector_on_gc_start(VALUE self_instance); void thread_context_collector_on_gc_finish(VALUE self_instance); diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c index edcc9369041..7cfd5bc3ca5 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.c +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -88,15 +88,6 @@ void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_pro // TODO: Implement } -// WARN: This can get called during Ruby GC. NO HEAP ALLOCATIONS OR EXCEPTIONS ARE ALLOWED. 
-void record_heap_free(heap_recorder *heap_recorder, VALUE obj) { - if (heap_recorder == NULL) { - return; - } - - // TODO: Implement -} - void heap_recorder_flush(heap_recorder *heap_recorder) { if (heap_recorder == NULL) { return; diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.h b/ext/ddtrace_profiling_native_extension/heap_recorder.h index e7a09a63477..add47d46734 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.h +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.h @@ -66,23 +66,6 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj // WARN: It is illegal to call this without previously having called ::start_heap_allocation_recording. void end_heap_allocation_recording(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations); -// Record a heap free on the heap recorder. -// -// Contrary to heap allocations, no sampling should be applied to frees. Missing a free event -// risks negatively effecting the accuracy of the live state of tracked objects and thus the accuracy -// of the resulting profiles. -// -// Two things can happen depending on the object: -// * The object isn't being tracked: the operation is a no-op. -// * The object is being tracked: it is marked as no longer alive and will not appear in the next -// iteration. -// -// @param obj The object that was freed. -// -// NOTE: This function is safe to be called during a Ruby GC as it guarantees no heap mutations -// during its execution. -void record_heap_free(heap_recorder *heap_recorder, VALUE obj); - // Flush any intermediate state that might be queued inside the heap recorder. // // NOTE: This should usually be called before iteration to ensure data is as little stale as possible. 
diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index 05ea9076435..5269cba86ec 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -222,7 +222,6 @@ static void serializer_set_start_timestamp_for_next_profile(struct stack_recorde static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint); static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */); static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight); -static VALUE _native_record_obj_free(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE obj); void stack_recorder_init(VALUE profiling_module) { @@ -248,7 +247,6 @@ void stack_recorder_init(VALUE profiling_module) { rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3); rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 3); - rb_define_singleton_method(testing_module, "_native_record_obj_free", _native_record_obj_free, 2); ok_symbol = ID2SYM(rb_intern_const("ok")); error_symbol = ID2SYM(rb_intern_const("error")); @@ -518,13 +516,6 @@ void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight); } -// WARN: This can get called during Ruby GC. NO HEAP ALLOCATIONS OR EXCEPTIONS ARE ALLOWED. 
-void record_obj_free(VALUE recorder_instance, VALUE freed_object) { - struct stack_recorder_state *state; - TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); - record_heap_free(state->heap_recorder, freed_object); -} - void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); @@ -753,11 +744,6 @@ static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_ins return Qtrue; } -static VALUE _native_record_obj_free(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE obj) { - record_obj_free(recorder_instance, obj); - return Qtrue; -} - static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */) { ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(profile, start_time); if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h index a68e2d51f0d..9190cc89a25 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.h +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.h @@ -23,5 +23,4 @@ typedef struct sample_labels { void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels); void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint); void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight); -void record_obj_free(VALUE recorder_instance, VALUE freed_object); VALUE enforce_recorder_instance(VALUE object); diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index b9a9055398c..859cfdf87d7 100644 --- 
a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -19,7 +19,6 @@ def initialize( thread_context_collector:, allocation_sample_every:, allocation_profiling_enabled:, - heap_profiling_enabled:, # **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the # profiler overhead! dynamic_sampling_rate_enabled: true, @@ -40,7 +39,6 @@ def initialize( dynamic_sampling_rate_enabled, allocation_sample_every, allocation_profiling_enabled, - heap_profiling_enabled, ) @worker_thread = nil @failure_exception = nil diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index f986e76a2e0..d8e8ac0ce19 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -53,7 +53,6 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) thread_context_collector: thread_context_collector, allocation_sample_every: allocation_sample_every, allocation_profiling_enabled: allocation_profiling_enabled, - heap_profiling_enabled: heap_profiling_enabled, ) internal_metadata = { diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 0f2c8c42cb9..8be19d345d8 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -12,7 +12,7 @@ let(:allocation_sample_every) { 50 } let(:allocation_profiling_enabled) { false } let(:heap_profiling_enabled) { false } - let(:recorder) { build_stack_recorder(heap_samples_enabled: true) } + let(:recorder) { build_stack_recorder(heap_samples_enabled: heap_profiling_enabled) } let(:no_signals_workaround_enabled) { false } let(:timeline_enabled) { false } let(:options) { {} } @@ -24,7 +24,6 @@ thread_context_collector: build_thread_context_collector(recorder), 
allocation_sample_every: allocation_sample_every, allocation_profiling_enabled: allocation_profiling_enabled, - heap_profiling_enabled: heap_profiling_enabled, **options ) end @@ -67,12 +66,8 @@ wait_until_running end - before do - @skip_cleanup = false - end - after do - cpu_and_wall_time_worker.stop unless @skip_cleanup + cpu_and_wall_time_worker.stop end it 'creates a new thread' do @@ -532,21 +527,12 @@ expect(relevant_sample.values[':heap-live-samples']).to eq test_num_allocated_object end - - context 'but allocation profiling is disabled' do - let(:allocation_profiling_enabled) { false } - it 'raises an ArgumentError' do - # We don't want the after hook to execute cpu_and_wall_time_worker.stop otherwise it'll reraise the error - @skip_cleanup = true - expect { cpu_and_wall_time_worker }.to raise_error(ArgumentError, /requires allocation profiling/) - end - end end context 'when heap profiling is disabled' do - let(:allocation_profiling_enabled) { false } + let(:heap_profiling_enabled) { false } - it 'does not record allocations' do + it 'does not record heap samples' do stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo)) start @@ -555,7 +541,7 @@ cpu_and_wall_time_worker.stop - expect(samples_from_pprof(recorder.serialize!).map(&:values)).to all(include(:'heap-live-samples' => 0)) + expect(samples_from_pprof(recorder.serialize!).select { |s| s.values.key?(:'heap-live-samples') }).to be_empty end end @@ -903,10 +889,11 @@ def build_another_instance described_class.new( gc_profiling_enabled: gc_profiling_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, - thread_context_collector: build_thread_context_collector(build_stack_recorder), + thread_context_collector: build_thread_context_collector( + build_stack_recorder(heap_samples_enabled: heap_profiling_enabled) + ), allocation_sample_every: allocation_sample_every, allocation_profiling_enabled: allocation_profiling_enabled, - heap_profiling_enabled: heap_profiling_enabled ) 
end diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index b27910f6f05..9a4e8385617 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -80,7 +80,6 @@ thread_context_collector: instance_of(Datadog::Profiling::Collectors::ThreadContext), allocation_sample_every: kind_of(Integer), allocation_profiling_enabled: false, - heap_profiling_enabled: false, ) build_profiler_component @@ -129,10 +128,13 @@ context 'on Ruby 2.x' do let(:testing_version) { '2.3.0 ' } - it 'initializes a CpuAndWallTimeWorker with allocation_profiling set to true and warns' do + it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( allocation_profiling_enabled: true, ) + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(alloc_samples_enabled: true)) + .and_call_original expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/) expect(Datadog.logger).to_not receive(:warn).with(/Ractor/) @@ -145,10 +147,13 @@ context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/19482 (#{broken_ruby})" do let(:testing_version) { broken_ruby } - it 'initializes a CpuAndWallTimeWorker with allocation_profiling forcibly set to false and warns' do + it 'initializes a CpuAndWallTimeWorker and StackRecorder with allocation sampling force-disabled and warns' do expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( allocation_profiling_enabled: false, ) + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(alloc_samples_enabled: false)) + .and_call_original expect(Datadog.logger).to receive(:warn).with(/forcibly disabled/) expect(Datadog.logger).to_not receive(:warn).with(/Ractor/) @@ -163,10 +168,13 @@ context "on a Ruby 3 
version affected by https://bugs.ruby-lang.org/issues/18464 (#{broken_ractors_ruby})" do let(:testing_version) { broken_ractors_ruby } - it 'initializes a CpuAndWallTimeWorker with allocation_profiling forcibly set to true and warns' do + it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( allocation_profiling_enabled: true, ) + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(alloc_samples_enabled: true)) + .and_call_original expect(Datadog.logger).to receive(:warn).with(/Ractors.+crashes/) expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/) @@ -179,10 +187,13 @@ ['3.1.4', '3.2.3', '3.3.0'].each do |fixed_ruby| context "on a Ruby 3 version where https://bugs.ruby-lang.org/issues/18464 is fixed (#{fixed_ruby})" do let(:testing_version) { fixed_ruby } - it 'initializes a CpuAndWallTimeWorker with allocation_profiling forcibly set to true and warns' do + it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( allocation_profiling_enabled: true, ) + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(alloc_samples_enabled: true)) + .and_call_original expect(Datadog.logger).to receive(:warn).with(/Ractors.+stopping/) expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/) @@ -198,10 +209,13 @@ settings.profiling.advanced.experimental_allocation_enabled = false end - it 'initializes a CpuAndWallTimeWorker collector with allocation_profiling set to false' do + it 'initializes CpuAndWallTimeWorker and StackRecorder without allocation sampling support' do expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( 
allocation_profiling_enabled: false, ) + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(alloc_samples_enabled: false)) + .and_call_original build_profiler_component end @@ -220,7 +234,7 @@ settings.profiling.advanced.experimental_allocation_enabled = false end - it 'raises an ArgumentError during CpuAndWallTimeWorker initialization' do + it 'raises an ArgumentError during component initialization' do expect { build_profiler_component }.to raise_error(ArgumentError, /requires allocation profiling/) end end @@ -230,10 +244,10 @@ settings.profiling.advanced.experimental_allocation_enabled = true end - it 'initializes a CpuAndWallTimeWorker with heap_profiling set to true and warns' do - expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( - allocation_profiling_enabled: true, - ) + it 'initializes StackRecorder with heap sampling support and warns' do + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(heap_samples_enabled: true)) + .and_call_original expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/) expect(Datadog.logger).to receive(:warn).with(/experimental heap profiling/) @@ -248,10 +262,10 @@ settings.profiling.advanced.experimental_heap_enabled = false end - it 'initializes a CpuAndWallTimeWorker collector with allocation_profiling set to false' do - expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( - heap_profiling_enabled: false, - ) + it 'initializes StackRecorder without heap sampling support' do + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(heap_samples_enabled: false)) + .and_call_original build_profiler_component end @@ -294,13 +308,6 @@ build_profiler_component end - it 'sets up the StackRecorder with alloc_samples_enabled: false' do - expect(Datadog::Profiling::StackRecorder) - .to 
receive(:new).with(hash_including(alloc_samples_enabled: false)).and_call_original - - build_profiler_component - end - context 'when on Linux' do before { stub_const('RUBY_PLATFORM', 'some-linux-based-platform') } diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 28ac1c6bd27..60a17c4d8f9 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -343,24 +343,15 @@ def sample_types_from(decoded_profile) let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } let(:a_string) { 'a beautiful string' } - let(:another_string) { 'a fearsome string' } - let(:an_array) { (1..10).to_a } - let(:another_array) { (-10..-1).to_a } + let(:an_array) { [1..10] } let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true } } - let(:another_hash) { { 'z' => -1, 'y' => '-2', 'x' => false } } - - let(:allocated_objects) do - [a_string, an_array, another_string, another_array, a_hash, another_hash] - end - - let(:freed_objects) do - ['this rando', another_string, 'that rando', 'another rando', another_array, another_hash] - end let(:samples) { samples_from_pprof(encoded_pprof) } before do - allocated_objects.each_with_index do |obj, i| + allocations = [a_string, an_array, 'a fearsome string', [-10..-1], a_hash, { 'z' => -1, 'y' => '-2', 'x' => false }] + @num_allocations = 0 + allocations.each_with_index do |obj, i| # Heap sampling currently requires this 2-step process to first pass data about the allocated object... 
described_class::Testing._native_track_object(stack_recorder, obj, sample_rate) # ...and then pass data about the allocation stacktrace (with 2 distinct stacktraces) @@ -371,11 +362,11 @@ def sample_types_from(decoded_profile) Datadog::Profiling::Collectors::Stack::Testing ._native_sample(Thread.current, stack_recorder, metric_values2, labels, numeric_labels, 400, false) end + @num_allocations += 1 end - freed_objects.each do |obj| - described_class::Testing._native_record_obj_free(stack_recorder, obj) - end + allocations.clear # The literals in the previous array are now dangling + GC.start # And this will clear them, leaving only the non-literals which are still pointed to by the lets end context 'when disabled' do @@ -414,7 +405,7 @@ def sample_types_from(decoded_profile) expect(non_heap_samples.size).to eq(2) expect(non_heap_samples.sum { |s| s.values[:'cpu-time'] }).to be > 0 - expect(non_heap_samples.sum { |s| s.values[:'alloc-samples'] }).to eq(allocated_objects.size * sample_rate) + expect(non_heap_samples.sum { |s| s.values[:'alloc-samples'] }).to eq(@num_allocations * sample_rate) end end end From 6893e74688d33b12ab1d1d6b165fa2b801ac9383 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Thu, 30 Nov 2023 15:39:42 +0000 Subject: [PATCH 11/17] [PROF-8667] Fix types --- sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs | 2 -- 1 file changed, 2 deletions(-) diff --git a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs index 22d2041472d..3a2700cf1c7 100644 --- a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs +++ b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs @@ -15,7 +15,6 @@ module Datadog ?dynamic_sampling_rate_enabled: bool, allocation_sample_every: Integer, allocation_profiling_enabled: bool, - heap_profiling_enabled: bool, ) -> void def self._native_initialize: ( @@ -27,7 +26,6 @@ module Datadog bool dynamic_sampling_rate_enabled, 
::Integer allocation_sample_every, bool allocation_profiling_enabled, - bool heap_profiling_enabled, ) -> true def start: (?on_failure_proc: ::Proc?) -> bool? From 21bee6aa470fadbf729c596bcff0c79a4ac008f4 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Thu, 30 Nov 2023 15:45:11 +0000 Subject: [PATCH 12/17] [PROF-8667] Do not use sum for max Ruby compat. --- spec/datadog/profiling/stack_recorder_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 60a17c4d8f9..b54e6ca500e 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -396,9 +396,9 @@ def sample_types_from(decoded_profile) # We sample from 2 distinct locations expect(heap_samples.size).to eq(2) - expect(heap_samples.sum { |s| s.values[:'heap-live-samples'] || 0 }).to eq( - [a_string, an_array, a_hash].size * sample_rate - ) + sum_heap_samples = 0 + heap_samples.each { |s| sum_heap_samples += s.values[:'heap-live-samples'] } + expect(sum_heap_samples).to eq([a_string, an_array, a_hash].size * sample_rate) end it 'do not corrupt/overwrite non-heap-samples' do From 56745c9ffd8ade729e10247ee5efd83da49be8b1 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Thu, 30 Nov 2023 16:07:32 +0000 Subject: [PATCH 13/17] [PROF-8667] Remove one remaining usage of sum. 
--- spec/datadog/profiling/stack_recorder_spec.rb | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index b54e6ca500e..5d73316ae61 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -404,8 +404,16 @@ def sample_types_from(decoded_profile) it 'do not corrupt/overwrite non-heap-samples' do expect(non_heap_samples.size).to eq(2) - expect(non_heap_samples.sum { |s| s.values[:'cpu-time'] }).to be > 0 - expect(non_heap_samples.sum { |s| s.values[:'alloc-samples'] }).to eq(@num_allocations * sample_rate) + sum_cpu_time = 0 + sum_alloc_samples = 0 + + non_heap_samples.each do |s| + sum_cpu_time += s.values[:'cpu-time'] + sum_alloc_samples += s.values[:'alloc-samples'] + end + + expect(sum_cpu_time).to be > 0 + expect(sum_alloc_samples).to eq(@num_allocations * sample_rate) end end end From 3ac74e09d432bba357a00e9e1e1c2eecc9326693 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Mon, 11 Dec 2023 12:51:09 +0000 Subject: [PATCH 14/17] [PROF-8667] Remove last traces of object_free_tracepoint. 
--- .../collectors_cpu_and_wall_time_worker.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 08c269baf24..2fe5e065bb0 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -92,7 +92,6 @@ struct cpu_and_wall_time_worker_state { dynamic_sampling_rate_state dynamic_sampling_rate; VALUE gc_tracepoint; // Used to get gc start/finish information VALUE object_allocation_tracepoint; // Used to get allocation counts and allocation profiling - VALUE object_free_tracepoint; // Used to figure out live objects for heap profiling // These are mutable and used to signal things between the worker thread and other threads @@ -278,7 +277,6 @@ static VALUE _native_new(VALUE klass) { dynamic_sampling_rate_init(&state->dynamic_sampling_rate); state->gc_tracepoint = Qnil; state->object_allocation_tracepoint = Qnil; - state->object_free_tracepoint = Qnil; atomic_init(&state->should_run, false); state->failure_exception = Qnil; From fdca793767fea08578e56ae4568dd9be81d458e1 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Mon, 11 Dec 2023 14:31:56 +0000 Subject: [PATCH 15/17] [PROF-8667] Final cleanup and comment addressing --- .../collectors_cpu_and_wall_time_worker.c | 2 -- .../heap_recorder.c | 6 +++++- .../stack_recorder.c | 7 ++----- spec/datadog/profiling/stack_recorder_spec.rb | 11 +++++------ 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 2fe5e065bb0..af9b0127e18 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ 
b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -84,7 +84,6 @@ struct cpu_and_wall_time_worker_state { bool dynamic_sampling_rate_enabled; int allocation_sample_every; bool allocation_profiling_enabled; - bool heap_profiling_enabled; VALUE self_instance; VALUE thread_context_collector_instance; VALUE idle_sampling_helper_instance; @@ -270,7 +269,6 @@ static VALUE _native_new(VALUE klass) { state->dynamic_sampling_rate_enabled = true; state->allocation_sample_every = 0; state->allocation_profiling_enabled = false; - state->heap_profiling_enabled = false; state->thread_context_collector_instance = Qnil; state->idle_sampling_helper_instance = Qnil; state->owner_thread = Qnil; diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c index 7cfd5bc3ca5..627567c3880 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.c +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -52,6 +52,10 @@ void heap_recorder_free(struct heap_recorder* recorder) { #pragma GCC diagnostic ignored "-Wunused-parameter" void heap_recorder_after_fork(heap_recorder *heap_recorder) { + if (heap_recorder == NULL) { + return; + } + // TODO: Implement } @@ -96,7 +100,7 @@ void heap_recorder_flush(heap_recorder *heap_recorder) { // TODO: Implement } -// WARN: If with_gvl = True, NO HEAP ALLOCATIONS, EXCEPTIONS or RUBY CALLS ARE ALLOWED. +// WARN: If with_gvl = False, NO HEAP ALLOCATIONS, EXCEPTIONS or RUBY CALLS ARE ALLOWED. 
void heap_recorder_for_each_live_object( heap_recorder *heap_recorder, bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg), diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index 30e1a4fb344..c420e88a27b 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -333,11 +333,8 @@ static void stack_recorder_typed_data_free(void *state_ptr) { pthread_mutex_destroy(&state->slot_two_mutex); ddog_prof_Profile_drop(&state->slot_two_profile); - if (state->heap_recorder != NULL) { - // Heap recorder will only be initialized if we're actually collecting heap samples. - // If it is initialized we need to free it... - heap_recorder_free(state->heap_recorder); - } + heap_recorder_free(state->heap_recorder); + ruby_xfree(state); } diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 5d73316ae61..4b0aed87704 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -338,8 +338,7 @@ def sample_types_from(decoded_profile) describe 'heap samples' do let(:sample_rate) { 50 } - let(:metric_values1) { { 'cpu-time' => 101, 'cpu-samples' => 1, 'wall-time' => 789, 'alloc-samples' => sample_rate } } - let(:metric_values2) { { 'cpu-time' => 102, 'cpu-samples' => 2, 'wall-time' => 790, 'alloc-samples' => sample_rate } } + let(:metric_values) { { 'cpu-time' => 101, 'cpu-samples' => 1, 'wall-time' => 789, 'alloc-samples' => sample_rate } } let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } let(:a_string) { 'a beautiful string' } @@ -357,10 +356,10 @@ def sample_types_from(decoded_profile) # ...and then pass data about the allocation stacktrace (with 2 distinct stacktraces) if i.even? 
Datadog::Profiling::Collectors::Stack::Testing - ._native_sample(Thread.current, stack_recorder, metric_values1, labels, numeric_labels, 400, false) + ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 400, false) else Datadog::Profiling::Collectors::Stack::Testing - ._native_sample(Thread.current, stack_recorder, metric_values2, labels, numeric_labels, 400, false) + ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 400, false) end @num_allocations += 1 end @@ -375,7 +374,7 @@ def sample_types_from(decoded_profile) it 'are ommitted from the profile' do # We sample from 2 distinct locations expect(samples.size).to eq(2) - expect(samples.select { |s| s.values.key?('heap-live-samples') }).to eq([]) + expect(samples.select { |s| s.values.key?('heap-live-samples') }).to be_empty end end @@ -390,7 +389,7 @@ def sample_types_from(decoded_profile) samples.select { |s| s.values[:'heap-live-samples'] == 0 } end - it 'are included in the profile' do + it 'include the stack and sample counts for the objects still left alive' do pending 'heap_recorder implementation is currently missing' # We sample from 2 distinct locations From fad58f4a37b5af02305682eafd0f559d64f13f86 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Mon, 11 Dec 2023 15:04:09 +0000 Subject: [PATCH 16/17] [PROF-8667] Fix lint error. 
--- spec/datadog/profiling/stack_recorder_spec.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 4b0aed87704..d6dca67d762 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -358,8 +358,9 @@ def sample_types_from(decoded_profile) Datadog::Profiling::Collectors::Stack::Testing ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 400, false) else + # 401 used instead of 400 here just to make the branches different and appease Rubocop Datadog::Profiling::Collectors::Stack::Testing - ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 400, false) + ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 401, false) end @num_allocations += 1 end From 3ec2698576911f0a9f6386c7243455eb4e7328b0 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Tue, 12 Dec 2023 11:38:56 +0000 Subject: [PATCH 17/17] [PROF-8667] Last comment and more thorough checking in a test case --- .../stack_recorder.c | 8 +++---- spec/datadog/profiling/stack_recorder_spec.rb | 22 ++++++++++++------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index c420e88a27b..4918f8caea8 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -534,7 +534,7 @@ typedef struct heap_recorder_iteration_context { struct stack_recorder_state *state; ddog_prof_Profile *profile; bool error; - char errorMsg[MAX_LEN_HEAP_ITERATION_ERROR_MSG]; + char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG]; } heap_recorder_iteration_context; static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void 
*extra_arg) { @@ -556,7 +556,7 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio ); if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { - read_ddogerr_string_and_drop(&result.err, context->errorMsg, MAX_LEN_HEAP_ITERATION_ERROR_MSG); + read_ddogerr_string_and_drop(&result.err, context->error_msg, MAX_LEN_HEAP_ITERATION_ERROR_MSG); context->error = true; // By returning false we cancel the iteration return false; @@ -571,7 +571,7 @@ static void build_heap_profile_without_gvl(struct stack_recorder_state *state, d .state = state, .profile = profile, .error = false, - .errorMsg = {0}, + .error_msg = {0}, }; heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context, false); if (iteration_context.error) { @@ -580,7 +580,7 @@ static void build_heap_profile_without_gvl(struct stack_recorder_state *state, d // in the opposite situation: we have the GVL and may need to grab the records_mutex for mutation. This // different ordering can lead to deadlocks. By delaying the raise here until after we no longer hold // records_mutex, we prevent this different-lock-acquisition-order situation. 
- grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", iteration_context.errorMsg); + grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", iteration_context.error_msg); } } diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index d6dca67d762..8652ceb5e05 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -401,19 +401,25 @@ def sample_types_from(decoded_profile) expect(sum_heap_samples).to eq([a_string, an_array, a_hash].size * sample_rate) end - it 'do not corrupt/overwrite non-heap-samples' do + it 'keeps on reporting accurate samples for other profile types' do expect(non_heap_samples.size).to eq(2) - sum_cpu_time = 0 - sum_alloc_samples = 0 - + summed_values = {} non_heap_samples.each do |s| - sum_cpu_time += s.values[:'cpu-time'] - sum_alloc_samples += s.values[:'alloc-samples'] + s.values.each_pair do |k, v| + summed_values[k] = (summed_values[k] || 0) + v + end + end + + # We use the same metric_values in all sample calls in before. So we'd expect + # the summed values to match `@num_allocations * metric_values[profile-type]` + # for each profile-type there in. + expected_summed_values = { :'heap-live-samples' => 0 } + metric_values.each_pair do |k, v| + expected_summed_values[k.to_sym] = v * @num_allocations end - expect(sum_cpu_time).to be > 0 - expect(sum_alloc_samples).to eq(@num_allocations * sample_rate) + expect(summed_values).to eq(expected_summed_values) end end end