Skip to content

Commit

Permalink
Extract crashtracking component into core
Browse files Browse the repository at this point in the history
  • Loading branch information
TonyCTHsu committed Aug 12, 2024
1 parent 46c8c8b commit c7defa4
Show file tree
Hide file tree
Showing 28 changed files with 823 additions and 579 deletions.
11 changes: 10 additions & 1 deletion Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ namespace :spec do
task all: [:main, :benchmark,
:rails, :railsredis, :railsredis_activesupport, :railsactivejob,
:elasticsearch, :http, :redis, :sidekiq, :sinatra, :hanami, :hanami_autoinstrument,
:profiling]
:profiling, :crashtracking]

desc '' # "Explicitly hiding from `rake -T`"
RSpec::Core::RakeTask.new(:main) do |t, args|
Expand Down Expand Up @@ -169,6 +169,15 @@ namespace :spec do
t.rspec_opts = args.to_a.join(' ')
end

# Runs the crashtracking specs. The compile task for the `libdatadog_api` native extension is added as a
# prerequisite of this task, so the extension gets built before the specs run.
# rubocop:disable Style/MultilineBlockChain
RSpec::Core::RakeTask.new(:crashtracking) do |t, args|
  t.pattern = 'spec/datadog/core/crashtracking/**/*_spec.rb'
  t.rspec_opts = args.to_a.join(' ')
end.tap do |t|
  # The compile task name embeds the `major.minor` Ruby version and the platform. The dot is escaped so the
  # pattern only matches a literal version separator (an unescaped `.` matches any character).
  Rake::Task[t.name].enhance(["compile:libdatadog_api.#{RUBY_VERSION[/\d+\.\d+/]}_#{RUBY_PLATFORM}"])
end
# rubocop:enable Style/MultilineBlockChain

desc '' # "Explicitly hiding from `rake -T`"
RSpec::Core::RakeTask.new(:contrib) do |t, args|
contrib_paths = [
Expand Down
11 changes: 6 additions & 5 deletions ext/libdatadog_api/crashtracker.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,21 @@

static VALUE _native_start_or_update_on_fork(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self);
static VALUE _native_stop(DDTRACE_UNUSED VALUE _self);
static void crashtracker_init(VALUE profiling_module);
static void crashtracker_init(VALUE crashtracking_module);

// Used to report Ruby VM crashes.
// Once initialized, segfaults will be reported automatically using libdatadog.

// Initialization function of the `libdatadog_api` extension: defines (or reopens) the `Datadog` module and the
// modules nested under it, then hands a module to `crashtracker_init`, which registers the crashtracker class and
// its `_native_` singleton methods under that module.
//
// NOTE(review): both the `Datadog::Profiling` and the `Datadog::Core::Crashtracking` variants appear below; this
// looks like the old and new sides of a diff shown together -- confirm which lines are live.
void DDTRACE_EXPORT Init_libdatadog_api(void) {
VALUE datadog_module = rb_define_module("Datadog");
VALUE profiling_module = rb_define_module_under(datadog_module, "Profiling");
VALUE core_module = rb_define_module_under(datadog_module, "Core");
VALUE crashtracking_module = rb_define_module_under(core_module, "Crashtracking");

crashtracker_init(profiling_module);
crashtracker_init(crashtracking_module);
}

void crashtracker_init(VALUE profiling_module) {
VALUE crashtracker_class = rb_define_class_under(profiling_module, "Crashtracker", rb_cObject);
void crashtracker_init(VALUE crashtracking_module) {
VALUE crashtracker_class = rb_define_class_under(crashtracking_module, "Component", rb_cObject);

rb_define_singleton_method(crashtracker_class, "_native_start_or_update_on_fork", _native_start_or_update_on_fork, -1);
rb_define_singleton_method(crashtracker_class, "_native_stop", _native_stop, 0);
Expand Down
15 changes: 14 additions & 1 deletion lib/datadog/core/configuration/components.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
require_relative '../../tracing/component'
require_relative '../../profiling/component'
require_relative '../../appsec/component'
require_relative '../crashtracking/component'

module Datadog
module Core
Expand Down Expand Up @@ -58,6 +59,17 @@ def build_runtime_metrics_worker(settings)
# Builds the telemetry component by delegating to `Telemetry::Component.build` with the same three arguments.
#
# @return whatever `Telemetry::Component.build` returns
def build_telemetry(settings, agent_settings, logger)
Telemetry::Component.build(settings, agent_settings, logger)
end

# Builds the crashtracking component when crash tracking is enabled and the native extension is usable.
#
# @param settings the configuration; only `settings.crashtracking.enabled` is read here
# @param agent_settings forwarded untouched to `Crashtracking::Component.build`
# @param logger [#debug] receives a debug message when the native extension failed to load
# @return the result of `Crashtracking::Component.build`, or `nil` when crash tracking is disabled or the
#   `libdatadog_api` extension could not be loaded
def build_crashtracker(settings, agent_settings, logger:)
  return unless settings.crashtracking.enabled

  # `LIBDATADOG_API_FAILURE` is nil when the extension loaded, otherwise it holds the load error message.
  load_failure = Datadog::Core::Crashtracking::Component::LIBDATADOG_API_FAILURE
  return Datadog::Core::Crashtracking::Component.build(settings, agent_settings, logger: logger) unless load_failure

  logger.debug("Cannot enable crashtracking: #{load_failure}")
  nil
end
end

include Datadog::Tracing::Component::InstanceMethods
Expand All @@ -83,11 +95,12 @@ def initialize(settings)

@remote = Remote::Component.build(settings, agent_settings)
@tracer = self.class.build_tracer(settings, agent_settings, logger: @logger)
self.class.build_crashtracker(settings, agent_settings, logger: @logger)

@profiler, profiler_logger_extra = Datadog::Profiling::Component.build_profiler_component(
settings: settings,
agent_settings: agent_settings,
optional_tracer: @tracer,
optional_tracer: @tracer
)
@environment_logger_extra.merge!(profiler_logger_extra) if profiler_logger_extra

Expand Down
28 changes: 18 additions & 10 deletions lib/datadog/core/configuration/settings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -451,17 +451,16 @@ def initialize(*_)
o.default 60
end

# Enables reporting of information when the Ruby VM crashes.
#
# This feature is no longer experimental, and we plan to deprecate this setting and replace it with a
# properly-named one soon.
#
# @default `DD_PROFILING_EXPERIMENTAL_CRASH_TRACKING_ENABLED` environment variable as a boolean,
# otherwise `true`
# DEV-3.0: Remove `experimental_crash_tracking_enabled` option
option :experimental_crash_tracking_enabled do |o|
o.type :bool
o.env 'DD_PROFILING_EXPERIMENTAL_CRASH_TRACKING_ENABLED'
o.default true
# Only report the deprecation when the value was set with a precedence other than DEFAULT, i.e. not when the
# option merely holds its default value.
o.after_set do |_, _, precedence|
unless precedence == Datadog::Core::Configuration::Option::Precedence::DEFAULT
Core.log_deprecation(key: :experimental_crash_tracking_enabled) do
'The profiling.advanced.experimental_crash_tracking_enabled setting has been deprecated for removal '\
'and no longer does anything. Please remove it from your Datadog.configure block.'
end
end
end
end
end

Expand Down Expand Up @@ -833,6 +832,15 @@ def initialize(*_)
option :service
end

# Crash tracking settings.
settings :crashtracking do
# Enables reporting of information when the Ruby VM crashes.
#
# @default `DD_CRASHTRACKING_ENABLED` environment variable as a boolean, otherwise `true`
option :enabled do |o|
o.type :bool
o.default true
o.env 'DD_CRASHTRACKING_ENABLED'
end
end

# TODO: Tracing should manage its own settings.
# Keep this extension here for now to keep things working.
extend Datadog::Tracing::Configuration::Settings
Expand Down
21 changes: 21 additions & 0 deletions lib/datadog/core/crashtracking/agent_base_url.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# frozen_string_literal: true

require_relative '../configuration/ext'

module Datadog
  module Core
    module Crashtracking
      # Resolves the base URL of the agent from the agent settings.
      module AgentBaseUrl
        # Builds the agent base URL for the configured adapter.
        #
        # @param agent_settings [#adapter, #ssl, #hostname, #port, #uds_path] the resolved agent settings
        # @return [String] `http(s)://host:port/` for the HTTP adapter, `unix://path` for the unix socket adapter
        # @return [nil] when the adapter is neither of the two above
        def self.resolve(agent_settings)
          case agent_settings.adapter
          when Datadog::Core::Configuration::Ext::Agent::HTTP::ADAPTER
            scheme = agent_settings.ssl ? 'https' : 'http'
            "#{scheme}://#{url_host(agent_settings.hostname)}:#{agent_settings.port}/"
          when Datadog::Core::Configuration::Ext::Agent::UnixSocket::ADAPTER
            "unix://#{agent_settings.uds_path}"
          end
        end

        # Returns the hostname in the form it must take inside a URL authority.
        #
        # A colon can only appear in an IPv6 literal (never in a DNS name or an IPv4 address), and such a literal
        # has to be wrapped in square brackets, otherwise its colons are indistinguishable from the port separator.
        # Hostnames that are already bracketed are left untouched.
        def self.url_host(hostname)
          host = hostname.to_s
          return host unless host.include?(':') && !host.start_with?('[')

          "[#{host}]"
        end
        private_class_method :url_host
      end
    end
  end
end
112 changes: 112 additions & 0 deletions lib/datadog/core/crashtracking/component.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# frozen_string_literal: true

require 'libdatadog'

require_relative 'tag_builder'
require_relative 'agent_base_url'
require_relative '../utils/only_once'
require_relative '../utils/at_fork_monkey_patch'

module Datadog
  module Core
    module Crashtracking
      # Used to report Ruby VM crashes.
      #
      # NOTE: The crashtracker native state is a singleton; so even if you create multiple instances of `Component`
      # and start them, it only works as "last writer wins". Same for stop -- there's only one state, so calling stop
      # on it will stop the crash tracker, regardless of which instance started it.
      #
      # Methods prefixed with _native_ are implemented in `crashtracker.c`
      class Component
        # `nil` when the native extension was required successfully; otherwise the message of the `LoadError`
        # raised while requiring it.
        LIBDATADOG_API_FAILURE =
          begin
            # The extension name embeds the `major.minor` Ruby version and the platform. The dot is escaped so
            # the pattern only matches a literal version separator (an unescaped `.` matches any character).
            require "libdatadog_api.#{RUBY_VERSION[/\d+\.\d+/]}_#{RUBY_PLATFORM}"
            nil
          rescue LoadError => e
            e.message
          end

        # Shared by every instance; guards the registration of the fork hook in #reset_after_fork.
        ONLY_ONCE = Core::Utils::OnlyOnce.new

        # Builds a component from the settings and starts it.
        #
        # Every missing prerequisite is reported with its own warning before giving up, so that a single run
        # surfaces all of the problems at once.
        #
        # @param settings used to build the tags (see TagBuilder)
        # @param agent_settings used to resolve the agent base URL (see AgentBaseUrl)
        # @param logger [#debug, #warn, #error]
        # @return [Component] the started component
        # @return [nil] when the agent base URL, the libdatadog library path or the receiver binary is missing
        def self.build(settings, agent_settings, logger:)
          tags = TagBuilder.call(settings)
          agent_base_url = AgentBaseUrl.resolve(agent_settings)
          logger.warn('Missing agent base URL; cannot enable crash tracking') unless agent_base_url

          ld_library_path = ::Libdatadog.ld_library_path
          logger.warn('Missing ld_library_path; cannot enable crash tracking') unless ld_library_path

          path_to_crashtracking_receiver_binary = ::Libdatadog.path_to_crashtracking_receiver_binary
          unless path_to_crashtracking_receiver_binary
            logger.warn('Missing path_to_crashtracking_receiver_binary; cannot enable crash tracking')
          end

          return unless agent_base_url && ld_library_path && path_to_crashtracking_receiver_binary

          new(
            tags: tags,
            agent_base_url: agent_base_url,
            ld_library_path: ld_library_path,
            path_to_crashtracking_receiver_binary: path_to_crashtracking_receiver_binary,
            logger: logger
          ).tap(&:start)
        end

        # @param tags [#to_a] tags to report with a crash; passed to the native code as an array
        # @param agent_base_url [String] where the agent can be reached
        # @param ld_library_path [String] value of `::Libdatadog.ld_library_path`
        # @param path_to_crashtracking_receiver_binary [String] value of
        #   `::Libdatadog.path_to_crashtracking_receiver_binary`
        # @param logger [#debug, #error]
        def initialize(tags:, agent_base_url:, ld_library_path:, path_to_crashtracking_receiver_binary:, logger:)
          @tags = tags
          @agent_base_url = agent_base_url
          @ld_library_path = ld_library_path
          @path_to_crashtracking_receiver_binary = path_to_crashtracking_receiver_binary
          @logger = logger
        end

        # Applies the at-fork monkey patch, starts the native crash tracker and registers the fork hook.
        def start
          Utils::AtForkMonkeyPatch.apply!

          start_or_update_on_fork(action: :start)
          reset_after_fork
        end

        # Registers a hook that issues the `:update_on_fork` action in forked child processes.
        #
        # NOTE(review): the registration goes through the class-level `ONLY_ONCE` and the hook closes over the
        # instance that registered it. Presumably `OnlyOnce#run` executes its block a single time per process; if
        # so, an instance built later (e.g. with different tags or agent settings) is not the one used in forked
        # children -- confirm that this is intended.
        def reset_after_fork
          ONLY_ONCE.run do
            Utils::AtForkMonkeyPatch.at_fork(:child) do
              start_or_update_on_fork(action: :update_on_fork)
            end
          end
        end

        # Stops the native crash tracker. Failures are logged rather than raised.
        def stop
          self.class._native_stop
          logger.debug('Crash tracking stopped successfully')
        rescue => e
          logger.error("Failed to stop crash tracking: #{e.message}")
        end

        private

        attr_reader :tags, :agent_base_url, :ld_library_path, :path_to_crashtracking_receiver_binary, :logger

        # Calls into the native code to start the crash tracker or to update it after a fork.
        # Failures are logged rather than raised.
        #
        # @param action [Symbol] `:start` or `:update_on_fork`
        def start_or_update_on_fork(action:)
          logger.debug("Crash tracking #{action}...")
          begin
            self.class._native_start_or_update_on_fork(
              action: action,
              exporter_configuration: [:agent, agent_base_url],
              path_to_crashtracking_receiver_binary: path_to_crashtracking_receiver_binary,
              ld_library_path: ld_library_path,
              tags_as_array: tags.to_a,
              upload_timeout_seconds: 1
            )
            logger.debug("Crash tracking #{action} successful")
          rescue => e
            logger.error("Failed to #{action} crash tracking: #{e.message}")
          end
        end
      end
    end
  end
end
39 changes: 39 additions & 0 deletions lib/datadog/core/crashtracking/tag_builder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# frozen_string_literal: true

require_relative '../utils'
require_relative '../environment/socket'
require_relative '../environment/identity'
require_relative '../environment/git'

module Datadog
  module Core
    module Crashtracking
      # Assembles the tags that are attached to crash reports.
      module TagBuilder
        # Combines the user-configured tags with the built-in ones.
        #
        # Built-in tags whose value is nil are dropped, and a built-in tag overrides a user tag with the same key.
        #
        # @param settings [#env, #service, #version, #tags] the configuration to read from
        # @return [Hash{String => String}] every key and value passed through `Utils.utf8_encode`
        def self.call(settings)
          identity = Environment::Identity
          git = Environment::Git

          builtin_tags = {
            'host' => Environment::Socket.hostname,
            'language' => identity.lang,
            'process_id' => Process.pid.to_s,
            'profiler_version' => identity.gem_datadog_version,
            'runtime' => identity.lang, # Intentionally the same value as 'language'
            'runtime_engine' => identity.lang_engine,
            'runtime-id' => identity.id,
            'runtime_platform' => identity.lang_platform,
            'runtime_version' => identity.lang_version,
            'env' => settings.env,
            'service' => settings.service,
            'version' => settings.version,
            'git.repository_url' => git.git_repository_url,
            'git.commit.sha' => git.git_commit_sha
          }
          present_tags = builtin_tags.reject { |_name, value| value.nil? }

          # Encode both keys and values as utf-8 strings, to avoid encoding issues downstream
          encoded_tags = {}
          settings.tags.merge(present_tags).each do |name, value|
            encoded_tags[Utils.utf8_encode(name)] = Utils.utf8_encode(value)
          end
          encoded_tags
        end
      end
    end
  end
end
1 change: 0 additions & 1 deletion lib/datadog/profiling.rb
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ def self.allocation_count # rubocop:disable Lint/NestedMethodDefinition (On purp
require_relative 'profiling/collectors/idle_sampling_helper'
require_relative 'profiling/collectors/stack'
require_relative 'profiling/collectors/thread_context'
require_relative 'profiling/crashtracker'
require_relative 'profiling/stack_recorder'
require_relative 'profiling/exporter'
require_relative 'profiling/flush'
Expand Down
35 changes: 4 additions & 31 deletions lib/datadog/profiling/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,10 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
exporter = build_profiler_exporter(settings, recorder, worker, internal_metadata: internal_metadata)
transport = build_profiler_transport(settings, agent_settings)
scheduler = Profiling::Scheduler.new(exporter: exporter, transport: transport, interval: upload_period_seconds)
crashtracker = build_crashtracker(settings, transport)
profiler = Profiling::Profiler.new(worker: worker, scheduler: scheduler, optional_crashtracker: crashtracker)
profiler = Profiling::Profiler.new(
worker: worker,
scheduler: scheduler
)

if dir_interruption_workaround_enabled?(settings, no_signals_workaround_enabled)
Datadog::Profiling::Ext::DirMonkeyPatches.apply!
Expand Down Expand Up @@ -117,35 +119,6 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
)
end

# Builds the profiler's crashtracker from the profiling transport.
#
# Returns nil (without building anything) when the `experimental_crash_tracking_enabled` setting is off, when the
# transport does not respond to `exporter_configuration`, or when the `libdatadog_api` extension failed to load.
private_class_method def self.build_crashtracker(settings, transport)
return unless settings.profiling.advanced.experimental_crash_tracking_enabled

# By default, the transport is an instance of HttpTransport, which validates the configuration and makes
# it available for us to use here.
# But we support overriding the transport with a user-specific one, which may e.g. write stuff to a file,
# and thus can't really provide a valid configuration to talk to a Datadog agent. Thus, in this situation,
# we can't use the crashtracker, even if enabled.
unless transport.respond_to?(:exporter_configuration)
Datadog.logger.debug(
'Cannot enable profiling crash tracking as a custom settings.profiling.exporter.transport is configured'
)
return
end

# LIBDATADOG_API_FAILURE is used here both as the "failed to load" flag and as the reason included in the log.
if Datadog::Profiling::Crashtracker::LIBDATADOG_API_FAILURE
Datadog.logger.debug(
"Cannot enable crashtracking: #{Datadog::Profiling::Crashtracker::LIBDATADOG_API_FAILURE}"
)
return
end

Datadog::Profiling::Crashtracker.new(
exporter_configuration: transport.exporter_configuration,
tags: Datadog::Profiling::TagBuilder.call(settings: settings),
upload_timeout_seconds: settings.profiling.upload.timeout_seconds,
)
end

private_class_method def self.enable_gc_profiling?(settings)
return false unless settings.profiling.advanced.gc_enabled

Expand Down
Loading

0 comments on commit c7defa4

Please sign in to comment.