From f73518c2ffffbefffd2652ea62d1feeb9efa5fea Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Fri, 10 Jan 2025 12:21:08 +0100 Subject: [PATCH] fix(dynamic-sampling): zero-division error in transaction rebalance (#83155) Closes https://github.com/getsentry/sentry/issues/82082 --- .../models/transactions_rebalancing.py | 5 ++- .../tasks/boost_low_volume_transactions.py | 4 ++ .../models/test_transactions_rebalancing.py | 38 +++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/sentry/dynamic_sampling/models/transactions_rebalancing.py b/src/sentry/dynamic_sampling/models/transactions_rebalancing.py index ef695164907a18..58a1f98f16486b 100644 --- a/src/sentry/dynamic_sampling/models/transactions_rebalancing.py +++ b/src/sentry/dynamic_sampling/models/transactions_rebalancing.py @@ -54,7 +54,10 @@ def _run(self, model_input: TransactionsRebalancingInput) -> tuple[list[Rebalanc if total is None: total = total_explicit - if total_num_classes is None: + # invariant violation: total number of classes should be at least the number of specified classes + # sometimes (maybe due to running the queries at slightly different times), the totals number might be less. + # in this case we should use the number of specified classes as the total number of classes + if total_num_classes is None or total_num_classes < len(classes): total_num_classes = len(classes) # total count for the unspecified classes diff --git a/src/sentry/dynamic_sampling/tasks/boost_low_volume_transactions.py b/src/sentry/dynamic_sampling/tasks/boost_low_volume_transactions.py index dbf6e8229bcc01..cda389716dad64 100644 --- a/src/sentry/dynamic_sampling/tasks/boost_low_volume_transactions.py +++ b/src/sentry/dynamic_sampling/tasks/boost_low_volume_transactions.py @@ -209,6 +209,10 @@ def boost_low_volume_transactions_of_project(project_transactions: ProjectTransa if sample_rate == 1.0: return + # the model fails when we are not having any transactions, thus we can simply return here + if len(transactions) == 0: + return + intensity = options.get("dynamic-sampling.prioritise_transactions.rebalance_intensity", 1.0) model = model_factory(ModelType.TRANSACTIONS_REBALANCING) diff --git a/tests/sentry/dynamic_sampling/models/test_transactions_rebalancing.py b/tests/sentry/dynamic_sampling/models/test_transactions_rebalancing.py index 49b249e4ed7e22..597ac976342849 100644 --- a/tests/sentry/dynamic_sampling/models/test_transactions_rebalancing.py +++ b/tests/sentry/dynamic_sampling/models/test_transactions_rebalancing.py @@ -128,3 +128,41 @@ def test_explicit_elements_ideal_rate( actual_rate * count == pytest.approx(ideal_number_of_elements_per_class) or actual_rate * count >= ideal_number_of_elements_per_class ) + + +def test_total_num_classes_mismatch(transactions_rebalancing_model): + """ + Simple test case that checks that the model is resilient to cases where the + reported total number of classes is less than the number of passed classes + """ + sample_rate = 0.9 + transactions = create_transaction_counts(big=3, med=4, small=2) + explict_transactions = transactions[0:None] + total = sum_classes_counts(transactions) + total_classes = len(transactions) + + trans, global_rate = transactions_rebalancing_model.run( + TransactionsRebalancingInput( + classes=explict_transactions, + sample_rate=sample_rate, + total_num_classes=total_classes - 1, + total=total, + intensity=1, + ), + ) + + ideal_number_of_elements_per_class = total * sample_rate / total_classes + + trans_dict = {t.id: t.new_sample_rate for t in trans} + + for transaction in explict_transactions: + count = transaction.count + actual_rate = trans_dict[transaction.id] + + if ideal_number_of_elements_per_class > count: + assert actual_rate == 1.0 # tiny transactions not sampled + else: + assert ( + actual_rate * count == pytest.approx(ideal_number_of_elements_per_class) + or actual_rate * count >= ideal_number_of_elements_per_class + )