Skip to content

Commit

Permalink
fix(dynamic-sampling): zero-division error in transaction rebalance (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
constantinius authored and andrewshie-sentry committed Jan 22, 2025
1 parent 83b012d commit f73518c
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ def _run(self, model_input: TransactionsRebalancingInput) -> tuple[list[Rebalanc
if total is None:
total = total_explicit

if total_num_classes is None:
# invariant violation: the total number of classes should be at least the number of specified classes.
# Sometimes (possibly because the queries run at slightly different times) the reported total is lower.
# In that case we use the number of specified classes as the total number of classes.
if total_num_classes is None or total_num_classes < len(classes):
total_num_classes = len(classes)

# total count for the unspecified classes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ def boost_low_volume_transactions_of_project(project_transactions: ProjectTransa
if sample_rate == 1.0:
return

# the model fails when there are no transactions, so we can simply return here
if len(transactions) == 0:
return

intensity = options.get("dynamic-sampling.prioritise_transactions.rebalance_intensity", 1.0)

model = model_factory(ModelType.TRANSACTIONS_REBALANCING)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,41 @@ def test_explicit_elements_ideal_rate(
actual_rate * count == pytest.approx(ideal_number_of_elements_per_class)
or actual_rate * count >= ideal_number_of_elements_per_class
)


def test_total_num_classes_mismatch(transactions_rebalancing_model):
    """
    Verify that the model copes with a reported total number of classes that
    is smaller than the number of classes explicitly passed to it.
    """
    sample_rate = 0.9
    all_transactions = create_transaction_counts(big=3, med=4, small=2)
    # every transaction is passed explicitly; nothing is left implicit
    explicit_transactions = all_transactions[:]
    total_count = sum_classes_counts(all_transactions)
    num_classes = len(all_transactions)

    # deliberately under-report the number of classes by one
    model_input = TransactionsRebalancingInput(
        classes=explicit_transactions,
        sample_rate=sample_rate,
        total_num_classes=num_classes - 1,
        total=total_count,
        intensity=1,
    )
    rebalanced, _global_rate = transactions_rebalancing_model.run(model_input)

    ideal_per_class = total_count * sample_rate / num_classes
    new_rates = {item.id: item.new_sample_rate for item in rebalanced}

    for transaction in explicit_transactions:
        rate = new_rates[transaction.id]
        volume = transaction.count

        if ideal_per_class > volume:
            # classes smaller than the ideal size keep a sample rate of 1.0
            assert rate == 1.0
            continue

        sampled = rate * volume
        assert sampled == pytest.approx(ideal_per_class) or sampled >= ideal_per_class

0 comments on commit f73518c

Please sign in to comment.