Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(dynamic-sampling): zero-division error in transaction rebalance #83155

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ def _run(self, model_input: TransactionsRebalancingInput) -> tuple[list[Rebalanc
if total is None:
total = total_explicit

if total_num_classes is None:
# invariant violation: total number of classes should be at least the number of specified classes
# sometimes (maybe due to running the queries at slightly different times), the totals number might be less.
# in this case we should use the number of specified classes as the total number of classes
if total_num_classes is None or total_num_classes < len(classes):
total_num_classes = len(classes)

# total count for the unspecified classes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ def boost_low_volume_transactions_of_project(project_transactions: ProjectTransa
if sample_rate == 1.0:
return

# the model fails when we are not having any transactions, thus we can simply return here
if len(transactions) == 0:
return

intensity = options.get("dynamic-sampling.prioritise_transactions.rebalance_intensity", 1.0)

model = model_factory(ModelType.TRANSACTIONS_REBALANCING)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,41 @@ def test_explicit_elements_ideal_rate(
actual_rate * count == pytest.approx(ideal_number_of_elements_per_class)
or actual_rate * count >= ideal_number_of_elements_per_class
)


def test_total_num_classes_mismatch(transactions_rebalancing_model):
"""
Simple test case that checks that the model is resilient to cases where the
reported total number of classes is less than the number of passed classes
"""
sample_rate = 0.9
transactions = create_transaction_counts(big=3, med=4, small=2)
explict_transactions = transactions[0:None]
total = sum_classes_counts(transactions)
total_classes = len(transactions)

trans, global_rate = transactions_rebalancing_model.run(
TransactionsRebalancingInput(
classes=explict_transactions,
sample_rate=sample_rate,
total_num_classes=total_classes - 1,
total=total,
intensity=1,
),
)

ideal_number_of_elements_per_class = total * sample_rate / total_classes

trans_dict = {t.id: t.new_sample_rate for t in trans}

for transaction in explict_transactions:
count = transaction.count
actual_rate = trans_dict[transaction.id]

if ideal_number_of_elements_per_class > count:
assert actual_rate == 1.0 # tiny transactions not sampled
else:
assert (
actual_rate * count == pytest.approx(ideal_number_of_elements_per_class)
or actual_rate * count >= ideal_number_of_elements_per_class
)
Loading