Skip to content

Commit

Permalink
Switch win probability to calculate against control
Browse files Browse the repository at this point in the history
  • Loading branch information
danielbachhuber committed Jan 22, 2025
1 parent ac86bab commit 25fe35e
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 6 deletions.
19 changes: 13 additions & 6 deletions posthog/hogql_queries/experiments/funnels_statistics_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ def calculate_probabilities_v2(
Returns:
--------
list[float]
A list of probabilities that sum to 1, where:
- The first element is the probability that the control variant is the best
- Subsequent elements are the probabilities that each test variant is the best
A list of probabilities where each element represents:
- index 0: probability control variant beats all test variants
- index i>0: probability test variant i-1 beats control
Notes:
------
Expand Down Expand Up @@ -70,10 +70,17 @@ def calculate_probabilities_v2(
samples.append(variant_samples)

samples_array = np.array(samples)
# Calculate probability of each variant being the best
# Calculate probability of each variant beating the control
probabilities = []
for i in range(len(all_variants)):
probability = (samples_array[i] == np.max(samples_array, axis=0)).mean()
control_samples = samples_array[0] # Control is always first variant

# Control's probability is that of beating every test variant
control_prob = np.mean(np.all(control_samples >= samples_array[1:], axis=0))
probabilities.append(float(control_prob))

# For each test variant, calculate probability of beating control
for i in range(1, len(all_variants)):
probability = np.mean(samples_array[i] > control_samples)
probabilities.append(float(probability))

return probabilities
Expand Down
29 changes: 29 additions & 0 deletions posthog/hogql_queries/experiments/test/test_funnels_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,35 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca

self.run_test_for_both_implementations(run_test)

@flaky(max_runs=5, min_passes=1)
def test_many_variants_win_probabilty_compared_to_control(self):
    """Test with multiple variants, win probability compared to control."""

    def scenario(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
        # Conversion rates: control 10%, test_a 8% (worse than control),
        # test_b 15% (best overall), test_c 11% (slightly better than control).
        control = create_variant("control", success_count=100, failure_count=900)
        test_variants = [
            create_variant("test_a", success_count=80, failure_count=920),
            create_variant("test_b", success_count=150, failure_count=850),
            create_variant("test_c", success_count=110, failure_count=890),
        ]

        probabilities = calculate_probabilities(control, test_variants)

        self.assertEqual(len(probabilities), 4)
        # v2 reports each test variant's chance of beating control, while the
        # legacy implementation reports each variant's chance of being best
        # overall — hence the different expectations for test_a and test_c.
        if stats_version == 2:
            expected = [0, 0.05, 0.99, 0.76]
        else:
            expected = [0, 0, 0.99, 0.0]
        for actual, target in zip(probabilities, expected):
            self.assertAlmostEqual(actual, target, delta=0.05)

    self.run_test_for_both_implementations(scenario)

@flaky(max_runs=5, min_passes=1)
def test_insufficient_sample_size(self):
"""Test with sample size below threshold"""
Expand Down

0 comments on commit 25fe35e

Please sign in to comment.