feat: disparate impact (#62)
* feat: implemented disparate impact

* feat: tested disparate impact on fair dataset

* feat: handling of division by zero

* feat: tested disparate impact on unfair dataset

* feat: added docstring

---------

Co-authored-by: Liam James
gciatto authored and Liam James committed Dec 14, 2023
1 parent 58d9dbc commit 926ebba
Showing 2 changed files with 60 additions and 0 deletions.
40 changes: 40 additions & 0 deletions aequitas/core/metrics.py
@@ -121,5 +121,45 @@ def discrete_equalised_odds(x: np.array, y: np.array, y_pred: np.array) -> np.ar
differences = np.array(differences)
return differences

def discrete_disparate_impact(x: np.array,
                              y: np.array,
                              x_cond: ConditionOrScalar,
                              y_cond: ConditionOrScalar) -> float:
"""
Computes the disparate impact for a given classifier h (represented by its predictions h(X)).
A classifier suffers from disparate impact if its predictions disproportionately hurt people
with certain sensitive attributes. It is defined as the minimum between two fractions.
One fraction is:
:math:`P(h(X) = 1 | X = 1) / P(h(X) = 1 | X = 0)`
while the other is its reciprocal. If the minimum between the two is exactly 1 then the classifier
doesn't suffer from disparate impact.
Also see:
* https://www.ijcai.org/proceedings/2020/0315.pdf, sec. 3, definition 3
:param x: (formally :math:`X`) vector of protected attribute (where each component gets values from a **discrete
distribution**, whose admissible values are :math:`{0, 1}`
:param y: (formally :math:`Y`) vector of values predicted by the binary classifier
:param x_cond: current value assigned to :math:`X`
:param y_cond: current value assigned to :math:`Y`
:return: it returns the minimum between the two previously described fractions
"""

    # P(h(X) = y_cond | X = x_cond): rate of the target prediction within the group of interest
    prob1 = conditional_probability(y, y_cond, x, x_cond)
    # same rate within the complementary group (abs(x_cond - 1) flips 0 and 1)
    prob2 = conditional_probability(y, y_cond, x, abs(x_cond - 1))

    # if either probability is zero, one ratio is zero and the other is undefined, so return 0.0
    if prob1 == 0.0 or prob2 == 0.0:
        return 0.0
    else:
        return min((prob1 / prob2, prob2 / prob1))



aequitas.logger.debug("Module %s correctly loaded", __name__)
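
A minimal usage sketch of the new metric (not taken from the repository; the toy arrays and the expected value assume that conditional_probability computes empirical frequencies):

import numpy as np
from aequitas.core.metrics import discrete_disparate_impact

# toy data: x is the binary protected attribute, y the binary predictions h(X)
x = np.array([1, 1, 1, 1, 0, 0, 0, 0])
y = np.array([1, 1, 1, 0, 1, 0, 0, 0])

# P(h(X)=1 | X=1) = 3/4 and P(h(X)=1 | X=0) = 1/4, so the metric should be min(3, 1/3) = 1/3
di = discrete_disparate_impact(x, y, 1, 1)
print(di)  # expected to be about 0.33, i.e. far from 1: noticeable disparate impact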
20 changes: 20 additions & 0 deletions test/core/test_metrics.py
@@ -1,6 +1,7 @@
from test import uniform_binary_dataset, skewed_binary_dataset, uniform_binary_dataset_gt, skewed_binary_dataset_gt
from aequitas.core.metrics import discrete_demographic_parities
from aequitas.core.metrics import discrete_equalised_odds
from aequitas.core.metrics import discrete_disparate_impact
import unittest
import numpy as np

@@ -63,6 +64,25 @@ def test_equalised_odds_on_unfair_binary_case(self):
for diff in diff_row:
self.assertInRange(diff, 0.3, 1.0)

class TestDisparateImpact(AbstractMetricTestCase):
    def setUp(self) -> None:
        self.fair_dataset = uniform_binary_dataset(rows=DATASET_SIZE)
        self.unfair_dataset = skewed_binary_dataset(rows=DATASET_SIZE, p=0.9)

    def test_disparate_impact_on_fair_dataset(self):
        x = self.fair_dataset[:, 0]
        y = self.fair_dataset[:, 1]

        disparate_impact = discrete_disparate_impact(x, y, 1, 1)
        self.assertInRange(disparate_impact, 0.7, 1.3)

    def test_disparate_impact_on_unfair_dataset(self):
        x = self.unfair_dataset[:, 0]
        y = self.unfair_dataset[:, 1]

        disparate_impact = discrete_disparate_impact(x, y, 1, 1)
        self.assertTrue(disparate_impact < 0.5 or disparate_impact > 1.5)


# delete this abstract class, so that the included tests are not run
del AbstractMetricTestCase
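
The dataset helpers used above (uniform_binary_dataset, skewed_binary_dataset) live in the test package and are not shown in this diff; the following is only a hypothetical sketch of what the skewed generator could look like, with its column layout and sampling logic assumed rather than taken from the repository:

import numpy as np

def skewed_binary_dataset(rows: int, p: float = 0.9) -> np.ndarray:
    # hypothetical sketch: column 0 is the protected attribute, column 1 the prediction;
    # group 1 receives a positive prediction with probability p, group 0 with probability 1 - p
    x = np.random.randint(0, 2, size=rows)
    y = np.where(x == 1,
                 np.random.binomial(1, p, size=rows),
                 np.random.binomial(1, 1 - p, size=rows))
    return np.stack([x, y], axis=1)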
