DataDome · adedaran · Apr 6, 2023 · Apr 5, 2023 · Apr 5, 2023
@@ -70,6 +70,15 @@ class Slicefinder(BaseEstimator, TransformerMixin):
     average_error_: float
         Mean value of the input error.
 
+    top_slices_statistics_: list of dict of length `len(top_slices_)`
+        The statistics of the slices found, sorted by slice score.
+        For each slice, the following statistics are stored:
+            - slice_score: the score of the slice (defined in `_score` method)
+            - sum_slice_error: the sum of all the errors in the slice
+            - max_slice_error: the maximum of all errors in the slice
+            - slice_size: the number of elements in the slice
+            - slice_average_error: the average error in the slice (sum_slice_error / slice_size)
+
     References
     ----------
     `SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging
@@ -92,7 +101,8 @@ def __init__(
         self.verbose = verbose
 
         self._one_hot_encoder = self._top_slices_enc = None
-        self.top_slices_ = self.average_error_ = None
+        self.top_slices_ = self.top_slices_statistics_ = None
+        self.average_error_ = None
 
         if self.verbose:
             logger.setLevel(logging.DEBUG)
@@ -679,4 +689,28 @@ def _search_slices(
                 top_k_slices
             )
 
+        # append each slice's average error (sum_slice_error / slice_size) as a new column
+        top_k_statistics = np.column_stack(
+            (
+                top_k_statistics,
+                np.divide(top_k_statistics[:, 1], top_k_statistics[:, 3]),
+            )
+        )
+
+        # transform statistics to a list of dict
+        statistics_names = [
+            "slice_score",
+            "sum_slice_error",
+            "max_slice_error",
+            "slice_size",
+            "slice_average_error",
+        ]
+        self.top_slices_statistics_ = [
+            {
+                stat_name: stat_value
+                for stat_value, stat_name in zip(statistic, statistics_names)
+            }
+            for statistic in top_k_statistics
+        ]
+
         logger.debug("Terminated at level %i." % level)