From c6c706acec2ce4d4963dbe23f0e11cf83ea86a09 Mon Sep 17 00:00:00 2001 From: adedaran Date: Wed, 5 Apr 2023 15:46:27 +0200 Subject: [PATCH 1/2] Lint code --- Makefile | 8 ++- docs/source/conf.py | 31 +++++---- pyproject.toml | 14 ++++ setup.cfg | 6 +- sliceline/__init__.py | 2 +- sliceline/slicefinder.py | 103 +++++++++++++++++++--------- sliceline/validation.py | 19 ++++-- tests/conftest.py | 137 ++++++++++++++++++++++++++++++++------ tests/experiment.py | 3 +- tests/test_slicefinder.py | 123 +++++++++++++++++++++++++++++++--- 10 files changed, 360 insertions(+), 86 deletions(-) diff --git a/Makefile b/Makefile index da5d0c2..b8ee908 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,14 @@ init: pip3 install poetry poetry install +lint: + poetry run black . + poetry run isort . + poetry run flake8 + test: - poetry run pytest tests --cov=sliceline --cov-report=xml:.github/reports/coverage.xml + poetry run coverage run -m pytest + poetry run coverage report -m doc: sphinx-build -a docs/source docs/build diff --git a/docs/source/conf.py b/docs/source/conf.py index b3e88ed..d466e57 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,37 +1,36 @@ # Configuration file for the Sphinx documentation builder. -import sys import os - +import sys sys.path.insert(0, os.path.abspath("../../")) # -- Project information -project = 'Sliceline' -copyright = '2022, DataDome' -author = 'Antoine de Daran' +project = "Sliceline" +copyright = "2022, DataDome" +author = "Antoine de Daran" # -- General configuration extensions = [ - 'sphinx.ext.duration', - 'sphinx.ext.doctest', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.intersphinx', + "sphinx.ext.duration", + "sphinx.ext.doctest", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", ] intersphinx_mapping = { - 'python': ('https://docs.python.org/3/', None), - 'sphinx': ('https://www.sphinx-doc.org/en/master/', None), + "python": ("https://docs.python.org/3/", None), + "sphinx": ("https://www.sphinx-doc.org/en/master/", None), } -intersphinx_disabled_domains = ['std'] +intersphinx_disabled_domains = ["std"] -templates_path = ['_templates'] +templates_path = ["_templates"] # -- Options for HTML output -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # -- Options for EPUB output -epub_show_urls = 'footnote' +epub_show_urls = "footnote" diff --git a/pyproject.toml b/pyproject.toml index 14c6fe3..21a88b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,3 +29,17 @@ pytest-benchmark = "^3.4.1" pytest-cov = "^3.0.0" Sphinx = "^4.0.0" sphinx-rtd-theme = "^1.0.0" + +[tool.black] +build-backend = "poetry.core.masonry.api" +line-length = 79 +include = '\.pyi?$' + +[tool.isort] +profile = "black" + +[tool.coverage.run] +omit = [".*", "*/site-packages/*", "tests/*", "*/validation.py"] + +[tool.coverage.report] +fail_under = 80 diff --git a/setup.cfg b/setup.cfg index fb87a91..3238fb1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,4 +1,6 @@ [flake8] -exclude = validation +docstring-convention = numpy +format = pylint +ignore = E203 W503 C901 max-line-length = 100 -ignore = E203 W503 +max-complexity = 10 diff --git a/sliceline/__init__.py b/sliceline/__init__.py index 270acd4..b225858 100644 --- a/sliceline/__init__.py +++ b/sliceline/__init__.py @@ -1,3 +1,3 @@ from .slicefinder import Slicefinder -__all__ = ("Slicefinder",) \ No newline at end of file +__all__ = ("Slicefinder",) diff --git a/sliceline/slicefinder.py b/sliceline/slicefinder.py index 90ebf4a..0bd51ad 100644 --- 
a/sliceline/slicefinder.py +++ b/sliceline/slicefinder.py @@ -24,8 +24,8 @@ class Slicefinder(BaseEstimator, TransformerMixin): Given an input dataset (`X`) and a model error vector (`errors`), SliceLine finds the `k` slices in `X` that identify where the model performs significantly worse. - A slice is a subspace of `X` defined by one or more predicates. The maximal dimension - of this subspace is controlled by `max_l`. + A slice is a subspace of `X` defined by one or more predicates. + The maximal dimension of this subspace is controlled by `max_l`. The slice scoring function is the linear combination of two objectives: - Find sufficiently large slices, with more than `min_sup` elements @@ -55,7 +55,8 @@ class Slicefinder(BaseEstimator, TransformerMixin): min_sup: int or float, default=10 Minimum support threshold. Inspired by frequent itemset mining, it ensures statistical significance. - If `min_sup` is a float (0 < `min_sup` < 1), it represents the faction of the input dataset (`X`) + If `min_sup` is a float (0 < `min_sup` < 1), + it represents the faction of the input dataset (`X`). verbose: bool, default=True Controls the verbosity. @@ -71,7 +72,8 @@ class Slicefinder(BaseEstimator, TransformerMixin): References ---------- - `SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging `__, + `SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging + `__, from *Svetlana Sagadeeva* and *Matthias Boehm* of Graz University of Technology. """ @@ -108,9 +110,8 @@ def _check_params(self): if self.max_l <= 0: raise ValueError(f"Invalid 'max_l' parameter: {self.max_l}") - if ( - self.min_sup < 0 or - (isinstance(self.min_sup, float) and self.min_sup >= 1) + if self.min_sup < 0 or ( + isinstance(self.min_sup, float) and self.min_sup >= 1 ): raise ValueError(f"Invalid 'min_sup' parameter: {self.min_sup}") @@ -228,9 +229,9 @@ def _get_slices_masks(self, X): slice_candidates = self._top_slices_enc @ X_encoded.T # self._top_slices_enc.sum(axis=1) is the number of predicate(s) for each top_slices_ - slices_masks = (slice_candidates == self._top_slices_enc.sum(axis=1)).A.astype( - int - ) + slices_masks = ( + slice_candidates == self._top_slices_enc.sum(axis=1) + ).A.astype(int) return slices_masks @@ -243,8 +244,12 @@ def _n_features_out(self): def _dummify(array: np.ndarray, n_col_x_encoded: int) -> sp.csr_matrix: """Dummify `array` with respect to `n_col_x_encoded`. Assumption: v does not contain any 0.""" - assert 0 not in array, "Modality 0 is not expected to be one-hot encoded." - one_hot_encoding = sp.lil_matrix((array.size, n_col_x_encoded), dtype=bool) + assert ( + 0 not in array + ), "Modality 0 is not expected to be one-hot encoded." 
+ one_hot_encoding = sp.lil_matrix( + (array.size, n_col_x_encoded), dtype=bool + ) one_hot_encoding[np.arange(array.size), array - 1] = True return one_hot_encoding.tocsr() @@ -257,14 +262,18 @@ def _maintain_top_k( ) -> Tuple[sp.csr_matrix, np.ndarray]: """Add new `slices` to `top_k_slices` and update the top-k slices.""" # prune invalid min_sup and scores - valid_slices_mask = (statistics[:, 3] >= self.min_sup) & (statistics[:, 0] > 0) + valid_slices_mask = (statistics[:, 3] >= self.min_sup) & ( + statistics[:, 0] > 0 + ) if np.sum(valid_slices_mask) != 0: slices, statistics = ( slices[valid_slices_mask], statistics[valid_slices_mask], ) - if (slices.shape[1] != top_k_slices.shape[1]) & (slices.shape[1] == 1): + if (slices.shape[1] != top_k_slices.shape[1]) & ( + slices.shape[1] == 1 + ): slices, statistics = slices.T, statistics.T # evaluated candidates and previous top-k @@ -272,7 +281,9 @@ def _maintain_top_k( statistics = np.concatenate([top_k_statistics, statistics]) # extract top-k - top_slices_bool = rankdata(-statistics[:, 0], method="min") <= self.k + top_slices_bool = ( + rankdata(-statistics[:, 0], method="min") <= self.k + ) top_k_slices, top_k_statistics = ( slices[top_slices_bool], statistics[top_slices_bool], @@ -298,7 +309,9 @@ def _score_ub( potential_solutions = np.column_stack( ( self.min_sup * np.ones(slice_sizes_ub.shape[0]), - np.maximum(slice_errors_ub / max_slice_errors_ub, self.min_sup), + np.maximum( + slice_errors_ub / max_slice_errors_ub, self.min_sup + ), slice_sizes_ub, ) ) @@ -307,7 +320,8 @@ def _score_ub( self.alpha * ( np.minimum( - potential_solutions.T * max_slice_errors_ub, slice_errors_ub + potential_solutions.T * max_slice_errors_ub, + slice_errors_ub, ).T / self.average_error_ - potential_solutions @@ -325,7 +339,9 @@ def _analyse_top_k(top_k_statistics: np.ndarray) -> tuple: max_slice_scores = min_slice_scores = -np.inf if top_k_statistics.shape[0] > 0: max_slice_scores = top_k_statistics[0, 0] - min_slice_scores = top_k_statistics[top_k_statistics.shape[0] - 1, 0] + min_slice_scores = top_k_statistics[ + top_k_statistics.shape[0] - 1, 0 + ] return max_slice_scores, min_slice_scores def _score( @@ -354,7 +370,9 @@ def _eval_slice( max_slice_errors = slice_candidates.T.multiply(errors).max(axis=1).A # score of relative error and relative size - slice_scores = self._score(slice_sizes, slice_errors, x_encoded.shape[0]) + slice_scores = self._score( + slice_sizes, slice_errors, x_encoded.shape[0] + ) return np.column_stack( [slice_scores, slice_errors, max_slice_errors, slice_sizes] ) @@ -379,7 +397,9 @@ def _create_and_score_basic_slices( slices = self._dummify(attr, n_col_x_encoded) # score 1-slices and create initial top-k - slice_scores = self._score(slice_sizes, slice_errors, x_encoded.shape[0]) + slice_scores = self._score( + slice_sizes, slice_errors, x_encoded.shape[0] + ) statistics = np.column_stack( (slice_scores, slice_errors, max_slice_errors, slice_sizes) ) @@ -397,11 +417,15 @@ def _get_pruned_s_r( ) -> Tuple[sp.csr_matrix, np.ndarray]: """Prune invalid slices. 
Do not affect overall pruning effectiveness due to handling of missing parents.""" - valid_slices_mask = (statistics[:, 3] >= self.min_sup) & (statistics[:, 1] > 0) + valid_slices_mask = (statistics[:, 3] >= self.min_sup) & ( + statistics[:, 1] > 0 + ) return slices[valid_slices_mask], statistics[valid_slices_mask] @staticmethod - def _join_compatible_slices(slices: sp.csr_matrix, level: int) -> np.ndarray: + def _join_compatible_slices( + slices: sp.csr_matrix, level: int + ) -> np.ndarray: """Join compatible slices according to `level`.""" slices_int = slices.astype(int) join = (slices_int @ slices_int.T).A == level - 2 @@ -409,7 +433,9 @@ def _join_compatible_slices(slices: sp.csr_matrix, level: int) -> np.ndarray: @staticmethod def _combine_slices( - slices: sp.csr_matrix, statistics: np.ndarray, compatible_slices: np.ndarray + slices: sp.csr_matrix, + statistics: np.ndarray, + compatible_slices: np.ndarray, ) -> Tuple[sp.csr_matrix, np.ndarray, np.ndarray, np.ndarray]: """Combine slices by exploiting parents node statistics.""" parent_1_idx, parent_2_idx = np.where(compatible_slices == 1) @@ -459,7 +485,9 @@ def _prepare_deduplication_and_pruning( """Prepare IDs for deduplication and pruning.""" ids = np.zeros(pair_candidates.shape[0]) dom = feature_domains + 1 - for j, (start, end) in enumerate(zip(feature_offset_start, feature_offset_end)): + for j, (start, end) in enumerate( + zip(feature_offset_start, feature_offset_end) + ): sub_pair_candidates = pair_candidates[:, start:end] # sub_p should not contain multiple True on the same line i = sub_pair_candidates.argmax(axis=1).T + np.any( @@ -510,7 +538,10 @@ def _get_pair_candidates( return sp.csr_matrix(np.empty((0, slices.shape[1]))) ids = self._prepare_deduplication_and_pruning( - feature_offset_start, feature_offset_end, feature_domains, pair_candidates + feature_offset_start, + feature_offset_end, + feature_domains, + pair_candidates, ) # remove duplicate candidates and select corresponding statistics @@ -579,7 +610,9 @@ def _search_slices( np.zeros((0, 4)), ) - max_slice_scores, min_slice_scores = self._analyse_top_k(top_k_statistics) + max_slice_scores, min_slice_scores = self._analyse_top_k( + top_k_statistics + ) logger.debug( "Initial top-K: count=%i, max=%f, min=%f" % (top_k_slices.shape[0], max_slice_scores, min_slice_scores) @@ -589,7 +622,11 @@ def _search_slices( # termination condition (max #feature levels) level = 1 min_condition = min(input_x.shape[1], self.max_l) - while (slices.shape[0] > 0) & (slices.sum() > 0) & (level < min_condition): + while ( + (slices.shape[0] > 0) + & (slices.sum() > 0) + & (level < min_condition) + ): level += 1 # enumerate candidate join pairs, including size/error pruning @@ -620,8 +657,12 @@ def _search_slices( slices, statistics, top_k_slices, top_k_statistics ) - max_slice_scores, min_slice_scores = self._analyse_top_k(top_k_statistics) - valid = np.sum((statistics[:, 3] >= self.min_sup) & (statistics[:, 1] > 0)) + max_slice_scores, min_slice_scores = self._analyse_top_k( + top_k_statistics + ) + valid = np.sum( + (statistics[:, 3] >= self.min_sup) & (statistics[:, 1] > 0) + ) logger.debug( " -- valid slices after eval: %s/%i" % (valid, slices.shape[0]) ) @@ -634,6 +675,8 @@ def _search_slices( if top_k_slices.shape[0] == 0: self.top_slices_ = np.empty((0, input_x.shape[1])) else: - self.top_slices_ = self._one_hot_encoder.inverse_transform(top_k_slices) + self.top_slices_ = self._one_hot_encoder.inverse_transform( + top_k_slices + ) logger.debug("Terminated at level %i." 
% level) diff --git a/sliceline/validation.py b/sliceline/validation.py index 1d50add..ae0a01e 100644 --- a/sliceline/validation.py +++ b/sliceline/validation.py @@ -99,7 +99,8 @@ def _num_samples(x): if hasattr(x, "shape") and x.shape is not None: if len(x.shape) == 0: raise TypeError( - "Singleton array %r cannot be considered a valid collection." % x + "Singleton array %r cannot be considered a valid collection." + % x ) # Check that shape is returning an integer or default to len # Dask dataframes may not return numeric shape[0] value @@ -242,7 +243,8 @@ def _ensure_sparse_format( if force_all_finite: if not hasattr(spmatrix, "data"): warnings.warn( - "Can't check %s sparse matrix for nan or inf." % spmatrix.format, + "Can't check %s sparse matrix for nan or inf." + % spmatrix.format, stacklevel=2, ) else: @@ -450,7 +452,10 @@ def check_array( with suppress(ImportError): from pandas.api.types import is_sparse - if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any(): + if ( + not hasattr(array, "sparse") + and array.dtypes.apply(is_sparse).any() + ): warnings.warn( "pandas.DataFrame with sparse columns found." "It will be converted to a dense numpy array." @@ -781,7 +786,9 @@ def check_X_e( input_name="X", ) - y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator) + y = _check_y( + y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator + ) check_consistent_length(X, y) @@ -847,4 +854,6 @@ def column_or_1d(y, *, warn=False): ) return np.ravel(y) - raise ValueError(f"y should be a 1d array, got an array of shape {shape} instead.") + raise ValueError( + f"y should be a 1d array, got an array of shape {shape} instead." + ) diff --git a/tests/conftest.py b/tests/conftest.py index 52d194e..affc7bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -122,7 +122,9 @@ def experiments(): ] ).T errors_1 = np.array([True] * n_small + [False] * n_small) - expected_top_slices_1 = np.array([[1, 1, None], [None, 1, 2], [1, None, 2]]) + expected_top_slices_1 = np.array( + [[1, 1, None], [None, 1, 2], [1, None, 2]] + ) experiment_1 = Experiment(X_1, errors_1, expected_top_slices_1) # Experiment 2: Experiment 1 + more columns and different order @@ -154,7 +156,9 @@ def experiments(): ] ).T errors_3 = np.array([1] * n + [0] * n) - expected_top_slices_3 = np.array([[1.0, 1.0, None], [1.0, None, None], [None, 1.0, None]]) + expected_top_slices_3 = np.array( + [[1.0, 1.0, None], [1.0, None, None], [None, 1.0, None]] + ) experiment_3 = Experiment(X_3, errors_3, expected_top_slices_3) # Experiment 4: Experiment 3 + more columns @@ -172,19 +176,28 @@ def experiments(): ).T errors_4 = np.array([1] * n + [0] * n) expected_top_slices_4 = np.array( - [[1.0, 1.0, None, None, None, None], [1.0, None, 3.0, None, None, None]] + [ + [1.0, 1.0, None, None, None, None], + [1.0, None, 3.0, None, None, None], + ] ) experiment_4 = Experiment(X_4, errors_4, expected_top_slices_4) # Experiment 5: Experiment 4 w/ min_sup=50 expected_top_slices_5 = np.array( - [[1.0, 1.0, None, None, None, None], [1.0, None, 3.0, None, None, None]] + [ + [1.0, 1.0, None, None, None, None], + [1.0, None, 3.0, None, None, None], + ] ) experiment_5 = Experiment(X_4, errors_4, expected_top_slices_5, min_sup=50) # Experiment 6: Experiment 4 w/ max_l=1 expected_top_slices_6 = np.array( - [[1.0, None, None, None, None, None], [None, 1.0, None, None, None, None]] + [ + [1.0, None, None, None, None, None], + [None, 1.0, None, None, None, None], + ] ) experiment_6 = Experiment(X_4, 
errors_4, expected_top_slices_6, max_l=1) @@ -212,7 +225,12 @@ def experiments(): # Experiment 9: Experiment 1 w/ float label np.random.seed(9) errors_9 = ( - np.concatenate([np.random.randint(1, 61, n_small), np.random.randint(41, 101, n_small)]) + np.concatenate( + [ + np.random.randint(1, 61, n_small), + np.random.randint(41, 101, n_small), + ] + ) / 100 ) expected_top_slices_9 = np.array([[2.0, None, None], [2.0, 1.0, None]]) @@ -252,9 +270,16 @@ def experiments(): ).T errors_11 = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]) expected_top_slices_11 = np.array( - [[1.0, 1.0, 1.0, None], [None, 1.0, 1.0, None], [1, None, 1, None], [1, 1, None, None]] + [ + [1.0, 1.0, 1.0, None], + [None, 1.0, 1.0, None], + [1, None, 1, None], + [1, 1, None, None], + ] + ) + experiment_11 = Experiment( + X_11, errors_11, expected_top_slices_11, max_l=3 ) - experiment_11 = Experiment(X_11, errors_11, expected_top_slices_11, max_l=3) # Experiment 12: max_l=4 X_12 = np.array( @@ -276,7 +301,9 @@ def experiments(): [1, 1, 1, None, None], ] ) - experiment_12 = Experiment(X_12, errors_12, expected_top_slices_12, max_l=4) + experiment_12 = Experiment( + X_12, errors_12, expected_top_slices_12, max_l=4 + ) # Experiment 13: mixed types X_13 = np.array( @@ -289,7 +316,9 @@ def experiments(): dtype=object, ).T errors_13 = np.array([1, 1, 1, 1, 0, 0, 0, 0]) - expected_top_slices_13 = np.array([[1, "a", None, None], [None, "a", None, 3]]) + expected_top_slices_13 = np.array( + [[1, "a", None, None], [None, "a", None, 3]] + ) experiment_13 = Experiment(X_13, errors_13, expected_top_slices_13) # Experiment 14: Experiment 4 w/ min_sup=10 @@ -299,19 +328,87 @@ def experiments(): [1.0, None, 3.0, None, None, None], ] ) - experiment_14 = Experiment(X_4, errors_4, expected_top_slices_14, min_sup=10) + experiment_14 = Experiment( + X_4, errors_4, expected_top_slices_14, min_sup=10 + ) # Experiment 15: Experiment 4 w/ alpha=0.5 expected_top_slices_15 = np.empty((0, 6)) - experiment_15 = Experiment(X_4, errors_4, expected_top_slices_15, alpha=0.5) + experiment_15 = Experiment( + X_4, errors_4, expected_top_slices_15, alpha=0.5 + ) # Experiment 16: Experiment with missing parent pruning X_16 = np.array( [ - ["g", "g", "g", "g", "g", "g", "a", "a", "a", "a", "a", "a", "a", "a", "a"], - ["b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "f", "f", "f"], - ["h", "h", "h", "h", "c", "c", "c", "c", "h", "h", "h", "h", "h", "h", "h"], - ["d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "e", "e", "e"], + [ + "g", + "g", + "g", + "g", + "g", + "g", + "a", + "a", + "a", + "a", + "a", + "a", + "a", + "a", + "a", + ], + [ + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "f", + "f", + "f", + ], + [ + "h", + "h", + "h", + "h", + "c", + "c", + "c", + "c", + "h", + "h", + "h", + "h", + "h", + "h", + "h", + ], + [ + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "e", + "e", + "e", + ], ], dtype=object, ).T @@ -326,10 +423,12 @@ def experiments(): [ [1.0, 1.0, None, None, None, None], [1.0, None, None, None, None, None], - [None, 1.0, None, None, None, None] + [None, 1.0, None, None, None, None], ] ) - experiment_17 = Experiment(X_4, errors_4, expected_top_slices_17, min_sup=0.5) + experiment_17 = Experiment( + X_4, errors_4, expected_top_slices_17, min_sup=0.5 + ) return { "experiment_1": experiment_1, @@ -348,5 +447,5 @@ def experiments(): "experiment_14": experiment_14, "experiment_15": experiment_15, "experiment_16": experiment_16, - 
"experiment_17": experiment_17 + "experiment_17": experiment_17, } diff --git a/tests/experiment.py b/tests/experiment.py index 53df664..ee9bf19 100644 --- a/tests/experiment.py +++ b/tests/experiment.py @@ -39,7 +39,8 @@ class Experiment: min_sup: int or float, default=10 Minimum support threshold. Inspired by frequent itemset mining, it ensures statistical significance. - If `min_sup` is a float (0 < `min_sup` < 1), it represents the faction of the input dataset (`X`) + If `min_sup` is a float (0 < `min_sup` < 1), + it represents the faction of the input dataset (`X`) verbose: bool, default=True Controls the verbosity. diff --git a/tests/test_slicefinder.py b/tests/test_slicefinder.py index 74ec5c7..3e2d2b6 100644 --- a/tests/test_slicefinder.py +++ b/tests/test_slicefinder.py @@ -13,7 +13,9 @@ def test_dummify(benchmark, basic_test_data): """Test _dummify method.""" array = np.array([1, 3, 5, 6, 7, 8, 13, 15]) computed = benchmark( - basic_test_data["slicefinder_model"]._dummify, array, basic_test_data["n_col_x_encoded"] + basic_test_data["slicefinder_model"]._dummify, + array, + basic_test_data["n_col_x_encoded"], ) assert np.array_equal(computed.A, basic_test_data["slices"].A) @@ -62,13 +64,97 @@ def test_maintain_top_k(benchmark, basic_test_data): def test_score_ub(benchmark, basic_test_data): """Test _score_ub method.""" slice_sizes_ub = np.array( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4, 1, 1, 1, 1, 3, 4, 1, 1, 1, 1, 6] + [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 4, + 1, + 1, + 1, + 1, + 3, + 4, + 1, + 1, + 1, + 1, + 6, + ] ) slice_errors_ub = np.array( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 4] + [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 1, + 1, + 1, + 4, + ] ) max_slice_errors_ub = np.array( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ] ) computed = benchmark( @@ -136,7 +222,10 @@ def test_score(benchmark, basic_test_data): n_row_x_encoded = 8 computed = benchmark( - basic_test_data["slicefinder_model"]._score, slice_sizes, slice_errors, n_row_x_encoded + basic_test_data["slicefinder_model"]._score, + slice_sizes, + slice_errors, + n_row_x_encoded, ) expected = np.array( [ @@ -221,7 +310,9 @@ def test_get_pair_candidates(benchmark, basic_test_data): assert np.array_equal(computed.A, basic_test_data["candidates"].A) -def test_get_pair_candidates_with_missing_parents_pruning(benchmark, basic_test_data): +def test_get_pair_candidates_with_missing_parents_pruning( + benchmark, basic_test_data +): """Test _get_pair_candidates where missing parents are present in pruning.""" slices = sp.csr_matrix( [ @@ -250,7 +341,9 @@ def test_get_pair_candidates_with_missing_parents_pruning(benchmark, basic_test_ ) slicefinder_model_parents_pruning.average_error_ = 0.4 - expected = np.array([[False, False, True, False, False, True, True, False]]) + expected = np.array( + [[False, False, True, False, False, True, True, False]] + ) computed = benchmark( slicefinder_model_parents_pruning._get_pair_candidates, @@ -323,7 +416,9 @@ def test_experiments(benchmark, experiments, experiment_name): experiment.input_errors, ) computed_top_k_slices = slicefinder_model.top_slices_ - assert np.array_equal(computed_top_k_slices, experiment.expected_top_k_slices) + assert 
np.array_equal( + computed_top_k_slices, experiment.expected_top_k_slices + ) def test_transform(benchmark, basic_test_data): @@ -333,17 +428,23 @@ def test_transform(benchmark, basic_test_data): basic_test_data["X"], basic_test_data["errors"], ) - expected = np.array([[1, 1], [1, 1], [1, 1], [1, 0], [0, 0], [0, 0], [0, 0], [0, 0]]) + expected = np.array( + [[1, 1], [1, 1], [1, 1], [1, 0], [0, 0], [0, 0], [0, 0], [0, 0]] + ) assert np.array_equal(computed, expected) def test_get_slice(benchmark, basic_test_data): """Test get_slice method.""" - basic_test_data["slicefinder_model"].fit(basic_test_data["X"], basic_test_data["errors"]) + basic_test_data["slicefinder_model"].fit( + basic_test_data["X"], basic_test_data["errors"] + ) computed_slice = benchmark( basic_test_data["slicefinder_model"].get_slice, basic_test_data["X"], 0, ) - expected_slice = np.array([[1, 1, 1, 3], [1, 1, 2, 3], [1, 1, 3, 3], [1, 1, 4, 1]]) + expected_slice = np.array( + [[1, 1, 1, 3], [1, 1, 2, 3], [1, 1, 3, 3], [1, 1, 4, 1]] + ) assert np.array_equal(computed_slice, expected_slice) From eeb511edd033804cf856fa50c27532cbd04353e2 Mon Sep 17 00:00:00 2001 From: adedaran Date: Wed, 5 Apr 2023 16:40:42 +0200 Subject: [PATCH 2/2] Fix syntax in flake8 --- pyproject.toml | 1 - setup.cfg | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 21a88b3..59a5b19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,6 @@ Sphinx = "^4.0.0" sphinx-rtd-theme = "^1.0.0" [tool.black] -build-backend = "poetry.core.masonry.api" line-length = 79 include = '\.pyi?$' diff --git a/setup.cfg b/setup.cfg index 3238fb1..af9603f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [flake8] docstring-convention = numpy format = pylint -ignore = E203 W503 C901 +extend-ignore = E203,W503,C901 max-line-length = 100 max-complexity = 10
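
The two commits above only reformat code and configure linting, so the Slicefinder API exercised by the tests is unchanged. As a companion to the hunks, here is a minimal usage sketch of that API. The toy data, parameter values, and expected outputs are illustrative assumptions rather than part of the patch; only the names (Slicefinder, fit, top_slices_, transform, get_slice, and the alpha, k, max_l, min_sup and verbose parameters) come from the code being linted.

    import numpy as np

    from sliceline import Slicefinder

    # Toy dataset: 8 samples, 2 categorical features. Model errors are
    # concentrated on modality 1 of the first feature.
    X = np.array(
        [
            [1, 1],
            [1, 2],
            [1, 3],
            [1, 4],
            [2, 1],
            [2, 2],
            [2, 3],
            [2, 4],
        ]
    )
    errors = np.array([1, 1, 1, 1, 0, 0, 0, 0])

    # Illustrative parameters: weight the error objective heavily (alpha
    # close to 1), keep the single best slice, allow at most 2 predicates,
    # and require at least 2 samples per slice.
    sf = Slicefinder(alpha=0.95, k=1, max_l=2, min_sup=2, verbose=False)
    sf.fit(X, errors)

    # One row per top slice; None marks features the slice does not
    # constrain. Here the expected result is [[1 None]], i.e. the slice
    # "feature 0 == 1", where all the errors sit.
    print(sf.top_slices_)

    # Membership mask of shape (n_samples, k), as in test_transform: the
    # four "feature 0 == 1" rows should be flagged.
    print(sf.transform(X))

    # Rows of X belonging to top slice 0, as in test_get_slice.
    print(sf.get_slice(X, 0))

With alpha close to 1 the relative-error term of the slice scoring function dominates the relative-size term, which is why this small toy slice should still score above zero and survive the pruning in _maintain_top_k.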