From c6c706acec2ce4d4963dbe23f0e11cf83ea86a09 Mon Sep 17 00:00:00 2001 From: adedaran Date: Wed, 5 Apr 2023 15:46:27 +0200 Subject: [PATCH 1/2] Lint code --- Makefile | 8 ++- docs/source/conf.py | 31 +++++---- pyproject.toml | 14 ++++ setup.cfg | 6 +- sliceline/__init__.py | 2 +- sliceline/slicefinder.py | 103 +++++++++++++++++++--------- sliceline/validation.py | 19 ++++-- tests/conftest.py | 137 ++++++++++++++++++++++++++++++++------ tests/experiment.py | 3 +- tests/test_slicefinder.py | 123 +++++++++++++++++++++++++++++++--- 10 files changed, 360 insertions(+), 86 deletions(-) diff --git a/Makefile b/Makefile index da5d0c2..b8ee908 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,14 @@ init: pip3 install poetry poetry install +lint: + poetry run black . + poetry run isort . + poetry run flake8 + test: - poetry run pytest tests --cov=sliceline --cov-report=xml:.github/reports/coverage.xml + poetry run coverage run -m pytest + poetry run coverage report -m doc: sphinx-build -a docs/source docs/build diff --git a/docs/source/conf.py b/docs/source/conf.py index b3e88ed..d466e57 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,37 +1,36 @@ # Configuration file for the Sphinx documentation builder. -import sys import os - +import sys sys.path.insert(0, os.path.abspath("../../")) # -- Project information -project = 'Sliceline' -copyright = '2022, DataDome' -author = 'Antoine de Daran' +project = "Sliceline" +copyright = "2022, DataDome" +author = "Antoine de Daran" # -- General configuration extensions = [ - 'sphinx.ext.duration', - 'sphinx.ext.doctest', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.intersphinx', + "sphinx.ext.duration", + "sphinx.ext.doctest", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", ] intersphinx_mapping = { - 'python': ('https://docs.python.org/3/', None), - 'sphinx': ('https://www.sphinx-doc.org/en/master/', None), + "python": ("https://docs.python.org/3/", None), + "sphinx": ("https://www.sphinx-doc.org/en/master/", None), } -intersphinx_disabled_domains = ['std'] +intersphinx_disabled_domains = ["std"] -templates_path = ['_templates'] +templates_path = ["_templates"] # -- Options for HTML output -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # -- Options for EPUB output -epub_show_urls = 'footnote' +epub_show_urls = "footnote" diff --git a/pyproject.toml b/pyproject.toml index 14c6fe3..21a88b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,3 +29,17 @@ pytest-benchmark = "^3.4.1" pytest-cov = "^3.0.0" Sphinx = "^4.0.0" sphinx-rtd-theme = "^1.0.0" + +[tool.black] +build-backend = "poetry.core.masonry.api" +line-length = 79 +include = '\.pyi?$' + +[tool.isort] +profile = "black" + +[tool.coverage.run] +omit = [".*", "*/site-packages/*", "tests/*", "*/validation.py"] + +[tool.coverage.report] +fail_under = 80 diff --git a/setup.cfg b/setup.cfg index fb87a91..3238fb1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,4 +1,6 @@ [flake8] -exclude = validation +docstring-convention = numpy +format = pylint +ignore = E203 W503 C901 max-line-length = 100 -ignore = E203 W503 +max-complexity = 10 diff --git a/sliceline/__init__.py b/sliceline/__init__.py index 270acd4..b225858 100644 --- a/sliceline/__init__.py +++ b/sliceline/__init__.py @@ -1,3 +1,3 @@ from .slicefinder import Slicefinder -__all__ = ("Slicefinder",) \ No newline at end of file +__all__ = ("Slicefinder",) diff --git a/sliceline/slicefinder.py b/sliceline/slicefinder.py index 90ebf4a..0bd51ad 100644 --- 
a/sliceline/slicefinder.py +++ b/sliceline/slicefinder.py @@ -24,8 +24,8 @@ class Slicefinder(BaseEstimator, TransformerMixin): Given an input dataset (`X`) and a model error vector (`errors`), SliceLine finds the `k` slices in `X` that identify where the model performs significantly worse. - A slice is a subspace of `X` defined by one or more predicates. The maximal dimension - of this subspace is controlled by `max_l`. + A slice is a subspace of `X` defined by one or more predicates. + The maximal dimension of this subspace is controlled by `max_l`. The slice scoring function is the linear combination of two objectives: - Find sufficiently large slices, with more than `min_sup` elements @@ -55,7 +55,8 @@ class Slicefinder(BaseEstimator, TransformerMixin): min_sup: int or float, default=10 Minimum support threshold. Inspired by frequent itemset mining, it ensures statistical significance. - If `min_sup` is a float (0 < `min_sup` < 1), it represents the faction of the input dataset (`X`) + If `min_sup` is a float (0 < `min_sup` < 1), + it represents the faction of the input dataset (`X`). verbose: bool, default=True Controls the verbosity. @@ -71,7 +72,8 @@ class Slicefinder(BaseEstimator, TransformerMixin): References ---------- - `SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging `__, + `SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging + `__, from *Svetlana Sagadeeva* and *Matthias Boehm* of Graz University of Technology. """ @@ -108,9 +110,8 @@ def _check_params(self): if self.max_l <= 0: raise ValueError(f"Invalid 'max_l' parameter: {self.max_l}") - if ( - self.min_sup < 0 or - (isinstance(self.min_sup, float) and self.min_sup >= 1) + if self.min_sup < 0 or ( + isinstance(self.min_sup, float) and self.min_sup >= 1 ): raise ValueError(f"Invalid 'min_sup' parameter: {self.min_sup}") @@ -228,9 +229,9 @@ def _get_slices_masks(self, X): slice_candidates = self._top_slices_enc @ X_encoded.T # self._top_slices_enc.sum(axis=1) is the number of predicate(s) for each top_slices_ - slices_masks = (slice_candidates == self._top_slices_enc.sum(axis=1)).A.astype( - int - ) + slices_masks = ( + slice_candidates == self._top_slices_enc.sum(axis=1) + ).A.astype(int) return slices_masks @@ -243,8 +244,12 @@ def _n_features_out(self): def _dummify(array: np.ndarray, n_col_x_encoded: int) -> sp.csr_matrix: """Dummify `array` with respect to `n_col_x_encoded`. Assumption: v does not contain any 0.""" - assert 0 not in array, "Modality 0 is not expected to be one-hot encoded." - one_hot_encoding = sp.lil_matrix((array.size, n_col_x_encoded), dtype=bool) + assert ( + 0 not in array + ), "Modality 0 is not expected to be one-hot encoded." 
+ one_hot_encoding = sp.lil_matrix( + (array.size, n_col_x_encoded), dtype=bool + ) one_hot_encoding[np.arange(array.size), array - 1] = True return one_hot_encoding.tocsr() @@ -257,14 +262,18 @@ def _maintain_top_k( ) -> Tuple[sp.csr_matrix, np.ndarray]: """Add new `slices` to `top_k_slices` and update the top-k slices.""" # prune invalid min_sup and scores - valid_slices_mask = (statistics[:, 3] >= self.min_sup) & (statistics[:, 0] > 0) + valid_slices_mask = (statistics[:, 3] >= self.min_sup) & ( + statistics[:, 0] > 0 + ) if np.sum(valid_slices_mask) != 0: slices, statistics = ( slices[valid_slices_mask], statistics[valid_slices_mask], ) - if (slices.shape[1] != top_k_slices.shape[1]) & (slices.shape[1] == 1): + if (slices.shape[1] != top_k_slices.shape[1]) & ( + slices.shape[1] == 1 + ): slices, statistics = slices.T, statistics.T # evaluated candidates and previous top-k @@ -272,7 +281,9 @@ def _maintain_top_k( statistics = np.concatenate([top_k_statistics, statistics]) # extract top-k - top_slices_bool = rankdata(-statistics[:, 0], method="min") <= self.k + top_slices_bool = ( + rankdata(-statistics[:, 0], method="min") <= self.k + ) top_k_slices, top_k_statistics = ( slices[top_slices_bool], statistics[top_slices_bool], @@ -298,7 +309,9 @@ def _score_ub( potential_solutions = np.column_stack( ( self.min_sup * np.ones(slice_sizes_ub.shape[0]), - np.maximum(slice_errors_ub / max_slice_errors_ub, self.min_sup), + np.maximum( + slice_errors_ub / max_slice_errors_ub, self.min_sup + ), slice_sizes_ub, ) ) @@ -307,7 +320,8 @@ def _score_ub( self.alpha * ( np.minimum( - potential_solutions.T * max_slice_errors_ub, slice_errors_ub + potential_solutions.T * max_slice_errors_ub, + slice_errors_ub, ).T / self.average_error_ - potential_solutions @@ -325,7 +339,9 @@ def _analyse_top_k(top_k_statistics: np.ndarray) -> tuple: max_slice_scores = min_slice_scores = -np.inf if top_k_statistics.shape[0] > 0: max_slice_scores = top_k_statistics[0, 0] - min_slice_scores = top_k_statistics[top_k_statistics.shape[0] - 1, 0] + min_slice_scores = top_k_statistics[ + top_k_statistics.shape[0] - 1, 0 + ] return max_slice_scores, min_slice_scores def _score( @@ -354,7 +370,9 @@ def _eval_slice( max_slice_errors = slice_candidates.T.multiply(errors).max(axis=1).A # score of relative error and relative size - slice_scores = self._score(slice_sizes, slice_errors, x_encoded.shape[0]) + slice_scores = self._score( + slice_sizes, slice_errors, x_encoded.shape[0] + ) return np.column_stack( [slice_scores, slice_errors, max_slice_errors, slice_sizes] ) @@ -379,7 +397,9 @@ def _create_and_score_basic_slices( slices = self._dummify(attr, n_col_x_encoded) # score 1-slices and create initial top-k - slice_scores = self._score(slice_sizes, slice_errors, x_encoded.shape[0]) + slice_scores = self._score( + slice_sizes, slice_errors, x_encoded.shape[0] + ) statistics = np.column_stack( (slice_scores, slice_errors, max_slice_errors, slice_sizes) ) @@ -397,11 +417,15 @@ def _get_pruned_s_r( ) -> Tuple[sp.csr_matrix, np.ndarray]: """Prune invalid slices. 
Do not affect overall pruning effectiveness due to handling of missing parents.""" - valid_slices_mask = (statistics[:, 3] >= self.min_sup) & (statistics[:, 1] > 0) + valid_slices_mask = (statistics[:, 3] >= self.min_sup) & ( + statistics[:, 1] > 0 + ) return slices[valid_slices_mask], statistics[valid_slices_mask] @staticmethod - def _join_compatible_slices(slices: sp.csr_matrix, level: int) -> np.ndarray: + def _join_compatible_slices( + slices: sp.csr_matrix, level: int + ) -> np.ndarray: """Join compatible slices according to `level`.""" slices_int = slices.astype(int) join = (slices_int @ slices_int.T).A == level - 2 @@ -409,7 +433,9 @@ def _join_compatible_slices(slices: sp.csr_matrix, level: int) -> np.ndarray: @staticmethod def _combine_slices( - slices: sp.csr_matrix, statistics: np.ndarray, compatible_slices: np.ndarray + slices: sp.csr_matrix, + statistics: np.ndarray, + compatible_slices: np.ndarray, ) -> Tuple[sp.csr_matrix, np.ndarray, np.ndarray, np.ndarray]: """Combine slices by exploiting parents node statistics.""" parent_1_idx, parent_2_idx = np.where(compatible_slices == 1) @@ -459,7 +485,9 @@ def _prepare_deduplication_and_pruning( """Prepare IDs for deduplication and pruning.""" ids = np.zeros(pair_candidates.shape[0]) dom = feature_domains + 1 - for j, (start, end) in enumerate(zip(feature_offset_start, feature_offset_end)): + for j, (start, end) in enumerate( + zip(feature_offset_start, feature_offset_end) + ): sub_pair_candidates = pair_candidates[:, start:end] # sub_p should not contain multiple True on the same line i = sub_pair_candidates.argmax(axis=1).T + np.any( @@ -510,7 +538,10 @@ def _get_pair_candidates( return sp.csr_matrix(np.empty((0, slices.shape[1]))) ids = self._prepare_deduplication_and_pruning( - feature_offset_start, feature_offset_end, feature_domains, pair_candidates + feature_offset_start, + feature_offset_end, + feature_domains, + pair_candidates, ) # remove duplicate candidates and select corresponding statistics @@ -579,7 +610,9 @@ def _search_slices( np.zeros((0, 4)), ) - max_slice_scores, min_slice_scores = self._analyse_top_k(top_k_statistics) + max_slice_scores, min_slice_scores = self._analyse_top_k( + top_k_statistics + ) logger.debug( "Initial top-K: count=%i, max=%f, min=%f" % (top_k_slices.shape[0], max_slice_scores, min_slice_scores) @@ -589,7 +622,11 @@ def _search_slices( # termination condition (max #feature levels) level = 1 min_condition = min(input_x.shape[1], self.max_l) - while (slices.shape[0] > 0) & (slices.sum() > 0) & (level < min_condition): + while ( + (slices.shape[0] > 0) + & (slices.sum() > 0) + & (level < min_condition) + ): level += 1 # enumerate candidate join pairs, including size/error pruning @@ -620,8 +657,12 @@ def _search_slices( slices, statistics, top_k_slices, top_k_statistics ) - max_slice_scores, min_slice_scores = self._analyse_top_k(top_k_statistics) - valid = np.sum((statistics[:, 3] >= self.min_sup) & (statistics[:, 1] > 0)) + max_slice_scores, min_slice_scores = self._analyse_top_k( + top_k_statistics + ) + valid = np.sum( + (statistics[:, 3] >= self.min_sup) & (statistics[:, 1] > 0) + ) logger.debug( " -- valid slices after eval: %s/%i" % (valid, slices.shape[0]) ) @@ -634,6 +675,8 @@ def _search_slices( if top_k_slices.shape[0] == 0: self.top_slices_ = np.empty((0, input_x.shape[1])) else: - self.top_slices_ = self._one_hot_encoder.inverse_transform(top_k_slices) + self.top_slices_ = self._one_hot_encoder.inverse_transform( + top_k_slices + ) logger.debug("Terminated at level %i." 
% level) diff --git a/sliceline/validation.py b/sliceline/validation.py index 1d50add..ae0a01e 100644 --- a/sliceline/validation.py +++ b/sliceline/validation.py @@ -99,7 +99,8 @@ def _num_samples(x): if hasattr(x, "shape") and x.shape is not None: if len(x.shape) == 0: raise TypeError( - "Singleton array %r cannot be considered a valid collection." % x + "Singleton array %r cannot be considered a valid collection." + % x ) # Check that shape is returning an integer or default to len # Dask dataframes may not return numeric shape[0] value @@ -242,7 +243,8 @@ def _ensure_sparse_format( if force_all_finite: if not hasattr(spmatrix, "data"): warnings.warn( - "Can't check %s sparse matrix for nan or inf." % spmatrix.format, + "Can't check %s sparse matrix for nan or inf." + % spmatrix.format, stacklevel=2, ) else: @@ -450,7 +452,10 @@ def check_array( with suppress(ImportError): from pandas.api.types import is_sparse - if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any(): + if ( + not hasattr(array, "sparse") + and array.dtypes.apply(is_sparse).any() + ): warnings.warn( "pandas.DataFrame with sparse columns found." "It will be converted to a dense numpy array." @@ -781,7 +786,9 @@ def check_X_e( input_name="X", ) - y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator) + y = _check_y( + y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator + ) check_consistent_length(X, y) @@ -847,4 +854,6 @@ def column_or_1d(y, *, warn=False): ) return np.ravel(y) - raise ValueError(f"y should be a 1d array, got an array of shape {shape} instead.") + raise ValueError( + f"y should be a 1d array, got an array of shape {shape} instead." + ) diff --git a/tests/conftest.py b/tests/conftest.py index 52d194e..affc7bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -122,7 +122,9 @@ def experiments(): ] ).T errors_1 = np.array([True] * n_small + [False] * n_small) - expected_top_slices_1 = np.array([[1, 1, None], [None, 1, 2], [1, None, 2]]) + expected_top_slices_1 = np.array( + [[1, 1, None], [None, 1, 2], [1, None, 2]] + ) experiment_1 = Experiment(X_1, errors_1, expected_top_slices_1) # Experiment 2: Experiment 1 + more columns and different order @@ -154,7 +156,9 @@ def experiments(): ] ).T errors_3 = np.array([1] * n + [0] * n) - expected_top_slices_3 = np.array([[1.0, 1.0, None], [1.0, None, None], [None, 1.0, None]]) + expected_top_slices_3 = np.array( + [[1.0, 1.0, None], [1.0, None, None], [None, 1.0, None]] + ) experiment_3 = Experiment(X_3, errors_3, expected_top_slices_3) # Experiment 4: Experiment 3 + more columns @@ -172,19 +176,28 @@ def experiments(): ).T errors_4 = np.array([1] * n + [0] * n) expected_top_slices_4 = np.array( - [[1.0, 1.0, None, None, None, None], [1.0, None, 3.0, None, None, None]] + [ + [1.0, 1.0, None, None, None, None], + [1.0, None, 3.0, None, None, None], + ] ) experiment_4 = Experiment(X_4, errors_4, expected_top_slices_4) # Experiment 5: Experiment 4 w/ min_sup=50 expected_top_slices_5 = np.array( - [[1.0, 1.0, None, None, None, None], [1.0, None, 3.0, None, None, None]] + [ + [1.0, 1.0, None, None, None, None], + [1.0, None, 3.0, None, None, None], + ] ) experiment_5 = Experiment(X_4, errors_4, expected_top_slices_5, min_sup=50) # Experiment 6: Experiment 4 w/ max_l=1 expected_top_slices_6 = np.array( - [[1.0, None, None, None, None, None], [None, 1.0, None, None, None, None]] + [ + [1.0, None, None, None, None, None], + [None, 1.0, None, None, None, None], + ] ) experiment_6 = Experiment(X_4, 
errors_4, expected_top_slices_6, max_l=1) @@ -212,7 +225,12 @@ def experiments(): # Experiment 9: Experiment 1 w/ float label np.random.seed(9) errors_9 = ( - np.concatenate([np.random.randint(1, 61, n_small), np.random.randint(41, 101, n_small)]) + np.concatenate( + [ + np.random.randint(1, 61, n_small), + np.random.randint(41, 101, n_small), + ] + ) / 100 ) expected_top_slices_9 = np.array([[2.0, None, None], [2.0, 1.0, None]]) @@ -252,9 +270,16 @@ def experiments(): ).T errors_11 = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]) expected_top_slices_11 = np.array( - [[1.0, 1.0, 1.0, None], [None, 1.0, 1.0, None], [1, None, 1, None], [1, 1, None, None]] + [ + [1.0, 1.0, 1.0, None], + [None, 1.0, 1.0, None], + [1, None, 1, None], + [1, 1, None, None], + ] + ) + experiment_11 = Experiment( + X_11, errors_11, expected_top_slices_11, max_l=3 ) - experiment_11 = Experiment(X_11, errors_11, expected_top_slices_11, max_l=3) # Experiment 12: max_l=4 X_12 = np.array( @@ -276,7 +301,9 @@ def experiments(): [1, 1, 1, None, None], ] ) - experiment_12 = Experiment(X_12, errors_12, expected_top_slices_12, max_l=4) + experiment_12 = Experiment( + X_12, errors_12, expected_top_slices_12, max_l=4 + ) # Experiment 13: mixed types X_13 = np.array( @@ -289,7 +316,9 @@ def experiments(): dtype=object, ).T errors_13 = np.array([1, 1, 1, 1, 0, 0, 0, 0]) - expected_top_slices_13 = np.array([[1, "a", None, None], [None, "a", None, 3]]) + expected_top_slices_13 = np.array( + [[1, "a", None, None], [None, "a", None, 3]] + ) experiment_13 = Experiment(X_13, errors_13, expected_top_slices_13) # Experiment 14: Experiment 4 w/ min_sup=10 @@ -299,19 +328,87 @@ def experiments(): [1.0, None, 3.0, None, None, None], ] ) - experiment_14 = Experiment(X_4, errors_4, expected_top_slices_14, min_sup=10) + experiment_14 = Experiment( + X_4, errors_4, expected_top_slices_14, min_sup=10 + ) # Experiment 15: Experiment 4 w/ alpha=0.5 expected_top_slices_15 = np.empty((0, 6)) - experiment_15 = Experiment(X_4, errors_4, expected_top_slices_15, alpha=0.5) + experiment_15 = Experiment( + X_4, errors_4, expected_top_slices_15, alpha=0.5 + ) # Experiment 16: Experiment with missing parent pruning X_16 = np.array( [ - ["g", "g", "g", "g", "g", "g", "a", "a", "a", "a", "a", "a", "a", "a", "a"], - ["b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "f", "f", "f"], - ["h", "h", "h", "h", "c", "c", "c", "c", "h", "h", "h", "h", "h", "h", "h"], - ["d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "e", "e", "e"], + [ + "g", + "g", + "g", + "g", + "g", + "g", + "a", + "a", + "a", + "a", + "a", + "a", + "a", + "a", + "a", + ], + [ + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "b", + "f", + "f", + "f", + ], + [ + "h", + "h", + "h", + "h", + "c", + "c", + "c", + "c", + "h", + "h", + "h", + "h", + "h", + "h", + "h", + ], + [ + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "d", + "e", + "e", + "e", + ], ], dtype=object, ).T @@ -326,10 +423,12 @@ def experiments(): [ [1.0, 1.0, None, None, None, None], [1.0, None, None, None, None, None], - [None, 1.0, None, None, None, None] + [None, 1.0, None, None, None, None], ] ) - experiment_17 = Experiment(X_4, errors_4, expected_top_slices_17, min_sup=0.5) + experiment_17 = Experiment( + X_4, errors_4, expected_top_slices_17, min_sup=0.5 + ) return { "experiment_1": experiment_1, @@ -348,5 +447,5 @@ def experiments(): "experiment_14": experiment_14, "experiment_15": experiment_15, "experiment_16": experiment_16, - 
"experiment_17": experiment_17 + "experiment_17": experiment_17, } diff --git a/tests/experiment.py b/tests/experiment.py index 53df664..ee9bf19 100644 --- a/tests/experiment.py +++ b/tests/experiment.py @@ -39,7 +39,8 @@ class Experiment: min_sup: int or float, default=10 Minimum support threshold. Inspired by frequent itemset mining, it ensures statistical significance. - If `min_sup` is a float (0 < `min_sup` < 1), it represents the faction of the input dataset (`X`) + If `min_sup` is a float (0 < `min_sup` < 1), + it represents the faction of the input dataset (`X`) verbose: bool, default=True Controls the verbosity. diff --git a/tests/test_slicefinder.py b/tests/test_slicefinder.py index 74ec5c7..3e2d2b6 100644 --- a/tests/test_slicefinder.py +++ b/tests/test_slicefinder.py @@ -13,7 +13,9 @@ def test_dummify(benchmark, basic_test_data): """Test _dummify method.""" array = np.array([1, 3, 5, 6, 7, 8, 13, 15]) computed = benchmark( - basic_test_data["slicefinder_model"]._dummify, array, basic_test_data["n_col_x_encoded"] + basic_test_data["slicefinder_model"]._dummify, + array, + basic_test_data["n_col_x_encoded"], ) assert np.array_equal(computed.A, basic_test_data["slices"].A) @@ -62,13 +64,97 @@ def test_maintain_top_k(benchmark, basic_test_data): def test_score_ub(benchmark, basic_test_data): """Test _score_ub method.""" slice_sizes_ub = np.array( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4, 1, 1, 1, 1, 3, 4, 1, 1, 1, 1, 6] + [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 4, + 1, + 1, + 1, + 1, + 3, + 4, + 1, + 1, + 1, + 1, + 6, + ] ) slice_errors_ub = np.array( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 4] + [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 1, + 1, + 1, + 4, + ] ) max_slice_errors_ub = np.array( - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ] ) computed = benchmark( @@ -136,7 +222,10 @@ def test_score(benchmark, basic_test_data): n_row_x_encoded = 8 computed = benchmark( - basic_test_data["slicefinder_model"]._score, slice_sizes, slice_errors, n_row_x_encoded + basic_test_data["slicefinder_model"]._score, + slice_sizes, + slice_errors, + n_row_x_encoded, ) expected = np.array( [ @@ -221,7 +310,9 @@ def test_get_pair_candidates(benchmark, basic_test_data): assert np.array_equal(computed.A, basic_test_data["candidates"].A) -def test_get_pair_candidates_with_missing_parents_pruning(benchmark, basic_test_data): +def test_get_pair_candidates_with_missing_parents_pruning( + benchmark, basic_test_data +): """Test _get_pair_candidates where missing parents are present in pruning.""" slices = sp.csr_matrix( [ @@ -250,7 +341,9 @@ def test_get_pair_candidates_with_missing_parents_pruning(benchmark, basic_test_ ) slicefinder_model_parents_pruning.average_error_ = 0.4 - expected = np.array([[False, False, True, False, False, True, True, False]]) + expected = np.array( + [[False, False, True, False, False, True, True, False]] + ) computed = benchmark( slicefinder_model_parents_pruning._get_pair_candidates, @@ -323,7 +416,9 @@ def test_experiments(benchmark, experiments, experiment_name): experiment.input_errors, ) computed_top_k_slices = slicefinder_model.top_slices_ - assert np.array_equal(computed_top_k_slices, experiment.expected_top_k_slices) + assert 
np.array_equal( + computed_top_k_slices, experiment.expected_top_k_slices + ) def test_transform(benchmark, basic_test_data): @@ -333,17 +428,23 @@ def test_transform(benchmark, basic_test_data): basic_test_data["X"], basic_test_data["errors"], ) - expected = np.array([[1, 1], [1, 1], [1, 1], [1, 0], [0, 0], [0, 0], [0, 0], [0, 0]]) + expected = np.array( + [[1, 1], [1, 1], [1, 1], [1, 0], [0, 0], [0, 0], [0, 0], [0, 0]] + ) assert np.array_equal(computed, expected) def test_get_slice(benchmark, basic_test_data): """Test get_slice method.""" - basic_test_data["slicefinder_model"].fit(basic_test_data["X"], basic_test_data["errors"]) + basic_test_data["slicefinder_model"].fit( + basic_test_data["X"], basic_test_data["errors"] + ) computed_slice = benchmark( basic_test_data["slicefinder_model"].get_slice, basic_test_data["X"], 0, ) - expected_slice = np.array([[1, 1, 1, 3], [1, 1, 2, 3], [1, 1, 3, 3], [1, 1, 4, 1]]) + expected_slice = np.array( + [[1, 1, 1, 3], [1, 1, 2, 3], [1, 1, 3, 3], [1, 1, 4, 1]] + ) assert np.array_equal(computed_slice, expected_slice) From eeb511edd033804cf856fa50c27532cbd04353e2 Mon Sep 17 00:00:00 2001 From: adedaran Date: Wed, 5 Apr 2023 16:40:42 +0200 Subject: [PATCH 2/2] Fix syntax in flake8 --- pyproject.toml | 1 - setup.cfg | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 21a88b3..59a5b19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,6 @@ Sphinx = "^4.0.0" sphinx-rtd-theme = "^1.0.0" [tool.black] -build-backend = "poetry.core.masonry.api" line-length = 79 include = '\.pyi?$' diff --git a/setup.cfg b/setup.cfg index 3238fb1..af9603f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [flake8] docstring-convention = numpy format = pylint -ignore = E203 W503 C901 +extend-ignore = E203,W503,C901 max-line-length = 100 max-complexity = 10
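
The two commits above only reformat code and configure linting, so the Slicefinder API exercised by the tests is unchanged. As a companion to the hunks, here is a minimal usage sketch of that API. The toy data, parameter values, and expected outputs are illustrative assumptions rather than part of the patch; only the names (Slicefinder, fit, top_slices_, transform, get_slice, and the alpha, k, max_l, min_sup and verbose parameters) come from the code being linted.

    import numpy as np

    from sliceline import Slicefinder

    # Toy dataset: 8 samples, 2 categorical features. Model errors are
    # concentrated on modality 1 of the first feature.
    X = np.array(
        [
            [1, 1],
            [1, 2],
            [1, 3],
            [1, 4],
            [2, 1],
            [2, 2],
            [2, 3],
            [2, 4],
        ]
    )
    errors = np.array([1, 1, 1, 1, 0, 0, 0, 0])

    # Illustrative parameters: weight the error objective heavily (alpha
    # close to 1), keep the single best slice, allow at most 2 predicates,
    # and require at least 2 samples per slice.
    sf = Slicefinder(alpha=0.95, k=1, max_l=2, min_sup=2, verbose=False)
    sf.fit(X, errors)

    # One row per top slice; None marks features the slice does not
    # constrain. Here the expected result is [[1 None]], i.e. the slice
    # "feature 0 == 1", where all the errors sit.
    print(sf.top_slices_)

    # Membership mask of shape (n_samples, k), as in test_transform: the
    # four "feature 0 == 1" rows should be flagged.
    print(sf.transform(X))

    # Rows of X belonging to top slice 0, as in test_get_slice.
    print(sf.get_slice(X, 0))

With alpha close to 1 the relative-error term of the slice scoring function dominates the relative-size term, which is why this small toy slice should still score above zero and survive the pruning in _maintain_top_k.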