combine integration and release tests (#482)
Category: test
JIRA issue: MIC-5642

Combine integration and release tests into one folder.
Update some variable names to be more readable.
Error out if the user passes both --runslow and --release.

Testing
Ran pytest with no flags, with --runslow, with --release, and with both.
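
For reference, those invocations look like this (the last one is expected to fail fast with the new ValueError):

    pytest                       # default: release and slow tests are skipped
    pytest --runslow             # also run tests marked slow
    pytest --release             # run only tests marked release
    pytest --release --runslow   # now raises ValueError during collection
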
hussain-jafari authored Jan 6, 2025
1 parent 48e6d3b commit e313609
Showing 8 changed files with 54 additions and 159 deletions.
57 changes: 9 additions & 48 deletions tests/conftest.py
@@ -46,11 +46,19 @@ def pytest_configure(config: Config) -> None:


 def pytest_collection_modifyitems(config: Config, items: list[Function]) -> None:
+    if config.getoption("--release") and config.getoption("--runslow"):
+        raise ValueError("You cannot run the release tests and slow tests simultaneously.")
     skip_release = pytest.mark.skip(reason="need --release to run")
+    skip_non_release = pytest.mark.skip(reason="only running release tests")
     if not config.getoption("--release"):
         for item in items:
-            if "release" in item.keywords:
+            parametrized_test_name = [x for x in item.keywords][0]
+            if "release" in item.keywords and "test_slow_tests" not in parametrized_test_name:
                 item.add_marker(skip_release)
+    else:
+        for item in items:
+            if "release" not in item.keywords:
+                item.add_marker(skip_non_release)

     if config.getoption("--runslow"):
         # --runslow given in cli: do not skip slow tests
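
Note: the --release and --runslow flags checked above are registered elsewhere in conftest.py, outside this hunk. A minimal sketch of what that registration conventionally looks like in pytest, shown only for context (help text is assumed):

    import pytest

    # Hypothetical sketch -- the actual registration is not part of this diff.
    def pytest_addoption(parser: pytest.Parser) -> None:
        parser.addoption("--runslow", action="store_true", default=False, help="run slow tests")
        parser.addoption("--release", action="store_true", default=False, help="run release tests")
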
@@ -105,50 +113,3 @@ def fuzzy_checker(output_directory: Path) -> Generator[FuzzyChecker, None, None]
     yield checker

     checker.save_diagnostic_output(output_directory)
-
-
-@pytest.fixture(scope="session")
-def config() -> dict[str, Any]:
-    """Returns a custom configuration dict to be used in noising"""
-    ROW_PROBABILITY = 0.05
-    config = get_configuration().to_dict()  # default config
-
-    # Increase row noise probabilities to 5% and column cell_probabilities to 25%
-    for dataset_name in config:
-        dataset_schema = DATASET_SCHEMAS.get_dataset_schema(dataset_name)
-        config[dataset_schema.name][Keys.ROW_NOISE] = {
-            noise_type.name: {
-                Keys.ROW_PROBABILITY: ROW_PROBABILITY,
-            }
-            for noise_type in dataset_schema.row_noise_types
-            if noise_type != NOISE_TYPES.duplicate_with_guardian
-        }
-        for col in [c for c in dataset_schema.columns if c.noise_types]:
-            config[dataset_name][Keys.COLUMN_NOISE][col.name] = {
-                noise_type.name: {
-                    Keys.CELL_PROBABILITY: CELL_PROBABILITY,
-                }
-                for noise_type in col.noise_types
-            }
-
-    # FIXME: Remove when record_id is added as the truth deck for datasets.
-    # For integration tests, we will NOT duplicate rows with guardian duplication.
-    # This is because we want to be able to compare the noised and unnoised data
-    # and a big assumption we make is that simulant_id and household_id are the
-    # truth decks in our datasets.
-    config[DATASET_SCHEMAS.census.name][Keys.ROW_NOISE][
-        NOISE_TYPES.duplicate_with_guardian.name
-    ] = {
-        Keys.ROW_PROBABILITY_IN_HOUSEHOLDS_UNDER_18: 0.0,
-        Keys.ROW_PROBABILITY_IN_COLLEGE_GROUP_QUARTERS_UNDER_24: 0.0,
-    }
-    # Update SSA dataset to noise 'ssn' but NOT noise 'ssa_event_type' since that
-    # will be used as an identifier along with simulant_id
-    # TODO: Noise ssa_event_type when record IDs are implemented (MIC-4039)
-    config[DATASET_SCHEMAS.ssa.name][Keys.COLUMN_NOISE][COLUMNS.ssa_event_type.name] = {
-        noise_type.name: {
-            Keys.CELL_PROBABILITY: 0,
-        }
-        for noise_type in COLUMNS.ssa_event_type.noise_types
-    }
-    return config
2 changes: 1 addition & 1 deletion tests/integration/conftest.py
@@ -122,7 +122,7 @@ def split_sample_data_dir_state_edit(
     return Path(split_sample_data_dir_state_edit)


-@pytest.fixture(scope="module")
+@pytest.fixture(scope="session")
 def config() -> dict[str, Any]:
     """Returns a custom configuration dict to be used in noising"""
     config = get_configuration().to_dict()  # default config
File renamed without changes.
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import functools
 import os
 import time
@@ -132,14 +134,16 @@ def dataset_params(


 @pytest.fixture(scope="session")
-def data(
+def noised_data(
     dataset_params: tuple[str | int | Callable[..., pd.DataFrame] | None, ...],
     release_output_dir: Path,
+    request: pytest.FixtureRequest,
     config: dict[str, Any],
 ) -> pd.DataFrame:
     _, dataset_func, source, year, state, engine = dataset_params

-    if source is None:
+    run_slow = request.config.getoption("--runslow")
+    if run_slow:  # get sample data
         return dataset_func(seed=SEED, year=None, config=config)  # type: ignore [misc, operator]

     kwargs = {
@@ -150,19 +154,23 @@ def data(
     }
     if dataset_func != generate_social_security:
         kwargs["state"] = state
-    return profile_data_generation(release_output_dir)(dataset_func)(**kwargs)
+    noised_data = profile_data_generation(release_output_dir)(dataset_func)(**kwargs)
+    if engine == "dask":
+        # mypy expects noised_data to be a series rather than dask object
+        noised_data = noised_data.compute()  # type: ignore [operator]
+    return noised_data


 @pytest.fixture(scope="session")
 def unnoised_dataset(
     dataset_params: tuple[str | int | Callable[..., pd.DataFrame] | None, ...],
+    request: pytest.FixtureRequest,
     config: dict[str, Any],
 ) -> Dataset:
     dataset_arg, dataset_func, source, year, state, engine = dataset_params
     dataset_name = DATASET_ARG_TO_FULL_NAME_MAPPER[dataset_arg]  # type: ignore [index]

-    if source is None:
+    run_slow = request.config.getoption("--runslow")
+    if run_slow:  # get sample data
         return initialize_dataset_with_sample(dataset_name)

     kwargs = {
@@ -174,7 +182,9 @@ def unnoised_dataset(
     if dataset_func != generate_social_security:
         kwargs["state"] = state
     unnoised_data = dataset_func(**kwargs)  # type: ignore [misc, operator]
-
+    if engine == "dask":
+        # mypy expects unnoised_data to be a series rather than dask object
+        unnoised_data = unnoised_data.compute()  # type: ignore [operator]
     dataset_schema = DATASET_SCHEMAS.get_dataset_schema(dataset_name)
     return Dataset(dataset_schema, unnoised_data, SEED)

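Both fixtures above follow the same engine pattern: generate with the requested backend, then materialize Dask's lazy result into a concrete pandas DataFrame before returning. A standalone sketch of that pattern, with generate_data standing in for the real generation functions:

    import pandas as pd

    def generate_concrete(generate_data, engine: str, **kwargs) -> pd.DataFrame:
        data = generate_data(engine=engine, **kwargs)
        if engine == "dask":
            # Dask DataFrames are lazy; compute() collects the result into an
            # in-memory pandas object so tests can treat both engines uniformly.
            data = data.compute()
        return data
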
@@ -15,7 +15,7 @@
     generate_taxes_w2_and_1099,
     generate_women_infants_and_children,
 )
-from tests.release.conftest import (
+from tests.integration.release.conftest import (
     CLI_DEFAULT_ENGINE,
     CLI_DEFAULT_STATE,
     CLI_DEFAULT_YEAR,
@@ -2,10 +2,14 @@

 from typing import Any

+import pandas as pd
 import pytest
 from _pytest.fixtures import FixtureRequest
 from vivarium_testing_utils import FuzzyChecker

+from pseudopeople.dataset import Dataset
 from pseudopeople.schema_entities import COLUMNS, DATASET_SCHEMAS
 from tests.constants import DATASET_GENERATION_FUNCS
 from tests.integration.conftest import IDX_COLS, _get_common_datasets, get_unnoised_data
 from tests.utilities import (
     initialize_dataset_with_sample,
@@ -15,30 +19,32 @@


 def test_column_noising(
+    unnoised_dataset: Dataset,
+    noised_data: pd.DataFrame,
     config: dict[str, Any],
     dataset_name: str,
-    request: FixtureRequest,
     fuzzy_checker: FuzzyChecker,
 ) -> None:
     """Tests that columns are noised as expected"""
-    original = request.getfixturevalue("unnoised_dataset")
-    noised_data = request.getfixturevalue("data")
-
-    check_noised, check_original, shared_idx = _get_common_datasets(original, noised_data)
+    check_noised, check_original, shared_idx = _get_common_datasets(
+        unnoised_dataset, noised_data
+    )

     run_column_noising_tests(
         dataset_name, config, fuzzy_checker, check_noised, check_original, shared_idx
     )


 def test_row_noising_omit_row_or_do_not_respond(
-    dataset_name: str, config: dict[str, Any], request: FixtureRequest
+    noised_data: pd.DataFrame,
+    dataset_name: str,
+    config: dict[str, Any],
+    request: FixtureRequest,
 ) -> None:
     """Tests that omit_row and do_not_respond row noising are being applied"""
     idx_cols = IDX_COLS.get(dataset_name)
     original = get_unnoised_data(dataset_name)
     original_data = original.data.set_index(idx_cols)
-    noised_data = request.getfixturevalue("data")
     noised_data = noised_data.set_index(idx_cols)

     run_omit_row_or_do_not_respond_tests(dataset_name, config, original_data, noised_data)
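
The two tests above also illustrate the refactor: dynamic fixture lookup via request.getfixturevalue is replaced by declared fixture parameters. A minimal sketch of the difference (test bodies elided):

    import pandas as pd
    import pytest

    # Before: the fixture is resolved by name at runtime, so a rename like
    # data -> noised_data only surfaces as a failure mid-test.
    def test_example_old(request: pytest.FixtureRequest) -> None:
        noised_data = request.getfixturevalue("data")
        ...

    # After: pytest resolves the declared fixture at collection time, and the
    # dependency is visible in the test signature.
    def test_example_new(noised_data: pd.DataFrame) -> None:
        ...
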
@@ -52,7 +58,7 @@ def test_unnoised_id_cols(dataset_name: str, request: FixtureRequest) -> None:
     if dataset_name != DATASET_SCHEMAS.ssa.name:
         unnoised_id_cols.append(COLUMNS.household_id.name)
     original = initialize_dataset_with_sample(dataset_name)
-    noised_data = request.getfixturevalue("data")
+    noised_data = request.getfixturevalue("noised_data")
     check_noised, check_original, _ = _get_common_datasets(original, noised_data)
     assert (
         (
@@ -17,9 +17,17 @@
         # (["--dataset", "wic", "--population", "USA", "--state", "RI", "--year", "2015"]),
     ],
 )
-def test_runner(pytest_args: list[str]) -> None:
+def test_release_tests(pytest_args: list[str]) -> None:
     os.chdir(Path(__file__).parent)  # need this to access cli options from conftest.py
     base_cmd = ["pytest", "--release", "test_release.py"]
     cmd = base_cmd + pytest_args
     result = subprocess.run(cmd, capture_output=True, text=True)
     assert result.returncode == 0
+
+
+@pytest.mark.parametrize("dataset", ["acs", "cps"])
+def test_slow_tests(dataset: str) -> None:
+    os.chdir(Path(__file__).parent)  # need this to access cli options from conftest.py
+    cmd = ["pytest", "--runslow", "test_release.py", "--dataset", dataset]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    assert result.returncode == 0
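
From the release conftest directory, these meta-tests shell out to commands of this shape (the exact pytest_args vary with the parametrization above):

    pytest --release test_release.py <pytest_args...>
    pytest --runslow test_release.py --dataset acs
    pytest --runslow test_release.py --dataset cps
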
98 changes: 4 additions & 94 deletions tests/integration/test_interface.py
@@ -224,96 +224,6 @@ def test_column_dtypes(
     assert noised_data[col.name].dtype == expected_dtype


-@pytest.mark.parametrize(
-    "dataset_name",
-    [
-        DATASET_SCHEMAS.census.name,
-        DATASET_SCHEMAS.acs.name,
-        DATASET_SCHEMAS.cps.name,
-        DATASET_SCHEMAS.ssa.name,
-        DATASET_SCHEMAS.tax_w2_1099.name,
-        DATASET_SCHEMAS.wic.name,
-        DATASET_SCHEMAS.tax_1040.name,
-    ],
-)
-@pytest.mark.parametrize(
-    "engine",
-    [
-        "pandas",
-        "dask",
-    ],
-)
-def test_column_noising(
-    dataset_name: str,
-    engine: str,
-    config: dict[str, Any],
-    request: FixtureRequest,
-    fuzzy_checker: FuzzyChecker,
-) -> None:
-    """Tests that columns are noised as expected"""
-    if "TODO" in dataset_name:
-        pytest.skip(reason=dataset_name)
-    original = initialize_dataset_with_sample(dataset_name)
-    if engine == "dask":
-        generation_function = DATASET_GENERATION_FUNCS[dataset_name]
-        noised_data = generation_function(
-            seed=SEED,
-            year=None,
-            config=config,
-            engine=engine,
-        ).compute()
-    else:
-        noised_data = request.getfixturevalue(f"noised_sample_data_{dataset_name}")
-    check_noised, check_original, shared_idx = _get_common_datasets(original, noised_data)
-
-    run_column_noising_tests(
-        dataset_name, config, fuzzy_checker, check_noised, check_original, shared_idx
-    )
-
-
-@pytest.mark.parametrize(
-    "dataset_name",
-    [
-        DATASET_SCHEMAS.census.name,
-        DATASET_SCHEMAS.acs.name,
-        DATASET_SCHEMAS.cps.name,
-        DATASET_SCHEMAS.ssa.name,
-        DATASET_SCHEMAS.tax_w2_1099.name,
-        DATASET_SCHEMAS.wic.name,
-        DATASET_SCHEMAS.tax_1040.name,
-    ],
-)
-@pytest.mark.parametrize(
-    "engine",
-    [
-        "pandas",
-        "dask",
-    ],
-)
-def test_row_noising_omit_row_or_do_not_respond(
-    dataset_name: str, engine: str, config: dict[str, Any], request: FixtureRequest
-) -> None:
-    """Tests that omit_row and do_not_respond row noising are being applied"""
-    if "TODO" in dataset_name:
-        pytest.skip(reason=dataset_name)
-    idx_cols = IDX_COLS.get(dataset_name)
-    original = get_unnoised_data(dataset_name)
-    original_data = original.data.set_index(idx_cols)
-    if engine == "dask":
-        generation_function = DATASET_GENERATION_FUNCS[dataset_name]
-        noised_data = generation_function(
-            seed=SEED,
-            year=None,
-            config=config,
-            engine=engine,
-        ).compute()
-    else:
-        noised_data = request.getfixturevalue(f"noised_sample_data_{dataset_name}")
-    noised_data = noised_data.set_index(idx_cols)
-
-    run_omit_row_or_do_not_respond_tests(dataset_name, config, original_data, noised_data)
-
-
 @pytest.mark.skip(reason="TODO: Implement duplication row noising")
 @pytest.mark.parametrize(
     "dataset_name",
@@ -358,16 +268,16 @@ def test_dataset_filter_by_year(
         pytest.skip(reason=dataset_name)
     year = 2030  # not default 2020

-    # Generate a new (non-fixture) noised dataset for a single year but mocked such
+    # Generate a new (non-fixture) dataset for a single year but mocked such
     # that no noise actually happens (otherwise the years would get noised and
     # we couldn't tell if the filter was working properly)
     mocker.patch("pseudopeople.dataset.Dataset._noise_dataset")
     generation_function = DATASET_GENERATION_FUNCS[dataset_name]
-    noised_data = generation_function(year=year, engine=engine)
+    data = generation_function(year=year, engine=engine)
     if engine == "dask":
-        noised_data = noised_data.compute()
+        data = data.compute()
     dataset = DATASET_SCHEMAS.get_dataset_schema(dataset_name)
-    assert (noised_data[dataset.date_column_name] == year).all()
+    assert (data[dataset.date_column_name] == year).all()


 @pytest.mark.parametrize(
