diff --git a/pkg/initializer_v2/dataset/huggingface_test.py b/pkg/initializer_v2/dataset/dataset_huggingface_test.py similarity index 100% rename from pkg/initializer_v2/dataset/huggingface_test.py rename to pkg/initializer_v2/dataset/dataset_huggingface_test.py diff --git a/pkg/initializer_v2/dataset/main_test.py b/pkg/initializer_v2/dataset/dataset_main_test.py similarity index 76% rename from pkg/initializer_v2/dataset/main_test.py rename to pkg/initializer_v2/dataset/dataset_main_test.py index bf1b269083..0b3a2ad41b 100644 --- a/pkg/initializer_v2/dataset/main_test.py +++ b/pkg/initializer_v2/dataset/dataset_main_test.py @@ -1,29 +1,9 @@ -import os from unittest.mock import MagicMock, patch import pytest from pkg.initializer_v2.dataset.__main__ import main - - -@pytest.fixture -def mock_env_vars(): - """Fixture to set and clean up environment variables""" - original_env = dict(os.environ) - - def _set_env_vars(**kwargs): - for key, value in kwargs.items(): - if value is None: - os.environ.pop(key, None) - else: - os.environ[key] = str(value) - return os.environ - - yield _set_env_vars - - # Cleanup - os.environ.clear() - os.environ.update(original_env) +from pkg.initializer_v2.utils.utils_test import mock_env_vars # noqa: F401 @pytest.mark.parametrize( @@ -59,16 +39,6 @@ def _set_env_vars(**kwargs): "expected_error": Exception, }, ), - ( - "Config loading failure", - { - "storage_uri": "hf://dataset/path", - "access_token": None, - "mock_config_error": True, - "mock_download_error": False, - "expected_error": Exception, - }, - ), ( "Dataset download failure", { @@ -81,7 +51,7 @@ def _set_env_vars(**kwargs): ), ], ) -def test_dataset_main(test_name, test_case, mock_env_vars): +def test_dataset_main(test_name, test_case, mock_env_vars): # noqa: F811 """Test main script with different scenarios""" print(f"Running test: {test_name}") diff --git a/pkg/initializer_v2/model/huggingface.py b/pkg/initializer_v2/model/huggingface.py index b09a284378..53374ed38d 100644 --- a/pkg/initializer_v2/model/huggingface.py +++ b/pkg/initializer_v2/model/huggingface.py @@ -29,6 +29,8 @@ def download_model(self): if self.config.access_token: huggingface_hub.login(self.config.access_token) + print(f"Model path: {MODEL_PATH}") + print(f"Model URI: {model_uri}") # TODO (andreyvelich): We should consider to follow vLLM approach with allow patterns. # Ref: https://github.com/kubeflow/training-operator/pull/2303#discussion_r1815913663 # TODO (andreyvelich): We should update patterns for Mistral model diff --git a/pkg/initializer_v2/model/huggingface_test.py b/pkg/initializer_v2/model/model_huggingface_test.py similarity index 100% rename from pkg/initializer_v2/model/huggingface_test.py rename to pkg/initializer_v2/model/model_huggingface_test.py diff --git a/pkg/initializer_v2/model/main_test.py b/pkg/initializer_v2/model/model_main_test.py similarity index 76% rename from pkg/initializer_v2/model/main_test.py rename to pkg/initializer_v2/model/model_main_test.py index 6e44600c67..15f74b3923 100644 --- a/pkg/initializer_v2/model/main_test.py +++ b/pkg/initializer_v2/model/model_main_test.py @@ -1,29 +1,9 @@ -import os from unittest.mock import MagicMock, patch import pytest from pkg.initializer_v2.model.__main__ import main - - -@pytest.fixture -def mock_env_vars(): - """Fixture to set and clean up environment variables""" - original_env = dict(os.environ) - - def _set_env_vars(**kwargs): - for key, value in kwargs.items(): - if value is None: - os.environ.pop(key, None) - else: - os.environ[key] = str(value) - return os.environ - - yield _set_env_vars - - # Cleanup - os.environ.clear() - os.environ.update(original_env) +from pkg.initializer_v2.utils.utils_test import mock_env_vars # noqa: F401 @pytest.mark.parametrize( @@ -59,16 +39,6 @@ def _set_env_vars(**kwargs): "expected_error": Exception, }, ), - ( - "Config loading failure", - { - "storage_uri": "hf://model/path", - "access_token": None, - "mock_config_error": True, - "mock_download_error": False, - "expected_error": Exception, - }, - ), ( "Model download failure", { @@ -81,7 +51,7 @@ def _set_env_vars(**kwargs): ), ], ) -def test_model_main(test_name, test_case, mock_env_vars): +def test_model_main(test_name, test_case, mock_env_vars): # noqa: F811 """Test main script with different scenarios""" print(f"Running test: {test_name}") diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/integration/__init__.py b/test/integration/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/integration/initializer_v2/__init__.py b/test/integration/initializer_v2/__init__.py index cae8b68a7e..e69de29bb2 100644 --- a/test/integration/initializer_v2/__init__.py +++ b/test/integration/initializer_v2/__init__.py @@ -1,6 +0,0 @@ -import os -import sys - -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")) -) diff --git a/test/integration/initializer_v2/dataset_test.py b/test/integration/initializer_v2/dataset_test.py index 592482f9a1..f0673fc879 100644 --- a/test/integration/initializer_v2/dataset_test.py +++ b/test/integration/initializer_v2/dataset_test.py @@ -1,10 +1,9 @@ import os import runpy -import shutil -import tempfile +from test.integration.initializer_v2.utils import setup_temp_path # noqa: F401 +from test.integration.initializer_v2.utils import verify_downloaded_files import pytest -from kubeflow.training import DATASET_PATH import pkg.initializer_v2.utils.utils as utils @@ -13,34 +12,8 @@ class TestDatasetIntegration: """Integration tests for dataset initialization""" @pytest.fixture(autouse=True) - def setup_teardown(self, monkeypatch): - """Setup and teardown for each test""" - # Create temporary directory for dataset downloads - current_dir = os.path.dirname(os.path.abspath(__file__)) - self.temp_dir = tempfile.mkdtemp(dir=current_dir) - os.environ[DATASET_PATH] = self.temp_dir - - # Store original environment - self.original_env = dict(os.environ) - - # Monkeypatch the constant in the module - import kubeflow.training as training - - monkeypatch.setattr(training, "DATASET_PATH", self.temp_dir) - - yield - - # Cleanup - shutil.rmtree(self.temp_dir, ignore_errors=True) - os.environ.clear() - os.environ.update(self.original_env) - - def verify_dataset_files(self, expected_files): - """Verify downloaded dataset files""" - if expected_files: - actual_files = set(os.listdir(self.temp_dir)) - missing_files = set(expected_files) - actual_files - assert not missing_files, f"Missing expected files: {missing_files}" + def setup_teardown(self, setup_temp_path): # noqa: F811 + self.temp_dir = setup_temp_path("DATASET_PATH") @pytest.mark.parametrize( "test_name, provider, test_case", @@ -97,6 +70,6 @@ def test_dataset_download(self, test_name, provider, test_case): ) else: runpy.run_module("pkg.initializer_v2.dataset.__main__", run_name="__main__") - self.verify_dataset_files(expected_files) + verify_downloaded_files(self.temp_dir, expected_files) print("Test execution completed") diff --git a/test/integration/initializer_v2/model_test.py b/test/integration/initializer_v2/model_test.py index 11c07ace51..85b4706605 100644 --- a/test/integration/initializer_v2/model_test.py +++ b/test/integration/initializer_v2/model_test.py @@ -1,10 +1,9 @@ import os import runpy -import shutil -import tempfile +from test.integration.initializer_v2.utils import setup_temp_path # noqa: F401 +from test.integration.initializer_v2.utils import verify_downloaded_files import pytest -from kubeflow.training import MODEL_PATH import pkg.initializer_v2.utils.utils as utils @@ -13,34 +12,8 @@ class TestModelIntegration: """Integration tests for model initialization""" @pytest.fixture(autouse=True) - def setup_teardown(self, monkeypatch): - """Setup and teardown for each test""" - # Create temporary directory for model downloads - current_dir = os.path.dirname(os.path.abspath(__file__)) - self.temp_dir = tempfile.mkdtemp(dir=current_dir) - os.environ[MODEL_PATH] = self.temp_dir - - # Store original environment - self.original_env = dict(os.environ) - - # Monkeypatch the constant in the module - import kubeflow.training as training - - monkeypatch.setattr(training, "MODEL_PATH", self.temp_dir) - - yield - - # Cleanup - shutil.rmtree(self.temp_dir, ignore_errors=True) - os.environ.clear() - os.environ.update(self.original_env) - - def verify_model_files(self, expected_files): - """Verify downloaded model files""" - if expected_files: - actual_files = set(os.listdir(self.temp_dir)) - missing_files = set(expected_files) - actual_files - assert not missing_files, f"Missing expected files: {missing_files}" + def setup_teardown(self, setup_temp_path): # noqa: F811 + self.temp_dir = setup_temp_path("MODEL_PATH") @pytest.mark.parametrize( "test_name, provider, test_case", @@ -103,6 +76,6 @@ def test_model_download(self, test_name, provider, test_case): ) else: runpy.run_module("pkg.initializer_v2.model.__main__", run_name="__main__") - self.verify_model_files(expected_files) + verify_downloaded_files(self.temp_dir, expected_files) print("Test execution completed") diff --git a/test/integration/initializer_v2/utils.py b/test/integration/initializer_v2/utils.py new file mode 100644 index 0000000000..4015b89f79 --- /dev/null +++ b/test/integration/initializer_v2/utils.py @@ -0,0 +1,55 @@ +import os +import shutil +import tempfile + +import pytest + + +@pytest.fixture +def setup_temp_path(monkeypatch): + """Creates temporary directory and patches path constant. + + This fixture: + 1. Creates a temporary directory + 2. Allows configuration of path constant + 3. Handles automatic cleanup after tests + 4. Restores original environment state + + Args: + monkeypatch: pytest fixture for modifying objects + + Returns: + function: A configurator that accepts path_var (str) and returns temp_dir path + + Usage: + def test_something(setup_temp_path): + temp_dir = setup_temp_path("MODEL_PATH") + # temp_dir is created and MODEL_PATH is patched + # cleanup happens automatically after test + """ + # Setup + original_env = dict(os.environ) + current_dir = os.path.dirname(os.path.abspath(__file__)) + temp_dir = tempfile.mkdtemp(dir=current_dir) + + def configure_path(path_var: str): + """Configure path variable in kubeflow.training""" + import kubeflow.training as training + + monkeypatch.setattr(training, path_var, temp_dir) + return temp_dir + + yield configure_path + + # Cleanup guaranteed to run after test + shutil.rmtree(temp_dir, ignore_errors=True) + os.environ.clear() + os.environ.update(original_env) + + +def verify_downloaded_files(dir_path, expected_files): + """Verify downloaded files""" + if expected_files: + actual_files = set(os.listdir(dir_path)) + missing_files = set(expected_files) - actual_files + assert not missing_files, f"Missing expected files: {missing_files}"