diff --git a/metadata-ingestion/src/datahub/configuration/config_loader.py b/metadata-ingestion/src/datahub/configuration/config_loader.py index 513c15ee444268..c36faae8c70372 100644 --- a/metadata-ingestion/src/datahub/configuration/config_loader.py +++ b/metadata-ingestion/src/datahub/configuration/config_loader.py @@ -2,7 +2,8 @@ import pathlib import re import sys -from typing import Any, Dict, Union +import unittest.mock +from typing import Any, Dict, Set, Union from expandvars import UnboundVariable, expandvars @@ -51,6 +52,19 @@ def resolve_env_variables(config: dict) -> dict: return new_dict +def list_referenced_env_variables(config: dict) -> Set[str]: + # This is a bit of a hack, but expandvars does a bunch of escaping + # and other logic that we don't want to duplicate here. + + with unittest.mock.patch("expandvars.getenv") as mock_getenv: + mock_getenv.return_value = "mocked_value" + + resolve_env_variables(config) + + calls = mock_getenv.mock_calls + return set([call[1][0] for call in calls]) + + def load_config_file( config_file: Union[pathlib.Path, str], squirrel_original_config: bool = False, @@ -63,8 +77,7 @@ def load_config_file( config_mech = YamlConfigurationMechanism() raw_config_file = sys.stdin.read() else: - if isinstance(config_file, str): - config_file = pathlib.Path(config_file) + config_file = pathlib.Path(config_file) if not config_file.is_file(): raise ConfigurationError(f"Cannot open config file {config_file}") @@ -74,9 +87,7 @@ def load_config_file( config_mech = TomlConfigurationMechanism() else: raise ConfigurationError( - "Only .toml and .yml are supported. Cannot process file type {}".format( - config_file.suffix - ) + f"Only .toml and .yml are supported. Cannot process file type {config_file.suffix}" ) raw_config_file = config_file.read_text() diff --git a/metadata-ingestion/tests/unit/config/test_config_loader.py b/metadata-ingestion/tests/unit/config/test_config_loader.py index 23d344c0e61987..e29aa3b0b582c3 100644 --- a/metadata-ingestion/tests/unit/config/test_config_loader.py +++ b/metadata-ingestion/tests/unit/config/test_config_loader.py @@ -3,20 +3,24 @@ import expandvars import pytest +import yaml from datahub.configuration.common import ConfigurationError -from datahub.configuration.config_loader import load_config_file +from datahub.configuration.config_loader import ( + list_referenced_env_variables, + load_config_file, +) @pytest.mark.parametrize( - "filename,golden_config,env,error_type", + "filename,golden_config,env,referenced_env_vars", [ ( # Basic YAML load "tests/unit/config/basic.yml", {"foo": "bar", "nested": {"array": ["one", "two"], "hi": "hello"}}, {}, - None, + set(), ), ( # Basic TOML load @@ -38,35 +42,7 @@ "VAR1": "stuff1", "VAR2": "stuff2", }, - None, - ), - ( - # Variable expansion error - "tests/unit/config/bad_variable_expansion.yml", - None, - {}, - expandvars.ParameterNullOrNotSet, - ), - ( - # Missing file - "tests/unit/config/this_file_does_not_exist.yml", - None, - {}, - ConfigurationError, - ), - ( - # Unknown extension - "tests/unit/config/bad_extension.whatevenisthis", - None, - {}, - ConfigurationError, - ), - ( - # Variable expansion error - "tests/unit/config/bad_complex_variable_expansion.yml", - None, - {}, - expandvars.MissingClosingBrace, + set(["VAR1", "UNSET_VAR3", "VAR2"]), ), ( "tests/unit/config/complex_variable_expansion.yml", @@ -121,17 +97,71 @@ "VAR10": "stuff10", "VAR11": "stuff11", }, - None, + set( + [ + "VAR1", + "VAR2", + "VAR3", + "VAR4", + "VAR5", + "VAR6", + "VAR7", + "VAR8", + "VAR9", + "VAR10", + # VAR11 is escaped and hence not referenced + "VARNONEXISTENT", + ] + ), + ), + ], +) +def test_load_success(pytestconfig, filename, golden_config, env, referenced_env_vars): + filepath = pytestconfig.rootpath / filename + + if referenced_env_vars is not None: + raw_config = yaml.safe_load(filepath.read_text()) + assert list_referenced_env_variables(raw_config) == referenced_env_vars + + with mock.patch.dict(os.environ, env): + loaded_config = load_config_file(filepath) + assert loaded_config == golden_config + + # TODO check referenced env vars + + +@pytest.mark.parametrize( + "filename,env,error_type", + [ + ( + # Variable expansion error + "tests/unit/config/bad_variable_expansion.yml", + {}, + expandvars.ParameterNullOrNotSet, + ), + ( + # Missing file + "tests/unit/config/this_file_does_not_exist.yml", + {}, + ConfigurationError, + ), + ( + # Unknown extension + "tests/unit/config/bad_extension.whatevenisthis", + {}, + ConfigurationError, + ), + ( + # Variable expansion error + "tests/unit/config/bad_complex_variable_expansion.yml", + {}, + expandvars.MissingClosingBrace, ), ], ) -def test_load(pytestconfig, filename, golden_config, env, error_type): +def test_load_error(pytestconfig, filename, env, error_type): filepath = pytestconfig.rootpath / filename with mock.patch.dict(os.environ, env): - if error_type: - with pytest.raises(error_type): - _ = load_config_file(filepath) - else: - loaded_config = load_config_file(filepath) - assert loaded_config == golden_config + with pytest.raises(error_type): + _ = load_config_file(filepath)