From cc6932b8b42e4694141605eb38aa0e75078a0a19 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Tue, 18 Oct 2022 17:40:33 -0700 Subject: [PATCH] Rename presto-on-hive config add_db_name_to_urns to include_catalog_name_in_ids --- .../src/datahub/ingestion/source/sql/presto_on_hive.py | 10 +++++----- .../integration/presto-on-hive/test_presto_on_hive.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py index c0389a23768cf..d9274246a5a34 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py @@ -121,9 +121,9 @@ class PrestoOnHiveConfig(BasicSQLAlchemyConfig): description="Dataset Subtype name to be 'Table' or 'View' Valid options: ['True', 'False']", ) - add_db_name_to_urns: bool = Field( + include_catalog_name_in_ids: bool = Field( default=False, - description="Add database name/alias to the generated dataset urns", + description="Add the Presto catalog name (e.g. hive) to the generated dataset urns. 
`urn:li:dataset:(urn:li:dataPlatform:hive,hive.user.logging_events,PROD)` versus `urn:li:dataset:(urn:li:dataPlatform:hive,user.logging_events,PROD)`", ) def get_sql_alchemy_url(self, uri_opts: Optional[Dict[str, Any]] = None) -> str: @@ -415,7 +415,7 @@ def loop_tables( db_name = self.get_db_name(inspector) schema_name = ( f"{db_name}.{key.schema}" - if self.config.add_db_name_to_urns + if self.config.include_catalog_name_in_ids else key.schema ) @@ -537,7 +537,7 @@ def get_hive_view_columns(self, inspector: Inspector) -> Iterable[ViewDataset]: db_name = self.get_db_name(inspector) schema_name = ( f"{db_name}.{key.schema}" - if self.config.add_db_name_to_urns + if self.config.include_catalog_name_in_ids else key.schema ) dataset_name = self.get_identifier( @@ -575,7 +575,7 @@ def get_presto_view_columns(self, inspector: Inspector) -> Iterable[ViewDataset] db_name = self.get_db_name(inspector) schema_name = ( f"{db_name}.{row['schema']}" - if self.config.add_db_name_to_urns + if self.config.include_catalog_name_in_ids else row["schema"] ) dataset_name = self.get_identifier( diff --git a/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py b/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py index a80063e09d1db..e4f904ab91e74 100644 --- a/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py +++ b/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py @@ -53,7 +53,7 @@ def loaded_presto_on_hive(presto_on_hive_runner): @freeze_time(FROZEN_TIME) @pytest.mark.integration_batch_1 @pytest.mark.parametrize( - "mode,use_catalog_subtype,use_dataset_pascalcase_subtype,add_db_name_to_urns,test_suffix", + "mode,use_catalog_subtype,use_dataset_pascalcase_subtype,include_catalog_name_in_ids,test_suffix", [ ("hive", False, False, False, "_1"), ("presto-on-hive", True, True, False, "_2"), @@ -70,7 +70,7 @@ def test_presto_on_hive_ingest( mode, use_catalog_subtype, use_dataset_pascalcase_subtype, 
- add_db_name_to_urns, + include_catalog_name_in_ids, test_suffix, ): @@ -94,7 +94,7 @@ def test_presto_on_hive_ingest( "scheme": "postgresql+psycopg2", "include_views": True, "include_tables": True, - "add_db_name_to_urns": add_db_name_to_urns, + "include_catalog_name_in_ids": include_catalog_name_in_ids, "schema_pattern": {"allow": ["^public"]}, "mode": mode, "use_catalog_subtype": use_catalog_subtype,