From c5c5f94052257d17c597b25fe37b125f24633dca Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 16 Nov 2022 23:24:37 -0500 Subject: [PATCH] fix(ingest/dbt): disable incremental lineage by default Temporary fix due to an issue in GMS. Can be reverted once that is resolved. --- metadata-ingestion/src/datahub/ingestion/source/dbt.py | 5 +++++ metadata-ingestion/tests/integration/dbt/test_dbt.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt.py b/metadata-ingestion/src/datahub/ingestion/source/dbt.py index 85520a32a22141..6408d62ef6ff18 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt.py @@ -307,6 +307,11 @@ class DBTConfig(StatefulIngestionConfigBase, LineageConfig): description="Reference to your github location to enable easy navigation from DataHub to your dbt files.", ) + incremental_lineage: bool = Field( + # Copied from LineageConfig, and changed the default. + default=False, + description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.", + ) stateful_ingestion: Optional[DBTStatefulIngestionConfig] = pydantic.Field( default=None, description="DBT Stateful Ingestion Config." 
) diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index 4ff16e0d05693c..c32c5b62c5d55c 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -64,6 +64,8 @@ def set_paths( self.output_path = f"{tmp_path}/{self.output_file}" self.golden_path = f"{test_resources_dir}/{self.golden_file}" + + self.source_config_modifiers.setdefault("incremental_lineage", True) self.source_config = dict( { "manifest_path": self.manifest_path, @@ -298,6 +300,7 @@ def test_dbt_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph): # This will bypass check in get_workunits function of dbt.py "write_semantics": "OVERRIDE", "owner_extraction_pattern": r"^@(?P<owner>(.*))", + "incremental_lineage": True, # enable stateful ingestion **stateful_config, } @@ -309,6 +312,7 @@ def test_dbt_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph): "target_platform": "postgres", "write_semantics": "OVERRIDE", "owner_extraction_pattern": r"^@(?P<owner>(.*))", + "incremental_lineage": True, # enable stateful ingestion **stateful_config, } @@ -506,6 +510,7 @@ def test_dbt_tests(pytestconfig, tmp_path, mock_time, **kwargs): ), # this is just here to avoid needing to access datahub server write_semantics="OVERRIDE", + incremental_lineage=True, ), ), sink=DynamicTypedConfig(type="file", config={"filename": str(output_file)}), @@ -555,6 +560,7 @@ def test_dbt_stateful_tests(pytestconfig, tmp_path, mock_time, mock_datahub_grap # This will bypass check in get_workunits function of dbt.py "write_semantics": "OVERRIDE", "owner_extraction_pattern": r"^@(?P<owner>(.*))", + "incremental_lineage": True, # enable stateful ingestion **stateful_config, }