From 43af9a27cafe5ebc26a8f7f893b889788cf0e5af Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 19 Dec 2022 17:08:27 -0500 Subject: [PATCH 1/5] fix(ingest): conditionally include env in assertion guid --- .../src/datahub/ingestion/source/dbt/dbt_common.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 44338c3d7d1028..4d7e7059ecc61a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -684,6 +684,14 @@ def create_test_entity_mcps( "platform": DBT_PLATFORM, "name": node.dbt_name, "instance": self.config.platform_instance, + **( + # Ideally we'd include the env unconditionally. However, we started out + # not including env in the guid, so we need to maintain backwards compatibility + # with existing PROD assertions. + {} + if self.config.env == mce_builder.DEFAULT_ENV + else {"env": self.config.env} + ), } ) ) From 59754a534e5330be5c8ca4b8f3c6dbe7b051902c Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 19 Dec 2022 18:07:51 -0500 Subject: [PATCH 2/5] add backcompat flag --- .../src/datahub/ingestion/source/dbt/dbt_common.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 4d7e7059ecc61a..aef4c668413c9d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -251,7 +251,7 @@ class DBTCommonConfig(StatefulIngestionConfigBase, LineageConfig): ) backcompat_skip_source_on_lineage_edge: bool = Field( False, - description="Prior to version 0.8.41, lineage edges to sources were directed to the target platform node rather than the dbt source node. This contradicted the established pattern for other lineage edges to point to upstream dbt nodes. To revert lineage logic to this legacy approach, set this flag to true.", + description="[deprecated] Prior to version 0.8.41, lineage edges to sources were directed to the target platform node rather than the dbt source node. This contradicted the established pattern for other lineage edges to point to upstream dbt nodes. To revert lineage logic to this legacy approach, set this flag to true.", ) incremental_lineage: bool = Field( @@ -259,6 +259,12 @@ class DBTCommonConfig(StatefulIngestionConfigBase, LineageConfig): default=False, description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.", ) + backcompat_no_env_in_assertion_guid: bool = Field( + default=False, + description="[deprecated] Prior to version 0.9.3.4, the assertion GUIDs did not include the environment. This flag can be used to revert to the legacy behavior. " + "Note that this may cause the assertions between different envs to overwrite each other, as the GUIDs will be the same.", + hidden_from_schema=True, + ) stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = pydantic.Field( default=None, description="DBT Stateful Ingestion Config." ) @@ -689,7 +695,7 @@ def create_test_entity_mcps( # not including env in the guid, so we need to maintain backwards compatibility # with existing PROD assertions. {} - if self.config.env == mce_builder.DEFAULT_ENV + if self.config.env == mce_builder.DEFAULT_ENV or self.config.backcompat_no_env_in_assertion_guid else {"env": self.config.env} ), } From 5efb0314377de1813ab68e89184af113883d0fca Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 19 Dec 2022 18:23:01 -0500 Subject: [PATCH 3/5] lint --- .../src/datahub/ingestion/source/dbt/dbt_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index aef4c668413c9d..0bdff9b518d868 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -695,7 +695,8 @@ def create_test_entity_mcps( # not including env in the guid, so we need to maintain backwards compatibility # with existing PROD assertions. {} - if self.config.env == mce_builder.DEFAULT_ENV or self.config.backcompat_no_env_in_assertion_guid + if self.config.env == mce_builder.DEFAULT_ENV + or self.config.backcompat_no_env_in_assertion_guid else {"env": self.config.env} ), } From ce7cbdc0eea4e7a146226e3778da483266117cad Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 22 Dec 2022 16:12:32 -0500 Subject: [PATCH 4/5] make this opt-in --- .../src/datahub/ingestion/source/dbt/dbt_common.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 0bdff9b518d868..d85a19fe957fd0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -259,11 +259,10 @@ class DBTCommonConfig(StatefulIngestionConfigBase, LineageConfig): default=False, description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.", ) - backcompat_no_env_in_assertion_guid: bool = Field( + include_env_in_assertion_guid: bool = Field( default=False, - description="[deprecated] Prior to version 0.9.3.4, the assertion GUIDs did not include the environment. This flag can be used to revert to the legacy behavior. " - "Note that this may cause the assertions between different envs to overwrite each other, as the GUIDs will be the same.", - hidden_from_schema=True, + description="Prior to version 0.9.4.2, the assertion GUIDs did not include the environment. If you're using multiple dbt ingestion " + "that are only distinguished by env, then you should set this flag to True.", ) stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = pydantic.Field( default=None, description="DBT Stateful Ingestion Config." @@ -696,7 +695,7 @@ def create_test_entity_mcps( # with existing PROD assertions. {} if self.config.env == mce_builder.DEFAULT_ENV - or self.config.backcompat_no_env_in_assertion_guid + or self.config.include_env_in_assertion_guid else {"env": self.config.env} ), } From 33bdeb1a7579fe1587e8cd43922bc77278b78836 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 23 Dec 2022 15:36:12 -0500 Subject: [PATCH 5/5] fix condition --- .../src/datahub/ingestion/source/dbt/dbt_common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index d85a19fe957fd0..adfbfe1c8df2fd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -693,10 +693,10 @@ def create_test_entity_mcps( # Ideally we'd include the env unconditionally. However, we started out # not including env in the guid, so we need to maintain backwards compatibility # with existing PROD assertions. - {} - if self.config.env == mce_builder.DEFAULT_ENV - or self.config.include_env_in_assertion_guid - else {"env": self.config.env} + {"env": self.config.env} + if self.config.env != mce_builder.DEFAULT_ENV + and self.config.include_env_in_assertion_guid + else {} ), } )