Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingest/dbt): disable incremental lineage by default #6467

Merged
merged 1 commit into from
Nov 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions metadata-ingestion/src/datahub/ingestion/source/dbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,11 @@ class DBTConfig(StatefulIngestionConfigBase, LineageConfig):
description="Reference to your github location to enable easy navigation from DataHub to your dbt files.",
)

incremental_lineage: bool = Field(
# Redeclared here (copied from LineageConfig) only to override the default value.
default=False,
description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.",
)
stateful_ingestion: Optional[DBTStatefulIngestionConfig] = pydantic.Field(
default=None, description="DBT Stateful Ingestion Config."
)
Expand Down
6 changes: 6 additions & 0 deletions metadata-ingestion/tests/integration/dbt/test_dbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def set_paths(
self.output_path = f"{tmp_path}/{self.output_file}"

self.golden_path = f"{test_resources_dir}/{self.golden_file}"

self.source_config_modifiers.setdefault("incremental_lineage", True)
self.source_config = dict(
{
"manifest_path": self.manifest_path,
Expand Down Expand Up @@ -298,6 +300,7 @@ def test_dbt_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
# This bypasses the check in the get_workunits function of dbt.py
"write_semantics": "OVERRIDE",
"owner_extraction_pattern": r"^@(?P<owner>(.*))",
"incremental_lineage": True,
# enable stateful ingestion
**stateful_config,
}
Expand All @@ -309,6 +312,7 @@ def test_dbt_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
"target_platform": "postgres",
"write_semantics": "OVERRIDE",
"owner_extraction_pattern": r"^@(?P<owner>(.*))",
"incremental_lineage": True,
# enable stateful ingestion
**stateful_config,
}
Expand Down Expand Up @@ -506,6 +510,7 @@ def test_dbt_tests(pytestconfig, tmp_path, mock_time, **kwargs):
),
# This is only here so the test does not need to access a DataHub server.
write_semantics="OVERRIDE",
incremental_lineage=True,
),
),
sink=DynamicTypedConfig(type="file", config={"filename": str(output_file)}),
Expand Down Expand Up @@ -555,6 +560,7 @@ def test_dbt_stateful_tests(pytestconfig, tmp_path, mock_time, mock_datahub_grap
# This bypasses the check in the get_workunits function of dbt.py
"write_semantics": "OVERRIDE",
"owner_extraction_pattern": r"^@(?P<owner>(.*))",
"incremental_lineage": True,
# enable stateful ingestion
**stateful_config,
}
Expand Down