From f7245c88a4154f98e371b92b1df201770302bb3d Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 23 Nov 2022 13:35:57 -0500 Subject: [PATCH] feat(ingest): disable stateful_ingestion failsafe by default --- .../source/state/stale_entity_removal_handler.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py index 984b826acfd651..9e6d60e0e6b526 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py @@ -34,10 +34,11 @@ class StatefulStaleMetadataRemovalConfig(StatefulIngestionConfig): description="Soft-deletes the entities present in the last successful run but missing in the current run with stateful_ingestion enabled.", ) fail_safe_threshold: float = pydantic.Field( - default=20.0, + default=100.0, description="Prevents large amount of soft deletes & the state from committing from accidental changes to the source configuration if the relative change percent in entities compared to the previous state is above the 'fail_safe_threshold'.", - le=100.0, # mypy does not work with pydantic.confloat. This is the recommended work-around. + le=100.0, ge=0.0, + hidden_from_schema=True, ) @@ -261,12 +262,16 @@ def gen_removed_entity_workunits(self) -> Iterable[MetadataWorkUnit]: if ( entity_difference_percent > self.stateful_ingestion_config.fail_safe_threshold + # Adding this check to protect against cases where get_percent_entities_changed returns over 100%. + # This previously happened due to a bug in the implementation, which caused this condition to be + # triggered too frequently. + and self.stateful_ingestion_config.fail_safe_threshold < 100.0 ): # Log the failure. This would prevent the current state from getting committed. self.source.get_report().report_failure( "Stateful Ingestion", - f"Fail safe mode triggered, entity difference percent:{entity_difference_percent}" - " > fail_safe_threshold:{self.stateful_ingestion_config.fail_safe_threshold}", + f"Will not soft-delete entities, since we'd be deleting {entity_difference_percent:.1f}% of the existing entities. " + f"To force a deletion, increase the value of 'stateful_ingestion.fail_safe_threshold' (currently {self.stateful_ingestion_config.fail_safe_threshold})", ) # Bail so that we don't emit the stale entity removal workunits. return