From 333598fda20d2e5902afd034d1da5a65427f1499 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Wed, 19 Oct 2022 23:03:21 +0200 Subject: [PATCH] fix(ingest): stateful-ingestion - keep dataset urn case in checkpoints (#6244) --- metadata-ingestion/src/datahub/emitter/mce_builder.py | 6 ++++++ .../src/datahub/utilities/checkpoint_state_util.py | 7 +++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index cb1cc5874d601..bf3069ca5b10e 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -122,6 +122,12 @@ def dataset_urn_to_key(dataset_urn: str) -> Optional[DatasetKeyClass]: return None +def dataset_key_to_urn(key: DatasetKeyClass) -> str: + return ( + f"urn:li:dataset:(urn:li:dataPlatform:{key.platform},{key.name},{key.origin})" + ) + + def make_container_new_urn(guid: str) -> str: return f"urn:dh:container:0:({guid})" diff --git a/metadata-ingestion/src/datahub/utilities/checkpoint_state_util.py b/metadata-ingestion/src/datahub/utilities/checkpoint_state_util.py index d069dedceb9ae..350ac6b40f998 100644 --- a/metadata-ingestion/src/datahub/utilities/checkpoint_state_util.py +++ b/metadata-ingestion/src/datahub/utilities/checkpoint_state_util.py @@ -1,6 +1,7 @@ from typing import Iterable, List, Set -from datahub.emitter.mce_builder import dataset_urn_to_key, make_dataset_urn +from datahub.emitter.mce_builder import dataset_key_to_urn, dataset_urn_to_key +from datahub.metadata.schema_classes import DatasetKeyClass class CheckpointStateUtil: @@ -35,4 +36,6 @@ def get_dataset_urns_not_in( ) for encoded_urn in difference: platform, name, env = encoded_urn.split(CheckpointStateUtil.get_separator()) - yield make_dataset_urn(platform, name, env) + yield dataset_key_to_urn( + DatasetKeyClass(platform=platform, name=name, origin=env) + )