Skip to content

Commit

Permalink
fix(ingest): stateful-ingestion - keep dataset urn case in checkpoints
Browse files Browse the repository at this point in the history
  • Loading branch information
treff7es authored Oct 19, 2022
1 parent 0b60225 commit 333598f
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
6 changes: 6 additions & 0 deletions metadata-ingestion/src/datahub/emitter/mce_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@ def dataset_urn_to_key(dataset_urn: str) -> Optional[DatasetKeyClass]:
return None


def dataset_key_to_urn(key: DatasetKeyClass) -> str:
    """Build a dataset URN string from the fields of a dataset key.

    The ``platform``, ``name`` and ``origin`` attributes of *key* are
    interpolated verbatim: this function applies no case folding or
    escaping of its own.

    Args:
        key: Dataset key whose ``platform``, ``name`` and ``origin``
            attributes are read.

    Returns:
        A string of the form
        ``urn:li:dataset:(urn:li:dataPlatform:<platform>,<name>,<origin>)``.
    """
    return (
        f"urn:li:dataset:(urn:li:dataPlatform:{key.platform},{key.name},{key.origin})"
    )


def make_container_new_urn(guid: str) -> str:
    """Return a container URN of the form ``urn:dh:container:0:(<guid>)``.

    Args:
        guid: Identifier placed verbatim between the parentheses.

    Returns:
        The formatted container URN string.
    """
    return f"urn:dh:container:0:({guid})"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Iterable, List, Set

from datahub.emitter.mce_builder import dataset_urn_to_key, make_dataset_urn
from datahub.emitter.mce_builder import dataset_key_to_urn, dataset_urn_to_key
from datahub.metadata.schema_classes import DatasetKeyClass


class CheckpointStateUtil:
Expand Down Expand Up @@ -35,4 +36,6 @@ def get_dataset_urns_not_in(
)
for encoded_urn in difference:
platform, name, env = encoded_urn.split(CheckpointStateUtil.get_separator())
yield make_dataset_urn(platform, name, env)
yield dataset_key_to_urn(
DatasetKeyClass(platform=platform, name=name, origin=env)
)

0 comments on commit 333598f

Please sign in to comment.