diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index f80435fa7fd92..1ae62a2e88d80 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -1,11 +1,15 @@ import hashlib import json -from typing import Any, Iterable, List, Optional, TypeVar +from typing import Any, Dict, Iterable, List, Optional, TypeVar from pydantic.fields import Field from pydantic.main import BaseModel -from datahub.emitter.mce_builder import make_container_urn, make_data_platform_urn +from datahub.emitter.mce_builder import ( + make_container_urn, + make_data_platform_urn, + make_dataplatform_instance_urn, +) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance @@ -28,23 +32,51 @@ from datahub.utilities.urns.urn import guess_entity_type +def _stable_guid_from_dict(d: dict) -> str: + json_key = json.dumps( + d, + separators=(",", ":"), + sort_keys=True, + cls=DatahubKeyJSONEncoder, + ) + md5_hash = hashlib.md5(json_key.encode("utf-8")) + return str(md5_hash.hexdigest()) + + class DatahubKey(BaseModel): + def guid_dict(self) -> Dict[str, str]: + return self.dict(by_alias=True, exclude_none=True) + def guid(self) -> str: - nonnull_dict = self.dict(by_alias=True, exclude_none=True) - json_key = json.dumps( - nonnull_dict, - separators=(",", ":"), - sort_keys=True, - cls=DatahubKeyJSONEncoder, - ) - md5_hash = hashlib.md5(json_key.encode("utf-8")) - return str(md5_hash.hexdigest()) + bag = self.guid_dict() + return _stable_guid_from_dict(bag) class PlatformKey(DatahubKey): platform: str instance: Optional[str] = None + # BUG: In some of our sources, we incorrectly set the platform instance + # to the env if no platform instance was specified. 
Now, we have to maintain + # backwards compatibility with this bug, which means generating our GUIDs + # in the same way. Specifically, we need to use the backcompat value if + # the normal instance value is not set. + backcompat_instance_for_guid: Optional[str] = Field(default=None, exclude=True) + + def guid_dict(self) -> Dict[str, str]: + # FIXME: Notice that we can't use exclude_none=True here. This is because + # we need to maintain the insertion order in the dict (so that instance + # comes before the keys from any subclasses). While the guid computation + # method uses sort_keys=True, we also use the guid_dict method when + # generating custom properties, which are not sorted. + bag = self.dict(by_alias=True, exclude_none=False) + + if self.instance is None: + bag["instance"] = self.backcompat_instance_for_guid + + bag = {k: v for k, v in bag.items() if v is not None} + return bag + class DatabaseKey(PlatformKey): database: str @@ -173,7 +205,7 @@ def gen_containers( aspect=ContainerProperties( name=name, description=description, - customProperties=container_key.dict(exclude_none=True, by_alias=True), + customProperties=container_key.guid_dict(), externalUrl=external_url, qualifiedName=qualified_name, ), @@ -196,6 +228,9 @@ def gen_containers( # entityKeyAspect=ContainerKeyClass(guid=schema_container_key.guid()), aspect=DataPlatformInstance( platform=f"{make_data_platform_urn(container_key.platform)}", + instance=f"{make_dataplatform_instance_urn(container_key.platform, container_key.instance)}" + if container_key.instance + else None, ), ) wu = MetadataWorkUnit( diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index 8aac56a7ef634..ef349b0085724 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -923,10 +923,8 @@ def gen_database_key(self, database: str) -> DatabaseKey: return
DatabaseKey( database=database, platform=self.platform, - instance=self.source_config.platform_instance - # keeps backward compatibility when platform instance is missed - if self.source_config.platform_instance is not None - else self.source_config.env, + instance=self.source_config.platform_instance, + backcompat_instance_for_guid=self.source_config.env, ) def gen_database_containers( diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 8530090f9f503..1418261d8fa0d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -399,18 +399,16 @@ def gen_dataset_key(self, db_name: str, schema: str) -> PlatformKey: project_id=db_name, dataset_id=schema, platform=self.platform, - instance=self.config.platform_instance - if self.config.platform_instance is not None - else self.config.env, + instance=self.config.platform_instance, + backcompat_instance_for_guid=self.config.env, ) def gen_project_id_key(self, database: str) -> PlatformKey: return ProjectIdKey( project_id=database, platform=self.platform, - instance=self.config.platform_instance - if self.config.platform_instance is not None - else self.config.env, + instance=self.config.platform_instance, + backcompat_instance_for_guid=self.config.env, ) def _gen_domain_urn(self, dataset_name: str) -> Optional[str]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/data_lake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/s3/data_lake_utils.py index 5efeb5be3167f..243bdcf7cd325 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/data_lake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/data_lake_utils.py @@ -27,7 +27,8 @@ class ContainerWUCreator: def __init__(self, platform, platform_instance, env): self.processed_containers = [] self.platform = 
platform - self.instance = env if platform_instance is None else platform_instance + self.instance = platform_instance + self.env = env def create_emit_containers( self, @@ -54,6 +55,7 @@ def gen_folder_key(self, abs_path): return FolderKey( platform=self.platform, instance=self.instance, + backcompat_instance_for_guid=self.env, folder_abs_path=abs_path, ) @@ -61,6 +63,7 @@ def gen_bucket_key(self, name): return S3BucketKey( platform="s3", instance=self.instance, + backcompat_instance_for_guid=self.env, bucket_name=name, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index d11f5e512d979..7f18d20ffb39d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -798,18 +798,16 @@ def gen_schema_key(self, db_name: str, schema: str) -> PlatformKey: database=db_name, schema=schema, platform=self.platform, - instance=self.config.platform_instance - if self.config.platform_instance is not None - else self.config.env, + instance=self.config.platform_instance, + backcompat_instance_for_guid=self.config.env, ) def gen_database_key(self, database: str) -> PlatformKey: return DatabaseKey( database=database, platform=self.platform, - instance=self.config.platform_instance - if self.config.platform_instance is not None - else self.config.env, + instance=self.config.platform_instance, + backcompat_instance_for_guid=self.config.env, ) def _gen_domain_urn(self, dataset_name: str) -> Optional[str]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index 351a066e8f602..594c7857afb35 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -172,9 +172,8 @@ def gen_schema_key(self, 
db_name: str, schema: str) -> DatabaseKey: return DatabaseKey( database=schema, platform=self.platform, - instance=self.config.platform_instance - if self.config.platform_instance is not None - else self.config.env, + instance=self.config.platform_instance, + backcompat_instance_for_guid=self.config.env, ) def gen_schema_containers( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py index b054864ddc764..715f265b6bb66 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py @@ -1181,18 +1181,16 @@ def gen_schema_key(self, db_name: str, schema: str) -> PlatformKey: project_id=db_name, dataset_id=schema, platform=self.platform, - instance=self.config.platform_instance - if self.config.platform_instance is not None - else self.config.env, + instance=self.config.platform_instance, + backcompat_instance_for_guid=self.config.env, ) def gen_database_key(self, database: str) -> PlatformKey: return ProjectIdKey( project_id=database, platform=self.platform, - instance=self.config.platform_instance - if self.config.platform_instance is not None - else self.config.env, + instance=self.config.platform_instance, + backcompat_instance_for_guid=self.config.env, ) def gen_database_containers(self, database: str) -> Iterable[MetadataWorkUnit]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 6785487ab2057..257c84ff5f2e9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -566,18 +566,16 @@ def gen_schema_key(self, db_name: str, schema: str) -> PlatformKey: database=db_name, schema=schema, platform=self.platform, - instance=self.config.platform_instance - if self.config.platform_instance is not None - 
else self.config.env, + instance=self.config.platform_instance, + backcompat_instance_for_guid=self.config.env, ) def gen_database_key(self, database: str) -> PlatformKey: return DatabaseKey( database=database, platform=self.platform, - instance=self.config.platform_instance - if self.config.platform_instance is not None - else self.config.env, + instance=self.config.platform_instance, + backcompat_instance_for_guid=self.config.env, ) def gen_database_containers(self, database: str) -> Iterable[MetadataWorkUnit]: @@ -605,6 +603,7 @@ def gen_schema_containers( database_container_key = self.gen_database_key(database=db_name) container_workunits = gen_containers( + # TODO: this one is bad schema_container_key, schema, [SqlContainerSubTypes.SCHEMA], diff --git a/metadata-ingestion/tests/integration/clickhouse/clickhouse_mces_golden.json b/metadata-ingestion/tests/integration/clickhouse/clickhouse_mces_golden.json index 3929b10dfb414..8cc83ddd92765 100644 --- a/metadata-ingestion/tests/integration/clickhouse/clickhouse_mces_golden.json +++ b/metadata-ingestion/tests/integration/clickhouse/clickhouse_mces_golden.json @@ -33,7 +33,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:clickhouse\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:clickhouse\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -89,7 +89,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:clickhouse\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:clickhouse\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -172,11 +172,11 @@ "primary_key": "col_Int64", "sampling_key": "", "storage_policy": "default", - 
"metadata_modification_time": "2022-10-08 00:28:00", + "metadata_modification_time": "2022-10-13 04:56:28", "total_rows": "10", "total_bytes": "671", - "data_paths": "['/var/lib/clickhouse/store/3d0/3d0dc2c7-992b-4867-9bce-9272fff6f4ce/']", - "metadata_path": "/var/lib/clickhouse/store/603/603d6700-1767-42bf-9669-b2b86c3de24e/mv_target_table.sql" + "data_paths": "['/var/lib/clickhouse/store/f12/f12ef1cd-d030-4ce1-aa00-2abe38a0af22/']", + "metadata_path": "/var/lib/clickhouse/store/1df/1df0ba79-5a9d-4f93-8462-1e7056c3aea5/mv_target_table.sql" }, "name": "mv_target_table", "description": "This is target table for materialized view", @@ -340,11 +340,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "default", - "metadata_modification_time": "2022-10-08 00:28:00", + "metadata_modification_time": "2022-10-13 04:56:28", "total_rows": "0", "total_bytes": "0", - "data_paths": "['/var/lib/clickhouse/store/490/4900ce0f-0c6d-4dad-8317-647e14fa4fee/']", - "metadata_path": "/var/lib/clickhouse/store/603/603d6700-1767-42bf-9669-b2b86c3de24e/test_data_types.sql" + "data_paths": "['/var/lib/clickhouse/store/938/9383ca3f-c98e-4240-9e0d-0f32939d170b/']", + "metadata_path": "/var/lib/clickhouse/store/1df/1df0ba79-5a9d-4f93-8462-1e7056c3aea5/test_data_types.sql" }, "name": "test_data_types", "description": "This table has basic types", @@ -951,11 +951,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "", - "metadata_modification_time": "2022-10-08 00:28:00", + "metadata_modification_time": "2022-10-13 04:56:28", "total_rows": "None", "total_bytes": "None", "data_paths": "[]", - "metadata_path": "/var/lib/clickhouse/store/603/603d6700-1767-42bf-9669-b2b86c3de24e/test_dict.sql" + "metadata_path": "/var/lib/clickhouse/store/1df/1df0ba79-5a9d-4f93-8462-1e7056c3aea5/test_dict.sql" }, "name": "test_dict", "description": "", @@ -1080,11 +1080,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "default", - "metadata_modification_time": "2022-10-08 
00:28:00", + "metadata_modification_time": "2022-10-13 04:56:28", "total_rows": "0", "total_bytes": "0", - "data_paths": "['/var/lib/clickhouse/store/a35/a358faa5-d2c5-4f95-9557-064345ec20df/']", - "metadata_path": "/var/lib/clickhouse/store/603/603d6700-1767-42bf-9669-b2b86c3de24e/test_nested_data_types.sql" + "data_paths": "['/var/lib/clickhouse/store/f7f/f7f15fe6-10cc-45d6-928e-6eb4ecc7a502/']", + "metadata_path": "/var/lib/clickhouse/store/1df/1df0ba79-5a9d-4f93-8462-1e7056c3aea5/test_nested_data_types.sql" }, "name": "test_nested_data_types", "description": "This table has nested types", @@ -1327,11 +1327,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "", - "metadata_modification_time": "2022-10-08 00:28:00", + "metadata_modification_time": "2022-10-13 04:56:28", "total_rows": "None", "total_bytes": "None", - "data_paths": "['/var/lib/clickhouse/store/3d0/3d0dc2c7-992b-4867-9bce-9272fff6f4ce/']", - "metadata_path": "/var/lib/clickhouse/store/603/603d6700-1767-42bf-9669-b2b86c3de24e/mv_with_target_table.sql", + "data_paths": "['/var/lib/clickhouse/store/f12/f12ef1cd-d030-4ce1-aa00-2abe38a0af22/']", + "metadata_path": "/var/lib/clickhouse/store/1df/1df0ba79-5a9d-4f93-8462-1e7056c3aea5/mv_with_target_table.sql", "view_definition": "", "is_view": "True" }, @@ -1525,11 +1525,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "", - "metadata_modification_time": "2022-10-08 00:28:00", + "metadata_modification_time": "2022-10-13 04:56:28", "total_rows": "0", "total_bytes": "0", - "data_paths": "['/var/lib/clickhouse/store/3b4/3b4a7a66-2669-49b9-aa3d-07443d5ace06/']", - "metadata_path": "/var/lib/clickhouse/store/603/603d6700-1767-42bf-9669-b2b86c3de24e/mv_without_target_table.sql", + "data_paths": "['/var/lib/clickhouse/store/6f6/6f6a1b0a-8873-48b2-a370-1fbf5c04eea8/']", + "metadata_path": "/var/lib/clickhouse/store/1df/1df0ba79-5a9d-4f93-8462-1e7056c3aea5/mv_without_target_table.sql", "view_definition": "", "is_view": "True" }, @@ -1723,11 
+1723,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "", - "metadata_modification_time": "2022-10-08 00:28:00", + "metadata_modification_time": "2022-10-13 04:56:28", "total_rows": "None", "total_bytes": "None", "data_paths": "[]", - "metadata_path": "/var/lib/clickhouse/store/603/603d6700-1767-42bf-9669-b2b86c3de24e/test_view.sql", + "metadata_path": "/var/lib/clickhouse/store/1df/1df0ba79-5a9d-4f93-8462-1e7056c3aea5/test_view.sql", "view_definition": "", "is_view": "True" }, diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json index 4a320e9ef03c2..a4c1ca47c05a1 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json @@ -126,7 +126,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -182,7 +182,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -252,7 +252,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\", \"instance\": 
\"urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -322,7 +322,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -392,7 +392,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -978,7 +978,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:delta-lake\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)\"}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json index dc2e6ed442675..ddb4f948f25a3 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json @@ -140,7 +140,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": 
\"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -196,7 +196,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -266,7 +266,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -336,7 +336,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -406,7 +406,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -476,7 +476,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -546,7 +546,7 @@ 
"changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -616,7 +616,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -686,7 +686,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json index c804a8e9ebc0b..54d628e135b32 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json @@ -452,7 +452,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -508,7 +508,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": 
\"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -578,7 +578,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -648,7 +648,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -718,7 +718,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -788,7 +788,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -858,7 +858,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": 
\"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -928,7 +928,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json index 6f3261b96a277..fbb52b73b2e23 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json @@ -141,7 +141,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:s3\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:s3\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -197,7 +197,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:s3\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:s3\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -267,7 +267,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:s3\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:s3\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)\"}", "contentType": 
"application/json" }, "systemMetadata": { @@ -337,7 +337,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:s3\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:s3\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -407,7 +407,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:s3\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:s3\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json index c1df865bb00c6..3a68e0f853b0e 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json @@ -453,7 +453,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:s3\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:s3\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -509,7 +509,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:s3\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:s3\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -579,7 +579,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - 
"value": "{\"platform\": \"urn:li:dataPlatform:s3\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:s3\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { @@ -649,7 +649,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:s3\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:s3\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)\"}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json b/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json index ed3bc7626111e..1f77631c936b3 100644 --- a/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json +++ b/metadata-ingestion/tests/unit/glue/glue_mces_platform_instance_golden.json @@ -25,7 +25,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:glue\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:glue\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:glue,some_instance_name)\"}", "contentType": "application/json" } }, @@ -286,7 +286,7 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:glue\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:glue\", \"instance\": \"urn:li:dataPlatformInstance:(urn:li:dataPlatform:glue,some_instance_name)\"}", "contentType": "application/json" } }, diff --git a/metadata-ingestion/tests/unit/test_mcp_builder.py b/metadata-ingestion/tests/unit/test_mcp_builder.py index 0c851fe37f914..6e87d8cbc7e67 100644 --- a/metadata-ingestion/tests/unit/test_mcp_builder.py +++ b/metadata-ingestion/tests/unit/test_mcp_builder.py @@ -36,6 +36,18 @@ def 
test_guid_generator_with_instance(): assert guid == "f096b3799fc86a3e5d5d0c083eb1f2a4" +def test_guid_generator_with_backcompat_instance(): + key = builder.SchemaKey( + database="test", + schema="Test", + platform="mysql", + instance=None, + backcompat_instance_for_guid="TestInstance", + ) + guid = key.guid() + assert guid == "f096b3799fc86a3e5d5d0c083eb1f2a4" + + def test_guid_generators(): key = builder.SchemaKey( database="test", schema="Test", platform="mysql", instance="TestInstance"