From 30c4fa91ce4a5721a58ab872a0e72a65d6fa3985 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 21 Aug 2024 11:53:47 -0700 Subject: [PATCH] feat(ingest/mongodb): Ingest databases as containers (#11178) Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com> --- .../src/datahub/ingestion/source/mongodb.py | 180 ++++++------ .../mongodb/mongodb_mces_golden.json | 258 ++++++++++++++++++ ...mongodb_mces_small_schema_size_golden.json | 258 ++++++++++++++++++ 3 files changed, 618 insertions(+), 78 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index 7ce3b5bc34da2..e4dadaf602852 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -18,9 +18,13 @@ from datahub.emitter.mce_builder import ( make_data_platform_urn, make_dataplatform_instance_urn, - make_dataset_urn_with_platform_instance, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.mcp_builder import ( + DatabaseKey, + add_dataset_to_container, + gen_containers, +) from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -32,6 +36,7 @@ ) from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.common.subtypes import DatasetContainerSubTypes from datahub.ingestion.source.schema_inference.object import ( SchemaDescription, construct_schema, @@ -64,6 +69,7 @@ DataPlatformInstanceClass, DatasetPropertiesClass, ) +from datahub.metadata.urns import DatasetUrn logger = logging.getLogger(__name__) @@ -263,6 +269,7 @@ class MongoDBSource(StatefulIngestionSourceBase): config: MongoDBConfig report: MongoDBSourceReport mongo_client: MongoClient + platform: str = "mongodb" def __init__(self, ctx: PipelineContext, config: MongoDBConfig): super().__init__(config, ctx) @@ -282,7 +289,9 @@ def __init__(self, ctx: PipelineContext, config: MongoDBConfig): } # See https://pymongo.readthedocs.io/en/stable/examples/datetimes.html#handling-out-of-range-datetimes - self.mongo_client = MongoClient(self.config.connect_uri, datetime_conversion="DATETIME_AUTO", **options) # type: ignore + self.mongo_client = MongoClient( + self.config.connect_uri, datetime_conversion="DATETIME_AUTO", **options + ) # type: ignore # This cheaply tests the connection. For details, see # https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient @@ -351,8 +360,6 @@ def get_field_type( return SchemaFieldDataType(type=TypeClass()) def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: - platform = "mongodb" - database_names: List[str] = self.mongo_client.list_database_names() # traverse databases in sorted order so output is consistent @@ -364,8 +371,19 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: continue database = self.mongo_client[database_name] - collection_names: List[str] = database.list_collection_names() + database_key = DatabaseKey( + database=database_name, + platform=self.platform, + instance=self.config.platform_instance, + env=self.config.env, + ) + yield from gen_containers( + container_key=database_key, + name=database_name, + sub_types=[DatasetContainerSubTypes.DATABASE], + ) + collection_names: List[str] = database.list_collection_names() # traverse collections in sorted order so output is consistent for collection_name in sorted(collection_names): dataset_name = f"{database_name}.{collection_name}" @@ -374,9 +392,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.report.report_dropped(dataset_name) continue - dataset_urn = make_dataset_urn_with_platform_instance( - platform=platform, - name=dataset_name, + dataset_urn = DatasetUrn.create_from_ids( + platform_id=self.platform, + table_name=dataset_name, env=self.config.env, platform_instance=self.config.platform_instance, ) @@ -385,9 +403,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: data_platform_instance = None if self.config.platform_instance: data_platform_instance = DataPlatformInstanceClass( - platform=make_data_platform_urn(platform), + platform=make_data_platform_urn(self.platform), instance=make_dataplatform_instance_urn( - platform, self.config.platform_instance + self.platform, self.config.platform_instance ), ) @@ -397,83 +415,21 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) schema_metadata: Optional[SchemaMetadata] = None - if self.config.enableSchemaInference: - assert self.config.maxDocumentSize is not None - collection_schema = construct_schema_pymongo( - database[collection_name], - delimiter=".", - use_random_sampling=self.config.useRandomSampling, - max_document_size=self.config.maxDocumentSize, - should_add_document_size_filter=self.should_add_document_size_filter(), - sample_size=self.config.schemaSamplingSize, - ) - - # initialize the schema for the collection - canonical_schema: List[SchemaField] = [] - max_schema_size = self.config.maxSchemaSize - collection_schema_size = len(collection_schema.values()) - collection_fields: Union[ - List[SchemaDescription], ValuesView[SchemaDescription] - ] = collection_schema.values() - assert max_schema_size is not None - if collection_schema_size > max_schema_size: - # downsample the schema, using frequency as the sort key - self.report.report_warning( - title="Too many schema fields", - message=f"Downsampling the collection schema because it has too many schema fields. Configured threshold is {max_schema_size}", - context=f"Schema Size: {collection_schema_size}, Collection: {dataset_urn}", - ) - # Add this information to the custom properties so user can know they are looking at downsampled schema - dataset_properties.customProperties[ - "schema.downsampled" - ] = "True" - dataset_properties.customProperties[ - "schema.totalFields" - ] = f"{collection_schema_size}" - - logger.debug( - f"Size of collection fields = {len(collection_fields)}" - ) - # append each schema field (sort so output is consistent) - for schema_field in sorted( - collection_fields, - key=lambda x: ( - -x["count"], - x["delimited_name"], - ), # Negate `count` for descending order, `delimited_name` stays the same for ascending - )[0:max_schema_size]: - field = SchemaField( - fieldPath=schema_field["delimited_name"], - nativeDataType=self.get_pymongo_type_string( - schema_field["type"], dataset_name - ), - type=self.get_field_type( - schema_field["type"], dataset_name - ), - description=None, - nullable=schema_field["nullable"], - recursive=False, - ) - canonical_schema.append(field) - - # create schema metadata object for collection - schema_metadata = SchemaMetadata( - schemaName=collection_name, - platform=f"urn:li:dataPlatform:{platform}", - version=0, - hash="", - platformSchema=SchemalessClass(), - fields=canonical_schema, + schema_metadata = self._infer_schema_metadata( + collection=database[collection_name], + dataset_urn=dataset_urn, + dataset_properties=dataset_properties, ) # TODO: use list_indexes() or index_information() to get index information # See https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.list_indexes. + yield from add_dataset_to_container(database_key, dataset_urn.urn()) yield from [ mcp.as_workunit() for mcp in MetadataChangeProposalWrapper.construct_many( - entityUrn=dataset_urn, + entityUrn=dataset_urn.urn(), aspects=[ schema_metadata, dataset_properties, @@ -482,6 +438,74 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) ] + def _infer_schema_metadata( + self, + collection: pymongo.collection.Collection, + dataset_urn: DatasetUrn, + dataset_properties: DatasetPropertiesClass, + ) -> SchemaMetadata: + assert self.config.maxDocumentSize is not None + collection_schema = construct_schema_pymongo( + collection, + delimiter=".", + use_random_sampling=self.config.useRandomSampling, + max_document_size=self.config.maxDocumentSize, + should_add_document_size_filter=self.should_add_document_size_filter(), + sample_size=self.config.schemaSamplingSize, + ) + + # initialize the schema for the collection + canonical_schema: List[SchemaField] = [] + max_schema_size = self.config.maxSchemaSize + collection_schema_size = len(collection_schema.values()) + collection_fields: Union[ + List[SchemaDescription], ValuesView[SchemaDescription] + ] = collection_schema.values() + assert max_schema_size is not None + if collection_schema_size > max_schema_size: + # downsample the schema, using frequency as the sort key + self.report.report_warning( + title="Too many schema fields", + message=f"Downsampling the collection schema because it has too many schema fields. Configured threshold is {max_schema_size}", + context=f"Schema Size: {collection_schema_size}, Collection: {dataset_urn}", + ) + # Add this information to the custom properties so user can know they are looking at downsampled schema + dataset_properties.customProperties["schema.downsampled"] = "True" + dataset_properties.customProperties[ + "schema.totalFields" + ] = f"{collection_schema_size}" + + logger.debug(f"Size of collection fields = {len(collection_fields)}") + # append each schema field (sort so output is consistent) + for schema_field in sorted( + collection_fields, + key=lambda x: ( + -x["count"], + x["delimited_name"], + ), # Negate `count` for descending order, `delimited_name` stays the same for ascending + )[0:max_schema_size]: + field = SchemaField( + fieldPath=schema_field["delimited_name"], + nativeDataType=self.get_pymongo_type_string( + schema_field["type"], dataset_urn.name + ), + type=self.get_field_type(schema_field["type"], dataset_urn.name), + description=None, + nullable=schema_field["nullable"], + recursive=False, + ) + canonical_schema.append(field) + + # create schema metadata object for collection + return SchemaMetadata( + schemaName=collection.name, + platform=f"urn:li:dataPlatform:{self.platform}", + version=0, + hash="", + platformSchema=SchemalessClass(), + fields=canonical_schema, + ) + def is_server_version_gte_4_4(self) -> bool: try: server_version = self.mongo_client.server_info().get("versionArray") diff --git a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json index ec3fd80e6a6ea..bba160984eed8 100644 --- a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json +++ b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json @@ -1,4 +1,59 @@ [ +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mongodb", + "instance": "instance", + "env": "PROD", + "database": "mngdb" + }, + "name": "mngdb" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", @@ -47,6 +102,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", @@ -64,6 +135,70 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", @@ -377,6 +512,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", @@ -4026,6 +4177,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", @@ -4046,6 +4213,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", @@ -4063,6 +4255,47 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", @@ -4237,6 +4470,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", diff --git a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_small_schema_size_golden.json b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_small_schema_size_golden.json index 72b5fee49a0db..b2a1ba03dab76 100644 --- a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_small_schema_size_golden.json +++ b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_small_schema_size_golden.json @@ -1,4 +1,59 @@ [ +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mongodb", + "instance": "instance", + "env": "PROD", + "database": "mngdb" + }, + "name": "mngdb" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", @@ -47,6 +102,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", @@ -64,6 +135,70 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", @@ -236,6 +371,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", @@ -405,6 +556,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", @@ -425,6 +592,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", @@ -442,6 +634,47 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", @@ -616,6 +849,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + }, + { + "id": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed", + "urn": "urn:li:container:f5ff6ace1ed73cb3fd4c73dc718c39ed" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)",