From 276a91108f78f7e8b9397c697b4250cf5210a147 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Wed, 28 Dec 2022 21:50:37 +0530 Subject: [PATCH] feat(ingest/snowflake): handle failures gracefully and raise permission failures (#6748) --- .../docs/sources/snowflake/snowflake_pre.md | 17 +- .../ingestion/source/snowflake/constants.py | 48 ++ .../source/snowflake/snowflake_lineage.py | 715 +++++++++++------- .../source/snowflake/snowflake_profiler.py | 14 +- .../source/snowflake/snowflake_query.py | 6 +- .../source/snowflake/snowflake_report.py | 3 + .../source/snowflake/snowflake_schema.py | 53 +- .../source/snowflake/snowflake_usage_v2.py | 176 +++-- .../source/snowflake/snowflake_utils.py | 147 +++- .../source/snowflake/snowflake_v2.py | 630 ++++++++++----- .../ingestion/source/sql/sql_common.py | 5 +- .../ingestion/source_config/sql/snowflake.py | 39 +- .../source_config/usage/snowflake_usage.py | 64 +- .../tests/integration/snowflake/__init__.py | 0 .../common.py} | 313 +++----- .../snowflake_golden.json} | 660 +++++++++++----- .../snowflake_privatelink_golden.json} | 138 ++-- .../integration/snowflake/test_snowflake.py | 188 +++++ .../snowflake/test_snowflake_failures.py | 301 ++++++++ ...eta_source.py => test_snowflake_source.py} | 48 +- 20 files changed, 2398 insertions(+), 1167 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py create mode 100644 metadata-ingestion/tests/integration/snowflake/__init__.py rename metadata-ingestion/tests/integration/{snowflake-beta/test_snowflake_beta.py => snowflake/common.py} (58%) rename metadata-ingestion/tests/integration/{snowflake-beta/snowflake_beta_golden.json => snowflake/snowflake_golden.json} (77%) rename metadata-ingestion/tests/integration/{snowflake-beta/snowflake_privatelink_beta_golden.json => snowflake/snowflake_privatelink_golden.json} (94%) create mode 100644 
metadata-ingestion/tests/integration/snowflake/test_snowflake.py create mode 100644 metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py rename metadata-ingestion/tests/unit/{test_snowflake_beta_source.py => test_snowflake_source.py} (91%) diff --git a/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md b/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md index 9f76554b22e65d..ad33b147ffcb9d 100644 --- a/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md +++ b/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md @@ -16,7 +16,7 @@ grant usage on DATABASE "" to role datahub_role; grant usage on all schemas in database "" to role datahub_role; grant usage on future schemas in database "" to role datahub_role; -// If you are NOT using Snowflake Profiling feature: Grant references privileges to your tables and views +// If you are NOT using Snowflake Profiling or Classification feature: Grant references privileges to your tables and views grant references on all tables in database "" to role datahub_role; grant references on future tables in database "" to role datahub_role; grant references on all external tables in database "" to role datahub_role; @@ -24,13 +24,11 @@ grant references on future external tables in database "" to role grant references on all views in database "" to role datahub_role; grant references on future views in database "" to role datahub_role; -// If you ARE using Snowflake Profiling feature: Grant select privileges to your tables and views +// If you ARE using Snowflake Profiling or Classification feature: Grant select privileges to your tables grant select on all tables in database "" to role datahub_role; grant select on future tables in database "" to role datahub_role; grant select on all external tables in database "" to role datahub_role; grant select on future external tables in database "" to role datahub_role; -grant select on all views in database "" to role datahub_role; -grant 
select on future views in database "" to role datahub_role; // Create a new DataHub user and assign the DataHub role to it create user datahub_user display_name = 'DataHub' password='' default_role = datahub_role default_warehouse = ''; @@ -40,10 +38,12 @@ grant role datahub_role to user datahub_user; ``` The details of each granted privilege can be viewed in [snowflake docs](https://docs.snowflake.com/en/user-guide/security-access-control-privileges.html). A summarization of each privilege, and why it is required for this connector: + - `operate` is required on warehouse to execute queries - `usage` is required for us to run queries using the warehouse - `usage` on `database` and `schema` are required because without it tables and views inside them are not accessible. If an admin does the required grants on `table` but misses the grants on `schema` or the `database` in which the table/view exists then we will not be able to get metadata for the table/view. - If metadata is required only on some schemas then you can grant the usage privilieges only on a particular schema like + ```sql grant usage on schema ""."" to role datahub_role; ``` @@ -51,6 +51,13 @@ grant usage on schema ""."" to role datahub_role; This represents the bare minimum privileges required to extract databases, schemas, views, tables from Snowflake. If you plan to enable extraction of table lineage, via the `include_table_lineage` config flag or extraction of usage statistics, via the `include_usage_stats` config, you'll also need to grant access to the [Account Usage](https://docs.snowflake.com/en/sql-reference/account-usage.html) system tables, using which the DataHub source extracts information. This can be done by granting access to the `snowflake` database. + ```sql grant imported privileges on database snowflake to role datahub_role; -``` \ No newline at end of file +``` + +### Caveats + +- Some of the features are only available in the Snowflake Enterprise Edition. 
This doc has notes mentioning where this applies. +- The underlying Snowflake views that we use to get metadata have a [latency of 45 minutes to 3 hours](https://docs.snowflake.com/en/sql-reference/account-usage.html#differences-between-account-usage-and-information-schema). So we would not be able to get very recent metadata in some cases like queries you ran within that time period etc. +- If there is any [incident going on for Snowflake](https://status.snowflake.com/) we will not be able to get the metadata until that incident is resolved. diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py new file mode 100644 index 00000000000000..36c3c77b231e82 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py @@ -0,0 +1,48 @@ +from enum import Enum + + +class SnowflakeCloudProvider(str, Enum): + AWS = "aws" + GCP = "gcp" + AZURE = "azure" + + +SNOWFLAKE_DEFAULT_CLOUD = SnowflakeCloudProvider.AWS + + +class SnowflakeEdition(str, Enum): + STANDARD = "Standard" + + # We use this to represent Enterprise Edition or higher + ENTERPRISE = "Enterprise or above" + + +# See https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#region-ids +# Includes only exceptions to format _ +SNOWFLAKE_REGION_CLOUD_REGION_MAPPING = { + "aws_us_east_1_gov": (SnowflakeCloudProvider.AWS, "us-east-1"), + "azure_uksouth": (SnowflakeCloudProvider.AZURE, "uk-south"), + "azure_centralindia": (SnowflakeCloudProvider.AZURE, "central-india.azure"), +} + +# https://docs.snowflake.com/en/sql-reference/snowflake-db.html +SNOWFLAKE_DATABASE = "SNOWFLAKE" + + +# We will always compare with lowercase +# Complete list for objectDomain - https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html +class SnowflakeObjectDomain(str, Enum): + TABLE = "table" + EXTERNAL_TABLE = "external table" + VIEW = "view" + MATERIALIZED_VIEW = "materialized 
view" + + +GENERIC_PERMISSION_ERROR_KEY = "permission-error" +LINEAGE_PERMISSION_ERROR = "lineage-permission-error" + + +# Snowflake connection arguments +# https://docs.snowflake.com/en/user-guide/python-connector-api.html#connect +CLIENT_PREFETCH_THREADS = "client_prefetch_threads" +CLIENT_SESSION_KEEP_ALIVE = "client_session_keep_alive" diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage.py index 20fa698d8b496d..a23e68320a4b79 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage.py @@ -2,14 +2,20 @@ import logging from collections import defaultdict from dataclasses import dataclass, field -from typing import Dict, FrozenSet, List, Optional, Set, Tuple +from typing import Dict, FrozenSet, Iterable, List, Optional, Set from pydantic import Field from pydantic.error_wrappers import ValidationError from snowflake.connector import SnowflakeConnection import datahub.emitter.mce_builder as builder +from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.aws.s3_util import make_s3_urn +from datahub.ingestion.source.snowflake.constants import ( + LINEAGE_PERMISSION_ERROR, + SnowflakeEdition, + SnowflakeObjectDomain, +) from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report @@ -18,6 +24,8 @@ ) from datahub.ingestion.source.snowflake.snowflake_utils import ( SnowflakeCommonMixin, + SnowflakeConnectionMixin, + SnowflakePermissionError, SnowflakeQueryMixin, ) from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( @@ -152,144 +160,133 @@ def update_lineage( ) -class SnowflakeLineageExtractor(SnowflakeQueryMixin, 
SnowflakeCommonMixin): +class SnowflakeLineageExtractor( + SnowflakeQueryMixin, SnowflakeConnectionMixin, SnowflakeCommonMixin +): + """ + Extracts Lineage from Snowflake. + Following lineage edges are considered. + + 1. "Table to View" lineage via `snowflake.account_usage.object_dependencies` view + 2. "S3 to Table" lineage via `show external tables` query. + 3. "View to Table" lineage via `snowflake.account_usage.access_history` view (requires Snowflake Enterprise Edition or above) + 4. "Table to Table" lineage via `snowflake.account_usage.access_history` view (requires Snowflake Enterprise Edition or above) + 5. "S3 to Table" lineage via `snowflake.account_usage.access_history` view (requires Snowflake Enterprise Edition or above) + + Edition Note - Snowflake Standard Edition does not have Access History Feature. So it does not support lineage extraction for edges 3, 4, 5 mentioned above. + """ + def __init__(self, config: SnowflakeV2Config, report: SnowflakeV2Report) -> None: - self._lineage_map: Optional[Dict[str, SnowflakeTableLineage]] = None - self._external_lineage_map: Optional[Dict[str, Set[str]]] = None + self._lineage_map: Dict[str, SnowflakeTableLineage] = defaultdict( + SnowflakeTableLineage + ) + self._external_lineage_map: Dict[str, Set[str]] = defaultdict(set) self.config = config self.platform = "snowflake" self.report = report self.logger = logger + self.connection: Optional[SnowflakeConnection] = None - # Rewrite implementation for readability, efficiency and extensibility - def _get_upstream_lineage_info( - self, dataset_name: str - ) -> Optional[Tuple[UpstreamLineage, Dict[str, str]]]: - if self._lineage_map is None or self._external_lineage_map is None: - conn = self.config.get_connection() - if self._lineage_map is None: - with PerfTimer() as timer: - self._populate_lineage(conn) - self.report.table_lineage_query_secs = timer.elapsed_seconds() - if self.config.include_view_lineage: - self._populate_view_lineage(conn) + def get_workunits( + 
self, discovered_tables: List[str], discovered_views: List[str] + ) -> Iterable[MetadataWorkUnit]: + + self.connection = self.create_connection() + if self.connection is None: + return - if self._external_lineage_map is None: + self._populate_table_lineage() + + if self.config.include_view_lineage: + if len(discovered_views) > 0: + self._populate_view_lineage() + else: + logger.info("No views found. Skipping View Lineage Extraction.") + + self._populate_external_lineage() + + if ( + len(self._lineage_map.keys()) == 0 + and len(self._external_lineage_map.keys()) == 0 + ): + logger.debug("No lineage found.") + return + + yield from self.get_table_upstream_workunits(discovered_tables) + yield from self.get_view_upstream_workunits(discovered_views) + + def _populate_table_lineage(self): + if self.report.edition == SnowflakeEdition.STANDARD: + logger.info( + "Snowflake Account is Standard Edition. Table to Table Lineage Feature is not supported." + ) # See Edition Note above for why + else: with PerfTimer() as timer: - self._populate_external_lineage(conn) - self.report.external_lineage_queries_secs = timer.elapsed_seconds() + self._populate_lineage() + self.report.table_lineage_query_secs = timer.elapsed_seconds() - assert self._lineage_map is not None - assert self._external_lineage_map is not None + def get_table_upstream_workunits(self, discovered_tables): + if self.config.include_table_lineage: + for dataset_name in discovered_tables: + if self._is_dataset_pattern_allowed( + dataset_name, SnowflakeObjectDomain.TABLE + ): + dataset_urn = builder.make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) + upstream_lineage = self._get_upstream_lineage_info(dataset_name) + if upstream_lineage is not None: + yield self.wrap_aspect_as_workunit( + "dataset", dataset_urn, "upstreamLineage", upstream_lineage + ) + + def get_view_upstream_workunits(self, discovered_views): + if 
self.config.include_view_lineage: + for view_name in discovered_views: + if self._is_dataset_pattern_allowed(view_name, "view"): + dataset_urn = builder.make_dataset_urn_with_platform_instance( + self.platform, + view_name, + self.config.platform_instance, + self.config.env, + ) + upstream_lineage = self._get_upstream_lineage_info(view_name) + if upstream_lineage is not None: + yield self.wrap_aspect_as_workunit( + "dataset", dataset_urn, "upstreamLineage", upstream_lineage + ) + def _get_upstream_lineage_info( + self, dataset_name: str + ) -> Optional[UpstreamLineage]: lineage = self._lineage_map[dataset_name] external_lineage = self._external_lineage_map[dataset_name] if not (lineage.upstreamTables or lineage.columnLineages or external_lineage): logger.debug(f"No lineage found for {dataset_name}") return None + upstream_tables: List[UpstreamClass] = [] finegrained_lineages: List[FineGrainedLineage] = [] - fieldset_finegrained_lineages: List[FineGrainedLineage] = [] - column_lineage: Dict[str, str] = {} + dataset_urn = builder.make_dataset_urn_with_platform_instance( self.platform, dataset_name, self.config.platform_instance, self.config.env, ) - for lineage_entry in sorted( - lineage.upstreamTables.values(), key=lambda x: x.upstreamDataset - ): - # Update the table-lineage - upstream_table_name = lineage_entry.upstreamDataset - upstream_table_urn = builder.make_dataset_urn_with_platform_instance( - self.platform, - upstream_table_name, - self.config.platform_instance, - self.config.env, - ) - upstream_table = UpstreamClass( - dataset=upstream_table_urn, - type=DatasetLineageTypeClass.TRANSFORMED, - ) - upstream_tables.append(upstream_table) - - if lineage_entry.upstreamColumns and lineage_entry.downstreamColumns: - # This is not used currently. This indicates same column lineage as was set - # in customProperties earlier - not accurate. 
- fieldset_finegrained_lineage = FineGrainedLineage( - upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, - downstreamType=FineGrainedLineageDownstreamType.FIELD_SET - if len(lineage_entry.downstreamColumns) > 1 - else FineGrainedLineageDownstreamType.FIELD, - upstreams=sorted( - [ - builder.make_schema_field_urn( - upstream_table_urn, - self.snowflake_identifier(d.columnName), - ) - for d in lineage_entry.upstreamColumns - ] - ), - downstreams=sorted( - [ - builder.make_schema_field_urn( - dataset_urn, self.snowflake_identifier(d.columnName) - ) - for d in lineage_entry.downstreamColumns - ] - ), - ) - fieldset_finegrained_lineages.append(fieldset_finegrained_lineage) + # Populate the table-lineage in aspect + self.update_upstream_tables_lineage(upstream_tables, lineage) - for col, col_upstreams in lineage.columnLineages.items(): - for fine_upstream in col_upstreams.upstreams: - fieldPath = col - finegrained_lineage_entry = FineGrainedLineage( - upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, - upstreams=sorted( - [ - builder.make_schema_field_urn( - builder.make_dataset_urn_with_platform_instance( - self.platform, - self.get_dataset_identifier_from_qualified_name( - upstream_col.objectName - ), - self.config.platform_instance, - self.config.env, - ), - self.snowflake_identifier(upstream_col.columnName), - ) - for upstream_col in fine_upstream.inputColumns # type:ignore - if upstream_col.objectName - and upstream_col.columnName - and self._is_dataset_pattern_allowed( - upstream_col.objectName, upstream_col.objectDomain - ) - ] - ), - downstreamType=FineGrainedLineageDownstreamType.FIELD, - downstreams=sorted( - [ - builder.make_schema_field_urn( - dataset_urn, self.snowflake_identifier(fieldPath) - ) - ] - ), - ) - if finegrained_lineage_entry.upstreams: - finegrained_lineages.append(finegrained_lineage_entry) + # Populate the column-lineage in aspect + self.update_upstream_columns_lineage(dataset_urn, finegrained_lineages, lineage) - for 
external_lineage_entry in sorted(external_lineage): - # For now, populate only for S3 - if external_lineage_entry.startswith("s3://"): - external_upstream_table = UpstreamClass( - dataset=make_s3_urn(external_lineage_entry, self.config.env), - type=DatasetLineageTypeClass.COPY, - ) - upstream_tables.append(external_upstream_table) + # Populate the external-table-lineage(s3->snowflake) in aspect + self.update_external_tables_lineage(upstream_tables, external_lineage) - if upstream_tables: + if len(upstream_tables) > 0: logger.debug( f"Upstream lineage of '{dataset_name}': {[u.dataset for u in upstream_tables]}" ) @@ -297,180 +294,227 @@ def _get_upstream_lineage_info( self.report.upstream_lineage[dataset_name] = [ u.dataset for u in upstream_tables ] - return ( - UpstreamLineage( - upstreams=upstream_tables, - fineGrainedLineages=sorted( - finegrained_lineages, key=lambda x: (x.downstreams, x.upstreams) - ) - or None, - ), - column_lineage, + return UpstreamLineage( + upstreams=upstream_tables, + fineGrainedLineages=sorted( + finegrained_lineages, key=lambda x: (x.downstreams, x.upstreams) + ) + or None, ) - return None + else: + return None - def _populate_view_lineage(self, conn: SnowflakeConnection) -> None: + def _populate_view_lineage(self) -> None: with PerfTimer() as timer: - self._populate_view_upstream_lineage(conn) + self._populate_view_upstream_lineage() self.report.view_upstream_lineage_query_secs = timer.elapsed_seconds() + + if self.report.edition == SnowflakeEdition.STANDARD: + logger.info( + "Snowflake Account is Standard Edition. View to Table Lineage Feature is not supported." 
+ ) # See Edition Note above for why + else: + with PerfTimer() as timer: + self._populate_view_downstream_lineage() + self.report.view_downstream_lineage_query_secs = timer.elapsed_seconds() + + def _populate_external_lineage(self) -> None: with PerfTimer() as timer: - self._populate_view_downstream_lineage(conn) - self.report.view_downstream_lineage_query_secs = timer.elapsed_seconds() + self.report.num_external_table_edges_scanned = 0 - def _populate_external_lineage(self, conn: SnowflakeConnection) -> None: - # Handles the case where a table is populated from an external location via copy. - # Eg: copy into category_english from 's3://acryl-snow-demo-olist/olist_raw_data/category_english'credentials=(aws_key_id='...' aws_secret_key='...') pattern='.*.csv'; - query: str = SnowflakeQuery.external_table_lineage_history( - start_time_millis=int(self.config.start_time.timestamp() * 1000) - if not self.config.ignore_start_time_lineage - else 0, - end_time_millis=int(self.config.end_time.timestamp() * 1000), - ) + if self.report.edition == SnowflakeEdition.STANDARD: + logger.info( + "Snowflake Account is Standard Edition. External Lineage Feature via Access History is not supported." + ) # See Edition Note above for why + else: + self._populate_external_lineage_from_access_history() - num_edges: int = 0 - self._external_lineage_map = defaultdict(set) - try: - for db_row in self.query(conn, query): - # key is the down-stream table name - key: str = self.get_dataset_identifier_from_qualified_name( - db_row["DOWNSTREAM_TABLE_NAME"] - ) - if not self._is_dataset_pattern_allowed(key, "table"): - continue - self._external_lineage_map[key] |= { - *json.loads(db_row["UPSTREAM_LOCATIONS"]) - } - logger.debug( - f"ExternalLineage[Table(Down)={key}]:External(Up)={self._external_lineage_map[key]} via access_history" - ) - except Exception as e: - self.warn( - "external_lineage", - f"Populating table external lineage from Snowflake failed." - f"Please check your premissions. 
Continuing...\nError was {e}.", + self._populate_external_lineage_from_show_query() + + logger.info( + f"Found {self.report.num_external_table_edges_scanned} external lineage edges." ) - # Handles the case for explicitly created external tables. - # NOTE: Snowflake does not log this information to the access_history table. + + self.report.external_lineage_queries_secs = timer.elapsed_seconds() + + # Handles the case for explicitly created external tables. + # NOTE: Snowflake does not log this information to the access_history table. + def _populate_external_lineage_from_show_query(self): external_tables_query: str = SnowflakeQuery.show_external_tables() try: - for db_row in self.query(conn, external_tables_query): + for db_row in self.query(external_tables_query): key = self.get_dataset_identifier( db_row["name"], db_row["schema_name"], db_row["database_name"] ) - if not self._is_dataset_pattern_allowed(key, "table"): + if not self._is_dataset_pattern_allowed( + key, SnowflakeObjectDomain.TABLE + ): continue self._external_lineage_map[key].add(db_row["location"]) logger.debug( f"ExternalLineage[Table(Down)={key}]:External(Up)={self._external_lineage_map[key]} via show external tables" ) - num_edges += 1 + self.report.num_external_table_edges_scanned += 1 except Exception as e: - self.warn( + logger.debug(e, exc_info=e) + self.report_warning( "external_lineage", - f"Populating external table lineage from Snowflake failed." - f"Please check your premissions. Continuing...\nError was {e}.", + f"Populating external table lineage from Snowflake failed due to error {e}.", ) - logger.info(f"Found {num_edges} external lineage edges.") - self.report.num_external_table_edges_scanned = num_edges - def _populate_lineage(self, conn: SnowflakeConnection) -> None: - query: str = SnowflakeQuery.table_to_table_lineage_history( + # Handles the case where a table is populated from an external location via copy. 
+ # Eg: copy into category_english from 's3://acryl-snow-demo-olist/olist_raw_data/category_english'credentials=(aws_key_id='...' aws_secret_key='...') pattern='.*.csv'; + def _populate_external_lineage_from_access_history(self): + query: str = SnowflakeQuery.external_table_lineage_history( start_time_millis=int(self.config.start_time.timestamp() * 1000) if not self.config.ignore_start_time_lineage else 0, end_time_millis=int(self.config.end_time.timestamp() * 1000), - include_column_lineage=self.config.include_column_lineage, ) - num_edges: int = 0 - self._lineage_map = defaultdict(SnowflakeTableLineage) + try: - for db_row in self.query(conn, query): - # key is the down-stream table name - key: str = self.get_dataset_identifier_from_qualified_name( - db_row["DOWNSTREAM_TABLE_NAME"] - ) - upstream_table_name = self.get_dataset_identifier_from_qualified_name( - db_row["UPSTREAM_TABLE_NAME"] + for db_row in self.query(query): + self._process_external_lineage_result_row(db_row) + except Exception as e: + if isinstance(e, SnowflakePermissionError): + error_msg = "Failed to get external lineage. Please grant imported privileges on SNOWFLAKE database. 
" + self.warn_if_stateful_else_error(LINEAGE_PERMISSION_ERROR, error_msg) + else: + logger.debug(e, exc_info=e) + self.report_warning( + "external_lineage", + f"Populating table external lineage from Snowflake failed due to error {e}.", ) - if not self._is_dataset_pattern_allowed(key, "table") or not ( - self._is_dataset_pattern_allowed(upstream_table_name, "table") - ): - continue - self._lineage_map[key].update_lineage( - # (, , ) - SnowflakeUpstreamTable.from_dict( - upstream_table_name, - db_row["UPSTREAM_TABLE_COLUMNS"], - db_row["DOWNSTREAM_TABLE_COLUMNS"], - ), - self.config.include_column_lineage, - ) - num_edges += 1 - logger.debug( - f"Lineage[Table(Down)={key}]:Table(Up)={self._lineage_map[key]}" - ) - except Exception as e: - logger.error(e, exc_info=e) - self.warn( - "lineage", - f"Extracting lineage from Snowflake failed." - f"Please check your premissions. Continuing...\nError was {e}.", + def _process_external_lineage_result_row(self, db_row): + # key is the down-stream table name + key: str = self.get_dataset_identifier_from_qualified_name( + db_row["DOWNSTREAM_TABLE_NAME"] + ) + if not self._is_dataset_pattern_allowed(key, SnowflakeObjectDomain.TABLE): + return + + if db_row["UPSTREAM_LOCATIONS"] is not None: + external_locations = json.loads(db_row["UPSTREAM_LOCATIONS"]) + + for loc in external_locations: + if loc not in self._external_lineage_map[key]: + self._external_lineage_map[key].add(loc) + self.report.num_external_table_edges_scanned += 1 + + logger.debug( + f"ExternalLineage[Table(Down)={key}]:External(Up)={self._external_lineage_map[key]} via access_history" ) + + def _populate_lineage(self) -> None: + query: str = SnowflakeQuery.table_to_table_lineage_history( + start_time_millis=int(self.config.start_time.timestamp() * 1000) + if not self.config.ignore_start_time_lineage + else 0, + end_time_millis=int(self.config.end_time.timestamp() * 1000), + include_column_lineage=self.config.include_column_lineage, + ) + 
self.report.num_table_to_table_edges_scanned = 0 + try: + for db_row in self.query(query): + self._process_table_lineage_row(db_row) + except Exception as e: + if isinstance(e, SnowflakePermissionError): + error_msg = "Failed to get table to table lineage. Please grant imported privileges on SNOWFLAKE database. " + self.warn_if_stateful_else_error(LINEAGE_PERMISSION_ERROR, error_msg) + else: + logger.debug(e, exc_info=e) + self.report_warning( + "table-lineage", + f"Extracting lineage from Snowflake failed due to error {e}.", + ) logger.info( - f"A total of {num_edges} Table->Table edges found" + f"A total of {self.report.num_table_to_table_edges_scanned} Table->Table edges found" f" for {len(self._lineage_map)} downstream tables.", ) - self.report.num_table_to_table_edges_scanned = num_edges - def _populate_view_upstream_lineage(self, conn: SnowflakeConnection) -> None: + def _process_table_lineage_row(self, db_row): + # key is the down-stream table name + key: str = self.get_dataset_identifier_from_qualified_name( + db_row["DOWNSTREAM_TABLE_NAME"] + ) + upstream_table_name = self.get_dataset_identifier_from_qualified_name( + db_row["UPSTREAM_TABLE_NAME"] + ) + if not self._is_dataset_pattern_allowed( + key, SnowflakeObjectDomain.TABLE + ) or not ( + self._is_dataset_pattern_allowed( + upstream_table_name, SnowflakeObjectDomain.TABLE + ) + ): + return + self._lineage_map[key].update_lineage( + # (, , ) + SnowflakeUpstreamTable.from_dict( + upstream_table_name, + db_row["UPSTREAM_TABLE_COLUMNS"], + db_row["DOWNSTREAM_TABLE_COLUMNS"], + ), + self.config.include_column_lineage, + ) + self.report.num_table_to_table_edges_scanned += 1 + logger.debug(f"Lineage[Table(Down)={key}]:Table(Up)={self._lineage_map[key]}") + + def _populate_view_upstream_lineage(self) -> None: # NOTE: This query captures only the upstream lineage of a view (with no column lineage). 
# For more details see: https://docs.snowflake.com/en/user-guide/object-dependencies.html#object-dependencies # and also https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html#usage-notes for current limitations on capturing the lineage for views. view_upstream_lineage_query: str = SnowflakeQuery.view_dependencies() - assert self._lineage_map is not None - num_edges: int = 0 + self.report.num_table_to_view_edges_scanned = 0 try: - for db_row in self.query(conn, view_upstream_lineage_query): - # Process UpstreamTable/View/ExternalTable/Materialized View->View edge. - view_upstream: str = self.get_dataset_identifier_from_qualified_name( - db_row["VIEW_UPSTREAM"] - ) - view_name: str = self.get_dataset_identifier_from_qualified_name( - db_row["DOWNSTREAM_VIEW"] + for db_row in self.query(view_upstream_lineage_query): + self._process_view_upstream_lineage_row(db_row) + except Exception as e: + if isinstance(e, SnowflakePermissionError): + error_msg = "Failed to get table to view lineage. Please grant imported privileges on SNOWFLAKE database." + self.warn_if_stateful_else_error(LINEAGE_PERMISSION_ERROR, error_msg) + else: + logger.debug(e, exc_info=e) + self.report_warning( + "view-upstream-lineage", + f"Extracting the upstream view lineage from Snowflake failed due to error {e}.", ) + logger.info( + f"A total of {self.report.num_table_to_view_edges_scanned} View upstream edges found." + ) - if not self._is_dataset_pattern_allowed( - dataset_name=view_name, - dataset_type=db_row["REFERENCING_OBJECT_DOMAIN"], - ) or not self._is_dataset_pattern_allowed( - view_upstream, db_row["REFERENCED_OBJECT_DOMAIN"] - ): - continue + def _process_view_upstream_lineage_row(self, db_row): + # Process UpstreamTable/View/ExternalTable/Materialized View->View edge. 
+ view_upstream: str = self.get_dataset_identifier_from_qualified_name( + db_row["VIEW_UPSTREAM"] + ) + view_name: str = self.get_dataset_identifier_from_qualified_name( + db_row["DOWNSTREAM_VIEW"] + ) - # key is the downstream view name - self._lineage_map[view_name].update_lineage( - # (, , ) - SnowflakeUpstreamTable.from_dict(view_upstream, None, None), - self.config.include_column_lineage, - ) - num_edges += 1 - logger.debug( - f"Upstream->View: Lineage[View(Down)={view_name}]:Upstream={view_upstream}" - ) - except Exception as e: - self.warn( - "view_upstream_lineage", - "Extracting the upstream view lineage from Snowflake failed." - + f"Please check your permissions. Continuing...\nError was {e}.", - ) - logger.info(f"A total of {num_edges} View upstream edges found.") - self.report.num_table_to_view_edges_scanned = num_edges + if not self._is_dataset_pattern_allowed( + dataset_name=view_name, + dataset_type=db_row["REFERENCING_OBJECT_DOMAIN"], + ) or not self._is_dataset_pattern_allowed( + view_upstream, db_row["REFERENCED_OBJECT_DOMAIN"] + ): + return + # key is the downstream view name + self._lineage_map[view_name].update_lineage( + # (, , ) + SnowflakeUpstreamTable.from_dict(view_upstream, None, None), + self.config.include_column_lineage, + ) + self.report.num_table_to_view_edges_scanned += 1 + logger.debug( + f"Upstream->View: Lineage[View(Down)={view_name}]:Upstream={view_upstream}" + ) - def _populate_view_downstream_lineage(self, conn: SnowflakeConnection) -> None: + def _populate_view_downstream_lineage(self) -> None: # This query captures the downstream table lineage for views. # See https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html#usage-notes for current limitations on capturing the lineage for views. # Eg: For viewA->viewB->ViewC->TableD, snowflake does not yet log intermediate view logs, resulting in only the viewA->TableD edge. 
@@ -482,56 +526,163 @@ def _populate_view_downstream_lineage(self, conn: SnowflakeConnection) -> None: include_column_lineage=self.config.include_column_lineage, ) - assert self._lineage_map is not None self.report.num_view_to_table_edges_scanned = 0 try: - db_rows = self.query(conn, view_lineage_query) + for db_row in self.query(view_lineage_query): + self._process_view_downstream_lineage_row(db_row) except Exception as e: - self.warn( - "view_downstream_lineage", - f"Extracting the view lineage from Snowflake failed." - f"Please check your permissions. Continuing...\nError was {e}.", - ) - else: - for db_row in db_rows: - view_name: str = self.get_dataset_identifier_from_qualified_name( - db_row["VIEW_NAME"] - ) - downstream_table: str = self.get_dataset_identifier_from_qualified_name( - db_row["DOWNSTREAM_TABLE_NAME"] - ) - if not self._is_dataset_pattern_allowed( - view_name, db_row["VIEW_DOMAIN"] - ) or not self._is_dataset_pattern_allowed( - downstream_table, db_row["DOWNSTREAM_TABLE_DOMAIN"] - ): - continue - - # Capture view->downstream table lineage. - self._lineage_map[downstream_table].update_lineage( - # (, , ) - SnowflakeUpstreamTable.from_dict( - view_name, - db_row["VIEW_COLUMNS"], - db_row["DOWNSTREAM_TABLE_COLUMNS"], - ), - self.config.include_column_lineage, - ) - self.report.num_view_to_table_edges_scanned += 1 - - logger.debug( - f"View->Table: Lineage[Table(Down)={downstream_table}]:View(Up)={self._lineage_map[downstream_table]}" + if isinstance(e, SnowflakePermissionError): + error_msg = "Failed to get view to table lineage. Please grant imported privileges on SNOWFLAKE database. " + self.warn_if_stateful_else_error(LINEAGE_PERMISSION_ERROR, error_msg) + else: + logger.debug(e, exc_info=e) + self.report_warning( + "view-downstream-lineage", + f"Extracting the view lineage from Snowflake failed due to error {e}.", ) logger.info( f"Found {self.report.num_view_to_table_edges_scanned} View->Table edges." 
) - def warn(self, key: str, reason: str) -> None: - self.report.report_warning(key, reason) - self.logger.warning(f"{key} => {reason}") + def _process_view_downstream_lineage_row(self, db_row): + view_name: str = self.get_dataset_identifier_from_qualified_name( + db_row["VIEW_NAME"] + ) + downstream_table: str = self.get_dataset_identifier_from_qualified_name( + db_row["DOWNSTREAM_TABLE_NAME"] + ) + if not self._is_dataset_pattern_allowed( + view_name, db_row["VIEW_DOMAIN"] + ) or not self._is_dataset_pattern_allowed( + downstream_table, db_row["DOWNSTREAM_TABLE_DOMAIN"] + ): + return + + # Capture view->downstream table lineage. + self._lineage_map[downstream_table].update_lineage( + # (, , ) + SnowflakeUpstreamTable.from_dict( + view_name, + db_row["VIEW_COLUMNS"], + db_row["DOWNSTREAM_TABLE_COLUMNS"], + ), + self.config.include_column_lineage, + ) + self.report.num_view_to_table_edges_scanned += 1 + + logger.debug( + f"View->Table: Lineage[Table(Down)={downstream_table}]:View(Up)={self._lineage_map[downstream_table]}" + ) + + def update_upstream_tables_lineage( + self, upstream_tables: List[UpstreamClass], lineage: SnowflakeTableLineage + ) -> None: + for lineage_entry in sorted( + lineage.upstreamTables.values(), key=lambda x: x.upstreamDataset + ): + upstream_table_name = lineage_entry.upstreamDataset + upstream_table_urn = builder.make_dataset_urn_with_platform_instance( + self.platform, + upstream_table_name, + self.config.platform_instance, + self.config.env, + ) + upstream_table = UpstreamClass( + dataset=upstream_table_urn, + type=DatasetLineageTypeClass.TRANSFORMED, + ) + upstream_tables.append(upstream_table) - def error(self, key: str, reason: str) -> None: - self.report.report_failure(key, reason) - self.logger.error(f"{key} => {reason}") + def update_upstream_columns_lineage( + self, + dataset_urn: str, + finegrained_lineages: List[FineGrainedLineage], + lineage: SnowflakeTableLineage, + ) -> None: + # For every column for which upstream lineage is 
available + for col, col_upstreams in lineage.columnLineages.items(): + # For every upstream of column + self.update_upstream_columns_lineage_of_column( + dataset_urn, col, finegrained_lineages, col_upstreams + ) + + def update_upstream_columns_lineage_of_column( + self, + dataset_urn: str, + col: str, + finegrained_lineages: List[FineGrainedLineage], + col_upstreams: SnowflakeColumnUpstreams, + ) -> None: + for fine_upstream in col_upstreams.upstreams: + finegrained_lineage_entry = self.build_finegrained_lineage( + dataset_urn, col, fine_upstream + ) + if finegrained_lineage_entry.upstreams: + finegrained_lineages.append(finegrained_lineage_entry) + + def build_finegrained_lineage( + self, + dataset_urn: str, + col: str, + fine_upstream: SnowflakeColumnFineGrainedLineage, + ) -> FineGrainedLineage: + fieldPath = col + + column_upstreams = self.build_finegrained_lineage_upstreams(fine_upstream) + finegrained_lineage_entry = FineGrainedLineage( + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + # Sorting the list of upstream lineage events in order to avoid creating multiple aspects in backend + # even if the lineage is same but the order is different. 
+ upstreams=sorted(column_upstreams), + downstreamType=FineGrainedLineageDownstreamType.FIELD, + downstreams=[ + builder.make_schema_field_urn( + dataset_urn, self.snowflake_identifier(fieldPath) + ) + ], + ) + + return finegrained_lineage_entry + + def build_finegrained_lineage_upstreams( + self, fine_upstream: SnowflakeColumnFineGrainedLineage + ) -> List[str]: + column_upstreams = [] + for upstream_col in fine_upstream.inputColumns: + if ( + upstream_col.objectName + and upstream_col.columnName + and self._is_dataset_pattern_allowed( + upstream_col.objectName, upstream_col.objectDomain + ) + ): + upstream_dataset_name = self.get_dataset_identifier_from_qualified_name( + upstream_col.objectName + ) + upstream_dataset_urn = builder.make_dataset_urn_with_platform_instance( + self.platform, + upstream_dataset_name, + self.config.platform_instance, + self.config.env, + ) + column_upstreams.append( + builder.make_schema_field_urn( + upstream_dataset_urn, + self.snowflake_identifier(upstream_col.columnName), + ) + ) + return column_upstreams + + def update_external_tables_lineage( + self, upstream_tables: List[UpstreamClass], external_lineage: Set[str] + ) -> None: + for external_lineage_entry in sorted(external_lineage): + # For now, populate only for S3 + if external_lineage_entry.startswith("s3://"): + external_upstream_table = UpstreamClass( + dataset=make_s3_urn(external_lineage_entry, self.config.env), + type=DatasetLineageTypeClass.COPY, + ) + upstream_tables.append(external_upstream_table) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py index cbc0e009d00b6e..605cb480b856a2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py @@ -21,10 +21,7 @@ SnowflakeDatabase, SnowflakeTable, ) -from 
datahub.ingestion.source.snowflake.snowflake_utils import ( - SnowflakeCommonMixin, - SnowflakeCommonProtocol, -) +from datahub.ingestion.source.snowflake.snowflake_utils import SnowflakeCommonMixin from datahub.ingestion.source.sql.sql_generic_profiler import ( GenericProfiler, TableProfilerRequest, @@ -42,10 +39,7 @@ class SnowflakeProfilerRequest(GEProfilerRequest): profile_table_level_only: bool = False -class SnowflakeProfiler(SnowflakeCommonMixin, GenericProfiler, SnowflakeCommonProtocol): - config: SnowflakeV2Config - report: SnowflakeV2Report - +class SnowflakeProfiler(GenericProfiler, SnowflakeCommonMixin): def __init__( self, config: SnowflakeV2Config, @@ -53,8 +47,8 @@ def __init__( state_handler: Optional[ProfilingHandler] = None, ) -> None: super().__init__(config, report, self.platform, state_handler) - self.config = config - self.report = report + self.config: SnowflakeV2Config = config + self.report: SnowflakeV2Report = report self.logger = logger def get_workunits(self, databases: List[SnowflakeDatabase]) -> Iterable[WorkUnit]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index 15dade9959c6d8..0e378db317e919 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -34,6 +34,10 @@ def current_schema() -> str: def show_databases() -> str: return "show databases" + @staticmethod + def show_tags() -> str: + return "show tags" + @staticmethod def use_database(db_name: str) -> str: return f'use database "{db_name}"' @@ -541,7 +545,7 @@ def usage_per_object_per_time_bucket_for_time_window( 'View', 'Materialized view', 'External table' - ) + ) and basic_usage_counts.object_name is not null group by basic_usage_counts.object_name, basic_usage_counts.bucket_start_time diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py index 02edf76fa9284d..ff9f349cd9b67f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py @@ -1,5 +1,6 @@ from typing import Optional +from datahub.ingestion.source.snowflake.constants import SnowflakeEdition from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport from datahub.ingestion.source_report.sql.snowflake import SnowflakeReport from datahub.ingestion.source_report.usage.snowflake_usage import SnowflakeUsageReport @@ -32,6 +33,8 @@ class SnowflakeV2Report(SnowflakeReport, SnowflakeUsageReport, ProfilingSqlRepor rows_zero_objects_modified: int = 0 + edition: Optional[SnowflakeEdition] = None + def report_entity_scanned(self, name: str, ent_type: str = "table") -> None: """ Entity could be a view or a table or a schema or a database diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index 20aeca5f227398..8031f9f5b5031b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -91,12 +91,20 @@ class SnowflakeDatabase: class SnowflakeDataDictionary(SnowflakeQueryMixin): def __init__(self) -> None: self.logger = logger + self.connection: Optional[SnowflakeConnection] = None - def show_databases(self, conn: SnowflakeConnection) -> List[SnowflakeDatabase]: + def set_connection(self, connection: SnowflakeConnection) -> None: + self.connection = connection + + def get_connection(self) -> SnowflakeConnection: + # Connection is already present by the time this is called + assert self.connection is not None + return self.connection 
+ + def show_databases(self) -> List[SnowflakeDatabase]: databases: List[SnowflakeDatabase] = [] cur = self.query( - conn, SnowflakeQuery.show_databases(), ) @@ -110,13 +118,10 @@ def show_databases(self, conn: SnowflakeConnection) -> List[SnowflakeDatabase]: return databases - def get_databases( - self, conn: SnowflakeConnection, db_name: str - ) -> List[SnowflakeDatabase]: + def get_databases(self, db_name: str) -> List[SnowflakeDatabase]: databases: List[SnowflakeDatabase] = [] cur = self.query( - conn, SnowflakeQuery.get_databases(db_name), ) @@ -131,13 +136,10 @@ def get_databases( return databases - def get_schemas_for_database( - self, conn: SnowflakeConnection, db_name: str - ) -> List[SnowflakeSchema]: + def get_schemas_for_database(self, db_name: str) -> List[SnowflakeSchema]: snowflake_schemas = [] cur = self.query( - conn, SnowflakeQuery.schemas_for_database(db_name), ) @@ -152,12 +154,11 @@ def get_schemas_for_database( return snowflake_schemas def get_tables_for_database( - self, conn: SnowflakeConnection, db_name: str + self, db_name: str ) -> Optional[Dict[str, List[SnowflakeTable]]]: tables: Dict[str, List[SnowflakeTable]] = {} try: cur = self.query( - conn, SnowflakeQuery.tables_for_database(db_name), ) except Exception as e: @@ -184,12 +185,11 @@ def get_tables_for_database( return tables def get_tables_for_schema( - self, conn: SnowflakeConnection, schema_name: str, db_name: str + self, schema_name: str, db_name: str ) -> List[SnowflakeTable]: tables: List[SnowflakeTable] = [] cur = self.query( - conn, SnowflakeQuery.tables_for_schema(schema_name, db_name), ) @@ -208,11 +208,11 @@ def get_tables_for_schema( return tables def get_views_for_database( - self, conn: SnowflakeConnection, db_name: str + self, db_name: str ) -> Optional[Dict[str, List[SnowflakeView]]]: views: Dict[str, List[SnowflakeView]] = {} try: - cur = self.query(conn, SnowflakeQuery.show_views_for_database(db_name)) + cur = 
self.query(SnowflakeQuery.show_views_for_database(db_name)) except Exception as e: logger.debug( f"Failed to get all views for database - {db_name}", exc_info=e @@ -236,13 +236,11 @@ def get_views_for_database( return views def get_views_for_schema( - self, conn: SnowflakeConnection, schema_name: str, db_name: str + self, schema_name: str, db_name: str ) -> List[SnowflakeView]: views: List[SnowflakeView] = [] - cur = self.query( - conn, SnowflakeQuery.show_views_for_schema(schema_name, db_name) - ) + cur = self.query(SnowflakeQuery.show_views_for_schema(schema_name, db_name)) for table in cur: views.append( SnowflakeView( @@ -257,13 +255,11 @@ def get_views_for_schema( return views def get_columns_for_schema( - self, conn: SnowflakeConnection, schema_name: str, db_name: str + self, schema_name: str, db_name: str ) -> Optional[Dict[str, List[SnowflakeColumn]]]: columns: Dict[str, List[SnowflakeColumn]] = {} try: - cur = self.query( - conn, SnowflakeQuery.columns_for_schema(schema_name, db_name) - ) + cur = self.query(SnowflakeQuery.columns_for_schema(schema_name, db_name)) except Exception as e: logger.debug( f"Failed to get all columns for schema - {schema_name}", exc_info=e @@ -290,12 +286,11 @@ def get_columns_for_schema( return columns def get_columns_for_table( - self, conn: SnowflakeConnection, table_name: str, schema_name: str, db_name: str + self, table_name: str, schema_name: str, db_name: str ) -> List[SnowflakeColumn]: columns: List[SnowflakeColumn] = [] cur = self.query( - conn, SnowflakeQuery.columns_for_table(table_name, schema_name, db_name), ) @@ -315,11 +310,10 @@ def get_columns_for_table( return columns def get_pk_constraints_for_schema( - self, conn: SnowflakeConnection, schema_name: str, db_name: str + self, schema_name: str, db_name: str ) -> Dict[str, SnowflakePK]: constraints: Dict[str, SnowflakePK] = {} cur = self.query( - conn, SnowflakeQuery.show_primary_keys_for_schema(schema_name, db_name), ) @@ -332,13 +326,12 @@ def 
get_pk_constraints_for_schema( return constraints def get_fk_constraints_for_schema( - self, conn: SnowflakeConnection, schema_name: str, db_name: str + self, schema_name: str, db_name: str ) -> Dict[str, List[SnowflakeFK]]: constraints: Dict[str, List[SnowflakeFK]] = {} fk_constraints_map: Dict[str, SnowflakeFK] = {} cur = self.query( - conn, SnowflakeQuery.show_foreign_keys_for_schema(schema_name, db_name), ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py index e55d2d16f06e3c..64a09ad9fc7d78 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py @@ -13,11 +13,14 @@ ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.snowflake.constants import SnowflakeEdition from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report from datahub.ingestion.source.snowflake.snowflake_utils import ( SnowflakeCommonMixin, + SnowflakeConnectionMixin, + SnowflakePermissionError, SnowflakeQueryMixin, ) from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( @@ -86,19 +89,33 @@ class SnowflakeJoinedAccessEvent(PermissiveModel): role_name: str -class SnowflakeUsageExtractor(SnowflakeQueryMixin, SnowflakeCommonMixin): +class SnowflakeUsageExtractor( + SnowflakeQueryMixin, SnowflakeConnectionMixin, SnowflakeCommonMixin +): def __init__(self, config: SnowflakeV2Config, report: SnowflakeV2Report) -> None: self.config: SnowflakeV2Config = config self.report: SnowflakeV2Report = report self.logger = logger + self.connection: Optional[SnowflakeConnection] = 
None def get_workunits( self, discovered_datasets: List[str] ) -> Iterable[MetadataWorkUnit]: - conn = self.config.get_connection() + self.connection = self.create_connection() + if self.connection is None: + return + + if self.report.edition == SnowflakeEdition.STANDARD.value: + logger.info( + "Snowflake Account is Standard Edition. Usage and Operation History Feature is not supported." + ) + return logger.info("Checking usage date ranges") - self._check_usage_date_ranges(conn) + + self._check_usage_date_ranges() + + # If permission error, execution returns from here if ( self.report.min_access_history_time is None or self.report.max_access_history_time is None @@ -109,38 +126,44 @@ def get_workunits( # Now, we report the usage as well as operation metadata even if user email is absent if self.config.include_usage_stats: - yield from self.get_usage_workunits(conn, discovered_datasets) + yield from self.get_usage_workunits(discovered_datasets) if self.config.include_operational_stats: # Generate the operation workunits. 
- access_events = self._get_snowflake_history(conn) + access_events = self._get_snowflake_history() for event in access_events: yield from self._get_operation_aspect_work_unit( event, discovered_datasets ) - conn.close() - def get_usage_workunits( - self, conn: SnowflakeConnection, discovered_datasets: List[str] + self, discovered_datasets: List[str] ) -> Iterable[MetadataWorkUnit]: with PerfTimer() as timer: logger.info("Getting aggregated usage statistics") - results = self.query( - conn, - SnowflakeQuery.usage_per_object_per_time_bucket_for_time_window( - start_time_millis=int(self.config.start_time.timestamp() * 1000), - end_time_millis=int(self.config.end_time.timestamp() * 1000), - time_bucket_size=self.config.bucket_duration, - use_base_objects=self.config.apply_view_usage_to_tables, - top_n_queries=self.config.top_n_queries, - include_top_n_queries=self.config.include_top_n_queries, - ), - ) + try: + results = self.query( + SnowflakeQuery.usage_per_object_per_time_bucket_for_time_window( + start_time_millis=int( + self.config.start_time.timestamp() * 1000 + ), + end_time_millis=int(self.config.end_time.timestamp() * 1000), + time_bucket_size=self.config.bucket_duration, + use_base_objects=self.config.apply_view_usage_to_tables, + top_n_queries=self.config.top_n_queries, + include_top_n_queries=self.config.include_top_n_queries, + ), + ) + except Exception as e: + logger.debug(e, exc_info=e) + self.report_warning( + "usage-statistics", + f"Populating table usage statistics from Snowflake failed due to error {e}.", + ) + return self.report.usage_aggregation_query_secs = timer.elapsed_seconds() for row in results: - assert row["OBJECT_NAME"] is not None, "Null objectName not allowed" if not self._is_dataset_pattern_allowed( row["OBJECT_NAME"], row["OBJECT_DOMAIN"], @@ -156,6 +179,10 @@ def get_usage_workunits( ) continue + yield from self.build_usage_statistics_for_dataset(dataset_identifier, row) + + def build_usage_statistics_for_dataset(self, 
dataset_identifier, row): + try: stats = DatasetUsageStatistics( timestampMillis=int(row["BUCKET_START_TIME"].timestamp() * 1000), eventGranularity=TimeWindowSize( @@ -183,6 +210,14 @@ def get_usage_workunits( "datasetUsageStatistics", stats, ) + except Exception as e: + logger.debug( + f"Failed to parse usage statistics for dataset {dataset_identifier} due to error {e}.", + exc_info=e, + ) + self.report_warning( + "Failed to parse usage statistics for dataset", dataset_identifier + ) def _map_top_sql_queries(self, top_sql_queries: Dict) -> List[str]: total_budget_for_query_list: int = 24000 @@ -234,13 +269,19 @@ def _map_field_counts(self, field_counts: Dict) -> List[DatasetFieldUsageCounts] key=lambda v: v.fieldPath, ) - def _get_snowflake_history( - self, conn: SnowflakeConnection - ) -> Iterable[SnowflakeJoinedAccessEvent]: + def _get_snowflake_history(self) -> Iterable[SnowflakeJoinedAccessEvent]: logger.info("Getting access history") with PerfTimer() as timer: query = self._make_operations_query() - results = self.query(conn, query) + try: + results = self.query(query) + except Exception as e: + logger.debug(e, exc_info=e) + self.report_warning( + "operation", + f"Populating table operation history from Snowflake failed due to error {e}.", + ) + return self.report.access_history_query_secs = round(timer.elapsed_seconds(), 2) for row in results: @@ -251,18 +292,22 @@ def _make_operations_query(self) -> str: end_time = int(self.config.end_time.timestamp() * 1000) return SnowflakeQuery.operational_data_for_time_window(start_time, end_time) - def _check_usage_date_ranges(self, conn: SnowflakeConnection) -> Any: + def _check_usage_date_ranges(self) -> Any: with PerfTimer() as timer: try: - results = self.query( - conn, SnowflakeQuery.get_access_history_date_range() - ) + results = self.query(SnowflakeQuery.get_access_history_date_range()) except Exception as e: - self.warn( - "check-usage-data", - f"Extracting the date range for usage data from Snowflake failed." 
- f"Please check your permissions. Continuing...\nError was {e}.", - ) + if isinstance(e, SnowflakePermissionError): + error_msg = "Failed to get usage. Please grant imported privileges on SNOWFLAKE database. " + self.warn_if_stateful_else_error( + "usage-permission-error", error_msg + ) + else: + logger.debug(e, exc_info=e) + self.report_warning( + "usage", + f"Extracting the date range for usage data from Snowflake failed due to error {e}.", + ) else: for db_row in results: if ( @@ -270,11 +315,11 @@ def _check_usage_date_ranges(self, conn: SnowflakeConnection) -> Any: or db_row["MIN_TIME"] is None or db_row["MAX_TIME"] is None ): - self.warn( + self.report_warning( "check-usage-data", - f"Missing data for access_history {db_row} - Check if using Enterprise edition of Snowflake", + f"Missing data for access_history {db_row}.", ) - continue + break self.report.min_access_history_time = db_row["MIN_TIME"].astimezone( tz=timezone.utc ) @@ -341,20 +386,33 @@ def _get_operation_aspect_work_unit( def _process_snowflake_history_row( self, row: Any ) -> Iterable[SnowflakeJoinedAccessEvent]: - self.report.rows_processed += 1 - # Make some minor type conversions. - if hasattr(row, "_asdict"): - # Compat with SQLAlchemy 1.3 and 1.4 - # See https://docs.sqlalchemy.org/en/14/changelog/migration_14.html#rowproxy-is-no-longer-a-proxy-is-now-called-row-and-behaves-like-an-enhanced-named-tuple. - event_dict = row._asdict() - else: - event_dict = dict(row) - - # no use processing events that don't have a query text - if not event_dict["QUERY_TEXT"]: - self.report.rows_missing_query_text += 1 - return + try: # big hammer try block to ensure we don't fail on parsing events + self.report.rows_processed += 1 + # Make some minor type conversions. + if hasattr(row, "_asdict"): + # Compat with SQLAlchemy 1.3 and 1.4 + # See https://docs.sqlalchemy.org/en/14/changelog/migration_14.html#rowproxy-is-no-longer-a-proxy-is-now-called-row-and-behaves-like-an-enhanced-named-tuple. 
+ event_dict = row._asdict() + else: + event_dict = dict(row) + # no use processing events that don't have a query text + if not event_dict["QUERY_TEXT"]: + self.report.rows_missing_query_text += 1 + return + self.parse_event_objects(event_dict) + event = SnowflakeJoinedAccessEvent( + **{k.lower(): v for k, v in event_dict.items()} + ) + yield event + except Exception as e: + self.report.rows_parsing_error += 1 + self.report_warning( + "operation", + f"Failed to parse operation history row {event_dict}, {e}", + ) + + def parse_event_objects(self, event_dict): event_dict["BASE_OBJECTS_ACCESSED"] = [ obj for obj in json.loads(event_dict["BASE_OBJECTS_ACCESSED"]) @@ -397,18 +455,6 @@ def _process_snowflake_history_row( if not event_dict["EMAIL"]: self.report.rows_missing_email += 1 - try: # big hammer try block to ensure we don't fail on parsing events - event = SnowflakeJoinedAccessEvent( - **{k.lower(): v for k, v in event_dict.items()} - ) - yield event - except Exception as e: - self.report.rows_parsing_error += 1 - self.warn( - "usage", - f"Failed to parse usage line {event_dict}, {e}", - ) - def _is_unsupported_object_accessed(self, obj: Dict[str, Any]) -> bool: unsupported_keys = ["locations"] @@ -425,11 +471,3 @@ def _is_object_valid(self, obj: Dict[str, Any]) -> bool: ): return False return True - - def warn(self, key: str, reason: str) -> None: - self.report.report_warning(key, reason) - self.logger.warning(f"{key} => {reason}") - - def error(self, key: str, reason: str) -> None: - self.report.report_failure(key, reason) - self.logger.error(f"{key} => {reason}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py index 88a5057c6431c2..3a7c015dc9f125 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py @@ -1,14 +1,20 @@ import 
logging -from enum import Enum from typing import Any, Optional from snowflake.connector import SnowflakeConnection from snowflake.connector.cursor import DictCursor from typing_extensions import Protocol +from datahub.configuration.common import MetaError from datahub.configuration.pattern_utils import is_schema_allowed from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.snowflake.constants import ( + GENERIC_PERMISSION_ERROR_KEY, + SNOWFLAKE_DEFAULT_CLOUD, + SNOWFLAKE_REGION_CLOUD_REGION_MAPPING, + SnowflakeObjectDomain, +) from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report from datahub.metadata.com.linkedin.pegasus2avro.events.metadata import ChangeType @@ -17,21 +23,8 @@ logger: logging.Logger = logging.getLogger(__name__) -class SnowflakeCloudProvider(str, Enum): - AWS = "aws" - GCP = "gcp" - AZURE = "azure" - - -# See https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#region-ids -# Includes only exceptions to format _ -SNOWFLAKE_REGION_CLOUD_REGION_MAPPING = { - "aws_us_east_1_gov": (SnowflakeCloudProvider.AWS, "us-east-1"), - "azure_uksouth": (SnowflakeCloudProvider.AZURE, "uk-south"), - "azure_centralindia": (SnowflakeCloudProvider.AZURE, "central-india.azure"), -} - -SNOWFLAKE_DEFAULT_CLOUD = SnowflakeCloudProvider.AWS +class SnowflakePermissionError(MetaError): + """A permission error has happened""" # Required only for mypy, since we are using mixin classes, and not inheritance. @@ -40,6 +33,24 @@ class SnowflakeLoggingProtocol(Protocol): logger: logging.Logger +class SnowflakeQueryProtocol(SnowflakeLoggingProtocol, Protocol): + def get_connection(self) -> SnowflakeConnection: + ... 
+ + +class SnowflakeQueryMixin: + def query(self: SnowflakeQueryProtocol, query: str) -> Any: + try: + self.logger.debug("Query : {}".format(query)) + resp = self.get_connection().cursor(DictCursor).execute(query) + return resp + + except Exception as e: + if is_permission_error(e): + raise SnowflakePermissionError(e) from e + raise + + class SnowflakeCommonProtocol(SnowflakeLoggingProtocol, Protocol): config: SnowflakeV2Config report: SnowflakeV2Report @@ -55,14 +66,11 @@ def get_dataset_identifier_from_qualified_name(self, qualified_name: str) -> str def snowflake_identifier(self, identifier: str) -> str: ... + def report_warning(self, key: str, reason: str) -> None: + ... -class SnowflakeQueryMixin: - def query( - self: SnowflakeLoggingProtocol, conn: SnowflakeConnection, query: str - ) -> Any: - self.logger.debug("Query : {}".format(query)) - resp = conn.cursor(DictCursor).execute(query) - return resp + def report_error(self, key: str, reason: str) -> None: + ... class SnowflakeCommonMixin: @@ -103,8 +111,15 @@ def _is_dataset_pattern_allowed( if not dataset_type or not dataset_name: return True dataset_params = dataset_name.split(".") + if dataset_type.lower() not in ( + SnowflakeObjectDomain.TABLE, + SnowflakeObjectDomain.EXTERNAL_TABLE, + SnowflakeObjectDomain.VIEW, + SnowflakeObjectDomain.MATERIALIZED_VIEW, + ): + return False if len(dataset_params) != 3: - self.report.report_warning( + self.report_warning( "invalid-dataset-pattern", f"Found {dataset_params} of type {dataset_type}", ) @@ -121,7 +136,9 @@ def _is_dataset_pattern_allowed( ): return False - if dataset_type.lower() in {"table"} and not self.config.table_pattern.allowed( + if dataset_type.lower() in { + SnowflakeObjectDomain.TABLE + } and not self.config.table_pattern.allowed( self.get_dataset_identifier_from_qualified_name(dataset_name) ): return False @@ -199,3 +216,83 @@ def wrap_aspect_as_workunit( ) self.report.report_workunit(wu) return wu + + # TODO: Revisit this after stateful ingestion 
can commit checkpoint + # for failures that do not affect the checkpoint + def warn_if_stateful_else_error( + self: SnowflakeCommonProtocol, key: str, reason: str + ) -> None: + if ( + self.config.stateful_ingestion is not None + and self.config.stateful_ingestion.enabled + ): + self.report_warning(key, reason) + else: + self.report_error(key, reason) + + def report_warning(self: SnowflakeCommonProtocol, key: str, reason: str) -> None: + self.report.report_warning(key, reason) + self.logger.warning(f"{key} => {reason}") + + def report_error(self: SnowflakeCommonProtocol, key: str, reason: str) -> None: + self.report.report_failure(key, reason) + self.logger.error(f"{key} => {reason}") + + +class SnowflakeConnectionProtocol(SnowflakeLoggingProtocol, Protocol): + connection: Optional[SnowflakeConnection] + config: SnowflakeV2Config + report: SnowflakeV2Report + + def create_connection(self) -> Optional[SnowflakeConnection]: + ... + + def report_error(self, key: str, reason: str) -> None: + ... 
+ + +class SnowflakeConnectionMixin: + def get_connection(self: SnowflakeConnectionProtocol) -> SnowflakeConnection: + if self.connection is None: + # Ideally this is never called here + self.logger.info("Did you forget to initialize connection for module?") + self.connection = self.create_connection() + + # Connection is already present by the time its used for query + # Every module initializes the connection or fails and returns + assert self.connection is not None + return self.connection + + # If connection succeeds, return connection, else return None and report failure + def create_connection( + self: SnowflakeConnectionProtocol, + ) -> Optional[SnowflakeConnection]: + try: + conn = self.config.get_connection() + except Exception as e: + logger.debug(e, exc_info=e) + if "not granted to this user" in str(e): + self.report_error( + GENERIC_PERMISSION_ERROR_KEY, + f"Failed to connect with snowflake due to error {e}", + ) + else: + logger.debug(e, exc_info=e) + self.report_error( + "snowflake-connection", + f"Failed to connect to snowflake instance due to error {e}.", + ) + return None + else: + return conn + + def close(self: SnowflakeConnectionProtocol) -> None: + if self.connection is not None and not self.connection.is_closed(): + self.connection.close() + + +def is_permission_error(e: Exception) -> bool: + msg = str(e) + # 002003 (02000): SQL compilation error: Database/SCHEMA 'XXXX' does not exist or not authorized. 
+ # Insufficient privileges to operate on database 'XXXX' + return "Insufficient privileges" in msg or "not authorized" in msg diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 7ed8456415af03..7f4afc32d38f87 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -43,6 +43,12 @@ ) from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.glossary.classification_mixin import ClassificationMixin +from datahub.ingestion.source.snowflake.constants import ( + GENERIC_PERMISSION_ERROR_KEY, + SNOWFLAKE_DATABASE, + SnowflakeEdition, + SnowflakeObjectDomain, +) from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_lineage import ( SnowflakeLineageExtractor, @@ -65,6 +71,8 @@ ) from datahub.ingestion.source.snowflake.snowflake_utils import ( SnowflakeCommonMixin, + SnowflakeConnectionMixin, + SnowflakePermissionError, SnowflakeQueryMixin, ) from datahub.ingestion.source.sql.sql_common import SqlContainerSubTypes @@ -88,7 +96,6 @@ ) from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetProperties, - UpstreamLineage, ViewProperties, ) from datahub.metadata.com.linkedin.pegasus2avro.schema import ( @@ -184,6 +191,7 @@ class SnowflakeV2Source( ClassificationMixin, SnowflakeQueryMixin, + SnowflakeConnectionMixin, SnowflakeCommonMixin, StatefulIngestionSourceBase, TestableSource, @@ -193,7 +201,8 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): self.config: SnowflakeV2Config = config self.report: SnowflakeV2Report = SnowflakeV2Report() self.logger = logger - self.snowsight_base_url = None + self.snowsight_base_url: Optional[str] = None + self.connection: Optional[SnowflakeConnection] = None # Create and 
register the stateful ingestion use-case handlers. self.stale_entity_removal_handler = StaleEntityRemovalHandler( source=self, @@ -418,7 +427,7 @@ def query(query): SourceCapability.USAGE_STATS, ): failure_message = ( - f"Current role does not have permissions to use warehouse {connection_conf.warehouse}" + f"Current role {current_role} does not have permissions to use warehouse {connection_conf.warehouse}. Please check the grants associated with this role." if connection_conf.warehouse is not None else "No default warehouse set for user. Either set default warehouse for user or configure warehouse in recipe" ) @@ -439,33 +448,73 @@ def query(query): return _report def get_workunits(self) -> Iterable[WorkUnit]: - conn: SnowflakeConnection = self.config.get_connection() + + self.connection = self.create_connection() + if self.connection is None: + return + self.add_config_to_report() - self.inspect_session_metadata(conn) + self.inspect_session_metadata() + if self.config.include_external_url: - self.snowsight_base_url = self.get_snowsight_base_url(conn) + self.snowsight_base_url = self.get_snowsight_base_url() - self.report.include_technical_schema = self.config.include_technical_schema - databases: List[SnowflakeDatabase] = [] + if self.report.default_warehouse is None: + self.report_warehouse_failure() + return - databases = self.get_databases(conn) + self.data_dictionary.set_connection(self.connection) + databases = self.get_databases() - for snowflake_db in databases: - self.report.report_entity_scanned(snowflake_db.name, "database") + if databases is None or len(databases) == 0: + return - if not self.config.database_pattern.allowed(snowflake_db.name): - self.report.report_dropped(f"{snowflake_db.name}.*") - continue + for snowflake_db in databases: + try: + yield from self._process_database(snowflake_db) + except SnowflakePermissionError as e: + # FIXME - This may break satetful ingestion if new tables than previous run are emitted above + # and stateful 
ingestion is enabled + self.report_error(GENERIC_PERMISSION_ERROR_KEY, str(e)) + return - yield from self._process_database(conn, snowflake_db) + self.connection.close() - conn.close() # Emit Stale entity workunits yield from self.stale_entity_removal_handler.gen_removed_entity_workunits() + # TODO: The checkpoint state for stale entity detection can be comitted here. + if self.config.profiling.enabled and len(databases) != 0: yield from self.profiler.get_workunits(databases) + discovered_tables: List[str] = [ + self.get_dataset_identifier(table.name, schema.name, db.name) + for db in databases + for schema in db.schemas + for table in schema.tables + ] + discovered_views: List[str] = [ + self.get_dataset_identifier(table.name, schema.name, db.name) + for db in databases + for schema in db.schemas + for table in schema.views + ] + + if len(discovered_tables) == 0 and len(discovered_views) == 0: + self.report_error( + GENERIC_PERMISSION_ERROR_KEY, + "No tables/views found. Please check permissions.", + ) + return + + discovered_datasets = discovered_tables + discovered_views + + if self.config.include_table_lineage: + yield from self.lineage_extractor.get_workunits( + discovered_tables, discovered_views + ) + if self.config.include_usage_stats or self.config.include_operational_stats: if ( self.config.store_last_usage_extraction_timestamp @@ -487,110 +536,211 @@ def get_workunits(self) -> Iterable[WorkUnit]: end_time_millis=datetime_to_ts_millis(self.config.end_time), ) - discovered_datasets: List[str] = [ - self.get_dataset_identifier(table.name, schema.name, db.name) - for db in databases - for schema in db.schemas - for table in schema.tables - ] + [ - self.get_dataset_identifier(table.name, schema.name, db.name) - for db in databases - for schema in db.schemas - for table in schema.views - ] yield from self.usage_extractor.get_workunits(discovered_datasets) - def get_databases(self, conn): - databases = self.data_dictionary.show_databases(conn) + def 
report_warehouse_failure(self): + if self.config.warehouse is not None: + self.report_error( + GENERIC_PERMISSION_ERROR_KEY, + f"Current role does not have permissions to use warehouse {self.config.warehouse}. Please update permissions.", + ) + else: + self.report_error( + "no-active-warehouse", + "No default warehouse set for user. Either set default warehouse for user or configure warehouse in recipe.", + ) + + def get_databases(self) -> Optional[List[SnowflakeDatabase]]: + try: + # `show databases` is required only to get one of the databases + # whose information_schema can be queried to start with. + databases = self.data_dictionary.show_databases() + except Exception as e: + logger.debug(f"Failed to list databases due to error {e}", exc_info=e) + self.report_error( + "list-databases", + f"Failed to list databases due to error {e}", + ) + return None + else: + ischema_databases: List[ + SnowflakeDatabase + ] = self.get_databases_from_ischema(databases) + + if len(ischema_databases) == 0: + self.report_error( + GENERIC_PERMISSION_ERROR_KEY, + "No databases found. Please check permissions.", + ) + return ischema_databases - # Below code block is required to enrich database with additional - # information that is missing in `show databases` results - # For example - last modified time of database - ischema_database_map: Dict[str, SnowflakeDatabase] = {} + def get_databases_from_ischema(self, databases): + ischema_databases: List[SnowflakeDatabase] = [] for database in databases: try: - ischema_databases = self.data_dictionary.get_databases( - conn, database.name - ) - ischema_database_map = {db.name: db for db in ischema_databases} + ischema_databases = self.data_dictionary.get_databases(database.name) break except Exception: # query fails if "USAGE" access is not granted for database + # This is okay, because `show databases` query lists all databases irrespective of permission, + # if role has `MANAGE GRANTS` privilege. 
(not advisable) logger.debug( f"Failed to list databases {database.name} information_schema" ) - for database in databases: - if database.name in ischema_database_map.keys(): - database.last_altered = ischema_database_map[database.name].last_altered + # SNOWFLAKE database always shows up even if permissions are missing + if database == SNOWFLAKE_DATABASE: + continue + logger.info( + f"The role {self.report.role} has `MANAGE GRANTS` privilege. This is not advisable and also not required." + ) - return databases + return ischema_databases def _process_database( - self, conn: SnowflakeConnection, snowflake_db: SnowflakeDatabase + self, snowflake_db: SnowflakeDatabase ) -> Iterable[MetadataWorkUnit]: + + self.report.report_entity_scanned(snowflake_db.name, "database") + if not self.config.database_pattern.allowed(snowflake_db.name): + self.report.report_dropped(f"{snowflake_db.name}.*") + return + db_name = snowflake_db.name + try: + self.query(SnowflakeQuery.use_database(db_name)) + except Exception as e: + if isinstance(e, SnowflakePermissionError): + # This may happen if REFERENCE_USAGE permissions are set + # We can not run show queries on database in such case. + # This need not be a failure case. + self.report_warning( + "Insufficient privileges to operate on database, skipping. 
Please grant USAGE permissions on database to extract its metadata.", + db_name, + ) + else: + logger.debug( + f"Failed to use database {db_name} due to error {e}", + exc_info=e, + ) + self.report_warning( + "Failed to get schemas for database", + db_name, + ) + return + if self.config.include_technical_schema: yield from self.gen_database_containers(snowflake_db) - # Use database and extract metadata from its information_schema - # If this query fails, it means, user does not have usage access on database + self.fetch_schemas_for_database(snowflake_db, db_name) + + for snowflake_schema in snowflake_db.schemas: + yield from self._process_schema(snowflake_schema, db_name) + + def fetch_schemas_for_database(self, snowflake_db, db_name): try: - self.query(conn, SnowflakeQuery.use_database(db_name)) snowflake_db.schemas = self.data_dictionary.get_schemas_for_database( - conn, db_name + db_name ) except Exception as e: - self.warn( - self.logger, - db_name, - f"unable to get metadata information for database {db_name} due to an error -> {e}", - ) - self.report.report_dropped(f"{db_name}.*") - return - - for snowflake_schema in snowflake_db.schemas: - self.report.report_entity_scanned(snowflake_schema.name, "schema") + if isinstance(e, SnowflakePermissionError): + error_msg = f"Failed to get schemas for database {db_name}. Please check permissions." + # Ideal implementation would use PEP 678 – Enriching Exceptions with Notes + raise SnowflakePermissionError(error_msg) from e.__cause__ + else: + logger.debug( + f"Failed to get schemas for database {db_name} due to error {e}", + exc_info=e, + ) + self.report_warning( + "Failed to get schemas for database", + db_name, + ) - if not is_schema_allowed( - self.config.schema_pattern, - snowflake_schema.name, + if not snowflake_db.schemas: + self.report_warning( + "No schemas found in database. 
If schemas exist, please grant USAGE permissions on them.", db_name, - self.config.match_fully_qualified_names, - ): - self.report.report_dropped(f"{db_name}.{snowflake_schema.name}.*") - continue - - yield from self._process_schema(conn, snowflake_schema, db_name) + ) def _process_schema( - self, conn: SnowflakeConnection, snowflake_schema: SnowflakeSchema, db_name: str + self, snowflake_schema: SnowflakeSchema, db_name: str ) -> Iterable[MetadataWorkUnit]: + + self.report.report_entity_scanned(snowflake_schema.name, "schema") + if not is_schema_allowed( + self.config.schema_pattern, + snowflake_schema.name, + db_name, + self.config.match_fully_qualified_names, + ): + self.report.report_dropped(f"{db_name}.{snowflake_schema.name}.*") + return + schema_name = snowflake_schema.name if self.config.include_technical_schema: yield from self.gen_schema_containers(snowflake_schema, db_name) if self.config.include_tables: - snowflake_schema.tables = self.get_tables_for_schema( - conn, schema_name, db_name - ) + self.fetch_tables_for_schema(snowflake_schema, db_name, schema_name) if self.config.include_technical_schema: for table in snowflake_schema.tables: - yield from self._process_table(conn, table, schema_name, db_name) + yield from self._process_table(table, schema_name, db_name) if self.config.include_views: - snowflake_schema.views = self.get_views_for_schema( - conn, schema_name, db_name - ) + self.fetch_views_for_schema(snowflake_schema, db_name, schema_name) if self.config.include_technical_schema: for view in snowflake_schema.views: - yield from self._process_view(conn, view, schema_name, db_name) + yield from self._process_view(view, schema_name, db_name) + + if not snowflake_schema.views and not snowflake_schema.tables: + self.report_warning( + "No tables/views found in schema. 
If tables exist, please grant REFERENCES or SELECT permissions on them.", + f"{db_name}.{schema_name}", + ) + + def fetch_views_for_schema(self, snowflake_schema, db_name, schema_name): + try: + snowflake_schema.views = self.get_views_for_schema(schema_name, db_name) + + except Exception as e: + if isinstance(e, SnowflakePermissionError): + # Ideal implementation would use PEP 678 – Enriching Exceptions with Notes + error_msg = f"Failed to get views for schema {db_name}.{schema_name}. Please check permissions." + + raise SnowflakePermissionError(error_msg) from e.__cause__ + else: + logger.debug( + f"Failed to get views for schema {db_name}.{schema_name} due to error {e}", + exc_info=e, + ) + self.report_warning( + "Failed to get views for schema", + f"{db_name}.{schema_name}", + ) + + def fetch_tables_for_schema(self, snowflake_schema, db_name, schema_name): + try: + snowflake_schema.tables = self.get_tables_for_schema(schema_name, db_name) + except Exception as e: + if isinstance(e, SnowflakePermissionError): + # Ideal implementation would use PEP 678 – Enriching Exceptions with Notes + error_msg = f"Failed to get tables for schema {db_name}.{schema_name}. Please check permissions." 
+ raise SnowflakePermissionError(error_msg) from e.__cause__ + else: + logger.debug( + f"Failed to get tables for schema {db_name}.{schema_name} due to error {e}", + exc_info=e, + ) + self.report_warning( + "Failed to get tables for schema", + f"{db_name}.{schema_name}", + ) def _process_table( self, - conn: SnowflakeConnection, table: SnowflakeTable, schema_name: str, db_name: str, @@ -603,40 +753,82 @@ def _process_table( self.report.report_dropped(table_identifier) return - table.columns = self.get_columns_for_table( - conn, table.name, schema_name, db_name - ) - table.pk = self.get_pk_constraints_for_table( - conn, table.name, schema_name, db_name - ) - table.foreign_keys = self.get_fk_constraints_for_table( - conn, table.name, schema_name, db_name - ) + self.fetch_columns_for_table(table, schema_name, db_name, table_identifier) + + self.fetch_pk_for_table(table, schema_name, db_name, table_identifier) + + self.fetch_foreign_keys_for_table(table, schema_name, db_name, table_identifier) + dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name) - if self.is_classification_enabled_for_table(dataset_name): + self.fetch_sample_data_for_classification( + table, schema_name, db_name, dataset_name + ) + + yield from self.gen_dataset_workunits(table, schema_name, db_name) + + def fetch_sample_data_for_classification( + self, table, schema_name, db_name, dataset_name + ): + if table.columns and self.is_classification_enabled_for_table(dataset_name): try: table.sample_data = self.get_sample_values_for_table( - conn, table.name, schema_name, db_name + table.name, schema_name, db_name ) except Exception as e: - self.warn( - self.logger, - dataset_name, - f"unable to get table sample data due to error -> {e}", + logger.debug( + f"Failed to get sample values for dataset {dataset_name} due to error {e}", + exc_info=e, ) + if isinstance(e, SnowflakePermissionError): + self.report_warning( + "Failed to get sample values for dataset. 
Please grant SELECT permissions on dataset.", + dataset_name, + ) + else: + self.report_warning( + "Failed to get sample values for dataset", + dataset_name, + ) - lineage_info = None - if self.config.include_table_lineage: - lineage_info = self.lineage_extractor._get_upstream_lineage_info( - dataset_name + def fetch_foreign_keys_for_table( + self, table, schema_name, db_name, table_identifier + ): + try: + table.foreign_keys = self.get_fk_constraints_for_table( + table.name, schema_name, db_name + ) + except Exception as e: + logger.debug( + f"Failed to get foreign key for table {table_identifier} due to error {e}", + exc_info=e, + ) + self.report_warning("Failed to get foreign key for table", table_identifier) + + def fetch_pk_for_table(self, table, schema_name, db_name, table_identifier): + try: + table.pk = self.get_pk_constraints_for_table( + table.name, schema_name, db_name ) + except Exception as e: + logger.debug( + f"Failed to get primary key for table {table_identifier} due to error {e}", + exc_info=e, + ) + self.report_warning("Failed to get primary key for table", table_identifier) - yield from self.gen_dataset_workunits(table, schema_name, db_name, lineage_info) + def fetch_columns_for_table(self, table, schema_name, db_name, table_identifier): + try: + table.columns = self.get_columns_for_table(table.name, schema_name, db_name) + except Exception as e: + logger.debug( + f"Failed to get columns for table {table_identifier} due to error {e}", + exc_info=e, + ) + self.report_warning("Failed to get columns for table", table_identifier) def _process_view( self, - conn: SnowflakeConnection, view: SnowflakeView, schema_name: str, db_name: str, @@ -649,18 +841,22 @@ def _process_view( self.report.report_dropped(view_name) return - view.columns = self.get_columns_for_table(conn, view.name, schema_name, db_name) - lineage_info = None - if self.config.include_view_lineage: - lineage_info = self.lineage_extractor._get_upstream_lineage_info(view_name) - yield from 
self.gen_dataset_workunits(view, schema_name, db_name, lineage_info) + try: + view.columns = self.get_columns_for_table(view.name, schema_name, db_name) + except Exception as e: + logger.debug( + f"Failed to get columns for view {view_name} due to error {e}", + exc_info=e, + ) + self.report_warning("Failed to get columns for view", view_name) + + yield from self.gen_dataset_workunits(view, schema_name, db_name) def gen_dataset_workunits( self, table: Union[SnowflakeTable, SnowflakeView], schema_name: str, db_name: str, - lineage_info: Optional[Tuple[UpstreamLineage, Dict[str, str]]], ) -> Iterable[MetadataWorkUnit]: dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name) dataset_urn = make_dataset_urn_with_platform_instance( @@ -671,17 +867,15 @@ def gen_dataset_workunits( ) # Add the entity to the state. - type = "table" if isinstance(table, SnowflakeTable) else "view" + type = ( + SnowflakeObjectDomain.TABLE + if isinstance(table, SnowflakeTable) + else SnowflakeObjectDomain.VIEW + ) self.stale_entity_removal_handler.add_entity_to_state( type=type, urn=dataset_urn ) - if lineage_info is not None: - upstream_lineage, upstream_column_props = lineage_info - else: - upstream_column_props = {} - upstream_lineage = None - status = Status(removed=False) yield self.wrap_aspect_as_workunit("dataset", dataset_urn, "status", status) @@ -690,27 +884,8 @@ def gen_dataset_workunits( "dataset", dataset_urn, "schemaMetadata", schema_metadata ) - dataset_properties = DatasetProperties( - name=table.name, - created=TimeStamp(time=int(table.created.timestamp() * 1000)) - if table.created is not None - else None, - lastModified=TimeStamp(time=int(table.last_altered.timestamp() * 1000)) - if table.last_altered is not None - else TimeStamp(time=int(table.created.timestamp() * 1000)) - if table.created is not None - else None, - description=table.comment, - qualifiedName=dataset_name, - customProperties={**upstream_column_props}, - 
externalUrl=self.get_external_url_for_table( - table.name, - schema_name, - db_name, - "table" if isinstance(table, SnowflakeTable) else "view", - ) - if self.config.include_external_url - else None, + dataset_properties = self.get_dataset_properties( + table, schema_name, db_name, dataset_name ) yield self.wrap_aspect_as_workunit( "dataset", dataset_urn, "datasetProperties", dataset_properties @@ -736,13 +911,10 @@ def gen_dataset_workunits( entity_type="dataset", ) - if upstream_lineage is not None: - # Emit the lineage work unit - yield self.wrap_aspect_as_workunit( - "dataset", dataset_urn, "upstreamLineage", upstream_lineage - ) - - if isinstance(table, SnowflakeView): + if ( + isinstance(table, SnowflakeView) + and cast(SnowflakeView, table).view_definition is not None + ): view = cast(SnowflakeView, table) view_properties_aspect = ViewProperties( materialized=False, @@ -756,6 +928,32 @@ def gen_dataset_workunits( view_properties_aspect, ) + def get_dataset_properties(self, table, schema_name, db_name, dataset_name): + return DatasetProperties( + name=table.name, + created=TimeStamp(time=int(table.created.timestamp() * 1000)) + if table.created is not None + else None, + lastModified=TimeStamp(time=int(table.last_altered.timestamp() * 1000)) + if table.last_altered is not None + else TimeStamp(time=int(table.created.timestamp() * 1000)) + if table.created is not None + else None, + description=table.comment, + qualifiedName=dataset_name, + customProperties={}, + externalUrl=self.get_external_url_for_table( + table.name, + schema_name, + db_name, + SnowflakeObjectDomain.TABLE + if isinstance(table, SnowflakeTable) + else SnowflakeObjectDomain.VIEW, + ) + if self.config.include_external_url + else None, + ) + def get_schema_metadata( self, table: Union[SnowflakeTable, SnowflakeView], @@ -764,35 +962,7 @@ def get_schema_metadata( ) -> SchemaMetadata: foreign_keys: Optional[List[ForeignKeyConstraint]] = None if isinstance(table, SnowflakeTable) and 
len(table.foreign_keys) > 0: - foreign_keys = [] - for fk in table.foreign_keys: - foreign_dataset = make_dataset_urn( - self.platform, - self.get_dataset_identifier( - fk.referred_table, fk.referred_schema, fk.referred_database - ), - self.config.env, - ) - foreign_keys.append( - ForeignKeyConstraint( - name=fk.name, - foreignDataset=foreign_dataset, - foreignFields=[ - make_schema_field_urn( - foreign_dataset, - self.snowflake_identifier(col), - ) - for col in fk.referred_column_names - ], - sourceFields=[ - make_schema_field_urn( - dataset_urn, - self.snowflake_identifier(col), - ) - for col in fk.column_names - ], - ) - ) + foreign_keys = self.build_foreign_keys(table, dataset_urn, foreign_keys) schema_metadata = SchemaMetadata( schemaName=dataset_name, @@ -821,6 +991,43 @@ def get_schema_metadata( # TODO: classification is only run for snowflake tables. # Should we run classification for snowflake views as well? + self.classify_snowflake_table(table, dataset_name, schema_metadata) + + return schema_metadata + + def build_foreign_keys(self, table, dataset_urn, foreign_keys): + foreign_keys = [] + for fk in table.foreign_keys: + foreign_dataset = make_dataset_urn( + self.platform, + self.get_dataset_identifier( + fk.referred_table, fk.referred_schema, fk.referred_database + ), + self.config.env, + ) + foreign_keys.append( + ForeignKeyConstraint( + name=fk.name, + foreignDataset=foreign_dataset, + foreignFields=[ + make_schema_field_urn( + foreign_dataset, + self.snowflake_identifier(col), + ) + for col in fk.referred_column_names + ], + sourceFields=[ + make_schema_field_urn( + dataset_urn, + self.snowflake_identifier(col), + ) + for col in fk.column_names + ], + ) + ) + return foreign_keys + + def classify_snowflake_table(self, table, dataset_name, schema_metadata): if isinstance( table, SnowflakeTable ) and self.is_classification_enabled_for_table(dataset_name): @@ -839,14 +1046,15 @@ def get_schema_metadata( else {}, ) except Exception as e: - self.warn( - 
self.logger, + logger.debug( + f"Failed to classify table columns for {dataset_name} due to error -> {e}", + exc_info=e, + ) + self.report_warning( + "Failed to classify table columns", dataset_name, - f"unable to classify table columns due to error -> {e}", ) - return schema_metadata - def get_report(self) -> SourceReport: return self.report @@ -1017,10 +1225,10 @@ def gen_schema_containers( yield wu def get_tables_for_schema( - self, conn: SnowflakeConnection, schema_name: str, db_name: str + self, schema_name: str, db_name: str ) -> List[SnowflakeTable]: if db_name not in self.db_tables.keys(): - tables = self.data_dictionary.get_tables_for_database(conn, db_name) + tables = self.data_dictionary.get_tables_for_database(db_name) self.db_tables[db_name] = tables else: tables = self.db_tables[db_name] @@ -1029,18 +1237,16 @@ def get_tables_for_schema( # falling back to get tables for schema if tables is None: self.report.num_get_tables_for_schema_queries += 1 - return self.data_dictionary.get_tables_for_schema( - conn, schema_name, db_name - ) + return self.data_dictionary.get_tables_for_schema(schema_name, db_name) # Some schema may not have any table return tables.get(schema_name, []) def get_views_for_schema( - self, conn: SnowflakeConnection, schema_name: str, db_name: str + self, schema_name: str, db_name: str ) -> List[SnowflakeView]: if db_name not in self.db_views.keys(): - views = self.data_dictionary.get_views_for_database(conn, db_name) + views = self.data_dictionary.get_views_for_database(db_name) self.db_views[db_name] = views else: views = self.db_views[db_name] @@ -1049,18 +1255,16 @@ def get_views_for_schema( # falling back to get views for schema if views is None: self.report.num_get_views_for_schema_queries += 1 - return self.data_dictionary.get_views_for_schema(conn, schema_name, db_name) + return self.data_dictionary.get_views_for_schema(schema_name, db_name) # Some schema may not have any table return views.get(schema_name, []) def 
get_columns_for_table( - self, conn: SnowflakeConnection, table_name: str, schema_name: str, db_name: str + self, table_name: str, schema_name: str, db_name: str ) -> List[SnowflakeColumn]: if (db_name, schema_name) not in self.schema_columns.keys(): - columns = self.data_dictionary.get_columns_for_schema( - conn, schema_name, db_name - ) + columns = self.data_dictionary.get_columns_for_schema(schema_name, db_name) self.schema_columns[(db_name, schema_name)] = columns else: columns = self.schema_columns[(db_name, schema_name)] @@ -1070,18 +1274,18 @@ def get_columns_for_table( if columns is None: self.report.num_get_columns_for_table_queries += 1 return self.data_dictionary.get_columns_for_table( - conn, table_name, schema_name, db_name + table_name, schema_name, db_name ) # Access to table but none of its columns - is this possible ? return columns.get(table_name, []) def get_pk_constraints_for_table( - self, conn: SnowflakeConnection, table_name: str, schema_name: str, db_name: str + self, table_name: str, schema_name: str, db_name: str ) -> Optional[SnowflakePK]: if (db_name, schema_name) not in self.schema_pk_constraints.keys(): constraints = self.data_dictionary.get_pk_constraints_for_schema( - conn, schema_name, db_name + schema_name, db_name ) self.schema_pk_constraints[(db_name, schema_name)] = constraints else: @@ -1091,11 +1295,11 @@ def get_pk_constraints_for_table( return constraints.get(table_name) def get_fk_constraints_for_table( - self, conn: SnowflakeConnection, table_name: str, schema_name: str, db_name: str + self, table_name: str, schema_name: str, db_name: str ) -> List[SnowflakeFK]: if (db_name, schema_name) not in self.schema_fk_constraints.keys(): constraints = self.data_dictionary.get_fk_constraints_for_schema( - conn, schema_name, db_name + schema_name, db_name ) self.schema_fk_constraints[(db_name, schema_name)] = constraints else: @@ -1112,6 +1316,7 @@ def add_config_to_report(self): self.report.lineage_start_time = 
self.config.start_time self.report.lineage_end_time = self.config.end_time self.report.check_role_grants = self.config.check_role_grants + self.report.include_technical_schema = self.config.include_technical_schema self.report.include_usage_stats = self.config.include_usage_stats self.report.include_operational_stats = self.config.include_operational_stats self.report.include_column_lineage = self.config.include_column_lineage @@ -1119,25 +1324,34 @@ def add_config_to_report(self): self.report.window_start_time = self.config.start_time self.report.window_end_time = self.config.end_time - def inspect_session_metadata(self, conn: SnowflakeConnection) -> None: + def inspect_session_metadata(self) -> None: try: logger.info("Checking current version") - for db_row in self.query(conn, SnowflakeQuery.current_version()): + for db_row in self.query(SnowflakeQuery.current_version()): self.report.saas_version = db_row["CURRENT_VERSION()"] except Exception as e: - self.report.report_failure("version", f"Error: {e}") + self.report_error("version", f"Error: {e}") try: logger.info("Checking current role") - for db_row in self.query(conn, SnowflakeQuery.current_role()): + for db_row in self.query(SnowflakeQuery.current_role()): self.report.role = db_row["CURRENT_ROLE()"] except Exception as e: - self.report.report_failure("version", f"Error: {e}") + self.report_error("version", f"Error: {e}") try: logger.info("Checking current warehouse") - for db_row in self.query(conn, SnowflakeQuery.current_warehouse()): + for db_row in self.query(SnowflakeQuery.current_warehouse()): self.report.default_warehouse = db_row["CURRENT_WAREHOUSE()"] except Exception as e: - self.report.report_failure("current_warehouse", f"Error: {e}") + self.report_error("current_warehouse", f"Error: {e}") + + try: + logger.info("Checking current edition") + if self.is_standard_edition(): + self.report.edition = SnowflakeEdition.STANDARD + else: + self.report.edition = SnowflakeEdition.ENTERPRISE + except 
Exception: + self.report.edition = None # Stateful Ingestion Overrides. def get_platform_instance_id(self) -> str: @@ -1146,9 +1360,9 @@ def get_platform_instance_id(self) -> str: # Ideally we do not want null values in sample data for a column. # However that would require separate query per column and # that would be expensive, hence not done. - def get_sample_values_for_table(self, conn, table_name, schema_name, db_name): + def get_sample_values_for_table(self, table_name, schema_name, db_name): # Create a cursor object. - cur = conn.cursor() + cur = self.get_connection().cursor() NUM_SAMPLED_ROWS = 1000 # Execute a statement that will generate a result set. sql = f'select * from "{db_name}"."{schema_name}"."{table_name}" sample ({NUM_SAMPLED_ROWS} rows);' @@ -1181,13 +1395,13 @@ def get_external_url_for_database(self, db_name: str) -> Optional[str]: return f"{self.snowsight_base_url}#/data/databases/{db_name}/" return None - def get_snowsight_base_url(self, conn): + def get_snowsight_base_url(self) -> Optional[str]: try: # See https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#finding-the-region-and-locator-for-an-account - for db_row in self.query(conn, SnowflakeQuery.current_account()): + for db_row in self.query(SnowflakeQuery.current_account()): account_locator = db_row["CURRENT_ACCOUNT()"] - for db_row in self.query(conn, SnowflakeQuery.current_region()): + for db_row in self.query(SnowflakeQuery.current_region()): region = db_row["CURRENT_REGION()"] self.report.account_locator = account_locator @@ -1217,3 +1431,19 @@ def get_snowsight_base_url(self, conn): f"unable to get snowsight base url due to an error -> {e}", ) return None + + def is_standard_edition(self): + try: + self.query(SnowflakeQuery.show_tags()) + return False + except Exception as e: + if "Unsupported feature 'TAG'" in str(e): + return True + raise + + def close(self) -> None: + super().close() + if hasattr(self, "lineage_extractor"): + self.lineage_extractor.close() + 
if hasattr(self, "usage_extractor"): + self.usage_extractor.close() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 84118b305b93e9..6db79f2fb6f69a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -222,7 +222,10 @@ def report_from_query_combiner( class SQLAlchemyConfig(StatefulIngestionConfigBase): - options: dict = {} + options: dict = pydantic.Field( + default_factory=dict, + description="Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details.", + ) # Although the 'table_pattern' enables you to skip everything from certain schemas, # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py index 6f88871d9aeced..3ef6b62eea8c51 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py @@ -19,6 +19,10 @@ OauthConfiguration, ) from datahub.configuration.time_window_config import BaseTimeWindowConfig +from datahub.ingestion.source.snowflake.constants import ( + CLIENT_PREFETCH_THREADS, + CLIENT_SESSION_KEEP_ALIVE, +) from datahub.ingestion.source.sql.oauth_generator import OauthTokenGenerator from datahub.ingestion.source.sql.sql_common import ( SQLAlchemyConfig, @@ -143,11 +147,11 @@ class BaseSnowflakeConfig(BaseTimeWindowConfig): role: Optional[str] = pydantic.Field(default=None, description="Snowflake role.") 
include_table_lineage: bool = pydantic.Field( default=True, - description="If enabled, populates the snowflake table-to-table and s3-to-snowflake table lineage. Requires appropriate grants given to the role.", + description="If enabled, populates the snowflake table-to-table and s3-to-snowflake table lineage. Requires appropriate grants given to the role and Snowflake Enterprise Edition or above.", ) include_view_lineage: bool = pydantic.Field( default=True, - description="If enabled, populates the snowflake view->table and table->view lineages (no view->view lineage yet). Requires appropriate grants given to the role, and include_table_lineage to be True.", + description="If enabled, populates the snowflake view->table and table->view lineages (no view->view lineage yet). Requires appropriate grants given to the role, and include_table_lineage to be True. view->table lineage requires Snowflake Enterprise Edition or above.", ) connect_args: Optional[Dict] = pydantic.Field( default=None, @@ -293,10 +297,29 @@ def get_sql_alchemy_url( }, ) - def get_sql_alchemy_connect_args(self) -> dict: - if self.authentication_type != "KEY_PAIR_AUTHENTICATOR": - return {} + def get_connect_args(self) -> dict: + """ + Builds connect args and updates self.connect_args so that + Subsequent calls to this method are efficient, i.e. 
do not read files again + """ + + base_connect_args = { + # Improves performance and avoids timeout errors for larger query result + CLIENT_PREFETCH_THREADS: 10, + CLIENT_SESSION_KEEP_ALIVE: True, + } + if self.connect_args is None: + self.connect_args = base_connect_args + else: + # Let user override the default config values + base_connect_args.update(self.connect_args) + self.connect_args = base_connect_args + + if ( + self.authentication_type == "KEY_PAIR_AUTHENTICATOR" + and "private_key" not in self.connect_args.keys() + ): if self.private_key is not None: pkey_bytes = self.private_key.replace("\\n", "\n").encode() else: @@ -319,7 +342,7 @@ def get_sql_alchemy_connect_args(self) -> dict: format=serialization.PrivateFormat.PKCS8, encryption_algorithm=serialization.NoEncryption(), ) - self.connect_args = {"private_key": pkb} + self.connect_args.update({"private_key": pkb}) return self.connect_args @@ -344,11 +367,9 @@ def get_sql_alchemy_url( ) def get_options(self) -> dict: - options_connect_args: Dict = super().get_sql_alchemy_connect_args() + options_connect_args: Dict = super().get_connect_args() options_connect_args.update(self.options.get("connect_args", {})) self.options["connect_args"] = options_connect_args - if self.connect_args is not None: - self.options["connect_args"].update(self.connect_args) return self.options def get_oauth_connection(self): diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/usage/snowflake_usage.py b/metadata-ingestion/src/datahub/ingestion/source_config/usage/snowflake_usage.py index 55ca0eba0c1b6a..0930a75eae5cbf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/usage/snowflake_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/usage/snowflake_usage.py @@ -1,79 +1,19 @@ import logging -from typing import Dict, Optional +from typing import Optional import pydantic -from datahub.configuration.common import AllowDenyPattern -from 
datahub.ingestion.source.state.redundant_run_skip_handler import ( - StatefulRedundantRunSkipConfig, -) -from datahub.ingestion.source.state.stateful_ingestion_base import ( - StatefulIngestionConfigBase, -) from datahub.ingestion.source.usage.usage_common import BaseUsageConfig -from datahub.ingestion.source_config.sql.snowflake import BaseSnowflakeConfig logger = logging.getLogger(__name__) -class SnowflakeStatefulIngestionConfig(StatefulRedundantRunSkipConfig): - """ - Specialization of basic StatefulIngestionConfig to adding custom config. - This will be used to override the stateful_ingestion config param of StatefulIngestionConfigBase - in the SnowflakeUsageConfig. - """ - - pass - - -class SnowflakeUsageConfig( - BaseSnowflakeConfig, BaseUsageConfig, StatefulIngestionConfigBase -): - options: dict = pydantic.Field( - default_factory=dict, - description="Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details.", - ) - - database_pattern: AllowDenyPattern = pydantic.Field( - default=AllowDenyPattern( - deny=[r"^UTIL_DB$", r"^SNOWFLAKE$", r"^SNOWFLAKE_SAMPLE_DATA$"] - ), - description="List of regex patterns for databases to include/exclude in usage ingestion.", - ) +class SnowflakeUsageConfig(BaseUsageConfig): email_domain: Optional[str] = pydantic.Field( default=None, description="Email domain of your organisation so users can be displayed on UI appropriately.", ) - schema_pattern: AllowDenyPattern = pydantic.Field( - default=AllowDenyPattern.allow_all(), - description="List of regex patterns for schemas to include/exclude in usage ingestion.", - ) - table_pattern: AllowDenyPattern = pydantic.Field( - default=AllowDenyPattern.allow_all(), - description="List of regex patterns for tables to include in ingestion.", - ) - view_pattern: AllowDenyPattern = pydantic.Field( - default=AllowDenyPattern.allow_all(), - description="List of regex 
patterns for views to include in ingestion.", - ) apply_view_usage_to_tables: bool = pydantic.Field( default=False, description="Allow/deny patterns for views in snowflake dataset names.", ) - stateful_ingestion: Optional[SnowflakeStatefulIngestionConfig] = pydantic.Field( - default=None, description="Stateful ingestion related configs" - ) - - def get_options(self) -> dict: - options_connect_args: Dict = super().get_sql_alchemy_connect_args() - options_connect_args.update(self.options.get("connect_args", {})) - self.options["connect_args"] = options_connect_args - return self.options - - def get_sql_alchemy_url(self): - return super().get_sql_alchemy_url( - database="snowflake", - username=self.username, - password=self.password, - role=self.role, - ) diff --git a/metadata-ingestion/tests/integration/snowflake/__init__.py b/metadata-ingestion/tests/integration/snowflake/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/tests/integration/snowflake-beta/test_snowflake_beta.py b/metadata-ingestion/tests/integration/snowflake/common.py similarity index 58% rename from metadata-ingestion/tests/integration/snowflake-beta/test_snowflake_beta.py rename to metadata-ingestion/tests/integration/snowflake/common.py index 8b3a64c68dd78b..285c1657ba7e15 100644 --- a/metadata-ingestion/tests/integration/snowflake-beta/test_snowflake_beta.py +++ b/metadata-ingestion/tests/integration/snowflake/common.py @@ -1,39 +1,25 @@ import json -import random -import string from datetime import datetime, timezone -from unittest import mock -import pandas as pd -from freezegun import freeze_time - -from datahub.configuration.common import AllowDenyPattern, DynamicTypedConfig -from datahub.ingestion.glossary.classifier import ( - ClassificationConfig, - DynamicTypedClassifierConfig, -) -from datahub.ingestion.glossary.datahub_classifier import ( - DataHubClassifierConfig, - InfoTypeConfig, - PredictionFactorsAndWeights, -) -from 
datahub.ingestion.run.pipeline import Pipeline -from datahub.ingestion.run.pipeline_config import PipelineConfig, SourceConfig from datahub.ingestion.source.snowflake import snowflake_query -from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery -from tests.test_helpers import mce_helpers NUM_TABLES = 10 +NUM_VIEWS = 2 NUM_COLS = 10 NUM_OPS = 10 -def default_query_results(query): +FROZEN_TIME = "2022-06-07 17:00:00" + + +def default_query_results(query): # noqa: C901 if query == SnowflakeQuery.current_account(): return [{"CURRENT_ACCOUNT()": "ABC12345"}] if query == SnowflakeQuery.current_region(): return [{"CURRENT_REGION()": "AWS_AP_SOUTH_1"}] + if query == SnowflakeQuery.show_tags(): + return [] if query == SnowflakeQuery.current_role(): return [{"CURRENT_ROLE()": "TEST_ROLE"}] elif query == SnowflakeQuery.current_version(): @@ -77,6 +63,10 @@ def default_query_results(query): }, ] elif query == SnowflakeQuery.tables_for_database("TEST_DB"): + raise Exception("Information schema query returned too much data") + elif query == SnowflakeQuery.show_views_for_database("TEST_DB"): + raise Exception("Information schema query returned too much data") + elif query == SnowflakeQuery.tables_for_schema("TEST_SCHEMA", "TEST_DB"): return [ { "TABLE_SCHEMA": "TEST_SCHEMA", @@ -90,25 +80,38 @@ def default_query_results(query): } for tbl_idx in range(1, NUM_TABLES + 1) ] - elif query == SnowflakeQuery.tables_for_schema("TEST_SCHEMA", "TEST_DB"): + elif query == SnowflakeQuery.show_views_for_schema("TEST_SCHEMA", "TEST_DB"): return [ { - "TABLE_NAME": "TABLE_{}".format(tbl_idx), - "CREATED": datetime(2021, 6, 8, 0, 0, 0, 0), - "LAST_ALTERED": datetime(2021, 6, 8, 0, 0, 0, 0), - "BYTES": 1024, - "ROW_COUNT": 10000, - "COMMENT": "Comment for Table", - "CLUSTERING_KEY": None, + "schema_name": "TEST_SCHEMA", + "name": "VIEW_{}".format(view_idx), + "created_on": datetime(2021, 6, 
8, 0, 0, 0, 0), + "comment": "Comment for View", + "text": None, } - for tbl_idx in range(1, NUM_TABLES + 1) + for view_idx in range(1, NUM_VIEWS + 1) ] elif query == SnowflakeQuery.columns_for_schema("TEST_SCHEMA", "TEST_DB"): + raise Exception("Information schema query returned too much data") + elif query in [ + *[ + SnowflakeQuery.columns_for_table( + "TABLE_{}".format(tbl_idx), "TEST_SCHEMA", "TEST_DB" + ) + for tbl_idx in range(1, NUM_TABLES + 1) + ], + *[ + SnowflakeQuery.columns_for_table( + "VIEW_{}".format(view_idx), "TEST_SCHEMA", "TEST_DB" + ) + for view_idx in range(1, NUM_VIEWS + 1) + ], + ]: return [ { - "TABLE_CATALOG": "TEST_DB", - "TABLE_SCHEMA": "TEST_SCHEMA", - "TABLE_NAME": "TABLE_{}".format(tbl_idx), + # "TABLE_CATALOG": "TEST_DB", + # "TABLE_SCHEMA": "TEST_SCHEMA", + # "TABLE_NAME": "TABLE_{}".format(tbl_idx), "COLUMN_NAME": "COL_{}".format(col_idx), "ORDINAL_POSITION": col_idx, "IS_NULLABLE": "NO", @@ -118,24 +121,6 @@ def default_query_results(query): "NUMERIC_PRECISION": None if col_idx > 1 else 38, "NUMERIC_SCALE": None if col_idx > 1 else 0, } - # first column number, all others text - for col_idx in range(1, NUM_COLS + 1) - for tbl_idx in range(1, NUM_TABLES + 1) - ] - elif query in [ - SnowflakeQuery.columns_for_table( - "TABLE_{}".format(tbl_idx), "TEST_SCHEMA", "TEST_DB" - ) - for tbl_idx in range(1, NUM_TABLES + 1) - ]: - return [ - { - "COLUMN_NAME": "COL_{}".format(col_idx), - "ORDINAL_POSITION": 0, - "IS_NULLABLE": "NO", - "DATA_TYPE": "VARCHAR", - "COMMENT": "Comment for column", - } for col_idx in range(1, NUM_COLS + 1) ] elif query in ( @@ -301,167 +286,71 @@ def default_query_results(query): } for op_idx in range(1, NUM_OPS + 1) ] - # Unreachable code - raise Exception(f"Unknown query {query}") - - -FROZEN_TIME = "2022-06-07 17:00:00" - - -def random_email(): - return ( - "".join( - [ - random.choice(string.ascii_lowercase) - for i in range(random.randint(10, 15)) - ] - ) - + "@xyz.com" - ) - - -@freeze_time(FROZEN_TIME) -def 
test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): - test_resources_dir = pytestconfig.rootpath / "tests/integration/snowflake-beta" - - # Run the metadata ingestion pipeline. - output_file = tmp_path / "snowflake_test_events.json" - golden_file = test_resources_dir / "snowflake_beta_golden.json" - - with mock.patch("snowflake.connector.connect") as mock_connect, mock.patch( - "datahub.ingestion.source.snowflake.snowflake_v2.SnowflakeV2Source.get_sample_values_for_table" - ) as mock_sample_values: - sf_connection = mock.MagicMock() - sf_cursor = mock.MagicMock() - mock_connect.return_value = sf_connection - sf_connection.cursor.return_value = sf_cursor - sf_cursor.execute.side_effect = default_query_results - - mock_sample_values.return_value = pd.DataFrame( - data={ - "col_1": [random.randint(0, 100) for i in range(1, 200)], - "col_2": [random_email() for i in range(1, 200)], - } - ) - - datahub_classifier_config = DataHubClassifierConfig() - datahub_classifier_config.confidence_level_threshold = 0.58 - datahub_classifier_config.info_types_config = { - "Age": InfoTypeConfig( - Prediction_Factors_and_Weights=PredictionFactorsAndWeights( - Name=0, Values=1, Description=0, Datatype=0 - ) - ), - } - pipeline = Pipeline( - config=PipelineConfig( - run_id="snowflake-beta-2022_06_07-17_00_00", - source=SourceConfig( - type="snowflake", - config=SnowflakeV2Config( - account_id="ABC12345.ap-south-1.aws", - username="TST_USR", - password="TST_PWD", - include_views=False, - match_fully_qualified_names=True, - schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), - include_technical_schema=True, - include_table_lineage=True, - include_view_lineage=False, - include_usage_stats=False, - include_operational_stats=True, - start_time=datetime(2022, 6, 6, 7, 17, 0, 0).replace( - tzinfo=timezone.utc - ), - end_time=datetime(2022, 6, 7, 7, 17, 0, 0).replace( - tzinfo=timezone.utc - ), - classification=ClassificationConfig( - enabled=True, - 
column_pattern=AllowDenyPattern( - allow=[".*col_1$", ".*col_2$"] - ), - classifiers=[ - DynamicTypedClassifierConfig( - type="datahub", config=datahub_classifier_config - ) - ], - ), - ), - ), - sink=DynamicTypedConfig( - type="file", config={"filename": str(output_file)} - ), - ) - ) - pipeline.run() - pipeline.pretty_print_summary() - pipeline.raise_from_status() - - # Verify the output. - - mce_helpers.check_golden_file( - pytestconfig, - output_path=output_file, - golden_path=golden_file, - ignore_paths=[], - ) - - -@freeze_time(FROZEN_TIME) -def test_snowflake_private_link(pytestconfig, tmp_path, mock_time, mock_datahub_graph): - test_resources_dir = pytestconfig.rootpath / "tests/integration/snowflake-beta" - - # Run the metadata ingestion pipeline. - output_file = tmp_path / "snowflake_privatelink_test_events.json" - golden_file = test_resources_dir / "snowflake_privatelink_beta_golden.json" - - with mock.patch("snowflake.connector.connect") as mock_connect: - sf_connection = mock.MagicMock() - sf_cursor = mock.MagicMock() - mock_connect.return_value = sf_connection - sf_connection.cursor.return_value = sf_cursor - sf_cursor.execute.side_effect = default_query_results - - pipeline = Pipeline( - config=PipelineConfig( - run_id="snowflake-beta-2022_06_07-17_00_00", - source=SourceConfig( - type="snowflake", - config=SnowflakeV2Config( - account_id="ABC12345.ap-south-1.privatelink", - username="TST_USR", - password="TST_PWD", - include_views=False, - schema_pattern=AllowDenyPattern(allow=["test_schema"]), - include_technical_schema=True, - include_table_lineage=True, - include_column_lineage=False, - include_view_lineage=False, - include_usage_stats=False, - include_operational_stats=False, - start_time=datetime(2022, 6, 6, 7, 17, 0, 0).replace( - tzinfo=timezone.utc - ), - end_time=datetime(2022, 6, 7, 7, 17, 0, 0).replace( - tzinfo=timezone.utc - ), - ), + elif query == snowflake_query.SnowflakeQuery.external_table_lineage_history( + 1654499820000, + 
1654586220000, + ): + return [] + elif query in [ + snowflake_query.SnowflakeQuery.view_lineage_history( + 1654499820000, + 1654586220000, + ), + snowflake_query.SnowflakeQuery.view_lineage_history( + 1654499820000, 1654586220000, False + ), + ]: + return [ + { + "DOWNSTREAM_TABLE_NAME": "TEST_DB.TEST_SCHEMA.TABLE_1", + "VIEW_NAME": "TEST_DB.TEST_SCHEMA.VIEW_1", + "VIEW_DOMAIN": "VIEW", + "VIEW_COLUMNS": json.dumps( + [ + {"columnId": 0, "columnName": "COL_{}".format(col_idx)} + for col_idx in range(1, NUM_COLS + 1) + ] ), - sink=DynamicTypedConfig( - type="file", config={"filename": str(output_file)} + "DOWNSTREAM_TABLE_DOMAIN": "TABLE", + "DOWNSTREAM_TABLE_COLUMNS": json.dumps( + [ + { + "columnId": 0, + "columnName": "COL_{}".format(col_idx), + "directSources": [ + { + "columnName": "COL_{}".format(col_idx), + "objectDomain": "Table", + "objectId": 0, + "objectName": "TEST_DB.TEST_SCHEMA.TABLE_2", + } + ], + } + for col_idx in range(1, NUM_COLS + 1) + ] ), - ) - ) - pipeline.run() - pipeline.pretty_print_summary() - pipeline.raise_from_status() - - # Verify the output. 
+ } + ] + elif query in [ + snowflake_query.SnowflakeQuery.view_dependencies(), + ]: + return [ + { + "REFERENCED_OBJECT_DOMAIN": "table", + "REFERENCING_OBJECT_DOMAIN": "view", + "DOWNSTREAM_VIEW": "TEST_DB.TEST_SCHEMA.TABLE_2", + "VIEW_UPSTREAM": "TEST_DB.TEST_SCHEMA.VIEW_2", + } + ] + elif query in [ + snowflake_query.SnowflakeQuery.external_table_lineage_history( + 1654499820000, + 1654586220000, + ), + snowflake_query.SnowflakeQuery.view_dependencies(), + snowflake_query.SnowflakeQuery.show_external_tables(), + ]: + return [] - mce_helpers.check_golden_file( - pytestconfig, - output_path=output_file, - golden_path=golden_file, - ignore_paths=[], - ) + # Unreachable code + raise Exception(f"Unknown query {query}") diff --git a/metadata-ingestion/tests/integration/snowflake-beta/snowflake_beta_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json similarity index 77% rename from metadata-ingestion/tests/integration/snowflake-beta/snowflake_beta_golden.json rename to metadata-ingestion/tests/integration/snowflake/snowflake_golden.json index 27a2c089e45ac7..7988a135f3476c 100644 --- a/metadata-ingestion/tests/integration/snowflake-beta/snowflake_beta_golden.json +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json @@ -10,7 +10,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -24,7 +24,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -38,7 +38,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -52,7 +52,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -66,7 +66,7 @@ }, 
"systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -80,7 +80,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -94,7 +94,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -108,7 +108,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -122,7 +122,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -136,7 +136,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -150,7 +150,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -164,7 +164,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -178,7 +178,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -192,21 +192,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": 
\"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -220,7 +206,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -234,7 +220,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -248,7 +234,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -262,7 +248,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -276,21 +262,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -304,7 +276,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -318,7 +290,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -332,7 +304,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -346,7 +318,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -360,21 +332,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", - 
"changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1654621200000, 
- "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -388,7 +346,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -402,7 +360,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -416,7 +374,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -430,7 +388,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -444,21 +402,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -472,7 +416,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -486,7 +430,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -500,7 +444,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -514,7 +458,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -528,21 +472,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-beta-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -556,7 +486,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -570,7 +500,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -584,7 +514,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -598,7 +528,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -612,21 +542,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", 
\"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -640,7 +556,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -654,7 +570,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -668,7 +584,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -682,7 +598,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -696,21 +612,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -724,7 +626,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -738,7 +640,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -752,7 +654,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -766,7 +668,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -780,27 +682,83 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "status", "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", + "value": "{\"removed\": false}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": 
\"test_db.test_schema.table_9\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"glossaryTerms\": {\"terms\": [{\"urn\": \"urn:li:glossaryTerm:Age\"}], \"auditStamp\": {\"time\": 1654621200000, \"actor\": \"urn:li:corpuser:datahub\"}}, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"glossaryTerms\": {\"terms\": [{\"urn\": \"urn:li:glossaryTerm:Email_Address\"}], \"auditStamp\": {\"time\": 1654621200000, \"actor\": \"urn:li:corpuser:datahub\"}}, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": 
{\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/\", \"name\": \"TABLE_9\", \"qualifiedName\": \"test_db.test_schema.table_9\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", "aspectName": "status", "aspect": { "value": "{\"removed\": false}", @@ -808,40 +766,40 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { - "value": "{\"schemaName\": \"test_db.test_schema.table_9\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": 
{}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"glossaryTerms\": {\"terms\": [{\"urn\": \"urn:li:glossaryTerm:Age\"}], \"auditStamp\": {\"time\": 1654621200000, \"actor\": \"urn:li:corpuser:datahub\"}}, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"glossaryTerms\": {\"terms\": [{\"urn\": \"urn:li:glossaryTerm:Email_Address\"}], \"auditStamp\": {\"time\": 1654621200000, \"actor\": \"urn:li:corpuser:datahub\"}}, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, 
\"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "value": "{\"schemaName\": \"test_db.test_schema.table_10\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"glossaryTerms\": {\"terms\": [{\"urn\": \"urn:li:glossaryTerm:Age\"}], \"auditStamp\": {\"time\": 1654621200000, \"actor\": \"urn:li:corpuser:datahub\"}}, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"glossaryTerms\": {\"terms\": [{\"urn\": \"urn:li:glossaryTerm:Email_Address\"}], \"auditStamp\": {\"time\": 1654621200000, \"actor\": \"urn:li:corpuser:datahub\"}}, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": 
\"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/\", \"name\": \"TABLE_9\", \"qualifiedName\": \"test_db.test_schema.table_9\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/\", \"name\": \"TABLE_10\", \"qualifiedName\": \"test_db.test_schema.table_10\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -850,12 +808,12 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -864,26 +822,82 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": 
"snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "status", "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_3)\"], 
\"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", + "value": "{\"removed\": false}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": \"test_db.test_schema.view_1\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, 
{\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_1/\", \"name\": \"VIEW_1\", \"qualifiedName\": 
\"test_db.test_schema.view_1\", \"description\": \"Comment for View\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"view\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -892,40 +906,40 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { - "value": "{\"schemaName\": \"test_db.test_schema.table_10\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": 
{\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"glossaryTerms\": {\"terms\": [{\"urn\": \"urn:li:glossaryTerm:Age\"}], \"auditStamp\": {\"time\": 1654621200000, \"actor\": \"urn:li:corpuser:datahub\"}}, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"glossaryTerms\": {\"terms\": [{\"urn\": \"urn:li:glossaryTerm:Email_Address\"}], \"auditStamp\": {\"time\": 1654621200000, \"actor\": \"urn:li:corpuser:datahub\"}}, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for 
column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "value": "{\"schemaName\": \"test_db.test_schema.view_2\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", 
\"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/\", \"name\": \"TABLE_10\", \"qualifiedName\": \"test_db.test_schema.table_10\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_2/\", \"name\": \"VIEW_2\", \"qualifiedName\": \"test_db.test_schema.view_2\", \"description\": \"Comment for View\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -934,21 +948,287 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"table\"]}", + "value": "{\"typeNames\": [\"view\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + 
"lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "value": "{\"timestampMillis\": 1654621200000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10000, \"columnCount\": 10, \"sizeInBytes\": 1024}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "value": "{\"timestampMillis\": 1654621200000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10000, \"columnCount\": 10, \"sizeInBytes\": 1024}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "value": "{\"timestampMillis\": 1654621200000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10000, \"columnCount\": 10, \"sizeInBytes\": 1024}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "value": "{\"timestampMillis\": 1654621200000, 
\"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10000, \"columnCount\": 10, \"sizeInBytes\": 1024}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "value": "{\"timestampMillis\": 1654621200000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10000, \"columnCount\": 10, \"sizeInBytes\": 1024}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "value": "{\"timestampMillis\": 1654621200000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10000, \"columnCount\": 10, \"sizeInBytes\": 1024}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "value": "{\"timestampMillis\": 1654621200000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10000, \"columnCount\": 10, \"sizeInBytes\": 1024}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "value": "{\"timestampMillis\": 1654621200000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10000, \"columnCount\": 10, \"sizeInBytes\": 1024}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "value": "{\"timestampMillis\": 1654621200000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10000, \"columnCount\": 10, \"sizeInBytes\": 1024}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "value": "{\"timestampMillis\": 1654621200000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10000, \"columnCount\": 10, \"sizeInBytes\": 1024}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": 
\"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, 
+ "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": 
\"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", 
\"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": 
\"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", 
\"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", 
\"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", 
\"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)\", \"type\": \"TRANSFORMED\"}], \"fineGrainedLineages\": [{\"upstreamType\": \"FIELD_SET\", 
\"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_1)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_10)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_2)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_3)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_4)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)\"], \"downstreamType\": \"FIELD\", \"downstreams\": 
[\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_5)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_6)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_7)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_8)\"], \"confidenceScore\": 1.0}, {\"upstreamType\": \"FIELD_SET\", \"upstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)\"], \"downstreamType\": \"FIELD\", \"downstreams\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD),col_9)\"], \"confidenceScore\": 1.0}]}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -962,7 +1242,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -976,7 +1256,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -990,7 +1270,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -1004,7 +1284,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -1018,7 +1298,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -1032,7 +1312,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -1046,7 +1326,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -1060,7 +1340,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -1074,7 +1354,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -1088,7 +1368,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -1102,7 +1382,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake-beta/snowflake_privatelink_beta_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json similarity index 94% rename from metadata-ingestion/tests/integration/snowflake-beta/snowflake_privatelink_beta_golden.json rename to 
metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json index 2f3ec2f32a2fb2..c24dd5173d3818 100644 --- a/metadata-ingestion/tests/integration/snowflake-beta/snowflake_privatelink_beta_golden.json +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json @@ -10,7 +10,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -24,7 +24,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -38,7 +38,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -52,7 +52,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -66,7 +66,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -80,7 +80,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -94,7 +94,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -108,7 +108,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -122,7 +122,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -136,7 +136,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": 
"snowflake-2022_06_07-17_00_00" } }, { @@ -150,7 +150,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -164,7 +164,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -178,7 +178,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -192,7 +192,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -206,7 +206,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -220,7 +220,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -234,7 +234,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -248,7 +248,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -262,7 +262,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -276,7 +276,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -290,7 +290,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -304,7 +304,7 @@ }, "systemMetadata": { "lastObserved": 
1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -318,7 +318,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -332,7 +332,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -346,7 +346,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -360,7 +360,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -374,7 +374,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -388,7 +388,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -402,7 +402,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -416,7 +416,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -430,7 +430,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -444,7 +444,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -458,7 +458,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } 
}, { @@ -472,7 +472,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -486,7 +486,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -500,7 +500,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -514,7 +514,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -528,7 +528,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -542,7 +542,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -556,7 +556,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -570,7 +570,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -584,7 +584,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -598,7 +598,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -612,7 +612,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -626,7 +626,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -640,7 +640,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -654,7 +654,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -668,7 +668,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -682,7 +682,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -696,7 +696,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -710,7 +710,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -724,7 +724,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -738,7 +738,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -752,7 +752,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -766,7 +766,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -780,7 +780,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -794,7 +794,7 @@ 
}, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -808,7 +808,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -822,7 +822,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -836,7 +836,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -850,7 +850,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -864,7 +864,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -878,7 +878,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -892,7 +892,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -906,7 +906,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -920,7 +920,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -934,7 +934,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -948,7 +948,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + 
"runId": "snowflake-2022_06_07-17_00_00" } }, { @@ -962,7 +962,7 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-beta-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py new file mode 100644 index 00000000000000..ec9e342cbea56a --- /dev/null +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -0,0 +1,188 @@ +import random +import string +from datetime import datetime, timezone +from unittest import mock + +import pandas as pd +from freezegun import freeze_time + +from datahub.configuration.common import AllowDenyPattern, DynamicTypedConfig +from datahub.ingestion.glossary.classifier import ( + ClassificationConfig, + DynamicTypedClassifierConfig, +) +from datahub.ingestion.glossary.datahub_classifier import ( + DataHubClassifierConfig, + InfoTypeConfig, + PredictionFactorsAndWeights, +) +from datahub.ingestion.run.pipeline import Pipeline +from datahub.ingestion.run.pipeline_config import PipelineConfig, SourceConfig +from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig +from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config +from tests.integration.snowflake.common import FROZEN_TIME, default_query_results +from tests.test_helpers import mce_helpers + + +def random_email(): + return ( + "".join( + [ + random.choice(string.ascii_lowercase) + for i in range(random.randint(10, 15)) + ] + ) + + "@xyz.com" + ) + + +@freeze_time(FROZEN_TIME) +def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): + test_resources_dir = pytestconfig.rootpath / "tests/integration/snowflake" + + # Run the metadata ingestion pipeline. 
+ output_file = tmp_path / "snowflake_test_events.json" + golden_file = test_resources_dir / "snowflake_golden.json" + + with mock.patch("snowflake.connector.connect") as mock_connect, mock.patch( + "datahub.ingestion.source.snowflake.snowflake_v2.SnowflakeV2Source.get_sample_values_for_table" + ) as mock_sample_values: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + sf_cursor.execute.side_effect = default_query_results + + mock_sample_values.return_value = pd.DataFrame( + data={ + "col_1": [random.randint(0, 100) for i in range(1, 200)], + "col_2": [random_email() for i in range(1, 200)], + } + ) + + datahub_classifier_config = DataHubClassifierConfig() + datahub_classifier_config.confidence_level_threshold = 0.58 + datahub_classifier_config.info_types_config = { + "Age": InfoTypeConfig( + Prediction_Factors_and_Weights=PredictionFactorsAndWeights( + Name=0, Values=1, Description=0, Datatype=0 + ) + ), + } + pipeline = Pipeline( + config=PipelineConfig( + source=SourceConfig( + type="snowflake", + config=SnowflakeV2Config( + account_id="ABC12345.ap-south-1.aws", + username="TST_USR", + password="TST_PWD", + match_fully_qualified_names=True, + schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), + include_technical_schema=True, + include_table_lineage=True, + include_view_lineage=True, + include_usage_stats=False, + include_operational_stats=True, + start_time=datetime(2022, 6, 6, 7, 17, 0, 0).replace( + tzinfo=timezone.utc + ), + end_time=datetime(2022, 6, 7, 7, 17, 0, 0).replace( + tzinfo=timezone.utc + ), + classification=ClassificationConfig( + enabled=True, + column_pattern=AllowDenyPattern( + allow=[".*col_1$", ".*col_2$"] + ), + classifiers=[ + DynamicTypedClassifierConfig( + type="datahub", config=datahub_classifier_config + ) + ], + ), + profiling=GEProfilingConfig( + enabled=True, + profile_if_updated_since_days=None, + 
profile_table_row_limit=None, + profile_table_size_limit=None, + profile_table_level_only=True, + ), + ), + ), + sink=DynamicTypedConfig( + type="file", config={"filename": str(output_file)} + ), + ) + ) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status() + + # Verify the output. + + mce_helpers.check_golden_file( + pytestconfig, + output_path=output_file, + golden_path=golden_file, + ignore_paths=[], + ) + + +@freeze_time(FROZEN_TIME) +def test_snowflake_private_link(pytestconfig, tmp_path, mock_time, mock_datahub_graph): + test_resources_dir = pytestconfig.rootpath / "tests/integration/snowflake" + + # Run the metadata ingestion pipeline. + output_file = tmp_path / "snowflake_privatelink_test_events.json" + golden_file = test_resources_dir / "snowflake_privatelink_golden.json" + + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + sf_cursor.execute.side_effect = default_query_results + + pipeline = Pipeline( + config=PipelineConfig( + source=SourceConfig( + type="snowflake", + config=SnowflakeV2Config( + account_id="ABC12345.ap-south-1.privatelink", + username="TST_USR", + password="TST_PWD", + schema_pattern=AllowDenyPattern(allow=["test_schema"]), + include_technical_schema=True, + include_table_lineage=True, + include_column_lineage=False, + include_views=False, + include_view_lineage=False, + include_usage_stats=False, + include_operational_stats=False, + start_time=datetime(2022, 6, 6, 7, 17, 0, 0).replace( + tzinfo=timezone.utc + ), + end_time=datetime(2022, 6, 7, 7, 17, 0, 0).replace( + tzinfo=timezone.utc + ), + ), + ), + sink=DynamicTypedConfig( + type="file", config={"filename": str(output_file)} + ), + ) + ) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status() + + # Verify the output. 
+ + mce_helpers.check_golden_file( + pytestconfig, + output_path=output_file, + golden_path=golden_file, + ignore_paths=[], + ) diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py new file mode 100644 index 00000000000000..e7c70e2cf4d22c --- /dev/null +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py @@ -0,0 +1,301 @@ +from datetime import datetime, timezone +from typing import cast +from unittest import mock + +from freezegun import freeze_time +from pytest import fixture + +from datahub.configuration.common import AllowDenyPattern, DynamicTypedConfig +from datahub.ingestion.run.pipeline import Pipeline +from datahub.ingestion.run.pipeline_config import PipelineConfig, SourceConfig +from datahub.ingestion.source.snowflake import snowflake_query +from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config +from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery +from tests.integration.snowflake.common import ( + FROZEN_TIME, + NUM_TABLES, + default_query_results, +) + + +def query_permission_error_override(fn, override_for_query, error_msg): + def my_function(query): + if query in override_for_query: + raise Exception(error_msg) + else: + return fn(query) + + return my_function + + +def query_permission_response_override(fn, override_for_query, response): + def my_function(query): + if query in override_for_query: + return response + else: + return fn(query) + + return my_function + + +@fixture(scope="function") +def snowflake_pipeline_config(tmp_path): + + output_file = tmp_path / "snowflake_test_events_permission_error.json" + config = PipelineConfig( + source=SourceConfig( + type="snowflake", + config=SnowflakeV2Config( + account_id="ABC12345.ap-south-1.aws", + username="TST_USR", + password="TST_PWD", + role="TEST_ROLE", + warehouse="TEST_WAREHOUSE", + 
include_technical_schema=True, + match_fully_qualified_names=True, + schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), + include_view_lineage=False, + include_usage_stats=False, + start_time=datetime(2022, 6, 6, 7, 17, 0, 0).replace( + tzinfo=timezone.utc + ), + end_time=datetime(2022, 6, 7, 7, 17, 0, 0).replace(tzinfo=timezone.utc), + ), + ), + sink=DynamicTypedConfig(type="file", config={"filename": str(output_file)}), + ) + return config + + +@freeze_time(FROZEN_TIME) +def test_snowflake_missing_role_access_causes_pipeline_failure( + pytestconfig, + snowflake_pipeline_config, +): + with mock.patch("snowflake.connector.connect") as mock_connect: + # Snowflake connection fails with a role-not-granted error + mock_connect.side_effect = Exception( + "250001 (08001): Failed to connect to DB: abc12345.ap-south-1.snowflakecomputing.com:443. Role 'TEST_ROLE' specified in the connect string is not granted to this user. Contact your local system administrator, or attempt to login with another role, e.g. 
PUBLIC" + ) + + pipeline = Pipeline(snowflake_pipeline_config) + pipeline.run() + assert "permission-error" in pipeline.source.get_report().failures.keys() + + +@freeze_time(FROZEN_TIME) +def test_snowflake_missing_warehouse_access_causes_pipeline_failure( + pytestconfig, + snowflake_pipeline_config, +): + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + # Current warehouse query leads to blank result + sf_cursor.execute.side_effect = query_permission_response_override( + default_query_results, + [SnowflakeQuery.current_warehouse()], + [(None,)], + ) + pipeline = Pipeline(snowflake_pipeline_config) + pipeline.run() + assert "permission-error" in pipeline.source.get_report().failures.keys() + + +@freeze_time(FROZEN_TIME) +def test_snowflake_no_databases_with_access_causes_pipeline_failure( + pytestconfig, + snowflake_pipeline_config, +): + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + # Error in listing databases + sf_cursor.execute.side_effect = query_permission_error_override( + default_query_results, + [SnowflakeQuery.get_databases("TEST_DB")], + "Database 'TEST_DB' does not exist or not authorized.", + ) + pipeline = Pipeline(snowflake_pipeline_config) + pipeline.run() + assert "permission-error" in pipeline.source.get_report().failures.keys() + + +@freeze_time(FROZEN_TIME) +def test_snowflake_no_tables_causes_pipeline_failure( + pytestconfig, + snowflake_pipeline_config, +): + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + # Listing tables and views
returns empty results + no_tables_fn = query_permission_response_override( + default_query_results, + [SnowflakeQuery.tables_for_schema("TEST_SCHEMA", "TEST_DB")], + [], + ) + sf_cursor.execute.side_effect = query_permission_response_override( + no_tables_fn, + [SnowflakeQuery.show_views_for_schema("TEST_SCHEMA", "TEST_DB")], + [], + ) + + pipeline = Pipeline(snowflake_pipeline_config) + pipeline.run() + assert "permission-error" in pipeline.source.get_report().failures.keys() + + +@freeze_time(FROZEN_TIME) +def test_snowflake_list_columns_error_causes_pipeline_warning( + pytestconfig, + snowflake_pipeline_config, +): + + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + # Error in listing columns + sf_cursor.execute.side_effect = query_permission_error_override( + default_query_results, + [ + SnowflakeQuery.columns_for_table( + "TABLE_{}".format(tbl_idx), "TEST_SCHEMA", "TEST_DB" + ) + for tbl_idx in range(1, NUM_TABLES + 1) + ], + "Database 'TEST_DB' does not exist or not authorized.", + ) + pipeline = Pipeline(snowflake_pipeline_config) + pipeline.run() + pipeline.raise_from_status() # pipeline should not fail + assert ( + "Failed to get columns for table" + in pipeline.source.get_report().warnings.keys() + ) + + +@freeze_time(FROZEN_TIME) +def test_snowflake_list_primary_keys_error_causes_pipeline_warning( + pytestconfig, + snowflake_pipeline_config, +): + + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + # Error in listing keys leads to warning + sf_cursor.execute.side_effect = query_permission_error_override( + default_query_results, + [SnowflakeQuery.show_primary_keys_for_schema("TEST_SCHEMA", "TEST_DB")], + 
"Insufficient privileges to operate on TEST_DB", + ) + pipeline = Pipeline(snowflake_pipeline_config) + pipeline.run() + pipeline.raise_from_status() # pipeline should not fail + assert ( + "Failed to get primary key for table" + in pipeline.source.get_report().warnings.keys() + ) + + +@freeze_time(FROZEN_TIME) +def test_snowflake_missing_snowflake_lineage_permission_causes_pipeline_failure( + pytestconfig, + snowflake_pipeline_config, +): + + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + # Error in getting lineage + sf_cursor.execute.side_effect = query_permission_error_override( + default_query_results, + [ + snowflake_query.SnowflakeQuery.table_to_table_lineage_history( + 1654499820000, + 1654586220000, + ) + ], + "Database 'SNOWFLAKE' does not exist or not authorized.", + ) + pipeline = Pipeline(snowflake_pipeline_config) + pipeline.run() + assert ( + "lineage-permission-error" in pipeline.source.get_report().failures.keys() + ) + + +@freeze_time(FROZEN_TIME) +def test_snowflake_missing_snowflake_operations_permission_causes_pipeline_failure( + pytestconfig, + snowflake_pipeline_config, +): + + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + # Error in getting access history date range + sf_cursor.execute.side_effect = query_permission_error_override( + default_query_results, + [snowflake_query.SnowflakeQuery.get_access_history_date_range()], + "Database 'SNOWFLAKE' does not exist or not authorized.", + ) + pipeline = Pipeline(snowflake_pipeline_config) + pipeline.run() + assert "usage-permission-error" in pipeline.source.get_report().failures.keys() + + +@freeze_time(FROZEN_TIME) +def 
test_snowflake_unexpected_snowflake_view_lineage_error_causes_pipeline_warning( + pytestconfig, + snowflake_pipeline_config, +): + + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + # Error in getting view lineage + sf_cursor.execute.side_effect = query_permission_error_override( + default_query_results, + [ + snowflake_query.SnowflakeQuery.view_lineage_history( + 1654499820000, + 1654586220000, + ) + ], + "Unexpected Error", + ) + + cast( + SnowflakeV2Config, + cast(PipelineConfig, snowflake_pipeline_config).source.config, + ).include_view_lineage = True + pipeline = Pipeline(snowflake_pipeline_config) + pipeline.run() + pipeline.raise_from_status() # pipeline should not fail + assert "view-downstream-lineage" in pipeline.source.get_report().warnings.keys() diff --git a/metadata-ingestion/tests/unit/test_snowflake_beta_source.py b/metadata-ingestion/tests/unit/test_snowflake_source.py similarity index 91% rename from metadata-ingestion/tests/unit/test_snowflake_beta_source.py rename to metadata-ingestion/tests/unit/test_snowflake_source.py index c4e5a9a03adfb8..93b6acaac5a16f 100644 --- a/metadata-ingestion/tests/unit/test_snowflake_beta_source.py +++ b/metadata-ingestion/tests/unit/test_snowflake_source.py @@ -5,8 +5,12 @@ from datahub.configuration.common import ConfigurationError, OauthConfiguration from datahub.ingestion.api.source import SourceCapability +from datahub.ingestion.source.snowflake.constants import ( + CLIENT_PREFETCH_THREADS, + CLIENT_SESSION_KEEP_ALIVE, + SnowflakeCloudProvider, +) from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config -from datahub.ingestion.source.snowflake.snowflake_utils import SnowflakeCloudProvider from datahub.ingestion.source.snowflake.snowflake_v2 import SnowflakeV2Source @@ -213,6 +217,43 @@ def 
test_snowflake_config_with_column_lineage_no_table_lineage_throws_error(): ) +def test_snowflake_config_with_no_connect_args_returns_base_connect_args(): + config: SnowflakeV2Config = SnowflakeV2Config.parse_obj( + { + "username": "user", + "password": "password", + "host_port": "acctname", + "database_pattern": {"allow": {"^demo$"}}, + "warehouse": "COMPUTE_WH", + "role": "sysadmin", + } + ) + assert config.get_options()["connect_args"] is not None + assert config.get_options()["connect_args"] == { + CLIENT_PREFETCH_THREADS: 10, + CLIENT_SESSION_KEEP_ALIVE: True, + } + + +def test_snowflake_config_with_connect_args_overrides_base_connect_args(): + config: SnowflakeV2Config = SnowflakeV2Config.parse_obj( + { + "username": "user", + "password": "password", + "host_port": "acctname", + "database_pattern": {"allow": {"^demo$"}}, + "warehouse": "COMPUTE_WH", + "role": "sysadmin", + "connect_args": { + CLIENT_PREFETCH_THREADS: 5, + }, + } + ) + assert config.get_options()["connect_args"] is not None + assert config.get_options()["connect_args"][CLIENT_PREFETCH_THREADS] == 5 + assert config.get_options()["connect_args"][CLIENT_SESSION_KEEP_ALIVE] is True + + @patch("snowflake.connector.connect") def test_test_connection_failure(mock_connect): mock_connect.side_effect = Exception("Failed to connect to snowflake") @@ -309,7 +350,10 @@ def query_results(query): ].failure_reason assert failure_reason - assert "Current role does not have permissions to use warehouse" in failure_reason + assert ( + "Current role TEST_ROLE does not have permissions to use warehouse" + in failure_reason + ) @patch("snowflake.connector.connect")