From 2f1419d495e4235f7d3f11b9dd4b00702b6ea49a Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 6 Jun 2024 15:54:32 -0700 Subject: [PATCH] fix(ingest/snowflake): avoid overfetching schemas from datahub (#10527) --- .../ingestion/source/snowflake/snowflake_v2.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 9bb6226b4947a2..f155ac24fea3fc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -254,7 +254,18 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): platform=self.platform, platform_instance=self.config.platform_instance, env=self.config.env, - graph=self.ctx.graph, + graph=( + # If we're ingesting schema metadata for tables/views, then we will populate + # schemas into the resolver as we go. We only need to do a bulk fetch + # if we're not ingesting schema metadata as part of ingestion. + self.ctx.graph + if not ( + self.config.include_technical_schema + and self.config.include_tables + and self.config.include_views + ) + else None + ), generate_usage_statistics=False, generate_operations=False, format_queries=self.config.format_sql_queries, @@ -1252,7 +1263,7 @@ def gen_schema_metadata( foreignKeys=foreign_keys, ) - if self.aggregator and self.config.parse_view_ddl: + if self.aggregator: self.aggregator.register_schema(urn=dataset_urn, schema=schema_metadata) return schema_metadata