From db28ab69935f03c7c72ce2504b6c5cbea35537f9 Mon Sep 17 00:00:00 2001 From: treff7es Date: Fri, 2 Dec 2022 15:34:59 +0100 Subject: [PATCH] Ignore complex types from profiling --- .../datahub/ingestion/source/bigquery_v2/profiler.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py index 726b54e58629fb..d58457534977ac 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py @@ -165,6 +165,16 @@ def get_workunits( continue for table in tables[project][dataset]: + for column in table.columns: + # Profiler has issues with complex types (array, struct, geography, json), so we deny those types from profiling + # We also filter columns without data type as it means that column is part of a complex type. + if not column.data_type or any( + word in column.data_type.lower() + for word in ["array", "struct", "geography", "json"] + ): + self.config.profile_pattern.deny.append( + f"^{project}.{dataset}.{table.name}.{column.field_path}$" + ) # Emit the profile work unit profile_request = self.get_bigquery_profile_request( project=project, dataset=dataset, table=table