Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingest): allow snowflake profiling to work with geography type #6162

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def get_column_unique_count_patch(self, column):
elif self.engine.dialect.name.lower() in {"bigquery", "snowflake"}:
element_values = self.engine.execute(
sa.select(
[sa.text(f'APPROX_COUNT_DISTINCT ("{column}")')] # type:ignore
[sa.text(f"APPROX_COUNT_DISTINCT ({sa.column(column)})")] # type:ignore
).select_from(self._table)
)
return convert_to_json_serializable(element_values.fetchone()[0])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import logging
from typing import Callable, Dict, Iterable, List, Optional, Tuple, cast

from snowflake.sqlalchemy import snowdialect
from sqlalchemy import create_engine, inspect
from sqlalchemy.sql import sqltypes

from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
from datahub.ingestion.api.common import WorkUnit
Expand All @@ -22,6 +24,8 @@
from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProfile
from datahub.metadata.schema_classes import DatasetProfileClass

# Register Snowflake's GEOGRAPHY type name in the dialect's type lookup table,
# mapping it to SQLAlchemy's NullType. This is a module-level side effect that
# takes place on import.
# NOTE(review): presumably the dialect has no built-in entry for GEOGRAPHY and
# this keeps profiling from failing on such columns -- confirm against
# snowflake-sqlalchemy.
snowdialect.ischema_names["GEOGRAPHY"] = sqltypes.NullType

logger = logging.getLogger(__name__)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,28 @@ class SnowflakeColumn:
is_nullable: bool
data_type: str
comment: Optional[str]
character_maximum_length: Optional[int]
numeric_precision: Optional[int]
numeric_scale: Optional[int]

def get_precise_native_type(self):
    """Return the column's native type, with size details when they are known.

    Numeric columns ("NUMBER", "NUMERIC", "DECIMAL") that have both a
    precision and a scale are rendered as ``NUMBER(precision,scale)``.
    Text columns ("TEXT", "STRING", "VARCHAR") that have a maximum length
    are rendered as ``VARCHAR(length)``. In every other case the raw
    ``data_type`` is returned unchanged.
    """
    base_type = self.data_type

    # https://docs.snowflake.com/en/sql-reference/data-types-numeric.html
    if base_type in ("NUMBER", "NUMERIC", "DECIMAL"):
        has_precision = self.numeric_precision is not None
        if has_precision and self.numeric_scale is not None:
            return f"NUMBER({self.numeric_precision},{self.numeric_scale})"

    # https://docs.snowflake.com/en/sql-reference/data-types-text.html
    if base_type in ("TEXT", "STRING", "VARCHAR"):
        if self.character_maximum_length is not None:
            return f"VARCHAR({self.character_maximum_length})"

    # No extra size information applies; fall back to the plain type name.
    return base_type


@dataclass
Expand Down Expand Up @@ -251,6 +273,9 @@ def get_columns_for_schema(
is_nullable=column["IS_NULLABLE"] == "YES",
data_type=column["DATA_TYPE"],
comment=column["COMMENT"],
character_maximum_length=column["CHARACTER_MAXIMUM_LENGTH"],
numeric_precision=column["NUMERIC_PRECISION"],
numeric_scale=column["NUMERIC_SCALE"],
)
)
return columns
Expand All @@ -273,6 +298,9 @@ def get_columns_for_table(
is_nullable=column["IS_NULLABLE"] == "YES",
data_type=column["DATA_TYPE"],
comment=column["COMMENT"],
character_maximum_length=column["CHARACTER_MAXIMUM_LENGTH"],
numeric_precision=column["NUMERIC_PRECISION"],
numeric_scale=column["NUMERIC_SCALE"],
)
)
return columns
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ def get_schema_metadata(
SNOWFLAKE_FIELD_TYPE_MAPPINGS.get(col.data_type, NullType)()
),
# NOTE: nativeDataType will not be in sync with older connector
nativeDataType=col.data_type,
nativeDataType=col.get_precise_native_type(),
description=col.comment,
nullable=col.is_nullable,
isPartOfKey=col.name in table.pk.column_names
Expand Down
Loading