From 8e662916d5763f9554f29ba45ccdac5a34f511a3 Mon Sep 17 00:00:00 2001 From: Pavel Klammert | LOGEX Date: Thu, 6 Oct 2022 14:29:20 +0200 Subject: [PATCH] fix(ingestion): add output converters for ODBC unsupported datatype in MSSQL #5344 --- .../src/datahub/ingestion/source/sql/mssql.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql.py index ca991c9c4d60c1..d684abfa066d1f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql.py @@ -123,9 +123,22 @@ def __init__(self, config: SQLServerConfig, ctx: PipelineContext): for inspector in self.get_inspectors(): db_name: str = self.get_db_name(inspector) with inspector.engine.connect() as conn: + if self.config.use_odbc: + self._add_output_converters(conn) self._populate_table_descriptions(conn, db_name) self._populate_column_descriptions(conn, db_name) + @staticmethod + def _add_output_converters(conn: Connection) -> None: + def handle_sql_variant_as_string(value): + return value.decode('utf-16le') + # see https://stackoverflow.com/questions/45677374/pandas-pyodbc-odbc-sql-type-150-is-not-yet-supported + # and https://stackoverflow.com/questions/11671170/adding-output-converter-to-pyodbc-connection-in-sqlalchemy + try: + conn.connection.add_output_converter(-150, handle_sql_variant_as_string) + except AttributeError as e: + logger.debug(f"Failed to mount output converter for MSSQL data type -150 due to {e}") + def _populate_table_descriptions(self, conn: Connection, db_name: str) -> None: # see https://stackoverflow.com/questions/5953330/how-do-i-map-the-id-in-sys-extended-properties-to-an-object-name # also see https://www.mssqltips.com/sqlservertip/5384/working-with-sql-server-extended-properties/