fix(ingest): fix doc generation import ordering issue with postgres (datahub-project#5846)

Relying on the correct import directly, rather than going through SQLAlchemy's import wrapper (in their dialect.py), allows us to bypass this strange error in doc generation.
shirshanka · Sep 8, 2022 · 0b747ad · 0b747ad
1 parent 959c66e
commit 0b747ad
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 24 deletions.
diff --git a/metadata-ingestion/scripts/docgen.py b/metadata-ingestion/scripts/docgen.py
@@ -18,8 +18,7 @@
     SourceCapability,
     SupportStatus,
 )
-from datahub.ingestion.api.registry import PluginRegistry
-from datahub.ingestion.api.source import Source
+from datahub.ingestion.source.source_registry import source_registry
 
 logger = logging.getLogger(__name__)
 
@@ -500,11 +499,7 @@ def generate(
                             file_contents,
                         )
 
-    source_registry = PluginRegistry[Source]()
-    source_registry.register_from_entrypoint("datahub.ingestion.source.plugins")
-
-    # This source is always enabled
-    for plugin_name in sorted(source_registry._mapping.keys()):
+    for plugin_name in sorted(source_registry.mapping.keys()):
         if source and source != plugin_name:
             continue
 
@@ -526,8 +521,9 @@ def generate(
                 get_additional_deps_for_extra(extra_plugin) if extra_plugin else []
             )
         except Exception as e:
-            print(f"Failed to process {plugin_name} due to exception")
-            print(repr(e))
+            logger.warning(
+                f"Failed to process {plugin_name} due to exception {e}", exc_info=e
+            )
             metrics["plugins"]["failed"] = metrics["plugins"].get("failed", 0) + 1
 
         if source_type and hasattr(source_type, "get_config_class"):

diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
@@ -22,8 +22,9 @@
 from urllib.parse import quote_plus
 
 import pydantic
+import sqlalchemy.dialects.postgresql.base
 from pydantic.fields import Field
-from sqlalchemy import create_engine, dialects, inspect
+from sqlalchemy import create_engine, inspect
 from sqlalchemy.engine.reflection import Inspector
 from sqlalchemy.exc import ProgrammingError
 from sqlalchemy.sql import sqltypes as types
@@ -350,20 +351,23 @@ class SqlWorkUnit(MetadataWorkUnit):
     types.DATETIME: TimeTypeClass,
     types.TIMESTAMP: TimeTypeClass,
     types.JSON: RecordTypeClass,
-    dialects.postgresql.base.BYTEA: BytesTypeClass,
-    dialects.postgresql.base.DOUBLE_PRECISION: NumberTypeClass,
-    dialects.postgresql.base.INET: StringTypeClass,
-    dialects.postgresql.base.MACADDR: StringTypeClass,
-    dialects.postgresql.base.MONEY: NumberTypeClass,
-    dialects.postgresql.base.OID: StringTypeClass,
-    dialects.postgresql.base.REGCLASS: BytesTypeClass,
-    dialects.postgresql.base.TIMESTAMP: TimeTypeClass,
-    dialects.postgresql.base.TIME: TimeTypeClass,
-    dialects.postgresql.base.INTERVAL: TimeTypeClass,
-    dialects.postgresql.base.BIT: BytesTypeClass,
-    dialects.postgresql.base.UUID: StringTypeClass,
-    dialects.postgresql.base.TSVECTOR: BytesTypeClass,
-    dialects.postgresql.base.ENUM: EnumTypeClass,
+    # Because the postgresql dialect is used internally by many other dialects,
+    # we add some postgres types here. This is ok to do because the postgresql
+    # dialect is built-in to sqlalchemy.
+    sqlalchemy.dialects.postgresql.base.BYTEA: BytesTypeClass,
+    sqlalchemy.dialects.postgresql.base.DOUBLE_PRECISION: NumberTypeClass,
+    sqlalchemy.dialects.postgresql.base.INET: StringTypeClass,
+    sqlalchemy.dialects.postgresql.base.MACADDR: StringTypeClass,
+    sqlalchemy.dialects.postgresql.base.MONEY: NumberTypeClass,
+    sqlalchemy.dialects.postgresql.base.OID: StringTypeClass,
+    sqlalchemy.dialects.postgresql.base.REGCLASS: BytesTypeClass,
+    sqlalchemy.dialects.postgresql.base.TIMESTAMP: TimeTypeClass,
+    sqlalchemy.dialects.postgresql.base.TIME: TimeTypeClass,
+    sqlalchemy.dialects.postgresql.base.INTERVAL: TimeTypeClass,
+    sqlalchemy.dialects.postgresql.base.BIT: BytesTypeClass,
+    sqlalchemy.dialects.postgresql.base.UUID: StringTypeClass,
+    sqlalchemy.dialects.postgresql.base.TSVECTOR: BytesTypeClass,
+    sqlalchemy.dialects.postgresql.base.ENUM: EnumTypeClass,
     # When SQLAlchemy is unable to map a type into its internal hierarchy, it
     # assigns the NullType by default. We want to carry this warning through.
     types.NullType: NullTypeClass,