Skip to content

Commit

Permalink
fix(ingest): fix doc generation import ordering issue with postgres (datahub-project#5846)
Browse files Browse the repository at this point in the history

Relying on the correct import directly, rather than going through
SQLAlchemy's import wrapper (in their dialect.py) allows us to bypass
this strange error in doc generation.
  • Loading branch information
hsheth2 authored and shirshanka committed Sep 8, 2022
1 parent 959c66e commit 0b747ad
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 24 deletions.
14 changes: 5 additions & 9 deletions metadata-ingestion/scripts/docgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
SourceCapability,
SupportStatus,
)
from datahub.ingestion.api.registry import PluginRegistry
from datahub.ingestion.api.source import Source
from datahub.ingestion.source.source_registry import source_registry

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -500,11 +499,7 @@ def generate(
file_contents,
)

source_registry = PluginRegistry[Source]()
source_registry.register_from_entrypoint("datahub.ingestion.source.plugins")

# This source is always enabled
for plugin_name in sorted(source_registry._mapping.keys()):
for plugin_name in sorted(source_registry.mapping.keys()):
if source and source != plugin_name:
continue

Expand All @@ -526,8 +521,9 @@ def generate(
get_additional_deps_for_extra(extra_plugin) if extra_plugin else []
)
except Exception as e:
print(f"Failed to process {plugin_name} due to exception")
print(repr(e))
logger.warning(
f"Failed to process {plugin_name} due to exception {e}", exc_info=e
)
metrics["plugins"]["failed"] = metrics["plugins"].get("failed", 0) + 1

if source_type and hasattr(source_type, "get_config_class"):
Expand Down
34 changes: 19 additions & 15 deletions metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@
from urllib.parse import quote_plus

import pydantic
import sqlalchemy.dialects.postgresql.base
from pydantic.fields import Field
from sqlalchemy import create_engine, dialects, inspect
from sqlalchemy import create_engine, inspect
from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.sql import sqltypes as types
Expand Down Expand Up @@ -350,20 +351,23 @@ class SqlWorkUnit(MetadataWorkUnit):
types.DATETIME: TimeTypeClass,
types.TIMESTAMP: TimeTypeClass,
types.JSON: RecordTypeClass,
dialects.postgresql.base.BYTEA: BytesTypeClass,
dialects.postgresql.base.DOUBLE_PRECISION: NumberTypeClass,
dialects.postgresql.base.INET: StringTypeClass,
dialects.postgresql.base.MACADDR: StringTypeClass,
dialects.postgresql.base.MONEY: NumberTypeClass,
dialects.postgresql.base.OID: StringTypeClass,
dialects.postgresql.base.REGCLASS: BytesTypeClass,
dialects.postgresql.base.TIMESTAMP: TimeTypeClass,
dialects.postgresql.base.TIME: TimeTypeClass,
dialects.postgresql.base.INTERVAL: TimeTypeClass,
dialects.postgresql.base.BIT: BytesTypeClass,
dialects.postgresql.base.UUID: StringTypeClass,
dialects.postgresql.base.TSVECTOR: BytesTypeClass,
dialects.postgresql.base.ENUM: EnumTypeClass,
# Because the postgresql dialect is used internally by many other dialects,
# we add some postgres types here. This is ok to do because the postgresql
# dialect is built-in to sqlalchemy.
sqlalchemy.dialects.postgresql.base.BYTEA: BytesTypeClass,
sqlalchemy.dialects.postgresql.base.DOUBLE_PRECISION: NumberTypeClass,
sqlalchemy.dialects.postgresql.base.INET: StringTypeClass,
sqlalchemy.dialects.postgresql.base.MACADDR: StringTypeClass,
sqlalchemy.dialects.postgresql.base.MONEY: NumberTypeClass,
sqlalchemy.dialects.postgresql.base.OID: StringTypeClass,
sqlalchemy.dialects.postgresql.base.REGCLASS: BytesTypeClass,
sqlalchemy.dialects.postgresql.base.TIMESTAMP: TimeTypeClass,
sqlalchemy.dialects.postgresql.base.TIME: TimeTypeClass,
sqlalchemy.dialects.postgresql.base.INTERVAL: TimeTypeClass,
sqlalchemy.dialects.postgresql.base.BIT: BytesTypeClass,
sqlalchemy.dialects.postgresql.base.UUID: StringTypeClass,
sqlalchemy.dialects.postgresql.base.TSVECTOR: BytesTypeClass,
sqlalchemy.dialects.postgresql.base.ENUM: EnumTypeClass,
# When SQLAlchemy is unable to map a type into its internal hierarchy, it
# assigns the NullType by default. We want to carry this warning through.
types.NullType: NullTypeClass,
Expand Down

0 comments on commit 0b747ad

Please sign in to comment.