apache · villebro · Mar 12, 2021 · Feb 23, 2021 · Feb 23, 2021 · Feb 23, 2021
diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
@@ -41,7 +41,7 @@
 import sqlparse
 from flask import g
 from flask_babel import lazy_gettext as _
-from sqlalchemy import column, DateTime, select
+from sqlalchemy import column, DateTime, select, types
 from sqlalchemy.engine.base import Engine
 from sqlalchemy.engine.interfaces import Compiled, Dialect
 from sqlalchemy.engine.reflection import Inspector
@@ -57,6 +57,7 @@
 from superset.models.sql_lab import Query
 from superset.sql_parse import ParsedQuery, Table
 from superset.utils import core as utils
+from superset.utils.core import ColumnSpec
 
 if TYPE_CHECKING:
     # prevent circular imports
@@ -145,7 +146,12 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
     _date_trunc_functions: Dict[str, str] = {}
     _time_grain_expressions: Dict[Optional[str], str] = {}
     column_type_mappings: Tuple[
-        Tuple[Pattern[str], Union[TypeEngine, Callable[[Match[str]], TypeEngine]]], ...,
+        Tuple[
+            Pattern[str],
+            Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
+            utils.GenericDataType,
+        ],
+        ...,
     ] = ()
     time_groupby_inline = False
     limit_method = LimitMethod.FORCE_LIMIT
@@ -179,6 +185,13 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
         ),
     }
 
+    dttm_types = [
+        types.TIME,
+        types.TIMESTAMP,
+        types.TIMESTAMP(timezone=True),
+        types.Interval,
+    ]
+
     @classmethod
     def get_dbapi_exception_mapping(cls) -> Dict[Type[Exception], Type[Exception]]:
         """
@@ -967,25 +980,27 @@ def make_label_compatible(cls, label: str) -> Union[str, quoted_name]:
         return label_mutated
 
     @classmethod
-    def get_sqla_column_type(cls, type_: Optional[str]) -> Optional[TypeEngine]:
+    def get_sqla_column_type(
+        cls, column_type: Optional[str]
+    ) -> Tuple[Optional[TypeEngine], Optional[utils.GenericDataType]]:
         """
         Return a sqlalchemy native column type that corresponds to the column type
         defined in the data source (return None to use default type inferred by
         SQLAlchemy). Override `column_type_mappings` for specific needs
         (see MSSQL for example of NCHAR/NVARCHAR handling).
 
-        :param type_: Column type returned by inspector
-        :return: SqlAlchemy column type
+        :param column_type: Column type returned by inspector
+        :return: Tuple of (SQLAlchemy type, generic type); (None, None) if unmatched
         """
-        if not type_:
-            return None
-        for regex, sqla_type in cls.column_type_mappings:
-            match = regex.match(type_)
+        if not column_type:
+            return None, None
+        for regex, sqla_type, generic_type in cls.column_type_mappings:
+            match = regex.match(column_type)
             if match:
                 if callable(sqla_type):
-                    return sqla_type(match)
-                return sqla_type
-        return None
+                    return sqla_type(match), generic_type
+                return sqla_type, generic_type
+        return None, None
 
     @staticmethod
     def _mutate_label(label: str) -> str:
@@ -1097,3 +1112,33 @@ def get_extra_params(database: "Database") -> Dict[str, Any]:
     def is_readonly_query(cls, parsed_query: ParsedQuery) -> bool:
         """Pessimistic readonly, 100% sure statement won't mutate anything"""
         return parsed_query.is_select() or parsed_query.is_explain()
+
+    @classmethod
+    def get_column_spec(
+        cls,
+        column_name: Optional[str],
+        native_type: str,
+        source: utils.ColumnTypeSource = utils.ColumnTypeSource.GET_TABLE,
+    ) -> utils.ColumnSpec:
+        """
+        Build a column specification from a native database column type.
+
+        :param column_name: Name of the column; currently has no effect
+        :param native_type: Native column type, matched via `column_type_mappings`
+        :param source: Origin of the type information; currently has no effect
+        :return: ColumnSpec with the SQLAlchemy type and generic type (both None
+                 when nothing matches) and whether the generic type is temporal
+        """
+        column_type, generic_type = cls.get_sqla_column_type(native_type)
+        is_dttm = generic_type == utils.GenericDataType.TEMPORAL
+
+        if column_name:  # Further logic to be implemented
+            pass
+        if (
+            source == utils.ColumnTypeSource.CURSOR_DESCRIPION
+        ):  # Further logic to be implemented
+            pass
+
+        return ColumnSpec(
+            sqla_type=column_type, generic_type=generic_type, is_dttm=is_dttm
+        )
diff --git a/superset/db_engine_specs/mssql.py b/superset/db_engine_specs/mssql.py
@@ -78,8 +78,16 @@ def fetch_data(
         return cls.pyodbc_rows_to_tuples(data)
 
     column_type_mappings = (
-        (re.compile(r"^N((VAR)?CHAR|TEXT)", re.IGNORECASE), UnicodeText()),
-        (re.compile(r"^((VAR)?CHAR|TEXT|STRING)", re.IGNORECASE), String()),
+        (
+            re.compile(r"^N((VAR)?CHAR|TEXT)", re.IGNORECASE),
+            UnicodeText(),
+            utils.GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^((VAR)?CHAR|TEXT|STRING)", re.IGNORECASE),
+            String(),
+            utils.GenericDataType.STRING,
+        ),
     )
 
     @classmethod

diff --git a/superset/db_engine_specs/postgres.py b/superset/db_engine_specs/postgres.py
@@ -21,6 +21,8 @@
 from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
 
 from pytz import _FixedOffset  # type: ignore
+from sqlalchemy import types
+from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION
 from sqlalchemy.dialects.postgresql.base import PGInspector
 
 from superset.db_engine_specs.base import BaseEngineSpec
@@ -45,6 +47,108 @@ class PostgresBaseEngineSpec(BaseEngineSpec):
     engine = ""
     engine_name = "PostgreSQL"
 
+    # SQLAlchemy types are instantiated on purpose: a bare type class is callable
+    # and `get_sqla_column_type` would invoke it with the regex match object.
+    column_type_mappings = (
+        (
+            re.compile(r"^smallint", re.IGNORECASE),
+            types.SMALLINT(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^integer", re.IGNORECASE),
+            types.INTEGER(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^bigint", re.IGNORECASE),
+            types.BIGINT(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^decimal", re.IGNORECASE),
+            types.DECIMAL(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^numeric", re.IGNORECASE),
+            types.NUMERIC(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^real", re.IGNORECASE),
+            types.REAL(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^double precision", re.IGNORECASE),
+            DOUBLE_PRECISION(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^smallserial", re.IGNORECASE),
+            types.SMALLINT(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^serial", re.IGNORECASE),
+            types.INTEGER(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^bigserial", re.IGNORECASE),
+            types.BIGINT(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^varchar", re.IGNORECASE),
+            types.VARCHAR(),
+            utils.GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^char", re.IGNORECASE),
+            types.CHAR(),
+            utils.GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^text", re.IGNORECASE),
+            types.TEXT(),
+            utils.GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^date", re.IGNORECASE),
+            types.DATE(),
+            utils.GenericDataType.TEMPORAL,
+        ),
+        # The first matching pattern wins, so longer prefixes must come first:
+        # "^time" also matches "timestamp" and "^timestamp" matches "timestamptz".
+        (
+            re.compile(r"^timestamptz", re.IGNORECASE),
+            types.TIMESTAMP(timezone=True),
+            utils.GenericDataType.TEMPORAL,
+        ),
+        (
+            re.compile(r"^timestamp", re.IGNORECASE),
+            types.TIMESTAMP(),
+            utils.GenericDataType.TEMPORAL,
+        ),
+        (
+            re.compile(r"^time", re.IGNORECASE),
+            types.TIME(),
+            utils.GenericDataType.TEMPORAL,
+        ),
+        (
+            re.compile(r"^interval", re.IGNORECASE),
+            types.Interval(),
+            utils.GenericDataType.TEMPORAL,
+        ),
+        (
+            re.compile(r"^boolean", re.IGNORECASE),
+            types.BOOLEAN(),
+            utils.GenericDataType.BOOLEAN,
+        ),
+    )
+
     _time_grain_expressions = {
         None: "{col}",
         "PT1S": "DATE_TRUNC('second', {col})",

diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py
@@ -356,31 +356,89 @@ def _show_columns(
         return columns
 
     column_type_mappings = (
-        (re.compile(r"^boolean.*", re.IGNORECASE), types.Boolean()),
-        (re.compile(r"^tinyint.*", re.IGNORECASE), TinyInteger()),
-        (re.compile(r"^smallint.*", re.IGNORECASE), types.SmallInteger()),
-        (re.compile(r"^integer.*", re.IGNORECASE), types.Integer()),
-        (re.compile(r"^bigint.*", re.IGNORECASE), types.BigInteger()),
-        (re.compile(r"^real.*", re.IGNORECASE), types.Float()),
-        (re.compile(r"^double.*", re.IGNORECASE), types.Float()),
-        (re.compile(r"^decimal.*", re.IGNORECASE), types.DECIMAL()),
+        (
+            re.compile(r"^boolean.*", re.IGNORECASE),
+            types.Boolean(),
+            utils.GenericDataType.BOOLEAN,
+        ),
+        (
+            re.compile(r"^tinyint.*", re.IGNORECASE),
+            TinyInteger(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^smallint.*", re.IGNORECASE),
+            types.SmallInteger(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^integer.*", re.IGNORECASE),
+            types.Integer(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^bigint.*", re.IGNORECASE),
+            types.BigInteger(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^real.*", re.IGNORECASE),
+            types.Float(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^double.*", re.IGNORECASE),
+            types.Float(),
+            utils.GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^decimal.*", re.IGNORECASE),
+            types.DECIMAL(),
+            utils.GenericDataType.NUMERIC,
+        ),
         (
             re.compile(r"^varchar(\((\d+)\))*$", re.IGNORECASE),
             lambda match: types.VARCHAR(int(match[2])) if match[2] else types.String(),
+            utils.GenericDataType.STRING,
         ),
         (
             re.compile(r"^char(\((\d+)\))*$", re.IGNORECASE),
             lambda match: types.CHAR(int(match[2])) if match[2] else types.CHAR(),
+            utils.GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^varbinary.*", re.IGNORECASE),
+            types.VARBINARY(),
+            utils.GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^json.*", re.IGNORECASE),
+            types.JSON(),
+            utils.GenericDataType.JSON,
+        ),
+        (
+            re.compile(r"^date.*", re.IGNORECASE),
+            types.DATE(),
+            utils.GenericDataType.TEMPORAL,
+        ),
+        (
+            re.compile(r"^timestamp.*", re.IGNORECASE),
+            types.TIMESTAMP(),
+            utils.GenericDataType.TEMPORAL,
+        ),
+        (
+            re.compile(r"^time.*", re.IGNORECASE),
+            types.Time(),
+            utils.GenericDataType.TEMPORAL,
+        ),
+        (
+            re.compile(r"^interval.*", re.IGNORECASE),
+            Interval(),
+            utils.GenericDataType.TEMPORAL,
         ),
-        (re.compile(r"^varbinary.*", re.IGNORECASE), types.VARBINARY()),
-        (re.compile(r"^json.*", re.IGNORECASE), types.JSON()),
-        (re.compile(r"^date.*", re.IGNORECASE), types.DATE()),
-        (re.compile(r"^timestamp.*", re.IGNORECASE), types.TIMESTAMP()),
-        (re.compile(r"^time.*", re.IGNORECASE), types.Time()),
-        (re.compile(r"^interval.*", re.IGNORECASE), Interval()),
-        (re.compile(r"^array.*", re.IGNORECASE), Array()),
-        (re.compile(r"^map.*", re.IGNORECASE), Map()),
-        (re.compile(r"^row.*", re.IGNORECASE), Row()),
+        (re.compile(r"^array.*", re.IGNORECASE), Array(), utils.GenericDataType.ARRAY),
+        (re.compile(r"^map.*", re.IGNORECASE), Map(), utils.GenericDataType.MAP),
+        (re.compile(r"^row.*", re.IGNORECASE), Row(), utils.GenericDataType.ROW),
     )
 
     @classmethod

diff --git a/superset/utils/core.py b/superset/utils/core.py
@@ -82,7 +82,7 @@
 from sqlalchemy.engine import Connection, Engine
 from sqlalchemy.engine.reflection import Inspector
 from sqlalchemy.sql.type_api import Variant
-from sqlalchemy.types import TEXT, TypeDecorator
+from sqlalchemy.types import TEXT, TypeDecorator, TypeEngine
 from typing_extensions import TypedDict
 
 import _thread  # pylint: disable=C0411
@@ -148,6 +148,10 @@ class GenericDataType(IntEnum):
     STRING = 1
     TEMPORAL = 2
     BOOLEAN = 3
+    ARRAY = 4
+    JSON = 5
+    MAP = 6
+    ROW = 7
 
 
 class ChartDataResultFormat(str, Enum):
@@ -306,6 +310,19 @@ class TemporalType(str, Enum):
     TIMESTAMP = "TIMESTAMP"
 
 
+class ColumnTypeSource(Enum):
+    GET_TABLE = 1
+    CURSOR_DESCRIPION = 2
+
+
+class ColumnSpec(NamedTuple):
+    sqla_type: Union[TypeEngine, str]
+    generic_type: GenericDataType
+    is_dttm: bool
+    normalized_column_name: Optional[str] = None
+    python_date_format: Optional[str] = None
+
+
 try:
     # Having might not have been imported.
     class DimSelector(Having):