-
Notifications
You must be signed in to change notification settings - Fork 14.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(explore): Postgres datatype conversion #13294
Changes from 10 commits
70572c8
8e330db
83f3996
d6c4c1c
35fa495
c00e4c3
488a840
c894b90
2fb975b
82a8c9d
4b8d0ec
ddcc14a
fcb5edc
010e50e
32f58a8
ebcbb53
1bdebbf
2f341b9
39cc3b3
d9afba7
1050974
1a799df
b92e2ac
e630335
c9b5e56
276c820
8430ef3
0189072
bfdc994
8e1d813
f089e9a
7292e37
7e0d4d1
b5f6244
1e06266
4336ae5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,7 +41,7 @@ | |
import sqlparse | ||
from flask import g | ||
from flask_babel import lazy_gettext as _ | ||
from sqlalchemy import column, DateTime, select | ||
from sqlalchemy import column, DateTime, select, types | ||
from sqlalchemy.engine.base import Engine | ||
from sqlalchemy.engine.interfaces import Compiled, Dialect | ||
from sqlalchemy.engine.reflection import Inspector | ||
|
@@ -57,6 +57,7 @@ | |
from superset.models.sql_lab import Query | ||
from superset.sql_parse import ParsedQuery, Table | ||
from superset.utils import core as utils | ||
from superset.utils.core import ColumnSpec | ||
|
||
if TYPE_CHECKING: | ||
# prevent circular imports | ||
|
@@ -145,7 +146,12 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods | |
_date_trunc_functions: Dict[str, str] = {} | ||
_time_grain_expressions: Dict[Optional[str], str] = {} | ||
column_type_mappings: Tuple[ | ||
Tuple[Pattern[str], Union[TypeEngine, Callable[[Match[str]], TypeEngine]]], ..., | ||
Tuple[ | ||
Pattern[str], | ||
Union[TypeEngine, Callable[[Match[str]], TypeEngine]], | ||
utils.GenericDataType, | ||
], | ||
..., | ||
] = () | ||
time_groupby_inline = False | ||
limit_method = LimitMethod.FORCE_LIMIT | ||
|
@@ -967,25 +973,27 @@ def make_label_compatible(cls, label: str) -> Union[str, quoted_name]: | |
return label_mutated | ||
|
||
@classmethod | ||
def get_sqla_column_type(cls, type_: Optional[str]) -> Optional[TypeEngine]: | ||
def get_sqla_column_type( | ||
cls, column_type: Optional[str] | ||
) -> Tuple[Union[TypeEngine, utils.GenericDataType, None]]: | ||
""" | ||
Return a sqlalchemy native column type that corresponds to the column type | ||
defined in the data source (return None to use default type inferred by | ||
SQLAlchemy). Override `column_type_mappings` for specific needs | ||
(see MSSQL for example of NCHAR/NVARCHAR handling). | ||
|
||
:param type_: Column type returned by inspector | ||
:param column_type: Column type returned by inspector | ||
:return: SqlAlchemy column type | ||
""" | ||
if not type_: | ||
return None | ||
for regex, sqla_type in cls.column_type_mappings: | ||
match = regex.match(type_) | ||
if not column_type: | ||
return None, None | ||
for regex, sqla_type, generic_type in cls.column_type_mappings: | ||
match = regex.match(column_type) | ||
if match: | ||
if callable(sqla_type): | ||
return sqla_type(match) | ||
return sqla_type | ||
return None | ||
return sqla_type(match), generic_type | ||
villebro marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return sqla_type, generic_type | ||
return None, None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For cases where we want to return an empty value (=no match was made), I'd perhaps prefer returning a pure |
||
|
||
@staticmethod | ||
def _mutate_label(label: str) -> str: | ||
|
@@ -1097,3 +1105,18 @@ def get_extra_params(database: "Database") -> Dict[str, Any]: | |
def is_readonly_query(cls, parsed_query: ParsedQuery) -> bool: | ||
"""Pessimistic readonly, 100% sure statement won't mutate anything""" | ||
return parsed_query.is_select() or parsed_query.is_explain() | ||
|
||
def get_column_spec( | ||
self, | ||
native_type: str, | ||
source: utils.ColumnTypeSource = utils.ColumnTypeSource.GET_TABLE, | ||
) -> utils.ColumnSpec: | ||
|
||
column_type, generic_type = self.get_sqla_column_type(native_type) | ||
is_dttm = generic_type == utils.GenericDataType.TEMPORAL | ||
|
||
column_spec = ColumnSpec( | ||
sqla_type=column_type, generic_type=generic_type, is_dttm=is_dttm | ||
) | ||
|
||
return column_spec |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,8 +78,16 @@ def fetch_data( | |
return cls.pyodbc_rows_to_tuples(data) | ||
|
||
column_type_mappings = ( | ||
(re.compile(r"^N((VAR)?CHAR|TEXT)", re.IGNORECASE), UnicodeText()), | ||
(re.compile(r"^((VAR)?CHAR|TEXT|STRING)", re.IGNORECASE), String()), | ||
( | ||
re.compile(r"^N((VAR)?CHAR|TEXT)", re.IGNORECASE), | ||
UnicodeText(), | ||
utils.GenericDataType.STRING, | ||
), | ||
( | ||
re.compile(r"^((VAR)?CHAR|TEXT|STRING)", re.IGNORECASE), | ||
String(), | ||
utils.GenericDataType.STRING, | ||
), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would probably design this so that an engine spec can extend the base mapping. In the case of MSSQL, I believe the base mapping is a good fallback. Also, we might consider incorporating these types into the base spec, as I assume fairly many engines support N-prefixed character types, and some of those engines might also benefit from the |
||
) | ||
|
||
@classmethod | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,8 @@ | |
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING | ||
|
||
from pytz import _FixedOffset # type: ignore | ||
from sqlalchemy import types | ||
from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION | ||
from sqlalchemy.dialects.postgresql.base import PGInspector | ||
|
||
from superset.db_engine_specs.base import BaseEngineSpec | ||
|
@@ -45,6 +47,96 @@ class PostgresBaseEngineSpec(BaseEngineSpec): | |
engine = "" | ||
engine_name = "PostgreSQL" | ||
|
||
column_type_mappings = ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it make sense to consolidate the structure of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes - this is the ultimate objective. However, we want to leave the existing structures untouched until we're certain this doesn't break existing functionality. We'll be including other types in this mapping that are missing in |
||
( | ||
re.compile(r"^smallint", re.IGNORECASE), | ||
types.SMALLINT, | ||
utils.GenericDataType.NUMERIC, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: can we import There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But, is adding There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ktmud Yes, it could've been mapped as you proposed. The reason behind the current way is that if we get multiple matches we can prioritise those at the start of the list without necessarily sorting the results for the most part. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand the need to use a list to have priority for RegExp matching, but will there be a case where two SQLA types may be matched to different |
||
), | ||
( | ||
re.compile(r"^integer", re.IGNORECASE), | ||
types.INTEGER, | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^bigint", re.IGNORECASE), | ||
types.BIGINT, | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^decimal", re.IGNORECASE), | ||
types.DECIMAL, | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^numeric", re.IGNORECASE), | ||
types.NUMERIC, | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^real", re.IGNORECASE), | ||
types.REAL, | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^double precision", re.IGNORECASE), | ||
DOUBLE_PRECISION, | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^smallserial", re.IGNORECASE), | ||
types.SMALLINT, | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^serial", re.IGNORECASE), | ||
types.INTEGER, | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^bigserial", re.IGNORECASE), | ||
types.BIGINT, | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^varchar", re.IGNORECASE), | ||
types.VARCHAR, | ||
utils.GenericDataType.STRING, | ||
), | ||
(re.compile(r"^char", re.IGNORECASE), types.CHAR, utils.GenericDataType.STRING), | ||
(re.compile(r"^text", re.IGNORECASE), types.TEXT, utils.GenericDataType.STRING), | ||
( | ||
re.compile(r"^date", re.IGNORECASE), | ||
types.DATE, | ||
utils.GenericDataType.TEMPORAL, | ||
), | ||
( | ||
re.compile(r"^time", re.IGNORECASE), | ||
types.TIME, | ||
utils.GenericDataType.TEMPORAL, | ||
), | ||
( | ||
re.compile(r"^timestamp", re.IGNORECASE), | ||
types.TIMESTAMP, | ||
utils.GenericDataType.TEMPORAL, | ||
), | ||
( | ||
re.compile(r"^timestamptz", re.IGNORECASE), | ||
types.TIMESTAMP(timezone=True), | ||
utils.GenericDataType.TEMPORAL, | ||
), | ||
( | ||
re.compile(r"^interval", re.IGNORECASE), | ||
types.Interval, | ||
utils.GenericDataType.TEMPORAL, | ||
), | ||
( | ||
re.compile(r"^boolean", re.IGNORECASE), | ||
types.BOOLEAN, | ||
utils.GenericDataType.BOOLEAN, | ||
), | ||
) | ||
|
||
_time_grain_expressions = { | ||
None: "{col}", | ||
"PT1S": "DATE_TRUNC('second', {col})", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -356,31 +356,89 @@ def _show_columns( | |
return columns | ||
|
||
column_type_mappings = ( | ||
(re.compile(r"^boolean.*", re.IGNORECASE), types.Boolean()), | ||
(re.compile(r"^tinyint.*", re.IGNORECASE), TinyInteger()), | ||
(re.compile(r"^smallint.*", re.IGNORECASE), types.SmallInteger()), | ||
(re.compile(r"^integer.*", re.IGNORECASE), types.Integer()), | ||
(re.compile(r"^bigint.*", re.IGNORECASE), types.BigInteger()), | ||
(re.compile(r"^real.*", re.IGNORECASE), types.Float()), | ||
(re.compile(r"^double.*", re.IGNORECASE), types.Float()), | ||
(re.compile(r"^decimal.*", re.IGNORECASE), types.DECIMAL()), | ||
( | ||
re.compile(r"^boolean.*", re.IGNORECASE), | ||
types.Boolean(), | ||
utils.GenericDataType.BOOLEAN, | ||
), | ||
( | ||
re.compile(r"^tinyint.*", re.IGNORECASE), | ||
TinyInteger(), | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^smallint.*", re.IGNORECASE), | ||
types.SmallInteger(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed it to keep it consistent |
||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^integer.*", re.IGNORECASE), | ||
types.Integer(), | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^bigint.*", re.IGNORECASE), | ||
types.BigInteger(), | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^real.*", re.IGNORECASE), | ||
types.Float(), | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^double.*", re.IGNORECASE), | ||
types.Float(), | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^decimal.*", re.IGNORECASE), | ||
types.DECIMAL(), | ||
utils.GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^varchar(\((\d+)\))*$", re.IGNORECASE), | ||
lambda match: types.VARCHAR(int(match[2])) if match[2] else types.String(), | ||
utils.GenericDataType.STRING, | ||
), | ||
( | ||
re.compile(r"^char(\((\d+)\))*$", re.IGNORECASE), | ||
lambda match: types.CHAR(int(match[2])) if match[2] else types.CHAR(), | ||
utils.GenericDataType.STRING, | ||
), | ||
( | ||
re.compile(r"^varbinary.*", re.IGNORECASE), | ||
types.VARBINARY(), | ||
utils.GenericDataType.STRING, | ||
), | ||
( | ||
re.compile(r"^json.*", re.IGNORECASE), | ||
types.JSON(), | ||
utils.GenericDataType.STRING, | ||
), | ||
( | ||
re.compile(r"^date.*", re.IGNORECASE), | ||
types.DATE(), | ||
utils.GenericDataType.TEMPORAL, | ||
), | ||
( | ||
re.compile(r"^timestamp.*", re.IGNORECASE), | ||
types.TIMESTAMP(), | ||
utils.GenericDataType.TEMPORAL, | ||
), | ||
( | ||
re.compile(r"^time.*", re.IGNORECASE), | ||
types.Time(), | ||
utils.GenericDataType.TEMPORAL, | ||
), | ||
( | ||
re.compile(r"^interval.*", re.IGNORECASE), | ||
Interval(), | ||
utils.GenericDataType.TEMPORAL, | ||
), | ||
(re.compile(r"^varbinary.*", re.IGNORECASE), types.VARBINARY()), | ||
(re.compile(r"^json.*", re.IGNORECASE), types.JSON()), | ||
(re.compile(r"^date.*", re.IGNORECASE), types.DATE()), | ||
(re.compile(r"^timestamp.*", re.IGNORECASE), types.TIMESTAMP()), | ||
(re.compile(r"^time.*", re.IGNORECASE), types.Time()), | ||
(re.compile(r"^interval.*", re.IGNORECASE), Interval()), | ||
(re.compile(r"^array.*", re.IGNORECASE), Array()), | ||
(re.compile(r"^map.*", re.IGNORECASE), Map()), | ||
(re.compile(r"^row.*", re.IGNORECASE), Row()), | ||
(re.compile(r"^array.*", re.IGNORECASE), Array(), utils.GenericDataType.STRING), | ||
(re.compile(r"^map.*", re.IGNORECASE), Map(), utils.GenericDataType.STRING), | ||
(re.compile(r"^row.*", re.IGNORECASE), Row(), utils.GenericDataType.STRING), | ||
) | ||
|
||
@classmethod | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would consider just leaving the signature of this method unchanged for now; otherwise I'd just remove this one and start using the new
get_column_spec
method and implement that whereverget_sqla_column_type
is being used, as that's the end state we want to aim for.