-
-
Notifications
You must be signed in to change notification settings - Fork 352
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #135 from centerofci/infer_column_type_two
Infer column type two
- Loading branch information
Showing
4 changed files
with
161 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import pytest | ||
from sqlalchemy import Column, MetaData, Table | ||
from sqlalchemy import BOOLEAN, Numeric, NUMERIC, String, VARCHAR | ||
from sqlalchemy.schema import CreateSchema, DropSchema | ||
from db.engine import _add_custom_types_to_engine | ||
from db.types import base, inference, install | ||
|
||
TEST_SCHEMA = "test_schema" | ||
|
||
|
||
@pytest.fixture | ||
def engine_with_types(engine): | ||
_add_custom_types_to_engine(engine) | ||
return engine | ||
|
||
|
||
@pytest.fixture | ||
def temporary_testing_schema(engine_with_types): | ||
schema = TEST_SCHEMA | ||
with engine_with_types.begin() as conn: | ||
conn.execute(CreateSchema(schema)) | ||
yield engine_with_types, schema | ||
with engine_with_types.begin() as conn: | ||
conn.execute(DropSchema(schema, cascade=True, if_exists=True)) | ||
|
||
|
||
@pytest.fixture | ||
def engine_email_type(temporary_testing_schema): | ||
engine, schema = temporary_testing_schema | ||
install.install_mathesar_on_database(engine) | ||
yield engine, schema | ||
with engine.begin() as conn: | ||
conn.execute(DropSchema(base.SCHEMA, cascade=True, if_exists=True)) | ||
|
||
|
||
type_data_list = [ | ||
(Numeric, [0, 2, 1, 0], NUMERIC), | ||
(Numeric, [0, 1, 1, 0], BOOLEAN), | ||
(String, ["t", "false", "true", "f", "f"], BOOLEAN), | ||
(String, ["t", "false", "2", "0"], VARCHAR), | ||
(String, ["a", "cat", "mat", "bat"], VARCHAR), | ||
(String, ["2", "1", "0", "0"], NUMERIC), | ||
] | ||
|
||
|
||
@pytest.mark.parametrize("type_,value_list,expect_type", type_data_list) | ||
def test_type_inference(engine_email_type, type_, value_list, expect_type): | ||
engine, schema = engine_email_type | ||
TEST_TABLE = "test_table" | ||
TEST_COLUMN = "test_column" | ||
metadata = MetaData(bind=engine) | ||
input_table = Table( | ||
TEST_TABLE, | ||
metadata, | ||
Column(TEST_COLUMN, type_), | ||
schema=schema | ||
) | ||
input_table.create() | ||
for value in value_list: | ||
ins = input_table.insert(values=(value,)) | ||
with engine.begin() as conn: | ||
conn.execute(ins) | ||
inference.infer_column_type( | ||
schema, | ||
TEST_TABLE, | ||
TEST_COLUMN, | ||
engine | ||
) | ||
with engine.begin(): | ||
metadata = MetaData(bind=engine, schema=schema) | ||
actual_type = Table( | ||
TEST_TABLE, metadata, schema=schema, autoload_with=engine, | ||
).columns[TEST_COLUMN].type.__class__ | ||
assert actual_type == expect_type |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import logging | ||
from sqlalchemy import MetaData, Table | ||
from sqlalchemy import VARCHAR, TEXT, Text | ||
from sqlalchemy.exc import DatabaseError | ||
from db.types import alteration | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
TYPE_INFERENCE_DAG = { | ||
alteration.BOOLEAN: [], | ||
alteration.EMAIL: [], | ||
alteration.INTERVAL: [], | ||
alteration.NUMERIC: [ | ||
alteration.BOOLEAN, | ||
], | ||
alteration.STRING: [ | ||
alteration.BOOLEAN, | ||
alteration.NUMERIC, | ||
alteration.INTERVAL, | ||
alteration.EMAIL, | ||
], | ||
} | ||
|
||
|
||
class DagCycleError(Exception): | ||
pass | ||
|
||
|
||
def infer_column_type( | ||
schema, | ||
table_name, | ||
column_name, | ||
engine, | ||
depth=0, | ||
type_inference_dag=TYPE_INFERENCE_DAG, | ||
): | ||
if depth > 100: | ||
raise DagCycleError("The type_inference_dag likely has a cycle") | ||
supported_types = alteration.get_supported_alter_column_types(engine) | ||
reverse_type_map = { | ||
Text: alteration.STRING, | ||
TEXT: alteration.STRING, | ||
VARCHAR: alteration.STRING, | ||
} | ||
reverse_type_map.update({v: k for k, v in supported_types.items()}) | ||
with engine.begin(): | ||
metadata = MetaData(bind=engine, schema=schema) | ||
column_type = Table( | ||
table_name, metadata, schema=schema, autoload_with=engine, | ||
).columns[column_name].type.__class__ | ||
column_type_str = reverse_type_map.get(column_type) | ||
logger.debug(f"column_type_str: {column_type_str}") | ||
for type_str in type_inference_dag.get(column_type_str, []): | ||
try: | ||
alteration.alter_column_type( | ||
schema, table_name, column_name, type_str, engine, | ||
) | ||
logger.info(f"Column {column_name} altered to type {type_str}") | ||
column_type = infer_column_type( | ||
schema, | ||
table_name, | ||
column_name, | ||
engine, | ||
depth=depth + 1, | ||
type_inference_dag=type_inference_dag, | ||
) | ||
break | ||
# It's expected we catch this error when the test to see whether | ||
# a type is appropriate for a column fails. | ||
except DatabaseError: | ||
logger.info( | ||
f"Cannot alter column {column_name} to type {type_str}" | ||
) | ||
return column_type |