From d8957ea4ea3810b4b4ae590f88e19ad5591cccad Mon Sep 17 00:00:00 2001
From: Brent Moran <brentmoran@gmail.com>
Date: Wed, 19 May 2021 18:05:27 +0800
Subject: [PATCH 1/6] bring over inference code from old branch

The merge with master was too hairy, started a new branch.
---
 db/tests/types/test_inference.py | 73 +++++++++++++++++++++++++++++++
 db/types/alteration.py           | 21 ++++-----
 db/types/inference.py            | 75 ++++++++++++++++++++++++++++++++
 3 files changed, 159 insertions(+), 10 deletions(-)
 create mode 100644 db/tests/types/test_inference.py
 create mode 100644 db/types/inference.py

diff --git a/db/tests/types/test_inference.py b/db/tests/types/test_inference.py
new file mode 100644
index 0000000000..44b0a26dae
--- /dev/null
+++ b/db/tests/types/test_inference.py
@@ -0,0 +1,73 @@
+import pytest
+from sqlalchemy import Column, MetaData, Table
+from sqlalchemy import BOOLEAN, String, NUMERIC, VARCHAR
+from sqlalchemy.schema import CreateSchema, DropSchema
+from db.engine import _add_custom_types_to_engine
+from db.types import base, inference, install
+
+TEST_SCHEMA = "test_schema"
+
+
+@pytest.fixture
+def engine_with_types(engine):
+    _add_custom_types_to_engine(engine)
+    return engine
+
+
+@pytest.fixture
+def temporary_testing_schema(engine_with_types):
+    schema = TEST_SCHEMA
+    with engine_with_types.begin() as conn:
+        conn.execute(CreateSchema(schema))
+    yield engine_with_types, schema
+    with engine_with_types.begin() as conn:
+        conn.execute(DropSchema(schema, cascade=True, if_exists=True))
+
+
+@pytest.fixture
+def engine_email_type(temporary_testing_schema):
+    engine, schema = temporary_testing_schema
+    install.install_mathesar_on_database(engine)
+    yield engine, schema
+    with engine.begin() as conn:
+        conn.execute(DropSchema(base.SCHEMA, cascade=True, if_exists=True))
+
+
+type_data_list = [
+    (String, ["t", "false", "true", "f", "f"], BOOLEAN),
+    (String, ["t", "false", "2", "0"], VARCHAR),
+    (String, ["a", "cat", "mat", "bat"], VARCHAR),
+    (String, ["2", "1", "0", "0"], NUMERIC),
+]
+
+
+@pytest.mark.parametrize("type_,value_list,expect_type", type_data_list)
+def test_type_inference(engine_email_type, type_, value_list, expect_type):
+    engine, schema = engine_email_type
+    TEST_TABLE = "test_table"
+    TEST_COLUMN = "test_column"
+    metadata = MetaData(bind=engine)
+    print(value_list)
+    input_table = Table(
+        TEST_TABLE,
+        metadata,
+        Column(TEST_COLUMN, type_),
+        schema=schema
+    )
+    input_table.create()
+    for value in value_list:
+        ins = input_table.insert(values=(value,))
+        with engine.begin() as conn:
+            conn.execute(ins)
+    inference.infer_column_type(
+        schema,
+        TEST_TABLE,
+        TEST_COLUMN,
+        engine
+    )
+    with engine.begin():
+        metadata = MetaData(bind=engine, schema=schema)
+        actual_type = Table(
+            TEST_TABLE, metadata, schema=schema, autoload_with=engine,
+        ).columns[TEST_COLUMN].type.__class__
+    assert actual_type == expect_type
diff --git a/db/types/alteration.py b/db/types/alteration.py
index 2cb115f6ae..ef0dea79f2 100644
--- a/db/types/alteration.py
+++ b/db/types/alteration.py
@@ -2,9 +2,10 @@
 from db.types import base, email
 
 BOOLEAN = "boolean"
-EMAIL = email.QUALIFIED_EMAIL
+EMAIL = "email"
 INTERVAL = "interval"
 NUMERIC = "numeric"
+STRING = "string"
 TEXT = "text"
 VARCHAR = "varchar"
 
@@ -13,12 +14,12 @@ def get_supported_alter_column_types(engine):
     dialect_types = engine.dialect.ischema_names
     type_map = {
         # Default Postgres types
-        "boolean": dialect_types.get("boolean"),
-        "interval": dialect_types.get("interval"),
-        "numeric": dialect_types.get("numeric"),
-        "string": dialect_types.get("name"),
+        BOOLEAN: dialect_types.get("boolean"),
+        INTERVAL: dialect_types.get("interval"),
+        NUMERIC: dialect_types.get("numeric"),
+        STRING: dialect_types.get("name"),
         # Custom Mathesar types
-        "email": dialect_types.get(email.QUALIFIED_EMAIL)
+        EMAIL: dialect_types.get(email.QUALIFIED_EMAIL)
     }
     return {k: v for k, v in type_map.items() if v is not None}
 
@@ -141,18 +142,18 @@ def create_email_casts(engine):
                      DOMAIN).
     """
     type_body_map = {
-        EMAIL: """
+        email.QUALIFIED_EMAIL: """
         BEGIN
           RETURN $1;
         END;
         """,
         TEXT: f"""
         BEGIN
-          RETURN $1::{EMAIL};
+          RETURN $1::{email.QUALIFIED_EMAIL};
         END;
         """,
     }
-    create_cast_functions(EMAIL, type_body_map, engine)
+    create_cast_functions(email.QUALIFIED_EMAIL, type_body_map, engine)
 
 
 def create_interval_casts(engine):
@@ -209,7 +210,7 @@ def create_varchar_casts(engine):
           RETURN $1::{VARCHAR};
         END;
         """,
-        EMAIL: f"""
+        email.QUALIFIED_EMAIL: f"""
         BEGIN
           RETURN $1::{VARCHAR};
         END;
diff --git a/db/types/inference.py b/db/types/inference.py
new file mode 100644
index 0000000000..3f2498c00c
--- /dev/null
+++ b/db/types/inference.py
@@ -0,0 +1,75 @@
+import logging
+from sqlalchemy import MetaData, Table
+from sqlalchemy import VARCHAR, TEXT, Text
+from db.types import alteration
+
+logging.basicConfig(
+    format='%(asctime)s - %(name)s - %(levelname)s:  %(message)s',
+    level=logging.INFO
+)
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+TYPE_INFERENCE_DAG = {
+    alteration.BOOLEAN: [],
+    alteration.EMAIL: [],
+    alteration.INTERVAL: [],
+    alteration.NUMERIC: [],
+    alteration.STRING: [
+        alteration.BOOLEAN,
+        alteration.NUMERIC,
+        alteration.INTERVAL,
+        alteration.EMAIL,
+    ],
+}
+
+
+class DagCycleError(Exception):
+    pass
+
+
+def infer_column_type(
+        schema,
+        table_name,
+        column_name,
+        engine,
+        depth=0,
+        type_inference_dag=TYPE_INFERENCE_DAG,
+):
+    if depth > 100:
+        raise DagCycleError("The type_inference_dag likely has a cycle")
+    supported_types = alteration.get_supported_alter_column_types(engine)
+    reverse_type_map = {
+        Text: alteration.STRING,
+        TEXT: alteration.STRING,
+        VARCHAR: alteration.STRING,
+    }
+    reverse_type_map.update({v: k for k, v in supported_types.items()})
+    with engine.begin():
+        metadata = MetaData(bind=engine, schema=schema)
+        column_type = Table(
+            table_name, metadata, schema=schema, autoload_with=engine,
+        ).columns[column_name].type.__class__
+    column_type_str = reverse_type_map.get(column_type)
+    logger.debug(f"column_type_str: {column_type_str}")
+    for type_str in type_inference_dag.get(column_type_str, []):
+        try:
+            alteration.alter_column_type(
+                schema, table_name, column_name, type_str, engine,
+            )
+            logger.info(f"Column {column_name} altered to type {type_str}")
+            column_type = infer_column_type(
+                schema,
+                table_name,
+                column_name,
+                engine,
+                depth=depth + 1,
+                type_inference_dag=type_inference_dag,
+            )
+            break
+        except Exception:
+            logger.info(
+                f"Cannot alter column {column_name} to type {type_str}"
+            )
+    return column_type

From 1bcdeb822b7a5034b1edd9ee0cc0bca23bed794f Mon Sep 17 00:00:00 2001
From: Brent Moran <brentmoran@gmail.com>
Date: Wed, 19 May 2021 18:26:40 +0800
Subject: [PATCH 2/6] allow '0' and '1' strings to cast to boolean

---
 db/tests/types/test_alteration.py | 2 --
 db/types/alteration.py            | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/db/tests/types/test_alteration.py b/db/tests/types/test_alteration.py
index 67f820c743..74a2ca040e 100644
--- a/db/tests/types/test_alteration.py
+++ b/db/tests/types/test_alteration.py
@@ -153,8 +153,6 @@ def test_alter_column_type_casts_column_data(
 
 
 type_test_bad_data_list = [
-    (String, "boolean", "0"),
-    (String, "boolean", "1"),
     (String, "boolean", "cat"),
     (String, "interval", "1 potato"),
     (String, "interval", "3"),
diff --git a/db/types/alteration.py b/db/types/alteration.py
index ef0dea79f2..385c464aed 100644
--- a/db/types/alteration.py
+++ b/db/types/alteration.py
@@ -80,8 +80,8 @@ def create_boolean_casts(engine):
         DECLARE
         istrue {BOOLEAN};
         BEGIN
-          SELECT lower($1)='t' OR lower($1)='true' INTO istrue;
-          IF istrue OR lower($1)='f' OR lower($1)='false' THEN
+          SELECT lower($1)='t' OR lower($1)='true' OR $1='1' INTO istrue;
+          IF istrue OR lower($1)='f' OR lower($1)='false' OR $1='0' THEN
             RETURN istrue;
           END IF;
           {not_bool_exception_str}

From a9cc190b9ffefd55411092ba276f386c1b4349ff Mon Sep 17 00:00:00 2001
From: Brent Moran <brentmoran@gmail.com>
Date: Wed, 19 May 2021 18:27:29 +0800
Subject: [PATCH 3/6] let numerics be inferred to be boolean

---
 db/types/inference.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/db/types/inference.py b/db/types/inference.py
index 3f2498c00c..36f7e61c12 100644
--- a/db/types/inference.py
+++ b/db/types/inference.py
@@ -15,7 +15,9 @@
     alteration.BOOLEAN: [],
     alteration.EMAIL: [],
     alteration.INTERVAL: [],
-    alteration.NUMERIC: [],
+    alteration.NUMERIC: [
+        alteration.BOOLEAN,
+    ],
     alteration.STRING: [
         alteration.BOOLEAN,
         alteration.NUMERIC,

From 82a9db8a034dbce7523c148d4fd99ca830e8acd3 Mon Sep 17 00:00:00 2001
From: Brent Moran <brentmoran@gmail.com>
Date: Wed, 19 May 2021 21:48:54 +0800
Subject: [PATCH 4/6] add tests to make sure non-0,1 numerics stay numeric

---
 db/tests/types/test_inference.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/db/tests/types/test_inference.py b/db/tests/types/test_inference.py
index 44b0a26dae..f21cf83a9d 100644
--- a/db/tests/types/test_inference.py
+++ b/db/tests/types/test_inference.py
@@ -1,6 +1,6 @@
 import pytest
 from sqlalchemy import Column, MetaData, Table
-from sqlalchemy import BOOLEAN, String, NUMERIC, VARCHAR
+from sqlalchemy import BOOLEAN, Numeric, NUMERIC, String, VARCHAR
 from sqlalchemy.schema import CreateSchema, DropSchema
 from db.engine import _add_custom_types_to_engine
 from db.types import base, inference, install
@@ -34,6 +34,8 @@ def engine_email_type(temporary_testing_schema):
 
 
 type_data_list = [
+    (Numeric, [0, 2, 1, 0], NUMERIC),
+    (Numeric, [0, 1, 1, 0], BOOLEAN),
     (String, ["t", "false", "true", "f", "f"], BOOLEAN),
     (String, ["t", "false", "2", "0"], VARCHAR),
     (String, ["a", "cat", "mat", "bat"], VARCHAR),

From 4f1a3ad68d896e84ca8fcd8650194e8a4227bea0 Mon Sep 17 00:00:00 2001
From: Brent Moran <brentmoran@gmail.com>
Date: Thu, 20 May 2021 17:24:16 +0800
Subject: [PATCH 5/6] clean up logging setup, remove debug print

---
 db/tests/types/test_inference.py | 1 -
 db/types/inference.py            | 6 ------
 2 files changed, 7 deletions(-)

diff --git a/db/tests/types/test_inference.py b/db/tests/types/test_inference.py
index f21cf83a9d..7e75ad6270 100644
--- a/db/tests/types/test_inference.py
+++ b/db/tests/types/test_inference.py
@@ -49,7 +49,6 @@ def test_type_inference(engine_email_type, type_, value_list, expect_type):
     TEST_TABLE = "test_table"
     TEST_COLUMN = "test_column"
     metadata = MetaData(bind=engine)
-    print(value_list)
     input_table = Table(
         TEST_TABLE,
         metadata,
diff --git a/db/types/inference.py b/db/types/inference.py
index 36f7e61c12..f9c4296fdd 100644
--- a/db/types/inference.py
+++ b/db/types/inference.py
@@ -3,13 +3,7 @@
 from sqlalchemy import VARCHAR, TEXT, Text
 from db.types import alteration
 
-logging.basicConfig(
-    format='%(asctime)s - %(name)s - %(levelname)s:  %(message)s',
-    level=logging.INFO
-)
-
 logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)
 
 TYPE_INFERENCE_DAG = {
     alteration.BOOLEAN: [],

From 851009cfe72935adbc2f086fded0daba45d8c096 Mon Sep 17 00:00:00 2001
From: Brent Moran <brentmoran@gmail.com>
Date: Thu, 20 May 2021 17:57:08 +0800
Subject: [PATCH 6/6] use more specific error for control flow

---
 db/types/inference.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/db/types/inference.py b/db/types/inference.py
index f9c4296fdd..e7965952d4 100644
--- a/db/types/inference.py
+++ b/db/types/inference.py
@@ -1,6 +1,7 @@
 import logging
 from sqlalchemy import MetaData, Table
 from sqlalchemy import VARCHAR, TEXT, Text
+from sqlalchemy.exc import DatabaseError
 from db.types import alteration
 
 logger = logging.getLogger(__name__)
@@ -64,7 +65,9 @@ def infer_column_type(
                 type_inference_dag=type_inference_dag,
             )
             break
-        except Exception:
+        # It's expected we catch this error when the test to see whether
+        # a type is appropriate for a column fails.
+        except DatabaseError:
             logger.info(
                 f"Cannot alter column {column_name} to type {type_str}"
             )