Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #30: Improve type handling #36

Merged
merged 5 commits into from
Jan 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,15 @@ RUN wget -q https://github.com/nalgeon/sqlean/releases/download/0.15.2/text.so

WORKDIR /opt/dbt-sqlite/src

COPY . .
COPY setup.py .
COPY dbt ./dbt

RUN pip install .

COPY run_tests.sh .
COPY pytest.ini .
COPY tests ./tests

ENV TESTDATA=/opt/dbt-sqlite/testdata

RUN mkdir $TESTDATA
Expand Down
16 changes: 9 additions & 7 deletions dbt/adapters/sqlite/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ class SQLiteAdapter(SQLAdapter):
def date_function(cls):
return 'date()'

# sqlite reports the exact string (including case) used when declaring a column of a certain type
# sqlite reports the exact string (including case) used when declaring a column of a certain type.
# the types here should correspond to affinities recognized by SQLite.
# see https://www.sqlite.org/datatype3.html

@classmethod
def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str:
Expand All @@ -33,23 +35,23 @@ def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str:
@classmethod
def convert_number_type(cls, agate_table: agate.Table, col_idx: int) -> str:
decimals = agate_table.aggregate(agate.MaxPrecision(col_idx)) # type: ignore[attr-defined]
return "NUMERIC" if decimals else "INT"
return "REAL" if decimals else "INT"

@classmethod
def convert_boolean_type(cls, agate_table: agate.Table, col_idx: int) -> str:
return "BOOLEAN"
return "INT"

@classmethod
def convert_datetime_type(cls, agate_table: agate.Table, col_idx: int) -> str:
return "TIMESTAMP WITHOUT TIMEZONE"
return "TEXT"

@classmethod
def convert_date_type(cls, agate_table: agate.Table, col_idx: int) -> str:
return "DATE"
return "TEXT"

@classmethod
def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str:
return "TIME"
return "TEXT"

def get_live_relation_type(self, relation):
"""
Expand Down Expand Up @@ -102,7 +104,7 @@ def get_columns_in_relation(self, relation):
for row in results:
new_row = [
row[1],
row[2] or 'TEXT',
row[2] or 'UNKNOWN',
None,
None,
None
Expand Down
4 changes: 2 additions & 2 deletions run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ git clone --depth 1 https://github.com/dbt-labs/jaffle_shop.git

cd jaffle_shop

git pull

mkdir -p /tmp/jaffle_shop

mkdir -p $HOME/.dbt
Expand Down Expand Up @@ -57,4 +55,6 @@ jaffle_shop:

EOF

dbt seed
dbt run
dbt docs generate
2 changes: 2 additions & 0 deletions run_tests_docker.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash

set -e

docker build . -t dbt-sqlite

docker run \
Expand Down
107 changes: 9 additions & 98 deletions tests/functional/adapter/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from dbt.tests.adapter.basic.test_snapshot_check_cols import BaseSnapshotCheckCols
from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp
from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod
from dbt.tests.adapter.basic.test_docs_generate import BaseDocsGenerate, BaseDocsGenReferences
from dbt.tests.adapter.basic.test_docs_generate import BaseDocsGenerate, BaseDocsGenReferences, models__schema_yml, models__readme_md


class TestSimpleMaterializationsSqlite(BaseSimpleMaterializations):
Expand Down Expand Up @@ -58,101 +58,11 @@ class TestBaseAdapterMethodSqlite(BaseAdapterMethod):
pass


@pytest.mark.skip("TODO: fix data type problems")
class TestDocsGenerateSqlite(BaseDocsGenerate):
"""
Change underlying test to avoid having views referencing views in other schemas, which is a no-no in sqlite.
"""

models__schema_yml = """
version: 2

models:
- name: model
description: "The test model"
docs:
show: false
columns:
- name: id
description: The user ID number
tests:
- unique
- not_null
- name: first_name
description: The user's first name
- name: email
description: The user's email
- name: ip_address
description: The user's IP address
- name: updated_at
description: The last time this user's email was updated
tests:
- test.nothing

- name: second_model
description: "The second test model"
docs:
show: false
columns:
- name: id
description: The user ID number
- name: first_name
description: The user's first name
- name: email
description: The user's email
- name: ip_address
description: The user's IP address
- name: updated_at
description: The last time this user's email was updated


sources:
- name: my_source
description: "My source"
loader: a_loader
schema: "{{ var('test_schema') }}"
tables:
- name: my_table
description: "My table"
identifier: seed
quoting:
identifier: True
columns:
- name: id
description: "An ID field"


exposures:
- name: simple_exposure
type: dashboard
depends_on:
- ref('model')
- source('my_source', 'my_table')
owner:
email: [email protected]
- name: notebook_exposure
type: notebook
depends_on:
- ref('model')
- ref('second_model')
owner:
email: [email protected]
name: Some name
description: >
A description of the complex exposure
maturity: medium
meta:
tool: 'my_tool'
languages:
- python
tags: ['my_department']
url: http://example.com/notebook/1
"""

models__readme_md = """
This is a readme.md file with {{ invalid-ish jinja }} in it
"""

models__model_sql = """
{{
config(
Expand All @@ -178,9 +88,9 @@ class TestDocsGenerateSqlite(BaseDocsGenerate):
def models(self):
# replace models with
return {
"schema.yml": self.models__schema_yml,
"schema.yml": models__schema_yml,
"second_model.sql": self.models__second_model_sql,
"readme.md": self.models__readme_md,
"readme.md": models__readme_md,
"model.sql": self.models__model_sql,
}

Expand All @@ -191,7 +101,7 @@ def expected_catalog(self, project):
role=None,
id_type="INT",
text_type="TEXT",
time_type="DATETIME",
time_type="TEXT",
view_type="view",
table_type="table",
model_stats=no_stats(),
Expand All @@ -205,20 +115,21 @@ def expected_catalog(self, project):
return expected_catalog


@pytest.mark.skip("TODO: fix data type problems")
@pytest.mark.skip("TODO: not sure why 'index' values are off by 1")
class TestDocsGenReferencesSqlite(BaseDocsGenReferences):

@pytest.fixture(scope="class")
def expected_catalog(self, project):
return expected_references_catalog(
project,
role=None,
id_type="INT",
text_type="TEXT",
time_type="DATETIME",
time_type="TEXT",
bigint_type="bigint",
view_type="view",
table_type="table",
model_stats=no_stats(),
seed_stats=no_stats(),
view_summary_stats=no_stats(),
#seed_stats=no_stats(),
#view_summary_stats=no_stats(),
)
50 changes: 35 additions & 15 deletions tests/functional/adapter/utils/test_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,24 @@
from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString
from dbt.tests.adapter.utils.data_types.test_type_timestamp import BaseTypeTimestamp

# sqlite's table_info() pragma returns an empty type for columns in views
# so we tweak the models in these tests to materialize as tables
# These tests compare the resulting column types of CASTs against the types
# inferred by agate when loading seeds.
#
# There's a Column class in dbt-core that's used by the default adapter implementation
# of methods like [adapter].type_timestamp() to get a type for CASTs.
#
# Some quirks of SQLite that make these tests challenging:
#
# - a CAST seems to always result in an empty type (i.e. no type affinity) in views,
# but not in a CREATE TABLE AS. So we tweak the tests to materialize models as tables.
#
# - CASTs to an unrecognized type will result in the type being 'NUM' which is a bit
# mysterious.

class TestTypeBigInt(BaseTypeBigInt):
pass


@pytest.mark.skip("TODO: fix this")
class TestTypeFloat(BaseTypeFloat):

models__actual_sql = """
Expand All @@ -40,29 +50,39 @@ def models(self):
return {"actual.sql": self.interpolate_macro_namespace(self.models__actual_sql, "type_int")}


@pytest.mark.skip("TODO: fix this")
class TestTypeNumeric(BaseTypeNumeric):

models__actual_sql = """
{{ config(materialized='table') }}

select cast('1.2345' as {{ type_numeric() }}) as numeric_col
"""

@pytest.fixture(scope="class")
def models(self):
return {"actual.sql": self.interpolate_macro_namespace(self.models__actual_sql, "type_numeric")}

def numeric_fixture_type(self):
return "numeric"
return "NUM"


class TestTypeString(BaseTypeString):
pass


@pytest.mark.skip("TODO: fix this")
class TestTypeTimestamp(BaseTypeTimestamp):

models__actual_sql = """
{{ config(materialized='table') }}

select cast('2021-01-01 01:01:01' as {{ type_timestamp() }}) as timestamp_col
select cast('Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.'
as {{ type_string() }}) as string_col
"""

@pytest.fixture(scope="class")
def models(self):
return {
"actual.sql": self.interpolate_macro_namespace(self.models__actual_sql, "type_timestamp")
}
return {"actual.sql": self.interpolate_macro_namespace(self.models__actual_sql, "type_string")}



# casting to TIMESTAMP results in an 'NUM' type which truncates the original value
# to only the year portion. It's up to the user to properly deal with timestamps
# values from source tables.
@pytest.mark.skip("timestamp not supported in SQLite")
class TestTypeTimestamp(BaseTypeTimestamp):
pass