From a21b285378de1ebc3c9df524d4e8d3134b097eaa Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Thu, 27 Apr 2023 12:45:38 -0400 Subject: [PATCH 1/8] CT-2223: Add CONSTRAINT_SUPPORT mapping to adapter implementation --- .../unreleased/Features-20230427-123135.yaml | 6 ++++++ dbt/adapters/spark/impl.py | 17 ++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 .changes/unreleased/Features-20230427-123135.yaml diff --git a/.changes/unreleased/Features-20230427-123135.yaml b/.changes/unreleased/Features-20230427-123135.yaml new file mode 100644 index 000000000..2fd977291 --- /dev/null +++ b/.changes/unreleased/Features-20230427-123135.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Implement full support for model and column constraints. +time: 2023-04-27T12:31:35.011284-04:00 +custom: + Author: peterallenwebb + Issue: 656 657 diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 37de188c5..d29b56178 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -2,17 +2,16 @@ from concurrent.futures import Future from dataclasses import dataclass from typing import Any, Dict, Iterable, List, Optional, Union, Type, Tuple, Callable + from typing_extensions import TypeAlias import agate -from dbt.contracts.relation import RelationType import dbt import dbt.exceptions from dbt.adapters.base import AdapterConfig, PythonJobHelper -from dbt.adapters.base.impl import catch_as_completed -from dbt.contracts.connection import AdapterResponse +from dbt.adapters.base.impl import catch_as_completed, ConstraintSupport from dbt.adapters.sql import SQLAdapter from dbt.adapters.spark import SparkConnectionManager from dbt.adapters.spark import SparkRelation @@ -23,6 +22,9 @@ ) from dbt.adapters.base import BaseRelation from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER +from dbt.contracts.connection import AdapterResponse +from dbt.contracts.graph.nodes import ConstraintType +from dbt.contracts.relation import 
RelationType from dbt.events import AdapterLogger from dbt.flags import get_flags from dbt.utils import executor, AttrDict @@ -82,6 +84,7 @@ class SparkAdapter(SQLAdapter): INFORMATION_COLUMNS_REGEX = re.compile(r"^ \|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE) INFORMATION_OWNER_REGEX = re.compile(r"^Owner: (.*)$", re.MULTILINE) INFORMATION_STATISTICS_REGEX = re.compile(r"^Statistics: (.*)$", re.MULTILINE) + HUDI_METADATA_COLUMNS = [ "_hoodie_commit_time", "_hoodie_commit_seqno", @@ -90,6 +93,14 @@ class SparkAdapter(SQLAdapter): "_hoodie_file_name", ] + CONSTRAINT_SUPPORT = { + ConstraintType.check: ConstraintSupport.NOT_ENFORCED, + ConstraintType.not_null: ConstraintSupport.NOT_ENFORCED, + ConstraintType.unique: ConstraintSupport.NOT_ENFORCED, + ConstraintType.primary_key: ConstraintSupport.NOT_ENFORCED, + ConstraintType.foreign_key: ConstraintSupport.NOT_ENFORCED, + } + Relation: TypeAlias = SparkRelation RelationInfo = Tuple[str, str, str] Column: TypeAlias = SparkColumn From a34538a44e7d0fe837a1d40fc2193d7a7f64502a Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Thu, 4 May 2023 17:24:00 -0400 Subject: [PATCH 2/8] CT-2223: Model constraints implemented for spark --- dbt/include/spark/macros/adapters.sql | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 0d397e6d6..1ef5d79eb 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -183,7 +183,7 @@ {% macro spark__persist_constraints(relation, model) %} {%- set contract_config = config.get('contract') -%} {% if contract_config.enforced and config.get('file_format', 'delta') == 'delta' %} - {% do alter_table_add_constraints(relation, model.columns) %} + {% do alter_table_add_constraints(relation, model.constraints) %} {% do alter_column_set_constraints(relation, model.columns) %} {% endif %} {% endmacro %} @@ -192,18 +192,14 @@ {{ 
return(adapter.dispatch('alter_table_add_constraints', 'dbt')(relation, constraints)) }} {% endmacro %} -{% macro spark__alter_table_add_constraints(relation, column_dict) %} - - {% for column_name in column_dict %} - {% set constraints = column_dict[column_name]['constraints'] %} - {% for constraint in constraints %} - {% if constraint.type == 'check' and not is_incremental() %} - {%- set constraint_hash = local_md5(column_name ~ ";" ~ constraint.expression ~ ";" ~ loop.index) -%} - {% call statement() %} - alter table {{ relation }} add constraint {{ constraint_hash }} check {{ constraint.expression }}; - {% endcall %} - {% endif %} - {% endfor %} +{% macro spark__alter_table_add_constraints(relation, constraints) %} + {% for constraint in constraints %} + {% if constraint.type == 'check' and not is_incremental() %} + {%- set constraint_hash = local_md5(constraint.expression ~ ";" ~ loop.index) -%} + {% call statement() %} + alter table {{ relation }} add constraint {{ constraint.name if constraint.name else constraint_hash }} check {{ constraint.expression }}; + {% endcall %} + {% endif %} {% endfor %} {% endmacro %} From 940842cfbc84cff8c05fec4da8d473be3d9b651b Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Tue, 23 May 2023 15:28:22 -0400 Subject: [PATCH 3/8] CT-2223: Add unit test --- tests/functional/adapter/test_constraints.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py index 1287dfd37..bcaeade2f 100644 --- a/tests/functional/adapter/test_constraints.py +++ b/tests/functional/adapter/test_constraints.py @@ -1,5 +1,6 @@ import pytest from dbt.tests.adapter.constraints.test_constraints import ( + BaseModelConstraintsRuntimeEnforcement, BaseTableConstraintsColumnsEqual, BaseViewConstraintsColumnsEqual, BaseIncrementalConstraintsColumnsEqual, @@ -9,6 +10,7 @@ BaseIncrementalConstraintsRollback, ) from 
dbt.tests.adapter.constraints.fixtures import ( + constrained_model_schema_yml, my_model_sql, my_model_wrong_order_sql, my_model_wrong_name_sql, @@ -40,6 +42,7 @@ # Different on Spark: # - does not support a data type named 'text' (TODO handle this in the base test classes using string_type constraints_yml = model_schema_yml.replace("text", "string").replace("primary key", "") +model_constraints_yml = constrained_model_schema_yml.replace("text", "string") class PyodbcSetup: @@ -245,6 +248,7 @@ def expected_error_messages(self): "violate the new NOT NULL constraint", "(id > 0) violated by row with values:", # incremental mats "DELTA_VIOLATE_CONSTRAINT_WITH_VALUES", # incremental mats + "NOT NULL constraint violated for column", ] def assert_expected_error_messages(self, error_message, expected_error_messages): @@ -285,3 +289,19 @@ def models(self): "my_model.sql": my_incremental_model_sql, "constraints_schema.yml": constraints_yml, } + + +# TODO: Like the tests above, this does test that model-level constraints don't +# result in errors, but it does not verify that they are actually present in +# Spark and that the ALTER TABLE statement actually ran. 
+class TestSparkModelConstraintsRuntimeEnforcement(BaseModelConstraintsRuntimeEnforcement): + @pytest.fixture(scope="class") + def models(self): + return { + "my_model.sql": my_incremental_model_sql, + "constraints_schema.yml": model_constraints_yml, + } + + @pytest.fixture(scope="class") + def expected_sql(self): + return _expected_sql_spark From 4f081ebeca35530294825f3b0233eeec191baf7b Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Tue, 23 May 2023 15:55:37 -0400 Subject: [PATCH 4/8] CT-2223: Exclude test not applicable to spark --- tests/functional/adapter/test_constraints.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py index bcaeade2f..3e49eade3 100644 --- a/tests/functional/adapter/test_constraints.py +++ b/tests/functional/adapter/test_constraints.py @@ -294,6 +294,7 @@ def models(self): # TODO: Like the tests above, this does test that model-level constraints don't # result in errors, but it does not verify that they are actually present in # Spark and that the ALTER TABLE statement actually ran. +@pytest.mark.skip_profile("spark_session", "apache_spark") class TestSparkModelConstraintsRuntimeEnforcement(BaseModelConstraintsRuntimeEnforcement): @pytest.fixture(scope="class") def models(self): From 98da4dcce61666c3814a574cfe8ecc9654ac709a Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Tue, 23 May 2023 21:39:37 -0400 Subject: [PATCH 5/8] CT-2223: Another unit test fix. --- tests/functional/adapter/test_constraints.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py index 55f8a2273..e16bf1834 100644 --- a/tests/functional/adapter/test_constraints.py +++ b/tests/functional/adapter/test_constraints.py @@ -300,6 +300,14 @@ def models(self): # Spark and that the ALTER TABLE statement actually ran. 
@pytest.mark.skip_profile("spark_session", "apache_spark") class TestSparkModelConstraintsRuntimeEnforcement(BaseModelConstraintsRuntimeEnforcement): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "models": { + "+file_format": "delta", + } + } + @pytest.fixture(scope="class") def models(self): return { From 7098c22aabf0ef24cc583171b96837702f4d1fe6 Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Tue, 23 May 2023 22:20:31 -0400 Subject: [PATCH 6/8] CT-2223: Final (?) unit test fix --- tests/functional/adapter/test_constraints.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py index e16bf1834..02e147f62 100644 --- a/tests/functional/adapter/test_constraints.py +++ b/tests/functional/adapter/test_constraints.py @@ -39,6 +39,22 @@ '2019-01-01' as date_day ) as model_subq """ +_expected_sql_spark_model_constraints = """ +create or replace table + using delta + as +select + id, + color, + date_day +from + +( select + 1 as id, + 'blue' as color, + '2019-01-01' as date_day ) as model_subq +""" + # Different on Spark: # - does not support a data type named 'text' (TODO handle this in the base test classes using string_type constraints_yml = model_schema_yml.replace("text", "string").replace("primary key", "") @@ -317,4 +333,4 @@ def models(self): @pytest.fixture(scope="class") def expected_sql(self): - return _expected_sql_spark + return _expected_sql_spark_model_constraints From 5019b6d13f2a73acbc831c2bfe2de99ebbbac482 Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Tue, 23 May 2023 23:16:06 -0400 Subject: [PATCH 7/8] CT-2223: Final (??) 
unit test fix --- tests/functional/adapter/test_constraints.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py index 02e147f62..3bfa888af 100644 --- a/tests/functional/adapter/test_constraints.py +++ b/tests/functional/adapter/test_constraints.py @@ -265,6 +265,7 @@ def expected_error_messages(self): return [ "violate the new CHECK constraint", "DELTA_NEW_CHECK_CONSTRAINT_VIOLATION", + "DELTA_NEW_NOT_NULL_VIOLATION", "violate the new NOT NULL constraint", "(id > 0) violated by row with values:", # incremental mats "DELTA_VIOLATE_CONSTRAINT_WITH_VALUES", # incremental mats From a377c2a549882ce28cc23e07d32514257a643a64 Mon Sep 17 00:00:00 2001 From: Peter Webb Date: Wed, 24 May 2023 11:09:12 -0400 Subject: [PATCH 8/8] Update .changes/unreleased/Features-20230427-123135.yaml Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> --- .changes/unreleased/Features-20230427-123135.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changes/unreleased/Features-20230427-123135.yaml b/.changes/unreleased/Features-20230427-123135.yaml index 2fd977291..a1cf88be5 100644 --- a/.changes/unreleased/Features-20230427-123135.yaml +++ b/.changes/unreleased/Features-20230427-123135.yaml @@ -1,5 +1,5 @@ kind: Features -body: Implement full support for model and column constraints. +body: All constraint types are supported, but not enforced. time: 2023-04-27T12:31:35.011284-04:00 custom: Author: peterallenwebb