datakind · dividor · Nov 8, 2022 · Oct 30, 2022 · Oct 30, 2022 · Oct 30, 2022
diff --git a/dot/config/example/self_tests/dbt/dbt_project.yml b/dot/config/example/self_tests/dbt/dbt_project.yml
@@ -1,3 +1,5 @@
+
+
 name: 'dbt_model_1'
 version: '0.0.1'
 
@@ -7,9 +9,9 @@ profile: 'default'
 # These configurations specify where dbt should look for different types of files.
 # The `source-paths` config, for example, states that models in this project can be
 # found in the "models/" directory. You probably won't need to change these!
-model-paths: ["models_self_tests"]  # here the tool sets the output to a project-dependent folder
+model-paths: ["models/ScanProject1"]  # here the tool sets the output to a project-dependent folder
 analysis-paths: ["analysis"]
-test-paths: ["tests_self_tests"]  # here the tool sets the output  to a project-dependent folder
+test-paths: ["tests/ScanProject1"]  # here the tool sets the output to a project-dependent folder
 seed-paths: ["data"]
 macro-paths: ["macros"]
 snapshot-paths: ["snapshots"]
@@ -26,4 +28,9 @@ config-version: 2
 
 # In this example config, we tell dbt to build all models in the example/ directory
 # as tables. These settings can be overridden in the individual model files
-# using the `{{ config(...) }}` macro.
+models:
+    dbt_model_1:
+        core:
+            +schema: 'tests'
+        test:
+            +schema: 'tests'
diff --git a/dot/self_tests/data/dot_input_files/dbt/core/dot_model__airlines_data.sql b/dot/self_tests/data/dot_input_files/dbt/core/dot_model__airlines_data.sql
@@ -0,0 +1,4 @@
+{{ config(materialized='view') }}
+{% set schema = 'self_tests_public' %}
+select DISTINCT airline
+from {{ schema }}.flight_data   
diff --git a/dot/self_tests/data/dot_input_files/dbt/core/dot_model__all_airports_data.sql b/dot/self_tests/data/dot_input_files/dbt/core/dot_model__all_airports_data.sql
@@ -0,0 +1,4 @@
+{{ config(materialized='view') }}
+{% set schema = 'self_tests_public' %}
+select *
+from {{ schema }}.airport_data   
diff --git a/dot/self_tests/data/dot_input_files/dbt/core/dot_model__all_airports_data.yml b/dot/self_tests/data/dot_input_files/dbt/core/dot_model__all_airports_data.yml
@@ -0,0 +1,8 @@
+version: 2
+models:
+-   name: dot_model__all_airports_data
+    columns:
+    -   name: airport
+        description: Airport not unique
+        tests:
+        - unique
diff --git a/dot/self_tests/data/dot_input_files/dbt/core/dot_model__all_flight_data.sql b/dot/self_tests/data/dot_input_files/dbt/core/dot_model__all_flight_data.sql
@@ -0,0 +1,4 @@
+{{ config(materialized='view') }}
+{% set schema = 'self_tests_public' %}
+select *
+from {{ schema }}.flight_data 
diff --git a/dot/self_tests/data/dot_input_files/dbt/core/dot_model__all_flight_data.yml b/dot/self_tests/data/dot_input_files/dbt/core/dot_model__all_flight_data.yml
@@ -0,0 +1,26 @@
+version: 2
+models:
+-   name: dot_model__all_flight_data
+    columns:
+    -   name: origin_airport
+        description: Flight with no airport record
+        tests:
+        - not_null
+        -   relationships:
+                to: ref('dot_model__all_airports_data')
+                name: flight_with_no_airport
+                field: airport
+    -   name: price
+        description: Price is not negative
+        tests:
+        -   not_negative_string_column:
+                name: price
+    -   name: stops
+        description: Disallowed FP methods entered in form
+        tests:
+        -   accepted_values:
+                values:
+                - '1'
+                - '2'
+                - '3'
+                - Non-stop
diff --git a/dot/self_tests/data/dot_input_files/dbt/core/dot_model__ancview_pregnancy.sql b/dot/self_tests/data/dot_input_files/dbt/core/dot_model__ancview_pregnancy.sql
diff --git a/dot/self_tests/data/dot_input_files/dbt/core/dot_model__ancview_pregnancy.yml b/dot/self_tests/data/dot_input_files/dbt/core/dot_model__ancview_pregnancy.yml
diff --git a/dot/self_tests/data/dot_input_files/dbt/core/dot_model__ethiopia_airlines_data.sql b/dot/self_tests/data/dot_input_files/dbt/core/dot_model__ethiopia_airlines_data.sql
@@ -0,0 +1,4 @@
+{{ config(materialized='view') }}
+{% set schema = 'self_tests_public' %}
+select *
+from {{ schema }}.flight_data WHERE airline='Ethiopian Airlines'    
diff --git a/dot/self_tests/data/dot_input_files/dbt/core/dot_model__fpview_registration.sql b/dot/self_tests/data/dot_input_files/dbt/core/dot_model__fpview_registration.sql
diff --git a/dot/self_tests/data/dot_input_files/dbt/core/dot_model__fpview_registration.yml b/dot/self_tests/data/dot_input_files/dbt/core/dot_model__fpview_registration.yml
diff --git a/dot/self_tests/data/dot_input_files/dbt/core/dot_model__zagreb_flight_data.sql b/dot/self_tests/data/dot_input_files/dbt/core/dot_model__zagreb_flight_data.sql
@@ -0,0 +1,4 @@
+{{ config(materialized='view') }}
+{% set schema = 'self_tests_public' %}
+select *
+from {{ schema }}.flight_data WHERE origin_airport='Zagreb airport'    
diff --git a/dot/self_tests/data/expected/integration/test_results.csv b/dot/self_tests/data/expected/integration/test_results.csv
diff --git a/dot/self_tests/data/expected/integration/test_results_summary.csv b/dot/self_tests/data/expected/integration/test_results_summary.csv
@@ -0,0 +1,41 @@
+,run_id,test_id,entity_id,test_type,column_name,test_parameters,test_status,test_status_message,failed_tests_view,failed_tests_view_sql,rows_total,rows_failed,rows_passed
+0,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,dac4c545-f610-3dae-ad82-1ddf27dae144,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,accepted_values,stops,"{'values': ['1', '2', '3', 'Non-stop']}",fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_flight_data_accepted_values_stops," WITH all_values AS (
+         SELECT dot_model__all_flight_data.stops AS value_field,
+            count(*) AS n_records
+           FROM self_tests_public_tests.dot_model__all_flight_data
+          GROUP BY dot_model__all_flight_data.stops
+        )
+ SELECT all_values.value_field,
+    all_values.n_records
+   FROM all_values
+  WHERE all_values.value_field::text <> ALL (ARRAY['1'::character varying, '2'::character varying, '3'::character varying, 'Non-stop'::character varying]::text[]);",1001,2,999
+1,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,49aa2fd3-511c-3d84-a782-a5daf57f98da,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,not_negative_string_column,price,{'name': 'price'},fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_price," SELECT array_agg(dot_model__all_flight_data.uuid) AS uuid_list
+   FROM self_tests_public_tests.dot_model__all_flight_data
+  WHERE dot_model__all_flight_data.price::character varying::text ~~ '-%'::text
+ HAVING count(*) > 0;",1001,38,963
+2,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,983a5746-bea7-3072-9a80-2c1c6706ceed,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,not_null,origin_airport,{},fail,"got 53 results, configured to fail if != 0",tr_dot_model__all_flight_data_not_null_origin_a," SELECT dot_model__all_flight_data.uuid,
+    dot_model__all_flight_data.departure_time,
+    dot_model__all_flight_data.airline,
+    dot_model__all_flight_data.origin_airport,
+    dot_model__all_flight_data.origin_iata,
+    dot_model__all_flight_data.destination_airport,
+    dot_model__all_flight_data.destination_iata,
+    dot_model__all_flight_data.stops,
+    dot_model__all_flight_data.price
+   FROM self_tests_public_tests.dot_model__all_flight_data
+  WHERE dot_model__all_flight_data.origin_airport IS NULL;",1001,53,948
+3,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,7aa26bda-57e1-39b8-a3e9-979a3d882577,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,relationships,origin_airport,"{'to': ""ref('dot_model__all_airports_data')"", 'name': 'flight_with_no_airport', 'field': 'airport'}",fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_flight_with_no_a," SELECT array_agg(from_model.from_uuid) AS uuid_list
+   FROM ( SELECT dot_model__all_flight_data.uuid AS from_uuid,
+            dot_model__all_flight_data.origin_airport AS from_column_id
+           FROM self_tests_public_tests.dot_model__all_flight_data) from_model
+     LEFT JOIN ( SELECT dot_model__all_airports_data.airport AS to_id
+           FROM self_tests_public_tests.dot_model__all_airports_data) to_model ON to_model.to_id::text = from_model.from_column_id::text
+  WHERE from_model.from_column_id IS NOT NULL AND to_model.to_id IS NULL
+ HAVING count(*) > 0;",1001,186,815
+4,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,aa1c361c-a9ba-350e-9959-e92a5654f7dc,7b689796-afde-3930-87be-ed8b7c7a0474,unique,airport,{},fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_airports_data_unique_airport," SELECT dot_model__all_airports_data.airport AS unique_field,
+    count(*) AS n_records
+   FROM self_tests_public_tests.dot_model__all_airports_data
+  WHERE dot_model__all_airports_data.airport IS NOT NULL
+  GROUP BY dot_model__all_airports_data.airport
+ HAVING count(*) > 1;",365,2,363
+5,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,3b30906a-2088-3f34-9fd3-e4d931c04ba4,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,expect_similar_means_across_reporters,price,"{'key': 'airline', 'quantity': 'price', 'id_column': 'airline', 'data_table': 'dot_model__all_flight_data', 'target_table': 'dot_model__airlines_data'}",fail,,chv_tr_different_dot_model__all_flight_data_price_distribution,,1001,1,1000
diff --git a/dot/self_tests/integration/__init__.py b/dot/self_tests/integration/__init__.py
diff --git a/dot/self_tests/integration/test_run_dot_tests.py b/dot/self_tests/integration/test_run_dot_tests.py
@@ -0,0 +1,77 @@
+""" Integration test: runs DOT for the demo dataset and checks the results """
+import os
+import uuid
+import logging
+import shutil
+import pandas as pd
+from mock import patch
+from ..self_tests_utils.dbt_base_safe_test_class import DbtBaseSelfTestClass
+
+# UT after base_self_test_class imports
+from utils.run_management import run_dot_tests  # pylint: disable=wrong-import-order
+from utils.utils import setup_custom_logger  # pylint: disable=wrong-import-order
+from utils.connection_utils import (
+    get_db_params_from_config,
+)  # pylint: disable=wrong-import-order
+from utils.configuration_utils import (
+    DbParamsConfigFile,
+    DbParamsConnection,
+)  # pylint: disable=wrong-import-order
+
+
+class RunDotTestsTest(DbtBaseSelfTestClass):
+    """Integration test: runs DOT for project "ScanProject1" and compares the
+    resulting `test_results_summary` and `test_results` tables with the
+    expected CSV files under self_tests/data/expected/integration/."""
+
+    def setUp(self) -> None:
+        """Create the self-tests DB schema and clear previous dbt output.
+
+        This overrides the parent setUp without calling it, so the parent's
+        `dbt_test_setup()` is not run for this test.
+        """
+        # load the DOT demo dataset
+        self.create_self_tests_db_schema()
+
+        self.cleanup_dbt_output_dir()
+
+    @patch("utils.configuration_utils._get_filename_safely")
+    def test_run_dot_tests(
+        self, mock_get_filename_safely
+    ):  # pylint: disable=no-value-for-parameter
+        """Run all DOT tests for "ScanProject1" and check both result tables.
+
+        `mock_get_filename_safely` is the mock injected by the `@patch`
+        decorator; its side effect is set to this class's own
+        `mock_get_filename_safely`.
+        """
+        mock_get_filename_safely.side_effect = self.mock_get_filename_safely
+
+        logger = setup_custom_logger(
+            "self_tests/output/logs/run_everything.log", logging.INFO
+        )
+
+        # a fresh random id per run; this is why `run_id` is dropped from the
+        # comparisons below
+        run_id = uuid.uuid4()
+
+        run_dot_tests("ScanProject1", logger, run_id)
+
+        # check results
+        schema_dot, _, conn_dot = get_db_params_from_config(
+            DbParamsConfigFile["dot_config.yml"],
+            DbParamsConnection["dot"],
+            "ScanProject1",
+        )
+
+        # 1) summary table: every column except `run_id` must match exactly
+        test_results_summary = pd.read_sql(
+            f"SELECT * FROM {schema_dot}.test_results_summary", conn_dot
+        )
+        expected_test_results_summary = pd.read_csv(
+            "self_tests/data/expected/integration/test_results_summary.csv", index_col=0
+        )
+        pd.testing.assert_frame_equal(
+            test_results_summary.drop(columns=["run_id"]),
+            expected_test_results_summary.drop(columns=["run_id"]),
+        )
+
+        # 2) detailed results: `test_result_id` and `id_column_value` are left
+        # out of the row-by-row comparison
+        # NOTE(review): presumably these are not stable row-by-row between
+        # runs - confirm
+        test_results = pd.read_sql(f"SELECT * FROM {schema_dot}.test_results", conn_dot)
+        expected_test_results = pd.read_csv(
+            "self_tests/data/expected/integration/test_results.csv", index_col=0
+        )
+        pd.testing.assert_frame_equal(
+            expected_test_results.drop(
+                columns=["run_id", "test_result_id", "id_column_value"]
+            ),
+            test_results.drop(columns=["run_id", "test_result_id", "id_column_value"]),
+        )
+        # `id_column_value` is still checked, but order-insensitively (both
+        # sides are sorted before comparing)
+        self.assertListEqual(
+            sorted(expected_test_results["id_column_value"].to_list()),
+            sorted(test_results["id_column_value"].to_list()),
+        )
diff --git a/dot/self_tests/self_tests_utils/__init__.py b/dot/self_tests/self_tests_utils/__init__.py
diff --git a/dot/self_tests/unit/base_self_test_class.py → .../self_tests_utils/base_self_test_class.py b/dot/self_tests/unit/base_self_test_class.py → .../self_tests_utils/base_self_test_class.py
@@ -60,9 +60,16 @@ def mock_get_filename_safely(path: str) -> str:
             return path
         if path == DBT_PROJECT_FINAL_FILENAME:
             return DBT_PROJECT_FINAL_FILENAME
-            # return "./config/example/project_name/dbt/dbt_project.yml"
         raise FileNotFoundError(f"file path {path} needs to be mocked")
 
+    def setUp(self) -> None:
+        """Default fixture: create the DB schema for the demo dataset.
+
+        Subclasses may override this to set up the schema differently.
+        """
+        self.create_self_tests_db_schema()
+
+    def tearDown(self) -> None:
+        """Default cleanup: drop the DB schema for the demo dataset."""
+        self.drop_self_tests_db_schema()
+
     @patch("utils.configuration_utils._get_filename_safely")
     def get_self_tests_db_conn(
         self,

diff --git a/dot/self_tests/self_tests_utils/dbt_base_safe_test_class.py b/dot/self_tests/self_tests_utils/dbt_base_safe_test_class.py
@@ -0,0 +1,69 @@
+"""base self tests class for tests checking the output of the DBT process"""
+import os
+import logging
+import shutil
+
+from mock import patch
+from ..self_tests_utils.base_self_test_class import BaseSelfTestClass
+
+from utils.utils import setup_custom_logger  # pylint: disable=wrong-import-order
+
+from utils.dbt import (  # pylint: disable=wrong-import-order
+    run_dbt_core,
+    archive_previous_dbt_results,
+    create_failed_dbt_test_models,
+    run_dbt_test,
+)
+
+
+class DbtBaseSelfTestClass(BaseSelfTestClass):
+    """Base class for self tests that check the output of the dbt steps.
+
+    `setUp` clears previous dbt output under ``dbt/`` and copies in the dbt
+    project file and the model fixtures for project "ScanProject1".
+    """
+
+    @staticmethod
+    def cleanup_dbt_output_dir():
+        """Remove the dbt target directory and generated model/test folders.
+
+        Only directories are removed. A plain file in ``dbt/`` whose name
+        starts with "models" or "tests" is left in place, because
+        ``shutil.rmtree`` would raise ``NotADirectoryError`` on it and abort
+        the test setup.
+        """
+        # for safety: remove any previous dbt target directory and model files
+        if os.path.isdir("dbt/target"):
+            shutil.rmtree("dbt/target")
+        for path in os.listdir("dbt/"):
+            full_path = f"dbt/{path}"
+            if path.startswith(("models", "tests")) and os.path.isdir(full_path):
+                shutil.rmtree(full_path)
+
+    @patch("utils.configuration_utils._get_filename_safely")
+    def setUp(
+        self, mock_get_filename_safely
+    ) -> None:  # pylint: disable=no-value-for-parameter
+        """Run the parent setUp, clear old dbt output and copy the fixtures.
+
+        `mock_get_filename_safely` is the mock injected by the `@patch`
+        decorator; its side effect is set to this class's own
+        `mock_get_filename_safely`.
+        """
+        super().setUp()
+
+        self.cleanup_dbt_output_dir()
+
+        mock_get_filename_safely.side_effect = self.mock_get_filename_safely
+
+        self.dbt_test_setup()
+
+    def dbt_test_setup(self):
+        """
+        setup for dbt tests
+
+        - dbt_project config file
+        - entities to be tested
+        """
+        shutil.copy(
+            "./config/example/self_tests/dbt/dbt_project.yml", "./dbt/dbt_project.yml"
+        )
+
+        # copy the models
+        # (i.e. in the full DOT pipeline these are generated from the configured_entities)
+        # copytree needs a destination that does not exist yet, hence the rmtree
+        shutil.rmtree("dbt/models", ignore_errors=True)
+        shutil.copytree(
+            "self_tests/data/dot_input_files/dbt", "dbt/models/ScanProject1"
+        )
+
+    @staticmethod
+    def run_dbt_steps():
+        """
+        Runs all the actions for dbt
+
+        In order: run_dbt_core, archive_previous_dbt_results,
+        create_failed_dbt_test_models (as "view") and run_dbt_test, all for
+        project "ScanProject1".
+        """
+        project_id = "ScanProject1"
+        logger = setup_custom_logger("self_tests/output/test.log", logging.INFO)
+        run_dbt_core(project_id, logger)
+        archive_previous_dbt_results(logger)
+        create_failed_dbt_test_models(project_id, logger, "view")
+        run_dbt_test(project_id, logger)
diff --git a/dot/self_tests/unit/test_configuration_utils.py b/dot/self_tests/unit/test_configuration_utils.py
@@ -1,7 +1,7 @@
 """ Tests of configuration utils module """
 
 from mock import patch
-from .base_self_test_class import BaseSelfTestClass
+from ..self_tests_utils.base_self_test_class import BaseSelfTestClass
 
 # UT after base_self_test_class imports
 from utils.configuration_utils import (  # pylint: disable=wrong-import-order

diff --git a/dot/self_tests/unit/test_connection_utils.py b/dot/self_tests/unit/test_connection_utils.py
@@ -1,4 +1,4 @@
-from .base_self_test_class import BaseSelfTestClass
+from ..self_tests_utils.base_self_test_class import BaseSelfTestClass
 
 # UT after base_self_test_class imports
 from utils.connection_utils import (  # pylint: disable=wrong-import-order

diff --git a/dot/self_tests/unit/test_core_entities_creation.py b/dot/self_tests/unit/test_core_entities_creation.py
@@ -1,7 +1,7 @@
 import logging
 import os
 from mock import patch
-from .base_self_test_class import BaseSelfTestClass
+from ..self_tests_utils.base_self_test_class import BaseSelfTestClass
 
 # UT after base_self_test_class imports
 from utils.dbt import create_core_entities  # pylint: disable=wrong-import-order
@@ -10,12 +10,6 @@
 class CoreEntitiesCreationTest(BaseSelfTestClass):
     """Test Class"""
 
-    def setUp(self) -> None:
-        self.create_self_tests_db_schema()
-
-    def tearDown(self) -> None:
-        self.drop_self_tests_db_schema()
-
     @patch("utils.configuration_utils._get_filename_safely")
     def test_yaml_creation(
         self, mock_get_filename_safely

diff --git a/dot/self_tests/unit/test_dbt.py b/dot/self_tests/unit/test_dbt.py
@@ -5,7 +5,7 @@
 import pandas as pd
 from mock import patch
 
-from .base_self_test_class import BaseSelfTestClass
+from ..self_tests_utils.base_self_test_class import BaseSelfTestClass
 
 # UT after base_self_test_class imports
 from utils.dbt import (  # pylint: disable=wrong-import-order
@@ -22,9 +22,6 @@ def setUp(self) -> None:
         with open("self_tests/data/queries/dbt_core_generated_objects.sql", "r") as f:
             self.create_self_tests_db_schema(f.read())
 
-    def tearDown(self) -> None:
-        self.drop_self_tests_db_schema()
-
     @patch("utils.configuration_utils._get_filename_safely")
     def test_extract_df_from_dbt_test_results_json(
         self, mock_get_filename_safely