Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add integration tests #29

Merged
merged 10 commits into from
Nov 8, 2022
13 changes: 10 additions & 3 deletions dot/config/example/self_tests/dbt/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@


name: 'dbt_model_1'
version: '0.0.1'

Expand All @@ -7,9 +9,9 @@ profile: 'default'
# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models_self_tests"] # here the tool sets the output to a project-dependent folder
model-paths: ["models/ScanProject1"] # here the tool sets the output to a project-dependent folder
analysis-paths: ["analysis"]
test-paths: ["tests_self_tests"] # here the tool sets the output to a project-dependent folder
test-paths: ["tests/ScanProject1"] # here the tool sets the output to a project-dependent folder
seed-paths: ["data"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]
Expand All @@ -26,4 +28,9 @@ config-version: 2

# In this config, we tell dbt to build the models under core/ and test/ into the
# custom schema 'tests'. These settings can be overridden in the individual model files
# using the `{{ config(...) }}` macro.
models:
dbt_model_1:
core:
+schema: 'tests'
test:
+schema: 'tests'
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{{ config(materialized='view') }}
{% set schema = 'self_tests_public' %}
select DISTINCT airline
from {{ schema }}.flight_data
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{{ config(materialized='view') }}
{% set schema = 'self_tests_public' %}
select *
from {{ schema }}.airport_data
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
version: 2
models:
- name: dot_model__all_airports_data
columns:
- name: airport
description: Airport not unique
tests:
- unique
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{{ config(materialized='view') }}
{% set schema = 'self_tests_public' %}
select *
from {{ schema }}.flight_data
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
version: 2
models:
- name: dot_model__all_flight_data
columns:
- name: origin_airport
description: Flight with no airport record
tests:
- not_null
- relationships:
to: ref('dot_model__all_airports_data')
name: flight_with_no_airport
field: airport
- name: price
description: Price is not negative
tests:
- not_negative_string_column:
name: price
- name: stops
description: Disallowed FP methods entered in form
tests:
- accepted_values:
values:
- '1'
- '2'
- '3'
- Non-stop

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{{ config(materialized='view') }}
{% set schema = 'self_tests_public' %}
select *
from {{ schema }}.flight_data WHERE airline='Ethiopian Airlines'

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{{ config(materialized='view') }}
{% set schema = 'self_tests_public' %}
select *
from {{ schema }}.flight_data WHERE origin_airport='Zagreb airport'
312 changes: 312 additions & 0 deletions dot/self_tests/data/expected/integration/test_results.csv

Large diffs are not rendered by default.

41 changes: 41 additions & 0 deletions dot/self_tests/data/expected/integration/test_results_summary.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
,run_id,test_id,entity_id,test_type,column_name,test_parameters,test_status,test_status_message,failed_tests_view,failed_tests_view_sql,rows_total,rows_failed,rows_passed
0,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,dac4c545-f610-3dae-ad82-1ddf27dae144,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,accepted_values,stops,"{'values': ['1', '2', '3', 'Non-stop']}",fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_flight_data_accepted_values_stops," WITH all_values AS (
SELECT dot_model__all_flight_data.stops AS value_field,
count(*) AS n_records
FROM self_tests_public_tests.dot_model__all_flight_data
GROUP BY dot_model__all_flight_data.stops
)
SELECT all_values.value_field,
all_values.n_records
FROM all_values
WHERE all_values.value_field::text <> ALL (ARRAY['1'::character varying, '2'::character varying, '3'::character varying, 'Non-stop'::character varying]::text[]);",1001,2,999
1,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,49aa2fd3-511c-3d84-a782-a5daf57f98da,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,not_negative_string_column,price,{'name': 'price'},fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_price," SELECT array_agg(dot_model__all_flight_data.uuid) AS uuid_list
FROM self_tests_public_tests.dot_model__all_flight_data
WHERE dot_model__all_flight_data.price::character varying::text ~~ '-%'::text
HAVING count(*) > 0;",1001,38,963
2,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,983a5746-bea7-3072-9a80-2c1c6706ceed,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,not_null,origin_airport,{},fail,"got 53 results, configured to fail if != 0",tr_dot_model__all_flight_data_not_null_origin_a," SELECT dot_model__all_flight_data.uuid,
dot_model__all_flight_data.departure_time,
dot_model__all_flight_data.airline,
dot_model__all_flight_data.origin_airport,
dot_model__all_flight_data.origin_iata,
dot_model__all_flight_data.destination_airport,
dot_model__all_flight_data.destination_iata,
dot_model__all_flight_data.stops,
dot_model__all_flight_data.price
FROM self_tests_public_tests.dot_model__all_flight_data
WHERE dot_model__all_flight_data.origin_airport IS NULL;",1001,53,948
3,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,7aa26bda-57e1-39b8-a3e9-979a3d882577,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,relationships,origin_airport,"{'to': ""ref('dot_model__all_airports_data')"", 'name': 'flight_with_no_airport', 'field': 'airport'}",fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_flight_with_no_a," SELECT array_agg(from_model.from_uuid) AS uuid_list
FROM ( SELECT dot_model__all_flight_data.uuid AS from_uuid,
dot_model__all_flight_data.origin_airport AS from_column_id
FROM self_tests_public_tests.dot_model__all_flight_data) from_model
LEFT JOIN ( SELECT dot_model__all_airports_data.airport AS to_id
FROM self_tests_public_tests.dot_model__all_airports_data) to_model ON to_model.to_id::text = from_model.from_column_id::text
WHERE from_model.from_column_id IS NOT NULL AND to_model.to_id IS NULL
HAVING count(*) > 0;",1001,186,815
4,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,aa1c361c-a9ba-350e-9959-e92a5654f7dc,7b689796-afde-3930-87be-ed8b7c7a0474,unique,airport,{},fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_airports_data_unique_airport," SELECT dot_model__all_airports_data.airport AS unique_field,
count(*) AS n_records
FROM self_tests_public_tests.dot_model__all_airports_data
WHERE dot_model__all_airports_data.airport IS NOT NULL
GROUP BY dot_model__all_airports_data.airport
HAVING count(*) > 1;",365,2,363
5,cf7d4d65-c952-43b4-a87a-0f9c3591eceb,3b30906a-2088-3f34-9fd3-e4d931c04ba4,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,expect_similar_means_across_reporters,price,"{'key': 'airline', 'quantity': 'price', 'id_column': 'airline', 'data_table': 'dot_model__all_flight_data', 'target_table': 'dot_model__airlines_data'}",fail,,chv_tr_different_dot_model__all_flight_data_price_distribution,,1001,1,1000
Empty file.
77 changes: 77 additions & 0 deletions dot/self_tests/integration/test_run_dot_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
""" Integration test: runs DOT for the demo dataset and checks the results """
import os
import uuid
import logging
import shutil
import pandas as pd
from mock import patch
from ..self_tests_utils.dbt_base_safe_test_class import DbtBaseSelfTestClass

# UT after base_self_test_class imports
from utils.run_management import run_dot_tests # pylint: disable=wrong-import-order
from utils.utils import setup_custom_logger # pylint: disable=wrong-import-order
from utils.connection_utils import (
get_db_params_from_config,
) # pylint: disable=wrong-import-order
from utils.configuration_utils import (
DbParamsConfigFile,
DbParamsConnection,
) # pylint: disable=wrong-import-order


class RunDotTestsTest(DbtBaseSelfTestClass):
    """Integration test: full DOT run on the demo dataset, compared with stored results"""

    def setUp(self) -> None:
        """Loads the DOT demo dataset and removes any earlier dbt output"""
        self.create_self_tests_db_schema()
        self.cleanup_dbt_output_dir()

    @patch("utils.configuration_utils._get_filename_safely")
    def test_run_dot_tests(
        self, mock_get_filename_safely
    ):  # pylint: disable=no-value-for-parameter
        """Runs every DOT test for ScanProject1 and checks both result tables"""
        mock_get_filename_safely.side_effect = self.mock_get_filename_safely

        project_id = "ScanProject1"
        log_file = "self_tests/output/logs/run_everything.log"
        logger = setup_custom_logger(log_file, logging.INFO)
        run_id = uuid.uuid4()

        run_dot_tests(project_id, logger, run_id)

        # connection to the DOT schema, where the run stored its results
        db_params = get_db_params_from_config(
            DbParamsConfigFile["dot_config.yml"],
            DbParamsConnection["dot"],
            project_id,
        )
        schema_dot, _, conn_dot = db_params

        # --- summary table: everything except the per-run identifier must match
        summary_query = f"SELECT * FROM {schema_dot}.test_results_summary"
        actual_summary = pd.read_sql(summary_query, conn_dot)
        expected_summary = pd.read_csv(
            "self_tests/data/expected/integration/test_results_summary.csv",
            index_col=0,
        )
        pd.testing.assert_frame_equal(
            actual_summary.drop(columns=["run_id"]),
            expected_summary.drop(columns=["run_id"]),
        )

        # --- detailed results: these columns are excluded from the frame comparison
        excluded_columns = ["run_id", "test_result_id", "id_column_value"]
        results_query = f"SELECT * FROM {schema_dot}.test_results"
        actual_results = pd.read_sql(results_query, conn_dot)
        expected_results = pd.read_csv(
            "self_tests/data/expected/integration/test_results.csv", index_col=0
        )
        pd.testing.assert_frame_equal(
            expected_results.drop(columns=excluded_columns),
            actual_results.drop(columns=excluded_columns),
        )

        # id_column_value is only compared as a sorted list, i.e. regardless of row order
        expected_ids = sorted(expected_results["id_column_value"].to_list())
        actual_ids = sorted(actual_results["id_column_value"].to_list())
        self.assertListEqual(expected_ids, actual_ids)
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,16 @@ def mock_get_filename_safely(path: str) -> str:
return path
if path == DBT_PROJECT_FINAL_FILENAME:
return DBT_PROJECT_FINAL_FILENAME
# return "./config/example/project_name/dbt/dbt_project.yml"
raise FileNotFoundError(f"file path {path} needs to be mocked")

def setUp(self) -> None:
"""Default per-test setup: creates the DB schema for the demo dataset by calling create_self_tests_db_schema() without arguments."""
self.create_self_tests_db_schema()

def tearDown(self) -> None:
"""Default per-test teardown: drops the DB schema for the demo dataset by calling drop_self_tests_db_schema()."""
self.drop_self_tests_db_schema()

@patch("utils.configuration_utils._get_filename_safely")
def get_self_tests_db_conn(
self,
Expand Down
69 changes: 69 additions & 0 deletions dot/self_tests/self_tests_utils/dbt_base_safe_test_class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""base self tests class for tests checking the output of the DBT process"""
import os
import logging
import shutil

from mock import patch
from ..self_tests_utils.base_self_test_class import BaseSelfTestClass

from utils.utils import setup_custom_logger # pylint: disable=wrong-import-order

from utils.dbt import ( # pylint: disable=wrong-import-order
run_dbt_core,
archive_previous_dbt_results,
create_failed_dbt_test_models,
run_dbt_test,
)


class DbtBaseSelfTestClass(BaseSelfTestClass):
    """Base class for self tests that check the output of the dbt process.

    On top of the parent class it clears previous dbt output, stages the
    self-tests dbt project files under ``dbt/`` and offers a helper that runs
    the dbt steps for project "ScanProject1".
    """

    @staticmethod
    def cleanup_dbt_output_dir():
        """Remove previous dbt output directories below ``dbt/``.

        Deletes ``dbt/target`` and every directory directly inside ``dbt/``
        whose name starts with "models" or "tests". Does nothing when ``dbt/``
        does not exist; entries that are not directories are left untouched.
        """
        if not os.path.isdir("dbt"):
            # nothing to clean up; avoids FileNotFoundError from os.listdir
            return
        if os.path.isdir("dbt/target"):
            shutil.rmtree("dbt/target")
        for path in os.listdir("dbt/"):
            if not path.startswith(("models", "tests")):
                continue
            # shutil.rmtree raises on plain files, so only remove directories
            if os.path.isdir(f"dbt/{path}"):
                shutil.rmtree(f"dbt/{path}")

    @patch("utils.configuration_utils._get_filename_safely")
    def setUp(
        self, mock_get_filename_safely
    ) -> None:  # pylint: disable=no-value-for-parameter
        """Run the parent setup, clear old dbt output and stage the dbt project.

        ``utils.configuration_utils._get_filename_safely`` is patched while this
        method runs and redirected to ``self.mock_get_filename_safely``.
        """
        super().setUp()

        self.cleanup_dbt_output_dir()

        mock_get_filename_safely.side_effect = self.mock_get_filename_safely

        self.dbt_test_setup()

    def dbt_test_setup(self):
        """
        setup for dbt tests

        - dbt_project config file
        - entities to be tested
        """
        shutil.copy(
            "./config/example/self_tests/dbt/dbt_project.yml", "./dbt/dbt_project.yml"
        )

        # copy the models
        # (i.e. in the full DOT pipeline these are generated from the configured_entities)
        # copytree needs a destination that does not exist yet, so clear it first
        shutil.rmtree("dbt/models", ignore_errors=True)
        shutil.copytree(
            "self_tests/data/dot_input_files/dbt", "dbt/models/ScanProject1"
        )

    @staticmethod
    def run_dbt_steps():
        """
        Runs all the actions for dbt, for project "ScanProject1":

        - run_dbt_core
        - archive_previous_dbt_results
        - create_failed_dbt_test_models (called with "view")
        - run_dbt_test

        Logging goes to self_tests/output/test.log at INFO level.
        """
        project_id = "ScanProject1"
        logger = setup_custom_logger("self_tests/output/test.log", logging.INFO)
        run_dbt_core(project_id, logger)
        archive_previous_dbt_results(logger)
        create_failed_dbt_test_models(project_id, logger, "view")
        run_dbt_test(project_id, logger)
2 changes: 1 addition & 1 deletion dot/self_tests/unit/test_configuration_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
""" Tests of configuration utils module """

from mock import patch
from .base_self_test_class import BaseSelfTestClass
from ..self_tests_utils.base_self_test_class import BaseSelfTestClass

# UT after base_self_test_class imports
from utils.configuration_utils import ( # pylint: disable=wrong-import-order
Expand Down
2 changes: 1 addition & 1 deletion dot/self_tests/unit/test_connection_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .base_self_test_class import BaseSelfTestClass
from ..self_tests_utils.base_self_test_class import BaseSelfTestClass

# UT after base_self_test_class imports
from utils.connection_utils import ( # pylint: disable=wrong-import-order
Expand Down
8 changes: 1 addition & 7 deletions dot/self_tests/unit/test_core_entities_creation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import os
from mock import patch
from .base_self_test_class import BaseSelfTestClass
from ..self_tests_utils.base_self_test_class import BaseSelfTestClass

# UT after base_self_test_class imports
from utils.dbt import create_core_entities # pylint: disable=wrong-import-order
Expand All @@ -10,12 +10,6 @@
class CoreEntitiesCreationTest(BaseSelfTestClass):
"""Test Class"""

def setUp(self) -> None:
self.create_self_tests_db_schema()

def tearDown(self) -> None:
self.drop_self_tests_db_schema()

@patch("utils.configuration_utils._get_filename_safely")
def test_yaml_creation(
self, mock_get_filename_safely
Expand Down
5 changes: 1 addition & 4 deletions dot/self_tests/unit/test_dbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd
from mock import patch

from .base_self_test_class import BaseSelfTestClass
from ..self_tests_utils.base_self_test_class import BaseSelfTestClass

# UT after base_self_test_class imports
from utils.dbt import ( # pylint: disable=wrong-import-order
Expand All @@ -22,9 +22,6 @@ def setUp(self) -> None:
with open("self_tests/data/queries/dbt_core_generated_objects.sql", "r") as f:
self.create_self_tests_db_schema(f.read())

def tearDown(self) -> None:
self.drop_self_tests_db_schema()

@patch("utils.configuration_utils._get_filename_safely")
def test_extract_df_from_dbt_test_results_json(
self, mock_get_filename_safely
Expand Down
Loading