diff --git a/README.md b/README.md index ccd9fee..64e6af8 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,7 @@ To run the tests: - [expect_column_values_to_be_unique](#expect_column_values_to_be_unique) - [expect_column_values_to_be_of_type](#expect_column_values_to_be_of_type) - [expect_column_values_to_be_in_type_list](#expect_column_values_to_be_in_type_list) +- [expect_column_values_to_have_consistent_casing](#expect_column_values_to_have_consistent_casing) ### Sets and ranges @@ -410,6 +411,18 @@ tests: column_type_list: [date, datetime] ``` +### [expect_column_values_to_have_consistent_casing](macros/schema_tests/column_values_basic/expect_column_values_to_have_consistent_casing.sql) + +Expect a column to have consistent casing. When `display_inconsistent_columns` is set to true, the number of inconsistent values in the column is displayed in the terminal, whereas the inconsistent values themselves are returned when the compiled SQL test is run. + +*Applies to:* Column + +```yaml +tests: + - dbt_expectations.expect_column_values_to_have_consistent_casing: + display_inconsistent_columns: false # (Optional) +``` + ### [expect_column_values_to_be_in_set](macros/schema_tests/column_values_basic/expect_column_values_to_be_in_set.sql) Expect each column value to be in a given set. 
diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml
index 8b377ac..4a27fd1 100644
--- a/integration_tests/models/schema_tests/schema.yml
+++ b/integration_tests/models/schema_tests/schema.yml
@@ -274,9 +274,11 @@ models:
           - dbt_expectations.expect_column_distinct_values_to_contain_set:
               value_set: ['a','b']
               quote_values: true
-
-          - dbt_expectations.expect_column_value_lengths_to_equal :
+          - dbt_expectations.expect_column_value_lengths_to_equal:
               value: 1
+          - dbt_expectations.expect_column_values_to_have_consistent_casing
+          - dbt_expectations.expect_column_values_to_have_consistent_casing:
+              display_inconsistent_columns: true
 
       - name: col_string_b
         tests:
@@ -328,4 +330,3 @@ models:
               datepart: day
               interval: 1
               row_condition: group_id = 4
-
diff --git a/macros/schema_tests/column_values_basic/expect_column_values_to_have_consistent_casing.sql b/macros/schema_tests/column_values_basic/expect_column_values_to_have_consistent_casing.sql
new file mode 100644
index 0000000..1007150
--- /dev/null
+++ b/macros/schema_tests/column_values_basic/expect_column_values_to_have_consistent_casing.sql
@@ -0,0 +1,59 @@
+{#
+    Expect the values of a column to use consistent casing, i.e. no two
+    distinct values differ only by letter case (such as 'Apple' and 'apple').
+
+    Arguments:
+        model: relation under test.
+        column_name: column whose values are checked.
+        display_inconsistent_columns: when true, return one row per lowercased
+            value that has more than one casing variant; when false (the
+            default), return a single summary row if any such variant exists.
+#}
+{% test expect_column_values_to_have_consistent_casing(model, column_name, display_inconsistent_columns=False) %}
+
+with test_data as (
+
+    {# NULLs are filtered out: count(1) below would count the NULL row while
+       count(distinct ...) skips it, which made any column containing a NULL fail. #}
+    select
+        distinct {{ column_name }} as distinct_values
+    from
+        {{ model }}
+    where
+        {{ column_name }} is not null
+
+ ),
+ {% if display_inconsistent_columns %}
+ validation_errors as (
+
+    {# One row per lowercased value that maps to several distinct casings. #}
+    select
+        lower(distinct_values) as inconsistent_columns,
+        count(distinct_values) as set_count_case_insensitive
+    from
+        test_data
+    group by 1
+    having
+        count(distinct_values) > 1
+
+ )
+ select * from validation_errors
+ {% else %}
+ validation_errors as (
+
+    {# Case-sensitive vs. case-insensitive distinct counts; they differ only
+       when at least two values collide after lower(). #}
+    select
+        count(1) as set_count,
+        count(distinct lower(distinct_values)) as set_count_case_insensitive
+    from
+        test_data
+
+ )
+ select *
+ from
+    validation_errors
+ where
+    set_count != set_count_case_insensitive
+ {% endif %}
+ {%- endtest -%}