Skip to content

Commit

Permalink
Add support for regex flags for BigQuery (#253)
Browse files Browse the repository at this point in the history
* added support for regex flags for BigQuery

* made inline flag test bigquery-specific

* remove enable config when all adapters are covered
  • Loading branch information
lookslikeitsnot authored Apr 2, 2023
1 parent a9d90df commit 73f2d00
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 15 deletions.
21 changes: 10 additions & 11 deletions integration_tests/models/schema_tests/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,17 @@ models:
config:
error_if: "=0"
warn_if: "<4"
# match all uppercase, but match case-insensitive (where implemented)
# match all uppercase, but match case-insensitive
- dbt_expectations.expect_column_values_to_match_regex:
regex: "[A-Z]"
flags: i
# match all uppercase with inline case-insensitive flag and case-insensitive flag parameter (where implemented)
# check that adapters handling flags by inlining them don't break because of the flag duplication
- dbt_expectations.expect_column_values_to_match_regex:
regex: "(?i)[A-Z]"
flags: i
config:
enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}"
enabled: "{{ target.type == 'bigquery' }}"
# match all uppercase, case-sensitive (where implemented), should fail
- dbt_expectations.expect_column_values_to_match_regex:
regex: "[A-Z]"
Expand Down Expand Up @@ -54,12 +59,10 @@ models:
config:
error_if: "=0"
warn_if: "<4"
# match all uppercase, but match case-insensitive (where implemented)
# match all uppercase, but match case-insensitive
- dbt_expectations.expect_column_values_to_match_regex_list:
regex_list: ["[A-G]", "[H-Z]"]
flags: i
config:
enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}"
# match all uppercase, but match case-sensitive (where implemented), should fail
- dbt_expectations.expect_column_values_to_match_regex_list:
regex_list: ["[A-G]", "[H-Z]"]
Expand Down Expand Up @@ -91,20 +94,16 @@ models:
# do not match all uppercase
- dbt_expectations.expect_column_values_to_not_match_regex_list:
regex_list: ["[A-G]", "[H-Z]"]
# do not match all uppercase or numbers, case-insensitive (where implemented)
# do not match all uppercase or numbers, case-insensitive
- dbt_expectations.expect_column_values_to_not_match_regex_list:
regex_list: ["[A-Z]", "[0-9]"]
flags: i
config:
enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}"
# do not match all uppercase and numbers, case-insensitive (where implemented)
# do not match all uppercase and numbers, case-insensitive
- dbt_expectations.expect_column_values_to_not_match_regex_list:
regex_list: ["[A-Z]", "[0-9]"]
flags: i
match_on: all
config:
enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}"
enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}"
error_if: "=0"
warn_if: "<4"
# match '@' anywhere in string
Expand Down
30 changes: 26 additions & 4 deletions macros/regex/regexp_instr.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ regexp_instr({{ source_value }}, {{ regexp }}, {{ position }}, {{ occurrence }},
{# BigQuery uses "r" to escape raw strings #}
{% macro bigquery__regexp_instr(source_value, regexp, position, occurrence, is_raw, flags) %}
{% if flags %}
{{ exceptions.warn(
"The flags option is not supported for BigQuery and is being ignored."
) }}
{{ dbt_expectations._validate_re2_flags(flags) }}
{# BigQuery prepends "(?flags)" to set flags for current group #}
{%- set regexp = "(?" ~ flags ~ ")" ~ regexp -%}
{% endif %}
{%- set regexp = "r'" ~ regexp ~ "'" if is_raw else "'" ~ regexp ~ "'" -%}
regexp_instr({{ source_value }}, {{ regexp }}, {{ position }}, {{ occurrence }})
Expand All @@ -50,9 +50,31 @@ regexp_instr({{ source_value }}, '{{ regexp }}', {{ position }}, {{ occurrence }
{# Check every flag in `flags` against `alphabet` and report each flag that is not in it. #}
{# The message names the offending flag and lists the allowed ones by joining `alphabet` with ", ". #}
{% macro _validate_flags(flags, alphabet) %}
{% for flag in flags %}
{% if flag not in alphabet %}
{{ exceptions.raise_compiler_error(
{# Using raise_compiler_error causes disabled tests with invalid flags to fail compilation #}
{{ exceptions.warn(
"flag " ~ flag ~ " not in list of allowed flags for this adapter: " ~ alphabet | join(", ")
) }}
{% endif %}
{% endfor %}
{% endmacro %}

{# Re2 requires specific flag validation because of its clear flag operator #}
{% macro _validate_re2_flags(flags) %}
    {# Warn when `flags` is not a flag string that fits the RE2 "(?flags)" syntax described below. #}
    {# `flags` is the raw flag string, e.g. "i", "-s" or "im-s". #}
    {# An invalid value only triggers exceptions.warn; it does not raise. #}

    {# Re2 supports following flags: #}
    {# i : case-insensitive (default false) #}
    {# m : multi-line mode: ^ and $ match begin/end line in addition to begin/end text (default false) #}
    {# s : let . match \n (default false) #}
    {# U : ungreedy: swap meaning of x* and x*?, x+ and x+?, etc (default false) #}
    {# Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). #}

    {# Regex explanation: optional flags to set, then at most one clear operator, #}
    {# which must be followed by at least one flag to clear. #}
    {# This rejects a leading/trailing lone dash, consecutive dashes and repeated clear operators such as "i-m-s". #}
    {% set re2_flags_pattern = '^[imsU]*(?:-[imsU]+)?$' %}
    {% set re = modules.re %}
    {% set is_match = re.match(re2_flags_pattern, flags) %}
    {% if not is_match %}
        {# Using raise_compiler_error causes disabled tests with invalid flags to fail compilation #}
        {{ exceptions.warn(
            "flags " ~ flags ~ " isn't a valid re2 flag pattern"
        ) }}
    {% endif %}
{% endmacro %}

0 comments on commit 73f2d00

Please sign in to comment.