Skip to content

Commit

Permalink
fixed date macro and usage in 2 models (#76)
Browse files Browse the repository at this point in the history
* fixed date macro and usage in 2 models

* comment
  • Loading branch information
pratzrao authored Feb 10, 2025
1 parent 6f2b874 commit 7d6e434
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 17 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
target/
dbt_packages/
logs/
/profiles.yml

# legacy -- renamed to dbt_packages in v1
dbt_modules/
venv/
.user.yml
package-lock.yml
models/wash_unit_tests.yml
profiles.yml


37 changes: 31 additions & 6 deletions macros/validate_date.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,36 @@
{% macro validate_date(field_name) %}

-- Attempt to validate the field as a date, return the original format
-- We have data coming from several google sheets and from commcare. below macro makes sure every data column, no matter
-- which table it is in or which source it is coming from is in the same appropriate format.
-- Sometimes in google sheets, in one row someone enters 2024/10/12 and elsewhere someone enters 10-5-2024 it will standardize it
case
when {{ field_name }}::text ~ '^\s*\d{4}-\d{2}-\d{2}\s*$' then
-- It strictly looks like a date (YYYY-MM-DD), safely cast to date
-- Ensure NULL and empty strings are converted to NULL safely before anything else
when {{ field_name }} is null or COALESCE({{ field_name }}::text, '') = '' then null::date

-- If it's already a DATE, return it as is
when pg_typeof({{ field_name }})::text = 'date' then
{{ field_name }}::date

-- Ensures that TIMESTAMP and TIMESTAMP WITH TIME ZONE are converted properly
when pg_typeof({{ field_name }})::text like 'timestamp%' then
{{ field_name }}::date
else null
end

-- Handle standard YYYY-MM-DD format
when COALESCE({{ field_name }}::text, '') ~ '^\d{4}-\d{2}-\d{2}$' then
to_date({{ field_name }}::text, 'YYYY-MM-DD')

-- Handle DD-MM-YYYY format
when COALESCE({{ field_name }}::text, '') ~ '^\d{2}-\d{2}-\d{4}$' then
to_date({{ field_name }}::text, 'DD-MM-YYYY')

-- Handle MM/DD/YYYY format
when COALESCE({{ field_name }}::text, '') ~ '^\d{2}/\d{2}/\d{4}$' then
to_date({{ field_name }}::text, 'MM/DD/YYYY')

-- Handle DD Mon YYYY format (e.g., 12 Jan 2024)
when COALESCE({{ field_name }}::text, '') ~ '^\d{1,2} \w{3} \d{4}$' then
to_date({{ field_name }}::text, 'DD Mon YYYY')

-- If none of the formats match, return NULL
else null::date
end
{% endmacro %}
10 changes: 5 additions & 5 deletions models/marts/gender/case_occurence.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ case_occurrences_data as (
previous_case_number,
{{ validate_date("date_of_reporting") }} as date_of_case_reporting,
cast(
to_char(cast(date_of_reporting as date), 'YYYYMMDD') as integer
to_char({{ validate_date("date_of_reporting") }}, 'YYYYMMDD') as integer
) as case_reporting_datekey,

case
Expand All @@ -32,8 +32,8 @@ case_occurrences_data as (

case
when case_intake_date is NULL
then cast(to_char(cast(date_of_reporting as date), 'YYYYMMDD') as integer)
else cast(to_char(cast(case_intake_date as date), 'YYYYMMDD') as integer)
then cast(to_char({{ validate_date("date_of_reporting") }}, 'YYYYMMDD') as integer)
else cast(to_char({{ validate_date("date_of_reporting") }}, 'YYYYMMDD') as integer)
end as case_intake_datekey,
{{ validate_date("date_of_case_closure") }} as date_of_case_closure,
{{ validate_date("case_assignment_date") }} as date_of_case_assignment,
Expand All @@ -45,10 +45,10 @@ case_occurrences_data as (
case
when {{ validate_date("date_of_case_closure") }} is not NULL
then
(cast({{ validate_date("date_of_case_closure") }} as date) - cast({{ validate_date("case_intake_date") }} as date))
({{ validate_date("date_of_case_closure") }} - {{ validate_date("case_intake_date") }} )
-- For ongoing cases, calculate duration from intake to today
else
(current_date - cast({{ validate_date("case_intake_date") }} as date))
(current_date - {{ validate_date("case_intake_date") }})
end
end as case_duration_in_days,
gender_site_code_of_reporting,
Expand Down
10 changes: 5 additions & 5 deletions models/marts/gender/case_occurence_pii.sql
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ case_occurrences_data as (
case_summary_notes,
{{ validate_date("date_of_reporting") }} as date_of_case_reporting,
cast(
to_char(cast(date_of_reporting as date), 'YYYYMMDD') as integer
to_char({{ validate_date("date_of_reporting") }}, 'YYYYMMDD') as integer
) as case_reporting_datekey,

case
Expand All @@ -34,8 +34,8 @@ case_occurrences_data as (

case
when case_intake_date is NULL
then cast(to_char(cast(date_of_reporting as date), 'YYYYMMDD') as integer)
else cast(to_char(cast(case_intake_date as date), 'YYYYMMDD') as integer)
then cast(to_char({{ validate_date("date_of_reporting") }}, 'YYYYMMDD') as integer)
else cast(to_char({{ validate_date("date_of_reporting") }}, 'YYYYMMDD') as integer)
end as case_intake_datekey,
{{ validate_date("date_of_case_closure") }} as date_of_case_closure,
{{ validate_date("case_assignment_date") }} as date_of_case_assignment,
Expand All @@ -47,10 +47,10 @@ case_occurrences_data as (
case
when {{ validate_date("date_of_case_closure") }} is not NULL
then
(cast({{ validate_date("date_of_case_closure") }} as date) - cast({{ validate_date("case_intake_date") }} as date))
({{ validate_date("date_of_case_closure") }} - {{ validate_date("case_intake_date") }})
-- For ongoing cases, calculate duration from intake to today
else
(current_date - cast({{ validate_date("case_intake_date") }} as date))
(current_date - {{ validate_date("case_intake_date") }})
end
end as case_duration_in_days,
gender_site_code_of_reporting,
Expand Down
17 changes: 17 additions & 0 deletions models/staging/gender/staging_gender_survivors_commcare.sql
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,23 @@ with survivors_cte as ({{
)
}})

-- with cleaned_survivors_cte as (
-- SELECT
-- "case_id",
-- "assigned_to",
-- "county",
-- "constituency",
-- "date_of_birth",
-- NULLIF("date_of_registration", '')::date,
-- "gender",
-- "gender_site_code_of_registration",
-- "police_ob_number_for_registration",
-- "registered_by",
-- "village",
-- "ward",
-- "what_is_the_age_provided",
-- "what_is_the_year_of_birth_of_survivor"
-- )

{{ dbt_utils.deduplicate(
relation='survivors_cte',
Expand Down

0 comments on commit 7d6e434

Please sign in to comment.