From f41966f4f7fe00df5eee21c167115d63e868b680 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sun, 12 Mar 2017 20:44:13 -0400 Subject: [PATCH 1/2] support composite UKs for archival --- dbt/runner.py | 6 ++++-- dbt/templates.py | 10 +++++----- test/integration/004_simple_archive_test/seed.sql | 2 +- .../004_simple_archive_test/test_simple_archive.py | 2 +- test/integration/004_simple_archive_test/update.sql | 4 ++-- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/dbt/runner.py b/dbt/runner.py index 1a042136907..f3d5277eee2 100644 --- a/dbt/runner.py +++ b/dbt/runner.py @@ -288,6 +288,8 @@ def execute_archive(profile, node, context): profile, node_cfg.get('source_schema'), node_cfg.get('source_table')) if len(source_columns) == 0: + source_schema = node_cfg.get('source_schema') + source_table = node_cfg.get('source_table') raise RuntimeError( 'Source table "{}"."{}" does not ' 'exist'.format(source_schema, source_table)) @@ -304,8 +306,8 @@ def execute_archive(profile, node, context): schema=node_cfg.get('target_schema'), table=node_cfg.get('target_table'), columns=dest_columns, - sort=node_cfg.get('updated_at'), - dist=node_cfg.get('unique_key')) + sort='dbt_updated_at', + dist='scd_id') # TODO move this to inject_runtime_config, generate archive SQL # in wrap step. can't do this right now because we actually need diff --git a/dbt/templates.py b/dbt/templates.py index a1aef839ec7..8c56a8d61ba 100644 --- a/dbt/templates.py +++ b/dbt/templates.py @@ -120,9 +120,9 @@ def wrap(self, opts): {% for col in get_columns_in_table(source_schema, source_table) %} "{{ col.name }}" {% if not loop.last %},{% endif %} {% endfor %}, - "{{ updated_at }}" as "dbt_updated_at", - "{{ unique_key }}" as "dbt_pk", - "{{ updated_at }}" as "valid_from", + {{ updated_at }} as "dbt_updated_at", + {{ unique_key }} as "dbt_pk", + {{ updated_at }} as "valid_from", null::timestamp as "tmp_valid_to" from "{{ source_schema }}"."{{ source_table }}" @@ -134,8 +134,8 @@ def wrap(self, opts): {% for col in get_columns_in_table(source_schema, source_table) %} "{{ col.name }}" {% if not loop.last %},{% endif %} {% endfor %}, - "{{ updated_at }}" as "dbt_updated_at", - "{{ unique_key }}" as "dbt_pk", + {{ updated_at }} as "dbt_updated_at", + {{ unique_key }} as "dbt_pk", "valid_from", "valid_to" as "tmp_valid_to" from "{{ target_schema }}"."{{ target_table }}" diff --git a/test/integration/004_simple_archive_test/seed.sql b/test/integration/004_simple_archive_test/seed.sql index 7053ee7c67d..83065f102d7 100644 --- a/test/integration/004_simple_archive_test/seed.sql +++ b/test/integration/004_simple_archive_test/seed.sql @@ -76,5 +76,5 @@ select "updated_at" as valid_from, null::timestamp as valid_to, "updated_at" as dbt_updated_at, - md5("id" || '|' || "updated_at"::text) as scd_id + md5("id" || '-' || "first_name" || '|' || "updated_at"::text) as scd_id from "simple_archive_004"."seed"; diff --git a/test/integration/004_simple_archive_test/test_simple_archive.py b/test/integration/004_simple_archive_test/test_simple_archive.py index 87de009fbe5..6343b9280da 100644 --- a/test/integration/004_simple_archive_test/test_simple_archive.py +++ b/test/integration/004_simple_archive_test/test_simple_archive.py @@ -26,7 +26,7 @@ def project_config(self): "source_table": "seed", "target_table": "archive_actual", "updated_at": "updated_at", - "unique_key": "id" + "unique_key": "id || '-' || first_name" } ] } diff --git a/test/integration/004_simple_archive_test/update.sql b/test/integration/004_simple_archive_test/update.sql index 71927b2ea1d..da80b0858de 100644 --- a/test/integration/004_simple_archive_test/update.sql +++ b/test/integration/004_simple_archive_test/update.sql @@ -26,7 +26,7 @@ select "updated_at" as "valid_from", null::timestamp as "valid_to", "updated_at" as "dbt_updated_at", - md5("id" || '|' || "updated_at"::text) as "scd_id" + md5("id" || '-' || "first_name" || '|' || "updated_at"::text) as "scd_id" from "simple_archive_004"."seed" where "id" >= 10 and "id" <= 20; @@ -72,6 +72,6 @@ select "updated_at" as "valid_from", null::timestamp as "valid_to", "updated_at" as "dbt_updated_at", - md5("id" || '|' || "updated_at"::text) as "scd_id" + md5("id" || '-' || "first_name" || '|' || "updated_at"::text) as "scd_id" from "simple_archive_004"."seed" where "id" > 20; From 855b0114f0d8787897cfb88a2935986a3b223ded Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sun, 12 Mar 2017 20:45:26 -0400 Subject: [PATCH 2/2] add composite uk note to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 124cdef77e5..4d360c4ca4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Bugfixes - Fix ephemeral load order bug ([#292](https://github.com/fishtown-analytics/dbt/pull/292), [#285](https://github.com/fishtown-analytics/dbt/pull/285)) +- Support composite unique key in archivals ([#324](https://github.com/fishtown-analytics/dbt/pull/324)) ### Changes