diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..485dee6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.idea diff --git a/README.md b/README.md index 72ca71e..d312126 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,11 @@ To install the package add the package path to the `packages.yml` file in your d In order to use the model audit post-hook the following variables have to be set in your `dbt_project.yml` file. -| Variable | Description | -| --------------------- | -------------------------- | -| `dbt_ml:audit_schema` | Schema of the audit table. | -| `dbt_ml:audit_table` | Name of the audit table. | +| Variable | Description | +| ---------------------- | --------------------------------- | +| `dbt_ml:audit_database`| GCP project that contains the audit table. | +| `dbt_ml:audit_schema` | Schema of the audit table. | +| `dbt_ml:audit_table` | Name of the audit table. | You will also need to specify the post-hook in your `dbt_project.yml` file[1] as `{{ dbt_ml.model_audit() }}`. Optionally, you can use the `dbt_ml.create_model_audit_table()` macro to create the audit table automatically if it does not exist - for example in an on-run-start hook. 
@@ -20,6 +21,7 @@ Example config for `dbt_project.yml` below: vars: "dbt_ml:audit_schema": "audit" "dbt_ml:audit_table": "ml_models" + "dbt_ml:audit_database": "database" on-run-start: - '{% do adapter.create_schema(api.Relation.create(target.project, "audit")) %}' - "{{ dbt_ml.create_model_audit_table() }}" diff --git a/macros/hooks/model_audit.sql b/macros/hooks/model_audit.sql index 74b4b36..7065fb4 100644 --- a/macros/hooks/model_audit.sql +++ b/macros/hooks/model_audit.sql @@ -5,7 +5,7 @@ 'schema': 'string', 'created_at': type_timestamp(), 'training_info': 'array>>>', - 'feature_info': 'array>', + 'feature_info': 'array>', 'weights': 'array>>>', 'evaluate': 'array>', }) %} @@ -100,7 +100,7 @@ tensorflow: {} {% set info_types = ['training_info', 'feature_info', 'weights', 'evaluate'] %} - insert `{{ target.database }}.{{ var('dbt_ml:audit_schema') }}.{{ var('dbt_ml:audit_table') }}` + insert `{{ var('dbt_ml:audit_database') }}.{{ var('dbt_ml:audit_schema') }}.{{ var('dbt_ml:audit_table') }}` (model, schema, created_at, {{ info_types | join(', ') }}) select @@ -125,7 +125,7 @@ tensorflow: {} {% macro create_model_audit_table() %} {%- set audit_table = api.Relation.create( - database=target.database, + database=var('dbt_ml:audit_database'), schema=var('dbt_ml:audit_schema'), identifier=var('dbt_ml:audit_table'), type='table' diff --git a/macros/materializations/model.sql b/macros/materializations/model.sql index 529692e..354a50d 100644 --- a/macros/materializations/model.sql +++ b/macros/materializations/model.sql @@ -19,18 +19,11 @@ {% endmacro %} {% macro model_options(ml_config, labels) %} - {%- if labels -%} - {%- set label_list = [] -%} - {%- for label, value in labels.items() -%} - {%- do label_list.append((label, value)) -%} - {%- endfor -%} - {%- do ml_config.update({'labels': label_list}) -%} - {%- endif -%} {% set options -%} options( {%- for opt_key, opt_val in ml_config.items() -%} - {%- if opt_val is sequence and not (opt_val | first) is number and 
(opt_val | first).startswith('hparam_') -%} + {%- if opt_val is sequence and (opt_val | first) is string and (opt_val | first).startswith('hparam_') -%} {{ opt_key }}={{ opt_val[0] }}({{ opt_val[1:] | join(', ') }}) {%- else -%} {{ opt_key }}={{ (opt_val | tojson) if opt_val is string else opt_val }}