
Support for setting table properties as part of a model configuration #49

Merged
merged 7 commits on Feb 23, 2022

Changes from 5 commits
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,8 @@
## dbt-databricks 1.0.2 (Release TBD)

### Features
- Support to set tblproperties when creating tables ([#33](https://github.com/databricks/dbt-databricks/issues/33), [#49](https://github.com/databricks/dbt-databricks/pull/49))
Collaborator:
How about "Support for setting table properties as part of a model configuration"


## dbt-databricks 1.0.1 (February 8, 2022)

### Features
1 change: 1 addition & 0 deletions dbt/adapters/databricks/impl.py
@@ -18,6 +18,7 @@ class DatabricksConfig(AdapterConfig):
buckets: Optional[int] = None
options: Optional[Dict[str, str]] = None
merge_update_columns: Optional[str] = None
tblproperties: Optional[Dict[str, str]] = None


class DatabricksAdapter(SparkAdapter):
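With tblproperties registered on DatabricksConfig, a model can pass table properties through its config block. A minimal sketch (the property shown mirrors the unit tests later in this PR; the model body is illustrative):

    {{ config(
        materialized = 'table',
        tblproperties = {
            'delta.appendOnly': 'true'
        }
    ) }}

    select 1 as id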
12 changes: 12 additions & 0 deletions dbt/include/databricks/macros/adapters.sql
@@ -79,6 +79,17 @@
{%- endif %}
{%- endmacro -%}

{% macro dbt_databricks_tblproperties_clause() -%}
{%- set tblproperties = config.get('tblproperties') -%}
{%- if tblproperties is not none %}
tblproperties (
{%- for prop in tblproperties -%}
{{ prop }} = '{{ tblproperties[prop] }}' {% if not loop.last %}, {% endif %}
Collaborator:
It might be better to treat the prop as a string literal and wrap it in single quotes ('delta.appendOnly'), just in case the user puts a reserved keyword in there.

Collaborator:
For tblproperties[prop], technically the value can be a boolean, string, or numeric type. I think wrapping the value in single quotes is okay because it will be implicitly cast, but we should double-check.

{%- endfor %}
)
{%- endif %}
{%- endmacro -%}
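A sketch of the quoting the reviewers suggest above, emitting the property key as a string literal so a reserved keyword cannot break the DDL (an illustration of the suggestion, not the committed code):

    {%- for prop in tblproperties -%}
        '{{ prop }}' = '{{ tblproperties[prop] }}' {% if not loop.last %}, {% endif %}
    {%- endfor %}

Spark SQL accepts string-literal keys in a TBLPROPERTIES clause, so 'delta.appendOnly' = 'true' parses the same as the unquoted form.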

{#-- We can't use temporary tables with `create ... as ()` syntax #}
{% macro dbt_databricks_create_temporary_view(relation, sql) -%}
create temporary view {{ relation.include(schema=false) }} as
@@ -100,6 +111,7 @@
{{ dbt_databricks_clustered_cols(label="clustered by") }}
{{ dbt_databricks_location_clause() }}
{{ dbt_databricks_comment_clause() }}
{{ dbt_databricks_tblproperties_clause() }}
as
{{ sql }}
{%- endif %}
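With the clause wired into databricks__create_table_as, a table model renders DDL along these lines (split across lines here for readability; the unit tests below assert the single-line equivalent):

    create or replace table my_table
    using delta
    tblproperties (
        delta.appendOnly = 'true'
    )
    as
    select 1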
1 change: 1 addition & 0 deletions dbt/include/databricks/macros/materializations/seed.sql
@@ -20,6 +20,7 @@
{{ dbt_databricks_clustered_cols(label="clustered by") }}
{{ dbt_databricks_location_clause() }}
{{ dbt_databricks_comment_clause() }}
{{ dbt_databricks_tblproperties_clause() }}
{% endset %}

{% call statement('_') -%}
@@ -0,0 +1,22 @@
{{ config(
materialized = 'incremental',
unique_key = 'id',
tblproperties={
'delta.autoOptimize.optimizeWrite' : 'true',
'delta.autoOptimize.autoCompact' : 'true'
}
) }}

{% if not is_incremental() %}

select cast(1 as bigint) as id, 'hello' as msg
union all
select cast(2 as bigint) as id, 'goodbye' as msg

{% else %}

select cast(2 as bigint) as id, 'yo' as msg
union all
select cast(3 as bigint) as id, 'anyway' as msg

{% endif %}
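The test below runs this model twice: the first run creates the table with rows 1 and 2, and the second run takes the is_incremental() branch and merges on the unique_key, updating row 2 from 'goodbye' to 'yo' and inserting row 3, which is how expected.csv gets its contents. A rough sketch of the Delta merge issued on the second run (an approximation; the exact statement comes from dbt's incremental materialization, and the temp-relation name is made up):

    merge into set_tblproperties as tgt
    using set_tblproperties__dbt_tmp as src
    on tgt.id = src.id
    when matched then update set *
    when not matched then insert *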
4 changes: 4 additions & 0 deletions tests/integration/set_tblproperties/seeds/expected.csv
@@ -0,0 +1,4 @@
id,msg
1,hello
2,yo
3,anyway
42 changes: 42 additions & 0 deletions tests/integration/set_tblproperties/test_set_tblproperties.py
@@ -0,0 +1,42 @@
from tests.integration.base import DBTIntegrationTest, use_profile
import dbt.exceptions

import json


class TestSetTblproperties(DBTIntegrationTest):
@property
def schema(self):
return "set_tblproperties"

@property
def models(self):
return "models"

def test_set_tblproperties(self):
self.run_dbt(['seed'])
self.run_dbt(['run'])
self.run_dbt(['run'])

self.assertTablesEqual("set_tblproperties", "expected")

results = self.run_sql(
'describe extended {schema}.{table}'.format(
Collaborator:
nit: you can use SHOW TBLPROPERTIES; it's shorter and doesn't need to be parsed.

schema=self.unique_schema(), table='set_tblproperties'
),
fetch='all'
)

for result in results:
if result[0] == 'Table Properties':
assert 'delta.autoOptimize.optimizeWrite' in result[1]
assert 'delta.autoOptimize.autoCompact' in result[1]

@use_profile("databricks_cluster")
def test_set_tblproperties_databricks_cluster(self):
self.test_set_tblproperties()

@use_profile("databricks_sql_endpoint")
def test_set_tblproperties_databricks_sql_endpoint(self):
self.test_set_tblproperties()
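Regarding the nit above, a sketch of what the suggested query would look like (table identifier illustrative; SHOW TBLPROPERTIES returns one key/value row per property, or a single value when a key is given):

    show tblproperties my_schema.set_tblproperties

    show tblproperties my_schema.set_tblproperties ('delta.autoOptimize.optimizeWrite')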
12 changes: 10 additions & 2 deletions tests/unit/test_macros.py
@@ -124,6 +124,13 @@ def test_macros_create_table_as_comment(self):
sql = self.__run_macro(template, 'databricks__create_table_as', False, 'my_table', 'select 1').strip()
self.assertEqual(sql, "create or replace table my_table using delta comment 'Description Test' as select 1")

def test_macros_create_table_as_tblproperties(self):
template = self.__get_template('adapters.sql')

self.config['tblproperties'] = {"delta.appendOnly": "true"}
sql = self.__run_macro(template, 'databricks__create_table_as', False, 'my_table', 'select 1').strip()
self.assertEqual(sql, "create or replace table my_table using delta tblproperties (delta.appendOnly = 'true' ) as select 1")

def test_macros_create_table_as_all(self):
template = self.__get_template('adapters.sql')

@@ -133,17 +140,18 @@
self.config['clustered_by'] = ['cluster_1', 'cluster_2']
self.config['buckets'] = '1'
self.config['persist_docs'] = {'relation': True}
self.config['tblproperties'] = {"delta.appendOnly": "true"}
self.default_context['model'].description = 'Description Test'

sql = self.__run_macro(template, 'databricks__create_table_as', False, 'my_table', 'select 1').strip()
self.assertEqual(
sql,
"create or replace table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1"
"create or replace table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' tblproperties (delta.appendOnly = 'true' ) as select 1"
)

self.config['file_format'] = 'hudi'
sql = self.__run_macro(template, 'databricks__create_table_as', False, 'my_table', 'select 1').strip()
self.assertEqual(
sql,
"create table my_table using hudi partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1"
"create table my_table using hudi partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' tblproperties (delta.appendOnly = 'true' ) as select 1"
)