From 940ef003e3a8765f576fb2145fabf74b98ec0dff Mon Sep 17 00:00:00 2001 From: Bohdan Udovenko Date: Fri, 1 Nov 2024 10:42:20 +0200 Subject: [PATCH 1/5] Comments are optional for athena table columns. This change fixes openlineage behavior for AthenaOperator. --- providers/src/airflow/providers/amazon/aws/operators/athena.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/providers/src/airflow/providers/amazon/aws/operators/athena.py b/providers/src/airflow/providers/amazon/aws/operators/athena.py index d48ac751d4239..f5f41635373ac 100644 --- a/providers/src/airflow/providers/amazon/aws/operators/athena.py +++ b/providers/src/airflow/providers/amazon/aws/operators/athena.py @@ -311,7 +311,7 @@ def get_openlineage_dataset(self, database, table) -> Dataset | None: } fields = [ SchemaDatasetFacetFields( - name=column["Name"], type=column["Type"], description=column["Comment"] + name=column["Name"], type=column["Type"], description=column["Comment"] if 'Comment' in column else None ) for column in table_metadata["TableMetadata"]["Columns"] ] From 83391d917dbcfcbbb4b1133932a622f8f2a82ffb Mon Sep 17 00:00:00 2001 From: Bohdan Udovenko Date: Fri, 1 Nov 2024 11:55:32 +0200 Subject: [PATCH 2/5] Fix linter --- .../src/airflow/providers/amazon/aws/operators/athena.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/providers/src/airflow/providers/amazon/aws/operators/athena.py b/providers/src/airflow/providers/amazon/aws/operators/athena.py index f5f41635373ac..8e3c9d3a5041e 100644 --- a/providers/src/airflow/providers/amazon/aws/operators/athena.py +++ b/providers/src/airflow/providers/amazon/aws/operators/athena.py @@ -311,7 +311,9 @@ def get_openlineage_dataset(self, database, table) -> Dataset | None: } fields = [ SchemaDatasetFacetFields( - name=column["Name"], type=column["Type"], description=column["Comment"] if 'Comment' in column else None + name=column["Name"], + type=column["Type"], + description=column["Comment"] if 'Comment' in column else None ) for column in table_metadata["TableMetadata"]["Columns"] ] From f258dcb49baa0cf9df8e1eca98925cb70edfc109 Mon Sep 17 00:00:00 2001 From: Bohdan Udovenko Date: Fri, 1 Nov 2024 13:35:33 +0200 Subject: [PATCH 3/5] Fix linter v2 and just revert changes for description --- providers/src/airflow/providers/amazon/aws/operators/athena.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/providers/src/airflow/providers/amazon/aws/operators/athena.py b/providers/src/airflow/providers/amazon/aws/operators/athena.py index 8e3c9d3a5041e..c602611803ed5 100644 --- a/providers/src/airflow/providers/amazon/aws/operators/athena.py +++ b/providers/src/airflow/providers/amazon/aws/operators/athena.py @@ -313,7 +313,7 @@ def get_openlineage_dataset(self, database, table) -> Dataset | None: SchemaDatasetFacetFields( name=column["Name"], type=column["Type"], - description=column["Comment"] if 'Comment' in column else None + description=column.get("Comment"), ) for column in table_metadata["TableMetadata"]["Columns"] ] From eb56c4563ea2d242f5735b084758fd8d1f2d31b8 Mon Sep 17 00:00:00 2001 From: Bohdan Udovenko Date: Mon, 4 Nov 2024 10:02:08 +0200 Subject: [PATCH 4/5] Fix athena metadata - comments to fields are optional. --- providers/tests/amazon/aws/operators/athena_metadata.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/providers/tests/amazon/aws/operators/athena_metadata.json b/providers/tests/amazon/aws/operators/athena_metadata.json index f13b1241742ab..121a51920c9f0 100644 --- a/providers/tests/amazon/aws/operators/athena_metadata.json +++ b/providers/tests/amazon/aws/operators/athena_metadata.json @@ -28,8 +28,7 @@ }, { "Name": "ENDS_ON", - "Type": "timestamp", - "Comment": "from deserializer" + "Type": "timestamp" } ], "PartitionKeys": [], From 332506c6be61df468f6d75491f015b2c5cfd1293 Mon Sep 17 00:00:00 2001 From: Bohdan Udovenko Date: Mon, 4 Nov 2024 13:50:59 +0200 Subject: [PATCH 5/5] Fix expected mock result --- providers/tests/amazon/aws/operators/test_athena.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/providers/tests/amazon/aws/operators/test_athena.py b/providers/tests/amazon/aws/operators/test_athena.py index e4d7d6622a091..5ee557caea906 100644 --- a/providers/tests/amazon/aws/operators/test_athena.py +++ b/providers/tests/amazon/aws/operators/test_athena.py @@ -358,7 +358,7 @@ def mock_get_table_metadata(CatalogName, DatabaseName, TableName): SchemaDatasetFacetFields( name="ENDS_ON", type="timestamp", - description="from deserializer", + description=None, ), ], ),