diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index bcc611ef773293..6070e5ddf8ddef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -543,6 +543,8 @@ def ingest_table( "number_of_files": str(table_data.number_of_files), "size_in_bytes": str(table_data.size_in_bytes), } + if table_data.is_s3: + customProperties["table_path"] = str(table_data.table_path) dataset_properties = DatasetPropertiesClass( description="", diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json index 75b3a72aa4eb3a..6f3261b96a277a 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json @@ -8,7 +8,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "number_of_files": "1", - "size_in_bytes": "172" + "size_in_bytes": "172", + "table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small" }, "name": "small", "description": "", diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json index aba52da527be6d..c1df865bb00c63 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json @@ -8,7 +8,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "number_of_files": "1", - "size_in_bytes": "3575" + "size_in_bytes": "3575", + "table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv" }, "name": "NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv", "description": "", @@ -707,7 +708,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "number_of_files": "1", - "size_in_bytes": "1024" + "size_in_bytes": "1024", + "table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro" }, "name": "chord_progressions_avro.avro", "description": "", @@ -828,7 +830,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "number_of_files": "1", - "size_in_bytes": "604" + "size_in_bytes": "604", + "table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv" }, "name": "chord_progressions_csv.csv", "description": "", @@ -949,7 +952,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "number_of_files": "1", - "size_in_bytes": "4646" + "size_in_bytes": "4646", + "table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json" }, "name": "countries_json.json", "description": "", @@ -1046,7 +1050,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "number_of_files": "1", - "size_in_bytes": "4206" + "size_in_bytes": "4206", + "table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet" }, "name": "food_parquet.parquet", "description": "", @@ -1167,7 +1172,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "number_of_files": "1", - "size_in_bytes": "172" + "size_in_bytes": "172", + "table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv" }, "name": "small.csv", "description": "", @@ -1288,7 +1294,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "number_of_files": "1", - "size_in_bytes": "34056" + "size_in_bytes": "34056", + "table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv" }, "name": "wa_fn_usec_hr_employee_attrition_csv.csv", "description": "", diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json index 95e18b6e1f871f..18c4596001d0cf 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json @@ -8,7 +8,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "number_of_files": "1", - "size_in_bytes": "1024" + "size_in_bytes": "1024", + "table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro" }, "name": "chord_progressions_avro.avro", "description": "", @@ -407,7 +408,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "number_of_files": "1", - "size_in_bytes": "604" + "size_in_bytes": "604", + "table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv" }, "name": "chord_progressions_csv.csv", "description": "", diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json index d8825802ebb189..255a1ae95eeb6f 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json @@ -8,7 +8,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "number_of_files": "1", - "size_in_bytes": "1024" + "size_in_bytes": "1024", + "table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro" }, "name": "chord_progressions_avro.avro", "description": "",