Skip to content

Commit

Permalink
feat(ingest): include raw s3 paths if s3 source (#6168)
Browse files (browse the repository at this point in the history)
  • Loading branch information
hsheth2 authored Oct 11, 2022
1 parent ffa838e commit e70c0ac
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 11 deletions.
2 changes: 2 additions & 0 deletions metadata-ingestion/src/datahub/ingestion/source/s3/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,8 @@ def ingest_table(
"number_of_files": str(table_data.number_of_files),
"size_in_bytes": str(table_data.size_in_bytes),
}
if table_data.is_s3:
customProperties["table_path"] = str(table_data.table_path)

dataset_properties = DatasetPropertiesClass(
description="",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"number_of_files": "1",
"size_in_bytes": "172"
"size_in_bytes": "172",
"table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small"
},
"name": "small",
"description": "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"number_of_files": "1",
"size_in_bytes": "3575"
"size_in_bytes": "3575",
"table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv"
},
"name": "NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv",
"description": "",
Expand Down Expand Up @@ -707,7 +708,8 @@
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"number_of_files": "1",
"size_in_bytes": "1024"
"size_in_bytes": "1024",
"table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro"
},
"name": "chord_progressions_avro.avro",
"description": "",
Expand Down Expand Up @@ -828,7 +830,8 @@
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"number_of_files": "1",
"size_in_bytes": "604"
"size_in_bytes": "604",
"table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv"
},
"name": "chord_progressions_csv.csv",
"description": "",
Expand Down Expand Up @@ -949,7 +952,8 @@
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"number_of_files": "1",
"size_in_bytes": "4646"
"size_in_bytes": "4646",
"table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json"
},
"name": "countries_json.json",
"description": "",
Expand Down Expand Up @@ -1046,7 +1050,8 @@
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"number_of_files": "1",
"size_in_bytes": "4206"
"size_in_bytes": "4206",
"table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet"
},
"name": "food_parquet.parquet",
"description": "",
Expand Down Expand Up @@ -1167,7 +1172,8 @@
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"number_of_files": "1",
"size_in_bytes": "172"
"size_in_bytes": "172",
"table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv"
},
"name": "small.csv",
"description": "",
Expand Down Expand Up @@ -1288,7 +1294,8 @@
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"number_of_files": "1",
"size_in_bytes": "34056"
"size_in_bytes": "34056",
"table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv"
},
"name": "wa_fn_usec_hr_employee_attrition_csv.csv",
"description": "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"number_of_files": "1",
"size_in_bytes": "1024"
"size_in_bytes": "1024",
"table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro"
},
"name": "chord_progressions_avro.avro",
"description": "",
Expand Down Expand Up @@ -407,7 +408,8 @@
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"number_of_files": "1",
"size_in_bytes": "604"
"size_in_bytes": "604",
"table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv"
},
"name": "chord_progressions_csv.csv",
"description": "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"number_of_files": "1",
"size_in_bytes": "1024"
"size_in_bytes": "1024",
"table_path": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro"
},
"name": "chord_progressions_avro.avro",
"description": "",
Expand Down

0 comments on commit e70c0ac

Please sign in to comment.