From 20c54e2af21a58d3cbd00d51116594df701dab78 Mon Sep 17 00:00:00 2001 From: treff7es Date: Mon, 5 Aug 2024 21:09:54 +0200 Subject: [PATCH] Update doc --- metadata-ingestion/docs/sources/s3/s3.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/metadata-ingestion/docs/sources/s3/s3.md b/metadata-ingestion/docs/sources/s3/s3.md index 9484cd8de6666e..0367b3a2ce3e1d 100644 --- a/metadata-ingestion/docs/sources/s3/s3.md +++ b/metadata-ingestion/docs/sources/s3/s3.md @@ -75,6 +75,11 @@ Path specs config to ingest folders `orders` and `returns` as datasets: path_specs: - include: s3://test-bucket/{table}/{partition_key[0]}={partition[0]}/{partition_key[1]}={partition[1]}/*.parquet ``` +or with partition auto-detection: +``` +path_specs: + - include: s3://test-bucket/{table}/ +``` One can also use `include: s3://test-bucket/{table}/*/*/*.parquet` here however above format is preferred as it allows declaring partitions explicitly. @@ -150,6 +155,7 @@ Above config has 3 path_specs and will ingest following datasets s3://my-bucket/foo/tests/bar.avro # single file table s3://my-bucket/foo/tests/*.* # mulitple file level tables s3://my-bucket/foo/tests/{table}/*.avro #table without partition +s3://my-bucket/foo/tests/{table}/ #table with partition auto-detection. Partitions can only be detected if they are in the key=value format s3://my-bucket/foo/tests/{table}/*/*.avro #table where partitions are not specified s3://my-bucket/foo/tests/{table}/*.* # table where no partitions as well as data type specified s3://my-bucket/{dept}/tests/{table}/*.avro # specifying keywords to be used in display name