From af460378ba6ec0eee6470d4438eaa7e044aea66b Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 4 Aug 2023 17:54:44 +0800 Subject: [PATCH 1/9] rfc: List With Metakey Signed-off-by: Xuanwo --- core/src/docs/rfcs/0000_list_query.md | 121 ++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 core/src/docs/rfcs/0000_list_query.md diff --git a/core/src/docs/rfcs/0000_list_query.md b/core/src/docs/rfcs/0000_list_query.md new file mode 100644 index 000000000000..8ff1e1cd61ab --- /dev/null +++ b/core/src/docs/rfcs/0000_list_query.md @@ -0,0 +1,121 @@ +- Proposal Name: `list_with_metakey` +- Start Date: 2023-08-17 +- RFC PR: [apache/incubator-opendal#0000](https://github.com/apache/incubator-opendal/pull/0000) +- Tracking Issue: [apache/incubator-opendal#0000](https://github.com/apache/incubator-opendal/issues/0000) + +# Summary + +Move `metadata` API to `Lister` to simplify the usage. + +# Motivation + +The current `Entry` metadata API is: + +```rust +use opendal::Entry; +use opendal::Metakey; + +let meta = op + .metadata(&entry, Metakey::ContentLength | Metakey::ContentType) + .await?; +``` + +This API is difficult to understand and rarely used correctly. And in reality, users always fetch the same set of metadata during listing. + +Take one of our users code as an example: + +```rust +let stream = self + .inner + .scan(&path) + .await + .map_err(|err| format_object_store_error(err, &path))?; + +let stream = stream.then(|res| async { + let entry = res.map_err(|err| format_object_store_error(err, ""))?; + let meta = self + .inner + .metadata(&entry, Metakey::ContentLength | Metakey::LastModified) + .await + .map_err(|err| format_object_store_error(err, entry.path()))?; + + Ok(format_object_meta(entry.path(), &meta)) +}); + +Ok(stream.boxed()) +``` + +By moving metadata to `lister`, our user code can be simplified to: + +```rust +let stream = self + .inner + .scan_with(&path) + .metakey(Metakey::ContentLength | Metakey::LastModified) + .await + .map_err(|err| format_object_store_error(err, &path))?; + +let stream = stream.then(|res| async { + let entry = res.map_err(|err| format_object_store_error(err, ""))?; + let meta = entry.into_metadata() + + Ok(format_object_meta(entry.path(), &meta)) +}); + +Ok(stream.boxed()) +``` + +By introducing this change: + +- Users don't need to capture `Operator` in the closure. +- Users don't need to do async call like `metadata()` again. + +# Guide-level explanation + +The new API will be: + +```rust +let entries: Vec = op + .list_with("dir") + .metakey(Metakey::ContentLength | Metakey::ContentType).await?; + +let meta: &Metadata = entries[0].metadata(); +``` + +Metadata can be queried directly when listing entries via `metadata()`, and later extracted via `into_parts()`. + +# Reference-level explanation + +We will add `metakey` into `OpList`. Underlying services can use those information to try their best to fetch the metadata. + +There are following possibilities: + +- The entry metadata is met: `Lister` return the entry directly +- The entry metadata is not met and not fully filled: `Lister` will try to send `stat` call to fetch the metadata +- The entry metadata is not met and fully filled: `Lister` will return the entry directly. + +To make sure we can handle all metadata correctly, we will add a new capability called `stat_max_metakey`. This capability will be used to indicate the maximum number of metadata that can be fetched via `stat` call. `Lister` can use this capability to decide whether to send `stat` call or not. + +Services' lister implementation should not changed. + +# Drawbacks + +None + +# Rationale and alternatives + +Keeping the complex standalone API has limited benefit given low usage. + +# Prior art + +None + +# Unresolved questions + +None + +# Future possibilities + +## Add glob and regex support for Lister + +We can add `glob` and `regex` support for `Lister` to make it more powerful. From b9d62332ce0df2367ff455773ba9ca08efd438cc Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 4 Aug 2023 17:55:24 +0800 Subject: [PATCH 2/9] Rename Signed-off-by: Xuanwo --- .../docs/rfcs/{0000_list_query.md => 0000_list_with_metakey.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename core/src/docs/rfcs/{0000_list_query.md => 0000_list_with_metakey.md} (100%) diff --git a/core/src/docs/rfcs/0000_list_query.md b/core/src/docs/rfcs/0000_list_with_metakey.md similarity index 100% rename from core/src/docs/rfcs/0000_list_query.md rename to core/src/docs/rfcs/0000_list_with_metakey.md From ab47289795eedde03454d91c7db7f4c06805cba4 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 4 Aug 2023 17:57:23 +0800 Subject: [PATCH 3/9] assign number Signed-off-by: Xuanwo --- .../{0000_list_with_metakey.md => 2779_list_with_metakey.md} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename core/src/docs/rfcs/{0000_list_with_metakey.md => 2779_list_with_metakey.md} (97%) diff --git a/core/src/docs/rfcs/0000_list_with_metakey.md b/core/src/docs/rfcs/2779_list_with_metakey.md similarity index 97% rename from core/src/docs/rfcs/0000_list_with_metakey.md rename to core/src/docs/rfcs/2779_list_with_metakey.md index 8ff1e1cd61ab..b0acfef52a07 100644 --- a/core/src/docs/rfcs/0000_list_with_metakey.md +++ b/core/src/docs/rfcs/2779_list_with_metakey.md @@ -1,6 +1,6 @@ - Proposal Name: `list_with_metakey` - Start Date: 2023-08-17 -- RFC PR: [apache/incubator-opendal#0000](https://github.com/apache/incubator-opendal/pull/0000) +- RFC PR: [apache/incubator-opendal#2779](https://github.com/apache/incubator-opendal/pull/2779) - Tracking Issue: [apache/incubator-opendal#0000](https://github.com/apache/incubator-opendal/issues/0000) # Summary From f60c0237258a3042d7efa4774cdc5b71b99b20e8 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 4 Aug 2023 17:57:35 +0800 Subject: [PATCH 4/9] Fix typo Signed-off-by: Xuanwo --- core/src/docs/rfcs/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/src/docs/rfcs/mod.rs b/core/src/docs/rfcs/mod.rs index 8f7c2be50971..e5e58f4a8e54 100644 --- a/core/src/docs/rfcs/mod.rs +++ b/core/src/docs/rfcs/mod.rs @@ -151,3 +151,6 @@ pub mod rfc_2758_merge_append_into_write {} #[doc = include_str!("2774_lister_api.md")] pub mod rfc_2774_lister_api {} + +#[doc = include_str!("2779_list_with_metakey.md")] +pub mod rfc_2779_list_with_metakey {} From d44b4c4f5a84d461aaebffe7bf65fa839e3f798f Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 4 Aug 2023 18:02:47 +0800 Subject: [PATCH 5/9] Fix date Signed-off-by: Xuanwo --- core/src/docs/rfcs/2779_list_with_metakey.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/docs/rfcs/2779_list_with_metakey.md b/core/src/docs/rfcs/2779_list_with_metakey.md index b0acfef52a07..921a7bc3eb31 100644 --- a/core/src/docs/rfcs/2779_list_with_metakey.md +++ b/core/src/docs/rfcs/2779_list_with_metakey.md @@ -1,5 +1,5 @@ - Proposal Name: `list_with_metakey` -- Start Date: 2023-08-17 +- Start Date: 2023-08-04 - RFC PR: [apache/incubator-opendal#2779](https://github.com/apache/incubator-opendal/pull/2779) - Tracking Issue: [apache/incubator-opendal#0000](https://github.com/apache/incubator-opendal/issues/0000) From 45c03a94ba01806abd9aa81cd9501d2883488145 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 4 Aug 2023 20:43:29 +0800 Subject: [PATCH 6/9] Fix summary Signed-off-by: Xuanwo --- core/src/docs/rfcs/2779_list_with_metakey.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/docs/rfcs/2779_list_with_metakey.md b/core/src/docs/rfcs/2779_list_with_metakey.md index 921a7bc3eb31..c37e11487da9 100644 --- a/core/src/docs/rfcs/2779_list_with_metakey.md +++ b/core/src/docs/rfcs/2779_list_with_metakey.md @@ -5,7 +5,7 @@ # Summary -Move `metadata` API to `Lister` to simplify the usage. +Move `Operator` `metadata` API to `list_with().metakey()` to simplify the usage. # Motivation From 5606a7013d155198a6871c8c405d7aeadbdf3e7b Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 4 Aug 2023 20:47:27 +0800 Subject: [PATCH 7/9] Fix Signed-off-by: Xuanwo --- core/src/docs/rfcs/2779_list_with_metakey.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/docs/rfcs/2779_list_with_metakey.md b/core/src/docs/rfcs/2779_list_with_metakey.md index c37e11487da9..2e182e46c083 100644 --- a/core/src/docs/rfcs/2779_list_with_metakey.md +++ b/core/src/docs/rfcs/2779_list_with_metakey.md @@ -94,7 +94,7 @@ There are following possibilities: - The entry metadata is not met and not fully filled: `Lister` will try to send `stat` call to fetch the metadata - The entry metadata is not met and fully filled: `Lister` will return the entry directly. -To make sure we can handle all metadata correctly, we will add a new capability called `stat_max_metakey`. This capability will be used to indicate the maximum number of metadata that can be fetched via `stat` call. `Lister` can use this capability to decide whether to send `stat` call or not. +To make sure we can handle all metadata correctly, we will add a new capability called `stat_complete_metakey`. This capability will be used to indicate the complete set of metadata that can be fetched via `stat` call. For example, `s3` can set this capability to `ContentLength | ContentType | LastModified | ...`, and `fs` only have `ContentLength | LastModified`. `Lister` can use this capability to decide whether to send `stat` call or not. Services' lister implementation should not changed. From 86c28658579d92224112cdd0d4004b2f532b184f Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 4 Aug 2023 20:53:52 +0800 Subject: [PATCH 8/9] Fix Signed-off-by: Xuanwo --- core/src/docs/rfcs/2779_list_with_metakey.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/src/docs/rfcs/2779_list_with_metakey.md b/core/src/docs/rfcs/2779_list_with_metakey.md index 2e182e46c083..43d94ef10960 100644 --- a/core/src/docs/rfcs/2779_list_with_metakey.md +++ b/core/src/docs/rfcs/2779_list_with_metakey.md @@ -70,6 +70,11 @@ By introducing this change: - Users don't need to capture `Operator` in the closure. - Users don't need to do async call like `metadata()` again. +If we don't have this change: + +- every place that could receive a `fn()` must use `Fn()` instead which enforce users to have a generic parameter in their code. +- It's harder for other languages binding to implement `op.metadata()` right. + # Guide-level explanation The new API will be: From af87ca009bd4e5259ff20677495e1811bdb4a84b Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 7 Aug 2023 14:02:17 +0800 Subject: [PATCH 9/9] Add more context on how metakey works Signed-off-by: Xuanwo --- core/src/docs/rfcs/2779_list_with_metakey.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/core/src/docs/rfcs/2779_list_with_metakey.md b/core/src/docs/rfcs/2779_list_with_metakey.md index 43d94ef10960..783e3cb5b3db 100644 --- a/core/src/docs/rfcs/2779_list_with_metakey.md +++ b/core/src/docs/rfcs/2779_list_with_metakey.md @@ -1,7 +1,7 @@ - Proposal Name: `list_with_metakey` - Start Date: 2023-08-04 - RFC PR: [apache/incubator-opendal#2779](https://github.com/apache/incubator-opendal/pull/2779) -- Tracking Issue: [apache/incubator-opendal#0000](https://github.com/apache/incubator-opendal/issues/0000) +- Tracking Issue: [apache/incubator-opendal#2802](https://github.com/apache/incubator-opendal/issues/2802) # Summary @@ -91,6 +91,23 @@ Metadata can be queried directly when listing entries via `metadata()`, and late # Reference-level explanation +## How metakey works + +For every services, `stat` will return the full set of it's metadata. For example, `s3` will return `ContentLength | ContentType | LastModified | ...`, and `fs` will return `ContentLength | LastModified`. And most services will return part of those metadata during `list`. `s3` will return `ContentLength`, `LastModified`, but `fs` returns none of them. + +So when users use `list` to list entries, they will get a list of entries with incomplete metadata. The metadata could be in three states: + +- Filled: the metadata is returned in `list` +- NotExist: the metadata is not supported by service. +- Unknown: the metadata is supported by service but not returned in `list`. + +By accept `metakey`, we can compare the returning entry's metadata with metakey: + +- Return the entry if metakey already met by `Filled` and `NotExist`. +- Send `stat` call to fetch the metadata if metadata is `Unknown`. + +## Changes + We will add `metakey` into `OpList`. Underlying services can use those information to try their best to fetch the metadata. There are following possibilities: