From 7e33f96bae865d26552984d0343be9f26b099116 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Sat, 14 Dec 2024 18:16:16 +0800 Subject: [PATCH 1/2] feat: Store file io props to allow re-build it Signed-off-by: Xuanwo --- crates/iceberg/src/io/file_io.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/crates/iceberg/src/io/file_io.rs b/crates/iceberg/src/io/file_io.rs index 8365d622c..2a042410e 100644 --- a/crates/iceberg/src/io/file_io.rs +++ b/crates/iceberg/src/io/file_io.rs @@ -43,10 +43,21 @@ use crate::{Error, ErrorKind, Result}; /// | GCS | `storage-gcs` | `gcs` | #[derive(Clone, Debug)] pub struct FileIO { + scheme: String, + props: HashMap, + inner: Arc, } impl FileIO { + /// Split file IO into scheme and props which used to build this FileIO. + /// + /// This function is useful when you want serialize and deserialize FileIO across + /// distributed systems. + pub fn into_props(self) -> (String, HashMap) { + (self.scheme, self.props) + } + /// Try to infer file io scheme from path. See [`FileIO`] for supported schemes. /// /// - If it's a valid url, for example `s3://bucket/a`, url scheme will be used, and the rest of the url will be ignored. @@ -187,8 +198,12 @@ impl FileIOBuilder { /// Builds [`FileIO`]. 
pub fn build(self) -> crate::Result { + let scheme = self.scheme_str.clone().unwrap_or_default(); + let props = self.props.clone(); let storage = Storage::build(self)?; Ok(FileIO { + scheme, + props, inner: Arc::new(storage), }) } From 1569979e58d5dd694a12d0947b36913f55259549 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Sat, 14 Dec 2024 20:49:43 +0800 Subject: [PATCH 2/2] Store file io builder instead Signed-off-by: Xuanwo --- crates/iceberg/src/io/file_io.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/crates/iceberg/src/io/file_io.rs b/crates/iceberg/src/io/file_io.rs index 2a042410e..7eaa6aa6d 100644 --- a/crates/iceberg/src/io/file_io.rs +++ b/crates/iceberg/src/io/file_io.rs @@ -43,19 +43,18 @@ use crate::{Error, ErrorKind, Result}; /// | GCS | `storage-gcs` | `gcs` | #[derive(Clone, Debug)] pub struct FileIO { - scheme: String, - props: HashMap, + builder: FileIOBuilder, inner: Arc, } impl FileIO { - /// Split file IO into scheme and props which used to build this FileIO. + /// Convert FileIO into [`FileIOBuilder`] which is used to build this FileIO. /// /// This function is useful when you want serialize and deserialize FileIO across /// distributed systems. - pub fn into_props(self) -> (String, HashMap) { - (self.scheme, self.props) + pub fn into_builder(self) -> FileIOBuilder { + self.builder } /// Try to infer file io scheme from path. See [`FileIO`] for supported schemes. @@ -145,7 +144,7 @@ impl FileIO { } /// Builder for [`FileIO`]. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct FileIOBuilder { /// This is used to infer scheme of operator. /// @@ -176,7 +175,7 @@ impl FileIOBuilder { /// Fetch the scheme string. /// /// The scheme_str will be empty if it's None. - pub(crate) fn into_parts(self) -> (String, HashMap) { + pub fn into_parts(self) -> (String, HashMap) { (self.scheme_str.unwrap_or_default(), self.props) } @@ -197,13 +196,10 @@ impl FileIOBuilder { } /// Builds [`FileIO`]. 
- pub fn build(self) -> crate::Result { - let scheme = self.scheme_str.clone().unwrap_or_default(); - let props = self.props.clone(); - let storage = Storage::build(self)?; + pub fn build(self) -> Result { + let storage = Storage::build(self.clone())?; Ok(FileIO { - scheme, - props, + builder: self, inner: Arc::new(storage), }) }