Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat (static table): implement a read-only table struct loaded from metadata #259

Merged
merged 2 commits into from
Mar 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions crates/iceberg/src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
use crate::io::FileIO;
use crate::scan::TableScanBuilder;
use crate::spec::{TableMetadata, TableMetadataRef};
use crate::Result;
use crate::TableIdent;
use futures::AsyncReadExt;
use typed_builder::TypedBuilder;

/// Table represents a table in the catalog.
Expand All @@ -31,6 +33,8 @@ pub struct Table {
#[builder(setter(into))]
metadata: TableMetadataRef,
identifier: TableIdent,
#[builder(default = false)]
readonly: bool,
}

impl Table {
Expand Down Expand Up @@ -62,4 +66,165 @@ impl Table {
pub fn scan(&self) -> TableScanBuilder<'_> {
// The builder is constructed from a borrow of this table; the elided `'_`
// lifetime in the return type is tied to `&self`.
TableScanBuilder::new(self)
}

/// Returns whether this `Table` is flagged as read-only.
///
/// The flag defaults to `false` when the builder is not told otherwise;
/// `StaticTable` builds its inner table with the flag set to `true`.
pub fn readonly(&self) -> bool {
self.readonly
}
}

/// `StaticTable` is a read-only table struct that can be created from a metadata file or
/// directly from a `TableMetadata` value, without going through a catalog.
///
/// It exposes only read access: the table metadata and a table scan builder.
///
/// # Examples
///
/// ```rust, no_run
/// # use iceberg::io::FileIO;
/// # use iceberg::table::StaticTable;
/// # use iceberg::TableIdent;
/// # async fn example() {
/// let metadata_file_location = "s3://bucket_name/path/to/metadata.json";
/// let file_io = FileIO::from_path(&metadata_file_location).unwrap().build().unwrap();
/// let static_identifier = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();
/// let static_table = StaticTable::from_metadata_file(&metadata_file_location, static_identifier, file_io).await.unwrap();
/// let snapshot_id = static_table
///     .metadata()
///     .current_snapshot()
///     .unwrap()
///     .snapshot_id();
/// # }
/// ```
pub struct StaticTable(Table);
Copy link
Member

@Xuanwo Xuanwo Mar 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`static` has a special meaning in Rust. Have we considered using `ReadOnlyTable` for greater expressiveness?

cc @Fokko @liurenjie1024

Copy link
Contributor Author

@a-agmon a-agmon Mar 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @Xuanwo
I think the idea was to be consistent with the Python API that uses StaticTable for this functionality. I'm also OK with ReadOnlyTable

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


impl StaticTable {
/// Wraps an already-parsed `TableMetadata` in a `StaticTable`.
///
/// The inner `Table` is built from the given metadata, identifier and `FileIO`,
/// with its `readonly` flag set to `true`.
pub async fn from_metadata(
    metadata: TableMetadata,
    table_ident: TableIdent,
    file_io: FileIO,
) -> Result<Self> {
    Ok(Self(
        Table::builder()
            .file_io(file_io)
            .identifier(table_ident)
            .metadata(metadata)
            .readonly(true)
            .build(),
    ))
}
a-agmon marked this conversation as resolved.
Show resolved Hide resolved
/// Builds a `StaticTable` by reading a metadata JSON file through `file_io`.
///
/// The file at `metadata_file_path` is read fully into memory as text,
/// deserialized into `TableMetadata`, and handed to [`Self::from_metadata`].
///
/// # Errors
///
/// Returns an error if the input cannot be opened or read, or if its content
/// does not deserialize into `TableMetadata`.
pub async fn from_metadata_file(
    metadata_file_path: &str,
    table_ident: TableIdent,
    file_io: FileIO,
) -> Result<Self> {
    // Keep the input file in its own binding so the reader never outlives it.
    let input = file_io.new_input(metadata_file_path)?;
    let mut reader = input.reader().await?;

    let mut raw_json = String::new();
    reader.read_to_string(&mut raw_json).await?;

    let metadata: TableMetadata = serde_json::from_str(&raw_json)?;
    Self::from_metadata(metadata, table_ident, file_io).await
}

/// Create a TableScanBuilder for the static table.
pub fn scan(&self) -> TableScanBuilder<'_> {
self.0.scan()
}

/// Get TableMetadataRef for the static table
pub fn metadata(&self) -> TableMetadataRef {
self.0.metadata_ref()
}

/// Consumes the `StaticTable` and return it as a `Table`
/// Please use this method carefully as the Table it returns remains detached from a catalog
/// and can't be used to perform modifications on the table.
pub fn into_table(self) -> Table {
self.0
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Name of the table-metadata fixture shared by every test in this module.
    const METADATA_FILE_NAME: &str = "TableMetadataV2Valid.json";

    /// Returns the path of the metadata fixture (under `CARGO_MANIFEST_DIR`)
    /// together with a `FileIO` built from that path.
    fn metadata_fixture() -> (String, FileIO) {
        let metadata_file_path = format!(
            "{}/testdata/table_metadata/{}",
            env!("CARGO_MANIFEST_DIR"),
            METADATA_FILE_NAME
        );
        let file_io = FileIO::from_path(&metadata_file_path)
            .unwrap()
            .build()
            .unwrap();
        (metadata_file_path, file_io)
    }

    /// Loads the fixture as a `StaticTable` identified as `static_ns.static_table`.
    async fn load_static_table() -> StaticTable {
        let (metadata_file_path, file_io) = metadata_fixture();
        let static_identifier = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();
        StaticTable::from_metadata_file(&metadata_file_path, static_identifier, file_io)
            .await
            .unwrap()
    }

    /// A static table loaded from a metadata file exposes that file's current snapshot.
    #[tokio::test]
    async fn test_static_table_from_file() {
        let static_table = load_static_table().await;
        let snapshot_id = static_table
            .metadata()
            .current_snapshot()
            .unwrap()
            .snapshot_id();
        assert_eq!(
            snapshot_id, 3055729675574597004,
            "snapshot id from metadata don't match"
        );
    }

    /// Converting a static table into a `Table` keeps the read-only flag and the identifier.
    #[tokio::test]
    async fn test_static_into_table() {
        let table = load_static_table().await.into_table();
        assert!(table.readonly());
        assert_eq!(table.identifier.name(), "static_table");
    }

    /// A `Table` built without setting `readonly` reports `false`.
    #[tokio::test]
    async fn test_table_readonly_flag() {
        let (metadata_file_path, file_io) = metadata_fixture();

        // Read and parse the metadata by hand so the table is built through
        // `Table::builder()` rather than through `StaticTable`.
        let metadata_file = file_io.new_input(metadata_file_path).unwrap();
        let mut metadata_file_reader = metadata_file.reader().await.unwrap();
        let mut metadata_file_content = String::new();
        metadata_file_reader
            .read_to_string(&mut metadata_file_content)
            .await
            .unwrap();
        let table_metadata = serde_json::from_str::<TableMetadata>(&metadata_file_content).unwrap();

        let static_identifier = TableIdent::from_strs(["ns", "table"]).unwrap();
        let table = Table::builder()
            .metadata(table_metadata)
            .identifier(static_identifier)
            .file_io(file_io)
            .build();
        assert!(!table.readonly());
        assert_eq!(table.identifier.name(), "table");
    }
}
Loading