diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs
index 708e6bf3c..ad7acf4a2 100644
--- a/crates/iceberg/src/catalog/mod.rs
+++ b/crates/iceberg/src/catalog/mod.rs
@@ -18,7 +18,8 @@
 //! Catalog API for Apache Iceberg
 
 use crate::spec::{
-    FormatVersion, Schema, Snapshot, SnapshotReference, SortOrder, UnboundPartitionSpec,
+    FormatVersion, Schema, Snapshot, SnapshotReference, SortOrder, TableMetadataBuilder,
+    UnboundPartitionSpec,
 };
 use crate::table::Table;
 use crate::{Error, ErrorKind, Result};
@@ -427,14 +428,33 @@ pub enum TableUpdate {
     },
 }
 
+impl TableUpdate {
+    /// Applies the update to the table metadata builder.
+    ///
+    /// # Errors
+    ///
+    /// Returns an [`ErrorKind::FeatureUnsupported`] error for every update other than
+    /// [`TableUpdate::AssignUuid`], because applying those is not implemented yet.
+    pub fn apply(self, builder: TableMetadataBuilder) -> Result<TableMetadataBuilder> {
+        match self {
+            TableUpdate::AssignUuid { uuid } => builder.assign_uuid(uuid),
+            // Report unsupported updates as an error instead of panicking in the caller.
+            _ => Err(Error::new(
+                ErrorKind::FeatureUnsupported,
+                "Only the AssignUuid table update can be applied now",
+            )),
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use crate::spec::{
         FormatVersion, NestedField, NullOrder, Operation, PrimitiveType, Schema, Snapshot,
         SnapshotReference, SnapshotRetention, SortDirection, SortField, SortOrder, Summary,
-        Transform, Type, UnboundPartitionField, UnboundPartitionSpec,
+        TableMetadataBuilder, Transform, Type, UnboundPartitionField, UnboundPartitionSpec,
     };
-    use crate::{NamespaceIdent, TableIdent, TableRequirement, TableUpdate};
+    use crate::{NamespaceIdent, TableCreation, TableIdent, TableRequirement, TableUpdate};
     use serde::de::DeserializeOwned;
     use serde::Serialize;
     use std::collections::HashMap;
@@ -1065,4 +1085,28 @@ mod tests {
 
         test_serde_json(json, update);
     }
+
+    #[test]
+    fn test_table_update_apply() {
+        let table_creation = TableCreation::builder()
+            .location("s3://db/table".to_string())
+            .name("table".to_string())
+            .properties(HashMap::new())
+            .schema(Schema::builder().build().unwrap())
+            .build();
+        let table_metadata = TableMetadataBuilder::from_table_creation(table_creation)
+            .unwrap()
+            .build()
+            .unwrap();
+        let table_metadata_builder = TableMetadataBuilder::new(table_metadata);
+
+        let uuid = uuid::Uuid::new_v4();
+        let update = TableUpdate::AssignUuid { uuid };
+        let updated_metadata = update
+            .apply(table_metadata_builder)
+            .unwrap()
+            .build()
+            .unwrap();
+        assert_eq!(updated_metadata.uuid(), uuid);
+    }
 }
diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs
index 0ce3e742b..9893e9eea 100644
--- a/crates/iceberg/src/spec/table_metadata.rs
+++ b/crates/iceberg/src/spec/table_metadata.rs
@@ -32,6 +32,8 @@ use super::{
 
 use _serde::TableMetadataEnum;
 
+use crate::error::Result;
+use crate::{Error, ErrorKind, TableCreation};
 use chrono::{DateTime, TimeZone, Utc};
 
 static MAIN_BRANCH: &str = "main";
@@ -275,6 +277,93 @@ impl TableMetadata {
     }
 }
 
+/// Builder for creating or modifying [`TableMetadata`].
+///
+/// Wraps a [`TableMetadata`] value, applies changes to it, and hands it back from
+/// [`TableMetadataBuilder::build`].
+pub struct TableMetadataBuilder(TableMetadata);
+
+impl TableMetadataBuilder {
+    /// Creates a new table metadata builder from the given table metadata.
+    pub fn new(origin: TableMetadata) -> Self {
+        Self(origin)
+    }
+
+    /// Creates a new table metadata builder from the given table creation.
+    ///
+    /// The resulting metadata uses format version 2, a freshly generated table uuid, and
+    /// the given schema as its only (and current) schema.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ErrorKind::FeatureUnsupported`] when a partition spec or sort order is
+    /// given, and [`ErrorKind::DataInvalid`] when no location is given.
+    pub fn from_table_creation(table_creation: TableCreation) -> Result<Self> {
+        let TableCreation {
+            name: _,
+            location,
+            schema,
+            partition_spec,
+            sort_order,
+            properties,
+        } = table_creation;
+
+        if partition_spec.is_some() {
+            return Err(Error::new(
+                ErrorKind::FeatureUnsupported,
+                "Can't create table with partition spec now",
+            ));
+        }
+
+        if sort_order.is_some() {
+            return Err(Error::new(
+                ErrorKind::FeatureUnsupported,
+                "Can't create table with sort order now",
+            ));
+        }
+
+        let table_metadata = TableMetadata {
+            format_version: FormatVersion::V2,
+            table_uuid: Uuid::new_v4(),
+            location: location.ok_or_else(|| {
+                Error::new(
+                    ErrorKind::DataInvalid,
+                    "Can't create table without location",
+                )
+            })?,
+            last_sequence_number: 0,
+            last_updated_ms: Utc::now().timestamp_millis(),
+            last_column_id: schema.highest_field_id(),
+            current_schema_id: schema.schema_id(),
+            schemas: HashMap::from([(schema.schema_id(), Arc::new(schema))]),
+            partition_specs: Default::default(),
+            default_spec_id: 0,
+            last_partition_id: 0,
+            properties,
+            current_snapshot_id: None,
+            snapshots: Default::default(),
+            snapshot_log: vec![],
+            sort_orders: Default::default(),
+            metadata_log: vec![],
+            default_sort_order_id: 0,
+            refs: Default::default(),
+        };
+
+        Ok(Self(table_metadata))
+    }
+
+    /// Changes uuid of table metadata.
+    pub fn assign_uuid(mut self, uuid: Uuid) -> Result<Self> {
+        self.0.table_uuid = uuid;
+        Ok(self)
+    }
+
+    /// Returns the new table metadata after changes.
+    pub fn build(self) -> Result<TableMetadata> {
+        Ok(self.0)
+    }
+}
+
 pub(super) mod _serde {
     /// This is a helper module that defines types to help with serialization/deserialization.
     /// For deserialization the input first gets read into either the [TableMetadataV1] or [TableMetadataV2] struct
@@ -838,13 +927,16 @@ mod tests {
 
     use pretty_assertions::assert_eq;
 
-    use crate::spec::{
-        table_metadata::TableMetadata, NestedField, NullOrder, Operation, PartitionField,
-        PartitionSpec, PrimitiveType, Schema, Snapshot, SnapshotReference, SnapshotRetention,
-        SortDirection, SortField, SortOrder, Summary, Transform, Type,
+    use crate::{
+        spec::{
+            table_metadata::TableMetadata, NestedField, NullOrder, Operation, PartitionField,
+            PartitionSpec, PrimitiveType, Schema, Snapshot, SnapshotReference, SnapshotRetention,
+            SortDirection, SortField, SortOrder, Summary, Transform, Type,
+        },
+        TableCreation,
     };
 
-    use super::{FormatVersion, MetadataLog, SnapshotLog};
+    use super::{FormatVersion, MetadataLog, SnapshotLog, TableMetadataBuilder};
 
     fn check_table_metadata_serde(json: &str, expected_type: TableMetadata) {
         let desered_type: TableMetadata = serde_json::from_str(json).unwrap();
@@ -1569,4 +1661,46 @@ mod tests {
             table_meta_data.sort_orders.get(&default_sort_order_id)
         )
     }
+
+    #[test]
+    fn test_table_metadata_builder_from_table_creation() {
+        let table_creation = TableCreation::builder()
+            .location("s3://db/table".to_string())
+            .name("table".to_string())
+            .properties(HashMap::new())
+            .schema(Schema::builder().build().unwrap())
+            .build();
+        let table_metadata = TableMetadataBuilder::from_table_creation(table_creation)
+            .unwrap()
+            .build()
+            .unwrap();
+        assert_eq!(table_metadata.location, "s3://db/table");
+        assert_eq!(table_metadata.schemas.len(), 1);
+        assert_eq!(
+            table_metadata
+                .schemas
+                .get(&0)
+                .unwrap()
+                .as_struct()
+                .fields()
+                .len(),
+            0
+        );
+        assert_eq!(table_metadata.partition_specs.len(), 0);
+        assert_eq!(table_metadata.properties.len(), 0);
+        assert_eq!(table_metadata.sort_orders.len(), 0);
+    }
+
+    #[test]
+    fn test_table_builder_from_table_metadata() {
+        let table_metadata = get_test_table_metadata("TableMetadataV2Valid.json");
+        let table_metadata_builder = TableMetadataBuilder::new(table_metadata);
+        let uuid = Uuid::new_v4();
+        let table_metadata = table_metadata_builder
+            .assign_uuid(uuid)
+            .unwrap()
+            .build()
+            .unwrap();
+        assert_eq!(table_metadata.uuid(), uuid);
+    }
 }