Skip to content

Commit

Permalink
Follow naming convention from Iceberg's Java and Python implementatio…
Browse files Browse the repository at this point in the history
…ns (apache#204)
  • Loading branch information
s-akhtar-baig authored and shaeqahmed committed Dec 9, 2024
1 parent 0f531a4 commit b33d3fd
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 49 deletions.
12 changes: 6 additions & 6 deletions crates/iceberg/src/spec/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
use self::_const_schema::{manifest_schema_v1, manifest_schema_v2};

use super::{
FieldSummary, FormatVersion, ManifestContentType, ManifestListEntry, PartitionSpec, Schema,
FieldSummary, FormatVersion, ManifestContentType, ManifestFile, PartitionSpec, Schema,
SchemaId, Struct, INITIAL_SEQUENCE_NUMBER,
};
use super::{Literal, UNASSIGNED_SEQUENCE_NUMBER};
Expand Down Expand Up @@ -196,7 +196,7 @@ impl ManifestWriter {
}

/// Write a manifest entry.
pub async fn write(mut self, manifest: Manifest) -> Result<ManifestListEntry> {
pub async fn write(mut self, manifest: Manifest) -> Result<ManifestFile> {
// Create the avro writer
let partition_type = manifest
.metadata
Expand Down Expand Up @@ -302,7 +302,7 @@ impl ManifestWriter {
let partition_summary =
self.get_field_summary_vec(&manifest.metadata.partition_spec.fields);

Ok(ManifestListEntry {
Ok(ManifestFile {
manifest_path: self.output.location().to_string(),
manifest_length: length as i64,
partition_spec_id: manifest.metadata.partition_spec.spec_id,
Expand Down Expand Up @@ -861,7 +861,7 @@ impl ManifestEntry {
}

/// Inherit data from manifest list, such as snapshot id, sequence number.
pub(crate) fn inherit_data(&mut self, snapshot_entry: &ManifestListEntry) {
pub(crate) fn inherit_data(&mut self, snapshot_entry: &ManifestFile) {
if self.snapshot_id.is_none() {
self.snapshot_id = Some(snapshot_entry.added_snapshot_id);
}
Expand Down Expand Up @@ -1981,7 +1981,7 @@ mod tests {
async fn test_manifest_read_write(
manifest: Manifest,
writer_builder: impl FnOnce(OutputFile) -> ManifestWriter,
) -> ManifestListEntry {
) -> ManifestFile {
let (bs, res) = write_manifest(&manifest, writer_builder).await;
let actual_manifest = Manifest::parse_avro(bs.as_slice()).unwrap();

Expand All @@ -1993,7 +1993,7 @@ mod tests {
async fn write_manifest(
manifest: &Manifest,
writer_builder: impl FnOnce(OutputFile) -> ManifestWriter,
) -> (Vec<u8>, ManifestListEntry) {
) -> (Vec<u8>, ManifestFile) {
let temp_dir = TempDir::new().unwrap();
let path = temp_dir.path().join("test_manifest.avro");
let io = FileIOBuilder::new_fs_io().build().unwrap();
Expand Down
86 changes: 43 additions & 43 deletions crates/iceberg/src/spec/manifest_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use futures::{AsyncReadExt, AsyncWriteExt};

use self::{
_const_schema::{MANIFEST_LIST_AVRO_SCHEMA_V1, MANIFEST_LIST_AVRO_SCHEMA_V2},
_serde::{ManifestListEntryV1, ManifestListEntryV2},
_serde::{ManifestFileV1, ManifestFileV2},
};

use super::{FormatVersion, Manifest, StructType};
Expand All @@ -51,7 +51,7 @@ pub const UNASSIGNED_SEQUENCE_NUMBER: i64 = -1;
#[derive(Debug, Clone, PartialEq)]
pub struct ManifestList {
/// Entries in a manifest list.
entries: Vec<ManifestListEntry>,
entries: Vec<ManifestFile>,
}

impl ManifestList {
Expand All @@ -76,7 +76,7 @@ impl ManifestList {
}

/// Get the entries in the manifest list.
pub fn entries(&self) -> &[ManifestListEntry] {
pub fn entries(&self) -> &[ManifestFile] {
&self.entries
}
}
Expand Down Expand Up @@ -168,12 +168,12 @@ impl ManifestListWriter {
/// Append manifest entries to be written.
pub fn add_manifest_entries(
&mut self,
manifest_entries: impl Iterator<Item = ManifestListEntry>,
manifest_entries: impl Iterator<Item = ManifestFile>,
) -> Result<()> {
match self.format_version {
FormatVersion::V1 => {
for manifest_entry in manifest_entries {
let manifest_entry: ManifestListEntryV1 = manifest_entry.try_into()?;
let manifest_entry: ManifestFileV1 = manifest_entry.try_into()?;
self.avro_writer.append_ser(manifest_entry)?;
}
}
Expand Down Expand Up @@ -203,7 +203,7 @@ impl ManifestListWriter {
}
manifest_entry.min_sequence_number = self.sequence_number;
}
let manifest_entry: ManifestListEntryV2 = manifest_entry.try_into()?;
let manifest_entry: ManifestFileV2 = manifest_entry.try_into()?;
self.avro_writer.append_ser(manifest_entry)?;
}
}
Expand Down Expand Up @@ -503,7 +503,7 @@ mod _const_schema {

/// Entry in a manifest list.
#[derive(Debug, PartialEq, Clone)]
pub struct ManifestListEntry {
pub struct ManifestFile {
/// field: 500
///
/// Location of the manifest file
Expand Down Expand Up @@ -630,7 +630,7 @@ impl TryFrom<i32> for ManifestContentType {
}
}

impl ManifestListEntry {
impl ManifestFile {
/// Load [`Manifest`].
///
/// This method will also initialize inherited values of [`ManifestEntry`], such as `sequence_number`.
Expand Down Expand Up @@ -679,9 +679,9 @@ pub struct FieldSummary {
}

/// This is a helper module that defines types to help with serialization/deserialization.
/// For deserialization the input first gets read into either the [ManifestListEntryV1] or [ManifestListEntryV2] struct
/// and then converted into the [ManifestListEntry] struct. Serialization works the other way around.
/// [ManifestListEntryV1] and [ManifestListEntryV2] are internal struct that are only used for serialization and deserialization.
/// For deserialization the input first gets read into either the [ManifestFileV1] or [ManifestFileV2] struct
/// and then converted into the [ManifestFile] struct. Serialization works the other way around.
/// [ManifestFileV1] and [ManifestFileV2] are internal struct that are only used for serialization and deserialization.
pub(super) mod _serde {
use crate::{
spec::{Literal, StructType, Type},
Expand All @@ -690,19 +690,19 @@ pub(super) mod _serde {
pub use serde_bytes::ByteBuf;
use serde_derive::{Deserialize, Serialize};

use super::ManifestListEntry;
use super::ManifestFile;
use crate::error::Result;

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(transparent)]
pub(crate) struct ManifestListV2 {
entries: Vec<ManifestListEntryV2>,
entries: Vec<ManifestFileV2>,
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(transparent)]
pub(crate) struct ManifestListV1 {
entries: Vec<ManifestListEntryV1>,
entries: Vec<ManifestFileV1>,
}

impl ManifestListV2 {
Expand Down Expand Up @@ -790,7 +790,7 @@ pub(super) mod _serde {
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub(super) struct ManifestListEntryV1 {
pub(super) struct ManifestFileV1 {
pub manifest_path: String,
pub manifest_length: i64,
pub partition_spec_id: i32,
Expand All @@ -806,7 +806,7 @@ pub(super) mod _serde {
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub(super) struct ManifestListEntryV2 {
pub(super) struct ManifestFileV2 {
pub manifest_path: String,
pub manifest_length: i64,
pub partition_spec_id: i32,
Expand Down Expand Up @@ -885,12 +885,12 @@ pub(super) mod _serde {
}
}

impl ManifestListEntryV2 {
/// Converts the [ManifestListEntryV2] into a [ManifestListEntry].
impl ManifestFileV2 {
/// Converts the [ManifestFileV2] into a [ManifestFile].
/// The convert of [partitions] need the partition_type info so use this function instead of [std::TryFrom] trait.
pub fn try_into(self, partition_type: Option<&StructType>) -> Result<ManifestListEntry> {
pub fn try_into(self, partition_type: Option<&StructType>) -> Result<ManifestFile> {
let partitions = try_convert_to_field_summary(self.partitions, partition_type)?;
Ok(ManifestListEntry {
Ok(ManifestFile {
manifest_path: self.manifest_path,
manifest_length: self.manifest_length,
partition_spec_id: self.partition_spec_id,
Expand All @@ -910,12 +910,12 @@ pub(super) mod _serde {
}
}

impl ManifestListEntryV1 {
/// Converts the [ManifestListEntryV1] into a [ManifestListEntry].
impl ManifestFileV1 {
/// Converts the [MManifestFileV1] into a [ManifestFile].
/// The convert of [partitions] need the partition_type info so use this function instead of [std::TryFrom] trait.
pub fn try_into(self, partition_type: Option<&StructType>) -> Result<ManifestListEntry> {
pub fn try_into(self, partition_type: Option<&StructType>) -> Result<ManifestFile> {
let partitions = try_convert_to_field_summary(self.partitions, partition_type)?;
Ok(ManifestListEntry {
Ok(ManifestFile {
manifest_path: self.manifest_path,
manifest_length: self.manifest_length,
partition_spec_id: self.partition_spec_id,
Expand Down Expand Up @@ -977,10 +977,10 @@ pub(super) mod _serde {
}
}

impl TryFrom<ManifestListEntry> for ManifestListEntryV2 {
impl TryFrom<ManifestFile> for ManifestFileV2 {
type Error = Error;

fn try_from(value: ManifestListEntry) -> std::result::Result<Self, Self::Error> {
fn try_from(value: ManifestFile) -> std::result::Result<Self, Self::Error> {
let partitions = convert_to_serde_field_summary(value.partitions);
let key_metadata = convert_to_serde_key_metadata(value.key_metadata);
Ok(Self {
Expand All @@ -996,7 +996,7 @@ pub(super) mod _serde {
.ok_or_else(|| {
Error::new(
crate::ErrorKind::DataInvalid,
"added_data_files_count in ManifestListEntryV2 should be require",
"added_data_files_count in ManifestFileV2 should be require",
)
})?
.try_into()?,
Expand All @@ -1005,7 +1005,7 @@ pub(super) mod _serde {
.ok_or_else(|| {
Error::new(
crate::ErrorKind::DataInvalid,
"existing_data_files_count in ManifestListEntryV2 should be require",
"existing_data_files_count in ManifestFileV2 should be require",
)
})?
.try_into()?,
Expand All @@ -1014,7 +1014,7 @@ pub(super) mod _serde {
.ok_or_else(|| {
Error::new(
crate::ErrorKind::DataInvalid,
"deleted_data_files_count in ManifestListEntryV2 should be require",
"deleted_data_files_count in ManifestFileV2 should be require",
)
})?
.try_into()?,
Expand All @@ -1023,7 +1023,7 @@ pub(super) mod _serde {
.ok_or_else(|| {
Error::new(
crate::ErrorKind::DataInvalid,
"added_rows_count in ManifestListEntryV2 should be require",
"added_rows_count in ManifestFileV2 should be require",
)
})?
.try_into()?,
Expand All @@ -1032,7 +1032,7 @@ pub(super) mod _serde {
.ok_or_else(|| {
Error::new(
crate::ErrorKind::DataInvalid,
"existing_rows_count in ManifestListEntryV2 should be require",
"existing_rows_count in ManifestFileV2 should be require",
)
})?
.try_into()?,
Expand All @@ -1041,7 +1041,7 @@ pub(super) mod _serde {
.ok_or_else(|| {
Error::new(
crate::ErrorKind::DataInvalid,
"deleted_rows_count in ManifestListEntryV2 should be require",
"deleted_rows_count in ManifestFileV2 should be require",
)
})?
.try_into()?,
Expand All @@ -1051,10 +1051,10 @@ pub(super) mod _serde {
}
}

impl TryFrom<ManifestListEntry> for ManifestListEntryV1 {
impl TryFrom<ManifestFile> for ManifestFileV1 {
type Error = Error;

fn try_from(value: ManifestListEntry) -> std::result::Result<Self, Self::Error> {
fn try_from(value: ManifestFile) -> std::result::Result<Self, Self::Error> {
let partitions = convert_to_serde_field_summary(value.partitions);
let key_metadata = convert_to_serde_key_metadata(value.key_metadata);
Ok(Self {
Expand Down Expand Up @@ -1100,7 +1100,7 @@ mod test {
io::FileIOBuilder,
spec::{
manifest_list::{_serde::ManifestListV1, UNASSIGNED_SEQUENCE_NUMBER},
FieldSummary, Literal, ManifestContentType, ManifestList, ManifestListEntry,
FieldSummary, Literal, ManifestContentType, ManifestFile, ManifestList,
ManifestListWriter, NestedField, PrimitiveType, StructType, Type,
},
};
Expand All @@ -1111,7 +1111,7 @@ mod test {
async fn test_parse_manifest_list_v1() {
let manifest_list = ManifestList {
entries: vec![
ManifestListEntry {
ManifestFile {
manifest_path: "/opt/bitnami/spark/warehouse/db/table/metadata/10d28031-9739-484c-92db-cdf2975cead4-m0.avro".to_string(),
manifest_length: 5806,
partition_spec_id: 0,
Expand Down Expand Up @@ -1161,7 +1161,7 @@ mod test {
async fn test_parse_manifest_list_v2() {
let manifest_list = ManifestList {
entries: vec![
ManifestListEntry {
ManifestFile {
manifest_path: "s3a://icebergdata/demo/s1/t1/metadata/05ffe08b-810f-49b3-a8f4-e88fc99b254a-m0.avro".to_string(),
manifest_length: 6926,
partition_spec_id: 1,
Expand All @@ -1178,7 +1178,7 @@ mod test {
partitions: vec![FieldSummary { contains_null: false, contains_nan: Some(false), lower_bound: Some(Literal::long(1)), upper_bound: Some(Literal::long(1))}],
key_metadata: vec![],
},
ManifestListEntry {
ManifestFile {
manifest_path: "s3a://icebergdata/demo/s1/t1/metadata/05ffe08b-810f-49b3-a8f4-e88fc99b254a-m1.avro".to_string(),
manifest_length: 6926,
partition_spec_id: 2,
Expand Down Expand Up @@ -1249,7 +1249,7 @@ mod test {
#[test]
fn test_serialize_manifest_list_v1() {
let manifest_list:ManifestListV1 = ManifestList {
entries: vec![ManifestListEntry {
entries: vec![ManifestFile {
manifest_path: "/opt/bitnami/spark/warehouse/db/table/metadata/10d28031-9739-484c-92db-cdf2975cead4-m0.avro".to_string(),
manifest_length: 5806,
partition_spec_id: 0,
Expand Down Expand Up @@ -1277,7 +1277,7 @@ mod test {
#[test]
fn test_serialize_manifest_list_v2() {
let manifest_list:ManifestListV2 = ManifestList {
entries: vec![ManifestListEntry {
entries: vec![ManifestFile {
manifest_path: "s3a://icebergdata/demo/s1/t1/metadata/05ffe08b-810f-49b3-a8f4-e88fc99b254a-m0.avro".to_string(),
manifest_length: 6926,
partition_spec_id: 1,
Expand Down Expand Up @@ -1305,7 +1305,7 @@ mod test {
#[tokio::test]
async fn test_manifest_list_writer_v1() {
let expected_manifest_list = ManifestList {
entries: vec![ManifestListEntry {
entries: vec![ManifestFile {
manifest_path: "/opt/bitnami/spark/warehouse/db/table/metadata/10d28031-9739-484c-92db-cdf2975cead4-m0.avro".to_string(),
manifest_length: 5806,
partition_spec_id: 1,
Expand Down Expand Up @@ -1361,7 +1361,7 @@ mod test {
let snapshot_id = 377075049360453639;
let seq_num = 1;
let mut expected_manifest_list = ManifestList {
entries: vec![ManifestListEntry {
entries: vec![ManifestFile {
manifest_path: "s3a://icebergdata/demo/s1/t1/metadata/05ffe08b-810f-49b3-a8f4-e88fc99b254a-m0.avro".to_string(),
manifest_length: 6926,
partition_spec_id: 1,
Expand Down Expand Up @@ -1415,7 +1415,7 @@ mod test {
#[tokio::test]
async fn test_manifest_list_writer_v1_as_v2() {
let expected_manifest_list = ManifestList {
entries: vec![ManifestListEntry {
entries: vec![ManifestFile {
manifest_path: "/opt/bitnami/spark/warehouse/db/table/metadata/10d28031-9739-484c-92db-cdf2975cead4-m0.avro".to_string(),
manifest_length: 5806,
partition_spec_id: 1,
Expand Down

0 comments on commit b33d3fd

Please sign in to comment.