Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set field IDs. #417

Merged
merged 7 commits into from
Jan 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions rust/src/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@
//!
//! To improve Arrow-RS ergonomics.

use arrow_array::types::UInt8Type;
use arrow_array::{
Array, FixedSizeBinaryArray, FixedSizeListArray, Int32Array, ListArray, UInt8Array,
};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::DataType::FixedSizeBinary;
use arrow_data::ArrayDataBuilder;
use arrow_schema::{DataType, Field};

use crate::error::Result;
Expand Down
71 changes: 65 additions & 6 deletions rust/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,18 @@ impl Field {
)
}

/// Record the parent ID of this field and make sure the field itself has an ID,
/// then do the same for every descendant.
///
/// A negative `id` is treated as "not assigned yet": the field takes the current
/// value of `id_seed`, and the seed is advanced by one. A non-negative `id` is
/// left untouched. Children are visited in order, each receiving this field's ID
/// as its parent ID and drawing new IDs from the same `id_seed`.
fn set_id(&mut self, parent_id: i32, id_seed: &mut i32) {
    self.parent_id = parent_id;
    if self.id < 0 {
        self.id = *id_seed;
        *id_seed += 1;
    }

    // Copy the ID out first so the loop only needs a mutable borrow of `children`.
    let own_id = self.id;
    for child in self.children.iter_mut() {
        child.set_id(own_id, id_seed);
    }
}

// Find any nested child with a specific field id
fn mut_field_by_id(&mut self, id: i32) -> Option<&mut Field> {
for child in self.children.as_mut_slice() {
Expand Down Expand Up @@ -558,6 +570,13 @@ impl Schema {
}
Ok(())
}

/// Assign IDs to the fields of this schema.
///
/// Numbering starts one past the value reported by `max_field_id()`, or at 0
/// when it reports none. Each top-level field is handed `-1` as its parent ID;
/// `Field::set_id` then walks the nested children.
fn set_field_id(&mut self) {
    let mut next_id = self.max_field_id().map_or(0, |max_id| max_id + 1);
    for field in self.fields.iter_mut() {
        field.set_id(-1, &mut next_id);
    }
}
}

impl fmt::Display for Schema {
Expand All @@ -574,14 +593,17 @@ impl TryFrom<&ArrowSchema> for Schema {
type Error = Error;

fn try_from(schema: &ArrowSchema) -> Result<Self> {
Ok(Self {
let mut schema = Self {
fields: schema
.fields
.iter()
.map(Field::try_from)
.collect::<Result<_>>()?,
metadata: schema.metadata.clone(),
})
};
schema.set_field_id();

Ok(schema)
}
}

Expand Down Expand Up @@ -629,10 +651,10 @@ impl From<&Schema> for Vec<pb::Field> {

#[cfg(test)]
mod tests {
use arrow_schema::{Field as ArrowField, TimeUnit};

use super::*;

use arrow_schema::{Field as ArrowField, TimeUnit};

#[test]
fn arrow_field_to_field() {
for (name, data_type) in [
Expand Down Expand Up @@ -759,7 +781,7 @@ mod tests {
),
ArrowField::new("c", DataType::Float64, false),
]);
assert_eq!(projected, Schema::try_from(&expected_arrow_schema).unwrap());
assert_eq!(ArrowSchema::from(&projected), expected_arrow_schema);
}

#[test]
Expand All @@ -778,6 +800,43 @@ mod tests {
ArrowField::new("c", DataType::Float64, false),
]);
let schema = Schema::try_from(&arrow_schema).unwrap();
let projected = schema.project_by_ids(&[1, 4, 5]).unwrap();
let projected = schema.project_by_ids(&[1, 2, 4, 5]).unwrap();

let expected_arrow_schema = ArrowSchema::new(vec![
ArrowField::new(
"b",
DataType::Struct(vec![
ArrowField::new("f1", DataType::Utf8, true),
ArrowField::new("f3", DataType::Float32, false),
]),
true,
),
ArrowField::new("c", DataType::Float64, false),
]);
assert_eq!(ArrowSchema::from(&projected), expected_arrow_schema);
}

/// Converting an Arrow schema must yield consecutive field IDs starting at 0.
#[test]
fn test_schema_set_ids() {
    // Layout: `a`, struct `b` { f1, f2, f3 }, `c` -- six fields in total.
    let struct_children = vec![
        ArrowField::new("f1", DataType::Utf8, true),
        ArrowField::new("f2", DataType::Boolean, false),
        ArrowField::new("f3", DataType::Float32, false),
    ];
    let arrow_schema = ArrowSchema::new(vec![
        ArrowField::new("a", DataType::Int32, false),
        ArrowField::new("b", DataType::Struct(struct_children), true),
        ArrowField::new("c", DataType::Float64, false),
    ]);
    let schema = Schema::try_from(&arrow_schema).unwrap();

    // The protobuf view lists the fields; collect their IDs in that order.
    let protos: Vec<pb::Field> = Vec::from(&schema);
    let actual_ids: Vec<_> = protos.iter().map(|proto| proto.id).collect();
    let expected_ids: Vec<_> = (0..6).collect();
    assert_eq!(actual_ids, expected_ids);
}
}
1 change: 0 additions & 1 deletion rust/src/encodings/plain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,6 @@ impl<'a> Decoder for PlainDecoder<'a> {
mod tests {
use crate::io::ObjectStore;
use arrow_array::*;
use arrow_schema::DataType::FixedSizeList;
use arrow_schema::Field;
use object_store::path::Path;
use rand::prelude::*;
Expand Down