Summary of this patch (text before the first "diff --git" header is skipped by patch tools):

* list.rs: adds the test `test_list_struct_empty`, which round-trips a ListArray
  holding 2 * 1024 * 1024 lists (every offset is 0, so each list is empty) whose
  item type is a struct with a single nullable UInt64 child. `Array`,
  `StructArray` and `UInt64Array` are added to the test module imports.
* struct.rs: `StructFieldEncoder::flush` no longer appends a ready future that
  yields the struct header page; it only returns the flattened child tasks.
  `StructFieldEncoder::finish` now pushes that header page (no buffers,
  `SimpleStruct` encoding, `num_rows_seen` rows) into `final_pages` of the
  header column before finishing the children.

diff --git a/rust/lance-encoding/src/encodings/logical/list.rs b/rust/lance-encoding/src/encodings/logical/list.rs
index dd8e4b1f55..39d261be1b 100644
--- a/rust/lance-encoding/src/encodings/logical/list.rs
+++ b/rust/lance-encoding/src/encodings/logical/list.rs
@@ -1217,7 +1217,7 @@ mod tests {
     use arrow::array::StringBuilder;
     use arrow_array::{
         builder::{Int32Builder, ListBuilder},
-        ArrayRef, BooleanArray, ListArray,
+        Array, ArrayRef, BooleanArray, ListArray, StructArray, UInt64Array,
     };
     use arrow_buffer::{OffsetBuffer, ScalarBuffer};
     use arrow_schema::{DataType, Field, Fields};
@@ -1260,6 +1260,27 @@ mod tests {
         check_round_trip_encoding_random(field, HashMap::new()).await;
     }
 
+    #[test_log::test(tokio::test)]
+    async fn test_list_struct_empty() {
+        let fields = Fields::from(vec![Field::new("inner", DataType::UInt64, true)]);
+        let items = UInt64Array::from(Vec::<u64>::new());
+        let structs = StructArray::new(fields, vec![Arc::new(items)], None);
+        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0; 2 * 1024 * 1024 + 1]));
+        let lists = ListArray::new(
+            Arc::new(Field::new("item", structs.data_type().clone(), true)),
+            offsets,
+            Arc::new(structs),
+            None,
+        );
+
+        check_round_trip_encoding_of_data(
+            vec![Arc::new(lists)],
+            &TestCases::default(),
+            HashMap::new(),
+        )
+        .await;
+    }
+
     #[test_log::test(tokio::test)]
     async fn test_simple_list() {
         let items_builder = Int32Builder::new();
diff --git a/rust/lance-encoding/src/encodings/logical/struct.rs b/rust/lance-encoding/src/encodings/logical/struct.rs
index 116156dcc3..b3ba018926 100644
--- a/rust/lance-encoding/src/encodings/logical/struct.rs
+++ b/rust/lance-encoding/src/encodings/logical/struct.rs
@@ -550,27 +550,7 @@ impl FieldEncoder for StructFieldEncoder {
             .iter_mut()
             .map(|encoder| encoder.flush())
             .collect::<Result<Vec<_>>>()?;
-        let mut child_tasks = child_tasks.into_iter().flatten().collect::<Vec<_>>();
-        let num_rows_seen = self.num_rows_seen;
-        let column_index = self.column_index;
-        // In this "simple struct / no nulls" case we emit a single header page at
-        // the very end which covers the entire struct.
-        child_tasks.push(
-            std::future::ready(Ok(EncodedPage {
-                array: EncodedArray {
-                    buffers: vec![],
-                    encoding: pb::ArrayEncoding {
-                        array_encoding: Some(pb::array_encoding::ArrayEncoding::Struct(
-                            pb::SimpleStruct {},
-                        )),
-                    },
-                },
-                num_rows: num_rows_seen,
-                column_idx: column_index,
-            }))
-            .boxed(),
-        );
-        Ok(child_tasks)
+        Ok(child_tasks.into_iter().flatten().collect::<Vec<_>>())
     }
 
     fn num_columns(&self) -> u32 {
@@ -585,7 +565,21 @@ impl FieldEncoder for StructFieldEncoder {
         async move {
             let mut columns = Vec::new();
             // Add a column for the struct header
-            columns.push(EncodedColumn::default());
+            let mut header = EncodedColumn::default();
+            header.final_pages.push(EncodedPage {
+                array: EncodedArray {
+                    buffers: vec![],
+                    encoding: pb::ArrayEncoding {
+                        array_encoding: Some(pb::array_encoding::ArrayEncoding::Struct(
+                            pb::SimpleStruct {},
+                        )),
+                    },
+                },
+                num_rows: self.num_rows_seen,
+                column_idx: self.column_index,
+            });
+            columns.push(header);
+            // Now run finish on the children
             for child in self.children.iter_mut() {
                 columns.extend(child.finish().await?);
             }