From 8729262cf9e2a81b3a00dd9d397cf1085ebce02a Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 28 May 2024 12:33:56 -0700 Subject: [PATCH] Removed EncodedMetadataBuffer in favor of EncodedBuffer --- rust/lance-encoding-datafusion/Cargo.toml | 1 + rust/lance-encoding-datafusion/src/zone.rs | 11 ++++++----- rust/lance-encoding/src/encoder.rs | 8 +------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/rust/lance-encoding-datafusion/Cargo.toml b/rust/lance-encoding-datafusion/Cargo.toml index 514ed4407d..5f3b7e1ddf 100644 --- a/rust/lance-encoding-datafusion/Cargo.toml +++ b/rust/lance-encoding-datafusion/Cargo.toml @@ -16,6 +16,7 @@ lance-core = { workspace = true, features = ["datafusion"] } lance-encoding.workspace = true lance-file.workspace = true arrow-array.workspace = true +arrow-buffer.workspace = true arrow-schema.workspace = true datafusion-common.workspace = true datafusion-expr.workspace = true diff --git a/rust/lance-encoding-datafusion/src/zone.rs b/rust/lance-encoding-datafusion/src/zone.rs index a4b8fbe972..68e044df7b 100644 --- a/rust/lance-encoding-datafusion/src/zone.rs +++ b/rust/lance-encoding-datafusion/src/zone.rs @@ -4,13 +4,14 @@ use std::sync::Arc; use arrow_array::{ArrayRef, RecordBatch, UInt32Array}; +use arrow_buffer::Buffer; use arrow_schema::{Field, Schema}; use datafusion_common::{arrow::datatypes::DataType, ScalarValue}; use datafusion_expr::Accumulator; use datafusion_physical_expr::expressions::{MaxAccumulator, MinAccumulator}; use futures::{future::BoxFuture, FutureExt}; use lance_encoding::encoder::{ - encode_batch, CoreFieldEncodingStrategy, EncodedMetadataBuffer, FieldEncoder, + encode_batch, CoreFieldEncodingStrategy, EncodedBuffer, FieldEncoder, }; use lance_core::Result; @@ -109,7 +110,7 @@ impl ZoneMapsFieldEncoder { Ok(()) } - async fn maps_to_metadata(&mut self) -> Result> { + async fn maps_to_metadata(&mut self) -> Result> { let maps = std::mem::take(&mut self.maps); let (mins, (maxes, null_counts)): (Vec<_>, (Vec<_>, Vec<_>)) = maps .into_iter() @@ -127,8 +128,8 @@ impl ZoneMapsFieldEncoder { let encoding_strategy = CoreFieldEncodingStrategy::default(); let encoded_zone_maps = encode_batch(&zone_maps, &encoding_strategy, u64::MAX).await?; let zone_maps_buffer = encoded_zone_maps.try_to_mini_lance()?; - Ok(vec![EncodedMetadataBuffer { - parts: vec![zone_maps_buffer], + Ok(vec![EncodedBuffer { + parts: vec![Buffer::from(zone_maps_buffer)], }]) } } @@ -155,7 +156,7 @@ impl FieldEncoder for ZoneMapsFieldEncoder { self.items_encoder.flush() } - fn finish(&mut self) -> BoxFuture<'_, Result>> { + fn finish(&mut self) -> BoxFuture<'_, Result>> { async move { self.maps_to_metadata().await }.boxed() } diff --git a/rust/lance-encoding/src/encoder.rs b/rust/lance-encoding/src/encoder.rs index df2f751f6a..f2afcb9070 100644 --- a/rust/lance-encoding/src/encoder.rs +++ b/rust/lance-encoding/src/encoder.rs @@ -132,12 +132,6 @@ pub trait ArrayEncoder: std::fmt::Debug + Send + Sync { /// A task to create a page of data pub type EncodeTask = BoxFuture<'static, Result>; -/// A buffer of encoded metadata to be placed in the column metadata -pub struct EncodedMetadataBuffer { - // Different parts that will be written to a single buffer on disk - pub parts: Vec, -} - /// Top level encoding trait to code any Arrow array type into one or more pages. /// /// The field encoder implements buffering and encoding of a single input column @@ -171,7 +165,7 @@ pub trait FieldEncoder: Send { /// This is called only once, after all encode tasks have completed /// /// By default, returns an empty Vec (no column metadata buffers) - fn finish(&mut self) -> BoxFuture<'_, Result>> { + fn finish(&mut self) -> BoxFuture<'_, Result>> { std::future::ready(Ok(vec![])).boxed() } /// The number of output columns this encoding will create