Skip to content

Commit

Permalink
chore: add expect_stat, expect_single_stat in GetStat trait (#3126)
Browse files Browse the repository at this point in the history
This PR adds the helper functions `expect_stat` and `expect_single_stat`
to the `GetStat` trait to make DataBlock statistics easier to use.
  • Loading branch information
broccoliSpicy authored Nov 26, 2024
1 parent e6c2343 commit dc9afbb
Show file tree
Hide file tree
Showing 4 changed files with 235 additions and 603 deletions.
14 changes: 7 additions & 7 deletions rust/lance-encoding/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -951,17 +951,17 @@ impl DataBlock {
as_type_ref!(as_variable_width_ref, VariableWidth, VariableWidthBlock);
as_type_ref!(as_struct_ref, Struct, StructDataBlock);
as_type_ref!(as_dictionary_ref, Dictionary, DictionaryDataBlock);
as_type_ref_mut!(as_all_null_mut_ref, AllNull, AllNullDataBlock);
as_type_ref_mut!(as_nullable_mut_ref, Nullable, NullableDataBlock);
as_type_ref_mut!(as_fixed_width_mut_ref, FixedWidth, FixedWidthDataBlock);
as_type_ref_mut!(as_all_null_ref_mut, AllNull, AllNullDataBlock);
as_type_ref_mut!(as_nullable_ref_mut, Nullable, NullableDataBlock);
as_type_ref_mut!(as_fixed_width_ref_mut, FixedWidth, FixedWidthDataBlock);
as_type_ref_mut!(
as_fixed_size_list_mut_ref,
as_fixed_size_list_ref_mut,
FixedSizeList,
FixedSizeListBlock
);
as_type_ref_mut!(as_variable_width_mut_ref, VariableWidth, VariableWidthBlock);
as_type_ref_mut!(as_struct_mut_ref, Struct, StructDataBlock);
as_type_ref_mut!(as_dictionary_mut_ref, Dictionary, DictionaryDataBlock);
as_type_ref_mut!(as_variable_width_ref_mut, VariableWidth, VariableWidthBlock);
as_type_ref_mut!(as_struct_ref_mut, Struct, StructDataBlock);
as_type_ref_mut!(as_dictionary_ref_mut, Dictionary, DictionaryDataBlock);
}

// Methods to convert from Arrow -> DataBlock
Expand Down
16 changes: 4 additions & 12 deletions rust/lance-encoding/src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -791,9 +791,7 @@ impl CompressionStrategy for CoreArrayEncodingStrategy {
data: &DataBlock,
) -> Result<Box<dyn MiniBlockCompressor>> {
if let DataBlock::FixedWidth(ref fixed_width_data) = data {
let bit_widths = data
.get_stat(Stat::BitWidth)
.expect("FixedWidthDataBlock should have valid `Stat::BitWidth` statistics");
let bit_widths = data.expect_stat(Stat::BitWidth);
// Temporary hack to work around https://github.com/lancedb/lance/issues/3102
// Ideally we should still be able to bit-pack here (either to 0 or 1 bit per value)
let has_all_zeros = bit_widths
Expand All @@ -812,15 +810,9 @@ impl CompressionStrategy for CoreArrayEncodingStrategy {
}
if let DataBlock::VariableWidth(ref variable_width_data) = data {
if variable_width_data.bits_per_offset == 32 {
let data_size = variable_width_data.get_stat(Stat::DataSize).expect(
"VariableWidth DataBlock should have valid `Stat::DataSize` statistics",
);
let data_size = data_size.as_primitive::<UInt64Type>().value(0);

let max_len = variable_width_data.get_stat(Stat::MaxLength).expect(
"VariableWidth DataBlock should have valid `Stat::DataSize` statistics",
);
let max_len = max_len.as_primitive::<UInt64Type>().value(0);
let data_size =
variable_width_data.expect_single_stat::<UInt64Type>(Stat::DataSize);
let max_len = variable_width_data.expect_single_stat::<UInt64Type>(Stat::MaxLength);

if max_len >= FSST_LEAST_INPUT_MAX_LENGTH
&& data_size >= FSST_LEAST_INPUT_SIZE as u64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1573,9 +1573,7 @@ macro_rules! chunk_data_impl {
let data_buffer = $data.data.borrow_to_typed_slice::<$data_type>();
let data_buffer = data_buffer.as_ref();

let bit_widths = $data
.get_stat(Stat::BitWidth)
.expect("FixedWidthDataBlock should have valid bit width statistics");
let bit_widths = $data.expect_stat(Stat::BitWidth);
let bit_widths_array = bit_widths
.as_any()
.downcast_ref::<PrimitiveArray<UInt64Type>>()
Expand Down
Loading

0 comments on commit dc9afbb

Please sign in to comment.