diff --git a/columnar/src/column_index/optional_index/tests.rs b/columnar/src/column_index/optional_index/tests.rs index 18b05f1127..bf642f5a32 100644 --- a/columnar/src/column_index/optional_index/tests.rs +++ b/columnar/src/column_index/optional_index/tests.rs @@ -212,10 +212,13 @@ mod bench { fn gen_bools(fill_ratio: f64) -> OptionalIndex { let mut out = Vec::new(); let mut rng: StdRng = StdRng::from_seed([1u8; 32]); - let vals: Vec = (0..TOTAL_NUM_VALUES) + let vals: Vec = (0..TOTAL_NUM_VALUES) .map(|_| rng.gen_bool(fill_ratio)) + .enumerate() + .filter(|(pos, val)| *val) + .map(|(pos, _)| pos as RowId) .collect(); - serialize_optional_index(&&vals[..], &mut out).unwrap(); + serialize_optional_index(&&vals[..], TOTAL_NUM_VALUES, &mut out).unwrap(); let codec = open_optional_index(OwnedBytes::new(out)).unwrap(); codec } diff --git a/columnar/src/column_values/mod.rs b/columnar/src/column_values/mod.rs index 2a347da513..a77b81e37c 100644 --- a/columnar/src/column_values/mod.rs +++ b/columnar/src/column_values/mod.rs @@ -137,6 +137,8 @@ mod bench { use test::{self, Bencher}; use super::*; + use crate::column_values::serialize::NormalizedHeader; + use crate::column_values::u64_based::*; fn get_data() -> Vec { let mut rng = StdRng::seed_from_u64(2u64); @@ -152,23 +154,30 @@ mod bench { data } + fn compute_stats(vals: impl Iterator) -> Stats { + let mut stats_collector = StatsCollector::default(); + for val in vals { + stats_collector.collect(val); + } + stats_collector.stats() + } + #[inline(never)] fn value_iter() -> impl Iterator { 0..20_000 } - fn get_reader_for_bench(data: &[u64]) -> Codec::Reader { + fn get_reader_for_bench(data: &[u64]) -> Codec::Reader { let mut bytes = Vec::new(); - let min_value = *data.iter().min().unwrap(); - let data = data.iter().map(|el| *el - min_value).collect::>(); - let col = VecColumn::from(&data); - let normalized_header = NormalizedHeader { - num_vals: col.num_vals(), - max_value: col.max_value(), - }; - Codec::serialize(&VecColumn::from(&data), &mut bytes).unwrap(); - Codec::open_from_bytes(OwnedBytes::new(bytes), normalized_header).unwrap() - } - fn bench_get(b: &mut Bencher, data: &[u64]) { + let stats = compute_stats(data.iter().cloned()); + let mut codec_serializer = Codec::estimator(); + for val in data { + codec_serializer.collect(*val); + } + codec_serializer.serialize(&stats, Box::new(data.iter().copied()).as_mut(), &mut bytes); + + Codec::load(OwnedBytes::new(bytes)).unwrap() + } + fn bench_get(b: &mut Bencher, data: &[u64]) { let col = get_reader_for_bench::(data); b.iter(|| { let mut sum = 0u64; @@ -192,18 +201,22 @@ mod bench { }); } - fn bench_get_dynamic(b: &mut Bencher, data: &[u64]) { + fn bench_get_dynamic(b: &mut Bencher, data: &[u64]) { let col = Arc::new(get_reader_for_bench::(data)); bench_get_dynamic_helper(b, col); } - fn bench_create(b: &mut Bencher, data: &[u64]) { - let min_value = *data.iter().min().unwrap(); - let data = data.iter().map(|el| *el - min_value).collect::>(); + fn bench_create(b: &mut Bencher, data: &[u64]) { + let stats = compute_stats(data.iter().cloned()); let mut bytes = Vec::new(); b.iter(|| { bytes.clear(); - Codec::serialize(&VecColumn::from(&data), &mut bytes).unwrap(); + let mut codec_serializer = Codec::estimator(); + for val in data.iter().take(1024) { + codec_serializer.collect(*val); + } + + codec_serializer.serialize(&stats, Box::new(data.iter().copied()).as_mut(), &mut bytes) }); } diff --git a/columnar/src/column_values/u64_based/mod.rs b/columnar/src/column_values/u64_based/mod.rs index 909bffa27f..cdf997b683 100644 --- a/columnar/src/column_values/u64_based/mod.rs +++ b/columnar/src/column_values/u64_based/mod.rs @@ -13,10 +13,10 @@ use common::{BinarySerializable, OwnedBytes}; use crate::column_values::monotonic_mapping::{ StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal, }; -use crate::column_values::u64_based::bitpacked::BitpackedCodec; -use crate::column_values::u64_based::blockwise_linear::BlockwiseLinearCodec; -use crate::column_values::u64_based::linear::LinearCodec; -use crate::column_values::u64_based::stats_collector::StatsCollector; +pub use crate::column_values::u64_based::bitpacked::BitpackedCodec; +pub use crate::column_values::u64_based::blockwise_linear::BlockwiseLinearCodec; +pub use crate::column_values::u64_based::linear::LinearCodec; +pub use crate::column_values::u64_based::stats_collector::StatsCollector; use crate::column_values::{monotonic_map_column, Stats}; use crate::iterable::Iterable; use crate::{ColumnValues, MonotonicallyMappableToU64}; diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index 8a2fc4c099..ab939b52ff 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ b/src/indexer/merger_sorted_index_test.rs @@ -560,7 +560,9 @@ mod bench_sorted_index_merge { let merger: IndexMerger = IndexMerger::open(index.schema(), index.settings().clone(), &segments[..])?; b.iter(|| { - merger.generate_doc_id_mapping(&sort_by_field).unwrap(); + merger + .generate_doc_id_mapping_with_sort_by_field(&sort_by_field) + .unwrap(); }); Ok(())