Skip to content

Commit

Permalink
Perf fix on the MonotonicMapping column
Browse files Browse the repository at this point in the history
The Monotonic mapping was using the default implementation
for `get_range` and `.iter`.

As a result, some of the column used in merge (e.g. multivalued
fast fields) were exhibiting a very strong performance regression.
  • Loading branch information
fulmicoton committed Sep 15, 2022
1 parent 817225e commit ac0ddb9
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ maplit = "1.0.2"
matches = "0.1.9"
pretty_assertions = "1.2.1"
proptest = "1.0.0"
criterion = "0.4.0"
criterion = "0.3.5"
test-log = "0.2.10"
env_logger = "0.9.0"
pprof = { version = "0.10.0", features = ["flamegraph", "criterion"] }
Expand Down
59 changes: 53 additions & 6 deletions fastfield_codecs/src/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,11 @@ pub trait Column<T = u64>: Send + Sync {
/// associated with the `DocId` going from
/// `start` to `start + output.len()`.
///
/// Regardless of the type of `Item`, this method works
/// - transmuting the output array
/// - extracting the `Item`s as if they were `u64`
/// - possibly converting the `u64` value to the right type.
///
/// # Panics
///
/// May panic if `start + output.len()` is greater than
/// Must panic if `start + output.len()` is greater than
/// the segment's `maxdoc`.
#[inline]
fn get_range(&self, start: u64, output: &mut [T]) {
for (out, idx) in output.iter_mut().zip(start..) {
*out = self.get_val(idx);
Expand Down Expand Up @@ -78,6 +74,14 @@ impl<'a, C: Column<T>, T: Copy + PartialOrd> Column<T> for &'a C {
fn num_vals(&self) -> u64 {
(*self).num_vals()
}

fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = T> + 'b> {
(*self).iter()
}

fn get_range(&self, start: u64, output: &mut [T]) {
(*self).get_range(start, output)
}
}

impl<'a, T: Copy + PartialOrd + Send + Sync> Column<T> for VecColumn<'a, T> {
Expand All @@ -100,6 +104,10 @@ impl<'a, T: Copy + PartialOrd + Send + Sync> Column<T> for VecColumn<'a, T> {
fn num_vals(&self) -> u64 {
self.values.len() as u64
}

fn get_range(&self, start: u64, output: &mut [T]) {
output.copy_from_slice(&self.values[start as usize..][..output.len()])
}
}

impl<'a, T: Copy + Ord + Default, V> From<&'a V> for VecColumn<'a, T>
Expand Down Expand Up @@ -147,6 +155,7 @@ where
Input: Send + Sync,
Output: Send + Sync,
{
#[inline]
fn get_val(&self, idx: u64) -> Output {
let from_val = self.from_column.get_val(idx);
(self.monotonic_mapping)(from_val)
Expand All @@ -165,6 +174,13 @@ where
fn num_vals(&self) -> u64 {
self.from_column.num_vals()
}

fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Output> + 'a> {
Box::new(self.from_column.iter().map(&self.monotonic_mapping))
}

// We voluntarily do not implement get_range as it yields a regression,
// and we do not have any specialized implementation anyway.
}

pub struct RemappedColumn<T, M, C> {
Expand Down Expand Up @@ -251,6 +267,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
use crate::MonotonicallyMappableToU64;

#[test]
fn test_monotonic_mapping() {
Expand All @@ -271,4 +288,34 @@ mod tests {
assert_eq!(col.num_vals(), 90);
assert_eq!(col.max_value(), 99);
}

#[test]
fn test_monotonic_mapping_iter() {
let vals: Vec<u64> = (-1..99).map(i64::to_u64).collect();
let col = VecColumn::from(&vals);
let mapped = monotonic_map_column(col, |el| i64::from_u64(el) * 10i64);
let val_i64s: Vec<i64> = mapped.iter().collect();
for i in 0..100 {
assert_eq!(val_i64s[i as usize], mapped.get_val(i));
}
}

#[test]
fn test_monotonic_mapping_get_range() {
let vals: Vec<u64> = (-1..99).map(i64::to_u64).collect();
let col = VecColumn::from(&vals);
let mapped = monotonic_map_column(col, |el| i64::from_u64(el) * 10i64);
assert_eq!(mapped.min_value(), -10i64);
assert_eq!(mapped.max_value(), 980i64);
assert_eq!(mapped.num_vals(), 100);
let val_i64s: Vec<i64> = mapped.iter().collect();
assert_eq!(val_i64s.len(), 100);
for i in 0..100 {
assert_eq!(val_i64s[i as usize], mapped.get_val(i));
assert_eq!(val_i64s[i as usize], i64::from_u64(vals[i as usize]) * 10);
}
let mut buf = [0i64; 20];
mapped.get_range(7, &mut buf[..]);
assert_eq!(&val_i64s[7..][..20], &buf);
}
}

0 comments on commit ac0ddb9

Please sign in to comment.