Skip to content

Commit

Permalink
Added comments
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton committed Aug 26, 2022
1 parent d3744cd commit c7a8248
Showing 1 changed file with 90 additions and 44 deletions.
134 changes: 90 additions & 44 deletions fastfield_codecs/src/line.rs
Original file line number Diff line number Diff line change
@@ -1,67 +1,105 @@
use std::io;
use std::{io, num::NonZeroU64};

use common::{BinarySerializable, VInt};

use crate::FastFieldDataAccess;

const MID_POINT: u64 = (1u64 << 32) - 1u64;

#[derive(Debug, Clone, Copy)]
/// `Line` describes a line function `y: ax + b` using integer
/// arithmetics.
///
/// The slope is in fact a decimal split into a 32 bit integer value,
/// and a 32-bit decimal value.
///
/// The multiplication then becomes.
/// `y = m * x >> 32 + b`
#[derive(Debug, Clone, Copy, Default)]
pub struct Line {
slope: u64,
intercept: u64,
}

/// Compute the line slope.
fn compute_slope(y0: u64, y1: u64, num_vals: NonZeroU64) -> u64 {
let dy = y1.wrapping_sub(y0);
let sign = dy < (1 << 32);
let abs_dy = if sign {
y1.wrapping_sub(y0)
} else {
y0.wrapping_sub(y1)
};

let abs_slope = (abs_dy << 32) / num_vals.get();
if sign {
abs_slope
} else {
u64::MAX - abs_slope
}
}

impl Line {
#[inline(always)]
pub fn eval(&self, x: u64) -> u64 {
let res = x.wrapping_mul(self.slope).wrapping_shr(32) as i32 as u64;
res.wrapping_add(self.intercept)
let linear_part = (x.wrapping_mul(self.slope) >> 32) as i32 as u64;
self.intercept.wrapping_add(linear_part)
}

/// Returns a function such that f(i)
/// - computes without panicking for any i..
/// - `f(i).wrapping_sub(ys[i])` is small for i in [0..ys.len())
/// Returns a line that attemps to approximate a function
/// f: i in 0..[ys.num_vals()) -> ys[i].
///
/// - The approximation is always lower than the actual value.
/// Or more rigorously, formally `f(i).wrapping_sub(ys[i])` is small
/// for any i in [0..ys.len()).
/// - It computes without panicking for any value of it.
pub fn train(ys: &dyn FastFieldDataAccess) -> Self {
let num_vals = ys.num_vals();
assert!(num_vals > 0);
let num_vals =
if let Some(num_vals) = NonZeroU64::new(ys.num_vals()) {
num_vals
} else {
return Line::default();
};

let y0 = ys.get_val(0);
let y1 = ys.get_val(num_vals - 1);

let dy = y1.wrapping_sub(y0);
let sign = dy < (1 << 32);
let abs_dy = if sign {
y1.wrapping_sub(y0)
} else {
y0.wrapping_sub(y1)
};

let abs_slope = (abs_dy << 32) / (num_vals as u64);

let slope = if sign {
abs_slope
} else {
u64::MAX - abs_slope
};

let y1 = ys.get_val(num_vals.get() - 1);

// We first independently pick our slope.
let slope = compute_slope(y0, y1, num_vals);

// We picked our slope. Note that it does not have to be perfect.
// Now we need to compute the best intercept.
//
// Intuitively, the best intercept is such that line passes through one of the
// `(i, ys[])`.
//
// The best intercept therefore has the form
// `y[i] - line.eval(i)` (using wrapping arithmetics).
// In other words, the best intercept is one of the `y - Line::eval(ys[i])`
// and our task is just to pick the one that minimizes our error.
//
// Without sorting our values, this is a difficult problem.
// We however rely on the following trick...
//
// We only focus on the case where the interpolation is half decent.
// If the line interpolation is doing its job on a dataset suited for it,
// we can hope that the maximum error won't be larger than `u64::MAX / 2`.
//
// In other words, even without the intercept the values `y - Line::eval(ys[i])` will all be within
// an interval that takes less than half of the modulo space of `u64`.
//
// Our task is therefore to identify this interval.
// Here we simply translate all of our values by `y0 - 2^63` and pick the min.
let mut line = Line {
slope,
intercept: 0,
};

let heuristic_shift = y0.wrapping_sub(MID_POINT);

line.intercept = ys
.iter()
.enumerate()
.map(|(i, y)| y.wrapping_sub(line.eval(i as u64)))
// The shift here is a way to deal with the case where the current line is going below
// the vals. This is an heuristic that works perfectly if our values are all
// located within an interval of amplitude lesser than 2^63.
.min_by_key(|&val| val.wrapping_sub(heuristic_shift))
.unwrap();

.unwrap_or(0u64); //< Never happens.
line
}
}
Expand All @@ -84,14 +122,22 @@ impl BinarySerializable for Line {
mod tests {
use super::*;

fn test_eval_max_err(ys: &[u64], expected: Option<u64>) {
for &shift in &[0, 100, 1 << 63, u64::MAX, u64::MAX - 10, u64::MAX - 12] {
assert_eq!(test_eval_max_err_with_shift(ys, shift), expected);
/// Test training a line and ensuring that the maximum difference between
/// the data points and the line is `expected`.
///
/// This function operates translation over the data for better coverage.
fn test_line_interpol_with_translation(ys: &[u64], expected: Option<u64>) {
let mut translations = vec![0, 100, u64::MAX, u64::MAX - 1];
translations.extend_from_slice(ys);
for translation in translations {
let translated_ys: Vec<u64> = ys.iter().copied().map(|y| y.wrapping_add(translation)).collect();
let largest_err = test_eval_max_err(&translated_ys);
assert_eq!(largest_err, expected);

}
}

fn test_eval_max_err_with_shift(ys: &[u64], shift: u64) -> Option<u64> {
let ys: Vec<u64> = ys.iter().copied().map(|y| y.wrapping_add(shift)).collect();
fn test_eval_max_err(ys: &[u64]) -> Option<u64> {
let line = Line::train(&&ys[..]);
ys.iter()
.enumerate()
Expand All @@ -101,10 +147,10 @@ mod tests {

#[test]
fn test_train() {
test_eval_max_err(&[11, 11, 11, 12, 12, 13], Some(1));
test_eval_max_err(&[13, 12, 12, 11, 11, 11], Some(0));
test_eval_max_err(&[13, 13, 12, 11, 11, 11], Some(1));
test_eval_max_err(&[13, 13, 12, 11, 11, 11], Some(1));
test_eval_max_err(&[u64::MAX - 1, 0, 0, 1], Some(2));
test_line_interpol_with_translation(&[11, 11, 11, 12, 12, 13], Some(1));
test_line_interpol_with_translation(&[13, 12, 12, 11, 11, 11], Some(0));
test_line_interpol_with_translation(&[13, 13, 12, 11, 11, 11], Some(1));
test_line_interpol_with_translation(&[13, 13, 12, 11, 11, 11], Some(1));
test_line_interpol_with_translation(&[u64::MAX - 1, 0, 0, 1], Some(2));
}
}

0 comments on commit c7a8248

Please sign in to comment.