Skip to content

Commit

Permalink
Homogeneous codec names.
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton committed Aug 24, 2022
1 parent 298b5dd commit c8972cf
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 135 deletions.
16 changes: 6 additions & 10 deletions fastfield_codecs/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@ extern crate test;
mod tests {
use fastfield_codecs::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use fastfield_codecs::blockwise_linear::{
BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader,
};
use fastfield_codecs::linear::{
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
BlockwiseLinearFastFieldReader, BlockwiseLinearFastFieldSerializer,
};
use fastfield_codecs::linear::{LinearFastFieldReader, LinearFastFieldSerializer};
use fastfield_codecs::*;

fn get_data() -> Vec<u64> {
Expand Down Expand Up @@ -59,12 +57,12 @@ mod tests {
#[bench]
fn bench_fastfield_linearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<LinearInterpolFastFieldSerializer>(b, &data);
bench_create::<LinearFastFieldSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_multilinearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<BlockwiseLinearInterpolFastFieldSerializer>(b, &data);
bench_create::<BlockwiseLinearFastFieldSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_bitpack_get(b: &mut Bencher) {
Expand All @@ -74,14 +72,12 @@ mod tests {
#[bench]
fn bench_fastfield_linearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(b, &data);
bench_get::<LinearFastFieldSerializer, LinearFastFieldReader>(b, &data);
}
#[bench]
fn bench_fastfield_multilinearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(
b, &data,
);
bench_get::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(b, &data);
}
pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
let min_value = data.iter().cloned().min().unwrap_or(0);
Expand Down
32 changes: 16 additions & 16 deletions fastfield_codecs/src/blockwise_linear.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! MultiLinearInterpol compressor uses linear interpolation to guess a values and stores the
//! The BlockwiseLinear codec uses linear interpolation to guess a value and stores the
//! offset, but in blocks of 512.
//!
//! With a CHUNK_SIZE of 512 and 29 byte metadata per block, we get an overhead for metadata of 232 /
Expand Down Expand Up @@ -27,9 +27,9 @@ const CHUNK_SIZE: u64 = 512;
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct MultiLinearInterpolFastFieldReader {
pub struct BlockwiseLinearFastFieldReader {
data: OwnedBytes,
pub footer: MultiLinearInterpolFooter,
pub footer: BlockwiseLinearFooter,
}

#[derive(Clone, Debug, Default)]
Expand Down Expand Up @@ -104,14 +104,14 @@ impl BinarySerializable for Function {
}

#[derive(Clone, Debug)]
pub struct MultiLinearInterpolFooter {
pub struct BlockwiseLinearFooter {
pub num_vals: u64,
pub min_value: u64,
pub max_value: u64,
interpolations: Vec<Function>,
}

impl BinarySerializable for MultiLinearInterpolFooter {
impl BinarySerializable for BlockwiseLinearFooter {
fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
let mut out = vec![];
self.num_vals.serialize(&mut out)?;
Expand All @@ -123,8 +123,8 @@ impl BinarySerializable for MultiLinearInterpolFooter {
Ok(())
}

fn deserialize<R: Read>(reader: &mut R) -> io::Result<MultiLinearInterpolFooter> {
let mut footer = MultiLinearInterpolFooter {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<BlockwiseLinearFooter> {
let mut footer = BlockwiseLinearFooter {
num_vals: u64::deserialize(reader)?,
min_value: u64::deserialize(reader)?,
max_value: u64::deserialize(reader)?,
Expand All @@ -148,14 +148,14 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio
&interpolations[get_interpolation_position(doc)]
}

impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader {
impl FastFieldCodecReader for BlockwiseLinearFastFieldReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
let footer_offset = bytes.len() - 4 - footer_len as usize;
let (data, mut footer) = bytes.split(footer_offset);
let footer = MultiLinearInterpolFooter::deserialize(&mut footer)?;
Ok(MultiLinearInterpolFastFieldReader { data, footer })
let footer = BlockwiseLinearFooter::deserialize(&mut footer)?;
Ok(BlockwiseLinearFastFieldReader { data, footer })
}

#[inline]
Expand All @@ -181,10 +181,10 @@ impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader {
}

/// Same as LinearFastFieldSerializer, but working on chunks of CHUNK_SIZE elements.
pub struct BlockwiseLinearInterpolFastFieldSerializer {}
pub struct BlockwiseLinearFastFieldSerializer {}

impl FastFieldCodecSerializer for BlockwiseLinearInterpolFastFieldSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::BlockwiseLinearInterpol;
impl FastFieldCodecSerializer for BlockwiseLinearFastFieldSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::BlockwiseLinear;
/// Creates a new fast field serializer.
fn serialize(
write: &mut impl Write,
Expand Down Expand Up @@ -270,7 +270,7 @@ impl FastFieldCodecSerializer for BlockwiseLinearInterpolFastFieldSerializer {
}
bit_packer.close(write)?;

let footer = MultiLinearInterpolFooter {
let footer = BlockwiseLinearFooter {
num_vals: fastfield_accessor.num_vals(),
min_value: fastfield_accessor.min_value(),
max_value: fastfield_accessor.max_value(),
Expand Down Expand Up @@ -360,8 +360,8 @@ mod tests {

fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
BlockwiseLinearInterpolFastFieldSerializer,
MultiLinearInterpolFastFieldReader,
BlockwiseLinearFastFieldSerializer,
BlockwiseLinearFastFieldReader,
>(data, name)
}

Expand Down
34 changes: 16 additions & 18 deletions fastfield_codecs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ pub trait FastFieldCodecReader: Sized {
#[repr(u8)]
pub enum FastFieldCodecType {
Bitpacked = 1,
LinearInterpol = 2,
BlockwiseLinearInterpol = 3,
Linear = 2,
BlockwiseLinear = 3,
Gcd = 4,
}

Expand All @@ -50,8 +50,8 @@ impl FastFieldCodecType {
pub fn from_code(code: u8) -> Option<Self> {
match code {
1 => Some(Self::Bitpacked),
2 => Some(Self::LinearInterpol),
3 => Some(Self::BlockwiseLinearInterpol),
2 => Some(Self::Linear),
3 => Some(Self::BlockwiseLinear),
4 => Some(Self::Gcd),
_ => None,
}
Expand Down Expand Up @@ -167,9 +167,9 @@ mod tests {

use crate::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use crate::blockwise_linear::{
BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader,
BlockwiseLinearFastFieldReader, BlockwiseLinearFastFieldSerializer,
};
use crate::linear::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer};
use crate::linear::{LinearFastFieldReader, LinearFastFieldSerializer};

pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
data: &[u64],
Expand Down Expand Up @@ -200,15 +200,15 @@ mod tests {
proptest! {
#[test]
fn test_proptest_small(data in proptest::collection::vec(any::<u64>(), 1..10)) {
create_and_validate::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
}

#[test]
fn test_proptest_large(data in proptest::collection::vec(any::<u64>(), 1..6000)) {
create_and_validate::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
}

Expand Down Expand Up @@ -248,12 +248,11 @@ mod tests {
}
#[test]
fn test_codec_interpolation() {
test_codec::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>();
test_codec::<LinearFastFieldSerializer, LinearFastFieldReader>();
}
#[test]
fn test_codec_multi_interpolation() {
test_codec::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(
);
test_codec::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>();
}

use super::*;
Expand All @@ -262,11 +261,10 @@ mod tests {
fn estimation_good_interpolation_case() {
let data = (10..=20000_u64).collect::<Vec<_>>();

let linear_interpol_estimation = LinearInterpolFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.01);

let multi_linear_interpol_estimation =
BlockwiseLinearInterpolFastFieldSerializer::estimate(&data);
let multi_linear_interpol_estimation = BlockwiseLinearFastFieldSerializer::estimate(&data);
assert_le!(multi_linear_interpol_estimation, 0.2);
assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation);

Expand All @@ -277,7 +275,7 @@ mod tests {
fn estimation_test_bad_interpolation_case() {
let data = vec![200, 10, 10, 10, 10, 1000, 20];

let linear_interpol_estimation = LinearInterpolFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.32);

let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
Expand All @@ -290,7 +288,7 @@ mod tests {

// In this case linear interpolation cannot actually be worse than bitpacking,
// but the estimator adds some threshold, which leads to a worse estimate.
let linear_interpol_estimation = LinearInterpolFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.35);

let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
Expand Down
39 changes: 19 additions & 20 deletions fastfield_codecs/src/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ use crate::{
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct LinearInterpolFastFieldReader {
pub struct LinearFastFieldReader {
data: OwnedBytes,
bit_unpacker: BitUnpacker,
pub footer: LinearInterpolFooter,
pub footer: LinearFooter,
pub slope: f32,
}

#[derive(Clone, Debug)]
pub struct LinearInterpolFooter {
pub struct LinearFooter {
pub relative_max_value: u64,
pub offset: u64,
pub first_val: u64,
Expand All @@ -30,7 +30,7 @@ pub struct LinearInterpolFooter {
pub max_value: u64,
}

impl BinarySerializable for LinearInterpolFooter {
impl BinarySerializable for LinearFooter {
fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
self.relative_max_value.serialize(write)?;
self.offset.serialize(write)?;
Expand All @@ -42,8 +42,8 @@ impl BinarySerializable for LinearInterpolFooter {
Ok(())
}

fn deserialize<R: Read>(reader: &mut R) -> io::Result<LinearInterpolFooter> {
Ok(LinearInterpolFooter {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<LinearFooter> {
Ok(LinearFooter {
relative_max_value: u64::deserialize(reader)?,
offset: u64::deserialize(reader)?,
first_val: u64::deserialize(reader)?,
Expand All @@ -55,20 +55,20 @@ impl BinarySerializable for LinearInterpolFooter {
}
}

impl FixedSize for LinearInterpolFooter {
impl FixedSize for LinearFooter {
const SIZE_IN_BYTES: usize = 56;
}

impl FastFieldCodecReader for LinearInterpolFastFieldReader {
impl FastFieldCodecReader for LinearFastFieldReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_offset = bytes.len() - LinearInterpolFooter::SIZE_IN_BYTES;
let footer_offset = bytes.len() - LinearFooter::SIZE_IN_BYTES;
let (data, mut footer) = bytes.split(footer_offset);
let footer = LinearInterpolFooter::deserialize(&mut footer)?;
let footer = LinearFooter::deserialize(&mut footer)?;
let slope = get_slope(footer.first_val, footer.last_val, footer.num_vals);
let num_bits = compute_num_bits(footer.relative_max_value);
let bit_unpacker = BitUnpacker::new(num_bits);
Ok(LinearInterpolFastFieldReader {
Ok(LinearFastFieldReader {
data,
bit_unpacker,
footer,
Expand All @@ -93,7 +93,7 @@ impl FastFieldCodecReader for LinearInterpolFastFieldReader {

/// Fastfield serializer, which tries to guess values by linear interpolation
/// and stores the difference bitpacked.
pub struct LinearInterpolFastFieldSerializer {}
pub struct LinearFastFieldSerializer {}

#[inline]
pub(crate) fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
Expand Down Expand Up @@ -134,8 +134,8 @@ pub fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
}
}

impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::LinearInterpol;
impl FastFieldCodecSerializer for LinearFastFieldSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Linear;

/// Creates a new fast field serializer.
fn serialize(
Expand Down Expand Up @@ -175,7 +175,7 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
}
bit_packer.close(write)?;

let footer = LinearInterpolFooter {
let footer = LinearFooter {
relative_max_value,
offset,
first_val,
Expand Down Expand Up @@ -239,7 +239,7 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {

let num_bits = compute_num_bits(relative_max_value as u64) as u64
* fastfield_accessor.num_vals()
+ LinearInterpolFooter::SIZE_IN_BYTES as u64;
+ LinearFooter::SIZE_IN_BYTES as u64;
let num_bits_uncompressed = 64 * fastfield_accessor.num_vals();
num_bits as f32 / num_bits_uncompressed as f32
}
Expand All @@ -260,10 +260,9 @@ mod tests {
use crate::tests::get_codec_test_data_sets;

fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
LinearInterpolFastFieldSerializer,
LinearInterpolFastFieldReader,
>(data, name)
crate::tests::create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(
data, name,
)
}

#[test]
Expand Down
8 changes: 4 additions & 4 deletions fastfield_codecs/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#[macro_use]
extern crate prettytable;
use fastfield_codecs::blockwise_linear::BlockwiseLinearInterpolFastFieldSerializer;
use fastfield_codecs::linear::LinearInterpolFastFieldSerializer;
use fastfield_codecs::blockwise_linear::BlockwiseLinearFastFieldSerializer;
use fastfield_codecs::linear::LinearFastFieldSerializer;
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldCodecType, FastFieldStats};
use prettytable::{Cell, Row, Table};

Expand All @@ -13,9 +13,9 @@ fn main() {

for (data, data_set_name) in get_codec_test_data_sets() {
let mut results = vec![];
let res = serialize_with_codec::<LinearInterpolFastFieldSerializer>(&data);
let res = serialize_with_codec::<LinearFastFieldSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<BlockwiseLinearInterpolFastFieldSerializer>(&data);
let res = serialize_with_codec::<BlockwiseLinearFastFieldSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedFastFieldSerializer>(
&data,
Expand Down
8 changes: 3 additions & 5 deletions src/fastfield/gcd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,11 +237,9 @@ mod tests {
assert!(size_prec_sec < size_prec_micro);

let size_prec_sec =
test_gcd_date_with_codec(FastFieldCodecType::LinearInterpol, DatePrecision::Seconds)?;
let size_prec_micro = test_gcd_date_with_codec(
FastFieldCodecType::LinearInterpol,
DatePrecision::Microseconds,
)?;
test_gcd_date_with_codec(FastFieldCodecType::Linear, DatePrecision::Seconds)?;
let size_prec_micro =
test_gcd_date_with_codec(FastFieldCodecType::Linear, DatePrecision::Microseconds)?;
assert!(size_prec_sec < size_prec_micro);

Ok(())
Expand Down
Loading

0 comments on commit c8972cf

Please sign in to comment.