Skip to content

Commit

Permalink
add columnar format compatibility tests (quickwit-oss#2433)
Browse files Browse the repository at this point in the history
* add columnar format compatibility tests

* always try to write current format
  • Loading branch information
PSeitz authored and philippemnoel committed Aug 31, 2024
1 parent bfc2ffb commit bb29639
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 1 deletion.
Binary file added columnar/compat_tests_data/v1.columnar
Binary file not shown.
13 changes: 13 additions & 0 deletions columnar/src/columnar/format_version.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use core::fmt;
use std::fmt::{Display, Formatter};

use crate::InvalidData;

/// Size in bytes of the version footer: the length of `MAGIC_BYTES` plus the size of a `u32`.
pub const VERSION_FOOTER_NUM_BYTES: usize = MAGIC_BYTES.len() + std::mem::size_of::<u32>();
Expand All @@ -20,12 +23,22 @@ pub fn parse_footer(footer_bytes: [u8; VERSION_FOOTER_NUM_BYTES]) -> Result<Vers
Version::try_from_bytes(footer_bytes[0..4].try_into().unwrap())
}

/// The columnar format version designated as current; set to `Version::V1` here.
pub const CURRENT_VERSION: Version = Version::V1;

/// Version tag of the columnar format.
///
/// `#[repr(u32)]` gives every variant a `u32` discriminant.
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Version {
    /// Format version 1, with discriminant `1`.
    V1 = 1,
}

/// Renders a [`Version`] as a short lowercase label.
impl Display for Version {
    /// Writes `"v1"` for [`Version::V1`].
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Pick the label first; the match stays exhaustive so that adding a
        // variant forces this impl to be updated.
        let label = match self {
            Version::V1 => "v1",
        };
        f.write_str(label)
    }
}

impl Version {
fn to_bytes(self) -> [u8; 4] {
(self as u32).to_le_bytes()
Expand Down
1 change: 1 addition & 0 deletions columnar/src/columnar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mod reader;
mod writer;

pub use column_type::{ColumnType, HasAssociatedColumnType};
pub use format_version::{Version, CURRENT_VERSION};
#[cfg(test)]
pub(crate) use merge::ColumnTypeCategory;
pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
Expand Down
85 changes: 85 additions & 0 deletions columnar/src/compat_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
use std::path::PathBuf;

use crate::{Column, ColumnarReader, DynamicColumn, CURRENT_VERSION};

const NUM_DOCS: u32 = u16::MAX as u32;

fn generate_columnar(num_docs: u32) -> Vec<u8> {
use crate::ColumnarWriter;

let mut columnar_writer = ColumnarWriter::default();

for i in 0..num_docs {
if i % 100 == 0 {
columnar_writer.record_numerical(i, "sparse", i as u64);
}
if i % 2 == 0 {
columnar_writer.record_numerical(i, "dense", i as u64);
}
columnar_writer.record_numerical(i, "full", i as u64);
columnar_writer.record_numerical(i, "multi", i as u64);
columnar_writer.record_numerical(i, "multi", i as u64);
}

let mut wrt: Vec<u8> = Vec::new();
columnar_writer.serialize(num_docs, None, &mut wrt).unwrap();

wrt
}

/// Writes a columnar for the CURRENT_VERSION to disk.
///
/// If a file for that version already exists it is left untouched and the
/// test returns immediately.
#[test]
fn create_format() {
    // Build the path once as a `PathBuf`; it serves both the existence check
    // and the write, so no extra `String` clone is needed.
    let file_path = PathBuf::from(path_for_version(&CURRENT_VERSION.to_string()));
    if file_path.exists() {
        return;
    }
    let columnar = generate_columnar(NUM_DOCS);
    std::fs::write(&file_path, columnar).unwrap();
}

/// Returns the relative path of the compatibility fixture for `version`,
/// i.e. `./compat_tests_data/<version>.columnar`.
fn path_for_version(version: &str) -> String {
    format!("./compat_tests_data/{version}.columnar")
}

/// Checks that the stored `v1` fixture can still be read.
#[test]
fn test_format_v1() {
    test_format(&path_for_version("v1"));
}

/// Reads the columnar file at `path` and spot-checks the first value of
/// selected docs in the "full", "multi", "sparse" and "dense" columns.
///
/// The expected values match what `generate_columnar(NUM_DOCS)` records.
fn test_format(path: &str) {
    let bytes = std::fs::read(path).unwrap();
    let columnar_reader = ColumnarReader::open(bytes).unwrap();

    let last_doc = NUM_DOCS - 1;
    let last_value = NUM_DOCS as u64 - 1;

    // "full": every doc has a value.
    let full = open_column(&columnar_reader, "full");
    assert_eq!(full.first(0).unwrap(), 0);
    assert_eq!(full.first(last_doc).unwrap(), last_value);

    // "multi": every doc has values; only the first one is checked.
    let multi = open_column(&columnar_reader, "multi");
    assert_eq!(multi.first(0).unwrap(), 0);
    assert_eq!(multi.first(last_doc).unwrap(), last_value);

    // "sparse": only doc ids divisible by 100 have a value.
    let sparse = open_column(&columnar_reader, "sparse");
    assert_eq!(sparse.first(0).unwrap(), 0);
    assert_eq!(sparse.first(last_doc), None);
    assert_eq!(sparse.first(65000), Some(65000));

    // "dense": only even doc ids have a value.
    let dense = open_column(&columnar_reader, "dense");
    assert_eq!(dense.first(0).unwrap(), 0);
    assert_eq!(dense.first(last_doc).unwrap(), last_value);
    assert_eq!(dense.first(NUM_DOCS - 2), None);
}

/// Opens the first column registered under `name`, coerces it to `u64`, and
/// returns the typed column.
///
/// # Panics
///
/// Panics if reading, opening or coercing fails, if no column exists under
/// `name`, or if the coerced column is not the `U64` variant.
fn open_column(reader: &ColumnarReader, name: &str) -> Column<u64> {
    let handles = reader.read_columns(name).unwrap();
    let dynamic_column = handles[0].open().unwrap();
    let coerced = dynamic_column
        .coerce_numerical(crate::NumericalType::U64)
        .unwrap();
    match coerced {
        DynamicColumn::U64(typed_column) => typed_column,
        _ => panic!(),
    }
}
5 changes: 4 additions & 1 deletion columnar/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ pub use column_values::{
};
pub use columnar::{
merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder,
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION,
};
use sstable::VoidSSTable;
pub use value::{NumericalType, NumericalValue};
Expand Down Expand Up @@ -131,3 +131,6 @@ impl Cardinality {

#[cfg(test)]
mod tests;

#[cfg(test)]
mod compat_tests;

0 comments on commit bb29639

Please sign in to comment.