Skip to content

Commit

Permalink
Merge pull request #1554 from quickwit-oss/prepare_ip_field
Browse files Browse the repository at this point in the history
prepare for ip field
  • Loading branch information
PSeitz authored Sep 26, 2022
2 parents 21e0ade + f757471 commit 10f10a3
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 14 deletions.
42 changes: 42 additions & 0 deletions fastfield_codecs/src/monotonic_mapping_u128.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
use std::net::{IpAddr, Ipv6Addr};

/// Conversion between a value and a `u128` representation.
///
/// NOTE(review): the trait name and the `PartialOrd` bound suggest that
/// implementations are expected to preserve ordering (`a <= b` implies
/// `a.to_u128() <= b.to_u128()`); nothing in this trait enforces it, so
/// confirm it for every implementor.
pub trait MonotonicallyMappableToU128: 'static + PartialOrd + Copy + Send + Sync {
    /// Converts a value to u128.
    ///
    /// The target representation is a full 128-bit integer (not a `u64`).
    fn to_u128(self) -> u128;

    /// Converts a value from u128.
    ///
    /// This is the counterpart of [`MonotonicallyMappableToU128::to_u128`].
    /// **Note: To be used for converting encoded Term, Posting values.**
    fn from_u128(val: u128) -> Self;
}

/// `u128` is its own `u128` representation: both conversions are the identity.
impl MonotonicallyMappableToU128 for u128 {
/// Returns the value unchanged.
fn to_u128(self) -> u128 {
self
}

/// Returns `val` unchanged.
fn from_u128(val: u128) -> Self {
val
}
}

impl MonotonicallyMappableToU128 for IpAddr {
fn to_u128(self) -> u128 {
ip_to_u128(self)
}

fn from_u128(val: u128) -> Self {
IpAddr::from(val.to_be_bytes())
}
}

/// Converts an IP address to the `u128` value of its IPv6 form.
///
/// IPv4 addresses are first widened to their IPv4-mapped IPv6 address; IPv6
/// addresses are used as they are. The 16 address bytes are then read as a
/// big-endian integer.
fn ip_to_u128(ip_addr: IpAddr) -> u128 {
    let widened: Ipv6Addr = match ip_addr {
        IpAddr::V6(already_v6) => already_v6,
        IpAddr::V4(v4) => v4.to_ipv6_mapped(),
    };
    // `u128::from(Ipv6Addr)` reads the octets in network (big-endian) order.
    u128::from(widened)
}
30 changes: 17 additions & 13 deletions src/fastfield/bytes/reader.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::ops::Range;
use std::sync::Arc;

use fastfield_codecs::Column;
Expand Down Expand Up @@ -31,36 +32,39 @@ impl BytesFastFieldReader {
Ok(BytesFastFieldReader { idx_reader, values })
}

fn range(&self, doc: DocId) -> (usize, usize) {
fn range(&self, doc: DocId) -> Range<u64> {
let idx = doc as u64;
let start = self.idx_reader.get_val(idx) as usize;
let stop = self.idx_reader.get_val(idx + 1) as usize;
(start, stop)
let start = self.idx_reader.get_val(idx);
let end = self.idx_reader.get_val(idx + 1);
start..end
}

/// Returns the bytes associated to the given `doc`
pub fn get_bytes(&self, doc: DocId) -> &[u8] {
let (start, stop) = self.range(doc);
&self.values.as_slice()[start..stop]
let range = self.range(doc);
&self.values.as_slice()[range.start as usize..range.end as usize]
}

/// Returns the length of the bytes associated to the given `doc`
pub fn num_bytes(&self, doc: DocId) -> usize {
let (start, stop) = self.range(doc);
stop - start
pub fn num_bytes(&self, doc: DocId) -> u64 {
let range = self.range(doc);
range.end - range.start
}

/// Returns the overall number of bytes in this bytes fast field.
pub fn total_num_bytes(&self) -> usize {
self.values.len()
pub fn total_num_bytes(&self) -> u64 {
self.values.len() as u64
}
}

impl MultiValueLength for BytesFastFieldReader {
fn get_range(&self, doc_id: DocId) -> std::ops::Range<u64> {
self.range(doc_id)
}
fn get_len(&self, doc_id: DocId) -> u64 {
self.num_bytes(doc_id) as u64
self.num_bytes(doc_id)
}
fn get_total_len(&self) -> u64 {
self.total_num_bytes() as u64
self.total_num_bytes()
}
}
2 changes: 2 additions & 0 deletions src/fastfield/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ mod writer;
/// Trait for `BytesFastFieldReader` and `MultiValuedFastFieldReader` to return the length of data
/// for a doc_id
pub trait MultiValueLength {
/// returns the range of positions associated to a doc_id
fn get_range(&self, doc_id: DocId) -> std::ops::Range<u64>;
/// returns the num of values associated to a doc_id
fn get_len(&self, doc_id: DocId) -> u64;
/// returns the sum of num values for all doc_ids
Expand Down
3 changes: 3 additions & 0 deletions src/fastfield/multivalued/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
}

impl<Item: FastValue> MultiValueLength for MultiValuedFastFieldReader<Item> {
fn get_range(&self, doc_id: DocId) -> Range<u64> {
self.range(doc_id)
}
fn get_len(&self, doc_id: DocId) -> u64 {
self.num_vals(doc_id) as u64
}
Expand Down
2 changes: 1 addition & 1 deletion src/indexer/doc_id_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ impl SegmentDocIdMapping {

/// Returns an iterator over the old document addresses, ordered by the new document ids.
///
/// In the returned `DocAddress`, the `segment_ord` is the ordinal of targetted segment
/// In the returned `DocAddress`, the `segment_ord` is the ordinal of targeted segment
/// in the list of merged segments.
pub(crate) fn iter_old_doc_addrs(&self) -> impl Iterator<Item = DocAddress> + '_ {
self.new_doc_id_to_old_doc_addr.iter().copied()
Expand Down
109 changes: 109 additions & 0 deletions src/schema/ip_options.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
use std::ops::BitOr;

use serde::{Deserialize, Serialize};

use super::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
use super::Cardinality;

/// Defines how an ip field should be handled by tantivy.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct IpOptions {
// Fast field cardinality; `None` means the field is not a fast field.
// The key is left out of the serialized form when the value is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
fast: Option<Cardinality>,
// Whether the field is flagged as stored.
stored: bool,
}

impl IpOptions {
    /// Tells whether the field is a fast field, i.e. whether a fast field
    /// cardinality has been configured.
    pub fn is_fast(&self) -> bool {
        self.fast.is_some()
    }

    /// Returns `true` if the ip field should be stored.
    pub fn is_stored(&self) -> bool {
        self.stored
    }

    /// Returns the cardinality of the fast field.
    ///
    /// `None` is returned when the field has not been declared as a fast
    /// field.
    pub fn get_fastfield_cardinality(&self) -> Option<Cardinality> {
        self.fast
    }

    /// Flags the field as stored and returns the updated options.
    #[must_use]
    pub fn set_stored(self) -> Self {
        Self {
            stored: true,
            ..self
        }
    }

    /// Declares the field as a fast field with the given cardinality and
    /// returns the updated options.
    ///
    /// Fast fields are designed for random access.
    /// Access times are similar to a random lookup in an array.
    ///
    /// NOTE(review): the previous comment stated that only the last value is
    /// kept when several values are associated to the field; presumably that
    /// depends on the chosen `cardinality` — confirm.
    #[must_use]
    pub fn set_fast(self, cardinality: Cardinality) -> Self {
        Self {
            fast: Some(cardinality),
            ..self
        }
    }
}

impl From<()> for IpOptions {
fn from(_: ()) -> IpOptions {
IpOptions::default()
}
}

impl From<FastFlag> for IpOptions {
fn from(_: FastFlag) -> Self {
IpOptions {
stored: false,
fast: Some(Cardinality::SingleValue),
}
}
}

impl From<StoredFlag> for IpOptions {
fn from(_: StoredFlag) -> Self {
IpOptions {
stored: true,
fast: None,
}
}
}

impl From<IndexedFlag> for IpOptions {
fn from(_: IndexedFlag) -> Self {
IpOptions {
stored: false,
fast: None,
}
}
}

/// Combines two sets of options with `|`.
///
/// The right-hand side may be anything convertible into `IpOptions`.
impl<T: Into<IpOptions>> BitOr<T> for IpOptions {
    type Output = IpOptions;

    /// Returns options that are stored if either side is stored, and whose
    /// fast field cardinality is the left one when set, the right one
    /// otherwise.
    fn bitor(self, other: T) -> IpOptions {
        let rhs: IpOptions = other.into();
        let stored = self.stored || rhs.stored;
        let fast = self.fast.or(rhs.fast);
        IpOptions { fast, stored }
    }
}

/// Converts a list of schema flags by converting its head and its tail
/// separately and combining the two results with `|`.
impl<Head, Tail> From<SchemaFlagList<Head, Tail>> for IpOptions
where
    Head: Clone,
    Tail: Clone,
    Self: BitOr<Output = Self> + From<Head> + From<Tail>,
{
    fn from(head_tail: SchemaFlagList<Head, Tail>) -> Self {
        let from_head = Self::from(head_tail.head);
        let from_tail = Self::from(head_tail.tail);
        from_head | from_tail
    }
}
2 changes: 2 additions & 0 deletions src/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ mod date_time_options;
mod field;
mod flags;
mod index_record_option;
mod ip_options;
mod json_object_options;
mod named_field_document;
mod numeric_options;
Expand All @@ -138,6 +139,7 @@ pub use self::field_type::{FieldType, Type};
pub use self::field_value::FieldValue;
pub use self::flags::{FAST, INDEXED, STORED};
pub use self::index_record_option::IndexRecordOption;
pub use self::ip_options::IpOptions;
pub use self::json_object_options::JsonObjectOptions;
pub use self::named_field_document::NamedFieldDocument;
pub use self::numeric_options::NumericOptions;
Expand Down

0 comments on commit 10f10a3

Please sign in to comment.