Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

prepare for ip field #1554

Merged
merged 1 commit into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions fastfield_codecs/src/monotonic_mapping_u128.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
use std::net::{IpAddr, Ipv6Addr};

/// A value that can be converted to a `u128` and back.
///
/// NOTE(review): the trait name implies the mapping preserves the ordering given by
/// `PartialOrd`; nothing in the trait itself enforces that, so implementors are
/// presumably expected to uphold it — confirm.
pub trait MonotonicallyMappableToU128: 'static + PartialOrd + Copy + Send + Sync {
/// Converts a value to u128.
///
/// The returned `u128` is the encoded form of the value.
fn to_u128(self) -> u128;

/// Converts a value from u128
///
/// Takes an encoded `u128` and turns it back into a value of the implementing type.
/// **Note: To be used for converting encoded Term, Posting values.**
fn from_u128(val: u128) -> Self;
}

impl MonotonicallyMappableToU128 for u128 {
    /// Identity mapping: a `u128` is returned unchanged.
    #[inline]
    fn to_u128(self) -> u128 {
        self
    }

    /// Identity mapping: the given `u128` is the value itself.
    #[inline]
    fn from_u128(val: u128) -> u128 {
        val
    }
}

impl MonotonicallyMappableToU128 for IpAddr {
fn to_u128(self) -> u128 {
ip_to_u128(self)
}

fn from_u128(val: u128) -> Self {
IpAddr::from(val.to_be_bytes())
}
}

/// Converts an IP address into a `u128`.
///
/// IPv4 addresses are first turned into their IPv4-mapped IPv6 form
/// (`::ffff:a.b.c.d`); the 16 address bytes are then read as a big-endian integer.
fn ip_to_u128(ip_addr: IpAddr) -> u128 {
    let as_v6: Ipv6Addr = match ip_addr {
        IpAddr::V6(addr) => addr,
        IpAddr::V4(addr) => addr.to_ipv6_mapped(),
    };
    u128::from(as_v6)
}
30 changes: 17 additions & 13 deletions src/fastfield/bytes/reader.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::ops::Range;
use std::sync::Arc;

use fastfield_codecs::Column;
Expand Down Expand Up @@ -31,36 +32,39 @@ impl BytesFastFieldReader {
Ok(BytesFastFieldReader { idx_reader, values })
}

fn range(&self, doc: DocId) -> (usize, usize) {
fn range(&self, doc: DocId) -> Range<u64> {
let idx = doc as u64;
let start = self.idx_reader.get_val(idx) as usize;
let stop = self.idx_reader.get_val(idx + 1) as usize;
(start, stop)
let start = self.idx_reader.get_val(idx);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let start = self.idx_reader.get_val(idx);
let start = self.idx_reader.get_val(idx) as usize;

let end = self.idx_reader.get_val(idx + 1);
start..end
}

/// Returns the bytes associated to the given `doc`
pub fn get_bytes(&self, doc: DocId) -> &[u8] {
let (start, stop) = self.range(doc);
&self.values.as_slice()[start..stop]
let range = self.range(doc);
&self.values.as_slice()[range.start as usize..range.end as usize]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
&self.values.as_slice()[range.start as usize..range.end as usize]
&self.values.as_slice()[range]

}

/// Returns the length of the bytes associated to the given `doc`
pub fn num_bytes(&self, doc: DocId) -> usize {
let (start, stop) = self.range(doc);
stop - start
pub fn num_bytes(&self, doc: DocId) -> u64 {
let range = self.range(doc);
range.end - range.start
Copy link
Collaborator

@fulmicoton fulmicoton Sep 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
range.end - range.start
range.len() as u64

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might need to use std::iter::ExactSizeIterator for that one.

}

/// Returns the overall number of bytes in this bytes fast field.
pub fn total_num_bytes(&self) -> usize {
self.values.len()
pub fn total_num_bytes(&self) -> u64 {
self.values.len() as u64
}
}

impl MultiValueLength for BytesFastFieldReader {
fn get_range(&self, doc_id: DocId) -> std::ops::Range<u64> {
self.range(doc_id)
}
fn get_len(&self, doc_id: DocId) -> u64 {
self.num_bytes(doc_id) as u64
self.num_bytes(doc_id)
}
fn get_total_len(&self) -> u64 {
self.total_num_bytes() as u64
self.total_num_bytes()
}
}
2 changes: 2 additions & 0 deletions src/fastfield/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ mod writer;
/// Trait for `BytesFastFieldReader` and `MultiValuedFastFieldReader` to return the length of data
/// for a doc_id
pub trait MultiValueLength {
/// returns the positions for a docid
fn get_range(&self, doc_id: DocId) -> std::ops::Range<u64>;
/// returns the num of values associated to a doc_id
fn get_len(&self, doc_id: DocId) -> u64;
/// returns the sum of num values for all doc_ids
Expand Down
3 changes: 3 additions & 0 deletions src/fastfield/multivalued/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
}

impl<Item: FastValue> MultiValueLength for MultiValuedFastFieldReader<Item> {
fn get_range(&self, doc_id: DocId) -> Range<u64> {
self.range(doc_id)
}
fn get_len(&self, doc_id: DocId) -> u64 {
self.num_vals(doc_id) as u64
}
Expand Down
2 changes: 1 addition & 1 deletion src/indexer/doc_id_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ impl SegmentDocIdMapping {

/// Returns an iterator over the old document addresses, ordered by the new document ids.
///
/// In the returned `DocAddress`, the `segment_ord` is the ordinal of targetted segment
/// In the returned `DocAddress`, the `segment_ord` is the ordinal of targeted segment
/// in the list of merged segments.
pub(crate) fn iter_old_doc_addrs(&self) -> impl Iterator<Item = DocAddress> + '_ {
self.new_doc_id_to_old_doc_addr.iter().copied()
Expand Down
109 changes: 109 additions & 0 deletions src/schema/ip_options.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
use std::ops::BitOr;

use serde::{Deserialize, Serialize};

use super::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
use super::Cardinality;

/// Define how an ip field should be handled by tantivy.
///
/// `Default` is derived, so a default value has `fast` set to `None` and `stored`
/// set to `false`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct IpOptions {
/// Cardinality of the fast field, or `None` when the field is not a fast field.
/// Omitted from the serialized form when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
fast: Option<Cardinality>,
/// Whether the field is stored.
stored: bool,
}

impl IpOptions {
    /// Returns true iff the field has been declared as a fast field.
    pub fn is_fast(&self) -> bool {
        self.get_fastfield_cardinality().is_some()
    }

    /// Returns `true` if the field should be stored.
    pub fn is_stored(&self) -> bool {
        self.stored
    }

    /// Returns the cardinality of the fastfield.
    ///
    /// Returns `None` when the field has not been declared as a fastfield.
    pub fn get_fastfield_cardinality(&self) -> Option<Cardinality> {
        self.fast
    }

    /// Marks the field as stored and returns the updated options.
    #[must_use]
    pub fn set_stored(self) -> Self {
        Self {
            stored: true,
            ..self
        }
    }

    /// Marks the field as a fast field with the given cardinality and returns the
    /// updated options.
    ///
    /// Fast fields are designed for random access.
    /// Access time are similar to a random lookup in an array.
    /// How several values for the same document are handled presumably depends on
    /// `cardinality` — TODO confirm.
    #[must_use]
    pub fn set_fast(self, cardinality: Cardinality) -> Self {
        Self {
            fast: Some(cardinality),
            ..self
        }
    }
}

impl From<()> for IpOptions {
fn from(_: ()) -> IpOptions {
IpOptions::default()
}
}

impl From<FastFlag> for IpOptions {
fn from(_: FastFlag) -> Self {
IpOptions {
stored: false,
fast: Some(Cardinality::SingleValue),
}
}
}

impl From<StoredFlag> for IpOptions {
fn from(_: StoredFlag) -> Self {
IpOptions {
stored: true,
fast: None,
}
}
}

impl From<IndexedFlag> for IpOptions {
fn from(_: IndexedFlag) -> Self {
IpOptions {
stored: false,
fast: None,
}
}
}

impl<T: Into<IpOptions>> BitOr<T> for IpOptions {
    type Output = IpOptions;

    /// Combines two sets of options.
    ///
    /// The result is stored if either side is stored. The fast-field cardinality
    /// comes from the left-hand side when it has one, otherwise from the right.
    fn bitor(self, other: T) -> IpOptions {
        let IpOptions {
            fast: rhs_fast,
            stored: rhs_stored,
        } = other.into();
        let stored = self.stored || rhs_stored;
        let fast = self.fast.or(rhs_fast);
        IpOptions { stored, fast }
    }
}

impl<Head, Tail> From<SchemaFlagList<Head, Tail>> for IpOptions
where
    Head: Clone,
    Tail: Clone,
    Self: BitOr<Output = Self> + From<Head> + From<Tail>,
{
    /// Converts the head and the tail of the flag list separately, then merges
    /// the two results with `|`.
    fn from(head_tail: SchemaFlagList<Head, Tail>) -> Self {
        let head_options = Self::from(head_tail.head);
        let tail_options = Self::from(head_tail.tail);
        head_options | tail_options
    }
}
2 changes: 2 additions & 0 deletions src/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ mod date_time_options;
mod field;
mod flags;
mod index_record_option;
mod ip_options;
mod json_object_options;
mod named_field_document;
mod numeric_options;
Expand All @@ -138,6 +139,7 @@ pub use self::field_type::{FieldType, Type};
pub use self::field_value::FieldValue;
pub use self::flags::{FAST, INDEXED, STORED};
pub use self::index_record_option::IndexRecordOption;
pub use self::ip_options::IpOptions;
pub use self::json_object_options::JsonObjectOptions;
pub use self::named_field_document::NamedFieldDocument;
pub use self::numeric_options::NumericOptions;
Expand Down