diff --git a/common/src/bounds.rs b/common/src/bounds.rs new file mode 100644 index 0000000000..712c77852f --- /dev/null +++ b/common/src/bounds.rs @@ -0,0 +1,130 @@ +use std::io; +use std::ops::Bound; + +#[derive(Clone, Debug)] +pub struct BoundsRange { + pub lower_bound: Bound, + pub upper_bound: Bound, +} +impl BoundsRange { + pub fn new(lower_bound: Bound, upper_bound: Bound) -> Self { + BoundsRange { + lower_bound, + upper_bound, + } + } + pub fn is_unbounded(&self) -> bool { + matches!(self.lower_bound, Bound::Unbounded) && matches!(self.upper_bound, Bound::Unbounded) + } + pub fn map_bound(&self, transform: impl Fn(&T) -> TTo) -> BoundsRange { + BoundsRange { + lower_bound: map_bound(&self.lower_bound, &transform), + upper_bound: map_bound(&self.upper_bound, &transform), + } + } + + pub fn map_bound_res( + &self, + transform: impl Fn(&T) -> Result, + ) -> Result, Err> { + Ok(BoundsRange { + lower_bound: map_bound_res(&self.lower_bound, &transform)?, + upper_bound: map_bound_res(&self.upper_bound, &transform)?, + }) + } + + pub fn transform_inner( + &self, + transform_lower: impl Fn(&T) -> TransformBound, + transform_upper: impl Fn(&T) -> TransformBound, + ) -> BoundsRange { + BoundsRange { + lower_bound: transform_bound_inner(&self.lower_bound, &transform_lower), + upper_bound: transform_bound_inner(&self.upper_bound, &transform_upper), + } + } + + /// Returns the first set inner value + pub fn get_inner(&self) -> Option<&T> { + inner_bound(&self.lower_bound).or(inner_bound(&self.upper_bound)) + } +} + +pub enum TransformBound { + /// Overwrite the bounds + NewBound(Bound), + /// Use Existing bounds with new value + Existing(T), +} + +/// Takes a bound and transforms the inner value into a new bound via a closure. +/// The bound variant may change by the value returned value from the closure. +pub fn transform_bound_inner_res( + bound: &Bound, + transform: impl Fn(&TFrom) -> io::Result>, +) -> io::Result> { + use self::Bound::*; + Ok(match bound { + Excluded(ref from_val) => match transform(from_val)? { + TransformBound::NewBound(new_val) => new_val, + TransformBound::Existing(new_val) => Excluded(new_val), + }, + Included(ref from_val) => match transform(from_val)? { + TransformBound::NewBound(new_val) => new_val, + TransformBound::Existing(new_val) => Included(new_val), + }, + Unbounded => Unbounded, + }) +} + +/// Takes a bound and transforms the inner value into a new bound via a closure. +/// The bound variant may change by the value returned value from the closure. +pub fn transform_bound_inner( + bound: &Bound, + transform: impl Fn(&TFrom) -> TransformBound, +) -> Bound { + use self::Bound::*; + match bound { + Excluded(ref from_val) => match transform(from_val) { + TransformBound::NewBound(new_val) => new_val, + TransformBound::Existing(new_val) => Excluded(new_val), + }, + Included(ref from_val) => match transform(from_val) { + TransformBound::NewBound(new_val) => new_val, + TransformBound::Existing(new_val) => Included(new_val), + }, + Unbounded => Unbounded, + } +} + +/// Returns the inner value of a `Bound` +pub fn inner_bound(val: &Bound) -> Option<&T> { + match val { + Bound::Included(term) | Bound::Excluded(term) => Some(term), + Bound::Unbounded => None, + } +} + +pub fn map_bound( + bound: &Bound, + transform: impl Fn(&TFrom) -> TTo, +) -> Bound { + use self::Bound::*; + match bound { + Excluded(ref from_val) => Bound::Excluded(transform(from_val)), + Included(ref from_val) => Bound::Included(transform(from_val)), + Unbounded => Unbounded, + } +} + +pub fn map_bound_res( + bound: &Bound, + transform: impl Fn(&TFrom) -> Result, +) -> Result, Err> { + use self::Bound::*; + Ok(match bound { + Excluded(ref from_val) => Excluded(transform(from_val)?), + Included(ref from_val) => Included(transform(from_val)?), + Unbounded => Unbounded, + }) +} diff --git a/common/src/lib.rs b/common/src/lib.rs index bfbccecd93..0a51f91fe3 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -5,6 +5,7 @@ use std::ops::Deref; pub use byteorder::LittleEndian as Endianness; mod bitset; +pub mod bounds; mod byte_count; mod datetime; pub mod file_slice; diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 81115d2878..9c98e7c7bf 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -482,16 +482,32 @@ impl QueryParser { }); if terms.len() != 1 { return Err(QueryParserError::UnsupportedQuery(format!( - "Range query boundary cannot have multiple tokens: {phrase:?}." + "Range query boundary cannot have multiple tokens: {phrase:?} [{terms:?}]." ))); } Ok(terms.into_iter().next().unwrap()) } - FieldType::JsonObject(_) => { - // Json range are not supported. - Err(QueryParserError::UnsupportedQuery( - "Range query are not supported on json field.".to_string(), - )) + FieldType::JsonObject(ref json_options) => { + let get_term_with_path = || { + Term::from_field_json_path( + field, + json_path, + json_options.is_expand_dots_enabled(), + ) + }; + if let Some(term) = + // Try to convert the phrase to a fast value + convert_to_fast_value_and_append_to_json_term( + get_term_with_path(), + phrase, + ) + { + Ok(term) + } else { + let mut term = get_term_with_path(); + term.append_type_and_str(phrase); + Ok(term) + } } FieldType::Facet(_) => match Facet::from_text(phrase) { Ok(facet) => Ok(Term::from_facet(field, &facet)), @@ -1123,8 +1139,8 @@ mod test { let query = make_query_parser().parse_query("title:[A TO B]").unwrap(); assert_eq!( format!("{query:?}"), - "RangeQuery { lower_bound: Included(Term(field=0, type=Str, \"a\")), upper_bound: \ - Included(Term(field=0, type=Str, \"b\")), limit: None }" + "RangeQuery { bounds: BoundsRange { lower_bound: Included(Term(field=0, type=Str, \ + \"a\")), upper_bound: Included(Term(field=0, type=Str, \"b\")) }, limit: None }" ); } diff --git a/src/query/range_query/mod.rs b/src/query/range_query/mod.rs index 40effb85b1..ed2e535a42 100644 --- a/src/query/range_query/mod.rs +++ b/src/query/range_query/mod.rs @@ -1,5 +1,3 @@ -use std::ops::Bound; - use crate::schema::Type; mod fast_field_range_doc_set; @@ -12,29 +10,10 @@ pub use self::range_query_u64_fastfield::FastFieldRangeWeight; // TODO is this correct? pub(crate) fn is_type_valid_for_fastfield_range_query(typ: Type) -> bool { match typ { - Type::Str | Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => true, + Type::Str | Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date | Type::Json => { + true + } Type::IpAddr => true, - Type::Facet | Type::Bytes | Type::Json => false, - } -} - -fn map_bound(bound: &Bound, transform: impl Fn(&TFrom) -> TTo) -> Bound { - use self::Bound::*; - match bound { - Excluded(ref from_val) => Excluded(transform(from_val)), - Included(ref from_val) => Included(transform(from_val)), - Unbounded => Unbounded, + Type::Facet | Type::Bytes => false, } } - -fn map_bound_res( - bound: &Bound, - transform: impl Fn(&TFrom) -> Result, -) -> Result, Err> { - use self::Bound::*; - Ok(match bound { - Excluded(ref from_val) => Excluded(transform(from_val)?), - Included(ref from_val) => Included(transform(from_val)?), - Unbounded => Unbounded, - }) -} diff --git a/src/query/range_query/range_query.rs b/src/query/range_query/range_query.rs index 4b27714c3d..d492462709 100644 --- a/src/query/range_query/range_query.rs +++ b/src/query/range_query/range_query.rs @@ -1,9 +1,9 @@ use std::io; use std::ops::Bound; +use common::bounds::{map_bound, BoundsRange}; use common::BitSet; -use super::map_bound; use super::range_query_u64_fastfield::FastFieldRangeWeight; use crate::index::SegmentReader; use crate::query::explanation::does_not_match; @@ -69,19 +69,10 @@ use crate::{DocId, Score}; /// ``` #[derive(Clone, Debug)] pub struct RangeQuery { - lower_bound: Bound, - upper_bound: Bound, + bounds: BoundsRange, limit: Option, } -/// Returns the inner value of a `Bound` -pub(crate) fn inner_bound(val: &Bound) -> Option<&Term> { - match val { - Bound::Included(term) | Bound::Excluded(term) => Some(term), - Bound::Unbounded => None, - } -} - impl RangeQuery { /// Creates a new `RangeQuery` from bounded start and end terms. /// @@ -89,8 +80,7 @@ impl RangeQuery { /// the `Weight` object is created. pub fn new(lower_bound: Bound, upper_bound: Bound) -> RangeQuery { RangeQuery { - lower_bound, - upper_bound, + bounds: BoundsRange::new(lower_bound, upper_bound), limit: None, } } @@ -106,8 +96,8 @@ impl RangeQuery { } pub(crate) fn get_term(&self) -> &Term { - inner_bound(&self.lower_bound) - .or(inner_bound(&self.upper_bound)) + self.bounds + .get_inner() .expect("At least one bound must be set") } @@ -128,15 +118,19 @@ impl Query for RangeQuery { if field_type.is_fast() && is_type_valid_for_fastfield_range_query(self.value_type()) { Ok(Box::new(FastFieldRangeWeight::new( self.field(), - self.lower_bound.clone(), - self.upper_bound.clone(), + self.bounds.clone(), ))) } else { + if field_type.is_json() { + return Err(crate::TantivyError::InvalidArgument( + "RangeQuery on JSON is only supported for fast fields currently".to_string(), + )); + } let verify_and_unwrap_term = |val: &Term| val.serialized_value_bytes().to_owned(); Ok(Box::new(RangeWeight { field: self.field(), - lower_bound: map_bound(&self.lower_bound, verify_and_unwrap_term), - upper_bound: map_bound(&self.upper_bound, verify_and_unwrap_term), + lower_bound: map_bound(&self.bounds.lower_bound, verify_and_unwrap_term), + upper_bound: map_bound(&self.bounds.upper_bound, verify_and_unwrap_term), limit: self.limit, })) } diff --git a/src/query/range_query/range_query_u64_fastfield.rs b/src/query/range_query/range_query_u64_fastfield.rs index 918d524ccf..b7b3e484ad 100644 --- a/src/query/range_query/range_query_u64_fastfield.rs +++ b/src/query/range_query/range_query_u64_fastfield.rs @@ -5,32 +5,29 @@ use std::net::Ipv6Addr; use std::ops::{Bound, RangeInclusive}; -use columnar::{Column, MonotonicallyMappableToU128, MonotonicallyMappableToU64, StrColumn}; +use columnar::{ + Column, ColumnType, MonotonicallyMappableToU128, MonotonicallyMappableToU64, NumericalType, + StrColumn, +}; +use common::bounds::{BoundsRange, TransformBound}; use common::BinarySerializable; use super::fast_field_range_doc_set::RangeDocSet; -use super::{map_bound, map_bound_res}; -use crate::query::range_query::range_query::inner_bound; use crate::query::{AllScorer, ConstScorer, EmptyScorer, Explanation, Query, Scorer, Weight}; -use crate::schema::{Field, Type}; +use crate::schema::{Field, Type, ValueBytes}; use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, Term}; /// `FastFieldRangeWeight` uses the fast field to execute range queries. #[derive(Clone, Debug)] pub struct FastFieldRangeWeight { - lower_bound: Bound, - upper_bound: Bound, + bounds: BoundsRange, field: Field, } impl FastFieldRangeWeight { /// Create a new FastFieldRangeWeight - pub fn new(field: Field, lower_bound: Bound, upper_bound: Bound) -> Self { - Self { - lower_bound, - upper_bound, - field, - } + pub(crate) fn new(field: Field, bounds: BoundsRange) -> Self { + Self { bounds, field } } } @@ -46,14 +43,14 @@ impl Query for FastFieldRangeWeight { impl Weight for FastFieldRangeWeight { fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { // Check if both bounds are Bound::Unbounded - if self.lower_bound == Bound::Unbounded && self.upper_bound == Bound::Unbounded { + if self.bounds.is_unbounded() { return Ok(Box::new(AllScorer::new(reader.max_doc()))); } - let field_name = reader.schema().get_field_name(self.field); let field_type = reader.schema().get_field_entry(self.field).field_type(); - let term = inner_bound(&self.lower_bound) - .or(inner_bound(&self.upper_bound)) + let term = self + .bounds + .get_inner() .expect("At least one bound must be set"); assert_eq!( term.typ(), @@ -62,83 +59,135 @@ impl Weight for FastFieldRangeWeight { field_type, term.typ() ); - if field_type.is_ip_addr() { + let field_name = term.get_full_path(reader.schema()); + + let get_value_bytes = |term: &Term| term.value().value_bytes_payload(); + let get_term_u64_internal_representation = |term: &Term| { + let bytes = term.value().value_bytes_payload(); + u64::from_be(BinarySerializable::deserialize(&mut &bytes[..]).unwrap()) + }; + + let term_value = term.value(); + if field_type.is_json() { + let bounds = self.bounds.map_bound(|term| { + let val = term.value().as_json_value_bytes().unwrap().to_owned(); + val + }); + // Unlike with other field types JSON may have multiple columns of different types + // under the same name + // + // In the JSON case the provided type in term may not exactly match the column type, + // especially with the numeric type interpolation + let json_value_bytes = term_value + .as_json_value_bytes() + .expect("expected json type in term"); + let typ = json_value_bytes.typ(); + match typ { + Type::Str => { + // If we are here that means we already tried to convert to a fast value and + // failed + let Some(str_dict_column): Option = + reader.fast_fields().str(&field_name)? + else { + return Ok(Box::new(EmptyScorer)); + }; + let dict = str_dict_column.dictionary(); + + let bounds = self.bounds.map_bound(get_value_bytes); + // Get term ids for terms + let (lower_bound, upper_bound) = + dict.term_bounds_to_ord(bounds.lower_bound, bounds.upper_bound)?; + let fast_field_reader = reader.fast_fields(); + let Some((column, _col_type)) = + fast_field_reader.u64_lenient_for_type(None, &field_name)? + else { + return Ok(Box::new(EmptyScorer)); + }; + search_on_u64_ff(column, boost, BoundsRange::new(lower_bound, upper_bound)) + } + Type::U64 | Type::I64 | Type::F64 => { + search_on_json_numerical_field(reader, &field_name, typ, bounds, boost) + } + Type::Date => { + let fast_field_reader = reader.fast_fields(); + let Some((column, _col_type)) = fast_field_reader + .u64_lenient_for_type(Some(&[ColumnType::DateTime]), &field_name)? + else { + return Ok(Box::new(EmptyScorer)); + }; + let bounds = bounds.map_bound(|term| term.as_date().unwrap().to_u64()); + search_on_u64_ff( + column, + boost, + BoundsRange::new(bounds.lower_bound, bounds.upper_bound), + ) + } + Type::Bool | Type::Facet | Type::Bytes | Type::Json | Type::IpAddr => { + Err(crate::TantivyError::InvalidArgument(format!( + "unsupported value bytes type in json term value_bytes {:?}", + term_value.typ() + ))) + } + } + } else if field_type.is_ip_addr() { let parse_ip_from_bytes = |term: &Term| { term.value().as_ip_addr().ok_or_else(|| { crate::TantivyError::InvalidArgument("Expected ip address".to_string()) }) }; - let lower_bound = map_bound_res(&self.lower_bound, parse_ip_from_bytes)?; - let upper_bound = map_bound_res(&self.upper_bound, parse_ip_from_bytes)?; + let bounds: BoundsRange = self.bounds.map_bound_res(parse_ip_from_bytes)?; let Some(ip_addr_column): Option> = - reader.fast_fields().column_opt(field_name)? + reader.fast_fields().column_opt(&field_name)? else { return Ok(Box::new(EmptyScorer)); }; - let value_range = bound_to_value_range_ip( - &lower_bound, - &upper_bound, + let value_range = bound_range_inclusive_ip( + &bounds.lower_bound, + &bounds.upper_bound, ip_addr_column.min_value(), ip_addr_column.max_value(), ); let docset = RangeDocSet::new(value_range, ip_addr_column); Ok(Box::new(ConstScorer::new(docset, boost))) - } else { - let (lower_bound, upper_bound) = if field_type.is_str() { - let Some(str_dict_column): Option = - reader.fast_fields().str(field_name)? - else { - return Ok(Box::new(EmptyScorer)); - }; - let dict = str_dict_column.dictionary(); - - let lower_bound = map_bound(&self.lower_bound, |term| { - term.serialized_value_bytes().to_vec() - }); - let upper_bound = map_bound(&self.upper_bound, |term| { - term.serialized_value_bytes().to_vec() - }); - // Get term ids for terms - let (lower_bound, upper_bound) = - dict.term_bounds_to_ord(lower_bound, upper_bound)?; - (lower_bound, upper_bound) - } else { - assert!( - maps_to_u64_fastfield(field_type.value_type()), - "{:?}", - field_type - ); - let parse_from_bytes = |term: &Term| { - u64::from_be( - BinarySerializable::deserialize(&mut &term.serialized_value_bytes()[..]) - .unwrap(), - ) - }; + } else if field_type.is_str() { + let Some(str_dict_column): Option = reader.fast_fields().str(&field_name)? + else { + return Ok(Box::new(EmptyScorer)); + }; + let dict = str_dict_column.dictionary(); - let lower_bound = map_bound(&self.lower_bound, parse_from_bytes); - let upper_bound = map_bound(&self.upper_bound, parse_from_bytes); - (lower_bound, upper_bound) + let bounds = self.bounds.map_bound(get_value_bytes); + // Get term ids for terms + let (lower_bound, upper_bound) = + dict.term_bounds_to_ord(bounds.lower_bound, bounds.upper_bound)?; + let fast_field_reader = reader.fast_fields(); + let Some((column, _col_type)) = + fast_field_reader.u64_lenient_for_type(None, &field_name)? + else { + return Ok(Box::new(EmptyScorer)); }; + search_on_u64_ff(column, boost, BoundsRange::new(lower_bound, upper_bound)) + } else { + assert!( + maps_to_u64_fastfield(field_type.value_type()), + "{:?}", + field_type + ); + + let bounds = self.bounds.map_bound(get_term_u64_internal_representation); let fast_field_reader = reader.fast_fields(); - let Some((column, _)) = fast_field_reader.u64_lenient_for_type(None, field_name)? + let Some((column, _col_type)) = + fast_field_reader.u64_lenient_for_type(None, &field_name)? else { return Ok(Box::new(EmptyScorer)); }; - #[allow(clippy::reversed_empty_ranges)] - let value_range = bound_to_value_range( - &lower_bound, - &upper_bound, - column.min_value(), - column.max_value(), + search_on_u64_ff( + column, + boost, + BoundsRange::new(bounds.lower_bound, bounds.upper_bound), ) - .unwrap_or(1..=0); // empty range - if value_range.is_empty() { - return Ok(Box::new(EmptyScorer)); - } - let docset = RangeDocSet::new(value_range, column); - Ok(Box::new(ConstScorer::new(docset, boost))) } } @@ -155,6 +204,186 @@ impl Weight for FastFieldRangeWeight { } } +fn search_on_json_numerical_field( + reader: &SegmentReader, + field_name: &str, + typ: Type, + bounds: BoundsRange>>, + boost: Score, +) -> crate::Result> { + // Since we don't know which type was interpolated for the internal column whe + // have to check for all types (only one exists) + let allowed_column_types: Option<&[ColumnType]> = + Some(&[ColumnType::F64, ColumnType::I64, ColumnType::U64]); + let fast_field_reader = reader.fast_fields(); + let Some((column, col_type)) = + fast_field_reader.u64_lenient_for_type(allowed_column_types, field_name)? + else { + return Ok(Box::new(EmptyScorer)); + }; + let actual_colum_type: NumericalType = col_type.numerical_type().unwrap_or_else(|| { + panic!( + "internal error: couldn't cast to numerical_type: {:?}", + col_type + ) + }); + + let bounds = match typ.numerical_type().unwrap() { + NumericalType::I64 => { + let bounds = bounds.map_bound(|term| (term.as_i64().unwrap())); + match actual_colum_type { + NumericalType::I64 => bounds.map_bound(|&term| term.to_u64()), + NumericalType::U64 => { + bounds.transform_inner( + |&val| { + if val < 0 { + return TransformBound::NewBound(Bound::Unbounded); + } + TransformBound::Existing(val as u64) + }, + |&val| { + if val < 0 { + // no hits case + return TransformBound::NewBound(Bound::Excluded(0)); + } + TransformBound::Existing(val as u64) + }, + ) + } + NumericalType::F64 => bounds.map_bound(|&term| (term as f64).to_u64()), + } + } + NumericalType::U64 => { + let bounds = bounds.map_bound(|term| (term.as_u64().unwrap())); + match actual_colum_type { + NumericalType::U64 => bounds.map_bound(|&term| term.to_u64()), + NumericalType::I64 => { + bounds.transform_inner( + |&val| { + if val > i64::MAX as u64 { + // Actual no hits case + return TransformBound::NewBound(Bound::Excluded(i64::MAX as u64)); + } + TransformBound::Existing((val as i64).to_u64()) + }, + |&val| { + if val > i64::MAX as u64 { + return TransformBound::NewBound(Bound::Unbounded); + } + TransformBound::Existing((val as i64).to_u64()) + }, + ) + } + NumericalType::F64 => bounds.map_bound(|&term| (term as f64).to_u64()), + } + } + NumericalType::F64 => { + let bounds = bounds.map_bound(|term| (term.as_f64().unwrap())); + match actual_colum_type { + NumericalType::U64 => transform_from_f64_bounds::(&bounds), + NumericalType::I64 => transform_from_f64_bounds::(&bounds), + NumericalType::F64 => bounds.map_bound(|&term| term.to_u64()), + } + } + }; + search_on_u64_ff( + column, + boost, + BoundsRange::new(bounds.lower_bound, bounds.upper_bound), + ) +} + +trait IntType { + fn min() -> Self; + fn max() -> Self; + fn to_f64(self) -> f64; + fn from_f64(val: f64) -> Self; +} +impl IntType for i64 { + fn min() -> Self { + Self::MIN + } + fn max() -> Self { + Self::MAX + } + fn to_f64(self) -> f64 { + self as f64 + } + fn from_f64(val: f64) -> Self { + val as Self + } +} +impl IntType for u64 { + fn min() -> Self { + Self::MIN + } + fn max() -> Self { + Self::MAX + } + fn to_f64(self) -> f64 { + self as f64 + } + fn from_f64(val: f64) -> Self { + val as Self + } +} + +fn transform_from_f64_bounds( + bounds: &BoundsRange, +) -> BoundsRange { + bounds.transform_inner( + |&lower_bound| { + if lower_bound < T::min().to_f64() { + return TransformBound::NewBound(Bound::Unbounded); + } + if lower_bound > T::max().to_f64() { + // no hits case + return TransformBound::NewBound(Bound::Excluded(u64::MAX)); + } + + if lower_bound.fract() == 0.0 { + TransformBound::Existing(T::from_f64(lower_bound).to_u64()) + } else { + TransformBound::NewBound(Bound::Included(T::from_f64(lower_bound.trunc()).to_u64())) + } + }, + |&upper_bound| { + if upper_bound < T::min().to_f64() { + return TransformBound::NewBound(Bound::Unbounded); + } + if upper_bound > T::max().to_f64() { + // no hits case + return TransformBound::NewBound(Bound::Included(u64::MAX)); + } + if upper_bound.fract() == 0.0 { + TransformBound::Existing(T::from_f64(upper_bound).to_u64()) + } else { + TransformBound::NewBound(Bound::Included(T::from_f64(upper_bound.trunc()).to_u64())) + } + }, + ) +} + +fn search_on_u64_ff( + column: Column, + boost: Score, + bounds: BoundsRange, +) -> crate::Result> { + #[allow(clippy::reversed_empty_ranges)] + let value_range = bound_to_value_range( + &bounds.lower_bound, + &bounds.upper_bound, + column.min_value(), + column.max_value(), + ) + .unwrap_or(1..=0); // empty range + if value_range.is_empty() { + return Ok(Box::new(EmptyScorer)); + } + let docset = RangeDocSet::new(value_range, column); + Ok(Box::new(ConstScorer::new(docset, boost))) +} + /// Returns true if the type maps to a u64 fast field pub(crate) fn maps_to_u64_fastfield(typ: Type) -> bool { match typ { @@ -164,7 +393,7 @@ pub(crate) fn maps_to_u64_fastfield(typ: Type) -> bool { } } -fn bound_to_value_range_ip( +fn bound_range_inclusive_ip( lower_bound: &Bound, upper_bound: &Bound, min_value: Ipv6Addr, @@ -212,16 +441,21 @@ fn bound_to_value_range( pub mod tests { use std::ops::{Bound, RangeInclusive}; + use common::bounds::BoundsRange; + use common::DateTime; use proptest::prelude::*; use rand::rngs::StdRng; use rand::seq::SliceRandom; use rand::SeedableRng; + use time::format_description::well_known::Rfc3339; + use time::OffsetDateTime; use crate::collector::{Count, TopDocs}; + use crate::fastfield::FastValue; use crate::query::range_query::range_query_u64_fastfield::FastFieldRangeWeight; - use crate::query::{QueryParser, Weight}; + use crate::query::{QueryParser, RangeQuery, Weight}; use crate::schema::{ - NumericOptions, Schema, SchemaBuilder, FAST, INDEXED, STORED, STRING, TEXT, + Field, NumericOptions, Schema, SchemaBuilder, FAST, INDEXED, STORED, STRING, TEXT, }; use crate::{Index, IndexWriter, Term, TERMINATED}; @@ -284,6 +518,208 @@ pub mod tests { Ok(()) } + fn get_json_term(field: Field, path: &str, value: T) -> Term { + let mut term = Term::from_field_json_path(field, path, true); + term.append_type_and_fast_value(value); + term + } + + #[test] + fn json_range_test() { + let mut schema_builder = Schema::builder(); + let json_field = schema_builder.add_json_field("json", TEXT | STORED | FAST); + let schema = schema_builder.build(); + + let index = Index::create_in_ram(schema); + let u64_val = u64::MAX - 1; + { + let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap(); + let doc = json!({ + "id_u64": 0, + "id_f64": 10.5, + "id_i64": -100, + "date": "2022-12-01T00:00:01Z" + }); + index_writer.add_document(doc!(json_field => doc)).unwrap(); + let doc = json!({ + "id_u64": u64_val, + "id_f64": 1000.5, + "id_i64": 1000, + "date": "2023-12-01T00:00:01Z" + }); + index_writer.add_document(doc!(json_field => doc)).unwrap(); + + index_writer.commit().unwrap(); + } + + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + let count = |range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap(); + + // u64 on u64 + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_u64", u64_val)), + Bound::Included(get_json_term(json_field, "id_u64", u64_val)), + )), + 1 + ); + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_u64", u64_val)), + Bound::Excluded(get_json_term(json_field, "id_u64", u64_val)), + )), + 0 + ); + // f64 on u64 field + assert_eq!( + count(RangeQuery::new( + // We need to subtract since there is some inaccuracy + Bound::Included(get_json_term( + json_field, + "id_u64", + (u64_val - 10000) as f64 + )), + Bound::Included(get_json_term(json_field, "id_u64", (u64_val) as f64)), + )), + 1 + ); + // i64 on u64 + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_u64", 0_i64)), + Bound::Included(get_json_term(json_field, "id_u64", 0_i64)), + )), + 1 + ); + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_u64", 1_i64)), + Bound::Included(get_json_term(json_field, "id_u64", 1_i64)), + )), + 0 + ); + // u64 on f64 + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_f64", 10_u64)), + Bound::Included(get_json_term(json_field, "id_f64", 11_u64)), + )), + 1 + ); + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_f64", 10_u64)), + Bound::Included(get_json_term(json_field, "id_f64", 2000_u64)), + )), + 2 + ); + // i64 on f64 + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_f64", 10_i64)), + Bound::Included(get_json_term(json_field, "id_f64", 2000_i64)), + )), + 2 + ); + + // i64 on i64 + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_i64", -1000i64)), + Bound::Included(get_json_term(json_field, "id_i64", 1000i64)), + )), + 2 + ); + + // u64 on i64 + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_i64", 0_u64)), + Bound::Included(get_json_term(json_field, "id_i64", 1000u64)), + )), + 1 + ); + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_i64", 0_u64)), + Bound::Included(get_json_term(json_field, "id_i64", 999u64)), + )), + 0 + ); + // f64 on i64 field + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_i64", -1000.0)), + Bound::Included(get_json_term(json_field, "id_i64", 1000.0)), + )), + 2 + ); + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_i64", -1000.0f64)), + Bound::Excluded(get_json_term(json_field, "id_i64", 1000.0f64)), + )), + 1 + ); + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_i64", -1000.0f64)), + Bound::Included(get_json_term(json_field, "id_i64", 1000.0f64)), + )), + 2 + ); + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_i64", -1000.0f64)), + Bound::Excluded(get_json_term(json_field, "id_i64", 1000.01f64)), + )), + 2 + ); + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "id_i64", -1000.0f64)), + Bound::Included(get_json_term(json_field, "id_i64", 999.99f64)), + )), + 1 + ); + assert_eq!( + count(RangeQuery::new( + Bound::Excluded(get_json_term(json_field, "id_i64", 999.9)), + Bound::Excluded(get_json_term(json_field, "id_i64", 1000.1)), + )), + 1 + ); + + // Date field + let dt1 = + DateTime::from_utc(OffsetDateTime::parse("2022-12-01T00:00:01Z", &Rfc3339).unwrap()); + let dt2 = + DateTime::from_utc(OffsetDateTime::parse("2023-12-01T00:00:01Z", &Rfc3339).unwrap()); + + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "date", dt1)), + Bound::Included(get_json_term(json_field, "date", dt2)), + )), + 2 + ); + assert_eq!( + count(RangeQuery::new( + Bound::Included(get_json_term(json_field, "date", dt1)), + Bound::Excluded(get_json_term(json_field, "date", dt2)), + )), + 1 + ); + assert_eq!( + count(RangeQuery::new( + Bound::Excluded(get_json_term(json_field, "date", dt1)), + Bound::Excluded(get_json_term(json_field, "date", dt2)), + )), + 0 + ); + } + #[derive(Clone, Debug)] pub struct Doc { pub id_name: String, @@ -326,6 +762,12 @@ pub mod tests { assert!(test_id_range_for_docs(ops).is_ok()); } + #[test] + fn range_regression1_test_json() { + let ops = vec![doc_from_id_1(0)]; + assert!(test_id_range_for_docs_json(ops).is_ok()); + } + #[test] fn test_range_regression2() { let ops = vec![ @@ -356,8 +798,10 @@ pub mod tests { let searcher = index.reader().unwrap().searcher(); let range_query = FastFieldRangeWeight::new( field, - Bound::Included(Term::from_field_u64(field, 50_000)), - Bound::Included(Term::from_field_u64(field, 50_002)), + BoundsRange::new( + Bound::Included(Term::from_field_u64(field, 50_000)), + Bound::Included(Term::from_field_u64(field, 50_002)), + ), ); let scorer = range_query .scorer(searcher.segment_reader(0), 1.0f32) @@ -377,56 +821,94 @@ pub mod tests { assert!(test_id_range_for_docs(ops).is_ok()); } - pub fn create_index_from_docs(docs: &[Doc]) -> Index { + pub fn create_index_from_docs(docs: &[Doc], json_field: bool) -> Index { let mut schema_builder = Schema::builder(); - let id_u64_field = schema_builder.add_u64_field("id", INDEXED | STORED | FAST); - let ids_u64_field = - schema_builder.add_u64_field("ids", NumericOptions::default().set_fast().set_indexed()); - - let id_f64_field = schema_builder.add_f64_field("id_f64", INDEXED | STORED | FAST); - let ids_f64_field = schema_builder.add_f64_field( - "ids_f64", - NumericOptions::default().set_fast().set_indexed(), - ); + if json_field { + let json_field = schema_builder.add_json_field("json", TEXT | STORED | FAST); + let schema = schema_builder.build(); - let id_i64_field = schema_builder.add_i64_field("id_i64", INDEXED | STORED | FAST); - let ids_i64_field = schema_builder.add_i64_field( - "ids_i64", - NumericOptions::default().set_fast().set_indexed(), - ); + let index = Index::create_in_ram(schema); - let text_field = schema_builder.add_text_field("id_name", STRING | STORED | FAST); - let text_field2 = schema_builder.add_text_field("id_name_fast", STRING | STORED | FAST); - let schema = schema_builder.build(); - let index = Index::create_in_ram(schema); + { + let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap(); + for doc in docs.iter() { + let doc = json!({ + "ids_i64": doc.id as i64, + "ids_i64": doc.id as i64, + "ids_f64": doc.id as f64, + "ids_f64": doc.id as f64, + "ids": doc.id, + "ids": doc.id, + "id": doc.id, + "id_f64": doc.id as f64, + "id_i64": doc.id as i64, + "id_name": doc.id_name.to_string(), + "id_name_fast": doc.id_name.to_string(), + }); + index_writer.add_document(doc!(json_field => doc)).unwrap(); + } - { - let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap(); - for doc in docs.iter() { - index_writer - .add_document(doc!( - ids_i64_field => doc.id as i64, - ids_i64_field => doc.id as i64, - ids_f64_field => doc.id as f64, - ids_f64_field => doc.id as f64, - ids_u64_field => doc.id, - ids_u64_field => doc.id, - id_u64_field => doc.id, - id_f64_field => doc.id as f64, - id_i64_field => doc.id as i64, - text_field => doc.id_name.to_string(), - text_field2 => doc.id_name.to_string(), - )) - .unwrap(); + index_writer.commit().unwrap(); } + index + } else { + let id_u64_field = schema_builder.add_u64_field("id", INDEXED | STORED | FAST); + let ids_u64_field = schema_builder + .add_u64_field("ids", NumericOptions::default().set_fast().set_indexed()); + + let id_f64_field = schema_builder.add_f64_field("id_f64", INDEXED | STORED | FAST); + let ids_f64_field = schema_builder.add_f64_field( + "ids_f64", + NumericOptions::default().set_fast().set_indexed(), + ); - index_writer.commit().unwrap(); + let id_i64_field = schema_builder.add_i64_field("id_i64", INDEXED | STORED | FAST); + let ids_i64_field = schema_builder.add_i64_field( + "ids_i64", + NumericOptions::default().set_fast().set_indexed(), + ); + + let text_field = schema_builder.add_text_field("id_name", STRING | STORED); + let text_field2 = schema_builder.add_text_field("id_name_fast", STRING | STORED | FAST); + let schema = schema_builder.build(); + + let index = Index::create_in_ram(schema); + + { + let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap(); + for doc in docs.iter() { + index_writer + .add_document(doc!( + ids_i64_field => doc.id as i64, + ids_i64_field => doc.id as i64, + ids_f64_field => doc.id as f64, + ids_f64_field => doc.id as f64, + ids_u64_field => doc.id, + ids_u64_field => doc.id, + id_u64_field => doc.id, + id_f64_field => doc.id as f64, + id_i64_field => doc.id as i64, + text_field => doc.id_name.to_string(), + text_field2 => doc.id_name.to_string(), + )) + .unwrap(); + } + + index_writer.commit().unwrap(); + } + index } - index } fn test_id_range_for_docs(docs: Vec) -> crate::Result<()> { - let index = create_index_from_docs(&docs); + test_id_range_for_docs_with_opt(docs, false) + } + fn test_id_range_for_docs_json(docs: Vec) -> crate::Result<()> { + test_id_range_for_docs_with_opt(docs, true) + } + + fn test_id_range_for_docs_with_opt(docs: Vec, json: bool) -> crate::Result<()> { + let index = create_index_from_docs(&docs, json); let reader = index.reader().unwrap(); let searcher = reader.searcher(); @@ -439,11 +921,29 @@ pub mod tests { .unwrap() }; + let field_path = |field: &str| { + if json { + format!("json.{}", field) + } else { + field.to_string() + } + }; + let gen_query_inclusive = |field: &str, range: RangeInclusive| { - format!("{}:[{} TO {}]", field, range.start(), range.end()) + format!( + "{}:[{} TO {}]", + field_path(field), + range.start(), + range.end() + ) }; let gen_query_exclusive = |field: &str, range: RangeInclusive| { - format!("{}:{{{} TO {}}}", field, range.start(), range.end()) + format!( + "{}:{{{} TO {}}}", + field_path(field), + range.start(), + range.end() + ) }; let test_sample = |sample_docs: Vec| { @@ -470,7 +970,12 @@ pub mod tests { .iter() .filter(|doc| (id_names[0]..=id_names[1]).contains(&doc.id_name.as_str())) .count(); - let query = format!("{}:[{} TO {}]", field_name, id_names[0], id_names[1]); + let query = format!( + "{}:[{} TO {}]", + field_path(field_name), + id_names[0], + id_names[1] + ); assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits); }; @@ -499,20 +1004,23 @@ pub mod tests { .filter(|doc| (ids[0]..=ids[1]).contains(&doc.id) && doc.id_name == id_filter) .count(); let query = format!( - "{} AND id_name:{}", + "{} AND {}:{}", gen_query_inclusive("id", ids[0]..=ids[1]), + field_path("id_name"), &id_filter ); assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits); let query = format!( - "{} AND id_name:{}", + "{} AND {}:{}", gen_query_inclusive("id_f64", ids[0]..=ids[1]), + field_path("id_name"), &id_filter ); assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits); let query = format!( - "{} AND id_name:{}", + "{} AND {}:{}", gen_query_inclusive("id_i64", ids[0]..=ids[1]), + field_path("id_name"), &id_filter ); assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits); @@ -520,20 +1028,23 @@ pub mod tests { // Intersection search on multivalue id field let id_filter = sample_docs[0].id_name.to_string(); let query = format!( - "{} AND id_name:{}", + "{} AND {}:{}", gen_query_inclusive("ids", ids[0]..=ids[1]), + field_path("id_name"), &id_filter ); assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits); let query = format!( - "{} AND id_name:{}", + "{} AND {}:{}", gen_query_inclusive("ids_f64", ids[0]..=ids[1]), + field_path("id_name"), &id_filter ); assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits); let query = format!( - "{} AND id_name:{}", + "{} AND {}:{}", gen_query_inclusive("ids_i64", ids[0]..=ids[1]), + field_path("id_name"), &id_filter ); assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits); @@ -649,8 +1160,10 @@ pub mod ip_range_tests { let searcher = index.reader().unwrap().searcher(); let range_weight = FastFieldRangeWeight::new( ips_field, - Bound::Included(Term::from_field_ip_addr(ips_field, ip_addrs[1])), - Bound::Included(Term::from_field_ip_addr(ips_field, ip_addrs[2])), + BoundsRange::new( + Bound::Included(Term::from_field_ip_addr(ips_field, ip_addrs[1])), + Bound::Included(Term::from_field_ip_addr(ips_field, ip_addrs[2])), + ), ); let count = @@ -783,7 +1296,7 @@ mod bench { }) .collect(); - create_index_from_docs(&docs) + create_index_from_docs(&docs, false) } fn get_90_percent() -> RangeInclusive { diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index cc7daf8b0e..2c1253737e 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -3,7 +3,7 @@ use std::str::FromStr; use base64::engine::general_purpose::STANDARD as BASE64; use base64::Engine; -use columnar::ColumnType; +use columnar::{ColumnType, NumericalType}; use serde::{Deserialize, Serialize}; use serde_json::Value as JsonValue; use thiserror::Error; @@ -102,6 +102,15 @@ const ALL_TYPES: [Type; 10] = [ ]; impl Type { + pub fn numerical_type(&self) -> Option { + match self { + Type::I64 => Some(NumericalType::I64), + Type::U64 => Some(NumericalType::U64), + Type::F64 => Some(NumericalType::F64), + _ => None, + } + } + /// Returns an iterator over the different values /// the Type enum can tape. pub fn iter_values() -> impl Iterator { @@ -196,6 +205,11 @@ impl FieldType { } } + /// returns true if this is an json field + pub fn is_json(&self) -> bool { + matches!(self, FieldType::JsonObject(_)) + } + /// returns true if this is an ip address field pub fn is_ip_addr(&self) -> bool { matches!(self, FieldType::IpAddr(_)) diff --git a/src/schema/term.rs b/src/schema/term.rs index a0d1a8f677..58c81c785a 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -7,7 +7,7 @@ use common::json_path_writer::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP_STR}; use common::JsonPathWriter; use super::date_time_options::DATE_TIME_PRECISION_INDEXED; -use super::Field; +use super::{Field, Schema}; use crate::fastfield::FastValue; use crate::json_utils::split_json_path; use crate::schema::{Facet, Type}; @@ -57,6 +57,25 @@ impl Term { term } + /// Gets the full path of the field name + optional json path. + pub fn get_full_path(&self, schema: &Schema) -> String { + let field = self.field(); + let field_type = schema.get_field_entry(field).field_type(); + let mut field = schema.get_field_name(field).to_string(); + let field_name = if field_type.is_json() { + field.push('.'); + let value = self.value(); + let json_path = value.as_json().expect("expected json type in term").0; + field.push_str(unsafe { + std::str::from_utf8_unchecked(&json_path[..json_path.len() - 1]) + }); + field + } else { + field + }; + field_name + } + pub(crate) fn with_type_and_field(typ: Type, field: Field) -> Term { let mut term = Self::with_capacity(8); term.set_field_and_type(field, typ); @@ -324,6 +343,11 @@ where B: AsRef<[u8]> ValueBytes(data) } + /// Wraps a object holding Vec + pub fn to_owned(&self) -> ValueBytes> { + ValueBytes(self.0.as_ref().to_vec()) + } + fn typ_code(&self) -> u8 { self.0.as_ref()[0] } @@ -345,7 +369,7 @@ where B: AsRef<[u8]> if self.typ() != T::to_type() { return None; } - let value_bytes = self.value_bytes(); + let value_bytes = self.raw_value_bytes_payload(); let value_u64 = u64::from_be_bytes(value_bytes.try_into().ok()?); Some(T::from_u64(value_u64)) } @@ -390,7 +414,7 @@ where B: AsRef<[u8]> if self.typ() != Type::Str { return None; } - str::from_utf8(self.value_bytes()).ok() + str::from_utf8(self.raw_value_bytes_payload()).ok() } /// Returns the facet associated with the term. @@ -401,7 +425,7 @@ where B: AsRef<[u8]> if self.typ() != Type::Facet { return None; } - let facet_encode_str = str::from_utf8(self.value_bytes()).ok()?; + let facet_encode_str = str::from_utf8(self.raw_value_bytes_payload()).ok()?; Some(Facet::from_encoded_string(facet_encode_str.to_string())) } @@ -412,7 +436,7 @@ where B: AsRef<[u8]> if self.typ() != Type::Bytes { return None; } - Some(self.value_bytes()) + Some(self.raw_value_bytes_payload()) } /// Returns a `Ipv6Addr` value from the term. @@ -420,7 +444,7 @@ where B: AsRef<[u8]> if self.typ() != Type::IpAddr { return None; } - let ip_u128 = u128::from_be_bytes(self.value_bytes().try_into().ok()?); + let ip_u128 = u128::from_be_bytes(self.raw_value_bytes_payload().try_into().ok()?); Some(Ipv6Addr::from_u128(ip_u128)) } @@ -441,7 +465,7 @@ where B: AsRef<[u8]> if self.typ() != Type::Json { return None; } - let bytes = self.value_bytes(); + let bytes = self.raw_value_bytes_payload(); let pos = bytes.iter().cloned().position(|b| b == JSON_END_OF_PATH)?; // split at pos + 1, so that json_path_bytes includes the JSON_END_OF_PATH byte. @@ -456,16 +480,25 @@ where B: AsRef<[u8]> if self.typ() != Type::Json { return None; } - let bytes = self.value_bytes(); + let bytes = self.raw_value_bytes_payload(); let pos = bytes.iter().cloned().position(|b| b == JSON_END_OF_PATH)?; Some(ValueBytes::wrap(&bytes[pos + 1..])) } - /// Returns the serialized value of ValueBytes without the type. - fn value_bytes(&self) -> &[u8] { + /// Returns the raw value of ValueBytes payload, without the type tag. + pub(crate) fn raw_value_bytes_payload(&self) -> &[u8] { &self.0.as_ref()[1..] } + /// Returns the serialized value of ValueBytes payload, without the type tag. + pub(crate) fn value_bytes_payload(&self) -> Vec { + if let Some(value_bytes) = self.as_json_value_bytes() { + value_bytes.raw_value_bytes_payload().to_vec() + } else { + self.raw_value_bytes_payload().to_vec() + } + } + /// Returns the serialized representation of Term. /// /// Do NOT rely on this byte representation in the index. diff --git a/sstable/src/dictionary.rs b/sstable/src/dictionary.rs index 298a70e838..4cc438713b 100644 --- a/sstable/src/dictionary.rs +++ b/sstable/src/dictionary.rs @@ -4,6 +4,7 @@ use std::marker::PhantomData; use std::ops::{Bound, RangeBounds}; use std::sync::Arc; +use common::bounds::{transform_bound_inner_res, TransformBound}; use common::file_slice::FileSlice; use common::{BinarySerializable, OwnedBytes}; use tantivy_fst::automaton::AlwaysMatch; @@ -56,29 +57,6 @@ impl Dictionary { } } -fn map_bound(bound: &Bound, transform: impl Fn(&TFrom) -> TTo) -> Bound { - use self::Bound::*; - match bound { - Excluded(ref from_val) => Bound::Excluded(transform(from_val)), - Included(ref from_val) => Bound::Included(transform(from_val)), - Unbounded => Unbounded, - } -} - -/// Takes a bound and transforms the inner value into a new bound via a closure. -/// The bound variant may change by the value returned value from the closure. -fn transform_bound_inner( - bound: &Bound, - transform: impl Fn(&TFrom) -> io::Result>, -) -> io::Result> { - use self::Bound::*; - Ok(match bound { - Excluded(ref from_val) => transform(from_val)?, - Included(ref from_val) => transform(from_val)?, - Unbounded => Unbounded, - }) -} - #[derive(Debug, Clone, PartialEq, Eq)] pub enum TermOrdHit { /// Exact term ord hit @@ -409,18 +387,18 @@ impl Dictionary { lower_bound: Bound, upper_bound: Bound, ) -> io::Result<(Bound, Bound)> { - let lower_bound = transform_bound_inner(&lower_bound, |start_bound_bytes| { + let lower_bound = transform_bound_inner_res(&lower_bound, |start_bound_bytes| { let ord = self.term_ord_or_next(start_bound_bytes)?; match ord { - TermOrdHit::Exact(ord) => Ok(map_bound(&lower_bound, |_| ord)), - TermOrdHit::Next(ord) => Ok(Bound::Included(ord)), // Change bounds to included + TermOrdHit::Exact(ord) => Ok(TransformBound::Existing(ord)), + TermOrdHit::Next(ord) => Ok(TransformBound::NewBound(Bound::Included(ord))), /* Change bounds to included */ } })?; - let upper_bound = transform_bound_inner(&upper_bound, |end_bound_bytes| { + let upper_bound = transform_bound_inner_res(&upper_bound, |end_bound_bytes| { let ord = self.term_ord_or_next(end_bound_bytes)?; match ord { - TermOrdHit::Exact(ord) => Ok(map_bound(&upper_bound, |_| ord)), - TermOrdHit::Next(ord) => Ok(Bound::Excluded(ord)), // Change bounds to excluded + TermOrdHit::Exact(ord) => Ok(TransformBound::Existing(ord)), + TermOrdHit::Next(ord) => Ok(TransformBound::NewBound(Bound::Excluded(ord))), /* Change bounds to excluded */ } })?; Ok((lower_bound, upper_bound))