Skip to content

Commit

Permalink
support ff range queries on json fields
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Jul 29, 2024
1 parent d8843c6 commit ba33a2a
Show file tree
Hide file tree
Showing 9 changed files with 877 additions and 219 deletions.
130 changes: 130 additions & 0 deletions common/src/bounds.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
use std::io;
use std::ops::Bound;

/// A pair of lower and upper [`Bound`]s over values of type `T`.
///
/// Either side may independently be `Included`, `Excluded` or `Unbounded`.
#[derive(Clone, Debug)]
pub struct BoundsRange<T> {
/// Bound on the lower end of the range.
pub lower_bound: Bound<T>,
/// Bound on the upper end of the range.
pub upper_bound: Bound<T>,
}
impl<T> BoundsRange<T> {
    /// Creates a range from its lower and upper bound.
    pub fn new(lower_bound: Bound<T>, upper_bound: Bound<T>) -> Self {
        Self {
            lower_bound,
            upper_bound,
        }
    }

    /// Returns `true` when both the lower and the upper bound are `Unbounded`.
    pub fn is_unbounded(&self) -> bool {
        matches!(
            (&self.lower_bound, &self.upper_bound),
            (Bound::Unbounded, Bound::Unbounded)
        )
    }

    /// Applies `transform` to the value held by each bound.
    ///
    /// The bound variants (`Included` / `Excluded` / `Unbounded`) are kept as-is.
    pub fn map_bound<TTo>(&self, transform: impl Fn(&T) -> TTo) -> BoundsRange<TTo> {
        let lower_bound = map_bound(&self.lower_bound, &transform);
        let upper_bound = map_bound(&self.upper_bound, &transform);
        BoundsRange {
            lower_bound,
            upper_bound,
        }
    }

    /// Fallible variant of [`BoundsRange::map_bound`].
    ///
    /// The lower bound is converted first; the first error returned by
    /// `transform` is propagated and no range is produced.
    pub fn map_bound_res<TTo, Err>(
        &self,
        transform: impl Fn(&T) -> Result<TTo, Err>,
    ) -> Result<BoundsRange<TTo>, Err> {
        let lower_bound = map_bound_res(&self.lower_bound, &transform)?;
        let upper_bound = map_bound_res(&self.upper_bound, &transform)?;
        Ok(BoundsRange {
            lower_bound,
            upper_bound,
        })
    }

    /// Transforms each bound with its own closure.
    ///
    /// `transform_lower` is applied to the lower bound and `transform_upper`
    /// to the upper bound. Each closure may either keep the existing bound
    /// variant or replace the bound entirely, see [`TransformBound`].
    pub fn transform_inner<TTo>(
        &self,
        transform_lower: impl Fn(&T) -> TransformBound<TTo>,
        transform_upper: impl Fn(&T) -> TransformBound<TTo>,
    ) -> BoundsRange<TTo> {
        let lower_bound = transform_bound_inner(&self.lower_bound, &transform_lower);
        let upper_bound = transform_bound_inner(&self.upper_bound, &transform_upper);
        BoundsRange {
            lower_bound,
            upper_bound,
        }
    }

    /// Returns the first set inner value.
    ///
    /// The lower bound is checked before the upper bound; `None` is returned
    /// when both are `Unbounded`.
    pub fn get_inner(&self) -> Option<&T> {
        inner_bound(&self.lower_bound).or_else(|| inner_bound(&self.upper_bound))
    }
}

/// Outcome of transforming the value held by a [`Bound`].
///
/// Returned by the closures passed to [`transform_bound_inner`] and
/// [`transform_bound_inner_res`] to say whether the original bound variant
/// is kept or replaced.
#[derive(Clone, Debug)]
pub enum TransformBound<T> {
    /// Overwrite the bounds
    NewBound(Bound<T>),
    /// Use Existing bounds with new value
    Existing(T),
}

impl<T> TransformBound<T> {
    /// Resolves the transformation into a concrete bound.
    ///
    /// `keep_variant` rebuilds the original bound variant around the new
    /// value; it is only called for [`TransformBound::Existing`].
    fn into_bound(self, keep_variant: impl FnOnce(T) -> Bound<T>) -> Bound<T> {
        match self {
            TransformBound::NewBound(new_bound) => new_bound,
            TransformBound::Existing(new_val) => keep_variant(new_val),
        }
    }
}

/// Takes a bound and transforms the inner value into a new bound via a fallible closure.
///
/// The bound variant may change depending on the value returned by the closure:
/// [`TransformBound::Existing`] keeps the variant of `bound`, while
/// [`TransformBound::NewBound`] replaces the bound entirely. An `Unbounded`
/// input is returned unchanged and the closure is not called.
///
/// # Errors
///
/// Returns the error produced by `transform`, if any.
pub fn transform_bound_inner_res<TFrom, TTo>(
    bound: &Bound<TFrom>,
    transform: impl Fn(&TFrom) -> io::Result<TransformBound<TTo>>,
) -> io::Result<Bound<TTo>> {
    Ok(match bound {
        Bound::Excluded(from_val) => transform(from_val)?.into_bound(Bound::Excluded),
        Bound::Included(from_val) => transform(from_val)?.into_bound(Bound::Included),
        Bound::Unbounded => Bound::Unbounded,
    })
}

/// Takes a bound and transforms the inner value into a new bound via a closure.
///
/// The bound variant may change depending on the value returned by the closure:
/// [`TransformBound::Existing`] keeps the variant of `bound`, while
/// [`TransformBound::NewBound`] replaces the bound entirely. An `Unbounded`
/// input is returned unchanged and the closure is not called.
pub fn transform_bound_inner<TFrom, TTo>(
    bound: &Bound<TFrom>,
    transform: impl Fn(&TFrom) -> TransformBound<TTo>,
) -> Bound<TTo> {
    match bound {
        Bound::Excluded(from_val) => transform(from_val).into_bound(Bound::Excluded),
        Bound::Included(from_val) => transform(from_val).into_bound(Bound::Included),
        Bound::Unbounded => Bound::Unbounded,
    }
}

/// Borrows the value stored in a `Bound`.
///
/// Yields `None` for `Bound::Unbounded`, which carries no value.
pub fn inner_bound<T>(val: &Bound<T>) -> Option<&T> {
    match *val {
        Bound::Unbounded => None,
        Bound::Included(ref inner) => Some(inner),
        Bound::Excluded(ref inner) => Some(inner),
    }
}

/// Converts the value held by `bound` with `transform`, keeping the bound variant.
///
/// `Unbounded` is passed through without calling `transform`.
pub fn map_bound<TFrom, TTo>(
    bound: &Bound<TFrom>,
    transform: impl Fn(&TFrom) -> TTo,
) -> Bound<TTo> {
    match bound {
        Bound::Unbounded => Bound::Unbounded,
        Bound::Included(inner) => Bound::Included(transform(inner)),
        Bound::Excluded(inner) => Bound::Excluded(transform(inner)),
    }
}

/// Fallible counterpart of `map_bound`.
///
/// Converts the value held by `bound` with `transform`, keeping the bound
/// variant. An error returned by `transform` is propagated to the caller;
/// `Unbounded` is passed through without calling `transform`.
pub fn map_bound_res<TFrom, TTo, Err>(
    bound: &Bound<TFrom>,
    transform: impl Fn(&TFrom) -> Result<TTo, Err>,
) -> Result<Bound<TTo>, Err> {
    let mapped = match bound {
        Bound::Unbounded => Bound::Unbounded,
        Bound::Included(inner) => Bound::Included(transform(inner)?),
        Bound::Excluded(inner) => Bound::Excluded(transform(inner)?),
    };
    Ok(mapped)
}
1 change: 1 addition & 0 deletions common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::ops::Deref;
pub use byteorder::LittleEndian as Endianness;

mod bitset;
pub mod bounds;
mod byte_count;
mod datetime;
pub mod file_slice;
Expand Down
32 changes: 24 additions & 8 deletions src/query/query_parser/query_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -482,16 +482,32 @@ impl QueryParser {
});
if terms.len() != 1 {
return Err(QueryParserError::UnsupportedQuery(format!(
"Range query boundary cannot have multiple tokens: {phrase:?}."
"Range query boundary cannot have multiple tokens: {phrase:?} [{terms:?}]."
)));
}
Ok(terms.into_iter().next().unwrap())
}
FieldType::JsonObject(_) => {
// Json range are not supported.
Err(QueryParserError::UnsupportedQuery(
"Range query are not supported on json field.".to_string(),
))
FieldType::JsonObject(ref json_options) => {
let get_term_with_path = || {
Term::from_field_json_path(
field,
json_path,
json_options.is_expand_dots_enabled(),
)
};
if let Some(term) =
// Try to convert the phrase to a fast value
convert_to_fast_value_and_append_to_json_term(
get_term_with_path(),
phrase,
)
{
Ok(term)
} else {
let mut term = get_term_with_path();
term.append_type_and_str(phrase);
Ok(term)
}
}
FieldType::Facet(_) => match Facet::from_text(phrase) {
Ok(facet) => Ok(Term::from_facet(field, &facet)),
Expand Down Expand Up @@ -1123,8 +1139,8 @@ mod test {
let query = make_query_parser().parse_query("title:[A TO B]").unwrap();
assert_eq!(
format!("{query:?}"),
"RangeQuery { lower_bound: Included(Term(field=0, type=Str, \"a\")), upper_bound: \
Included(Term(field=0, type=Str, \"b\")), limit: None }"
"RangeQuery { bounds: BoundsRange { lower_bound: Included(Term(field=0, type=Str, \
\"a\")), upper_bound: Included(Term(field=0, type=Str, \"b\")) }, limit: None }"
);
}

Expand Down
29 changes: 4 additions & 25 deletions src/query/range_query/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::ops::Bound;

use crate::schema::Type;

mod fast_field_range_doc_set;
Expand All @@ -12,29 +10,10 @@ pub use self::range_query_u64_fastfield::FastFieldRangeWeight;
// TODO is this correct?
pub(crate) fn is_type_valid_for_fastfield_range_query(typ: Type) -> bool {
match typ {
Type::Str | Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => true,
Type::Str | Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date | Type::Json => {
true
}
Type::IpAddr => true,
Type::Facet | Type::Bytes | Type::Json => false,
}
}

fn map_bound<TFrom, TTo>(bound: &Bound<TFrom>, transform: impl Fn(&TFrom) -> TTo) -> Bound<TTo> {
use self::Bound::*;
match bound {
Excluded(ref from_val) => Excluded(transform(from_val)),
Included(ref from_val) => Included(transform(from_val)),
Unbounded => Unbounded,
Type::Facet | Type::Bytes => false,
}
}

fn map_bound_res<TFrom, TTo, Err>(
bound: &Bound<TFrom>,
transform: impl Fn(&TFrom) -> Result<TTo, Err>,
) -> Result<Bound<TTo>, Err> {
use self::Bound::*;
Ok(match bound {
Excluded(ref from_val) => Excluded(transform(from_val)?),
Included(ref from_val) => Included(transform(from_val)?),
Unbounded => Unbounded,
})
}
32 changes: 13 additions & 19 deletions src/query/range_query/range_query.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::io;
use std::ops::Bound;

use common::bounds::{map_bound, BoundsRange};
use common::BitSet;

use super::map_bound;
use super::range_query_u64_fastfield::FastFieldRangeWeight;
use crate::index::SegmentReader;
use crate::query::explanation::does_not_match;
Expand Down Expand Up @@ -69,28 +69,18 @@ use crate::{DocId, Score};
/// ```
#[derive(Clone, Debug)]
pub struct RangeQuery {
lower_bound: Bound<Term>,
upper_bound: Bound<Term>,
bounds: BoundsRange<Term>,
limit: Option<u64>,
}

/// Returns the inner value of a `Bound`
pub(crate) fn inner_bound(val: &Bound<Term>) -> Option<&Term> {
match val {
Bound::Included(term) | Bound::Excluded(term) => Some(term),
Bound::Unbounded => None,
}
}

impl RangeQuery {
/// Creates a new `RangeQuery` from bounded start and end terms.
///
/// If the value type is not correct, something may go terribly wrong when
/// the `Weight` object is created.
pub fn new(lower_bound: Bound<Term>, upper_bound: Bound<Term>) -> RangeQuery {
RangeQuery {
lower_bound,
upper_bound,
bounds: BoundsRange::new(lower_bound, upper_bound),
limit: None,
}
}
Expand All @@ -106,8 +96,8 @@ impl RangeQuery {
}

pub(crate) fn get_term(&self) -> &Term {
inner_bound(&self.lower_bound)
.or(inner_bound(&self.upper_bound))
self.bounds
.get_inner()
.expect("At least one bound must be set")
}

Expand All @@ -128,15 +118,19 @@ impl Query for RangeQuery {
if field_type.is_fast() && is_type_valid_for_fastfield_range_query(self.value_type()) {
Ok(Box::new(FastFieldRangeWeight::new(
self.field(),
self.lower_bound.clone(),
self.upper_bound.clone(),
self.bounds.clone(),
)))
} else {
if field_type.is_json() {
return Err(crate::TantivyError::InvalidArgument(
"RangeQuery on JSON is only supported for fast fields currently".to_string(),
));
}
let verify_and_unwrap_term = |val: &Term| val.serialized_value_bytes().to_owned();
Ok(Box::new(RangeWeight {
field: self.field(),
lower_bound: map_bound(&self.lower_bound, verify_and_unwrap_term),
upper_bound: map_bound(&self.upper_bound, verify_and_unwrap_term),
lower_bound: map_bound(&self.bounds.lower_bound, verify_and_unwrap_term),
upper_bound: map_bound(&self.bounds.upper_bound, verify_and_unwrap_term),
limit: self.limit,
}))
}
Expand Down
Loading

0 comments on commit ba33a2a

Please sign in to comment.