Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make allocating field names avoidable for range and exists queries. #2308

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions src/query/exist_query.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use core::fmt::Debug;
use std::borrow::Cow;

use columnar::{ColumnIndex, DynamicColumn};

Expand All @@ -14,7 +14,7 @@ use crate::{DocId, Score, TantivyError};
/// All of the matched documents get the score 1.0.
#[derive(Clone, Debug)]
pub struct ExistsQuery {
field_name: String,
field: Cow<'static, str>,
}

impl ExistsQuery {
Expand All @@ -23,40 +23,42 @@ impl ExistsQuery {
/// This query matches all documents with at least one non-null value in the specified field.
/// This constructor never fails, but executing the search with this query will return an
/// error if the specified field doesn't exist or is not a fast field.
pub fn new_exists_query(field: String) -> ExistsQuery {
ExistsQuery { field_name: field }
pub fn new_exists_query<F: Into<Cow<'static, str>>>(field: F) -> ExistsQuery {
ExistsQuery {
field: field.into(),
}
}
}

impl Query for ExistsQuery {
fn weight(&self, enable_scoring: EnableScoring) -> crate::Result<Box<dyn Weight>> {
let schema = enable_scoring.schema();
let Some((field, _path)) = schema.find_field(&self.field_name) else {
return Err(TantivyError::FieldNotFound(self.field_name.clone()));
let Some((field, _path)) = schema.find_field(&self.field) else {
return Err(TantivyError::FieldNotFound(self.field.to_string()));
};
let field_type = schema.get_field_entry(field).field_type();
if !field_type.is_fast() {
return Err(TantivyError::SchemaError(format!(
"Field {} is not a fast field.",
self.field_name
self.field
)));
}
Ok(Box::new(ExistsWeight {
field_name: self.field_name.clone(),
field: self.field.clone(),
}))
}
}

/// Weight associated with the `ExistsQuery` query.
pub struct ExistsWeight {
field_name: String,
field: Cow<'static, str>,
}

impl Weight for ExistsWeight {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
let fast_field_reader = reader.fast_fields();
let dynamic_columns: crate::Result<Vec<DynamicColumn>> = fast_field_reader
.dynamic_column_handles(&self.field_name)?
.dynamic_column_handles(&self.field)?
.into_iter()
.map(|handle| handle.open().map_err(|io_error| io_error.into()))
.collect();
Expand Down
63 changes: 32 additions & 31 deletions src/query/range_query/range_query.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::io;
use std::net::Ipv6Addr;
use std::ops::{Bound, Range};
Expand Down Expand Up @@ -68,7 +69,7 @@ use crate::{DateTime, DocId, Score};
/// ```
#[derive(Clone, Debug)]
pub struct RangeQuery {
field: String,
field: Cow<'static, str>,
value_type: Type,
lower_bound: Bound<Vec<u8>>,
upper_bound: Bound<Vec<u8>>,
Expand All @@ -80,15 +81,15 @@ impl RangeQuery {
///
/// If the value type is not correct, something may go terribly wrong when
/// the `Weight` object is created.
pub fn new_term_bounds(
field: String,
pub fn new_term_bounds<F: Into<Cow<'static, str>>>(
field: F,
value_type: Type,
lower_bound: &Bound<Term>,
upper_bound: &Bound<Term>,
) -> RangeQuery {
let verify_and_unwrap_term = |val: &Term| val.serialized_value_bytes().to_owned();
RangeQuery {
field,
field: field.into(),
value_type,
lower_bound: map_bound(lower_bound, verify_and_unwrap_term),
upper_bound: map_bound(upper_bound, verify_and_unwrap_term),
Expand All @@ -100,7 +101,7 @@ impl RangeQuery {
///
/// If the field is not of the type `i64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_i64(field: String, range: Range<i64>) -> RangeQuery {
pub fn new_i64<F: Into<Cow<'static, str>>>(field: F, range: Range<i64>) -> RangeQuery {
RangeQuery::new_i64_bounds(
field,
Bound::Included(range.start),
Expand All @@ -115,8 +116,8 @@ impl RangeQuery {
///
/// If the field is not of the type `i64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_i64_bounds(
field: String,
pub fn new_i64_bounds<F: Into<Cow<'static, str>>>(
field: F,
lower_bound: Bound<i64>,
upper_bound: Bound<i64>,
) -> RangeQuery {
Expand All @@ -126,7 +127,7 @@ impl RangeQuery {
.to_owned()
};
RangeQuery {
field,
field: field.into(),
value_type: Type::I64,
lower_bound: map_bound(&lower_bound, make_term_val),
upper_bound: map_bound(&upper_bound, make_term_val),
Expand All @@ -138,7 +139,7 @@ impl RangeQuery {
///
/// If the field is not of the type `f64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_f64(field: String, range: Range<f64>) -> RangeQuery {
pub fn new_f64<F: Into<Cow<'static, str>>>(field: F, range: Range<f64>) -> RangeQuery {
RangeQuery::new_f64_bounds(
field,
Bound::Included(range.start),
Expand All @@ -153,8 +154,8 @@ impl RangeQuery {
///
/// If the field is not of the type `f64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_f64_bounds(
field: String,
pub fn new_f64_bounds<F: Into<Cow<'static, str>>>(
field: F,
lower_bound: Bound<f64>,
upper_bound: Bound<f64>,
) -> RangeQuery {
Expand All @@ -164,7 +165,7 @@ impl RangeQuery {
.to_owned()
};
RangeQuery {
field,
field: field.into(),
value_type: Type::F64,
lower_bound: map_bound(&lower_bound, make_term_val),
upper_bound: map_bound(&upper_bound, make_term_val),
Expand All @@ -179,8 +180,8 @@ impl RangeQuery {
///
/// If the field is not of the type `u64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_u64_bounds(
field: String,
pub fn new_u64_bounds<F: Into<Cow<'static, str>>>(
field: F,
lower_bound: Bound<u64>,
upper_bound: Bound<u64>,
) -> RangeQuery {
Expand All @@ -190,7 +191,7 @@ impl RangeQuery {
.to_owned()
};
RangeQuery {
field,
field: field.into(),
value_type: Type::U64,
lower_bound: map_bound(&lower_bound, make_term_val),
upper_bound: map_bound(&upper_bound, make_term_val),
Expand All @@ -202,8 +203,8 @@ impl RangeQuery {
///
/// If the field is not of the type `ip`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_ip_bounds(
field: String,
pub fn new_ip_bounds<F: Into<Cow<'static, str>>>(
field: F,
lower_bound: Bound<Ipv6Addr>,
upper_bound: Bound<Ipv6Addr>,
) -> RangeQuery {
Expand All @@ -213,7 +214,7 @@ impl RangeQuery {
.to_owned()
};
RangeQuery {
field,
field: field.into(),
value_type: Type::IpAddr,
lower_bound: map_bound(&lower_bound, make_term_val),
upper_bound: map_bound(&upper_bound, make_term_val),
Expand All @@ -225,7 +226,7 @@ impl RangeQuery {
///
/// If the field is not of the type `u64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_u64(field: String, range: Range<u64>) -> RangeQuery {
pub fn new_u64<F: Into<Cow<'static, str>>>(field: F, range: Range<u64>) -> RangeQuery {
RangeQuery::new_u64_bounds(
field,
Bound::Included(range.start),
Expand All @@ -240,8 +241,8 @@ impl RangeQuery {
///
/// If the field is not of the type `date`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_date_bounds(
field: String,
pub fn new_date_bounds<F: Into<Cow<'static, str>>>(
field: F,
lower_bound: Bound<DateTime>,
upper_bound: Bound<DateTime>,
) -> RangeQuery {
Expand All @@ -251,7 +252,7 @@ impl RangeQuery {
.to_owned()
};
RangeQuery {
field,
field: field.into(),
value_type: Type::Date,
lower_bound: map_bound(&lower_bound, make_term_val),
upper_bound: map_bound(&upper_bound, make_term_val),
Expand All @@ -263,7 +264,7 @@ impl RangeQuery {
///
/// If the field is not of the type `date`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_date(field: String, range: Range<DateTime>) -> RangeQuery {
pub fn new_date<F: Into<Cow<'static, str>>>(field: F, range: Range<DateTime>) -> RangeQuery {
RangeQuery::new_date_bounds(
field,
Bound::Included(range.start),
Expand All @@ -278,14 +279,14 @@ impl RangeQuery {
///
/// If the field is not of the type `Str`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_str_bounds(
field: String,
pub fn new_str_bounds<F: Into<Cow<'static, str>>>(
field: F,
lower_bound: Bound<&str>,
upper_bound: Bound<&str>,
) -> RangeQuery {
let make_term_val = |val: &&str| val.as_bytes().to_vec();
RangeQuery {
field,
field: field.into(),
value_type: Type::Str,
lower_bound: map_bound(&lower_bound, make_term_val),
upper_bound: map_bound(&upper_bound, make_term_val),
Expand All @@ -297,7 +298,7 @@ impl RangeQuery {
///
/// If the field is not of the type `Str`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_str(field: String, range: Range<&str>) -> RangeQuery {
pub fn new_str<F: Into<Cow<'static, str>>>(field: F, range: Range<&str>) -> RangeQuery {
RangeQuery::new_str_bounds(
field,
Bound::Included(range.start),
Expand Down Expand Up @@ -358,7 +359,7 @@ impl Query for RangeQuery {
let lower_bound = map_bound_res(&self.lower_bound, parse_ip_from_bytes)?;
let upper_bound = map_bound_res(&self.upper_bound, parse_ip_from_bytes)?;
Ok(Box::new(IPFastFieldRangeWeight::new(
self.field.to_string(),
self.field.clone(),
lower_bound,
upper_bound,
)))
Expand All @@ -373,14 +374,14 @@ impl Query for RangeQuery {
let lower_bound = map_bound(&self.lower_bound, parse_from_bytes);
let upper_bound = map_bound(&self.upper_bound, parse_from_bytes);
Ok(Box::new(FastFieldRangeWeight::new_u64_lenient(
self.field.to_string(),
self.field.clone(),
lower_bound,
upper_bound,
)))
}
} else {
Ok(Box::new(RangeWeight {
field: self.field.to_string(),
field: self.field.clone(),
lower_bound: self.lower_bound.clone(),
upper_bound: self.upper_bound.clone(),
limit: self.limit,
Expand All @@ -390,7 +391,7 @@ impl Query for RangeQuery {
}

pub struct RangeWeight {
field: String,
field: Cow<'static, str>,
lower_bound: Bound<Vec<u8>>,
upper_bound: Bound<Vec<u8>>,
limit: Option<u64>,
Expand Down
11 changes: 8 additions & 3 deletions src/query/range_query/range_query_ip_fastfield.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//! We use this variant only if the fastfield exists, otherwise the default in `range_query` is
//! used, which uses the term dictionary + postings.

use std::borrow::Cow;
use std::net::Ipv6Addr;
use std::ops::{Bound, RangeInclusive};

Expand All @@ -13,14 +14,18 @@ use crate::{DocId, DocSet, Score, SegmentReader, TantivyError};

/// `IPFastFieldRangeWeight` uses the ip address fast field to execute range queries.
pub struct IPFastFieldRangeWeight {
field: String,
field: Cow<'static, str>,
lower_bound: Bound<Ipv6Addr>,
upper_bound: Bound<Ipv6Addr>,
}

impl IPFastFieldRangeWeight {
/// Creates a new IPFastFieldRangeWeight.
pub fn new(field: String, lower_bound: Bound<Ipv6Addr>, upper_bound: Bound<Ipv6Addr>) -> Self {
pub fn new(
field: Cow<'static, str>,
lower_bound: Bound<Ipv6Addr>,
upper_bound: Bound<Ipv6Addr>,
) -> Self {
Self {
field,
lower_bound,
Expand Down Expand Up @@ -171,7 +176,7 @@ pub mod tests {
writer.commit().unwrap();
let searcher = index.reader().unwrap().searcher();
let range_weight = IPFastFieldRangeWeight {
field: "ips".to_string(),
field: Cow::Borrowed("ips"),
lower_bound: Bound::Included(ip_addrs[1]),
upper_bound: Bound::Included(ip_addrs[2]),
};
Expand Down
10 changes: 6 additions & 4 deletions src/query/range_query/range_query_u64_fastfield.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//! We use this variant only if the fastfield exists, otherwise the default in `range_query` is
//! used, which uses the term dictionary + postings.

use std::borrow::Cow;
use std::ops::{Bound, RangeInclusive};

use columnar::{ColumnType, HasAssociatedColumnType, MonotonicallyMappableToU64};
Expand All @@ -14,7 +15,7 @@ use crate::{DocId, DocSet, Score, SegmentReader, TantivyError};
/// `FastFieldRangeWeight` uses the fast field to execute range queries.
#[derive(Clone, Debug)]
pub struct FastFieldRangeWeight {
field: String,
field: Cow<'static, str>,
lower_bound: Bound<u64>,
upper_bound: Bound<u64>,
column_type_opt: Option<ColumnType>,
Expand All @@ -23,7 +24,7 @@ pub struct FastFieldRangeWeight {
impl FastFieldRangeWeight {
/// Create a new FastFieldRangeWeight, using the u64 representation of any fast field.
pub(crate) fn new_u64_lenient(
field: String,
field: Cow<'static, str>,
lower_bound: Bound<u64>,
upper_bound: Bound<u64>,
) -> Self {
Expand All @@ -39,7 +40,7 @@ impl FastFieldRangeWeight {

/// Create a new `FastFieldRangeWeight` for a range of a u64-mappable type.
pub fn new<T: HasAssociatedColumnType + MonotonicallyMappableToU64>(
field: String,
field: Cow<'static, str>,
lower_bound: Bound<T>,
upper_bound: Bound<T>,
) -> Self {
Expand Down Expand Up @@ -130,6 +131,7 @@ fn bound_to_value_range<T: MonotonicallyMappableToU64>(

#[cfg(test)]
pub mod tests {
use std::borrow::Cow;
use std::ops::{Bound, RangeInclusive};

use proptest::prelude::*;
Expand Down Expand Up @@ -214,7 +216,7 @@ pub mod tests {
writer.commit().unwrap();
let searcher = index.reader().unwrap().searcher();
let range_query = FastFieldRangeWeight::new_u64_lenient(
"test_field".to_string(),
Cow::Borrowed("test_field"),
Bound::Included(50_000),
Bound::Included(50_002),
);
Expand Down
Loading