From a76f3de513b125f3aab71f437681273757ba32cc Mon Sep 17 00:00:00 2001 From: tongjianlin Date: Tue, 8 Sep 2020 15:50:41 +0800 Subject: [PATCH] min_should_match for pure shoulds --- src/core/search/query/boolean_query.rs | 39 +++++++++++------ src/core/search/query/query_string.rs | 28 ++++++------ src/core/search/scorer/disjunction_scorer.rs | 45 ++++++++++++++------ src/core/search/scorer/req_not_scorer.rs | 4 +- src/core/search/scorer/req_opt_scorer.rs | 2 +- 5 files changed, 75 insertions(+), 43 deletions(-) diff --git a/src/core/search/query/boolean_query.rs b/src/core/search/query/boolean_query.rs index 1c83506..5445f1c 100644 --- a/src/core/search/query/boolean_query.rs +++ b/src/core/search/query/boolean_query.rs @@ -31,7 +31,7 @@ pub struct BooleanQuery { should_queries: Vec>>, filter_queries: Vec>>, must_not_queries: Vec>>, - minimum_should_match: i32, + min_should_match: i32, } pub const BOOLEAN: &str = "boolean"; @@ -42,8 +42,18 @@ impl BooleanQuery { shoulds: Vec>>, filters: Vec>>, must_nots: Vec>>, + min_should_match: i32, ) -> Result>> { - let minimum_should_match = if musts.is_empty() { 1 } else { 0 }; + let min_should_match = if min_should_match > 0 { + min_should_match + } else { + if musts.is_empty() { + 1 + } else { + 0 + } + }; + let mut musts = musts; let mut shoulds = shoulds; let mut filters = filters; @@ -72,7 +82,7 @@ impl BooleanQuery { should_queries: shoulds, filter_queries: filters, must_not_queries: must_nots, - minimum_should_match, + min_should_match, })) } @@ -110,6 +120,7 @@ impl Query for BooleanQuery { should_weights, must_not_weights, needs_scores, + self.min_should_match, ))) } @@ -145,7 +156,7 @@ impl fmt::Display for BooleanQuery { write!( f, "BooleanQuery(must: [{}], should: [{}], filters: [{}], must_not: [{}], match: {})", - must_str, should_str, filters_str, must_not_str, self.minimum_should_match + must_str, should_str, filters_str, must_not_str, self.min_should_match ) } } @@ -154,8 +165,7 @@ struct BooleanWeight { must_weights: Vec>>, should_weights: Vec>>, must_not_weights: Vec>>, - #[allow(dead_code)] - minimum_should_match: i32, + min_should_match: i32, needs_scores: bool, } @@ -165,13 +175,13 @@ impl BooleanWeight { shoulds: Vec>>, must_nots: Vec>>, needs_scores: bool, + min_should_match: i32, ) -> BooleanWeight { - let minimum_should_match = if musts.is_empty() { 1 } else { 0 }; BooleanWeight { must_weights: musts, should_weights: shoulds, must_not_weights: must_nots, - minimum_should_match, + min_should_match, needs_scores, } } @@ -217,6 +227,7 @@ impl Weight for BooleanWeight { _ => Some(Box::new(DisjunctionSumScorer::new( scorers, self.needs_scores, + self.min_should_match, ))), } }; @@ -230,7 +241,11 @@ impl Weight for BooleanWeight { match scorers.len() { 0 => None, 1 => Some(scorers.remove(0)), - _ => Some(Box::new(DisjunctionSumScorer::new(scorers, false))), + _ => Some(Box::new(DisjunctionSumScorer::new( + scorers, + false, + self.min_should_match, + ))), } }; @@ -348,13 +363,13 @@ impl Weight for BooleanWeight { "No matching clauses".to_string(), subs, )) - } else if should_match_count < self.minimum_should_match { + } else if should_match_count < self.min_should_match { Ok(Explanation::new( false, 0.0f32, format!( "Failure to match minimum number of optional clauses: {}<{}", - should_match_count, self.minimum_should_match + should_match_count, self.min_should_match ), subs, )) @@ -394,7 +409,7 @@ impl fmt::Display for BooleanWeight { f, "BooleanWeight(must: [{}], should: [{}], must_not: [{}], min match: {}, needs score: \ {})", - must_str, should_str, must_not_str, self.minimum_should_match, self.needs_scores + must_str, should_str, must_not_str, self.min_should_match, self.needs_scores ) } } diff --git a/src/core/search/query/query_string.rs b/src/core/search/query/query_string.rs index c4c696d..a6e6d82 100644 --- a/src/core/search/query/query_string.rs +++ b/src/core/search/query/query_string.rs @@ -29,7 +29,7 @@ pub struct QueryStringQueryBuilder { query_string: String, fields: Vec<(String, f32)>, #[allow(dead_code)] - minimum_should_match: i32, + min_should_match: i32, #[allow(dead_code)] boost: f32, } @@ -38,13 +38,13 @@ impl QueryStringQueryBuilder { pub fn new( query_string: String, fields: Vec<(String, f32)>, - minimum_should_match: i32, + min_should_match: i32, boost: f32, ) -> QueryStringQueryBuilder { QueryStringQueryBuilder { query_string, fields, - minimum_should_match, + min_should_match, boost, } } @@ -171,7 +171,7 @@ impl QueryStringQueryBuilder { shoulds.remove(0) } } else { - BooleanQuery::build(musts, shoulds, vec![], vec![])? + BooleanQuery::build(musts, shoulds, vec![], vec![], self.min_should_match)? }; Ok(Some(query)) } @@ -190,7 +190,7 @@ impl QueryStringQueryBuilder { let res = if queries.len() == 1 { queries.remove(0) } else { - BooleanQuery::build(Vec::new(), queries, vec![], vec![])? + BooleanQuery::build(Vec::new(), queries, vec![], vec![], self.min_should_match)? }; Ok(res) } @@ -259,7 +259,7 @@ mod tests { let term = String::from("test"); let field = String::from("title"); let q: Box> = - QueryStringQueryBuilder::new(term.clone(), vec![(field, 1.0)], 1, 1.0) + QueryStringQueryBuilder::new(term.clone(), vec![(field, 1.0)], 0, 1.0) .build() .unwrap(); let term_str: String = q.to_string(); @@ -271,7 +271,7 @@ mod tests { let term = String::from("(test^0.2 | 测试^2)"); let field = String::from("title"); let q: Box> = - QueryStringQueryBuilder::new(term.clone(), vec![(field, 1.0)], 1, 2.0) + QueryStringQueryBuilder::new(term.clone(), vec![(field, 1.0)], 0, 2.0) .build() .unwrap(); let term_str: String = q.to_string(); @@ -287,7 +287,7 @@ mod tests { let term = String::from("test^0.2 \"测试\"^2"); let field = String::from("title"); let q: Box> = - QueryStringQueryBuilder::new(term.clone(), vec![(field, 1.0)], 1, 2.0) + QueryStringQueryBuilder::new(term.clone(), vec![(field, 1.0)], 0, 2.0) .build() .unwrap(); let term_str: String = q.to_string(); @@ -302,7 +302,7 @@ mod tests { let field = String::from("title"); let q: Box> = - QueryStringQueryBuilder::new(String::from("+test"), vec![(field, 1.0)], 1, 1.0) + QueryStringQueryBuilder::new(String::from("+test"), vec![(field, 1.0)], 0, 1.0) .build() .unwrap(); let term_str: String = q.to_string(); @@ -314,7 +314,7 @@ mod tests { let query_string = String::from("test search"); let field = String::from("title"); let q: Box> = - QueryStringQueryBuilder::new(query_string.clone(), vec![(field, 1.0)], 1, 1.0) + QueryStringQueryBuilder::new(query_string.clone(), vec![(field, 1.0)], 0, 1.0) .build() .unwrap(); let term_str: String = q.to_string(); @@ -330,7 +330,7 @@ mod tests { let query_string = String::from("test +search"); let field = String::from("title"); let q: Box> = - QueryStringQueryBuilder::new(query_string.clone(), vec![(field, 1.0)], 1, 1.0) + QueryStringQueryBuilder::new(query_string.clone(), vec![(field, 1.0)], 0, 1.0) .build() .unwrap(); let term_str: String = q.to_string(); @@ -346,7 +346,7 @@ mod tests { let query_string = String::from("test +(search 搜索)"); let field = String::from("title"); let q: Box> = - QueryStringQueryBuilder::new(query_string.clone(), vec![(field, 1.0)], 1, 1.0) + QueryStringQueryBuilder::new(query_string.clone(), vec![(field, 1.0)], 0, 1.0) .build() .unwrap(); let term_str: String = q.to_string(); @@ -364,7 +364,7 @@ mod tests { let q: Box> = QueryStringQueryBuilder::new( query_string.clone(), vec![("title".to_string(), 1.0), ("content".to_string(), 1.0)], - 1, + 0, 1.0, ) .build() @@ -387,7 +387,7 @@ mod tests { ); let field = String::from("title"); let q: Box> = - QueryStringQueryBuilder::new(query_string.clone(), vec![(field, 1.0)], 1, 1.0) + QueryStringQueryBuilder::new(query_string.clone(), vec![(field, 1.0)], 0, 1.0) .build() .unwrap(); let term_str: String = q.to_string(); diff --git a/src/core/search/scorer/disjunction_scorer.rs b/src/core/search/scorer/disjunction_scorer.rs index 749adbc..6da3644 100644 --- a/src/core/search/scorer/disjunction_scorer.rs +++ b/src/core/search/scorer/disjunction_scorer.rs @@ -23,15 +23,20 @@ pub struct DisjunctionSumScorer { sub_scorers: SubScorers, needs_scores: bool, cost: usize, + min_should_match: i32, } impl DisjunctionSumScorer { - pub fn new(children: Vec, needs_scores: bool) -> DisjunctionSumScorer { + pub fn new( + children: Vec, + needs_scores: bool, + min_should_match: i32, + ) -> DisjunctionSumScorer { assert!(children.len() > 1); let cost = children.iter().map(|w| w.cost()).sum(); - let sub_scorers = if children.len() < 10 { + let sub_scorers = if children.len() < 10 || min_should_match > 1 { SubScorers::SQ(SimpleQueue::new(children)) } else { SubScorers::DPQ(DisiPriorityQueue::new(children)) @@ -41,6 +46,7 @@ impl DisjunctionSumScorer { sub_scorers, needs_scores, cost, + min_should_match, } } } @@ -81,7 +87,8 @@ impl DocIterator for DisjunctionSumScorer { } fn approximate_next(&mut self) -> Result { - self.sub_scorers.approximate_next() + self.sub_scorers + .approximate_next(Some(self.min_should_match)) } fn approximate_advance(&mut self, target: DocId) -> Result { @@ -162,7 +169,7 @@ impl DocIterator for DisjunctionMaxScorer { } fn approximate_next(&mut self) -> Result { - self.sub_scorers.approximate_next() + self.sub_scorers.approximate_next(None) } fn approximate_advance(&mut self, target: DocId) -> Result { @@ -278,23 +285,33 @@ impl SubScorers { } } - fn approximate_next(&mut self) -> Result { + fn approximate_next(&mut self, min_should_match: Option) -> Result { + let min_should_match = min_should_match.unwrap_or(0); + match self { SubScorers::SQ(sq) => { - let curr_doc = sq.curr_doc; - let mut min_doc = NO_MORE_DOCS; - for s in sq.scorers.iter_mut() { - if s.doc_id() == curr_doc { - s.approximate_next()?; + loop { + // curr_doc = current min_doc, (not -1) + let curr_doc = sq.curr_doc; + let mut min_doc = NO_MORE_DOCS; + let mut should_count = 0; + for s in sq.scorers.iter_mut() { + if s.doc_id() == curr_doc { + should_count += 1; + s.approximate_next()?; + } + + min_doc = min_doc.min(s.doc_id()); } - min_doc = min_doc.min(s.doc_id()); + sq.curr_doc = min_doc; + if should_count >= min_should_match || sq.curr_doc == NO_MORE_DOCS { + return Ok(sq.curr_doc); + } } - - sq.curr_doc = min_doc; - Ok(sq.curr_doc) } SubScorers::DPQ(dbq) => { + // reset with -1, @posting_reader.rs#1208 let doc = dbq.peek().doc(); loop { diff --git a/src/core/search/scorer/req_not_scorer.rs b/src/core/search/scorer/req_not_scorer.rs index 6f0c9b1..f86ba7e 100644 --- a/src/core/search/scorer/req_not_scorer.rs +++ b/src/core/search/scorer/req_not_scorer.rs @@ -133,7 +133,7 @@ mod tests { let conjunction_scorer: Box = Box::new(ConjunctionScorer::new(vec![s1, s2])); let disjunction_scorer: Box = - Box::new(DisjunctionSumScorer::new(vec![s3, s4], true)); + Box::new(DisjunctionSumScorer::new(vec![s3, s4], true, 0)); let mut scorer = ReqNotScorer::new(conjunction_scorer, disjunction_scorer); assert_eq!(scorer.doc_id(), -1); @@ -154,7 +154,7 @@ mod tests { let conjunction_scorer: Box = Box::new(ConjunctionScorer::new(vec![s1, s2])); let disjunction_scorer: Box = - Box::new(DisjunctionSumScorer::new(vec![s3, s4], true)); + Box::new(DisjunctionSumScorer::new(vec![s3, s4], true, 0)); let mut scorer = ReqNotScorer::new(conjunction_scorer, disjunction_scorer); // 2, 3, 5, 7, 9 diff --git a/src/core/search/scorer/req_opt_scorer.rs b/src/core/search/scorer/req_opt_scorer.rs index 0b93238..61b65b1 100644 --- a/src/core/search/scorer/req_opt_scorer.rs +++ b/src/core/search/scorer/req_opt_scorer.rs @@ -111,7 +111,7 @@ mod tests { let conjunction_scorer: Box = Box::new(ConjunctionScorer::new(vec![s1, s2])); let disjunction_scorer: Box = - Box::new(DisjunctionSumScorer::new(vec![s3, s4], true)); + Box::new(DisjunctionSumScorer::new(vec![s3, s4], true, 0)); let mut scorer = ReqOptScorer::new(conjunction_scorer, disjunction_scorer); assert_eq!(scorer.doc_id(), -1);