Skip to content

Commit

Permalink
feat: Add top-level grouping parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
ppodolsky committed Sep 16, 2023
1 parent 978ecef commit a51ba55
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 15 deletions.
2 changes: 1 addition & 1 deletion summa-core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "summa-core"
version = "0.19.4"
version = "0.19.5"
authors = ["Pasha Podolsky <[email protected]>"]
edition = "2021"
license-file = "LICENSE"
Expand Down
2 changes: 1 addition & 1 deletion summa-core/src/components/query_parser/summa_ql.pest
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,6 @@ term = { positive_term | negative_term | default_term }
grouping = { positive_grouping | negative_grouping | default_grouping }
search_group = ${ field_name ~ ":" ~ (grouping | term) }

statement = ${ (isbn | wrapped_doi | search_group | term) ~ ("^" ~ boost)? }
statement = ${ (isbn | wrapped_doi | search_group | grouping | term) ~ ("^" ~ boost)? }
statements = _{ statement_sep* ~ statement? ~ (statement_sep+ ~ statement)* ~ statement_sep* }
main = _{SOI ~ statements}
35 changes: 29 additions & 6 deletions summa-core/src/components/query_parser/summa_ql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -589,11 +589,11 @@ impl QueryParser {

fn parse_statement(&self, pair: Pair<Rule>) -> Result<Box<dyn Query>, QueryParserError> {
let mut statement_pairs = pair.into_inner();
let isbn_doi_or_search_group_or_term = statement_pairs.next().expect("grammar failure");
let isbn_doi_or_search_group_or_grouping_or_term = statement_pairs.next().expect("grammar failure");
let statement_boost = statement_pairs.next().map(|boost| f32::from_str(boost.as_str()).expect("grammar failure"));
let statement_result = match isbn_doi_or_search_group_or_term.as_rule() {
let statement_result = match isbn_doi_or_search_group_or_grouping_or_term.as_rule() {
Rule::search_group => {
let mut search_group = isbn_doi_or_search_group_or_term.into_inner();
let mut search_group = isbn_doi_or_search_group_or_grouping_or_term.into_inner();
let field_name = search_group.next().expect("grammar failure");
let grouping_or_term = search_group.next().expect("grammar failure");
match grouping_or_term.as_rule() {
Expand Down Expand Up @@ -656,7 +656,8 @@ impl QueryParser {
for term_field_mapper_name in ["doi", "doi_isbn"] {
if let Some(term_field_mapper_config) = self.query_parser_config.0.term_field_mapper_configs.get(term_field_mapper_name) {
if let Some(term_field_mapper) = self.term_field_mappers_manager.get(term_field_mapper_name) {
if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_term.as_str(), &term_field_mapper_config.fields) {
if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_grouping_or_term.as_str(), &term_field_mapper_config.fields)
{
queries.push((Occur::Should, query));
}
}
Expand All @@ -671,7 +672,8 @@ impl QueryParser {
for term_field_mapper_name in ["isbn"] {
if let Some(term_field_mapper_config) = self.query_parser_config.0.term_field_mapper_configs.get(term_field_mapper_name) {
if let Some(term_field_mapper) = self.term_field_mappers_manager.get(term_field_mapper_name) {
if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_term.as_str(), &term_field_mapper_config.fields) {
if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_grouping_or_term.as_str(), &term_field_mapper_config.fields)
{
queries.push((Occur::Should, query));
}
}
Expand All @@ -680,7 +682,21 @@ impl QueryParser {

Ok(Box::new(BooleanQuery::new(queries)) as Box<dyn Query>)
}
Rule::term => self.default_field_queries(isbn_doi_or_search_group_or_term, statement_boost),
Rule::term => self.default_field_queries(isbn_doi_or_search_group_or_grouping_or_term, statement_boost),
Rule::grouping => {
let grouping = isbn_doi_or_search_group_or_grouping_or_term.into_inner().next().expect("grammar failure");
let occur = self.parse_occur(&grouping);
let mut intermediate_results = vec![];
for term in grouping.into_inner() {
intermediate_results.push(self.default_field_queries(term, statement_boost)?)
}
let group_query = Box::new(BooleanQuery::new(intermediate_results.into_iter().map(|q| (Occur::Should, q)).collect())) as Box<dyn Query>;
match occur {
Occur::Should => Ok(group_query),
Occur::Must => Ok(Box::new(BooleanQuery::new(vec![(Occur::Must, group_query)])) as Box<dyn Query>),
Occur::MustNot => Ok(Box::new(BooleanQuery::new(vec![(Occur::MustNot, group_query)])) as Box<dyn Query>),
}
}
e => panic!("{e:?}"),
}?;
Ok(statement_result)
Expand Down Expand Up @@ -1169,4 +1185,11 @@ mod tests {
let query = query_parser.parse_query("iso 34-1:2022");
assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, DisjunctionMaxQuery { disjuncts: [TermQuery(Term(field=0, type=Str, \"iso\")), TermQuery(Term(field=0, type=Str, \"isos\"))], tie_breaker: 0.3 }), (Should, TermQuery(Term(field=0, type=Str, \"34\"))), (Should, TermQuery(Term(field=0, type=Str, \"1\")))] })");
}

/// Checks parsing of parenthesised groups that appear at the top level of a query,
/// i.e. without a `field:` prefix.
///
/// The expected debug output pins three cases at once:
/// - a plain group `(test1 test2)` contributes its terms as `Should` clauses of the root query;
/// - a negated single-term group `-(test3)` ends up as a `MustNot` term clause;
/// - a required group `+(test4 test5)` is kept as a nested `BooleanQuery` under `Must`.
#[test]
pub fn test_root_grouping() {
    let parser = create_query_parser();
    let expected = "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"test1\"))), (Should, TermQuery(Term(field=0, type=Str, \"test2\"))), (MustNot, TermQuery(Term(field=0, type=Str, \"test3\"))), (Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"test4\"))), (Should, TermQuery(Term(field=0, type=Str, \"test5\")))] })] })";
    let parsed = parser.parse_query("(test1 test2) -(test3) +(test4 test5)");
    assert_eq!(format!("{:?}", parsed), expected);
}
}
6 changes: 3 additions & 3 deletions summa-embed-py/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "summa-embed-py"
version = "0.19.4"
version = "0.19.5"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -17,8 +17,8 @@ pyo3-asyncio = { version = "0.19", features = ["attributes", "tokio-runtime"] }
pyo3-log = "0.8"
pythonize = "0.19"
serde_json = "1.0"
summa-core = { version = "0.19.4", path = "../summa-core", default_features = false, features = ["fs", "hyper-external-request", "tokio-rt"] }
summa-server = { version = "0.19.4", path = "../summa-server", default_features = false }
summa-core = { version = "0.19.5", path = "../summa-core", default_features = false, features = ["fs", "hyper-external-request", "tokio-rt"] }
summa-server = { version = "0.19.5", path = "../summa-server", default_features = false }
summa-proto = { workspace = true }
tantivy = { workspace = true }
tokio = { workspace = true }
6 changes: 3 additions & 3 deletions summa-server/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
edition = "2021"
name = "summa-server"
version = "0.19.4"
version = "0.19.5"
license-file = "LICENSE"
description = "Fast full-text search server"
homepage = "https://github.com/izihawa/summa"
Expand Down Expand Up @@ -30,7 +30,7 @@ maintenance = { status = "actively-developed" }
tonic-build = { version = "0.9.1", default-features = false, features = ["prost", "transport"] }

[dev-dependencies]
summa-core = { version = "0.19.4", path = "../summa-core", features = ["fs", "hyper-external-request", "tokio-rt"] }
summa-core = { version = "0.19.5", path = "../summa-core", features = ["fs", "hyper-external-request", "tokio-rt"] }
tempdir = "0.3.7"

[dependencies]
Expand Down Expand Up @@ -59,7 +59,7 @@ serde = { workspace = true }
serde_derive = "1.0"
serde_json = { workspace = true }
serde_yaml = { workspace = true }
summa-core = { version = "0.19.4", path = "../summa-core", features = ["fs", "hyper-external-request", "tokio-rt"] }
summa-core = { version = "0.19.5", path = "../summa-core", features = ["fs", "hyper-external-request", "tokio-rt"] }
summa-proto = { workspace = true, features = ["grpc"] }
take_mut = { workspace = true }
tantivy = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion summa-wasm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ prost = { workspace = true }
serde = { workspace = true }
serde-wasm-bindgen = "0.5"
strfmt = { workspace = true }
summa-core = { version = "0.19.4", path = "../summa-core", default_features = false }
summa-core = { version = "0.19.5", path = "../summa-core", default_features = false }
summa-proto = { workspace = true }
tantivy = { workspace = true, features = ["wasm"] }
thiserror = { workspace = true }
Expand Down

0 comments on commit a51ba55

Please sign in to comment.