diff --git a/summa-core/Cargo.toml b/summa-core/Cargo.toml index c181fdca..04ce9d98 100644 --- a/summa-core/Cargo.toml +++ b/summa-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "summa-core" -version = "0.19.4" +version = "0.19.5" authors = ["Pasha Podolsky "] edition = "2021" license-file = "LICENSE" diff --git a/summa-core/src/components/query_parser/summa_ql.pest b/summa-core/src/components/query_parser/summa_ql.pest index 1c95f66b..b983762f 100644 --- a/summa-core/src/components/query_parser/summa_ql.pest +++ b/summa-core/src/components/query_parser/summa_ql.pest @@ -38,6 +38,6 @@ term = { positive_term | negative_term | default_term } grouping = { positive_grouping | negative_grouping | default_grouping } search_group = ${ field_name ~ ":" ~ (grouping | term) } -statement = ${ (isbn | wrapped_doi | search_group | term) ~ ("^" ~ boost)? } +statement = ${ (isbn | wrapped_doi | search_group | grouping | term) ~ ("^" ~ boost)? } statements = _{ statement_sep* ~ statement? ~ (statement_sep+ ~ statement)* ~ statement_sep* } main = _{SOI ~ statements} diff --git a/summa-core/src/components/query_parser/summa_ql.rs b/summa-core/src/components/query_parser/summa_ql.rs index 059d34b1..e1e434c9 100644 --- a/summa-core/src/components/query_parser/summa_ql.rs +++ b/summa-core/src/components/query_parser/summa_ql.rs @@ -589,11 +589,11 @@ impl QueryParser { fn parse_statement(&self, pair: Pair) -> Result, QueryParserError> { let mut statement_pairs = pair.into_inner(); - let isbn_doi_or_search_group_or_term = statement_pairs.next().expect("grammar failure"); + let isbn_doi_or_search_group_or_grouping_or_term = statement_pairs.next().expect("grammar failure"); let statement_boost = statement_pairs.next().map(|boost| f32::from_str(boost.as_str()).expect("grammar failure")); - let statement_result = match isbn_doi_or_search_group_or_term.as_rule() { + let statement_result = match isbn_doi_or_search_group_or_grouping_or_term.as_rule() { Rule::search_group => { - let mut search_group = isbn_doi_or_search_group_or_term.into_inner(); + let mut search_group = isbn_doi_or_search_group_or_grouping_or_term.into_inner(); let field_name = search_group.next().expect("grammar failure"); let grouping_or_term = search_group.next().expect("grammar failure"); match grouping_or_term.as_rule() { @@ -656,7 +656,8 @@ impl QueryParser { for term_field_mapper_name in ["doi", "doi_isbn"] { if let Some(term_field_mapper_config) = self.query_parser_config.0.term_field_mapper_configs.get(term_field_mapper_name) { if let Some(term_field_mapper) = self.term_field_mappers_manager.get(term_field_mapper_name) { - if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_term.as_str(), &term_field_mapper_config.fields) { + if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_grouping_or_term.as_str(), &term_field_mapper_config.fields) + { queries.push((Occur::Should, query)); } } @@ -671,7 +672,8 @@ impl QueryParser { for term_field_mapper_name in ["isbn"] { if let Some(term_field_mapper_config) = self.query_parser_config.0.term_field_mapper_configs.get(term_field_mapper_name) { if let Some(term_field_mapper) = self.term_field_mappers_manager.get(term_field_mapper_name) { - if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_term.as_str(), &term_field_mapper_config.fields) { + if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_grouping_or_term.as_str(), &term_field_mapper_config.fields) + { queries.push((Occur::Should, query)); } } @@ -680,7 +682,21 @@ impl QueryParser { Ok(Box::new(BooleanQuery::new(queries)) as Box) } - Rule::term => self.default_field_queries(isbn_doi_or_search_group_or_term, statement_boost), + Rule::term => self.default_field_queries(isbn_doi_or_search_group_or_grouping_or_term, statement_boost), + Rule::grouping => { + let grouping = isbn_doi_or_search_group_or_grouping_or_term.into_inner().next().expect("grammar failure"); + let occur = self.parse_occur(&grouping); + let mut intermediate_results = vec![]; + for term in grouping.into_inner() { + intermediate_results.push(self.default_field_queries(term, statement_boost)?) + } + let group_query = Box::new(BooleanQuery::new(intermediate_results.into_iter().map(|q| (Occur::Should, q)).collect())) as Box; + match occur { + Occur::Should => Ok(group_query), + Occur::Must => Ok(Box::new(BooleanQuery::new(vec![(Occur::Must, group_query)])) as Box), + Occur::MustNot => Ok(Box::new(BooleanQuery::new(vec![(Occur::MustNot, group_query)])) as Box), + } + } e => panic!("{e:?}"), }?; Ok(statement_result) @@ -1169,4 +1185,11 @@ mod tests { let query = query_parser.parse_query("iso 34-1:2022"); assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, DisjunctionMaxQuery { disjuncts: [TermQuery(Term(field=0, type=Str, \"iso\")), TermQuery(Term(field=0, type=Str, \"isos\"))], tie_breaker: 0.3 }), (Should, TermQuery(Term(field=0, type=Str, \"34\"))), (Should, TermQuery(Term(field=0, type=Str, \"1\")))] })"); } + + #[test] + pub fn test_root_grouping() { + let query_parser = create_query_parser(); + let query = query_parser.parse_query("(test1 test2) -(test3) +(test4 test5)"); + assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"test1\"))), (Should, TermQuery(Term(field=0, type=Str, \"test2\"))), (MustNot, TermQuery(Term(field=0, type=Str, \"test3\"))), (Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"test4\"))), (Should, TermQuery(Term(field=0, type=Str, \"test5\")))] })] })"); + } } diff --git a/summa-embed-py/Cargo.toml b/summa-embed-py/Cargo.toml index b6efae29..1136aecc 100644 --- a/summa-embed-py/Cargo.toml +++ b/summa-embed-py/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "summa-embed-py" -version = "0.19.4" +version = "0.19.5" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -17,8 +17,8 @@ pyo3-asyncio = { version = "0.19", features = ["attributes", "tokio-runtime"] } pyo3-log = "0.8" pythonize = "0.19" serde_json = "1.0" -summa-core = { version = "0.19.4", path = "../summa-core", default_features = false, features = ["fs", "hyper-external-request", "tokio-rt"] } -summa-server = { version = "0.19.4", path = "../summa-server", default_features = false } +summa-core = { version = "0.19.5", path = "../summa-core", default_features = false, features = ["fs", "hyper-external-request", "tokio-rt"] } +summa-server = { version = "0.19.5", path = "../summa-server", default_features = false } summa-proto = { workspace = true } tantivy = { workspace = true } tokio = { workspace = true } \ No newline at end of file diff --git a/summa-server/Cargo.toml b/summa-server/Cargo.toml index 63a8cee0..33eccdb6 100644 --- a/summa-server/Cargo.toml +++ b/summa-server/Cargo.toml @@ -1,7 +1,7 @@ [package] edition = "2021" name = "summa-server" -version = "0.19.4" +version = "0.19.5" license-file = "LICENSE" description = "Fast full-text search server" homepage = "https://github.com/izihawa/summa" @@ -30,7 +30,7 @@ maintenance = { status = "actively-developed" } tonic-build = { version = "0.9.1", default-features = false, features = ["prost", "transport"] } [dev-dependencies] -summa-core = { version = "0.19.4", path = "../summa-core", features = ["fs", "hyper-external-request", "tokio-rt"] } +summa-core = { version = "0.19.5", path = "../summa-core", features = ["fs", "hyper-external-request", "tokio-rt"] } tempdir = "0.3.7" [dependencies] @@ -59,7 +59,7 @@ serde = { workspace = true } serde_derive = "1.0" serde_json = { workspace = true } serde_yaml = { workspace = true } -summa-core = { version = "0.19.4", path = "../summa-core", features = ["fs", "hyper-external-request", "tokio-rt"] } +summa-core = { version = "0.19.5", path = "../summa-core", features = ["fs", "hyper-external-request", "tokio-rt"] } summa-proto = { workspace = true, features = ["grpc"] } take_mut = { workspace = true } tantivy = { workspace = true } diff --git a/summa-wasm/Cargo.toml b/summa-wasm/Cargo.toml index 6fe03a1a..b87e8a3c 100644 --- a/summa-wasm/Cargo.toml +++ b/summa-wasm/Cargo.toml @@ -26,7 +26,7 @@ prost = { workspace = true } serde = { workspace = true } serde-wasm-bindgen = "0.5" strfmt = { workspace = true } -summa-core = { version = "0.19.4", path = "../summa-core", default_features = false } +summa-core = { version = "0.19.5", path = "../summa-core", default_features = false } summa-proto = { workspace = true } tantivy = { workspace = true, features = ["wasm"] } thiserror = { workspace = true }