From 39554ac94a1a85c024e8292fa3be31610c5aba45 Mon Sep 17 00:00:00 2001
From: Pascal Seitz
Date: Thu, 16 Feb 2023 11:34:07 +0800
Subject: [PATCH] update examples for literate docs

---
 examples/aggregation.rs                       | 352 ++++++++++++++----
 examples/custom_collector.rs                  |   2 +-
 examples/custom_tokenizer.rs                  |   4 +-
 examples/date_time_field.rs                   |   8 +-
 examples/faceted_search_with_tweaked_score.rs |  11 +
 examples/fuzzy_search.rs                      | 170 +++++++++
 ...ucer.rs => index_from_multiple_threads.rs} |   0
 ...orking_with_json.rs => index_with_json.rs} |   0
 examples/ip_field.rs                          |  60 ++-
 examples/warmer.rs                            |   9 +-
 10 files changed, 512 insertions(+), 104 deletions(-)
 create mode 100644 examples/fuzzy_search.rs
 rename examples/{multiple_producer.rs => index_from_multiple_threads.rs} (100%)
 rename examples/{working_with_json.rs => index_with_json.rs} (100%)

diff --git a/examples/aggregation.rs b/examples/aggregation.rs
index 39648ed155..d5bbda4163 100644
--- a/examples/aggregation.rs
+++ b/examples/aggregation.rs
@@ -1,129 +1,319 @@
 // # Aggregation example
 //
 // This example shows how you can use built-in aggregations.
-// We will use range buckets and compute the average in each bucket.
-//
+// We will use nested aggregations with buckets and metrics:
+// - Range buckets, computing the average price in each bucket.
+// - A term aggregation, computing the minimum price in each bucket.
+// ---
-use serde_json::Value;
+use serde_json::{Deserializer, Value};
 use tantivy::aggregation::agg_req::{
     Aggregation, Aggregations, BucketAggregation, BucketAggregationType, MetricAggregation,
     RangeAggregation,
 };
 use tantivy::aggregation::agg_result::AggregationResults;
+use tantivy::aggregation::bucket::RangeAggregationRange;
 use tantivy::aggregation::metric::AverageAggregation;
 use tantivy::aggregation::AggregationCollector;
-use tantivy::query::TermQuery;
-use tantivy::schema::{self, IndexRecordOption, Schema, TextFieldIndexing};
-use tantivy::{doc, Index, Term};
+use tantivy::query::AllQuery;
+use tantivy::schema::{self, IndexRecordOption, Schema, TextFieldIndexing, FAST};
+use tantivy::Index;

 fn main() -> tantivy::Result<()> {
+    // # Create Schema
+    //
+    // Let's create a schema for a footwear shop with three fields: category, stock and
+    // price. All of them are fast fields, since fast fields are required for aggregation
+    // queries. (The documents also carry a "name", which we leave out of the schema here.)
+    //
     let mut schema_builder = Schema::builder();

+    // In preparation for the `TermsAggregation`, the category field is configured with:
+    // - `set_fast`
+    // - the `raw` tokenizer
+    //
+    // The tokenizer is set to "raw" because the fast field uses the same dictionary as the
+    // inverted index.
+    // (This behaviour will change in tantivy 0.20, where the fast field will always be
+    // raw-tokenized, independently of the regular tokenizing.)
+    //
     let text_fieldtype = schema::TextOptions::default()
         .set_indexing_options(
-            TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
+            TextFieldIndexing::default()
+                .set_index_option(IndexRecordOption::WithFreqs)
+                .set_tokenizer("raw"),
         )
+        .set_fast()
         .set_stored();
-    let text_field = schema_builder.add_text_field("text", text_fieldtype);
-    let score_fieldtype = crate::schema::NumericOptions::default().set_fast();
-    let highscore_field = schema_builder.add_f64_field("highscore", score_fieldtype.clone());
-    let price_field = schema_builder.add_f64_field("price", score_fieldtype);
+    schema_builder.add_text_field("category", text_fieldtype);
+    schema_builder.add_f64_field("stock", FAST);
+    schema_builder.add_f64_field("price", FAST);
     let schema = schema_builder.build();

     // # Indexing documents
     //
     // Let's index a bunch of documents for this example.
-    let index = Index::create_in_ram(schema);
+    let index = Index::create_in_ram(schema.clone());
+
+    let data = r#"{
+        "name": "Almond Toe Court Shoes, Patent Black",
+        "category": "Womens Footwear",
+        "price": 99.00,
+        "stock": 5
+    }
+    {
+        "name": "Suede Shoes, Blue",
+        "category": "Womens Footwear",
+        "price": 42.00,
+        "stock": 4
+    }
+    {
+        "name": "Leather Driver Saddle Loafers, Tan",
+        "category": "Mens Footwear",
+        "price": 34.00,
+        "stock": 12
+    }
+    {
+        "name": "Flip Flops, Red",
+        "category": "Mens Footwear",
+        "price": 19.00,
+        "stock": 6
+    }
+    {
+        "name": "Flip Flops, Blue",
+        "category": "Mens Footwear",
+        "price": 19.00,
+        "stock": 0
+    }
+    {
+        "name": "Gold Button Cardigan, Black",
+        "category": "Womens Casualwear",
+        "price": 167.00,
+        "stock": 6
+    }
+    {
+        "name": "Cotton Shorts, Medium Red",
+        "category": "Womens Casualwear",
+        "price": 30.00,
+        "stock": 5
+    }
+    {
+        "name": "Fine Stripe Short Sleeve Shirt, Grey",
+        "category": "Mens Casualwear",
+        "price": 49.99,
+        "stock": 9
+    }
+    {
+        "name": "Fine Stripe Short Sleeve Shirt, Green",
+        "category": "Mens Casualwear",
+        "price": 49.99,
+        "offer": 39.99,
+        "stock": 9
+    }
+    {
+        "name": "Sharkskin Waistcoat, Charcoal",
+        "category": "Mens Formalwear",
+        "price": 75.00,
+        "stock": 2
+    }
+    {
+        "name": "Lightweight Patch Pocket Blazer, Deer",
+        "category": "Mens Formalwear",
+        "price": 175.50,
+        "stock": 1
+    }
+    {
+        "name": "Bird Print Dress, Black",
+        "category": "Womens Formalwear",
+        "price": 270.00,
+        "stock": 10
+    }
+    {
+        "name": "Mid Twist Cut-Out Dress, Pink",
+        "category": "Womens Formalwear",
+        "price": 540.00,
+        "stock": 5
+    }"#;
+
+    let stream = Deserializer::from_str(data).into_iter::<Value>();

     let mut index_writer = index.writer(50_000_000)?;
-    // writing the segment
-    index_writer.add_document(doc!(
-        text_field => "cool",
-        highscore_field => 1f64,
-        price_field => 0f64,
-    ))?;
-    index_writer.add_document(doc!(
-        text_field => "cool",
-        highscore_field => 3f64,
-        price_field => 1f64,
-    ))?;
-    index_writer.add_document(doc!(
-        text_field => "cool",
-        highscore_field => 5f64,
-        price_field => 1f64,
-    ))?;
-    index_writer.add_document(doc!(
-        text_field => "nohit",
-        highscore_field => 6f64,
-        price_field => 2f64,
-    ))?;
-    index_writer.add_document(doc!(
-        text_field => "cool",
-        highscore_field => 7f64,
-        price_field => 2f64,
-    ))?;
-    index_writer.commit()?;
-    index_writer.add_document(doc!(
-        text_field => "cool",
-        highscore_field => 11f64,
-        price_field => 10f64,
-    ))?;
-    index_writer.add_document(doc!(
-        text_field => "cool",
"cool", - highscore_field => 14f64, - price_field => 15f64, - ))?; - - index_writer.add_document(doc!( - text_field => "cool", - highscore_field => 15f64, - price_field => 20f64, - ))?; + let mut num_indexed = 0; + for value in stream { + let doc = schema.parse_document(&serde_json::to_string(&value.unwrap())?)?; + index_writer.add_document(doc)?; + num_indexed += 1; + if num_indexed > 4 { + // Writing the first segment + index_writer.commit()?; + } + } + // Writing the second segment index_writer.commit()?; + // We have two segments now. The `AggregationCollector` will run the aggregation on each + // segment and then merge the results into an `IntermediateAggregationResult`. + let reader = index.reader()?; - let text_field = reader.searcher().schema().get_field("text").unwrap(); - - let term_query = TermQuery::new( - Term::from_field_text(text_field, "cool"), - IndexRecordOption::Basic, - ); - - let sub_agg_req_1: Aggregations = vec![( - "average_price".to_string(), - Aggregation::Metric(MetricAggregation::Average( - AverageAggregation::from_field_name("price".to_string()), - )), - )] - .into_iter() - .collect(); + let searcher = reader.searcher(); + // --- + // # Aggregation Query + // + // + // We can construct the query by building the request structure or by deserializing from JSON. + // The JSON API is more stable and therefore recommended. + // + // ## Request 1 + + let agg_req_str = r#" + { + "group_by_stock": { + "aggs": { + "average_price": { "avg": { "field": "price" } } + }, + "range": { + "field": "stock", + "ranges": [ + { "key": "few", "to": 1.0 }, + { "key": "some", "from": 1.0, "to": 10.0 }, + { "key": "many", "from": 10.0 } + ] + } + } + } "#; + + // In this Aggregation we want to get the average price for different groups, depending on how + // many items are in stock. We define custom ranges `few`, `some`, `many` via the + // range aggregation. + // For every bucket we want the average price, so we create a nested metric aggregation on the + // range bucket aggregation. Only buckets support nested aggregations. + // ### Request JSON API + // + + let agg_req: Aggregations = serde_json::from_str(agg_req_str)?; + let collector = AggregationCollector::from_aggs(agg_req, None, index.schema()); + + let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap(); + let res2: Value = serde_json::to_value(agg_res)?; + + // ### Request Rust API + // + // This is exactly the same request as above, but via the rust structures. 
+
+    // ### Request Rust API
+    //
+    // This is exactly the same request as above, but built via the Rust structures.
+    //
-    let agg_req_1: Aggregations = vec![(
-        "score_ranges".to_string(),
+    let agg_req: Aggregations = vec![(
+        "group_by_stock".to_string(),
         Aggregation::Bucket(BucketAggregation {
             bucket_agg: BucketAggregationType::Range(RangeAggregation {
-                field: "highscore".to_string(),
+                field: "stock".to_string(),
                 ranges: vec![
-                    (-1f64..9f64).into(),
-                    (9f64..14f64).into(),
-                    (14f64..20f64).into(),
+                    RangeAggregationRange {
+                        key: Some("few".into()),
+                        from: None,
+                        to: Some(1f64),
+                    },
+                    RangeAggregationRange {
+                        key: Some("some".into()),
+                        from: Some(1f64),
+                        to: Some(10f64),
+                    },
+                    RangeAggregationRange {
+                        key: Some("many".into()),
+                        from: Some(10f64),
+                        to: None,
+                    },
                 ],
                 ..Default::default()
             }),
-            sub_aggregation: sub_agg_req_1,
+            sub_aggregation: vec![(
+                "average_price".to_string(),
+                Aggregation::Metric(MetricAggregation::Average(
+                    AverageAggregation::from_field_name("price".to_string()),
+                )),
+            )]
+            .into_iter()
+            .collect(),
         }),
     )]
     .into_iter()
     .collect();

-    let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());
+    let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());
+    // We use the `AllQuery`, which will pass all documents to the `AggregationCollector`.
+    let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();

-    let searcher = reader.searcher();
-    let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();
+    let res1: Value = serde_json::to_value(agg_res)?;
+
+    // ### Aggregation Result
+    //
+    // The result serializes to the same JSON format as Elasticsearch.
+    //
+    let expected_res = r#"
+    {
+      "group_by_stock":{
+        "buckets":[
+          {"average_price":{"value":19.0},"doc_count":1,"key":"few","to":1.0},
+          {"average_price":{"value":124.748},"doc_count":10,"from":1.0,"key":"some","to":10.0},
+          {"average_price":{"value":152.0},"doc_count":2,"from":10.0,"key":"many"}
+        ]
+      }
+    }
+    "#;
+    let expected_json: Value = serde_json::from_str(expected_res)?;
+    assert_eq!(expected_json, res1);
+    assert_eq!(expected_json, res2);

+    // ## Request 2
+    //
+    // Now we are interested in the minimum price per category, so we create a bucket per
+    // category via a `TermsAggregation`. We want the highest minimum prices first, so we set
+    // the bucket order `"order": { "min_price": "desc" }`, sorting the buckets by the metric
+    // of the sub-aggregation.
+    //
+    let agg_req_str = r#"
+    {
+      "min_price_per_category": {
+        "aggs": {
+          "min_price": { "min": { "field": "price" } }
+        },
+        "terms": {
+          "field": "category",
+          "min_doc_count": 1,
+          "order": { "min_price": "desc" }
+        }
+      }
+    } "#;
+
+    let agg_req: Aggregations = serde_json::from_str(agg_req_str)?;
+
+    let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());
+
+    let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
     let res: Value = serde_json::to_value(agg_res)?;
-    println!("{}", serde_json::to_string_pretty(&res)?);
+
+    // Minimum price per category, sorted by minimum price descending.
+    //
+    // As you can see, the starting prices for `Formalwear` are higher than for `Casualwear`.
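+    //
+    // (`sum_other_doc_count` in the expected result below is the number of documents that
+    // did not make it into the returned buckets; it is 0 here, since every category got
+    // its own bucket.)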
+    //
+    let expected_res = r#"
+    {
+      "min_price_per_category": {
+        "buckets": [
+          { "doc_count": 2, "key": "Womens Formalwear", "min_price": { "value": 270.0 } },
+          { "doc_count": 2, "key": "Mens Formalwear", "min_price": { "value": 75.0 } },
+          { "doc_count": 2, "key": "Mens Casualwear", "min_price": { "value": 49.99 } },
+          { "doc_count": 2, "key": "Womens Footwear", "min_price": { "value": 42.0 } },
+          { "doc_count": 2, "key": "Womens Casualwear", "min_price": { "value": 30.0 } },
+          { "doc_count": 3, "key": "Mens Footwear", "min_price": { "value": 19.0 } }
+        ],
+        "sum_other_doc_count": 0
+      }
+    }
+    "#;
+    let expected_json: Value = serde_json::from_str(expected_res)?;
+
+    assert_eq!(expected_json, res);

     Ok(())
 }
diff --git a/examples/custom_collector.rs b/examples/custom_collector.rs
index 60cb3dea5d..c0fb7d8f78 100644
--- a/examples/custom_collector.rs
+++ b/examples/custom_collector.rs
@@ -171,7 +171,7 @@ fn main() -> tantivy::Result<()> {
     let searcher = reader.searcher();
     let query_parser = QueryParser::for_index(&index, vec![product_name, product_description]);

-    // here we want to get a hit on the 'ken' in Frankenstein
+    // Here we want to search for `broom` and run the `StatsCollector` on the hits.
     let query = query_parser.parse_query("broom")?;

     if let Some(stats) =
         searcher.search(&query, &StatsCollector::with_field("price".to_string()))?
diff --git a/examples/custom_tokenizer.rs b/examples/custom_tokenizer.rs
index 4a8a4b754d..fce73a5842 100644
--- a/examples/custom_tokenizer.rs
+++ b/examples/custom_tokenizer.rs
@@ -1,7 +1,7 @@
 // # Defining a tokenizer pipeline
 //
-// In this example, we'll see how to define a tokenizer pipeline
-// by aligning a bunch of `TokenFilter`.
+// In this example, we'll see how to define a tokenizer
+// by creating a custom `NgramTokenizer`.
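+//
+// (As a rough sketch of what an `NgramTokenizer` emits, assuming
+// `NgramTokenizer::new(2, 3, false)`, i.e. all n-grams of length 2 to 3:
+// "body" would yield the tokens "bo", "bod", "od", "ody", "dy".)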
 use tantivy::collector::TopDocs;
 use tantivy::query::QueryParser;
 use tantivy::schema::*;
diff --git a/examples/date_time_field.rs b/examples/date_time_field.rs
index b42d4208fa..7383b79fcc 100644
--- a/examples/date_time_field.rs
+++ b/examples/date_time_field.rs
@@ -14,6 +14,7 @@ fn main() -> tantivy::Result<()> {
         .set_stored()
         .set_fast()
         .set_precision(tantivy::DatePrecision::Seconds);
+    // Add the `occurred_at` date field
     let occurred_at = schema_builder.add_date_field("occurred_at", opts);
     let event_type = schema_builder.add_text_field("event", STRING | STORED);
     let schema = schema_builder.build();
@@ -22,6 +23,7 @@ fn main() -> tantivy::Result<()> {
     let index = Index::create_in_ram(schema.clone());

     let mut index_writer = index.writer(50_000_000)?;
+    // Dates are passed as strings in RFC 3339 format
     let doc = schema.parse_document(
         r#"{
             "occurred_at": "2022-06-22T12:53:50.53Z",
@@ -41,14 +43,16 @@ fn main() -> tantivy::Result<()> {
     let reader = index.reader()?;
     let searcher = reader.searcher();

-    // # Default fields: event_type
+    // # Search
     let query_parser = QueryParser::for_index(&index, vec![event_type]);
     {
-        let query = query_parser.parse_query("event:comment")?;
+        // Simple exact search on the date
+        let query = query_parser.parse_query("occurred_at:\"2022-06-22T12:53:50.53Z\"")?;
         let count_docs = searcher.search(&*query, &TopDocs::with_limit(5))?;
         assert_eq!(count_docs.len(), 1);
     }
     {
+        // Range query on the date field
         let query = query_parser
             .parse_query(r#"occurred_at:[2022-06-22T12:58:00Z TO 2022-06-23T00:00:00Z}"#)?;
         let count_docs = searcher.search(&*query, &TopDocs::with_limit(4))?;
diff --git a/examples/faceted_search_with_tweaked_score.rs b/examples/faceted_search_with_tweaked_score.rs
index d05addaf4a..44fed8b9db 100644
--- a/examples/faceted_search_with_tweaked_score.rs
+++ b/examples/faceted_search_with_tweaked_score.rs
@@ -1,3 +1,12 @@
+// # Faceted Search With Tweaked Score
+//
+// This example covers the faceted search functionalities of
+// tantivy.
+//
+// We will:
+// - define a text field "name" in our schema
+// - define a facet field "ingredient" in our schema
+
 use std::collections::HashSet;

 use tantivy::collector::TopDocs;
@@ -55,6 +64,7 @@ fn main() -> tantivy::Result<()> {
             .collect(),
     );
     let top_docs_by_custom_score =
+        // Create a `TopDocs` collector with a tweaked score
        TopDocs::with_limit(2).tweak_score(move |segment_reader: &SegmentReader| {
             let ingredient_reader = segment_reader.facet_reader("ingredient").unwrap();
             let facet_dict = ingredient_reader.facet_dict();
@@ -65,6 +75,7 @@ fn main() -> tantivy::Result<()> {

             move |doc: DocId, original_score: Score| {
+                // Derive a tweaked score from the original score
                 let missing_ingredients = ingredient_reader
                     .facet_ords(doc)
                     .filter(|ord| !query_ords.contains(ord))
diff --git a/examples/fuzzy_search.rs b/examples/fuzzy_search.rs
new file mode 100644
index 0000000000..541656d125
--- /dev/null
+++ b/examples/fuzzy_search.rs
@@ -0,0 +1,170 @@
+// # Fuzzy Search Example
+//
+// This example covers the fuzzy search functionality of
+// tantivy.
+//
+// We will:
+// - define our schema
+// - create an index in a directory
+// - index a few documents into our index
+// - search for documents matching a fuzzy query
+// - retrieve the matching documents' original content.
+
+// ---
+// Importing tantivy...
+use tantivy::collector::{Count, TopDocs};
+use tantivy::query::FuzzyTermQuery;
+use tantivy::schema::*;
+use tantivy::{doc, Index, ReloadPolicy};
+use tempfile::TempDir;
+
+fn main() -> tantivy::Result<()> {
+    // Let's create a temporary directory for the
+    // sake of this example
+    let index_path = TempDir::new()?;
+
+    // # Defining the schema
+    //
+    // The Tantivy index requires a very strict schema.
+    // The schema declares which fields are in the index,
+    // and for each field, its type and "the way it should
+    // be indexed".
+
+    // First we need to define a schema ...
+    let mut schema_builder = Schema::builder();
+
+    // Our only field is title.
+    // We want full-text search for it, and we also want
+    // to be able to retrieve the document after the search.
+    //
+    // `TEXT | STORED` is some syntactic sugar to describe
+    // that.
+    //
+    // `TEXT` means the field should be tokenized and indexed,
+    // along with its term frequency and term positions.
+    //
+    // `STORED` means that the field will also be saved
+    // in a compressed, row-oriented key-value store.
+    // This store is useful for reconstructing the
+    // documents that were selected during the search phase.
+    let title = schema_builder.add_text_field("title", TEXT | STORED);
+
+    let schema = schema_builder.build();
+
+    // # Indexing documents
+    //
+    // Let's create a brand new index.
+    //
+    // This will actually just save a meta.json
+    // with our schema in the directory.
+    let index = Index::create_in_dir(&index_path, schema.clone())?;
+
+    // To insert a document we will need an index writer.
+    // There must be only one writer at a time.
+    // This single `IndexWriter` is already
+    // multithreaded.
+    //
+    // Here we give tantivy a budget of `50MB`.
+    // Using a bigger memory arena for the indexer may increase
+    // throughput, but 50 MB is already plenty.
+    let mut index_writer = index.writer(50_000_000)?;
+
+    // ### Adding documents
+    //
+    // Let's index our documents, using the `title`
+    // handle defined above.
+    index_writer.add_document(doc!(
+        title => "The Name of the Wind",
+    ))?;
+    index_writer.add_document(doc!(
+        title => "The Diary of Muadib",
+    ))?;
+    index_writer.add_document(doc!(
+        title => "A Dairy Cow",
+    ))?;
+    index_writer.add_document(doc!(
+        title => "The Diary of a Young Girl",
+    ))?;
+
+    // ### Committing
+    //
+    // At this point our documents are not searchable.
+    //
+    // We need to call `.commit()` explicitly to force the
+    // `index_writer` to finish processing the documents in the queue,
+    // flush the current index to the disk, and advertise
+    // the existence of new documents.
+    //
+    // This call is blocking.
+    index_writer.commit()?;
+
+    // If `.commit()` returns correctly, then all of the
+    // documents that have been added are guaranteed to be
+    // persistently indexed.
+    //
+    // In the scenario of a crash or a power failure,
+    // tantivy behaves as if it has rolled back to its last
+    // commit.
+
+    // # Searching
+    //
+    // ### Searcher
+    //
+    // A reader is required first in order to search an index.
+    // It acts as a `Searcher` pool that reloads itself,
+    // depending on a `ReloadPolicy`.
+    //
+    // For a search server you will typically create one reader for the entire lifetime of your
+    // program, and acquire a new searcher for every single request.
+    //
+    // In the code below, we rely on the `OnCommit` policy: the reader
+    // will reload the index automatically after each commit.
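+    //
+    // (`ReloadPolicy::Manual` is the alternative: with it, the reader only picks up
+    // new commits when `reader.reload()` is called explicitly.)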
+    let reader = index
+        .reader_builder()
+        .reload_policy(ReloadPolicy::OnCommit)
+        .try_into()?;
+
+    // We now need to acquire a searcher.
+    //
+    // A searcher points to a snapshotted, immutable version of the index.
+    //
+    // Some search experiences might require more than one query.
+    // Using the same searcher ensures that all of these queries will run on the
+    // same version of the index.
+    //
+    // Acquiring a `searcher` is very cheap.
+    //
+    // You should acquire a searcher every time you start processing a request
+    // and release it right after your query is finished.
+    let searcher = reader.searcher();
+
+    // ### FuzzyTermQuery
+    {
+        let term = Term::from_field_text(title, "Diary");
+        let query = FuzzyTermQuery::new(term, 2, true);
+
+        let (top_docs, count) = searcher
+            .search(&query, &(TopDocs::with_limit(5), Count))
+            .unwrap();
+        assert_eq!(count, 3);
+        assert_eq!(top_docs.len(), 3);
+        for (score, doc_address) in top_docs {
+            let retrieved_doc = searcher.doc(doc_address)?;
+            // Note that the score is not lower for the fuzzy hits.
+            // There's an issue open for that: https://github.com/quickwit-oss/tantivy/issues/563
+            println!("score {score:?} doc {}", schema.to_json(&retrieved_doc));
+            // score 1.0 doc {"title":["The Diary of Muadib"]}
+            //
+            // score 1.0 doc {"title":["The Diary of a Young Girl"]}
+            //
+            // score 1.0 doc {"title":["A Dairy Cow"]}
+        }
+    }
+
+    Ok(())
+}
diff --git a/examples/multiple_producer.rs b/examples/index_from_multiple_threads.rs
similarity index 100%
rename from examples/multiple_producer.rs
rename to examples/index_from_multiple_threads.rs
diff --git a/examples/working_with_json.rs b/examples/index_with_json.rs
similarity index 100%
rename from examples/working_with_json.rs
rename to examples/index_with_json.rs
diff --git a/examples/ip_field.rs b/examples/ip_field.rs
index c1f34c2a24..ab1c99fcbc 100644
--- a/examples/ip_field.rs
+++ b/examples/ip_field.rs
@@ -10,6 +10,10 @@ use tantivy::Index;

 fn main() -> tantivy::Result<()> {
     // # Defining the schema
+    // We set the IP field as `INDEXED`, so it can be searched.
+    // `FAST` creates a fast field, which will be used to execute the search queries here.
+    // `FAST` is not a requirement for range queries though; they can also be executed on
+    // the inverted index created by `INDEXED`.
     let mut schema_builder = Schema::builder();
     let event_type = schema_builder.add_text_field("event_type", STRING | STORED);
     let ip = schema_builder.add_ip_addr_field("ip", STORED | INDEXED | FAST);
@@ -19,51 +23,81 @@ fn main() -> tantivy::Result<()> {
     let index = Index::create_in_ram(schema.clone());

     let mut index_writer = index.writer(50_000_000)?;
+
+    // ### IPv4
+    // Adding documents that contain an IPv4 address. Notice that the IP addresses are
+    // passed as `String`. Since the field is an IP address field, the string is parsed
+    // and stored internally as IPv6.
     let doc = schema.parse_document(
         r#"{
-        "ip": "192.168.0.33",
-        "event_type": "login"
-    }"#,
+            "ip": "192.168.0.33",
+            "event_type": "login"
+        }"#,
     )?;
     index_writer.add_document(doc)?;
     let doc = schema.parse_document(
         r#"{
-        "ip": "192.168.0.80",
-        "event_type": "checkout"
-    }"#,
+            "ip": "192.168.0.80",
+            "event_type": "checkout"
+        }"#,
     )?;
     index_writer.add_document(doc)?;

+    // ### IPv6
+    // Adding a document that contains an IPv6 address.
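+    // Internally, IPv4 addresses are mapped into the IPv6 space, so both kinds of
+    // addresses live in a single ordered value space and can be mixed in the same
+    // field and in the same range queries.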
     let doc = schema.parse_document(
         r#"{
-        "ip": "2001:0db8:85a3:0000:0000:8a2e:0370:7334",
-        "event_type": "checkout"
-    }"#,
+            "ip": "2001:0db8:85a3:0000:0000:8a2e:0370:7334",
+            "event_type": "checkout"
+        }"#,
     )?;
     index_writer.add_document(doc)?;

+    // Commit will create a segment containing our documents.
     index_writer.commit()?;

     let reader = index.reader()?;
     let searcher = reader.searcher();

+    // # Search
+    // Range queries on the IP field. Since we created a fast field, the fast field will
+    // be used to execute the search.
+    //
+    // ### Range Queries
     let query_parser = QueryParser::for_index(&index, vec![event_type, ip]);
     {
-        let query = query_parser.parse_query("ip:[192.168.0.0 TO 192.168.0.100]")?;
+        // Inclusive range query
+        let query = query_parser.parse_query("ip:[192.168.0.80 TO 192.168.0.100]")?;
         let count_docs = searcher.search(&*query, &TopDocs::with_limit(5))?;
-        assert_eq!(count_docs.len(), 2);
+        assert_eq!(count_docs.len(), 1);
     }
     {
-        let query = query_parser.parse_query("ip:[192.168.1.0 TO 192.168.1.100]")?;
+        // Range query with an exclusive lower bound
+        let query = query_parser.parse_query("ip:{192.168.0.80 TO 192.168.1.100]")?;
         let count_docs = searcher.search(&*query, &TopDocs::with_limit(2))?;
         assert_eq!(count_docs.len(), 0);
     }
+    {
+        // Find docs with IP addresses less than or equal to 192.168.1.100
+        let query = query_parser.parse_query("ip:[* TO 192.168.1.100]")?;
+        let count_docs = searcher.search(&*query, &TopDocs::with_limit(2))?;
+        assert_eq!(count_docs.len(), 2);
+    }
+    {
+        // Find docs with IP addresses strictly less than 192.168.1.100
+        let query = query_parser.parse_query("ip:[* TO 192.168.1.100}")?;
+        let count_docs = searcher.search(&*query, &TopDocs::with_limit(2))?;
+        assert_eq!(count_docs.len(), 2);
+    }
+
+    // ### Exact Queries
+    // Exact search on IPv4.
     {
         let query = query_parser.parse_query("ip:192.168.0.80")?;
         let count_docs = searcher.search(&*query, &Count)?;
         assert_eq!(count_docs, 1);
     }
+
+    // Exact search on IPv6.
+    // IPv6 addresses need to be quoted because they contain `:`
     {
-        // IpV6 needs to be escaped because it contains `:`
         let query = query_parser.parse_query("ip:\"2001:0db8:85a3:0000:0000:8a2e:0370:7334\"")?;
         let count_docs = searcher.search(&*query, &Count)?;
         assert_eq!(count_docs, 1);
diff --git a/examples/warmer.rs b/examples/warmer.rs
index 1cf64c0282..53979e3d4c 100644
--- a/examples/warmer.rs
+++ b/examples/warmer.rs
@@ -17,7 +17,6 @@ use tantivy::{

 type ProductId = u64;

-/// Price
 type Price = u32;

 pub trait PriceFetcher: Send + Sync + 'static {
@@ -90,10 +89,10 @@ impl Warmer for DynamicPriceColumn {
     }
 }

-/// For the sake of this example, the table is just an editable HashMap behind a RwLock.
-/// This map represents a map (ProductId -> Price)
-///
-/// In practise, it could be fetching things from an external service, like a SQL table.
+// For the sake of this example, the table is just an editable HashMap behind a RwLock,
+// representing a map (ProductId -> Price).
+//
+// In practice, it could fetch the prices from an external service, like a SQL table.
 #[derive(Default, Clone)]
 pub struct ExternalPriceTable {
     prices: Arc<RwLock<HashMap<ProductId, Price>>>,