diff --git a/src/tokenizer/alphanum_only.rs b/src/tokenizer/alphanum_only.rs
index 6e05581b42..c0175e736e 100644
--- a/src/tokenizer/alphanum_only.rs
+++ b/src/tokenizer/alphanum_only.rs
@@ -2,16 +2,18 @@
 //! ```rust
 //! use tantivy::tokenizer::*;
 //!
-//! let tokenizer = TextAnalyzer::from(RawTokenizer)
-//!     .filter(AlphaNumOnlyFilter);
+//! let tokenizer = TextAnalyzer::builder(RawTokenizer)
+//!     .filter(AlphaNumOnlyFilter)
+//!     .build();
 //!
 //! let mut stream = tokenizer.token_stream("hello there");
 //! // is none because the raw filter emits one token that
 //! // contains a space
 //! assert!(stream.next().is_none());
 //!
-//! let tokenizer = TextAnalyzer::from(SimpleTokenizer)
-//!     .filter(AlphaNumOnlyFilter);
+//! let tokenizer = TextAnalyzer::builder(SimpleTokenizer)
+//!     .filter(AlphaNumOnlyFilter)
+//!     .build();
 //!
 //! let mut stream = tokenizer.token_stream("hello there 💣");
 //! assert!(stream.next().is_some());
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 37f69f5047..7b9ef75bbe 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -66,10 +66,11 @@
 //! ```rust
 //! use tantivy::tokenizer::*;
 //!
-//! let en_stem = TextAnalyzer::from(SimpleTokenizer)
+//! let en_stem = TextAnalyzer::builder(SimpleTokenizer)
 //!     .filter(RemoveLongFilter::limit(40))
 //!     .filter(LowerCaser)
-//!     .filter(Stemmer::new(Language::English));
+//!     .filter(Stemmer::new(Language::English))
+//!     .build();
 //! ```
 //!
 //! Once your tokenizer is defined, you need to
@@ -112,9 +113,10 @@
 //! let index = Index::create_in_ram(schema);
 //!
 //! // We need to register our tokenizer :
-//! let custom_en_tokenizer = TextAnalyzer::from(SimpleTokenizer)
+//! let custom_en_tokenizer = TextAnalyzer::builder(SimpleTokenizer)
 //!     .filter(RemoveLongFilter::limit(40))
-//!     .filter(LowerCaser);
+//!     .filter(LowerCaser)
+//!     .build();
 //! index
 //!     .tokenizers()
 //!     .register("custom_en", custom_en_tokenizer);
diff --git a/src/tokenizer/remove_long.rs b/src/tokenizer/remove_long.rs
index 2c3fefbca9..933e98adb8 100644
--- a/src/tokenizer/remove_long.rs
+++ b/src/tokenizer/remove_long.rs
@@ -2,8 +2,9 @@
 //! ```rust
 //! use tantivy::tokenizer::*;
 //!
-//! let tokenizer = TextAnalyzer::from(SimpleTokenizer)
-//!     .filter(RemoveLongFilter::limit(5));
+//! let tokenizer = TextAnalyzer::builder(SimpleTokenizer)
+//!     .filter(RemoveLongFilter::limit(5))
+//!     .build();
 //!
 //! let mut stream = tokenizer.token_stream("toolong nice");
 //! // because `toolong` is more than 5 characters, it is filtered
diff --git a/src/tokenizer/split_compound_words.rs b/src/tokenizer/split_compound_words.rs
index 995575725f..e79c48bac1 100644
--- a/src/tokenizer/split_compound_words.rs
+++ b/src/tokenizer/split_compound_words.rs
@@ -23,9 +23,11 @@ use super::{Token, TokenFilter, TokenStream, Tokenizer};
 /// use tantivy::tokenizer::{SimpleTokenizer, SplitCompoundWords, TextAnalyzer};
 ///
 /// let tokenizer =
-///     TextAnalyzer::from(SimpleTokenizer).filter(SplitCompoundWords::from_dictionary([
+///     TextAnalyzer::builder(SimpleTokenizer)
+///         .filter(SplitCompoundWords::from_dictionary([
 ///         "dampf", "schiff", "fahrt", "brot", "backen", "automat",
-///     ]));
+///         ]))
+///         .build();
 ///
 /// let mut stream = tokenizer.token_stream("dampfschifffahrt");
 /// assert_eq!(stream.next().unwrap().text, "dampf");
diff --git a/src/tokenizer/stop_word_filter/mod.rs b/src/tokenizer/stop_word_filter/mod.rs
index 215b16d394..adfbf17d4a 100644
--- a/src/tokenizer/stop_word_filter/mod.rs
+++ b/src/tokenizer/stop_word_filter/mod.rs
@@ -2,8 +2,9 @@
 //! ```rust
 //! use tantivy::tokenizer::*;
 //!
-//! let tokenizer = TextAnalyzer::from(SimpleTokenizer)
-//!     .filter(StopWordFilter::remove(vec!["the".to_string(), "is".to_string()]));
+//! let tokenizer = TextAnalyzer::builder(SimpleTokenizer)
+//!     .filter(StopWordFilter::remove(vec!["the".to_string(), "is".to_string()]))
+//!     .build();
 //!
 //! let mut stream = tokenizer.token_stream("the fox is crafty");
 //! assert_eq!(stream.next().unwrap().text, "fox");
diff --git a/src/tokenizer/tokenizer.rs b/src/tokenizer/tokenizer.rs
index 97520fa92f..7e1394076c 100644
--- a/src/tokenizer/tokenizer.rs
+++ b/src/tokenizer/tokenizer.rs
@@ -54,7 +54,7 @@ impl TextAnalyzerBuilder {
     /// ```rust
     /// use tantivy::tokenizer::*;
     ///
-    /// let en_stem = TextAnalyzer::build(SimpleTokenizer)
+    /// let en_stem = TextAnalyzer::builder(SimpleTokenizer)
     ///     .filter(RemoveLongFilter::limit(40))
     ///     .filter(LowerCaser)
    ///     .filter(Stemmer::default())
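All of these hunks make the same change: the `TextAnalyzer::from(tokenizer).filter(...)` chain becomes an explicit builder that must be terminated with `.build()`. For reference, a minimal sketch of the new usage, assuming the tantivy tokenizer API exactly as shown in the hunks above; the input text and expected stems are illustrative only, not taken from the diff:

```rust
use tantivy::tokenizer::*;

// Old style removed by this diff:
//     let en_stem = TextAnalyzer::from(SimpleTokenizer)
//         .filter(RemoveLongFilter::limit(40))
//         .filter(LowerCaser)
//         .filter(Stemmer::new(Language::English));
//
// New style: an explicit builder chain, terminated by `.build()`.
let en_stem = TextAnalyzer::builder(SimpleTokenizer)
    .filter(RemoveLongFilter::limit(40))
    .filter(LowerCaser)
    .filter(Stemmer::new(Language::English))
    .build();

// Consuming tokens is unchanged: lowercased, stemmed tokens come out
// of `token_stream` as before (English stemming maps "Running" -> "run").
let mut stream = en_stem.token_stream("Running Trees");
assert_eq!(stream.next().unwrap().text, "run");
assert_eq!(stream.next().unwrap().text, "tree");
assert!(stream.next().is_none());
```

The cost is one extra line per example, but the end of the filter chain is now explicit, which is exactly the line each hunk above adds.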