From f8dcf9caf7f7028c6f9f828f11400562ae602178 Mon Sep 17 00:00:00 2001 From: Christopher Akiki Date: Tue, 15 Oct 2024 15:26:40 +0200 Subject: [PATCH 1/2] [MINOR:TYPO] Update pre_tokenizers.rs --- bindings/python/src/pre_tokenizers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/src/pre_tokenizers.rs b/bindings/python/src/pre_tokenizers.rs index bac3284ad..64a3d8a68 100644 --- a/bindings/python/src/pre_tokenizers.rs +++ b/bindings/python/src/pre_tokenizers.rs @@ -334,7 +334,7 @@ impl PyWhitespaceSplit { /// /// Args: /// pattern (:obj:`str` or :class:`~tokenizers.Regex`): -/// A pattern used to split the string. Usually a string or a a regex built with `tokenizers.Regex`. +/// A pattern used to split the string. Usually a string or a regex built with `tokenizers.Regex`. /// If you want to use a regex pattern, it has to be wrapped around a `tokenizer.Regex`, /// otherwise we consider is as a string pattern. For example `pattern="|"` /// means you want to split on `|` (imagine a csv file for example), while From 055aaa32da8dc87dd0e51907e468432c8d090c28 Mon Sep 17 00:00:00 2001 From: Christopher Akiki Date: Tue, 15 Oct 2024 15:27:14 +0200 Subject: [PATCH 2/2] [MINOR:TYPO] Update __init__.pyi --- bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi b/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi index ea1b4954e..a583945fc 100644 --- a/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi +++ b/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi @@ -421,7 +421,7 @@ class Split(PreTokenizer): Args: pattern (:obj:`str` or :class:`~tokenizers.Regex`): - A pattern used to split the string. Usually a string or a a regex built with `tokenizers.Regex`. + A pattern used to split the string. Usually a string or a regex built with `tokenizers.Regex`. If you want to use a regex pattern, it has to be wrapped around a `tokenizer.Regex`, otherwise we consider is as a string pattern. For example `pattern="|"` means you want to split on `|` (imagine a csv file for example), while