Use \m and \M instead of \< and \> (#56)

Ref #55
facelessuser · Feb 11, 2018 · 39e6741 · 39e6741
1 parent 85c034e
commit 39e6741
Show file tree

Hide file tree

Showing 8 changed files with 27 additions and 44 deletions.
diff --git a/backrefs/__init__.py b/backrefs/__init__.py
@@ -1,7 +1,7 @@
 """Backrefs package."""
 
 #   (major, minor, micro, release type, pre-release build, post-release build)
-version_info = (3, 0, 5, 'final', 0, 0)
+version_info = (3, 1, 0, 'final', 0, 0)
 
 
 def _version():

diff --git a/backrefs/_bre_parse.py b/backrefs/_bre_parse.py
@@ -53,7 +53,7 @@ class GlobalRetryException(Exception):
 class _SearchParser(object):
     """Search Template."""
 
-    _new_refs = ("e", "l", "L", "c", "C", "p", "P", "N", "Q", "E", "<", ">")
+    _new_refs = ("e", "l", "L", "c", "C", "p", "P", "N", "Q", "E", "m", "M")
     _re_escape = r"\x1b"
     _re_start_wb = r"\b(?=\w)"
     _re_end_wb = r"\b(?<=\w)"
@@ -265,9 +265,9 @@ def reference(self, t, i, in_group=False):
 
         current = []
 
-        if not in_group and t == "<":
+        if not in_group and t == "m":
             current.append(self._re_start_wb)
-        elif not in_group and t == ">":
+        elif not in_group and t == "M":
             current.append(self._re_end_wb)
         elif t == "e":
             current.append(self._re_escape)

diff --git a/backrefs/_bregex_parse.py b/backrefs/_bregex_parse.py
@@ -47,10 +47,8 @@ class GlobalRetryException(Exception):
 class _SearchParser(object):
     """Search Template."""
 
-    _new_refs = ("e", "R", "Q", "E", "<", ">")
+    _new_refs = ("e", "R", "Q", "E")
     _re_escape = r"\x1b"
-    _re_start_wb = r"\b(?=\w)"
-    _re_end_wb = r"\b(?<=\w)"
     _line_break = r'(?>\r\n|\n|\x0b|\f|\r|\x85|\u2028|\u2029)'
     _binary_line_break = r'(?>\r\n|\n|\x0b|\f|\r|\x85)'
 
@@ -165,11 +163,7 @@ def reference(self, t, i, in_group=False):
 
         current = []
 
-        if not in_group and t == "<":
-            current.append(self._re_start_wb)
-        elif not in_group and t == ">":
-            current.append(self._re_end_wb)
-        elif not in_group and t == "R":
+        if not in_group and t == "R":
             current.append(self._re_line_break)
         elif t == 'e':
             current.extend(self._re_escape)

diff --git a/backrefs/bregex.py b/backrefs/bregex.py
@@ -10,8 +10,6 @@
  - `\u0000` and `\U00000000`                                    - Unicode characters (replace)
  - `\R`                                                         - Generic line breaks (search)
  - `\e`                                                         - Escape character (search)
- - `\<`                                                         - Starting word boundary (search)
- - `\>`                                                         - Ending word boundary (search)
 
 Licensed under MIT
 Copyright (c) 2015 - 2018 Isaac Muse <[email protected]>

diff --git a/docs/src/markdown/changelog.md b/docs/src/markdown/changelog.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## 3.1.0
+
+- **NEW**: Start and end word boundary back references are now specified with `\m` and `\M`, the same syntax Regex uses. `\<` and `\>` have been removed from Backrefs' Regex support, since Regex already provides `\m` and `\M` natively.
+- **FIX**: Escaped `\<` and `\>` are no longer processed, as Re is known to escape these characters in Python versions earlier than 3.7.
+
 ## 3.0.5
 
 Feb 9, 2018

diff --git a/docs/src/markdown/index.md b/docs/src/markdown/index.md
@@ -225,23 +225,23 @@ Back\ References      | Description
 `\PX`                 | Inverse Unicode property character class where `X` is the uppercase letter that represents the General Category property. For instance, `\PL` would be equivalent to `\P{L}` or `\P{Letter}`.
 `[[:alnum:]]`         | Though not really a back reference, support for POSIX style character classes is available. See [POSIX Style Properties](#posix-style-properties) for more info.
 `\N{UnicodeName}`     | Named characters are normally ignored in Re, but Backrefs adds support for them.
-`\<`                  | Start word boundary. Translates to `\b(?=\w)`.
-`\>`                  | End word boundary. Translates to `\b(?<=\w)`.
+`\m`                  | Start word boundary. Translates to `\b(?=\w)`.
+`\M`                  | End word boundary. Translates to `\b(?<=\w)`.
 
 ### Regex
 
 !!! note
     Regex already natively supports `\p{...}`, `\P{...}`, `\pX`, `\PX`, and `\N{...}`, so Backrefs does not attempt to add this to search patterns.
 
+    `\m` and `\M` are also features already present in Regex.
+
     `\c`, `\l`, `\C`, and `\L` are not used, as some of these flags are already taken by Regex itself. These references are just shortcuts for the related POSIX properties in Backrefs.
 
 Back\ References | Description
 ---------------- | -----------
 `\e`             | Escape character `\x1b`.
 `\Q...\E`        | Quotes (escapes) text for regular expression.  `\E` signifies the end of the quoting. Affects any and all characters no matter where in the regular expression pattern it is placed.
 `\R`             | Generic line breaks. When searching a Unicode string, this will use an atomic group and match `(?>\r\n|\n|\x0b|\f|\r|\x85|\u2028|\u2029)`, and when applied to byte strings, this will match `(?>\r\n|\n|\x0b|\f|\r|\x85)`. Because it uses atomic groups, which Re does not support, this feature is only for Regex.
-`\<`             | Start word boundary. Translates to `\b(?=\w)`.
-`\>`             | End word boundary. Translates to `\b(?<=\w)`.
 
 ## Replace Back References
 

diff --git a/tests/test_bre.py b/tests/test_bre.py
@@ -139,21 +139,26 @@ def test_named_unicode_failures(self):
     def test_word_boundary(self):
         """Test word boundary."""
 
-        pattern = bre.compile_search(r'\<test')
+        pattern = bre.compile_search(r'\mtest')
         self.assertEqual(
             pattern.pattern,
             r"\b(?=\w)test"
         )
-        pattern = bre.compile_search(r'test\>')
+        pattern = bre.compile_search(r'test\M')
         self.assertEqual(
             pattern.pattern,
             r"test\b(?<=\w)"
         )
-        pattern = bre.compile_search(r'[\<]test')
-        self.assertEqual(
-            pattern.pattern,
-            r"[\<]test"
-        )
+
+        if PY36_PLUS:
+            with pytest.raises(sre_constants.error):
+                bre.compile_search(r'[\m]test')
+        else:
+            pattern = bre.compile_search(r'[\m]test')
+            self.assertEqual(
+                pattern.pattern,
+                r"[\m]test"
+            )
 
     def test_cache(self):
         """Test cache."""

diff --git a/tests/test_bregex.py b/tests/test_bregex.py
@@ -93,25 +93,6 @@ def test_posix_property_bad_syntax(self):
         self.assertTrue(bregex.compile(r'[[:a]', regex.V0).match('a') is not None)
         self.assertTrue(bregex.compile(r'[[:graph:a]', regex.V0).match('a') is not None)
 
-    def test_word_boundary(self):
-        """Test word boundary."""
-
-        pattern = bregex.compile_search(r'\<test')
-        self.assertEqual(
-            pattern.pattern,
-            r"\b(?=\w)test"
-        )
-        pattern = bregex.compile_search(r'test\>')
-        self.assertEqual(
-            pattern.pattern,
-            r"test\b(?<=\w)"
-        )
-        pattern = bregex.compile_search(r'[\<]test')
-        self.assertEqual(
-            pattern.pattern,
-            r"[\<]test"
-        )
-
     def test_cache(self):
         """Test cache."""