Skip to content

Commit

Permalink
Use \m and \M instead of \< and \> (#56)
Browse files Browse the repository at this point in the history
Ref #55
  • Loading branch information
facelessuser authored Feb 11, 2018
1 parent 85c034e commit 39e6741
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 44 deletions.
2 changes: 1 addition & 1 deletion backrefs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Backrefs package."""

# (major, minor, micro, release type, pre-release build, post-release build)
version_info = (3, 0, 5, 'final', 0, 0)
version_info = (3, 1, 0, 'final', 0, 0)


def _version():
Expand Down
6 changes: 3 additions & 3 deletions backrefs/_bre_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class GlobalRetryException(Exception):
class _SearchParser(object):
"""Search Template."""

_new_refs = ("e", "l", "L", "c", "C", "p", "P", "N", "Q", "E", "<", ">")
_new_refs = ("e", "l", "L", "c", "C", "p", "P", "N", "Q", "E", "m", "M")
_re_escape = r"\x1b"
_re_start_wb = r"\b(?=\w)"
_re_end_wb = r"\b(?<=\w)"
Expand Down Expand Up @@ -265,9 +265,9 @@ def reference(self, t, i, in_group=False):

current = []

if not in_group and t == "<":
if not in_group and t == "m":
current.append(self._re_start_wb)
elif not in_group and t == ">":
elif not in_group and t == "M":
current.append(self._re_end_wb)
elif t == "e":
current.append(self._re_escape)
Expand Down
10 changes: 2 additions & 8 deletions backrefs/_bregex_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,8 @@ class GlobalRetryException(Exception):
class _SearchParser(object):
"""Search Template."""

_new_refs = ("e", "R", "Q", "E", "<", ">")
_new_refs = ("e", "R", "Q", "E")
_re_escape = r"\x1b"
_re_start_wb = r"\b(?=\w)"
_re_end_wb = r"\b(?<=\w)"
_line_break = r'(?>\r\n|\n|\x0b|\f|\r|\x85|\u2028|\u2029)'
_binary_line_break = r'(?>\r\n|\n|\x0b|\f|\r|\x85)'

Expand Down Expand Up @@ -165,11 +163,7 @@ def reference(self, t, i, in_group=False):

current = []

if not in_group and t == "<":
current.append(self._re_start_wb)
elif not in_group and t == ">":
current.append(self._re_end_wb)
elif not in_group and t == "R":
if not in_group and t == "R":
current.append(self._re_line_break)
elif t == 'e':
current.extend(self._re_escape)
Expand Down
2 changes: 0 additions & 2 deletions backrefs/bregex.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
- `\u0000` and `\U00000000` - Unicode characters (replace)
- `\R` - Generic line breaks (search)
- `\e` - Escape character (search)
- `\<` - Starting word boundary (search)
- `\>` - Ending word boundary (search)
Licensed under MIT
Copyright (c) 2015 - 2018 Isaac Muse <[email protected]>
Expand Down
5 changes: 5 additions & 0 deletions docs/src/markdown/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## 3.1.0

- **NEW**: Start and end word boundary back references are now specified with `\m` and `\M` like Regex does. `\<` and `\>` have been removed from Regex.
- **FIX**: Escaped `\<` and `\>` are no longer processed, as Re is known to escape these in Python versions earlier than 3.7.

## 3.0.5

Feb 9, 2018
Expand Down
8 changes: 4 additions & 4 deletions docs/src/markdown/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -225,23 +225,23 @@ Back\ References | Description
`\PX` | Inverse Unicode property character class where `X` is the uppercase letter that represents the General Category property. For instance, `\PL` would be equivalent to `\P{L}` or `\P{Letter}`.
`[[:alnum:]]` | Though not really a back reference, support for POSIX style character classes is available. See [POSIX Style Properties](#posix-style-properties) for more info.
`\N{UnicodeName}` | Named characters are normally ignored in Re, but Backrefs adds support for them.
`\<` | Start word boundary. Translates to `\b(?=\w)`.
`\>` | End word boundary. Translates to `\b(?<=\w)`.
`\m` | Start word boundary. Translates to `\b(?=\w)`.
`\M` | End word boundary. Translates to `\b(?<=\w)`.

### Regex

!!! note
Regex already natively supports `\p{...}`, `\P{...}`, `\pX`, `\PX`, and `\N{...}`, so Backrefs does not attempt to add this to search patterns.

`\m` and `\M` are also features already present in Regex.

`\c`, `\C`, `\l`, and `\L` are not used, as some of these flags are already taken by Regex itself. These references are just shortcuts for the related POSIX properties in Backrefs.

Back\ References | Description
---------------- | -----------
`\e` | Escape character `\x1b`.
`\Q...\E` | Quotes (escapes) text for regular expression. `\E` signifies the end of the quoting. Affects any and all characters no matter where in the regular expression pattern it is placed.
`\R` | Generic line breaks. When searching a Unicode string, this will use an atomic group and match `(?>\r\n|\n|\x0b|\f|\r|\x85|\u2028|\u2029)`, and when applied to byte strings, this will match `(?>\r\n|\n|\x0b|\f|\r|\x85)`. Because it uses atomic groups, which Re does not support, this feature is only for Regex.
`\<` | Start word boundary. Translates to `\b(?=\w)`.
`\>` | End word boundary. Translates to `\b(?<=\w)`.

## Replace Back References

Expand Down
19 changes: 12 additions & 7 deletions tests/test_bre.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,21 +139,26 @@ def test_named_unicode_failures(self):
def test_word_boundary(self):
"""Test word boundary."""

pattern = bre.compile_search(r'\<test')
pattern = bre.compile_search(r'\mtest')
self.assertEqual(
pattern.pattern,
r"\b(?=\w)test"
)
pattern = bre.compile_search(r'test\>')
pattern = bre.compile_search(r'test\M')
self.assertEqual(
pattern.pattern,
r"test\b(?<=\w)"
)
pattern = bre.compile_search(r'[\<]test')
self.assertEqual(
pattern.pattern,
r"[\<]test"
)

if PY36_PLUS:
with pytest.raises(sre_constants.error):
bre.compile_search(r'[\m]test')
else:
pattern = bre.compile_search(r'[\m]test')
self.assertEqual(
pattern.pattern,
r"[\m]test"
)

def test_cache(self):
"""Test cache."""
Expand Down
19 changes: 0 additions & 19 deletions tests/test_bregex.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,25 +93,6 @@ def test_posix_property_bad_syntax(self):
self.assertTrue(bregex.compile(r'[[:a]', regex.V0).match('a') is not None)
self.assertTrue(bregex.compile(r'[[:graph:a]', regex.V0).match('a') is not None)

def test_word_boundary(self):
"""Test word boundary."""

pattern = bregex.compile_search(r'\<test')
self.assertEqual(
pattern.pattern,
r"\b(?=\w)test"
)
pattern = bregex.compile_search(r'test\>')
self.assertEqual(
pattern.pattern,
r"test\b(?<=\w)"
)
pattern = bregex.compile_search(r'[\<]test')
self.assertEqual(
pattern.pattern,
r"[\<]test"
)

def test_cache(self):
"""Test cache."""

Expand Down

0 comments on commit 39e6741

Please sign in to comment.