From 9edba85fc14f034b7109534220702bf60178ff15 Mon Sep 17 00:00:00 2001
From: Waylan Limberg
Date: Fri, 8 Mar 2024 09:44:38 -0500
Subject: [PATCH] Refactor abbr escaping
An alternate fix to #1444. This does not exclude the use of carets or square
brackets in abbreviations. It still excludes backslashes, however. I played
with backslashes and it just doesn't make sense to support them, because they
have special meaning in Markdown itself, not because of their use in regular
expressions.
---
docs/changelog.md | 3 +-
docs/extensions/abbreviations.md | 10 ++---
markdown/extensions/abbr.py | 14 ++----
tests/test_syntax/extensions/test_abbr.py | 52 +++++++++++++++++------
4 files changed, 46 insertions(+), 33 deletions(-)
diff --git a/docs/changelog.md b/docs/changelog.md
index a71dbf25..9c2b302e 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -34,7 +34,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Include `scripts/*.py` in the generated source tarballs (#1430).
* Ensure lines after heading in loose list are properly detabbed (#1443).
* Give smarty tree processor higher priority than toc (#1440).
-* Explicitly omit carrot (`^`) and backslash (`\`) from abbreviations (#1444).
+* Permit carets (`^`) and square brackets (`]`) but explicitly exclude
+ backslashes (`\`) from abbreviations (#1444).
## [3.5.2] -- 2024-01-10
diff --git a/docs/extensions/abbreviations.md b/docs/extensions/abbreviations.md
index 9a98a91b..8a35e526 100644
--- a/docs/extensions/abbreviations.md
+++ b/docs/extensions/abbreviations.md
@@ -36,13 +36,9 @@ will be rendered as:
is maintained by the W3C.
```
-The following three characters are not permitted in an abbreviation. Any
-abbreviation definitions which include one will not be recognized as an
-abbreviation definition.
-
-1. carrot (`^`)
-2. backslash (`\`)
-3. left square bracket (`]`)
+The backslash (`\`) is not permitted in an abbreviation. Any abbreviation
+definition which includes one or more backslashes between the square brackets
+will not be recognized as an abbreviation definition.
Usage
-----
diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
index 46d3f35c..1c7185b2 100644
--- a/markdown/extensions/abbr.py
+++ b/markdown/extensions/abbr.py
@@ -41,7 +41,7 @@ def extendMarkdown(self, md):
class AbbrPreprocessor(BlockProcessor):
""" Abbreviation Preprocessor - parse text for abbr references. """
- RE = re.compile(r'^[*]\[(?P[^\]\^\\]*)\][ ]?:[ ]*\n?[ ]*(?P.*)$', re.MULTILINE)
+ RE = re.compile(r'^[*]\[(?P[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P.*)$', re.MULTILINE)
def test(self, parent: etree.Element, block: str) -> bool:
return True
@@ -72,16 +72,8 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
return False
def _generate_pattern(self, text: str) -> str:
- """
- Given a string, returns a regex pattern to match that string.
-
- 'HTML' -> r'(?P\b[H][T][M][L]\b)'
-
- Note: we force each char as a literal match via a character set (in brackets)
- as we don't know what they will be beforehand.
-
- """
- return f"(?P\\b{ ''.join(f'[{ c }]' for c in text) }\\b)"
+ """ Given a string, returns a regex pattern to match that string. """
+ return f"(?P\\b{ re.escape(text) }\\b)"
class AbbrInlineProcessor(InlineProcessor):
diff --git a/tests/test_syntax/extensions/test_abbr.py b/tests/test_syntax/extensions/test_abbr.py
index 708af51b..e11e8d30 100644
--- a/tests/test_syntax/extensions/test_abbr.py
+++ b/tests/test_syntax/extensions/test_abbr.py
@@ -24,6 +24,7 @@
class TestAbbr(TestCase):
+ maxDiff = None
default_kwargs = {'extensions': ['abbr']}
@@ -260,28 +261,19 @@ def test_abbr_single_quoted(self):
)
)
- def test_abbr_ignore_special_chars(self):
+ def test_abbr_ignore_backslash(self):
self.assertMarkdownRenders(
self.dedent(
r"""
- [^] [\\] [\]] []]
+ \\foo
- *[^]: Not an abbreviation
-
- *[\\]: Not an abbreviation
-
- *[\]]: Not an abbreviation
-
- *[]]: Not an abbreviation
+ *[\\foo]: Not an abbreviation
"""
),
self.dedent(
r"""
- [^] [\] []] []]
- *[^]: Not an abbreviation
- *[\]: Not an abbreviation
- *[]]: Not an abbreviation
- *[]]: Not an abbreviation
+ \foo
+ *[\foo]: Not an abbreviation
"""
)
)
@@ -301,3 +293,35 @@ def test_abbr_hyphen(self):
"""
)
)
+
+ def test_abbr_carrot(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ """
+ ABBR^abbr
+
+ *[ABBR^abbr]: Abbreviation
+ """
+ ),
+ self.dedent(
+ """
+ ABBR^abbr
+ """
+ )
+ )
+
+ def test_abbr_bracket(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ """
+ ABBR]abbr
+
+ *[ABBR]abbr]: Abbreviation
+ """
+ ),
+ self.dedent(
+ """
+ ABBR]abbr
+ """
+ )
+ )