From b609500b3cb28d79ed0fb72f0b47c9d7e885ef87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com>
Date: Wed, 15 Jan 2025 12:40:56 +0100
Subject: [PATCH] feat(checks): make punctuation spacing ignore markup

Strip markup before checking for punctuation spacing.

Fixes #6470
---
 docs/changes.rst                          |  1 +
 weblate/checks/chars.py                   |  3 +++
 weblate/checks/same.py                    |  7 ++++---
 weblate/checks/tests/test_chars_checks.py | 20 ++++++++++++++++++++
 4 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/docs/changes.rst b/docs/changes.rst
index 9d7d94e931e8..9e78d5f0c895 100644
--- a/docs/changes.rst
+++ b/docs/changes.rst
@@ -10,6 +10,7 @@ Not yet released.
 * Improved error handling in :ref:`machine-translation-setup`.
 * :envvar:`WEBLATE_REGISTRATION_CAPTCHA` is now available in Docker container.
 * :guilabel:`Synchronize` on shared repository now operates on all its components.
+* :ref:`check-punctuation-spacing` ignores markup such as Markdown or reStructuredText.
 
 **Bug fixes**
 
diff --git a/weblate/checks/chars.py b/weblate/checks/chars.py
index 2f566afafa8c..dbf75de18f88 100644
--- a/weblate/checks/chars.py
+++ b/weblate/checks/chars.py
@@ -13,6 +13,7 @@
 from weblate.checks.base import CountingCheck, TargetCheck, TargetCheckParametrized
 from weblate.checks.markup import strip_entities
 from weblate.checks.parser import single_value_flag
+from weblate.checks.same import strip_format
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
@@ -470,6 +471,8 @@ def check_single(self, source: str, target: str, unit: Unit) -> bool:
         ):
             return False
 
+        # Remove possible markup
+        target = strip_format(target, unit.all_flags)
         # Remove XML/HTML entities to simplify parsing
         target = strip_entities(target)
 
diff --git a/weblate/checks/same.py b/weblate/checks/same.py
index a740feddf349..969c41e9a3a9 100644
--- a/weblate/checks/same.py
+++ b/weblate/checks/same.py
@@ -18,6 +18,7 @@
 from weblate.checks.ruby import RUBY_FORMAT_MATCH
 
 if TYPE_CHECKING:
+    from weblate.checks.flags import Flags
     from weblate.trans.models import Unit
 
 # Email address to ignore
@@ -60,7 +61,7 @@
 DB_TAGS = ("screen", "indexterm", "programlisting")
 
 
-def strip_format(msg, flags):
+def strip_format(msg: str, flags: Flags) -> str:
     """
     Remove format strings from the strings.
 
@@ -85,7 +86,7 @@ def strip_format(msg, flags):
     return regex.sub("", msg)
 
 
-def strip_string(msg):
+def strip_string(msg: str) -> str:
     """Strip (usually) untranslated parts from the string."""
     # Strip HTML markup
     stripped = strip_tags(msg)
@@ -122,7 +123,7 @@ def test_word(word, extra_ignore):
     )
 
 
-def strip_placeholders(msg, unit: Unit):
+def strip_placeholders(msg: str, unit: Unit) -> str:
     return re.sub(
         "|".join(
             re.escape(param) if isinstance(param, str) else param.pattern
diff --git a/weblate/checks/tests/test_chars_checks.py b/weblate/checks/tests/test_chars_checks.py
index 17788accdc12..dff05daa8135 100644
--- a/weblate/checks/tests/test_chars_checks.py
+++ b/weblate/checks/tests/test_chars_checks.py
@@ -457,3 +457,23 @@ def test_markdown(self) -> None:
             ),
             "fr",
         )
+
+    def test_restructured_text(self) -> None:  # only the last tuple item varies
+        self.do_test(
+            True,  # presumably "check fires"; confirm against do_test
+            (
+                ":ref:`document` here",
+                ":ref:`document` tam",
+                "",  # empty third item (presumably the flags; confirm)
+            ),
+            "fr",
+        )
+        self.do_test(
+            False,  # expectation flips once "rst-text" is supplied below
+            (
+                ":ref:`document` here",
+                ":ref:`document` tam",
+                "rst-text",  # sole difference from the first call
+            ),
+            "fr",
+        )