From b609500b3cb28d79ed0fb72f0b47c9d7e885ef87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Wed, 15 Jan 2025 12:40:56 +0100 Subject: [PATCH] feat(checks): make punctuation spacing ignore markup Strip markup before checking for punctuation spacing. Fixes #6470 --- docs/changes.rst | 1 + weblate/checks/chars.py | 3 +++ weblate/checks/same.py | 7 ++++--- weblate/checks/tests/test_chars_checks.py | 20 ++++++++++++++++++++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 9d7d94e931e8..9e78d5f0c895 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -10,6 +10,7 @@ Not yet released. * Improved error handling in :ref:`machine-translation-setup`. * :envvar:`WEBLATE_REGISTRATION_CAPTCHA` is now available in Docker container. * :guilabel:`Synchronize` on shared repository now operates on all its components. +* :ref:`check-punctuation-spacing` ignores markup such as Markdown or reStructuredText. **Bug fixes** diff --git a/weblate/checks/chars.py b/weblate/checks/chars.py index 2f566afafa8c..dbf75de18f88 100644 --- a/weblate/checks/chars.py +++ b/weblate/checks/chars.py @@ -13,6 +13,7 @@ from weblate.checks.base import CountingCheck, TargetCheck, TargetCheckParametrized from weblate.checks.markup import strip_entities from weblate.checks.parser import single_value_flag +from weblate.checks.same import strip_format if TYPE_CHECKING: from collections.abc import Iterable @@ -470,6 +471,8 @@ def check_single(self, source: str, target: str, unit: Unit) -> bool: ): return False + # Remove possible markup + target = strip_format(target, unit.all_flags) # Remove XML/HTML entities to simplify parsing target = strip_entities(target) diff --git a/weblate/checks/same.py b/weblate/checks/same.py index a740feddf349..969c41e9a3a9 100644 --- a/weblate/checks/same.py +++ b/weblate/checks/same.py @@ -18,6 +18,7 @@ from weblate.checks.ruby import RUBY_FORMAT_MATCH if TYPE_CHECKING: + from weblate.checks.flags import Flags from weblate.trans.models import Unit # Email address to ignore @@ -60,7 +61,7 @@ DB_TAGS = ("screen", "indexterm", "programlisting") -def strip_format(msg, flags): +def strip_format(msg: str, flags: Flags) -> str: """ Remove format strings from the strings. @@ -85,7 +86,7 @@ def strip_format(msg, flags): return regex.sub("", msg) -def strip_string(msg): +def strip_string(msg: str) -> str: """Strip (usually) untranslated parts from the string.""" # Strip HTML markup stripped = strip_tags(msg) @@ -122,7 +123,7 @@ def test_word(word, extra_ignore): ) -def strip_placeholders(msg, unit: Unit): +def strip_placeholders(msg: str, unit: Unit) -> str: return re.sub( "|".join( re.escape(param) if isinstance(param, str) else param.pattern diff --git a/weblate/checks/tests/test_chars_checks.py b/weblate/checks/tests/test_chars_checks.py index 17788accdc12..dff05daa8135 100644 --- a/weblate/checks/tests/test_chars_checks.py +++ b/weblate/checks/tests/test_chars_checks.py @@ -457,3 +457,23 @@ def test_markdown(self) -> None: ), "fr", ) + + def test_restructured_text(self) -> None: + self.do_test( + True, + ( + ":ref:`document` here", + ":ref:`document` tam", + "", + ), + "fr", + ) + self.do_test( + False, + ( + ":ref:`document` here", + ":ref:`document` tam", + "rst-text", + ), + "fr", + )