Skip to content

Commit

Permalink
feat(checks): make punctuation spacing ignore markup
Browse files
Strip markup before checking for punctuation spacing.

Fixes #6470
  • Loading branch information
nijel committed Jan 15, 2025
1 parent ecdec31 commit b609500
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 3 deletions.
1 change: 1 addition & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Not yet released.
* Improved error handling in :ref:`machine-translation-setup`.
* :envvar:`WEBLATE_REGISTRATION_CAPTCHA` is now available in Docker container.
* :guilabel:`Synchronize` on shared repository now operates on all its components.
* :ref:`check-punctuation-spacing` ignores markup such as Markdown or reStructuredText.

**Bug fixes**

Expand Down
3 changes: 3 additions & 0 deletions weblate/checks/chars.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from weblate.checks.base import CountingCheck, TargetCheck, TargetCheckParametrized
from weblate.checks.markup import strip_entities
from weblate.checks.parser import single_value_flag
from weblate.checks.same import strip_format

if TYPE_CHECKING:
from collections.abc import Iterable
Expand Down Expand Up @@ -470,6 +471,8 @@ def check_single(self, source: str, target: str, unit: Unit) -> bool:
):
return False

# Remove possible markup
target = strip_format(target, unit.all_flags)
# Remove XML/HTML entities to simplify parsing
target = strip_entities(target)

Expand Down
7 changes: 4 additions & 3 deletions weblate/checks/same.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from weblate.checks.ruby import RUBY_FORMAT_MATCH

if TYPE_CHECKING:
from weblate.checks.flags import Flags
from weblate.trans.models import Unit

# Email address to ignore
Expand Down Expand Up @@ -60,7 +61,7 @@
DB_TAGS = ("screen", "indexterm", "programlisting")


-def strip_format(msg, flags):
+def strip_format(msg: str, flags: Flags) -> str:
"""
Remove format strings from the strings.
Expand All @@ -85,7 +86,7 @@ def strip_format(msg, flags):
return regex.sub("", msg)


-def strip_string(msg):
+def strip_string(msg: str) -> str:
"""Strip (usually) untranslated parts from the string."""
# Strip HTML markup
stripped = strip_tags(msg)
Expand Down Expand Up @@ -122,7 +123,7 @@ def test_word(word, extra_ignore):
)


-def strip_placeholders(msg, unit: Unit):
+def strip_placeholders(msg: str, unit: Unit) -> str:
return re.sub(
"|".join(
re.escape(param) if isinstance(param, str) else param.pattern
Expand Down
20 changes: 20 additions & 0 deletions weblate/checks/tests/test_chars_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,23 @@ def test_markdown(self) -> None:
),
"fr",
)

def test_restructured_text(self) -> None:
self.do_test(
True,
(
":ref:`document` here",
":ref:`document` tam",
"",
),
"fr",
)
self.do_test(
False,
(
":ref:`document` here",
":ref:`document` tam",
"rst-text",
),
"fr",
)

0 comments on commit b609500

Please sign in to comment.