From c1b0d5e005431f5ce4fa6797f48639a8ccaa5042 Mon Sep 17 00:00:00 2001
From: Nik <git@mail.vaessen.tech>
Date: Fri, 14 Feb 2025 22:12:14 -0500
Subject: [PATCH] use ruff to format and lint (#117)

---
 .github/workflows/pythonpackage.yml | 13 ++---
 docs/gen_ref_pages.py               |  1 +
 justfile                            |  8 +++
 pyproject.toml                      |  9 +++-
 src/jiwer/__init__.py               | 81 +++++++++++++++++++++++++++--
 src/jiwer/alignment.py              | 19 +++----
 src/jiwer/cli.py                    |  3 +-
 src/jiwer/measures.py               |  9 ++--
 src/jiwer/process.py                | 17 +++---
 src/jiwer/transformations.py        |  6 +--
 src/jiwer/transforms.py             |  9 ++--
 tests/test_alignment.py             |  2 +
 tests/test_cer.py                   |  3 --
 tests/test_empty_ref.py             |  1 +
 tests/test_large_vocab.py           |  1 +
 tests/test_measures.py              | 16 +++---
 tests/test_transforms.py            | 19 ++++++-
 17 files changed, 160 insertions(+), 57 deletions(-)

diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index fb463ce..7d96829 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -34,17 +34,14 @@ jobs:
       run: |
         uv sync --all-extras --all-groups
 
-    - name: Lint with flake8
+    - name: Lint with ruff
       run: |
-        # stop the build if there are Python syntax errors or undefined names
-        uv run flake8 src/jiwer --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        uv run flake8 src/jiwer --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics
+        uv run ruff check .
 
-    - name: Check formatting with black
+    - name: Check formatting with ruff
       run: |
-        uv run black . --check
-     
+        uv run ruff format . --diff
+
   build:
     runs-on: ubuntu-latest
     strategy:
diff --git a/docs/gen_ref_pages.py b/docs/gen_ref_pages.py
index 032573d..e6e8307 100644
--- a/docs/gen_ref_pages.py
+++ b/docs/gen_ref_pages.py
@@ -1,6 +1,7 @@
 """Generate the code reference pages and navigation."""
 
 from pathlib import Path
+
 import mkdocs_gen_files
 
 nav = mkdocs_gen_files.Nav()
diff --git a/justfile b/justfile
index eaa7001..19493d2 100644
--- a/justfile
+++ b/justfile
@@ -6,6 +6,14 @@ test-quick:
 test:
     uv run --group dev pytest
 
+lint:
+    uv run --group dev ruff check .
+    uv run --group dev ruff format . --diff
+
+format:
+    uv run --group dev ruff check --select I,RUF022 --fix .
+    uv run --group dev ruff format .
+
 serve-docs:
     uv run --group docs mkdocs serve
 
diff --git a/pyproject.toml b/pyproject.toml
index e947410..1714c20 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,8 +18,7 @@ jiwer = "jiwer.cli:cli"
 
 [dependency-groups]
 dev = [
-    "black>=24.8.0",
-    "flake8>=5.0.4",
+    "ruff>=0.9.6",
     "pytest>=8.3.4",
     "pytest-benchmark>=4.0.0",
 ]
@@ -34,3 +33,9 @@ docs = [
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
+
+[tool.ruff.lint.isort]
+lines-between-types = 1
+
+[tool.ruff.lint]
+select = ["E4", "E7", "E9", "F", "B", "Q", "N", "I"]
diff --git a/src/jiwer/__init__.py b/src/jiwer/__init__.py
index b19f00d..d65e61f 100644
--- a/src/jiwer/__init__.py
+++ b/src/jiwer/__init__.py
@@ -1,7 +1,78 @@
-from .measures import *
-from .transforms import *
-from .transformations import *
-from .alignment import *
-from .process import *
+from .alignment import collect_error_counts, visualize_alignment, visualize_error_counts
+from .measures import cer, mer, wer, wil, wip
+from .process import (
+    AlignmentChunk,
+    CharacterOutput,
+    WordOutput,
+    process_characters,
+    process_words,
+)
+from .transformations import (
+    cer_contiguous,
+    cer_default,
+    wer_contiguous,
+    wer_default,
+    wer_standardize,
+    wer_standardize_contiguous,
+)
+from .transforms import (
+    AbstractTransform,
+    Compose,
+    ExpandCommonEnglishContractions,
+    ReduceToListOfListOfChars,
+    ReduceToListOfListOfWords,
+    ReduceToSingleSentence,
+    RemoveEmptyStrings,
+    RemoveKaldiNonWords,
+    RemoveMultipleSpaces,
+    RemovePunctuation,
+    RemoveSpecificWords,
+    RemoveWhiteSpace,
+    Strip,
+    SubstituteRegexes,
+    SubstituteWords,
+    ToLowerCase,
+    ToUpperCase,
+)
 
 name = "jiwer"
+
+__version__ = "4.0.0"
+__all__ = [  # must be strings: star-import raises TypeError on non-str items
+    "AbstractTransform",
+    "AlignmentChunk",
+    "CharacterOutput",
+    "Compose",
+    "ExpandCommonEnglishContractions",
+    "ReduceToListOfListOfChars",
+    "ReduceToListOfListOfWords",
+    "ReduceToSingleSentence",
+    "RemoveEmptyStrings",
+    "RemoveKaldiNonWords",
+    "RemoveMultipleSpaces",
+    "RemovePunctuation",
+    "RemoveSpecificWords",
+    "RemoveWhiteSpace",
+    "Strip",
+    "SubstituteRegexes",
+    "SubstituteWords",
+    "ToLowerCase",
+    "ToUpperCase",
+    "WordOutput",
+    "cer",
+    "cer_contiguous",
+    "cer_default",
+    "collect_error_counts",
+    "mer",
+    "process_characters",
+    "process_words",
+    "visualize_alignment",
+    "visualize_error_counts",
+    "wer",
+    "wer_contiguous",
+    "wer_default",
+    "wer_standardize",
+    "wer_standardize_contiguous",
+    "wil",
+    "wip",
+]
diff --git a/src/jiwer/alignment.py b/src/jiwer/alignment.py
index 6d63cb3..cb970dc 100644
--- a/src/jiwer/alignment.py
+++ b/src/jiwer/alignment.py
@@ -20,12 +20,13 @@
 Utility method to visualize the alignment and errors between one or more reference
 and hypothesis pairs.
 """
+
 from collections import defaultdict
-from typing import List, Union, Optional
+from typing import List, Optional, Union
 
-from jiwer.process import CharacterOutput, WordOutput, AlignmentChunk
+from jiwer.process import AlignmentChunk, CharacterOutput, WordOutput
 
-__all__ = ["visualize_alignment", "collect_error_counts", "visualize_error_counts"]
+__all__ = ["collect_error_counts", "visualize_alignment", "visualize_error_counts"]
 
 
 def visualize_alignment(
@@ -131,7 +132,7 @@ def visualize_alignment(
         ):
             continue
 
-        final_str += f"=== SENTENCE {idx+1} ===\n\n"
+        final_str += f"=== SENTENCE {idx + 1} ===\n\n"
         final_str += _construct_comparison_string(
             gt, hp, chunks, include_space_seperator=not is_cer, line_width=line_width
         )
@@ -146,12 +147,12 @@ def visualize_alignment(
         final_str += f"hits={output.hits}\n"
 
         if is_cer:
-            final_str += f"\ncer={output.cer*100:.2f}%\n"
+            final_str += f"\ncer={output.cer * 100:.2f}%\n"
         else:
-            final_str += f"\nmer={output.mer*100:.2f}%"
-            final_str += f"\nwil={output.wil*100:.2f}%"
-            final_str += f"\nwip={output.wip*100:.2f}%"
-            final_str += f"\nwer={output.wer*100:.2f}%\n"
+            final_str += f"\nmer={output.mer * 100:.2f}%"
+            final_str += f"\nwil={output.wil * 100:.2f}%"
+            final_str += f"\nwip={output.wip * 100:.2f}%"
+            final_str += f"\nwer={output.wer * 100:.2f}%\n"
     else:
         # remove last newline
         final_str = final_str[:-1]
diff --git a/src/jiwer/cli.py b/src/jiwer/cli.py
index 3eff73a..688d02a 100644
--- a/src/jiwer/cli.py
+++ b/src/jiwer/cli.py
@@ -20,9 +20,10 @@
 Provide a simple CLI wrapper for JiWER. The CLI does not support custom transforms.
 """
 
-import click
 import pathlib
 
+import click
+
 import jiwer
 
 
diff --git a/src/jiwer/measures.py b/src/jiwer/measures.py
index 8a26b2c..bcca730 100644
--- a/src/jiwer/measures.py
+++ b/src/jiwer/measures.py
@@ -41,18 +41,19 @@
 [jiwer.CharacterOutput][process.CharacterOutput]
 classes.
 """
+
 from typing import List, Union
 
 from jiwer import transforms as tr
-from jiwer.transformations import wer_default, cer_default
-from jiwer.process import process_words, process_characters
+from jiwer.process import process_characters, process_words
+from jiwer.transformations import cer_default, wer_default
 
 __all__ = [
-    "wer",
+    "cer",
     "mer",
+    "wer",
     "wil",
     "wip",
-    "cer",
 ]
 
 ########################################################################################
diff --git a/src/jiwer/process.py b/src/jiwer/process.py
index ac378f2..0e6a8a7 100644
--- a/src/jiwer/process.py
+++ b/src/jiwer/process.py
@@ -21,22 +21,21 @@
 so that measures can be computed and an alignment can be visualized.
 """
 
-from dataclasses import dataclass
 from collections import defaultdict
+from dataclasses import dataclass
 from typing import Any, List, Union
 
 import rapidfuzz
 
 from jiwer import transforms as tr
-from jiwer.transformations import wer_default, cer_default
-
+from jiwer.transformations import cer_default, wer_default
 
 __all__ = [
     "AlignmentChunk",
-    "WordOutput",
     "CharacterOutput",
-    "process_words",
+    "WordOutput",
     "process_characters",
+    "process_words",
 ]
 
 
@@ -228,7 +227,7 @@ def process_words(
         alignments.append(sentence_op_chunks)
 
     # Compute all measures
-    S, D, I, H = num_substitutions, num_deletions, num_insertions, num_hits
+    subs, dels, ins, hits = num_substitutions, num_deletions, num_insertions, num_hits
 
     # special edge-case for empty references
     if num_rf_words == 0:
@@ -244,12 +243,12 @@ def process_words(
             wip = 0
 
     else:
-        wer = float(S + D + I) / float(H + S + D)
-        mer = float(S + D + I) / float(H + S + D + I)
+        wer = float(subs + dels + ins) / float(hits + subs + dels)
+        mer = float(subs + dels + ins) / float(hits + subs + dels + ins)
 
         # there is an edge-case when hypothesis is empty
         if num_hp_words >= 1:
-            wip = (float(H) / num_rf_words) * (float(H) / num_hp_words)
+            wip = (float(hits) / num_rf_words) * (float(hits) / num_hp_words)
         else:
             wip = 0
 
diff --git a/src/jiwer/transformations.py b/src/jiwer/transformations.py
index c35662e..c9c3ca4 100644
--- a/src/jiwer/transformations.py
+++ b/src/jiwer/transformations.py
@@ -26,12 +26,12 @@
 import jiwer.transforms as tr
 
 __all__ = [
-    "wer_default",
+    "cer_contiguous",
+    "cer_default",
     "wer_contiguous",
+    "wer_default",
     "wer_standardize",
     "wer_standardize_contiguous",
-    "cer_default",
-    "cer_contiguous",
 ]
 
 ########################################################################################
diff --git a/src/jiwer/transforms.py b/src/jiwer/transforms.py
index 3932a3a..0989c7b 100644
--- a/src/jiwer/transforms.py
+++ b/src/jiwer/transforms.py
@@ -33,23 +33,22 @@
 [transforms.ReduceToListOfListOfChars][].
 """
 
-import sys
 import functools
 import re
 import string
+import sys
 import unicodedata
 
-from typing import Iterable, Union, List, Mapping
-
+from typing import Iterable, List, Mapping, Union
 
 __all__ = [
     "AbstractTransform",
     "Compose",
     "ExpandCommonEnglishContractions",
-    "RemoveEmptyStrings",
-    "ReduceToListOfListOfWords",
     "ReduceToListOfListOfChars",
+    "ReduceToListOfListOfWords",
     "ReduceToSingleSentence",
+    "RemoveEmptyStrings",
     "RemoveKaldiNonWords",
     "RemoveMultipleSpaces",
     "RemovePunctuation",
diff --git a/tests/test_alignment.py b/tests/test_alignment.py
index 1754c75..82d2b88 100644
--- a/tests/test_alignment.py
+++ b/tests/test_alignment.py
@@ -1,5 +1,7 @@
 import unittest
+
 import jiwer
+
 from jiwer import visualize_alignment
 
 
diff --git a/tests/test_cer.py b/tests/test_cer.py
index a1bb544..27cf287 100644
--- a/tests/test_cer.py
+++ b/tests/test_cer.py
@@ -1,10 +1,7 @@
 import unittest
-import pytest
 
 import jiwer
 
-from .test_measures import assert_dict_almost_equal
-
 
 class TestCERInputMethods(unittest.TestCase):
     def test_input_ref_string_hyp_string(self):
diff --git a/tests/test_empty_ref.py b/tests/test_empty_ref.py
index 278a966..353c5b0 100644
--- a/tests/test_empty_ref.py
+++ b/tests/test_empty_ref.py
@@ -1,4 +1,5 @@
 import pytest
+
 import jiwer
 
 
diff --git a/tests/test_large_vocab.py b/tests/test_large_vocab.py
index f6b42b4..9df16bc 100644
--- a/tests/test_large_vocab.py
+++ b/tests/test_large_vocab.py
@@ -1,4 +1,5 @@
 import pytest
+
 from jiwer import process_words, wer
 
 
diff --git a/tests/test_measures.py b/tests/test_measures.py
index 441249f..5a39ec1 100644
--- a/tests/test_measures.py
+++ b/tests/test_measures.py
@@ -1,5 +1,6 @@
+import functools
 import unittest
-import pytest
+
 import jiwer
 
 
@@ -217,11 +218,14 @@ def test_fail_on_different_sentence_length(self):
             jiwer.wip,
             jiwer.mer,
         ]:
-
-            def callback():
-                method(["hello", "this", "sentence", "is fractured"], ["this sentence"])
-
-            self.assertRaises(ValueError, callback)
+            self.assertRaises(
+                ValueError,
+                functools.partial(
+                    method,
+                    ["hello", "this", "sentence", "is fractured"],
+                    ["this sentence"],
+                ),
+            )
 
     def test_known_values(self):
         # Taken from the "From WER and RIL to MER and WIL" paper, for link see README.md
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index 62be954..62a6720 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -1,7 +1,22 @@
 import unittest
 
-from jiwer.transforms import *
-from jiwer.transforms import ReduceToListOfListOfChars
+from jiwer.transforms import (
+    ExpandCommonEnglishContractions,
+    ReduceToListOfListOfChars,
+    ReduceToListOfListOfWords,
+    ReduceToSingleSentence,
+    RemoveEmptyStrings,
+    RemoveKaldiNonWords,
+    RemoveMultipleSpaces,
+    RemovePunctuation,
+    RemoveSpecificWords,
+    RemoveWhiteSpace,
+    Strip,
+    SubstituteRegexes,
+    SubstituteWords,
+    ToLowerCase,
+    ToUpperCase,
+)
 
 
 def _apply_test_on(self: unittest.TestCase, tr, cases):