From c1b0d5e005431f5ce4fa6797f48639a8ccaa5042 Mon Sep 17 00:00:00 2001 From: Nik Date: Fri, 14 Feb 2025 22:12:14 -0500 Subject: [PATCH] use ruff to format and lint (#117) --- .github/workflows/pythonpackage.yml | 13 ++--- docs/gen_ref_pages.py | 1 + justfile | 8 +++ pyproject.toml | 9 +++- src/jiwer/__init__.py | 81 +++++++++++++++++++++++++++-- src/jiwer/alignment.py | 19 +++---- src/jiwer/cli.py | 3 +- src/jiwer/measures.py | 9 ++-- src/jiwer/process.py | 17 +++--- src/jiwer/transformations.py | 6 +-- src/jiwer/transforms.py | 9 ++-- tests/test_alignment.py | 2 + tests/test_cer.py | 3 -- tests/test_empty_ref.py | 1 + tests/test_large_vocab.py | 1 + tests/test_measures.py | 16 +++--- tests/test_transforms.py | 19 ++++++- 17 files changed, 160 insertions(+), 57 deletions(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index fb463ce..7d96829 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -34,17 +34,14 @@ jobs: run: | uv sync --all-extras --all-groups - - name: Lint with flake8 + - name: Lint with ruff run: | - # stop the build if there are Python syntax errors or undefined names - uv run flake8 src/jiwer --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - uv run flake8 src/jiwer --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics + uv run ruff check . - - name: Check formatting with black + - name: Check formatting with ruff run: | - uv run black . --check - + uv run ruff format . 
--diff + build: runs-on: ubuntu-latest strategy: diff --git a/docs/gen_ref_pages.py b/docs/gen_ref_pages.py index 032573d..e6e8307 100644 --- a/docs/gen_ref_pages.py +++ b/docs/gen_ref_pages.py @@ -1,6 +1,7 @@ """Generate the code reference pages and navigation.""" from pathlib import Path + import mkdocs_gen_files nav = mkdocs_gen_files.Nav() diff --git a/justfile b/justfile index eaa7001..19493d2 100644 --- a/justfile +++ b/justfile @@ -6,6 +6,14 @@ test-quick: test: uv run --group dev pytest +lint: + uv run --group dev ruff check . + uv run --group dev ruff format . --diff + +format: + uv run --group dev ruff check --select I,RUF022 --fix . + uv run --group dev ruff format . + serve-docs: uv run --group docs mkdocs serve diff --git a/pyproject.toml b/pyproject.toml index e947410..1714c20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,8 +18,7 @@ jiwer = "jiwer.cli:cli" [dependency-groups] dev = [ - "black>=24.8.0", - "flake8>=5.0.4", + "ruff>=0.9.6", "pytest>=8.3.4", "pytest-benchmark>=4.0.0", ] @@ -34,3 +33,9 @@ docs = [ [build-system] requires = ["hatchling"] build-backend = "hatchling.build" + +[tool.ruff.lint.isort] +lines-between-types = 1 + +[tool.ruff.lint] +select = ["E4", "E7", "E9", "F", "B", "Q", "N", "I"] diff --git a/src/jiwer/__init__.py b/src/jiwer/__init__.py index b19f00d..d65e61f 100644 --- a/src/jiwer/__init__.py +++ b/src/jiwer/__init__.py @@ -1,7 +1,78 @@ -from .measures import * -from .transforms import * -from .transformations import * -from .alignment import * -from .process import * +from .alignment import collect_error_counts, visualize_alignment, visualize_error_counts +from .measures import cer, mer, wer, wil, wip +from .process import ( + AlignmentChunk, + CharacterOutput, + WordOutput, + process_characters, + process_words, +) +from .transformations import ( + cer_contiguous, + cer_default, + wer_contiguous, + wer_default, + wer_standardize, + wer_standardize_contiguous, +) +from .transforms import ( + AbstractTransform, + 
Compose, + ExpandCommonEnglishContractions, + ReduceToListOfListOfChars, + ReduceToListOfListOfWords, + ReduceToSingleSentence, + RemoveEmptyStrings, + RemoveKaldiNonWords, + RemoveMultipleSpaces, + RemovePunctuation, + RemoveSpecificWords, + RemoveWhiteSpace, + Strip, + SubstituteRegexes, + SubstituteWords, + ToLowerCase, + ToUpperCase, +) name = "jiwer" + +__version__ = "4.0.0" +__all__ = [ + "visualize_alignment", + "visualize_error_counts", + "collect_error_counts", + "cer", + "mer", + "wer", + "wil", + "wip", + "AlignmentChunk", + "CharacterOutput", + "WordOutput", + "process_characters", + "process_words", + "AbstractTransform", + "Compose", + "ExpandCommonEnglishContractions", + "ReduceToListOfListOfChars", + "ReduceToListOfListOfWords", + "ReduceToSingleSentence", + "RemoveEmptyStrings", + "RemoveKaldiNonWords", + "RemoveMultipleSpaces", + "RemovePunctuation", + "RemoveSpecificWords", + "RemoveWhiteSpace", + "Strip", + "SubstituteRegexes", + "SubstituteWords", + "ToLowerCase", + "ToUpperCase", + "cer_contiguous", + "cer_default", + "wer_contiguous", + "wer_default", + "wer_standardize", + "wer_standardize_contiguous", +] diff --git a/src/jiwer/alignment.py b/src/jiwer/alignment.py index 6d63cb3..cb970dc 100644 --- a/src/jiwer/alignment.py +++ b/src/jiwer/alignment.py @@ -20,12 +20,13 @@ Utility method to visualize the alignment and errors between one or more reference and hypothesis pairs. 
""" + from collections import defaultdict -from typing import List, Union, Optional +from typing import List, Optional, Union -from jiwer.process import CharacterOutput, WordOutput, AlignmentChunk +from jiwer.process import AlignmentChunk, CharacterOutput, WordOutput -__all__ = ["visualize_alignment", "collect_error_counts", "visualize_error_counts"] +__all__ = ["collect_error_counts", "visualize_alignment", "visualize_error_counts"] def visualize_alignment( @@ -131,7 +132,7 @@ def visualize_alignment( ): continue - final_str += f"=== SENTENCE {idx+1} ===\n\n" + final_str += f"=== SENTENCE {idx + 1} ===\n\n" final_str += _construct_comparison_string( gt, hp, chunks, include_space_seperator=not is_cer, line_width=line_width ) @@ -146,12 +147,12 @@ def visualize_alignment( final_str += f"hits={output.hits}\n" if is_cer: - final_str += f"\ncer={output.cer*100:.2f}%\n" + final_str += f"\ncer={output.cer * 100:.2f}%\n" else: - final_str += f"\nmer={output.mer*100:.2f}%" - final_str += f"\nwil={output.wil*100:.2f}%" - final_str += f"\nwip={output.wip*100:.2f}%" - final_str += f"\nwer={output.wer*100:.2f}%\n" + final_str += f"\nmer={output.mer * 100:.2f}%" + final_str += f"\nwil={output.wil * 100:.2f}%" + final_str += f"\nwip={output.wip * 100:.2f}%" + final_str += f"\nwer={output.wer * 100:.2f}%\n" else: # remove last newline final_str = final_str[:-1] diff --git a/src/jiwer/cli.py b/src/jiwer/cli.py index 3eff73a..688d02a 100644 --- a/src/jiwer/cli.py +++ b/src/jiwer/cli.py @@ -20,9 +20,10 @@ Provide a simple CLI wrapper for JiWER. The CLI does not support custom transforms. """ -import click import pathlib +import click + import jiwer diff --git a/src/jiwer/measures.py b/src/jiwer/measures.py index 8a26b2c..bcca730 100644 --- a/src/jiwer/measures.py +++ b/src/jiwer/measures.py @@ -41,18 +41,19 @@ [jiwer.CharacterOutput][process.CharacterOutput] classes. 
""" + from typing import List, Union from jiwer import transforms as tr -from jiwer.transformations import wer_default, cer_default -from jiwer.process import process_words, process_characters +from jiwer.process import process_characters, process_words +from jiwer.transformations import cer_default, wer_default __all__ = [ - "wer", + "cer", "mer", + "wer", "wil", "wip", - "cer", ] ######################################################################################## diff --git a/src/jiwer/process.py b/src/jiwer/process.py index ac378f2..0e6a8a7 100644 --- a/src/jiwer/process.py +++ b/src/jiwer/process.py @@ -21,22 +21,21 @@ so that measures can be computed and an alignment can be visualized. """ -from dataclasses import dataclass from collections import defaultdict +from dataclasses import dataclass from typing import Any, List, Union import rapidfuzz from jiwer import transforms as tr -from jiwer.transformations import wer_default, cer_default - +from jiwer.transformations import cer_default, wer_default __all__ = [ "AlignmentChunk", - "WordOutput", "CharacterOutput", - "process_words", + "WordOutput", "process_characters", + "process_words", ] @@ -228,7 +227,7 @@ def process_words( alignments.append(sentence_op_chunks) # Compute all measures - S, D, I, H = num_substitutions, num_deletions, num_insertions, num_hits + subs, dels, ins, hits = num_substitutions, num_deletions, num_insertions, num_hits # special edge-case for empty references if num_rf_words == 0: @@ -244,12 +243,12 @@ def process_words( wip = 0 else: - wer = float(S + D + I) / float(H + S + D) - mer = float(S + D + I) / float(H + S + D + I) + wer = float(subs + dels + ins) / float(hits + subs + dels) + mer = float(subs + dels + ins) / float(hits + subs + dels + ins) # there is an edge-case when hypothesis is empty if num_hp_words >= 1: - wip = (float(H) / num_rf_words) * (float(H) / num_hp_words) + wip = (float(hits) / num_rf_words) * (float(hits) / num_hp_words) else: wip = 0 diff --git 
a/src/jiwer/transformations.py b/src/jiwer/transformations.py index c35662e..c9c3ca4 100644 --- a/src/jiwer/transformations.py +++ b/src/jiwer/transformations.py @@ -26,12 +26,12 @@ import jiwer.transforms as tr __all__ = [ - "wer_default", + "cer_contiguous", + "cer_default", "wer_contiguous", + "wer_default", "wer_standardize", "wer_standardize_contiguous", - "cer_default", - "cer_contiguous", ] ######################################################################################## diff --git a/src/jiwer/transforms.py b/src/jiwer/transforms.py index 3932a3a..0989c7b 100644 --- a/src/jiwer/transforms.py +++ b/src/jiwer/transforms.py @@ -33,23 +33,22 @@ [transforms.ReduceToListOfListOfChars][]. """ -import sys import functools import re import string +import sys import unicodedata -from typing import Iterable, Union, List, Mapping - +from typing import Iterable, List, Mapping, Union __all__ = [ "AbstractTransform", "Compose", "ExpandCommonEnglishContractions", - "RemoveEmptyStrings", - "ReduceToListOfListOfWords", "ReduceToListOfListOfChars", + "ReduceToListOfListOfWords", "ReduceToSingleSentence", + "RemoveEmptyStrings", "RemoveKaldiNonWords", "RemoveMultipleSpaces", "RemovePunctuation", diff --git a/tests/test_alignment.py b/tests/test_alignment.py index 1754c75..82d2b88 100644 --- a/tests/test_alignment.py +++ b/tests/test_alignment.py @@ -1,5 +1,7 @@ import unittest + import jiwer + from jiwer import visualize_alignment diff --git a/tests/test_cer.py b/tests/test_cer.py index a1bb544..27cf287 100644 --- a/tests/test_cer.py +++ b/tests/test_cer.py @@ -1,10 +1,7 @@ import unittest -import pytest import jiwer -from .test_measures import assert_dict_almost_equal - class TestCERInputMethods(unittest.TestCase): def test_input_ref_string_hyp_string(self): diff --git a/tests/test_empty_ref.py b/tests/test_empty_ref.py index 278a966..353c5b0 100644 --- a/tests/test_empty_ref.py +++ b/tests/test_empty_ref.py @@ -1,4 +1,5 @@ import pytest + import jiwer diff --git 
a/tests/test_large_vocab.py b/tests/test_large_vocab.py index f6b42b4..9df16bc 100644 --- a/tests/test_large_vocab.py +++ b/tests/test_large_vocab.py @@ -1,4 +1,5 @@ import pytest + from jiwer import process_words, wer diff --git a/tests/test_measures.py b/tests/test_measures.py index 441249f..5a39ec1 100644 --- a/tests/test_measures.py +++ b/tests/test_measures.py @@ -1,5 +1,6 @@ +import functools import unittest -import pytest + import jiwer @@ -217,11 +218,14 @@ def test_fail_on_different_sentence_length(self): jiwer.wip, jiwer.mer, ]: - - def callback(): - method(["hello", "this", "sentence", "is fractured"], ["this sentence"]) - - self.assertRaises(ValueError, callback) + self.assertRaises( + ValueError, + functools.partial( + method, + ["hello", "this", "sentence", "is fractured"], + ["this sentence"], + ), + ) def test_known_values(self): # Taken from the "From WER and RIL to MER and WIL" paper, for link see README.md diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 62be954..62a6720 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -1,7 +1,22 @@ import unittest -from jiwer.transforms import * -from jiwer.transforms import ReduceToListOfListOfChars +from jiwer.transforms import ( + ExpandCommonEnglishContractions, + ReduceToListOfListOfChars, + ReduceToListOfListOfWords, + ReduceToSingleSentence, + RemoveEmptyStrings, + RemoveKaldiNonWords, + RemoveMultipleSpaces, + RemovePunctuation, + RemoveSpecificWords, + RemoveWhiteSpace, + Strip, + SubstituteRegexes, + SubstituteWords, + ToLowerCase, + ToUpperCase, +) def _apply_test_on(self: unittest.TestCase, tr, cases):