From a69a71fdf0af6401c2310676da271099c01f81d8 Mon Sep 17 00:00:00 2001 From: Evan Chen Date: Tue, 2 Jan 2024 17:20:21 -0800 Subject: [PATCH 1/7] feat: Implement ignore_words_cased logic Still need to write tests for it though --- codespell_lib/_codespell.py | 46 +++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index 9eeb8c3ac4..afbfbfa333 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -655,14 +655,20 @@ def parse_options( return options, parser, used_cfg_files -def parse_ignore_words_option(ignore_words_option: List[str]) -> Set[str]: +def parse_ignore_words_option( + ignore_words_option: List[str], +) -> Tuple[Set[str], Set[str]]: ignore_words: Set[str] = set() + ignore_words_cased: Set[str] = set() if ignore_words_option: for comma_separated_words in ignore_words_option: - ignore_words.update( - word.strip() for word in comma_separated_words.split(",") - ) - return ignore_words + for word in comma_separated_words.split(","): + word = word.strip() + if word == word.lower(): + ignore_words.add(word) + else: + ignore_words_cased.add(word) + return (ignore_words, ignore_words_cased) def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None: @@ -670,9 +676,16 @@ def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None: exclude_lines.update(line.rstrip() for line in f) -def build_ignore_words(filename: str, ignore_words: Set[str]) -> None: +def build_ignore_words( + filename: str, ignore_words: Set[str], ignore_words_cased: Set[str] +) -> None: with open(filename, encoding="utf-8") as f: - ignore_words.update(line.strip() for line in f) + for line in f: + word = line.strip() + if word == word.lower(): + ignore_words.add(word) + else: + ignore_words_cased.add(word) def add_misspelling( @@ -865,6 +878,7 @@ def parse_file( colors: TermColors, summary: Optional[Summary], misspellings: Dict[str, Misspelling], + ignore_words_cased: Set[str], exclude_lines: Set[str], file_opener: FileOpener, word_regex: Pattern[str], @@ -885,6 +899,8 @@ def parse_file( else: if options.check_filenames: for word in extract_words(filename, word_regex, ignore_word_regex): + if word in ignore_words_cased: + continue lword = word.lower() if lword not in misspellings: continue @@ -958,6 +974,8 @@ def parse_file( ) for match in check_matches: word = match.group() + if word in ignore_words_cased: + continue lword = word.lower() if lword in misspellings: # Sometimes we find a 'misspelling' which is actually a valid word @@ -1112,7 +1130,10 @@ def main(*args: str) -> int: ignore_word_regex = None ignore_words_files = options.ignore_words or [] - ignore_words = parse_ignore_words_option(options.ignore_words_list) + ignore_words, ignore_words_cased = parse_ignore_words_option( + options.ignore_words_list + ) + for ignore_words_file in ignore_words_files: if not os.path.isfile(ignore_words_file): print( @@ -1121,7 +1142,7 @@ def main(*args: str) -> int: ) parser.print_help() return EX_USAGE - build_ignore_words(ignore_words_file, ignore_words) + build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased) uri_regex = options.uri_regex or uri_regex_def try: @@ -1133,7 +1154,10 @@ def main(*args: str) -> int: ) parser.print_help() return EX_USAGE - uri_ignore_words = parse_ignore_words_option(options.uri_ignore_words_list) + uri_ignore_words_lowercase, uri_ignore_words_cased = parse_ignore_words_option( + options.uri_ignore_words_list + ) + uri_ignore_words = uri_ignore_words_lowercase | uri_ignore_words_cased dictionaries = options.dictionary or ["-"] @@ -1242,6 +1266,7 @@ def main(*args: str) -> int: colors, summary, misspellings, + ignore_words_cased, exclude_lines, file_opener, word_regex, @@ -1266,6 +1291,7 @@ def main(*args: str) -> int: colors, summary, misspellings, + ignore_words_cased, exclude_lines, file_opener, word_regex, From 4a78cb5058a4a427257c281f14aa2481c82626c8 Mon Sep 17 00:00:00 2001 From: Evan Chen Date: Tue, 2 Jan 2024 17:39:36 -0800 Subject: [PATCH 2/7] tests: Add tests for the case feature --- codespell_lib/tests/test_basic.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index e768917ceb..2e1705a5f6 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -343,6 +343,34 @@ def test_ignore_dictionary( assert cs.main("-I", fname, bad_name) == 1 +def test_ignore_words_with_cases( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + """Test ignore dictionary functionality.""" + bad_name = tmp_path / "bad.txt" + bad_name.write_text( + "1 MIS (Management Information System) 1\n" + "2 Les Mis (1980 musical) 2\n" + "3 mis 3\n" + ) + assert cs.main(bad_name) == 3 + fname = tmp_path / "ignore.txt" + + fname.write_text("miS") + assert cs.main("-I", fname, bad_name) == 3 + assert cs.main("-LmiS", bad_name) == 3 + fname.write_text("MIS") + assert cs.main("-I", fname, bad_name) == 2 + assert cs.main("-LMIS", bad_name) == 2 + fname.write_text("MIS\nMis") + assert cs.main("-I", fname, bad_name) == 1 + assert cs.main("-LMIS,Mis", bad_name) == 1 + fname.write_text("mis") + assert cs.main("-I", fname, bad_name) == 0 + assert cs.main("-Lmis", bad_name) == 0 + + def test_ignore_word_list( tmp_path: Path, capsys: pytest.CaptureFixture[str], From 734580bc65bfb2af5c2b1d576b9a4f5813dbc763 Mon Sep 17 00:00:00 2001 From: Evan Chen Date: Tue, 2 Jan 2024 17:52:46 -0800 Subject: [PATCH 3/7] Pass ruff --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 561d1777c2..2a8176beb0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -160,7 +160,7 @@ max-complexity = 45 [tool.ruff.lint.pylint] allow-magic-value-types = ["bytes", "int", "str",] -max-args = 12 +max-args = 13 max-branches = 49 max-returns = 11 -max-statements = 111 +max-statements = 113 From d4c77cf4211ab29bbd2f327f049c94fc0adf71cf Mon Sep 17 00:00:00 2001 From: Evan Chen Date: Wed, 3 Jan 2024 11:05:54 -0800 Subject: [PATCH 4/7] tests: also check case sensitivity on filename --- codespell_lib/tests/test_basic.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index 2e1705a5f6..0fa3b320cf 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -347,28 +347,37 @@ def test_ignore_words_with_cases( tmp_path: Path, capsys: pytest.CaptureFixture[str], ) -> None: - """Test ignore dictionary functionality.""" - bad_name = tmp_path / "bad.txt" + """Test case-sensitivity implemented for -I and -L options in #3272.""" + bad_name = tmp_path / "MIS.txt" bad_name.write_text( "1 MIS (Management Information System) 1\n" "2 Les Mis (1980 musical) 2\n" "3 mis 3\n" ) assert cs.main(bad_name) == 3 + assert cs.main(bad_name, "-f") == 4 fname = tmp_path / "ignore.txt" fname.write_text("miS") assert cs.main("-I", fname, bad_name) == 3 assert cs.main("-LmiS", bad_name) == 3 + assert cs.main("-I", fname, "-f", bad_name) == 4 + assert cs.main("-LmiS", "-f", bad_name) == 4 fname.write_text("MIS") assert cs.main("-I", fname, bad_name) == 2 assert cs.main("-LMIS", bad_name) == 2 + assert cs.main("-I", fname, "-f", bad_name) == 2 + assert cs.main("-LMIS", "-f", bad_name) == 2 fname.write_text("MIS\nMis") assert cs.main("-I", fname, bad_name) == 1 assert cs.main("-LMIS,Mis", bad_name) == 1 + assert cs.main("-I", fname, "-f", bad_name) == 1 + assert cs.main("-LMIS,Mis", "-f", bad_name) == 1 fname.write_text("mis") assert cs.main("-I", fname, bad_name) == 0 assert cs.main("-Lmis", bad_name) == 0 + assert cs.main("-I", fname, "-f", bad_name) == 0 + assert cs.main("-Lmis", "-f", bad_name) == 0 def test_ignore_word_list( From 8b200cd8e697d6a3f8cfae0fbb762ab7fb3e2e4e Mon Sep 17 00:00:00 2001 From: Evan Chen Date: Thu, 4 Jan 2024 09:32:00 -0800 Subject: [PATCH 5/7] refactor: Incorporate suggestions --- codespell_lib/_codespell.py | 41 ++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index afbfbfa333..b25ad9543c 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -20,12 +20,13 @@ import configparser import ctypes import fnmatch +import itertools import os import re import sys import textwrap from ctypes import wintypes -from typing import Any, Dict, List, Match, Optional, Pattern, Sequence, Set, Tuple +from typing import Any, Dict, List, Iterable, Match, Optional, Pattern, Sequence, Set, Tuple # autogenerated by setuptools_scm from ._version import ( # type: ignore[import-not-found] @@ -655,6 +656,17 @@ def parse_options( return options, parser, used_cfg_files +def process_ignore_words( + words: Iterable[str], ignore_words: Set[str], ignore_words_cased: Set[str] +) -> None: + for word in words: + word = word.strip() + if word == word.lower(): + ignore_words.add(word) + else: + ignore_words_cased.add(word) + + def parse_ignore_words_option( ignore_words_option: List[str], ) -> Tuple[Set[str], Set[str]]: @@ -662,12 +674,11 @@ def parse_ignore_words_option( ignore_words_cased: Set[str] = set() if ignore_words_option: for comma_separated_words in ignore_words_option: - for word in comma_separated_words.split(","): - word = word.strip() - if word == word.lower(): - ignore_words.add(word) - else: - ignore_words_cased.add(word) + process_ignore_words( + (word.strip() for word in comma_separated_words.split(",")), + ignore_words, + ignore_words_cased, + ) return (ignore_words, ignore_words_cased) @@ -680,12 +691,9 @@ def build_ignore_words( filename: str, ignore_words: Set[str], ignore_words_cased: Set[str] ) -> None: with open(filename, encoding="utf-8") as f: - for line in f: - word = line.strip() - if word == word.lower(): - ignore_words.add(word) - else: - ignore_words_cased.add(word) + process_ignore_words( + (line.strip() for line in f), ignore_words, ignore_words_cased + ) def add_misspelling( @@ -1154,10 +1162,9 @@ def main(*args: str) -> int: ) parser.print_help() return EX_USAGE - uri_ignore_words_lowercase, uri_ignore_words_cased = parse_ignore_words_option( - options.uri_ignore_words_list - ) - uri_ignore_words = uri_ignore_words_lowercase | uri_ignore_words_cased + uri_ignore_words = { + itertools.chain(parse_ignore_words_option(options.uri_ignore_words_list)) + } dictionaries = options.dictionary or ["-"] From efd930fa0e42d2ef9d6e1f7e66cbd6f567867f67 Mon Sep 17 00:00:00 2001 From: Evan Chen Date: Thu, 4 Jan 2024 09:39:01 -0800 Subject: [PATCH 6/7] fix: just do set().union(*...) --- codespell_lib/_codespell.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index b25ad9543c..f5519acd3a 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -20,13 +20,23 @@ import configparser import ctypes import fnmatch -import itertools import os import re import sys import textwrap from ctypes import wintypes -from typing import Any, Dict, List, Iterable, Match, Optional, Pattern, Sequence, Set, Tuple +from typing import ( + Any, + Dict, + Iterable, + List, + Match, + Optional, + Pattern, + Sequence, + Set, + Tuple, +) # autogenerated by setuptools_scm from ._version import ( # type: ignore[import-not-found] @@ -1162,9 +1172,9 @@ def main(*args: str) -> int: ) parser.print_help() return EX_USAGE - uri_ignore_words = { - itertools.chain(parse_ignore_words_option(options.uri_ignore_words_list)) - } + uri_ignore_words = set().union( + *parse_ignore_words_option(options.uri_ignore_words_list) + ) dictionaries = options.dictionary or ["-"] From 0619eab0a7d998b7c0ff0f4be31575590475bbce Mon Sep 17 00:00:00 2001 From: Evan Chen Date: Thu, 4 Jan 2024 10:50:44 -0800 Subject: [PATCH 7/7] Use itertools.chain instead of set().union --- codespell_lib/_codespell.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index f5519acd3a..777c645f91 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -20,6 +20,7 @@ import configparser import ctypes import fnmatch +import itertools import os import re import sys @@ -1172,8 +1173,9 @@ def main(*args: str) -> int: ) parser.print_help() return EX_USAGE - uri_ignore_words = set().union( - *parse_ignore_words_option(options.uri_ignore_words_list) + + uri_ignore_words = set( + itertools.chain(*parse_ignore_words_option(options.uri_ignore_words_list)) ) dictionaries = options.dictionary or ["-"]