import re
import sys
import textwrap
from typing import Dict, List, Match, Optional, Pattern, Sequence, Set, Tuple


def _ignore_word_sub(
    text: str,
    ignore_word_regex: Optional[Pattern[str]],
) -> str:
    """Blank out every span of ``text`` matched by ``ignore_word_regex``.

    Each matched span is overwritten with as many spaces as it has
    characters, so the returned string always has the same length as
    ``text`` and every surviving character keeps its index. This keeps the
    ``start()``/``end()`` offsets of matches found in the returned string
    valid for slicing the original ``text``.

    Args:
        text: The text to filter.
        ignore_word_regex: Pattern whose matches are blanked out; when it
            is ``None`` (or otherwise falsy) ``text`` is returned as is.

    Returns:
        ``text`` with the ignored spans replaced by spaces.
    """
    if not ignore_word_regex:
        return text
    # A single-space replacement would shorten the text and shift the
    # offsets of everything that follows an ignored span.
    return ignore_word_regex.sub(
        lambda ignored: " " * (ignored.end() - ignored.start()),
        text,
    )


def extract_words(
    text: str,
    word_regex: Pattern[str],
    ignore_word_regex: Optional[Pattern[str]],
) -> List[str]:
    """Return what ``word_regex.findall`` yields on the filtered ``text``.

    Spans matched by ``ignore_word_regex`` are blanked out first, so
    nothing inside them is reported.
    """
    return word_regex.findall(_ignore_word_sub(text, ignore_word_regex))


def extract_words_iter(
    text: str,
    word_regex: Pattern[str],
    ignore_word_regex: Optional[Pattern[str]],
) -> List[Match[str]]:
    """Return the ``word_regex`` matches found in the filtered ``text``.

    Same filtering as ``extract_words``, but match objects are returned
    instead of strings so callers can read each word's position. Because
    the filtering preserves the text length, those positions index
    directly into the ``text`` passed in.
    """
    return list(word_regex.finditer(_ignore_word_sub(text, ignore_word_regex)))


def apply_uri_ignore_words(
    check_matches: List[Match[str]],
    line: str,
    word_regex: Pattern[str],
    ignore_word_regex: Optional[Pattern[str]],
    uri_regex: Pattern[str],
    uri_ignore_words: Set[str],
) -> List[Match[str]]:
    """Drop matches for ignorable words that occur inside URIs of ``line``.

    Every word extracted from a URI found by ``uri_regex`` that is listed
    in ``uri_ignore_words`` removes one entry from the result: the first
    remaining match whose text equals that word.

    Args:
        check_matches: Word matches to filter; this list is not modified.
        line: The line that is searched for URIs.
        word_regex: Pattern used to split a URI into words.
        ignore_word_regex: Optional pattern whose matches are blanked out
            of a URI before it is split into words.
        uri_regex: Pattern locating URIs in ``line``.
        uri_ignore_words: Words that may be ignored when part of a URI.

    Returns:
        ``check_matches`` itself when ``uri_ignore_words`` is empty,
        otherwise a new list without the removed matches.
    """
    if not uri_ignore_words:
        return check_matches

    # Work on a copy so the caller's list is left untouched.
    remaining = list(check_matches)
    for uri in re.findall(uri_regex, line):
        for uri_word in extract_words(uri, word_regex, ignore_word_regex):
            if uri_word not in uri_ignore_words:
                continue
            # Remove only the first match for this word; later occurrences
            # stay in the list and are still checked.
            for position, candidate in enumerate(remaining):
                if candidate.group() == uri_word:
                    del remaining[position]
                    break
    return remaining