Skip to content

Commit

Permalink
♻️ update spelling tools workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
brutus committed Aug 3, 2021
1 parent 4c43b91 commit 486a866
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 46 deletions.
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ lint:
check-guides:
.venv/bin/python check-guides.py --check --warn source/guide_*.rst

get-new-words: clean spelling
.venv/bin/python spelling_tools.py

add-new-words:
.venv/bin/python spelling_tools.py --merge

serve:
sphinx-autobuild -b html $(SOURCEDIR) $(BUILDDIR)/html

Expand Down
31 changes: 27 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,28 +40,51 @@ make clean serve
This will start a local webserver on http://127.0.0.1:8000, which always serves
the most recent version.

### Linting
## Linting

To lint all files, you can use `pre-commit`:

```shell
make lint
```

Or just to check the guides for consisitency:
Or just to check the guides for consistency:

```shell
make check-guides
```

### Spellcheck
## Spellcheck

To check the spelling you can use the spell check function of sphinx.
To check the spelling you can use the spell check function of Sphinx:

```shell
make spelling
```

### Add Words to Guide

If your guide needs to use words, that should not go into the _global
dictionary_ (see below), you can flag them with the `spelling` directive like
this (usually near the top of your guide):

```rst
.. spelling::
passwörd
anotherword
```

### Add Words to Global Dictionary

1. run `make get-new-words` to write a list of all spelling errors found
to `new_words.txt`
1. edit the resulting `new_words.txt`
1. decide wich words to keep for the global dict,
1. and wich might be better put into a guide local list (see the `spelling`
directive above for that)
1. if satisfied, run `make add-new-words` to merge them to the global dictionary
1. commit your changes :pencil2:

## License

All text and code in this repository is licensed under [CC-BY-NC-SA 4.0][].
Expand Down
149 changes: 107 additions & 42 deletions spelling_tools.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,56 +1,121 @@
import os
from os.path import isfile
from os.path import join
#!.venv/bin/python
"""
Creates `new_words.txt` with words that failed a spellcheck.
# sorts the given dict by the name and removes duplicates.
# writes the result to sorted_dict.txt
def sort_dict():
with open("source/dict.txt", "r") as f:
words = f.readlines()
If the `--merge` flag is given, the content of that file is merged in the
spelling dictionary instead.
sorted_words = sorted(words, key=lambda v: v.lower())
Run `make spelling` to run the actual spellcheck.
"""
import argparse
import sys
from collections import Counter
from pathlib import Path
from typing import List
from typing import Optional

with open("source/sorted_dict.txt", "w") as f:
last = ""
for word in sorted_words:
if last != word:
print(word[:-1], file=f)
last = word

*_, EPILOG = __doc__.strip().split("\n")
DESCRIPTION = "\n".join(_)

# reads the result of the spell checking (make spelling), counts for every misspelled words its amount
# and returns a sorted list of word and amount to the shell and creates a new_words.txt just with the
# words found in the analysis.
def read_terms_from_errors():
BASE_PATH = Path(__file__).parent.absolute()
NEW_WORDS_FILE = BASE_PATH / "new_words.txt"
SPELLCHECK_DIR = BASE_PATH / "build/spelling"
WORDLIST_FILE = BASE_PATH / "source/dict.txt"

new_words = {}
build_dir = "build/spelling/"
total_words = 0

for f in os.listdir(build_dir):
if isfile(join(build_dir, f)):
with open(join(build_dir, f), "r") as f:
lines = f.readlines()

for line in lines:
def get_misspelled_words(error_dir: Path) -> Counter:
"""Return counter for misspelled words form files in *error_dir*."""
words = Counter()
for error_file in (e for e in error_dir.glob("*.spelling")):
lines = error_file.read_text().split("\n")
for line in (line for l in lines if (line := l.strip())):
try:
word = line.split("(")[1].split(")")[0]
total_words += 1
except IndexError:
print("failed to parse line: {line}", file=sys.stderr)
continue
words[word] += 1
return words


def merge_wordfiles(*files: List[Path], out_file: Optional[Path] = None) -> List:
"""Combine words form *files*, return sorted results and write to *out_file* if set."""
words = set()
for wordfile in (f for f in files if f.exists()):
with wordfile.open() as f:
for line in (line for l in f if (line := l.strip())):
words.add(line)
words = sorted(words, key=lambda word: (word.lower(), word))
if out_file is not None:
out_file.write_text("\n".join(words) + "\n")
return words


def write_errors_to_newlist(error_dir: Path, outfile: Path) -> Counter:
"""Write errors found in *error_dir* to *outfile*, sorted by hit count."""
if not (error_dir.exists() and error_dir.is_dir()):
raise ValueError(f"'{error_dir}' not found")
error_counter = get_misspelled_words(error_dir)
outfile.write_text("\n".join((t[0] for t in error_counter.most_common())))
return error_counter

if word not in new_words:
new_words[word] = 1
else:
new_words[word] += 1

with open("new_words.txt", "w") as f:
for word in sorted(new_words, key=lambda w: new_words[w], reverse=True):
print(word, file=f)
print(word, new_words[word])
def merge_newlist_to_wordlist(newlist: Path, wordlist: Path) -> List:
"""Merge *newlist* to *wordlist* and return the results."""
if not (newlist.exists() and newlist.is_file()):
raise ValueError(f"'{newlist}' not found")
return merge_wordfiles(newlist, wordlist, out_file=wordlist)

print(
"\nFound %d unique words in a total of %d misspelled words"
% (len(new_words), total_words)

def get_args(argv=None):
ap = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG)
ap.add_argument(
"--merge", action="store_true", help="merge new words to dictionary"
)
return ap.parse_args(argv)


def main(
merge=False,
newlist: Path = NEW_WORDS_FILE,
wordlist: Path = WORDLIST_FILE,
error_dir: Path = SPELLCHECK_DIR,
):
"""
Write errors found in *error_dir* to *newlist*, sorted by hit count.
Or merge *newlist* to *wordlist* instead, if *merge* is set.
"""
if merge:
try:
words = merge_newlist_to_wordlist(newlist, wordlist)
print(f"Wrote {len(words)} words to dictionary '{wordlist}'.")
except ValueError:
print(
f"[ERROR] no wordlist found in '{newlist}'.\n",
"You can run without the `--merge` flag, to create it.",
file=sys.stderr,
)
return 4
else:
try:
error_counter = write_errors_to_newlist(error_dir, newlist)
print(
f"Found {len(error_counter)} unique words in a total of "
f"{sum(error_counter.values())} misspelled words."
)
except ValueError:
print(
f"[ERROR] no spellcheck results found in '{error_dir}'.\n",
"You can run `make spelling` to create them.",
file=sys.stderr,
)
return 3

return 0


read_terms_from_errors()
sort_dict()
if __name__ == "__main__":
args = get_args()
sys.exit(main(**vars(args)))

0 comments on commit 486a866

Please sign in to comment.