-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
make rules more inspectable (#5), add Python tests
- Loading branch information
1 parent
68f8157
commit 7057e90
Showing
8 changed files
with
269 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import pytest | ||
import pickle | ||
from nlprule import Tokenizer, Rules, SplitOn | ||
|
||
|
||
@pytest.fixture()
def tokenizer_and_rules():
    """Build the English tokenizer and rules used by the tests below.

    Returns a ``(tokenizer, rules)`` tuple. The rules are constructed from
    the tokenizer and a ``SplitOn`` over ".", "?" and "!".
    """
    # Both paths are relative, so they resolve against the working directory
    # the tests are run from.
    en_tokenizer = Tokenizer("../../storage/en_tokenizer.bin")
    splitter = SplitOn([".", "?", "!"])
    en_rules = Rules("../../storage/en_rules.bin", en_tokenizer, splitter)
    return en_tokenizer, en_rules
|
||
|
||
def test_correct(tokenizer_and_rules):
    """Check ``rules.correct`` on a handful of known sentence corrections."""
    _, rules = tokenizer_and_rules

    # just some sample corrections, tests covering all rules are done in rust
    samples = (
        (
            "He wants that you send him an email.",
            "He wants you to send him an email.",
        ),
        (
            "Thanks for your’s and Lucy’s help.",
            "Thanks for yours and Lucy’s help.",
        ),
        (
            "I can due his homework.",
            "I can do his homework.",
        ),
    )

    for faulty, expected in samples:
        assert rules.correct(faulty) == expected
|
||
|
||
def test_suggest(tokenizer_and_rules):
    """Check the spans and replacements returned by ``rules.suggest``.

    Also checks that applying all returned suggestions to the input text
    yields the expected corrected sentence.
    """
    _, rules = tokenizer_and_rules

    text = "She was not been here since Monday instead off working."
    suggestions = rules.suggest(text)

    # Exactly two suggestions are expected for this sentence.
    assert len(suggestions) == 2
    first = suggestions[0]
    second = suggestions[1]

    assert first.start == 4
    assert first.end == 16
    assert set(first.replacements) == {"was not", "has not been"}

    assert second.start == 35
    assert second.end == 46
    assert set(second.replacements) == {"instead of"}

    corrected = rules.apply_suggestions(text, suggestions)
    assert corrected == "She was not here since Monday instead of working."
|
||
|
||
def test_rules_inspectable(tokenizer_and_rules):
    """Check that the rule behind a suggestion can be looked up and inspected.

    Fetches the rule named by a suggestion's ``source``, verifies its
    metadata (``id``, ``short``, ``url``) and verifies that each of its
    examples carries a suggestion which, applied to that example's own text,
    produces the corrected sentence.
    """
    _, rules = tokenizer_and_rules

    suggestion = rules.suggest("He was taken back by my response.")[0]

    rule = rules.rule(suggestion.source)
    assert rule.id == suggestion.source

    assert rule.short == "Commonly confused word"
    assert rule.url == "https://www.merriam-webster.com/dictionary/take%20aback"
    assert rule.id == "BACK_ABACK"

    expected_texts = (
        "He was totally taken back by my response.",
        "He was totally taken a bag by my response.",
    )
    assert len(rule.examples) == len(expected_texts)

    # Each example is checked against its OWN suggestion. The previous version
    # re-applied examples[0] in the second check, so the suggestion attached
    # to examples[1] was never exercised.
    for example, expected_text in zip(rule.examples, expected_texts):
        assert example.text == expected_text
        assert example.suggestion is not None

        assert (
            rules.apply_suggestions(example.text, [example.suggestion])
            == "He was totally taken aback by my response."
        )
|
||
|
||
def test_pickle_roundtrip_works(tokenizer_and_rules):
    """Check that the tokenizer and rules survive a pickle dump/load cycle."""
    original_tokenizer, original_rules = tokenizer_and_rules

    # Serialize both objects together as one tuple, then restore them.
    payload = pickle.dumps((original_tokenizer, original_rules))
    restored_tokenizer, restored_rules = pickle.loads(payload)

    # The restored rules object must still expose a non-empty rule list.
    assert len(restored_rules.rules) > 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.