Skip to content

Commit

Permalink
make rules more inspectable (#5), add Python tests
Browse files Browse the repository at this point in the history
  • Loading branch information
bminixhofer committed Jan 16, 2021
1 parent 68f8157 commit 7057e90
Show file tree
Hide file tree
Showing 8 changed files with 269 additions and 51 deletions.
106 changes: 95 additions & 11 deletions bindings/python/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use flate2::read::GzDecoder;
use nlprule::types::*;
use nlprule::{
rule::Rule,
rule::{Example, Rule},
rules::{correct, Rules},
tokenizer::{finalize, tag::Tagger},
tokenizer::{Tokenizer, TokenizerOptions},
types::*,
};
use pyo3::prelude::*;
use pyo3::types::PyString;
Expand Down Expand Up @@ -403,8 +403,12 @@ impl PySuggestion {
}

#[getter]
fn text(&self) -> Vec<&str> {
self.suggestion.text.iter().map(|x| x.as_str()).collect()
fn replacements(&self) -> Vec<&str> {
    // Expose the suggestion's replacement candidates as borrowed &str
    // slices; pyo3 converts them into a Python list of str.
    self.suggestion.replacements.iter().map(String::as_str).collect()
}

#[getter]
Expand Down Expand Up @@ -573,21 +577,73 @@ impl PyTokenizer {
}
}

/// An example associated with a grammatical rule.
///
/// Attributes:
/// * text (str): the text of this example
/// * suggestion (Option[Suggestion]): The suggestion for this example.
/// If this is None, it is an example of where the rule should not trigger.
#[pyclass(name = "Example", module = "nlprule")]
struct PyExample {
    // Owned copy of the example text, served by the `text` getter.
    text: String,
    // GIL-managed handle to the wrapped suggestion; `None` marks a
    // negative example (the rule must not trigger on this text).
    suggestion: Option<Py<PySuggestion>>,
}

impl PyExample {
    /// Builds the Python wrapper for a core `Example`, cloning the text and,
    /// when present, allocating the suggestion as a `PySuggestion` owned by
    /// the Python heap. Fails only if `Py::new` fails.
    fn from_example(py: Python, example: &Example) -> PyResult<Self> {
        let suggestion = example
            .suggestion()
            .map(|s| Py::new(py, PySuggestion::from((*s).clone())))
            .transpose()?;

        Ok(PyExample {
            text: example.text().to_owned(),
            suggestion,
        })
    }
}

#[pymethods]
impl PyExample {
    // Python-side getter for `example.text`.
    #[getter]
    fn text(&self) -> &str {
        &self.text
    }

    // Python-side getter for `example.suggestion`; returns None for
    // negative examples. Borrows the GIL-owned suggestion for 'py.
    #[getter]
    fn suggestion<'py>(&'py self, py: Python<'py>) -> Option<PyRef<'py, PySuggestion>> {
        self.suggestion.as_ref().map(|x| x.borrow(py))
    }
}

/// One grammatical rule.
///
/// Can not be created directly but accessed by the `.rules` attribute on the rules.
/// Attributes:
/// * id (str): The id of this rule.
/// * url (Option[str]): A URL for more information.
/// * short (Option[str]): A short description of this rule e. g. "Possible typo".
/// * examples (List[Example]): Examples associated with this rule. Always at least one.
#[pyclass(name = "Rule", module = "nlprule")]
struct PyRule {
    // Unique identifier of the rule (e.g. "BACK_ABACK").
    id: String,
    // Optional URL pointing to background information about the rule.
    url: Option<String>,
    // Optional short human-readable description, e.g. "Possible typo".
    short: Option<String>,
    // GIL-managed wrappers for the rule's associated examples.
    examples: Vec<Py<PyExample>>,
}

impl PyRule {
fn from_rule(rule: &Rule) -> Self {
PyRule {
id: rule.id().to_string(),
}
/// Builds the Python wrapper for a core `Rule`, copying its metadata and
/// wrapping every example; the first failing wrap aborts with the error.
fn from_rule(py: Python, rule: &Rule) -> PyResult<Self> {
    // Convert the examples up-front so any Py::new failure surfaces here.
    let examples = rule
        .examples()
        .iter()
        .map(|example| PyExample::from_example(py, example).and_then(|e| Py::new(py, e)))
        .collect::<PyResult<Vec<_>>>()?;

    Ok(PyRule {
        id: rule.id().to_owned(),
        url: rule.url().map(String::from),
        short: rule.short().map(String::from),
        examples,
    })
}
}

Expand All @@ -597,6 +653,21 @@ impl PyRule {
// Python-side getter for `rule.id`.
fn id(&self) -> &str {
    &self.id
}

// Python-side getter for `rule.url`; borrows the stored string if set.
#[getter]
fn url(&self) -> Option<&str> {
    self.url.as_deref()
}

// Python-side getter for `rule.short`; borrows the stored string if set.
#[getter]
fn short(&self) -> Option<&str> {
    self.short.as_deref()
}

// Python-side getter for `rule.examples`; borrows each GIL-owned example
// for the lifetime 'py.
#[getter]
fn examples<'py>(&'py self, py: Python<'py>) -> Vec<PyRef<'py, PyExample>> {
    self.examples.iter().map(|x| x.borrow(py)).collect()
}
}

/// The grammatical rules.
Expand Down Expand Up @@ -666,8 +737,21 @@ impl PyRules {
}

#[getter]
fn rules(&self) -> Vec<PyRule> {
self.rules.rules().iter().map(PyRule::from_rule).collect()
/// Wraps every rule in the set as a `PyRule`; the first conversion error
/// aborts and is propagated to Python.
fn rules(&self, py: Python) -> PyResult<Vec<PyRule>> {
    let mut wrapped = Vec::new();
    for rule in self.rules.rules() {
        wrapped.push(PyRule::from_rule(py, rule)?);
    }
    Ok(wrapped)
}

/// Finds a rule by ID.
///
/// Returns `Ok(None)` when no rule with the given ID exists; errors only
/// if wrapping the found rule for Python fails.
fn rule(&self, py: Python, id: &str) -> PyResult<Option<PyRule>> {
    self.rules
        .rule(id)
        .map(|rule| PyRule::from_rule(py, rule))
        .transpose()
}

/// Get suggestions for the given sentence.
Expand Down Expand Up @@ -794,7 +878,7 @@ impl PyRules {
Suggestion {
source: x.source().to_string(),
message: x.message().to_string(),
text: x.text().iter().map(|x| x.to_string()).collect(),
replacements: x.replacements().iter().map(|x| x.to_string()).collect(),
start: x.start(),
end: x.end(),
}
Expand Down
87 changes: 87 additions & 0 deletions bindings/python/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import pytest
import pickle
from nlprule import Tokenizer, Rules, SplitOn


@pytest.fixture()
def tokenizer_and_rules():
    """Load the English tokenizer and rules binaries used by every test.

    Returns:
        tuple: ``(Tokenizer, Rules)`` built from the local ``storage`` dir,
        with sentences split on ``.``, ``?`` and ``!``.
    """
    tokenizer = Tokenizer("../../storage/en_tokenizer.bin")
    rules = Rules("../../storage/en_rules.bin", tokenizer, SplitOn([".", "?", "!"]))
    return tokenizer, rules


def test_correct(tokenizer_and_rules):
    """Spot-check end-to-end corrections (full rule coverage lives in Rust)."""
    (tokenizer, rules) = tokenizer_and_rules

    cases = [
        (
            "He wants that you send him an email.",
            "He wants you to send him an email.",
        ),
        (
            "Thanks for your’s and Lucy’s help.",
            "Thanks for yours and Lucy’s help.",
        ),
        (
            "I can due his homework.",
            "I can do his homework.",
        ),
    ]
    for text, expected in cases:
        assert rules.correct(text) == expected


def test_suggest(tokenizer_and_rules):
    """Suggestions carry character spans and replacement candidates."""
    (tokenizer, rules) = tokenizer_and_rules

    text = "She was not been here since Monday instead off working."

    found = rules.suggest(text)
    assert len(found) == 2
    first, second = found

    assert (first.start, first.end) == (4, 16)
    assert set(first.replacements) == {"was not", "has not been"}

    assert (second.start, second.end) == (35, 46)
    assert set(second.replacements) == {"instead of"}

    # Applying both suggestions yields the fully corrected sentence.
    assert rules.apply_suggestions(text, found) == (
        "She was not here since Monday instead of working."
    )


def test_rules_inspectable(tokenizer_and_rules):
    """A suggestion's source rule can be looked up and fully inspected."""
    (tokenizer, rules) = tokenizer_and_rules

    suggestion = rules.suggest("He was taken back by my response.")[0]

    rule = rules.rule(suggestion.source)
    assert rule.id == suggestion.source

    assert rule.short == "Commonly confused word"
    assert rule.url == "https://www.merriam-webster.com/dictionary/take%20aback"
    assert rule.id == "BACK_ABACK"

    assert len(rule.examples) == 2
    assert rule.examples[0].text == "He was totally taken back by my response."
    assert rule.examples[0].suggestion is not None

    assert (
        rules.apply_suggestions(rule.examples[0].text, [rule.examples[0].suggestion])
        == "He was totally taken aback by my response."
    )

    assert rule.examples[1].text == "He was totally taken a bag by my response."
    assert rule.examples[1].suggestion is not None

    # Fixed copy-paste bug: this block previously re-applied examples[0]'s
    # suggestion a second time instead of exercising examples[1]'s own
    # round-trip. Both examples correct to the same "taken aback" sentence.
    assert (
        rules.apply_suggestions(rule.examples[1].text, [rule.examples[1].suggestion])
        == "He was totally taken aback by my response."
    )


def test_pickle_roundtrip_works(tokenizer_and_rules):
    """Tokenizer and Rules survive a pickle dump/load cycle intact."""
    (tokenizer, rules) = tokenizer_and_rules

    restored = pickle.loads(pickle.dumps((tokenizer, rules)))
    (tokenizer, rules) = restored

    # The restored Rules object still exposes its full rule set.
    assert len(rules.rules) > 0
33 changes: 18 additions & 15 deletions nlprule/src/compile/parse_structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ impl Rule {

assert!(!message_parts.is_empty(), "Rules must have a message.");

let mut tests = Vec::new();
let mut examples = Vec::new();
for example in &data.examples {
if example.kind.is_some() {
return Err(Error::Unimplemented(
Expand Down Expand Up @@ -796,26 +796,27 @@ impl Rule {
let length = marker.text.chars().count();

if let Some(correction_text) = &example.correction {
let mut text: Vec<_> =
let mut replacements: Vec<_> =
correction_text.split('|').map(|x| x.to_string()).collect();

text = if char_length == 0 {
replacements = if char_length == 0 {
// title case if at start
text.into_iter()
replacements
.into_iter()
.map(|x| {
utils::apply_to_first(&x, |c| c.to_uppercase().collect())
})
.collect()
} else {
text
replacements
};

suggestion = Some(Suggestion {
source: "_Test".to_string(),
message: "_Test".to_string(),
start: char_length,
end: char_length + length,
text,
replacements,
});
}

Expand All @@ -824,22 +825,24 @@ impl Rule {
}
}

tests.push(Test {
examples.push(Example {
text: texts.join(""),
suggestion,
});
}

Ok(Rule {
engine,
tests,
examples,
start,
end,
suggesters,
message: Synthesizer {
parts: message_parts,
use_titlecase_adjust: true,
},
url: data.url.map(|x| x.to_string()),
short: data.short.map(|x| x.to_string()),
id: String::new(),
on: data.default.map_or(true, |x| x != "off"),
})
Expand Down Expand Up @@ -1202,10 +1205,10 @@ impl DisambiguationRule {
None
};

let mut tests = Vec::new();
let mut examples = Vec::new();

if let Some(examples) = data.examples.as_ref() {
for example in examples {
if let Some(examples_structure) = data.examples.as_ref() {
for example in examples_structure {
let mut texts = Vec::new();
let mut char_span: Option<(usize, usize)> = None;
let mut char_length = 0;
Expand Down Expand Up @@ -1236,8 +1239,8 @@ impl DisambiguationRule {
let text = texts.join("");

let test = match example.kind.as_str() {
"untouched" => DisambiguationTest::Unchanged(text),
"ambiguous" => DisambiguationTest::Changed(DisambiguationChange {
"untouched" => DisambiguationExample::Unchanged(text),
"ambiguous" => DisambiguationExample::Changed(DisambiguationChange {
text,
before: parse_tag_form(
example
Expand All @@ -1258,7 +1261,7 @@ impl DisambiguationRule {
x => panic!("unknown disambiguation example type {}", x),
};

tests.push(test);
examples.push(test);
}
}

Expand All @@ -1271,7 +1274,7 @@ impl DisambiguationRule {
disambiguations,
start,
end,
tests,
examples,
id: String::new(),
})
}
Expand Down
2 changes: 1 addition & 1 deletion nlprule/src/rule/disambiguation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ pub struct DisambiguationChange {
}

/// A single example attached to a disambiguation rule.
#[derive(Debug, Serialize, Deserialize)]
pub enum DisambiguationExample {
    /// Text on which the disambiguation must leave everything unchanged.
    Unchanged(String),
    /// Text together with the change the rule is expected to make
    /// (see `DisambiguationChange` for the before/after forms).
    Changed(DisambiguationChange),
}
Loading

0 comments on commit 7057e90

Please sign in to comment.