Skip to content

Commit

Permalink
derive more clones, add to_writer impl, restore lost api (bminixhofer#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
bminixhofer authored and drahnr committed Apr 7, 2021
1 parent f309492 commit 111ea25
Show file tree
Hide file tree
Showing 14 changed files with 94 additions and 88 deletions.
7 changes: 2 additions & 5 deletions nlprule/src/compile/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::{
id::Category,
DisambiguationRule, Rule,
},
rules::{Rules, RulesLangOptions, RulesOptions},
rules::{Rules, RulesLangOptions},
tokenizer::{
chunk,
multiword::{MultiwordTagger, MultiwordTaggerFields},
Expand Down Expand Up @@ -353,10 +353,7 @@ impl Rules {
);
}

Rules {
rules,
options: RulesOptions::default(),
}
Rules { rules }
}
}

Expand Down
25 changes: 13 additions & 12 deletions nlprule/src/compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,35 +61,37 @@ impl BuildFilePaths {
#[derive(Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
#[error("input/output error")]
#[error(transparent)]
Io(#[from] std::io::Error),
#[error("serialization error")]
#[error(transparent)]
Serialization(#[from] bincode::Error),
#[error("JSON deserialization error")]
#[error(transparent)]
NlpruleError(#[from] crate::Error),
#[error(transparent)]
Json(#[from] serde_json::Error),
#[error("error loading SRX")]
#[error(transparent)]
Srx(#[from] srx::Error),
#[error("language options do not exist for '{lang_code}'")]
LanguageOptionsDoNotExist { lang_code: String },
#[error("regex syntax error: {0}")]
#[error(transparent)]
RegexSyntax(#[from] regex_syntax::ast::Error),
#[error("regex compilation error: {0}")]
Regex(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("unexpected condition: {0}")]
Unexpected(String),
#[error("feature not implemented: {0}")]
Unimplemented(String),
#[error("error parsing to integer: {0}")]
#[error(transparent)]
ParseError(#[from] ParseIntError),
#[error("unknown error")]
#[error("unknown error: {0}")]
Other(#[from] Box<dyn std::error::Error + Send + Sync + 'static>),
}

/// Compiles the binaries from a build directory.
pub fn compile(
build_dir: impl AsRef<Path>,
mut rules_dest: impl io::Write,
mut tokenizer_dest: impl io::Write,
rules_dest: impl io::Write,
tokenizer_dest: impl io::Write,
) -> Result<(), Error> {
let paths = BuildFilePaths::new(&build_dir);

Expand Down Expand Up @@ -185,12 +187,11 @@ pub fn compile(
srx::SRX::from_str(&fs::read_to_string(&paths.srx_path)?)?.language_rules(lang_code),
tokenizer_lang_options,
)?;

bincode::serialize_into(&mut tokenizer_dest, &tokenizer)?;
tokenizer.to_writer(tokenizer_dest)?;

info!("Creating grammar rules.");
let rules = Rules::from_xml(&paths.grammar_path, &mut build_info, rules_lang_options);
bincode::serialize_into(&mut rules_dest, &rules)?;
rules.to_writer(rules_dest)?;

// we need to write the regex cache after building the rules, otherwise it isn't fully populated
let f = BufWriter::new(File::create(&paths.regex_cache_path)?);
Expand Down
4 changes: 2 additions & 2 deletions nlprule/src/filter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use enum_dispatch::enum_dispatch;
use serde::{Deserialize, Serialize};

#[enum_dispatch]
#[derive(Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum Filter {
NoDisambiguationEnglishPartialPosTagFilter,
}
Expand All @@ -14,7 +14,7 @@ pub trait Filterable {
fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool;
}

#[derive(Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct NoDisambiguationEnglishPartialPosTagFilter {
pub(crate) id: GraphId,
pub(crate) regexp: Regex,
Expand Down
3 changes: 2 additions & 1 deletion nlprule/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,9 @@ pub use tokenizer::Tokenizer;
pub enum Error {
#[error(transparent)]
Io(#[from] io::Error),
/// (De)serialization error. Can have occurred during deserialization or during serialization.
#[error(transparent)]
Deserialization(#[from] bincode::Error),
Serialization(#[from] bincode::Error),
#[error(transparent)]
IdError(#[from] rule::id::Error),
}
Expand Down
6 changes: 3 additions & 3 deletions nlprule/src/rule/disambiguation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ impl PosFilter {
}
}

#[derive(Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum Disambiguation {
Remove(Vec<either::Either<owned::WordData, PosFilter>>),
Add(Vec<owned::WordData>),
Expand Down Expand Up @@ -210,15 +210,15 @@ impl Disambiguation {
}
}

#[derive(Debug, Deserialize, Serialize)]
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct DisambiguationChange {
pub text: String,
pub char_span: Range<usize>,
pub before: owned::Word,
pub after: owned::Word,
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum DisambiguationExample {
Unchanged(String),
Changed(DisambiguationChange),
Expand Down
34 changes: 17 additions & 17 deletions nlprule/src/rule/engine/composition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use unicase::UniCase;

type Context<'a, 't> = (&'a MatchSentence<'t>, &'a MatchGraph<'t>);

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Matcher {
pub matcher: either::Either<either::Either<String, GraphId>, Regex>,
pub negate: bool,
Expand Down Expand Up @@ -80,7 +80,7 @@ impl Matcher {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub(crate) struct TextMatcher {
pub(crate) matcher: Matcher,
pub(crate) set: Option<DefaultHashSet<WordIdInt>>,
Expand Down Expand Up @@ -119,7 +119,7 @@ impl PosMatcher {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct WordDataMatcher {
pub(crate) pos_matcher: Option<PosMatcher>,
pub(crate) inflect_matcher: Option<TextMatcher>,
Expand Down Expand Up @@ -153,7 +153,7 @@ impl WordDataMatcher {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Quantifier {
pub min: usize,
pub max: usize,
Expand All @@ -165,7 +165,7 @@ pub trait Atomable: Send + Sync {
}

#[enum_dispatch(Atomable)]
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum Atom {
ChunkAtom(concrete::ChunkAtom),
SpaceBeforeAtom(concrete::SpaceBeforeAtom),
Expand All @@ -183,7 +183,7 @@ pub mod concrete {
use super::{Atomable, Context, Matcher, TextMatcher, WordDataMatcher};
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TextAtom {
pub(crate) matcher: TextMatcher,
}
Expand All @@ -197,7 +197,7 @@ pub mod concrete {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ChunkAtom {
pub(crate) matcher: Matcher,
}
Expand All @@ -211,7 +211,7 @@ pub mod concrete {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct SpaceBeforeAtom {
pub(crate) value: bool,
}
Expand All @@ -224,7 +224,7 @@ pub mod concrete {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct WordDataAtom {
pub(crate) matcher: WordDataMatcher,
pub(crate) case_sensitive: bool,
Expand All @@ -241,7 +241,7 @@ pub mod concrete {
}
}

#[derive(Debug, Serialize, Deserialize, Default)]
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
pub struct TrueAtom {}

impl Atomable for TrueAtom {
Expand All @@ -250,7 +250,7 @@ impl Atomable for TrueAtom {
}
}

#[derive(Debug, Serialize, Deserialize, Default)]
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
pub struct FalseAtom {}

impl Atomable for FalseAtom {
Expand All @@ -259,7 +259,7 @@ impl Atomable for FalseAtom {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AndAtom {
pub(crate) atoms: Vec<Atom>,
}
Expand All @@ -270,7 +270,7 @@ impl Atomable for AndAtom {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct OrAtom {
pub(crate) atoms: Vec<Atom>,
}
Expand All @@ -281,7 +281,7 @@ impl Atomable for OrAtom {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct NotAtom {
pub(crate) atom: Box<Atom>,
}
Expand All @@ -292,7 +292,7 @@ impl Atomable for NotAtom {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct OffsetAtom {
pub(crate) atom: Box<Atom>,
pub(crate) offset: isize,
Expand Down Expand Up @@ -489,7 +489,7 @@ impl<'t> MatchGraph<'t> {
}
}

#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Part {
pub atom: Atom,
pub quantifier: Quantifier,
Expand All @@ -498,7 +498,7 @@ pub struct Part {
pub unify: Option<bool>,
}

#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Composition {
pub(crate) parts: Vec<Part>,
pub(crate) id_to_idx: DefaultHashMap<GraphId, usize>,
Expand Down
4 changes: 2 additions & 2 deletions nlprule/src/rule/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub mod composition;

use composition::{Composition, GraphId, Group, MatchGraph, MatchSentence};

#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TokenEngine {
pub(crate) composition: Composition,
pub(crate) antipatterns: Vec<Composition>,
Expand Down Expand Up @@ -52,7 +52,7 @@ impl TokenEngine {
}
}

#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum Engine {
Token(TokenEngine),
// regex with the `fancy_regex` backend is large on the stack
Expand Down
12 changes: 6 additions & 6 deletions nlprule/src/rule/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::types::*;
use crate::utils::{self, regex::Regex};
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum Conversion {
Nop,
AllLower,
Expand All @@ -25,7 +25,7 @@ impl Conversion {
}

/// An example associated with a [Rule][crate::rule::Rule].
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Example {
pub(crate) text: String,
pub(crate) suggestion: Option<Suggestion>,
Expand All @@ -45,7 +45,7 @@ impl Example {
}
}

#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct PosReplacer {
pub(crate) matcher: PosMatcher,
}
Expand Down Expand Up @@ -85,7 +85,7 @@ impl PosReplacer {
}
}

#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Match {
pub(crate) id: GraphId,
pub(crate) conversion: Conversion,
Expand Down Expand Up @@ -118,14 +118,14 @@ impl Match {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum SynthesizerPart {
Text(String),
// Regex with the `fancy_regex` backend is large on the stack
Match(Box<Match>),
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Synthesizer {
pub(crate) use_titlecase_adjust: bool,
pub(crate) parts: Vec<SynthesizerPart>,
Expand Down
6 changes: 3 additions & 3 deletions nlprule/src/rule/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use self::{
/// A *Unification* makes an otherwise matching pattern invalid if no combination of its filters
/// matches all tokens marked with "unify".
/// Can also be negated.
#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub(crate) struct Unification {
pub(crate) mask: Vec<Option<bool>>,
pub(crate) filters: Vec<Vec<PosFilter>>,
Expand Down Expand Up @@ -81,7 +81,7 @@ impl Unification {
/// <disambig action="replace"><wd lemma="have" pos="VB"></wd></disambig>
/// </rule>
/// ```
#[derive(Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct DisambiguationRule {
pub(crate) id: Index,
pub(crate) engine: Engine,
Expand Down Expand Up @@ -371,7 +371,7 @@ impl<'a, 't> Iterator for Suggestions<'a, 't> {
/// <example correction="doesn't">He <marker>dosn't</marker> know about it.</example>
/// </rule>
/// ```
#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Rule {
pub(crate) id: Index,
pub(crate) engine: Engine,
Expand Down
Loading

0 comments on commit 111ea25

Please sign in to comment.