From 525796681c2dfac1bd20df109e8c053907917a71 Mon Sep 17 00:00:00 2001
From: Laysa Uchoa
Date: Sun, 21 Feb 2021 09:53:44 +0100
Subject: [PATCH] hunspell/fix: not check emoji or vulgar fractions

---
Review notes (this section is not part of the commit message):
 * Cargo.lock: the `unicode_names2` entry now carries its own `[[package]]`
   header; without it the four keys were appended to the `untrusted` table.
 * is_vulgar_fraction: dropped the unreachable `None` arm that followed `_`.
 * vulgar_fraction_or_emoji: no longer clones the `String` it already owns.
 * tests: removed the duplicated `five_eights` case, fixed the `two_fifth`
   typo, added a mixed fraction/text case.
 * Hunk line counts and the diffstat were updated by hand; the `index` blob
   ids are kept from the original submission.

 Cargo.lock              |  7 ++++
 Cargo.toml              |  1 +
 src/checker/hunspell.rs | 79 ++++++++++++++++++++++++++++++++++-------
 3 files changed, 73 insertions(+), 14 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 21ac6015..702d4b89 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -284,6 +284,7 @@ dependencies = [
  "toml",
  "url 2.2.1",
  "unic-emoji-char",
+ "unicode_names2",
  "walkdir",
  "xz2",
 ]
@@ -3175,6 +3176,12 @@ name = "untrusted"
 version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
+
+[[package]]
+name = "unicode_names2"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87d6678d7916394abad0d4b19df4d3802e1fd84abd7d701f39b75ee71b9e8cf1"
 
 [[package]]
 name = "url"
diff --git a/Cargo.toml b/Cargo.toml
index 3a5b3d88..0ca74dc0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -54,6 +54,7 @@ toml = "0.5"
 # for the config file
 walkdir = "2"
 unic-emoji-char = "0.9.0"
+unicode_names2 = "0.4.0"
 
 # config parsing, must be independent of features
 
diff --git a/src/checker/hunspell.rs b/src/checker/hunspell.rs
index b326e469..7e1c0238 100644
--- a/src/checker/hunspell.rs
+++ b/src/checker/hunspell.rs
@@ -29,12 +29,24 @@ pub struct HunspellWrapper(pub Arc<Hunspell>);
 unsafe impl Send for HunspellWrapper {}
 unsafe impl Sync for HunspellWrapper {}
 
-// The value is `true` if all characters are emoji
-pub fn is_emoji(word: String) -> bool {
-    if !word.is_empty() {
-        return word.clone().chars().all(unic_emoji_char::is_emoji);
-    }
-    false
+/// Returns `true` if `word` is non-empty and consists solely of emoji and/or
+/// Unicode `VULGAR FRACTION` characters such as `½` or `⅔`.
+///
+/// An empty string yields `false`.
+pub fn vulgar_fraction_or_emoji(word: String) -> bool {
+    !word.is_empty()
+        && word
+            .chars()
+            .all(|c| is_vulgar_fraction(c) || unic_emoji_char::is_emoji(c))
+}
+
+/// Returns `true` if the Unicode name of `character` starts with
+/// `VULGAR FRACTION`; characters without a name yield `false`.
+pub fn is_vulgar_fraction(character: char) -> bool {
+    const VULGAR: &str = "VULGAR FRACTION";
+    // Render the name to a `String` once so its prefix can be compared.
+    unicode_names2::name(character)
+        .map_or(false, |name| name.to_string().starts_with(VULGAR))
 }
 
 pub struct HunspellChecker;
@@ -251,9 +263,9 @@ fn obtain_suggestions<'s>(
             .filter(|x| x.len() > 1) // single char suggestions tend to be useless
             .collect::<Vec<_>>();
 
-        // strings made of emojis
-        if is_emoji(word.clone()) {
-            trace!(target: "quirks", "Found emoji character, treating {} as ok", &word);
+        // strings made of vulgar fraction or emoji
+        if vulgar_fraction_or_emoji(word.clone()) {
+            trace!(target: "quirks", "Found emoji or vulgar fraction character, treating {} as ok", &word);
             return;
         }
         let chars: Vec<char> = word.clone().chars().collect();
@@ -289,23 +301,62 @@ fn obtain_suggestions<'s>(
     }
 }
 
+// Expands every `name: (String, expected)` entry into a `#[test]` for `vulgar_fraction_or_emoji`.
-macro_rules! parametrized_is_emoji {
+macro_rules! parametrized_vulgar_fraction_or_emoji {
     ($($name:ident: $value:expr,)*) => {
         $(
             #[test]
             fn $name() {
                 let (input, expected) = $value;
-                assert_eq!(expected, is_emoji(input));
+                assert_eq!(expected, vulgar_fraction_or_emoji(input));
             }
         )*
     }
 }
 
-parametrized_is_emoji! {
+parametrized_vulgar_fraction_or_emoji! {
     empty: ("".to_string(), false),
     emojis: ("🐍🤗🦀".to_string(), true),
     contains_emojis: ("contains emoji 🦀".to_string(), false),
-    contains_only_unicode: ("⅔".to_string(), false),
-    contains_emoji_and_unicode: ("🐍🤗🦀⅔".to_string(), false),
+    contains_only_unicode: ("⅔⅔⅔".to_string(), true),
+    contains_emoji_and_unicode: ("🐍🤗🦀⅔".to_string(), true),
+    contains_fraction_and_text: ("½ cup".to_string(), false),
     no_emojis: ("no emoji string".to_string(), false),
 }
+
+// Expands every `name: (char, expected)` entry into a `#[test]` for `is_vulgar_fraction`.
+macro_rules! parametrized_is_vulgar_fraction {
+    ($($name:ident: $value:expr,)*) => {
+        $(
+            #[test]
+            fn $name() {
+                let (input, expected) = $value;
+                assert_eq!(expected, is_vulgar_fraction(input));
+            }
+        )*
+    }
+}
+
+parametrized_is_vulgar_fraction! {
+    is_emoji: ('🦀', false),
+    is_latin_letter: ('a', false),
+    vulgar_fraction_one_quarter: ('¼', true),
+    vulgar_fraction_one_half: ('½', true),
+    vulgar_fraction_three_quarters: ('¾', true),
+    vulgar_fraction_one_seventh: ('⅐', true),
+    vulgar_fraction_one_ninth: ('⅑', true),
+    vulgar_fraction_one_tenth: ('⅒', true),
+    vulgar_fraction_one_third: ('⅓', true),
+    vulgar_fraction_two_thirds: ('⅔', true),
+    vulgar_fraction_one_fifth: ('⅕', true),
+    vulgar_fraction_two_fifths: ('⅖', true),
+    vulgar_fraction_three_fifths: ('⅗', true),
+    vulgar_fraction_four_fifths: ('⅘', true),
+    vulgar_fraction_one_sixth: ('⅙', true),
+    vulgar_fraction_five_sixths: ('⅚', true),
+    vulgar_fraction_one_eighth: ('⅛', true),
+    vulgar_fraction_three_eighths: ('⅜', true),
+    vulgar_fraction_five_eighths: ('⅝', true),
+    vulgar_fraction_seven_eighths: ('⅞', true),
+    vulgar_fraction_zero_thirds: ('↉', true),
+}