Merge branch 'overhaul-ergonomics'

alerque · Oct 29, 2024 · ef75905 · ef75905
2 parents 94c60c5 + dd354f4
commit ef75905
Show file tree

Hide file tree

Showing 5 changed files with 154 additions and 74 deletions.
diff --git a/src/content.rs b/src/content.rs
@@ -3,20 +3,27 @@
 
 use regex::Regex;
 use std::{borrow::Cow, fmt, fmt::Display, str::FromStr};
+use unicode_titlecase::StrTitleCase;
 
 use snafu::prelude::*;
 
+#[derive(Clone, Debug)]
+#[non_exhaustive]
+pub struct Chunk {
+    pub segments: Vec<Segment>,
+}
+
 #[derive(Clone, Debug, PartialEq)]
 #[non_exhaustive]
 pub enum Segment {
     Separator(String),
-    Word(String),
+    Word(Word),
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq)]
 #[non_exhaustive]
-pub struct Chunk {
-    pub segments: Vec<Segment>,
+pub struct Word {
+    pub word: String,
 }
 
 #[derive(Snafu)]
@@ -42,7 +49,9 @@ fn split_chunk(s: &str) -> Chunk {
         if let Some(m) = capture.name("separator") {
             segments.push(Segment::Separator(m.as_str().to_string()));
         } else if let Some(m) = capture.name("word") {
-            segments.push(Segment::Word(m.as_str().to_string()));
+            segments.push(Segment::Word(Word {
+                word: m.as_str().to_owned(),
+            }));
         }
     }
     Chunk { segments }
@@ -79,13 +88,13 @@ impl FromStr for Chunk {
     }
 }
 
-impl Display for Segment {
-    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        let _ = match self {
-            Segment::Separator(string) => fmt.write_str(string),
-            Segment::Word(string) => fmt.write_str(string),
-        };
-        Ok(())
+impl From<Chunk> for String {
+    fn from(c: Chunk) -> Self {
+        let mut s = String::new();
+        for segment in c.segments {
+            s.push_str(segment.to_string().as_ref());
+        }
+        s
     }
 }
 
@@ -97,3 +106,56 @@ impl Display for Chunk {
         Ok(())
     }
 }
+
+impl Display for Segment {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Segment::Separator(string) => fmt.write_str(string)?,
+            Segment::Word(word) => fmt.write_str(word.to_string().as_ref())?,
+        };
+        Ok(())
+    }
+}
+
+impl Word {
+    pub fn to_lowercase(&self) -> String {
+        self.word.to_lowercase()
+    }
+    pub fn to_uppercase(&self) -> String {
+        self.word.to_uppercase()
+    }
+}
+
+impl From<String> for Word {
+    fn from(word: String) -> Self {
+        Self { word }
+    }
+}
+
+impl StrTitleCase for Word {
+    fn to_titlecase(&self) -> String {
+        self.word.to_titlecase()
+    }
+    fn to_titlecase_lower_rest(&self) -> String {
+        self.word.to_titlecase_lower_rest()
+    }
+    fn to_titlecase_tr_or_az(&self) -> String {
+        self.word.to_titlecase_tr_or_az()
+    }
+    fn to_titlecase_tr_or_az_lower_rest(&self) -> String {
+        self.word.to_titlecase_tr_or_az_lower_rest()
+    }
+    fn starts_titlecase(&self) -> bool {
+        self.word.starts_titlecase()
+    }
+    fn starts_titlecase_rest_lower(&self) -> bool {
+        self.word.starts_titlecase_rest_lower()
+    }
+}
+
+impl Display for Word {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        fmt.write_str(self.word.as_ref())?;
+        Ok(())
+    }
+}
diff --git a/src/en.rs b/src/en.rs
@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: © 2023 Caleb Maclennan <[email protected]>
 // SPDX-License-Identifier: LGPL-3.0-only
 
-use crate::content::{Chunk, Segment};
+use crate::content::{Chunk, Segment, Word};
 use crate::types::StyleGuide;
 
 use regex::Regex;
@@ -20,32 +20,32 @@ pub fn titlecase(chunk: Chunk, style: StyleGuide) -> String {
 
 fn titlecase_ap(chunk: Chunk) -> String {
     eprintln!("AP style guide not implemented, string returned as-is!");
-    chunk.to_string()
+    chunk.into()
 }
 
 fn titlecase_cmos(chunk: Chunk) -> String {
-    let mut done_first = false;
     let mut chunk = chunk.clone();
-    let mut segments = chunk.segments.iter_mut().peekable();
-    while let Some(segment) = segments.next() {
-        if let Segment::Word(s) = segment {
-            *s = if !done_first {
-                done_first = true;
-                s.to_string().to_titlecase_lower_rest()
-            } else if segments.peek().is_none() {
-                // TODO: I think a bug is hiding here since peek() might give use a separator
-                // that happens to be a trailing trivia. We need a custom iterator or peeker
-                // that knows how to answer about first/last *word* segments.
-                s.to_string().to_titlecase_lower_rest()
-            } else {
-                match is_reserved(s.to_string()) {
-                    true => s.to_string().to_lowercase(),
-                    false => s.to_string().to_titlecase_lower_rest(),
-                }
-            }
-        }
+    let mut words = chunk
+        .segments
+        .iter_mut()
+        .filter_map(|segment| match segment {
+            Segment::Word(word) => Some(word),
+            _ => None,
+        })
+        .peekable();
+    if let Some(word) = words.next() {
+        word.word = word.to_titlecase_lower_rest();
+    }
+    while let Some(word) = words.next() {
+        word.word = match words.peek().is_none() {
+            true => word.to_titlecase_lower_rest(),
+            false => match is_reserved(word) {
+                true => word.to_lowercase(),
+                false => word.to_titlecase_lower_rest(),
+            },
+        };
     }
-    chunk.to_string()
+    chunk.into()
 }
 
 fn titlecase_gruber(chunk: Chunk) -> String {
@@ -61,11 +61,11 @@ fn titlecase_gruber(chunk: Chunk) -> String {
     } else {
         String::from("")
     };
-    let titilized = gruber_titlecase(&chunk.to_string());
+    let titilized = gruber_titlecase(chunk.to_string().as_ref());
     format!("{}{}{}", leading_trivia, titilized, trailing_trivia)
 }
 
-fn is_reserved(word: String) -> bool {
+fn is_reserved(word: &Word) -> bool {
     let word = word.to_lowercase();
     let word = word.as_str();
     let article = Regex::new(r"^(a|an|the)$").unwrap();
@@ -77,35 +77,35 @@ fn is_reserved(word: String) -> bool {
 pub fn lowercase(chunk: Chunk) -> String {
     let mut chunk = chunk.clone();
     chunk.segments.iter_mut().for_each(|segment| {
-        if let Segment::Word(s) = segment {
-            *s = s.to_string().to_lowercase()
+        if let Segment::Word(word) = segment {
+            word.word = word.to_lowercase()
         }
     });
-    chunk.to_string()
+    chunk.into()
 }
 
 pub fn uppercase(chunk: Chunk) -> String {
     let mut chunk = chunk.clone();
     chunk.segments.iter_mut().for_each(|segment| {
-        if let Segment::Word(s) = segment {
-            *s = s.to_string().to_uppercase()
+        if let Segment::Word(word) = segment {
+            word.word = word.to_uppercase()
         }
     });
-    chunk.to_string()
+    chunk.into()
 }
 
 pub fn sentencecase(chunk: Chunk) -> String {
     let mut chunk = chunk.clone();
     let mut done_first = false;
     chunk.segments.iter_mut().for_each(|segment| {
-        if let Segment::Word(s) = segment {
-            *s = if !done_first {
+        if let Segment::Word(word) = segment {
+            word.word = if !done_first {
                 done_first = true;
-                s.to_string().to_titlecase_lower_rest()
+                word.to_titlecase_lower_rest()
             } else {
-                s.to_string().to_lowercase()
+                word.to_lowercase()
             }
         }
     });
-    chunk.to_string()
+    chunk.into()
 }
diff --git a/src/tr.rs b/src/tr.rs
@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: © 2023 Caleb Maclennan <[email protected]>
 // SPDX-License-Identifier: LGPL-3.0-only
 
-use crate::content::{Chunk, Segment};
+use crate::content::{Chunk, Segment, Word};
 use crate::types::StyleGuide;
 
 use regex::Regex;
@@ -20,63 +20,63 @@ fn titlecase_tdk(chunk: Chunk) -> String {
     let mut chunk = chunk.clone();
     let mut done_first = false;
     chunk.segments.iter_mut().for_each(|segment| {
-        if let Segment::Word(s) = segment {
-            *s = if !done_first {
+        if let Segment::Word(word) = segment {
+            word.word = if !done_first {
                 done_first = true;
-                s.to_string().to_titlecase_tr_or_az_lower_rest()
+                word.to_titlecase_tr_or_az_lower_rest()
             } else {
-                match is_reserved(s.to_string()) {
-                    true => s.to_string().to_lowercase_tr_az(),
-                    false => s.to_titlecase_tr_or_az_lower_rest(),
+                match is_reserved(word) {
+                    true => word.word.to_lowercase_tr_az(),
+                    false => word.word.to_titlecase_tr_or_az_lower_rest(),
                 }
             }
         }
     });
-    chunk.to_string()
+    chunk.into()
 }
 
-fn is_reserved(word: String) -> bool {
-    let baglac = Regex::new(
-        r"^([Vv][Ee]|[İi][Ll][Ee]|[Yy][Aa]|[Vv][Ee]|[Yy][Aa][Hh][Uu][Tt]|[Kk][İi]|[Dd][AaEe])$",
-    )
-    .unwrap();
+fn is_reserved(word: &Word) -> bool {
+    let word = word.to_string();
+    let word = word.as_ref();
+    let baglac =
+        Regex::new(r"^([Vv][Ee]|[İi][Ll][Ee]|[Yy][Aa]|[Yy][Aa][Hh][Uu][Tt]|[Kk][İi]|[Dd][AaEe])$")
+            .unwrap();
     let soruek = Regex::new(r"^([Mm][İiIıUuÜü])([Dd][İiIıUuÜü][Rr]([Ll][AaEe][Rr])?|[Ss][İiIıUuÜü][Nn]|[Yy][İiIıUuÜü][Zz]|[Ss][İiIıUuÜü][Nn][İiIıUuÜü][Zz]|[Ll][AaEe][Rr])?$").unwrap();
-    let word = word.as_str();
     baglac.is_match(word) || soruek.is_match(word)
 }
 
 pub fn lowercase(chunk: Chunk) -> String {
     let mut chunk = chunk.clone();
     chunk.segments.iter_mut().for_each(|segment| {
-        if let Segment::Word(s) = segment {
-            *s = s.to_string().to_lowercase_tr_az()
+        if let Segment::Word(word) = segment {
+            word.word = word.word.to_lowercase_tr_az()
         }
     });
-    chunk.to_string()
+    chunk.into()
 }
 
 pub fn uppercase(chunk: Chunk) -> String {
     let mut chunk = chunk.clone();
     chunk.segments.iter_mut().for_each(|segment| {
-        if let Segment::Word(s) = segment {
-            *s = s.to_string().to_uppercase_tr_az()
+        if let Segment::Word(word) = segment {
+            word.word = word.word.to_uppercase_tr_az()
         }
     });
-    chunk.to_string()
+    chunk.into()
 }
 
 pub fn sentencecase(chunk: Chunk) -> String {
     let mut chunk = chunk.clone();
     let mut done_first = false;
     chunk.segments.iter_mut().for_each(|segment| {
-        if let Segment::Word(s) = segment {
-            *s = if !done_first {
+        if let Segment::Word(word) = segment {
+            word.word = if !done_first {
                 done_first = true;
-                s.to_string().to_titlecase_tr_or_az_lower_rest()
+                word.word.to_titlecase_tr_or_az_lower_rest()
             } else {
-                s.to_string().to_lowercase_tr_az()
+                word.word.to_lowercase_tr_az()
             }
         }
     });
-    chunk.to_string()
+    chunk.into()
 }
diff --git a/src/types.rs b/src/types.rs
@@ -84,8 +84,8 @@ impl FromStr for Locale {
     type Err = Error;
     fn from_str(s: &str) -> Result<Self> {
         match s.to_ascii_lowercase().as_str() {
-            "en" | "English" | "en_en" => Ok(Locale::EN),
-            "tr" | "Turkish" | "tr_tr" | "türkçe" => Ok(Locale::TR),
+            "en" | "english" | "en_en" => Ok(Locale::EN),
+            "tr" | "turkish" | "tr_tr" | "türkçe" => Ok(Locale::TR),
             input => LocaleSnafu { input }.fail()?,
         }
     }

diff --git a/tests/lib.rs b/tests/lib.rs
@@ -65,6 +65,24 @@ case!(
     "a b c"
 );
 
+case!(
+    trivia_en,
+    Case::Title,
+    Locale::EN,
+    StyleGuide::LanguageDefault,
+    "  foo  bar  ",
+    "  Foo  Bar  "
+);
+
+case!(
+    trivia_tr,
+    Case::Title,
+    Locale::TR,
+    StyleGuide::LanguageDefault,
+    "  foo  bar  ",
+    "  Foo  Bar  "
+);
+
 macro_rules! titlecase {
     ($name:ident, $locale:expr, $style:expr, $input:expr, $expected:expr) => {
         #[test]