From e6c0630df265486d4f4fe6d2ea3fe7ebd0638cfc Mon Sep 17 00:00:00 2001 From: Caleb Maclennan Date: Sun, 27 Oct 2024 15:29:15 +0300 Subject: [PATCH 1/8] fix(crate): Accept input languages as string in any case --- src/types.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/types.rs b/src/types.rs index 1d8454f..42d1e8d 100644 --- a/src/types.rs +++ b/src/types.rs @@ -84,8 +84,8 @@ impl FromStr for Locale { type Err = Error; fn from_str(s: &str) -> Result { match s.to_ascii_lowercase().as_str() { - "en" | "English" | "en_en" => Ok(Locale::EN), - "tr" | "Turkish" | "tr_tr" | "türkçe" => Ok(Locale::TR), + "en" | "english" | "en_en" => Ok(Locale::EN), + "tr" | "turkish" | "tr_tr" | "türkçe" => Ok(Locale::TR), input => LocaleSnafu { input }.fail()?, } } From 52d9fd9642633d3bdfcd353d833543f3e58810a8 Mon Sep 17 00:00:00 2001 From: Caleb Maclennan Date: Sun, 27 Oct 2024 16:29:50 +0300 Subject: [PATCH 2/8] refactor(crate): Cleanup EN & TR modules with less type shenanigans --- src/en.rs | 22 +++++++++++----------- src/tr.rs | 17 ++++++++--------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/en.rs b/src/en.rs index 3ee13ed..711bc4f 100644 --- a/src/en.rs +++ b/src/en.rs @@ -31,16 +31,16 @@ fn titlecase_cmos(chunk: Chunk) -> String { if let Segment::Word(s) = segment { *s = if !done_first { done_first = true; - s.to_string().to_titlecase_lower_rest() + s.to_titlecase_lower_rest() } else if segments.peek().is_none() { - // TODO: I think a bug is hiding here since peek() might give use a separator + // TODO: I think a bug is hiding here since peek() might give us a separator // that happens to be a trailing trivia. We need a custom iterator or peeker // that knows how to answer about first/last *word* segments. 
- s.to_string().to_titlecase_lower_rest() + s.to_titlecase_lower_rest() } else { - match is_reserved(s.to_string()) { - true => s.to_string().to_lowercase(), - false => s.to_string().to_titlecase_lower_rest(), + match is_reserved(s) { + true => s.to_lowercase(), + false => s.to_titlecase_lower_rest(), } } } @@ -65,7 +65,7 @@ fn titlecase_gruber(chunk: Chunk) -> String { format!("{}{}{}", leading_trivia, titilized, trailing_trivia) } -fn is_reserved(word: String) -> bool { +fn is_reserved(word: &str) -> bool { let word = word.to_lowercase(); let word = word.as_str(); let article = Regex::new(r"^(a|an|the)$").unwrap(); @@ -78,7 +78,7 @@ pub fn lowercase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); chunk.segments.iter_mut().for_each(|segment| { if let Segment::Word(s) = segment { - *s = s.to_string().to_lowercase() + *s = s.to_lowercase() } }); chunk.to_string() @@ -88,7 +88,7 @@ pub fn uppercase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); chunk.segments.iter_mut().for_each(|segment| { if let Segment::Word(s) = segment { - *s = s.to_string().to_uppercase() + *s = s.to_uppercase() } }); chunk.to_string() @@ -101,9 +101,9 @@ pub fn sentencecase(chunk: Chunk) -> String { if let Segment::Word(s) = segment { *s = if !done_first { done_first = true; - s.to_string().to_titlecase_lower_rest() + s.to_titlecase_lower_rest() } else { - s.to_string().to_lowercase() + s.to_lowercase() } } }); diff --git a/src/tr.rs b/src/tr.rs index 2b6d72e..6a934ff 100644 --- a/src/tr.rs +++ b/src/tr.rs @@ -23,10 +23,10 @@ fn titlecase_tdk(chunk: Chunk) -> String { if let Segment::Word(s) = segment { *s = if !done_first { done_first = true; - s.to_string().to_titlecase_tr_or_az_lower_rest() + s.to_titlecase_tr_or_az_lower_rest() } else { - match is_reserved(s.to_string()) { - true => s.to_string().to_lowercase_tr_az(), + match is_reserved(s) { + true => s.to_lowercase_tr_az(), false => s.to_titlecase_tr_or_az_lower_rest(), } } @@ -35,13 +35,12 @@ fn titlecase_tdk(chunk: 
Chunk) -> String { chunk.to_string() } -fn is_reserved(word: String) -> bool { +fn is_reserved(word: &str) -> bool { let baglac = Regex::new( r"^([Vv][Ee]|[İi][Ll][Ee]|[Yy][Aa]|[Vv][Ee]|[Yy][Aa][Hh][Uu][Tt]|[Kk][İi]|[Dd][AaEe])$", ) .unwrap(); let soruek = Regex::new(r"^([Mm][İiIıUuÜü])([Dd][İiIıUuÜü][Rr]([Ll][AaEe][Rr])?|[Ss][İiIıUuÜü][Nn]|[Yy][İiIıUuÜü][Zz]|[Ss][İiIıUuÜü][Nn][İiIıUuÜü][Zz]|[Ll][AaEe][Rr])?$").unwrap(); - let word = word.as_str(); baglac.is_match(word) || soruek.is_match(word) } @@ -49,7 +48,7 @@ pub fn lowercase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); chunk.segments.iter_mut().for_each(|segment| { if let Segment::Word(s) = segment { - *s = s.to_string().to_lowercase_tr_az() + *s = s.to_lowercase_tr_az() } }); chunk.to_string() @@ -59,7 +58,7 @@ pub fn uppercase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); chunk.segments.iter_mut().for_each(|segment| { if let Segment::Word(s) = segment { - *s = s.to_string().to_uppercase_tr_az() + *s = s.to_uppercase_tr_az() } }); chunk.to_string() @@ -72,9 +71,9 @@ pub fn sentencecase(chunk: Chunk) -> String { if let Segment::Word(s) = segment { *s = if !done_first { done_first = true; - s.to_string().to_titlecase_tr_or_az_lower_rest() + s.to_titlecase_tr_or_az_lower_rest() } else { - s.to_string().to_lowercase_tr_az() + s.to_lowercase_tr_az() } } }); From cfc5239348c9abf345bbfeb68f92fefd235875a1 Mon Sep 17 00:00:00 2001 From: Caleb Maclennan Date: Sun, 27 Oct 2024 16:59:29 +0300 Subject: [PATCH 3/8] refactor(crate): Introduce Word struct to handle more info about words --- src/content.rs | 39 +++++++++++++++++++++++++++------------ src/en.rs | 30 +++++++++++++++--------------- src/tr.rs | 28 ++++++++++++++-------------- 3 files changed, 56 insertions(+), 41 deletions(-) diff --git a/src/content.rs b/src/content.rs index 5e8c405..d7ee65f 100644 --- a/src/content.rs +++ b/src/content.rs @@ -6,17 +6,23 @@ use std::{borrow::Cow, fmt, fmt::Display, str::FromStr}; use snafu::prelude::*; 
+#[derive(Clone, Debug)] +#[non_exhaustive] +pub struct Chunk { + pub segments: Vec, +} + #[derive(Clone, Debug, PartialEq)] #[non_exhaustive] pub enum Segment { Separator(String), - Word(String), + Word(Word), } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] #[non_exhaustive] -pub struct Chunk { - pub segments: Vec, +pub struct Word { + pub word: String, } #[derive(Snafu)] @@ -42,7 +48,9 @@ fn split_chunk(s: &str) -> Chunk { if let Some(m) = capture.name("separator") { segments.push(Segment::Separator(m.as_str().to_string())); } else if let Some(m) = capture.name("word") { - segments.push(Segment::Word(m.as_str().to_string())); + segments.push(Segment::Word(Word { + word: m.as_str().to_owned(), + })); } } Chunk { segments } @@ -79,21 +87,28 @@ impl FromStr for Chunk { } } +impl Display for Chunk { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + for segment in &self.segments { + fmt.write_str(segment.to_string().as_ref())?; + } + Ok(()) + } +} + impl Display for Segment { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - let _ = match self { - Segment::Separator(string) => fmt.write_str(string), - Segment::Word(string) => fmt.write_str(string), + match self { + Segment::Separator(string) => fmt.write_str(string)?, + Segment::Word(word) => fmt.write_str(word.to_string().as_ref())?, }; Ok(()) } } -impl Display for Chunk { +impl Display for Word { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - for segment in &self.segments { - fmt.write_str(segment.to_string().as_ref())?; - } + fmt.write_str(self.word.as_ref())?; Ok(()) } } diff --git a/src/en.rs b/src/en.rs index 711bc4f..3b4284d 100644 --- a/src/en.rs +++ b/src/en.rs @@ -28,19 +28,19 @@ fn titlecase_cmos(chunk: Chunk) -> String { let mut chunk = chunk.clone(); let mut segments = chunk.segments.iter_mut().peekable(); while let Some(segment) = segments.next() { - if let Segment::Word(s) = segment { - *s = if !done_first { + if let Segment::Word(word) = segment { + 
word.word = if !done_first { done_first = true; - s.to_titlecase_lower_rest() + word.word.to_titlecase_lower_rest() } else if segments.peek().is_none() { // TODO: I think a bug is hiding here since peek() might give us a separator // that happens to be a trailing trivia. We need a custom iterator or peeker // that knows how to answer about first/last *word* segments. - s.to_titlecase_lower_rest() + word.word.to_titlecase_lower_rest() } else { - match is_reserved(s) { - true => s.to_lowercase(), - false => s.to_titlecase_lower_rest(), + match is_reserved(word.word.as_ref()) { + true => word.word.to_lowercase(), + false => word.word.to_titlecase_lower_rest(), } } } @@ -77,8 +77,8 @@ fn is_reserved(word: &str) -> bool { pub fn lowercase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); chunk.segments.iter_mut().for_each(|segment| { - if let Segment::Word(s) = segment { - *s = s.to_lowercase() + if let Segment::Word(word) = segment { + word.word = word.word.to_lowercase() } }); chunk.to_string() @@ -87,8 +87,8 @@ pub fn lowercase(chunk: Chunk) -> String { pub fn uppercase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); chunk.segments.iter_mut().for_each(|segment| { - if let Segment::Word(s) = segment { - *s = s.to_uppercase() + if let Segment::Word(word) = segment { + word.word = word.word.to_uppercase() } }); chunk.to_string() @@ -98,12 +98,12 @@ pub fn sentencecase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); let mut done_first = false; chunk.segments.iter_mut().for_each(|segment| { - if let Segment::Word(s) = segment { - *s = if !done_first { + if let Segment::Word(word) = segment { + word.word = if !done_first { done_first = true; - s.to_titlecase_lower_rest() + word.word.to_titlecase_lower_rest() } else { - s.to_lowercase() + word.word.to_lowercase() } } }); diff --git a/src/tr.rs b/src/tr.rs index 6a934ff..2c312f1 100644 --- a/src/tr.rs +++ b/src/tr.rs @@ -20,14 +20,14 @@ fn titlecase_tdk(chunk: Chunk) -> String { let mut chunk = 
chunk.clone(); let mut done_first = false; chunk.segments.iter_mut().for_each(|segment| { - if let Segment::Word(s) = segment { - *s = if !done_first { + if let Segment::Word(word) = segment { + word.word = if !done_first { done_first = true; - s.to_titlecase_tr_or_az_lower_rest() + word.word.to_titlecase_tr_or_az_lower_rest() } else { - match is_reserved(s) { - true => s.to_lowercase_tr_az(), - false => s.to_titlecase_tr_or_az_lower_rest(), + match is_reserved(word.word.as_ref()) { + true => word.word.to_lowercase_tr_az(), + false => word.word.to_titlecase_tr_or_az_lower_rest(), } } } @@ -47,8 +47,8 @@ fn is_reserved(word: &str) -> bool { pub fn lowercase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); chunk.segments.iter_mut().for_each(|segment| { - if let Segment::Word(s) = segment { - *s = s.to_lowercase_tr_az() + if let Segment::Word(word) = segment { + word.word = word.word.to_lowercase_tr_az() } }); chunk.to_string() @@ -57,8 +57,8 @@ pub fn lowercase(chunk: Chunk) -> String { pub fn uppercase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); chunk.segments.iter_mut().for_each(|segment| { - if let Segment::Word(s) = segment { - *s = s.to_uppercase_tr_az() + if let Segment::Word(word) = segment { + word.word = word.word.to_uppercase_tr_az() } }); chunk.to_string() @@ -68,12 +68,12 @@ pub fn sentencecase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); let mut done_first = false; chunk.segments.iter_mut().for_each(|segment| { - if let Segment::Word(s) = segment { - *s = if !done_first { + if let Segment::Word(word) = segment { + word.word = if !done_first { done_first = true; - s.to_titlecase_tr_or_az_lower_rest() + word.word.to_titlecase_tr_or_az_lower_rest() } else { - s.to_lowercase_tr_az() + word.word.to_lowercase_tr_az() } } }); From 7c35fff4131cb05fe0c107476da2da62a5bc6240 Mon Sep 17 00:00:00 2001 From: Caleb Maclennan Date: Sun, 27 Oct 2024 15:48:34 +0300 Subject: [PATCH 4/8] feat(crate): Impl From<Chunk> for String --- src/content.rs | 
10 ++++++++++ src/en.rs | 12 ++++++------ src/tr.rs | 8 ++++---- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/content.rs b/src/content.rs index d7ee65f..9dd0eda 100644 --- a/src/content.rs +++ b/src/content.rs @@ -87,6 +87,16 @@ impl FromStr for Chunk { } } +impl From for String { + fn from(c: Chunk) -> Self { + let mut s = String::new(); + for segment in c.segments { + s.push_str(segment.to_string().as_ref()); + } + s + } +} + impl Display for Chunk { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { for segment in &self.segments { diff --git a/src/en.rs b/src/en.rs index 3b4284d..c96fe2e 100644 --- a/src/en.rs +++ b/src/en.rs @@ -20,7 +20,7 @@ pub fn titlecase(chunk: Chunk, style: StyleGuide) -> String { fn titlecase_ap(chunk: Chunk) -> String { eprintln!("AP style guide not implemented, string returned as-is!"); - chunk.to_string() + chunk.into() } fn titlecase_cmos(chunk: Chunk) -> String { @@ -45,7 +45,7 @@ fn titlecase_cmos(chunk: Chunk) -> String { } } } - chunk.to_string() + chunk.into() } fn titlecase_gruber(chunk: Chunk) -> String { @@ -61,7 +61,7 @@ fn titlecase_gruber(chunk: Chunk) -> String { } else { String::from("") }; - let titilized = gruber_titlecase(&chunk.to_string()); + let titilized = gruber_titlecase(chunk.to_string().as_ref()); format!("{}{}{}", leading_trivia, titilized, trailing_trivia) } @@ -81,7 +81,7 @@ pub fn lowercase(chunk: Chunk) -> String { word.word = word.word.to_lowercase() } }); - chunk.to_string() + chunk.into() } pub fn uppercase(chunk: Chunk) -> String { @@ -91,7 +91,7 @@ pub fn uppercase(chunk: Chunk) -> String { word.word = word.word.to_uppercase() } }); - chunk.to_string() + chunk.into() } pub fn sentencecase(chunk: Chunk) -> String { @@ -107,5 +107,5 @@ pub fn sentencecase(chunk: Chunk) -> String { } } }); - chunk.to_string() + chunk.into() } diff --git a/src/tr.rs b/src/tr.rs index 2c312f1..7497638 100644 --- a/src/tr.rs +++ b/src/tr.rs @@ -32,7 +32,7 @@ fn titlecase_tdk(chunk: Chunk) -> 
String { } } }); - chunk.to_string() + chunk.into() } fn is_reserved(word: &str) -> bool { @@ -51,7 +51,7 @@ pub fn lowercase(chunk: Chunk) -> String { word.word = word.word.to_lowercase_tr_az() } }); - chunk.to_string() + chunk.into() } pub fn uppercase(chunk: Chunk) -> String { @@ -61,7 +61,7 @@ pub fn uppercase(chunk: Chunk) -> String { word.word = word.word.to_uppercase_tr_az() } }); - chunk.to_string() + chunk.into() } pub fn sentencecase(chunk: Chunk) -> String { @@ -77,5 +77,5 @@ pub fn sentencecase(chunk: Chunk) -> String { } } }); - chunk.to_string() + chunk.into() } From 238b9bcb34c7629c1483aca46f17e4dba71f9145 Mon Sep 17 00:00:00 2001 From: Caleb Maclennan Date: Mon, 28 Oct 2024 12:44:44 +0300 Subject: [PATCH 5/8] chore(crate): Implement more traits for Word to make handling more ergonomic --- src/content.rs | 37 +++++++++++++++++++++++++++++++++++++ src/en.rs | 22 +++++++++++----------- src/tr.rs | 10 ++++++---- 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/src/content.rs b/src/content.rs index 9dd0eda..b1831fa 100644 --- a/src/content.rs +++ b/src/content.rs @@ -3,6 +3,7 @@ use regex::Regex; use std::{borrow::Cow, fmt, fmt::Display, str::FromStr}; +use unicode_titlecase::StrTitleCase; use snafu::prelude::*; @@ -116,6 +117,42 @@ impl Display for Segment { } } +impl Word { + pub fn to_lowercase(&self) -> String { + self.word.to_lowercase() + } + pub fn to_uppercase(&self) -> String { + self.word.to_uppercase() + } +} + +impl From for Word { + fn from(word: String) -> Self { + Self { word } + } +} + +impl StrTitleCase for Word { + fn to_titlecase(&self) -> String { + self.word.to_titlecase() + } + fn to_titlecase_lower_rest(&self) -> String { + self.word.to_titlecase_lower_rest() + } + fn to_titlecase_tr_or_az(&self) -> String { + self.word.to_titlecase_tr_or_az() + } + fn to_titlecase_tr_or_az_lower_rest(&self) -> String { + self.word.to_titlecase_tr_or_az_lower_rest() + } + fn starts_titlecase(&self) -> bool { + 
self.word.starts_titlecase() + } + fn starts_titlecase_rest_lower(&self) -> bool { + self.word.starts_titlecase_rest_lower() + } +} + impl Display for Word { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { fmt.write_str(self.word.as_ref())?; diff --git a/src/en.rs b/src/en.rs index c96fe2e..83e4ccd 100644 --- a/src/en.rs +++ b/src/en.rs @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: © 2023 Caleb Maclennan // SPDX-License-Identifier: LGPL-3.0-only -use crate::content::{Chunk, Segment}; +use crate::content::{Chunk, Segment, Word}; use crate::types::StyleGuide; use regex::Regex; @@ -31,16 +31,16 @@ fn titlecase_cmos(chunk: Chunk) -> String { if let Segment::Word(word) = segment { word.word = if !done_first { done_first = true; - word.word.to_titlecase_lower_rest() + word.to_titlecase_lower_rest() } else if segments.peek().is_none() { // TODO: I think a bug is hiding here since peek() might give us a separator // that happens to be a trailing trivia. We need a custom iterator or peeker // that knows how to answer about first/last *word* segments. 
- word.word.to_titlecase_lower_rest() + word.to_titlecase_lower_rest() } else { - match is_reserved(word.word.as_ref()) { - true => word.word.to_lowercase(), - false => word.word.to_titlecase_lower_rest(), + match is_reserved(word) { + true => word.to_lowercase(), + false => word.to_titlecase_lower_rest(), } } } @@ -65,7 +65,7 @@ fn titlecase_gruber(chunk: Chunk) -> String { format!("{}{}{}", leading_trivia, titilized, trailing_trivia) } -fn is_reserved(word: &str) -> bool { +fn is_reserved(word: &Word) -> bool { let word = word.to_lowercase(); let word = word.as_str(); let article = Regex::new(r"^(a|an|the)$").unwrap(); @@ -78,7 +78,7 @@ pub fn lowercase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); chunk.segments.iter_mut().for_each(|segment| { if let Segment::Word(word) = segment { - word.word = word.word.to_lowercase() + word.word = word.to_lowercase() } }); chunk.into() @@ -88,7 +88,7 @@ pub fn uppercase(chunk: Chunk) -> String { let mut chunk = chunk.clone(); chunk.segments.iter_mut().for_each(|segment| { if let Segment::Word(word) = segment { - word.word = word.word.to_uppercase() + word.word = word.to_uppercase() } }); chunk.into() @@ -101,9 +101,9 @@ pub fn sentencecase(chunk: Chunk) -> String { if let Segment::Word(word) = segment { word.word = if !done_first { done_first = true; - word.word.to_titlecase_lower_rest() + word.to_titlecase_lower_rest() } else { - word.word.to_lowercase() + word.to_lowercase() } } }); diff --git a/src/tr.rs b/src/tr.rs index 7497638..4cab6b3 100644 --- a/src/tr.rs +++ b/src/tr.rs @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: © 2023 Caleb Maclennan // SPDX-License-Identifier: LGPL-3.0-only -use crate::content::{Chunk, Segment}; +use crate::content::{Chunk, Segment, Word}; use crate::types::StyleGuide; use regex::Regex; @@ -23,9 +23,9 @@ fn titlecase_tdk(chunk: Chunk) -> String { if let Segment::Word(word) = segment { word.word = if !done_first { done_first = true; - word.word.to_titlecase_tr_or_az_lower_rest() + 
word.to_titlecase_tr_or_az_lower_rest() } else { - match is_reserved(word.word.as_ref()) { + match is_reserved(word) { true => word.word.to_lowercase_tr_az(), false => word.word.to_titlecase_tr_or_az_lower_rest(), } @@ -35,7 +35,9 @@ fn titlecase_tdk(chunk: Chunk) -> String { chunk.into() } -fn is_reserved(word: &str) -> bool { +fn is_reserved(word: &Word) -> bool { + let word = word.to_string(); + let word = word.as_ref(); let baglac = Regex::new( r"^([Vv][Ee]|[İi][Ll][Ee]|[Yy][Aa]|[Vv][Ee]|[Yy][Aa][Hh][Uu][Tt]|[Kk][İi]|[Dd][AaEe])$", ) From fe4e4885ca25832b6135e93bd22de8421abdd36b Mon Sep 17 00:00:00 2001 From: Caleb Maclennan Date: Mon, 28 Oct 2024 15:23:23 +0300 Subject: [PATCH 6/8] chore(crate): Remove duplicate pattern fragment from regex --- src/tr.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/tr.rs b/src/tr.rs index 4cab6b3..619d321 100644 --- a/src/tr.rs +++ b/src/tr.rs @@ -38,10 +38,9 @@ fn titlecase_tdk(chunk: Chunk) -> String { fn is_reserved(word: &Word) -> bool { let word = word.to_string(); let word = word.as_ref(); - let baglac = Regex::new( - r"^([Vv][Ee]|[İi][Ll][Ee]|[Yy][Aa]|[Vv][Ee]|[Yy][Aa][Hh][Uu][Tt]|[Kk][İi]|[Dd][AaEe])$", - ) - .unwrap(); + let baglac = + Regex::new(r"^([Vv][Ee]|[İi][Ll][Ee]|[Yy][Aa]|[Yy][Aa][Hh][Uu][Tt]|[Kk][İi]|[Dd][AaEe])$") + .unwrap(); let soruek = Regex::new(r"^([Mm][İiIıUuÜü])([Dd][İiIıUuÜü][Rr]([Ll][AaEe][Rr])?|[Ss][İiIıUuÜü][Nn]|[Yy][İiIıUuÜü][Zz]|[Ss][İiIıUuÜü][Nn][İiIıUuÜü][Zz]|[Ll][AaEe][Rr])?$").unwrap(); baglac.is_match(word) || soruek.is_match(word) } From d7440dc1cf8735dd6b2ed08d43a1da8fa2c96bc7 Mon Sep 17 00:00:00 2001 From: Caleb Maclennan Date: Mon, 28 Oct 2024 16:01:02 +0300 Subject: [PATCH 7/8] refactor(crate): Redo iteration to avoid trailing trivia gotcha --- src/en.rs | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/en.rs b/src/en.rs index 83e4ccd..4f2850d 100644 --- a/src/en.rs +++ b/src/en.rs @@ -24,26 
+24,26 @@ fn titlecase_ap(chunk: Chunk) -> String { } fn titlecase_cmos(chunk: Chunk) -> String { - let mut done_first = false; let mut chunk = chunk.clone(); - let mut segments = chunk.segments.iter_mut().peekable(); - while let Some(segment) = segments.next() { - if let Segment::Word(word) = segment { - word.word = if !done_first { - done_first = true; - word.to_titlecase_lower_rest() - } else if segments.peek().is_none() { - // TODO: I think a bug is hiding here since peek() might give us a separator - // that happens to be a trailing trivia. We need a custom iterator or peeker - // that knows how to answer about first/last *word* segments. - word.to_titlecase_lower_rest() - } else { - match is_reserved(word) { - true => word.to_lowercase(), - false => word.to_titlecase_lower_rest(), - } - } - } + let mut words = chunk + .segments + .iter_mut() + .filter_map(|segment| match segment { + Segment::Word(word) => Some(word), + _ => None, + }) + .peekable(); + if let Some(word) = words.next() { + word.word = word.to_titlecase_lower_rest(); + } + while let Some(word) = words.next() { + word.word = match words.peek().is_none() { + true => word.to_titlecase_lower_rest(), + false => match is_reserved(word) { + true => word.to_lowercase(), + false => word.to_titlecase_lower_rest(), + }, + }; } chunk.into() } From dd354f45158d4346c769d8d1d1f63f1c2e8199e6 Mon Sep 17 00:00:00 2001 From: Caleb Maclennan Date: Mon, 28 Oct 2024 18:32:10 +0300 Subject: [PATCH 8/8] test(crate): Add tests for leading and trailing trivia --- tests/lib.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/lib.rs b/tests/lib.rs index f84ea75..63f5dd7 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -65,6 +65,24 @@ case!( "a b c" ); +case!( + trivia_en, + Case::Title, + Locale::EN, + StyleGuide::LanguageDefault, + " foo bar ", + " Foo Bar " +); + +case!( + trivia_tr, + Case::Title, + Locale::TR, + StyleGuide::LanguageDefault, + " foo bar ", + " Foo Bar " +); + macro_rules! 
titlecase { ($name:ident, $locale:expr, $style:expr, $input:expr, $expected:expr) => { #[test]