Skip to content

Commit

Permalink
Merge branch 'overhaul-ergonomics'
Browse files Browse the repository at this point in the history
  • Loading branch information
alerque committed Oct 29, 2024
2 parents 94c60c5 + dd354f4 commit ef75905
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 74 deletions.
86 changes: 74 additions & 12 deletions src/content.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,27 @@

use regex::Regex;
use std::{borrow::Cow, fmt, fmt::Display, str::FromStr};
use unicode_titlecase::StrTitleCase;

use snafu::prelude::*;

#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Chunk {
pub segments: Vec<Segment>,
}

#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub enum Segment {
Separator(String),
Word(String),
Word(Word),
}

#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub struct Chunk {
pub segments: Vec<Segment>,
pub struct Word {
pub word: String,
}

#[derive(Snafu)]
Expand All @@ -42,7 +49,9 @@ fn split_chunk(s: &str) -> Chunk {
if let Some(m) = capture.name("separator") {
segments.push(Segment::Separator(m.as_str().to_string()));
} else if let Some(m) = capture.name("word") {
segments.push(Segment::Word(m.as_str().to_string()));
segments.push(Segment::Word(Word {
word: m.as_str().to_owned(),
}));
}
}
Chunk { segments }
Expand Down Expand Up @@ -79,13 +88,13 @@ impl FromStr for Chunk {
}
}

impl Display for Segment {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
let _ = match self {
Segment::Separator(string) => fmt.write_str(string),
Segment::Word(string) => fmt.write_str(string),
};
Ok(())
impl From<Chunk> for String {
    /// Consumes the chunk and concatenates the textual form of every segment,
    /// in order, into a single `String`.
    fn from(c: Chunk) -> Self {
        c.segments
            .into_iter()
            .map(|segment| segment.to_string())
            .collect()
    }
}

Expand All @@ -97,3 +106,56 @@ impl Display for Chunk {
Ok(())
}
}

impl Display for Segment {
    /// Writes the segment's text verbatim: the separator string for
    /// `Separator`, or the wrapped word's text for `Word`.
    ///
    /// Any error reported by the formatter is returned to the caller.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Segment::Separator(string) => fmt.write_str(string),
            // Write the word's text straight from its field; going through
            // `to_string()` would allocate a temporary `String` per word.
            Segment::Word(word) => fmt.write_str(&word.word),
        }
    }
}

impl Word {
    /// Returns a lowercased copy of the word's text; the `Word` itself is
    /// left untouched.
    pub fn to_lowercase(&self) -> String {
        self.word.as_str().to_lowercase()
    }

    /// Returns an uppercased copy of the word's text; the `Word` itself is
    /// left untouched.
    pub fn to_uppercase(&self) -> String {
        self.word.as_str().to_uppercase()
    }
}

impl From<String> for Word {
fn from(word: String) -> Self {
Self { word }
}
}

// Every method forwards to the same-named `StrTitleCase` method on the
// word's inner text, so a `Word` can be used wherever that trait is expected.
impl StrTitleCase for Word {
    /// Forwards to `to_titlecase` on the inner text.
    fn to_titlecase(&self) -> String {
        self.word.as_str().to_titlecase()
    }

    /// Forwards to `to_titlecase_lower_rest` on the inner text.
    fn to_titlecase_lower_rest(&self) -> String {
        self.word.as_str().to_titlecase_lower_rest()
    }

    /// Forwards to `to_titlecase_tr_or_az` on the inner text.
    fn to_titlecase_tr_or_az(&self) -> String {
        self.word.as_str().to_titlecase_tr_or_az()
    }

    /// Forwards to `to_titlecase_tr_or_az_lower_rest` on the inner text.
    fn to_titlecase_tr_or_az_lower_rest(&self) -> String {
        self.word.as_str().to_titlecase_tr_or_az_lower_rest()
    }

    /// Forwards to `starts_titlecase` on the inner text.
    fn starts_titlecase(&self) -> bool {
        self.word.as_str().starts_titlecase()
    }

    /// Forwards to `starts_titlecase_rest_lower` on the inner text.
    fn starts_titlecase_rest_lower(&self) -> bool {
        self.word.as_str().starts_titlecase_rest_lower()
    }
}

impl Display for Word {
    /// Writes the word's text to the formatter exactly as stored, returning
    /// whatever result the formatter reports.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt.write_str(&self.word)
    }
}
70 changes: 35 additions & 35 deletions src/en.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: © 2023 Caleb Maclennan <[email protected]>
// SPDX-License-Identifier: LGPL-3.0-only

use crate::content::{Chunk, Segment};
use crate::content::{Chunk, Segment, Word};
use crate::types::StyleGuide;

use regex::Regex;
Expand All @@ -20,32 +20,32 @@ pub fn titlecase(chunk: Chunk, style: StyleGuide) -> String {

fn titlecase_ap(chunk: Chunk) -> String {
eprintln!("AP style guide not implemented, string returned as-is!");
chunk.to_string()
chunk.into()
}

fn titlecase_cmos(chunk: Chunk) -> String {
let mut done_first = false;
let mut chunk = chunk.clone();
let mut segments = chunk.segments.iter_mut().peekable();
while let Some(segment) = segments.next() {
if let Segment::Word(s) = segment {
*s = if !done_first {
done_first = true;
s.to_string().to_titlecase_lower_rest()
} else if segments.peek().is_none() {
// TODO: I think a bug is hiding here since peek() might give us a separator
// that happens to be a trailing trivia. We need a custom iterator or peeker
// that knows how to answer about first/last *word* segments.
s.to_string().to_titlecase_lower_rest()
} else {
match is_reserved(s.to_string()) {
true => s.to_string().to_lowercase(),
false => s.to_string().to_titlecase_lower_rest(),
}
}
}
let mut words = chunk
.segments
.iter_mut()
.filter_map(|segment| match segment {
Segment::Word(word) => Some(word),
_ => None,
})
.peekable();
if let Some(word) = words.next() {
word.word = word.to_titlecase_lower_rest();
}
while let Some(word) = words.next() {
word.word = match words.peek().is_none() {
true => word.to_titlecase_lower_rest(),
false => match is_reserved(word) {
true => word.to_lowercase(),
false => word.to_titlecase_lower_rest(),
},
};
}
chunk.to_string()
chunk.into()
}

fn titlecase_gruber(chunk: Chunk) -> String {
Expand All @@ -61,11 +61,11 @@ fn titlecase_gruber(chunk: Chunk) -> String {
} else {
String::from("")
};
let titilized = gruber_titlecase(&chunk.to_string());
let titilized = gruber_titlecase(chunk.to_string().as_ref());
format!("{}{}{}", leading_trivia, titilized, trailing_trivia)
}

fn is_reserved(word: String) -> bool {
fn is_reserved(word: &Word) -> bool {
let word = word.to_lowercase();
let word = word.as_str();
let article = Regex::new(r"^(a|an|the)$").unwrap();
Expand All @@ -77,35 +77,35 @@ fn is_reserved(word: String) -> bool {
pub fn lowercase(chunk: Chunk) -> String {
let mut chunk = chunk.clone();
chunk.segments.iter_mut().for_each(|segment| {
if let Segment::Word(s) = segment {
*s = s.to_string().to_lowercase()
if let Segment::Word(word) = segment {
word.word = word.to_lowercase()
}
});
chunk.to_string()
chunk.into()
}

pub fn uppercase(chunk: Chunk) -> String {
let mut chunk = chunk.clone();
chunk.segments.iter_mut().for_each(|segment| {
if let Segment::Word(s) = segment {
*s = s.to_string().to_uppercase()
if let Segment::Word(word) = segment {
word.word = word.to_uppercase()
}
});
chunk.to_string()
chunk.into()
}

pub fn sentencecase(chunk: Chunk) -> String {
let mut chunk = chunk.clone();
let mut done_first = false;
chunk.segments.iter_mut().for_each(|segment| {
if let Segment::Word(s) = segment {
*s = if !done_first {
if let Segment::Word(word) = segment {
word.word = if !done_first {
done_first = true;
s.to_string().to_titlecase_lower_rest()
word.to_titlecase_lower_rest()
} else {
s.to_string().to_lowercase()
word.to_lowercase()
}
}
});
chunk.to_string()
chunk.into()
}
50 changes: 25 additions & 25 deletions src/tr.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: © 2023 Caleb Maclennan <[email protected]>
// SPDX-License-Identifier: LGPL-3.0-only

use crate::content::{Chunk, Segment};
use crate::content::{Chunk, Segment, Word};
use crate::types::StyleGuide;

use regex::Regex;
Expand All @@ -20,63 +20,63 @@ fn titlecase_tdk(chunk: Chunk) -> String {
let mut chunk = chunk.clone();
let mut done_first = false;
chunk.segments.iter_mut().for_each(|segment| {
if let Segment::Word(s) = segment {
*s = if !done_first {
if let Segment::Word(word) = segment {
word.word = if !done_first {
done_first = true;
s.to_string().to_titlecase_tr_or_az_lower_rest()
word.to_titlecase_tr_or_az_lower_rest()
} else {
match is_reserved(s.to_string()) {
true => s.to_string().to_lowercase_tr_az(),
false => s.to_titlecase_tr_or_az_lower_rest(),
match is_reserved(word) {
true => word.word.to_lowercase_tr_az(),
false => word.word.to_titlecase_tr_or_az_lower_rest(),
}
}
}
});
chunk.to_string()
chunk.into()
}

fn is_reserved(word: String) -> bool {
let baglac = Regex::new(
r"^([Vv][Ee]|[İi][Ll][Ee]|[Yy][Aa]|[Vv][Ee]|[Yy][Aa][Hh][Uu][Tt]|[Kk][İi]|[Dd][AaEe])$",
)
.unwrap();
fn is_reserved(word: &Word) -> bool {
let word = word.to_string();
let word = word.as_ref();
let baglac =
Regex::new(r"^([Vv][Ee]|[İi][Ll][Ee]|[Yy][Aa]|[Yy][Aa][Hh][Uu][Tt]|[Kk][İi]|[Dd][AaEe])$")
.unwrap();
let soruek = Regex::new(r"^([Mm][İiIıUuÜü])([Dd][İiIıUuÜü][Rr]([Ll][AaEe][Rr])?|[Ss][İiIıUuÜü][Nn]|[Yy][İiIıUuÜü][Zz]|[Ss][İiIıUuÜü][Nn][İiIıUuÜü][Zz]|[Ll][AaEe][Rr])?$").unwrap();
let word = word.as_str();
baglac.is_match(word) || soruek.is_match(word)
}

pub fn lowercase(chunk: Chunk) -> String {
let mut chunk = chunk.clone();
chunk.segments.iter_mut().for_each(|segment| {
if let Segment::Word(s) = segment {
*s = s.to_string().to_lowercase_tr_az()
if let Segment::Word(word) = segment {
word.word = word.word.to_lowercase_tr_az()
}
});
chunk.to_string()
chunk.into()
}

pub fn uppercase(chunk: Chunk) -> String {
let mut chunk = chunk.clone();
chunk.segments.iter_mut().for_each(|segment| {
if let Segment::Word(s) = segment {
*s = s.to_string().to_uppercase_tr_az()
if let Segment::Word(word) = segment {
word.word = word.word.to_uppercase_tr_az()
}
});
chunk.to_string()
chunk.into()
}

pub fn sentencecase(chunk: Chunk) -> String {
let mut chunk = chunk.clone();
let mut done_first = false;
chunk.segments.iter_mut().for_each(|segment| {
if let Segment::Word(s) = segment {
*s = if !done_first {
if let Segment::Word(word) = segment {
word.word = if !done_first {
done_first = true;
s.to_string().to_titlecase_tr_or_az_lower_rest()
word.word.to_titlecase_tr_or_az_lower_rest()
} else {
s.to_string().to_lowercase_tr_az()
word.word.to_lowercase_tr_az()
}
}
});
chunk.to_string()
chunk.into()
}
4 changes: 2 additions & 2 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ impl FromStr for Locale {
type Err = Error;
fn from_str(s: &str) -> Result<Self> {
match s.to_ascii_lowercase().as_str() {
"en" | "English" | "en_en" => Ok(Locale::EN),
"tr" | "Turkish" | "tr_tr" | "türkçe" => Ok(Locale::TR),
"en" | "english" | "en_en" => Ok(Locale::EN),
"tr" | "turkish" | "tr_tr" | "türkçe" => Ok(Locale::TR),
input => LocaleSnafu { input }.fail()?,
}
}
Expand Down
18 changes: 18 additions & 0 deletions tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,24 @@ case!(
"a b c"
);

// English title case on input with leading and trailing spaces: the expected
// output keeps the surrounding whitespace and capitalizes both words.
// NOTE(review): `case!` is defined outside this view; presumably it generates
// a test comparing the converted input with the expected string — confirm.
case!(
trivia_en,
Case::Title,
Locale::EN,
StyleGuide::LanguageDefault,
" foo bar ",
" Foo Bar "
);

// Turkish title case on input with leading and trailing spaces: the expected
// output keeps the surrounding whitespace and capitalizes both words.
// NOTE(review): `case!` is defined outside this view; presumably it generates
// a test comparing the converted input with the expected string — confirm.
case!(
trivia_tr,
Case::Title,
Locale::TR,
StyleGuide::LanguageDefault,
" foo bar ",
" Foo Bar "
);

macro_rules! titlecase {
($name:ident, $locale:expr, $style:expr, $input:expr, $expected:expr) => {
#[test]
Expand Down

0 comments on commit ef75905

Please sign in to comment.