Rewrite implementation to not require any dependencies

… outside of `core` and `alloc`. The new implementation uses `char::is_alphanumeric` for detecting word boundaries. Raises MSRV to 1.56.
withoutboats · Aug 11, 2023 · df8dc40 · df8dc40
1 parent 76a8274
commit df8dc40
Show file tree

Hide file tree

Showing 13 changed files with 74 additions and 73 deletions.
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -15,18 +15,13 @@ jobs:
     # Use MSRV for the build job
     - uses: actions-rs/toolchain@v1
       with:
-        toolchain: 1.32
+        toolchain: 1.56
         default: true
         profile: minimal
-    - name: Build default
+    - name: Build
       uses: actions-rs/cargo@v1
       with:
         command: build
-    - name: Build with unicode segmentation on
-      uses: actions-rs/cargo@v1
-      with:
-        args: --features unicode
-        command: build
     # Use stable for other jobs
     - uses: actions-rs/toolchain@v1
       with:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,9 @@
+# unreleased
+
+- Add `no_std` support.
+- Removed non-additive `unicode` feature. The library now uses `char::is_alphanumeric`
+  instead of the `unicode-segmentation` library to determine word boundaries in all cases.
+
 # 0.4.1
 
 Improvements:

diff --git a/Cargo.toml b/Cargo.toml
@@ -1,20 +1,14 @@
 [package]
 authors = ["Without Boats <[email protected]>"]
 name = "heck"
-version = "0.4.1"
+version = "0.5.0"
 edition = "2018"
 license = "MIT OR Apache-2.0"
 description = "heck is a case conversion library."
 homepage = "https://github.com/withoutboats/heck"
 repository = "https://github.com/withoutboats/heck"
 documentation = "https://docs.rs/heck"
 keywords = ["string", "case", "camel", "snake", "unicode"]
+categories = ["no-std"]
 readme = "README.md"
 include = ["src/**/*", "LICENSE-*", "README.md", "CHANGELOG.md"]
-
-[features]
-default = []
-unicode = ["unicode-segmentation"]
-
-[dependencies]
-unicode-segmentation = { version = "1.2.0", optional = true }
diff --git a/README.md b/README.md
@@ -8,15 +8,16 @@ consistent, and reasonably well performing.
 
 ## Definition of a word boundary
 
-Word boundaries are defined as the "unicode words" defined in the
-`unicode_segmentation` library, as well as within those words in this manner:
+Word boundaries are defined by non-alphanumeric characters, as well as
+within those words in this manner:
 
-1. All underscore characters are considered word boundaries.
-2. If an uppercase character is followed by lowercase letters, a word boundary
-is considered to be just prior to that uppercase character.
-3. If multiple uppercase characters are consecutive, they are considered to be
-within a single word, except that the last will be part of the next word if it
-is followed by lowercase characters (see rule 2).
+1. If an uppercase character is followed by lowercase letters, a word
+boundary is considered to be just prior to that uppercase character.
+2. If multiple uppercase characters are consecutive, they are considered to
+be within a single word, except that the last will be part of the next word
+if it is followed by lowercase characters (see rule 1).
+3. Non-alphabetic characters inherit the case of the preceding character
+for use in rules 1 and 2.
 
 That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
 segmented `XML|Http|Request`.
@@ -51,7 +52,7 @@ Bug reports & fixes always welcome. :-)
 
 ## MSRV
 
-The minimum supported Rust version for this crate is 1.32.0. This may change in
+The minimum supported Rust version for this crate is 1.56.0. This may change in
 minor or patch releases, but we probably won't ever require a very recent
 version. If you would like to have a stronger guarantee than that, please open
 an issue.

diff --git a/src/kebab.rs b/src/kebab.rs
@@ -1,4 +1,6 @@
-use std::fmt;
+use core::fmt;
+
+use alloc::{borrow::ToOwned, string::ToString};
 
 use crate::{lowercase, transform};
 
@@ -64,7 +66,7 @@ mod tests {
     t!(test6: "SHOUTY_SNAKE_CASE" => "shouty-snake-case");
     t!(test7: "snake_case" => "snake-case");
     t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "this-contains-all-kinds-of-word-boundaries");
-    #[cfg(feature = "unicode")]
     t!(test9: "XΣXΣ baﬄe" => "xσxς-baﬄe");
     t!(test10: "XMLHttpRequest" => "xml-http-request");
+    t!(test11: "لِنَذْهَبْ إِلَى السِّيْنَمَا" => "لِنَذْهَبْ-إِلَى-السِّيْنَمَا");
 }
diff --git a/src/lib.rs b/src/lib.rs
@@ -6,16 +6,16 @@
 //!
 //! ## Definition of a word boundary
 //!
-//! Word boundaries are defined as the "unicode words" defined in the
-//! `unicode_segmentation` library, as well as within those words in this
-//! manner:
+//! Word boundaries are defined by non-alphanumeric characters, as well as
+//! within those words in this manner:
 //!
-//! 1. All underscore characters are considered word boundaries.
-//! 2. If an uppercase character is followed by lowercase letters, a word
+//! 1. If an uppercase character is followed by lowercase letters, a word
 //! boundary is considered to be just prior to that uppercase character.
-//! 3. If multiple uppercase characters are consecutive, they are considered to
+//! 2. If multiple uppercase characters are consecutive, they are considered to
 //! be within a single word, except that the last will be part of the next word
-//! if it is followed by lowercase characters (see rule 2).
+//! if it is followed by lowercase characters (see rule 1).
+//! 3. Non-alphabetic characters inherit the case of the preceding character
+//! for use in rules 1 and 2.
 //!
 //! That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
 //! segmented `XML|Http|Request`.
@@ -40,6 +40,9 @@
 //! 8. Train-Case
 #![deny(missing_docs)]
 #![forbid(unsafe_code)]
+#![no_std]
+
+extern crate alloc;
 
 mod kebab;
 mod lower_camel;
@@ -63,17 +66,7 @@ pub use upper_camel::{
     AsUpperCamelCase, AsUpperCamelCase as AsPascalCase, ToPascalCase, ToUpperCamelCase,
 };
 
-use std::fmt;
-
-#[cfg(feature = "unicode")]
-fn get_iterator(s: &str) -> unicode_segmentation::UnicodeWords {
-    use unicode_segmentation::UnicodeSegmentation;
-    s.unicode_words()
-}
-#[cfg(not(feature = "unicode"))]
-fn get_iterator(s: &str) -> impl Iterator<Item = &str> {
-    s.split(|letter: char| !letter.is_ascii_alphanumeric())
-}
+use core::fmt;
 
 fn transform<F, G>(
     s: &str,
@@ -107,20 +100,12 @@ where
 
     let mut first_word = true;
 
-    for word in get_iterator(s) {
+    for word in s.split(|c: char| !c.is_alphanumeric()) {
         let mut char_indices = word.char_indices().peekable();
         let mut init = 0;
         let mut mode = WordMode::Boundary;
 
         while let Some((i, c)) = char_indices.next() {
-            // Skip underscore characters
-            if c == '_' {
-                if init == i {
-                    init += 1;
-                }
-                continue;
-            }
-
             if let Some(&(next_i, next)) = char_indices.peek() {
                 // The mode including the current character, assuming the
                 // current character does not result in a word boundary.
@@ -132,9 +117,9 @@ where
                     mode
                 };
 
-                // Word boundary after if next is underscore or current is
-                // not uppercase and next is uppercase
-                if next == '_' || (next_mode == WordMode::Lowercase && next.is_uppercase()) {
+                // Word boundary after if current is not uppercase and next
+                // is uppercase
+                if next_mode == WordMode::Lowercase && next.is_uppercase() {
                     if !first_word {
                         boundary(f)?;
                     }

diff --git a/src/lower_camel.rs b/src/lower_camel.rs
@@ -1,4 +1,9 @@
-use std::fmt;
+use core::fmt;
+
+use alloc::{
+    borrow::ToOwned,
+    string::{String, ToString},
+};
 
 use crate::{capitalize, lowercase, transform};
 
@@ -78,7 +83,6 @@ mod tests {
     t!(test6: "SHOUTY_SNAKE_CASE" => "shoutySnakeCase");
     t!(test7: "snake_case" => "snakeCase");
     t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "thisContainsAllKindsOfWordBoundaries");
-    #[cfg(feature = "unicode")]
     t!(test9: "XΣXΣ baﬄe" => "xσxςBaﬄe");
     t!(test10: "XMLHttpRequest" => "xmlHttpRequest");
     // TODO unicode tests

diff --git a/src/shouty_kebab.rs b/src/shouty_kebab.rs
@@ -1,4 +1,6 @@
-use std::fmt;
+use core::fmt;
+
+use alloc::{borrow::ToOwned, string::ToString};
 
 use crate::{transform, uppercase};
 
@@ -65,7 +67,6 @@ mod tests {
     t!(test6: "SHOUTY_SNAKE_CASE" => "SHOUTY-SNAKE-CASE");
     t!(test7: "snake_case" => "SNAKE-CASE");
     t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "THIS-CONTAINS-ALL-KINDS-OF-WORD-BOUNDARIES");
-    #[cfg(feature = "unicode")]
     t!(test9: "XΣXΣ baﬄe" => "XΣXΣ-BAFFLE");
     t!(test10: "XMLHttpRequest" => "XML-HTTP-REQUEST");
     t!(test11: "SHOUTY-KEBAB-CASE" => "SHOUTY-KEBAB-CASE");

diff --git a/src/shouty_snake.rs b/src/shouty_snake.rs
@@ -1,4 +1,6 @@
-use std::fmt;
+use core::fmt;
+
+use alloc::{borrow::ToOwned, string::ToString};
 
 use crate::{transform, uppercase};
 
@@ -20,7 +22,7 @@ pub trait ToShoutySnakeCase: ToOwned {
     fn to_shouty_snake_case(&self) -> Self::Owned;
 }
 
-/// Oh heck, ToShoutySnekCase is an alias for ToShoutySnakeCase. See
+/// Oh heck, `ToShoutySnekCase` is an alias for [`ToShoutySnakeCase`]. See
 /// ToShoutySnakeCase for more documentation.
 pub trait ToShoutySnekCase: ToOwned {
     /// CONVERT THIS TYPE TO SNEK CASE.
@@ -79,7 +81,6 @@ mod tests {
     t!(test6: "SHOUTY_SNAKE_CASE" => "SHOUTY_SNAKE_CASE");
     t!(test7: "snake_case" => "SNAKE_CASE");
     t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "THIS_CONTAINS_ALL_KINDS_OF_WORD_BOUNDARIES");
-    #[cfg(feature = "unicode")]
     t!(test9: "XΣXΣ baﬄe" => "XΣXΣ_BAFFLE");
     t!(test10: "XMLHttpRequest" => "XML_HTTP_REQUEST");
 }
diff --git a/src/snake.rs b/src/snake.rs
@@ -1,4 +1,8 @@
-use std::fmt;
+use alloc::{
+    borrow::ToOwned,
+    fmt,
+    string::{String, ToString},
+};
 
 use crate::{lowercase, transform};
 
@@ -19,7 +23,7 @@ pub trait ToSnakeCase: ToOwned {
     fn to_snake_case(&self) -> Self::Owned;
 }
 
-/// Oh heck, SnekCase is an alias for ToSnakeCase. See ToSnakeCase for
+/// Oh heck, `ToSnekCase` is an alias for [`ToSnakeCase`]. See ToSnakeCase for
 /// more documentation.
 pub trait ToSnekCase: ToOwned {
     /// Convert this type to snek case.
@@ -77,13 +81,11 @@ mod tests {
     t!(test6: "SHOUTY_SNAKE_CASE" => "shouty_snake_case");
     t!(test7: "snake_case" => "snake_case");
     t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "this_contains_all_kinds_of_word_boundaries");
-    #[cfg(feature = "unicode")]
     t!(test9: "XΣXΣ baﬄe" => "xσxς_baﬄe");
     t!(test10: "XMLHttpRequest" => "xml_http_request");
     t!(test11: "FIELD_NAME11" => "field_name11");
     t!(test12: "99BOTTLES" => "99bottles");
     t!(test13: "FieldNamE11" => "field_nam_e11");
-
     t!(test14: "abc123def456" => "abc123def456");
     t!(test16: "abc123DEF456" => "abc123_def456");
     t!(test17: "abc123Def456" => "abc123_def456");

diff --git a/src/title.rs b/src/title.rs
@@ -1,4 +1,9 @@
-use std::fmt;
+use core::fmt;
+
+use alloc::{
+    borrow::ToOwned,
+    string::{String, ToString},
+};
 
 use crate::{capitalize, transform};
 
@@ -65,7 +70,6 @@ mod tests {
     t!(test6: "SHOUTY_SNAKE_CASE" => "Shouty Snake Case");
     t!(test7: "snake_case" => "Snake Case");
     t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "This Contains All Kinds Of Word Boundaries");
-    #[cfg(feature = "unicode")]
     t!(test9: "XΣXΣ baﬄe" => "Xσxς Baﬄe");
     t!(test10: "XMLHttpRequest" => "Xml Http Request");
 }
diff --git a/src/train.rs b/src/train.rs
@@ -1,4 +1,6 @@
-use std::fmt;
+use core::fmt;
+
+use alloc::{borrow::ToOwned, string::ToString};
 
 use crate::{capitalize, transform};
 

diff --git a/src/upper_camel.rs b/src/upper_camel.rs
@@ -1,4 +1,9 @@
-use std::fmt;
+use core::fmt;
+
+use alloc::{
+    borrow::ToOwned,
+    string::{String, ToString},
+};
 
 use crate::{capitalize, transform};
 
@@ -26,7 +31,7 @@ impl ToUpperCamelCase for str {
     }
 }
 
-/// ToPascalCase is an alias for ToUpperCamelCase. See ToUpperCamelCase for more
+/// `ToPascalCase` is an alias for [`ToUpperCamelCase`]. See ToUpperCamelCase for more
 /// documentation.
 pub trait ToPascalCase: ToOwned {
     /// Convert this type to upper camel case.
@@ -78,7 +83,6 @@ mod tests {
     t!(test6: "SHOUTY_SNAKE_CASE" => "ShoutySnakeCase");
     t!(test7: "snake_case" => "SnakeCase");
     t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "ThisContainsAllKindsOfWordBoundaries");
-    #[cfg(feature = "unicode")]
     t!(test9: "XΣXΣ baﬄe" => "XσxςBaﬄe");
     t!(test10: "XMLHttpRequest" => "XmlHttpRequest");
 }