From 6defa3fbc621aa32cc4a502827401cc77b228fd1 Mon Sep 17 00:00:00 2001 From: Moritz Moeller Date: Mon, 27 Feb 2023 20:19:56 +0100 Subject: [PATCH] Added array containing all blocks and put behind a feature flag of same name. --- Cargo.toml | 5 + README.md | 29 +++-- src/all.rs | 335 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 27 ++++- 4 files changed, 378 insertions(+), 18 deletions(-) create mode 100644 src/all.rs diff --git a/Cargo.toml b/Cargo.toml index 64685b6..696eb1a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,8 @@ license = "MIT" include = ["src/**/*", "Cargo.toml", "README.md", "LICENSE"] [dependencies] + +[features] +default = [] +# Include an array that contains references to all unicode blocks. +all = [] diff --git a/README.md b/README.md index 213e816..c2b3c49 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,35 @@ -Unicode Blocks -==================== +# `unicode-blocks` [![CI](https://github.com/magiclen/unicode-blocks/actions/workflows/ci.yml/badge.svg)](https://github.com/magiclen/unicode-blocks/actions/workflows/ci.yml) +[![Documentation](https://docs.rs/unicode-blocks/badge.svg)](https://docs.rs/unicode-blocks) +[![Crate](https://img.shields.io/crates/v/unicode-blocks.svg)](https://crates.io/crates/unicode-blocks) This crate contains a list of all unicode blocks and provides some functions to search across them. +## Features + +* `all` – Include an array that contains references to all unicode blocks. + + As this takes quite a bit of memory (approx 2.6k on a 64bit system) this is + turned off by default. + ## Examples #### Given a character, determine what unicode block contains it. ```rust -assert_eq!(unicode_blocks::BASIC_LATIN, unicode_blocks::find_unicode_block('A').unwrap()); +assert_eq!( + unicode_blocks::BASIC_LATIN, + unicode_blocks::find_unicode_block('A').unwrap() +); ``` #### Given a unicode block, determine whether it is used in CJK. 
```rust -assert!(unicode_blocks::is_cjk_block(unicode_blocks::CJK_UNIFIED_IDEOGRAPHS)); +assert!( + unicode_blocks::is_cjk_block(unicode_blocks::CJK_UNIFIED_IDEOGRAPHS) +); ``` #### Given a character, determine whether it is in CJK. @@ -25,14 +38,6 @@ assert!(unicode_blocks::is_cjk_block(unicode_blocks::CJK_UNIFIED_IDEOGRAPHS)); assert!(unicode_blocks::is_cjk('。')); ``` -## Crates.io - -https://crates.io/crates/unicode-blocks - -## Documentation - -https://docs.rs/unicode-blocks - ## License [MIT](LICENSE) \ No newline at end of file diff --git a/src/all.rs b/src/all.rs new file mode 100644 index 0000000..fd75fc3 --- /dev/null +++ b/src/all.rs @@ -0,0 +1,335 @@ +//! References to all unicode block constants in one array. + +use crate::*; + +/// An array containing references to all unicode blocks. +/// E.g. for iterating over. +pub const ALL: [&UnicodeBlock; 327] = [ + &ADLAM, + &AEGEAN_NUMBERS, + &AHOM, + &ALCHEMICAL_SYMBOLS, + &ALPHABETIC_PRESENTATION_FORMS, + &ANATOLIAN_HIEROGLYPHS, + &ANCIENT_GREEK_MUSICAL_NOTATION, + &ANCIENT_GREEK_NUMBERS, + &ANCIENT_SYMBOLS, + &ARABIC, + &ARABIC_EXTENDED_A, + &ARABIC_EXTENDED_B, + &ARABIC_EXTENDED_C, + &ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, + &ARABIC_PRESENTATION_FORMS_A, + &ARABIC_PRESENTATION_FORMS_B, + &ARABIC_SUPPLEMENT, + &ARMENIAN, + &ARROWS, + &AVESTAN, + &BALINESE, + &BAMUM, + &BAMUM_SUPPLEMENT, + &BASIC_LATIN, + &BASSA_VAH, + &BATAK, + &BENGALI, + &BHAIKSUKI, + &BLOCK_ELEMENTS, + &BOPOMOFO, + &BOPOMOFO_EXTENDED, + &BOX_DRAWING, + &BRAHMI, + &BRAILLE_PATTERNS, + &BUGINESE, + &BUHID, + &BYZANTINE_MUSICAL_SYMBOLS, + &CARIAN, + &CAUCASIAN_ALBANIAN, + &CHAKMA, + &CHAM, + &CHEROKEE, + &CHEROKEE_SUPPLEMENT, + &CHESS_SYMBOLS, + &CHORASMIAN, + &CJK_COMPATIBILITY, + &CJK_COMPATIBILITY_FORMS, + &CJK_COMPATIBILITY_IDEOGRAPHS, + &CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, + &CJK_RADICALS_SUPPLEMENT, + &CJK_STROKES, + &CJK_SYMBOLS_AND_PUNCTUATION, + &CJK_UNIFIED_IDEOGRAPHS, + &CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, + 
&CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, + &CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, + &CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, + &CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, + &CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F, + &CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G, + &CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H, + &COMBINING_DIACRITICAL_MARKS, + &COMBINING_DIACRITICAL_MARKS_EXTENDED, + &COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS, + &COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, + &COMBINING_HALF_MARKS, + &COMMON_INDIC_NUMBER_FORMS, + &CONTROL_PICTURES, + &COPTIC, + &COPTIC_EPACT_NUMBERS, + &COUNTING_ROD_NUMERALS, + &CUNEIFORM, + &CUNEIFORM_NUMBERS_AND_PUNCTUATION, + &CURRENCY_SYMBOLS, + &CYPRIOT_SYLLABARY, + &CYPRO_MINOAN, + &CYRILLIC, + &CYRILLIC_EXTENDED_A, + &CYRILLIC_EXTENDED_B, + &CYRILLIC_EXTENDED_C, + &CYRILLIC_EXTENDED_D, + &CYRILLIC_SUPPLEMENT, + &DESERET, + &DEVANAGARI, + &DEVANAGARI_EXTENDED, + &DEVANAGARI_EXTENDED_A, + &DINGBATS, + &DIVES_AKURU, + &DOGRA, + &DOMINO_TILES, + &DUPLOYAN, + &EARLY_DYNASTIC_CUNEIFORM, + &EGYPTIAN_HIEROGLYPHS, + &EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS, + &ELBASAN, + &ELYMAIC, + &EMOTICONS, + &ENCLOSED_ALPHANUMERICS, + &ENCLOSED_ALPHANUMERIC_SUPPLEMENT, + &ENCLOSED_CJK_LETTERS_AND_MONTHS, + &ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, + &ETHIOPIC, + &ETHIOPIC_EXTENDED, + &ETHIOPIC_EXTENDED_A, + &ETHIOPIC_EXTENDED_B, + &ETHIOPIC_SUPPLEMENT, + &GENERAL_PUNCTUATION, + &GEOMETRIC_SHAPES, + &GEOMETRIC_SHAPES_EXTENDED, + &GEORGIAN, + &GEORGIAN_EXTENDED, + &GEORGIAN_SUPPLEMENT, + &GLAGOLITIC, + &GLAGOLITIC_SUPPLEMENT, + &GOTHIC, + &GRANTHA, + &GREEK_AND_COPTIC, + &GREEK_EXTENDED, + &GUJARATI, + &GUNJALA_GONDI, + &GURMUKHI, + &HALFWIDTH_AND_FULLWIDTH_FORMS, + &HANGUL_COMPATIBILITY_JAMO, + &HANGUL_JAMO, + &HANGUL_JAMO_EXTENDED_A, + &HANGUL_JAMO_EXTENDED_B, + &HANGUL_SYLLABLES, + &HANIFI_ROHINGYA, + &HANUNOO, + &HATRAN, + &HEBREW, + &HIGH_PRIVATE_USE_SURROGATES, + &HIGH_SURROGATES, + &HIRAGANA, + &IDEOGRAPHIC_DESCRIPTION_CHARACTERS, + &IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION, + &IMPERIAL_ARAMAIC, + &INDIC_SIYAQ_NUMBERS,
+ &INSCRIPTIONAL_PAHLAVI, + &INSCRIPTIONAL_PARTHIAN, + &IPA_EXTENSIONS, + &JAVANESE, + &KAITHI, + &KAKTOVIK_NUMERALS, + &KANA_EXTENDED_A, + &KANA_EXTENDED_B, + &KANA_SUPPLEMENT, + &KANBUN, + &KANGXI_RADICALS, + &KANNADA, + &KATAKANA, + &KATAKANA_PHONETIC_EXTENSIONS, + &KAWI, + &KAYAH_LI, + &KHAROSHTHI, + &KHITAN_SMALL_SCRIPT, + &KHMER, + &KHMER_SYMBOLS, + &KHOJKI, + &KHUDAWADI, + &LAO, + &LATIN_1_SUPPLEMENT, + &LATIN_EXTENDED_A, + &LATIN_EXTENDED_ADDITIONAL, + &LATIN_EXTENDED_B, + &LATIN_EXTENDED_C, + &LATIN_EXTENDED_D, + &LATIN_EXTENDED_E, + &LATIN_EXTENDED_F, + &LATIN_EXTENDED_G, + &LEPCHA, + &LETTERLIKE_SYMBOLS, + &LIMBU, + &LINEAR_A, + &LINEAR_B_IDEOGRAMS, + &LINEAR_B_SYLLABARY, + &LISU, + &LISU_SUPPLEMENT, + &LOW_SURROGATES, + &LYCIAN, + &LYDIAN, + &MAHAJANI, + &MAHJONG_TILES, + &MAKASAR, + &MALAYALAM, + &MANDAIC, + &MANICHAEAN, + &MARCHEN, + &MASARAM_GONDI, + &MATHEMATICAL_ALPHANUMERIC_SYMBOLS, + &MATHEMATICAL_OPERATORS, + &MAYAN_NUMERALS, + &MEDEFAIDRIN, + &MEETEI_MAYEK, + &MEETEI_MAYEK_EXTENSIONS, + &MENDE_KIKAKUI, + &MEROITIC_CURSIVE, + &MEROITIC_HIEROGLYPHS, + &MIAO, + &MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, + &MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, + &MISCELLANEOUS_SYMBOLS, + &MISCELLANEOUS_SYMBOLS_AND_ARROWS, + &MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, + &MISCELLANEOUS_TECHNICAL, + &MODI, + &MODIFIER_TONE_LETTERS, + &MONGOLIAN, + &MONGOLIAN_SUPPLEMENT, + &MRO, + &MULTANI, + &MUSICAL_SYMBOLS, + &MYANMAR, + &MYANMAR_EXTENDED_A, + &MYANMAR_EXTENDED_B, + &NABATAEAN, + &NAG_MUNDARI, + &NANDINAGARI, + &NEWA, + &NEW_TAI_LUE, + &NKO, + &NUMBER_FORMS, + &NUSHU, + &NYIAKENG_PUACHUE_HMONG, + &OGHAM, + &OLD_HUNGARIAN, + &OLD_ITALIC, + &OLD_NORTH_ARABIAN, + &OLD_PERMIC, + &OLD_PERSIAN, + &OLD_SOGDIAN, + &OLD_SOUTH_ARABIAN, + &OLD_TURKIC, + &OLD_UYGHUR, + &OL_CHIKI, + &OPTICAL_CHARACTER_RECOGNITION, + &ORIYA, + &ORNAMENTAL_DINGBATS, + &OSAGE, + &OSMANYA, + &OTTOMAN_SIYAQ_NUMBERS, + &PAHAWH_HMONG, + &PALMYRENE, + &PAU_CIN_HAU, + &PHAGS_PA, + &PHAISTOS_DISC, + &PHOENICIAN, 
+ &PHONETIC_EXTENSIONS, + &PHONETIC_EXTENSIONS_SUPPLEMENT, + &PLAYING_CARDS, + &PRIVATE_USE_AREA, + &PSALTER_PAHLAVI, + &REJANG, + &RUMI_NUMERAL_SYMBOLS, + &RUNIC, + &SAMARITAN, + &SAURASHTRA, + &SHARADA, + &SHAVIAN, + &SHORTHAND_FORMAT_CONTROLS, + &SIDDHAM, + &SINHALA, + &SINHALA_ARCHAIC_NUMBERS, + &SMALL_FORM_VARIANTS, + &SMALL_KANA_EXTENSION, + &SOGDIAN, + &SORA_SOMPENG, + &SOYOMBO, + &SPACING_MODIFIER_LETTERS, + &SPECIALS, + &SUNDANESE, + &SUNDANESE_SUPPLEMENT, + &SUPERSCRIPTS_AND_SUBSCRIPTS, + &SUPPLEMENTAL_ARROWS_A, + &SUPPLEMENTAL_ARROWS_B, + &SUPPLEMENTAL_ARROWS_C, + &SUPPLEMENTAL_MATHEMATICAL_OPERATORS, + &SUPPLEMENTAL_PUNCTUATION, + &SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, + &SUPPLEMENTARY_PRIVATE_USE_AREA_A, + &SUPPLEMENTARY_PRIVATE_USE_AREA_B, + &SUTTON_SIGNWRITING, + &SYLOTI_NAGRI, + &SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A, + &SYMBOLS_FOR_LEGACY_COMPUTING, + &SYRIAC, + &SYRIAC_SUPPLEMENT, + &TAGALOG, + &TAGBANWA, + &TAGS, + &TAI_LE, + &TAI_THAM, + &TAI_VIET, + &TAI_XUAN_JING_SYMBOLS, + &TAKRI, + &TAMIL, + &TAMIL_SUPPLEMENT, + &TANGSA, + &TANGUT, + &TANGUT_COMPONENTS, + &TANGUT_SUPPLEMENT, + &TELUGU, + &THAANA, + &THAI, + &TIBETAN, + &TIFINAGH, + &TIRHUTA, + &TOTO, + &TRANSPORT_AND_MAP_SYMBOLS, + &UGARITIC, + &UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, + &UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, + &UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A, + &VAI, + &VARIATION_SELECTORS, + &VARIATION_SELECTORS_SUPPLEMENT, + &VEDIC_EXTENSIONS, + &VERTICAL_FORMS, + &VITHKUQI, + &WANCHO, + &WARANG_CITI, + &YEZIDI, + &YIJING_HEXAGRAM_SYMBOLS, + &YI_RADICALS, + &YI_SYLLABLES, + &ZANABAZAR_SQUARE, + &ZNAMENNY_MUSICAL_NOTATION, +]; diff --git a/src/lib.rs b/src/lib.rs index b5d6dc6..cc8d3a4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,31 +1,46 @@ /*! # Unicode Blocks -This crate contains a list of all unicode blocks and provides some functions to search across them. +This crate contains a list of all unicode blocks and provides some functions +to search across them. 
+ +## Features + +* `all` – Include an array that contains references to all unicode blocks. + + As this takes quite a bit of memory (approx 2.6k on a 64bit system) this is + turned off by default. ## Examples #### Given a character, determine what unicode block contains it. -```rust -assert_eq!(unicode_blocks::BASIC_LATIN, unicode_blocks::find_unicode_block('A').unwrap()); +``` +assert_eq!( + unicode_blocks::BASIC_LATIN, + unicode_blocks::find_unicode_block('A').unwrap() +); ``` #### Given a unicode block, determine whether it is used in CJK. -```rust -assert!(unicode_blocks::is_cjk_block(unicode_blocks::CJK_UNIFIED_IDEOGRAPHS)); +``` +assert!( + unicode_blocks::is_cjk_block(unicode_blocks::CJK_UNIFIED_IDEOGRAPHS) +); ``` #### Given a character, determine whether it is in CJK. -```rust +``` assert!(unicode_blocks::is_cjk('。')); ``` */ #![no_std] +#[cfg(feature = "all")] +pub mod all; mod cjk; mod unicode_block; mod unicode_blocks;