From 9239e7e2feb996e8d764dbda23614663a1882ece Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 29 Sep 2024 08:29:29 -0400 Subject: [PATCH] data: update to UCD 16 --- regex-automata/src/nfa/thompson/compiler.rs | 8 +- .../src/util/unicode_data/perl_word.rs | 65 +- regex-syntax/src/hir/translate.rs | 21 + regex-syntax/src/unicode.rs | 2 + regex-syntax/src/unicode_tables/age.rs | 77 +- .../src/unicode_tables/case_folding_simple.rs | 66 +- .../src/unicode_tables/general_category.rs | 427 +++++-- .../unicode_tables/grapheme_cluster_break.rs | 104 +- .../src/unicode_tables/perl_decimal.rs | 13 +- regex-syntax/src/unicode_tables/perl_space.rs | 6 +- regex-syntax/src/unicode_tables/perl_word.rs | 65 +- .../src/unicode_tables/property_bool.rs | 1122 ++++++++++++++--- .../src/unicode_tables/property_names.rs | 23 +- .../src/unicode_tables/property_values.rs | 38 +- regex-syntax/src/unicode_tables/script.rs | 105 +- .../src/unicode_tables/script_extension.rs | 425 +++++-- .../src/unicode_tables/sentence_break.rs | 113 +- regex-syntax/src/unicode_tables/word_break.rs | 94 +- 18 files changed, 2149 insertions(+), 625 deletions(-) diff --git a/regex-automata/src/nfa/thompson/compiler.rs b/regex-automata/src/nfa/thompson/compiler.rs index 668bca87c..ced17719d 100644 --- a/regex-automata/src/nfa/thompson/compiler.rs +++ b/regex-automata/src/nfa/thompson/compiler.rs @@ -230,15 +230,15 @@ impl Config { /// # if cfg!(miri) { return Ok(()); } // miri takes too long /// use regex_automata::nfa::thompson::NFA; /// - /// // 300KB isn't enough! + /// // 400KB isn't enough! /// NFA::compiler() - /// .configure(NFA::config().nfa_size_limit(Some(300_000))) + /// .configure(NFA::config().nfa_size_limit(Some(400_000))) /// .build(r"\w{20}") /// .unwrap_err(); /// - /// // ... but 400KB probably is. + /// // ... but 500KB probably is. /// let nfa = NFA::compiler() - /// .configure(NFA::config().nfa_size_limit(Some(400_000))) + /// .configure(NFA::config().nfa_size_limit(Some(500_000))) /// .build(r"\w{20}")?; /// /// assert_eq!(nfa.pattern_len(), 1); diff --git a/regex-automata/src/util/unicode_data/perl_word.rs b/regex-automata/src/util/unicode_data/perl_word.rs index 74d62656f..21c8c0f9c 100644 --- a/regex-automata/src/util/unicode_data/perl_word.rs +++ b/regex-automata/src/util/unicode_data/perl_word.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate perl-word tmp/ucd-15.0.0/ --chars +// ucd-generate perl-word ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.15 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const PERL_WORD: &'static [(char, char)] = &[ ('0', '9'), @@ -59,7 +59,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ࡠ', 'ࡪ'), ('ࡰ', 'ࢇ'), ('ࢉ', 'ࢎ'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), @@ -158,8 +158,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -243,8 +243,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', '᜕'), - ('ᜟ', '᜴'), + ('ᜀ', '\u{1715}'), + ('ᜟ', '\u{1734}'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -276,11 +276,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '᯳'), + ('\u{1b80}', '\u{1bf3}'), ('ᰀ', '\u{1c37}'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), @@ -367,10 +367,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), @@ -379,9 +379,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{a8e0}', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', '\u{a92d}'), - ('ꤰ', '꥓'), + ('ꤰ', '\u{a953}'), ('ꥠ', 'ꥼ'), - ('\u{a980}', '꧀'), + ('\u{a980}', '\u{a9c0}'), ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', '\u{aa36}'), @@ -468,6 +468,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -508,10 +509,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), + ('𐵀', '𐵥'), + ('\u{10d69}', '\u{10d6d}'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), - ('\u{10efd}', '𐼜'), + ('𐻂', '𐻄'), + ('\u{10efc}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), ('𐽰', '\u{10f85}'), @@ -551,12 +556,22 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133b}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏓'), + ('\u{113e1}', '\u{113e2}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), ('\u{1145e}', '𑑡'), @@ -571,6 +586,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑙐', '𑙙'), ('𑚀', '𑚸'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), @@ -594,6 +610,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), + ('𑯰', '𑯹'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), @@ -618,7 +636,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f42}'), - ('𑽐', '𑽙'), + ('𑽐', '\u{11f5a}'), ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), @@ -626,7 +644,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('\u{13440}', '\u{13455}'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄹'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), @@ -639,16 +659,18 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𖭐', '𖭙'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), + ('𖵰', '𖵹'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -663,10 +685,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), + ('𜳰', '𜳹'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -724,6 +747,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), ('𞓐', '𞓹'), + ('𞗐', '𞗺'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -774,6 +798,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 3749ce307..e8e5a8812 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -3143,10 +3143,31 @@ mod tests { #[cfg(feature = "unicode-script")] assert_eq!( t(r"[\p{sc:Greek}~~\p{scx:Greek}]"), + // Class({ + // '·'..='·', + // '\u{300}'..='\u{301}', + // '\u{304}'..='\u{304}', + // '\u{306}'..='\u{306}', + // '\u{308}'..='\u{308}', + // '\u{313}'..='\u{313}', + // '\u{342}'..='\u{342}', + // '\u{345}'..='\u{345}', + // 'ʹ'..='ʹ', + // '\u{1dc0}'..='\u{1dc1}', + // '⁝'..='⁝', + // }) hir_uclass(&[ + ('·', '·'), + ('\u{0300}', '\u{0301}'), + ('\u{0304}', '\u{0304}'), + ('\u{0306}', '\u{0306}'), + ('\u{0308}', '\u{0308}'), + ('\u{0313}', '\u{0313}'), ('\u{0342}', '\u{0342}'), ('\u{0345}', '\u{0345}'), + ('ʹ', 'ʹ'), ('\u{1DC0}', '\u{1DC1}'), + ('⁝', '⁝'), ]) ); assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')])); diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs index 393a4c018..07f78194b 100644 --- a/regex-syntax/src/unicode.rs +++ b/regex-syntax/src/unicode.rs @@ -675,6 +675,8 @@ fn ages(canonical_age: &str) -> Result, Error> { ("V13_0", age::V13_0), ("V14_0", age::V14_0), ("V15_0", age::V15_0), + ("V15_1", age::V15_1), + ("V16_0", age::V16_0), ]; assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync"); diff --git a/regex-syntax/src/unicode_tables/age.rs b/regex-syntax/src/unicode_tables/age.rs index 71f4861e0..466510c9e 100644 --- a/regex-syntax/src/unicode_tables/age.rs +++ b/regex-syntax/src/unicode_tables/age.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate age ucd-15.0.0 --chars +// ucd-generate age ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("V10_0", V10_0), @@ -14,6 +14,8 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("V13_0", V13_0), ("V14_0", V14_0), ("V15_0", V15_0), + ("V15_1", V15_1), + ("V16_0", V16_0), ("V1_1", V1_1), ("V2_0", V2_0), ("V2_1", V2_1), @@ -238,7 +240,7 @@ pub const V13_0: &'static [(char, char)] = &[ ('𑥐', '𑥙'), ('𑾰', '𑾰'), ('\u{16fe4}', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𘫳', '𘳕'), ('𘴀', '𘴈'), ('🄍', '🄏'), @@ -277,7 +279,7 @@ pub const V14_0: &'static [(char, char)] = &[ ('ౝ', 'ౝ'), ('ೝ', 'ೝ'), ('ᜍ', 'ᜍ'), - ('᜕', '᜕'), + ('\u{1715}', '\u{1715}'), ('ᜟ', 'ᜟ'), ('\u{180f}', '\u{180f}'), ('\u{1ac1}', '\u{1ace}'), @@ -382,6 +384,59 @@ pub const V15_0: &'static [(char, char)] = &[ ('𱍐', '𲎯'), ]; +pub const V15_1: &'static [(char, char)] = + &[('⿼', '⿿'), ('㇯', '㇯'), ('𮯰', '𮹝')]; + +pub const V16_0: &'static [(char, char)] = &[ + ('\u{897}', '\u{897}'), + ('᭎', '᭏'), + ('᭿', '᭿'), + ('Ᲊ', 'ᲊ'), + ('␧', '␩'), + ('㇤', '㇥'), + ('Ɤ', 'ꟍ'), + ('Ꟛ', 'Ƛ'), + ('𐗀', '𐗳'), + ('𐵀', '𐵥'), + ('\u{10d69}', '𐶅'), + ('𐶎', '𐶏'), + ('𐻂', '𐻄'), + ('\u{10efc}', '\u{10efc}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏕'), + ('𑏗', '𑏘'), + ('\u{113e1}', '\u{113e2}'), + ('𑛐', '𑛣'), + ('𑯀', '𑯡'), + ('𑯰', '𑯹'), + ('\u{11f5a}', '\u{11f5a}'), + ('𓑠', '𔏺'), + ('𖄀', '𖄹'), + ('𖵀', '𖵹'), + ('𘳿', '𘳿'), + ('𜰀', '𜳹'), + ('𜴀', '𜺳'), + ('𞗐', '𞗺'), + ('𞗿', '𞗿'), + ('🢲', '🢻'), + ('🣀', '🣁'), + ('🪉', '🪉'), + ('🪏', '🪏'), + ('🪾', '🪾'), + ('🫆', '🫆'), + ('🫜', '🫜'), + ('🫟', '🫟'), + ('🫩', '🫩'), + ('🯋', '🯯'), +]; + pub const V1_1: &'static [(char, char)] = &[ ('\0', 'ǵ'), ('Ǻ', 'ȗ'), @@ -530,8 +585,8 @@ pub const V1_1: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಾ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'), @@ -1174,7 +1229,7 @@ pub const V5_1: &'static [(char, char)] = &[ ('ၚ', '႙'), ('႞', '႟'), ('ᢪ', 'ᢪ'), - ('\u{1b80}', '᮪'), + ('\u{1b80}', '\u{1baa}'), ('ᮮ', '᮹'), ('ᰀ', '\u{1c37}'), ('᰻', '᱉'), @@ -1212,7 +1267,7 @@ pub const V5_1: &'static [(char, char)] = &[ ('ꟻ', 'ꟿ'), ('ꢀ', '\u{a8c4}'), ('꣎', '꣙'), - ('꤀', '꥓'), + ('꤀', '\u{a953}'), ('꥟', '꥟'), ('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), @@ -1340,7 +1395,7 @@ pub const V6_0: &'static [(char, char)] = &[ ('ྌ', '\u{f8f}'), ('࿙', '࿚'), ('\u{135d}', '\u{135e}'), - ('ᯀ', '᯳'), + ('ᯀ', '\u{1bf3}'), ('᯼', '᯿'), ('\u{1dfc}', '\u{1dfc}'), ('ₕ', 'ₜ'), @@ -1606,7 +1661,7 @@ pub const V7_0: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133c}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), ('\u{11366}', '\u{1136c}'), diff --git a/regex-syntax/src/unicode_tables/case_folding_simple.rs b/regex-syntax/src/unicode_tables/case_folding_simple.rs index 23f9364ce..07f6ff2f5 100644 --- a/regex-syntax/src/unicode_tables/case_folding_simple.rs +++ b/regex-syntax/src/unicode_tables/case_folding_simple.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate case-folding-simple ucd-15.0.0 --chars --all-pairs +// ucd-generate case-folding-simple ucd-16.0.0 --chars --all-pairs // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('A', &['a']), @@ -272,6 +272,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('Ƙ', &['ƙ']), ('ƙ', &['Ƙ']), ('ƚ', &['Ƚ']), + ('ƛ', &['Ƛ']), ('Ɯ', &['ɯ']), ('Ɲ', &['ɲ']), ('ƞ', &['Ƞ']), @@ -448,6 +449,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ɠ', &['Ɠ']), ('ɡ', &['Ɡ']), ('ɣ', &['Ɣ']), + ('ɤ', &['Ɤ']), ('ɥ', &['Ɥ']), ('ɦ', &['Ɦ']), ('ɨ', &['Ɨ']), @@ -490,6 +492,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('Ό', &['ό']), ('Ύ', &['ύ']), ('Ώ', &['ώ']), + ('ΐ', &['ΐ']), ('Α', &['α']), ('Β', &['β', 'ϐ']), ('Γ', &['γ']), @@ -520,6 +523,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('έ', &['Έ']), ('ή', &['Ή']), ('ί', &['Ί']), + ('ΰ', &['ΰ']), ('α', &['Α']), ('β', &['Β', 'ϐ']), ('γ', &['Γ']), @@ -1153,6 +1157,8 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ᲆ', &['Ъ', 'ъ']), ('ᲇ', &['Ѣ', 'ѣ']), ('ᲈ', &['Ꙋ', 'ꙋ']), + ('Ᲊ', &['ᲊ']), + ('ᲊ', &['Ᲊ']), ('Ა', &['ა']), ('Ბ', &['ბ']), ('Გ', &['გ']), @@ -1625,12 +1631,14 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ῌ', &['ῃ']), ('ῐ', &['Ῐ']), ('ῑ', &['Ῑ']), + ('ΐ', &['ΐ']), ('Ῐ', &['ῐ']), ('Ῑ', &['ῑ']), ('Ὶ', &['ὶ']), ('Ί', &['ί']), ('ῠ', &['Ῠ']), ('ῡ', &['Ῡ']), + ('ΰ', &['ΰ']), ('ῥ', &['Ῥ']), ('Ῠ', &['ῠ']), ('Ῡ', &['ῡ']), @@ -2224,12 +2232,18 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ꟈ', &['Ꟈ']), ('Ꟊ', &['ꟊ']), ('ꟊ', &['Ꟊ']), + ('Ɤ', &['ɤ']), + ('Ꟍ', &['ꟍ']), + ('ꟍ', &['Ꟍ']), ('Ꟑ', &['ꟑ']), ('ꟑ', &['Ꟑ']), ('Ꟗ', &['ꟗ']), ('ꟗ', &['Ꟗ']), ('Ꟙ', &['ꟙ']), ('ꟙ', &['Ꟙ']), + ('Ꟛ', &['ꟛ']), + ('ꟛ', &['Ꟛ']), + ('Ƛ', &['ƛ']), ('Ꟶ', &['ꟶ']), ('ꟶ', &['Ꟶ']), ('ꭓ', &['Ꭓ']), @@ -2313,6 +2327,8 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ꮽ', &['Ꮽ']), ('ꮾ', &['Ꮾ']), ('ꮿ', &['Ꮿ']), + ('ſt', &['st']), + ('st', &['ſt']), ('A', &['a']), ('B', &['b']), ('C', &['c']), @@ -2689,6 +2705,50 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('𐳰', &['𐲰']), ('𐳱', &['𐲱']), ('𐳲', &['𐲲']), + ('𐵐', &['𐵰']), + ('𐵑', &['𐵱']), + ('𐵒', &['𐵲']), + ('𐵓', &['𐵳']), + ('𐵔', &['𐵴']), + ('𐵕', &['𐵵']), + ('𐵖', &['𐵶']), + ('𐵗', &['𐵷']), + ('𐵘', &['𐵸']), + ('𐵙', &['𐵹']), + ('𐵚', &['𐵺']), + ('𐵛', &['𐵻']), + ('𐵜', &['𐵼']), + ('𐵝', &['𐵽']), + ('𐵞', &['𐵾']), + ('𐵟', &['𐵿']), + ('𐵠', &['𐶀']), + ('𐵡', &['𐶁']), + ('𐵢', &['𐶂']), + ('𐵣', &['𐶃']), + ('𐵤', &['𐶄']), + ('𐵥', &['𐶅']), + ('𐵰', &['𐵐']), + ('𐵱', &['𐵑']), + ('𐵲', &['𐵒']), + ('𐵳', &['𐵓']), + ('𐵴', &['𐵔']), + ('𐵵', &['𐵕']), + ('𐵶', &['𐵖']), + ('𐵷', &['𐵗']), + ('𐵸', &['𐵘']), + ('𐵹', &['𐵙']), + ('𐵺', &['𐵚']), + ('𐵻', &['𐵛']), + ('𐵼', &['𐵜']), + ('𐵽', &['𐵝']), + ('𐵾', &['𐵞']), + ('𐵿', &['𐵟']), + ('𐶀', &['𐵠']), + ('𐶁', &['𐵡']), + ('𐶂', &['𐵢']), + ('𐶃', &['𐵣']), + ('𐶄', &['𐵤']), + ('𐶅', &['𐵥']), ('𑢠', &['𑣀']), ('𑢡', &['𑣁']), ('𑢢', &['𑣂']), diff --git a/regex-syntax/src/unicode_tables/general_category.rs b/regex-syntax/src/unicode_tables/general_category.rs index 8fc928912..6ff6b5384 100644 --- a/regex-syntax/src/unicode_tables/general_category.rs +++ b/regex-syntax/src/unicode_tables/general_category.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate general-category ucd-15.0.0 --chars --exclude surrogate +// ucd-generate general-category ucd-16.0.0 --chars --exclude surrogate // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Cased_Letter", CASED_LETTER), @@ -76,7 +76,7 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('ჽ', 'ჿ'), ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᴀ', 'ᴫ'), @@ -128,10 +128,10 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('Ꜣ', 'ꝯ'), ('ꝱ', 'ꞇ'), ('Ꞌ', 'ꞎ'), - ('Ꞑ', 'ꟊ'), + ('Ꞑ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('Ꟶ', 'ꟶ'), ('ꟺ', 'ꟺ'), ('ꬰ', 'ꭚ'), @@ -154,6 +154,8 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('𐖻', '𐖼'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), + ('𐵐', '𐵥'), + ('𐵰', '𐶅'), ('𑢠', '𑣟'), ('𖹀', '𖹿'), ('𝐀', '𝑔'), @@ -326,6 +328,7 @@ pub const DASH_PUNCTUATION: &'static [(char, char)] = &[ ('﹘', '﹘'), ('﹣', '﹣'), ('-', '-'), + ('𐵮', '𐵮'), ('𐺭', '𐺭'), ]; @@ -369,6 +372,7 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('0', '9'), ('𐒠', '𐒩'), ('𐴰', '𐴹'), + ('𐵀', '𐵉'), ('𑁦', '𑁯'), ('𑃰', '𑃹'), ('𑄶', '𑄿'), @@ -378,20 +382,26 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), ('𑥐', '𑥙'), + ('𑯰', '𑯹'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), ('𑽐', '𑽙'), + ('𖄰', '𖄹'), ('𖩠', '𖩩'), ('𖫀', '𖫉'), ('𖭐', '𖭙'), + ('𖵰', '𖵹'), + ('𜳰', '𜳹'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), ('𞓰', '𞓹'), + ('𞗱', '𞗺'), ('𞥐', '𞥙'), ('🯰', '🯹'), ]; @@ -681,7 +691,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᳩ', 'ᳬ'), @@ -765,10 +775,10 @@ pub const LETTER: &'static [(char, char)] = &[ ('ꚠ', 'ꛥ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), @@ -865,6 +875,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -901,8 +912,11 @@ pub const LETTER: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'), + ('𐵊', '𐵥'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -941,6 +955,13 @@ pub const LETTER: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -975,6 +996,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -997,7 +1019,9 @@ pub const LETTER: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -1006,6 +1030,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), @@ -1014,7 +1039,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𖿣', '𖿣'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -1067,6 +1092,8 @@ pub const LETTER: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -1112,6 +1139,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -1410,6 +1438,7 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('ჽ', 'ჿ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), + ('ᲊ', 'ᲊ'), ('ᴀ', 'ᴫ'), ('ᵫ', 'ᵷ'), ('ᵹ', 'ᶚ'), @@ -1740,11 +1769,13 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('ꟃ', 'ꟃ'), ('ꟈ', 'ꟈ'), ('ꟊ', 'ꟊ'), + ('ꟍ', 'ꟍ'), ('ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), ('ꟕ', 'ꟕ'), ('ꟗ', 'ꟗ'), ('ꟙ', 'ꟙ'), + ('ꟛ', 'ꟛ'), ('ꟶ', 'ꟶ'), ('ꟺ', 'ꟺ'), ('ꬰ', 'ꭚ'), @@ -1760,6 +1791,7 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('𐖳', '𐖹'), ('𐖻', '𐖼'), ('𐳀', '𐳲'), + ('𐵰', '𐶅'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), ('𝐚', '𝐳'), @@ -1821,7 +1853,7 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'ः'), ('\u{93a}', '\u{93c}'), @@ -1873,8 +1905,8 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{c81}', 'ಃ'), ('\u{cbc}', '\u{cbc}'), ('ಾ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('ೳ', 'ೳ'), @@ -1917,8 +1949,8 @@ pub const MARK: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '᜕'), - ('\u{1732}', '᜴'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), @@ -1935,11 +1967,11 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), - ('\u{1b34}', '᭄'), + ('\u{1b34}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', 'ᮂ'), ('ᮡ', '\u{1bad}'), - ('\u{1be6}', '᯳'), + ('\u{1be6}', '\u{1bf3}'), ('ᰤ', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce8}'), @@ -1967,9 +1999,9 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{a8e0}', '\u{a8f1}'), ('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '꥓'), + ('\u{a947}', '\u{a953}'), ('\u{a980}', 'ꦃ'), - ('\u{a9b3}', '꧀'), + ('\u{a9b3}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa36}'), ('\u{aa43}', '\u{aa43}'), @@ -1997,8 +2029,9 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('𑀀', '𑀂'), @@ -2013,7 +2046,7 @@ pub const MARK: &'static [(char, char)] = &[ ('𑅅', '𑅆'), ('\u{11173}', '\u{11173}'), ('\u{11180}', '𑆂'), - ('𑆳', '𑇀'), + ('𑆳', '\u{111c0}'), ('\u{111c9}', '\u{111cc}'), ('𑇎', '\u{111cf}'), ('𑈬', '\u{11237}'), @@ -2024,11 +2057,18 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('𑍢', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('𑐵', '\u{11446}'), ('\u{1145e}', '\u{1145e}'), ('\u{114b0}', '\u{114c3}'), @@ -2070,20 +2110,22 @@ pub const MARK: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '\u{11f3a}'), ('𑼾', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -2104,6 +2146,7 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0100}', '\u{e01ef}'), @@ -2163,6 +2206,7 @@ pub const MATH_SYMBOL: &'static [(char, char)] = &[ ('~', '~'), ('¬', '¬'), ('←', '↓'), + ('𐶎', '𐶏'), ('𝛁', '𝛁'), ('𝛛', '𝛛'), ('𝛻', '𝛻'), @@ -2237,7 +2281,11 @@ pub const MODIFIER_LETTER: &'static [(char, char)] = &[ ('𐞀', '𐞅'), ('𐞇', '𐞰'), ('𐞲', '𐞺'), + ('𐵎', '𐵎'), + ('𐵯', '𐵯'), ('𖭀', '𖭃'), + ('𖵀', '𖵂'), + ('𖵫', '𖵬'), ('𖾓', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '𖿣'), @@ -2309,7 +2357,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), @@ -2507,8 +2555,9 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), @@ -2539,6 +2588,11 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{11340}', '\u{11340}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113bb}', '\u{113c0}'), + ('\u{113ce}', '\u{113ce}'), + ('\u{113d0}', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('\u{11438}', '\u{1143f}'), ('\u{11442}', '\u{11444}'), ('\u{11446}', '\u{11446}'), @@ -2558,7 +2612,8 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{116ad}', '\u{116ad}'), ('\u{116b0}', '\u{116b5}'), ('\u{116b7}', '\u{116b7}'), - ('\u{1171d}', '\u{1171f}'), + ('\u{1171d}', '\u{1171d}'), + ('\u{1171f}', '\u{1171f}'), ('\u{11722}', '\u{11725}'), ('\u{11727}', '\u{1172b}'), ('\u{1182f}', '\u{11837}'), @@ -2597,8 +2652,11 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{11f36}', '\u{11f3a}'), ('\u{11f40}', '\u{11f40}'), ('\u{11f42}', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{16129}'), + ('\u{1612d}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), @@ -2628,6 +2686,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0100}', '\u{e01ef}'), @@ -2727,6 +2786,7 @@ pub const NUMBER: &'static [(char, char)] = &[ ('𐮩', '𐮯'), ('𐳺', '𐳿'), ('𐴰', '𐴹'), + ('𐵀', '𐵉'), ('𐹠', '𐹾'), ('𐼝', '𐼦'), ('𐽑', '𐽔'), @@ -2741,20 +2801,25 @@ pub const NUMBER: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜰', '𑜻'), ('𑣠', '𑣲'), ('𑥐', '𑥙'), + ('𑯰', '𑯹'), ('𑱐', '𑱬'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), ('𑽐', '𑽙'), ('𑿀', '𑿔'), ('𒐀', '𒑮'), + ('𖄰', '𖄹'), ('𖩠', '𖩩'), ('𖫀', '𖫉'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), + ('𖵰', '𖵹'), ('𖺀', '𖺖'), + ('𜳰', '𜳹'), ('𝋀', '𝋓'), ('𝋠', '𝋳'), ('𝍠', '𝍸'), @@ -2762,6 +2827,7 @@ pub const NUMBER: &'static [(char, char)] = &[ ('𞅀', '𞅉'), ('𞋰', '𞋹'), ('𞓰', '𞓹'), + ('𞗱', '𞗺'), ('𞣇', '𞣏'), ('𞥐', '𞥙'), ('𞱱', '𞲫'), @@ -2882,7 +2948,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'), ('\u{86b}', '\u{86f}'), - ('\u{88f}', '\u{897}'), + ('\u{88f}', '\u{896}'), ('\u{8e2}', '\u{8e2}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'), @@ -3076,12 +3142,11 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1a9a}', '\u{1a9f}'), ('\u{1aae}', '\u{1aaf}'), ('\u{1acf}', '\u{1aff}'), - ('\u{1b4d}', '\u{1b4f}'), - ('\u{1b7f}', '\u{1b7f}'), + ('\u{1b4d}', '\u{1b4d}'), ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'), ('\u{1c4a}', '\u{1c4c}'), - ('\u{1c89}', '\u{1c8f}'), + ('\u{1c8b}', '\u{1c8f}'), ('\u{1cbb}', '\u{1cbc}'), ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfb}', '\u{1cff}'), @@ -3110,7 +3175,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{20c1}', '\u{20cf}'), ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'), - ('\u{2427}', '\u{243f}'), + ('\u{242a}', '\u{243f}'), ('\u{244b}', '\u{245f}'), ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b96}'), @@ -3133,22 +3198,21 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'), ('\u{2fd6}', '\u{2fef}'), - ('\u{2ffc}', '\u{2fff}'), ('\u{3040}', '\u{3040}'), ('\u{3097}', '\u{3098}'), ('\u{3100}', '\u{3104}'), ('\u{3130}', '\u{3130}'), ('\u{318f}', '\u{318f}'), - ('\u{31e4}', '\u{31ef}'), + ('\u{31e6}', '\u{31ee}'), ('\u{321f}', '\u{321f}'), ('\u{a48d}', '\u{a48f}'), ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'), ('\u{a6f8}', '\u{a6ff}'), - ('\u{a7cb}', '\u{a7cf}'), + ('\u{a7ce}', '\u{a7cf}'), ('\u{a7d2}', '\u{a7d2}'), ('\u{a7d4}', '\u{a7d4}'), - ('\u{a7da}', '\u{a7f1}'), + ('\u{a7dd}', '\u{a7f1}'), ('\u{a82d}', '\u{a82f}'), ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'), @@ -3237,7 +3301,8 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{105a2}', '\u{105a2}'), ('\u{105b2}', '\u{105b2}'), ('\u{105ba}', '\u{105ba}'), - ('\u{105bd}', '\u{105ff}'), + ('\u{105bd}', '\u{105bf}'), + ('\u{105f4}', '\u{105ff}'), ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'), ('\u{10768}', '\u{1077f}'), @@ -3280,11 +3345,15 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{10cb3}', '\u{10cbf}'), ('\u{10cf3}', '\u{10cf9}'), ('\u{10d28}', '\u{10d2f}'), - ('\u{10d3a}', '\u{10e5f}'), + ('\u{10d3a}', '\u{10d3f}'), + ('\u{10d66}', '\u{10d68}'), + ('\u{10d86}', '\u{10d8d}'), + ('\u{10d90}', '\u{10e5f}'), ('\u{10e7f}', '\u{10e7f}'), ('\u{10eaa}', '\u{10eaa}'), ('\u{10eae}', '\u{10eaf}'), - ('\u{10eb2}', '\u{10efc}'), + ('\u{10eb2}', '\u{10ec1}'), + ('\u{10ec5}', '\u{10efb}'), ('\u{10f28}', '\u{10f2f}'), ('\u{10f5a}', '\u{10f6f}'), ('\u{10f8a}', '\u{10faf}'), @@ -3324,7 +3393,18 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{11358}', '\u{1135c}'), ('\u{11364}', '\u{11365}'), ('\u{1136d}', '\u{1136f}'), - ('\u{11375}', '\u{113ff}'), + ('\u{11375}', '\u{1137f}'), + ('\u{1138a}', '\u{1138a}'), + ('\u{1138c}', '\u{1138d}'), + ('\u{1138f}', '\u{1138f}'), + ('\u{113b6}', '\u{113b6}'), + ('\u{113c1}', '\u{113c1}'), + ('\u{113c3}', '\u{113c4}'), + ('\u{113c6}', '\u{113c6}'), + ('\u{113cb}', '\u{113cb}'), + ('\u{113d6}', '\u{113d6}'), + ('\u{113d9}', '\u{113e0}'), + ('\u{113e3}', '\u{113ff}'), ('\u{1145c}', '\u{1145c}'), ('\u{11462}', '\u{1147f}'), ('\u{114c8}', '\u{114cf}'), @@ -3335,7 +3415,8 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1165a}', '\u{1165f}'), ('\u{1166d}', '\u{1167f}'), ('\u{116ba}', '\u{116bf}'), - ('\u{116ca}', '\u{116ff}'), + ('\u{116ca}', '\u{116cf}'), + ('\u{116e4}', '\u{116ff}'), ('\u{1171b}', '\u{1171c}'), ('\u{1172c}', '\u{1172f}'), ('\u{11747}', '\u{117ff}'), @@ -3355,7 +3436,9 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{11a48}', '\u{11a4f}'), ('\u{11aa3}', '\u{11aaf}'), ('\u{11af9}', '\u{11aff}'), - ('\u{11b0a}', '\u{11bff}'), + ('\u{11b0a}', '\u{11bbf}'), + ('\u{11be2}', '\u{11bef}'), + ('\u{11bfa}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'), ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'), @@ -3379,7 +3462,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{11ef9}', '\u{11eff}'), ('\u{11f11}', '\u{11f11}'), ('\u{11f3b}', '\u{11f3d}'), - ('\u{11f5a}', '\u{11faf}'), + ('\u{11f5b}', '\u{11faf}'), ('\u{11fb1}', '\u{11fbf}'), ('\u{11ff2}', '\u{11ffe}'), ('\u{1239a}', '\u{123ff}'), @@ -3388,8 +3471,10 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{12544}', '\u{12f8f}'), ('\u{12ff3}', '\u{12fff}'), ('\u{13430}', '\u{1343f}'), - ('\u{13456}', '\u{143ff}'), - ('\u{14647}', '\u{167ff}'), + ('\u{13456}', '\u{1345f}'), + ('\u{143fb}', '\u{143ff}'), + ('\u{14647}', '\u{160ff}'), + ('\u{1613a}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'), ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'), @@ -3401,7 +3486,8 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{16b5a}', '\u{16b5a}'), ('\u{16b62}', '\u{16b62}'), ('\u{16b78}', '\u{16b7c}'), - ('\u{16b90}', '\u{16e3f}'), + ('\u{16b90}', '\u{16d3f}'), + ('\u{16d7a}', '\u{16e3f}'), ('\u{16e9b}', '\u{16eff}'), ('\u{16f4b}', '\u{16f4e}'), ('\u{16f88}', '\u{16f8e}'), @@ -3409,7 +3495,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{16fe5}', '\u{16fef}'), ('\u{16ff2}', '\u{16fff}'), ('\u{187f8}', '\u{187ff}'), - ('\u{18cd6}', '\u{18cff}'), + ('\u{18cd6}', '\u{18cfe}'), ('\u{18d09}', '\u{1afef}'), ('\u{1aff4}', '\u{1aff4}'), ('\u{1affc}', '\u{1affc}'), @@ -3424,7 +3510,9 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1bc7d}', '\u{1bc7f}'), ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'), - ('\u{1bca0}', '\u{1ceff}'), + ('\u{1bca0}', '\u{1cbff}'), + ('\u{1ccfa}', '\u{1ccff}'), + ('\u{1ceb4}', '\u{1ceff}'), ('\u{1cf2e}', '\u{1cf2f}'), ('\u{1cf47}', '\u{1cf4f}'), ('\u{1cfc4}', '\u{1cfff}'), @@ -3476,7 +3564,9 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1e2af}', '\u{1e2bf}'), ('\u{1e2fa}', '\u{1e2fe}'), ('\u{1e300}', '\u{1e4cf}'), - ('\u{1e4fa}', '\u{1e7df}'), + ('\u{1e4fa}', '\u{1e5cf}'), + ('\u{1e5fb}', '\u{1e5fe}'), + ('\u{1e600}', '\u{1e7df}'), ('\u{1e7e7}', '\u{1e7e7}'), ('\u{1e7ec}', '\u{1e7ec}'), ('\u{1e7ef}', '\u{1e7ef}'), @@ -3546,24 +3636,24 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8af}'), - ('\u{1f8b2}', '\u{1f8ff}'), + ('\u{1f8bc}', '\u{1f8bf}'), + ('\u{1f8c2}', '\u{1f8ff}'), ('\u{1fa54}', '\u{1fa5f}'), ('\u{1fa6e}', '\u{1fa6f}'), ('\u{1fa7d}', '\u{1fa7f}'), - ('\u{1fa89}', '\u{1fa8f}'), - ('\u{1fabe}', '\u{1fabe}'), - ('\u{1fac6}', '\u{1facd}'), - ('\u{1fadc}', '\u{1fadf}'), - ('\u{1fae9}', '\u{1faef}'), + ('\u{1fa8a}', '\u{1fa8e}'), + ('\u{1fac7}', '\u{1facd}'), + ('\u{1fadd}', '\u{1fade}'), + ('\u{1faea}', '\u{1faef}'), ('\u{1faf9}', '\u{1faff}'), ('\u{1fb93}', '\u{1fb93}'), - ('\u{1fbcb}', '\u{1fbef}'), ('\u{1fbfa}', '\u{1ffff}'), ('\u{2a6e0}', '\u{2a6ff}'), ('\u{2b73a}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}', '\u{2ceaf}'), - ('\u{2ebe1}', '\u{2f7ff}'), + ('\u{2ebe1}', '\u{2ebef}'), + ('\u{2ee5e}', '\u{2f7ff}'), ('\u{2fa1e}', '\u{2ffff}'), ('\u{3134b}', '\u{3134f}'), ('\u{323b0}', '\u{e00ff}'), @@ -3880,6 +3970,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𐑐', '𐒝'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -3911,8 +4002,11 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐴀', '𐴣'), + ('𐵊', '𐵍'), + ('𐵏', '𐵏'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -3951,6 +4045,13 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -3984,6 +4085,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -4006,7 +4108,9 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -4014,11 +4118,12 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𖬀', '𖬯'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵃', '𖵪'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𛀀', '𛄢'), ('𛄲', '𛄲'), ('𛅐', '𛅒'), @@ -4035,6 +4140,8 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓪'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -4078,6 +4185,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -4218,8 +4326,9 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('᨞', '᨟'), ('᪠', '᪦'), ('᪨', '᪭'), + ('᭎', '᭏'), ('᭚', '᭠'), - ('᭽', '᭾'), + ('᭽', '᭿'), ('᯼', '᯿'), ('᰻', '᰿'), ('᱾', '᱿'), @@ -4314,6 +4423,8 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('𑇝', '𑇟'), ('𑈸', '𑈽'), ('𑊩', '𑊩'), + ('𑏔', '𑏕'), + ('𑏗', '𑏘'), ('𑑋', '𑑏'), ('𑑚', '𑑛'), ('𑑝', '𑑝'), @@ -4330,6 +4441,7 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('𑪚', '𑪜'), ('𑪞', '𑪢'), ('𑬀', '𑬉'), + ('𑯡', '𑯡'), ('𑱁', '𑱅'), ('𑱰', '𑱱'), ('𑻷', '𑻸'), @@ -4341,10 +4453,12 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('𖫵', '𖫵'), ('𖬷', '𖬻'), ('𖭄', '𖭄'), + ('𖵭', '𖵯'), ('𖺗', '𖺚'), ('𖿢', '𖿢'), ('𛲟', '𛲟'), ('𝪇', '𝪋'), + ('𞗿', '𞗿'), ('𞥞', '𞥟'), ]; @@ -4415,7 +4529,7 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('⌫', '⍻'), ('⍽', '⎚'), ('⎴', '⏛'), - ('⏢', '␦'), + ('⏢', '␩'), ('⑀', '⑊'), ('⒜', 'ⓩ'), ('─', '▶'), @@ -4435,7 +4549,7 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), - ('⿰', '⿻'), + ('⿰', '⿿'), ('〄', '〄'), ('〒', '〓'), ('〠', '〠'), @@ -4443,7 +4557,8 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('〾', '〿'), ('㆐', '㆑'), ('㆖', '㆟'), - ('㇀', '㇣'), + ('㇀', '㇥'), + ('㇯', '㇯'), ('㈀', '㈞'), ('㈪', '㉇'), ('㉐', '㉐'), @@ -4477,6 +4592,8 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('𖬼', '𖬿'), ('𖭅', '𖭅'), ('𛲜', '𛲜'), + ('𜰀', '𜳯'), + ('𜴀', '𜺳'), ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), @@ -4521,18 +4638,18 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('🢰', '🢱'), + ('🢰', '🢻'), + ('🣀', '🣁'), ('🤀', '🩓'), ('🩠', '🩭'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ('🬀', '🮒'), - ('🮔', '🯊'), + ('🮔', '🯯'), ]; pub const PARAGRAPH_SEPARATOR: &'static [(char, char)] = @@ -4610,8 +4727,9 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('᨞', '᨟'), ('᪠', '᪦'), ('᪨', '᪭'), + ('᭎', '᭏'), ('᭚', '᭠'), - ('᭽', '᭾'), + ('᭽', '᭿'), ('᯼', '᯿'), ('᰻', '᰿'), ('᱾', '᱿'), @@ -4690,6 +4808,7 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('𐫰', '𐫶'), ('𐬹', '𐬿'), ('𐮙', '𐮜'), + ('𐵮', '𐵮'), ('𐺭', '𐺭'), ('𐽕', '𐽙'), ('𐾆', '𐾉'), @@ -4704,6 +4823,8 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('𑇝', '𑇟'), ('𑈸', '𑈽'), ('𑊩', '𑊩'), + ('𑏔', '𑏕'), + ('𑏗', '𑏘'), ('𑑋', '𑑏'), ('𑑚', '𑑛'), ('𑑝', '𑑝'), @@ -4720,6 +4841,7 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('𑪚', '𑪜'), ('𑪞', '𑪢'), ('𑬀', '𑬉'), + ('𑯡', '𑯡'), ('𑱁', '𑱅'), ('𑱰', '𑱱'), ('𑻷', '𑻸'), @@ -4731,10 +4853,12 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('𖫵', '𖫵'), ('𖬷', '𖬻'), ('𖭄', '𖭄'), + ('𖵭', '𖵯'), ('𖺗', '𖺚'), ('𖿢', '𖿢'), ('𛲟', '𛲟'), ('𝪇', '𝪋'), + ('𞗿', '𞗿'), ('𞥞', '𞥟'), ]; @@ -4791,9 +4915,9 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('ు', 'ౄ'), ('ಂ', 'ಃ'), ('ಾ', 'ಾ'), - ('ೀ', 'ೄ'), - ('ೇ', 'ೈ'), - ('ೊ', 'ೋ'), + ('\u{cc0}', 'ೄ'), + ('\u{cc7}', '\u{cc8}'), + ('\u{cca}', '\u{ccb}'), ('\u{cd5}', '\u{cd6}'), ('ೳ', 'ೳ'), ('ം', 'ഃ'), @@ -4818,8 +4942,8 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('ႇ', 'ႌ'), ('ႏ', 'ႏ'), ('ႚ', 'ႜ'), - ('᜕', '᜕'), - ('᜴', '᜴'), + ('\u{1715}', '\u{1715}'), + ('\u{1734}', '\u{1734}'), ('ា', 'ា'), ('ើ', 'ៅ'), ('ះ', 'ៈ'), @@ -4835,17 +4959,17 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('ᩭ', 'ᩲ'), ('ᬄ', 'ᬄ'), ('\u{1b35}', '\u{1b35}'), - ('ᬻ', 'ᬻ'), - ('ᬽ', 'ᭁ'), - ('ᭃ', '᭄'), + ('\u{1b3b}', '\u{1b3b}'), + ('\u{1b3d}', 'ᭁ'), + ('\u{1b43}', '\u{1b44}'), ('ᮂ', 'ᮂ'), ('ᮡ', 'ᮡ'), ('ᮦ', 'ᮧ'), - ('᮪', '᮪'), + ('\u{1baa}', '\u{1baa}'), ('ᯧ', 'ᯧ'), ('ᯪ', 'ᯬ'), ('ᯮ', 'ᯮ'), - ('᯲', '᯳'), + ('\u{1bf2}', '\u{1bf3}'), ('ᰤ', 'ᰫ'), ('ᰴ', 'ᰵ'), ('᳡', '᳡'), @@ -4855,11 +4979,11 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('ꠧ', 'ꠧ'), ('ꢀ', 'ꢁ'), ('ꢴ', 'ꣃ'), - ('ꥒ', '꥓'), + ('ꥒ', '\u{a953}'), ('ꦃ', 'ꦃ'), ('ꦴ', 'ꦵ'), ('ꦺ', 'ꦻ'), - ('ꦾ', '꧀'), + ('ꦾ', '\u{a9c0}'), ('ꨯ', 'ꨰ'), ('ꨳ', 'ꨴ'), ('ꩍ', 'ꩍ'), @@ -4881,19 +5005,25 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('𑅅', '𑅆'), ('𑆂', '𑆂'), ('𑆳', '𑆵'), - ('𑆿', '𑇀'), + ('𑆿', '\u{111c0}'), ('𑇎', '𑇎'), ('𑈬', '𑈮'), ('𑈲', '𑈳'), - ('𑈵', '𑈵'), + ('\u{11235}', '\u{11235}'), ('𑋠', '𑋢'), ('𑌂', '𑌃'), ('\u{1133e}', '𑌿'), ('𑍁', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('𑍢', '𑍣'), + ('\u{113b8}', '𑎺'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏍'), + ('\u{113cf}', '\u{113cf}'), ('𑐵', '𑐷'), ('𑑀', '𑑁'), ('𑑅', '𑑅'), @@ -4909,14 +5039,15 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('𑘾', '𑘾'), ('𑚬', '𑚬'), ('𑚮', '𑚯'), - ('𑚶', '𑚶'), + ('\u{116b6}', '\u{116b6}'), + ('𑜞', '𑜞'), ('𑜠', '𑜡'), ('𑜦', '𑜦'), ('𑠬', '𑠮'), ('𑠸', '𑠸'), ('\u{11930}', '𑤵'), ('𑤷', '𑤸'), - ('𑤽', '𑤽'), + ('\u{1193d}', '\u{1193d}'), ('𑥀', '𑥀'), ('𑥂', '𑥂'), ('𑧑', '𑧓'), @@ -4937,11 +5068,12 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '𑼵'), ('𑼾', '𑼿'), - ('𑽁', '𑽁'), + ('\u{11f41}', '\u{11f41}'), + ('𖄪', '𖄬'), ('𖽑', '𖾇'), - ('𖿰', '𖿱'), - ('\u{1d165}', '𝅦'), - ('𝅭', '\u{1d172}'), + ('\u{16ff0}', '\u{16ff1}'), + ('\u{1d165}', '\u{1d166}'), + ('\u{1d16d}', '\u{1d172}'), ]; pub const SYMBOL: &'static [(char, char)] = &[ @@ -5035,7 +5167,7 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('↊', '↋'), ('←', '⌇'), ('⌌', '⌨'), - ('⌫', '␦'), + ('⌫', '␩'), ('⑀', '⑊'), ('⒜', 'ⓩ'), ('─', '❧'), @@ -5052,7 +5184,7 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), - ('⿰', '⿻'), + ('⿰', '⿿'), ('〄', '〄'), ('〒', '〓'), ('〠', '〠'), @@ -5061,7 +5193,8 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('゛', '゜'), ('㆐', '㆑'), ('㆖', '㆟'), - ('㇀', '㇣'), + ('㇀', '㇥'), + ('㇯', '㇯'), ('㈀', '㈞'), ('㈪', '㉇'), ('㉐', '㉐'), @@ -5104,11 +5237,14 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('𐇐', '𐇼'), ('𐡷', '𐡸'), ('𐫈', '𐫈'), + ('𐶎', '𐶏'), ('𑜿', '𑜿'), ('𑿕', '𑿱'), ('𖬼', '𖬿'), ('𖭅', '𖭅'), ('𛲜', '𛲜'), + ('𜰀', '𜳯'), + ('𜴀', '𜺳'), ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), @@ -5165,18 +5301,18 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('🢰', '🢱'), + ('🢰', '🢻'), + ('🣀', '🣁'), ('🤀', '🩓'), ('🩠', '🩭'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ('🬀', '🮒'), - ('🮔', '🯊'), + ('🮔', '🯯'), ]; pub const TITLECASE_LETTER: &'static [(char, char)] = &[ @@ -5215,7 +5351,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{85f}', '\u{85f}'), ('\u{86b}', '\u{86f}'), ('\u{88f}', '\u{88f}'), - ('\u{892}', '\u{897}'), + ('\u{892}', '\u{896}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'), ('\u{991}', '\u{992}'), @@ -5407,12 +5543,11 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1a9a}', '\u{1a9f}'), ('\u{1aae}', '\u{1aaf}'), ('\u{1acf}', '\u{1aff}'), - ('\u{1b4d}', '\u{1b4f}'), - ('\u{1b7f}', '\u{1b7f}'), + ('\u{1b4d}', '\u{1b4d}'), ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'), ('\u{1c4a}', '\u{1c4c}'), - ('\u{1c89}', '\u{1c8f}'), + ('\u{1c8b}', '\u{1c8f}'), ('\u{1cbb}', '\u{1cbc}'), ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfb}', '\u{1cff}'), @@ -5439,7 +5574,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{20c1}', '\u{20cf}'), ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'), - ('\u{2427}', '\u{243f}'), + ('\u{242a}', '\u{243f}'), ('\u{244b}', '\u{245f}'), ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b96}'), @@ -5462,22 +5597,21 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'), ('\u{2fd6}', '\u{2fef}'), - ('\u{2ffc}', '\u{2fff}'), ('\u{3040}', '\u{3040}'), ('\u{3097}', '\u{3098}'), ('\u{3100}', '\u{3104}'), ('\u{3130}', '\u{3130}'), ('\u{318f}', '\u{318f}'), - ('\u{31e4}', '\u{31ef}'), + ('\u{31e6}', '\u{31ee}'), ('\u{321f}', '\u{321f}'), ('\u{a48d}', '\u{a48f}'), ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'), ('\u{a6f8}', '\u{a6ff}'), - ('\u{a7cb}', '\u{a7cf}'), + ('\u{a7ce}', '\u{a7cf}'), ('\u{a7d2}', '\u{a7d2}'), ('\u{a7d4}', '\u{a7d4}'), - ('\u{a7da}', '\u{a7f1}'), + ('\u{a7dd}', '\u{a7f1}'), ('\u{a82d}', '\u{a82f}'), ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'), @@ -5567,7 +5701,8 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{105a2}', '\u{105a2}'), ('\u{105b2}', '\u{105b2}'), ('\u{105ba}', '\u{105ba}'), - ('\u{105bd}', '\u{105ff}'), + ('\u{105bd}', '\u{105bf}'), + ('\u{105f4}', '\u{105ff}'), ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'), ('\u{10768}', '\u{1077f}'), @@ -5610,11 +5745,15 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{10cb3}', '\u{10cbf}'), ('\u{10cf3}', '\u{10cf9}'), ('\u{10d28}', '\u{10d2f}'), - ('\u{10d3a}', '\u{10e5f}'), + ('\u{10d3a}', '\u{10d3f}'), + ('\u{10d66}', '\u{10d68}'), + ('\u{10d86}', '\u{10d8d}'), + ('\u{10d90}', '\u{10e5f}'), ('\u{10e7f}', '\u{10e7f}'), ('\u{10eaa}', '\u{10eaa}'), ('\u{10eae}', '\u{10eaf}'), - ('\u{10eb2}', '\u{10efc}'), + ('\u{10eb2}', '\u{10ec1}'), + ('\u{10ec5}', '\u{10efb}'), ('\u{10f28}', '\u{10f2f}'), ('\u{10f5a}', '\u{10f6f}'), ('\u{10f8a}', '\u{10faf}'), @@ -5654,7 +5793,18 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{11358}', '\u{1135c}'), ('\u{11364}', '\u{11365}'), ('\u{1136d}', '\u{1136f}'), - ('\u{11375}', '\u{113ff}'), + ('\u{11375}', '\u{1137f}'), + ('\u{1138a}', '\u{1138a}'), + ('\u{1138c}', '\u{1138d}'), + ('\u{1138f}', '\u{1138f}'), + ('\u{113b6}', '\u{113b6}'), + ('\u{113c1}', '\u{113c1}'), + ('\u{113c3}', '\u{113c4}'), + ('\u{113c6}', '\u{113c6}'), + ('\u{113cb}', '\u{113cb}'), + ('\u{113d6}', '\u{113d6}'), + ('\u{113d9}', '\u{113e0}'), + ('\u{113e3}', '\u{113ff}'), ('\u{1145c}', '\u{1145c}'), ('\u{11462}', '\u{1147f}'), ('\u{114c8}', '\u{114cf}'), @@ -5665,7 +5815,8 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1165a}', '\u{1165f}'), ('\u{1166d}', '\u{1167f}'), ('\u{116ba}', '\u{116bf}'), - ('\u{116ca}', '\u{116ff}'), + ('\u{116ca}', '\u{116cf}'), + ('\u{116e4}', '\u{116ff}'), ('\u{1171b}', '\u{1171c}'), ('\u{1172c}', '\u{1172f}'), ('\u{11747}', '\u{117ff}'), @@ -5685,7 +5836,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{11a48}', '\u{11a4f}'), ('\u{11aa3}', '\u{11aaf}'), ('\u{11af9}', '\u{11aff}'), - ('\u{11b0a}', '\u{11bff}'), + ('\u{11b0a}', '\u{11bbf}'), + ('\u{11be2}', '\u{11bef}'), + ('\u{11bfa}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'), ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'), @@ -5709,7 +5862,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{11ef9}', '\u{11eff}'), ('\u{11f11}', '\u{11f11}'), ('\u{11f3b}', '\u{11f3d}'), - ('\u{11f5a}', '\u{11faf}'), + ('\u{11f5b}', '\u{11faf}'), ('\u{11fb1}', '\u{11fbf}'), ('\u{11ff2}', '\u{11ffe}'), ('\u{1239a}', '\u{123ff}'), @@ -5717,8 +5870,10 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{12475}', '\u{1247f}'), ('\u{12544}', '\u{12f8f}'), ('\u{12ff3}', '\u{12fff}'), - ('\u{13456}', '\u{143ff}'), - ('\u{14647}', '\u{167ff}'), + ('\u{13456}', '\u{1345f}'), + ('\u{143fb}', '\u{143ff}'), + ('\u{14647}', '\u{160ff}'), + ('\u{1613a}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'), ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'), @@ -5730,7 +5885,8 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{16b5a}', '\u{16b5a}'), ('\u{16b62}', '\u{16b62}'), ('\u{16b78}', '\u{16b7c}'), - ('\u{16b90}', '\u{16e3f}'), + ('\u{16b90}', '\u{16d3f}'), + ('\u{16d7a}', '\u{16e3f}'), ('\u{16e9b}', '\u{16eff}'), ('\u{16f4b}', '\u{16f4e}'), ('\u{16f88}', '\u{16f8e}'), @@ -5738,7 +5894,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{16fe5}', '\u{16fef}'), ('\u{16ff2}', '\u{16fff}'), ('\u{187f8}', '\u{187ff}'), - ('\u{18cd6}', '\u{18cff}'), + ('\u{18cd6}', '\u{18cfe}'), ('\u{18d09}', '\u{1afef}'), ('\u{1aff4}', '\u{1aff4}'), ('\u{1affc}', '\u{1affc}'), @@ -5753,7 +5909,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1bc7d}', '\u{1bc7f}'), ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'), - ('\u{1bca4}', '\u{1ceff}'), + ('\u{1bca4}', '\u{1cbff}'), + ('\u{1ccfa}', '\u{1ccff}'), + ('\u{1ceb4}', '\u{1ceff}'), ('\u{1cf2e}', '\u{1cf2f}'), ('\u{1cf47}', '\u{1cf4f}'), ('\u{1cfc4}', '\u{1cfff}'), @@ -5804,7 +5962,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1e2af}', '\u{1e2bf}'), ('\u{1e2fa}', '\u{1e2fe}'), ('\u{1e300}', '\u{1e4cf}'), - ('\u{1e4fa}', '\u{1e7df}'), + ('\u{1e4fa}', '\u{1e5cf}'), + ('\u{1e5fb}', '\u{1e5fe}'), + ('\u{1e600}', '\u{1e7df}'), ('\u{1e7e7}', '\u{1e7e7}'), ('\u{1e7ec}', '\u{1e7ec}'), ('\u{1e7ef}', '\u{1e7ef}'), @@ -5874,24 +6034,24 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8af}'), - ('\u{1f8b2}', '\u{1f8ff}'), + ('\u{1f8bc}', '\u{1f8bf}'), + ('\u{1f8c2}', '\u{1f8ff}'), ('\u{1fa54}', '\u{1fa5f}'), ('\u{1fa6e}', '\u{1fa6f}'), ('\u{1fa7d}', '\u{1fa7f}'), - ('\u{1fa89}', '\u{1fa8f}'), - ('\u{1fabe}', '\u{1fabe}'), - ('\u{1fac6}', '\u{1facd}'), - ('\u{1fadc}', '\u{1fadf}'), - ('\u{1fae9}', '\u{1faef}'), + ('\u{1fa8a}', '\u{1fa8e}'), + ('\u{1fac7}', '\u{1facd}'), + ('\u{1fadd}', '\u{1fade}'), + ('\u{1faea}', '\u{1faef}'), ('\u{1faf9}', '\u{1faff}'), ('\u{1fb93}', '\u{1fb93}'), - ('\u{1fbcb}', '\u{1fbef}'), ('\u{1fbfa}', '\u{1ffff}'), ('\u{2a6e0}', '\u{2a6ff}'), ('\u{2b73a}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}', '\u{2ceaf}'), - ('\u{2ebe1}', '\u{2f7ff}'), + ('\u{2ebe1}', '\u{2ebef}'), + ('\u{2ee5e}', '\u{2f7ff}'), ('\u{2fa1e}', '\u{2ffff}'), ('\u{3134b}', '\u{3134f}'), ('\u{323b0}', '\u{e0000}'), @@ -6179,6 +6339,7 @@ pub const UPPERCASE_LETTER: &'static [(char, char)] = &[ ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('Ꭰ', 'Ᏽ'), + ('Ᲊ', 'Ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('Ḁ', 'Ḁ'), @@ -6503,9 +6664,12 @@ pub const UPPERCASE_LETTER: &'static [(char, char)] = &[ ('Ꟃ', 'Ꟃ'), ('Ꞔ', 'Ꟈ'), ('Ꟊ', 'Ꟊ'), + ('Ɤ', 'Ꟍ'), ('Ꟑ', 'Ꟑ'), ('Ꟗ', 'Ꟗ'), ('Ꟙ', 'Ꟙ'), + ('Ꟛ', 'Ꟛ'), + ('Ƛ', 'Ƛ'), ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), @@ -6515,6 +6679,7 @@ pub const UPPERCASE_LETTER: &'static [(char, char)] = &[ ('𐖌', '𐖒'), ('𐖔', '𐖕'), ('𐲀', '𐲲'), + ('𐵐', '𐵥'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), ('𝐀', '𝐙'), diff --git a/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs b/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs index 294dfbdcc..6a6ec2af5 100644 --- a/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs +++ b/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate grapheme-cluster-break ucd-15.0.0 --chars +// ucd-generate grapheme-cluster-break ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("CR", CR), @@ -71,7 +71,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), @@ -125,10 +125,10 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{c62}', '\u{c63}'), ('\u{c81}', '\u{c81}'), ('\u{cbc}', '\u{cbc}'), - ('\u{cbf}', '\u{cbf}'), + ('\u{cbf}', '\u{cc0}'), ('\u{cc2}', '\u{cc2}'), - ('\u{cc6}', '\u{cc6}'), - ('\u{ccc}', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('\u{d00}', '\u{d01}'), @@ -172,8 +172,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{108d}', '\u{108d}'), ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1733}'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -200,18 +200,16 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), - ('\u{1b34}', '\u{1b3a}'), - ('\u{1b3c}', '\u{1b3c}'), - ('\u{1b42}', '\u{1b42}'), + ('\u{1b34}', '\u{1b3d}'), + ('\u{1b42}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '\u{1b81}'), ('\u{1ba2}', '\u{1ba5}'), - ('\u{1ba8}', '\u{1ba9}'), - ('\u{1bab}', '\u{1bad}'), + ('\u{1ba8}', '\u{1bad}'), ('\u{1be6}', '\u{1be6}'), ('\u{1be8}', '\u{1be9}'), ('\u{1bed}', '\u{1bed}'), - ('\u{1bef}', '\u{1bf1}'), + ('\u{1bef}', '\u{1bf3}'), ('\u{1c2c}', '\u{1c33}'), ('\u{1c36}', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), @@ -242,10 +240,12 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), ('\u{a947}', '\u{a951}'), + ('\u{a953}', '\u{a953}'), ('\u{a980}', '\u{a982}'), ('\u{a9b3}', '\u{a9b3}'), ('\u{a9b6}', '\u{a9b9}'), ('\u{a9bc}', '\u{a9bd}'), + ('\u{a9c0}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa2e}'), ('\u{aa31}', '\u{aa32}'), @@ -277,8 +277,9 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), @@ -295,11 +296,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11173}', '\u{11173}'), ('\u{11180}', '\u{11181}'), ('\u{111b6}', '\u{111be}'), + ('\u{111c0}', '\u{111c0}'), ('\u{111c9}', '\u{111cc}'), ('\u{111cf}', '\u{111cf}'), ('\u{1122f}', '\u{11231}'), - ('\u{11234}', '\u{11234}'), - ('\u{11236}', '\u{11237}'), + ('\u{11234}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), @@ -308,9 +309,18 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '\u{1133e}'), ('\u{11340}', '\u{11340}'), + ('\u{1134d}', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113b8}'), + ('\u{113bb}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '\u{113c9}'), + ('\u{113ce}', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('\u{11438}', '\u{1143f}'), ('\u{11442}', '\u{11444}'), ('\u{11446}', '\u{11446}'), @@ -331,16 +341,15 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1163f}', '\u{11640}'), ('\u{116ab}', '\u{116ab}'), ('\u{116ad}', '\u{116ad}'), - ('\u{116b0}', '\u{116b5}'), - ('\u{116b7}', '\u{116b7}'), - ('\u{1171d}', '\u{1171f}'), + ('\u{116b0}', '\u{116b7}'), + ('\u{1171d}', '\u{1171d}'), + ('\u{1171f}', '\u{1171f}'), ('\u{11722}', '\u{11725}'), ('\u{11727}', '\u{1172b}'), ('\u{1182f}', '\u{11837}'), ('\u{11839}', '\u{1183a}'), ('\u{11930}', '\u{11930}'), - ('\u{1193b}', '\u{1193c}'), - ('\u{1193e}', '\u{1193e}'), + ('\u{1193b}', '\u{1193e}'), ('\u{11943}', '\u{11943}'), ('\u{119d4}', '\u{119d7}'), ('\u{119da}', '\u{119db}'), @@ -371,21 +380,23 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11ef3}', '\u{11ef4}'), ('\u{11f00}', '\u{11f01}'), ('\u{11f36}', '\u{11f3a}'), - ('\u{11f40}', '\u{11f40}'), - ('\u{11f42}', '\u{11f42}'), + ('\u{11f40}', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{16129}'), + ('\u{1612d}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d165}'), - ('\u{1d167}', '\u{1d169}'), - ('\u{1d16e}', '\u{1d172}'), + ('\u{1d165}', '\u{1d169}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -406,6 +417,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('🏻', '🏿'), @@ -1231,6 +1243,7 @@ pub const PREPEND: &'static [(char, char)] = &[ ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), ('𑇂', '𑇃'), + ('𑏑', '𑏑'), ('𑤿', '𑤿'), ('𑥁', '𑥁'), ('𑨺', '𑨺'), @@ -1269,10 +1282,8 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('ు', 'ౄ'), ('ಂ', 'ಃ'), ('ಾ', 'ಾ'), - ('ೀ', 'ು'), + ('ು', 'ು'), ('ೃ', 'ೄ'), - ('ೇ', 'ೈ'), - ('ೊ', 'ೋ'), ('ೳ', 'ೳ'), ('ം', 'ഃ'), ('ി', 'ീ'), @@ -1290,8 +1301,6 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('ျ', 'ြ'), ('ၖ', 'ၗ'), ('ႄ', 'ႄ'), - ('᜕', '᜕'), - ('᜴', '᜴'), ('ា', 'ា'), ('ើ', 'ៅ'), ('ះ', 'ៈ'), @@ -1304,17 +1313,13 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('ᩗ', 'ᩗ'), ('ᩭ', 'ᩲ'), ('ᬄ', 'ᬄ'), - ('ᬻ', 'ᬻ'), - ('ᬽ', 'ᭁ'), - ('ᭃ', '᭄'), + ('ᬾ', 'ᭁ'), ('ᮂ', 'ᮂ'), ('ᮡ', 'ᮡ'), ('ᮦ', 'ᮧ'), - ('᮪', '᮪'), ('ᯧ', 'ᯧ'), ('ᯪ', 'ᯬ'), ('ᯮ', 'ᯮ'), - ('᯲', '᯳'), ('ᰤ', 'ᰫ'), ('ᰴ', 'ᰵ'), ('᳡', '᳡'), @@ -1323,11 +1328,11 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('ꠧ', 'ꠧ'), ('ꢀ', 'ꢁ'), ('ꢴ', 'ꣃ'), - ('ꥒ', '꥓'), + ('ꥒ', 'ꥒ'), ('ꦃ', 'ꦃ'), ('ꦴ', 'ꦵ'), ('ꦺ', 'ꦻ'), - ('ꦾ', '꧀'), + ('ꦾ', 'ꦿ'), ('ꨯ', 'ꨰ'), ('ꨳ', 'ꨴ'), ('ꩍ', 'ꩍ'), @@ -1347,18 +1352,20 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('𑅅', '𑅆'), ('𑆂', '𑆂'), ('𑆳', '𑆵'), - ('𑆿', '𑇀'), + ('𑆿', '𑆿'), ('𑇎', '𑇎'), ('𑈬', '𑈮'), ('𑈲', '𑈳'), - ('𑈵', '𑈵'), ('𑋠', '𑋢'), ('𑌂', '𑌃'), ('𑌿', '𑌿'), ('𑍁', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '𑍌'), ('𑍢', '𑍣'), + ('𑎹', '𑎺'), + ('𑏊', '𑏊'), + ('𑏌', '𑏍'), ('𑐵', '𑐷'), ('𑑀', '𑑁'), ('𑑅', '𑑅'), @@ -1375,13 +1382,12 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('𑘾', '𑘾'), ('𑚬', '𑚬'), ('𑚮', '𑚯'), - ('𑚶', '𑚶'), + ('𑜞', '𑜞'), ('𑜦', '𑜦'), ('𑠬', '𑠮'), ('𑠸', '𑠸'), ('𑤱', '𑤵'), ('𑤷', '𑤸'), - ('𑤽', '𑤽'), ('𑥀', '𑥀'), ('𑥂', '𑥂'), ('𑧑', '𑧓'), @@ -1402,15 +1408,13 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '𑼵'), ('𑼾', '𑼿'), - ('𑽁', '𑽁'), + ('𖄪', '𖄬'), ('𖽑', '𖾇'), - ('𖿰', '𖿱'), - ('𝅦', '𝅦'), - ('𝅭', '𝅭'), ]; pub const T: &'static [(char, char)] = &[('ᆨ', 'ᇿ'), ('ퟋ', 'ퟻ')]; -pub const V: &'static [(char, char)] = &[('ᅠ', 'ᆧ'), ('ힰ', 'ퟆ')]; +pub const V: &'static [(char, char)] = + &[('ᅠ', 'ᆧ'), ('ힰ', 'ퟆ'), ('𖵣', '𖵣'), ('𖵧', '𖵪')]; pub const ZWJ: &'static [(char, char)] = &[('\u{200d}', '\u{200d}')]; diff --git a/regex-syntax/src/unicode_tables/perl_decimal.rs b/regex-syntax/src/unicode_tables/perl_decimal.rs index 4f4c08a12..18996c2bf 100644 --- a/regex-syntax/src/unicode_tables/perl_decimal.rs +++ b/regex-syntax/src/unicode_tables/perl_decimal.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate general-category ucd-15.0.0 --chars --include decimalnumber +// ucd-generate general-category ucd-16.0.0 --chars --include decimalnumber // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[("Decimal_Number", DECIMAL_NUMBER)]; @@ -49,6 +49,7 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('0', '9'), ('𐒠', '𐒩'), ('𐴰', '𐴹'), + ('𐵀', '𐵉'), ('𑁦', '𑁯'), ('𑃰', '𑃹'), ('𑄶', '𑄿'), @@ -58,20 +59,26 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), ('𑥐', '𑥙'), + ('𑯰', '𑯹'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), ('𑽐', '𑽙'), + ('𖄰', '𖄹'), ('𖩠', '𖩩'), ('𖫀', '𖫉'), ('𖭐', '𖭙'), + ('𖵰', '𖵹'), + ('𜳰', '𜳹'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), ('𞓰', '𞓹'), + ('𞗱', '𞗺'), ('𞥐', '𞥙'), ('🯰', '🯹'), ]; diff --git a/regex-syntax/src/unicode_tables/perl_space.rs b/regex-syntax/src/unicode_tables/perl_space.rs index 174169579..c969e3733 100644 --- a/regex-syntax/src/unicode_tables/perl_space.rs +++ b/regex-syntax/src/unicode_tables/perl_space.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-bool ucd-15.0.0 --chars --include whitespace +// ucd-generate property-bool ucd-16.0.0 --chars --include whitespace // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[("White_Space", WHITE_SPACE)]; diff --git a/regex-syntax/src/unicode_tables/perl_word.rs b/regex-syntax/src/unicode_tables/perl_word.rs index c1b66bd9a..21c8c0f9c 100644 --- a/regex-syntax/src/unicode_tables/perl_word.rs +++ b/regex-syntax/src/unicode_tables/perl_word.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate perl-word ucd-15.0.0 --chars +// ucd-generate perl-word ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const PERL_WORD: &'static [(char, char)] = &[ ('0', '9'), @@ -59,7 +59,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ࡠ', 'ࡪ'), ('ࡰ', 'ࢇ'), ('ࢉ', 'ࢎ'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), @@ -158,8 +158,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -243,8 +243,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', '᜕'), - ('ᜟ', '᜴'), + ('ᜀ', '\u{1715}'), + ('ᜟ', '\u{1734}'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -276,11 +276,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '᯳'), + ('\u{1b80}', '\u{1bf3}'), ('ᰀ', '\u{1c37}'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), @@ -367,10 +367,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), @@ -379,9 +379,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{a8e0}', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', '\u{a92d}'), - ('ꤰ', '꥓'), + ('ꤰ', '\u{a953}'), ('ꥠ', 'ꥼ'), - ('\u{a980}', '꧀'), + ('\u{a980}', '\u{a9c0}'), ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', '\u{aa36}'), @@ -468,6 +468,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -508,10 +509,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), + ('𐵀', '𐵥'), + ('\u{10d69}', '\u{10d6d}'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), - ('\u{10efd}', '𐼜'), + ('𐻂', '𐻄'), + ('\u{10efc}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), ('𐽰', '\u{10f85}'), @@ -551,12 +556,22 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133b}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏓'), + ('\u{113e1}', '\u{113e2}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), ('\u{1145e}', '𑑡'), @@ -571,6 +586,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑙐', '𑙙'), ('𑚀', '𑚸'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), @@ -594,6 +610,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), + ('𑯰', '𑯹'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), @@ -618,7 +636,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f42}'), - ('𑽐', '𑽙'), + ('𑽐', '\u{11f5a}'), ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), @@ -626,7 +644,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('\u{13440}', '\u{13455}'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄹'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), @@ -639,16 +659,18 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𖭐', '𖭙'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), + ('𖵰', '𖵹'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -663,10 +685,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), + ('𜳰', '𜳹'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -724,6 +747,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), ('𞓐', '𞓹'), + ('𞗐', '𞗺'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -774,6 +798,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), diff --git a/regex-syntax/src/unicode_tables/property_bool.rs b/regex-syntax/src/unicode_tables/property_bool.rs index a3e84b519..3d62edc42 100644 --- a/regex-syntax/src/unicode_tables/property_bool.rs +++ b/regex-syntax/src/unicode_tables/property_bool.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-bool ucd-15.0.0 --chars +// ucd-generate property-bool ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ASCII_Hex_Digit", ASCII_HEX_DIGIT), @@ -36,13 +36,18 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Hyphen", HYPHEN), ("IDS_Binary_Operator", IDS_BINARY_OPERATOR), ("IDS_Trinary_Operator", IDS_TRINARY_OPERATOR), + ("IDS_Unary_Operator", IDS_UNARY_OPERATOR), + ("ID_Compat_Math_Continue", ID_COMPAT_MATH_CONTINUE), + ("ID_Compat_Math_Start", ID_COMPAT_MATH_START), ("ID_Continue", ID_CONTINUE), ("ID_Start", ID_START), ("Ideographic", IDEOGRAPHIC), + ("InCB", INCB), ("Join_Control", JOIN_CONTROL), ("Logical_Order_Exception", LOGICAL_ORDER_EXCEPTION), ("Lowercase", LOWERCASE), ("Math", MATH), + ("Modifier_Combining_Mark", MODIFIER_COMBINING_MARK), ("Noncharacter_Code_Point", NONCHARACTER_CODE_POINT), ("Other_Alphabetic", OTHER_ALPHABETIC), ("Other_Default_Ignorable_Code_Point", OTHER_DEFAULT_IGNORABLE_CODE_POINT), @@ -86,7 +91,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('\u{345}', '\u{345}'), - ('Ͱ', 'ʹ'), + ('\u{363}', 'ʹ'), ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), @@ -127,6 +132,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ࡠ', 'ࡪ'), ('ࡰ', 'ࢇ'), ('ࢉ', 'ࢎ'), + ('\u{897}', '\u{897}'), ('ࢠ', 'ࣉ'), ('\u{8d4}', '\u{8df}'), ('\u{8e3}', '\u{8e9}'), @@ -225,8 +231,8 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccc}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccc}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -331,7 +337,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('\u{1abf}', '\u{1ac0}'), ('\u{1acc}', '\u{1ace}'), ('\u{1b00}', 'ᬳ'), - ('\u{1b35}', 'ᭃ'), + ('\u{1b35}', '\u{1b43}'), ('ᭅ', 'ᭌ'), ('\u{1b80}', '\u{1ba9}'), ('\u{1bac}', 'ᮯ'), @@ -340,7 +346,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ᰀ', '\u{1c36}'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᳩ', 'ᳬ'), @@ -348,7 +354,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ᳵ', 'ᳶ'), ('ᳺ', 'ᳺ'), ('ᴀ', 'ᶿ'), - ('\u{1de7}', '\u{1df4}'), + ('\u{1dd3}', '\u{1df4}'), ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), @@ -428,10 +434,10 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ꙿ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠅ'), ('ꠇ', 'ꠧ'), ('ꡀ', 'ꡳ'), @@ -522,6 +528,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -559,9 +566,14 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), + ('𐵊', '𐵥'), + ('\u{10d69}', '\u{10d69}'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), + ('\u{10efc}', '\u{10efc}'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -605,6 +617,17 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏍'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑑁'), ('\u{11443}', '𑑅'), ('𑑇', '𑑊'), @@ -643,6 +666,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𑩐', '𑪗'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑰾'), @@ -675,7 +699,9 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '\u{1612e}'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -684,16 +710,17 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '𖿣'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -753,6 +780,8 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -802,6 +831,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -846,7 +876,7 @@ pub const BIDI_MIRRORED: &'static [(char, char)] = &[ ('≟', '≠'), ('≢', '≢'), ('≤', '≫'), - ('≮', '⊌'), + ('≭', '⊌'), ('⊏', '⊒'), ('⊘', '⊘'), ('⊢', '⊣'), @@ -976,7 +1006,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{859}', '\u{85b}'), ('࢈', '࢈'), ('\u{890}', '\u{891}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('ࣉ', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), @@ -1233,8 +1263,11 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('𐵎', '𐵎'), + ('\u{10d69}', '\u{10d6d}'), + ('𐵯', '𐵯'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), @@ -1267,6 +1300,11 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{11340}', '\u{11340}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113bb}', '\u{113c0}'), + ('\u{113ce}', '\u{113ce}'), + ('\u{113d0}', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('\u{11438}', '\u{1143f}'), ('\u{11442}', '\u{11444}'), ('\u{11446}', '\u{11446}'), @@ -1286,7 +1324,8 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{116ad}', '\u{116ad}'), ('\u{116b0}', '\u{116b5}'), ('\u{116b7}', '\u{116b7}'), - ('\u{1171d}', '\u{1171f}'), + ('\u{1171d}', '\u{1171d}'), + ('\u{1171f}', '\u{1171f}'), ('\u{11722}', '\u{11725}'), ('\u{11727}', '\u{1172b}'), ('\u{1182f}', '\u{11837}'), @@ -1325,11 +1364,16 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{11f36}', '\u{11f3a}'), ('\u{11f40}', '\u{11f40}'), ('\u{11f42}', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13430}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{16129}'), + ('\u{1612d}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('𖭀', '𖭃'), + ('𖵀', '𖵂'), + ('𖵫', '𖵬'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), @@ -1363,6 +1407,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('𞓫', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '𞥋'), ('🏻', '🏿'), @@ -1406,7 +1451,7 @@ pub const CASED: &'static [(char, char)] = &[ ('ჼ', 'ჿ'), ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᴀ', 'ᶿ'), @@ -1459,10 +1504,10 @@ pub const CASED: &'static [(char, char)] = &[ ('Ꚁ', 'ꚝ'), ('Ꜣ', 'ꞇ'), ('Ꞌ', 'ꞎ'), - ('Ꞑ', 'ꟊ'), + ('Ꞑ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꟶ'), ('ꟸ', 'ꟺ'), ('ꬰ', 'ꭚ'), @@ -1489,6 +1534,8 @@ pub const CASED: &'static [(char, char)] = &[ ('𐞲', '𐞺'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), + ('𐵐', '𐵥'), + ('𐵰', '𐶅'), ('𑢠', '𑣟'), ('𖹀', '𖹿'), ('𝐀', '𝑔'), @@ -1814,7 +1861,7 @@ pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[ ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'Ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('Ḁ', 'Ḁ'), @@ -2136,9 +2183,12 @@ pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[ ('Ꟃ', 'Ꟃ'), ('Ꞔ', 'Ꟈ'), ('Ꟊ', 'Ꟊ'), + ('Ɤ', 'Ꟍ'), ('Ꟑ', 'Ꟑ'), ('Ꟗ', 'Ꟗ'), ('Ꟙ', 'Ꟙ'), + ('Ꟛ', 'Ꟛ'), + ('Ƛ', 'Ƛ'), ('Ꟶ', 'Ꟶ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), @@ -2151,6 +2201,7 @@ pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[ ('𐖌', '𐖒'), ('𐖔', '𐖕'), ('𐲀', '𐲲'), + ('𐵐', '𐵥'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), ('𞤀', '𞤡'), @@ -2164,8 +2215,7 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('Ø', 'ö'), ('ø', 'ķ'), ('Ĺ', 'ƌ'), - ('Ǝ', 'ƚ'), - ('Ɯ', 'Ʃ'), + ('Ǝ', 'Ʃ'), ('Ƭ', 'ƹ'), ('Ƽ', 'ƽ'), ('ƿ', 'ƿ'), @@ -2176,8 +2226,7 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('ə', 'ə'), ('ɛ', 'ɜ'), ('ɠ', 'ɡ'), - ('ɣ', 'ɣ'), - ('ɥ', 'ɦ'), + ('ɣ', 'ɦ'), ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), @@ -2211,7 +2260,7 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('ჽ', 'ჿ'), ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᵹ', 'ᵹ'), @@ -2262,9 +2311,9 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('Ꞌ', 'Ɥ'), ('Ꞑ', 'ꞔ'), ('Ꞗ', 'Ɪ'), - ('Ʞ', 'ꟊ'), + ('Ʞ', 'ꟍ'), ('Ꟑ', 'ꟑ'), - ('Ꟗ', 'ꟙ'), + ('Ꟗ', 'Ƛ'), ('Ꟶ', 'ꟶ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), @@ -2285,6 +2334,8 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('𐖻', '𐖼'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), + ('𐵐', '𐵥'), + ('𐵰', '𐶅'), ('𑢠', '𑣟'), ('𖹀', '𖹿'), ('𞤀', '𞥃'), @@ -2566,6 +2617,7 @@ pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[ ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('Ꭰ', 'Ᏽ'), + ('Ᲊ', 'Ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('Ḁ', 'Ḁ'), @@ -2885,9 +2937,12 @@ pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[ ('Ꟃ', 'Ꟃ'), ('Ꞔ', 'Ꟈ'), ('Ꟊ', 'Ꟊ'), + ('Ɤ', 'Ꟍ'), ('Ꟑ', 'Ꟑ'), ('Ꟗ', 'Ꟗ'), ('Ꟙ', 'Ꟙ'), + ('Ꟛ', 'Ꟛ'), + ('Ƛ', 'Ƛ'), ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), @@ -2897,6 +2952,7 @@ pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[ ('𐖌', '𐖒'), ('𐖔', '𐖕'), ('𐲀', '𐲲'), + ('𐵐', '𐵥'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), ('𞤀', '𞤡'), @@ -2975,7 +3031,7 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ƌ', 'ƌ'), ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), - ('ƙ', 'ƚ'), + ('ƙ', 'ƛ'), ('ƞ', 'ƞ'), ('ơ', 'ơ'), ('ƣ', 'ƣ'), @@ -3052,8 +3108,7 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ə', 'ə'), ('ɛ', 'ɜ'), ('ɠ', 'ɡ'), - ('ɣ', 'ɣ'), - ('ɥ', 'ɦ'), + ('ɣ', 'ɦ'), ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), @@ -3191,6 +3246,7 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ա', 'և'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), + ('ᲊ', 'ᲊ'), ('ᵹ', 'ᵹ'), ('ᵽ', 'ᵽ'), ('ᶎ', 'ᶎ'), @@ -3510,9 +3566,11 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ꟃ', 'ꟃ'), ('ꟈ', 'ꟈ'), ('ꟊ', 'ꟊ'), + ('ꟍ', 'ꟍ'), ('ꟑ', 'ꟑ'), ('ꟗ', 'ꟗ'), ('ꟙ', 'ꟙ'), + ('ꟛ', 'ꟛ'), ('ꟶ', 'ꟶ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), @@ -3526,6 +3584,7 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('𐖳', '𐖹'), ('𐖻', '𐖼'), ('𐳀', '𐳲'), + ('𐵰', '𐶅'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), ('𞤢', '𞥃'), @@ -3604,7 +3663,7 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ƌ', 'ƌ'), ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), - ('ƙ', 'ƚ'), + ('ƙ', 'ƛ'), ('ƞ', 'ƞ'), ('ơ', 'ơ'), ('ƣ', 'ƣ'), @@ -3680,8 +3739,7 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ə', 'ə'), ('ɛ', 'ɜ'), ('ɠ', 'ɡ'), - ('ɣ', 'ɣ'), - ('ɥ', 'ɦ'), + ('ɣ', 'ɦ'), ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), @@ -3821,6 +3879,7 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ჽ', 'ჿ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), + ('ᲊ', 'ᲊ'), ('ᵹ', 'ᵹ'), ('ᵽ', 'ᵽ'), ('ᶎ', 'ᶎ'), @@ -4140,9 +4199,11 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ꟃ', 'ꟃ'), ('ꟈ', 'ꟈ'), ('ꟊ', 'ꟊ'), + ('ꟍ', 'ꟍ'), ('ꟑ', 'ꟑ'), ('ꟗ', 'ꟗ'), ('ꟙ', 'ꟙ'), + ('ꟛ', 'ꟛ'), ('ꟶ', 'ꟶ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), @@ -4156,6 +4217,7 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('𐖳', '𐖹'), ('𐖻', '𐖼'), ('𐳀', '𐳲'), + ('𐵰', '𐶅'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), ('𞤢', '𞥃'), @@ -4184,6 +4246,7 @@ pub const DASH: &'static [(char, char)] = &[ ('﹘', '﹘'), ('﹣', '﹣'), ('-', '-'), + ('𐵮', '𐵮'), ('𐺭', '𐺭'), ]; @@ -4272,6 +4335,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{d3b}', '\u{d3c}'), ('\u{d4d}', '\u{d4d}'), ('\u{dca}', '\u{dca}'), + ('\u{e3a}', '\u{e3a}'), ('\u{e47}', '\u{e4c}'), ('\u{e4e}', '\u{e4e}'), ('\u{eba}', '\u{eba}'), @@ -4292,18 +4356,22 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', 'ႛ'), ('\u{135d}', '\u{135f}'), - ('\u{1714}', '᜕'), + ('\u{1714}', '\u{1715}'), + ('\u{1734}', '\u{1734}'), ('\u{17c9}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{1939}', '\u{193b}'), + ('\u{1a60}', '\u{1a60}'), ('\u{1a75}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1abe}'), ('\u{1ac1}', '\u{1acb}'), ('\u{1b34}', '\u{1b34}'), - ('᭄', '᭄'), + ('\u{1b44}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), - ('᮪', '\u{1bab}'), + ('\u{1baa}', '\u{1bab}'), + ('\u{1be6}', '\u{1be6}'), + ('\u{1bf2}', '\u{1bf3}'), ('\u{1c36}', '\u{1c37}'), ('ᱸ', 'ᱽ'), ('\u{1cd0}', '\u{1ce8}'), @@ -4332,12 +4400,14 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('꜀', '꜡'), ('ꞈ', '꞊'), ('ꟸ', 'ꟹ'), + ('\u{a806}', '\u{a806}'), + ('\u{a82c}', '\u{a82c}'), ('\u{a8c4}', '\u{a8c4}'), ('\u{a8e0}', '\u{a8f1}'), ('\u{a92b}', '꤮'), - ('꥓', '꥓'), + ('\u{a953}', '\u{a953}'), ('\u{a9b3}', '\u{a9b3}'), - ('꧀', '꧀'), + ('\u{a9c0}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('ꩻ', 'ꩽ'), ('\u{aabf}', 'ꫂ'), @@ -4356,8 +4426,12 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('𐞀', '𐞅'), ('𐞇', '𐞰'), ('𐞲', '𐞺'), + ('\u{10a38}', '\u{10a3a}'), + ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('𐴢', '\u{10d27}'), + ('𐵎', '𐵎'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), @@ -4366,23 +4440,26 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{110b9}', '\u{110ba}'), ('\u{11133}', '\u{11134}'), ('\u{11173}', '\u{11173}'), - ('𑇀', '𑇀'), + ('\u{111c0}', '\u{111c0}'), ('\u{111ca}', '\u{111cc}'), - ('𑈵', '\u{11236}'), + ('\u{11235}', '\u{11236}'), ('\u{112e9}', '\u{112ea}'), - ('\u{1133c}', '\u{1133c}'), - ('𑍍', '𑍍'), + ('\u{1133b}', '\u{1133c}'), + ('\u{1134d}', '\u{1134d}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113ce}', '\u{113d0}'), + ('\u{113d2}', '𑏓'), + ('\u{113e1}', '\u{113e2}'), ('\u{11442}', '\u{11442}'), ('\u{11446}', '\u{11446}'), ('\u{114c2}', '\u{114c3}'), ('\u{115bf}', '\u{115c0}'), ('\u{1163f}', '\u{1163f}'), - ('𑚶', '\u{116b7}'), + ('\u{116b6}', '\u{116b7}'), ('\u{1172b}', '\u{1172b}'), ('\u{11839}', '\u{1183a}'), - ('𑤽', '\u{1193e}'), + ('\u{1193d}', '\u{1193e}'), ('\u{11943}', '\u{11943}'), ('\u{119e0}', '\u{119e0}'), ('\u{11a34}', '\u{11a34}'), @@ -4392,18 +4469,22 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{11d42}', '\u{11d42}'), ('\u{11d44}', '\u{11d45}'), ('\u{11d97}', '\u{11d97}'), + ('\u{11f41}', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13447}', '\u{13455}'), + ('\u{1612f}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), + ('𖵫', '𖵬'), ('\u{16f8f}', '𖾟'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -4411,6 +4492,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{1e130}', '\u{1e136}'), ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e946}'), ('\u{1e948}', '\u{1e94a}'), @@ -4562,11 +4644,10 @@ pub const EMOJI: &'static [(char, char)] = &[ ('🤼', '🥅'), ('🥇', '🧿'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ]; @@ -4704,11 +4785,10 @@ pub const EMOJI_PRESENTATION: &'static [(char, char)] = &[ ('🤼', '🥅'), ('🥇', '🧿'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ]; @@ -4798,6 +4878,8 @@ pub const EXTENDER: &'static [(char, char)] = &[ ('ː', 'ˑ'), ('ـ', 'ـ'), ('ߺ', 'ߺ'), + ('\u{a71}', '\u{a71}'), + ('\u{afb}', '\u{afb}'), ('\u{b55}', '\u{b55}'), ('ๆ', 'ๆ'), ('ໆ', 'ໆ'), @@ -4819,13 +4901,19 @@ pub const EXTENDER: &'static [(char, char)] = &[ ('ꫳ', 'ꫴ'), ('ー', 'ー'), ('𐞁', '𐞂'), + ('𐵎', '𐵎'), + ('\u{10d6a}', '\u{10d6a}'), + ('𐵯', '𐵯'), + ('\u{11237}', '\u{11237}'), ('𑍝', '𑍝'), + ('\u{113d2}', '𑏓'), ('𑗆', '𑗈'), ('\u{11a98}', '\u{11a98}'), ('𖭂', '𖭃'), ('𖿠', '𖿡'), ('𖿣', '𖿣'), ('𞄼', '𞄽'), + ('\u{1e5ef}', '\u{1e5ef}'), ('\u{1e944}', '\u{1e946}'), ]; @@ -4972,10 +5060,8 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ಾ'), - ('ೀ', 'ು'), + ('ು', 'ು'), ('ೃ', 'ೄ'), - ('ೇ', 'ೈ'), - ('ೊ', 'ೋ'), ('ೝ', 'ೞ'), ('ೠ', 'ೡ'), ('೦', '೯'), @@ -5066,9 +5152,8 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('᐀', '᚜'), ('ᚠ', 'ᛸ'), ('ᜀ', 'ᜑ'), - ('᜕', '᜕'), ('ᜟ', 'ᜱ'), - ('᜴', '᜶'), + ('᜵', '᜶'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -5108,23 +5193,20 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('᪐', '᪙'), ('᪠', '᪭'), ('ᬄ', 'ᬳ'), - ('ᬻ', 'ᬻ'), - ('ᬽ', 'ᭁ'), - ('ᭃ', 'ᭌ'), - ('᭐', '᭪'), - ('᭴', '᭾'), + ('ᬾ', 'ᭁ'), + ('ᭅ', 'ᭌ'), + ('᭎', '᭪'), + ('᭴', '᭿'), ('ᮂ', 'ᮡ'), ('ᮦ', 'ᮧ'), - ('᮪', '᮪'), ('ᮮ', 'ᯥ'), ('ᯧ', 'ᯧ'), ('ᯪ', 'ᯬ'), ('ᯮ', 'ᯮ'), - ('᯲', '᯳'), ('᯼', 'ᰫ'), ('ᰴ', 'ᰵ'), ('᰻', '᱉'), - ('ᱍ', 'ᲈ'), + ('ᱍ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', '᳇'), ('᳓', '᳓'), @@ -5158,7 +5240,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ₐ', 'ₜ'), ('₠', '⃀'), ('℀', '↋'), - ('←', '␦'), + ('←', '␩'), ('⑀', '⑊'), ('①', '⭳'), ('⭶', '⮕'), @@ -5182,15 +5264,14 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), - ('⿰', '⿻'), - ('\u{3000}', '〩'), + ('⿰', '〩'), ('〰', '〿'), ('ぁ', 'ゖ'), ('゛', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('㆐', '㇣'), - ('ㇰ', '㈞'), + ('㆐', '㇥'), + ('㇯', '㈞'), ('㈠', 'ꒌ'), ('꒐', '꓆'), ('ꓐ', 'ꘫ'), @@ -5199,10 +5280,10 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('꙾', 'ꚝ'), ('ꚠ', 'ꛯ'), ('꛲', '꛷'), - ('꜀', 'ꟊ'), + ('꜀', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), @@ -5215,12 +5296,13 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ꣲ', 'ꣾ'), ('꤀', 'ꤥ'), ('꤮', 'ꥆ'), - ('ꥒ', '꥓'), + ('ꥒ', 'ꥒ'), ('꥟', 'ꥼ'), ('ꦃ', 'ꦲ'), ('ꦴ', 'ꦵ'), ('ꦺ', 'ꦻ'), - ('ꦾ', '꧍'), + ('ꦾ', 'ꦿ'), + ('꧁', '꧍'), ('ꧏ', '꧙'), ('꧞', 'ꧤ'), ('ꧦ', 'ꧾ'), @@ -5319,6 +5401,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -5360,10 +5443,14 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐳺', '𐴣'), ('𐴰', '𐴹'), + ('𐵀', '𐵥'), + ('𐵮', '𐶅'), + ('𐶎', '𐶏'), ('𐹠', '𐹾'), ('𐺀', '𐺩'), ('𐺭', '𐺭'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼧'), ('𐼰', '𐽅'), ('𐽑', '𐽙'), @@ -5389,14 +5476,14 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑅐', '𑅲'), ('𑅴', '𑅶'), ('𑆂', '𑆵'), - ('𑆿', '𑇈'), + ('𑆿', '𑆿'), + ('𑇁', '𑇈'), ('𑇍', '𑇎'), ('𑇐', '𑇟'), ('𑇡', '𑇴'), ('𑈀', '𑈑'), ('𑈓', '𑈮'), ('𑈲', '𑈳'), - ('𑈵', '𑈵'), ('𑈸', '𑈽'), ('𑈿', '𑉀'), ('𑊀', '𑊆'), @@ -5418,9 +5505,20 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑌿', '𑌿'), ('𑍁', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '𑍌'), ('𑍐', '𑍐'), ('𑍝', '𑍣'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑎹', '𑎺'), + ('𑏊', '𑏊'), + ('𑏌', '𑏍'), + ('𑏑', '𑏑'), + ('𑏓', '𑏕'), + ('𑏗', '𑏘'), ('𑐀', '𑐷'), ('𑑀', '𑑁'), ('𑑅', '𑑅'), @@ -5449,10 +5547,11 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑚀', '𑚪'), ('𑚬', '𑚬'), ('𑚮', '𑚯'), - ('𑚶', '𑚶'), ('𑚸', '𑚹'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜀', '𑜚'), + ('𑜞', '𑜞'), ('𑜠', '𑜡'), ('𑜦', '𑜦'), ('𑜰', '𑝆'), @@ -5467,7 +5566,6 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑤘', '𑤯'), ('𑤱', '𑤵'), ('𑤷', '𑤸'), - ('𑤽', '𑤽'), ('𑤿', '𑥂'), ('𑥄', '𑥆'), ('𑥐', '𑥙'), @@ -5486,6 +5584,8 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑪚', '𑪢'), ('𑪰', '𑫸'), ('𑬀', '𑬉'), + ('𑯀', '𑯡'), + ('𑯰', '𑯹'), ('𑰀', '𑰈'), ('𑰊', '𑰯'), ('𑰾', '𑰾'), @@ -5512,7 +5612,6 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑼂', '𑼐'), ('𑼒', '𑼵'), ('𑼾', '𑼿'), - ('𑽁', '𑽁'), ('𑽃', '𑽙'), ('𑾰', '𑾰'), ('𑿀', '𑿱'), @@ -5523,7 +5622,11 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𒾐', '𒿲'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), + ('𖄪', '𖄬'), + ('𖄰', '𖄹'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), @@ -5537,15 +5640,15 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵹'), ('𖹀', '𖺚'), ('𖼀', '𖽊'), ('𖽐', '𖾇'), ('𖾓', '𖾟'), ('𖿠', '𖿣'), - ('𖿰', '𖿱'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -5561,12 +5664,13 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𛲐', '𛲙'), ('𛲜', '𛲜'), ('𛲟', '𛲟'), + ('𜰀', '𜳹'), + ('𜴀', '𜺳'), ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), ('𝄩', '𝅘𝅥𝅲'), - ('𝅦', '𝅦'), - ('𝅪', '𝅭'), + ('𝅪', '𝅬'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), ('𝆮', '𝇪'), @@ -5614,6 +5718,9 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𞋿', '𞋿'), ('𞓐', '𞓫'), ('𞓰', '𞓹'), + ('𞗐', '𞗭'), + ('𞗰', '𞗺'), + ('𞗿', '𞗿'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -5684,24 +5791,24 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('🢰', '🢱'), + ('🢰', '🢻'), + ('🣀', '🣁'), ('🤀', '🩓'), ('🩠', '🩭'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ('🬀', '🮒'), - ('🮔', '🯊'), - ('🯰', '🯹'), + ('🮔', '🯹'), ('𠀀', '𪛟'), ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -5732,7 +5839,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), @@ -5786,10 +5893,10 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{c62}', '\u{c63}'), ('\u{c81}', '\u{c81}'), ('\u{cbc}', '\u{cbc}'), - ('\u{cbf}', '\u{cbf}'), + ('\u{cbf}', '\u{cc0}'), ('\u{cc2}', '\u{cc2}'), - ('\u{cc6}', '\u{cc6}'), - ('\u{ccc}', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('\u{d00}', '\u{d01}'), @@ -5833,8 +5940,8 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{108d}', '\u{108d}'), ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1733}'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -5861,18 +5968,16 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), - ('\u{1b34}', '\u{1b3a}'), - ('\u{1b3c}', '\u{1b3c}'), - ('\u{1b42}', '\u{1b42}'), + ('\u{1b34}', '\u{1b3d}'), + ('\u{1b42}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '\u{1b81}'), ('\u{1ba2}', '\u{1ba5}'), - ('\u{1ba8}', '\u{1ba9}'), - ('\u{1bab}', '\u{1bad}'), + ('\u{1ba8}', '\u{1bad}'), ('\u{1be6}', '\u{1be6}'), ('\u{1be8}', '\u{1be9}'), ('\u{1bed}', '\u{1bed}'), - ('\u{1bef}', '\u{1bf1}'), + ('\u{1bef}', '\u{1bf3}'), ('\u{1c2c}', '\u{1c33}'), ('\u{1c36}', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), @@ -5903,10 +6008,12 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), ('\u{a947}', '\u{a951}'), + ('\u{a953}', '\u{a953}'), ('\u{a980}', '\u{a982}'), ('\u{a9b3}', '\u{a9b3}'), ('\u{a9b6}', '\u{a9b9}'), ('\u{a9bc}', '\u{a9bd}'), + ('\u{a9c0}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa2e}'), ('\u{aa31}', '\u{aa32}'), @@ -5938,8 +6045,9 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), @@ -5956,11 +6064,11 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{11173}', '\u{11173}'), ('\u{11180}', '\u{11181}'), ('\u{111b6}', '\u{111be}'), + ('\u{111c0}', '\u{111c0}'), ('\u{111c9}', '\u{111cc}'), ('\u{111cf}', '\u{111cf}'), ('\u{1122f}', '\u{11231}'), - ('\u{11234}', '\u{11234}'), - ('\u{11236}', '\u{11237}'), + ('\u{11234}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), @@ -5969,9 +6077,18 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '\u{1133e}'), ('\u{11340}', '\u{11340}'), + ('\u{1134d}', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113b8}'), + ('\u{113bb}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '\u{113c9}'), + ('\u{113ce}', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('\u{11438}', '\u{1143f}'), ('\u{11442}', '\u{11444}'), ('\u{11446}', '\u{11446}'), @@ -5992,16 +6109,15 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1163f}', '\u{11640}'), ('\u{116ab}', '\u{116ab}'), ('\u{116ad}', '\u{116ad}'), - ('\u{116b0}', '\u{116b5}'), - ('\u{116b7}', '\u{116b7}'), - ('\u{1171d}', '\u{1171f}'), + ('\u{116b0}', '\u{116b7}'), + ('\u{1171d}', '\u{1171d}'), + ('\u{1171f}', '\u{1171f}'), ('\u{11722}', '\u{11725}'), ('\u{11727}', '\u{1172b}'), ('\u{1182f}', '\u{11837}'), ('\u{11839}', '\u{1183a}'), ('\u{11930}', '\u{11930}'), - ('\u{1193b}', '\u{1193c}'), - ('\u{1193e}', '\u{1193e}'), + ('\u{1193b}', '\u{1193e}'), ('\u{11943}', '\u{11943}'), ('\u{119d4}', '\u{119d7}'), ('\u{119da}', '\u{119db}'), @@ -6032,21 +6148,23 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{11ef3}', '\u{11ef4}'), ('\u{11f00}', '\u{11f01}'), ('\u{11f36}', '\u{11f3a}'), - ('\u{11f40}', '\u{11f40}'), - ('\u{11f42}', '\u{11f42}'), + ('\u{11f40}', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{16129}'), + ('\u{1612d}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d165}'), - ('\u{1d167}', '\u{1d169}'), - ('\u{1d16e}', '\u{1d172}'), + ('\u{1d165}', '\u{1d169}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -6067,6 +6185,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0020}', '\u{e007f}'), @@ -6089,19 +6208,19 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{eba}', '\u{eba}'), ('\u{f84}', '\u{f84}'), ('\u{1039}', '\u{103a}'), - ('\u{1714}', '᜕'), - ('᜴', '᜴'), + ('\u{1714}', '\u{1715}'), + ('\u{1734}', '\u{1734}'), ('\u{17d2}', '\u{17d2}'), ('\u{1a60}', '\u{1a60}'), - ('᭄', '᭄'), - ('᮪', '\u{1bab}'), - ('᯲', '᯳'), + ('\u{1b44}', '\u{1b44}'), + ('\u{1baa}', '\u{1bab}'), + ('\u{1bf2}', '\u{1bf3}'), ('\u{2d7f}', '\u{2d7f}'), ('\u{a806}', '\u{a806}'), ('\u{a82c}', '\u{a82c}'), ('\u{a8c4}', '\u{a8c4}'), - ('꥓', '꥓'), - ('꧀', '꧀'), + ('\u{a953}', '\u{a953}'), + ('\u{a9c0}', '\u{a9c0}'), ('\u{aaf6}', '\u{aaf6}'), ('\u{abed}', '\u{abed}'), ('\u{10a3f}', '\u{10a3f}'), @@ -6110,18 +6229,19 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{1107f}', '\u{1107f}'), ('\u{110b9}', '\u{110b9}'), ('\u{11133}', '\u{11134}'), - ('𑇀', '𑇀'), - ('𑈵', '𑈵'), + ('\u{111c0}', '\u{111c0}'), + ('\u{11235}', '\u{11235}'), ('\u{112ea}', '\u{112ea}'), - ('𑍍', '𑍍'), + ('\u{1134d}', '\u{1134d}'), + ('\u{113ce}', '\u{113d0}'), ('\u{11442}', '\u{11442}'), ('\u{114c2}', '\u{114c2}'), ('\u{115bf}', '\u{115bf}'), ('\u{1163f}', '\u{1163f}'), - ('𑚶', '𑚶'), + ('\u{116b6}', '\u{116b6}'), ('\u{1172b}', '\u{1172b}'), ('\u{11839}', '\u{11839}'), - ('𑤽', '\u{1193e}'), + ('\u{1193d}', '\u{1193e}'), ('\u{119e0}', '\u{119e0}'), ('\u{11a34}', '\u{11a34}'), ('\u{11a47}', '\u{11a47}'), @@ -6129,7 +6249,8 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{11c3f}', '\u{11c3f}'), ('\u{11d44}', '\u{11d45}'), ('\u{11d97}', '\u{11d97}'), - ('𑽁', '\u{11f42}'), + ('\u{11f41}', '\u{11f42}'), + ('\u{1612f}', '\u{1612f}'), ]; pub const HEX_DIGIT: &'static [(char, char)] = &[ @@ -6155,10 +6276,49 @@ pub const HYPHEN: &'static [(char, char)] = &[ ]; pub const IDS_BINARY_OPERATOR: &'static [(char, char)] = - &[('⿰', '⿱'), ('⿴', '⿻')]; + &[('⿰', '⿱'), ('⿴', '⿽'), ('㇯', '㇯')]; pub const IDS_TRINARY_OPERATOR: &'static [(char, char)] = &[('⿲', '⿳')]; +pub const IDS_UNARY_OPERATOR: &'static [(char, char)] = &[('⿾', '⿿')]; + +pub const ID_COMPAT_MATH_CONTINUE: &'static [(char, char)] = &[ + ('²', '³'), + ('¹', '¹'), + ('⁰', '⁰'), + ('⁴', '⁾'), + ('₀', '₎'), + ('∂', '∂'), + ('∇', '∇'), + ('∞', '∞'), + ('𝛁', '𝛁'), + ('𝛛', '𝛛'), + ('𝛻', '𝛻'), + ('𝜕', '𝜕'), + ('𝜵', '𝜵'), + ('𝝏', '𝝏'), + ('𝝯', '𝝯'), + ('𝞉', '𝞉'), + ('𝞩', '𝞩'), + ('𝟃', '𝟃'), +]; + +pub const ID_COMPAT_MATH_START: &'static [(char, char)] = &[ + ('∂', '∂'), + ('∇', '∇'), + ('∞', '∞'), + ('𝛁', '𝛁'), + ('𝛛', '𝛛'), + ('𝛻', '𝛻'), + ('𝜕', '𝜕'), + ('𝜵', '𝜵'), + ('𝝏', '𝝏'), + ('𝝯', '𝝯'), + ('𝞉', '𝞉'), + ('𝞩', '𝞩'), + ('𝟃', '𝟃'), +]; + pub const ID_CONTINUE: &'static [(char, char)] = &[ ('0', '9'), ('A', 'Z'), @@ -6213,7 +6373,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ࡠ', 'ࡪ'), ('ࡰ', 'ࢇ'), ('ࢉ', 'ࢎ'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), @@ -6312,8 +6472,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -6398,8 +6558,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', '᜕'), - ('ᜟ', '᜴'), + ('ᜀ', '\u{1715}'), + ('ᜟ', '\u{1734}'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -6432,11 +6592,11 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '᯳'), + ('\u{1b80}', '\u{1bf3}'), ('ᰀ', '\u{1c37}'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), @@ -6460,6 +6620,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), + ('\u{200c}', '\u{200d}'), ('‿', '⁀'), ('⁔', '⁔'), ('ⁱ', 'ⁱ'), @@ -6504,8 +6665,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('〸', '〼'), ('ぁ', 'ゖ'), ('\u{3099}', 'ゟ'), - ('ァ', 'ヺ'), - ('ー', 'ヿ'), + ('ァ', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆿ'), @@ -6520,10 +6680,10 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), @@ -6532,9 +6692,9 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{a8e0}', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', '\u{a92d}'), - ('ꤰ', '꥓'), + ('ꤰ', '\u{a953}'), ('ꥠ', 'ꥼ'), - ('\u{a980}', '꧀'), + ('\u{a980}', '\u{a9c0}'), ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', '\u{aa36}'), @@ -6583,7 +6743,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('A', 'Z'), ('_', '_'), ('a', 'z'), - ('ヲ', 'ᄒ'), + ('・', 'ᄒ'), ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), @@ -6621,6 +6781,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -6661,10 +6822,14 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), + ('𐵀', '𐵥'), + ('\u{10d69}', '\u{10d6d}'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), - ('\u{10efd}', '𐼜'), + ('𐻂', '𐻄'), + ('\u{10efc}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), ('𐽰', '\u{10f85}'), @@ -6704,12 +6869,22 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133b}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏓'), + ('\u{113e1}', '\u{113e2}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), ('\u{1145e}', '𑑡'), @@ -6724,6 +6899,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𑙐', '𑙙'), ('𑚀', '𑚸'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), @@ -6747,6 +6923,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), + ('𑯰', '𑯹'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), @@ -6771,7 +6949,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f42}'), - ('𑽐', '𑽙'), + ('𑽐', '\u{11f5a}'), ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), @@ -6779,7 +6957,9 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('\u{13440}', '\u{13455}'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄹'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), @@ -6792,16 +6972,18 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𖭐', '𖭙'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), + ('𖵰', '𖵹'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -6816,10 +6998,11 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), + ('𜳰', '𜳹'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -6877,6 +7060,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), ('𞓐', '𞓹'), + ('𞗐', '𞗺'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -6924,6 +7108,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -7155,7 +7340,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᳩ', 'ᳬ'), @@ -7238,10 +7423,10 @@ pub const ID_START: &'static [(char, char)] = &[ ('ꚠ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), @@ -7339,6 +7524,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -7375,8 +7561,11 @@ pub const ID_START: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'), + ('𐵊', '𐵥'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -7415,6 +7604,13 @@ pub const ID_START: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -7449,6 +7645,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -7472,7 +7669,9 @@ pub const ID_START: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -7481,6 +7680,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), @@ -7489,7 +7689,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𖿣', '𖿣'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -7542,6 +7742,8 @@ pub const ID_START: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -7587,6 +7789,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -7603,18 +7806,420 @@ pub const IDEOGRAPHIC: &'static [(char, char)] = &[ ('\u{16fe4}', '\u{16fe4}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𛅰', '𛋻'), ('𠀀', '𪛟'), ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), ]; +pub const INCB: &'static [(char, char)] = &[ + ('\u{300}', '\u{36f}'), + ('\u{483}', '\u{489}'), + ('\u{591}', '\u{5bd}'), + ('\u{5bf}', '\u{5bf}'), + ('\u{5c1}', '\u{5c2}'), + ('\u{5c4}', '\u{5c5}'), + ('\u{5c7}', '\u{5c7}'), + ('\u{610}', '\u{61a}'), + ('\u{64b}', '\u{65f}'), + ('\u{670}', '\u{670}'), + ('\u{6d6}', '\u{6dc}'), + ('\u{6df}', '\u{6e4}'), + ('\u{6e7}', '\u{6e8}'), + ('\u{6ea}', '\u{6ed}'), + ('\u{711}', '\u{711}'), + ('\u{730}', '\u{74a}'), + ('\u{7a6}', '\u{7b0}'), + ('\u{7eb}', '\u{7f3}'), + ('\u{7fd}', '\u{7fd}'), + ('\u{816}', '\u{819}'), + ('\u{81b}', '\u{823}'), + ('\u{825}', '\u{827}'), + ('\u{829}', '\u{82d}'), + ('\u{859}', '\u{85b}'), + ('\u{897}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), + ('\u{8e3}', '\u{902}'), + ('क', '\u{93a}'), + ('\u{93c}', '\u{93c}'), + ('\u{941}', '\u{948}'), + ('\u{94d}', '\u{94d}'), + ('\u{951}', 'य़'), + ('\u{962}', '\u{963}'), + ('ॸ', 'ॿ'), + ('\u{981}', '\u{981}'), + ('ক', 'ন'), + ('প', 'র'), + ('ল', 'ল'), + ('শ', 'হ'), + ('\u{9bc}', '\u{9bc}'), + ('\u{9be}', '\u{9be}'), + ('\u{9c1}', '\u{9c4}'), + ('\u{9cd}', '\u{9cd}'), + ('\u{9d7}', '\u{9d7}'), + ('ড়', 'ঢ়'), + ('য়', 'য়'), + ('\u{9e2}', '\u{9e3}'), + ('ৰ', 'ৱ'), + ('\u{9fe}', '\u{9fe}'), + ('\u{a01}', '\u{a02}'), + ('\u{a3c}', '\u{a3c}'), + ('\u{a41}', '\u{a42}'), + ('\u{a47}', '\u{a48}'), + ('\u{a4b}', '\u{a4d}'), + ('\u{a51}', '\u{a51}'), + ('\u{a70}', '\u{a71}'), + ('\u{a75}', '\u{a75}'), + ('\u{a81}', '\u{a82}'), + ('ક', 'ન'), + ('પ', 'ર'), + ('લ', 'ળ'), + ('વ', 'હ'), + ('\u{abc}', '\u{abc}'), + ('\u{ac1}', '\u{ac5}'), + ('\u{ac7}', '\u{ac8}'), + ('\u{acd}', '\u{acd}'), + ('\u{ae2}', '\u{ae3}'), + ('ૹ', '\u{aff}'), + ('\u{b01}', '\u{b01}'), + ('କ', 'ନ'), + ('ପ', 'ର'), + ('ଲ', 'ଳ'), + ('ଵ', 'ହ'), + ('\u{b3c}', '\u{b3c}'), + ('\u{b3e}', '\u{b3f}'), + ('\u{b41}', '\u{b44}'), + ('\u{b4d}', '\u{b4d}'), + ('\u{b55}', '\u{b57}'), + ('ଡ଼', 'ଢ଼'), + ('ୟ', 'ୟ'), + ('\u{b62}', '\u{b63}'), + ('ୱ', 'ୱ'), + ('\u{b82}', '\u{b82}'), + ('\u{bbe}', '\u{bbe}'), + ('\u{bc0}', '\u{bc0}'), + ('\u{bcd}', '\u{bcd}'), + ('\u{bd7}', '\u{bd7}'), + ('\u{c00}', '\u{c00}'), + ('\u{c04}', '\u{c04}'), + ('క', 'న'), + ('ప', 'హ'), + ('\u{c3c}', '\u{c3c}'), + ('\u{c3e}', '\u{c40}'), + ('\u{c46}', '\u{c48}'), + ('\u{c4a}', '\u{c4d}'), + ('\u{c55}', '\u{c56}'), + ('ౘ', 'ౚ'), + ('\u{c62}', '\u{c63}'), + ('\u{c81}', '\u{c81}'), + ('\u{cbc}', '\u{cbc}'), + ('\u{cbf}', '\u{cc0}'), + ('\u{cc2}', '\u{cc2}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), + ('\u{cd5}', '\u{cd6}'), + ('\u{ce2}', '\u{ce3}'), + ('\u{d00}', '\u{d01}'), + ('ക', '\u{d3c}'), + ('\u{d3e}', '\u{d3e}'), + ('\u{d41}', '\u{d44}'), + ('\u{d4d}', '\u{d4d}'), + ('\u{d57}', '\u{d57}'), + ('\u{d62}', '\u{d63}'), + ('\u{d81}', '\u{d81}'), + ('\u{dca}', '\u{dca}'), + ('\u{dcf}', '\u{dcf}'), + ('\u{dd2}', '\u{dd4}'), + ('\u{dd6}', '\u{dd6}'), + ('\u{ddf}', '\u{ddf}'), + ('\u{e31}', '\u{e31}'), + ('\u{e34}', '\u{e3a}'), + ('\u{e47}', '\u{e4e}'), + ('\u{eb1}', '\u{eb1}'), + ('\u{eb4}', '\u{ebc}'), + ('\u{ec8}', '\u{ece}'), + ('\u{f18}', '\u{f19}'), + ('\u{f35}', '\u{f35}'), + ('\u{f37}', '\u{f37}'), + ('\u{f39}', '\u{f39}'), + ('\u{f71}', '\u{f7e}'), + ('\u{f80}', '\u{f84}'), + ('\u{f86}', '\u{f87}'), + ('\u{f8d}', '\u{f97}'), + ('\u{f99}', '\u{fbc}'), + ('\u{fc6}', '\u{fc6}'), + ('\u{102d}', '\u{1030}'), + ('\u{1032}', '\u{1037}'), + ('\u{1039}', '\u{103a}'), + ('\u{103d}', '\u{103e}'), + ('\u{1058}', '\u{1059}'), + ('\u{105e}', '\u{1060}'), + ('\u{1071}', '\u{1074}'), + ('\u{1082}', '\u{1082}'), + ('\u{1085}', '\u{1086}'), + ('\u{108d}', '\u{108d}'), + ('\u{109d}', '\u{109d}'), + ('\u{135d}', '\u{135f}'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', '\u{1734}'), + ('\u{1752}', '\u{1753}'), + ('\u{1772}', '\u{1773}'), + ('\u{17b4}', '\u{17b5}'), + ('\u{17b7}', '\u{17bd}'), + ('\u{17c6}', '\u{17c6}'), + ('\u{17c9}', '\u{17d3}'), + ('\u{17dd}', '\u{17dd}'), + ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), + ('\u{1885}', '\u{1886}'), + ('\u{18a9}', '\u{18a9}'), + ('\u{1920}', '\u{1922}'), + ('\u{1927}', '\u{1928}'), + ('\u{1932}', '\u{1932}'), + ('\u{1939}', '\u{193b}'), + ('\u{1a17}', '\u{1a18}'), + ('\u{1a1b}', '\u{1a1b}'), + ('\u{1a56}', '\u{1a56}'), + ('\u{1a58}', '\u{1a5e}'), + ('\u{1a60}', '\u{1a60}'), + ('\u{1a62}', '\u{1a62}'), + ('\u{1a65}', '\u{1a6c}'), + ('\u{1a73}', '\u{1a7c}'), + ('\u{1a7f}', '\u{1a7f}'), + ('\u{1ab0}', '\u{1ace}'), + ('\u{1b00}', '\u{1b03}'), + ('\u{1b34}', '\u{1b3d}'), + ('\u{1b42}', '\u{1b44}'), + ('\u{1b6b}', '\u{1b73}'), + ('\u{1b80}', '\u{1b81}'), + ('\u{1ba2}', '\u{1ba5}'), + ('\u{1ba8}', '\u{1bad}'), + ('\u{1be6}', '\u{1be6}'), + ('\u{1be8}', '\u{1be9}'), + ('\u{1bed}', '\u{1bed}'), + ('\u{1bef}', '\u{1bf3}'), + ('\u{1c2c}', '\u{1c33}'), + ('\u{1c36}', '\u{1c37}'), + ('\u{1cd0}', '\u{1cd2}'), + ('\u{1cd4}', '\u{1ce0}'), + ('\u{1ce2}', '\u{1ce8}'), + ('\u{1ced}', '\u{1ced}'), + ('\u{1cf4}', '\u{1cf4}'), + ('\u{1cf8}', '\u{1cf9}'), + ('\u{1dc0}', '\u{1dff}'), + ('\u{200d}', '\u{200d}'), + ('\u{20d0}', '\u{20f0}'), + ('\u{2cef}', '\u{2cf1}'), + ('\u{2d7f}', '\u{2d7f}'), + ('\u{2de0}', '\u{2dff}'), + ('\u{302a}', '\u{302f}'), + ('\u{3099}', '\u{309a}'), + ('\u{a66f}', '\u{a672}'), + ('\u{a674}', '\u{a67d}'), + ('\u{a69e}', '\u{a69f}'), + ('\u{a6f0}', '\u{a6f1}'), + ('\u{a802}', '\u{a802}'), + ('\u{a806}', '\u{a806}'), + ('\u{a80b}', '\u{a80b}'), + ('\u{a825}', '\u{a826}'), + ('\u{a82c}', '\u{a82c}'), + ('\u{a8c4}', '\u{a8c5}'), + ('\u{a8e0}', '\u{a8f1}'), + ('\u{a8ff}', '\u{a8ff}'), + ('\u{a926}', '\u{a92d}'), + ('\u{a947}', '\u{a951}'), + ('\u{a953}', '\u{a953}'), + ('\u{a980}', '\u{a982}'), + ('\u{a9b3}', '\u{a9b3}'), + ('\u{a9b6}', '\u{a9b9}'), + ('\u{a9bc}', '\u{a9bd}'), + ('\u{a9c0}', '\u{a9c0}'), + ('\u{a9e5}', '\u{a9e5}'), + ('\u{aa29}', '\u{aa2e}'), + ('\u{aa31}', '\u{aa32}'), + ('\u{aa35}', '\u{aa36}'), + ('\u{aa43}', '\u{aa43}'), + ('\u{aa4c}', '\u{aa4c}'), + ('\u{aa7c}', '\u{aa7c}'), + ('\u{aab0}', '\u{aab0}'), + ('\u{aab2}', '\u{aab4}'), + ('\u{aab7}', '\u{aab8}'), + ('\u{aabe}', '\u{aabf}'), + ('\u{aac1}', '\u{aac1}'), + ('\u{aaec}', '\u{aaed}'), + ('\u{aaf6}', '\u{aaf6}'), + ('\u{abe5}', '\u{abe5}'), + ('\u{abe8}', '\u{abe8}'), + ('\u{abed}', '\u{abed}'), + ('\u{fb1e}', '\u{fb1e}'), + ('\u{fe00}', '\u{fe0f}'), + ('\u{fe20}', '\u{fe2f}'), + ('\u{ff9e}', '\u{ff9f}'), + ('\u{101fd}', '\u{101fd}'), + ('\u{102e0}', '\u{102e0}'), + ('\u{10376}', '\u{1037a}'), + ('\u{10a01}', '\u{10a03}'), + ('\u{10a05}', '\u{10a06}'), + ('\u{10a0c}', '\u{10a0f}'), + ('\u{10a38}', '\u{10a3a}'), + ('\u{10a3f}', '\u{10a3f}'), + ('\u{10ae5}', '\u{10ae6}'), + ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), + ('\u{10eab}', '\u{10eac}'), + ('\u{10efc}', '\u{10eff}'), + ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), + ('\u{11001}', '\u{11001}'), + ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), + ('\u{1107f}', '\u{11081}'), + ('\u{110b3}', '\u{110b6}'), + ('\u{110b9}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), + ('\u{11100}', '\u{11102}'), + ('\u{11127}', '\u{1112b}'), + ('\u{1112d}', '\u{11134}'), + ('\u{11173}', '\u{11173}'), + ('\u{11180}', '\u{11181}'), + ('\u{111b6}', '\u{111be}'), + ('\u{111c0}', '\u{111c0}'), + ('\u{111c9}', '\u{111cc}'), + ('\u{111cf}', '\u{111cf}'), + ('\u{1122f}', '\u{11231}'), + ('\u{11234}', '\u{11237}'), + ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), + ('\u{112df}', '\u{112df}'), + ('\u{112e3}', '\u{112ea}'), + ('\u{11300}', '\u{11301}'), + ('\u{1133b}', '\u{1133c}'), + ('\u{1133e}', '\u{1133e}'), + ('\u{11340}', '\u{11340}'), + ('\u{1134d}', '\u{1134d}'), + ('\u{11357}', '\u{11357}'), + ('\u{11366}', '\u{1136c}'), + ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113b8}'), + ('\u{113bb}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '\u{113c9}'), + ('\u{113ce}', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), + ('\u{11438}', '\u{1143f}'), + ('\u{11442}', '\u{11444}'), + ('\u{11446}', '\u{11446}'), + ('\u{1145e}', '\u{1145e}'), + ('\u{114b0}', '\u{114b0}'), + ('\u{114b3}', '\u{114b8}'), + ('\u{114ba}', '\u{114ba}'), + ('\u{114bd}', '\u{114bd}'), + ('\u{114bf}', '\u{114c0}'), + ('\u{114c2}', '\u{114c3}'), + ('\u{115af}', '\u{115af}'), + ('\u{115b2}', '\u{115b5}'), + ('\u{115bc}', '\u{115bd}'), + ('\u{115bf}', '\u{115c0}'), + ('\u{115dc}', '\u{115dd}'), + ('\u{11633}', '\u{1163a}'), + ('\u{1163d}', '\u{1163d}'), + ('\u{1163f}', '\u{11640}'), + ('\u{116ab}', '\u{116ab}'), + ('\u{116ad}', '\u{116ad}'), + ('\u{116b0}', '\u{116b7}'), + ('\u{1171d}', '\u{1171d}'), + ('\u{1171f}', '\u{1171f}'), + ('\u{11722}', '\u{11725}'), + ('\u{11727}', '\u{1172b}'), + ('\u{1182f}', '\u{11837}'), + ('\u{11839}', '\u{1183a}'), + ('\u{11930}', '\u{11930}'), + ('\u{1193b}', '\u{1193e}'), + ('\u{11943}', '\u{11943}'), + ('\u{119d4}', '\u{119d7}'), + ('\u{119da}', '\u{119db}'), + ('\u{119e0}', '\u{119e0}'), + ('\u{11a01}', '\u{11a0a}'), + ('\u{11a33}', '\u{11a38}'), + ('\u{11a3b}', '\u{11a3e}'), + ('\u{11a47}', '\u{11a47}'), + ('\u{11a51}', '\u{11a56}'), + ('\u{11a59}', '\u{11a5b}'), + ('\u{11a8a}', '\u{11a96}'), + ('\u{11a98}', '\u{11a99}'), + ('\u{11c30}', '\u{11c36}'), + ('\u{11c38}', '\u{11c3d}'), + ('\u{11c3f}', '\u{11c3f}'), + ('\u{11c92}', '\u{11ca7}'), + ('\u{11caa}', '\u{11cb0}'), + ('\u{11cb2}', '\u{11cb3}'), + ('\u{11cb5}', '\u{11cb6}'), + ('\u{11d31}', '\u{11d36}'), + ('\u{11d3a}', '\u{11d3a}'), + ('\u{11d3c}', '\u{11d3d}'), + ('\u{11d3f}', '\u{11d45}'), + ('\u{11d47}', '\u{11d47}'), + ('\u{11d90}', '\u{11d91}'), + ('\u{11d95}', '\u{11d95}'), + ('\u{11d97}', '\u{11d97}'), + ('\u{11ef3}', '\u{11ef4}'), + ('\u{11f00}', '\u{11f01}'), + ('\u{11f36}', '\u{11f3a}'), + ('\u{11f40}', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{16129}'), + ('\u{1612d}', '\u{1612f}'), + ('\u{16af0}', '\u{16af4}'), + ('\u{16b30}', '\u{16b36}'), + ('\u{16f4f}', '\u{16f4f}'), + ('\u{16f8f}', '\u{16f92}'), + ('\u{16fe4}', '\u{16fe4}'), + ('\u{16ff0}', '\u{16ff1}'), + ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), + ('\u{1d165}', '\u{1d169}'), + ('\u{1d16d}', '\u{1d172}'), + ('\u{1d17b}', '\u{1d182}'), + ('\u{1d185}', '\u{1d18b}'), + ('\u{1d1aa}', '\u{1d1ad}'), + ('\u{1d242}', '\u{1d244}'), + ('\u{1da00}', '\u{1da36}'), + ('\u{1da3b}', '\u{1da6c}'), + ('\u{1da75}', '\u{1da75}'), + ('\u{1da84}', '\u{1da84}'), + ('\u{1da9b}', '\u{1da9f}'), + ('\u{1daa1}', '\u{1daaf}'), + ('\u{1e000}', '\u{1e006}'), + ('\u{1e008}', '\u{1e018}'), + ('\u{1e01b}', '\u{1e021}'), + ('\u{1e023}', '\u{1e024}'), + ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), + ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), + ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), + ('\u{1e8d0}', '\u{1e8d6}'), + ('\u{1e944}', '\u{1e94a}'), + ('🏻', '🏿'), + ('\u{e0020}', '\u{e007f}'), + ('\u{e0100}', '\u{e01ef}'), +]; + pub const JOIN_CONTROL: &'static [(char, char)] = &[('\u{200c}', '\u{200d}')]; pub const LOGICAL_ORDER_EXCEPTION: &'static [(char, char)] = &[ @@ -7907,6 +8512,7 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('ჼ', 'ჿ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), + ('ᲊ', 'ᲊ'), ('ᴀ', 'ᶿ'), ('ḁ', 'ḁ'), ('ḃ', 'ḃ'), @@ -8239,11 +8845,13 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('ꟃ', 'ꟃ'), ('ꟈ', 'ꟈ'), ('ꟊ', 'ꟊ'), + ('ꟍ', 'ꟍ'), ('ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), ('ꟕ', 'ꟕ'), ('ꟗ', 'ꟗ'), ('ꟙ', 'ꟙ'), + ('ꟛ', 'ꟛ'), ('ꟲ', 'ꟴ'), ('ꟶ', 'ꟶ'), ('ꟸ', 'ꟺ'), @@ -8264,6 +8872,7 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('𐞇', '𐞰'), ('𐞲', '𐞺'), ('𐳀', '𐳲'), + ('𐵰', '𐶅'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), ('𝐚', '𝐳'), @@ -8385,6 +8994,7 @@ pub const MATH: &'static [(char, char)] = &[ ('~', '~'), ('¬', '¬'), ('←', '↓'), + ('𐶎', '𐶏'), ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'), @@ -8442,6 +9052,18 @@ pub const MATH: &'static [(char, char)] = &[ ('𞻰', '𞻱'), ]; +pub const MODIFIER_COMBINING_MARK: &'static [(char, char)] = &[ + ('\u{654}', '\u{655}'), + ('\u{658}', '\u{658}'), + ('\u{6dc}', '\u{6dc}'), + ('\u{6e3}', '\u{6e3}'), + ('\u{6e7}', '\u{6e8}'), + ('\u{8ca}', '\u{8cb}'), + ('\u{8cd}', '\u{8cf}'), + ('\u{8d3}', '\u{8d3}'), + ('\u{8f3}', '\u{8f3}'), +]; + pub const NONCHARACTER_CODE_POINT: &'static [(char, char)] = &[ ('\u{fdd0}', '\u{fdef}'), ('\u{fffe}', '\u{ffff}'), @@ -8465,6 +9087,7 @@ pub const NONCHARACTER_CODE_POINT: &'static [(char, char)] = &[ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{345}', '\u{345}'), + ('\u{363}', '\u{36f}'), ('\u{5b0}', '\u{5bd}'), ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), @@ -8485,6 +9108,7 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{81b}', '\u{823}'), ('\u{825}', '\u{827}'), ('\u{829}', '\u{82c}'), + ('\u{897}', '\u{897}'), ('\u{8d4}', '\u{8df}'), ('\u{8e3}', '\u{8e9}'), ('\u{8f0}', 'ः'), @@ -8531,8 +9155,8 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{c62}', '\u{c63}'), ('\u{c81}', 'ಃ'), ('ಾ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccc}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccc}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('ೳ', 'ೳ'), @@ -8583,13 +9207,13 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{1abf}', '\u{1ac0}'), ('\u{1acc}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), - ('\u{1b35}', 'ᭃ'), + ('\u{1b35}', '\u{1b43}'), ('\u{1b80}', 'ᮂ'), ('ᮡ', '\u{1ba9}'), ('\u{1bac}', '\u{1bad}'), ('ᯧ', '\u{1bf1}'), ('ᰤ', '\u{1c36}'), - ('\u{1de7}', '\u{1df4}'), + ('\u{1dd3}', '\u{1df4}'), ('Ⓐ', 'ⓩ'), ('\u{2de0}', '\u{2dff}'), ('\u{a674}', '\u{a67b}'), @@ -8623,7 +9247,9 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}', '\u{10a0f}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d69}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efc}', '\u{10efc}'), ('𑀀', '𑀂'), ('\u{11038}', '\u{11045}'), ('\u{11073}', '\u{11074}'), @@ -8647,6 +9273,11 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('𑍋', '𑍌'), ('\u{11357}', '\u{11357}'), ('𑍢', '𑍣'), + ('\u{113b8}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏍'), ('𑐵', '𑑁'), ('\u{11443}', '𑑅'), ('\u{114b0}', '𑓁'), @@ -8689,10 +9320,11 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '\u{11f3a}'), ('𑼾', '\u{11f40}'), + ('\u{1611e}', '\u{1612e}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9e}', '\u{1bc9e}'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), @@ -8727,29 +9359,60 @@ pub const OTHER_GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{b57}', '\u{b57}'), ('\u{bbe}', '\u{bbe}'), ('\u{bd7}', '\u{bd7}'), + ('\u{cc0}', '\u{cc0}'), ('\u{cc2}', '\u{cc2}'), + ('\u{cc7}', '\u{cc8}'), + ('\u{cca}', '\u{ccb}'), ('\u{cd5}', '\u{cd6}'), ('\u{d3e}', '\u{d3e}'), ('\u{d57}', '\u{d57}'), ('\u{dcf}', '\u{dcf}'), ('\u{ddf}', '\u{ddf}'), + ('\u{1715}', '\u{1715}'), + ('\u{1734}', '\u{1734}'), ('\u{1b35}', '\u{1b35}'), + ('\u{1b3b}', '\u{1b3b}'), + ('\u{1b3d}', '\u{1b3d}'), + ('\u{1b43}', '\u{1b44}'), + ('\u{1baa}', '\u{1baa}'), + ('\u{1bf2}', '\u{1bf3}'), ('\u{200c}', '\u{200c}'), ('\u{302e}', '\u{302f}'), + ('\u{a953}', '\u{a953}'), + ('\u{a9c0}', '\u{a9c0}'), ('\u{ff9e}', '\u{ff9f}'), + ('\u{111c0}', '\u{111c0}'), + ('\u{11235}', '\u{11235}'), ('\u{1133e}', '\u{1133e}'), + ('\u{1134d}', '\u{1134d}'), ('\u{11357}', '\u{11357}'), + ('\u{113b8}', '\u{113b8}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '\u{113c9}'), + ('\u{113cf}', '\u{113cf}'), ('\u{114b0}', '\u{114b0}'), ('\u{114bd}', '\u{114bd}'), ('\u{115af}', '\u{115af}'), + ('\u{116b6}', '\u{116b6}'), ('\u{11930}', '\u{11930}'), - ('\u{1d165}', '\u{1d165}'), - ('\u{1d16e}', '\u{1d172}'), + ('\u{1193d}', '\u{1193d}'), + ('\u{11f41}', '\u{11f41}'), + ('\u{16ff0}', '\u{16ff1}'), + ('\u{1d165}', '\u{1d166}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{e0020}', '\u{e007f}'), ]; -pub const OTHER_ID_CONTINUE: &'static [(char, char)] = - &[('·', '·'), ('·', '·'), ('፩', '፱'), ('᧚', '᧚')]; +pub const OTHER_ID_CONTINUE: &'static [(char, char)] = &[ + ('·', '·'), + ('·', '·'), + ('፩', '፱'), + ('᧚', '᧚'), + ('\u{200c}', '\u{200d}'), + ('・', '・'), + ('・', '・'), +]; pub const OTHER_ID_START: &'static [(char, char)] = &[('\u{1885}', '\u{1886}'), ('℘', '℘'), ('℮', '℮'), ('゛', '゜')]; @@ -9013,17 +9676,21 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('፧', '፨'), ('᙮', '᙮'), ('᜵', '᜶'), + ('។', '៕'), ('᠃', '᠃'), ('᠉', '᠉'), ('᥄', '᥅'), ('᪨', '᪫'), + ('᭎', '᭏'), ('᭚', '᭛'), ('᭞', '᭟'), - ('᭽', '᭾'), + ('᭽', '᭿'), ('᰻', '᰼'), ('᱾', '᱿'), + ('․', '․'), ('‼', '‽'), ('⁇', '⁉'), + ('⳹', '⳻'), ('⸮', '⸮'), ('⸼', '⸼'), ('⹓', '⹔'), @@ -9039,6 +9706,8 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('꩝', '꩟'), ('꫰', '꫱'), ('꯫', '꯫'), + ('︒', '︒'), + ('︕', '︖'), ('﹒', '﹒'), ('﹖', '﹗'), ('!', '!'), @@ -9057,6 +9726,7 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('𑈸', '𑈹'), ('𑈻', '𑈼'), ('𑊩', '𑊩'), + ('𑏔', '𑏕'), ('𑑋', '𑑌'), ('𑗂', '𑗃'), ('𑗉', '𑗗'), @@ -9073,6 +9743,7 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('𖫵', '𖫵'), ('𖬷', '𖬸'), ('𖭄', '𖭄'), + ('𖵮', '𖵯'), ('𖺘', '𖺘'), ('𛲟', '𛲟'), ('𝪈', '𝪈'), @@ -9132,7 +9803,8 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('܀', '܊'), ('܌', '܌'), ('߸', '߹'), - ('࠰', '࠾'), + ('࠰', '࠵'), + ('࠷', '࠾'), ('࡞', '࡞'), ('।', '॥'), ('๚', '๛'), @@ -9149,13 +9821,16 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('᠈', '᠉'), ('᥄', '᥅'), ('᪨', '᪫'), + ('᭎', '᭏'), ('᭚', '᭛'), ('᭝', '᭟'), - ('᭽', '᭾'), + ('᭽', '᭿'), ('᰻', '᰿'), ('᱾', '᱿'), + ('․', '․'), ('‼', '‽'), ('⁇', '⁉'), + ('⳹', '⳻'), ('⸮', '⸮'), ('⸼', '⸼'), ('⹁', '⹁'), @@ -9174,6 +9849,8 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('꫟', '꫟'), ('꫰', '꫱'), ('꯫', '꯫'), + ('︒', '︒'), + ('︕', '︖'), ('﹐', '﹒'), ('﹔', '﹗'), ('!', '!'), @@ -9201,6 +9878,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('𑇞', '𑇟'), ('𑈸', '𑈼'), ('𑊩', '𑊩'), + ('𑏔', '𑏕'), ('𑑋', '𑑍'), ('𑑚', '𑑛'), ('𑗂', '𑗅'), @@ -9221,6 +9899,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('𖫵', '𖫵'), ('𖬷', '𖬹'), ('𖭄', '𖭄'), + ('𖵮', '𖵯'), ('𖺗', '𖺘'), ('𛲟', '𛲟'), ('𝪇', '𝪊'), @@ -9241,6 +9920,7 @@ pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), ]; @@ -9522,6 +10202,7 @@ pub const UPPERCASE: &'static [(char, char)] = &[ ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('Ꭰ', 'Ᏽ'), + ('Ᲊ', 'Ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('Ḁ', 'Ḁ'), @@ -9848,9 +10529,12 @@ pub const UPPERCASE: &'static [(char, char)] = &[ ('Ꟃ', 'Ꟃ'), ('Ꞔ', 'Ꟈ'), ('Ꟊ', 'Ꟊ'), + ('Ɤ', 'Ꟍ'), ('Ꟑ', 'Ꟑ'), ('Ꟗ', 'Ꟗ'), ('Ꟙ', 'Ꟙ'), + ('Ꟛ', 'Ꟛ'), + ('Ƛ', 'Ƛ'), ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), @@ -9860,6 +10544,7 @@ pub const UPPERCASE: &'static [(char, char)] = &[ ('𐖌', '𐖒'), ('𐖔', '𐖕'), ('𐲀', '𐲲'), + ('𐵐', '𐵥'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), ('𝐀', '𝐙'), @@ -9973,7 +10658,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ࡠ', 'ࡪ'), ('ࡰ', 'ࢇ'), ('ࢉ', 'ࢎ'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), @@ -10072,8 +10757,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -10158,8 +10843,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', '᜕'), - ('ᜟ', '᜴'), + ('ᜀ', '\u{1715}'), + ('ᜟ', '\u{1734}'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -10192,11 +10877,11 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '᯳'), + ('\u{1b80}', '\u{1bf3}'), ('ᰀ', '\u{1c37}'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), @@ -10220,6 +10905,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), + ('\u{200c}', '\u{200d}'), ('‿', '⁀'), ('⁔', '⁔'), ('ⁱ', 'ⁱ'), @@ -10265,8 +10951,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ぁ', 'ゖ'), ('\u{3099}', '\u{309a}'), ('ゝ', 'ゟ'), - ('ァ', 'ヺ'), - ('ー', 'ヿ'), + ('ァ', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆿ'), @@ -10281,10 +10966,10 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), @@ -10293,9 +10978,9 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{a8e0}', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', '\u{a92d}'), - ('ꤰ', '꥓'), + ('ꤰ', '\u{a953}'), ('ꥠ', 'ꥼ'), - ('\u{a980}', '꧀'), + ('\u{a980}', '\u{a9c0}'), ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', '\u{aa36}'), @@ -10350,7 +11035,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('A', 'Z'), ('_', '_'), ('a', 'z'), - ('ヲ', 'ᄒ'), + ('・', 'ᄒ'), ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), @@ -10388,6 +11073,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -10428,10 +11114,14 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), + ('𐵀', '𐵥'), + ('\u{10d69}', '\u{10d6d}'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), - ('\u{10efd}', '𐼜'), + ('𐻂', '𐻄'), + ('\u{10efc}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), ('𐽰', '\u{10f85}'), @@ -10471,12 +11161,22 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133b}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏓'), + ('\u{113e1}', '\u{113e2}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), ('\u{1145e}', '𑑡'), @@ -10491,6 +11191,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𑙐', '𑙙'), ('𑚀', '𑚸'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), @@ -10514,6 +11215,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), + ('𑯰', '𑯹'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), @@ -10538,7 +11241,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f42}'), - ('𑽐', '𑽙'), + ('𑽐', '\u{11f5a}'), ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), @@ -10546,7 +11249,9 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('\u{13440}', '\u{13455}'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄹'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), @@ -10559,16 +11264,18 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𖭐', '𖭙'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), + ('𖵰', '𖵹'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -10583,10 +11290,11 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), + ('𜳰', '𜳹'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -10644,6 +11352,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), ('𞓐', '𞓹'), + ('𞗐', '𞗺'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -10691,6 +11400,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -10922,7 +11632,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᳩ', 'ᳬ'), @@ -11005,10 +11715,10 @@ pub const XID_START: &'static [(char, char)] = &[ ('ꚠ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), @@ -11113,6 +11823,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -11149,8 +11860,11 @@ pub const XID_START: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'), + ('𐵊', '𐵥'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -11189,6 +11903,13 @@ pub const XID_START: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -11223,6 +11944,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -11246,7 +11968,9 @@ pub const XID_START: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -11255,6 +11979,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), @@ -11263,7 +11988,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𖿣', '𖿣'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -11316,6 +12041,8 @@ pub const XID_START: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -11361,6 +12088,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), diff --git a/regex-syntax/src/unicode_tables/property_names.rs b/regex-syntax/src/unicode_tables/property_names.rs index 599a123ae..a27b49133 100644 --- a/regex-syntax/src/unicode_tables/property_names.rs +++ b/regex-syntax/src/unicode_tables/property_names.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-names ucd-15.0.0 +// ucd-generate property-names ucd-16.0.0 // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("age", "Age"), @@ -116,6 +116,8 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("hst", "Hangul_Syllable_Type"), ("hyphen", "Hyphen"), ("idc", "ID_Continue"), + ("idcompatmathcontinue", "ID_Compat_Math_Continue"), + ("idcompatmathstart", "ID_Compat_Math_Start"), ("idcontinue", "ID_Continue"), ("ideo", "Ideographic"), ("ideographic", "Ideographic"), @@ -125,6 +127,10 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("idst", "IDS_Trinary_Operator"), ("idstart", "ID_Start"), ("idstrinaryoperator", "IDS_Trinary_Operator"), + ("idsu", "IDS_Unary_Operator"), + ("idsunaryoperator", "IDS_Unary_Operator"), + ("incb", "Indic_Conjunct_Break"), + ("indicconjunctbreak", "Indic_Conjunct_Break"), ("indicpositionalcategory", "Indic_Positional_Category"), ("indicsyllabiccategory", "Indic_Syllabic_Category"), ("inpc", "Indic_Positional_Category"), @@ -140,6 +146,13 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("jt", "Joining_Type"), ("kaccountingnumeric", "kAccountingNumeric"), ("kcompatibilityvariant", "kCompatibilityVariant"), + ("kehcat", "kEH_Cat"), + ("kehdesc", "kEH_Desc"), + ("kehhg", "kEH_HG"), + ("kehifao", "kEH_IFAO"), + ("kehjsesh", "kEH_JSesh"), + ("kehnomirror", "kEH_NoMirror"), + ("kehnorotate", "kEH_NoRotate"), ("kiicore", "kIICore"), ("kirggsource", "kIRG_GSource"), ("kirghsource", "kIRG_HSource"), @@ -164,6 +177,8 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("lowercase", "Lowercase"), ("lowercasemapping", "Lowercase_Mapping"), ("math", "Math"), + ("mcm", "Modifier_Combining_Mark"), + ("modifiercombiningmark", "Modifier_Combining_Mark"), ("na", "Name"), ("na1", "Unicode_1_Name"), ("name", "Name"), @@ -177,6 +192,8 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("nfkccf", "NFKC_Casefold"), ("nfkcqc", "NFKC_Quick_Check"), ("nfkcquickcheck", "NFKC_Quick_Check"), + ("nfkcscf", "NFKC_Simple_Casefold"), + ("nfkcsimplecasefold", "NFKC_Simple_Casefold"), ("nfkdqc", "NFKD_Quick_Check"), ("nfkdquickcheck", "NFKD_Quick_Check"), ("noncharactercodepoint", "Noncharacter_Code_Point"), diff --git a/regex-syntax/src/unicode_tables/property_values.rs b/regex-syntax/src/unicode_tables/property_values.rs index cb2d32fb7..2270d6638 100644 --- a/regex-syntax/src/unicode_tables/property_values.rs +++ b/regex-syntax/src/unicode_tables/property_values.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-values ucd-15.0.0 --include gc,script,scx,age,gcb,wb,sb +// ucd-generate property-values ucd-16.0.0 --include gc,script,scx,age,gcb,wb,sb // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const PROPERTY_VALUES: &'static [( &'static str, @@ -21,6 +21,8 @@ pub const PROPERTY_VALUES: &'static [( ("13.0", "V13_0"), ("14.0", "V14_0"), ("15.0", "V15_0"), + ("15.1", "V15_1"), + ("16.0", "V16_0"), ("2.0", "V2_0"), ("2.1", "V2_1"), ("3.0", "V3_0"), @@ -48,6 +50,8 @@ pub const PROPERTY_VALUES: &'static [( ("v130", "V13_0"), ("v140", "V14_0"), ("v150", "V15_0"), + ("v151", "V15_1"), + ("v160", "V16_0"), ("v20", "V2_0"), ("v21", "V2_1"), ("v30", "V3_0"), @@ -262,6 +266,8 @@ pub const PROPERTY_VALUES: &'static [( ("elymaic", "Elymaic"), ("ethi", "Ethiopic"), ("ethiopic", "Ethiopic"), + ("gara", "Garay"), + ("garay", "Garay"), ("geor", "Georgian"), ("georgian", "Georgian"), ("glag", "Glagolitic"), @@ -276,9 +282,11 @@ pub const PROPERTY_VALUES: &'static [( ("grek", "Greek"), ("gujarati", "Gujarati"), ("gujr", "Gujarati"), + ("gukh", "Gurung_Khema"), ("gunjalagondi", "Gunjala_Gondi"), ("gurmukhi", "Gurmukhi"), ("guru", "Gurmukhi"), + ("gurungkhema", "Gurung_Khema"), ("han", "Han"), ("hang", "Hangul"), ("hangul", "Hangul"), @@ -320,8 +328,10 @@ pub const PROPERTY_VALUES: &'static [( ("khoj", "Khojki"), ("khojki", "Khojki"), ("khudawadi", "Khudawadi"), + ("kiratrai", "Kirat_Rai"), ("kits", "Khitan_Small_Script"), ("knda", "Kannada"), + ("krai", "Kirat_Rai"), ("kthi", "Kaithi"), ("lana", "Tai_Tham"), ("lao", "Lao"), @@ -401,6 +411,8 @@ pub const PROPERTY_VALUES: &'static [( ("oldsoutharabian", "Old_South_Arabian"), ("oldturkic", "Old_Turkic"), ("olduyghur", "Old_Uyghur"), + ("olonal", "Ol_Onal"), + ("onao", "Ol_Onal"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"), ("orya", "Oriya"), @@ -456,6 +468,8 @@ pub const PROPERTY_VALUES: &'static [( ("soyombo", "Soyombo"), ("sund", "Sundanese"), ("sundanese", "Sundanese"), + ("sunu", "Sunuwar"), + ("sunuwar", "Sunuwar"), ("sylo", "Syloti_Nagri"), ("sylotinagri", "Syloti_Nagri"), ("syrc", "Syriac"), @@ -489,7 +503,11 @@ pub const PROPERTY_VALUES: &'static [( ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), ("tnsa", "Tangsa"), + ("todhri", "Todhri"), + ("todr", "Todhri"), ("toto", "Toto"), + ("tulutigalari", "Tulu_Tigalari"), + ("tutg", "Tulu_Tigalari"), ("ugar", "Ugaritic"), ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), @@ -591,6 +609,8 @@ pub const PROPERTY_VALUES: &'static [( ("elymaic", "Elymaic"), ("ethi", "Ethiopic"), ("ethiopic", "Ethiopic"), + ("gara", "Garay"), + ("garay", "Garay"), ("geor", "Georgian"), ("georgian", "Georgian"), ("glag", "Glagolitic"), @@ -605,9 +625,11 @@ pub const PROPERTY_VALUES: &'static [( ("grek", "Greek"), ("gujarati", "Gujarati"), ("gujr", "Gujarati"), + ("gukh", "Gurung_Khema"), ("gunjalagondi", "Gunjala_Gondi"), ("gurmukhi", "Gurmukhi"), ("guru", "Gurmukhi"), + ("gurungkhema", "Gurung_Khema"), ("han", "Han"), ("hang", "Hangul"), ("hangul", "Hangul"), @@ -649,8 +671,10 @@ pub const PROPERTY_VALUES: &'static [( ("khoj", "Khojki"), ("khojki", "Khojki"), ("khudawadi", "Khudawadi"), + ("kiratrai", "Kirat_Rai"), ("kits", "Khitan_Small_Script"), ("knda", "Kannada"), + ("krai", "Kirat_Rai"), ("kthi", "Kaithi"), ("lana", "Tai_Tham"), ("lao", "Lao"), @@ -730,6 +754,8 @@ pub const PROPERTY_VALUES: &'static [( ("oldsoutharabian", "Old_South_Arabian"), ("oldturkic", "Old_Turkic"), ("olduyghur", "Old_Uyghur"), + ("olonal", "Ol_Onal"), + ("onao", "Ol_Onal"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"), ("orya", "Oriya"), @@ -785,6 +811,8 @@ pub const PROPERTY_VALUES: &'static [( ("soyombo", "Soyombo"), ("sund", "Sundanese"), ("sundanese", "Sundanese"), + ("sunu", "Sunuwar"), + ("sunuwar", "Sunuwar"), ("sylo", "Syloti_Nagri"), ("sylotinagri", "Syloti_Nagri"), ("syrc", "Syriac"), @@ -818,7 +846,11 @@ pub const PROPERTY_VALUES: &'static [( ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), ("tnsa", "Tangsa"), + ("todhri", "Todhri"), + ("todr", "Todhri"), ("toto", "Toto"), + ("tulutigalari", "Tulu_Tigalari"), + ("tutg", "Tulu_Tigalari"), ("ugar", "Ugaritic"), ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), diff --git a/regex-syntax/src/unicode_tables/script.rs b/regex-syntax/src/unicode_tables/script.rs index cc5c400dd..3e437ca9c 100644 --- a/regex-syntax/src/unicode_tables/script.rs +++ b/regex-syntax/src/unicode_tables/script.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate script ucd-15.0.0 --chars +// ucd-generate script ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Adlam", ADLAM), @@ -46,6 +46,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Elbasan", ELBASAN), ("Elymaic", ELYMAIC), ("Ethiopic", ETHIOPIC), + ("Garay", GARAY), ("Georgian", GEORGIAN), ("Glagolitic", GLAGOLITIC), ("Gothic", GOTHIC), @@ -54,6 +55,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Gujarati", GUJARATI), ("Gunjala_Gondi", GUNJALA_GONDI), ("Gurmukhi", GURMUKHI), + ("Gurung_Khema", GURUNG_KHEMA), ("Han", HAN), ("Hangul", HANGUL), ("Hanifi_Rohingya", HANIFI_ROHINGYA), @@ -76,6 +78,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Khmer", KHMER), ("Khojki", KHOJKI), ("Khudawadi", KHUDAWADI), + ("Kirat_Rai", KIRAT_RAI), ("Lao", LAO), ("Latin", LATIN), ("Lepcha", LEPCHA), @@ -113,6 +116,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG), ("Ogham", OGHAM), ("Ol_Chiki", OL_CHIKI), + ("Ol_Onal", OL_ONAL), ("Old_Hungarian", OLD_HUNGARIAN), ("Old_Italic", OLD_ITALIC), ("Old_North_Arabian", OLD_NORTH_ARABIAN), @@ -144,6 +148,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Sora_Sompeng", SORA_SOMPENG), ("Soyombo", SOYOMBO), ("Sundanese", SUNDANESE), + ("Sunuwar", SUNUWAR), ("Syloti_Nagri", SYLOTI_NAGRI), ("Syriac", SYRIAC), ("Tagalog", TAGALOG), @@ -161,7 +166,9 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Tibetan", TIBETAN), ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), + ("Todhri", TODHRI), ("Toto", TOTO), + ("Tulu_Tigalari", TULU_TIGALARI), ("Ugaritic", UGARITIC), ("Vai", VAI), ("Vithkuqi", VITHKUQI), @@ -193,7 +200,7 @@ pub const ARABIC: &'static [(char, char)] = &[ ('ݐ', 'ݿ'), ('ࡰ', 'ࢎ'), ('\u{890}', '\u{891}'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{8ff}'), ('ﭐ', '﯂'), ('ﯓ', 'ﴽ'), @@ -204,7 +211,8 @@ pub const ARABIC: &'static [(char, char)] = &[ ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'), ('𐹠', '𐹾'), - ('\u{10efd}', '\u{10eff}'), + ('𐻂', '𐻄'), + ('\u{10efc}', '\u{10eff}'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), @@ -246,14 +254,14 @@ pub const ARMENIAN: &'static [(char, char)] = pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')]; -pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭐', '᭾')]; +pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭎', '᭿')]; pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')]; pub const BASSA_VAH: &'static [(char, char)] = &[('𖫐', '𖫭'), ('\u{16af0}', '𖫵')]; -pub const BATAK: &'static [(char, char)] = &[('ᯀ', '᯳'), ('᯼', '᯿')]; +pub const BATAK: &'static [(char, char)] = &[('ᯀ', '\u{1bf3}'), ('᯼', '᯿')]; pub const BENGALI: &'static [(char, char)] = &[ ('ঀ', 'ঃ'), @@ -354,15 +362,14 @@ pub const COMMON: &'static [(char, char)] = &[ ('ℳ', '⅍'), ('⅏', '⅟'), ('↉', '↋'), - ('←', '␦'), + ('←', '␩'), ('⑀', '⑊'), ('①', '⟿'), ('⤀', '⭳'), ('⭶', '⮕'), ('⮗', '⯿'), ('⸀', '⹝'), - ('⿰', '⿻'), - ('\u{3000}', '〄'), + ('⿰', '〄'), ('〆', '〆'), ('〈', '〠'), ('〰', '〷'), @@ -371,7 +378,8 @@ pub const COMMON: &'static [(char, char)] = &[ ('゠', '゠'), ('・', 'ー'), ('㆐', '㆟'), - ('㇀', '㇣'), + ('㇀', '㇥'), + ('㇯', '㇯'), ('㈠', '㉟'), ('㉿', '㋏'), ('㋿', '㋿'), @@ -405,10 +413,12 @@ pub const COMMON: &'static [(char, char)] = &[ ('𐇐', '𐇼'), ('𐋡', '𐋻'), ('\u{1bca0}', '\u{1bca3}'), + ('𜰀', '𜳹'), + ('𜴀', '𜺳'), ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), - ('𝄩', '𝅦'), + ('𝄩', '\u{1d166}'), ('𝅪', '\u{1d17a}'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), @@ -465,19 +475,18 @@ pub const COMMON: &'static [(char, char)] = &[ ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('🢰', '🢱'), + ('🢰', '🢻'), + ('🣀', '🣁'), ('🤀', '🩓'), ('🩠', '🩭'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ('🬀', '🮒'), - ('🮔', '🯊'), - ('🯰', '🯹'), + ('🮔', '🯹'), ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'), ]; @@ -496,7 +505,7 @@ pub const CYPRO_MINOAN: &'static [(char, char)] = &[('𒾐', '𒿲')]; pub const CYRILLIC: &'static [(char, char)] = &[ ('Ѐ', '\u{484}'), ('\u{487}', 'ԯ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('ᴫ', 'ᴫ'), ('ᵸ', 'ᵸ'), ('\u{2de0}', '\u{2dff}'), @@ -533,7 +542,7 @@ pub const DUPLOYAN: &'static [(char, char)] = &[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '𛲟')]; pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = - &[('𓀀', '\u{13455}')]; + &[('𓀀', '\u{13455}'), ('𓑠', '𔏺')]; pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')]; @@ -578,6 +587,9 @@ pub const ETHIOPIC: &'static [(char, char)] = &[ ('𞟰', '𞟾'), ]; +pub const GARAY: &'static [(char, char)] = + &[('𐵀', '𐵥'), ('\u{10d69}', '𐶅'), ('𐶎', '𐶏')]; + pub const GEORGIAN: &'static [(char, char)] = &[ ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), @@ -612,7 +624,7 @@ pub const GRANTHA: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133c}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), @@ -704,6 +716,8 @@ pub const GURMUKHI: &'static [(char, char)] = &[ ('੦', '੶'), ]; +pub const GURUNG_KHEMA: &'static [(char, char)] = &[('𖄀', '𖄹')]; + pub const HAN: &'static [(char, char)] = &[ ('⺀', '⺙'), ('⺛', '⻳'), @@ -717,12 +731,13 @@ pub const HAN: &'static [(char, char)] = &[ ('豈', '舘'), ('並', '龎'), ('𖿢', '𖿣'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𠀀', '𪛟'), ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -748,7 +763,7 @@ pub const HANGUL: &'static [(char, char)] = &[ pub const HANIFI_ROHINGYA: &'static [(char, char)] = &[('𐴀', '\u{10d27}'), ('𐴰', '𐴹')]; -pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '᜴')]; +pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '\u{1734}')]; pub const HATRAN: &'static [(char, char)] = &[('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿')]; @@ -828,8 +843,8 @@ pub const KANNADA: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -855,7 +870,7 @@ pub const KATAKANA: &'static [(char, char)] = &[ ]; pub const KAWI: &'static [(char, char)] = - &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '𑽙')]; + &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f5a}')]; pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '\u{a92d}'), ('꤯', '꤯')]; @@ -871,7 +886,7 @@ pub const KHAROSHTHI: &'static [(char, char)] = &[ ]; pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] = - &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕')]; + &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕'), ('𘳿', '𘳿')]; pub const KHMER: &'static [(char, char)] = &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')]; @@ -881,6 +896,8 @@ pub const KHOJKI: &'static [(char, char)] = &[('𑈀', '𑈑'), ('𑈓', '\u{112 pub const KHUDAWADI: &'static [(char, char)] = &[('𑊰', '\u{112ea}'), ('𑋰', '𑋹')]; +pub const KIRAT_RAI: &'static [(char, char)] = &[('𖵀', '𖵹')]; + pub const LAO: &'static [(char, char)] = &[ ('ກ', 'ຂ'), ('ຄ', 'ຄ'), @@ -919,10 +936,10 @@ pub const LATIN: &'static [(char, char)] = &[ ('Ⅰ', 'ↈ'), ('Ⱡ', 'Ɀ'), ('Ꜣ', 'ꞇ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꟿ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭤ'), @@ -1026,7 +1043,7 @@ pub const MULTANI: &'static [(char, char)] = &[('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')]; pub const MYANMAR: &'static [(char, char)] = - &[('က', '႟'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ')]; + &[('က', '႟'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'), ('𑛐', '𑛣')]; pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')]; @@ -1051,6 +1068,8 @@ pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')]; pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', '᱿')]; +pub const OL_ONAL: &'static [(char, char)] = &[('𞗐', '𞗺'), ('𞗿', '𞗿')]; + pub const OLD_HUNGARIAN: &'static [(char, char)] = &[('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿')]; @@ -1105,7 +1124,7 @@ pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', ' pub const PSALTER_PAHLAVI: &'static [(char, char)] = &[('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')]; -pub const REJANG: &'static [(char, char)] = &[('ꤰ', '꥓'), ('꥟', '꥟')]; +pub const REJANG: &'static [(char, char)] = &[('ꤰ', '\u{a953}'), ('꥟', '꥟')]; pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ')]; @@ -1149,12 +1168,14 @@ pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')]; pub const SUNDANESE: &'static [(char, char)] = &[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')]; +pub const SUNUWAR: &'static [(char, char)] = &[('𑯀', '𑯡'), ('𑯰', '𑯹')]; + pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('ꠀ', '\u{a82c}')]; pub const SYRIAC: &'static [(char, char)] = &[('܀', '܍'), ('\u{70f}', '\u{74a}'), ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ')]; -pub const TAGALOG: &'static [(char, char)] = &[('ᜀ', '᜕'), ('ᜟ', 'ᜟ')]; +pub const TAGALOG: &'static [(char, char)] = &[('ᜀ', '\u{1715}'), ('ᜟ', 'ᜟ')]; pub const TAGBANWA: &'static [(char, char)] = &[('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')]; @@ -1234,8 +1255,24 @@ pub const TIFINAGH: &'static [(char, char)] = pub const TIRHUTA: &'static [(char, char)] = &[('𑒀', '𑓇'), ('𑓐', '𑓙')]; +pub const TODHRI: &'static [(char, char)] = &[('𐗀', '𐗳')]; + pub const TOTO: &'static [(char, char)] = &[('𞊐', '\u{1e2ae}')]; +pub const TULU_TIGALARI: &'static [(char, char)] = &[ + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏕'), + ('𑏗', '𑏘'), + ('\u{113e1}', '\u{113e2}'), +]; + pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')]; pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')]; diff --git a/regex-syntax/src/unicode_tables/script_extension.rs b/regex-syntax/src/unicode_tables/script_extension.rs index 42625e21b..e3f492e2d 100644 --- a/regex-syntax/src/unicode_tables/script_extension.rs +++ b/regex-syntax/src/unicode_tables/script_extension.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate script-extension ucd-15.0.0 --chars +// ucd-generate script-extension ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Adlam", ADLAM), @@ -46,6 +46,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Elbasan", ELBASAN), ("Elymaic", ELYMAIC), ("Ethiopic", ETHIOPIC), + ("Garay", GARAY), ("Georgian", GEORGIAN), ("Glagolitic", GLAGOLITIC), ("Gothic", GOTHIC), @@ -54,6 +55,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Gujarati", GUJARATI), ("Gunjala_Gondi", GUNJALA_GONDI), ("Gurmukhi", GURMUKHI), + ("Gurung_Khema", GURUNG_KHEMA), ("Han", HAN), ("Hangul", HANGUL), ("Hanifi_Rohingya", HANIFI_ROHINGYA), @@ -76,6 +78,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Khmer", KHMER), ("Khojki", KHOJKI), ("Khudawadi", KHUDAWADI), + ("Kirat_Rai", KIRAT_RAI), ("Lao", LAO), ("Latin", LATIN), ("Lepcha", LEPCHA), @@ -113,6 +116,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG), ("Ogham", OGHAM), ("Ol_Chiki", OL_CHIKI), + ("Ol_Onal", OL_ONAL), ("Old_Hungarian", OLD_HUNGARIAN), ("Old_Italic", OLD_ITALIC), ("Old_North_Arabian", OLD_NORTH_ARABIAN), @@ -144,6 +148,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Sora_Sompeng", SORA_SOMPENG), ("Soyombo", SOYOMBO), ("Sundanese", SUNDANESE), + ("Sunuwar", SUNUWAR), ("Syloti_Nagri", SYLOTI_NAGRI), ("Syriac", SYRIAC), ("Tagalog", TAGALOG), @@ -161,7 +166,9 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Tibetan", TIBETAN), ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), + ("Todhri", TODHRI), ("Toto", TOTO), + ("Tulu_Tigalari", TULU_TIGALARI), ("Ugaritic", UGARITIC), ("Vai", VAI), ("Vithkuqi", VITHKUQI), @@ -172,8 +179,15 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Zanabazar_Square", ZANABAZAR_SQUARE), ]; -pub const ADLAM: &'static [(char, char)] = - &[('؟', '؟'), ('ـ', 'ـ'), ('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞥞', '𞥟')]; +pub const ADLAM: &'static [(char, char)] = &[ + ('؟', '؟'), + ('ـ', 'ـ'), + ('⁏', '⁏'), + ('⹁', '⹁'), + ('𞤀', '𞥋'), + ('𞥐', '𞥙'), + ('𞥞', '𞥟'), +]; pub const AHOM: &'static [(char, char)] = &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑝆')]; @@ -187,8 +201,10 @@ pub const ARABIC: &'static [(char, char)] = &[ ('ݐ', 'ݿ'), ('ࡰ', 'ࢎ'), ('\u{890}', '\u{891}'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{8ff}'), + ('⁏', '⁏'), + ('⹁', '⹁'), ('ﭐ', '﯂'), ('ﯓ', 'ﶏ'), ('ﶒ', 'ﷇ'), @@ -198,7 +214,8 @@ pub const ARABIC: &'static [(char, char)] = &[ ('ﹶ', 'ﻼ'), ('\u{102e0}', '𐋻'), ('𐹠', '𐹾'), - ('\u{10efd}', '\u{10eff}'), + ('𐻂', '𐻄'), + ('\u{10efc}', '\u{10eff}'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), @@ -236,20 +253,22 @@ pub const ARABIC: &'static [(char, char)] = &[ ]; pub const ARMENIAN: &'static [(char, char)] = - &[('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')]; + &[('\u{308}', '\u{308}'), ('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')]; -pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')]; +pub const AVESTAN: &'static [(char, char)] = + &[('·', '·'), ('⸰', '⸱'), ('𐬀', '𐬵'), ('𐬹', '𐬿')]; -pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭐', '᭾')]; +pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭎', '᭿')]; pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')]; pub const BASSA_VAH: &'static [(char, char)] = &[('𖫐', '𖫭'), ('\u{16af0}', '𖫵')]; -pub const BATAK: &'static [(char, char)] = &[('ᯀ', '᯳'), ('᯼', '᯿')]; +pub const BATAK: &'static [(char, char)] = &[('ᯀ', '\u{1bf3}'), ('᯼', '᯿')]; pub const BENGALI: &'static [(char, char)] = &[ + ('ʼ', 'ʼ'), ('\u{951}', '\u{952}'), ('।', '॥'), ('ঀ', 'ঃ'), @@ -282,6 +301,9 @@ pub const BHAIKSUKI: &'static [(char, char)] = &[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', '𑱬')]; pub const BOPOMOFO: &'static [(char, char)] = &[ + ('ˇ', 'ˇ'), + ('ˉ', 'ˋ'), + ('˙', '˙'), ('˪', '˫'), ('、', '〃'), ('〈', '】'), @@ -309,10 +331,16 @@ pub const BUHID: &'static [(char, char)] = &[('᜵', '᜶'), ('ᝀ', '\u{1753}') pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'), ('𑪰', '𑪿')]; -pub const CARIAN: &'static [(char, char)] = &[('𐊠', '𐋐')]; +pub const CARIAN: &'static [(char, char)] = + &[('·', '·'), ('⁚', '⁚'), ('⁝', '⁝'), ('⸱', '⸱'), ('𐊠', '𐋐')]; -pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = - &[('𐔰', '𐕣'), ('𐕯', '𐕯')]; +pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[ + ('\u{304}', '\u{304}'), + ('\u{331}', '\u{331}'), + ('\u{35e}', '\u{35e}'), + ('𐔰', '𐕣'), + ('𐕯', '𐕯'), +]; pub const CHAKMA: &'static [(char, char)] = &[('০', '৯'), ('၀', '၉'), ('\u{11100}', '\u{11134}'), ('𑄶', '𑅇')]; @@ -320,8 +348,16 @@ pub const CHAKMA: &'static [(char, char)] = pub const CHAM: &'static [(char, char)] = &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')]; -pub const CHEROKEE: &'static [(char, char)] = - &[('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ')]; +pub const CHEROKEE: &'static [(char, char)] = &[ + ('\u{300}', '\u{302}'), + ('\u{304}', '\u{304}'), + ('\u{30b}', '\u{30c}'), + ('\u{323}', '\u{324}'), + ('\u{330}', '\u{331}'), + ('Ꭰ', 'Ᏽ'), + ('ᏸ', 'ᏽ'), + ('ꭰ', 'ꮿ'), +]; pub const CHORASMIAN: &'static [(char, char)] = &[('𐾰', '𐿋')]; @@ -329,14 +365,20 @@ pub const COMMON: &'static [(char, char)] = &[ ('\0', '@'), ('[', '`'), ('{', '©'), - ('«', '¹'), + ('«', '¶'), + ('¸', '¹'), ('»', '¿'), ('×', '×'), ('÷', '÷'), - ('ʹ', '˟'), + ('ʹ', 'ʻ'), + ('ʽ', 'ˆ'), + ('ˈ', 'ˈ'), + ('ˌ', 'ˌ'), + ('ˎ', '˖'), + ('˘', '˘'), + ('˚', '˟'), ('˥', '˩'), ('ˬ', '˿'), - ('ʹ', 'ʹ'), (';', ';'), ('΅', '΅'), ('·', '·'), @@ -345,10 +387,12 @@ pub const COMMON: &'static [(char, char)] = &[ ('\u{8e2}', '\u{8e2}'), ('฿', '฿'), ('࿕', '࿘'), - ('᛫', '᛭'), ('\u{2000}', '\u{200b}'), ('\u{200e}', '\u{202e}'), - ('‰', '\u{2064}'), + ('‰', '⁎'), + ('⁐', '⁙'), + ('⁛', '⁜'), + ('⁞', '\u{2064}'), ('\u{2066}', '⁰'), ('⁴', '⁾'), ('₀', '₎'), @@ -359,15 +403,18 @@ pub const COMMON: &'static [(char, char)] = &[ ('ℳ', '⅍'), ('⅏', '⅟'), ('↉', '↋'), - ('←', '␦'), + ('←', '␩'), ('⑀', '⑊'), ('①', '⟿'), ('⤀', '⭳'), ('⭶', '⮕'), ('⮗', '⯿'), - ('⸀', '⹂'), + ('⸀', '⸖'), + ('⸘', 'ⸯ'), + ('⸲', '⸻'), + ('⸽', '⹀'), + ('⹂', '⹂'), ('⹄', '⹝'), - ('⿰', '⿻'), ('\u{3000}', '\u{3000}'), ('〄', '〄'), ('〒', '〒'), @@ -399,10 +446,12 @@ pub const COMMON: &'static [(char, char)] = &[ ('\u{fff9}', '�'), ('𐆐', '𐆜'), ('𐇐', '𐇼'), + ('𜰀', '𜳹'), + ('𜴀', '𜺳'), ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), - ('𝄩', '𝅦'), + ('𝄩', '\u{1d166}'), ('𝅪', '\u{1d17a}'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), @@ -458,25 +507,34 @@ pub const COMMON: &'static [(char, char)] = &[ ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('🢰', '🢱'), + ('🢰', '🢻'), + ('🣀', '🣁'), ('🤀', '🩓'), ('🩠', '🩭'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ('🬀', '🮒'), - ('🮔', '🯊'), - ('🯰', '🯹'), + ('🮔', '🯹'), ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'), ]; -pub const COPTIC: &'static [(char, char)] = - &[('Ϣ', 'ϯ'), ('Ⲁ', 'ⳳ'), ('⳹', '⳿'), ('\u{102e0}', '𐋻')]; +pub const COPTIC: &'static [(char, char)] = &[ + ('·', '·'), + ('\u{300}', '\u{300}'), + ('\u{304}', '\u{305}'), + ('\u{307}', '\u{307}'), + ('ʹ', '͵'), + ('Ϣ', 'ϯ'), + ('Ⲁ', 'ⳳ'), + ('⳹', '⳿'), + ('⸗', '⸗'), + ('\u{102e0}', '𐋻'), +]; pub const CUNEIFORM: &'static [(char, char)] = &[('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃')]; @@ -496,8 +554,15 @@ pub const CYPRIOT: &'static [(char, char)] = &[ pub const CYPRO_MINOAN: &'static [(char, char)] = &[('𐄀', '𐄁'), ('𒾐', '𒿲')]; pub const CYRILLIC: &'static [(char, char)] = &[ + ('ʼ', 'ʼ'), + ('\u{300}', '\u{302}'), + ('\u{304}', '\u{304}'), + ('\u{306}', '\u{306}'), + ('\u{308}', '\u{308}'), + ('\u{30b}', '\u{30b}'), + ('\u{311}', '\u{311}'), ('Ѐ', 'ԯ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('ᴫ', 'ᴫ'), ('ᵸ', 'ᵸ'), ('\u{1df8}', '\u{1df8}'), @@ -512,6 +577,7 @@ pub const CYRILLIC: &'static [(char, char)] = &[ pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')]; pub const DEVANAGARI: &'static [(char, char)] = &[ + ('ʼ', 'ʼ'), ('\u{900}', '\u{952}'), ('\u{955}', 'ॿ'), ('\u{1cd0}', 'ᳶ'), @@ -536,17 +602,29 @@ pub const DIVES_AKURU: &'static [(char, char)] = &[ pub const DOGRA: &'static [(char, char)] = &[('।', '९'), ('꠰', '꠹'), ('𑠀', '𑠻')]; -pub const DUPLOYAN: &'static [(char, char)] = - &[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '\u{1bca3}')]; +pub const DUPLOYAN: &'static [(char, char)] = &[ + ('·', '·'), + ('\u{307}', '\u{308}'), + ('\u{30a}', '\u{30a}'), + ('\u{323}', '\u{324}'), + ('⸼', '⸼'), + ('𛰀', '𛱪'), + ('𛱰', '𛱼'), + ('𛲀', '𛲈'), + ('𛲐', '𛲙'), + ('𛲜', '\u{1bca3}'), +]; pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = - &[('𓀀', '\u{13455}')]; + &[('𓀀', '\u{13455}'), ('𓑠', '𔏺')]; -pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')]; +pub const ELBASAN: &'static [(char, char)] = + &[('·', '·'), ('\u{305}', '\u{305}'), ('𐔀', '𐔧')]; pub const ELYMAIC: &'static [(char, char)] = &[('𐿠', '𐿶')]; pub const ETHIOPIC: &'static [(char, char)] = &[ + ('\u{30e}', '\u{30e}'), ('ሀ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), @@ -585,21 +663,40 @@ pub const ETHIOPIC: &'static [(char, char)] = &[ ('𞟰', '𞟾'), ]; +pub const GARAY: &'static [(char, char)] = &[ + ('،', '،'), + ('؛', '؛'), + ('؟', '؟'), + ('𐵀', '𐵥'), + ('\u{10d69}', '𐶅'), + ('𐶎', '𐶏'), +]; + pub const GEORGIAN: &'static [(char, char)] = &[ + ('·', '·'), + ('։', '։'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჿ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), + ('⁚', '⁚'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), + ('⸱', '⸱'), ]; pub const GLAGOLITIC: &'static [(char, char)] = &[ + ('·', '·'), + ('\u{303}', '\u{303}'), + ('\u{305}', '\u{305}'), ('\u{484}', '\u{484}'), ('\u{487}', '\u{487}'), + ('։', '։'), + ('჻', '჻'), + ('⁚', '⁚'), ('Ⰰ', 'ⱟ'), ('⹃', '⹃'), ('\u{a66f}', '\u{a66f}'), @@ -610,7 +707,13 @@ pub const GLAGOLITIC: &'static [(char, char)] = &[ ('\u{1e026}', '\u{1e02a}'), ]; -pub const GOTHIC: &'static [(char, char)] = &[('𐌰', '𐍊')]; +pub const GOTHIC: &'static [(char, char)] = &[ + ('·', '·'), + ('\u{304}', '\u{305}'), + ('\u{308}', '\u{308}'), + ('\u{331}', '\u{331}'), + ('𐌰', '𐍊'), +]; pub const GRANTHA: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -630,7 +733,7 @@ pub const GRANTHA: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133b}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), @@ -641,10 +744,15 @@ pub const GRANTHA: &'static [(char, char)] = &[ ]; pub const GREEK: &'static [(char, char)] = &[ + ('·', '·'), + ('\u{300}', '\u{301}'), + ('\u{304}', '\u{304}'), + ('\u{306}', '\u{306}'), + ('\u{308}', '\u{308}'), + ('\u{313}', '\u{313}'), ('\u{342}', '\u{342}'), ('\u{345}', '\u{345}'), - ('Ͱ', 'ͳ'), - ('͵', 'ͷ'), + ('Ͱ', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('΄', '΄'), @@ -674,6 +782,7 @@ pub const GREEK: &'static [(char, char)] = &[ ('῝', '`'), ('ῲ', 'ῴ'), ('ῶ', '῾'), + ('⁝', '⁝'), ('Ω', 'Ω'), ('ꭥ', 'ꭥ'), ('𐅀', '𐆎'), @@ -702,6 +811,7 @@ pub const GUJARATI: &'static [(char, char)] = &[ ]; pub const GUNJALA_GONDI: &'static [(char, char)] = &[ + ('·', '·'), ('।', '॥'), ('𑵠', '𑵥'), ('𑵧', '𑵨'), @@ -733,10 +843,14 @@ pub const GURMUKHI: &'static [(char, char)] = &[ ('꠰', '꠹'), ]; +pub const GURUNG_KHEMA: &'static [(char, char)] = &[('॥', '॥'), ('𖄀', '𖄹')]; + pub const HAN: &'static [(char, char)] = &[ + ('·', '·'), ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), + ('⿰', '⿿'), ('、', '〃'), ('々', '】'), ('〓', '〟'), @@ -745,7 +859,8 @@ pub const HAN: &'static [(char, char)] = &[ ('〷', '〿'), ('・', '・'), ('㆐', '㆟'), - ('㇀', '㇣'), + ('㇀', '㇥'), + ('㇯', '㇯'), ('㈠', '㉇'), ('㊀', '㊰'), ('㋀', '㋋'), @@ -761,7 +876,7 @@ pub const HAN: &'static [(char, char)] = &[ ('﹅', '﹆'), ('。', '・'), ('𖿢', '𖿣'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𝍠', '𝍱'), ('🉐', '🉑'), ('𠀀', '𪛟'), @@ -769,6 +884,7 @@ pub const HAN: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -814,6 +930,7 @@ pub const HATRAN: &'static [(char, char)] = &[('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿')]; pub const HEBREW: &'static [(char, char)] = &[ + ('\u{307}', '\u{308}'), ('\u{591}', '\u{5c7}'), ('א', 'ת'), ('ׯ', '״'), @@ -849,9 +966,17 @@ pub const IMPERIAL_ARAMAIC: &'static [(char, char)] = &[('𐡀', '𐡕'), ('𐡗', '𐡟')]; pub const INHERITED: &'static [(char, char)] = &[ - ('\u{300}', '\u{341}'), + ('\u{30f}', '\u{30f}'), + ('\u{312}', '\u{312}'), + ('\u{314}', '\u{31f}'), + ('\u{321}', '\u{322}'), + ('\u{326}', '\u{32c}'), + ('\u{32f}', '\u{32f}'), + ('\u{332}', '\u{341}'), ('\u{343}', '\u{344}'), - ('\u{346}', '\u{362}'), + ('\u{346}', '\u{357}'), + ('\u{359}', '\u{35d}'), + ('\u{35f}', '\u{362}'), ('\u{953}', '\u{954}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1dc2}', '\u{1df7}'), @@ -882,6 +1007,7 @@ pub const JAVANESE: &'static [(char, char)] = pub const KAITHI: &'static [(char, char)] = &[ ('०', '९'), + ('⸱', '⸱'), ('꠰', '꠹'), ('\u{11080}', '\u{110c2}'), ('\u{110cd}', '\u{110cd}'), @@ -896,15 +1022,15 @@ pub const KANNADA: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), ('೦', '೯'), ('ೱ', 'ೳ'), ('\u{1cd0}', '\u{1cd0}'), - ('\u{1cd2}', '\u{1cd2}'), + ('\u{1cd2}', '᳓'), ('\u{1cda}', '\u{1cda}'), ('ᳲ', 'ᳲ'), ('\u{1cf4}', '\u{1cf4}'), @@ -912,6 +1038,8 @@ pub const KANNADA: &'static [(char, char)] = &[ ]; pub const KATAKANA: &'static [(char, char)] = &[ + ('\u{305}', '\u{305}'), + ('\u{323}', '\u{323}'), ('、', '〃'), ('〈', '】'), ('〓', '〟'), @@ -935,7 +1063,7 @@ pub const KATAKANA: &'static [(char, char)] = &[ ]; pub const KAWI: &'static [(char, char)] = - &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '𑽙')]; + &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f5a}')]; pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '꤯')]; @@ -951,7 +1079,7 @@ pub const KHAROSHTHI: &'static [(char, char)] = &[ ]; pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] = - &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕')]; + &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕'), ('𘳿', '𘳿')]; pub const KHMER: &'static [(char, char)] = &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')]; @@ -962,6 +1090,8 @@ pub const KHOJKI: &'static [(char, char)] = pub const KHUDAWADI: &'static [(char, char)] = &[('।', '॥'), ('꠰', '꠹'), ('𑊰', '\u{112ea}'), ('𑋰', '𑋹')]; +pub const KIRAT_RAI: &'static [(char, char)] = &[('𖵀', '𖵹')]; + pub const LAO: &'static [(char, char)] = &[ ('ກ', 'ຂ'), ('ຄ', 'ຄ'), @@ -980,11 +1110,27 @@ pub const LATIN: &'static [(char, char)] = &[ ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), + ('·', '·'), ('º', 'º'), ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ʸ'), + ('ʼ', 'ʼ'), + ('ˇ', 'ˇ'), + ('ˉ', 'ˋ'), + ('ˍ', 'ˍ'), + ('˗', '˗'), + ('˙', '˙'), ('ˠ', 'ˤ'), + ('\u{300}', '\u{30e}'), + ('\u{310}', '\u{311}'), + ('\u{313}', '\u{313}'), + ('\u{320}', '\u{320}'), + ('\u{323}', '\u{325}'), + ('\u{32d}', '\u{32e}'), + ('\u{330}', '\u{331}'), + ('\u{358}', '\u{358}'), + ('\u{35e}', '\u{35e}'), ('\u{363}', '\u{36f}'), ('\u{485}', '\u{486}'), ('\u{951}', '\u{952}'), @@ -994,6 +1140,7 @@ pub const LATIN: &'static [(char, char)] = &[ ('ᵢ', 'ᵥ'), ('ᵫ', 'ᵷ'), ('ᵹ', 'ᶾ'), + ('\u{1df8}', '\u{1df8}'), ('Ḁ', 'ỿ'), ('\u{202f}', '\u{202f}'), ('ⁱ', 'ⁱ'), @@ -1005,12 +1152,13 @@ pub const LATIN: &'static [(char, char)] = &[ ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), ('Ⱡ', 'Ɀ'), + ('⸗', '⸗'), ('꜀', '꜇'), ('Ꜣ', 'ꞇ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꟿ'), ('꤮', '꤮'), ('ꬰ', 'ꭚ'), @@ -1054,14 +1202,16 @@ pub const LINEAR_B: &'static [(char, char)] = &[ ('𐄷', '𐄿'), ]; -pub const LISU: &'static [(char, char)] = &[('ꓐ', '꓿'), ('𑾰', '𑾰')]; +pub const LISU: &'static [(char, char)] = + &[('ʼ', 'ʼ'), ('ˍ', 'ˍ'), ('《', '》'), ('ꓐ', '꓿'), ('𑾰', '𑾰')]; -pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')]; +pub const LYCIAN: &'static [(char, char)] = &[('⁚', '⁚'), ('𐊀', '𐊜')]; -pub const LYDIAN: &'static [(char, char)] = &[('𐤠', '𐤹'), ('𐤿', '𐤿')]; +pub const LYDIAN: &'static [(char, char)] = + &[('·', '·'), ('⸱', '⸱'), ('𐤠', '𐤹'), ('𐤿', '𐤿')]; pub const MAHAJANI: &'static [(char, char)] = - &[('।', '९'), ('꠰', '꠹'), ('𑅐', '𑅶')]; + &[('·', '·'), ('।', '९'), ('꠰', '꠹'), ('𑅐', '𑅶')]; pub const MAKASAR: &'static [(char, char)] = &[('𑻠', '𑻸')]; @@ -1076,6 +1226,7 @@ pub const MALAYALAM: &'static [(char, char)] = &[ ('ൔ', '\u{d63}'), ('൦', 'ൿ'), ('\u{1cda}', '\u{1cda}'), + ('ᳲ', 'ᳲ'), ('꠰', '꠲'), ]; @@ -1110,7 +1261,8 @@ pub const MENDE_KIKAKUI: &'static [(char, char)] = pub const MEROITIC_CURSIVE: &'static [(char, char)] = &[('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿')]; -pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[('𐦀', '𐦟')]; +pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = + &[('⁝', '⁝'), ('𐦀', '𐦟')]; pub const MIAO: &'static [(char, char)] = &[('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')]; @@ -1123,6 +1275,8 @@ pub const MONGOLIAN: &'static [(char, char)] = &[ ('ᠠ', 'ᡸ'), ('ᢀ', 'ᢪ'), ('\u{202f}', '\u{202f}'), + ('、', '。'), + ('〈', '》'), ('𑙠', '𑙬'), ]; @@ -1132,7 +1286,7 @@ pub const MULTANI: &'static [(char, char)] = &[('੦', '੯'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')]; pub const MYANMAR: &'static [(char, char)] = - &[('က', '႟'), ('꤮', '꤮'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ')]; + &[('က', '႟'), ('꤮', '꤮'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'), ('𑛐', '𑛣')]; pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')]; @@ -1173,15 +1327,31 @@ pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')]; pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', '᱿')]; -pub const OLD_HUNGARIAN: &'static [(char, char)] = - &[('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿')]; +pub const OL_ONAL: &'static [(char, char)] = + &[('।', '॥'), ('𞗐', '𞗺'), ('𞗿', '𞗿')]; + +pub const OLD_HUNGARIAN: &'static [(char, char)] = &[ + ('⁚', '⁚'), + ('⁝', '⁝'), + ('⸱', '⸱'), + ('⹁', '⹁'), + ('𐲀', '𐲲'), + ('𐳀', '𐳲'), + ('𐳺', '𐳿'), +]; pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', '𐌣'), ('𐌭', '𐌯')]; pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('𐪀', '𐪟')]; -pub const OLD_PERMIC: &'static [(char, char)] = - &[('\u{483}', '\u{483}'), ('𐍐', '\u{1037a}')]; +pub const OLD_PERMIC: &'static [(char, char)] = &[ + ('·', '·'), + ('\u{300}', '\u{300}'), + ('\u{306}', '\u{308}'), + ('\u{313}', '\u{313}'), + ('\u{483}', '\u{483}'), + ('𐍐', '\u{1037a}'), +]; pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')]; @@ -1189,7 +1359,8 @@ pub const OLD_SOGDIAN: &'static [(char, char)] = &[('𐼀', '𐼧')]; pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐩠', '𐩿')]; -pub const OLD_TURKIC: &'static [(char, char)] = &[('𐰀', '𐱈')]; +pub const OLD_TURKIC: &'static [(char, char)] = + &[('⁚', '⁚'), ('⸰', '⸰'), ('𐰀', '𐱈')]; pub const OLD_UYGHUR: &'static [(char, char)] = &[('ـ', 'ـ'), ('𐫲', '𐫲'), ('𐽰', '𐾉')]; @@ -1215,7 +1386,14 @@ pub const ORIYA: &'static [(char, char)] = &[ ('ᳲ', 'ᳲ'), ]; -pub const OSAGE: &'static [(char, char)] = &[('𐒰', '𐓓'), ('𐓘', '𐓻')]; +pub const OSAGE: &'static [(char, char)] = &[ + ('\u{301}', '\u{301}'), + ('\u{304}', '\u{304}'), + ('\u{30b}', '\u{30b}'), + ('\u{358}', '\u{358}'), + ('𐒰', '𐓓'), + ('𐓘', '𐓻'), +]; pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')]; @@ -1226,19 +1404,25 @@ pub const PALMYRENE: &'static [(char, char)] = &[('𐡠', '𐡿')]; pub const PAU_CIN_HAU: &'static [(char, char)] = &[('𑫀', '𑫸')]; -pub const PHAGS_PA: &'static [(char, char)] = - &[('᠂', '᠃'), ('᠅', '᠅'), ('ꡀ', '꡷')]; +pub const PHAGS_PA: &'static [(char, char)] = &[ + ('᠂', '᠃'), + ('᠅', '᠅'), + ('\u{202f}', '\u{202f}'), + ('。', '。'), + ('ꡀ', '꡷'), +]; pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')]; pub const PSALTER_PAHLAVI: &'static [(char, char)] = &[('ـ', 'ـ'), ('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')]; -pub const REJANG: &'static [(char, char)] = &[('ꤰ', '꥓'), ('꥟', '꥟')]; +pub const REJANG: &'static [(char, char)] = &[('ꤰ', '\u{a953}'), ('꥟', '꥟')]; -pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ')]; +pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛸ')]; -pub const SAMARITAN: &'static [(char, char)] = &[('ࠀ', '\u{82d}'), ('࠰', '࠾')]; +pub const SAMARITAN: &'static [(char, char)] = + &[('ࠀ', '\u{82d}'), ('࠰', '࠾'), ('⸱', '⸱')]; pub const SAURASHTRA: &'static [(char, char)] = &[('ꢀ', '\u{a8c5}'), ('꣎', '꣙')]; @@ -1249,10 +1433,12 @@ pub const SHARADA: &'static [(char, char)] = &[ ('\u{1cd9}', '\u{1cd9}'), ('\u{1cdc}', '\u{1cdd}'), ('\u{1ce0}', '\u{1ce0}'), + ('꠰', '꠵'), + ('꠸', '꠸'), ('\u{11180}', '𑇟'), ]; -pub const SHAVIAN: &'static [(char, char)] = &[('𐑐', '𐑿')]; +pub const SHAVIAN: &'static [(char, char)] = &[('·', '·'), ('𐑐', '𐑿')]; pub const SIDDHAM: &'static [(char, char)] = &[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')]; @@ -1274,6 +1460,7 @@ pub const SINHALA: &'static [(char, char)] = &[ ('ෘ', '\u{ddf}'), ('෦', '෯'), ('ෲ', '෴'), + ('ᳲ', 'ᳲ'), ('𑇡', '𑇴'), ]; @@ -1286,10 +1473,28 @@ pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')]; pub const SUNDANESE: &'static [(char, char)] = &[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')]; +pub const SUNUWAR: &'static [(char, char)] = &[ + ('\u{300}', '\u{301}'), + ('\u{303}', '\u{303}'), + ('\u{30d}', '\u{30d}'), + ('\u{310}', '\u{310}'), + ('\u{32d}', '\u{32d}'), + ('\u{331}', '\u{331}'), + ('𑯀', '𑯡'), + ('𑯰', '𑯹'), +]; + pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('।', '॥'), ('০', '৯'), ('ꠀ', '\u{a82c}')]; pub const SYRIAC: &'static [(char, char)] = &[ + ('\u{303}', '\u{304}'), + ('\u{307}', '\u{308}'), + ('\u{30a}', '\u{30a}'), + ('\u{320}', '\u{320}'), + ('\u{323}', '\u{325}'), + ('\u{32d}', '\u{32e}'), + ('\u{330}', '\u{330}'), ('،', '،'), ('؛', '\u{61c}'), ('؟', '؟'), @@ -1305,13 +1510,19 @@ pub const SYRIAC: &'static [(char, char)] = &[ ]; pub const TAGALOG: &'static [(char, char)] = - &[('ᜀ', '᜕'), ('ᜟ', 'ᜟ'), ('᜵', '᜶')]; + &[('ᜀ', '\u{1715}'), ('ᜟ', 'ᜟ'), ('᜵', '᜶')]; pub const TAGBANWA: &'static [(char, char)] = &[('᜵', '᜶'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')]; -pub const TAI_LE: &'static [(char, char)] = - &[('၀', '၉'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ')]; +pub const TAI_LE: &'static [(char, char)] = &[ + ('\u{300}', '\u{301}'), + ('\u{307}', '\u{308}'), + ('\u{30c}', '\u{30c}'), + ('၀', '၉'), + ('ᥐ', 'ᥭ'), + ('ᥰ', 'ᥴ'), +]; pub const TAI_THAM: &'static [(char, char)] = &[ ('ᨠ', '\u{1a5e}'), @@ -1356,8 +1567,14 @@ pub const TAMIL: &'static [(char, char)] = &[ pub const TANGSA: &'static [(char, char)] = &[('𖩰', '𖪾'), ('𖫀', '𖫉')]; -pub const TANGUT: &'static [(char, char)] = - &[('𖿠', '𖿠'), ('𗀀', '𘟷'), ('𘠀', '𘫿'), ('𘴀', '𘴈')]; +pub const TANGUT: &'static [(char, char)] = &[ + ('⿰', '⿿'), + ('㇯', '㇯'), + ('𖿠', '𖿠'), + ('𗀀', '𘟷'), + ('𘠀', '𘫿'), + ('𘴀', '𘴈'), +]; pub const TELUGU: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -1389,7 +1606,14 @@ pub const THAANA: &'static [(char, char)] = &[ ('﷽', '﷽'), ]; -pub const THAI: &'static [(char, char)] = &[('ก', '\u{e3a}'), ('เ', '๛')]; +pub const THAI: &'static [(char, char)] = &[ + ('ʼ', 'ʼ'), + ('˗', '˗'), + ('\u{303}', '\u{303}'), + ('\u{331}', '\u{331}'), + ('ก', '\u{e3a}'), + ('เ', '๛'), +]; pub const TIBETAN: &'static [(char, char)] = &[ ('ༀ', 'ཇ'), @@ -1399,10 +1623,18 @@ pub const TIBETAN: &'static [(char, char)] = &[ ('྾', '࿌'), ('࿎', '࿔'), ('࿙', '࿚'), + ('〈', '》'), ]; -pub const TIFINAGH: &'static [(char, char)] = - &[('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('\u{2d7f}', '\u{2d7f}')]; +pub const TIFINAGH: &'static [(char, char)] = &[ + ('\u{302}', '\u{302}'), + ('\u{304}', '\u{304}'), + ('\u{307}', '\u{307}'), + ('\u{309}', '\u{309}'), + ('ⴰ', 'ⵧ'), + ('ⵯ', '⵰'), + ('\u{2d7f}', '\u{2d7f}'), +]; pub const TIRHUTA: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -1413,7 +1645,36 @@ pub const TIRHUTA: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ]; -pub const TOTO: &'static [(char, char)] = &[('𞊐', '\u{1e2ae}')]; +pub const TODHRI: &'static [(char, char)] = &[ + ('\u{301}', '\u{301}'), + ('\u{304}', '\u{304}'), + ('\u{307}', '\u{307}'), + ('\u{311}', '\u{311}'), + ('\u{313}', '\u{313}'), + ('\u{35e}', '\u{35e}'), + ('𐗀', '𐗳'), +]; + +pub const TOTO: &'static [(char, char)] = &[('ʼ', 'ʼ'), ('𞊐', '\u{1e2ae}')]; + +pub const TULU_TIGALARI: &'static [(char, char)] = &[ + ('೦', '೯'), + ('ᳲ', 'ᳲ'), + ('\u{1cf4}', '\u{1cf4}'), + ('꠰', '꠵'), + ('\u{a8f1}', '\u{a8f1}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏕'), + ('𑏗', '𑏘'), + ('\u{113e1}', '\u{113e2}'), +]; pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')]; diff --git a/regex-syntax/src/unicode_tables/sentence_break.rs b/regex-syntax/src/unicode_tables/sentence_break.rs index 24348736f..af1c5bea9 100644 --- a/regex-syntax/src/unicode_tables/sentence_break.rs +++ b/regex-syntax/src/unicode_tables/sentence_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate sentence-break ucd-15.0.0 --chars +// ucd-generate sentence-break ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ATerm", ATERM), @@ -101,7 +101,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'ः'), ('\u{93a}', '\u{93c}'), @@ -153,8 +153,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{c81}', 'ಃ'), ('\u{cbc}', '\u{cbc}'), ('ಾ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('ೳ', 'ೳ'), @@ -197,8 +197,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '᜕'), - ('\u{1732}', '᜴'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), @@ -215,11 +215,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), - ('\u{1b34}', '᭄'), + ('\u{1b34}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', 'ᮂ'), ('ᮡ', '\u{1bad}'), - ('\u{1be6}', '᯳'), + ('\u{1be6}', '\u{1bf3}'), ('ᰤ', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce8}'), @@ -248,9 +248,9 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{a8e0}', '\u{a8f1}'), ('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '꥓'), + ('\u{a947}', '\u{a953}'), ('\u{a980}', 'ꦃ'), - ('\u{a9b3}', '꧀'), + ('\u{a9b3}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa36}'), ('\u{aa43}', '\u{aa43}'), @@ -279,8 +279,9 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('𑀀', '𑀂'), @@ -295,7 +296,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('𑅅', '𑅆'), ('\u{11173}', '\u{11173}'), ('\u{11180}', '𑆂'), - ('𑆳', '𑇀'), + ('𑆳', '\u{111c0}'), ('\u{111c9}', '\u{111cc}'), ('𑇎', '\u{111cf}'), ('𑈬', '\u{11237}'), @@ -306,11 +307,18 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('𑍢', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('𑐵', '\u{11446}'), ('\u{1145e}', '\u{1145e}'), ('\u{114b0}', '\u{114c3}'), @@ -352,20 +360,22 @@ pub const EXTEND: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '\u{11f3a}'), ('𑼾', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -386,6 +396,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0020}', '\u{e007f}'), @@ -394,12 +405,8 @@ pub const EXTEND: &'static [(char, char)] = &[ pub const FORMAT: &'static [(char, char)] = &[ ('\u{ad}', '\u{ad}'), - ('\u{600}', '\u{605}'), ('\u{61c}', '\u{61c}'), - ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), - ('\u{890}', '\u{891}'), - ('\u{8e2}', '\u{8e2}'), ('\u{180e}', '\u{180e}'), ('\u{200b}', '\u{200b}'), ('\u{200e}', '\u{200f}'), @@ -408,8 +415,6 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{2066}', '\u{206f}'), ('\u{feff}', '\u{feff}'), ('\u{fff9}', '\u{fffb}'), - ('\u{110bd}', '\u{110bd}'), - ('\u{110cd}', '\u{110cd}'), ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), @@ -696,6 +701,7 @@ pub const LOWER: &'static [(char, char)] = &[ ('ჼ', 'ჼ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), + ('ᲊ', 'ᲊ'), ('ᴀ', 'ᶿ'), ('ḁ', 'ḁ'), ('ḃ', 'ḃ'), @@ -1028,11 +1034,13 @@ pub const LOWER: &'static [(char, char)] = &[ ('ꟃ', 'ꟃ'), ('ꟈ', 'ꟈ'), ('ꟊ', 'ꟊ'), + ('ꟍ', 'ꟍ'), ('ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), ('ꟕ', 'ꟕ'), ('ꟗ', 'ꟗ'), ('ꟙ', 'ꟙ'), + ('ꟛ', 'ꟛ'), ('ꟲ', 'ꟴ'), ('ꟶ', 'ꟶ'), ('ꟸ', 'ꟺ'), @@ -1053,6 +1061,7 @@ pub const LOWER: &'static [(char, char)] = &[ ('𐞇', '𐞰'), ('𐞲', '𐞺'), ('𐳀', '𐳲'), + ('𐵰', '𐶅'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), ('𝐚', '𝐳'), @@ -1092,10 +1101,14 @@ pub const LOWER: &'static [(char, char)] = &[ pub const NUMERIC: &'static [(char, char)] = &[ ('0', '9'), + ('\u{600}', '\u{605}'), ('٠', '٩'), ('٫', '٬'), + ('\u{6dd}', '\u{6dd}'), ('۰', '۹'), ('߀', '߉'), + ('\u{890}', '\u{891}'), + ('\u{8e2}', '\u{8e2}'), ('०', '९'), ('০', '৯'), ('੦', '੯'), @@ -1114,7 +1127,7 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('០', '៩'), ('᠐', '᠙'), ('᥆', '᥏'), - ('᧐', '᧙'), + ('᧐', '᧚'), ('᪀', '᪉'), ('᪐', '᪙'), ('᭐', '᭙'), @@ -1131,7 +1144,10 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('0', '9'), ('𐒠', '𐒩'), ('𐴰', '𐴹'), + ('𐵀', '𐵉'), ('𑁦', '𑁯'), + ('\u{110bd}', '\u{110bd}'), + ('\u{110cd}', '\u{110cd}'), ('𑃰', '𑃹'), ('𑄶', '𑄿'), ('𑇐', '𑇙'), @@ -1140,20 +1156,26 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), ('𑥐', '𑥙'), + ('𑯰', '𑯹'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), ('𑽐', '𑽙'), + ('𖄰', '𖄹'), ('𖩠', '𖩩'), ('𖫀', '𖫉'), ('𖭐', '𖭙'), + ('𖵰', '𖵹'), + ('𜳰', '𜳹'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), ('𞓰', '𞓹'), + ('𞗱', '𞗺'), ('𞥐', '𞥙'), ('🯰', '🯹'), ]; @@ -1490,6 +1512,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𐑐', '𐒝'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -1522,8 +1545,11 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐴀', '𐴣'), + ('𐵊', '𐵏'), + ('𐵯', '𐵯'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -1562,6 +1588,13 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -1595,6 +1628,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -1618,7 +1652,9 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -1627,6 +1663,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), ('𖾓', '𖾟'), @@ -1634,7 +1671,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𖿣', '𖿣'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -1655,6 +1692,8 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -1699,6 +1738,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -1706,7 +1746,8 @@ pub const OLETTER: &'static [(char, char)] = &[ pub const SCONTINUE: &'static [(char, char)] = &[ (',', '-'), - (':', ':'), + (':', ';'), + (';', ';'), ('՝', '՝'), ('،', '؍'), ('߸', '߸'), @@ -1715,14 +1756,14 @@ pub const SCONTINUE: &'static [(char, char)] = &[ ('–', '—'), ('、', '、'), ('︐', '︑'), - ('︓', '︓'), + ('︓', '︔'), ('︱', '︲'), ('﹐', '﹑'), - ('﹕', '﹕'), + ('﹔', '﹕'), ('﹘', '﹘'), ('﹣', '﹣'), (',', '-'), - (':', ':'), + (':', ';'), ('、', '、'), ]; @@ -1743,17 +1784,20 @@ pub const STERM: &'static [(char, char)] = &[ ('፧', '፨'), ('᙮', '᙮'), ('᜵', '᜶'), + ('។', '៕'), ('᠃', '᠃'), ('᠉', '᠉'), ('᥄', '᥅'), ('᪨', '᪫'), + ('᭎', '᭏'), ('᭚', '᭛'), ('᭞', '᭟'), - ('᭽', '᭾'), + ('᭽', '᭿'), ('᰻', '᰼'), ('᱾', '᱿'), ('‼', '‽'), ('⁇', '⁉'), + ('⳹', '⳻'), ('⸮', '⸮'), ('⸼', '⸼'), ('⹓', '⹔'), @@ -1769,6 +1813,8 @@ pub const STERM: &'static [(char, char)] = &[ ('꩝', '꩟'), ('꫰', '꫱'), ('꯫', '꯫'), + ('︒', '︒'), + ('︕', '︖'), ('﹖', '﹗'), ('!', '!'), ('?', '?'), @@ -1785,6 +1831,7 @@ pub const STERM: &'static [(char, char)] = &[ ('𑈸', '𑈹'), ('𑈻', '𑈼'), ('𑊩', '𑊩'), + ('𑏔', '𑏕'), ('𑑋', '𑑌'), ('𑗂', '𑗃'), ('𑗉', '𑗗'), @@ -1801,6 +1848,7 @@ pub const STERM: &'static [(char, char)] = &[ ('𖫵', '𖫵'), ('𖬷', '𖬸'), ('𖭄', '𖭄'), + ('𖵮', '𖵯'), ('𖺘', '𖺘'), ('𛲟', '𛲟'), ('𝪈', '𝪈'), @@ -2098,6 +2146,7 @@ pub const UPPER: &'static [(char, char)] = &[ ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('Ꭰ', 'Ᏽ'), + ('Ᲊ', 'Ᲊ'), ('Ḁ', 'Ḁ'), ('Ḃ', 'Ḃ'), ('Ḅ', 'Ḅ'), @@ -2425,9 +2474,12 @@ pub const UPPER: &'static [(char, char)] = &[ ('Ꟃ', 'Ꟃ'), ('Ꞔ', 'Ꟈ'), ('Ꟊ', 'Ꟊ'), + ('Ɤ', 'Ꟍ'), ('Ꟑ', 'Ꟑ'), ('Ꟗ', 'Ꟗ'), ('Ꟙ', 'Ꟙ'), + ('Ꟛ', 'Ꟛ'), + ('Ƛ', 'Ƛ'), ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), @@ -2437,6 +2489,7 @@ pub const UPPER: &'static [(char, char)] = &[ ('𐖌', '𐖒'), ('𐖔', '𐖕'), ('𐲀', '𐲲'), + ('𐵐', '𐵥'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), ('𝐀', '𝐙'), diff --git a/regex-syntax/src/unicode_tables/word_break.rs b/regex-syntax/src/unicode_tables/word_break.rs index c0714956f..b764d34ac 100644 --- a/regex-syntax/src/unicode_tables/word_break.rs +++ b/regex-syntax/src/unicode_tables/word_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate word-break ucd-15.0.0 --chars +// ucd-generate word-break ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ALetter", ALETTER), @@ -62,7 +62,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ۮ', 'ۯ'), ('ۺ', 'ۼ'), ('ۿ', 'ۿ'), - ('ܐ', 'ܐ'), + ('\u{70f}', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'), @@ -219,7 +219,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᳩ', 'ᳬ'), @@ -295,10 +295,10 @@ pub const ALETTER: &'static [(char, char)] = &[ ('Ꙁ', 'ꙮ'), ('ꙿ', 'ꚝ'), ('ꚠ', 'ꛯ'), - ('꜈', 'ꟊ'), + ('꜈', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), @@ -374,6 +374,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -410,8 +411,11 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'), + ('𐵊', '𐵥'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -450,6 +454,13 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -482,6 +493,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -505,7 +517,9 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -514,6 +528,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), @@ -563,6 +578,8 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -637,7 +654,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'ः'), ('\u{93a}', '\u{93c}'), @@ -689,8 +706,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{c81}', 'ಃ'), ('\u{cbc}', '\u{cbc}'), ('ಾ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('ೳ', 'ೳ'), @@ -733,8 +750,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '᜕'), - ('\u{1732}', '᜴'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), @@ -751,11 +768,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), - ('\u{1b34}', '᭄'), + ('\u{1b34}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', 'ᮂ'), ('ᮡ', '\u{1bad}'), - ('\u{1be6}', '᯳'), + ('\u{1be6}', '\u{1bf3}'), ('ᰤ', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce8}'), @@ -784,9 +801,9 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{a8e0}', '\u{a8f1}'), ('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '꥓'), + ('\u{a947}', '\u{a953}'), ('\u{a980}', 'ꦃ'), - ('\u{a9b3}', '꧀'), + ('\u{a9b3}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa36}'), ('\u{aa43}', '\u{aa43}'), @@ -815,8 +832,9 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('𑀀', '𑀂'), @@ -831,7 +849,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('𑅅', '𑅆'), ('\u{11173}', '\u{11173}'), ('\u{11180}', '𑆂'), - ('𑆳', '𑇀'), + ('𑆳', '\u{111c0}'), ('\u{111c9}', '\u{111cc}'), ('𑇎', '\u{111cf}'), ('𑈬', '\u{11237}'), @@ -842,11 +860,18 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('𑍢', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('𑐵', '\u{11446}'), ('\u{1145e}', '\u{1145e}'), ('\u{114b0}', '\u{114c3}'), @@ -888,20 +913,22 @@ pub const EXTEND: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '\u{11f3a}'), ('𑼾', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -922,6 +949,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('🏻', '🏿'), @@ -941,12 +969,7 @@ pub const EXTENDNUMLET: &'static [(char, char)] = &[ pub const FORMAT: &'static [(char, char)] = &[ ('\u{ad}', '\u{ad}'), - ('\u{600}', '\u{605}'), ('\u{61c}', '\u{61c}'), - ('\u{6dd}', '\u{6dd}'), - ('\u{70f}', '\u{70f}'), - ('\u{890}', '\u{891}'), - ('\u{8e2}', '\u{8e2}'), ('\u{180e}', '\u{180e}'), ('\u{200e}', '\u{200f}'), ('\u{202a}', '\u{202e}'), @@ -954,8 +977,6 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{2066}', '\u{206f}'), ('\u{feff}', '\u{feff}'), ('\u{fff9}', '\u{fffb}'), - ('\u{110bd}', '\u{110bd}'), - ('\u{110cd}', '\u{110cd}'), ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), @@ -1016,8 +1037,6 @@ pub const MIDNUM: &'static [(char, char)] = &[ ('٬', '٬'), ('߸', '߸'), ('⁄', '⁄'), - ('︐', '︐'), - ('︔', '︔'), ('﹐', '﹐'), ('﹔', '﹔'), (',', ','), @@ -1038,10 +1057,14 @@ pub const NEWLINE: &'static [(char, char)] = pub const NUMERIC: &'static [(char, char)] = &[ ('0', '9'), + ('\u{600}', '\u{605}'), ('٠', '٩'), ('٫', '٫'), + ('\u{6dd}', '\u{6dd}'), ('۰', '۹'), ('߀', '߉'), + ('\u{890}', '\u{891}'), + ('\u{8e2}', '\u{8e2}'), ('०', '९'), ('০', '৯'), ('੦', '੯'), @@ -1060,7 +1083,7 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('០', '៩'), ('᠐', '᠙'), ('᥆', '᥏'), - ('᧐', '᧙'), + ('᧐', '᧚'), ('᪀', '᪉'), ('᪐', '᪙'), ('᭐', '᭙'), @@ -1077,7 +1100,10 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('0', '9'), ('𐒠', '𐒩'), ('𐴰', '𐴹'), + ('𐵀', '𐵉'), ('𑁦', '𑁯'), + ('\u{110bd}', '\u{110bd}'), + ('\u{110cd}', '\u{110cd}'), ('𑃰', '𑃹'), ('𑄶', '𑄿'), ('𑇐', '𑇙'), @@ -1086,20 +1112,26 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), ('𑥐', '𑥙'), + ('𑯰', '𑯹'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), ('𑽐', '𑽙'), + ('𖄰', '𖄹'), ('𖩠', '𖩩'), ('𖫀', '𖫉'), ('𖭐', '𖭙'), + ('𖵰', '𖵹'), + ('𜳰', '𜳹'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), ('𞓰', '𞓹'), + ('𞗱', '𞗺'), ('𞥐', '𞥙'), ('🯰', '🯹'), ];