From 427cc2b9fd820df22375bd3de68a8221041c1452 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Mon, 30 Jan 2023 17:42:57 -0800 Subject: [PATCH] upgrade to Unicode 15 --- maint/Unicode.tables/BidiMirroring.txt | 8 +- maint/Unicode.tables/CaseFolding.txt | 10 +- maint/Unicode.tables/DerivedBidiClass.txt | 324 +- .../Unicode.tables/DerivedCoreProperties.txt | 253 +- .../Unicode.tables/DerivedGeneralCategory.txt | 157 +- .../Unicode.tables/GraphemeBreakProperty.txt | 38 +- maint/Unicode.tables/PropList.txt | 56 +- maint/Unicode.tables/PropertyAliases.txt | 10 +- maint/Unicode.tables/PropertyValueAliases.txt | 24 +- maint/Unicode.tables/ScriptExtensions.txt | 10 +- maint/Unicode.tables/Scripts.txt | 106 +- maint/Unicode.tables/UnicodeData.txt | 300 +- maint/Unicode.tables/emoji-data.txt | 85 +- src/pcre2_ucd.c | 6563 +++++++++-------- src/pcre2_ucp.h | 2 + src/pcre2_ucptables.c | 529 +- testdata/testinput26 | 48 +- testdata/testoutput26 | 68 +- testdata/testoutput4 | 2 +- testdata/testoutput7 | 2 +- 20 files changed, 4652 insertions(+), 3943 deletions(-) diff --git a/maint/Unicode.tables/BidiMirroring.txt b/maint/Unicode.tables/BidiMirroring.txt index bd8e2c5d0..5861d6e7f 100644 --- a/maint/Unicode.tables/BidiMirroring.txt +++ b/maint/Unicode.tables/BidiMirroring.txt @@ -1,6 +1,6 @@ -# BidiMirroring-14.0.0.txt -# Date: 2021-08-08, 22:55:00 GMT [KW, RP] -# © 2021 Unicode®, Inc. +# BidiMirroring-15.0.0.txt +# Date: 2022-05-03, 18:47:00 GMT [KW, RP] +# © 2022 Unicode®, Inc. # For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database @@ -15,7 +15,7 @@ # value, for which there is another Unicode character that typically has a glyph # that is the mirror image of the original character's glyph. # -# The repertoire covered by the file is Unicode 14.0.0. +# The repertoire covered by the file is Unicode 15.0.0. # # The file contains a list of lines with mappings from one code point # to another one for character-based mirroring. diff --git a/maint/Unicode.tables/CaseFolding.txt b/maint/Unicode.tables/CaseFolding.txt index 932ace29e..65aa0fcd6 100644 --- a/maint/Unicode.tables/CaseFolding.txt +++ b/maint/Unicode.tables/CaseFolding.txt @@ -1,11 +1,11 @@ -# CaseFolding-14.0.0.txt -# Date: 2021-03-08, 19:35:41 GMT -# © 2021 Unicode®, Inc. +# CaseFolding-15.0.0.txt +# Date: 2022-02-02, 23:35:35 GMT +# © 2022 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html +# For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see https://www.unicode.org/reports/tr44/ # # Case Folding Properties # diff --git a/maint/Unicode.tables/DerivedBidiClass.txt b/maint/Unicode.tables/DerivedBidiClass.txt index 4012dc25d..55b30a66c 100644 --- a/maint/Unicode.tables/DerivedBidiClass.txt +++ b/maint/Unicode.tables/DerivedBidiClass.txt @@ -1,36 +1,19 @@ -# DerivedBidiClass-14.0.0.txt -# Date: 2021-07-10, 00:35:02 GMT -# © 2021 Unicode®, Inc. +# DerivedBidiClass-15.0.0.txt +# Date: 2022-08-05, 17:39:24 GMT +# © 2022 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html +# For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see https://www.unicode.org/reports/tr44/ # ================================================ # Bidi Class (listing UnicodeData.txt, field 4: see UAX #44: https://www.unicode.org/reports/tr44/) # Unlike other properties, unassigned code points in blocks -# reserved for right-to-left scripts are given either types R or AL. -# -# The unassigned code points that default to AL are in the ranges: -# [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF -# \U00010D00-\U00010D3F \U00010F30-\U00010F6F -# \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF] -# -# This includes code points in the Arabic, Syriac, and Thaana blocks, among others. -# -# The unassigned code points that default to R are in the ranges: -# [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F -# \U00010800-\U00010CFF \U00010D40-\U00010F2F \U00010F70-\U00010FFF -# \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF] -# -# This includes code points in the Hebrew, NKo, and Phoenician blocks, among others. -# -# The unassigned code points that default to ET are in the range: -# [\u20A0-\u20CF] -# -# This consists of code points in the Currency Symbols block. +# reserved for right-to-left scripts are given either values R or AL, +# and unassigned code points in the Currency Symbols block are given the value ET. +# For details see the @missing lines below. # # The unassigned code points that default to BN have one of the following properties: # Default_Ignorable_Code_Point @@ -43,6 +26,101 @@ # @missing: 0000..10FFFF; Left_To_Right +# 0590..05FF Hebrew +# @missing: 0590..05FF; Right_To_Left + +# 0600..06FF Arabic +# 0700..074F Syriac +# 0750..077F Arabic_Supplement +# 0780..07BF Thaana +# @missing: 0600..07BF; Arabic_Letter + +# 07C0..07FF NKo +# 0800..083F Samaritan +# 0840..085F Mandaic +# @missing: 07C0..085F; Right_To_Left + +# 0860..086F Syriac_Supplement +# 0870..089F Arabic_Extended_B +# 08A0..08FF Arabic_Extended_A +# @missing: 0860..08FF; Arabic_Letter + +# 20A0..20CF Currency_Symbols +# @missing: 20A0..20CF; European_Terminator + +# FB00..FB4F Alphabetic_Presentation_Forms (partial) +# @missing: FB1D..FB4F; Right_To_Left + +# FB50..FDFF Arabic_Presentation_Forms_A (partial) +# @missing: FB50..FDCF; Arabic_Letter + +# FB50..FDFF Arabic_Presentation_Forms_A (partial) +# @missing: FDF0..FDFF; Arabic_Letter + +# FE70..FEFF Arabic_Presentation_Forms_B +# @missing: FE70..FEFF; Arabic_Letter + +# 10800..1083F Cypriot_Syllabary +# 10840..1085F Imperial_Aramaic +# 10860..1087F Palmyrene +# 10880..108AF Nabataean +# 108E0..108FF Hatran +# 10900..1091F Phoenician +# 10920..1093F Lydian +# 10980..1099F Meroitic_Hieroglyphs +# 109A0..109FF Meroitic_Cursive +# 10A00..10A5F Kharoshthi +# 10A60..10A7F Old_South_Arabian +# 10A80..10A9F Old_North_Arabian +# 10AC0..10AFF Manichaean +# 10B00..10B3F Avestan +# 10B40..10B5F Inscriptional_Parthian +# 10B60..10B7F Inscriptional_Pahlavi +# 10B80..10BAF Psalter_Pahlavi +# 10C00..10C4F Old_Turkic +# 10C80..10CFF Old_Hungarian +# @missing: 10800..10CFF; Right_To_Left + +# 10D00..10D3F Hanifi_Rohingya +# @missing: 10D00..10D3F; Arabic_Letter + +# 10E60..10E7F Rumi_Numeral_Symbols +# 10E80..10EBF Yezidi +# @missing: 10D40..10EBF; Right_To_Left + +# 10EC0..10EFF Arabic_Extended_C +# @missing: 10EC0..10EFF; Arabic_Letter + +# 10F00..10F2F Old_Sogdian +# @missing: 10F00..10F2F; Right_To_Left + +# 10F30..10F6F Sogdian +# @missing: 10F30..10F6F; Arabic_Letter + +# 10F70..10FAF Old_Uyghur +# 10FB0..10FDF Chorasmian +# 10FE0..10FFF Elymaic +# @missing: 10F70..10FFF; Right_To_Left + +# 1E800..1E8DF Mende_Kikakui +# 1E900..1E95F Adlam +# @missing: 1E800..1EC6F; Right_To_Left + +# 1EC70..1ECBF Indic_Siyaq_Numbers +# @missing: 1EC70..1ECBF; Arabic_Letter + +# @missing: 1ECC0..1ECFF; Right_To_Left + +# 1ED00..1ED4F Ottoman_Siyaq_Numbers +# @missing: 1ED00..1ED4F; Arabic_Letter + +# @missing: 1ED50..1EDFF; Right_To_Left + +# 1EE00..1EEFF Arabic_Mathematical_Alphabetic_Symbols +# @missing: 1EE00..1EEFF; Arabic_Letter + +# @missing: 1EF00..1EFFF; Right_To_Left + # ================================================ # Bidi_Class=Left_To_Right @@ -219,6 +297,7 @@ 0CE0..0CE1 ; L # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE6..0CEF ; L # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; L # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0CF3 ; L # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D02..0D03 ; L # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D04..0D0C ; L # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L 0D0E..0D10 ; L # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI @@ -795,6 +874,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 11232..11233 ; L # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU 11235 ; L # Mc KHOJKI SIGN VIRAMA 11238..1123D ; L # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN +1123F..11240 ; L # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I 11280..11286 ; L # Lo [7] MULTANI LETTER A..MULTANI LETTER GA 11288 ; L # Lo MULTANI LETTER GHA 1128A..1128D ; L # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -910,6 +990,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 11A9D ; L # Lo SOYOMBO MARK PLUTA 11A9E..11AA2 ; L # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; L # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11B00..11B09 ; L # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU 11C00..11C08 ; L # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; L # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; L # Mc BHAIKSUKI VOWEL SIGN AA @@ -940,6 +1021,15 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 11EE0..11EF2 ; L # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF5..11EF6 ; L # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O 11EF7..11EF8 ; L # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F02 ; L # Lo KAWI SIGN REPHA +11F03 ; L # Mc KAWI SIGN VISARGA +11F04..11F10 ; L # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; L # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35 ; L # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F3E..11F3F ; L # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F41 ; L # Mc KAWI SIGN KILLER +11F43..11F4F ; L # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL +11F50..11F59 ; L # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 11FB0 ; L # Lo LISU LETTER YHA 11FC0..11FD4 ; L # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FFF ; L # Po TAMIL PUNCTUATION END OF TEXT @@ -949,8 +1039,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 12480..12543 ; L # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU 12F90..12FF0 ; L # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 12FF1..12FF2 ; L # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 -13000..1342E ; L # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 -13430..13438 ; L # Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT +13000..1342F ; L # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13430..1343F ; L # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE +13441..13446 ; L # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; L # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; L # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; L # Lo [31] MRO LETTER TA..MRO LETTER TEK @@ -987,7 +1078,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 1B000..1B122 ; L # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; L # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; L # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; L # Lo KATAKANA LETTER SMALL KO 1B164..1B167 ; L # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N 1B170..1B2FB ; L # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; L # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M @@ -1006,6 +1099,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1D183..1D184 ; L # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN 1D18C..1D1A9 ; L # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH 1D1AE..1D1E8 ; L # So [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN +1D2C0..1D2D3 ; L # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; L # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D360..1D378 ; L # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE 1D400..1D454 ; L # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G @@ -1052,6 +1146,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1DF00..1DF09 ; L # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; L # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; L # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; L # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; L # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; L # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; L # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E140..1E149 ; L # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE @@ -1060,6 +1156,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1E290..1E2AD ; L # Lo [30] TOTO LETTER PA..TOTO LETTER A 1E2C0..1E2EB ; L # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH 1E2F0..1E2F9 ; L # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4D0..1E4EA ; L # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; L # Lm NAG MUNDARI SIGN OJOD +1E4F0..1E4F9 ; L # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE 1E7E0..1E7E6 ; L # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; L # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; L # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -1072,188 +1171,124 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1F240..1F248 ; L # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 1F250..1F251 ; L # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT 20000..2A6DF ; L # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF -2A700..2B738 ; L # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2A700..2B739 ; L # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; L # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; L # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; L # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; L # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; L # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; L # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 825575 code points not listed here. -# Total code points: 1096333 +# The above property value applies to 821089 code points not listed here. +# Total code points: 1096272 # ================================================ # Bidi_Class=Right_To_Left -0590 ; R # Cn 05BE ; R # Pd HEBREW PUNCTUATION MAQAF 05C0 ; R # Po HEBREW PUNCTUATION PASEQ 05C3 ; R # Po HEBREW PUNCTUATION SOF PASUQ 05C6 ; R # Po HEBREW PUNCTUATION NUN HAFUKHA -05C8..05CF ; R # Cn [8] .. 05D0..05EA ; R # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV -05EB..05EE ; R # Cn [4] .. 05EF..05F2 ; R # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3..05F4 ; R # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM -05F5..05FF ; R # Cn [11] .. 07C0..07C9 ; R # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE 07CA..07EA ; R # Lo [33] NKO LETTER A..NKO LETTER JONA RA 07F4..07F5 ; R # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE 07FA ; R # Lm NKO LAJANYALAN -07FB..07FC ; R # Cn [2] .. 07FE..07FF ; R # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN 0800..0815 ; R # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF 081A ; R # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT 0824 ; R # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; R # Lm SAMARITAN MODIFIER LETTER I -082E..082F ; R # Cn [2] .. 0830..083E ; R # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU -083F ; R # Cn 0840..0858 ; R # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN -085C..085D ; R # Cn [2] .. 085E ; R # Po MANDAIC PUNCTUATION -085F ; R # Cn 200F ; R # Cf RIGHT-TO-LEFT MARK FB1D ; R # Lo HEBREW LETTER YOD WITH HIRIQ FB1F..FB28 ; R # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV FB2A..FB36 ; R # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH -FB37 ; R # Cn FB38..FB3C ; R # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH -FB3D ; R # Cn FB3E ; R # Lo HEBREW LETTER MEM WITH DAGESH -FB3F ; R # Cn FB40..FB41 ; R # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH -FB42 ; R # Cn FB43..FB44 ; R # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH -FB45 ; R # Cn FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED 10800..10805 ; R # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA -10806..10807 ; R # Cn [2] .. 10808 ; R # Lo CYPRIOT SYLLABLE JO -10809 ; R # Cn 1080A..10835 ; R # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO -10836 ; R # Cn 10837..10838 ; R # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE -10839..1083B ; R # Cn [3] .. 1083C ; R # Lo CYPRIOT SYLLABLE ZA -1083D..1083E ; R # Cn [2] .. 1083F..10855 ; R # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW -10856 ; R # Cn 10857 ; R # Po IMPERIAL ARAMAIC SECTION SIGN 10858..1085F ; R # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND 10860..10876 ; R # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW 10877..10878 ; R # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON 10879..1087F ; R # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY 10880..1089E ; R # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW -1089F..108A6 ; R # Cn [8] .. 108A7..108AF ; R # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED -108B0..108DF ; R # Cn [48] .. 108E0..108F2 ; R # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH -108F3 ; R # Cn 108F4..108F5 ; R # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW -108F6..108FA ; R # Cn [5] .. 108FB..108FF ; R # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED 10900..10915 ; R # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10916..1091B ; R # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE -1091C..1091E ; R # Cn [3] .. 10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C -1093A..1093E ; R # Cn [5] .. 1093F ; R # Po LYDIAN TRIANGULAR MARK -10940..1097F ; R # Cn [64] .. 10980..109B7 ; R # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA -109B8..109BB ; R # Cn [4] .. 109BC..109BD ; R # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; R # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 109C0..109CF ; R # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY -109D0..109D1 ; R # Cn [2] .. 109D2..109FF ; R # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS 10A00 ; R # Lo KHAROSHTHI LETTER A -10A04 ; R # Cn -10A07..10A0B ; R # Cn [5] .. 10A10..10A13 ; R # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA -10A14 ; R # Cn 10A15..10A17 ; R # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA -10A18 ; R # Cn 10A19..10A35 ; R # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA -10A36..10A37 ; R # Cn [2] .. -10A3B..10A3E ; R # Cn [4] .. 10A40..10A48 ; R # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF -10A49..10A4F ; R # Cn [7] .. 10A50..10A58 ; R # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES -10A59..10A5F ; R # Cn [7] .. 10A60..10A7C ; R # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH 10A7D..10A7E ; R # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY 10A7F ; R # Po OLD SOUTH ARABIAN NUMERIC INDICATOR 10A80..10A9C ; R # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH 10A9D..10A9F ; R # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY -10AA0..10ABF ; R # Cn [32] .. 10AC0..10AC7 ; R # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW 10AC8 ; R # So MANICHAEAN SIGN UD 10AC9..10AE4 ; R # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW -10AE7..10AEA ; R # Cn [4] .. 10AEB..10AEF ; R # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED 10AF0..10AF6 ; R # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER -10AF7..10AFF ; R # Cn [9] .. 10B00..10B35 ; R # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE -10B36..10B38 ; R # Cn [3] .. 10B40..10B55 ; R # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW -10B56..10B57 ; R # Cn [2] .. 10B58..10B5F ; R # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND 10B60..10B72 ; R # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW -10B73..10B77 ; R # Cn [5] .. 10B78..10B7F ; R # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND 10B80..10B91 ; R # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW -10B92..10B98 ; R # Cn [7] .. 10B99..10B9C ; R # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT -10B9D..10BA8 ; R # Cn [12] .. 10BA9..10BAF ; R # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED -10BB0..10BFF ; R # Cn [80] .. 10C00..10C48 ; R # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH -10C49..10C7F ; R # Cn [55] .. 10C80..10CB2 ; R # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US -10CB3..10CBF ; R # Cn [13] .. 10CC0..10CF2 ; R # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US -10CF3..10CF9 ; R # Cn [7] .. 10CFA..10CFF ; R # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND -10D40..10E5F ; R # Cn [288] .. -10E7F ; R # Cn 10E80..10EA9 ; R # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET -10EAA ; R # Cn 10EAD ; R # Pd YEZIDI HYPHENATION MARK -10EAE..10EAF ; R # Cn [2] .. 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10EB2..10EFF ; R # Cn [78] .. 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH -10F28..10F2F ; R # Cn [8] .. 10F70..10F81 ; R # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH 10F86..10F89 ; R # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS -10F8A..10FAF ; R # Cn [38] .. 10FB0..10FC4 ; R # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW 10FC5..10FCB ; R # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED -10FCC..10FDF ; R # Cn [20] .. 10FE0..10FF6 ; R # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH -10FF7..10FFF ; R # Cn [9] .. 1E800..1E8C4 ; R # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON -1E8C5..1E8C6 ; R # Cn [2] .. 1E8C7..1E8CF ; R # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE -1E8D7..1E8FF ; R # Cn [41] .. 1E900..1E943 ; R # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA 1E94B ; R # Lm ADLAM NASALIZATION MARK -1E94C..1E94F ; R # Cn [4] .. 1E950..1E959 ; R # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE -1E95A..1E95D ; R # Cn [4] .. 1E95E..1E95F ; R # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -1E960..1EC6F ; R # Cn [784] .. -1ECC0..1ECFF ; R # Cn [64] .. -1ED50..1EDFF ; R # Cn [176] .. -1EF00..1EFFF ; R # Cn [256] .. -# Total code points: 3711 +# The above property value applies to 2156 code points not listed here. +# Total code points: 3647 # ================================================ @@ -1313,7 +1348,6 @@ FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS 17DB ; ET # Sc KHMER CURRENCY SYMBOL RIEL 2030..2034 ; ET # Po [5] PER MILLE SIGN..TRIPLE PRIME 20A0..20C0 ; ET # Sc [33] EURO-CURRENCY SIGN..SOM SIGN -20C1..20CF ; ET # Cn [15] .. 212E ; ET # So ESTIMATED SYMBOL 2213 ; ET # Sm MINUS-OR-PLUS SIGN A838 ; ET # Sc NORTH INDIC RUPEE MARK @@ -1329,6 +1363,7 @@ FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN 11FDD..11FE0 ; ET # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN 1E2FF ; ET # Sc WANCHO NGUN SIGN +# The above property value applies to 15 code points not listed here. # Total code points: 92 # ================================================ @@ -1887,10 +1922,10 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1F300..1F3FA ; ON # So [251] CYCLONE..AMPHORA 1F3FB..1F3FF ; ON # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 1F400..1F6D7 ; ON # So [728] RAT..ELEVATOR -1F6DD..1F6EC ; ON # So [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING +1F6DC..1F6EC ; ON # So [17] WIRELESS..AIRPLANE ARRIVING 1F6F0..1F6FC ; ON # So [13] SATELLITE..ROLLER SKATE -1F700..1F773 ; ON # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE -1F780..1F7D8 ; ON # So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE +1F700..1F776 ; ON # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE +1F77B..1F7D9 ; ON # So [95] HAUMEA..NINE POINTED WHITE STAR 1F7E0..1F7EB ; ON # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE 1F7F0 ; ON # So HEAVY EQUALS SIGN 1F800..1F80B ; ON # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD @@ -1901,19 +1936,17 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1F8B0..1F8B1 ; ON # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST 1F900..1FA53 ; ON # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER -1FA70..1FA74 ; ON # So [5] BALLET SHOES..THONG SANDAL -1FA78..1FA7C ; ON # So [5] DROP OF BLOOD..CRUTCH -1FA80..1FA86 ; ON # So [7] YO-YO..NESTING DOLLS -1FA90..1FAAC ; ON # So [29] RINGED PLANET..HAMSA -1FAB0..1FABA ; ON # So [11] FLY..NEST WITH EGGS -1FAC0..1FAC5 ; ON # So [6] ANATOMICAL HEART..PERSON WITH CROWN -1FAD0..1FAD9 ; ON # So [10] BLUEBERRIES..JAR -1FAE0..1FAE7 ; ON # So [8] MELTING FACE..BUBBLES -1FAF0..1FAF6 ; ON # So [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS +1FA70..1FA7C ; ON # So [13] BALLET SHOES..CRUTCH +1FA80..1FA88 ; ON # So [9] YO-YO..FLUTE +1FA90..1FABD ; ON # So [46] RINGED PLANET..WING +1FABF..1FAC5 ; ON # So [7] GOOSE..PERSON WITH CROWN +1FACE..1FADB ; ON # So [14] MOOSE..PEA POD +1FAE0..1FAE8 ; ON # So [9] MELTING FACE..SHAKING FACE +1FAF0..1FAF8 ; ON # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; ON # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 6000 +# Total code points: 6029 # ================================================ @@ -2054,7 +2087,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 0E47..0E4E ; NSM # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN 0EB1 ; NSM # Mn LAO VOWEL SIGN MAI KAN 0EB4..0EBC ; NSM # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO -0EC8..0ECD ; NSM # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0EC8..0ECE ; NSM # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN 0F18..0F19 ; NSM # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F35 ; NSM # Mn TIBETAN MARK NGAS BZUNG NYI ZLA 0F37 ; NSM # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS @@ -2189,6 +2222,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 10AE5..10AE6 ; NSM # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFD..10EFF ; NSM # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; NSM # Mn BRAHMI SIGN ANUSVARA @@ -2211,6 +2245,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 11234 ; NSM # Mn KHOJKI SIGN ANUSVARA 11236..11237 ; NSM # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA 1123E ; NSM # Mn KHOJKI SIGN SUKUN +11241 ; NSM # Mn KHOJKI VOWEL SIGN VOCALIC R 112DF ; NSM # Mn KHUDAWADI SIGN ANUSVARA 112E3..112EA ; NSM # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA 11300..11301 ; NSM # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU @@ -2272,6 +2307,12 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 11D95 ; NSM # Mn GUNJALA GONDI SIGN ANUSVARA 11D97 ; NSM # Mn GUNJALA GONDI VIRAMA 11EF3..11EF4 ; NSM # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11F00..11F01 ; NSM # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F36..11F3A ; NSM # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F40 ; NSM # Mn KAWI VOWEL SIGN EU +11F42 ; NSM # Mn KAWI CONJOINER +13440 ; NSM # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; NSM # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; NSM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; NSM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; NSM # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -2296,14 +2337,16 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E01B..1E021 ; NSM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI 1E023..1E024 ; NSM # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS 1E026..1E02A ; NSM # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; NSM # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 1E130..1E136 ; NSM # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E2AE ; NSM # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; NSM # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; NSM # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E8D0..1E8D6 ; NSM # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1958 +# Total code points: 1993 # ================================================ @@ -2329,125 +2372,74 @@ E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 06FD..06FE ; AL # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN 06FF ; AL # Lo ARABIC LETTER HEH WITH INVERTED V 0700..070D ; AL # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS -070E ; AL # Cn 070F ; AL # Cf SYRIAC ABBREVIATION MARK 0710 ; AL # Lo SYRIAC LETTER ALAPH 0712..072F ; AL # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH -074B..074C ; AL # Cn [2] .. 074D..07A5 ; AL # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU 07B1 ; AL # Lo THAANA LETTER NAA -07B2..07BF ; AL # Cn [14] .. 0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA -086B..086F ; AL # Cn [5] .. 0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; AL # Sk ARABIC RAISED ROUND DOT 0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL -088F ; AL # Cn -0892..0897 ; AL # Cn [6] .. 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; AL # Lm ARABIC SMALL FARSI YEH FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; AL # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE -FBC3..FBD2 ; AL # Cn [16] .. FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM -FD90..FD91 ; AL # Cn [2] .. FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDC8..FDCE ; AL # Cn [7] .. FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; AL # Sc RIAL SIGN FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM -FE75 ; AL # Cn FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM -FEFD..FEFE ; AL # Cn [2] .. 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA -10D28..10D2F ; AL # Cn [8] .. -10D3A..10D3F ; AL # Cn [6] .. 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT -10F5A..10F6F ; AL # Cn [22] .. -1EC70 ; AL # Cn 1EC71..1ECAB ; AL # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE 1ECAC ; AL # So INDIC SIYAQ PLACEHOLDER 1ECAD..1ECAF ; AL # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS 1ECB0 ; AL # Sc INDIC SIYAQ RUPEE MARK 1ECB1..1ECB4 ; AL # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK -1ECB5..1ECBF ; AL # Cn [11] .. -1ED00 ; AL # Cn 1ED01..1ED2D ; AL # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND 1ED2E ; AL # So OTTOMAN SIYAQ MARRATAN 1ED2F..1ED3D ; AL # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH -1ED3E..1ED4F ; AL # Cn [18] .. 1EE00..1EE03 ; AL # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL -1EE04 ; AL # Cn 1EE05..1EE1F ; AL # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF -1EE20 ; AL # Cn 1EE21..1EE22 ; AL # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM -1EE23 ; AL # Cn 1EE24 ; AL # Lo ARABIC MATHEMATICAL INITIAL HEH -1EE25..1EE26 ; AL # Cn [2] .. 1EE27 ; AL # Lo ARABIC MATHEMATICAL INITIAL HAH -1EE28 ; AL # Cn 1EE29..1EE32 ; AL # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF -1EE33 ; AL # Cn 1EE34..1EE37 ; AL # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH -1EE38 ; AL # Cn 1EE39 ; AL # Lo ARABIC MATHEMATICAL INITIAL DAD -1EE3A ; AL # Cn 1EE3B ; AL # Lo ARABIC MATHEMATICAL INITIAL GHAIN -1EE3C..1EE41 ; AL # Cn [6] .. 1EE42 ; AL # Lo ARABIC MATHEMATICAL TAILED JEEM -1EE43..1EE46 ; AL # Cn [4] .. 1EE47 ; AL # Lo ARABIC MATHEMATICAL TAILED HAH -1EE48 ; AL # Cn 1EE49 ; AL # Lo ARABIC MATHEMATICAL TAILED YEH -1EE4A ; AL # Cn 1EE4B ; AL # Lo ARABIC MATHEMATICAL TAILED LAM -1EE4C ; AL # Cn 1EE4D..1EE4F ; AL # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN -1EE50 ; AL # Cn 1EE51..1EE52 ; AL # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF -1EE53 ; AL # Cn 1EE54 ; AL # Lo ARABIC MATHEMATICAL TAILED SHEEN -1EE55..1EE56 ; AL # Cn [2] .. 1EE57 ; AL # Lo ARABIC MATHEMATICAL TAILED KHAH -1EE58 ; AL # Cn 1EE59 ; AL # Lo ARABIC MATHEMATICAL TAILED DAD -1EE5A ; AL # Cn 1EE5B ; AL # Lo ARABIC MATHEMATICAL TAILED GHAIN -1EE5C ; AL # Cn 1EE5D ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON -1EE5E ; AL # Cn 1EE5F ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF -1EE60 ; AL # Cn 1EE61..1EE62 ; AL # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM -1EE63 ; AL # Cn 1EE64 ; AL # Lo ARABIC MATHEMATICAL STRETCHED HEH -1EE65..1EE66 ; AL # Cn [2] .. 1EE67..1EE6A ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF -1EE6B ; AL # Cn 1EE6C..1EE72 ; AL # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF -1EE73 ; AL # Cn 1EE74..1EE77 ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH -1EE78 ; AL # Cn 1EE79..1EE7C ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH -1EE7D ; AL # Cn 1EE7E ; AL # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH -1EE7F ; AL # Cn 1EE80..1EE89 ; AL # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH -1EE8A ; AL # Cn 1EE8B..1EE9B ; AL # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN -1EE9C..1EEA0 ; AL # Cn [5] .. 1EEA1..1EEA3 ; AL # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL -1EEA4 ; AL # Cn 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH -1EEAA ; AL # Cn 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -1EEBC..1EEEF ; AL # Cn [52] .. -1EEF2..1EEFF ; AL # Cn [14] .. -# Total code points: 1708 +# The above property value applies to 298 code points not listed here. +# Total code points: 1769 # ================================================ diff --git a/maint/Unicode.tables/DerivedCoreProperties.txt b/maint/Unicode.tables/DerivedCoreProperties.txt index afc2abd97..8b482b5c1 100644 --- a/maint/Unicode.tables/DerivedCoreProperties.txt +++ b/maint/Unicode.tables/DerivedCoreProperties.txt @@ -1,11 +1,11 @@ -# DerivedCoreProperties-14.0.0.txt -# Date: 2021-08-12, 23:12:53 GMT -# © 2021 Unicode®, Inc. +# DerivedCoreProperties-15.0.0.txt +# Date: 2022-08-05, 22:17:05 GMT +# © 2022 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html +# For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see https://www.unicode.org/reports/tr44/ # ================================================ @@ -462,6 +462,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0BD7 ; Alphabetic # Mc TAMIL AU LENGTH MARK 0C00 ; Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE 0C01..0C03 ; Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; Alphabetic # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE 0C05..0C0C ; Alphabetic # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L 0C0E..0C10 ; Alphabetic # Lo [3] TELUGU LETTER E..TELUGU LETTER AI 0C12..0C28 ; Alphabetic # Lo [23] TELUGU LETTER O..TELUGU LETTER NA @@ -497,6 +498,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0CE0..0CE1 ; Alphabetic # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CF1..0CF2 ; Alphabetic # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0CF3 ; Alphabetic # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D00..0D01 ; Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02..0D03 ; Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D04..0D0C ; Alphabetic # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -552,7 +554,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0F49..0F6C ; Alphabetic # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA 0F71..0F7E ; Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO 0F7F ; Alphabetic # Mc TIBETAN SIGN RNAM BCAD -0F80..0F81 ; Alphabetic # Mn [2] TIBETAN VOWEL SIGN REVERSED I..TIBETAN VOWEL SIGN REVERSED II +0F80..0F83 ; Alphabetic # Mn [4] TIBETAN VOWEL SIGN REVERSED I..TIBETAN SIGN SNA LDAN 0F88..0F8C ; Alphabetic # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN 0F8D..0F97 ; Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA 0F99..0FBC ; Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA @@ -1053,6 +1055,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 11071..11072 ; Alphabetic # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O 11073..11074 ; Alphabetic # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O 11075 ; Alphabetic # Lo BRAHMI LETTER OLD TAMIL LLA +11080..11081 ; Alphabetic # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA 11082 ; Alphabetic # Mc KAITHI SIGN VISARGA 11083..110AF ; Alphabetic # Lo [45] KAITHI LETTER A..KAITHI LETTER HA 110B0..110B2 ; Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II @@ -1089,6 +1092,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 11234 ; Alphabetic # Mn KHOJKI SIGN ANUSVARA 11237 ; Alphabetic # Mn KHOJKI SIGN SHADDA 1123E ; Alphabetic # Mn KHOJKI SIGN SUKUN +1123F..11240 ; Alphabetic # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11241 ; Alphabetic # Mn KHOJKI VOWEL SIGN VOCALIC R 11280..11286 ; Alphabetic # Lo [7] MULTANI LETTER A..MULTANI LETTER GA 11288 ; Alphabetic # Lo MULTANI LETTER GHA 1128A..1128D ; Alphabetic # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -1243,12 +1248,22 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 11EE0..11EF2 ; Alphabetic # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; Alphabetic # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; Alphabetic # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F00..11F01 ; Alphabetic # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F02 ; Alphabetic # Lo KAWI SIGN REPHA +11F03 ; Alphabetic # Mc KAWI SIGN VISARGA +11F04..11F10 ; Alphabetic # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; Alphabetic # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35 ; Alphabetic # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; Alphabetic # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; Alphabetic # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; Alphabetic # Mn KAWI VOWEL SIGN EU 11FB0 ; Alphabetic # Lo LISU LETTER YHA 12000..12399 ; Alphabetic # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; Alphabetic # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; Alphabetic # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU 12F90..12FF0 ; Alphabetic # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 -13000..1342E ; Alphabetic # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +13000..1342F ; Alphabetic # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13441..13446 ; Alphabetic # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; Alphabetic # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; Alphabetic # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; Alphabetic # Lo [31] MRO LETTER TA..MRO LETTER TEK @@ -1275,7 +1290,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Alphabetic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 1B000..1B122 ; Alphabetic # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; Alphabetic # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; Alphabetic # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; Alphabetic # Lo KATAKANA LETTER SMALL KO 1B164..1B167 ; Alphabetic # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N 1B170..1B2FB ; Alphabetic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; Alphabetic # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M @@ -1316,16 +1333,21 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1DF00..1DF09 ; Alphabetic # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; Alphabetic # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Alphabetic # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; Alphabetic # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E000..1E006 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI 1E023..1E024 ; Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS 1E026..1E02A ; Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E030..1E06D ; Alphabetic # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E08F ; Alphabetic # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 1E100..1E12C ; Alphabetic # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; Alphabetic # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E14E ; Alphabetic # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ 1E290..1E2AD ; Alphabetic # Lo [30] TOTO LETTER PA..TOTO LETTER A 1E2C0..1E2EB ; Alphabetic # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; Alphabetic # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; Alphabetic # Lm NAG MUNDARI SIGN OJOD 1E7E0..1E7E6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; Alphabetic # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -1371,14 +1393,15 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1F150..1F169 ; Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z 20000..2A6DF ; Alphabetic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF -2A700..2B738 ; Alphabetic # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2A700..2B739 ; Alphabetic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; Alphabetic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Alphabetic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Alphabetic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 133396 +# Total code points: 137765 # ================================================ @@ -1663,6 +1686,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 052F ; Lowercase # L& CYRILLIC SMALL LETTER EL WITH DESCENDER 0560..0588 ; Lowercase # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE 10D0..10FA ; Lowercase # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; Lowercase # Lm MODIFIER LETTER GEORGIAN NAR 10FD..10FF ; Lowercase # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13F8..13FD ; Lowercase # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1C80..1C88 ; Lowercase # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK @@ -2012,12 +2036,14 @@ A7D3 ; Lowercase # L& LATIN SMALL LETTER DOUBLE THORN A7D5 ; Lowercase # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Lowercase # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Lowercase # L& LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F6 ; Lowercase # L& LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lowercase # L& LATIN LETTER SMALL CAPITAL TURNED M AB30..AB5A ; Lowercase # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG AB5C..AB5F ; Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB68 ; Lowercase # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; Lowercase # Lm MODIFIER LETTER SMALL TURNED W AB70..ABBF ; Lowercase # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA FB00..FB06 ; Lowercase # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Lowercase # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -2065,9 +2091,11 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1D7CB ; Lowercase # L& MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Lowercase # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Lowercase # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; Lowercase # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2471 +# Total code points: 2544 # ================================================ @@ -2767,6 +2795,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 10C7 ; Cased # L& GEORGIAN CAPITAL LETTER YN 10CD ; Cased # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; Cased # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; Cased # Lm MODIFIER LETTER GEORGIAN NAR 10FD..10FF ; Cased # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13A0..13F5 ; Cased # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV 13F8..13FD ; Cased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV @@ -2837,12 +2866,14 @@ A790..A7CA ; Cased # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SM A7D0..A7D1 ; Cased # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Cased # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7D9 ; Cased # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; Cased # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Cased # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Cased # L& LATIN LETTER SMALL CAPITAL TURNED M AB30..AB5A ; Cased # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG AB5C..AB5F ; Cased # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB68 ; Cased # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; Cased # Lm MODIFIER LETTER SMALL TURNED W AB70..ABBF ; Cased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA FB00..FB06 ; Cased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Cased # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -2899,12 +2930,14 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7C4..1D7CB ; Cased # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Cased # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; Cased # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; Cased # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA 1F130..1F149 ; Cased # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4453 +# Total code points: 4526 # ================================================ @@ -3054,7 +3087,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 0EB1 ; Case_Ignorable # Mn LAO VOWEL SIGN MAI KAN 0EB4..0EBC ; Case_Ignorable # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO 0EC6 ; Case_Ignorable # Lm LAO KO LA -0EC8..0ECD ; Case_Ignorable # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0EC8..0ECE ; Case_Ignorable # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN 0F18..0F19 ; Case_Ignorable # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F35 ; Case_Ignorable # Mn TIBETAN MARK NGAS BZUNG NYI ZLA 0F37 ; Case_Ignorable # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS @@ -3263,6 +3296,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10AE5..10AE6 ; Case_Ignorable # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Case_Ignorable # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFD..10EFF ; Case_Ignorable # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Case_Ignorable # Mn BRAHMI SIGN ANUSVARA @@ -3287,6 +3321,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 11234 ; Case_Ignorable # Mn KHOJKI SIGN ANUSVARA 11236..11237 ; Case_Ignorable # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA 1123E ; Case_Ignorable # Mn KHOJKI SIGN SUKUN +11241 ; Case_Ignorable # Mn KHOJKI VOWEL SIGN VOCALIC R 112DF ; Case_Ignorable # Mn KHUDAWADI SIGN ANUSVARA 112E3..112EA ; Case_Ignorable # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA 11300..11301 ; Case_Ignorable # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU @@ -3348,7 +3383,13 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 11D95 ; Case_Ignorable # Mn GUNJALA GONDI SIGN ANUSVARA 11D97 ; Case_Ignorable # Mn GUNJALA GONDI VIRAMA 11EF3..11EF4 ; Case_Ignorable # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U -13430..13438 ; Case_Ignorable # Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT +11F00..11F01 ; Case_Ignorable # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F36..11F3A ; Case_Ignorable # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F40 ; Case_Ignorable # Mn KAWI VOWEL SIGN EU +11F42 ; Case_Ignorable # Mn KAWI CONJOINER +13430..1343F ; Case_Ignorable # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE +13440 ; Case_Ignorable # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Case_Ignorable # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Case_Ignorable # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Case_Ignorable # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16B40..16B43 ; Case_Ignorable # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM @@ -3382,10 +3423,14 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 1E01B..1E021 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI 1E023..1E024 ; Case_Ignorable # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS 1E026..1E02A ; Case_Ignorable # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E030..1E06D ; Case_Ignorable # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E08F ; Case_Ignorable # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 1E130..1E136 ; Case_Ignorable # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E137..1E13D ; Case_Ignorable # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E2AE ; Case_Ignorable # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; Case_Ignorable # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EB ; Case_Ignorable # Lm NAG MUNDARI SIGN OJOD +1E4EC..1E4EF ; Case_Ignorable # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E8D0..1E8D6 ; Case_Ignorable # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Case_Ignorable # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA 1E94B ; Case_Ignorable # Lm ADLAM NASALIZATION MARK @@ -3394,7 +3439,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2602 +# Total code points: 2707 # ================================================ @@ -6617,6 +6662,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 111DC ; ID_Start # Lo SHARADA HEADSTROKE 11200..11211 ; ID_Start # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA 11213..1122B ; ID_Start # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1123F..11240 ; ID_Start # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I 11280..11286 ; ID_Start # Lo [7] MULTANI LETTER A..MULTANI LETTER GA 11288 ; ID_Start # Lo MULTANI LETTER GHA 1128A..1128D ; ID_Start # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -6679,12 +6725,16 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11D6A..11D89 ; ID_Start # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; ID_Start # Lo GUNJALA GONDI OM 11EE0..11EF2 ; ID_Start # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11F02 ; ID_Start # Lo KAWI SIGN REPHA +11F04..11F10 ; ID_Start # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; ID_Start # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA 11FB0 ; ID_Start # Lo LISU LETTER YHA 12000..12399 ; ID_Start # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; ID_Start # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; ID_Start # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU 12F90..12FF0 ; ID_Start # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 -13000..1342E ; ID_Start # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +13000..1342F ; ID_Start # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13441..13446 ; ID_Start # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; ID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; ID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; ID_Start # Lo [31] MRO LETTER TA..MRO LETTER TEK @@ -6707,7 +6757,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 1B000..1B122 ; ID_Start # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; ID_Start # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; ID_Start # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; ID_Start # Lo KATAKANA LETTER SMALL KO 1B164..1B167 ; ID_Start # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N 1B170..1B2FB ; ID_Start # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; ID_Start # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M @@ -6747,11 +6799,15 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF00..1DF09 ; ID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; ID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; ID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E14E ; ID_Start # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ 1E290..1E2AD ; ID_Start # Lo [30] TOTO LETTER PA..TOTO LETTER A 1E2C0..1E2EB ; ID_Start # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; ID_Start # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; ID_Start # Lm NAG MUNDARI SIGN OJOD 1E7E0..1E7E6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; ID_Start # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; ID_Start # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -6793,14 +6849,15 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1EEA5..1EEA9 ; ID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; ID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6DF ; ID_Start # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF -2A700..2B738 ; ID_Start # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2A700..2B739 ; ID_Start # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; ID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; ID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; ID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 131997 +# Total code points: 136345 # ================================================ @@ -7083,6 +7140,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0CE2..0CE3 ; ID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; ID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; ID_Continue # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0CF3 ; ID_Continue # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D00..0D01 ; ID_Continue # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02..0D03 ; ID_Continue # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D04..0D0C ; ID_Continue # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -7136,7 +7194,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0EBD ; ID_Continue # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; ID_Continue # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; ID_Continue # Lm LAO KO LA -0EC8..0ECD ; ID_Continue # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0EC8..0ECE ; ID_Continue # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN 0ED0..0ED9 ; ID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE 0EDC..0EDF ; ID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; ID_Continue # Lo TIBETAN SYLLABLE OM @@ -7719,6 +7777,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10E80..10EA9 ; ID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EFD..10EFF ; ID_Continue # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -7781,6 +7840,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11235 ; ID_Continue # Mc KHOJKI SIGN VIRAMA 11236..11237 ; ID_Continue # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA 1123E ; ID_Continue # Mn KHOJKI SIGN SUKUN +1123F..11240 ; ID_Continue # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11241 ; ID_Continue # Mn KHOJKI VOWEL SIGN VOCALIC R 11280..11286 ; ID_Continue # Lo [7] MULTANI LETTER A..MULTANI LETTER GA 11288 ; ID_Continue # Lo MULTANI LETTER GHA 1128A..1128D ; ID_Continue # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -7963,12 +8024,27 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11EE0..11EF2 ; ID_Continue # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; ID_Continue # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; ID_Continue # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F00..11F01 ; ID_Continue # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F02 ; ID_Continue # Lo KAWI SIGN REPHA +11F03 ; ID_Continue # Mc KAWI SIGN VISARGA +11F04..11F10 ; ID_Continue # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; ID_Continue # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35 ; ID_Continue # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; ID_Continue # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; ID_Continue # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; ID_Continue # Mn KAWI VOWEL SIGN EU +11F41 ; ID_Continue # Mc KAWI SIGN KILLER +11F42 ; ID_Continue # Mn KAWI CONJOINER +11F50..11F59 ; ID_Continue # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 11FB0 ; ID_Continue # Lo LISU LETTER YHA 12000..12399 ; ID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; ID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; ID_Continue # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU 12F90..12FF0 ; ID_Continue # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 -13000..1342E ; ID_Continue # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +13000..1342F ; ID_Continue # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13440 ; ID_Continue # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13441..13446 ; ID_Continue # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13447..13455 ; ID_Continue # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 14400..14646 ; ID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; ID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; ID_Continue # Lo [31] MRO LETTER TA..MRO LETTER TEK @@ -8001,7 +8077,9 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 1B000..1B122 ; ID_Continue # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; ID_Continue # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; ID_Continue # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; ID_Continue # Lo KATAKANA LETTER SMALL KO 1B164..1B167 ; ID_Continue # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N 1B170..1B2FB ; ID_Continue # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; ID_Continue # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M @@ -8058,11 +8136,14 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1DF00..1DF09 ; ID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; ID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E000..1E006 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; ID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI 1E023..1E024 ; ID_Continue # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS 1E026..1E02A ; ID_Continue # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E030..1E06D ; ID_Continue # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E08F ; ID_Continue # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 1E100..1E12C ; ID_Continue # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E130..1E136 ; ID_Continue # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E137..1E13D ; ID_Continue # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -8073,6 +8154,10 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1E2C0..1E2EB ; ID_Continue # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH 1E2EC..1E2EF ; ID_Continue # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E2F0..1E2F9 ; ID_Continue # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4D0..1E4EA ; ID_Continue # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; ID_Continue # Lm NAG MUNDARI SIGN OJOD +1E4EC..1E4EF ; ID_Continue # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E4F0..1E4F9 ; ID_Continue # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE 1E7E0..1E7E6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; ID_Continue # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -8118,15 +8203,16 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1EEAB..1EEBB ; ID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1FBF0..1FBF9 ; ID_Continue # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 20000..2A6DF ; ID_Continue # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF -2A700..2B738 ; ID_Continue # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2A700..2B739 ; ID_Continue # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; ID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; ID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; ID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 135072 +# Total code points: 139482 # ================================================ @@ -8685,6 +8771,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 111DC ; XID_Start # Lo SHARADA HEADSTROKE 11200..11211 ; XID_Start # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA 11213..1122B ; XID_Start # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1123F..11240 ; XID_Start # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I 11280..11286 ; XID_Start # Lo [7] MULTANI LETTER A..MULTANI LETTER GA 11288 ; XID_Start # Lo MULTANI LETTER GHA 1128A..1128D ; XID_Start # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -8747,12 +8834,16 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 11D6A..11D89 ; XID_Start # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; XID_Start # Lo GUNJALA GONDI OM 11EE0..11EF2 ; XID_Start # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11F02 ; XID_Start # Lo KAWI SIGN REPHA +11F04..11F10 ; XID_Start # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; XID_Start # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA 11FB0 ; XID_Start # Lo LISU LETTER YHA 12000..12399 ; XID_Start # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; XID_Start # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; XID_Start # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU 12F90..12FF0 ; XID_Start # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 -13000..1342E ; XID_Start # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +13000..1342F ; XID_Start # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13441..13446 ; XID_Start # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; XID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; XID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; XID_Start # Lo [31] MRO LETTER TA..MRO LETTER TEK @@ -8775,7 +8866,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 1B000..1B122 ; XID_Start # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; XID_Start # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; XID_Start # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; XID_Start # Lo KATAKANA LETTER SMALL KO 1B164..1B167 ; XID_Start # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N 1B170..1B2FB ; XID_Start # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; XID_Start # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M @@ -8815,11 +8908,15 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1DF00..1DF09 ; XID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; XID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; XID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; XID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; XID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; XID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E14E ; XID_Start # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ 1E290..1E2AD ; XID_Start # Lo [30] TOTO LETTER PA..TOTO LETTER A 1E2C0..1E2EB ; XID_Start # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; XID_Start # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; XID_Start # Lm NAG MUNDARI SIGN OJOD 1E7E0..1E7E6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; XID_Start # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; XID_Start # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -8861,14 +8958,15 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1EEA5..1EEA9 ; XID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; XID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6DF ; XID_Start # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF -2A700..2B738 ; XID_Start # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2A700..2B739 ; XID_Start # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; XID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; XID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; XID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 131974 +# Total code points: 136322 # ================================================ @@ -9147,6 +9245,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0CE2..0CE3 ; XID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; XID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; XID_Continue # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0CF3 ; XID_Continue # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D00..0D01 ; XID_Continue # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02..0D03 ; XID_Continue # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D04..0D0C ; XID_Continue # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -9200,7 +9299,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0EBD ; XID_Continue # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; XID_Continue # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; XID_Continue # Lm LAO KO LA -0EC8..0ECD ; XID_Continue # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0EC8..0ECE ; XID_Continue # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN 0ED0..0ED9 ; XID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE 0EDC..0EDF ; XID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; XID_Continue # Lo TIBETAN SYLLABLE OM @@ -9788,6 +9887,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10E80..10EA9 ; XID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EFD..10EFF ; XID_Continue # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -9850,6 +9950,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11235 ; XID_Continue # Mc KHOJKI SIGN VIRAMA 11236..11237 ; XID_Continue # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA 1123E ; XID_Continue # Mn KHOJKI SIGN SUKUN +1123F..11240 ; XID_Continue # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11241 ; XID_Continue # Mn KHOJKI VOWEL SIGN VOCALIC R 11280..11286 ; XID_Continue # Lo [7] MULTANI LETTER A..MULTANI LETTER GA 11288 ; XID_Continue # Lo MULTANI LETTER GHA 1128A..1128D ; XID_Continue # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -10032,12 +10134,27 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11EE0..11EF2 ; XID_Continue # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; XID_Continue # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; XID_Continue # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F00..11F01 ; XID_Continue # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F02 ; XID_Continue # Lo KAWI SIGN REPHA +11F03 ; XID_Continue # Mc KAWI SIGN VISARGA +11F04..11F10 ; XID_Continue # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; XID_Continue # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35 ; XID_Continue # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; XID_Continue # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; XID_Continue # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; XID_Continue # Mn KAWI VOWEL SIGN EU +11F41 ; XID_Continue # Mc KAWI SIGN KILLER +11F42 ; XID_Continue # Mn KAWI CONJOINER +11F50..11F59 ; XID_Continue # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 11FB0 ; XID_Continue # Lo LISU LETTER YHA 12000..12399 ; XID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; XID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; XID_Continue # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU 12F90..12FF0 ; XID_Continue # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 -13000..1342E ; XID_Continue # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +13000..1342F ; XID_Continue # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13440 ; XID_Continue # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13441..13446 ; XID_Continue # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13447..13455 ; XID_Continue # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 14400..14646 ; XID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; XID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; XID_Continue # Lo [31] MRO LETTER TA..MRO LETTER TEK @@ -10070,7 +10187,9 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 1B000..1B122 ; XID_Continue # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; XID_Continue # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; XID_Continue # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; XID_Continue # Lo KATAKANA LETTER SMALL KO 1B164..1B167 ; XID_Continue # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N 1B170..1B2FB ; XID_Continue # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; XID_Continue # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M @@ -10127,11 +10246,14 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1DF00..1DF09 ; XID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; XID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; XID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E000..1E006 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; XID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI 1E023..1E024 ; XID_Continue # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS 1E026..1E02A ; XID_Continue # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E030..1E06D ; XID_Continue # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E08F ; XID_Continue # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 1E100..1E12C ; XID_Continue # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E130..1E136 ; XID_Continue # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E137..1E13D ; XID_Continue # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -10142,6 +10264,10 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1E2C0..1E2EB ; XID_Continue # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH 1E2EC..1E2EF ; XID_Continue # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E2F0..1E2F9 ; XID_Continue # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4D0..1E4EA ; XID_Continue # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; XID_Continue # Lm NAG MUNDARI SIGN OJOD +1E4EC..1E4EF ; XID_Continue # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E4F0..1E4F9 ; XID_Continue # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE 1E7E0..1E7E6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; XID_Continue # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -10187,15 +10313,16 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1EEAB..1EEBB ; XID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1FBF0..1FBF9 ; XID_Continue # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 20000..2A6DF ; XID_Continue # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF -2A700..2B738 ; XID_Continue # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2A700..2B739 ; XID_Continue # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; XID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; XID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; XID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; XID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 135053 +# Total code points: 139463 # ================================================ @@ -10206,7 +10333,7 @@ E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTO # + Variation_Selector # - White_Space # - FFF9..FFFB (Interlinear annotation format characters) -# - 13430..13438 (Egyptian hieroglyph format characters) +# - 13430..13440 (Egyptian hieroglyph format characters) # - Prepended_Concatenation_Mark (Exceptional format characters that should be visible) 00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN @@ -10351,7 +10478,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] .. 0CE4..0CE5 ; Cn # [2] .. 0CF0 ; Cn # -0CF3..0CFF ; Cn # [13] .. +0CF4..0CFF ; Cn # [12] .. 0D0D ; Cn # 0D11 ; Cn # 0D45 ; Cn # @@ -166,7 +166,7 @@ 0EBE..0EBF ; Cn # [2] .. 0EC5 ; Cn # 0EC7 ; Cn # -0ECE..0ECF ; Cn # [2] .. +0ECF ; Cn # 0EDA..0EDB ; Cn # [2] .. 0EE0..0EFF ; Cn # [32] .. 0F48 ; Cn # @@ -436,7 +436,7 @@ FFFE..FFFF ; Cn # [2] .. 10E7F ; Cn # 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. -10EB2..10EFF ; Cn # [78] .. +10EB2..10EFC ; Cn # [75] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. 10F8A..10FAF ; Cn # [38] .. @@ -454,7 +454,7 @@ FFFE..FFFF ; Cn # [2] .. 111E0 ; Cn # 111F5..111FF ; Cn # [11] .. 11212 ; Cn # -1123F..1127F ; Cn # [65] .. +11242..1127F ; Cn # [62] .. 11287 ; Cn # 11289 ; Cn # 1128E ; Cn # @@ -506,7 +506,8 @@ FFFE..FFFF ; Cn # [2] .. 119E5..119FF ; Cn # [27] .. 11A48..11A4F ; Cn # [8] .. 11AA3..11AAF ; Cn # [13] .. -11AF9..11BFF ; Cn # [263] .. +11AF9..11AFF ; Cn # [7] .. +11B0A..11BFF ; Cn # [246] .. 11C09 ; Cn # 11C37 ; Cn # 11C46..11C4F ; Cn # [10] .. @@ -527,7 +528,10 @@ FFFE..FFFF ; Cn # [2] .. 11D92 ; Cn # 11D99..11D9F ; Cn # [7] .. 11DAA..11EDF ; Cn # [310] .. -11EF9..11FAF ; Cn # [183] .. +11EF9..11EFF ; Cn # [7] .. +11F11 ; Cn # +11F3B..11F3D ; Cn # [3] .. +11F5A..11FAF ; Cn # [86] .. 11FB1..11FBF ; Cn # [15] .. 11FF2..11FFE ; Cn # [13] .. 1239A..123FF ; Cn # [102] .. @@ -535,8 +539,7 @@ FFFE..FFFF ; Cn # [2] .. 12475..1247F ; Cn # [11] .. 12544..12F8F ; Cn # [2636] .. 12FF3..12FFF ; Cn # [13] .. -1342F ; Cn # -13439..143FF ; Cn # [4039] .. +13456..143FF ; Cn # [4010] .. 14647..167FF ; Cn # [8633] .. 16A39..16A3F ; Cn # [7] .. 16A5F ; Cn # @@ -562,8 +565,10 @@ FFFE..FFFF ; Cn # [2] .. 1AFF4 ; Cn # 1AFFC ; Cn # 1AFFF ; Cn # -1B123..1B14F ; Cn # [45] .. -1B153..1B163 ; Cn # [17] .. +1B123..1B131 ; Cn # [15] .. +1B133..1B14F ; Cn # [29] .. +1B153..1B154 ; Cn # [2] .. +1B156..1B163 ; Cn # [14] .. 1B168..1B16F ; Cn # [8] .. 1B2FC..1BBFF ; Cn # [2308] .. 1BC6B..1BC6F ; Cn # [5] .. @@ -577,7 +582,8 @@ FFFE..FFFF ; Cn # [2] .. 1D0F6..1D0FF ; Cn # [10] .. 1D127..1D128 ; Cn # [2] .. 1D1EB..1D1FF ; Cn # [21] .. -1D246..1D2DF ; Cn # [154] .. +1D246..1D2BF ; Cn # [122] .. +1D2D4..1D2DF ; Cn # [12] .. 1D2F4..1D2FF ; Cn # [12] .. 1D357..1D35F ; Cn # [9] .. 1D379..1D3FF ; Cn # [135] .. @@ -604,19 +610,23 @@ FFFE..FFFF ; Cn # [2] .. 1DA8C..1DA9A ; Cn # [15] .. 1DAA0 ; Cn # 1DAB0..1DEFF ; Cn # [1104] .. -1DF1F..1DFFF ; Cn # [225] .. +1DF1F..1DF24 ; Cn # [6] .. +1DF2B..1DFFF ; Cn # [213] .. 1E007 ; Cn # 1E019..1E01A ; Cn # [2] .. 1E022 ; Cn # 1E025 ; Cn # -1E02B..1E0FF ; Cn # [213] .. +1E02B..1E02F ; Cn # [5] .. +1E06E..1E08E ; Cn # [33] .. +1E090..1E0FF ; Cn # [112] .. 1E12D..1E12F ; Cn # [3] .. 1E13E..1E13F ; Cn # [2] .. 1E14A..1E14D ; Cn # [4] .. 1E150..1E28F ; Cn # [320] .. 1E2AF..1E2BF ; Cn # [17] .. 1E2FA..1E2FE ; Cn # [5] .. -1E300..1E7DF ; Cn # [1248] .. +1E300..1E4CF ; Cn # [464] .. +1E4FA..1E7DF ; Cn # [742] .. 1E7E7 ; Cn # 1E7EC ; Cn # 1E7EF ; Cn # @@ -674,11 +684,11 @@ FFFE..FFFF ; Cn # [2] .. 1F249..1F24F ; Cn # [7] .. 1F252..1F25F ; Cn # [14] .. 1F266..1F2FF ; Cn # [154] .. -1F6D8..1F6DC ; Cn # [5] .. +1F6D8..1F6DB ; Cn # [4] .. 1F6ED..1F6EF ; Cn # [3] .. 1F6FD..1F6FF ; Cn # [3] .. -1F774..1F77F ; Cn # [12] .. -1F7D9..1F7DF ; Cn # [7] .. +1F777..1F77A ; Cn # [4] .. +1F7DA..1F7DF ; Cn # [6] .. 1F7EC..1F7EF ; Cn # [4] .. 1F7F1..1F7FF ; Cn # [15] .. 1F80C..1F80F ; Cn # [4] .. @@ -689,32 +699,31 @@ FFFE..FFFF ; Cn # [2] .. 1F8B2..1F8FF ; Cn # [78] .. 1FA54..1FA5F ; Cn # [12] .. 1FA6E..1FA6F ; Cn # [2] .. -1FA75..1FA77 ; Cn # [3] .. 1FA7D..1FA7F ; Cn # [3] .. -1FA87..1FA8F ; Cn # [9] .. -1FAAD..1FAAF ; Cn # [3] .. -1FABB..1FABF ; Cn # [5] .. -1FAC6..1FACF ; Cn # [10] .. -1FADA..1FADF ; Cn # [6] .. -1FAE8..1FAEF ; Cn # [8] .. -1FAF7..1FAFF ; Cn # [9] .. +1FA89..1FA8F ; Cn # [7] .. +1FABE ; Cn # +1FAC6..1FACD ; Cn # [8] .. +1FADC..1FADF ; Cn # [4] .. +1FAE9..1FAEF ; Cn # [7] .. +1FAF9..1FAFF ; Cn # [7] .. 1FB93 ; Cn # 1FBCB..1FBEF ; Cn # [37] .. 1FBFA..1FFFF ; Cn # [1030] .. 2A6E0..2A6FF ; Cn # [32] .. -2B739..2B73F ; Cn # [7] .. +2B73A..2B73F ; Cn # [6] .. 2B81E..2B81F ; Cn # [2] .. 2CEA2..2CEAF ; Cn # [14] .. 2EBE1..2F7FF ; Cn # [3103] .. 2FA1E..2FFFF ; Cn # [1506] .. -3134B..E0000 ; Cn # [715958] .. +3134B..3134F ; Cn # [5] .. +323B0..E0000 ; Cn # [711761] .. E0002..E001F ; Cn # [30] .. E0080..E00FF ; Cn # [128] .. E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 829834 +# Total code points: 825345 # ================================================ @@ -2029,9 +2038,10 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1D7CB ; Ll # MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Ll # [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Ll # [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2227 +# Total code points: 2233 # ================================================ @@ -2121,10 +2131,12 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1AFF0..1AFF3 ; Lm # [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Lm # [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Lm # [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1E030..1E06D ; Lm # [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E137..1E13D ; Lm # [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 334 +# Total code points: 397 # ================================================ @@ -2494,6 +2506,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 111DC ; Lo # SHARADA HEADSTROKE 11200..11211 ; Lo # [18] KHOJKI LETTER A..KHOJKI LETTER JJA 11213..1122B ; Lo # [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1123F..11240 ; Lo # [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I 11280..11286 ; Lo # [7] MULTANI LETTER A..MULTANI LETTER GA 11288 ; Lo # MULTANI LETTER GHA 1128A..1128D ; Lo # [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -2555,11 +2568,15 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 11D6A..11D89 ; Lo # [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; Lo # GUNJALA GONDI OM 11EE0..11EF2 ; Lo # [19] MAKASAR LETTER KA..MAKASAR ANGKA +11F02 ; Lo # KAWI SIGN REPHA +11F04..11F10 ; Lo # [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; Lo # [34] KAWI LETTER KA..KAWI LETTER JNYA 11FB0 ; Lo # LISU LETTER YHA 12000..12399 ; Lo # [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12480..12543 ; Lo # [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU 12F90..12FF0 ; Lo # [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 -13000..1342E ; Lo # [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +13000..1342F ; Lo # [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13441..13446 ; Lo # [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; Lo # [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; Lo # [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; Lo # [31] MRO LETTER TA..MRO LETTER TEK @@ -2574,7 +2591,9 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 18800..18CD5 ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18D00..18D08 ; Lo # [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; Lo # HIRAGANA LETTER SMALL KO 1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; Lo # KATAKANA LETTER SMALL KO 1B164..1B167 ; Lo # [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N 1B170..1B2FB ; Lo # [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; Lo # [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M @@ -2586,6 +2605,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1E14E ; Lo # NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ 1E290..1E2AD ; Lo # [30] TOTO LETTER PA..TOTO LETTER A 1E2C0..1E2EB ; Lo # [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; Lo # [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL 1E7E0..1E7E6 ; Lo # [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; Lo # [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; Lo # [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -2625,14 +2645,15 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1EEA5..1EEA9 ; Lo # [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; Lo # [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6DF ; Lo # [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF -2A700..2B738 ; Lo # [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2A700..2B739 ; Lo # [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; Lo # [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Lo # [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Lo # [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 127333 +# Total code points: 131612 # ================================================ @@ -2730,7 +2751,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 0E47..0E4E ; Mn # [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN 0EB1 ; Mn # LAO VOWEL SIGN MAI KAN 0EB4..0EBC ; Mn # [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO -0EC8..0ECD ; Mn # [6] LAO TONE MAI EK..LAO NIGGAHITA +0EC8..0ECE ; Mn # [7] LAO TONE MAI EK..LAO YAMAKKAN 0F18..0F19 ; Mn # [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F35 ; Mn # TIBETAN MARK NGAS BZUNG NYI ZLA 0F37 ; Mn # TIBETAN MARK NGAS BZUNG SGOR RTAGS @@ -2861,6 +2882,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 10AE5..10AE6 ; Mn # [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFD..10EFF ; Mn # [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Mn # [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Mn # BRAHMI SIGN ANUSVARA @@ -2883,6 +2905,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 11234 ; Mn # KHOJKI SIGN ANUSVARA 11236..11237 ; Mn # [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA 1123E ; Mn # KHOJKI SIGN SUKUN +11241 ; Mn # KHOJKI VOWEL SIGN VOCALIC R 112DF ; Mn # KHUDAWADI SIGN ANUSVARA 112E3..112EA ; Mn # [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA 11300..11301 ; Mn # [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU @@ -2944,6 +2967,12 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 11D95 ; Mn # GUNJALA GONDI SIGN ANUSVARA 11D97 ; Mn # GUNJALA GONDI VIRAMA 11EF3..11EF4 ; Mn # [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11F00..11F01 ; Mn # [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F36..11F3A ; Mn # [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F40 ; Mn # KAWI VOWEL SIGN EU +11F42 ; Mn # KAWI CONJOINER +13440 ; Mn # EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Mn # [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Mn # [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Mn # [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; Mn # MIAO SIGN CONSONANT MODIFIER BAR @@ -2968,14 +2997,16 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E01B..1E021 ; Mn # [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI 1E023..1E024 ; Mn # [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS 1E026..1E02A ; Mn # [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; Mn # COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 1E130..1E136 ; Mn # [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E2AE ; Mn # TOTO SIGN RISING TONE 1E2EC..1E2EF ; Mn # [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; Mn # [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E8D0..1E8D6 ; Mn # [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1950 +# Total code points: 1985 # ================================================ @@ -3028,6 +3059,7 @@ A670..A672 ; Me # [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRIL 0CC7..0CC8 ; Mc # [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI 0CCA..0CCB ; Mc # [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CD5..0CD6 ; Mc # [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CF3 ; Mc # KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D02..0D03 ; Mc # [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D3E..0D40 ; Mc # [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II 0D46..0D48 ; Mc # [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI @@ -3166,12 +3198,16 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 11D93..11D94 ; Mc # [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU 11D96 ; Mc # GUNJALA GONDI SIGN VISARGA 11EF5..11EF6 ; Mc # [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F03 ; Mc # KAWI SIGN VISARGA +11F34..11F35 ; Mc # [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F3E..11F3F ; Mc # [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F41 ; Mc # KAWI SIGN KILLER 16F51..16F87 ; Mc # [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16FF0..16FF1 ; Mc # [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -# Total code points: 445 +# Total code points: 452 # ================================================ @@ -3231,16 +3267,18 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11C50..11C59 ; Nd # [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Nd # [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Nd # [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11F50..11F59 ; Nd # [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16A60..16A69 ; Nd # [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Nd # [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Nd # [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 1D7CE..1D7FF ; Nd # [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Nd # [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE 1E2F0..1E2F9 ; Nd # [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4F0..1E4F9 ; Nd # [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE 1E950..1E959 ; Nd # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Nd # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 660 +# Total code points: 680 # ================================================ @@ -3327,6 +3365,7 @@ A830..A835 ; No # [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTIO 11FC0..11FD4 ; No # [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 16B5B..16B61 ; No # [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS 16E80..16E96 ; No # [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +1D2C0..1D2D3 ; No # [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; No # [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D360..1D378 ; No # [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE 1E8C7..1E8CF ; No # [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE @@ -3337,7 +3376,7 @@ A830..A835 ; No # [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTIO 1ED2F..1ED3D ; No # [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH 1F100..1F10C ; No # [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO -# Total code points: 895 +# Total code points: 915 # ================================================ @@ -3398,13 +3437,13 @@ FEFF ; Cf # ZERO WIDTH NO-BREAK SPACE FFF9..FFFB ; Cf # [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR 110BD ; Cf # KAITHI NUMBER SIGN 110CD ; Cf # KAITHI NUMBER SIGN ABOVE -13430..13438 ; Cf # [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT +13430..1343F ; Cf # [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 1BCA0..1BCA3 ; Cf # [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1D173..1D17A ; Cf # [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE E0001 ; Cf # LANGUAGE TAG E0020..E007F ; Cf # [96] TAG SPACE..CANCEL TAG -# Total code points: 163 +# Total code points: 170 # ================================================ @@ -3806,9 +3845,11 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 11A3F..11A46 ; Po # [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK 11A9A..11A9C ; Po # [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD 11A9E..11AA2 ; Po # [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 +11B00..11B09 ; Po # [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU 11C41..11C45 ; Po # [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 11C70..11C71 ; Po # [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD 11EF7..11EF8 ; Po # [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F43..11F4F ; Po # [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL 11FFF ; Po # TAMIL PUNCTUATION END OF TEXT 12470..12474 ; Po # [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON 12FF1..12FF2 ; Po # [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 @@ -3822,7 +3863,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 1DA87..1DA8B ; Po # [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS 1E95E..1E95F ; Po # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# Total code points: 605 +# Total code points: 628 # ================================================ @@ -4126,10 +4167,10 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F260..1F265 ; So # [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI 1F300..1F3FA ; So # [251] CYCLONE..AMPHORA 1F400..1F6D7 ; So # [728] RAT..ELEVATOR -1F6DD..1F6EC ; So # [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING +1F6DC..1F6EC ; So # [17] WIRELESS..AIRPLANE ARRIVING 1F6F0..1F6FC ; So # [13] SATELLITE..ROLLER SKATE -1F700..1F773 ; So # [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE -1F780..1F7D8 ; So # [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE +1F700..1F776 ; So # [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE +1F77B..1F7D9 ; So # [95] HAUMEA..NINE POINTED WHITE STAR 1F7E0..1F7EB ; So # [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE 1F7F0 ; So # HEAVY EQUALS SIGN 1F800..1F80B ; So # [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD @@ -4140,19 +4181,17 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F8B0..1F8B1 ; So # [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST 1F900..1FA53 ; So # [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; So # [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER -1FA70..1FA74 ; So # [5] BALLET SHOES..THONG SANDAL -1FA78..1FA7C ; So # [5] DROP OF BLOOD..CRUTCH -1FA80..1FA86 ; So # [7] YO-YO..NESTING DOLLS -1FA90..1FAAC ; So # [29] RINGED PLANET..HAMSA -1FAB0..1FABA ; So # [11] FLY..NEST WITH EGGS -1FAC0..1FAC5 ; So # [6] ANATOMICAL HEART..PERSON WITH CROWN -1FAD0..1FAD9 ; So # [10] BLUEBERRIES..JAR -1FAE0..1FAE7 ; So # [8] MELTING FACE..BUBBLES -1FAF0..1FAF6 ; So # [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS +1FA70..1FA7C ; So # [13] BALLET SHOES..CRUTCH +1FA80..1FA88 ; So # [9] YO-YO..FLUTE +1FA90..1FABD ; So # [46] RINGED PLANET..WING +1FABF..1FAC5 ; So # [7] GOOSE..PERSON WITH CROWN +1FACE..1FADB ; So # [14] MOOSE..PEA POD +1FAE0..1FAE8 ; So # [9] MELTING FACE..SHAKING FACE +1FAF0..1FAF8 ; So # [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; So # [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 6605 +# Total code points: 6634 # ================================================ diff --git a/maint/Unicode.tables/GraphemeBreakProperty.txt b/maint/Unicode.tables/GraphemeBreakProperty.txt index dd2569064..a12b5eef1 100644 --- a/maint/Unicode.tables/GraphemeBreakProperty.txt +++ b/maint/Unicode.tables/GraphemeBreakProperty.txt @@ -1,11 +1,11 @@ -# GraphemeBreakProperty-14.0.0.txt -# Date: 2021-08-12, 23:13:02 GMT -# © 2021 Unicode®, Inc. +# GraphemeBreakProperty-15.0.0.txt +# Date: 2022-04-27, 17:07:38 GMT +# © 2022 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html +# For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see https://www.unicode.org/reports/tr44/ # ================================================ @@ -32,8 +32,9 @@ 11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA 11A84..11A89 ; Prepend # Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA 11D46 ; Prepend # Lo MASARAM GONDI REPHA +11F02 ; Prepend # Lo KAWI SIGN REPHA -# Total code points: 26 +# Total code points: 27 # ================================================ @@ -67,7 +68,7 @@ FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE FFF0..FFF8 ; Control # Cn [9] .. FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR -13430..13438 ; Control # Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT +13430..1343F ; Control # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE E0000 ; Control # Cn @@ -76,7 +77,7 @@ E0002..E001F ; Control # Cn [30] .. E0080..E00FF ; Control # Cn [128] .. E01F0..E0FFF ; Control # Cn [3600] .. -# Total code points: 3886 +# Total code points: 3893 # ================================================ @@ -185,7 +186,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 0E47..0E4E ; Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN 0EB1 ; Extend # Mn LAO VOWEL SIGN MAI KAN 0EB4..0EBC ; Extend # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO -0EC8..0ECD ; Extend # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0EC8..0ECE ; Extend # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN 0F18..0F19 ; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F35 ; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA 0F37 ; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS @@ -324,6 +325,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFD..10EFF ; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Extend # Mn BRAHMI SIGN ANUSVARA @@ -346,6 +348,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11234 ; Extend # Mn KHOJKI SIGN ANUSVARA 11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA 1123E ; Extend # Mn KHOJKI SIGN SUKUN +11241 ; Extend # Mn KHOJKI VOWEL SIGN VOCALIC R 112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA 112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA 11300..11301 ; Extend # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU @@ -413,6 +416,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11D95 ; Extend # Mn GUNJALA GONDI SIGN ANUSVARA 11D97 ; Extend # Mn GUNJALA GONDI VIRAMA 11EF3..11EF4 ; Extend # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11F00..11F01 ; Extend # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F36..11F3A ; Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F40 ; Extend # Mn KAWI VOWEL SIGN EU +11F42 ; Extend # Mn KAWI CONJOINER +13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -439,16 +448,18 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI 1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS 1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; Extend # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 1E130..1E136 ; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E2AE ; Extend # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA 1F3FB..1F3FF ; Extend # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2095 +# Total code points: 2130 # ================================================ @@ -489,6 +500,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 0CC3..0CC4 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR 0CC7..0CC8 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI 0CCA..0CCB ; SpacingMark # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CF3 ; SpacingMark # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D02..0D03 ; SpacingMark # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D3F..0D40 ; SpacingMark # Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II 0D46..0D48 ; SpacingMark # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI @@ -614,12 +626,16 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11D93..11D94 ; SpacingMark # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU 11D96 ; SpacingMark # Mc GUNJALA GONDI SIGN VISARGA 11EF5..11EF6 ; SpacingMark # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F03 ; SpacingMark # Mc KAWI SIGN VISARGA +11F34..11F35 ; SpacingMark # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F3E..11F3F ; SpacingMark # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F41 ; SpacingMark # Mc KAWI SIGN KILLER 16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16FF0..16FF1 ; SpacingMark # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT -# Total code points: 388 +# Total code points: 395 # ================================================ diff --git a/maint/Unicode.tables/PropList.txt b/maint/Unicode.tables/PropList.txt index 0a5a93468..b49d6460c 100644 --- a/maint/Unicode.tables/PropList.txt +++ b/maint/Unicode.tables/PropList.txt @@ -1,11 +1,11 @@ -# PropList-14.0.0.txt -# Date: 2021-08-12, 23:13:05 GMT -# © 2021 Unicode®, Inc. +# PropList-15.0.0.txt +# Date: 2022-08-05, 22:17:16 GMT +# © 2022 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html +# For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see https://www.unicode.org/reports/tr44/ # ================================================ @@ -215,6 +215,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 11C41..11C43 ; Terminal_Punctuation # Po [3] BHAIKSUKI DANDA..BHAIKSUKI WORD SEPARATOR 11C71 ; Terminal_Punctuation # Po MARCHEN MARK SHAD 11EF7..11EF8 ; Terminal_Punctuation # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F43..11F44 ; Terminal_Punctuation # Po [2] KAWI DANDA..KAWI DOUBLE DANDA 12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON 16A6E..16A6F ; Terminal_Punctuation # Po [2] MRO DANDA..MRO DOUBLE DANDA 16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP @@ -224,7 +225,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON -# Total code points: 276 +# Total code points: 278 # ================================================ @@ -507,6 +508,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 0BD7 ; Other_Alphabetic # Mc TAMIL AU LENGTH MARK 0C00 ; Other_Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE 0C01..0C03 ; Other_Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; Other_Alphabetic # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE 0C3E..0C40 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II 0C41..0C44 ; Other_Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR 0C46..0C48 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI @@ -524,6 +526,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU 0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CF3 ; Other_Alphabetic # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D00..0D01 ; Other_Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II @@ -548,7 +551,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 0ECD ; Other_Alphabetic # Mn LAO NIGGAHITA 0F71..0F7E ; Other_Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO 0F7F ; Other_Alphabetic # Mc TIBETAN SIGN RNAM BCAD -0F80..0F81 ; Other_Alphabetic # Mn [2] TIBETAN VOWEL SIGN REVERSED I..TIBETAN VOWEL SIGN REVERSED II +0F80..0F83 ; Other_Alphabetic # Mn [4] TIBETAN VOWEL SIGN REVERSED I..TIBETAN SIGN SNA LDAN 0F8D..0F97 ; Other_Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA 0F99..0FBC ; Other_Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA 102B..102C ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA @@ -692,6 +695,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA 11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU 11073..11074 ; Other_Alphabetic # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11080..11081 ; Other_Alphabetic # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA 11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA 110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI @@ -715,6 +719,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 11234 ; Other_Alphabetic # Mn KHOJKI SIGN ANUSVARA 11237 ; Other_Alphabetic # Mn KHOJKI SIGN SHADDA 1123E ; Other_Alphabetic # Mn KHOJKI SIGN SUKUN +11241 ; Other_Alphabetic # Mn KHOJKI VOWEL SIGN VOCALIC R 112DF ; Other_Alphabetic # Mn KHUDAWADI SIGN ANUSVARA 112E0..112E2 ; Other_Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II 112E3..112E8 ; Other_Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU @@ -807,6 +812,12 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 11D96 ; Other_Alphabetic # Mc GUNJALA GONDI SIGN VISARGA 11EF3..11EF4 ; Other_Alphabetic # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; Other_Alphabetic # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F00..11F01 ; Other_Alphabetic # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F03 ; Other_Alphabetic # Mc KAWI SIGN VISARGA +11F34..11F35 ; Other_Alphabetic # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; Other_Alphabetic # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; Other_Alphabetic # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; Other_Alphabetic # Mn KAWI VOWEL SIGN EU 16F4F ; Other_Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Other_Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; Other_Alphabetic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -817,12 +828,13 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1E01B..1E021 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI 1E023..1E024 ; Other_Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS 1E026..1E02A ; Other_Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; Other_Alphabetic # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 1E947 ; Other_Alphabetic # Mn ADLAM HAMZA 1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1404 +# Total code points: 1425 # ================================================ @@ -840,14 +852,15 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 18D00..18D08 ; Ideographic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF -2A700..2B738 ; Ideographic # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2A700..2B739 ; Ideographic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 101661 +# Total code points: 105854 # ================================================ @@ -1028,6 +1041,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D22..10D23 ; Diacritic # Lo [2] HANIFI ROHINGYA MARK SAKIN..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EFD..10EFF ; Diacritic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11046 ; Diacritic # Mn BRAHMI VIRAMA @@ -1064,6 +1078,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA 11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA 11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA +13447..13455 ; Diacritic # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -1079,6 +1094,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1E030..1E06D ; Diacritic # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E2AE ; Diacritic # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; Diacritic # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI @@ -1086,7 +1102,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1064 +# Total code points: 1144 # ================================================ @@ -1135,6 +1151,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI 037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI +10FC ; Other_Lowercase # Lm MODIFIER LETTER GEORGIAN NAR 1D2C..1D6A ; Other_Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -1146,14 +1163,17 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN A770 ; Other_Lowercase # Lm MODIFIER LETTER US +A7F2..A7F4 ; Other_Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W 10780 ; Other_Lowercase # Lm MODIFIER LETTER SMALL CAPITAL AA 10783..10785 ; Other_Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Other_Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +1E030..1E06D ; Other_Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE -# Total code points: 244 +# Total code points: 311 # ================================================ @@ -1251,13 +1271,14 @@ FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21 FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24 FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29 20000..2A6DF ; Unified_Ideograph # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF -2A700..2B738 ; Unified_Ideograph # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2A700..2B739 ; Unified_Ideograph # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; Unified_Ideograph # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 92865 +# Total code points: 97058 # ================================================ @@ -1323,8 +1344,10 @@ E0001 ; Deprecated # Cf LANGUAGE TAG 1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J 1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J 1DF1A ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE AND RETROFLEX HOOK +1E04C..1E04D ; Soft_Dotted # Lm [2] MODIFIER LETTER CYRILLIC SMALL BYELORUSSIAN-UKRAINIAN I..MODIFIER LETTER CYRILLIC SMALL JE +1E068 ; Soft_Dotted # Lm CYRILLIC SUBSCRIPT SMALL LETTER BYELORUSSIAN-UKRAINIAN I -# Total code points: 47 +# Total code points: 50 # ================================================ @@ -1430,6 +1453,7 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP 11A9B..11A9C ; Sentence_Terminal # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD 11C41..11C42 ; Sentence_Terminal # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA 11EF7..11EF8 ; Sentence_Terminal # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F43..11F44 ; Sentence_Terminal # Po [2] KAWI DANDA..KAWI DOUBLE DANDA 16A6E..16A6F ; Sentence_Terminal # Po [2] MRO DANDA..MRO DOUBLE DANDA 16AF5 ; Sentence_Terminal # Po BASSA VAH FULL STOP 16B37..16B38 ; Sentence_Terminal # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB @@ -1438,7 +1462,7 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP 1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP -# Total code points: 152 +# Total code points: 154 # ================================================ diff --git a/maint/Unicode.tables/PropertyAliases.txt b/maint/Unicode.tables/PropertyAliases.txt index 3e4b429e4..2de24e26f 100644 --- a/maint/Unicode.tables/PropertyAliases.txt +++ b/maint/Unicode.tables/PropertyAliases.txt @@ -1,11 +1,11 @@ -# PropertyAliases-14.0.0.txt -# Date: 2021-03-08, 19:35:48 GMT -# © 2021 Unicode®, Inc. +# PropertyAliases-15.0.0.txt +# Date: 2022-02-02, 23:35:44 GMT +# © 2022 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html +# For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see https://www.unicode.org/reports/tr44/ # # This file contains aliases for properties used in the UCD. # These names can be used for XML formats of UCD data, for regular-expression diff --git a/maint/Unicode.tables/PropertyValueAliases.txt b/maint/Unicode.tables/PropertyValueAliases.txt index f0cb26bda..863301b1e 100644 --- a/maint/Unicode.tables/PropertyValueAliases.txt +++ b/maint/Unicode.tables/PropertyValueAliases.txt @@ -1,11 +1,11 @@ -# PropertyValueAliases-14.0.0.txt -# Date: 2021-05-10, 21:08:53 GMT -# © 2021 Unicode®, Inc. +# PropertyValueAliases-15.0.0.txt +# Date: 2022-08-05, 23:42:17 GMT +# © 2022 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html +# For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see https://www.unicode.org/reports/tr44/ # # This file contains aliases for property values used in the UCD. # These names can be used for XML formats of UCD data, for regular-expression @@ -90,6 +90,7 @@ age; 12.0 ; V12_0 age; 12.1 ; V12_1 age; 13.0 ; V13_0 age; 14.0 ; V14_0 +age; 15.0 ; V15_0 age; NA ; Unassigned # Alphabetic (Alpha) @@ -135,7 +136,6 @@ Bidi_M; Y ; Yes ; T # Bidi_Mirroring_Glyph (bmg) -# @missing: 0000..10FFFF; Bidi_Mirroring_Glyph; # Bidi_Paired_Bracket (bpb) @@ -162,6 +162,7 @@ blk; Ancient_Symbols ; Ancient_Symbols blk; Arabic ; Arabic blk; Arabic_Ext_A ; Arabic_Extended_A blk; Arabic_Ext_B ; Arabic_Extended_B +blk; Arabic_Ext_C ; Arabic_Extended_C blk; Arabic_Math ; Arabic_Mathematical_Alphabetic_Symbols blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A blk; Arabic_PF_B ; Arabic_Presentation_Forms_B @@ -206,6 +207,7 @@ blk; CJK_Ext_D ; CJK_Unified_Ideographs_Extension_D blk; CJK_Ext_E ; CJK_Unified_Ideographs_Extension_E blk; CJK_Ext_F ; CJK_Unified_Ideographs_Extension_F blk; CJK_Ext_G ; CJK_Unified_Ideographs_Extension_G +blk; CJK_Ext_H ; CJK_Unified_Ideographs_Extension_H blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement blk; CJK_Strokes ; CJK_Strokes blk; CJK_Symbols ; CJK_Symbols_And_Punctuation @@ -223,10 +225,12 @@ blk; Cyrillic ; Cyrillic blk; Cyrillic_Ext_A ; Cyrillic_Extended_A blk; Cyrillic_Ext_B ; Cyrillic_Extended_B blk; Cyrillic_Ext_C ; Cyrillic_Extended_C +blk; Cyrillic_Ext_D ; Cyrillic_Extended_D blk; Cyrillic_Sup ; Cyrillic_Supplement ; Cyrillic_Supplementary blk; Deseret ; Deseret blk; Devanagari ; Devanagari blk; Devanagari_Ext ; Devanagari_Extended +blk; Devanagari_Ext_A ; Devanagari_Extended_A blk; Diacriticals ; Combining_Diacritical_Marks blk; Diacriticals_Ext ; Combining_Diacritical_Marks_Extended blk; Diacriticals_For_Symbols ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols @@ -288,6 +292,7 @@ blk; Jamo_Ext_A ; Hangul_Jamo_Extended_A blk; Jamo_Ext_B ; Hangul_Jamo_Extended_B blk; Javanese ; Javanese blk; Kaithi ; Kaithi +blk; Kaktovik_Numerals ; Kaktovik_Numerals blk; Kana_Ext_A ; Kana_Extended_A blk; Kana_Ext_B ; Kana_Extended_B blk; Kana_Sup ; Kana_Supplement @@ -296,6 +301,7 @@ blk; Kangxi ; Kangxi_Radicals blk; Kannada ; Kannada blk; Katakana ; Katakana blk; Katakana_Ext ; Katakana_Phonetic_Extensions +blk; Kawi ; Kawi blk; Kayah_Li ; Kayah_Li blk; Kharoshthi ; Kharoshthi blk; Khitan_Small_Script ; Khitan_Small_Script @@ -360,6 +366,7 @@ blk; Myanmar ; Myanmar blk; Myanmar_Ext_A ; Myanmar_Extended_A blk; Myanmar_Ext_B ; Myanmar_Extended_B blk; Nabataean ; Nabataean +blk; Nag_Mundari ; Nag_Mundari blk; Nandinagari ; Nandinagari blk; NB ; No_Block blk; New_Tai_Lue ; New_Tai_Lue @@ -663,7 +670,6 @@ EPres; Y ; Yes ; T # Equivalent_Unified_Ideograph (EqUIdeo) -# @missing: 0000..10FFFF; Equivalent_Unified_Ideograph; # Expands_On_NFC (XO_NFC) @@ -1143,7 +1149,6 @@ NFD_QC; Y ; Yes # NFKC_Casefold (NFKC_CF) -# @missing: 0000..10FFFF; NFKC_Casefold; # NFKC_Quick_Check (NFKC_QC) @@ -1313,6 +1318,7 @@ sc ; Ital ; Old_Italic sc ; Java ; Javanese sc ; Kali ; Kayah_Li sc ; Kana ; Katakana +sc ; Kawi ; Kawi sc ; Khar ; Kharoshthi sc ; Khmr ; Khmer sc ; Khoj ; Khojki @@ -1345,6 +1351,7 @@ sc ; Mroo ; Mro sc ; Mtei ; Meetei_Mayek sc ; Mult ; Multani sc ; Mymr ; Myanmar +sc ; Nagm ; Nag_Mundari sc ; Nand ; Nandinagari sc ; Narb ; Old_North_Arabian sc ; Nbat ; Nabataean @@ -1418,7 +1425,6 @@ sc ; Zzzz ; Unknown # Script_Extensions (scx) -# @missing: 0000..10FFFF; Script_Extensions;