diff --git a/toml.abnf b/toml.abnf index 0446f8b6..db270092 100644 --- a/toml.abnf +++ b/toml.abnf @@ -52,16 +52,13 @@ simple-key = quoted-key / unquoted-key ;; Unquoted key -unquoted-key = 1*unquoted-key-char -unquoted-key-char = ALPHA / DIGIT / %x2D / %x5F ; a-z A-Z 0-9 - _ -unquoted-key-char =/ %xB2 / %xB3 / %xB9 / %xBC-BE ; superscript digits, fractions -unquoted-key-char =/ %xC0-D6 / %xD8-F6 / %xF8-37D ; non-symbol chars in Latin block -unquoted-key-char =/ %x37F-1FFF ; exclude GREEK QUESTION MARK, which is basically a semi-colon -unquoted-key-char =/ %x200C-200D / %x203F-2040 ; from General Punctuation Block, include the two tie symbols and ZWNJ, ZWJ -unquoted-key-char =/ %x2070-218F / %x2460-24FF ; include super-/subscripts, letterlike/numberlike forms, enclosed alphanumerics -unquoted-key-char =/ %x2C00-2FEF / %x3001-D7FF ; skip arrows, math, box drawing etc, skip 2FF0-3000 ideographic up/down markers and spaces -unquoted-key-char =/ %xF900-FDCF / %xFDF0-FFFD ; skip D800-DFFF surrogate block, E000-F8FF Private Use area, FDD0-FDEF intended for process-internal use (unicode) -unquoted-key-char =/ %x10000-EFFFF ; all chars outside BMP range, excluding Private Use planes (F0000-10FFFF) +unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ + +; These cannot be easily expressed in ABNF. Each repeated character may also be: +;   unicode-letter +;   unicode-digit +; unicode-letter = Lu / Ll / Lt / Lm / Lo ; Unicode categories +; unicode-digit = Nd ; Unicode categories ;; Quoted and dotted key diff --git a/toml.md b/toml.md index 08cc9f67..af0e4839 100644 --- a/toml.md +++ b/toml.md @@ -103,14 +103,13 @@ first = "Tom" last = "Preston-Werner" # INVALID A key may be either bare, quoted, or dotted. -**Bare keys** may contain any letter-like or number-like Unicode character from -any Unicode script, as well as ASCII digits, dashes and underscores. -Punctuation, spaces, arrows, box drawing and private use characters are not -allowed. 
Note that bare keys are allowed to be composed of only ASCII digits, -e.g. 1234, but are always interpreted as strings. +**Bare keys** may only contain letters, digits, underscores, and dashes. Bare +keys are allowed to be composed of only digits, e.g. `1234`, but are always +interpreted as strings. -ℹ️ The exact ranges of allowed code points can be found in the -[ABNF grammar file][abnf]. +A "letter" is any character in the Unicode category `Lu`, `Ll`, `Lt`, `Lm`, or +`Lo`. A "digit" is any character in the category `Nd`. Implementations must +support at least Unicode 9.0 and may support any later version. ```toml key = "value" @@ -118,15 +117,14 @@ bare_key = "value" bare-key = "value" 1234 = "value" Fuß = "value" -😂 = "value" 汉语大字典 = "value" 辭源 = "value" பெண்டிரேம் = "value" ``` -**Quoted keys** follow the exact same rules as either basic strings or literal -strings and allow you to use any Unicode character in a key name, including -spaces. Best practice is to use bare keys except when absolutely necessary. +**Quoted keys** follow the same rules as either basic strings or literal +strings, and allow you to use any character in a key name, including spaces. Best +practice is to use bare keys except when absolutely necessary. ```toml "127.0.0.1" = "value"