diff --git a/README.md b/README.md index dd999419..7c098f8e 100644 --- a/README.md +++ b/README.md @@ -118,14 +118,12 @@ on the same line (though some values can be broken over multiple lines). key = "value" ``` -Keys may be either bare or quoted. **Bare keys** may only contain letters, -numbers, underscores, and dashes (`A-Za-z0-9_-`). Note that bare keys are -allowed to be composed of only digits, e.g. `1234`, but are always interpreted -as strings. **Quoted keys** follow the exact same rules as either basic strings -or literal strings and allow you to use a much broader set of key names. Best -practice is to use bare keys except when absolutely necessary. - - +Keys may be either bare or quoted. **Bare keys** may only contain ASCII letters, +ASCII digits, underscores, and dashes (`A-Za-z0-9_-`). Note that bare keys are +allowed to be composed of only ASCII digits, e.g. `1234`, but are always +interpreted as strings. **Quoted keys** follow the exact same rules as either +basic strings or literal strings and allow you to use a much broader set of key +names. Best practice is to use bare keys except when absolutely necessary. ```toml key = "value" @@ -149,8 +147,8 @@ discouraged). '' = 'blank' # VALID but discouraged ``` -Values must be of the following types: String, Integer, Float, Boolean, Datetime, -Array, or Inline Table. Unspecified values are invalid. +Values must be of the following types: String, Integer, Float, Boolean, +Datetime, Array, or Inline Table. Unspecified values are invalid. ```toml key = # INVALID @@ -164,7 +162,7 @@ multi-line literal. All strings must contain only valid UTF-8 characters. **Basic strings** are surrounded by quotation marks. Any Unicode character may be used except those that must be escaped: quotation mark, backslash, and the -control characters (U+0000 to U+001F). +control characters (U+0000 to U+001F, U+007F). ```toml str = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF." 
@@ -289,8 +287,8 @@ int3 = 0 int4 = -17 ``` -For large numbers, you may use underscores to enhance readability. Each -underscore must be surrounded by at least one digit. +For large numbers, you may use underscores between digits to enhance +readability. Each underscore must have at least one digit on each side. ```toml int5 = 1_000 @@ -327,6 +325,7 @@ flt6 = -2E-2 # both flt7 = 6.626e-34 ``` + A fractional part is a decimal point followed by one or more digits. An exponent part is an E (upper or lower case) followed by an integer part @@ -409,7 +408,6 @@ timezone. ```toml lt1 = 07:32:00 lt2 = 00:32:00.999999 - ``` The precision of fractional seconds is implementation specific, but at least diff --git a/toml.abnf b/toml.abnf index 002dd470..0f9360ca 100644 --- a/toml.abnf +++ b/toml.abnf @@ -4,39 +4,39 @@ ;; This is an attempt to define TOML in ABNF according to the grammar defined ;; in RFC 5234 (http://www.ietf.org/rfc/rfc5234.txt). -;; You can try out this grammar interactively via the online ABNF tool at -;; http://www.coasttocoastresearch.com/interactiveapg +;; You can try out this grammar using https://www.npmjs.com/package/apg-html + ;; Note that due to the limitations of ABNF parsers, in order for multi-line ;; strings to work in that tool, the following rules must be ammended to ;; disallow the use of unescaped double- or single-quotes: ;; ml-basic-unescaped = basic-unescaped ;; ml-literal-char = literal-char -;; TOML +;; Overall Structure toml = expression *( newline expression ) -expression = ( ( ws comment ) / - ( ws keyval ws [ comment ] ) / - ( ws table ws [ comment ] ) / - ws ) - -;; Newline - -newline = ( %x0A / ; LF - %x0D.0A ) ; CRLF +expression = ws [ comment ] +expression =/ ws keyval ws [ comment ] +expression =/ ws table ws [ comment ] ;; Whitespace ws = *wschar +wschar = %x20 ; Space +wschar =/ %x09 ; Horizontal tab + +;; Newline -wschar = ( %x20 / ; Space - %x09 ) ; Horizontal tab +newline = %x0A ; LF +newline =/ %x0D.0A ; CRLF ;; Comment 
comment-start-symbol = %x23 ; # -non-eol = %x09 / %x20-10FFFF +non-eol = %x09 +non-eol =/ %x20-10FFFF + comment = comment-start-symbol *non-eol ;; Key-Value pairs @@ -45,53 +45,12 @@ keyval = key keyval-sep val key = unquoted-key / quoted-key unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ -quoted-key = quotation-mark 1*basic-char quotation-mark ; See Basic Strings +quoted-key = basic-string / literal-string keyval-sep = ws %x3D ws ; = val = string / boolean / array / inline-table / date-time / float / integer -;; Table - -table = std-table / array-table - -;; Standard Table - -std-table = std-table-open key *( table-key-sep key) std-table-close - -std-table-open = %x5B ws ; [ Left square bracket -std-table-close = ws %x5D ; ] Right square bracket -table-key-sep = ws %x2E ws ; . Period - -;; Array Table - -array-table = array-table-open key *( table-key-sep key) array-table-close - -array-table-open = %x5B.5B ws ; [[ Double left square bracket -array-table-close = ws %x5D.5D ; ]] Double right quare bracket - -;; Integer - -integer = [ minus / plus ] int - -minus = %x2D ; - -plus = %x2B ; + - -int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) -digit1-9 = %x31-39 ; 1-9 -underscore = %x5F ; _ - -;; Float - -float = integer ( frac / ( frac exp ) / exp ) - -frac = decimal-point zero-prefixable-int -decimal-point = %x2E ; . 
-zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) - -exp = e integer -e = %x65 / %x45 ; e E - ;; String string = ml-basic-string / basic-string / ml-literal-string / literal-string @@ -103,19 +62,20 @@ basic-string = quotation-mark *basic-char quotation-mark quotation-mark = %x22 ; " basic-char = basic-unescaped / escaped -basic-unescaped = %x20-21 / %x23-5B / %x5D-10FFFF -escaped = escape ( %x22 / ; " quotation mark U+0022 - %x5C / ; \ reverse solidus U+005C - %x2F / ; / solidus U+002F - %x62 / ; b backspace U+0008 - %x66 / ; f form feed U+000C - %x6E / ; n line feed U+000A - %x72 / ; r carriage return U+000D - %x74 / ; t tab U+0009 - %x75 4HEXDIG / ; uXXXX U+XXXX - %x55 8HEXDIG ) ; UXXXXXXXX U+XXXXXXXX - -escape = %x5C ; \ +basic-unescaped = %x20-21 / %x23-5B / %x5D-7E / %x80-10FFFF +escaped = escape escape-seq-char + +escape = %x5C ; \ +escape-seq-char = %x22 ; " quotation mark U+0022 +escape-seq-char =/ %x5C ; \ reverse solidus U+005C +escape-seq-char =/ %x2F ; / solidus U+002F +escape-seq-char =/ %x62 ; b backspace U+0008 +escape-seq-char =/ %x66 ; f form feed U+000C +escape-seq-char =/ %x6E ; n line feed U+000A +escape-seq-char =/ %x72 ; r carriage return U+000D +escape-seq-char =/ %x74 ; t tab U+0009 +escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX +escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX ;; Multiline Basic String @@ -123,7 +83,7 @@ ml-basic-string = ml-basic-string-delim ml-basic-body ml-basic-string-delim ml-basic-string-delim = 3quotation-mark -ml-basic-body = *( ml-basic-char / newline / ( escape ws newline )) +ml-basic-body = *( ml-basic-char / newline / ( escape ws newline ) ) ml-basic-char = ml-basic-unescaped / escaped ml-basic-unescaped = %x20-5B / %x5D-10FFFF @@ -144,6 +104,27 @@ ml-literal-string-delim = 3apostrophe ml-literal-body = *( ml-literal-char / newline ) ml-literal-char = %x09 / %x20-10FFFF +;; Integer + +integer = [ minus / plus ] int + +minus = 
%x2D ; - +plus = %x2B ; + + +int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) +digit1-9 = %x31-39 ; 1-9 +underscore = %x5F ; _ + +;; Float + +float = integer ( frac / ( frac exp ) / exp ) + +frac = decimal-point zero-prefixable-int +decimal-point = %x2E ; . +zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) + +exp = "e" integer + ;; Boolean boolean = true / false @@ -165,7 +146,7 @@ time-secfrac = "." 1*DIGIT time-numoffset = ( "+" / "-" ) time-hour ":" time-minute time-offset = "Z" / time-numoffset -partial-time = time-hour ":" time-minute ":" time-second [time-secfrac] +partial-time = time-hour ":" time-minute ":" time-second [ time-secfrac ] full-date = date-fullyear "-" date-month "-" date-mday full-time = partial-time time-offset @@ -200,6 +181,18 @@ array-sep = ws %x2C ws ; , Comma ws-newline = *( wschar / newline ) ws-newlines = newline *( wschar / newline ) +;; Table + +table = std-table / array-table + +;; Standard Table + +std-table = std-table-open key *( table-key-sep key ) std-table-close + +std-table-open = %x5B ws ; [ Left square bracket +std-table-close = ws %x5D ; ] Right square bracket +table-key-sep = ws %x2E ws ; . Period + ;; Inline Table inline-table = inline-table-open inline-table-keyvals inline-table-close @@ -209,8 +202,14 @@ inline-table-close = ws %x7D ; } inline-table-sep = ws %x2C ws ; , Comma inline-table-keyvals = [ inline-table-keyvals-non-empty ] -inline-table-keyvals-non-empty = ( key keyval-sep val inline-table-sep inline-table-keyvals-non-empty ) / - ( key keyval-sep val ) +inline-table-keyvals-non-empty = key keyval-sep val [ inline-table-sep inline-table-keyvals-non-empty ] + +;; Array Table + +array-table = array-table-open key *( table-key-sep key ) array-table-close + +array-table-open = %x5B.5B ws ; [[ Double left square bracket +array-table-close = ws %x5D.5D ; ]] Double right square bracket ;; Built-in ABNF terms, reproduced here for clarity