Skip to content

Commit

Permalink
feat: touch up the grammar, improve string-like rules, add file docs
Browse files Browse the repository at this point in the history
  • Loading branch information
amaanq committed Feb 23, 2023
1 parent 0d01fcd commit 21cbe63
Show file tree
Hide file tree
Showing 5 changed files with 2,568 additions and 2,555 deletions.
20 changes: 20 additions & 0 deletions .eslintrc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
module.exports = {
'env': {
'commonjs': true,
'es2021': true,
},
'extends': 'google',
'overrides': [
],
'parserOptions': {
'ecmaVersion': 'latest',
'sourceType': 'module',
},
'rules': {
'indent': ['error', 2, {'SwitchCase': 1}],
'max-len': [
'error',
{'code': 120, 'ignoreComments': true, 'ignoreUrls': true, 'ignoreStrings': true},
],
},
};
220 changes: 132 additions & 88 deletions grammar.js
Original file line number Diff line number Diff line change
@@ -1,149 +1,193 @@
/**
* @file ChainPack Object Notation (CPON) grammar for tree-sitter
* @author Fanda Vacek <[email protected]>
* @author Amaan Qureshi <[email protected]>
* @license MIT
* @see {@link https://github.com/silicon-heaven/libshv/wiki/ChainPack-RPC#cpon-chainpack-object-notation|official syntax spec}
* @see {@link https://github.com/silicon-heaven/libshv/wiki/cpon|additional info}
*/

/* eslint-disable arrow-parens */
/* eslint-disable camelcase */
/* eslint-disable-next-line spaced-comment */
/// <reference types="tree-sitter-cli/dsl" />
// @ts-check

module.exports = grammar({
name: 'cpon',

extras: $ => [
/\s/,
$.comment,
/\s/,
],

// supertypes: $ => [
// $._value
// ],

rules: {
document: $ => $._value,

_value: $ => seq(
optional($.meta),
optional($.meta_map),
choice(
$.map,
$.imap,
$.array,
$.number,
$.float,
$.datetime,
$.string,
$.hexblob,
$.escblob,
$.true,
$.false,
$.null
)
$.hex_blob,
$.esc_blob,
$.boolean,
$.null,
),
),

meta: $ => seq(
"<", commaSep($.mpair), ">"
meta_map: $ => seq(
'<', optionalCommaSep($._meta_pair), '>',
),

mpair: $ => seq(
field("key", choice($.string, $.number)),
":",
field("value", $._value)
_meta_pair: $ => seq(
field('key', choice($.string, $.number, $.float)),
':',
field('value', $._value),
),

map: $ => seq(
"{", commaSep($.pair), "}"
),
map: $ => seq('{', optionalCommaSep($.pair), '}'),

pair: $ => seq(
field("key", $.string),
":",
field("value", $._value)
field('key', $.string),
':',
field('value', $._value),
),

imap: $ => seq(
"i{", commaSep($.ipair), "}"
),
imap: $ => seq('i', '{', optionalCommaSep($.ipair), '}'),

ipair: $ => seq(
field("key", $.number),
":",
field("value", $._value)
field('key', choice($.number, $.float)),
':',
field('value', $._value),
),

array: $ => seq(
"[", commaSep($._value), "]"
),
array: $ => seq('[', optionalCommaSep($._value), ']'),

string: $ => choice(
seq('"', '"'),
seq('"', $.string_content, '"')
string: $ => seq(
'"',
repeat(choice(
$.string_content,
$._escape_sequence,
)),
'"',
),

string_content: $ => repeat1(choice(
token.immediate(/[^\\"\n]+/),
$.escape_sequence
)),
// A run of one or more characters that are neither a double quote nor a
// backslash (newlines are not excluded). It is a single token with lexical
// precedence -1, so other tokens matching the same text are preferred.
string_content: _ => token(prec(-1, /[^"\\]+/)),

escape_sequence: $ => token.immediate(seq(
// Hidden wrapper (leading underscore) around the two kinds of backslash
// sequence that may appear inside a quoted value:
//   - precedence 2: a backslash immediately followed by one character that is
//     NOT one of a b f n r t v x u ' " \ ? (matched as an anonymous token);
//   - precedence 1: the named `escape_sequence` rule.
_escape_sequence: $ =>
choice(
prec(2, token.immediate(seq('\\', /[^abfnrtvxu'\"\\\?]/))),
prec(1, $.escape_sequence),
),
escape_sequence: _ => token.immediate(seq(
'\\',
/(\"|\\|\/|b|f|n|r|t)/
)),

number: $ => {
choice(
/[^xu0-7]/,
/[0-7]{1,3}/,
/x[0-9a-fA-F]{2}/,
/u[0-9a-fA-F]{4}/,
/u{[0-9a-fA-F]+}/,
))),

number: _ => {
const hex_literal = seq(
choice('0x', '0X'),
/[\da-fA-F]+u?/
)
const int_literal = /\d+u?/

const decimal_digits = /\d+/
const signed_integer = seq(optional(choice('-', '+')), decimal_digits)
const exponent_part = seq(choice('e', 'E'), signed_integer)

// const binary_literal = seq(choice('0b', '0B'), /[0-1]+/)

// const octal_literal = seq(choice('0o', '0O'), /[0-7]+/)

const decimal_integer_literal = seq(
optional(choice('-', '+')),
choice(
'0',
seq(/[1-9]/, optional(decimal_digits))
)
)
/[\da-fA-F]+u?/,
);

const int_literal = /\d+u?/;

const decimal_digits = /\d+/;
const signed_integer = seq(optional(choice('-', '+')), decimal_digits);

const decimal_integer_literal =choice(
'0',
seq(/[1-9]/, optional(decimal_digits)),
);

const decimal_literal = choice(
seq(decimal_integer_literal, '.', optional(decimal_digits), optional(exponent_part)),
seq('.', decimal_digits, optional(exponent_part)),
seq(decimal_integer_literal, optional(exponent_part))
)
seq(optional(choice('-', '+')), decimal_integer_literal),
decimal_digits,
signed_integer,
);

return token(choice(
hex_literal,
decimal_literal,
int_literal,
decimal_literal
// binary_literal,
// octal_literal
))
));
},

// datetime: $ => /d"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}"/,
datetime: $ => /d"(\d{4})-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])T(0[0-9]|1[0-9]|2[0123]):([012345][0-9]):([012345][0-9])(\.\d{3})?(Z|[+-](0[1-9]|1[012])([012345][0-9])?)?"/,
// Decimal number with an optional sign: either digits with an optional
// `.digits` fraction, or a bare `.digits` fraction, followed by an optional
// exponent (`e`/`E`, optional sign, digits).
// Both the fraction and the exponent are optional, so a plain integer such as
// `42` also matches this pattern.
float: _ => /[+-]?(\d+(\.\d+)?|\.\d+)([Ee][+-]?\d+)?/,

true: $ => "true",

hexblob: $ => /x"([0-9a-fA-F]{2})*"/,
escblob: $ => /b"(\\[0-9a-fA-F]{2}|[ -~]|\\\\)*"/,
datetime: _ => seq(
'd',
'"',
// eslint-disable-next-line max-len
/(\d{4})-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])T(0[0-9]|1[0-9]|2[0123]):([012345][0-9]):([012345][0-9])(\.\d{3})?(Z|[+-](0[1-9]|1[012])([012345][0-9])?)?/,
'"',
),

boolean: _ => choice('true', 'false'),

false: $ => "false",
null: _ => 'null',

null: $ => "null",
// hexblob: _ => /x"([0-9a-fA-F]{2})*"/,
hex_blob: _ => seq(
'x',
'"',
/([0-9a-fA-F]{2})*/,
'"',
),
// escblob: _ => /b"(\\[0-9a-fA-F]{2}|[ -~]|\\\\)*"/,
esc_blob: $ => seq(
'b',
'"',
repeat(choice(
$.string_content,
$._escape_sequence,
)),
'"',
),

comment: $ => token(choice(
// http://stackoverflow.com/questions/13014947/regex-to-match-a-c-style-multiline-comment/36328890#36328890
comment: _ => token(choice(
seq('//', /.*/),
seq(
'/*',
/[^*]*\*+([^/*][^*]*\*+)*/,
'/'
)
'/',
),
)),
}
},
});

function commaSep1(rule) {
return seq(rule, repeat(seq(repeat1(choice(",", "\n", " ", "\t")), rule)))
/**
 * Builds a non-empty list of `rule` in which the separating commas are optional.
 *
 * The result matches one `rule`, then any number of further `rule`s, each
 * preceded by zero or more commas, then zero or more trailing commas.
 *
 * @param {Rule} rule the element rule to repeat
 *
 * @return {SeqRule}
 *
 */
function optionalCommaSep1(rule) {
  const anyCommas = () => repeat(',');
  const followingItems = repeat(seq(anyCommas(), rule));
  return seq(rule, followingItems, anyCommas());
}

function commaSep(rule) {
return optional(seq(commaSep1(rule),repeat(choice(",", "\n", " ", "\t"))))
/**
 * Builds a possibly empty list of `rule` in which the separating commas are
 * optional, by wrapping the non-empty list from `optionalCommaSep1` in
 * `optional`.
 *
 * @param {Rule} rule the element rule to repeat
 *
 * @return {ChoiceRule}
 *
 */
function optionalCommaSep(rule) {
  const nonEmptyList = optionalCommaSep1(rule);
  return optional(nonEmptyList);
}
Loading

0 comments on commit 21cbe63

Please sign in to comment.