From a34ff30682a18b3f0dd353a450e899f0509671e1 Mon Sep 17 00:00:00 2001 From: Federico Della Rovere Date: Fri, 13 Mar 2020 15:51:31 +0100 Subject: [PATCH] optional export code comments --- src/hcl/api.py | 20 +++++-- src/hcl/lexer.py | 26 ++++++++- src/hcl/parser.py | 17 +++++- tests/test_lexer.py | 130 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 184 insertions(+), 9 deletions(-) diff --git a/src/hcl/api.py b/src/hcl/api.py index e5390f6..a8e7f5a 100644 --- a/src/hcl/api.py +++ b/src/hcl/api.py @@ -42,28 +42,40 @@ def isHcl(s): raise ValueError("No HCL object could be decoded") -def load(fp): +def load(fp, export_comments=None): ''' Deserializes a file-pointer like object into a python dictionary. The contents of the file must either be JSON or HCL. :param fp: An object that has a read() function + :param export_comments: optional string that allows exporting code comments as well. It can be: 'LINE': to export only single-line comments (// or #) 'MULTILINE': to export only multi-line comments (/* ... */) 'ALL': to export both 'LINE' and 'MULTILINE' comments default None :returns: Dictionary ''' - return loads(fp.read()) + return loads(fp.read(), export_comments=export_comments) -def loads(s): +def loads(s, export_comments=None): ''' Deserializes a string and converts it to a dictionary. The contents of the string must either be JSON or HCL. + :param s: string to parse + :param export_comments: optional string that allows exporting code comments as well. It can be: + 'LINE': to export only single-line comments (// or #) + 'MULTILINE': to export only multi-line comments (/* ... 
*/) + 'ALL': to export both 'LINE' and 'MULTILINE' comments + default None + :returns: Dictionary ''' s = u(s) if isHcl(s): - return HclParser().parse(s) + return HclParser().parse(s, export_comments=export_comments) else: return json.loads(s) diff --git a/src/hcl/lexer.py b/src/hcl/lexer.py index 92b1439..6a568e9 100644 --- a/src/hcl/lexer.py +++ b/src/hcl/lexer.py @@ -34,6 +34,8 @@ class Lexer(object): 'FLOAT', 'NUMBER', 'COMMA', + 'COMMENT', + 'MULTICOMMENT', 'IDENTIFIER', 'EQUAL', 'STRING', @@ -68,6 +70,8 @@ class Lexer(object): ('tabbedheredoc', 'exclusive'), ) + can_export_comments = [] + def t_BOOL(self, t): r'(true)|(false)' t.value = t.value == 'true' @@ -319,12 +323,15 @@ def t_heredoc_eof(self, t): def t_COMMENT(self, t): r'(\#|(//)).*' - pass + if 'COMMENT' in self.can_export_comments: + t.value = t.value.lstrip('#').lstrip('//').lstrip() + return t def t_MULTICOMMENT(self, t): r'/\*(.|\n)*?(\*/)' t.lexer.lineno += t.value.count('\n') - pass + if 'MULTICOMMENT' in self.can_export_comments: + return t # Define a rule so we can track line numbers def t_newline(self, t): @@ -356,7 +363,20 @@ def t_error(self, t): else: _raise_error(t) - def __init__(self): + def __init__(self, export_comments=None): + if export_comments is not None: + if export_comments == 'LINE': + self.can_export_comments = ['COMMENT'] + elif export_comments == 'MULTILINE': + self.can_export_comments = ['MULTICOMMENT'] + elif export_comments == 'ALL': + self.can_export_comments = ['COMMENT', 'MULTICOMMENT'] + else: + raise ValueError( + 'Only `LINE`, `MULTILINE` and `ALL` value are allowed for ' + '`export_comments`. given: `%s`.' 
% export_comments ) self.lex = lex.lex( module=self, debug=False, diff --git a/src/hcl/parser.py b/src/hcl/parser.py index 4f83cd1..df0a208 100644 --- a/src/hcl/parser.py +++ b/src/hcl/parser.py @@ -50,6 +50,8 @@ class HclParser(object): 'NUMBER', 'COMMA', 'COMMAEND', + 'COMMENT', + 'MULTICOMMENT', 'IDENTIFIER', 'EQUAL', 'STRING', @@ -568,6 +570,15 @@ def p_exp_1(self, p): self.print_p(p) p[0] = "e-{0}".format(p[2]) + def p_comment_0(self, p): + ''' + block : COMMENT + | MULTICOMMENT + ''' + if DEBUG: + self.print_p(p) + p[0] = ("comment-L{:03d}".format(p.lineno(1)), p[1]) + # useful for debugging the parser def print_p(self, p): if DEBUG: @@ -606,5 +617,7 @@ def __init__(self): module=self, debug=False, optimize=1, picklefile=pickle_file ) - def parse(self, s): - return self.yacc.parse(s, lexer=Lexer()) + def parse(self, s, export_comments=None): + return self.yacc.parse( + s, lexer=Lexer(export_comments=export_comments), debug=False + ) diff --git a/tests/test_lexer.py b/tests/test_lexer.py index 8628116..1a76c10 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -391,6 +391,136 @@ def test_tokens(token, input_string): assert token == lex_tok.type assert lexer.token() is None +@pytest.mark.parametrize("token,input_string", TOKEN_FIXTURES) +def test_tokens_with_export_comments_wrong_parameter(token, input_string): + + print(input_string) + + lexer = hcl.lexer.Lexer(export_comments="WRONG") + lexer.input(input_string) + + lex_tok = lexer.token() + + if lex_tok is None: + assert token is None + else: + assert token == lex_tok.type + assert lexer.token() is None + +ONE_LINE_COMMENT_FIXTURES = [ + ("COMMENT", "//"), + ("COMMENT", "////"), + ("COMMENT", "// comment"), + ("COMMENT", "// /* comment */"), + ("COMMENT", "// // comment //"), + ("COMMENT", "//" + f100), + ("COMMENT", "#"), + ("COMMENT", "##"), + ("COMMENT", "# comment"), + ("COMMENT", "# /* comment */"), + ("COMMENT", "# # comment #"), + ("COMMENT", "#" + f100), + (None, "/**/"), + (None, 
"/***/"), + (None, "/* comment */"), + (None, "/* // comment */"), + (None, "/* /* comment */"), + (None, "/*\n comment\n*/"), + (None, "/*" + f100 + "*/") +] + +@pytest.mark.parametrize("token,input_string", ONE_LINE_COMMENT_FIXTURES) +def test_one_line_comments_extract(token, input_string): + + print(input_string) + + lexer = hcl.lexer.Lexer(export_comments='LINE') + lexer.input(input_string) + + lex_tok = lexer.token() + + if lex_tok is None: + assert token is None + else: + assert token == lex_tok.type + assert lexer.token() is None + +MULTI_LINE_COMMENT_FIXTURES = [ + (None, "//"), + (None, "////"), + (None, "// comment"), + (None, "// /* comment */"), + (None, "// // comment //"), + (None, "//" + f100), + (None, "#"), + (None, "##"), + (None, "# comment"), + (None, "# /* comment */"), + (None, "# # comment #"), + (None, "#" + f100), + ("MULTICOMMENT", "/**/"), + ("MULTICOMMENT", "/***/"), + ("MULTICOMMENT", "/* comment */"), + ("MULTICOMMENT", "/* // comment */"), + ("MULTICOMMENT", "/* /* comment */"), + ("MULTICOMMENT", "/*\n comment\n*/"), + ("MULTICOMMENT", "/*" + f100 + "*/") +] + +@pytest.mark.parametrize("token,input_string", MULTI_LINE_COMMENT_FIXTURES) +def test_multi_line_comments_extract(token, input_string): + + print(input_string) + + lexer = hcl.lexer.Lexer(export_comments='MULTILINE') + lexer.input(input_string) + + lex_tok = lexer.token() + + if lex_tok is None: + assert token is None + else: + assert token == lex_tok.type + assert lexer.token() is None + +COMMENT_FIXTURES = [ + ("COMMENT", "//"), + ("COMMENT", "////"), + ("COMMENT", "// comment"), + ("COMMENT", "// /* comment */"), + ("COMMENT", "// // comment //"), + ("COMMENT", "//" + f100), + ("COMMENT", "#"), + ("COMMENT", "##"), + ("COMMENT", "# comment"), + ("COMMENT", "# /* comment */"), + ("COMMENT", "# # comment #"), + ("COMMENT", "#" + f100), + ("MULTICOMMENT", "/**/"), + ("MULTICOMMENT", "/***/"), + ("MULTICOMMENT", "/* comment */"), + ("MULTICOMMENT", "/* // comment */"), + 
("MULTICOMMENT", "/* /* comment */"), + ("MULTICOMMENT", "/*\n comment\n*/"), + ("MULTICOMMENT", "/*" + f100 + "*/") +] + +@pytest.mark.parametrize("token,input_string", COMMENT_FIXTURES) +def test_multi_line_comments_extract(token, input_string): + + print(input_string) + + lexer = hcl.lexer.Lexer(export_comments='ALL') + lexer.input(input_string) + + lex_tok = lexer.token() + + if lex_tok is None: + assert token is None + else: + assert token == lex_tok.type + assert lexer.token() is None + # Testing EPLUS and EMINUS can't be done on their own since they # require positive lookbehinds and therefore the lexer will find at least one # other token