
Merge pull request #64 from FedeDR/export_hcl_comments
Optional export code comments
virtuald authored Mar 18, 2020
2 parents 51a7524 + daa695a commit 06f849c
Showing 17 changed files with 271 additions and 22 deletions.
20 changes: 16 additions & 4 deletions src/hcl/api.py
@@ -42,28 +42,40 @@ def isHcl(s):
raise ValueError("No HCL object could be decoded")


def load(fp):
def load(fp, export_comments=None):
'''
Deserializes a file-pointer like object into a python dictionary.
The contents of the file must either be JSON or HCL.
:param fp: An object that has a read() function
:param export_comments: optional string controlling whether code comments are also exported. Accepted values:
'LINE': export only single-line comments (// or #)
'MULTILINE': export only multi-line comments (/* ... */)
'ALL': export both 'LINE' and 'MULTILINE' comments
Defaults to None (comments are discarded).
:returns: Dictionary
'''
return loads(fp.read())
return loads(fp.read(), export_comments=export_comments)


def loads(s):
def loads(s, export_comments=None):
'''
Deserializes a string and converts it to a dictionary. The contents
of the string must either be JSON or HCL.
:param s: string to parse
:param export_comments: optional string controlling whether code comments are also exported. Accepted values:
'LINE': export only single-line comments (// or #)
'MULTILINE': export only multi-line comments (/* ... */)
'ALL': export both 'LINE' and 'MULTILINE' comments
Defaults to None (comments are discarded).
:returns: Dictionary
'''
s = u(s)
if isHcl(s):
return HclParser().parse(s)
return HclParser().parse(s, export_comments=export_comments)
else:
return json.loads(s)

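For reference, a minimal usage sketch of the new parameter at the API level (the input file name is hypothetical; the comment keys follow the "comment-Lnnn" convention introduced in parser.py below):

import hcl

# export_comments accepts 'LINE', 'MULTILINE' or 'ALL';
# the default (None) keeps the old behaviour of discarding comments.
with open('main.tf') as fp:  # hypothetical input file
    obj = hcl.load(fp, export_comments='ALL')

# Exported comments appear as extra dictionary keys such as "comment-L001".
print(obj)
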
26 changes: 23 additions & 3 deletions src/hcl/lexer.py
@@ -34,6 +34,8 @@ class Lexer(object):
'FLOAT',
'NUMBER',
'COMMA',
'COMMENT',
'MULTICOMMENT',
'IDENTIFIER',
'EQUAL',
'STRING',
@@ -68,6 +70,8 @@ class Lexer(object):
('tabbedheredoc', 'exclusive'),
)

can_export_comments = []

def t_BOOL(self, t):
r'(true)|(false)'
t.value = t.value == 'true'
@@ -319,12 +323,15 @@ def t_heredoc_eof(self, t):

def t_COMMENT(self, t):
r'(\#|(//)).*'
pass
if 'COMMENT' in self.can_export_comments:
t.value = t.value.lstrip('#').lstrip('//').lstrip()
return t

def t_MULTICOMMENT(self, t):
r'/\*(.|\n)*?(\*/)'
t.lexer.lineno += t.value.count('\n')
pass
if 'MULTICOMMENT' in self.can_export_comments:
return t

# Define a rule so we can track line numbers
def t_newline(self, t):
@@ -356,7 +363,20 @@ def t_error(self, t):
else:
_raise_error(t)

def __init__(self):
def __init__(self, export_comments=None):
if export_comments is not None:
if export_comments == 'LINE':
self.can_export_comments = ['COMMENT']
elif export_comments == 'MULTILINE':
self.can_export_comments = ['MULTICOMMENT']
elif export_comments == 'ALL':
self.can_export_comments = ['COMMENT', 'MULTICOMMENT']
else:
raise ValueError(
'Only `LINE`, `MULTILINE` and `ALL` values are allowed for '
'`export_comments`. Given: `%s`.' % export_comments
)

self.lex = lex.lex(
module=self,
debug=False,
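
The lexer can also be driven directly, mirroring the new tests in tests/test_lexer.py further down; a minimal sketch:

import hcl.lexer

# 'ALL' enables both COMMENT and MULTICOMMENT tokens;
# any other non-None value raises ValueError.
lexer = hcl.lexer.Lexer(export_comments='ALL')
lexer.input('// a line comment\nfoo = "bar"  /* a block comment */')

tok = lexer.token()
while tok is not None:
    # e.g. COMMENT, IDENTIFIER, EQUAL, STRING, MULTICOMMENT
    print(tok.type, repr(tok.value))
    tok = lexer.token()
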
27 changes: 25 additions & 2 deletions src/hcl/parser.py
@@ -50,6 +50,8 @@ class HclParser(object):
'NUMBER',
'COMMA',
'COMMAEND',
'COMMENT',
'MULTICOMMENT',
'IDENTIFIER',
'EQUAL',
'STRING',
@@ -461,6 +463,16 @@ def p_listitems_4(self, p):
p[2].insert(0, p[1])
p[0] = p[2]

def p_listitems_5(self, p):
'''
listitems : listitems COMMA COMMENT
| listitems COMMA MULTICOMMENT
'''
# skip comments in lists
if DEBUG:
self.print_p(p)
p[0] = p[1]

def p_listitem_0(self, p):
'''
listitem : number
@@ -568,6 +580,15 @@ def p_exp_1(self, p):
self.print_p(p)
p[0] = "e-{0}".format(p[2])

def p_comment_0(self, p):
'''
block : COMMENT
| MULTICOMMENT
'''
if DEBUG:
self.print_p(p)
p[0] = ("comment-L{:03d}".format(p.lineno(1)), p[1])

# useful for debugging the parser
def print_p(self, p):
if DEBUG:
@@ -606,5 +627,7 @@ def __init__(self):
module=self, debug=False, optimize=1, picklefile=pickle_file
)

def parse(self, s):
return self.yacc.parse(s, lexer=Lexer())
def parse(self, s, export_comments=None):
return self.yacc.parse(
s, lexer=Lexer(export_comments=export_comments), debug=True
)
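
The new p_comment_0 rule turns each exported comment token into a dictionary entry keyed by its source line number; a sketch of the resulting shape, consistent with the fixtures added below:

from hcl.parser import HclParser

source = '// first comment\nfoo = "bar"\n'

result = HclParser().parse(source, export_comments='LINE')
# Comment keys encode the line number, zero-padded to three digits, e.g.:
# {'comment-L001': 'first comment', 'foo': 'bar'}
print(result)
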
4 changes: 4 additions & 0 deletions tests/fixtures/multi_line_comment.hcl
@@ -0,0 +1,4 @@
/*
test multi line comment
*/
foo = "bar"
3 changes: 3 additions & 0 deletions tests/fixtures/multi_line_comment.json
@@ -0,0 +1,3 @@
{
"foo": "bar"
}
4 changes: 4 additions & 0 deletions tests/fixtures/multi_line_comment_M.json
@@ -0,0 +1,4 @@
{
"comment-L001": "/*\n test multi line comment\n*/",
"foo": "bar"
}
3 changes: 3 additions & 0 deletions tests/fixtures/single_line_comment.hcl
@@ -0,0 +1,3 @@
// test single line comment with slash
# test single line comment with hashtag
foo = "bar"
3 changes: 3 additions & 0 deletions tests/fixtures/single_line_comment.json
@@ -0,0 +1,3 @@
{
"foo": "bar"
}
5 changes: 5 additions & 0 deletions tests/fixtures/single_line_comment_L.json
@@ -0,0 +1,5 @@
{
"comment-L001": "test single line comment with slash",
"comment-L002": "test single line comment with hashtag",
"foo": "bar"
}
7 changes: 7 additions & 0 deletions tests/fixtures/structure_comment.hcl
@@ -0,0 +1,7 @@
foo {
// single line comment
/*
multi line comment
*/
foo = "bar"
}
7 changes: 7 additions & 0 deletions tests/fixtures/structure_comment_A.json
@@ -0,0 +1,7 @@
{
"foo": {
"comment-L002": "single line comment",
"comment-L003": "/*\n multi line comment\n */",
"foo": "bar"
}
}
6 changes: 6 additions & 0 deletions tests/fixtures/structure_comment_L.json
@@ -0,0 +1,6 @@
{
"foo": {
"comment-L002": "single line comment",
"foo": "bar"
}
}
6 changes: 6 additions & 0 deletions tests/fixtures/structure_comment_M.json
@@ -0,0 +1,6 @@
{
"foo": {
"comment-L003": "/*\n multi line comment\n */",
"foo": "bar"
}
}
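
Taken together, the structure_comment fixtures show how each export mode keeps a different subset of the comments; a round-trip sketch, assuming the fixture paths are resolved relative to the repository root:

import json
import hcl

EXPECTED = {
    'LINE': 'structure_comment_L.json',
    'MULTILINE': 'structure_comment_M.json',
    'ALL': 'structure_comment_A.json',
}

for mode, fname in EXPECTED.items():
    with open('tests/fixtures/structure_comment.hcl') as fp:
        parsed = hcl.load(fp, export_comments=mode)
    with open('tests/fixtures/' + fname) as fp:
        assert parsed == json.load(fp)
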
25 changes: 25 additions & 0 deletions tests/test_decoder.py
@@ -60,3 +60,28 @@ def test_decoder(hcl_fname, json_fname, struct):

if struct is not None:
assert hcl_json == struct


COMMENTED_FIXTURES = [
('single_line_comment.hcl', 'single_line_comment_L.json', "single_line_comment.json", 'single_line_comment_L.json'),
('multi_line_comment.hcl', 'multi_line_comment.json', 'multi_line_comment_M.json', 'multi_line_comment_M.json'),
('structure_comment.hcl', 'structure_comment_L.json', 'structure_comment_M.json', 'structure_comment_A.json'),
('array_comment.hcl', 'array_comment.json', 'array_comment.json', 'array_comment.json')
]

@pytest.mark.parametrize("export_comments", ['LINE', 'MULTILINE', 'ALL'])
@pytest.mark.parametrize("hcl_fname,sline_fname,mline_fname,aline_fname", COMMENTED_FIXTURES)
def test_decoder_export_comments(hcl_fname, sline_fname, mline_fname, aline_fname, export_comments):
with open(join(FIXTURE_DIR, hcl_fname), 'r') as fp:
hcl_json = hcl.load(fp, export_comments)

json_fname = {
"LINE": sline_fname,
"MULTILINE": mline_fname,
"ALL": aline_fname
}

with open(join(FIXTURE_DIR, json_fname[export_comments]), 'r') as fp:
good_json = json.load(fp)

assert hcl_json == good_json
118 changes: 118 additions & 0 deletions tests/test_lexer.py
@@ -391,6 +391,124 @@ def test_tokens(token, input_string):
assert token == lex_tok.type
assert lexer.token() is None

def test_export_comments_wrong_parameter():
with pytest.raises(ValueError):
lexer = hcl.lexer.Lexer(export_comments="WRONG")

ONE_LINE_COMMENT_FIXTURES = [
("COMMENT", "//"),
("COMMENT", "////"),
("COMMENT", "// comment"),
("COMMENT", "// /* comment */"),
("COMMENT", "// // comment //"),
("COMMENT", "//" + f100),
("COMMENT", "#"),
("COMMENT", "##"),
("COMMENT", "# comment"),
("COMMENT", "# /* comment */"),
("COMMENT", "# # comment #"),
("COMMENT", "#" + f100),
(None, "/**/"),
(None, "/***/"),
(None, "/* comment */"),
(None, "/* // comment */"),
(None, "/* /* comment */"),
(None, "/*\n comment\n*/"),
(None, "/*" + f100 + "*/")
]

@pytest.mark.parametrize("token,input_string", ONE_LINE_COMMENT_FIXTURES)
def test_one_line_comments_extract(token, input_string):

print(input_string)

lexer = hcl.lexer.Lexer(export_comments='LINE')
lexer.input(input_string)

lex_tok = lexer.token()

if lex_tok is None:
assert token is None
else:
assert token == lex_tok.type
assert lexer.token() is None

MULTI_LINE_COMMENT_FIXTURES = [
(None, "//"),
(None, "////"),
(None, "// comment"),
(None, "// /* comment */"),
(None, "// // comment //"),
(None, "//" + f100),
(None, "#"),
(None, "##"),
(None, "# comment"),
(None, "# /* comment */"),
(None, "# # comment #"),
(None, "#" + f100),
("MULTICOMMENT", "/**/"),
("MULTICOMMENT", "/***/"),
("MULTICOMMENT", "/* comment */"),
("MULTICOMMENT", "/* // comment */"),
("MULTICOMMENT", "/* /* comment */"),
("MULTICOMMENT", "/*\n comment\n*/"),
("MULTICOMMENT", "/*" + f100 + "*/")
]

@pytest.mark.parametrize("token,input_string", MULTI_LINE_COMMENT_FIXTURES)
def test_multi_line_comments_extract(token, input_string):

print(input_string)

lexer = hcl.lexer.Lexer(export_comments='MULTILINE')
lexer.input(input_string)

lex_tok = lexer.token()

if lex_tok is None:
assert token is None
else:
assert token == lex_tok.type
assert lexer.token() is None

COMMENT_FIXTURES = [
("COMMENT", "//"),
("COMMENT", "////"),
("COMMENT", "// comment"),
("COMMENT", "// /* comment */"),
("COMMENT", "// // comment //"),
("COMMENT", "//" + f100),
("COMMENT", "#"),
("COMMENT", "##"),
("COMMENT", "# comment"),
("COMMENT", "# /* comment */"),
("COMMENT", "# # comment #"),
("COMMENT", "#" + f100),
("MULTICOMMENT", "/**/"),
("MULTICOMMENT", "/***/"),
("MULTICOMMENT", "/* comment */"),
("MULTICOMMENT", "/* // comment */"),
("MULTICOMMENT", "/* /* comment */"),
("MULTICOMMENT", "/*\n comment\n*/"),
("MULTICOMMENT", "/*" + f100 + "*/")
]

@pytest.mark.parametrize("token,input_string", COMMENT_FIXTURES)
def test_all_comments_extract(token, input_string):

print(input_string)

lexer = hcl.lexer.Lexer(export_comments='ALL')
lexer.input(input_string)

lex_tok = lexer.token()

if lex_tok is None:
assert token is None
else:
assert token == lex_tok.type
assert lexer.token() is None

# Testing EPLUS and EMINUS can't be done on their own since they
# require positive lookbehinds and therefore the lexer will find at least one
# other token
