From a34ff30682a18b3f0dd353a450e899f0509671e1 Mon Sep 17 00:00:00 2001 From: Federico Della Rovere Date: Fri, 13 Mar 2020 15:51:31 +0100 Subject: [PATCH] optional export code comments --- src/hcl/api.py | 20 +++++-- src/hcl/lexer.py | 26 ++++++++- src/hcl/parser.py | 17 +++++- tests/test_lexer.py | 130 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 184 insertions(+), 9 deletions(-) diff --git a/src/hcl/api.py b/src/hcl/api.py index e5390f6..a8e7f5a 100644 --- a/src/hcl/api.py +++ b/src/hcl/api.py @@ -42,28 +42,40 @@ def isHcl(s): raise ValueError("No HCL object could be decoded") -def load(fp): +def load(fp, export_comments=None): ''' Deserializes a file-pointer like object into a python dictionary. The contents of the file must either be JSON or HCL. :param fp: An object that has a read() function + :param export_comments: optional string that allows exporting code comments as well. It can be: 'LINE': to export only single-line comments (// or #) 'MULTILINE': to export only multi-line comments (/* ... */) 'ALL': to export both 'LINE' and 'MULTILINE' comments default None :returns: Dictionary ''' - return loads(fp.read()) + return loads(fp.read(), export_comments=export_comments) -def loads(s): +def loads(s, export_comments=None): ''' Deserializes a string and converts it to a dictionary. The contents of the string must either be JSON or HCL. + :param s: string to parse + :param export_comments: optional string that allows exporting code comments as well. It can be: + 'LINE': to export only single-line comments (// or #) + 'MULTILINE': to export only multi-line comments (/* ... 
*/) + 'ALL': to export both 'LINE' and 'MULTILINE' comments + default None + :returns: Dictionary ''' s = u(s) if isHcl(s): - return HclParser().parse(s) + return HclParser().parse(s, export_comments=export_comments) else: return json.loads(s) diff --git a/src/hcl/lexer.py b/src/hcl/lexer.py index 92b1439..6a568e9 100644 --- a/src/hcl/lexer.py +++ b/src/hcl/lexer.py @@ -34,6 +34,8 @@ class Lexer(object): 'FLOAT', 'NUMBER', 'COMMA', + 'COMMENT', + 'MULTICOMMENT', 'IDENTIFIER', 'EQUAL', 'STRING', @@ -68,6 +70,8 @@ class Lexer(object): ('tabbedheredoc', 'exclusive'), ) + can_export_comments = [] + def t_BOOL(self, t): r'(true)|(false)' t.value = t.value == 'true' @@ -319,12 +323,15 @@ def t_heredoc_eof(self, t): def t_COMMENT(self, t): r'(\#|(//)).*' - pass + if 'COMMENT' in self.can_export_comments: + t.value = t.value.lstrip('#').lstrip('//').lstrip() + return t def t_MULTICOMMENT(self, t): r'/\*(.|\n)*?(\*/)' t.lexer.lineno += t.value.count('\n') - pass + if 'MULTICOMMENT' in self.can_export_comments: + return t # Define a rule so we can track line numbers def t_newline(self, t): @@ -356,7 +363,20 @@ def t_error(self, t): else: _raise_error(t) - def __init__(self): + def __init__(self, export_comments=None): + if export_comments is not None: + if export_comments == 'LINE': + self.can_export_comments = ['COMMENT'] + elif export_comments == 'MULTILINE': + self.can_export_comments = ['MULTICOMMENT'] + elif export_comments == 'ALL': + self.can_export_comments = ['COMMENT', 'MULTICOMMENT'] + else: + raise ValueError( + 'Only `LINE`, `MULTILINE` and `ALL` value are allowed for ' + '`export_comments`. given: `%s`.' 
% export_comments ) self.lex = lex.lex( module=self, debug=False, diff --git a/src/hcl/parser.py b/src/hcl/parser.py index 4f83cd1..df0a208 100644 --- a/src/hcl/parser.py +++ b/src/hcl/parser.py @@ -50,6 +50,8 @@ class HclParser(object): 'NUMBER', 'COMMA', 'COMMAEND', + 'COMMENT', + 'MULTICOMMENT', 'IDENTIFIER', 'EQUAL', 'STRING', @@ -568,6 +570,15 @@ def p_exp_1(self, p): self.print_p(p) p[0] = "e-{0}".format(p[2]) + def p_comment_0(self, p): + ''' + block : COMMENT + | MULTICOMMENT + ''' + if DEBUG: + self.print_p(p) + p[0] = ("comment-L{:03d}".format(p.lineno(1)), p[1]) + # useful for debugging the parser def print_p(self, p): if DEBUG: @@ -606,5 +617,7 @@ def __init__(self): module=self, debug=False, optimize=1, picklefile=pickle_file ) - def parse(self, s): - return self.yacc.parse(s, lexer=Lexer()) + def parse(self, s, export_comments=None): + return self.yacc.parse( + s, lexer=Lexer(export_comments=export_comments), debug=False + ) diff --git a/tests/test_lexer.py b/tests/test_lexer.py index 8628116..1a76c10 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -391,6 +391,136 @@ def test_tokens(token, input_string): assert token == lex_tok.type assert lexer.token() is None +@pytest.mark.parametrize("token,input_string", TOKEN_FIXTURES) +def test_tokens_with_export_comments_wrong_parameter(token, input_string): + + print(input_string) + + lexer = hcl.lexer.Lexer(export_comments="WRONG") + lexer.input(input_string) + + lex_tok = lexer.token() + + if lex_tok is None: + assert token is None + else: + assert token == lex_tok.type + assert lexer.token() is None + +ONE_LINE_COMMENT_FIXTURES = [ + ("COMMENT", "//"), + ("COMMENT", "////"), + ("COMMENT", "// comment"), + ("COMMENT", "// /* comment */"), + ("COMMENT", "// // comment //"), + ("COMMENT", "//" + f100), + ("COMMENT", "#"), + ("COMMENT", "##"), + ("COMMENT", "# comment"), + ("COMMENT", "# /* comment */"), + ("COMMENT", "# # comment #"), + ("COMMENT", "#" + f100), + (None, "/**/"), + (None, 
"/***/"), + (None, "/* comment */"), + (None, "/* // comment */"), + (None, "/* /* comment */"), + (None, "/*\n comment\n*/"), + (None, "/*" + f100 + "*/") +] + +@pytest.mark.parametrize("token,input_string", ONE_LINE_COMMENT_FIXTURES) +def test_one_line_comments_extract(token, input_string): + + print(input_string) + + lexer = hcl.lexer.Lexer(export_comments='LINE') + lexer.input(input_string) + + lex_tok = lexer.token() + + if lex_tok is None: + assert token is None + else: + assert token == lex_tok.type + assert lexer.token() is None + +MULTI_LINE_COMMENT_FIXTURES = [ + (None, "//"), + (None, "////"), + (None, "// comment"), + (None, "// /* comment */"), + (None, "// // comment //"), + (None, "//" + f100), + (None, "#"), + (None, "##"), + (None, "# comment"), + (None, "# /* comment */"), + (None, "# # comment #"), + (None, "#" + f100), + ("MULTICOMMENT", "/**/"), + ("MULTICOMMENT", "/***/"), + ("MULTICOMMENT", "/* comment */"), + ("MULTICOMMENT", "/* // comment */"), + ("MULTICOMMENT", "/* /* comment */"), + ("MULTICOMMENT", "/*\n comment\n*/"), + ("MULTICOMMENT", "/*" + f100 + "*/") +] + +@pytest.mark.parametrize("token,input_string", MULTI_LINE_COMMENT_FIXTURES) +def test_multi_line_comments_extract(token, input_string): + + print(input_string) + + lexer = hcl.lexer.Lexer(export_comments='MULTILINE') + lexer.input(input_string) + + lex_tok = lexer.token() + + if lex_tok is None: + assert token is None + else: + assert token == lex_tok.type + assert lexer.token() is None + +COMMENT_FIXTURES = [ + ("COMMENT", "//"), + ("COMMENT", "////"), + ("COMMENT", "// comment"), + ("COMMENT", "// /* comment */"), + ("COMMENT", "// // comment //"), + ("COMMENT", "//" + f100), + ("COMMENT", "#"), + ("COMMENT", "##"), + ("COMMENT", "# comment"), + ("COMMENT", "# /* comment */"), + ("COMMENT", "# # comment #"), + ("COMMENT", "#" + f100), + ("MULTICOMMENT", "/**/"), + ("MULTICOMMENT", "/***/"), + ("MULTICOMMENT", "/* comment */"), + ("MULTICOMMENT", "/* // comment */"), + 
("MULTICOMMENT", "/* /* comment */"), + ("MULTICOMMENT", "/*\n comment\n*/"), + ("MULTICOMMENT", "/*" + f100 + "*/") +] + +@pytest.mark.parametrize("token,input_string", COMMENT_FIXTURES) +def test_multi_line_comments_extract(token, input_string): + + print(input_string) + + lexer = hcl.lexer.Lexer(export_comments='ALL') + lexer.input(input_string) + + lex_tok = lexer.token() + + if lex_tok is None: + assert token is None + else: + assert token == lex_tok.type + assert lexer.token() is None + # Testing EPLUS and EMINUS can't be done on their own since they # require positive lookbehinds and therefore the lexer will find at least one # other token