uiri · amunchet · Jul 8, 2021 · Jul 17, 2021 · Jul 17, 2021 · Jul 17, 2021
diff --git a/tests/test_api.py b/tests/test_api.py
@@ -272,6 +272,12 @@ def test_comment_preserve_decoder_encoder():
 
     assert len(s) == len(test_str) and sorted(test_str) == sorted(s)
 
+    new_str = "# Comment outside area\n" + test_str
+    s = toml.dumps(toml.loads(new_str,
+                              decoder=toml.TomlPreserveCommentDecoder()),
+                   encoder=toml.TomlPreserveCommentEncoder())
+    # The leading comment is expected to be dropped: compare with the original
+    assert len(s) == len(test_str) and sorted(test_str) == sorted(s)
 
 def test_deepcopy_timezone():
     import copy

diff --git a/tests/test_before_tags.py b/tests/test_before_tags.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+"""Tests for ``TomlPreserveCommentDecoder`` with ``beforeComments=True``."""
+import toml
+
+# Sample configuration mixing full-line comments, commented-out keys, an
+# inline comment and blank lines around table headers and key/value lines.
+TEST_STR = """
+            # Global tags can be specified here in key="value" format.
+            [global_tags]
+            # dc = "us-east-1" # will tag all metrics with dc=us-east-1
+            # rack = "1a"
+            ## Environment variables can be used as tags, and throughout the config file
+            user = "$USER"
+
+
+            # Configuration for telegraf agent
+            [agent]
+            ## Default data collection interval for all inputs
+            interval = "10s"
+            ## Rounds collection interval to 'interval'
+            ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
+            round_interval = true
+
+            # # Gather Azure Storage Queue metrics
+            [[inputs.azure_storage_queue]]
+
+            #   ## Required Azure Storage Account name
+
+            account_name = "mystorageaccount" # Inline comment
+            #
+            #   ## Required Azure Storage Account access key
+            account_key = "storageaccountaccesskey"
+            #
+            #   ## Set to false to disable peeking age of oldest message (executes faster)
+                peek_oldest_message_age = true
+    """
+
+
+def test_before_comments():
+    """Check the entries ``before_tags`` holds after loading ``TEST_STR``.
+
+    Each table header and key/value line is expected to come with the
+    comments placed before it, its own inline comment if any and, for
+    key/value lines, the header of the enclosing table as ``parent``.
+    """
+    decoder = toml.TomlPreserveCommentDecoder(beforeComments=True)
+    # Only the decoder's bookkeeping is inspected, not the parsed document.
+    toml.loads(TEST_STR, decoder=decoder)
+
+    # Index the entries by line text, working on copies so that the
+    # decoder's own records are not modified by the test.
+    parsed_tags = {}
+    for entry in decoder.before_tags:
+        details = dict(entry)
+        parsed_tags[details.pop("name")] = details
+
+    # [global_tags]
+    assert parsed_tags["[global_tags]"] == {
+        "comments": [
+            'Global tags can be specified here in key="value" format.',
+        ],
+    }
+
+    # user = "$USER"
+    assert parsed_tags['user = "$USER"'] == {
+        "comments": [
+            'dc = "us-east-1" # will tag all metrics with dc=us-east-1',
+            'rack = "1a"',
+            "Environment variables can be used as tags, and throughout the config file",
+        ],
+        "parent": "[global_tags]",
+    }
+
+    # [agent]
+    assert parsed_tags["[agent]"] == {
+        "comments": ["Configuration for telegraf agent"],
+    }
+
+    # interval = "10s"
+    assert parsed_tags['interval = "10s"'] == {
+        "comments": ["Default data collection interval for all inputs"],
+        "parent": "[agent]",
+    }
+
+    # round_interval = true
+    assert parsed_tags["round_interval = true"] == {
+        "comments": [
+            "Rounds collection interval to 'interval'",
+            'ie, if interval="10s" then always collect on :00, :10, :20, etc.',
+        ],
+        "parent": "[agent]",
+    }
+
+    # [[inputs.azure_storage_queue]]
+    assert parsed_tags["[[inputs.azure_storage_queue]]"] == {
+        "comments": ["Gather Azure Storage Queue metrics"],
+    }
+
+    # account_name: preceding comment first, then the inline comment
+    assert parsed_tags['account_name = "mystorageaccount"'] == {
+        "comments": [
+            "Required Azure Storage Account name",
+            "Inline comment",
+        ],
+        "parent": "[[inputs.azure_storage_queue]]",
+    }
+
+    # account_key
+    assert parsed_tags['account_key = "storageaccountaccesskey"'] == {
+        "comments": ["Required Azure Storage Account access key"],
+        "parent": "[[inputs.azure_storage_queue]]",
+    }
+
+    # peek_oldest_message_age
+    assert parsed_tags["peek_oldest_message_age = true"] == {
+        "comments": [
+            "Set to false to disable peeking age of oldest message (executes faster)",
+        ],
+        "parent": "[[inputs.azure_storage_queue]]",
+    }
diff --git a/toml/decoder.py b/toml/decoder.py
@@ -371,7 +371,11 @@ def loads(s, _dict=dict, decoder=None):
         if idx > 0:
             pos += len(s[idx - 1]) + 1
 
-        decoder.embed_comments(idx, currentlevel)
+        # Only decoders that opted into beforeComments get the current line.
+        if getattr(decoder, "beforeComments", False):
+            decoder.embed_comments(idx, currentlevel, line=line)
+        else:
+            decoder.embed_comments(idx, currentlevel)
 
         if not multilinestr or multibackslash or '\n' not in multilinestr:
             line = line.strip()
@@ -1041,17 +1045,113 @@ def embed_comments(self, idx, currentlevel):
 
 class TomlPreserveCommentDecoder(TomlDecoder):
+    """Decoder that stores comments and re-attaches them to their keys.
+
+    With ``beforeComments`` enabled it also fills ``before_tags`` with one
+    entry per non-blank line, holding the comments found before that line.
+    """
 
-    def __init__(self, _dict=dict):
+    def __init__(self, beforeComments=False, _dict=dict):
+        """Set up the comment stores.
+
+        ``beforeComments`` switches on the ``before_tags`` bookkeeping done
+        by ``embed_comments``; ``_dict`` is handed to ``TomlDecoder``.
+        """
+        # ``_dict`` used to be the only positional parameter: a callable in
+        # first position is therefore treated as the mapping factory.
+        if callable(beforeComments):
+            beforeComments, _dict = False, beforeComments
         self.saved_comments = {}
         super(TomlPreserveCommentDecoder, self).__init__(_dict)
 
+        self.beforeComments = beforeComments
+
+        # Stripped comments gathered since the last non-blank line.
+        self.stored_comments = []
+        # ``idx`` of the last non-blank line that was recorded.
+        self.stored_line = 0
+
+        # Latest table header line, reported as ``parent`` of later lines.
+        self.parent_line = ""
+
+        # One dict per non-blank line: "name", "comments", maybe "parent".
+        self.before_tags = []
+
     def preserve_comment(self, line_no, key, comment, beginline):
         self.saved_comments[line_no] = (key, comment, beginline)
 
-    def embed_comments(self, idx, currentlevel):
+    def embed_comments(self, idx, currentlevel, line=""):
+        """Wrap the value whose comment is saved under ``idx``.
+
+        If ``saved_comments`` has an entry for ``idx`` and its key exists in
+        ``currentlevel``, the value is replaced by a ``CommentValue``.
+
+        With ``beforeComments`` enabled, ``line`` also drives ``before_tags``:
+        a non-blank line is recorded together with the comments gathered so
+        far, a blank line gathers the comments saved up to ``idx + 1``.
+        """
+        if self.beforeComments:
+            stripped = line.strip()
+            if stripped:
+                self._record_before_tag(idx, stripped)
+            else:
+                self._collect_before_comments(idx)
+
         if idx not in self.saved_comments:
             return
-
         key, comment, beginline = self.saved_comments[idx]
-        currentlevel[key] = CommentValue(currentlevel[key], comment, beginline,
+        # A comment whose key is absent from this level is skipped.
+        if key in currentlevel:
+            currentlevel[key] = CommentValue(currentlevel[key], comment, beginline,
                                          self._dict)
+
+    def remove_before_duplicates(self):
+        """Drop repeated texts from ``stored_comments``, keeping the first."""
+        seen = set()
+        unique = []
+        for text in self.stored_comments:
+            if text not in seen:
+                seen.add(text)
+                unique.append(text)
+        self.stored_comments = unique
+
+    @staticmethod
+    def _strip_comment(text):
+        """Remove the leading run of whitespace and ``#`` characters."""
+        return re.sub(r'^[\s#]+', '', text)
+
+    def _record_before_tag(self, idx, name):
+        """Append the ``before_tags`` entry for the non-blank line ``name``."""
+        entry = {
+            "name": name,
+            "comments": [text for text in self.stored_comments if text],
+        }
+
+        # Only a line that starts with "[" is a table header; testing for
+        # "]" anywhere would also catch values such as ``ports = [1, 2]``.
+        if name.startswith("["):
+            self.parent_line = name
+        else:
+            entry["parent"] = self.parent_line
+
+        # The line's own (inline) comment is saved under ``idx + 1``. It is
+        # moved into this entry, i.e. removed from ``saved_comments``, so
+        # that it is not gathered again for the next line.
+        inline = self.saved_comments.pop(idx + 1, None)
+        if inline is not None:
+            entry["comments"].append(self._strip_comment(inline[1]).strip())
+
+        self.before_tags.append(entry)
+        self.stored_line = idx
+        self.stored_comments = []
+
+    def _collect_before_comments(self, idx):
+        """Gather the comments saved after the last recorded line."""
+        # Sorted keys keep the comments in line order even where dicts do
+        # not preserve insertion order.
+        self.stored_comments += [
+            self._strip_comment(self.saved_comments[line_no][1].strip())
+            for line_no in sorted(self.saved_comments)
+            if self.stored_line < line_no <= idx + 1
+        ]
+        # Each blank line rescans the same range, hence the de-duplication.
+        self.remove_before_duplicates()