diff --git a/tests/test_api.py b/tests/test_api.py
index 1acc26f..caeceb0 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -272,6 +272,12 @@ def test_comment_preserve_decoder_encoder():
 
     assert len(s) == len(test_str) and sorted(test_str) == sorted(s)
 
+    new_str = "# Comment outside area\n" + test_str
+    s = toml.dumps(toml.loads(new_str,
+                              decoder=toml.TomlPreserveCommentDecoder()),
+                   encoder=toml.TomlPreserveCommentEncoder())
+    # Must match the original string: the out-of-table comment is dropped.
+    assert len(s) == len(test_str) and sorted(test_str) == sorted(s)
 
 def test_deepcopy_timezone():
     import copy
diff --git a/tests/test_before_tags.py b/tests/test_before_tags.py
new file mode 100644
index 0000000..b1f25f3
--- /dev/null
+++ b/tests/test_before_tags.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+import toml
+
+TEST_STR = """
+    # Global tags can be specified here in key="value" format.
+    [global_tags]
+    # dc = "us-east-1" # will tag all metrics with dc=us-east-1
+    # rack = "1a"
+    ## Environment variables can be used as tags, and throughout the config file
+    user = "$USER"
+
+
+    # Configuration for telegraf agent
+    [agent]
+    ## Default data collection interval for all inputs
+    interval = "10s"
+    ## Rounds collection interval to 'interval'
+    ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
+    round_interval = true
+
+    # # Gather Azure Storage Queue metrics
+    [[inputs.azure_storage_queue]]
+
+    # ## Required Azure Storage Account name
+
+    account_name = "mystorageaccount" # Inline comment
+    #
+    # ## Required Azure Storage Account access key
+    account_key = "storageaccountaccesskey"
+    #
+    # ## Set to false to disable peeking age of oldest message (executes faster)
+    peek_oldest_message_age = true
+    """
+
+
+def test_before_comments():
+    """Comments preceding each key/table are reported in before_tags."""
+
+    decoder = toml.TomlPreserveCommentDecoder(beforeComments=True)
+    toml.loads(TEST_STR, decoder=decoder)
+
+    parsed_tags = {}
+
+    for line in decoder.before_tags:
+        entry = dict(line)
+        parsed_tags[entry.pop("name")] = entry
+
+    # Global tags
+    assert parsed_tags["[global_tags]"] == {
+        "comments": ["""Global tags can be specified here in key="value" format."""],
+    }
+
+    # user = "$USER"
+    expected = {
+        "comments": [
+            """dc = "us-east-1" # will tag all metrics with dc=us-east-1""",
+            'rack = "1a"',
+            """Environment variables can be used as tags, and throughout the config file"""
+        ],
+        "parent": "[global_tags]"
+    }
+
+    assert parsed_tags["user = \"$USER\""] == expected
+
+    # Agent
+    expected = {
+        "comments": ["""Configuration for telegraf agent"""],
+    }
+
+    assert parsed_tags["[agent]"] == expected
+
+    # interval = "10s"
+    expected = {
+        "comments": [
+            "Default data collection interval for all inputs"
+        ],
+        "parent": "[agent]"
+    }
+    assert parsed_tags["interval = \"10s\""] == expected
+
+    # round_interval = true
+    expected = {
+        "comments": [
+            "Rounds collection interval to 'interval'",
+            'ie, if interval="10s" then always collect on :00, :10, :20, etc.'
+        ],
+        "parent": "[agent]"
+    }
+    assert parsed_tags["round_interval = true"] == expected
+
+    expected = {
+        "comments": ["Gather Azure Storage Queue metrics"]
+    }
+
+    assert parsed_tags["[[inputs.azure_storage_queue]]"] == expected
+
+    # account_name
+
+    expected = {
+        "comments": [
+            "Required Azure Storage Account name",
+            "Inline comment"
+        ],
+        "parent": "[[inputs.azure_storage_queue]]"
+    }
+
+    assert parsed_tags["account_name = \"mystorageaccount\""] == expected
+
+    # account_key
+    expected = {
+        "comments": [
+            "Required Azure Storage Account access key"
+        ],
+        "parent": "[[inputs.azure_storage_queue]]"
+    }
+
+    assert parsed_tags["account_key = \"storageaccountaccesskey\""] == expected
+
+    # peek_oldest_message_age
+
+    expected = {
+        "comments": [
+            "Set to false to disable peeking age of oldest message (executes faster)"
+        ],
+        "parent": "[[inputs.azure_storage_queue]]"
+    }
+
+    assert parsed_tags["peek_oldest_message_age = true"] == expected
diff --git a/toml/decoder.py b/toml/decoder.py
index bf400e9..a82476f 100644
--- a/toml/decoder.py
+++ b/toml/decoder.py
@@ -371,7 +371,12 @@ def loads(s, _dict=dict, decoder=None):
         if idx > 0:
             pos += len(s[idx - 1]) + 1
 
-        decoder.embed_comments(idx, currentlevel)
+        # Only decoders that opt in receive the raw line; every other
+        # decoder keeps the original two-argument call.
+        if getattr(decoder, "beforeComments", False):
+            decoder.embed_comments(idx, currentlevel, line=line)
+        else:
+            decoder.embed_comments(idx, currentlevel)
 
         if not multilinestr or multibackslash or '\n' not in multilinestr:
             line = line.strip()
@@ -1041,17 +1046,95 @@ def embed_comments(self, idx, currentlevel):
 
 class TomlPreserveCommentDecoder(TomlDecoder):
 
-    def __init__(self, _dict=dict):
+    def __init__(self, _dict=dict, beforeComments=False):
         self.saved_comments = {}
         super(TomlPreserveCommentDecoder, self).__init__(_dict)
 
+        # ``_dict`` stays first so positional callers keep working.
+        self.beforeComments = beforeComments
+        # Comments gathered since the last non-blank line.
+        self.stored_comments = []
+        # Index of the last non-blank line that was tagged.
+        self.stored_line = 0
+        # Last table header seen; reported as "parent" of later keys.
+        self.parent_line = ""
+        # One dict per non-blank line: "name", "comments", maybe "parent".
+        self.before_tags = []
+
     def preserve_comment(self, line_no, key, comment, beginline):
         self.saved_comments[line_no] = (key, comment, beginline)
 
-    def embed_comments(self, idx, currentlevel):
+    def embed_comments(self, idx, currentlevel, line=""):
+        """Wrap the value saved for line ``idx`` in a CommentValue.
+
+        With ``beforeComments`` enabled, ``line`` is first handed to
+        ``_collect_before_comments`` to update ``self.before_tags``.
+        """
+        if self.beforeComments:
+            self._collect_before_comments(idx, line)
+
         if idx not in self.saved_comments:
             return
 
         key, comment, beginline = self.saved_comments[idx]
-        currentlevel[key] = CommentValue(currentlevel[key], comment, beginline,
-                                         self._dict)
+        # Skip comments whose key is not in this level instead of raising.
+        if key in currentlevel:
+            currentlevel[key] = CommentValue(
+                currentlevel[key], comment, beginline, self._dict)
+
+    @staticmethod
+    def _strip_comment(inp):
+        """Remove the leading run of whitespace and '#' characters."""
+        return re.sub(r'^(\s?#?)+', '', inp)
+
+    def _collect_before_comments(self, idx, line):
+        """Record in ``before_tags`` the comments preceding ``line``.
+
+        Inline comments are looked up under ``idx + 1``.
+        NOTE(review): presumably ``saved_comments`` is keyed by 1-based
+        line number -- confirm against ``loads``.
+        """
+        stripped = line.strip()
+        if not stripped:
+            # Nothing to tag on an empty line: just gather the comments
+            # saved since the last tagged line.
+            self.stored_comments += [
+                self._strip_comment(self.saved_comments[x][1].strip())
+                for x in self.saved_comments
+                if self.stored_line < x <= idx + 1
+            ]
+            self.remove_before_duplicates()
+            return
+
+        retval = {
+            "name": stripped,
+            "comments": [x for x in self.stored_comments if x != ""],
+        }
+        # A table header starts with "["; a bare "]" test would also
+        # match keys whose value is an array.
+        if stripped.startswith("["):
+            self.parent_line = stripped
+        else:
+            retval["parent"] = self.parent_line
+
+        # BREAKING: the inline comment is tied to its own line and removed
+        # from saved_comments so it is not reported twice.
+        inline = self.saved_comments.pop(idx + 1, None)
+        if inline is not None:
+            text = self._strip_comment(inline[1]).strip()
+            if text:
+                retval["comments"].append(text)
+
+        self.before_tags.append(retval)
+        self.stored_line = idx
+        self.stored_comments = []
+
+    def remove_before_duplicates(self):
+        """Drop repeated entries from stored_comments, keeping order."""
+        seen = set()
+        result = []
+        for item in self.stored_comments:
+            if item not in seen:
+                seen.add(item)
+                result.append(item)
+        self.stored_comments = result