From 13471519836b0fb713c6b5e4af7b155873b4a61f Mon Sep 17 00:00:00 2001 From: Chester Enright Date: Thu, 8 Jul 2021 14:00:25 -0500 Subject: [PATCH 1/6] fix: correct handling of invalid comments --- tests/test_api.py | 6 ++++++ toml/decoder.py | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_api.py b/tests/test_api.py index 1acc26f..caeceb0 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -272,6 +272,12 @@ def test_comment_preserve_decoder_encoder(): assert len(s) == len(test_str) and sorted(test_str) == sorted(s) + new_str = "# Comment outside area\n" + test_str + s = toml.dumps(toml.loads(new_str, + decoder=toml.TomlPreserveCommentDecoder()), + encoder=toml.TomlPreserveCommentEncoder()) + # This should match the original string, not the one with an invalid comment + assert len(s) == len(test_str) and sorted(test_str) == sorted(s) def test_deepcopy_timezone(): import copy diff --git a/toml/decoder.py b/toml/decoder.py index bf400e9..94722d6 100644 --- a/toml/decoder.py +++ b/toml/decoder.py @@ -1053,5 +1053,6 @@ def embed_comments(self, idx, currentlevel): return key, comment, beginline = self.saved_comments[idx] - currentlevel[key] = CommentValue(currentlevel[key], comment, beginline, + if key in currentlevel: + currentlevel[key] = CommentValue(currentlevel[key], comment, beginline, self._dict) From 5c1f945620bbfde3fe522002232e63a9db75678c Mon Sep 17 00:00:00 2001 From: Chester Enright <30327507+amunchet@users.noreply.github.com> Date: Sat, 17 Jul 2021 20:39:53 -0500 Subject: [PATCH 2/6] Merge Before Comments changes (#1) * test: wip - working on poc for before comments * feat: wip - have stored comments in decoder mostly working * test: wip - working on testing * feat: inline comment handling * test: successful passing of tests * chore: small cleanups * chore(tests): small tweaks Co-authored-by: Automatic Linting --- tests/test_before_tags.py | 128 ++++++++++++++++++++++++++++++++++++++ toml/decoder.py | 64 +++++++++++++++++-- 2 files changed, 188 insertions(+), 4 deletions(-) create mode 100644 tests/test_before_tags.py diff --git a/tests/test_before_tags.py b/tests/test_before_tags.py new file mode 100644 index 0000000..b1f25f3 --- /dev/null +++ b/tests/test_before_tags.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python +import toml + +TEST_STR = """ + # Global tags can be specified here in key="value" format. + [global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + user = "$USER" + + + # Configuration for telegraf agent + [agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + # # Gather Azure Storage Queue metrics + [[inputs.azure_storage_queue]] + + # ## Required Azure Storage Account name + + account_name = "mystorageaccount" # Inline comment + # + # ## Required Azure Storage Account access key + account_key = "storageaccountaccesskey" + # + # ## Set to false to disable peeking age of oldest message (executes faster) + peek_oldest_message_age = true + """ + + +def test_before_comments(): + """Tests handling before comments""" + + decoder = toml.TomlPreserveCommentDecoder(beforeComments=True) + data = toml.loads(TEST_STR, decoder=decoder) + + parsed_tags = {} + + for line in decoder.before_tags: + parsed_tags[line["name"]] = line + del parsed_tags[line["name"]]["name"] + + # Global tags + assert parsed_tags["[global_tags]"] == { + "comments": ["""Global tags can be specified here in key="value" format."""], + } + + # user = "$USER" + expected = { + "comments": [ + """dc = "us-east-1" # will tag all metrics with dc=us-east-1""", + 'rack = "1a"', + """Environment variables can be used as tags, and throughout the config file""" + ], + "parent": "[global_tags]" + } + + assert parsed_tags["user = \"$USER\""] == expected + + # Agent + expected = { + "comments": ["""Configuration for telegraf agent"""], + } + + assert parsed_tags["[agent]"] == expected + + # interval = "10s" + expected = { + "comments": [ + "Default data collection interval for all inputs" + ], + "parent": "[agent]" + } + assert parsed_tags["interval = \"10s\""] == expected + + # round_interval = true + expected = { + "comments": [ + "Rounds collection interval to 'interval'", + 'ie, if interval="10s" then always collect on :00, :10, :20, etc.' + ], + "parent": "[agent]" + } + assert parsed_tags["round_interval = true"] == expected + + expected = { + "comments": ["Gather Azure Storage Queue metrics"] + } + + assert parsed_tags["[[inputs.azure_storage_queue]]"] == expected + + # account_name + + expected = { + "comments": [ + "Required Azure Storage Account name", + "Inline comment" + ], + "parent": "[[inputs.azure_storage_queue]]" + } + + assert parsed_tags["account_name = \"mystorageaccount\""] == expected + + # account_key + expected = { + "comments": [ + "Required Azure Storage Account access key" + ], + "parent": "[[inputs.azure_storage_queue]]" + } + + assert parsed_tags["account_key = \"storageaccountaccesskey\""] == expected + + # peek_oldest_message_age + + expected = { + "comments": [ + "Set to false to disable peeking age of oldest message (executes faster)" + ], + "parent": "[[inputs.azure_storage_queue]]" + } + + assert parsed_tags["peek_oldest_message_age = true"] == expected diff --git a/toml/decoder.py b/toml/decoder.py index 94722d6..a82476f 100644 --- a/toml/decoder.py +++ b/toml/decoder.py @@ -371,7 +371,11 @@ def loads(s, _dict=dict, decoder=None): if idx > 0: pos += len(s[idx - 1]) + 1 - decoder.embed_comments(idx, currentlevel) + if "beforeComments" in dir(decoder) and decoder.beforeComments == True: + decoder.embed_comments(idx, currentlevel, line=line) + else: + decoder.embed_comments(idx, currentlevel) + if not multilinestr or multibackslash or '\n' not in multilinestr: line = line.strip() @@ -1041,18 +1045,70 @@ def embed_comments(self, idx, currentlevel): class TomlPreserveCommentDecoder(TomlDecoder): - def __init__(self, _dict=dict): + def __init__(self, beforeComments=False, _dict=dict): self.saved_comments = {} super(TomlPreserveCommentDecoder, self).__init__(_dict) + self.beforeComments = beforeComments + + self.stored_comments = [] + self.stored_line = 0 + + self.parent_line = "" + + self.before_tags = [] + def preserve_comment(self, line_no, key, comment, beginline): self.saved_comments[line_no] = (key, comment, beginline) - def embed_comments(self, idx, currentlevel): + def embed_comments(self, idx, currentlevel, line=""): + + def strip_comment(inp): + return re.sub(r'^(\s?#?)+', '',inp) + + if self.beforeComments: + if line.strip(): + temp = "\n".join(self.stored_comments) + + retval = { + "name" : line.strip(), + "comments" : [x for x in self.stored_comments if x != ""] + } + + if "]" in line: + self.parent_line = line.strip() + else: + retval["parent"] = self.parent_line + + # Handle inline comments - want to associate with the line they're on + if idx+1 in self.saved_comments and self.saved_comments[idx+1] != "": + retval["comments"].append(strip_comment(self.saved_comments[idx+1][1]).strip()) + + # BREAKING - to avoid duplicate comments with inlines, we will remove from saved_comments + del self.saved_comments[idx+1] + + self.before_tags.append(retval) + + self.stored_line = idx + self.stored_comments = [] + else: + found_comments = [strip_comment(self.saved_comments[x][1].strip()) for x in self.saved_comments if x > self.stored_line and x <= idx + 1 ] + + + self.stored_comments += found_comments + self.remove_before_duplicates() + if idx not in self.saved_comments: return - key, comment, beginline = self.saved_comments[idx] if key in currentlevel: currentlevel[key] = CommentValue(currentlevel[key], comment, beginline, self._dict) + def remove_before_duplicates(self): + seen = set() + result = [] + for item in self.stored_comments: + if item not in seen: + seen.add(item) + result.append(item) + self.stored_comments = result From 4361d182aad0608746ad8ebb98917f5a8628aac3 Mon Sep 17 00:00:00 2001 From: Chester Enright <30327507+amunchet@users.noreply.github.com> Date: Mon, 19 Jul 2021 14:20:51 -0500 Subject: [PATCH 3/6] chore: version bump --- toml/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toml/__init__.py b/toml/__init__.py index 7719ac2..1ff5fcd 100644 --- a/toml/__init__.py +++ b/toml/__init__.py @@ -6,7 +6,7 @@ from toml import encoder from toml import decoder -__version__ = "0.10.2" +__version__ = "0.10.3" _spec_ = "0.5.0" load = decoder.load From 0b14c491352c05259209805818d2a4636fcc289d Mon Sep 17 00:00:00 2001 From: Chester Enright <30327507+amunchet@users.noreply.github.com> Date: Tue, 20 Jul 2021 14:01:45 -0500 Subject: [PATCH 4/6] Fix before comments improper parents (#2) * test: created bugfix test * fix: corrected incorrect parent attribution --- tests/test_before_tags.py | 27 +++++++++++++++++++++++++++ toml/decoder.py | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/test_before_tags.py b/tests/test_before_tags.py index b1f25f3..43a823f 100644 --- a/tests/test_before_tags.py +++ b/tests/test_before_tags.py @@ -126,3 +126,30 @@ def test_before_comments(): } assert parsed_tags["peek_oldest_message_age = true"] == expected + +def test_bugfix_improper_parents(): + """ + Tests bug that sets arrays to be parents due to [ ] being present + """ + test_str = """ + [[outputs.influxdb]] + ## The full HTTP or UDP URL for your InfluxDB instance. + ## + ## Multiple URLs can be specified for a single cluster, only ONE of the + ## urls will be written to each interval. + # urls = ["unix:///var/run/influxdb.sock"] + # urls = ["udp://127.0.0.1:8089"] + urls = ["http://127.0.0.1:8086"] + + ## The target database for metrics; will be created as needed. + ## For UDP url endpoint database needs to be configured on server side. + # database = "telegraf" + ## The value of this tag will be used to determine the database. If this + ## tag is not set the 'database' option is used as the default. + database_tag = "" + """ + + decoder = toml.TomlPreserveCommentDecoder(beforeComments=True) + data = toml.loads(test_str, decoder=decoder) + + assert decoder.before_tags[-1]["parent"] == "[[outputs.influxdb]]" \ No newline at end of file diff --git a/toml/decoder.py b/toml/decoder.py index a82476f..1bc9588 100644 --- a/toml/decoder.py +++ b/toml/decoder.py @@ -1075,7 +1075,7 @@ def strip_comment(inp): "comments" : [x for x in self.stored_comments if x != ""] } - if "]" in line: + if "]" in line and "=" not in line: self.parent_line = line.strip() else: retval["parent"] = self.parent_line From 97c691f531dbcfe55873ae7902b97edcf64cde66 Mon Sep 17 00:00:00 2001 From: Chester Enright <30327507+amunchet@users.noreply.github.com> Date: Tue, 20 Jul 2021 14:12:33 -0500 Subject: [PATCH 5/6] chore: version bump --- toml/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toml/__init__.py b/toml/__init__.py index 1ff5fcd..ba31344 100644 --- a/toml/__init__.py +++ b/toml/__init__.py @@ -6,7 +6,7 @@ from toml import encoder from toml import decoder -__version__ = "0.10.3" +__version__ = "0.10.4" _spec_ = "0.5.0" load = decoder.load From 3fa4f8dbdda8e3da26baa5d3f663db567b29d7ea Mon Sep 17 00:00:00 2001 From: Chester Enright Date: Tue, 20 Jul 2021 18:54:44 -0500 Subject: [PATCH 6/6] fix: deleting from saved_comments made drift in before_tags --- tests/test_before_tags.py | 231 ++++++++++++++++++++++++++++++-------- toml/decoder.py | 8 +- 2 files changed, 190 insertions(+), 49 deletions(-) diff --git a/tests/test_before_tags.py b/tests/test_before_tags.py index 43a823f..c9f2a16 100644 --- a/tests/test_before_tags.py +++ b/tests/test_before_tags.py @@ -1,41 +1,41 @@ #!/usr/bin/env python import toml -TEST_STR = """ - # Global tags can be specified here in key="value" format. - [global_tags] - # dc = "us-east-1" # will tag all metrics with dc=us-east-1 - # rack = "1a" - ## Environment variables can be used as tags, and throughout the config file - user = "$USER" - - - # Configuration for telegraf agent - [agent] - ## Default data collection interval for all inputs - interval = "10s" - ## Rounds collection interval to 'interval' - ## ie, if interval="10s" then always collect on :00, :10, :20, etc. - round_interval = true - - # # Gather Azure Storage Queue metrics - [[inputs.azure_storage_queue]] - - # ## Required Azure Storage Account name - - account_name = "mystorageaccount" # Inline comment - # - # ## Required Azure Storage Account access key - account_key = "storageaccountaccesskey" - # - # ## Set to false to disable peeking age of oldest message (executes faster) - peek_oldest_message_age = true - """ + def test_before_comments(): """Tests handling before comments""" - + TEST_STR = """ + # Global tags can be specified here in key="value" format. + [global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + user = "$USER" + + + # Configuration for telegraf agent + [agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + # # Gather Azure Storage Queue metrics + [[inputs.azure_storage_queue]] + + # ## Required Azure Storage Account name + + account_name = "mystorageaccount" # Inline comment + # + # ## Required Azure Storage Account access key + account_key = "storageaccountaccesskey" + # + # ## Set to false to disable peeking age of oldest message (executes faster) + peek_oldest_message_age = true + """ decoder = toml.TomlPreserveCommentDecoder(beforeComments=True) data = toml.loads(TEST_STR, decoder=decoder) @@ -132,24 +132,161 @@ def test_bugfix_improper_parents(): Tests bug that sets arrays to be parents due to [ ] being present """ test_str = """ - [[outputs.influxdb]] - ## The full HTTP or UDP URL for your InfluxDB instance. - ## - ## Multiple URLs can be specified for a single cluster, only ONE of the - ## urls will be written to each interval. - # urls = ["unix:///var/run/influxdb.sock"] - # urls = ["udp://127.0.0.1:8089"] - urls = ["http://127.0.0.1:8086"] - - ## The target database for metrics; will be created as needed. - ## For UDP url endpoint database needs to be configured on server side. - # database = "telegraf" - ## The value of this tag will be used to determine the database. If this - ## tag is not set the 'database' option is used as the default. - database_tag = "" + [[outputs.influxdb]] + ## The full HTTP or UDP URL for your InfluxDB instance. + ## + ## Multiple URLs can be specified for a single cluster, only ONE of the + ## urls will be written to each interval. + # urls = ["unix:///var/run/influxdb.sock"] + # urls = ["udp://127.0.0.1:8089"] + urls = ["http://127.0.0.1:8086"] + + ## The target database for metrics; will be created as needed. + ## For UDP url endpoint database needs to be configured on server side. + # database = "telegraf" + ## The value of this tag will be used to determine the database. If this + ## tag is not set the 'database' option is used as the default. + database_tag = "" """ decoder = toml.TomlPreserveCommentDecoder(beforeComments=True) data = toml.loads(test_str, decoder=decoder) - assert decoder.before_tags[-1]["parent"] == "[[outputs.influxdb]]" \ No newline at end of file + assert decoder.before_tags[-1]["parent"] == "[[outputs.influxdb]]" + +def test_bugfix_incorrect_comments(): + """ + Tests bug that causes comments to be put with the wrong item + """ + test_str = """ + [[inputs.vsphere]] + # ## List of vCenter URLs to be monitored. These three lines must be uncommented + # ## and edited for the plugin to work. + + vcenters = [ "https://vcenter.local/sdk" ] + username = "user@corp.local" + password = "secret" + + # ## VMs + # ## Typical VM metrics (if omitted or empty, all metrics are collected) + vm_include = [ "/*/vm/**"] # Inventory path to VMs to collect (by default all are collected) + vm_exclude = [] # Inventory paths to exclude + vm_metric_include = ["cpu.demand.average","cpu.idle.summation",] + vm_metric_exclude = [] ## Nothing is excluded by default + vm_instances = true ## true by default + + # ## Hosts + + # ## Typical host metrics (if omitted or empty, all metrics are collected) + + host_include = [ "/*/host/**"] # Inventory path to hosts to collect (by default all are collected) + # # host_exclude [] # Inventory paths to exclude + + host_metric_include = ["cpu.coreUtilization.average","cpu.costop.summation",] + # ## Clusters + + cluster_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected) + cluster_exclude = [] # Inventory paths to exclude + cluster_metric_include = [] ## if omitted or empty, all metrics are collected + cluster_metric_exclude = [] ## Nothing excluded by default + cluster_instances = false ## false by default + + # ## Datastores + + datastore_include = [ "/*/datastore/**"] # Inventory path to datastores to collect (by default all are collected) + datastore_exclude = [] # Inventory paths to exclude + datastore_metric_include = [] ## if omitted or empty, all metrics are collected + datastore_metric_exclude = [] ## Nothing excluded by default + datastore_instances = false ## false by default + + # ## Datacenters + + datacenter_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected) + datacenter_exclude = [] # Inventory paths to exclude + datacenter_metric_include = [] ## if omitted or empty, all metrics are collected + datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default. + datacenter_instances = false ## false by default + + # ## Plugin Settings + # ## separator character to use for measurement and field names (default: "_") + + separator = "_" + + # ## number of objects to retrieve per query for realtime resources (vms and hosts) + # ## set to 64 for vCenter 5.5 and 6.0 (default: 256) + + max_query_objects = 256 + + # ## number of metrics to retrieve per query for non-realtime resources (clusters and datastores) + # ## set to 64 for vCenter 5.5 and 6.0 (default: 256) + + max_query_metrics = 256 + + # ## number of go routines to use for collection and discovery of objects and metrics + + collect_concurrency = 1 + discover_concurrency = 1 + + # ## the interval before (re)discovering objects subject to metrics collection (default: 300s) + + object_discovery_interval = "300s" + + # ## timeout applies to any of the api request made to vcenter + + timeout = "60s" + + # ## When set to true, all samples are sent as integers. This makes the output + # ## data types backwards compatible with Telegraf 1.9 or lower. Normally all + # ## samples from vCenter, with the exception of percentages, are integer + # ## values, but under some conditions, some averaging takes place internally in + # ## the plugin. Setting this flag to "false" will send values as floats to + # ## preserve the full precision when averaging takes place. + + use_int_samples = true + + # ## Custom attributes from vCenter can be very useful for queries in order to slice the + # ## metrics along different dimension and for forming ad-hoc relationships. They are disabled + # ## by default, since they can add a considerable amount of tags to the resulting metrics. To + # ## enable, simply set custom_attribute_exclude to [] (empty set) and use custom_attribute_include + # ## to select the attributes you want to include. + # ## By default, since they can add a considerable amount of tags to the resulting metrics. To + # ## enable, simply set custom_attribute_exclude to [] (empty set) and use custom_attribute_include + # ## to select the attributes you want to include. + + custom_attribute_include = [] + custom_attribute_exclude = ["*"] + + # ## The number of vSphere 5 minute metric collection cycles to look back for non-realtime metrics. In + # ## some versions (6.7, 7.0 and possible more), certain metrics, such as cluster metrics, may be reported + # ## with a significant delay (>30min). If this happens, try increasing this number. Please note that increasing + # ## it too much may cause performance issues. + + metric_lookback = 3 + + # ## Optional SSL Config + + ssl_ca = "/path/to/cafile" + ssl_cert = "/path/to/certfile" + ssl_key = "/path/to/keyfile" + # ## Use SSL but skip chain & host verification + + insecure_skip_verify = false + + # ## The Historical Interval value must match EXACTLY the interval in the daily + # # "Interval Duration" found on the VCenter server under Configure > General > Statistics > Statistic intervals + + historical_interval = "5m" + + + # # A Webhooks Event collector + + [[inputs.webhooks]] + # ## Address and port to host Webhook listener on + service_address = ":1619" + """ + decoder = toml.TomlPreserveCommentDecoder(beforeComments=True) + data = toml.loads(test_str, decoder=decoder) + + assert "Address and port to host Webhook listener on" in decoder.before_tags[-1]["comments"] + + assert "Nothing is excluded by default" in decoder.before_tags[7]["comments"] \ No newline at end of file diff --git a/toml/decoder.py b/toml/decoder.py index 1bc9588..0ac6550 100644 --- a/toml/decoder.py +++ b/toml/decoder.py @@ -1085,14 +1085,18 @@ def strip_comment(inp): retval["comments"].append(strip_comment(self.saved_comments[idx+1][1]).strip()) # BREAKING - to avoid duplicate comments with inlines, we will remove from saved_comments - del self.saved_comments[idx+1] + self.saved_comments[idx+1] = (self.saved_comments[idx+1][0], "", self.saved_comments[idx+1][2]) self.before_tags.append(retval) self.stored_line = idx self.stored_comments = [] else: - found_comments = [strip_comment(self.saved_comments[x][1].strip()) for x in self.saved_comments if x > self.stored_line and x <= idx + 1 ] + found_comments = [ + strip_comment(self.saved_comments[x][1].strip()) + for x in self.saved_comments + if x > self.stored_line and x <= idx + 1 + ] self.stored_comments += found_comments