From b98d9ba381efaed0b6f8c42121ee914176cf536f Mon Sep 17 00:00:00 2001
From: Kenji Okimoto
Date: Tue, 26 Dec 2017 12:18:07 +0900
Subject: [PATCH 1/2] Add parameter delimiter_pattern to parser_ltsv

This feature is taken from fluent-plugin-kv-parser:
https://github.com/fluent-plugins-nursery/fluent-plugin-kv-parser

fluent-plugin-kv-parser is very simple and similar to parser_ltsv,
so its functionality can be integrated into parser_ltsv.
---
 lib/fluent/plugin/parser_ltsv.rb       |  7 ++++++-
 test/plugin/test_parser_labeled_tsv.rb | 17 +++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/lib/fluent/plugin/parser_ltsv.rb b/lib/fluent/plugin/parser_ltsv.rb
index 630a9c5cf0..449b2ef35c 100644
--- a/lib/fluent/plugin/parser_ltsv.rb
+++ b/lib/fluent/plugin/parser_ltsv.rb
@@ -23,6 +23,10 @@ class LabeledTSVParser < Parser
 
       desc 'The delimiter character (or string) of TSV values'
       config_param :delimiter, :string, default: "\t"
+      desc 'The delimiter pattern of TSV values'
+      config_param :delimiter_pattern, default: nil do |value|
+        Regexp.compile(value[1..-2]) if value
+      end
       desc 'The delimiter character between field name and value'
       config_param :label_delimiter, :string, default: ":"
 
@@ -30,7 +34,8 @@ class LabeledTSVParser < Parser
 
       def parse(text)
         r = {}
-        text.split(@delimiter).each do |pair|
+        delimiter = @delimiter_pattern || @delimiter
+        text.split(delimiter).each do |pair|
           key, value = pair.split(@label_delimiter, 2)
           r[key] = value
         end
diff --git a/test/plugin/test_parser_labeled_tsv.rb b/test/plugin/test_parser_labeled_tsv.rb
index 76cac67029..bdd428bc95 100644
--- a/test/plugin/test_parser_labeled_tsv.rb
+++ b/test/plugin/test_parser_labeled_tsv.rb
@@ -125,4 +125,21 @@ def test_parse_with_null_empty_string
       assert_equal record['b'], ' '
     end
   end
+
+  data("single space" => ["k1=v1 k2=v2", { "k1" => "v1", "k2" => "v2" }],
+       "multiple space" => ["k1=v1    k2=v2", { "k1" => "v1", "k2" => "v2" }],
+       "reverse" => ["k2=v2 k1=v1", { "k1" => "v1", "k2" => "v2" }],
+       "tab" => ["k2=v2\tk1=v1", { "k1" => "v1", "k2" => "v2" }],
+       "tab and space" => ["k2=v2\t k1=v1", { "k1" => "v1", "k2" => "v2" }])
+  def test_parse_with_delimiter_pattern(data)
+    text, expected = data
+    parser = Fluent::Test::Driver::Parser.new(Fluent::Plugin::LabeledTSVParser)
+    parser.configure(
+      'delimiter_pattern' => '/\s+/',
+      'label_delimiter' => '='
+    )
+    parser.instance.parse(text) do |_time, record|
+      assert_equal(expected, record)
+    end
+  end
 end

From 3f79f933e80ab90dfe1fd04bf1ff11e69fb174e4 Mon Sep 17 00:00:00 2001
From: Kenji Okimoto
Date: Tue, 26 Dec 2017 16:49:58 +0900
Subject: [PATCH 2/2] Reduce redundant local variable assignment

`#parse` is called for every event, whereas `#configure` is called
only once at boot. Resolve the effective delimiter in `#configure`
instead of on every `#parse` call.
---
 lib/fluent/plugin/parser_ltsv.rb | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/fluent/plugin/parser_ltsv.rb b/lib/fluent/plugin/parser_ltsv.rb
index 449b2ef35c..2aa7d94015 100644
--- a/lib/fluent/plugin/parser_ltsv.rb
+++ b/lib/fluent/plugin/parser_ltsv.rb
@@ -32,10 +32,14 @@ class LabeledTSVParser < Parser
 
       config_set_default :time_key, 'time'
 
+      def configure(conf)
+        super
+        @delimiter = @delimiter_pattern || @delimiter
+      end
+
       def parse(text)
         r = {}
-        delimiter = @delimiter_pattern || @delimiter
-        text.split(delimiter).each do |pair|
+        text.split(@delimiter).each do |pair|
           key, value = pair.split(@label_delimiter, 2)
           r[key] = value
         end