From bad8c6f0b2a145f7b5dd240ea8ea61421b160d38 Mon Sep 17 00:00:00 2001
From: Rainer Gerhards
Date: Tue, 18 May 2021 12:19:35 +0200
Subject: [PATCH] add new parameters for quoted-string parser

---
 src/parser.c                         | 100 ++++++++++++++++++++++++---
 src/parser.h                         |   4 +-
 src/pdag.c                           |   4 +-
 tests/Makefile.am                    |   3 +
 tests/quote-string-dash-empty.sh     |  27 ++++++++
 tests/quote-string-escape.sh         |  18 +++++
 tests/quote-string-quote-optional.sh |  20 ++++++
 7 files changed, 162 insertions(+), 14 deletions(-)
 create mode 100755 tests/quote-string-dash-empty.sh
 create mode 100755 tests/quote-string-escape.sh
 create mode 100755 tests/quote-string-quote-optional.sh

diff --git a/src/parser.c b/src/parser.c
index 7343d844..2d4cbcfc 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,6 +1,6 @@
 /*
  * liblognorm - a fast samples-based log normalization library
- * Copyright 2010-2018 by Rainer Gerhards and Adiscon GmbH.
+ * Copyright 2010-2021 by Rainer Gerhards and Adiscon GmbH.
  *
  * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013
  *
@@ -1644,6 +1644,16 @@ PARSER_Parse(OpQuotedString)
 }


+
+/**
+ * Options for the quoted-string parser. They are set via the "option.*"
+ * rule parameters in PARSER_Construct(QuotedString); all default to 0 (off).
+ */
+struct data_QuotedString {
+	int dashIsEmpty;	/**< return an empty string for the value "-" (in quotes) */
+	int quotesOptional;	/**< also accept an unquoted value, which ends at the first space */
+	int supportEscape;	/**< a backslash escapes the character following it */
+};
 /**
  * Parse a quoted string. In this initial implementation, escaping of the quote
  * char is not supported. A quoted string is one start starts with a double quote,
@@ -1653,38 +1663,108 @@ PARSER_Parse(OpQuotedString)
  */
 PARSER_Parse(QuotedString)
 	const char *c;
+	struct data_QuotedString *const data = (struct data_QuotedString*) pdata;
 	size_t i;
+	int hadQuote = 0;

 	assert(npb->str != NULL);
 	assert(offs != NULL);
 	assert(parsed != NULL);
 	c = npb->str;
 	i = *offs;
-	if(i + 2 > npb->strLen)
-		goto done; /* needs at least 2 characters */
+	if(i + 1 > npb->strLen)
+		goto done; /* needs at least 1 character (with quotesOptional...) */

-	if(c[i] != '"')
-		goto done;
-	++i;
+	if(c[i] == '"') {
+		hadQuote = 1;
+		++i;
+	} else if(!data->quotesOptional) {
+		goto done;
+	}
+	/* a quoted value ends at the next double quote, an unquoted one at a space */
+	const char terminator = hadQuote ? '"' : ' ';

 	/* search end of string */
-	while(i < npb->strLen && c[i] != '"')
+	while(i < npb->strLen && c[i] != terminator) {
+		/* a trailing backslash escapes nothing: only skip a char that
+		 * exists, so that i never moves past npb->strLen */
+		if(data->supportEscape && c[i] == '\\' && (i + 1 < npb->strLen)) {
+			i++; /* next char is escaped */
+		}
 		i++;
+	}

-	if(i == npb->strLen || c[i] != '"')
+	if(hadQuote && (i == npb->strLen || c[i] != '"'))
 		goto done;

 	/* success, persist */
-	*parsed = i + 1 - *offs; /* "eat" terminal double quote */
+	/* "eat" the terminal double quote, but only if the string was quoted */
+	const size_t charsFound = i - *offs + (hadQuote ? 1 : 0);
+	*parsed = charsFound;

 	/* create JSON value to save quoted string contents */
 	if(value != NULL) {
-		*value = json_object_new_string_len(npb->str+(*offs), *parsed);
+		/* with option.dashIsEmpty, the value "-" (in quotes) yields an empty string */
+		if(charsFound == 3 && data->dashIsEmpty && !strncmp(npb->str+(*offs), "\"-\"", 3)) {
+			*value = json_object_new_string_len("", 0);
+		} else {
+			*value = json_object_new_string_len(npb->str+(*offs), *parsed);
+		}
 	}
 	r = 0; /* success */
 done:
 	return r;
 }
+/**
+ * Create the parser's configuration from the rule's JSON parameters.
+ * Recognized (case-insensitive) keys are "option.quotesOptional",
+ * "option.dashIsEmpty" and "option.supportEscape"; any other key is
+ * reported via ln_errprintf() but does not make construction fail.
+ */
+PARSER_Construct(QuotedString)
+{
+	int r = 0;
+	struct data_QuotedString *data = (struct data_QuotedString*) calloc(1, sizeof(struct data_QuotedString));
+
+	if(data == NULL) {
+		r = LN_NOMEM;
+		goto done;
+	}
+	if(json == NULL)
+		goto done;
+
+	struct json_object_iterator it = json_object_iter_begin(json);
+	struct json_object_iterator itEnd = json_object_iter_end(json);
+	while (!json_object_iter_equal(&it, &itEnd)) {
+		const char *key = json_object_iter_peek_name(&it);
+		struct json_object *const val = json_object_iter_peek_value(&it);
+		if(!strcasecmp(key, "option.quotesOptional")) {
+			data->quotesOptional = json_object_get_boolean(val);
+		} else if(!strcasecmp(key, "option.dashIsEmpty")) {
+			data->dashIsEmpty = json_object_get_boolean(val);
+		} else if(!strcasecmp(key, "option.supportEscape")) {
+			data->supportEscape = json_object_get_boolean(val);
+		} else {
+			ln_errprintf(ctx, 0, "invalid param for QuotedString: %s", key);
+		}
+		json_object_iter_next(&it);
+	}
+
+done:
+	if(r != 0) {
+		free(data);
+		data = NULL;	/* never hand out a dangling pointer */
+	}
+	*pdata = data;
+	return r;
+}
+/** Release the configuration allocated by PARSER_Construct(QuotedString). */
+PARSER_Destruct(QuotedString)
+{
+	free(pdata);
+}
+
+

 /**
  * Parse an ISO date, that is YYYY-MM-DD (exactly this format).
diff --git a/src/parser.h b/src/parser.h
index 61a4254f..81c6de8d 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -1,6 +1,6 @@
 /*
  * liblognorm - a fast samples-based log normalization library
- * Copyright 2010-2015 by Rainer Gerhards and Adiscon GmbH.
+ * Copyright 2010-2021 by Rainer Gerhards and Adiscon GmbH.
  *
  * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013
  *
@@ -63,7 +63,7 @@ PARSERDEF(Repeat);
 PARSERDEF(String);
 PARSERDEF_NO_DATA(Rest);
 PARSERDEF_NO_DATA(OpQuotedString);
-PARSERDEF_NO_DATA(QuotedString);
+PARSERDEF(QuotedString);
 PARSERDEF_NO_DATA(ISODate);
 PARSERDEF_NO_DATA(Time12hr);
 PARSERDEF_NO_DATA(Time24hr);
diff --git a/src/pdag.c b/src/pdag.c
index 44a3847b..298f247e 100644
--- a/src/pdag.c
+++ b/src/pdag.c
@@ -3,7 +3,7 @@
  * @brief Implementation of the parse dag object.
  * @class ln_pdag pdag.h
  *//*
- * Copyright 2015 by Rainer Gerhards and Adiscon GmbH.
+ * Copyright 2015-2021 by Rainer Gerhards and Adiscon GmbH.
  *
  * Released under ASL 2.0.
  */
@@ -83,7 +83,7 @@ static struct ln_parser_info parser_lookup_table[] = {
 	PARSER_ENTRY_NO_DATA("alpha", Alpha, 32),
 	PARSER_ENTRY_NO_DATA("rest", Rest, 255),
 	PARSER_ENTRY_NO_DATA("op-quoted-string", OpQuotedString, 64),
-	PARSER_ENTRY_NO_DATA("quoted-string", QuotedString, 64),
+	PARSER_ENTRY("quoted-string", QuotedString, 64),
 	PARSER_ENTRY_NO_DATA("date-iso", ISODate, 8),
 	PARSER_ENTRY_NO_DATA("time-24hr", Time24hr, 8),
 	PARSER_ENTRY_NO_DATA("time-12hr", Time12hr, 8),
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 8e2fdb2b..8f8ec454 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -63,6 +63,9 @@ TESTS_SHELLSCRIPTS = \
 	strict_prefix_actual_sample1.sh \
 	strict_prefix_matching_1.sh \
 	strict_prefix_matching_2.sh \
+	quote-string-escape.sh \
+	quote-string-dash-empty.sh \
+	quote-string-quote-optional.sh \
 	field_string.sh \
 	field_string_perm_chars.sh \
 	field_string_lazy_matching.sh \
diff --git a/tests/quote-string-dash-empty.sh b/tests/quote-string-dash-empty.sh
new file mode 100755
index 00000000..04461399
--- /dev/null
+++ b/tests/quote-string-dash-empty.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# added 2021-06-07 by Rainer Gerhards
+# This file is part of the liblognorm project, released under ASL 2.0
+. $srcdir/exec.sh
+no_solaris10
+test_def $0 "quoted string with dash"
+
+add_rule 'version=2'
+add_rule 'rule=:%
+	{"type":"quoted-string", "name":"str", "option.dashIsEmpty":True}
+	%'
+
+execute '"-"'
+assert_output_json_eq '{ "str": ""}'
+
+reset_rules
+add_rule 'version=2'
+add_rule 'rule=:%
+	{"type":"quoted-string", "name":"str"}
+	%'
+
+execute '"-"'
+assert_output_json_eq '{ "str": "\"-\""}'
+
+
+cleanup_tmp_files
+
diff --git a/tests/quote-string-escape.sh b/tests/quote-string-escape.sh
new file mode 100755
index 00000000..44b89b65
--- /dev/null
+++ b/tests/quote-string-escape.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# added 2021-05-15 by Rainer Gerhards
+# This file is part of the liblognorm project, released under ASL 2.0
+. $srcdir/exec.sh
+no_solaris10
+
+test_def $0 "quoted string with escapes"
+add_rule 'version=2'
+add_rule 'rule=:%
+	{"type":"quoted-string", "name":"str", "option.supportEscape":True}
+	%'
+
+execute '"word1\"word2"'
+assert_output_json_eq '{ "str": "\"word1\\\"word2\""}'
+
+
+cleanup_tmp_files
+
diff --git a/tests/quote-string-quote-optional.sh b/tests/quote-string-quote-optional.sh
new file mode 100755
index 00000000..34827a73
--- /dev/null
+++ b/tests/quote-string-quote-optional.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# added 2021-06-07 by Rainer Gerhards
+# This file is part of the liblognorm project, released under ASL 2.0
+. $srcdir/exec.sh
+no_solaris10
+test_def $0 "quoted string with quotesOptional"
+
+add_rule 'version=2'
+add_rule 'rule=:%
+	{"type":"quoted-string", "name":"str", "option.quotesOptional":True}
+	%'
+
+execute '"line 1"'
+assert_output_json_eq '{ "str": "\"line 1\""}'
+
+execute 'line2'
+assert_output_json_eq '{ "str": "line2"}'
+
+
+cleanup_tmp_files