Skip to content

Commit

Permalink
add new parameters for quoted-string parser
Browse files Browse the repository at this point in the history
  • Loading branch information
rgerhards committed Jun 8, 2021
1 parent 2053745 commit bad8c6f
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 14 deletions.
85 changes: 75 additions & 10 deletions src/parser.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* liblognorm - a fast samples-based log normalization library
* Copyright 2010-2018 by Rainer Gerhards and Adiscon GmbH.
* Copyright 2010-2021 by Rainer Gerhards and Adiscon GmbH.
*
* Modified by Pavel Levshin ([email protected]) in 2013
*
Expand Down Expand Up @@ -1644,6 +1644,12 @@ PARSER_Parse(OpQuotedString)
}



struct data_QuotedString {
int dashIsEmpty;
int quotesOptional;
int supportEscape;
};
/**
* Parse a quoted string. Escaping of the quote char is only honored when the
* supportEscape option is set. A quoted string is one that starts with a double quote,
Expand All @@ -1653,38 +1659,97 @@ PARSER_Parse(OpQuotedString)
*/
PARSER_Parse(QuotedString)
const char *c;
struct data_QuotedString *const data = (struct data_QuotedString*) pdata;
size_t i;
int hadQuote = 0;

assert(npb->str != NULL);
assert(offs != NULL);
assert(parsed != NULL);
c = npb->str;
i = *offs;
if(i + 2 > npb->strLen)
goto done; /* needs at least 2 characters */
if(i + 1 > npb->strLen)
goto done; /* needs at least 1 characters (with quotesQptional...) */

if(c[i] != '"')
goto done;
++i;
if(c[i] == '"') {
hadQuote = 1;
++i;
} else {
if(!data->quotesOptional) {
goto done;
}
}

fprintf(stderr, "start loop %zd, char %c\n", i, c[i]);
/* search end of string */
while(i < npb->strLen && c[i] != '"')
while(i < npb->strLen &&
( (hadQuote && c[i] != '"') || (!hadQuote && c[i] != ' ') )
) {
fprintf(stderr, "in loop %zd, char %c\n", i, c[i]);
if(data->supportEscape && c[i] == '\\' && (i < npb->strLen)) {
i++; /* next char is escaped */
}
i++;
}

if(i == npb->strLen || c[i] != '"')
if(hadQuote && (i == npb->strLen || c[i] != '"'))
goto done;

/* success, persist */
*parsed = i + 1 - *offs; /* "eat" terminal double quote */
const size_t charsFound = i - *offs + (hadQuote ? 1 : 0);
fprintf(stderr, "charsFound %zd, i %zd, offs %zd\n", charsFound, i, *offs);
*parsed = charsFound; /* "eat" terminal double quote */
/* create JSON value to save quoted string contents */
if(value != NULL) {
*value = json_object_new_string_len(npb->str+(*offs), *parsed);
if(charsFound == 3 && data->dashIsEmpty && !strncmp(npb->str+(*offs), "\"-\"", 3)) {
*value = json_object_new_string_len("", 0);
} else {
*value = json_object_new_string_len(npb->str+(*offs), *parsed);
}
}
r = 0; /* success */
done:
return r;
}

/*
 * Build the option block for a quoted-string parser instance.
 *
 * Allocates a zero-filled data_QuotedString (all options off) and, when a
 * parameter object is supplied, switches on the options named by the
 * "option.quotesOptional", "option.dashIsEmpty" and "option.supportEscape"
 * keys (matched case-insensitively). Unknown keys are reported through
 * ln_errprintf() but do not abort construction.
 *
 * On success returns 0 and stores the block in *pdata (to be released by
 * the matching destructor). On allocation failure returns non-zero and
 * stores NULL in *pdata.
 */
PARSER_Construct(QuotedString)
{
	int r = 0;
	struct data_QuotedString *data = calloc(1, sizeof *data);

	if(data == NULL) {
		/* NOTE(review): -1 used as out-of-memory code; switch to the
		 * project's dedicated error constant if one is in scope here.
		 */
		r = -1;
		goto done;
	}

	if(json == NULL)
		goto done;

	struct json_object_iterator it = json_object_iter_begin(json);
	struct json_object_iterator itEnd = json_object_iter_end(json);
	while (!json_object_iter_equal(&it, &itEnd)) {
		const char *key = json_object_iter_peek_name(&it);
		struct json_object *const val = json_object_iter_peek_value(&it);
		if(!strcasecmp(key, "option.quotesOptional")) {
			data->quotesOptional = json_object_get_boolean(val);
		} else if(!strcasecmp(key, "option.dashIsEmpty")) {
			data->dashIsEmpty = json_object_get_boolean(val);
		} else if(!strcasecmp(key, "option.supportEscape")) {
			data->supportEscape = json_object_get_boolean(val);
		} else {
			ln_errprintf(ctx, 0, "invalid param for QuotedString: %s",
				json_object_to_json_string(val));
		}
		json_object_iter_next(&it);
	}

done:
	/* never hand a freed pointer back to the caller */
	if(r != 0) {
		free(data);
		data = NULL;
	}
	*pdata = data;
	return r;
}
/*
 * Release the per-instance data of a quoted-string parser.
 * NOTE(review): pdata is presumably the data_QuotedString block allocated
 * by PARSER_Construct(QuotedString) -- confirm against the macro definition.
 */
PARSER_Destruct(QuotedString)
{
/* the block holds only plain ints, so a single free() suffices */
free(pdata);
}



/**
* Parse an ISO date, that is YYYY-MM-DD (exactly this format).
Expand Down
4 changes: 2 additions & 2 deletions src/parser.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* liblognorm - a fast samples-based log normalization library
* Copyright 2010-2015 by Rainer Gerhards and Adiscon GmbH.
* Copyright 2010-2021 by Rainer Gerhards and Adiscon GmbH.
*
* Modified by Pavel Levshin ([email protected]) in 2013
*
Expand Down Expand Up @@ -63,7 +63,7 @@ PARSERDEF(Repeat);
PARSERDEF(String);
PARSERDEF_NO_DATA(Rest);
PARSERDEF_NO_DATA(OpQuotedString);
PARSERDEF_NO_DATA(QuotedString);
PARSERDEF(QuotedString);
PARSERDEF_NO_DATA(ISODate);
PARSERDEF_NO_DATA(Time12hr);
PARSERDEF_NO_DATA(Time24hr);
Expand Down
4 changes: 2 additions & 2 deletions src/pdag.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* @brief Implementation of the parse dag object.
* @class ln_pdag pdag.h
*//*
* Copyright 2015 by Rainer Gerhards and Adiscon GmbH.
* Copyright 2015-2021 by Rainer Gerhards and Adiscon GmbH.
*
* Released under ASL 2.0.
*/
Expand Down Expand Up @@ -83,7 +83,7 @@ static struct ln_parser_info parser_lookup_table[] = {
PARSER_ENTRY_NO_DATA("alpha", Alpha, 32),
PARSER_ENTRY_NO_DATA("rest", Rest, 255),
PARSER_ENTRY_NO_DATA("op-quoted-string", OpQuotedString, 64),
PARSER_ENTRY_NO_DATA("quoted-string", QuotedString, 64),
PARSER_ENTRY("quoted-string", QuotedString, 64),
PARSER_ENTRY_NO_DATA("date-iso", ISODate, 8),
PARSER_ENTRY_NO_DATA("time-24hr", Time24hr, 8),
PARSER_ENTRY_NO_DATA("time-12hr", Time12hr, 8),
Expand Down
3 changes: 3 additions & 0 deletions tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ TESTS_SHELLSCRIPTS = \
strict_prefix_actual_sample1.sh \
strict_prefix_matching_1.sh \
strict_prefix_matching_2.sh \
quote-string-escape.sh \
quote-string-dash-empty.sh \
quote-string-quote-optional.sh \
field_string.sh \
field_string_perm_chars.sh \
field_string_lazy_matching.sh \
Expand Down
27 changes: 27 additions & 0 deletions tests/quote-string-dash-empty.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
# added 2021-06-07 by Rainer Gerhards
# This file is part of the liblognorm project, released under ASL 2.0
#
# Test for the quoted-string parser's "option.dashIsEmpty" parameter:
# the input "-" must become an empty string only when the option is set.
# The helper commands used below are presumably provided by exec.sh.
. $srcdir/exec.sh
no_solaris10
test_def $0 "quoted string with dash"

# Case 1: option enabled -> quoted dash is reported as empty string
add_rule 'version=2'
add_rule 'rule=:%
{"type":"quoted-string", "name":"str", "option.dashIsEmpty":True}
%'

execute '"-"'
assert_output_json_eq '{ "str": ""}'

# Case 2: option not given -> quoted dash is kept verbatim, quotes included
reset_rules
add_rule 'version=2'
add_rule 'rule=:%
{"type":"quoted-string", "name":"str"}
%'

execute '"-"'
assert_output_json_eq '{ "str": "\"-\""}'


cleanup_tmp_files

18 changes: 18 additions & 0 deletions tests/quote-string-escape.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
# added 2021-05-15 by Rainer Gerhards
# This file is part of the liblognorm project, released under ASL 2.0
#
# Test for the quoted-string parser's "option.supportEscape" parameter:
# a backslash-escaped double quote inside the string must not end the match.
# The helper commands used below are presumably provided by exec.sh.
. $srcdir/exec.sh
no_solaris10

test_def $0 "quoted string with escapes"
add_rule 'version=2'
add_rule 'rule=:%
{"type":"quoted-string", "name":"str", "option.supportEscape":True}
%'

# the whole input, including the escaped quote and the outer quotes, is expected as the value
execute '"word1\"word2"'
assert_output_json_eq '{ "str": "\"word1\\\"word2\""}'


cleanup_tmp_files

20 changes: 20 additions & 0 deletions tests/quote-string-quote-optional.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
# added 2021-06-07 by Rainer Gerhards
# This file is part of the liblognorm project, released under ASL 2.0
#
# Test for the quoted-string parser's "option.quotesOptional" parameter:
# both a quoted and an unquoted input must match the same rule.
# The helper commands used below are presumably provided by exec.sh.
. $srcdir/exec.sh
no_solaris10
test_def $0 "quoted string with quotesOptional"

add_rule 'version=2'
add_rule 'rule=:%
{"type":"quoted-string", "name":"str", "option.quotesOptional":True}
%'

# quoted input: value keeps the surrounding quotes and may contain a space
execute '"line 1"'
assert_output_json_eq '{ "str": "\"line 1\""}'

# unquoted input: accepted because quotes are optional
execute 'line2'
assert_output_json_eq '{ "str": "line2"}'


cleanup_tmp_files

0 comments on commit bad8c6f

Please sign in to comment.