diff --git a/include/fluent-bit/flb_utils.h b/include/fluent-bit/flb_utils.h index 7e42a107c0d..eec48243162 100644 --- a/include/fluent-bit/flb_utils.h +++ b/include/fluent-bit/flb_utils.h @@ -44,6 +44,7 @@ int flb_utils_set_daemon(); void flb_utils_print_setup(struct flb_config *config); struct mk_list *flb_utils_split(const char *line, int separator, int max_split); +struct mk_list *flb_utils_split_quoted(const char *line, int separator, int max_split); void flb_utils_split_free_entry(struct flb_split_entry *entry); void flb_utils_split_free(struct mk_list *list); int flb_utils_timer_consume(flb_pipefd_t fd); diff --git a/plugins/filter_modify/modify.c b/plugins/filter_modify/modify.c index d4940efb20a..a1281a2f98f 100644 --- a/plugins/filter_modify/modify.c +++ b/plugins/filter_modify/modify.c @@ -131,7 +131,7 @@ static int setup(struct filter_modify_ctx *ctx, mk_list_foreach(head, &f_ins->properties) { kv = mk_list_entry(head, struct flb_kv, _head); - split = flb_utils_split(kv->val, ' ', 3); + split = flb_utils_split_quoted(kv->val, ' ', 3); list_size = mk_list_size(split); // Conditions are, diff --git a/src/flb_utils.c b/src/flb_utils.c index 76ad5a648f1..eb4eda79a37 100644 --- a/src/flb_utils.c +++ b/src/flb_utils.c @@ -257,7 +257,118 @@ void flb_utils_print_setup(struct flb_config *config) } } -struct mk_list *flb_utils_split(const char *line, int separator, int max_split) +/* + * quoted_string_len returns the length of a quoted string, not including the quotes. + */ +static int quoted_string_len(const char *str) +{ + int len = 0; + char quote = *str++; /* Consume the quote character. */ + + while (quote != 0) { + char c = *str++; + switch (c) { + case '\0': + /* Error: string ends before end-quote was seen. */ + return -1; + case '\\': + /* Skip escaped quote or \\. */ + if (*str == quote || *str == '\\') { + str++; + } + break; + case '\'': + case '"': + /* End-quote seen: stop iterating. */ + if (c == quote) { + quote = 0; + } + break; + default: + break; + } + len++; + } + + /* Go back one character to ignore end-quote */ + len--; + + return len; +} + +/* + * next_token returns the next token in the string 'str' delimited by 'separator'. + * 'out' is set to the beginning of the token. + * 'out_len' is set to the length of the token. + * 'parse_quotes' is set to FLB_TRUE when quotes shall be considered when tokenizing the 'str'. + * The function returns offset to next token in the string. + */ +static int next_token(const char *str, int separator, char **out, int *out_len, int parse_quotes) { + const char *token_in = str; + char *token_out; + int next_separator = 0; + int quote = 0; /* Parser state: 0 not inside quoted string, or '"' or '\'' when inside quoted string. */ + int len = 0; + int i; + + /* Skip leading separators. */ + while (*token_in == separator) { + token_in++; + } + + /* Should quotes be parsed? Or is token quoted? If not, copy until separator or the end of string. */ + if (parse_quotes == FLB_FALSE || (*token_in != '"' && *token_in != '\'')) { + len = (int)strlen(token_in); + next_separator = mk_string_char_search(token_in, separator, len); + if (next_separator > 0) { + len = next_separator; + } + *out_len = len; + *out = mk_string_copy_substr(token_in, 0, len); + if (*out == NULL) { + return -1; + } + + return (int)(token_in - str) + len; + } + + /* Token is quoted. */ + + len = quoted_string_len(token_in); + if (len < 0) { + return -1; + } + + /* Consume the quote character. */ + quote = *token_in++; + + token_out = flb_malloc(len + 1); + if (!token_out) { + return -1; + } + + /* Copy the token */ + for (i = 0; i < len; i++) { + /* Handle escapes when inside quoted token: + * \" -> " + * \' -> ' + * \\ -> \ + */ + if (*token_in == '\\' && (token_in[1] == quote || token_in[1] == '\\')) { + token_in++; + } + token_out[i] = *token_in++; + } + token_out[i] = '\0'; + + *out = token_out; + *out_len = len; + + return (int)(token_in - str); +} + + +static struct mk_list *split(const char *line, int separator, int max_split, int quoted) { int i = 0; int count = 0; @@ -281,26 +392,15 @@ struct mk_list *flb_utils_split(const char *line, int separator, int max_split) len = strlen(line); while (i < len) { - end = mk_string_char_search(line + i, separator, len - i); - if (end >= 0 && end + i < len) { - end += i; - - if (i == (unsigned int) end) { - i++; - continue; - } - - val = mk_string_copy_substr(line, i, end); - val_len = end - i; - } - else { - val = mk_string_copy_substr(line, i, len); - val_len = len - i; - end = len; + end = next_token(line + i, separator, &val, &val_len, quoted); + if (end == -1) { + flb_error("Parsing failed: %s", line); + flb_utils_split_free(list); + return NULL; } /* Update last position */ - i = end; + i += end; /* Create new entry */ new = flb_malloc(sizeof(struct flb_split_entry)); @@ -341,6 +441,17 @@ struct mk_list *flb_utils_split(const char *line, int separator, int max_split) return list; } +struct mk_list *flb_utils_split_quoted(const char *line, int separator, int max_split) +{ + return split(line, separator, max_split, FLB_TRUE); +} + +struct mk_list *flb_utils_split(const char *line, int separator, int max_split) +{ + return split(line, separator, max_split, FLB_FALSE); +} + + void flb_utils_split_free_entry(struct flb_split_entry *entry) { mk_list_del(&entry->_head); diff --git a/tests/internal/utils.c b/tests/internal/utils.c index 30093de76f2..119094bdeb7 100644 --- a/tests/internal/utils.c +++ b/tests/internal/utils.c @@ -5,6 +5,7 @@ #include #include #include "flb_tests_internal.h" +#include "fluent-bit/flb_macros.h" struct url_check { @@ -474,7 +475,7 @@ void test_proxy_url_split() { } } -static int compare_split_entry(const char* input, int separator, int max_split, ...) +static int compare_split_entry(const char* input, int separator, int max_split, int quoted, ...) { va_list ap; int count = 1; @@ -484,7 +485,13 @@ static int compare_split_entry(const char* input, int separator, int max_split, struct mk_list *head = NULL; struct flb_split_entry *entry = NULL; - split = flb_utils_split(input, separator, max_split); + if (quoted) { + split = flb_utils_split_quoted(input, separator, max_split); + } + else { + split = flb_utils_split(input, separator, max_split); + } + if (!TEST_CHECK(split != NULL)) { TEST_MSG("flb_utils_split failed. input=%s", input); return -1; @@ -494,7 +501,7 @@ static int compare_split_entry(const char* input, int separator, int max_split, return -1; } - va_start(ap, max_split); + va_start(ap, quoted); mk_list_foreach_safe(head, tmp_list, split) { if (max_split > 0 && !TEST_CHECK(count <= max_split) ) { TEST_MSG("count error. got=%d expect=%d input=%s", count, max_split, input); @@ -525,31 +532,77 @@ static int compare_split_entry(const char* input, int separator, int max_split, void test_flb_utils_split() { - compare_split_entry("aa,bb", ',', 2, "aa","bb" ); - compare_split_entry("localhost:12345", ':', 2, "localhost","12345" ); - compare_split_entry("https://fluentbit.io/announcements/", '/', -1, "https:", "fluentbit.io","announcements" ); + compare_split_entry("aa,bb", ',', 2, FLB_FALSE, "aa","bb" ); + compare_split_entry("localhost:12345", ':', 2, FLB_FALSE, "localhost","12345" ); + compare_split_entry("https://fluentbit.io/announcements/", '/', -1, FLB_FALSE, "https:", "fluentbit.io","announcements" ); /* /proc/net/dev example */ compare_split_entry("enp0s3: 1955136 1768 0 0 0 0 0 0 89362 931 0 0 0 0 0 0", - ' ', 256, + ' ', 256, FLB_FALSE, "enp0s3:", "1955136", "1768", "0", "0", "0", "0", "0", "0", "89362", "931", "0", "0", "0", "0", "0", "0", "0"); /* filter_grep configuration */ - compare_split_entry("Regex test *a*", ' ', 3, "Regex", "test", "*a*"); + compare_split_entry("Regex test *a*", ' ', 3, FLB_FALSE, "Regex", "test", "*a*"); /* filter_modify configuration */ - compare_split_entry("Condition Key_Value_Does_Not_Equal cpustats KNOWN", ' ', 4, - "Condition", "Key_Value_Does_Not_Equal", "cpustats", "KNOWN"); + compare_split_entry("Condition Key_Value_Does_Not_Equal cpustats KNOWN", ' ', 4, + FLB_FALSE, "Condition", "Key_Value_Does_Not_Equal", "cpustats", "KNOWN"); /* nginx_exporter_metrics example */ compare_split_entry("Active connections: 1\nserver accepts handled requests\n 10 10 10\nReading: 0 Writing: 1 Waiting: 0", '\n', 4, - "Active connections: 1", "server accepts handled requests", " 10 10 10","Reading: 0 Writing: 1 Waiting: 0"); + FLB_FALSE, "Active connections: 1", "server accepts handled requests", " 10 10 10","Reading: 0 Writing: 1 Waiting: 0"); /* out_cloudwatch_logs example */ compare_split_entry("dimension_1,dimension_2;dimension_3", ';', 256, - "dimension_1,dimension_2", "dimension_3"); + FLB_FALSE, "dimension_1,dimension_2", "dimension_3"); /* separator is not contained */ - compare_split_entry("aa,bb", '/', 2, "aa,bb"); + compare_split_entry("aa,bb", '/', 2, FLB_FALSE, "aa,bb"); + + /* do not parse quotes when tokenizing */ + compare_split_entry("aa \"bb cc\" dd", ' ', 256, FLB_FALSE, "aa", "\"bb", "cc\"", "dd"); +} + +void test_flb_utils_split_quoted() +{ + /* Tokens quoted with "..." */ + compare_split_entry("aa \"double quote\" bb", ' ', 256, FLB_TRUE, "aa", "double quote", "bb"); + compare_split_entry("\"begin with double quote\" aa", ' ', 256, FLB_TRUE, "begin with double quote", "aa"); + compare_split_entry("aa \"end with double quote\"", ' ', 256, FLB_TRUE, "aa", "end with double quote"); + + /* Tokens quoted with '...' */ + compare_split_entry("aa bb 'single quote' cc", ' ', 256, FLB_TRUE, "aa", "bb", "single quote", "cc"); + compare_split_entry("'begin with single quote' aa", ' ', 256, FLB_TRUE, "begin with single quote", "aa"); + compare_split_entry("aa 'end with single quote'", ' ', 256, FLB_TRUE, "aa", "end with single quote"); + + /* Tokens surrounded by more than one separator character */ + compare_split_entry(" aa \" spaces bb \" cc ' spaces dd ' ff", ' ', 256, FLB_TRUE, + "aa", " spaces bb ", "cc", " spaces dd ", "ff"); + + /* Escapes within quoted token */ + compare_split_entry("aa \"escaped \\\" quote\" bb", ' ', 256, FLB_TRUE, "aa", "escaped \" quote", "bb"); + compare_split_entry("aa 'escaped \\' quote\' bb", ' ', 256, FLB_TRUE, "aa", "escaped \' quote", "bb"); + compare_split_entry("aa \"\\\"escaped balanced quotes\\\"\" bb", ' ', 256, FLB_TRUE, + "aa", "\"escaped balanced quotes\"", "bb"); + compare_split_entry("aa '\\'escaped balanced quotes\\'\' bb", ' ', 256, FLB_TRUE, + "aa", "'escaped balanced quotes'", "bb"); + compare_split_entry("aa 'escaped \\\\ escape\' bb", ' ', 256, FLB_TRUE, "aa", "escaped \\ escape", "bb"); + + /* Escapes that are not processed */ + compare_split_entry("\\\"aa bb", ' ', 256, FLB_TRUE, "\\\"aa", "bb"); + compare_split_entry("\\'aa bb", ' ', 256, FLB_TRUE, "\\'aa", "bb"); + compare_split_entry("\\\\aa bb", ' ', 256, FLB_TRUE, "\\\\aa", "bb"); + compare_split_entry("aa\\ bb", ' ', 256, FLB_TRUE, "aa\\", "bb"); + +} + +void test_flb_utils_split_quoted_errors() +{ + struct mk_list *split = NULL; + + split = flb_utils_split_quoted("aa \"unbalanced quotes should fail", ' ', 256); + TEST_CHECK(split == NULL); + split = flb_utils_split_quoted("aa 'unbalanced quotes should fail", ' ', 256); + TEST_CHECK(split == NULL); } TEST_LIST = { @@ -563,5 +616,7 @@ TEST_LIST = { { "test_write_str_buffer_overrun", test_write_str_buffer_overrun }, { "proxy_url_split", test_proxy_url_split }, { "test_flb_utils_split", test_flb_utils_split }, + { "test_flb_utils_split_quoted", test_flb_utils_split_quoted}, + { "test_flb_utils_split_quoted_errors", test_flb_utils_split_quoted_errors}, { 0 } }; diff --git a/tests/runtime/filter_modify.c b/tests/runtime/filter_modify.c index 75e2d86331a..6a54ca6b259 100644 --- a/tests/runtime/filter_modify.c +++ b/tests/runtime/filter_modify.c @@ -1454,7 +1454,7 @@ static void flb_test_issue_4319() } -/* +/* * to check issue https://github.com/fluent/fluent-bit/issues/4319 Key_value_does_not_match case */ @@ -1504,6 +1504,48 @@ static void flb_test_issue_4319_2() filter_test_destroy(ctx); } +/* https://github.com/fluent/fluent-bit/issues/1225 */ +static void flb_test_issue_1225() +{ + int len; + int ret; + int bytes; + char *p; + struct flb_lib_out_cb cb_data; + struct filter_test *ctx; + + /* Create test context */ + ctx = filter_test_create((void *) &cb_data); + if (!ctx) { + exit(EXIT_FAILURE); + } + + /* Configure filter */ + ret = flb_filter_set(ctx->flb, ctx->f_ffd, + "condition", "key_value_matches \"key 1\" \".*with spaces.*\"", + "add", "\"key 2\" \"second value with spaces\"", + NULL); + TEST_CHECK(ret == 0); + + /* Prepare output callback with expected result */ + cb_data.cb = cb_check_result; + cb_data.data = "\"key 1\":\"first value with spaces\","\ + "\"key 2\":\"second value with spaces\""; + + /* Start the engine */ + ret = flb_start(ctx->flb); + TEST_CHECK(ret == 0); + + /* Ingest data samples */ + p = "[0,{\"key 1\":\"first value with spaces\"}]"; + len = strlen(p); + bytes = flb_lib_push(ctx->flb, ctx->i_ffd, p, len); + TEST_CHECK(bytes == len); + + filter_test_destroy(ctx); +} + + /* * to check issue https://github.com/fluent/fluent-bit/issues/7075 */ @@ -1596,6 +1638,7 @@ TEST_LIST = { {"cond_key_value_does_not_equal and key does not exist", flb_test_issue_4319 }, {"cond_key_value_does_not_matches and key does not exist", flb_test_issue_4319_2 }, {"Key_value_matches and value is bool type", flb_test_issue_7075}, + {"operation_with_whitespace", flb_test_issue_1225 }, {NULL, NULL} };