Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

utils: parse quotes when splitting strings #6387

Merged
merged 5 commits into from
May 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/fluent-bit/flb_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ int flb_utils_set_daemon();
void flb_utils_print_setup(struct flb_config *config);

struct mk_list *flb_utils_split(const char *line, int separator, int max_split);
struct mk_list *flb_utils_split_quoted(const char *line, int separator, int max_split);
void flb_utils_split_free_entry(struct flb_split_entry *entry);
void flb_utils_split_free(struct mk_list *list);
int flb_utils_timer_consume(flb_pipefd_t fd);
Expand Down
2 changes: 1 addition & 1 deletion plugins/filter_modify/modify.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ static int setup(struct filter_modify_ctx *ctx,
mk_list_foreach(head, &f_ins->properties) {
kv = mk_list_entry(head, struct flb_kv, _head);

split = flb_utils_split(kv->val, ' ', 3);
split = flb_utils_split_quoted(kv->val, ' ', 3);
list_size = mk_list_size(split);

// Conditions are,
Expand Down
147 changes: 129 additions & 18 deletions src/flb_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,118 @@ void flb_utils_print_setup(struct flb_config *config)
}
}

struct mk_list *flb_utils_split(const char *line, int separator, int max_split)
/*
* quoted_string_len returns the length of a quoted string, not including the quotes.
*/
static int quoted_string_len(const char *str)
{
int len = 0;
char quote = *str++; /* Consume the quote character. */

while (quote != 0) {
char c = *str++;
switch (c) {
case '\0':
/* Error: string ends before end-quote was seen. */
return -1;
case '\\':
/* Skip escaped quote or \\. */
if (*str == quote || *str == '\\') {
str++;
}
break;
case '\'':
case '"':
/* End-quote seen: stop iterating. */
if (c == quote) {
quote = 0;
}
break;
default:
break;
}
len++;
}

/* Go back one character to ignore end-quote */
len--;

return len;
}

/*
* next_token returns the next token in the string 'str' delimited by 'separator'.
* 'out' is set to the beginning of the token.
* 'out_len' is set to the length of the token.
* 'parse_quotes' is set to FLB_TRUE when quotes shall be considered when tokenizing the 'str'.
* The function returns offset to next token in the string.
*/
static int next_token(const char *str, int separator, char **out, int *out_len, int parse_quotes) {
const char *token_in = str;
char *token_out;
int next_separator = 0;
int quote = 0; /* Parser state: 0 not inside quoted string, or '"' or '\'' when inside quoted string. */
int len = 0;
int i;

/* Skip leading separators. */
while (*token_in == separator) {
token_in++;
}

/* Should quotes be parsed? Or is token quoted? If not, copy until separator or the end of string. */
if (parse_quotes == FLB_FALSE || (*token_in != '"' && *token_in != '\'')) {
len = (int)strlen(token_in);
next_separator = mk_string_char_search(token_in, separator, len);
if (next_separator > 0) {
len = next_separator;
}
*out_len = len;
*out = mk_string_copy_substr(token_in, 0, len);
if (*out == NULL) {
return -1;
}

return (int)(token_in - str) + len;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the caller function expects this function to return -1 on failure we should check the result of mk_string_copy_substr and if it returns NULL we should return -1.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

}

/* Token is quoted. */

len = quoted_string_len(token_in);
if (len < 0) {
return -1;
}

/* Consume the quote character. */
quote = *token_in++;

token_out = flb_malloc(len + 1);
if (!token_out) {
return -1;
}

/* Copy the token */
for (i = 0; i < len; i++) {
/* Handle escapes when inside quoted token:
* \" -> "
* \' -> '
* \\ -> \
*/
if (*token_in == '\\' && (token_in[1] == quote || token_in[1] == '\\')) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you have a bug here, if I understood the code correctly, you would expect quote to hold the value coerresponding to the opening quote character but I can't spot any explicit sets or calls that should set it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, thank you for spotting this! I've now fixed it and added test that would have caught it.

token_in++;
}
token_out[i] = *token_in++;
}
token_out[i] = '\0';

*out = token_out;
*out_len = len;

return (int)(token_in - str);
}


static struct mk_list *split(const char *line, int separator, int max_split, int quoted)
{
int i = 0;
int count = 0;
Expand All @@ -281,26 +392,15 @@ struct mk_list *flb_utils_split(const char *line, int separator, int max_split)

len = strlen(line);
while (i < len) {
end = mk_string_char_search(line + i, separator, len - i);
if (end >= 0 && end + i < len) {
end += i;

if (i == (unsigned int) end) {
i++;
continue;
}

val = mk_string_copy_substr(line, i, end);
val_len = end - i;
}
else {
val = mk_string_copy_substr(line, i, len);
val_len = len - i;
end = len;
end = next_token(line + i, separator, &val, &val_len, quoted);
if (end == -1) {
flb_error("Parsing failed: %s", line);
flb_utils_split_free(list);
return NULL;
}

/* Update last position */
i = end;
i += end;

/* Create new entry */
new = flb_malloc(sizeof(struct flb_split_entry));
Expand Down Expand Up @@ -341,6 +441,17 @@ struct mk_list *flb_utils_split(const char *line, int separator, int max_split)
return list;
}

struct mk_list *flb_utils_split_quoted(const char *line, int separator, int max_split)
{
return split(line, separator, max_split, FLB_TRUE);
}

struct mk_list *flb_utils_split(const char *line, int separator, int max_split)
{
return split(line, separator, max_split, FLB_FALSE);
}


void flb_utils_split_free_entry(struct flb_split_entry *entry)
{
mk_list_del(&entry->_head);
Expand Down
81 changes: 68 additions & 13 deletions tests/internal/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <fluent-bit/flb_utils.h>
#include <stdarg.h>
#include "flb_tests_internal.h"
#include "fluent-bit/flb_macros.h"


struct url_check {
Expand Down Expand Up @@ -474,7 +475,7 @@ void test_proxy_url_split() {
}
}

static int compare_split_entry(const char* input, int separator, int max_split, ...)
static int compare_split_entry(const char* input, int separator, int max_split, int quoted, ...)
{
va_list ap;
int count = 1;
Expand All @@ -484,7 +485,13 @@ static int compare_split_entry(const char* input, int separator, int max_split,
struct mk_list *head = NULL;
struct flb_split_entry *entry = NULL;

split = flb_utils_split(input, separator, max_split);
if (quoted) {
split = flb_utils_split_quoted(input, separator, max_split);
}
else {
split = flb_utils_split(input, separator, max_split);
}

if (!TEST_CHECK(split != NULL)) {
TEST_MSG("flb_utils_split failed. input=%s", input);
return -1;
Expand All @@ -494,7 +501,7 @@ static int compare_split_entry(const char* input, int separator, int max_split,
return -1;
}

va_start(ap, max_split);
va_start(ap, quoted);
mk_list_foreach_safe(head, tmp_list, split) {
if (max_split > 0 && !TEST_CHECK(count <= max_split) ) {
TEST_MSG("count error. got=%d expect=%d input=%s", count, max_split, input);
Expand Down Expand Up @@ -525,31 +532,77 @@ static int compare_split_entry(const char* input, int separator, int max_split,

void test_flb_utils_split()
{
compare_split_entry("aa,bb", ',', 2, "aa","bb" );
compare_split_entry("localhost:12345", ':', 2, "localhost","12345" );
compare_split_entry("https://fluentbit.io/announcements/", '/', -1, "https:", "fluentbit.io","announcements" );
compare_split_entry("aa,bb", ',', 2, FLB_FALSE, "aa","bb" );
compare_split_entry("localhost:12345", ':', 2, FLB_FALSE, "localhost","12345" );
compare_split_entry("https://fluentbit.io/announcements/", '/', -1, FLB_FALSE, "https:", "fluentbit.io","announcements" );

/* /proc/net/dev example */
compare_split_entry("enp0s3: 1955136 1768 0 0 0 0 0 0 89362 931 0 0 0 0 0 0",
' ', 256,
' ', 256, FLB_FALSE,
"enp0s3:", "1955136", "1768", "0", "0", "0", "0", "0", "0", "89362", "931", "0", "0", "0", "0", "0", "0", "0");

/* filter_grep configuration */
compare_split_entry("Regex test *a*", ' ', 3, "Regex", "test", "*a*");
compare_split_entry("Regex test *a*", ' ', 3, FLB_FALSE, "Regex", "test", "*a*");

/* filter_modify configuration */
compare_split_entry("Condition Key_Value_Does_Not_Equal cpustats KNOWN", ' ', 4,
"Condition", "Key_Value_Does_Not_Equal", "cpustats", "KNOWN");
compare_split_entry("Condition Key_Value_Does_Not_Equal cpustats KNOWN", ' ', 4,
FLB_FALSE, "Condition", "Key_Value_Does_Not_Equal", "cpustats", "KNOWN");

/* nginx_exporter_metrics example */
compare_split_entry("Active connections: 1\nserver accepts handled requests\n 10 10 10\nReading: 0 Writing: 1 Waiting: 0", '\n', 4,
"Active connections: 1", "server accepts handled requests", " 10 10 10","Reading: 0 Writing: 1 Waiting: 0");
FLB_FALSE, "Active connections: 1", "server accepts handled requests", " 10 10 10","Reading: 0 Writing: 1 Waiting: 0");

/* out_cloudwatch_logs example */
compare_split_entry("dimension_1,dimension_2;dimension_3", ';', 256,
"dimension_1,dimension_2", "dimension_3");
FLB_FALSE, "dimension_1,dimension_2", "dimension_3");
/* separator is not contained */
compare_split_entry("aa,bb", '/', 2, "aa,bb");
compare_split_entry("aa,bb", '/', 2, FLB_FALSE, "aa,bb");

/* do not parse quotes when tokenizing */
compare_split_entry("aa \"bb cc\" dd", ' ', 256, FLB_FALSE, "aa", "\"bb", "cc\"", "dd");
}

void test_flb_utils_split_quoted()
{
/* Tokens quoted with "..." */
compare_split_entry("aa \"double quote\" bb", ' ', 256, FLB_TRUE, "aa", "double quote", "bb");
compare_split_entry("\"begin with double quote\" aa", ' ', 256, FLB_TRUE, "begin with double quote", "aa");
compare_split_entry("aa \"end with double quote\"", ' ', 256, FLB_TRUE, "aa", "end with double quote");

/* Tokens quoted with '...' */
compare_split_entry("aa bb 'single quote' cc", ' ', 256, FLB_TRUE, "aa", "bb", "single quote", "cc");
compare_split_entry("'begin with single quote' aa", ' ', 256, FLB_TRUE, "begin with single quote", "aa");
compare_split_entry("aa 'end with single quote'", ' ', 256, FLB_TRUE, "aa", "end with single quote");

/* Tokens surrounded by more than one separator character */
compare_split_entry(" aa \" spaces bb \" cc ' spaces dd ' ff", ' ', 256, FLB_TRUE,
"aa", " spaces bb ", "cc", " spaces dd ", "ff");

/* Escapes within quoted token */
compare_split_entry("aa \"escaped \\\" quote\" bb", ' ', 256, FLB_TRUE, "aa", "escaped \" quote", "bb");
compare_split_entry("aa 'escaped \\' quote\' bb", ' ', 256, FLB_TRUE, "aa", "escaped \' quote", "bb");
compare_split_entry("aa \"\\\"escaped balanced quotes\\\"\" bb", ' ', 256, FLB_TRUE,
"aa", "\"escaped balanced quotes\"", "bb");
compare_split_entry("aa '\\'escaped balanced quotes\\'\' bb", ' ', 256, FLB_TRUE,
"aa", "'escaped balanced quotes'", "bb");
compare_split_entry("aa 'escaped \\\\ escape\' bb", ' ', 256, FLB_TRUE, "aa", "escaped \\ escape", "bb");

/* Escapes that are not processed */
compare_split_entry("\\\"aa bb", ' ', 256, FLB_TRUE, "\\\"aa", "bb");
compare_split_entry("\\'aa bb", ' ', 256, FLB_TRUE, "\\'aa", "bb");
compare_split_entry("\\\\aa bb", ' ', 256, FLB_TRUE, "\\\\aa", "bb");
compare_split_entry("aa\\ bb", ' ', 256, FLB_TRUE, "aa\\", "bb");

}

void test_flb_utils_split_quoted_errors()
{
struct mk_list *split = NULL;

split = flb_utils_split_quoted("aa \"unbalanced quotes should fail", ' ', 256);
TEST_CHECK(split == NULL);
split = flb_utils_split_quoted("aa 'unbalanced quotes should fail", ' ', 256);
TEST_CHECK(split == NULL);
}

TEST_LIST = {
Expand All @@ -563,5 +616,7 @@ TEST_LIST = {
{ "test_write_str_buffer_overrun", test_write_str_buffer_overrun },
{ "proxy_url_split", test_proxy_url_split },
{ "test_flb_utils_split", test_flb_utils_split },
{ "test_flb_utils_split_quoted", test_flb_utils_split_quoted},
{ "test_flb_utils_split_quoted_errors", test_flb_utils_split_quoted_errors},
{ 0 }
};
45 changes: 44 additions & 1 deletion tests/runtime/filter_modify.c
Original file line number Diff line number Diff line change
Expand Up @@ -1454,7 +1454,7 @@ static void flb_test_issue_4319()
}


/*
/*
* to check issue https://github.com/fluent/fluent-bit/issues/4319
Key_value_does_not_match case
*/
Expand Down Expand Up @@ -1504,6 +1504,48 @@ static void flb_test_issue_4319_2()
filter_test_destroy(ctx);
}

/* https://github.com/fluent/fluent-bit/issues/1225 */
static void flb_test_issue_1225()
{
int len;
int ret;
int bytes;
char *p;
struct flb_lib_out_cb cb_data;
struct filter_test *ctx;

/* Create test context */
ctx = filter_test_create((void *) &cb_data);
if (!ctx) {
exit(EXIT_FAILURE);
}

/* Configure filter */
ret = flb_filter_set(ctx->flb, ctx->f_ffd,
"condition", "key_value_matches \"key 1\" \".*with spaces.*\"",
"add", "\"key 2\" \"second value with spaces\"",
NULL);
TEST_CHECK(ret == 0);

/* Prepare output callback with expected result */
cb_data.cb = cb_check_result;
cb_data.data = "\"key 1\":\"first value with spaces\","\
"\"key 2\":\"second value with spaces\"";

/* Start the engine */
ret = flb_start(ctx->flb);
TEST_CHECK(ret == 0);

/* Ingest data samples */
p = "[0,{\"key 1\":\"first value with spaces\"}]";
len = strlen(p);
bytes = flb_lib_push(ctx->flb, ctx->i_ffd, p, len);
TEST_CHECK(bytes == len);

filter_test_destroy(ctx);
}


/*
* to check issue https://github.com/fluent/fluent-bit/issues/7075
*/
Expand Down Expand Up @@ -1596,6 +1638,7 @@ TEST_LIST = {
{"cond_key_value_does_not_equal and key does not exist", flb_test_issue_4319 },
{"cond_key_value_does_not_matches and key does not exist", flb_test_issue_4319_2 },
{"Key_value_matches and value is bool type", flb_test_issue_7075},
{"operation_with_whitespace", flb_test_issue_1225 },

{NULL, NULL}
};