-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
utils: parse quotes when splitting strings #6387
Changes from all commits
2f330f8
cc013e6
6e2d060
ac7ff03
9bbf9bd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -257,7 +257,118 @@ void flb_utils_print_setup(struct flb_config *config) | |
} | ||
} | ||
|
||
struct mk_list *flb_utils_split(const char *line, int separator, int max_split) | ||
/*
 * quoted_string_len returns the length of a quoted string, not including the
 * quotes.
 *
 * 'str' is expected to point at the opening quote character; that character
 * is consumed and used as the terminator to look for.
 *
 * Counting rules:
 *   - a backslash followed by the opening quote character, or by another
 *     backslash, is an escape sequence and counts as ONE character;
 *   - a backslash followed by anything else counts as an ordinary character;
 *   - a quote character that differs from the opening one (e.g. ' inside
 *     "...") counts as an ordinary character.
 *
 * Returns the number of characters between the quotes after the escapes above
 * are collapsed, or -1 if the string ends before the matching end-quote is
 * seen. An empty 'str' also yields -1.
 */
static int quoted_string_len(const char *str)
{
    int len = 0;
    char quote = *str++; /* Consume the quote character. */

    /* 'quote' doubles as loop state: it is cleared once the end-quote is seen. */
    while (quote != 0) {
        char c = *str++;
        switch (c) {
        case '\0':
            /* Error: string ends before end-quote was seen. */
            return -1;
        case '\\':
            /* Skip escaped quote or \\ so the pair is counted once. */
            if (*str == quote || *str == '\\') {
                str++;
            }
            break;
        case '\'':
        case '"':
            /* End-quote seen: stop iterating. */
            if (c == quote) {
                quote = 0;
            }
            break;
        default:
            break;
        }
        len++;
    }

    /* Go back one character to ignore end-quote */
    len--;

    return len;
}
|
||
/* | ||
* next_token returns the next token in the string 'str' delimited by 'separator'. | ||
* 'out' is set to the beginning of the token. | ||
* 'out_len' is set to the length of the token. | ||
* 'parse_quotes' is set to FLB_TRUE when quotes shall be considered when tokenizing the 'str'. | ||
* The function returns offset to next token in the string. | ||
*/ | ||
static int next_token(const char *str, int separator, char **out, int *out_len, int parse_quotes) { | ||
const char *token_in = str; | ||
char *token_out; | ||
int next_separator = 0; | ||
int quote = 0; /* Parser state: 0 not inside quoted string, or '"' or '\'' when inside quoted string. */ | ||
int len = 0; | ||
int i; | ||
|
||
/* Skip leading separators. */ | ||
while (*token_in == separator) { | ||
token_in++; | ||
} | ||
|
||
/* Should quotes be parsed? Or is token quoted? If not, copy until separator or the end of string. */ | ||
if (parse_quotes == FLB_FALSE || (*token_in != '"' && *token_in != '\'')) { | ||
len = (int)strlen(token_in); | ||
next_separator = mk_string_char_search(token_in, separator, len); | ||
if (next_separator > 0) { | ||
len = next_separator; | ||
} | ||
*out_len = len; | ||
*out = mk_string_copy_substr(token_in, 0, len); | ||
if (*out == NULL) { | ||
return -1; | ||
} | ||
|
||
return (int)(token_in - str) + len; | ||
} | ||
|
||
/* Token is quoted. */ | ||
|
||
len = quoted_string_len(token_in); | ||
if (len < 0) { | ||
return -1; | ||
} | ||
|
||
/* Consume the quote character. */ | ||
quote = *token_in++; | ||
|
||
token_out = flb_malloc(len + 1); | ||
if (!token_out) { | ||
return -1; | ||
} | ||
|
||
/* Copy the token */ | ||
for (i = 0; i < len; i++) { | ||
/* Handle escapes when inside quoted token: | ||
* \" -> " | ||
* \' -> ' | ||
* \\ -> \ | ||
*/ | ||
if (*token_in == '\\' && (token_in[1] == quote || token_in[1] == '\\')) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you have a bug here, if I understood the code correctly, you would expect There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. True, thank you for spotting this! I've now fixed it and added test that would have caught it. |
||
token_in++; | ||
} | ||
token_out[i] = *token_in++; | ||
} | ||
token_out[i] = '\0'; | ||
|
||
*out = token_out; | ||
*out_len = len; | ||
|
||
return (int)(token_in - str); | ||
} | ||
|
||
|
||
static struct mk_list *split(const char *line, int separator, int max_split, int quoted) | ||
{ | ||
int i = 0; | ||
int count = 0; | ||
|
@@ -281,26 +392,15 @@ struct mk_list *flb_utils_split(const char *line, int separator, int max_split) | |
|
||
len = strlen(line); | ||
while (i < len) { | ||
end = mk_string_char_search(line + i, separator, len - i); | ||
if (end >= 0 && end + i < len) { | ||
end += i; | ||
|
||
if (i == (unsigned int) end) { | ||
i++; | ||
continue; | ||
} | ||
|
||
val = mk_string_copy_substr(line, i, end); | ||
val_len = end - i; | ||
} | ||
else { | ||
val = mk_string_copy_substr(line, i, len); | ||
val_len = len - i; | ||
end = len; | ||
end = next_token(line + i, separator, &val, &val_len, quoted); | ||
if (end == -1) { | ||
flb_error("Parsing failed: %s", line); | ||
flb_utils_split_free(list); | ||
return NULL; | ||
} | ||
|
||
/* Update last position */ | ||
i = end; | ||
i += end; | ||
|
||
/* Create new entry */ | ||
new = flb_malloc(sizeof(struct flb_split_entry)); | ||
|
@@ -341,6 +441,17 @@ struct mk_list *flb_utils_split(const char *line, int separator, int max_split) | |
return list; | ||
} | ||
|
||
struct mk_list *flb_utils_split_quoted(const char *line, int separator, int max_split) | ||
{ | ||
return split(line, separator, max_split, FLB_TRUE); | ||
} | ||
|
||
struct mk_list *flb_utils_split(const char *line, int separator, int max_split) | ||
{ | ||
return split(line, separator, max_split, FLB_FALSE); | ||
} | ||
|
||
|
||
void flb_utils_split_free_entry(struct flb_split_entry *entry) | ||
{ | ||
mk_list_del(&entry->_head); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the caller function expects this function to return `-1` on failure, we should check the result of `mk_string_copy_substr`, and if it returns `NULL` we should return `-1`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.