Skip to content

Commit

Permalink
EQL: Allow Unicode escape sequences in strings
Browse files Browse the repository at this point in the history
Occasionally, it's useful to be able to use non-printable,
RTL (right-to-left) or other non-standard unicode characters
in an EQL query.

Introducing the standard \uXXXX escape sequence as well as
the variable 2-8 char escape sequence \u{XXXXXXXX}, e.g.:

```
\u0023
\u{35}
\u{1f2da}
\u{002acd1}
```

Closes: elastic#62832
  • Loading branch information
matriv committed Mar 17, 2021
1 parent 8a9055a commit 07c0620
Show file tree
Hide file tree
Showing 6 changed files with 507 additions and 392 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,45 @@ public void testIndexWildcardPatterns() throws Exception {
deleteIndex("test2");
}

/**
 * Indexes two documents whose {@code log} field contains a non-ASCII character and checks that
 * an EQL query using the 4-digit unicode escape, and then one using the curly-brace unicode
 * escape, each return exactly the expected document.
 */
@SuppressWarnings("unchecked")
public void testUnicodeChars() throws Exception {
    createIndex("test", Settings.EMPTY, null, null);

    // Bulk payload: action line followed by the document, for ids 1 and 2.
    String docs = "{\"index\": {\"_index\": \"test\", \"_id\": 1}}\n"
        + "{\"event\":{\"category\":\"process\"},\"@timestamp\":\"2020-09-04T12:34:56Z\",\"log\" : \"prefix_ë_suffix\"}\n"
        + "{\"index\": {\"_index\": \"test\", \"_id\": 2}}\n"
        + "{\"event\":{\"category\":\"process\"},\"@timestamp\":\"2020-09-05T12:34:57Z\",\"log\" : \"prefix_𖠋_suffix\"}\n";
    bulkIndex(docs);

    // Each entry is { request body, id of the single event expected back }.
    String[][] scenarios = {
        { "{\"query\":\"process where log==\\\"prefix_\\\\u00eb_suffix\\\"\"}", "1" },
        { "{\"query\":\"process where log==\\\"prefix_\\\\u{01680b}_suffix\\\"\"}", "2" } };

    // The same Request instance is reused for both searches; only its body changes.
    Request searchRequest = new Request("GET", "/test/_eql/search");
    for (String[] scenario : scenarios) {
        searchRequest.setJsonEntity(scenario[0]);
        Response searchResponse = client().performRequest(searchRequest);

        Map<String, Object> body;
        try (InputStream in = searchResponse.getEntity().getContent()) {
            body = XContentHelper.convertToMap(JsonXContent.jsonXContent, in, false);
        }
        Map<String, Object> hitsSection = (Map<String, Object>) body.get("hits");
        List<Map<String, Object>> matched = (List<Map<String, Object>>) hitsSection.get("events");
        assertEquals(1, matched.size());
        assertEquals(scenario[1], matched.get(0).get("_id"));
    }

    deleteIndex("test");
}

private void bulkIndex(String bulk) throws IOException {
Request bulkRequest = new Request("POST", "/_bulk");
bulkRequest.setJsonEntity(bulk);
Expand Down
24 changes: 21 additions & 3 deletions x-pack/plugin/eql/src/main/antlr/EqlBase.g4
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,30 @@ LP: '(';
RP: ')';
PIPE: '|';

// A backslash followed by one of the single-character escape letters or quotes.
fragment STRING_ESCAPE
: '\\' [btnfr"'\\]
;
// A single hexadecimal digit, lower or upper case.
fragment HEX_DIGIT
: [0-9abcdefABCDEF]
;
// A unicode escape: backslash-u followed by exactly four hex digits,
// or backslash-u followed by hex digits wrapped in curly braces.
fragment UNICODE_ESCAPE
: '\\u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
| '\\u' '{' HEX_DIGIT+ '}' // 2-8 hex digits intended; HEX_DIGIT+ itself accepts any count >= 1, so the bound is not enforced by this rule
;
// Any character except carriage return, line feed, double quote and backslash.
fragment UNESCAPED_CHARS
: ~[\r\n"\\]
;

STRING
: '\'' ('\\' [btnfr"'\\] | ~[\r\n'\\])* '\''
| '"' ('\\' [btnfr"'\\] | ~[\r\n"\\])* '"'
: '"' (STRING_ESCAPE | UNICODE_ESCAPE | UNESCAPED_CHARS)* '"'
| '"""' (~[\r\n])*? '"""' '"'? '"'?
// Old style quoting of string, handled as errors in AbstractBuilder
| '\'' ('\\' [btnfr"'\\] | ~[\r\n'\\])* '\''
| '?"' ('\\"' |~["\r\n])* '"'
| '?\'' ('\\\'' |~['\r\n])* '\''
| '"""' (~[\r\n])*? '"""' '"'? '"'?
;

INTEGER_VALUE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ public static String unquoteString(Source source) {
checkForSingleQuotedString(source, text, 0);

text = text.substring(1, text.length() - 1);
StringBuffer resultString = new StringBuffer();
text = handleUnicodeChars(source, text);
StringBuilder resultString = new StringBuilder();
Matcher regexMatcher = slashPattern.matcher(text);

while (regexMatcher.find()) {
Expand Down Expand Up @@ -183,6 +184,50 @@ public static String unquoteString(Source source) {
return resultString.toString();
}

/**
 * Replaces every unicode escape in {@code text} with the character(s) it denotes.
 * <p>
 * Two forms are recognized: backslash-u followed by exactly four hex digits, and
 * backslash-u followed by 2 to 8 hex digits wrapped in curly braces. All other text,
 * including every other backslash escape, is copied through unchanged.
 * <p>
 * The text is scanned left to right and each backslash is consumed together with the
 * character it escapes. This keeps an escaped backslash that happens to be followed by
 * the letter {@code u} from being mistaken for the start of a unicode escape.
 *
 * @param source location of the string literal, used for error reporting
 * @param text   the literal's content without the surrounding quotes
 * @return the text with unicode escapes decoded
 * @throws ParsingException if a unicode escape is truncated, is missing its closing curly
 *                          brace, has a curly-brace body outside of 2-8 characters, or is
 *                          rejected by {@code hexToUnicode}
 */
private static String handleUnicodeChars(Source source, String text) {
    final int length = text.length();
    StringBuilder sb = new StringBuilder(length);

    int i = 0;
    while (i < length) {
        char c = text.charAt(i);

        // Plain character, or a trailing backslash with nothing after it: copy as-is.
        if (c != '\\' || i + 1 >= length) {
            sb.append(c);
            i++;
            continue;
        }

        char escaped = text.charAt(i + 1);
        if (escaped != 'u') {
            // Some other escape (including an escaped backslash): copy the pair verbatim
            // and step over both characters so the second one is never re-examined.
            sb.append(c).append(escaped);
            i += 2;
            continue;
        }

        int endIdx;
        String unicodeSequence;
        if (i + 2 < length && text.charAt(i + 2) == '{') {
            int closing = text.indexOf('}', i + 3);
            if (closing < 0) {
                throw new ParsingException(source, "Unicode sequence [{}] is missing the closing curly brace",
                    text.substring(i));
            }
            endIdx = closing + 1;
            unicodeSequence = text.substring(i + 3, closing);
            int digits = unicodeSequence.length();
            if (digits < 2 || digits > 8) {
                throw new ParsingException(source, "Unicode sequence in curly braces should use [2-8] hex digits, [{}] has [{}]",
                    text.substring(i, endIdx), digits);
            }
        } else {
            endIdx = i + 6;
            if (endIdx > length) {
                throw new ParsingException(source, "Unicode sequence [{}] should use 4 hex digits", text.substring(i));
            }
            unicodeSequence = text.substring(i + 2, endIdx);
        }

        sb.append(hexToUnicode(source, unicodeSequence));
        i = endIdx;
    }
    return sb.toString();
}

/**
 * Converts a hexadecimal code point into the string holding the corresponding character(s).
 *
 * @param source location of the string literal, used for error reporting
 * @param hex    the code point written in hexadecimal, without any prefix
 * @return the character(s) for the code point, as produced by {@link Character#toChars(int)}
 * @throws ParsingException if {@code hex} cannot be parsed as an {@code int} (for example an
 *                          8-digit value above {@code 7fffffff}), evaluates to zero, or is
 *                          not a valid Unicode code point
 */
private static String hexToUnicode(Source source, String hex) {
    int code;
    try {
        code = Integer.parseInt(hex, 16);
    } catch (NumberFormatException e) {
        // Up to 8 hex digits are allowed by the caller, which can overflow an int;
        // report that as a parsing error rather than leaking NumberFormatException.
        throw new ParsingException(source, "Invalid unicode character code [{}]", hex);
    }
    if (code == 0) {
        throw new ParsingException(source, "Unicode sequence results in null");
    }
    try {
        return String.valueOf(Character.toChars(code));
    } catch (IllegalArgumentException e) {
        // Character.toChars rejects values that are not valid Unicode code points.
        throw new ParsingException(source, "Invalid unicode character code [{}]", hex);
    }
}

private static void checkForSingleQuotedString(Source source, String text, int i) {
if (text.charAt(i) == '\'') {
throw new ParsingException(source,
Expand Down
Loading

0 comments on commit 07c0620

Please sign in to comment.