From 8505ac238ebff03a701cda95a0d85483f8eeb911 Mon Sep 17 00:00:00 2001 From: Brian Demers Date: Sat, 21 Dec 2024 20:01:25 -0500 Subject: [PATCH] Handle unicode characters in filters - Updated ANTLR4 grammar to support unicode chars - Added test - Removed logic that escapes/unescapes JSON values, should is handled by at the JSON parser (jackson) Fixes: #564 --- .../src/main/antlr4/imports/Json.g4 | 15 +++++++++----- .../filter/AttributeComparisonExpression.java | 11 +--------- .../filter/ExpressionBuildingListener.java | 20 +++---------------- .../scim/spec/filter/FilterBuilderTest.java | 10 ++++++++++ 4 files changed, 24 insertions(+), 32 deletions(-) diff --git a/scim-spec/scim-spec-schema/src/main/antlr4/imports/Json.g4 b/scim-spec/scim-spec-schema/src/main/antlr4/imports/Json.g4 index 218e76a81..ee541245e 100644 --- a/scim-spec/scim-spec-schema/src/main/antlr4/imports/Json.g4 +++ b/scim-spec/scim-spec-schema/src/main/antlr4/imports/Json.g4 @@ -34,8 +34,13 @@ PLUS: '+'; ZERO: '0'; // Json String - -STRING: '"' (CHAR)* '"'; -fragment CHAR: UNESCAPED | ESCAPED; -fragment UNESCAPED : (' ' .. '!') | ('#' .. '[') | (']' .. '~'); -fragment ESCAPED: '\\' ('"' | '\\' | '/' | 'b' | 'f' | 'n' | 'r' | 't'); \ No newline at end of file +STRING + : '"' (ESC | SAFECODEPOINT)* '"'; +fragment ESC + : '\\' (["\\/bfnrt] | UNICODE); +fragment UNICODE + : 'u' HEX HEX HEX HEX; +fragment HEX + : [0-9a-fA-F]; +fragment SAFECODEPOINT + : ~ ["\\\u0000-\u001F]; diff --git a/scim-spec/scim-spec-schema/src/main/java/org/apache/directory/scim/spec/filter/AttributeComparisonExpression.java b/scim-spec/scim-spec-schema/src/main/java/org/apache/directory/scim/spec/filter/AttributeComparisonExpression.java index 4800aae1c..3bf9d021f 100644 --- a/scim-spec/scim-spec-schema/src/main/java/org/apache/directory/scim/spec/filter/AttributeComparisonExpression.java +++ b/scim-spec/scim-spec-schema/src/main/java/org/apache/directory/scim/spec/filter/AttributeComparisonExpression.java @@ -25,8 +25,6 @@ import java.time.format.DateTimeFormatter; import java.util.Date; -import org.apache.commons.lang3.StringEscapeUtils; - import org.apache.directory.scim.spec.filter.attribute.AttributeReference; import lombok.Value; @@ -88,14 +86,7 @@ private String createCompareValueString() { if (this.compareValue == null) { compareValueString = "null"; } else if (this.compareValue instanceof String) { - // TODO change this to escapeJson() when dependencies get upgraded - String escaped = StringEscapeUtils.escapeEcmaScript((String) this.compareValue) - // StringEscapeUtils follows the outdated JSON spec requiring "/" to be escaped, this could subtly break things - .replaceAll("\\\\/", "/") - // We don't want single-quotes escaped, this will be unnecessary with escapeJson() - .replaceAll("\\\\'", "'"); - - compareValueString = QUOTE + escaped + QUOTE; + compareValueString = QUOTE + this.compareValue + QUOTE; } else if (this.compareValue instanceof Date) { compareValueString = QUOTE + toDateTimeString((Date) this.compareValue) + QUOTE; } else if (this.compareValue instanceof LocalDate) { diff --git a/scim-spec/scim-spec-schema/src/main/java/org/apache/directory/scim/spec/filter/ExpressionBuildingListener.java b/scim-spec/scim-spec-schema/src/main/java/org/apache/directory/scim/spec/filter/ExpressionBuildingListener.java index 87d7b637d..8cfc03d26 100644 --- a/scim-spec/scim-spec-schema/src/main/java/org/apache/directory/scim/spec/filter/ExpressionBuildingListener.java +++ b/scim-spec/scim-spec-schema/src/main/java/org/apache/directory/scim/spec/filter/ExpressionBuildingListener.java @@ -23,10 +23,6 @@ import java.util.Deque; import java.util.Locale; -import org.apache.commons.lang3.StringEscapeUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import org.apache.directory.scim.spec.filter.FilterParser.AttributeCompareExpressionContext; import org.apache.directory.scim.spec.filter.FilterParser.AttributeGroupExpressionContext; import org.apache.directory.scim.spec.filter.FilterParser.AttributeLogicExpressionContext; @@ -41,8 +37,6 @@ public class ExpressionBuildingListener extends FilterBaseListener { - private static final Logger LOG = LoggerFactory.getLogger(ExpressionBuildingListener.class); - protected Deque expressionStack = new ArrayDeque<>(); @Override @@ -160,14 +154,8 @@ public FilterExpression getFilterExpression() { private static Object parseJsonType(String jsonValue) { if (jsonValue.startsWith("\"")) { - String doubleEscaped = jsonValue.substring(1, jsonValue.length() - 1) - // StringEscapeUtils follows the outdated JSON spec requiring "/" to be escaped, this could subtly break things - .replaceAll("\\\\/", "\\\\\\\\/") - // Just in case someone needs a single-quote with a backslash in front of it, this will be unnecessary with escapeJson() - .replaceAll("\\\\'", "\\\\\\\\'"); - - // TODO change this to escapeJson() when dependencies get upgraded - return StringEscapeUtils.unescapeEcmaScript(doubleEscaped); + // remove leading and trailing slash + return jsonValue.substring(1, jsonValue.length() - 1); } else if ("null".equals(jsonValue)) { return null; } else if ("true".equals(jsonValue)) { @@ -182,11 +170,9 @@ private static Object parseJsonType(String jsonValue) { return Integer.parseInt(jsonValue); } } catch (NumberFormatException e) { - LOG.warn("Unable to parse a json number: " + jsonValue); + throw new IllegalStateException("Unable to parse JSON number: " + jsonValue, e); } } - - throw new IllegalStateException("Unable to parse JSON Value"); } } diff --git a/scim-spec/scim-spec-schema/src/test/java/org/apache/directory/scim/spec/filter/FilterBuilderTest.java b/scim-spec/scim-spec-schema/src/test/java/org/apache/directory/scim/spec/filter/FilterBuilderTest.java index e044cdad2..713978320 100644 --- a/scim-spec/scim-spec-schema/src/test/java/org/apache/directory/scim/spec/filter/FilterBuilderTest.java +++ b/scim-spec/scim-spec-schema/src/test/java/org/apache/directory/scim/spec/filter/FilterBuilderTest.java @@ -38,6 +38,16 @@ public void testSimpleAnd() throws FilterParseException { .build(); assertThat(filter).isEqualTo(new Filter("name.givenName EQ \"Bilbo\" AND name.familyName EQ \"Baggins\"")); } + + @Test + public void testExtendedCharsAnd() throws FilterParseException { + Filter filter = FilterBuilder.create() + .equalTo("name.givenName", "Bílbo") + .and(r -> r.equalTo("name.familyName", "Bággins")) + .build(); + Filter expected = new Filter("name.givenName EQ \"Bílbo\" AND name.familyName EQ \"Bággins\""); + assertThat(filter).isEqualTo(expected); + } @Test public void testSimpleOr() throws FilterParseException {