From 2ee4f8db76ad5820d8b9a54175392f4a870c307c Mon Sep 17 00:00:00 2001 From: Chittaranjan Prasad Date: Thu, 27 Aug 2020 12:46:44 -0700 Subject: [PATCH 1/4] adds support for 0x, X'', x'' type hex in udf:encode --- .../ksql/function/udf/string/Encode.java | 39 +++++++++++++++++-- .../ksql/function/udf/string/EncodeTest.java | 22 +++++++++++ .../query-validation-tests/encode.json | 16 +++++++- .../test-runner/correct/from_docs/input.json | 1 + 4 files changed, 72 insertions(+), 6 deletions(-) diff --git a/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java b/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java index 8e5d827fda83..4acde2dd7a88 100644 --- a/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java +++ b/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java @@ -75,7 +75,6 @@ public String encode( return ENCODER_MAP.get(encodedString).apply(str); } - interface Encoder { String apply(String input) throws KsqlFunctionException; } @@ -85,7 +84,11 @@ static class HexToAscii implements Encoder { @Override public String apply(final String input) { try { - final byte[] decoded = Hex.decodeHex(input); + //strip away "0x" from front or "X\'" + "\'" from front and back of hex if present + final String processedInput; + processedInput = hexStrip(input); + + final byte[] decoded = Hex.decodeHex(processedInput); return new String(decoded, StandardCharsets.US_ASCII); } catch (DecoderException e) { throw new KsqlFunctionException(e.getMessage()); @@ -98,8 +101,11 @@ static class HexToBase64 implements Encoder { @Override public String apply(final String input) throws KsqlFunctionException { final byte[] decodedHex; + //strip away "0x" from front or "X\'" + "\'" from front and back of hex if present + final String processedInput; + processedInput = hexStrip(input); try { - decodedHex = Hex.decodeHex(input); + decodedHex = Hex.decodeHex(processedInput); } catch (DecoderException 
e) { throw new KsqlFunctionException(e.getMessage()); } @@ -114,8 +120,11 @@ static class HexToUtf8 implements Encoder { @Override public String apply(final String input) throws KsqlFunctionException { final byte[] decodedHex; + //strip away "0x" from front or "X\'" + "\'" from front and back of hex if present + final String processedInput; + processedInput = hexStrip(input); try { - decodedHex = Hex.decodeHex(input); + decodedHex = Hex.decodeHex(processedInput); } catch (DecoderException e) { throw new KsqlFunctionException(e.getMessage()); } @@ -203,4 +212,26 @@ public String apply(final String input) throws KsqlFunctionException { return new String(decodedB64, StandardCharsets.US_ASCII); } } + + /** + Strips away the "0x" from hex of type "0xAB79" and + strips away the "X\'" or "x\'" from front and "\'" from end of hex of type "X'AB79'". + Leaves every other type of hex (like AB79) untouched + + @param hexString unstripped hex String + @return the hex string with the prefix and any enclosing quotes removed + */ + public static String hexStrip(final String hexString) { + final int hexLen = hexString.length(); + + if (hexString.matches("0x.*")) { + //matches with things like "0x" and "0x...." + return hexLen % 2 != 0 ? 
"0" + hexString.substring(2) : hexString.substring(2); + } else if (hexString.matches("(x|X)\'.*\'")) { + //matches with things like "x''", "X''", "x'....'" and "X'....'" + return hexString.substring(2, hexLen - 1); + } else { + return hexString; + } + } } diff --git a/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java b/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java index c228252dc46d..efdf52763448 100644 --- a/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java +++ b/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java @@ -41,6 +41,15 @@ public void shouldEncodeHexToAscii() { assertThat(udf.encode("31202b2031203d2031", "hex", "ascii"), is("1 + 1 = 1")); assertThat(udf.encode("ce95cebbcebbceacceb4ceb1", "hex", "ascii"), is("������������")); assertThat(udf.encode("c39c6265726d656e736368", "hex", "ascii"), is("��bermensch")); + + assertThat(udf.encode("0x48656c6c6f20576f726c6421", "hex", "ascii"), is("Hello World!")); + assertThat(udf.encode("0x9", "hex", "ascii"), is("\t")); + assertThat(udf.encode("0x", "hex", "ascii"), is("")); + assertThat(udf.encode("X'436c6f7564792a7e2a3f'", "hex", "ascii"), is("Cloudy*~*?")); + assertThat(udf.encode("x'4578616d706C6521'", "hex", "ascii"), is("Example!")); + + assertThat(udf.encode("X''", "hex", "ascii"), is("")); + assertThat(udf.encode("x''", "hex", "ascii"), is("")); } @Test @@ -51,6 +60,13 @@ public void shouldEncodeHexToUtf8() { assertThat(udf.encode("ce95cebbcebbceacceb4ceb1", "hex", "utf8"), is("Ελλάδα")); assertThat(udf.encode("c39c6265726d656e736368", "hex", "utf8"), is("Übermensch")); + assertThat(udf.encode("0x4578616d706c6521", "hex", "utf8"), is("Example!")); + assertThat(udf.encode("0x", "hex", "utf8"), is("")); + assertThat(udf.encode("X'506C6174666F726D2D7C5F5F5F5F5F7C2D'", "hex", "utf8"), is("Platform-|_____|-")); + assertThat(udf.encode("x'31202b2031203d2031'", "hex", "utf8"), is("1 + 1 
= 1")); + + assertThat(udf.encode("X''", "hex", "utf8"), is("")); + assertThat(udf.encode("x''", "hex", "utf8"), is("")); } @Test @@ -61,6 +77,12 @@ public void shouldEncodeHexToBase64() { assertThat(udf.encode("ce95cebbcebbceacceb4ceb1", "hex", "base64"), is("zpXOu867zqzOtM6x")); assertThat(udf.encode("c39c6265726d656e736368", "hex", "base64"), is("w5xiZXJtZW5zY2g=")); + assertThat(udf.encode("0x4578616d706c6521", "hex", "base64"), is("RXhhbXBsZSE=")); + assertThat(udf.encode("X'7e8a016abfff'", "hex", "base64"), is("fooBar//")); + assertThat(udf.encode("x'328ba7b5a8a75627b0'", "hex", "base64"), is("MountainView")); + assertThat(udf.encode("0x", "hex", "base64"), is("")); + assertThat(udf.encode("X''", "hex", "base64"), is("")); + assertThat(udf.encode("x''", "hex", "base64"), is("")); } @Test diff --git a/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json b/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json index 6ec9616d81b9..28647129d98f 100644 --- a/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json +++ b/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json @@ -13,13 +13,25 @@ {"topic": "test_topic", "value": {"input_string": "4578616d706C6521"}}, {"topic": "test_topic", "value": {"input_string": "ce95cebbcebbceacceb4ceb1"}}, {"topic": "test_topic", "value": {"input_string": "c39c6265726d656e736368"}}, - {"topic": "test_topic", "value": {"input_string": null}} + {"topic": "test_topic", "value": {"input_string": null}}, + {"topic": "test_topic", "value": {"input_string": "0x4578616d706C6521"}}, + {"topic": "test_topic", "value": {"input_string": "X'4578616d706C6521'"}}, + {"topic": "test_topic", "value": {"input_string": "x'4578616d706C6521'"}}, + {"topic": "test_topic", "value": {"input_string": "0x"}}, + {"topic": "test_topic", "value": {"input_string": "X''"}}, + {"topic": "test_topic", "value": {"input_string": "x''"}} ], "outputs": [ {"topic": "OUTPUT", 
"value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}}, {"topic": "OUTPUT", "value": {"ASCII":"������������", "UTF8": "Ελλάδα", "BASE64": "zpXOu867zqzOtM6x"}}, {"topic": "OUTPUT", "value": {"ASCII":"��bermensch", "UTF8": "Übermensch", "BASE64": "w5xiZXJtZW5zY2g="}}, - {"topic": "OUTPUT", "value": {"HEX":null, "UTF8": null, "BASE64": null}} + {"topic": "OUTPUT", "value": {"HEX":null, "UTF8": null, "BASE64": null}}, + {"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}}, + {"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}}, + {"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}}, + {"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}}, + {"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}}, + {"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}} ] }, { diff --git a/ksqldb-functional-tests/src/test/resources/test-runner/correct/from_docs/input.json b/ksqldb-functional-tests/src/test/resources/test-runner/correct/from_docs/input.json index ab0bfe7a8e28..2afeabd57883 100644 --- a/ksqldb-functional-tests/src/test/resources/test-runner/correct/from_docs/input.json +++ b/ksqldb-functional-tests/src/test/resources/test-runner/correct/from_docs/input.json @@ -1,3 +1,4 @@ + { "inputs": [ {"topic": "test_topic", "timestamp": 0, "key": 0, "value": {"ORDERUNITS": 2.0}}, From a404b0b11b1de09717f0a7c7e6eb87c221c61791 Mon Sep 17 00:00:00 2001 From: Chittaranjan Prasad Date: Fri, 28 Aug 2020 10:37:27 -0700 Subject: [PATCH 2/4] added one line of comment --- .../main/java/io/confluent/ksql/function/udf/string/Encode.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java b/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java index 4acde2dd7a88..930173ed58f9 100644 --- 
a/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java +++ b/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java @@ -226,6 +226,8 @@ public static String hexStrip(final String hexString) { if (hexString.matches("0x.*")) { //matches with things like "0x" and "0x...." + + //add an extra "0" to the front if there is an odd number of digits return hexLen % 2 != 0 ? "0" + hexString.substring(2) : hexString.substring(2); } else if (hexString.matches("(x|X)\'.*\'")) { //matches with things like "x''", "X''", "x'....'" and "X'....'" From d61f269371b4edb316cb55482d925f37eb3f3211 Mon Sep 17 00:00:00 2001 From: Chittaranjan Prasad Date: Fri, 28 Aug 2020 11:39:52 -0700 Subject: [PATCH 3/4] added some more tests for odd length hex and corrupted hex --- .../ksql/function/udf/string/EncodeTest.java | 6 ++++++ .../query-validation-tests/encode.json | 18 ++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java b/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java index efdf52763448..fba113e38677 100644 --- a/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java +++ b/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java @@ -20,6 +20,7 @@ import static org.hamcrest.Matchers.nullValue; import io.confluent.ksql.function.KsqlFunctionException; +import org.junit.Assert; import org.junit.Test; public class EncodeTest { @@ -50,6 +51,11 @@ public void shouldEncodeHexToAscii() { assertThat(udf.encode("X''", "hex", "ascii"), is("")); assertThat(udf.encode("x''", "hex", "ascii"), is("")); + assertThat(udf.encode("0x578616d706C6521", "hex", "ascii"), is("\u0005xample!")); + Assert.assertThrows(KsqlFunctionException.class, () -> udf.encode("578616d706C6521", "hex", "ascii")); + Assert.assertThrows(KsqlFunctionException.class, () -> 
udf.encode("X'578616d706C6521'", "hex", "ascii")); + Assert.assertThrows(KsqlFunctionException.class, () -> udf.encode("x'578616d706C6521'", "hex", "ascii")); + } @Test diff --git a/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json b/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json index 28647129d98f..3004f0be5534 100644 --- a/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json +++ b/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json @@ -19,7 +19,15 @@ {"topic": "test_topic", "value": {"input_string": "x'4578616d706C6521'"}}, {"topic": "test_topic", "value": {"input_string": "0x"}}, {"topic": "test_topic", "value": {"input_string": "X''"}}, - {"topic": "test_topic", "value": {"input_string": "x''"}} + {"topic": "test_topic", "value": {"input_string": "x''"}}, + {"topic": "test_topic", "value": {"input_string": "0x0x"}}, + {"topic": "test_topic", "value": {"input_string": "X'"}}, + {"topic": "test_topic", "value": {"input_string": "x'4578616d706C6521"}}, + {"topic": "test_topic", "value": {"input_string": "x'578616d706C6521'"}}, + {"topic": "test_topic", "value": {"input_string": "0x578616d706C6521"}}, + {"topic": "test_topic", "value": {"input_string": "578616d706C6521"}} + + ], "outputs": [ {"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}}, @@ -31,7 +39,13 @@ {"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}}, {"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}}, {"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}}, - {"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}} + {"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}}, + {"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}}, + {"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}}, + {"topic": "OUTPUT", 
"value": {"ASCII":null, "UTF8": null, "BASE64": null}}, + {"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}}, + {"topic": "OUTPUT", "value": {"ASCII":"\u0005xample!", "UTF8": "\u0005xample!", "BASE64": "BXhhbXBsZSE="}}, + {"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}} ] }, { From d56a975bd6d18318f4001fd2c0e5353433c21f9e Mon Sep 17 00:00:00 2001 From: Chittaranjan Prasad Date: Mon, 31 Aug 2020 11:15:54 -0700 Subject: [PATCH 4/4] remove input.json from pr --- .../src/test/resources/test-runner/correct/from_docs/input.json | 1 - 1 file changed, 1 deletion(-) diff --git a/ksqldb-functional-tests/src/test/resources/test-runner/correct/from_docs/input.json b/ksqldb-functional-tests/src/test/resources/test-runner/correct/from_docs/input.json index 2afeabd57883..ab0bfe7a8e28 100644 --- a/ksqldb-functional-tests/src/test/resources/test-runner/correct/from_docs/input.json +++ b/ksqldb-functional-tests/src/test/resources/test-runner/correct/from_docs/input.json @@ -1,4 +1,3 @@ - { "inputs": [ {"topic": "test_topic", "timestamp": 0, "key": 0, "value": {"ORDERUNITS": 2.0}},