diff --git a/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java b/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java index 8e5d827fda83..930173ed58f9 100644 --- a/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java +++ b/ksqldb-engine/src/main/java/io/confluent/ksql/function/udf/string/Encode.java @@ -75,7 +75,6 @@ public String encode( return ENCODER_MAP.get(encodedString).apply(str); } - interface Encoder { String apply(String input) throws KsqlFunctionException; } @@ -85,7 +84,11 @@ static class HexToAscii implements Encoder { @Override public String apply(final String input) { try { - final byte[] decoded = Hex.decodeHex(input); + //strip away "Ox" from front or "X\'" + "\'" from front or back of hex if present + final String processedInput; + processedInput = hexStrip(input); + + final byte[] decoded = Hex.decodeHex(processedInput); return new String(decoded, StandardCharsets.US_ASCII); } catch (DecoderException e) { throw new KsqlFunctionException(e.getMessage()); @@ -98,8 +101,11 @@ static class HexToBase64 implements Encoder { @Override public String apply(final String input) throws KsqlFunctionException { final byte[] decodedHex; + //strip away "Ox" from front or "X\'" + "\'" from front and back of hex if present + final String processedInput; + processedInput = hexStrip(input); try { - decodedHex = Hex.decodeHex(input); + decodedHex = Hex.decodeHex(processedInput); } catch (DecoderException e) { throw new KsqlFunctionException(e.getMessage()); } @@ -114,8 +120,11 @@ static class HexToUtf8 implements Encoder { @Override public String apply(final String input) throws KsqlFunctionException { final byte[] decodedHex; + //strip away "Ox" from front or "X\'" + "\'" from front and back of hex if present + final String processedInput; + processedInput = hexStrip(input); try { - decodedHex = Hex.decodeHex(input); + decodedHex = Hex.decodeHex(processedInput); } catch (DecoderException e) { throw new KsqlFunctionException(e.getMessage()); } @@ -203,4 +212,28 @@ public String apply(final String input) throws KsqlFunctionException { return new String(decodedB64, StandardCharsets.US_ASCII); } } + + /** + Strips away the "0x" from hex of type "0xAB79" and + strips away the "X\'" from front and "\'" from end of hex of type "X'AB79'". + Leaves every other type of hex (like AB79) untouched + + @param hexString unstripped hex String + @return the string after removing + */ + public static String hexStrip(final String hexString) { + final int hexLen = hexString.length(); + + if (hexString.matches("0x.*")) { + //matches with things like "0x" and "0x...." + + //add an extra "0" to the front if there are odd number of digits + return hexLen % 2 != 0 ? "0" + hexString.substring(2) : hexString.substring(2); + } else if (hexString.matches("(x|X)\'.*\'")) { + //matches with things like "x''", "X''", "x'....'" and "X'....'" + return hexString.substring(2, hexLen - 1); + } else { + return hexString; + } + } } diff --git a/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java b/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java index c228252dc46d..fba113e38677 100644 --- a/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java +++ b/ksqldb-engine/src/test/java/io/confluent/ksql/function/udf/string/EncodeTest.java @@ -20,6 +20,7 @@ import static org.hamcrest.Matchers.nullValue; import io.confluent.ksql.function.KsqlFunctionException; +import org.junit.Assert; import org.junit.Test; public class EncodeTest { @@ -41,6 +42,20 @@ public void shouldEncodeHexToAscii() { assertThat(udf.encode("31202b2031203d2031", "hex", "ascii"), is("1 + 1 = 1")); assertThat(udf.encode("ce95cebbcebbceacceb4ceb1", "hex", "ascii"), is("������������")); assertThat(udf.encode("c39c6265726d656e736368", "hex", "ascii"), is("��bermensch")); + + assertThat(udf.encode("0x48656c6c6f20576f726c6421", "hex", "ascii"), is("Hello World!")); + assertThat(udf.encode("0x9", "hex", "ascii"), is("\t")); + assertThat(udf.encode("0x", "hex", "ascii"), is("")); + assertThat(udf.encode("X'436c6f7564792a7e2a3f'", "hex", "ascii"), is("Cloudy*~*?")); + assertThat(udf.encode("x'4578616d706C6521'", "hex", "ascii"), is("Example!")); + + assertThat(udf.encode("X''", "hex", "ascii"), is("")); + assertThat(udf.encode("x''", "hex", "ascii"), is("")); + assertThat(udf.encode("0x578616d706C6521", "hex", "ascii"), is("\u0005xample!")); + Assert.assertThrows(KsqlFunctionException.class, () -> udf.encode("578616d706C6521", "hex", "ascii")); + Assert.assertThrows(KsqlFunctionException.class, () -> udf.encode("X'578616d706C6521'", "hex", "ascii")); + Assert.assertThrows(KsqlFunctionException.class, () -> udf.encode("x'578616d706C6521'", "hex", "ascii")); + } @Test @@ -51,6 +66,13 @@ public void shouldEncodeHexToUtf8() { assertThat(udf.encode("ce95cebbcebbceacceb4ceb1", "hex", "utf8"), is("Ελλάδα")); assertThat(udf.encode("c39c6265726d656e736368", "hex", "utf8"), is("Übermensch")); + assertThat(udf.encode("0x4578616d706c6521", "hex", "utf8"), is("Example!")); + assertThat(udf.encode("0x", "hex", "utf8"), is("")); + assertThat(udf.encode("X'506C6174666F726D2D7C5F5F5F5F5F7C2D'", "hex", "utf8"), is("Platform-|_____|-")); + assertThat(udf.encode("x'31202b2031203d2031'", "hex", "utf8"), is("1 + 1 = 1")); + + assertThat(udf.encode("X''", "hex", "utf8"), is("")); + assertThat(udf.encode("x''", "hex", "utf8"), is("")); } @Test @@ -61,6 +83,12 @@ public void shouldEncodeHexToBase64() { assertThat(udf.encode("ce95cebbcebbceacceb4ceb1", "hex", "base64"), is("zpXOu867zqzOtM6x")); assertThat(udf.encode("c39c6265726d656e736368", "hex", "base64"), is("w5xiZXJtZW5zY2g=")); + assertThat(udf.encode("0x4578616d706c6521", "hex", "base64"), is("RXhhbXBsZSE=")); + assertThat(udf.encode("X'7e8a016abfff'", "hex", "base64"), is("fooBar//")); + assertThat(udf.encode("x'328ba7b5a8a75627b0'", "hex", "base64"), is("MountainView")); + assertThat(udf.encode("0x", "hex", "base64"), is("")); + assertThat(udf.encode("X''", "hex", "base64"), is("")); + assertThat(udf.encode("x''", "hex", "base64"), is("")); } @Test diff --git a/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json b/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json index 6ec9616d81b9..3004f0be5534 100644 --- a/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json +++ b/ksqldb-functional-tests/src/test/resources/query-validation-tests/encode.json @@ -13,13 +13,39 @@ {"topic": "test_topic", "value": {"input_string": "4578616d706C6521"}}, {"topic": "test_topic", "value": {"input_string": "ce95cebbcebbceacceb4ceb1"}}, {"topic": "test_topic", "value": {"input_string": "c39c6265726d656e736368"}}, - {"topic": "test_topic", "value": {"input_string": null}} + {"topic": "test_topic", "value": {"input_string": null}}, + {"topic": "test_topic", "value": {"input_string": "0x4578616d706C6521"}}, + {"topic": "test_topic", "value": {"input_string": "X'4578616d706C6521'"}}, + {"topic": "test_topic", "value": {"input_string": "x'4578616d706C6521'"}}, + {"topic": "test_topic", "value": {"input_string": "0x"}}, + {"topic": "test_topic", "value": {"input_string": "X''"}}, + {"topic": "test_topic", "value": {"input_string": "x''"}}, + {"topic": "test_topic", "value": {"input_string": "0x0x"}}, + {"topic": "test_topic", "value": {"input_string": "X'"}}, + {"topic": "test_topic", "value": {"input_string": "x'4578616d706C6521"}}, + {"topic": "test_topic", "value": {"input_string": "x'578616d706C6521'"}}, + {"topic": "test_topic", "value": {"input_string": "0x578616d706C6521"}}, + {"topic": "test_topic", "value": {"input_string": "578616d706C6521"}} + + ], "outputs": [ {"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}}, {"topic": "OUTPUT", "value": {"ASCII":"������������", "UTF8": "Ελλάδα", "BASE64": "zpXOu867zqzOtM6x"}}, {"topic": "OUTPUT", "value": {"ASCII":"��bermensch", "UTF8": "Übermensch", "BASE64": "w5xiZXJtZW5zY2g="}}, - {"topic": "OUTPUT", "value": {"HEX":null, "UTF8": null, "BASE64": null}} + {"topic": "OUTPUT", "value": {"HEX":null, "UTF8": null, "BASE64": null}}, + {"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}}, + {"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}}, + {"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}}, + {"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}}, + {"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}}, + {"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}}, + {"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}}, + {"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}}, + {"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}}, + {"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}}, + {"topic": "OUTPUT", "value": {"ASCII":"\u0005xample!", "UTF8": "\u0005xample!", "BASE64": "BXhhbXBsZSE="}}, + {"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}} ] }, {