Skip to content

Commit

Permalink
feat: add SIGN, REPLACE and INITCAP (#3189)
Browse files Browse the repository at this point in the history
* feat: add sign function

* feat: add INITCAP and REPLACE udfs

* refactor: update docs, return int instead of double

* docs: reword a sentence
  • Loading branch information
Zara Lim authored Aug 14, 2019
1 parent 51312cc commit ab67684
Show file tree
Hide file tree
Showing 10 changed files with 385 additions and 0 deletions.
13 changes: 13 additions & 0 deletions docs/developer-guide/syntax-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1628,6 +1628,10 @@ Scalar functions
| | | VARCHAR values are supported for the input. The |
| | | return value must be a VARCHAR. |
+------------------------+---------------------------------------------------------------------------+---------------------------------------------------+
| INITCAP | ``INITCAP(col1)`` | Capitalize the first letter in each word and |
| | | convert all other letters to lowercase. Words are |
| | | delimited by whitespace. |
+------------------------+---------------------------------------------------------------------------+---------------------------------------------------+
| LCASE | ``LCASE(col1)`` | Convert a string to lowercase. |
+------------------------+---------------------------------------------------------------------------+---------------------------------------------------+
| LEN | ``LEN(col1)`` | The length of a string. |
Expand Down Expand Up @@ -1675,8 +1679,17 @@ Scalar functions
+------------------------+---------------------------------------------------------------------------+---------------------------------------------------+
| RANDOM | ``RANDOM()`` | Return a random DOUBLE value between 0.0 and 1.0. |
+------------------------+---------------------------------------------------------------------------+---------------------------------------------------+
| REPLACE | ``REPLACE(col1, 'foo', 'bar')`` | Replace all instances of a substring in a string |
| | | with a new string. |
+------------------------+---------------------------------------------------------------------------+---------------------------------------------------+
| ROUND | ``ROUND(col1)`` | Round a value to the nearest BIGINT value. |
+------------------------+---------------------------------------------------------------------------+---------------------------------------------------+
| SIGN | ``SIGN(col1)`` | The sign of a numeric value as an INTEGER: |
| | | * -1 if the argument is negative |
| | | * 0 if the argument is zero |
| | | * 1 if the argument is positive |
| | | * ``null`` if the argument is null |
+------------------------+---------------------------------------------------------------------------+---------------------------------------------------+
| SQRT | ``SQRT(col1)`` | The square root of a value. |
+------------------------+---------------------------------------------------------------------------+---------------------------------------------------+
| SLICE | ``SLICE(col1, from, to)`` | Slices a list based on the supplied indices. The |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright 2018 Confluent Inc.
*
* Licensed under the Confluent Community License (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.ksql.function.udf.math;

import io.confluent.ksql.function.udf.Udf;
import io.confluent.ksql.function.udf.UdfDescription;
import io.confluent.ksql.function.udf.UdfParameter;
import io.confluent.ksql.util.KsqlConstants;

/**
 * KSQL scalar function {@code SIGN}: reports whether a numeric value is negative, zero or
 * positive.
 *
 * <p>One overload exists per supported numeric type (INT, BIGINT and DOUBLE). Every overload
 * returns an {@code Integer} that is -1, 0 or 1, or {@code null} when the argument is
 * {@code null}.
 */
@SuppressWarnings("WeakerAccess") // Invoked via reflection
@UdfDescription(
    name = "sign",
    author = KsqlConstants.CONFLUENT_AUTHOR,
    description = "The sign of a value."
)
public class Sign {

  /**
   * Returns the sign of an INT value.
   *
   * @param value the value to inspect; may be {@code null}
   * @return -1, 0 or 1, or {@code null} if {@code value} is {@code null}
   */
  @Udf(description = "Returns the sign of an INT value, denoted by 1, 0 or -1.")
  public Integer sign(
      @UdfParameter(
          value = "value",
          description = "The value to get the sign of."
      ) final Integer value
  ) {
    return value == null
        ? null
        : Integer.signum(value);
  }

  /**
   * Returns the sign of a BIGINT value.
   *
   * @param value the value to inspect; may be {@code null}
   * @return -1, 0 or 1, or {@code null} if {@code value} is {@code null}
   */
  @Udf(description = "Returns the sign of a BIGINT value, denoted by 1, 0 or -1.")
  public Integer sign(
      @UdfParameter(
          value = "value",
          description = "The value to get the sign of."
      ) final Long value
  ) {
    return value == null
        ? null
        : Long.signum(value);
  }

  /**
   * Returns the sign of a DOUBLE value.
   *
   * @param value the value to inspect; may be {@code null}
   * @return -1, 0 or 1, or {@code null} if {@code value} is {@code null}
   */
  @Udf(description = "Returns the sign of a DOUBLE value, denoted by 1, 0 or -1.")
  public Integer sign(
      @UdfParameter(
          value = "value",
          description = "The value to get the sign of."
      ) final Double value
  ) {
    // Math.signum yields -1.0, 0.0 (or -0.0), 1.0 or NaN; the narrowing cast maps both
    // zeroes and NaN to 0.
    return value == null
        ? null
        : (int) Math.signum(value);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright 2019 Confluent Inc.
*
* Licensed under the Confluent Community License (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.ksql.function.udf.string;

import io.confluent.ksql.function.udf.Udf;
import io.confluent.ksql.function.udf.UdfDescription;
import io.confluent.ksql.function.udf.UdfParameter;
import io.confluent.ksql.util.KsqlConstants;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * KSQL scalar function {@code INITCAP}: upper-cases the first character of every word and
 * lower-cases everything else. Words are maximal runs of non-whitespace characters; all
 * whitespace (leading, trailing and between words) is copied to the result unchanged.
 */
@SuppressWarnings("unused") // Invoked via reflection.
@UdfDescription(name = "initcap",
    author = KsqlConstants.CONFLUENT_AUTHOR,
    description = "Capitalizes the first letter of each word in a string and lowercases the rest."
        + " Words are delimited by whitespace.")
public class InitCap {

  // A word is a maximal run of non-whitespace characters. Compiled once: Pattern instances are
  // immutable and safe to share, so there is no reason to recompile on every invocation.
  private static final Pattern WORD = Pattern.compile("\\S+");

  /**
   * Capitalizes the first character of each whitespace-delimited word of {@code str} and
   * lower-cases the remaining characters.
   *
   * @param str the source string; may be {@code null}
   * @return the converted string with its whitespace preserved, or {@code null} if
   *     {@code str} is {@code null}
   */
  @Udf(description = "Returns the string with the first letter"
      + " of each word capitalized and the rest lowercased")
  public String initcap(
      @UdfParameter(
          description = "The source string."
              + " If null, then function returns null.") final String str) {
    if (str == null) {
      return null;
    }

    final String lowered = str.toLowerCase();
    final StringBuilder result = new StringBuilder(lowered.length());
    final Matcher words = WORD.matcher(lowered);

    // Index in 'lowered' up to which characters have already been copied into 'result'.
    int copiedUpTo = 0;
    while (words.find()) {
      final int first = words.start();
      // Copy the whitespace that precedes this word verbatim, so that leading whitespace
      // and runs of spaces between words are not lost.
      result.append(lowered, copiedUpTo, first);
      result.append(lowered.substring(first, first + 1).toUpperCase());
      result.append(lowered, first + 1, words.end());
      copiedUpTo = words.end();
    }
    // Trailing whitespace, or the whole input when it contains no word at all.
    result.append(lowered, copiedUpTo, lowered.length());

    return result.toString();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright 2019 Confluent Inc.
*
* Licensed under the Confluent Community License (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.ksql.function.udf.string;

import io.confluent.ksql.function.udf.Udf;
import io.confluent.ksql.function.udf.UdfDescription;
import io.confluent.ksql.function.udf.UdfParameter;
import io.confluent.ksql.util.KsqlConstants;

/**
 * KSQL scalar function {@code REPLACE}: substitutes every occurrence of a literal substring
 * with another string. Matching is literal (no regular expressions), as performed by
 * {@link String#replace(CharSequence, CharSequence)}.
 */
@SuppressWarnings("unused") // Invoked via reflection.
@UdfDescription(name = "replace",
    author = KsqlConstants.CONFLUENT_AUTHOR,
    description = "Replaces all occurrences of a substring in a string with a new substring.")
public class Replace {

  /**
   * Replaces every occurrence of {@code oldStr} in {@code str} with {@code newStr}.
   *
   * @param str the source string; may be {@code null}
   * @param oldStr the literal substring to look for; may be {@code null}
   * @param newStr the replacement text; may be {@code null}
   * @return the string after replacement, or {@code null} if any argument is {@code null}
   */
  @Udf(description = "Returns a new string with all occurrences of oldStr in str"
      + " replaced with newStr")
  public String replace(
      @UdfParameter(
          description = "The source string. If null, then function returns null.") final String str,
      @UdfParameter(
          description = "The substring to replace."
              + " If null, then function returns null.") final String oldStr,
      @UdfParameter(
          description = "The string to replace the old substrings with."
              + " If null, then function returns null.") final String newStr) {
    // SQL-style null propagation: a null in any position yields a null result.
    if (str == null || oldStr == null || newStr == null) {
      return null;
    }

    return str.replace(oldStr, newStr);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright 2019 Confluent Inc.
*
* Licensed under the Confluent Community License (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.ksql.function.udf.math;

import org.junit.Before;
import org.junit.Test;

import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
import static org.junit.Assert.*;

/**
 * Unit tests for the {@link Sign} UDF covering the INT, BIGINT and DOUBLE overloads.
 */
public class SignTest {
  private Sign udf;

  @Before
  public void setUp() {
    udf = new Sign();
  }

  @Test
  public void shouldHandleNull() {
    // The casts select the overload under test; a bare null would be ambiguous.
    assertThat(udf.sign((Integer)null), is(nullValue()));
    assertThat(udf.sign((Long)null), is(nullValue()));
    assertThat(udf.sign((Double)null), is(nullValue()));
  }

  @Test
  public void shouldHandleNegative() {
    assertThat(udf.sign(-10.5), is(-1));
    assertThat(udf.sign(-10), is(-1));
    assertThat(udf.sign(-1L), is(-1));
  }

  @Test
  public void shouldHandleZero() {
    assertThat(udf.sign(0.0), is(0));
    assertThat(udf.sign(0), is(0));
    // The BIGINT overload was previously not exercised for zero.
    assertThat(udf.sign(0L), is(0));
  }

  @Test
  public void shouldHandlePositive() {
    assertThat(udf.sign(1), is(1));
    assertThat(udf.sign(1L), is(1));
    assertThat(udf.sign(1.5), is(1));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package io.confluent.ksql.function.udf.string;

import org.junit.Before;
import org.junit.Test;

import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.isEmptyOrNullString;
import static org.junit.Assert.*;

/**
 * Unit tests for the {@link InitCap} UDF.
 */
public class InitCapTest {
  private InitCap udf;

  @Before
  public void setUp() {
    udf = new InitCap();
  }

  @Test
  public void shouldHandleNull() {
    // Assert null strictly: an "empty or null" matcher would also accept a wrong "" result.
    assertNull(udf.initcap(null));
  }

  @Test
  public void shouldHandleEmpty() {
    assertThat(udf.initcap(""), is(""));
  }

  @Test
  public void shouldInitCap() {
    assertThat(udf.initcap("worD"), is("Word"));
    assertThat(udf.initcap("a"), is("A"));
    assertThat(udf.initcap("the Quick br0wn fOx"), is("The Quick Br0wn Fox"));
    assertThat(udf.initcap("spacing should be preserved"), is("Spacing Should Be Preserved"));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright 2019 Confluent Inc.
*
* Licensed under the Confluent Community License (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.ksql.function.udf.string;

import org.junit.Before;
import org.junit.Test;

import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.isEmptyOrNullString;
import static org.junit.Assert.*;

/**
 * Unit tests for the {@link Replace} UDF.
 */
public class ReplaceTest {
  private Replace udf;

  @Before
  public void setUp() {
    udf = new Replace();
  }

  @Test
  public void shouldHandleNull() {
    // Assert null strictly: an "empty or null" matcher would also accept a wrong "" result.
    assertNull(udf.replace(null, "foo", "bar"));
    assertNull(udf.replace("foo", null, "bar"));
    assertNull(udf.replace("foo", "bar", null));
  }

  @Test
  public void shouldReplace() {
    assertThat(udf.replace("foobar", "foo", "bar"), is("barbar"));
    // No occurrence of the search string: the input comes back unchanged.
    assertThat(udf.replace("foobar", "fooo", "bar"), is("foobar"));
    assertThat(udf.replace("foobar", "o", ""), is("fbar"));
    // An empty search string matches at every position, including both ends.
    assertThat(udf.replace("abc", "", "n"), is("nanbncn"));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"comments": [
"Tests covering the use of the INITCAP function."
],
"tests": [
{
"name": "do initcap",
"format": ["JSON"],
"properties": {
"ksql.functions.substring.legacy.args": false
},
"statements": [
"CREATE STREAM TEST (source VARCHAR) WITH (kafka_topic='test_topic', value_format='JSON');",
"CREATE STREAM OUTPUT AS SELECT INITCAP(source) AS INITCAP FROM TEST;"
],
"inputs": [
{"topic": "test_topic", "value": {"source": "some_string"}},
{"topic": "test_topic", "value": {"source": null}},
{"topic": "test_topic", "value": {"source": "the Quick br0wn fOx"}}
],
"outputs": [
{"topic": "OUTPUT", "value": {"INITCAP":"Some_string"}},
{"topic": "OUTPUT", "value": {"INITCAP":null}},
{"topic": "OUTPUT", "value": {"INITCAP":"The Quick Br0wn Fox"}}
]
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,25 @@
{"topic": "OUTPUT", "value": {"I": 0.0, "L": 0.0, "D": 0.0}},
{"topic": "OUTPUT", "value": {"I": 1.0, "L": 1.4142135623730951, "D": 1.7320508075688772}}
]
},
{
"name": "sign",
"statements": [
"CREATE STREAM INPUT (i INT, l BIGINT, d DOUBLE) WITH (kafka_topic='input', value_format='JSON');",
"CREATE STREAM OUTPUT AS SELECT sign(i) i, sign(l) l, sign(d) d FROM INPUT;"
],
"inputs": [
{"topic": "input", "value": {"i": null, "l": null, "d": null}},
{"topic": "input", "value": {"i": -1, "l": -2, "d": -3.0}},
{"topic": "input", "value": {"i": 0, "l": 0, "d": 0.0}},
{"topic": "input", "value": {"i": 1, "l": 2, "d": 3.0}}
],
"outputs": [
{"topic": "OUTPUT", "value": {"I": null, "L": null, "D": null}},
{"topic": "OUTPUT", "value": {"I": -1, "L": -1, "D": -1}},
{"topic": "OUTPUT", "value": {"I": 0, "L": 0, "D": 0}},
{"topic": "OUTPUT", "value": {"I": 1, "L": 1, "D": 1}}
]
}
]
}
Loading

0 comments on commit ab67684

Please sign in to comment.