From 88d0ca62bbaa34b3c5bdabb948feaf47ef536c98 Mon Sep 17 00:00:00 2001
From: beliefer
Date: Tue, 26 Jan 2021 13:52:51 -0800
Subject: [PATCH] [SPARK-34244][SQL] Remove the Scala function version of
 regexp_extract_all

### What changes were proposed in this pull request?
https://github.com/apache/spark/pull/27507 implemented `regexp_extract_all` and also added a Scala function version of it.
According to the guideline at https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/functions.scala#L41-L59, it seems better to remove the Scala function version: although I think `regexp_extract_all` is very useful, it does not meet the bar described there. Scala users can still reach the SQL function through `expr`; a usage sketch follows the diff.

### Why are the changes needed?
`regexp_extract_all` is a less commonly used function, so per the guideline above it does not need a dedicated Scala API.

### Does this PR introduce _any_ user-facing change?
No. The Scala function version of `regexp_extract_all` was only added for Spark 3.1.0, which has not been released yet.

### How was this patch tested?
Jenkins tests.

Closes #31346 from beliefer/SPARK-24884-followup.

Authored-by: beliefer
Signed-off-by: Dongjoon Hyun
---
 .../org/apache/spark/sql/functions.scala     | 13 ----------
 .../spark/sql/StringFunctionsSuite.scala     | 26 -------------------
 2 files changed, 39 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index c67c526d1c192..eaedc813a48fc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2637,19 +2637,6 @@ object functions {
     RegExpExtract(e.expr, lit(exp).expr, lit(groupIdx).expr)
   }
 
-  /**
-   * Extract all specific groups matched by a Java regex, from the specified string column.
-   * If the regex did not match, or the specified group did not match, return an empty array.
-   * if the specified group index exceeds the group count of regex, an IllegalArgumentException
-   * will be thrown.
-   *
-   * @group string_funcs
-   * @since 3.1.0
-   */
-  def regexp_extract_all(e: Column, exp: String, groupIdx: Int): Column = withExpr {
-    RegExpExtractAll(e.expr, lit(exp).expr, lit(groupIdx).expr)
-  }
-
   /**
    * Replace all substrings of the specified string value that match regexp with rep.
    *
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index 8d5166b5398cc..10f2b3004d708 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -154,25 +154,9 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
       Row("300", "100") :: Row("400", "100") :: Row("400-400", "100") :: Nil)
   }
 
-  test("string regex_extract_all") {
-    val df = Seq(
-      ("100-200,300-400", "(\\d+)-(\\d+)"),
-      ("101-201,301-401", "(\\d+)-(\\d+)"),
-      ("102-202,302-402", "(\\d+)")).toDF("a", "b")
-
-    checkAnswer(
-      df.select(
-        regexp_extract_all($"a", "(\\d+)-(\\d+)", 1),
-        regexp_extract_all($"a", "(\\d+)-(\\d+)", 2)),
-      Row(Seq("100", "300"), Seq("200", "400")) ::
-        Row(Seq("101", "301"), Seq("201", "401")) ::
-        Row(Seq("102", "302"), Seq("202", "402")) :: Nil)
-  }
-
   test("non-matching optional group") {
     val df = Seq(Tuple1("aaaac")).toDF("s")
 
-    // regexp_extract
     checkAnswer(
       df.select(regexp_extract($"s", "(foo)", 1)),
       Row("")
@@ -181,16 +165,6 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
       df.select(regexp_extract($"s", "(a+)(b)?(c)", 2)),
       Row("")
     )
-
-    // regexp_extract_all
-    checkAnswer(
-      df.select(regexp_extract_all($"s", "(foo)", 1)),
-      Row(Seq())
-    )
-    checkAnswer(
-      df.select(regexp_extract_all($"s", "(a+)(b)?(c)", 2)),
-      Row(Seq(""))
-    )
   }
 
   test("string ascii function") {
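
For reference, below is a minimal, illustrative Scala sketch (not part of the patch) of how `regexp_extract_all` can still be used after this removal, by going through `expr` and the built-in SQL function instead of the removed helper. It assumes Spark 3.1.0+, and the object name, column name `a`, and sample data are made up for the example.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.expr

// Usage sketch (not part of this patch): invoke the built-in SQL function
// regexp_extract_all through expr(), since the dedicated Scala helper is gone.
// Assumes Spark 3.1.0+; the column name "a" and sample data are illustrative.
object RegexpExtractAllViaExpr {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()
    import spark.implicits._

    val df = Seq("100-200,300-400").toDF("a")

    // Triple-quoted string keeps the backslashes intact for the SQL parser,
    // which reads '(\\d+)-(\\d+)' as the Java regex (\d+)-(\d+).
    df.select(expr("""regexp_extract_all(a, '(\\d+)-(\\d+)', 1)""").as("firsts"))
      .show(false) // expect an array column: [100, 300]

    spark.stop()
  }
}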