Skip to content

Commit

Permalink
[SPARK-34244][SQL] Remove the Scala function version of regexp_extrac…
Browse files Browse the repository at this point in the history
…t_all

### What changes were proposed in this pull request?
apache#27507 implemented `regexp_extract_all` and added the Scala function version of it.
According to https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/functions.scala#L41-L59, it seems appropriate to remove the Scala function version, even though I think `regexp_extract_all` is very useful, if we simply follow the guidance in that description.

### Why are the changes needed?
`regexp_extract_all` is less common.

### Does this PR introduce _any_ user-facing change?
'No'. `regexp_extract_all` was added in Spark 3.1.0 which isn't released yet.

### How was this patch tested?
Jenkins test.

Closes apache#31346 from beliefer/SPARK-24884-followup.

Authored-by: beliefer <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
  • Loading branch information
beliefer authored and skestle committed Feb 3, 2021
1 parent 07e1f8c commit 88d0ca6
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 39 deletions.
13 changes: 0 additions & 13 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2637,19 +2637,6 @@ object functions {
RegExpExtract(e.expr, lit(exp).expr, lit(groupIdx).expr)
}

/**
 * Applies a Java regex to the given string column and collects, for every match, the text
 * captured by the group at index `groupIdx`.
 *
 * When the regex does not match at all, the result is an empty array. When the regex matches
 * but the requested group did not take part in that match, the match contributes an empty
 * string to the result. An IllegalArgumentException is thrown if `groupIdx` exceeds the
 * number of groups in the regex.
 *
 * @param e        string column to search
 * @param exp      Java regular expression
 * @param groupIdx index of the capture group to extract from each match
 *
 * @group string_funcs
 * @since 3.1.0
 */
def regexp_extract_all(e: Column, exp: String, groupIdx: Int): Column = {
  // Build the three argument expressions in the same order as the call site uses them.
  val subject = e.expr
  val pattern = lit(exp).expr
  val group = lit(groupIdx).expr
  withExpr(RegExpExtractAll(subject, pattern, group))
}

/**
* Replace all substrings of the specified string value that match regexp with rep.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,25 +154,9 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
Row("300", "100") :: Row("400", "100") :: Row("400-400", "100") :: Nil)
}

test("string regex_extract_all") {
  // Pattern capturing the two numbers on either side of a dash.
  val pairPattern = "(\\d+)-(\\d+)"

  // Column "b" stores a pattern per row, but the assertions below pass the pattern as a
  // literal argument and only read column "a".
  val input = Seq(
    ("100-200,300-400", pairPattern),
    ("101-201,301-401", pairPattern),
    ("102-202,302-402", "(\\d+)")).toDF("a", "b")

  // Group 1 yields the left-hand numbers of each pair, group 2 the right-hand numbers.
  val leftNumbers = regexp_extract_all($"a", pairPattern, 1)
  val rightNumbers = regexp_extract_all($"a", pairPattern, 2)

  val expected = Seq(
    Row(Seq("100", "300"), Seq("200", "400")),
    Row(Seq("101", "301"), Seq("201", "401")),
    Row(Seq("102", "302"), Seq("202", "402")))

  checkAnswer(input.select(leftNumbers, rightNumbers), expected)
}

test("non-matching optional group") {
val df = Seq(Tuple1("aaaac")).toDF("s")

// regexp_extract
checkAnswer(
df.select(regexp_extract($"s", "(foo)", 1)),
Row("")
Expand All @@ -181,16 +165,6 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
df.select(regexp_extract($"s", "(a+)(b)?(c)", 2)),
Row("")
)

// regexp_extract_all
checkAnswer(
df.select(regexp_extract_all($"s", "(foo)", 1)),
Row(Seq())
)
checkAnswer(
df.select(regexp_extract_all($"s", "(a+)(b)?(c)", 2)),
Row(Seq(""))
)
}

test("string ascii function") {
Expand Down

0 comments on commit 88d0ca6

Please sign in to comment.