Skip to content

Commit

Permalink
[SPARK-23327][SQL] Update the description and tests of three external…
Browse files Browse the repository at this point in the history
… API or functions

## What changes were proposed in this pull request?
Update the description and tests of three external API or functions `createFunction`, `length` and `repartitionByRange`

## How was this patch tested?
N/A

Author: gatorsmile <[email protected]>

Closes #20495 from gatorsmile/updateFunc.

(cherry picked from commit c36fecc)
Signed-off-by: gatorsmile <[email protected]>
  • Loading branch information
gatorsmile committed Feb 7, 2018
1 parent f9c9132 commit 874d3f8
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 28 deletions.
4 changes: 3 additions & 1 deletion R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1026,7 +1026,9 @@ setMethod("last_day",
})

#' @details
#' \code{length}: Computes the length of a given string or binary column.
#' \code{length}: Computes the character length of a string data or number of bytes
#' of a binary data. The length of string data includes the trailing spaces.
#' The length of binary data includes binary zeros.
#'
#' @rdname column_string_functions
#' @aliases length length,Column-method
Expand Down
8 changes: 5 additions & 3 deletions python/pyspark/sql/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1705,10 +1705,12 @@ def unhex(col):
@ignore_unicode_prefix
@since(1.5)
def length(col):
"""Calculates the length of a string or binary expression.
"""Computes the character length of string data or number of bytes of binary data.
The length of character data includes the trailing spaces. The length of binary data
includes binary zeros.
>>> spark.createDataFrame([('ABC',)], ['a']).select(length('a').alias('length')).collect()
[Row(length=3)]
>>> spark.createDataFrame([('ABC ',)], ['a']).select(length('a').alias('length')).collect()
[Row(length=4)]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.length(_to_java_column(col)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -988,8 +988,11 @@ class SessionCatalog(
// -------------------------------------------------------

/**
* Create a metastore function in the database specified in `funcDefinition`.
* Create a function in the database specified in `funcDefinition`.
* If no such database is specified, create it in the current database.
*
* @param ignoreIfExists: When true, ignore if the function with the specified name exists
* in the specified database.
*/
def createFunction(funcDefinition: CatalogFunction, ignoreIfExists: Boolean): Unit = {
val db = formatDatabaseName(funcDefinition.identifier.database.getOrElse(getCurrentDatabase))
Expand Down Expand Up @@ -1061,7 +1064,7 @@ class SessionCatalog(
}

/**
* Check if the specified function exists.
* Check if the function with the specified name exists.
*/
def functionExists(name: FunctionIdentifier): Boolean = {
val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1653,19 +1653,19 @@ case class Left(str: Expression, len: Expression, child: Expression) extends Run
* A function that returns the char length of the given string expression or
* number of bytes of the given binary expression.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the character length of `expr` or number of bytes in binary data.",
usage = "_FUNC_(expr) - Returns the character length of string data or number of bytes of " +
"binary data. The length of string data includes the trailing spaces. The length of binary " +
"data includes binary zeros.",
examples = """
Examples:
> SELECT _FUNC_('Spark SQL');
9
> SELECT CHAR_LENGTH('Spark SQL');
9
> SELECT CHARACTER_LENGTH('Spark SQL');
9
> SELECT _FUNC_('Spark SQL ');
10
> SELECT CHAR_LENGTH('Spark SQL ');
10
> SELECT CHARACTER_LENGTH('Spark SQL ');
10
""")
// scalastyle:on line.size.limit
case class Length(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = IntegerType
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType))
Expand All @@ -1687,7 +1687,7 @@ case class Length(child: Expression) extends UnaryExpression with ImplicitCastIn
* A function that returns the bit length of the given string or binary expression.
*/
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the bit length of `expr` or number of bits in binary data.",
usage = "_FUNC_(expr) - Returns the bit length of string data or number of bits of binary data.",
examples = """
Examples:
> SELECT _FUNC_('Spark SQL');
Expand Down Expand Up @@ -1716,7 +1716,8 @@ case class BitLength(child: Expression) extends UnaryExpression with ImplicitCas
* A function that returns the byte length of the given string or binary expression.
*/
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the byte length of `expr` or number of bytes in binary data.",
usage = "_FUNC_(expr) - Returns the byte length of string data or number of bytes of binary " +
"data.",
examples = """
Examples:
> SELECT _FUNC_('Spark SQL');
Expand Down
2 changes: 2 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2825,6 +2825,7 @@ class Dataset[T] private[sql](
*
* At least one partition-by expression must be specified.
* When no explicit sort order is specified, "ascending nulls first" is assumed.
* Note, the rows are not sorted in each partition of the resulting Dataset.
*
* @group typedrel
* @since 2.3.0
Expand All @@ -2848,6 +2849,7 @@ class Dataset[T] private[sql](
*
* At least one partition-by expression must be specified.
* When no explicit sort order is specified, "ascending nulls first" is assumed.
* Note, the rows are not sorted in each partition of the resulting Dataset.
*
* @group typedrel
* @since 2.3.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,28 @@ import org.apache.spark.sql.types.{StringType, StructField, StructType}
* CREATE [OR REPLACE] FUNCTION [IF NOT EXISTS] [databaseName.]functionName
*   AS className [USING JAR|FILE 'uri' [, JAR|FILE 'uri']]
* }}}
*
* @param ignoreIfExists: When true, ignore if the function with the specified name exists
* in the specified database.
* @param replace: When true, alter the function with the specified name
*/
case class CreateFunctionCommand(
databaseName: Option[String],
functionName: String,
className: String,
resources: Seq[FunctionResource],
isTemp: Boolean,
ifNotExists: Boolean,
ignoreIfExists: Boolean,
replace: Boolean)
extends RunnableCommand {

if (ifNotExists && replace) {
if (ignoreIfExists && replace) {
throw new AnalysisException("CREATE FUNCTION with both IF NOT EXISTS and REPLACE" +
" is not allowed.")
}

// Disallow to define a temporary function with `IF NOT EXISTS`
if (ifNotExists && isTemp) {
if (ignoreIfExists && isTemp) {
throw new AnalysisException(
"It is not allowed to define a TEMPORARY function with IF NOT EXISTS.")
}
Expand All @@ -79,12 +83,12 @@ case class CreateFunctionCommand(
// Handles `CREATE OR REPLACE FUNCTION AS ... USING ...`
if (replace && catalog.functionExists(func.identifier)) {
// alter the function in the metastore
catalog.alterFunction(CatalogFunction(func.identifier, className, resources))
catalog.alterFunction(func)
} else {
// For a permanent, we will store the metadata into underlying external catalog.
// This function will be loaded into the FunctionRegistry when a query uses it.
// We do not load it into FunctionRegistry right now.
catalog.createFunction(CatalogFunction(func.identifier, className, resources), ifNotExists)
catalog.createFunction(func, ignoreIfExists)
}
}
Seq.empty[Row]
Expand Down
4 changes: 3 additions & 1 deletion sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2267,7 +2267,9 @@ object functions {
}

/**
* Computes the length of a given string or binary column.
* Computes the character length of a given string or number of bytes of a binary string.
* The length of character strings include the trailing spaces. The length of binary strings
* includes binary zeros.
*
* @group string_funcs
* @since 1.5.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,39 +236,39 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
Seq(
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"),
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")),
isTemp = true, ifNotExists = false, replace = false)
isTemp = true, ignoreIfExists = false, replace = false)
val expected2 = CreateFunctionCommand(
Some("hello"),
"world",
"com.matthewrathbone.example.SimpleUDFExample",
Seq(
FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
isTemp = false, ifNotExists = false, replace = false)
isTemp = false, ignoreIfExists = false, replace = false)
val expected3 = CreateFunctionCommand(
None,
"helloworld3",
"com.matthewrathbone.example.SimpleUDFExample",
Seq(
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"),
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")),
isTemp = true, ifNotExists = false, replace = true)
isTemp = true, ignoreIfExists = false, replace = true)
val expected4 = CreateFunctionCommand(
Some("hello"),
"world1",
"com.matthewrathbone.example.SimpleUDFExample",
Seq(
FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
isTemp = false, ifNotExists = false, replace = true)
isTemp = false, ignoreIfExists = false, replace = true)
val expected5 = CreateFunctionCommand(
Some("hello"),
"world2",
"com.matthewrathbone.example.SimpleUDFExample",
Seq(
FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
isTemp = false, ifNotExists = true, replace = false)
isTemp = false, ignoreIfExists = true, replace = false)
comparePlans(parsed1, expected1)
comparePlans(parsed2, expected2)
comparePlans(parsed3, expected3)
Expand Down

0 comments on commit 874d3f8

Please sign in to comment.