From 91ea1f58b5daa27232c303f1bbeca09245a1aa9f Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 21 Jan 2018 22:16:46 -0800 Subject: [PATCH 1/2] update the doc. --- python/pyspark/sql/udf.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py index c77f19f89a442..1487f9d2463fe 100644 --- a/python/pyspark/sql/udf.py +++ b/python/pyspark/sql/udf.py @@ -200,7 +200,7 @@ def __init__(self, sparkSession): @since("1.3.1") def register(self, name, f, returnType=None): """Registers a Python function (including lambda function) or a user-defined function - in SQL statements. + as a SQL function. :param name: name of the user-defined function in SQL statements. :param f: a Python function, or a user-defined function. The user-defined function can @@ -213,6 +213,10 @@ def register(self, name, f, returnType=None): `returnType` can be optionally specified when `f` is a Python function but not when `f` is a user-defined function. Please see below. + To register a non-deterministic Python function, users need to first build + a nondeterministic user-defined function for the Python function and then register it + as a SQL function. + 1. When `f` is a Python function: `returnType` defaults to string type and can be optionally specified. The produced @@ -297,7 +301,7 @@ def register(self, name, f, returnType=None): @ignore_unicode_prefix @since(2.3) def registerJavaFunction(self, name, javaClassName, returnType=None): - """Register a Java user-defined function so it can be used in SQL statements. + """Register a Java user-defined function as a SQL function. In addition to a name and the function itself, the return type can be optionally specified. When the return type is not specified we would infer it via reflection. @@ -334,7 +338,7 @@ def registerJavaFunction(self, name, javaClassName, returnType=None): @ignore_unicode_prefix @since(2.3) def registerJavaUDAF(self, name, javaClassName): - """Register a Java user-defined aggregate function so it can be used in SQL statements. + """Register a Java user-defined aggregate function as a SQL function. :param name: name of the user-defined aggregate function :param javaClassName: fully qualified name of java class From 73a493af4bf4b0a37dc58863cc75682e8fec26cb Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 21 Jan 2018 23:21:48 -0800 Subject: [PATCH 2/2] update the doc. --- python/pyspark/sql/udf.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py index 1487f9d2463fe..134badb8485f5 100644 --- a/python/pyspark/sql/udf.py +++ b/python/pyspark/sql/udf.py @@ -199,7 +199,7 @@ def __init__(self, sparkSession): @ignore_unicode_prefix @since("1.3.1") def register(self, name, f, returnType=None): - """Registers a Python function (including lambda function) or a user-defined function + """Register a Python function (including lambda function) or a user-defined function as a SQL function. :param name: name of the user-defined function in SQL statements. @@ -210,13 +210,13 @@ def register(self, name, f, returnType=None): be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. :return: a user-defined function. - `returnType` can be optionally specified when `f` is a Python function but not - when `f` is a user-defined function. Please see below. - - To register a non-deterministic Python function, users need to first build + To register a nondeterministic Python function, users need to first build a nondeterministic user-defined function for the Python function and then register it as a SQL function. + `returnType` can be optionally specified when `f` is a Python function but not + when `f` is a user-defined function. Please see below. + 1. When `f` is a Python function: `returnType` defaults to string type and can be optionally specified. The produced