diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index be089eea0b280..5425d311f8c7f 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2733,6 +2733,39 @@ def udf(f=None, returnType=StringType()): | 8| JOHN DOE| 22| +----------+--------------+------------+ """ + + # The following table shows most of Python data and SQL type conversions in normal UDFs that + # are not yet visible to the user. Some of behaviors are buggy and might be changed in the near + # future. The table might have to be eventually documented externally. + # Please see SPARK-25666's PR to see the codes in order to generate the table below. + # + # +-----------------------------+--------------+----------+------+-------+---------------+---------------+--------------------+-----------------------------+----------+----------------------+---------+--------------------+-----------------+------------+--------------+------------------+----------------------+ # noqa + # |SQL Type \ Python Value(Type)|None(NoneType)|True(bool)|1(int)|1(long)| a(str)| a(unicode)| 1970-01-01(date)|1970-01-01 00:00:00(datetime)|1.0(float)|array('i', [1])(array)|[1](list)| (1,)(tuple)| ABC(bytearray)| 1(Decimal)|{'a': 1}(dict)|Row(kwargs=1)(Row)|Row(namedtuple=1)(Row)| # noqa + # +-----------------------------+--------------+----------+------+-------+---------------+---------------+--------------------+-----------------------------+----------+----------------------+---------+--------------------+-----------------+------------+--------------+------------------+----------------------+ # noqa + # | boolean| None| True| None| None| None| None| None| None| None| None| None| None| None| None| None| X| X| # noqa + # | tinyint| None| None| 1| 1| None| None| None| None| None| None| None| None| None| None| None| X| X| # noqa + # | smallint| None| None| 1| 1| None| None| None| None| None| None| None| None| None| None| None| X| X| # noqa + # | int| None| None| 1| 1| None| None| None| None| None| None| None| None| None| None| None| X| X| # noqa + # | bigint| None| None| 1| 1| None| None| None| None| None| None| None| None| None| None| None| X| X| # noqa + # | string| None| u'true'| u'1'| u'1'| u'a'| u'a'|u'java.util.Grego...| u'java.util.Grego...| u'1.0'| u'[I@24a83055'| u'[1]'|u'[Ljava.lang.Obj...| u'[B@49093632'| u'1'| u'{a=1}'| X| X| # noqa + # | date| None| X| X| X| X| X|datetime.date(197...| datetime.date(197...| X| X| X| X| X| X| X| X| X| # noqa + # | timestamp| None| X| X| X| X| X| X| datetime.datetime...| X| X| X| X| X| X| X| X| X| # noqa + # | float| None| None| None| None| None| None| None| None| 1.0| None| None| None| None| None| None| X| X| # noqa + # | double| None| None| None| None| None| None| None| None| 1.0| None| None| None| None| None| None| X| X| # noqa + # | array| None| None| None| None| None| None| None| None| None| [1]| [1]| [1]| [65, 66, 67]| None| None| X| X| # noqa + # | binary| None| None| None| None|bytearray(b'a')|bytearray(b'a')| None| None| None| None| None| None|bytearray(b'ABC')| None| None| X| X| # noqa + # | decimal(10,0)| None| None| None| None| None| None| None| None| None| None| None| None| None|Decimal('1')| None| X| X| # noqa + # | map| None| None| None| None| None| None| None| None| None| None| None| None| None| None| {u'a': 1}| X| X| # noqa + # | struct<_1:int>| None| X| X| X| X| X| X| X| X| X|Row(_1=1)| Row(_1=1)| X| X| Row(_1=None)| Row(_1=1)| Row(_1=1)| # noqa + # +-----------------------------+--------------+----------+------+-------+---------------+---------------+--------------------+-----------------------------+----------+----------------------+---------+--------------------+-----------------+------------+--------------+------------------+----------------------+ # noqa + # + # Note: DDL formatted string is used for 'SQL Type' for simplicity. This string can be + # used in `returnType`. + # Note: The values inside of the table are generated by `repr`. + # Note: Python 2 is used to generate this table since it is used to check the backward + # compatibility often in practice. + # Note: 'X' means it throws an exception during the conversion. + # decorator @udf, @udf(), @udf(dataType()) if f is None or isinstance(f, (str, DataType)): # If DataType has been passed as a positional argument